Sharing aggregate states between different aggregate functions

Started by David Rowleyover 10 years ago16 messages
#1David Rowley
david.rowley@2ndquadrant.com
1 attachment(s)

Simon and I have been going over some ideas about how to make improvements
to aggregate performance by cutting down on the duplicate work that's done
when 2 aggregate functions are used where one knows how to satisfy all the
requirements of the other.

To cut a long story short, all our ideas will require some additions or
modifications to CREATE AGGREGATE and also pg_dump support.

Tom came up with a more simple idea, that gets us some of the way, without
all that pg_dump stuff.
/messages/by-id/30851.1433860000@sss.pgh.pa.us

This basically allows an aggregate's state to be shared between other
aggregate functions when both aggregate's transition functions (and a few
other things) match
There's quite a number of aggregates in our standard set which will benefit
from this optimisation.

Please find attached a patch which implements this idea.

The performance improvements are as follows:

create table t1 as
select x.x::numeric from generate_series(1,1000000) x(x);

-- standard case.
select sum(x),avg(x) from t1;

Master:
Time: 350.303 ms
Time: 353.716 ms
Time: 349.703 ms

Patched:
Time: 227.687 ms
Time: 222.563 ms
Time: 224.691 ms

-- extreme case.
select
stddev_samp(x),stddev(x),variance(x),var_samp(x),var_pop(x),stddev_pop(x)
from t1;

Master:
Time: 1464.461 ms
Time: 1462.343 ms
Time: 1450.232 ms

Patched:
Time: 346.473 ms
Time: 348.445 ms
Time: 351.365 ms

Regards

David Rowley

--
David Rowley http://www.2ndQuadrant.com/
<http://www.2ndquadrant.com/&gt;
PostgreSQL Development, 24x7 Support, Training & Services

Attachments:

sharing_agg_states_2c3d4a9_2015-06-15.patchapplication/octet-stream; name=sharing_agg_states_2c3d4a9_2015-06-15.patchDownload
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 2bf48c5..27020d8 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -150,11 +150,16 @@
 #include "utils/tuplesort.h"
 #include "utils/datum.h"
 
-
 /*
- * AggStatePerAggData - per-aggregate working state for the Agg scan
+ * AggStatePerAggStateData
+ *		Stores data about an aggregate state and how the aggregate state must
+ *		be calculated. This struct does not store anything which has any
+ *		concept of how to produce the final aggregate result. In order to
+ *		calculate the final result we must make use of an AggStatePerAggData.
+ *		The reason for this is so that we can share an aggregate state between
+ *		different aggregate functions, in order to save duplicating work.
  */
-typedef struct AggStatePerAggData
+typedef struct AggStatePerAggStateData
 {
 	/*
 	 * These values are set up during ExecInitAgg() and do not change
@@ -186,25 +191,14 @@ typedef struct AggStatePerAggData
 	 */
 	int			numTransInputs;
 
-	/*
-	 * Number of arguments to pass to the finalfn.  This is always at least 1
-	 * (the transition state value) plus any ordered-set direct args. If the
-	 * finalfn wants extra args then we pass nulls corresponding to the
-	 * aggregated input columns.
-	 */
-	int			numFinalArgs;
-
-	/* Oids of transfer functions */
+	/* Oid of transfer function */
 	Oid			transfn_oid;
-	Oid			finalfn_oid;	/* may be InvalidOid */
 
 	/*
-	 * fmgr lookup data for transfer functions --- only valid when
-	 * corresponding oid is not InvalidOid.  Note in particular that fn_strict
-	 * flags are kept here.
+	 * fmgr lookup data for transfer function.
+	 * Note in particular that the fn_strict flag is kept here.
 	 */
 	FmgrInfo	transfn;
-	FmgrInfo	finalfn;
 
 	/* Input collation derived for aggregate */
 	Oid			aggCollation;
@@ -288,7 +282,44 @@ typedef struct AggStatePerAggData
 	 * worth the extra space consumption.
 	 */
 	FunctionCallInfoData transfn_fcinfo;
-}	AggStatePerAggData;
+}	AggStatePerAggStateData;
+
+/*
+ * AggStatePerAggData
+ *		Stores required details on how to produce a final aggregate result.
+ *		To be of any use this must make use of an AggStatePerAggStateData
+ *		before any actual result can be produced. Logical separation of the
+ *		state and the final function data stored here makes sense as it allows
+ *		us to re-use an aggregate's state for more than one aggregate function
+ *		providing they share the same transfn and initValue.
+ */
+typedef struct AggStatePerAggData {
+	/*
+	 * These values are set up during ExecInitAgg() and do not change
+	 * thereafter:
+	 */
+
+	/* index to the corresponding state which this agg should use */
+	int			stateno;
+
+	/* Optional Oid of final function (may be InvalidOid) */
+	Oid			finalfn_oid;
+
+	/*
+	* fmgr lookup data for final function --- only valid when
+	* finalfn_oid oid is not InvalidOid.
+	*/
+	FmgrInfo	finalfn;
+
+	/*
+	* Number of arguments to pass to the finalfn.  This is always at least 1
+	* (the transition state value) plus any ordered-set direct args. If the
+	* finalfn wants extra args then we pass nulls corresponding to the
+	* aggregated input columns.
+	*/
+	int			numFinalArgs;
+
+} AggStatePerAggData;
 
 /*
  * AggStatePerGroupData - per-aggregate-per-group working state
@@ -358,25 +389,35 @@ typedef struct AggHashEntryData
 	AggStatePerGroupData pergroup[FLEXIBLE_ARRAY_MEMBER];
 }	AggHashEntryData;
 
+/*
+ * enum states to mark compatibility between aggregate functions.
+ * These are used to enable various optimizations which are applied to similar
+ * aggregate functions. See comments for find_compatible_aggref() for details.
+ */
+typedef enum AggRefCompatibility {
+	AGGREF_NO_MATCH = 0,	/* state is not compatible between aggregates. */
+	AGGREF_STATE_MATCH,		/* aggregates may share state only. */
+	AGGREF_EXACT_MATCH		/* aggregates may share state and finalfn. */
+} AggRefCompatibility;
 
 static void initialize_phase(AggState *aggstate, int newphase);
 static TupleTableSlot *fetch_input_tuple(AggState *aggstate);
 static void initialize_aggregates(AggState *aggstate,
-					  AggStatePerAgg peragg,
+					  AggStatePerAggState peraggstates,
 					  AggStatePerGroup pergroup,
 					  int numReset);
 static void advance_transition_function(AggState *aggstate,
-							AggStatePerAgg peraggstate,
+							AggStatePerAggState peraggstate,
 							AggStatePerGroup pergroupstate);
 static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup);
 static void process_ordered_aggregate_single(AggState *aggstate,
-								 AggStatePerAgg peraggstate,
+								 AggStatePerAggState peraggstate,
 								 AggStatePerGroup pergroupstate);
 static void process_ordered_aggregate_multi(AggState *aggstate,
-								AggStatePerAgg peraggstate,
+								AggStatePerAggState peraggstate,
 								AggStatePerGroup pergroupstate);
 static void finalize_aggregate(AggState *aggstate,
-				   AggStatePerAgg peraggstate,
+				   AggStatePerAgg peragg,
 				   AggStatePerGroup pergroupstate,
 				   Datum *resultVal, bool *resultIsNull);
 static void prepare_projection_slot(AggState *aggstate,
@@ -396,6 +437,10 @@ static TupleTableSlot *agg_retrieve_direct(AggState *aggstate);
 static void agg_fill_hash_table(AggState *aggstate);
 static TupleTableSlot *agg_retrieve_hash_table(AggState *aggstate);
 static Datum GetAggInitVal(Datum textInitVal, Oid transtype);
+static AggRefCompatibility find_compatible_aggref(Aggref *newagg,
+				  AggState *aggstate, int lastaggno, int *foundaggno);
+static AggRefCompatibility aggref_has_compatible_states(Aggref *newagg,
+				  AggStatePerAgg peragg, AggStatePerAggState peraggstate);
 
 
 /*
@@ -498,7 +543,7 @@ fetch_input_tuple(AggState *aggstate)
  * When called, CurrentMemoryContext should be the per-query context.
  */
 static void
-initialize_aggregate(AggState *aggstate, AggStatePerAgg peraggstate,
+initialize_aggregate(AggState *aggstate, AggStatePerAggState peraggstate,
 					 AggStatePerGroup pergroupstate)
 {
 	/*
@@ -569,7 +614,7 @@ initialize_aggregate(AggState *aggstate, AggStatePerAgg peraggstate,
 }
 
 /*
- * Initialize all aggregates for a new group of input values.
+ * Initialize all aggregate states for a new group of input values.
  *
  * If there are multiple grouping sets, we initialize only the first numReset
  * of them (the grouping sets are ordered so that the most specific one, which
@@ -580,26 +625,26 @@ initialize_aggregate(AggState *aggstate, AggStatePerAgg peraggstate,
  */
 static void
 initialize_aggregates(AggState *aggstate,
-					  AggStatePerAgg peragg,
+					  AggStatePerAggState peraggstates,
 					  AggStatePerGroup pergroup,
 					  int numReset)
 {
-	int			aggno;
+	int			stateno;
 	int			numGroupingSets = Max(aggstate->phase->numsets, 1);
 	int			setno = 0;
 
 	if (numReset < 1)
 		numReset = numGroupingSets;
 
-	for (aggno = 0; aggno < aggstate->numaggs; aggno++)
+	for (stateno = 0; stateno < aggstate->numstates; stateno++)
 	{
-		AggStatePerAgg peraggstate = &peragg[aggno];
+		AggStatePerAggState peraggstate = &peraggstates[stateno];
 
 		for (setno = 0; setno < numReset; setno++)
 		{
 			AggStatePerGroup pergroupstate;
 
-			pergroupstate = &pergroup[aggno + (setno * (aggstate->numaggs))];
+			pergroupstate = &pergroup[stateno + (setno * (aggstate->numstates))];
 
 			aggstate->current_set = setno;
 
@@ -610,7 +655,7 @@ initialize_aggregates(AggState *aggstate,
 
 /*
  * Given new input value(s), advance the transition function of one aggregate
- * within one grouping set only (already set in aggstate->current_set)
+ * state within one grouping set only (already set in aggstate->current_set)
  *
  * The new values (and null flags) have been preloaded into argument positions
  * 1 and up in peraggstate->transfn_fcinfo, so that we needn't copy them again
@@ -621,7 +666,7 @@ initialize_aggregates(AggState *aggstate,
  */
 static void
 advance_transition_function(AggState *aggstate,
-							AggStatePerAgg peraggstate,
+							AggStatePerAggState peraggstate,
 							AggStatePerGroup pergroupstate)
 {
 	FunctionCallInfo fcinfo = &peraggstate->transfn_fcinfo;
@@ -678,8 +723,8 @@ advance_transition_function(AggState *aggstate,
 	/* We run the transition functions in per-input-tuple memory context */
 	oldContext = MemoryContextSwitchTo(aggstate->tmpcontext->ecxt_per_tuple_memory);
 
-	/* set up aggstate->curperagg for AggGetAggref() */
-	aggstate->curperagg = peraggstate;
+	/* set up aggstate->curperaggstate for AggGetAggref() */
+	aggstate->curperaggstate = peraggstate;
 
 	/*
 	 * OK to call the transition function
@@ -690,7 +735,7 @@ advance_transition_function(AggState *aggstate,
 
 	newVal = FunctionCallInvoke(fcinfo);
 
-	aggstate->curperagg = NULL;
+	aggstate->curperaggstate = NULL;
 
 	/*
 	 * If pass-by-ref datatype, must copy the new value into aggcontext and
@@ -718,7 +763,7 @@ advance_transition_function(AggState *aggstate,
 }
 
 /*
- * Advance all the aggregates for one input tuple.  The input tuple
+ * Advance each aggregate state for one input tuple.  The input tuple
  * has been stored in tmpcontext->ecxt_outertuple, so that it is accessible
  * to ExecEvalExpr.  pergroup is the array of per-group structs to use
  * (this might be in a hashtable entry).
@@ -728,14 +773,14 @@ advance_transition_function(AggState *aggstate,
 static void
 advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
 {
-	int			aggno;
+	int			stateno;
 	int			setno = 0;
 	int			numGroupingSets = Max(aggstate->phase->numsets, 1);
-	int			numAggs = aggstate->numaggs;
+	int			numStates = aggstate->numstates;
 
-	for (aggno = 0; aggno < numAggs; aggno++)
+	for (stateno = 0; stateno < numStates; stateno++)
 	{
-		AggStatePerAgg peraggstate = &aggstate->peragg[aggno];
+		AggStatePerAggState peraggstate = &aggstate->peraggstate[stateno];
 		ExprState  *filter = peraggstate->aggrefstate->aggfilter;
 		int			numTransInputs = peraggstate->numTransInputs;
 		int			i;
@@ -806,7 +851,7 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
 
 			for (setno = 0; setno < numGroupingSets; setno++)
 			{
-				AggStatePerGroup pergroupstate = &pergroup[aggno + (setno * numAggs)];
+				AggStatePerGroup pergroupstate = &pergroup[stateno + (setno * numStates)];
 
 				aggstate->current_set = setno;
 
@@ -841,7 +886,7 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
  */
 static void
 process_ordered_aggregate_single(AggState *aggstate,
-								 AggStatePerAgg peraggstate,
+								 AggStatePerAggState peraggstate,
 								 AggStatePerGroup pergroupstate)
 {
 	Datum		oldVal = (Datum) 0;
@@ -930,7 +975,7 @@ process_ordered_aggregate_single(AggState *aggstate,
  */
 static void
 process_ordered_aggregate_multi(AggState *aggstate,
-								AggStatePerAgg peraggstate,
+								AggStatePerAggState peraggstate,
 								AggStatePerGroup pergroupstate)
 {
 	MemoryContext workcontext = aggstate->tmpcontext->ecxt_per_tuple_memory;
@@ -1009,10 +1054,14 @@ process_ordered_aggregate_multi(AggState *aggstate,
  *
  * The finalfunction will be run, and the result delivered, in the
  * output-tuple context; caller's CurrentMemoryContext does not matter.
+ *
+ * The finalfn uses the state as set in the stateno. This also might be
+ * being used by another aggregate function, so it's important that we do
+ * nothing destructive here.
  */
 static void
 finalize_aggregate(AggState *aggstate,
-				   AggStatePerAgg peraggstate,
+				   AggStatePerAgg peragg,
 				   AggStatePerGroup pergroupstate,
 				   Datum *resultVal, bool *resultIsNull)
 {
@@ -1021,6 +1070,7 @@ finalize_aggregate(AggState *aggstate,
 	MemoryContext oldContext;
 	int			i;
 	ListCell   *lc;
+	AggStatePerAggState peraggstate = &aggstate->peraggstate[peragg->stateno];
 
 	oldContext = MemoryContextSwitchTo(aggstate->ss.ps.ps_ExprContext->ecxt_per_tuple_memory);
 
@@ -1046,14 +1096,14 @@ finalize_aggregate(AggState *aggstate,
 	/*
 	 * Apply the agg's finalfn if one is provided, else return transValue.
 	 */
-	if (OidIsValid(peraggstate->finalfn_oid))
+	if (OidIsValid(peragg->finalfn_oid))
 	{
-		int			numFinalArgs = peraggstate->numFinalArgs;
+		int			numFinalArgs = peragg->numFinalArgs;
 
-		/* set up aggstate->curperagg for AggGetAggref() */
-		aggstate->curperagg = peraggstate;
+		/* set up aggstate->curperaggstate for AggGetAggref() */
+		aggstate->curperaggstate = peraggstate;
 
-		InitFunctionCallInfoData(fcinfo, &peraggstate->finalfn,
+		InitFunctionCallInfoData(fcinfo, &peragg->finalfn,
 								 numFinalArgs,
 								 peraggstate->aggCollation,
 								 (void *) aggstate, NULL);
@@ -1082,7 +1132,7 @@ finalize_aggregate(AggState *aggstate,
 			*resultVal = FunctionCallInvoke(&fcinfo);
 			*resultIsNull = fcinfo.isnull;
 		}
-		aggstate->curperagg = NULL;
+		aggstate->curperaggstate = NULL;
 	}
 	else
 	{
@@ -1173,7 +1223,7 @@ prepare_projection_slot(AggState *aggstate, TupleTableSlot *slot, int currentSet
  */
 static void
 finalize_aggregates(AggState *aggstate,
-					AggStatePerAgg peragg,
+					AggStatePerAgg peraggs,
 					AggStatePerGroup pergroup,
 					int currentSet)
 {
@@ -1189,10 +1239,12 @@ finalize_aggregates(AggState *aggstate,
 
 	for (aggno = 0; aggno < aggstate->numaggs; aggno++)
 	{
-		AggStatePerAgg peraggstate = &peragg[aggno];
+		AggStatePerAgg peragg = &peraggs[aggno];
+		int stateno = peragg->stateno;
+		AggStatePerAggState peraggstate = &aggstate->peraggstate[stateno];
 		AggStatePerGroup pergroupstate;
 
-		pergroupstate = &pergroup[aggno + (currentSet * (aggstate->numaggs))];
+		pergroupstate = &pergroup[stateno + (currentSet * (aggstate->numstates))];
 
 		if (peraggstate->numSortCols > 0)
 		{
@@ -1208,7 +1260,7 @@ finalize_aggregates(AggState *aggstate,
 												pergroupstate);
 		}
 
-		finalize_aggregate(aggstate, peraggstate, pergroupstate,
+		finalize_aggregate(aggstate, peragg, pergroupstate,
 						   &aggvalues[aggno], &aggnulls[aggno]);
 	}
 }
@@ -1428,7 +1480,7 @@ lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot)
 	if (isnew)
 	{
 		/* initialize aggregates for new tuple group */
-		initialize_aggregates(aggstate, aggstate->peragg, entry->pergroup, 0);
+		initialize_aggregates(aggstate, aggstate->peraggstate, entry->pergroup, 0);
 	}
 
 	return entry;
@@ -1505,6 +1557,7 @@ agg_retrieve_direct(AggState *aggstate)
 	ExprContext *econtext;
 	ExprContext *tmpcontext;
 	AggStatePerAgg peragg;
+	AggStatePerAggState peraggstate;
 	AggStatePerGroup pergroup;
 	TupleTableSlot *outerslot;
 	TupleTableSlot *firstSlot;
@@ -1527,6 +1580,7 @@ agg_retrieve_direct(AggState *aggstate)
 	tmpcontext = aggstate->tmpcontext;
 
 	peragg = aggstate->peragg;
+	peraggstate = aggstate->peraggstate;
 	pergroup = aggstate->pergroup;
 	firstSlot = aggstate->ss.ss_ScanTupleSlot;
 
@@ -1716,7 +1770,7 @@ agg_retrieve_direct(AggState *aggstate)
 			/*
 			 * Initialize working state for a new input tuple group.
 			 */
-			initialize_aggregates(aggstate, peragg, pergroup, numReset);
+			initialize_aggregates(aggstate, peraggstate, pergroup, numReset);
 
 			if (aggstate->grp_firstTuple != NULL)
 			{
@@ -1945,10 +1999,12 @@ AggState *
 ExecInitAgg(Agg *node, EState *estate, int eflags)
 {
 	AggState   *aggstate;
-	AggStatePerAgg peragg;
+	AggStatePerAgg	peraggs;
+	AggStatePerAggState peraggstates;
 	Plan	   *outerPlan;
 	ExprContext *econtext;
 	int			numaggs,
+				stateno,
 				aggno;
 	int			phase;
 	ListCell   *l;
@@ -1971,12 +2027,14 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 
 	aggstate->aggs = NIL;
 	aggstate->numaggs = 0;
+	aggstate->numstates = 0;
 	aggstate->maxsets = 0;
 	aggstate->hashfunctions = NULL;
 	aggstate->projected_set = -1;
 	aggstate->current_set = 0;
 	aggstate->peragg = NULL;
-	aggstate->curperagg = NULL;
+	aggstate->peraggstate = NULL;
+	aggstate->curperaggstate = NULL;
 	aggstate->agg_done = false;
 	aggstate->input_done = false;
 	aggstate->pergroup = NULL;
@@ -2209,8 +2267,11 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 	econtext->ecxt_aggvalues = (Datum *) palloc0(sizeof(Datum) * numaggs);
 	econtext->ecxt_aggnulls = (bool *) palloc0(sizeof(bool) * numaggs);
 
-	peragg = (AggStatePerAgg) palloc0(sizeof(AggStatePerAggData) * numaggs);
-	aggstate->peragg = peragg;
+	peraggs = (AggStatePerAgg) palloc0(sizeof(AggStatePerAggData)* numaggs);
+	peraggstates = (AggStatePerAggState) palloc0(sizeof(AggStatePerAggStateData) * numaggs);
+
+	aggstate->peragg = peraggs;
+	aggstate->peraggstate = peraggstates;
 
 	if (node->aggstrategy == AGG_HASHED)
 	{
@@ -2232,18 +2293,17 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 
 	/*
 	 * Perform lookups of aggregate function info, and initialize the
-	 * unchanging fields of the per-agg data.  We also detect duplicate
-	 * aggregates (for example, "SELECT sum(x) ... HAVING sum(x) > 0"). When
-	 * duplicates are detected, we only make an AggStatePerAgg struct for the
-	 * first one.  The clones are simply pointed at the same result entry by
-	 * giving them duplicate aggno values.
+	 * unchanging fields of the per-agg data.
 	 */
 	aggno = -1;
+	stateno = -1;
 	foreach(l, aggstate->aggs)
 	{
 		AggrefExprState *aggrefstate = (AggrefExprState *) lfirst(l);
 		Aggref	   *aggref = (Aggref *) aggrefstate->xprstate.expr;
-		AggStatePerAgg peraggstate;
+		AggStatePerAgg peragg;
+		AggStatePerAggState peraggstate;
+		AggRefCompatibility agg_match;
 		Oid			inputTypes[FUNC_MAX_ARGS];
 		int			numArguments;
 		int			numDirectArgs;
@@ -2260,40 +2320,82 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 		Expr	   *transfnexpr,
 				   *finalfnexpr;
 		Datum		textInitVal;
-		int			i;
+		int			existing_aggno;
 		ListCell   *lc;
 
 		/* Planner should have assigned aggregate to correct level */
 		Assert(aggref->agglevelsup == 0);
 
-		/* Look for a previous duplicate aggregate */
-		for (i = 0; i <= aggno; i++)
+		/*
+		 * For performance reasons we detect duplicate aggregates (for example,
+		 * "SELECT sum(x) ... HAVING sum(x) > 0"). When duplicates are
+		 * detected, we only make an AggStatePerAgg struct for the first one.
+		 * The clones are simply pointed at the same result entry by giving
+		 * them duplicate aggno values. We also do our best to reuse duplicate
+		 * aggregate states. The query may use 2 or more aggregate functions
+		 * which share the same transition function and initial value therefore
+		 * would end up calculating the same state. In this case we can just
+		 * calculate the state once, however if the finalfns do not match then
+		 * we must create a new peragg to store the varying finalfn.
+		 */
+
+		/* check if we have previous agg or state matches that can be reused */
+		agg_match = find_compatible_aggref(aggref, aggstate, aggno,
+										   &existing_aggno);
+
+		if (agg_match == AGGREF_EXACT_MATCH)
 		{
-			if (equal(aggref, peragg[i].aggref) &&
-				!contain_volatile_functions((Node *) aggref))
-				break;
+			/* Exact match -- this must be using same aggregate function or
+			 * have the same transfn and finalfn. Just reuse the existing agg.
+			 */
+			aggrefstate->aggno = existing_aggno;
+			continue;
 		}
-		if (i <= aggno)
+
+		else if (agg_match == AGGREF_STATE_MATCH)
 		{
-			/* Found a match to an existing entry, so just mark it */
-			aggrefstate->aggno = i;
-			continue;
+			/*
+			 * State only match. The state can be reused, but the finalfn are
+			 * different. We'll need to create a new peragg for the new finalfn
+			 */
+			int existing_stateno = peraggs[existing_aggno].stateno;
+			peragg = &peraggs[++aggno];
+			peraggstate = &peraggstates[existing_stateno];
+			peragg->stateno = existing_stateno;
+		}
+		else		/* AGGREF_NO_MATCH */
+		{
+			/* Nothing matches, so assign a new state and a new per agg */
+			peraggstate = &peraggstates[++stateno];
+			peragg = &peraggs[++aggno];
+			peragg->stateno = stateno;
 		}
 
-		/* Nope, so assign a new PerAgg record */
-		peraggstate = &peragg[++aggno];
+		/*
+		 * When we pass through the following code in a AGGREF_STATE_MATCH
+		 * type match, the peraggstate will already have been setup by a
+		 * previous iteration of the loop, so we'll try where possible to
+		 * minimize as much rework of setting up the peraggstate as possible.
+		 * In reality it shouldn't matter as we'll just be setting it up the
+		 * same as it was previously, but for performance reasons we do skip
+		 * over some more expensive parts the 2nd time around.
+		 *
 
-		/* Mark Aggref state node with assigned index in the result array */
+		/* Mark Aggref state node with the index of which agg it should use */
 		aggrefstate->aggno = aggno;
 
-		/* Begin filling in the peraggstate data */
-		peraggstate->aggrefstate = aggrefstate;
-		peraggstate->aggref = aggref;
-		peraggstate->sortstates = (Tuplesortstate **)
-			palloc0(sizeof(Tuplesortstate *) * numGroupingSets);
-
-		for (currentsortno = 0; currentsortno < numGroupingSets; currentsortno++)
-			peraggstate->sortstates[currentsortno] = NULL;
+		/* for state matches the peraggstate has already been setup */
+		if (agg_match == AGGREF_NO_MATCH)
+		{
+			/* Begin filling in the peraggstate data */
+			peraggstate->aggrefstate = aggrefstate;
+			peraggstate->aggref = aggref;
+			peraggstate->sortstates = (Tuplesortstate **)
+				palloc0(sizeof(Tuplesortstate *)* numGroupingSets);
+
+			for (currentsortno = 0; currentsortno < numGroupingSets; currentsortno++)
+				peraggstate->sortstates[currentsortno] = NULL;
+		}
 
 		/* Fetch the pg_aggregate row */
 		aggTuple = SearchSysCache1(AGGFNOID,
@@ -2311,8 +2413,12 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 						   get_func_name(aggref->aggfnoid));
 		InvokeFunctionExecuteHook(aggref->aggfnoid);
 
+		/* when reusing the state the transfns should match! */
+		Assert(agg_match == AGGREF_NO_MATCH ||
+			   peraggstate->transfn_oid == aggform->aggtransfn);
+
 		peraggstate->transfn_oid = transfn_oid = aggform->aggtransfn;
-		peraggstate->finalfn_oid = finalfn_oid = aggform->aggfinalfn;
+		peragg->finalfn_oid = finalfn_oid = aggform->aggfinalfn;
 
 		/* Check that aggregate owner has permission to call component fns */
 		{
@@ -2327,12 +2433,20 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 			aggOwner = ((Form_pg_proc) GETSTRUCT(procTuple))->proowner;
 			ReleaseSysCache(procTuple);
 
-			aclresult = pg_proc_aclcheck(transfn_oid, aggOwner,
-										 ACL_EXECUTE);
-			if (aclresult != ACLCHECK_OK)
-				aclcheck_error(aclresult, ACL_KIND_PROC,
-							   get_func_name(transfn_oid));
-			InvokeFunctionExecuteHook(transfn_oid);
+			/*
+			 * If we're reusing an existing state then the permissions for
+			 * transfn were already checked when we setup that state.
+			 */
+			if (agg_match == AGGREF_NO_MATCH)
+			{
+				aclresult = pg_proc_aclcheck(transfn_oid, aggOwner,
+											 ACL_EXECUTE);
+				if (aclresult != ACLCHECK_OK)
+					aclcheck_error(aclresult, ACL_KIND_PROC,
+								   get_func_name(transfn_oid));
+				InvokeFunctionExecuteHook(transfn_oid);
+			}
+
 			if (OidIsValid(finalfn_oid))
 			{
 				aclresult = pg_proc_aclcheck(finalfn_oid, aggOwner,
@@ -2367,9 +2481,9 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 
 		/* Detect how many arguments to pass to the finalfn */
 		if (aggform->aggfinalextra)
-			peraggstate->numFinalArgs = numArguments + 1;
+			peragg->numFinalArgs = numArguments + 1;
 		else
-			peraggstate->numFinalArgs = numDirectArgs + 1;
+			peragg->numFinalArgs = numDirectArgs + 1;
 
 		/* resolve actual type of transition state, if polymorphic */
 		aggtranstype = resolve_aggregate_transtype(aggref->aggfnoid,
@@ -2377,32 +2491,62 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 												   inputTypes,
 												   numArguments);
 
-		/* build expression trees using actual argument & result types */
-		build_aggregate_fnexprs(inputTypes,
-								numArguments,
-								numDirectArgs,
-								peraggstate->numFinalArgs,
-								aggref->aggvariadic,
-								aggtranstype,
-								aggref->aggtype,
-								aggref->inputcollid,
-								transfn_oid,
-								InvalidOid,		/* invtrans is not needed here */
-								finalfn_oid,
-								&transfnexpr,
-								NULL,
-								&finalfnexpr);
-
-		/* set up infrastructure for calling the transfn and finalfn */
-		fmgr_info(transfn_oid, &peraggstate->transfn);
-		fmgr_info_set_expr((Node *) transfnexpr, &peraggstate->transfn);
+		if (agg_match == AGGREF_NO_MATCH)
+		{
+			/* build expression trees using actual argument & result types */
+			build_aggregate_fnexprs(inputTypes,
+									numArguments,
+									numDirectArgs,
+									peragg->numFinalArgs,
+									aggref->aggvariadic,
+									aggtranstype,
+									aggref->aggtype,
+									aggref->inputcollid,
+									transfn_oid,
+									InvalidOid,		/* invtrans is not needed here */
+									finalfn_oid,
+									&transfnexpr,
+									NULL,
+									&finalfnexpr);
+
+			/* set up infrastructure for calling the transfn and finalfn */
+			fmgr_info(transfn_oid, &peraggstate->transfn);
+			fmgr_info_set_expr((Node *) transfnexpr, &peraggstate->transfn);
+		}
+		else if (OidIsValid(finalfn_oid))
+		{
+			/*
+			 * AGGREF_STATE_MATCH -- transfn calling infrastructure already
+			 * built for this state
+			 */
+			build_aggregate_fnexprs(inputTypes,
+									numArguments,
+									numDirectArgs,
+									peragg->numFinalArgs,
+									aggref->aggvariadic,
+									aggtranstype,
+									aggref->aggtype,
+									aggref->inputcollid,
+									transfn_oid,
+									InvalidOid,		/* invtrans is not needed here */
+									finalfn_oid,
+									NULL,			/* transfn already done */
+									NULL,
+									&finalfnexpr);
+		}
 
 		if (OidIsValid(finalfn_oid))
 		{
-			fmgr_info(finalfn_oid, &peraggstate->finalfn);
-			fmgr_info_set_expr((Node *) finalfnexpr, &peraggstate->finalfn);
+			fmgr_info(finalfn_oid, &peragg->finalfn);
+			fmgr_info_set_expr((Node *) finalfnexpr, &peragg->finalfn);
 		}
 
+		/* if it's a state match then everything else has already been done */
+		if (agg_match != AGGREF_NO_MATCH)
+		{
+			ReleaseSysCache(aggTuple);
+			continue;
+		}
 		peraggstate->aggCollation = aggref->inputcollid;
 
 		InitFunctionCallInfoData(peraggstate->transfn_fcinfo,
@@ -2574,8 +2718,12 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 		ReleaseSysCache(aggTuple);
 	}
 
-	/* Update numaggs to match number of unique aggregates found */
+	/*
+	 * Update numaggs to match the number of unique aggregates found.
+	 * Also set numstates to the number of unique aggregate states found.
+	 */
 	aggstate->numaggs = aggno + 1;
+	aggstate->numstates = stateno + 1;
 
 	return aggstate;
 }
@@ -2596,11 +2744,195 @@ GetAggInitVal(Datum textInitVal, Oid transtype)
 	return initVal;
 }
 
+/*
+ * find_compatible_aggref
+ *		Searches the previously looked at aggregates in order to find a
+ *		compatible aggregate or aggregate state. If a positive match is found
+ *		then foundaggno is set to the aggregate which matches.
+ *		When AGGREF_STATE_MATCH is returned the caller must only use the state
+ *		of the foundaggno, not the actual aggno itself.
+ *		When AGGREF_EXACT_MATCH is returned the caller may use both the aggno
+ *		and the state which that aggno uses.
+ *
+ * Scenario 1 -- An aggregate function appears more than once in query:
+ *
+ *		SELECT SUM(x) FROM ... HAVING SUM(x) > 0
+ *
+ * Since in this case the aggregates are both the same we can optimize by
+ * only calculating aggregate state and calling the finalfn just once. This
+ * would be an AGGREF_EXACT_MATCH, meaning both the state and the final
+ * function call are shared.
+ *
+ * Scenario 2 -- Two different aggregate functions appear in the query but
+ *				 the two functions happen to share the same transfn, but have
+ *				 different finalfn.
+ *
+ *		SELECT SUM(x), AVG(x) FROM ...
+ *
+ * Since in our case these two aggregates both share the same transfn, but
+ * naturally they have different finalfns. This situation is classed as an
+ * AGGREF_STATE_MATCH. This means that the same state can be shared by both
+ * aggregates. Since the finalfn call is not the same this cannot be reused.
+ * For this case to be valid the INITCOND of the aggregate, if one exists, must
+ * also match.
+ *
+ * Scenario 3 -- The same aggregate function is called with different
+ *				 parameters.
+ *
+ *		SELECT SUM(x),SUM(DISTINCT x) FROM ...
+ *		SELECT SUM(x),SUM(y) FROM ...
+ *		SELECT SUM(x),SUM(x) FILTER(WHERE x > 0) FROM ...
+ *
+ * All three of the above queries cannot share the same state and have to be
+ * calculated independently.
+ *
+ * Scenario 4 -- Different aggregates with the same parameters and the same
+ *				 transfn and finalfn.
+ *
+ *		SELECT SUM(x),SUM2(x) FROM ...
+ *
+ * A perhaps unlikely scenario where two aggregate functions exist which have,
+ * both the same transfn and the same finalfn. In this case we can report an
+ * AGGREF_EXACT_MATCH, providing the INITCOND of both aggregates are the same.
+ */
+static AggRefCompatibility
+find_compatible_aggref(Aggref *newagg, AggState *aggstate,
+					   int lastaggno, int *foundaggno)
+{
+	int aggno;
+	int statematchaggno;
+	AggStatePerAggState peraggstates;
+	AggStatePerAgg peraggs;
+
+	/* we mustn't reuse the aggref if it contains volatile function calls */
+	if (contain_volatile_functions((Node *)newagg))
+		return AGGREF_NO_MATCH;
+
+	statematchaggno = -1;
+	peraggstates = aggstate->peraggstate;
+	peraggs = aggstate->peragg;
+
+	/*
+	 * Search through the list of already seen aggregates. We'll stop when we
+	 * find an exact match, but until then we'll note any state matches that
+	 * we find. We may have to fall back on these should we fail to find an
+	 * exact match.
+	 */
+	for (aggno = 0; aggno <= lastaggno; aggno++)
+	{
+		AggRefCompatibility matchtype;
+		AggStatePerAgg peragg;
+		AggStatePerAggState peraggstate;
+
+		peragg = &peraggs[aggno];
+		peraggstate = &peraggstates[peragg->stateno];
+
+		/* lookup the match type of this agg */
+		matchtype = aggref_has_compatible_states(newagg, peragg, peraggstate);
+
+		/* if it's an exact match then we're done. */
+		if (matchtype == AGGREF_EXACT_MATCH)
+		{
+			*foundaggno = aggno;
+			return AGGREF_EXACT_MATCH;
+		}
+
+		/* remember any state matches, but keep on looking... */
+		else if (matchtype == AGGREF_STATE_MATCH)
+			statematchaggno = aggno;
+	}
+
+	/* no exact match found, but did we find a state match? */
+	if (statematchaggno >= 0)
+	{
+		*foundaggno = statematchaggno;
+		return AGGREF_STATE_MATCH;
+	}
+
+	return AGGREF_NO_MATCH;
+}
+
+/*
+ * aggref_has_compatible_states
+ *		Determines match type of this aggregate. See comments in
+ *		find_compatible_aggref() for details.
+ */
+static AggRefCompatibility
+aggref_has_compatible_states(Aggref *newagg, AggStatePerAgg peragg,
+							 AggStatePerAggState peraggstate)
+{
+	Aggref *existingRef = peraggstate->aggref;
+
+	/* all of the following must be the same or it's no match */
+	if (newagg->aggtype != existingRef->aggtype ||
+		newagg->aggcollid != existingRef->aggcollid ||
+		newagg->inputcollid != existingRef->inputcollid ||
+		newagg->aggstar != existingRef->aggstar ||
+		newagg->aggvariadic != existingRef->aggvariadic ||
+		newagg->aggkind != existingRef->aggkind ||
+		!equal(newagg->aggdirectargs, existingRef->aggdirectargs) ||
+		!equal(newagg->args, existingRef->args) ||
+		!equal(newagg->aggorder, existingRef->aggorder) ||
+		!equal(newagg->aggdistinct, existingRef->aggdistinct) ||
+		!equal(newagg->aggfilter, existingRef->aggfilter))
+		return AGGREF_NO_MATCH;
+
+	/* if it's the same aggregate function then report exact match */
+	if (newagg->aggfnoid == existingRef->aggfnoid)
+		return AGGREF_EXACT_MATCH;
+	else
+	{
+		/*
+		 * Aggregate functions differ. We'll need to do some more analysis
+		 * before we can know what the match type will be.
+		 * If the transfn match and the initvalue is the same then we can at
+		 * least let the newagg share the state, but if the finalfn also
+		 * happens to match then we can actually still report an exact match.
+		 */
+
+		HeapTuple			aggTuple;
+		Form_pg_aggregate	aggform;
+		bool				initValueIsNull;
+
+		/* Fetch the pg_aggregate row */
+		aggTuple = SearchSysCache1(AGGFNOID, ObjectIdGetDatum(newagg->aggfnoid));
+		if (!HeapTupleIsValid(aggTuple))
+			elog(ERROR, "cache lookup failed for aggregate %u", newagg->aggfnoid);
+		aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
+		ReleaseSysCache(aggTuple);
+
+		/* if the transfns are not the same then the state can't be shared */
+		if (aggform->aggtransfn != peraggstate->transfn_oid)
+			return AGGREF_NO_MATCH;
+
+		SysCacheGetAttr(AGGFNOID, aggTuple,
+			Anum_pg_aggregate_agginitval, &initValueIsNull);
+
+		/*
+		 * If both INITCONDs are null then the outcome depends
+		 * on if the finalfns match.
+		 */
+		if (initValueIsNull && peraggstate->initValueIsNull)
+		{
+			if (aggform->aggfinalfn != peragg->finalfn_oid)
+				return AGGREF_STATE_MATCH;
+			else
+				return AGGREF_EXACT_MATCH;
+		}
+
+		/*
+		 * XXX perhaps we should check the value of the initValue to see if
+		 * they match?
+		 */
+		return AGGREF_NO_MATCH;
+	}
+}
+
 void
 ExecEndAgg(AggState *node)
 {
 	PlanState  *outerPlan;
-	int			aggno;
+	int			stateno;
 	int			numGroupingSets = Max(node->maxsets, 1);
 	int			setno;
 
@@ -2611,9 +2943,9 @@ ExecEndAgg(AggState *node)
 	if (node->sort_out)
 		tuplesort_end(node->sort_out);
 
-	for (aggno = 0; aggno < node->numaggs; aggno++)
+	for (stateno = 0; stateno < node->numstates; stateno++)
 	{
-		AggStatePerAgg peraggstate = &node->peragg[aggno];
+		AggStatePerAggState peraggstate = &node->peraggstate[stateno];
 
 		for (setno = 0; setno < numGroupingSets; setno++)
 		{
@@ -2646,7 +2978,7 @@ ExecReScanAgg(AggState *node)
 	ExprContext *econtext = node->ss.ps.ps_ExprContext;
 	PlanState  *outerPlan = outerPlanState(node);
 	Agg		   *aggnode = (Agg *) node->ss.ps.plan;
-	int			aggno;
+	int			stateno;
 	int			numGroupingSets = Max(node->maxsets, 1);
 	int			setno;
 
@@ -2678,11 +3010,11 @@ ExecReScanAgg(AggState *node)
 	}
 
 	/* Make sure we have closed any open tuplesorts */
-	for (aggno = 0; aggno < node->numaggs; aggno++)
+	for (stateno = 0; stateno < node->numstates; stateno++)
 	{
 		for (setno = 0; setno < numGroupingSets; setno++)
 		{
-			AggStatePerAgg peraggstate = &node->peragg[aggno];
+			AggStatePerAggState peraggstate = &node->peraggstate[stateno];
 
 			if (peraggstate->sortstates[setno])
 			{
@@ -2811,10 +3143,12 @@ AggGetAggref(FunctionCallInfo fcinfo)
 {
 	if (fcinfo->context && IsA(fcinfo->context, AggState))
 	{
-		AggStatePerAgg curperagg = ((AggState *) fcinfo->context)->curperagg;
+		AggStatePerAggState curperaggstate;
+
+		curperaggstate = ((AggState *)fcinfo->context)->curperaggstate;
 
-		if (curperagg)
-			return curperagg->aggref;
+		if (curperaggstate)
+			return curperaggstate->aggref;
 	}
 	return NULL;
 }
diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c
index 478d8ca..123cccb 100644
--- a/src/backend/parser/parse_agg.c
+++ b/src/backend/parser/parse_agg.c
@@ -1863,42 +1863,45 @@ build_aggregate_fnexprs(Oid *agg_input_types,
 	FuncExpr   *fexpr;
 	int			i;
 
-	/*
-	 * Build arg list to use in the transfn FuncExpr node. We really only care
-	 * that transfn can discover the actual argument types at runtime using
-	 * get_fn_expr_argtype(), so it's okay to use Param nodes that don't
-	 * correspond to any real Param.
-	 */
-	argp = makeNode(Param);
-	argp->paramkind = PARAM_EXEC;
-	argp->paramid = -1;
-	argp->paramtype = agg_state_type;
-	argp->paramtypmod = -1;
-	argp->paramcollid = agg_input_collation;
-	argp->location = -1;
-
-	args = list_make1(argp);
-
-	for (i = agg_num_direct_inputs; i < agg_num_inputs; i++)
+	if (transfnexpr != NULL)
 	{
+		/*
+		 * Build arg list to use in the transfn FuncExpr node. We really only care
+		 * that transfn can discover the actual argument types at runtime using
+		 * get_fn_expr_argtype(), so it's okay to use Param nodes that don't
+		 * correspond to any real Param.
+		 */
 		argp = makeNode(Param);
 		argp->paramkind = PARAM_EXEC;
 		argp->paramid = -1;
-		argp->paramtype = agg_input_types[i];
+		argp->paramtype = agg_state_type;
 		argp->paramtypmod = -1;
 		argp->paramcollid = agg_input_collation;
 		argp->location = -1;
-		args = lappend(args, argp);
-	}
 
-	fexpr = makeFuncExpr(transfn_oid,
-						 agg_state_type,
-						 args,
-						 InvalidOid,
-						 agg_input_collation,
-						 COERCE_EXPLICIT_CALL);
-	fexpr->funcvariadic = agg_variadic;
-	*transfnexpr = (Expr *) fexpr;
+		args = list_make1(argp);
+
+		for (i = agg_num_direct_inputs; i < agg_num_inputs; i++)
+		{
+			argp = makeNode(Param);
+			argp->paramkind = PARAM_EXEC;
+			argp->paramid = -1;
+			argp->paramtype = agg_input_types[i];
+			argp->paramtypmod = -1;
+			argp->paramcollid = agg_input_collation;
+			argp->location = -1;
+			args = lappend(args, argp);
+		}
+
+		fexpr = makeFuncExpr(transfn_oid,
+							 agg_state_type,
+							 args,
+							 InvalidOid,
+							 agg_input_collation,
+							 COERCE_EXPLICIT_CALL);
+		fexpr->funcvariadic = agg_variadic;
+		*transfnexpr = (Expr *) fexpr;
+	}
 
 	/*
 	 * Build invtransfn expression if requested, with same args as transfn
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index db5bd7f..af03214 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1815,6 +1815,7 @@ typedef struct GroupState
  */
 /* these structs are private in nodeAgg.c: */
 typedef struct AggStatePerAggData *AggStatePerAgg;
+typedef struct AggStatePerAggStateData *AggStatePerAggState;
 typedef struct AggStatePerGroupData *AggStatePerGroup;
 typedef struct AggStatePerPhaseData *AggStatePerPhase;
 
@@ -1823,14 +1824,16 @@ typedef struct AggState
 	ScanState	ss;				/* its first field is NodeTag */
 	List	   *aggs;			/* all Aggref nodes in targetlist & quals */
 	int			numaggs;		/* length of list (could be zero!) */
+	int			numstates;		/* number of peraggstate items */
 	AggStatePerPhase phase;		/* pointer to current phase data */
 	int			numphases;		/* number of phases */
 	int			current_phase;	/* current phase number */
 	FmgrInfo   *hashfunctions;	/* per-grouping-field hash fns */
 	AggStatePerAgg peragg;		/* per-Aggref information */
+	AggStatePerAggState peraggstate; /* per-Agg State information */
 	ExprContext **aggcontexts;	/* econtexts for long-lived data (per GS) */
 	ExprContext *tmpcontext;	/* econtext for input expressions */
-	AggStatePerAgg curperagg;	/* identifies currently active aggregate */
+	AggStatePerAggState curperaggstate;	/* identifies currently active aggregate */
 	bool		input_done;		/* indicates end of input */
 	bool		agg_done;		/* indicates completion of Agg scan */
 	int			projected_set;	/* The last projected grouping set */
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 8852051..4dad4fe 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1580,3 +1580,171 @@ select least_agg(variadic array[q1,q2]) from int8_tbl;
  -4567890123456789
 (1 row)
 
+-- test aggregates with common transition functions share the same states
+begin work;
+create type avg_state as (total bigint, count bigint);
+create or replace function avg_transfn(state avg_state, n int) returns avg_state as
+$$
+declare new_state avg_state;
+begin
+	raise notice 'avg_transfn called with %', n;
+	if state is null then
+		if n is not null then
+			new_state.total := n;
+			new_state.count := 1;
+			return new_state;
+		end if;
+		return null;
+	elsif n is not null then
+		state.total := state.total + n;
+		state.count := state.count + 1;
+		return state;
+	end if;
+
+	return null;
+end
+$$ language plpgsql;
+create function avg_finalfn(state avg_state) returns int4 as
+$$
+begin
+	if state is null then
+		return NULL;
+	else
+		return state.total / state.count;
+	end if;
+end
+$$ language plpgsql;
+create function sum_finalfn(state avg_state) returns int4 as
+$$
+begin
+	if state is null then
+		return NULL;
+	else
+		return state.total;
+	end if;
+end
+$$ language plpgsql;
+create aggregate my_avg(int4)
+(
+   stype = avg_state,
+   sfunc = avg_transfn,
+   finalfunc = avg_finalfn
+);
+create aggregate my_sum(int4)
+(
+   stype = avg_state,
+   sfunc = avg_transfn,
+   finalfunc = sum_finalfn
+);
+-- aggregate state should be shared as transfn is the same for both aggs.
+select my_avg(one),my_sum(one) from (values(1,2),(3,4)) t(one,two);
+NOTICE:  avg_transfn called with 1
+NOTICE:  avg_transfn called with 3
+ my_avg | my_sum 
+--------+--------
+      2 |      4
+(1 row)
+
+-- shouldn't share states due to the distinctness not matching.
+select my_avg(distinct one),my_sum(one) from (values(1,2),(3,4)) t(one,two);
+NOTICE:  avg_transfn called with 1
+NOTICE:  avg_transfn called with 3
+NOTICE:  avg_transfn called with 1
+NOTICE:  avg_transfn called with 3
+ my_avg | my_sum 
+--------+--------
+      2 |      4
+(1 row)
+
+-- this should not share the state due to different input columns.
+select my_avg(one),my_sum(two) from (values(1,2),(3,4)) t(one,two);
+NOTICE:  avg_transfn called with 2
+NOTICE:  avg_transfn called with 1
+NOTICE:  avg_transfn called with 4
+NOTICE:  avg_transfn called with 3
+ my_avg | my_sum 
+--------+--------
+      2 |      6
+(1 row)
+
+create aggregate my_sum_init(int4)
+(
+   stype = avg_state,
+   sfunc = avg_transfn,
+   finalfunc = sum_finalfn,
+   initcond = '(10,0)'
+);
+create aggregate my_avg_init(int4)
+(
+   stype = avg_state,
+   sfunc = avg_transfn,
+   finalfunc = avg_finalfn,
+   initcond = '(5,0)'
+);
+-- Varying INITCONDs should cause the states not to be shared.
+select my_avg_init(one),my_sum_init(one) from (values(1,2),(3,4)) t(one,two);
+NOTICE:  avg_transfn called with 1
+NOTICE:  avg_transfn called with 1
+NOTICE:  avg_transfn called with 3
+NOTICE:  avg_transfn called with 3
+ my_avg_init | my_sum_init 
+-------------+-------------
+           4 |          14
+(1 row)
+
+rollback;
+-- test aggregate state sharing to ensure it works if one aggregate has a
+-- finalfn and the other one has none.
+begin work;
+create or replace function sum_transfn(state int4, n int4) returns int4 as
+$$
+declare new_state int4;
+begin
+	raise notice 'sum_transfn called with %', n;
+	if state is null then
+		if n is not null then
+			new_state := n;
+			return new_state;
+		end if;
+		return null;
+	elsif n is not null then
+		state := state + n;
+		return state;
+	end if;
+
+	return null;
+end
+$$ language plpgsql;
+create function halfsum_finalfn(state int4) returns int4 as
+$$
+begin
+	if state is null then
+		return NULL;
+	else
+		return state / 2;
+	end if;
+end
+$$ language plpgsql;
+create aggregate my_sum(int4)
+(
+   stype = int4,
+   sfunc = sum_transfn
+);
+create aggregate my_half_sum(int4)
+(
+   stype = int4,
+   sfunc = sum_transfn,
+   finalfunc = halfsum_finalfn
+);
+-- Agg state should be shared even though my_sum has no finalfn
+select my_sum(one),my_half_sum(one) from (values(1),(2),(3),(4)) t(one);
+NOTICE:  sum_transfn called with 1
+NOTICE:  sum_transfn called with 2
+NOTICE:  sum_transfn called with 3
+NOTICE:  sum_transfn called with 4
+ my_sum | my_half_sum 
+--------+-------------
+     10 |           5
+(1 row)
+
+rollback;
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index a84327d..42c3b3c 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -590,3 +590,151 @@ drop view aggordview1;
 -- variadic aggregates
 select least_agg(q1,q2) from int8_tbl;
 select least_agg(variadic array[q1,q2]) from int8_tbl;
+
+
+-- test aggregates with common transition functions share the same states
+begin work;
+
+create type avg_state as (total bigint, count bigint);
+
+create or replace function avg_transfn(state avg_state, n int) returns avg_state as
+$$
+declare new_state avg_state;
+begin
+	raise notice 'avg_transfn called with %', n;
+	if state is null then
+		if n is not null then
+			new_state.total := n;
+			new_state.count := 1;
+			return new_state;
+		end if;
+		return null;
+	elsif n is not null then
+		state.total := state.total + n;
+		state.count := state.count + 1;
+		return state;
+	end if;
+
+	return null;
+end
+$$ language plpgsql;
+
+create function avg_finalfn(state avg_state) returns int4 as
+$$
+begin
+	if state is null then
+		return NULL;
+	else
+		return state.total / state.count;
+	end if;
+end
+$$ language plpgsql;
+
+create function sum_finalfn(state avg_state) returns int4 as
+$$
+begin
+	if state is null then
+		return NULL;
+	else
+		return state.total;
+	end if;
+end
+$$ language plpgsql;
+
+create aggregate my_avg(int4)
+(
+   stype = avg_state,
+   sfunc = avg_transfn,
+   finalfunc = avg_finalfn
+);
+
+create aggregate my_sum(int4)
+(
+   stype = avg_state,
+   sfunc = avg_transfn,
+   finalfunc = sum_finalfn
+);
+
+-- aggregate state should be shared as transfn is the same for both aggs.
+select my_avg(one),my_sum(one) from (values(1,2),(3,4)) t(one,two);
+
+-- shouldn't share states due to the distinctness not matching.
+select my_avg(distinct one),my_sum(one) from (values(1,2),(3,4)) t(one,two);
+
+-- this should not share the state due to different input columns.
+select my_avg(one),my_sum(two) from (values(1,2),(3,4)) t(one,two);
+
+
+create aggregate my_sum_init(int4)
+(
+   stype = avg_state,
+   sfunc = avg_transfn,
+   finalfunc = sum_finalfn,
+   initcond = '(10,0)'
+);
+
+create aggregate my_avg_init(int4)
+(
+   stype = avg_state,
+   sfunc = avg_transfn,
+   finalfunc = avg_finalfn,
+   initcond = '(5,0)'
+);
+
+-- Varying INITCONDs should cause the states not to be shared.
+select my_avg_init(one),my_sum_init(one) from (values(1,2),(3,4)) t(one,two);
+
+rollback;
+
+-- test aggregate state sharing to ensure it works if one aggregate has a
+-- finalfn and the other one has none.
+begin work;
+
+create or replace function sum_transfn(state int4, n int4) returns int4 as
+$$
+declare new_state int4;
+begin
+	raise notice 'sum_transfn called with %', n;
+	if state is null then
+		if n is not null then
+			new_state := n;
+			return new_state;
+		end if;
+		return null;
+	elsif n is not null then
+		state := state + n;
+		return state;
+	end if;
+
+	return null;
+end
+$$ language plpgsql;
+
+create function halfsum_finalfn(state int4) returns int4 as
+$$
+begin
+	if state is null then
+		return NULL;
+	else
+		return state / 2;
+	end if;
+end
+$$ language plpgsql;
+
+create aggregate my_sum(int4)
+(
+   stype = int4,
+   sfunc = sum_transfn
+);
+
+create aggregate my_half_sum(int4)
+(
+   stype = int4,
+   sfunc = sum_transfn,
+   finalfunc = halfsum_finalfn
+);
+
+-- Agg state should be shared even though my_sum has no finalfn
+select my_sum(one),my_half_sum(one) from (values(1),(2),(3),(4)) t(one);
+
+rollback;
#2David Rowley
david.rowley@2ndquadrant.com
In reply to: David Rowley (#1)
1 attachment(s)
Re: Sharing aggregate states between different aggregate functions

On 15 June 2015 at 12:05, David Rowley <david.rowley@2ndquadrant.com> wrote:

This basically allows an aggregate's state to be shared between other
aggregate functions when both aggregate's transition functions (and a few
other things) match
There's quite a number of aggregates in our standard set which will
benefit from this optimisation.

After compiling the original patch with another compiler, I noticed a
couple of warnings.

The attached fixes these.

Regards

David Rowley

--
David Rowley http://www.2ndQuadrant.com/
<http://www.2ndquadrant.com/&gt;
PostgreSQL Development, 24x7 Support, Training & Services

Attachments:

sharing_agg_states_5f0bff89_2015-07-09.patchapplication/octet-stream; name=sharing_agg_states_5f0bff89_2015-07-09.patchDownload
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 2bf48c5..a53683b 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -150,11 +150,16 @@
 #include "utils/tuplesort.h"
 #include "utils/datum.h"
 
-
 /*
- * AggStatePerAggData - per-aggregate working state for the Agg scan
+ * AggStatePerAggStateData
+ *		Stores data about an aggregate state and how the aggregate state must
+ *		be calculated. This struct does not store anything which has any
+ *		concept of how to produce the final aggregate result. In order to
+ *		calculate the final result we must make use of an AggStatePerAggData.
+ *		The reason for this is so that we can share an aggregate state between
+ *		different aggregate functions, in order to save duplicating work.
  */
-typedef struct AggStatePerAggData
+typedef struct AggStatePerAggStateData
 {
 	/*
 	 * These values are set up during ExecInitAgg() and do not change
@@ -186,25 +191,14 @@ typedef struct AggStatePerAggData
 	 */
 	int			numTransInputs;
 
-	/*
-	 * Number of arguments to pass to the finalfn.  This is always at least 1
-	 * (the transition state value) plus any ordered-set direct args. If the
-	 * finalfn wants extra args then we pass nulls corresponding to the
-	 * aggregated input columns.
-	 */
-	int			numFinalArgs;
-
-	/* Oids of transfer functions */
+	/* Oid of transfer function */
 	Oid			transfn_oid;
-	Oid			finalfn_oid;	/* may be InvalidOid */
 
 	/*
-	 * fmgr lookup data for transfer functions --- only valid when
-	 * corresponding oid is not InvalidOid.  Note in particular that fn_strict
-	 * flags are kept here.
+	 * fmgr lookup data for transfer function.
+	 * Note in particular that the fn_strict flag is kept here.
 	 */
 	FmgrInfo	transfn;
-	FmgrInfo	finalfn;
 
 	/* Input collation derived for aggregate */
 	Oid			aggCollation;
@@ -288,7 +282,44 @@ typedef struct AggStatePerAggData
 	 * worth the extra space consumption.
 	 */
 	FunctionCallInfoData transfn_fcinfo;
-}	AggStatePerAggData;
+}	AggStatePerAggStateData;
+
+/*
+ * AggStatePerAggData
+ *		Stores required details on how to produce a final aggregate result.
+ *		To be of any use this must make use of an AggStatePerAggStateData
+ *		before any actual result can be produced. Logical separation of the
+ *		state and the final function data stored here makes sense as it allows
+ *		us to re-use an aggregate's state for more than one aggregate function
+ *		providing they share the same transfn and initValue.
+ */
+typedef struct AggStatePerAggData {
+	/*
+	 * These values are set up during ExecInitAgg() and do not change
+	 * thereafter:
+	 */
+
+	/* index to the corresponding state which this agg should use */
+	int			stateno;
+
+	/* Optional Oid of final function (may be InvalidOid) */
+	Oid			finalfn_oid;
+
+	/*
+	* fmgr lookup data for final function --- only valid when
+	* finalfn_oid oid is not InvalidOid.
+	*/
+	FmgrInfo	finalfn;
+
+	/*
+	* Number of arguments to pass to the finalfn.  This is always at least 1
+	* (the transition state value) plus any ordered-set direct args. If the
+	* finalfn wants extra args then we pass nulls corresponding to the
+	* aggregated input columns.
+	*/
+	int			numFinalArgs;
+
+} AggStatePerAggData;
 
 /*
  * AggStatePerGroupData - per-aggregate-per-group working state
@@ -358,25 +389,35 @@ typedef struct AggHashEntryData
 	AggStatePerGroupData pergroup[FLEXIBLE_ARRAY_MEMBER];
 }	AggHashEntryData;
 
+/*
+ * enum states to mark compatibility between aggregate functions.
+ * These are used to enable various optimizations which are applied to similar
+ * aggregate functions. See comments for find_compatible_aggref() for details.
+ */
+typedef enum AggRefCompatibility {
+	AGGREF_NO_MATCH = 0,	/* state is not compatible between aggregates. */
+	AGGREF_STATE_MATCH,		/* aggregates may share state only. */
+	AGGREF_EXACT_MATCH		/* aggregates may share state and finalfn. */
+} AggRefCompatibility;
 
 static void initialize_phase(AggState *aggstate, int newphase);
 static TupleTableSlot *fetch_input_tuple(AggState *aggstate);
 static void initialize_aggregates(AggState *aggstate,
-					  AggStatePerAgg peragg,
+					  AggStatePerAggState peraggstates,
 					  AggStatePerGroup pergroup,
 					  int numReset);
 static void advance_transition_function(AggState *aggstate,
-							AggStatePerAgg peraggstate,
+							AggStatePerAggState peraggstate,
 							AggStatePerGroup pergroupstate);
 static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup);
 static void process_ordered_aggregate_single(AggState *aggstate,
-								 AggStatePerAgg peraggstate,
+								 AggStatePerAggState peraggstate,
 								 AggStatePerGroup pergroupstate);
 static void process_ordered_aggregate_multi(AggState *aggstate,
-								AggStatePerAgg peraggstate,
+								AggStatePerAggState peraggstate,
 								AggStatePerGroup pergroupstate);
 static void finalize_aggregate(AggState *aggstate,
-				   AggStatePerAgg peraggstate,
+				   AggStatePerAgg peragg,
 				   AggStatePerGroup pergroupstate,
 				   Datum *resultVal, bool *resultIsNull);
 static void prepare_projection_slot(AggState *aggstate,
@@ -396,6 +437,10 @@ static TupleTableSlot *agg_retrieve_direct(AggState *aggstate);
 static void agg_fill_hash_table(AggState *aggstate);
 static TupleTableSlot *agg_retrieve_hash_table(AggState *aggstate);
 static Datum GetAggInitVal(Datum textInitVal, Oid transtype);
+static AggRefCompatibility find_compatible_aggref(Aggref *newagg,
+				  AggState *aggstate, int lastaggno, int *foundaggno);
+static AggRefCompatibility aggref_has_compatible_states(Aggref *newagg,
+				  AggStatePerAgg peragg, AggStatePerAggState peraggstate);
 
 
 /*
@@ -498,7 +543,7 @@ fetch_input_tuple(AggState *aggstate)
  * When called, CurrentMemoryContext should be the per-query context.
  */
 static void
-initialize_aggregate(AggState *aggstate, AggStatePerAgg peraggstate,
+initialize_aggregate(AggState *aggstate, AggStatePerAggState peraggstate,
 					 AggStatePerGroup pergroupstate)
 {
 	/*
@@ -569,7 +614,7 @@ initialize_aggregate(AggState *aggstate, AggStatePerAgg peraggstate,
 }
 
 /*
- * Initialize all aggregates for a new group of input values.
+ * Initialize all aggregate states for a new group of input values.
  *
  * If there are multiple grouping sets, we initialize only the first numReset
  * of them (the grouping sets are ordered so that the most specific one, which
@@ -580,26 +625,26 @@ initialize_aggregate(AggState *aggstate, AggStatePerAgg peraggstate,
  */
 static void
 initialize_aggregates(AggState *aggstate,
-					  AggStatePerAgg peragg,
+					  AggStatePerAggState peraggstates,
 					  AggStatePerGroup pergroup,
 					  int numReset)
 {
-	int			aggno;
+	int			stateno;
 	int			numGroupingSets = Max(aggstate->phase->numsets, 1);
 	int			setno = 0;
 
 	if (numReset < 1)
 		numReset = numGroupingSets;
 
-	for (aggno = 0; aggno < aggstate->numaggs; aggno++)
+	for (stateno = 0; stateno < aggstate->numstates; stateno++)
 	{
-		AggStatePerAgg peraggstate = &peragg[aggno];
+		AggStatePerAggState peraggstate = &peraggstates[stateno];
 
 		for (setno = 0; setno < numReset; setno++)
 		{
 			AggStatePerGroup pergroupstate;
 
-			pergroupstate = &pergroup[aggno + (setno * (aggstate->numaggs))];
+			pergroupstate = &pergroup[stateno + (setno * (aggstate->numstates))];
 
 			aggstate->current_set = setno;
 
@@ -610,7 +655,7 @@ initialize_aggregates(AggState *aggstate,
 
 /*
  * Given new input value(s), advance the transition function of one aggregate
- * within one grouping set only (already set in aggstate->current_set)
+ * state within one grouping set only (already set in aggstate->current_set)
  *
  * The new values (and null flags) have been preloaded into argument positions
  * 1 and up in peraggstate->transfn_fcinfo, so that we needn't copy them again
@@ -621,7 +666,7 @@ initialize_aggregates(AggState *aggstate,
  */
 static void
 advance_transition_function(AggState *aggstate,
-							AggStatePerAgg peraggstate,
+							AggStatePerAggState peraggstate,
 							AggStatePerGroup pergroupstate)
 {
 	FunctionCallInfo fcinfo = &peraggstate->transfn_fcinfo;
@@ -678,8 +723,8 @@ advance_transition_function(AggState *aggstate,
 	/* We run the transition functions in per-input-tuple memory context */
 	oldContext = MemoryContextSwitchTo(aggstate->tmpcontext->ecxt_per_tuple_memory);
 
-	/* set up aggstate->curperagg for AggGetAggref() */
-	aggstate->curperagg = peraggstate;
+	/* set up aggstate->curperaggstate for AggGetAggref() */
+	aggstate->curperaggstate = peraggstate;
 
 	/*
 	 * OK to call the transition function
@@ -690,7 +735,7 @@ advance_transition_function(AggState *aggstate,
 
 	newVal = FunctionCallInvoke(fcinfo);
 
-	aggstate->curperagg = NULL;
+	aggstate->curperaggstate = NULL;
 
 	/*
 	 * If pass-by-ref datatype, must copy the new value into aggcontext and
@@ -718,7 +763,7 @@ advance_transition_function(AggState *aggstate,
 }
 
 /*
- * Advance all the aggregates for one input tuple.  The input tuple
+ * Advance each aggregate state for one input tuple.  The input tuple
  * has been stored in tmpcontext->ecxt_outertuple, so that it is accessible
  * to ExecEvalExpr.  pergroup is the array of per-group structs to use
  * (this might be in a hashtable entry).
@@ -728,14 +773,14 @@ advance_transition_function(AggState *aggstate,
 static void
 advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
 {
-	int			aggno;
+	int			stateno;
 	int			setno = 0;
 	int			numGroupingSets = Max(aggstate->phase->numsets, 1);
-	int			numAggs = aggstate->numaggs;
+	int			numStates = aggstate->numstates;
 
-	for (aggno = 0; aggno < numAggs; aggno++)
+	for (stateno = 0; stateno < numStates; stateno++)
 	{
-		AggStatePerAgg peraggstate = &aggstate->peragg[aggno];
+		AggStatePerAggState peraggstate = &aggstate->peraggstate[stateno];
 		ExprState  *filter = peraggstate->aggrefstate->aggfilter;
 		int			numTransInputs = peraggstate->numTransInputs;
 		int			i;
@@ -806,7 +851,7 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
 
 			for (setno = 0; setno < numGroupingSets; setno++)
 			{
-				AggStatePerGroup pergroupstate = &pergroup[aggno + (setno * numAggs)];
+				AggStatePerGroup pergroupstate = &pergroup[stateno + (setno * numStates)];
 
 				aggstate->current_set = setno;
 
@@ -841,7 +886,7 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
  */
 static void
 process_ordered_aggregate_single(AggState *aggstate,
-								 AggStatePerAgg peraggstate,
+								 AggStatePerAggState peraggstate,
 								 AggStatePerGroup pergroupstate)
 {
 	Datum		oldVal = (Datum) 0;
@@ -930,7 +975,7 @@ process_ordered_aggregate_single(AggState *aggstate,
  */
 static void
 process_ordered_aggregate_multi(AggState *aggstate,
-								AggStatePerAgg peraggstate,
+								AggStatePerAggState peraggstate,
 								AggStatePerGroup pergroupstate)
 {
 	MemoryContext workcontext = aggstate->tmpcontext->ecxt_per_tuple_memory;
@@ -1009,10 +1054,14 @@ process_ordered_aggregate_multi(AggState *aggstate,
  *
  * The finalfunction will be run, and the result delivered, in the
  * output-tuple context; caller's CurrentMemoryContext does not matter.
+ *
+ * The finalfn uses the state as set in the stateno. This also might be
+ * being used by another aggregate function, so it's important that we do
+ * nothing destructive here.
  */
 static void
 finalize_aggregate(AggState *aggstate,
-				   AggStatePerAgg peraggstate,
+				   AggStatePerAgg peragg,
 				   AggStatePerGroup pergroupstate,
 				   Datum *resultVal, bool *resultIsNull)
 {
@@ -1021,6 +1070,7 @@ finalize_aggregate(AggState *aggstate,
 	MemoryContext oldContext;
 	int			i;
 	ListCell   *lc;
+	AggStatePerAggState peraggstate = &aggstate->peraggstate[peragg->stateno];
 
 	oldContext = MemoryContextSwitchTo(aggstate->ss.ps.ps_ExprContext->ecxt_per_tuple_memory);
 
@@ -1046,14 +1096,14 @@ finalize_aggregate(AggState *aggstate,
 	/*
 	 * Apply the agg's finalfn if one is provided, else return transValue.
 	 */
-	if (OidIsValid(peraggstate->finalfn_oid))
+	if (OidIsValid(peragg->finalfn_oid))
 	{
-		int			numFinalArgs = peraggstate->numFinalArgs;
+		int			numFinalArgs = peragg->numFinalArgs;
 
-		/* set up aggstate->curperagg for AggGetAggref() */
-		aggstate->curperagg = peraggstate;
+		/* set up aggstate->curperaggstate for AggGetAggref() */
+		aggstate->curperaggstate = peraggstate;
 
-		InitFunctionCallInfoData(fcinfo, &peraggstate->finalfn,
+		InitFunctionCallInfoData(fcinfo, &peragg->finalfn,
 								 numFinalArgs,
 								 peraggstate->aggCollation,
 								 (void *) aggstate, NULL);
@@ -1082,7 +1132,7 @@ finalize_aggregate(AggState *aggstate,
 			*resultVal = FunctionCallInvoke(&fcinfo);
 			*resultIsNull = fcinfo.isnull;
 		}
-		aggstate->curperagg = NULL;
+		aggstate->curperaggstate = NULL;
 	}
 	else
 	{
@@ -1173,7 +1223,7 @@ prepare_projection_slot(AggState *aggstate, TupleTableSlot *slot, int currentSet
  */
 static void
 finalize_aggregates(AggState *aggstate,
-					AggStatePerAgg peragg,
+					AggStatePerAgg peraggs,
 					AggStatePerGroup pergroup,
 					int currentSet)
 {
@@ -1189,10 +1239,12 @@ finalize_aggregates(AggState *aggstate,
 
 	for (aggno = 0; aggno < aggstate->numaggs; aggno++)
 	{
-		AggStatePerAgg peraggstate = &peragg[aggno];
+		AggStatePerAgg peragg = &peraggs[aggno];
+		int stateno = peragg->stateno;
+		AggStatePerAggState peraggstate = &aggstate->peraggstate[stateno];
 		AggStatePerGroup pergroupstate;
 
-		pergroupstate = &pergroup[aggno + (currentSet * (aggstate->numaggs))];
+		pergroupstate = &pergroup[stateno + (currentSet * (aggstate->numstates))];
 
 		if (peraggstate->numSortCols > 0)
 		{
@@ -1208,7 +1260,7 @@ finalize_aggregates(AggState *aggstate,
 												pergroupstate);
 		}
 
-		finalize_aggregate(aggstate, peraggstate, pergroupstate,
+		finalize_aggregate(aggstate, peragg, pergroupstate,
 						   &aggvalues[aggno], &aggnulls[aggno]);
 	}
 }
@@ -1428,7 +1480,7 @@ lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot)
 	if (isnew)
 	{
 		/* initialize aggregates for new tuple group */
-		initialize_aggregates(aggstate, aggstate->peragg, entry->pergroup, 0);
+		initialize_aggregates(aggstate, aggstate->peraggstate, entry->pergroup, 0);
 	}
 
 	return entry;
@@ -1505,6 +1557,7 @@ agg_retrieve_direct(AggState *aggstate)
 	ExprContext *econtext;
 	ExprContext *tmpcontext;
 	AggStatePerAgg peragg;
+	AggStatePerAggState peraggstate;
 	AggStatePerGroup pergroup;
 	TupleTableSlot *outerslot;
 	TupleTableSlot *firstSlot;
@@ -1527,6 +1580,7 @@ agg_retrieve_direct(AggState *aggstate)
 	tmpcontext = aggstate->tmpcontext;
 
 	peragg = aggstate->peragg;
+	peraggstate = aggstate->peraggstate;
 	pergroup = aggstate->pergroup;
 	firstSlot = aggstate->ss.ss_ScanTupleSlot;
 
@@ -1716,7 +1770,7 @@ agg_retrieve_direct(AggState *aggstate)
 			/*
 			 * Initialize working state for a new input tuple group.
 			 */
-			initialize_aggregates(aggstate, peragg, pergroup, numReset);
+			initialize_aggregates(aggstate, peraggstate, pergroup, numReset);
 
 			if (aggstate->grp_firstTuple != NULL)
 			{
@@ -1945,10 +1999,12 @@ AggState *
 ExecInitAgg(Agg *node, EState *estate, int eflags)
 {
 	AggState   *aggstate;
-	AggStatePerAgg peragg;
+	AggStatePerAgg	peraggs;
+	AggStatePerAggState peraggstates;
 	Plan	   *outerPlan;
 	ExprContext *econtext;
 	int			numaggs,
+				stateno,
 				aggno;
 	int			phase;
 	ListCell   *l;
@@ -1971,12 +2027,14 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 
 	aggstate->aggs = NIL;
 	aggstate->numaggs = 0;
+	aggstate->numstates = 0;
 	aggstate->maxsets = 0;
 	aggstate->hashfunctions = NULL;
 	aggstate->projected_set = -1;
 	aggstate->current_set = 0;
 	aggstate->peragg = NULL;
-	aggstate->curperagg = NULL;
+	aggstate->peraggstate = NULL;
+	aggstate->curperaggstate = NULL;
 	aggstate->agg_done = false;
 	aggstate->input_done = false;
 	aggstate->pergroup = NULL;
@@ -2209,8 +2267,11 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 	econtext->ecxt_aggvalues = (Datum *) palloc0(sizeof(Datum) * numaggs);
 	econtext->ecxt_aggnulls = (bool *) palloc0(sizeof(bool) * numaggs);
 
-	peragg = (AggStatePerAgg) palloc0(sizeof(AggStatePerAggData) * numaggs);
-	aggstate->peragg = peragg;
+	peraggs = (AggStatePerAgg) palloc0(sizeof(AggStatePerAggData)* numaggs);
+	peraggstates = (AggStatePerAggState) palloc0(sizeof(AggStatePerAggStateData) * numaggs);
+
+	aggstate->peragg = peraggs;
+	aggstate->peraggstate = peraggstates;
 
 	if (node->aggstrategy == AGG_HASHED)
 	{
@@ -2232,18 +2293,17 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 
 	/*
 	 * Perform lookups of aggregate function info, and initialize the
-	 * unchanging fields of the per-agg data.  We also detect duplicate
-	 * aggregates (for example, "SELECT sum(x) ... HAVING sum(x) > 0"). When
-	 * duplicates are detected, we only make an AggStatePerAgg struct for the
-	 * first one.  The clones are simply pointed at the same result entry by
-	 * giving them duplicate aggno values.
+	 * unchanging fields of the per-agg data.
 	 */
 	aggno = -1;
+	stateno = -1;
 	foreach(l, aggstate->aggs)
 	{
 		AggrefExprState *aggrefstate = (AggrefExprState *) lfirst(l);
 		Aggref	   *aggref = (Aggref *) aggrefstate->xprstate.expr;
-		AggStatePerAgg peraggstate;
+		AggStatePerAgg peragg;
+		AggStatePerAggState peraggstate;
+		AggRefCompatibility agg_match;
 		Oid			inputTypes[FUNC_MAX_ARGS];
 		int			numArguments;
 		int			numDirectArgs;
@@ -2260,40 +2320,82 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 		Expr	   *transfnexpr,
 				   *finalfnexpr;
 		Datum		textInitVal;
-		int			i;
+		int			existing_aggno;
 		ListCell   *lc;
 
 		/* Planner should have assigned aggregate to correct level */
 		Assert(aggref->agglevelsup == 0);
 
-		/* Look for a previous duplicate aggregate */
-		for (i = 0; i <= aggno; i++)
+		/*
+		 * For performance reasons we detect duplicate aggregates (for example,
+		 * "SELECT sum(x) ... HAVING sum(x) > 0"). When duplicates are
+		 * detected, we only make an AggStatePerAgg struct for the first one.
+		 * The clones are simply pointed at the same result entry by giving
+		 * them duplicate aggno values. We also do our best to reuse duplicate
+		 * aggregate states. The query may use 2 or more aggregate functions
+		 * which share the same transition function and initial value therefore
+		 * would end up calculating the same state. In this case we can just
+		 * calculate the state once, however if the finalfns do not match then
+		 * we must create a new peragg to store the varying finalfn.
+		 */
+
+		/* check if we have previous agg or state matches that can be reused */
+		agg_match = find_compatible_aggref(aggref, aggstate, aggno,
+										   &existing_aggno);
+
+		if (agg_match == AGGREF_EXACT_MATCH)
 		{
-			if (equal(aggref, peragg[i].aggref) &&
-				!contain_volatile_functions((Node *) aggref))
-				break;
+			/* Exact match -- this must be using same aggregate function or
+			 * have the same transfn and finalfn. Just reuse the existing agg.
+			 */
+			aggrefstate->aggno = existing_aggno;
+			continue;
 		}
-		if (i <= aggno)
+
+		else if (agg_match == AGGREF_STATE_MATCH)
 		{
-			/* Found a match to an existing entry, so just mark it */
-			aggrefstate->aggno = i;
-			continue;
+			/*
+			 * State only match. The state can be reused, but the finalfn are
+			 * different. We'll need to create a new peragg for the new finalfn
+			 */
+			int existing_stateno = peraggs[existing_aggno].stateno;
+			peragg = &peraggs[++aggno];
+			peraggstate = &peraggstates[existing_stateno];
+			peragg->stateno = existing_stateno;
+		}
+		else		/* AGGREF_NO_MATCH */
+		{
+			/* Nothing matches, so assign a new state and a new per agg */
+			peraggstate = &peraggstates[++stateno];
+			peragg = &peraggs[++aggno];
+			peragg->stateno = stateno;
 		}
 
-		/* Nope, so assign a new PerAgg record */
-		peraggstate = &peragg[++aggno];
+		/*
+		 * When we pass through the following code in a AGGREF_STATE_MATCH
+		 * type match, the peraggstate will already have been setup by a
+		 * previous iteration of the loop, so we'll try where possible to
+		 * minimize as much rework of setting up the peraggstate as possible.
+		 * In reality it shouldn't matter as we'll just be setting it up the
+		 * same as it was previously, but for performance reasons we do skip
+		 * over some more expensive parts the 2nd time around.
+		 */
 
-		/* Mark Aggref state node with assigned index in the result array */
+		/* Mark Aggref state node with the index of which agg it should use */
 		aggrefstate->aggno = aggno;
 
-		/* Begin filling in the peraggstate data */
-		peraggstate->aggrefstate = aggrefstate;
-		peraggstate->aggref = aggref;
-		peraggstate->sortstates = (Tuplesortstate **)
-			palloc0(sizeof(Tuplesortstate *) * numGroupingSets);
-
-		for (currentsortno = 0; currentsortno < numGroupingSets; currentsortno++)
-			peraggstate->sortstates[currentsortno] = NULL;
+		/* for state matches the peraggstate has already been setup */
+		if (agg_match == AGGREF_NO_MATCH)
+		{
+			/* Begin filling in the peraggstate data */
+			peraggstate->aggrefstate = aggrefstate;
+			peraggstate->aggref = aggref;
+			peraggstate->sortstates = (Tuplesortstate **)
+				palloc0(sizeof(Tuplesortstate *)* numGroupingSets);
+
+			for (currentsortno = 0; currentsortno < numGroupingSets; currentsortno++)
+				peraggstate->sortstates[currentsortno] = NULL;
+		}
 
 		/* Fetch the pg_aggregate row */
 		aggTuple = SearchSysCache1(AGGFNOID,
@@ -2311,8 +2413,12 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 						   get_func_name(aggref->aggfnoid));
 		InvokeFunctionExecuteHook(aggref->aggfnoid);
 
+		/* when reusing the state the transfns should match! */
+		Assert(agg_match == AGGREF_NO_MATCH ||
+			   peraggstate->transfn_oid == aggform->aggtransfn);
+
 		peraggstate->transfn_oid = transfn_oid = aggform->aggtransfn;
-		peraggstate->finalfn_oid = finalfn_oid = aggform->aggfinalfn;
+		peragg->finalfn_oid = finalfn_oid = aggform->aggfinalfn;
 
 		/* Check that aggregate owner has permission to call component fns */
 		{
@@ -2327,12 +2433,20 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 			aggOwner = ((Form_pg_proc) GETSTRUCT(procTuple))->proowner;
 			ReleaseSysCache(procTuple);
 
-			aclresult = pg_proc_aclcheck(transfn_oid, aggOwner,
-										 ACL_EXECUTE);
-			if (aclresult != ACLCHECK_OK)
-				aclcheck_error(aclresult, ACL_KIND_PROC,
-							   get_func_name(transfn_oid));
-			InvokeFunctionExecuteHook(transfn_oid);
+			/*
+			 * If we're reusing an existing state then the permissions for
+			 * transfn were already checked when we setup that state.
+			 */
+			if (agg_match == AGGREF_NO_MATCH)
+			{
+				aclresult = pg_proc_aclcheck(transfn_oid, aggOwner,
+											 ACL_EXECUTE);
+				if (aclresult != ACLCHECK_OK)
+					aclcheck_error(aclresult, ACL_KIND_PROC,
+								   get_func_name(transfn_oid));
+				InvokeFunctionExecuteHook(transfn_oid);
+			}
+
 			if (OidIsValid(finalfn_oid))
 			{
 				aclresult = pg_proc_aclcheck(finalfn_oid, aggOwner,
@@ -2367,9 +2481,9 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 
 		/* Detect how many arguments to pass to the finalfn */
 		if (aggform->aggfinalextra)
-			peraggstate->numFinalArgs = numArguments + 1;
+			peragg->numFinalArgs = numArguments + 1;
 		else
-			peraggstate->numFinalArgs = numDirectArgs + 1;
+			peragg->numFinalArgs = numDirectArgs + 1;
 
 		/* resolve actual type of transition state, if polymorphic */
 		aggtranstype = resolve_aggregate_transtype(aggref->aggfnoid,
@@ -2377,32 +2491,65 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 												   inputTypes,
 												   numArguments);
 
-		/* build expression trees using actual argument & result types */
-		build_aggregate_fnexprs(inputTypes,
-								numArguments,
-								numDirectArgs,
-								peraggstate->numFinalArgs,
-								aggref->aggvariadic,
-								aggtranstype,
-								aggref->aggtype,
-								aggref->inputcollid,
-								transfn_oid,
-								InvalidOid,		/* invtrans is not needed here */
-								finalfn_oid,
-								&transfnexpr,
-								NULL,
-								&finalfnexpr);
-
-		/* set up infrastructure for calling the transfn and finalfn */
-		fmgr_info(transfn_oid, &peraggstate->transfn);
-		fmgr_info_set_expr((Node *) transfnexpr, &peraggstate->transfn);
+		/* for no match we must setup the transfn and finalfn if one exists */
+		if (agg_match == AGGREF_NO_MATCH)
+		{
+			/* build expression trees using actual argument & result types */
+			build_aggregate_fnexprs(inputTypes,
+									numArguments,
+									numDirectArgs,
+									peragg->numFinalArgs,
+									aggref->aggvariadic,
+									aggtranstype,
+									aggref->aggtype,
+									aggref->inputcollid,
+									transfn_oid,
+									InvalidOid,		/* invtrans is not needed here */
+									finalfn_oid,
+									&transfnexpr,
+									NULL,
+									&finalfnexpr);
+
+			/* set up infrastructure for calling the transfn and finalfn */
+			fmgr_info(transfn_oid, &peraggstate->transfn);
+			fmgr_info_set_expr((Node *) transfnexpr, &peraggstate->transfn);
+		}
+
+		/* else we just have to setup the finalfn, if one exists */
+		else if (OidIsValid(finalfn_oid))
+		{
+			/*
+			 * AGGREF_STATE_MATCH -- transfn calling infrastructure already
+			 * built for this state
+			 */
+			build_aggregate_fnexprs(inputTypes,
+									numArguments,
+									numDirectArgs,
+									peragg->numFinalArgs,
+									aggref->aggvariadic,
+									aggtranstype,
+									aggref->aggtype,
+									aggref->inputcollid,
+									InvalidOid,		/* transfn already done */
+									InvalidOid,		/* invtrans is not needed here */
+									finalfn_oid,
+									NULL,
+									NULL,
+									&finalfnexpr);
+		}
 
 		if (OidIsValid(finalfn_oid))
 		{
-			fmgr_info(finalfn_oid, &peraggstate->finalfn);
-			fmgr_info_set_expr((Node *) finalfnexpr, &peraggstate->finalfn);
+			fmgr_info(finalfn_oid, &peragg->finalfn);
+			fmgr_info_set_expr((Node *) finalfnexpr, &peragg->finalfn);
 		}
 
+		/* if it's a state match then everything else has already been done */
+		if (agg_match != AGGREF_NO_MATCH)
+		{
+			ReleaseSysCache(aggTuple);
+			continue;
+		}
 		peraggstate->aggCollation = aggref->inputcollid;
 
 		InitFunctionCallInfoData(peraggstate->transfn_fcinfo,
@@ -2574,8 +2721,12 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 		ReleaseSysCache(aggTuple);
 	}
 
-	/* Update numaggs to match number of unique aggregates found */
+	/*
+	 * Update numaggs to match the number of unique aggregates found.
+	 * Also set numstates to the number of unique aggregate states found.
+	 */
 	aggstate->numaggs = aggno + 1;
+	aggstate->numstates = stateno + 1;
 
 	return aggstate;
 }
@@ -2596,11 +2747,195 @@ GetAggInitVal(Datum textInitVal, Oid transtype)
 	return initVal;
 }
 
+/*
+ * find_compatible_aggref
+ *		Searches the previously looked at aggregates in order to find a
+ *		compatible aggregate or aggregate state. If a positive match is found
+ *		then foundaggno is set to the aggregate which matches.
+ *		When AGGREF_STATE_MATCH is returned the caller must only use the state
+ *		of the foundaggno, not the actual aggno itself.
+ *		When AGGREF_EXACT_MATCH is returned the caller may use both the aggno
+ *		and the state which that aggno uses.
+ *
+ * Scenario 1 -- An aggregate function appears more than once in query:
+ *
+ *		SELECT SUM(x) FROM ... HAVING SUM(x) > 0
+ *
+ * Since in this case the aggregates are both the same we can optimize by
+ * only calculating aggregate state and calling the finalfn just once. This
+ * would be an AGGREF_EXACT_MATCH, meaning both the state and the final
+ * function call are shared.
+ *
+ * Scenario 2 -- Two different aggregate functions appear in the query but
+ *				 the two functions happen to share the same transfn, but have
+ *				 different finalfn.
+ *
+ *		SELECT SUM(x), AVG(x) FROM ...
+ *
+ * Since in our case these two aggregates both share the same transfn, but
+ * naturally they have different finalfns. This situation is classed as an
+ * AGGREF_STATE_MATCH. This means that the same state can be shared by both
+ * aggregates. Since the finalfn call is not the same this cannot be reused.
+ * For this case to be valid the INITCOND of the aggregate, if one exists, must
+ * also match.
+ *
+ * Scenario 3 -- The same aggregate function is called with different
+ *				 parameters.
+ *
+ *		SELECT SUM(x),SUM(DISTINCT x) FROM ...
+ *		SELECT SUM(x),SUM(y) FROM ...
+ *		SELECT SUM(x),SUM(x) FILTER(WHERE x > 0) FROM ...
+ *
+ * All three of the above queries cannot share the same state and have to be
+ * calculated independently.
+ *
+ * Scenario 4 -- Different aggregates with the same parameters and the same
+ *				 transfn and finalfn.
+ *
+ *		SELECT SUM(x),SUM2(x) FROM ...
+ *
+ * A perhaps unlikely scenario where two aggregate functions exist which have,
+ * both the same transfn and the same finalfn. In this case we can report an
+ * AGGREF_EXACT_MATCH, providing the INITCOND of both aggregates are the same.
+ */
+static AggRefCompatibility
+find_compatible_aggref(Aggref *newagg, AggState *aggstate,
+					   int lastaggno, int *foundaggno)
+{
+	int aggno;
+	int statematchaggno;
+	AggStatePerAggState peraggstates;
+	AggStatePerAgg peraggs;
+
+	/* we mustn't reuse the aggref if it contains volatile function calls */
+	if (contain_volatile_functions((Node *)newagg))
+		return AGGREF_NO_MATCH;
+
+	statematchaggno = -1;
+	peraggstates = aggstate->peraggstate;
+	peraggs = aggstate->peragg;
+
+	/*
+	 * Search through the list of already seen aggregates. We'll stop when we
+	 * find an exact match, but until then we'll note any state matches that
+	 * we find. We may have to fall back on these should we fail to find an
+	 * exact match.
+	 */
+	for (aggno = 0; aggno <= lastaggno; aggno++)
+	{
+		AggRefCompatibility matchtype;
+		AggStatePerAgg peragg;
+		AggStatePerAggState peraggstate;
+
+		peragg = &peraggs[aggno];
+		peraggstate = &peraggstates[peragg->stateno];
+
+		/* lookup the match type of this agg */
+		matchtype = aggref_has_compatible_states(newagg, peragg, peraggstate);
+
+		/* if it's an exact match then we're done. */
+		if (matchtype == AGGREF_EXACT_MATCH)
+		{
+			*foundaggno = aggno;
+			return AGGREF_EXACT_MATCH;
+		}
+
+		/* remember any state matches, but keep on looking... */
+		else if (matchtype == AGGREF_STATE_MATCH)
+			statematchaggno = aggno;
+	}
+
+	/* no exact match found, but did we find a state match? */
+	if (statematchaggno >= 0)
+	{
+		*foundaggno = statematchaggno;
+		return AGGREF_STATE_MATCH;
+	}
+
+	return AGGREF_NO_MATCH;
+}
+
+/*
+ * aggref_has_compatible_states
+ *		Determines match type of this aggregate. See comments in
+ *		find_compatible_aggref() for details.
+ */
+static AggRefCompatibility
+aggref_has_compatible_states(Aggref *newagg, AggStatePerAgg peragg,
+							 AggStatePerAggState peraggstate)
+{
+	Aggref *existingRef = peraggstate->aggref;
+
+	/* all of the following must be the same or it's no match */
+	if (newagg->aggtype != existingRef->aggtype ||
+		newagg->aggcollid != existingRef->aggcollid ||
+		newagg->inputcollid != existingRef->inputcollid ||
+		newagg->aggstar != existingRef->aggstar ||
+		newagg->aggvariadic != existingRef->aggvariadic ||
+		newagg->aggkind != existingRef->aggkind ||
+		!equal(newagg->aggdirectargs, existingRef->aggdirectargs) ||
+		!equal(newagg->args, existingRef->args) ||
+		!equal(newagg->aggorder, existingRef->aggorder) ||
+		!equal(newagg->aggdistinct, existingRef->aggdistinct) ||
+		!equal(newagg->aggfilter, existingRef->aggfilter))
+		return AGGREF_NO_MATCH;
+
+	/* if it's the same aggregate function then report exact match */
+	if (newagg->aggfnoid == existingRef->aggfnoid)
+		return AGGREF_EXACT_MATCH;
+	else
+	{
+		/*
+		 * Aggregate functions differ. We'll need to do some more analysis
+		 * before we can know what the match type will be.
+		 * If the transfn match and the initvalue is the same then we can at
+		 * least let the newagg share the state, but if the finalfn also
+		 * happens to match then we can actually still report an exact match.
+		 */
+
+		HeapTuple			aggTuple;
+		Form_pg_aggregate	aggform;
+		bool				initValueIsNull;
+
+		/* Fetch the pg_aggregate row */
+		aggTuple = SearchSysCache1(AGGFNOID, ObjectIdGetDatum(newagg->aggfnoid));
+		if (!HeapTupleIsValid(aggTuple))
+			elog(ERROR, "cache lookup failed for aggregate %u", newagg->aggfnoid);
+		aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
+		ReleaseSysCache(aggTuple);
+
+		/* if the transfns are not the same then the state can't be shared */
+		if (aggform->aggtransfn != peraggstate->transfn_oid)
+			return AGGREF_NO_MATCH;
+
+		SysCacheGetAttr(AGGFNOID, aggTuple,
+			Anum_pg_aggregate_agginitval, &initValueIsNull);
+
+		/*
+		 * If both INITCONDs are null then the outcome depends
+		 * on if the finalfns match.
+		 */
+		if (initValueIsNull && peraggstate->initValueIsNull)
+		{
+			if (aggform->aggfinalfn != peragg->finalfn_oid)
+				return AGGREF_STATE_MATCH;
+			else
+				return AGGREF_EXACT_MATCH;
+		}
+
+		/*
+		 * XXX perhaps we should check the value of the initValue to see if
+		 * they match?
+		 */
+		return AGGREF_NO_MATCH;
+	}
+}
+
 void
 ExecEndAgg(AggState *node)
 {
 	PlanState  *outerPlan;
-	int			aggno;
+	int			stateno;
 	int			numGroupingSets = Max(node->maxsets, 1);
 	int			setno;
 
@@ -2611,9 +2946,9 @@ ExecEndAgg(AggState *node)
 	if (node->sort_out)
 		tuplesort_end(node->sort_out);
 
-	for (aggno = 0; aggno < node->numaggs; aggno++)
+	for (stateno = 0; stateno < node->numstates; stateno++)
 	{
-		AggStatePerAgg peraggstate = &node->peragg[aggno];
+		AggStatePerAggState peraggstate = &node->peraggstate[stateno];
 
 		for (setno = 0; setno < numGroupingSets; setno++)
 		{
@@ -2646,7 +2981,7 @@ ExecReScanAgg(AggState *node)
 	ExprContext *econtext = node->ss.ps.ps_ExprContext;
 	PlanState  *outerPlan = outerPlanState(node);
 	Agg		   *aggnode = (Agg *) node->ss.ps.plan;
-	int			aggno;
+	int			stateno;
 	int			numGroupingSets = Max(node->maxsets, 1);
 	int			setno;
 
@@ -2678,11 +3013,11 @@ ExecReScanAgg(AggState *node)
 	}
 
 	/* Make sure we have closed any open tuplesorts */
-	for (aggno = 0; aggno < node->numaggs; aggno++)
+	for (stateno = 0; stateno < node->numstates; stateno++)
 	{
 		for (setno = 0; setno < numGroupingSets; setno++)
 		{
-			AggStatePerAgg peraggstate = &node->peragg[aggno];
+			AggStatePerAggState peraggstate = &node->peraggstate[stateno];
 
 			if (peraggstate->sortstates[setno])
 			{
@@ -2811,10 +3146,12 @@ AggGetAggref(FunctionCallInfo fcinfo)
 {
 	if (fcinfo->context && IsA(fcinfo->context, AggState))
 	{
-		AggStatePerAgg curperagg = ((AggState *) fcinfo->context)->curperagg;
+		AggStatePerAggState curperaggstate;
+
+		curperaggstate = ((AggState *)fcinfo->context)->curperaggstate;
 
-		if (curperagg)
-			return curperagg->aggref;
+		if (curperaggstate)
+			return curperaggstate->aggref;
 	}
 	return NULL;
 }
diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c
index 478d8ca..d34e16c 100644
--- a/src/backend/parser/parse_agg.c
+++ b/src/backend/parser/parse_agg.c
@@ -1835,7 +1835,8 @@ resolve_aggregate_transtype(Oid aggfuncid,
  * the direct arguments followed by the aggregated arguments.
  *
  * transfn_oid, invtransfn_oid and finalfn_oid identify the funcs to be
- * called; the latter two may be InvalidOid.
+ * called; any of these maybe InvalidOid, however if invtransfn_oid is set then
+ * transfn_oid must also be set.
  *
  * Pointers to the constructed trees are returned into *transfnexpr,
  * *invtransfnexpr and *finalfnexpr. If there is no invtransfn or finalfn,
@@ -1863,61 +1864,67 @@ build_aggregate_fnexprs(Oid *agg_input_types,
 	FuncExpr   *fexpr;
 	int			i;
 
-	/*
-	 * Build arg list to use in the transfn FuncExpr node. We really only care
-	 * that transfn can discover the actual argument types at runtime using
-	 * get_fn_expr_argtype(), so it's okay to use Param nodes that don't
-	 * correspond to any real Param.
-	 */
-	argp = makeNode(Param);
-	argp->paramkind = PARAM_EXEC;
-	argp->paramid = -1;
-	argp->paramtype = agg_state_type;
-	argp->paramtypmod = -1;
-	argp->paramcollid = agg_input_collation;
-	argp->location = -1;
-
-	args = list_make1(argp);
-
-	for (i = agg_num_direct_inputs; i < agg_num_inputs; i++)
+	if (transfnexpr != NULL)
 	{
-		argp = makeNode(Param);
-		argp->paramkind = PARAM_EXEC;
-		argp->paramid = -1;
-		argp->paramtype = agg_input_types[i];
-		argp->paramtypmod = -1;
-		argp->paramcollid = agg_input_collation;
-		argp->location = -1;
-		args = lappend(args, argp);
-	}
+		if (OidIsValid(transfn_oid))
+		{
+			/*
+			 * Build arg list to use in the transfn FuncExpr node. We really only care
+			 * that transfn can discover the actual argument types at runtime using
+			 * get_fn_expr_argtype(), so it's okay to use Param nodes that don't
+			 * correspond to any real Param.
+			 */
+			argp = makeNode(Param);
+			argp->paramkind = PARAM_EXEC;
+			argp->paramid = -1;
+			argp->paramtype = agg_state_type;
+			argp->paramtypmod = -1;
+			argp->paramcollid = agg_input_collation;
+			argp->location = -1;
 
-	fexpr = makeFuncExpr(transfn_oid,
-						 agg_state_type,
-						 args,
-						 InvalidOid,
-						 agg_input_collation,
-						 COERCE_EXPLICIT_CALL);
-	fexpr->funcvariadic = agg_variadic;
-	*transfnexpr = (Expr *) fexpr;
+			args = list_make1(argp);
 
-	/*
-	 * Build invtransfn expression if requested, with same args as transfn
-	 */
-	if (invtransfnexpr != NULL)
-	{
-		if (OidIsValid(invtransfn_oid))
-		{
-			fexpr = makeFuncExpr(invtransfn_oid,
+			for (i = agg_num_direct_inputs; i < agg_num_inputs; i++)
+			{
+				argp = makeNode(Param);
+				argp->paramkind = PARAM_EXEC;
+				argp->paramid = -1;
+				argp->paramtype = agg_input_types[i];
+				argp->paramtypmod = -1;
+				argp->paramcollid = agg_input_collation;
+				argp->location = -1;
+				args = lappend(args, argp);
+			}
+
+			fexpr = makeFuncExpr(transfn_oid,
 								 agg_state_type,
 								 args,
 								 InvalidOid,
 								 agg_input_collation,
 								 COERCE_EXPLICIT_CALL);
 			fexpr->funcvariadic = agg_variadic;
-			*invtransfnexpr = (Expr *) fexpr;
+			*transfnexpr = (Expr *) fexpr;
+
+			/*
+			 * Build invtransfn expression if requested, with same args as transfn
+			 */
+			if (invtransfnexpr != NULL)
+			{
+				if (OidIsValid(invtransfn_oid))
+				{
+					fexpr = makeFuncExpr(invtransfn_oid,
+						agg_state_type,
+						args,
+						InvalidOid,
+						agg_input_collation,
+						COERCE_EXPLICIT_CALL);
+					fexpr->funcvariadic = agg_variadic;
+					*invtransfnexpr = (Expr *)fexpr;
+				}
+				else
+					*invtransfnexpr = NULL;
+			}
 		}
-		else
-			*invtransfnexpr = NULL;
 	}
 
 	/* see if we have a final function */
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 541ee18..27de9cb 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1816,6 +1816,7 @@ typedef struct GroupState
  */
 /* these structs are private in nodeAgg.c: */
 typedef struct AggStatePerAggData *AggStatePerAgg;
+typedef struct AggStatePerAggStateData *AggStatePerAggState;
 typedef struct AggStatePerGroupData *AggStatePerGroup;
 typedef struct AggStatePerPhaseData *AggStatePerPhase;
 
@@ -1824,14 +1825,16 @@ typedef struct AggState
 	ScanState	ss;				/* its first field is NodeTag */
 	List	   *aggs;			/* all Aggref nodes in targetlist & quals */
 	int			numaggs;		/* length of list (could be zero!) */
+	int			numstates;		/* number of peraggstate items */
 	AggStatePerPhase phase;		/* pointer to current phase data */
 	int			numphases;		/* number of phases */
 	int			current_phase;	/* current phase number */
 	FmgrInfo   *hashfunctions;	/* per-grouping-field hash fns */
 	AggStatePerAgg peragg;		/* per-Aggref information */
+	AggStatePerAggState peraggstate; /* per-Agg State information */
 	ExprContext **aggcontexts;	/* econtexts for long-lived data (per GS) */
 	ExprContext *tmpcontext;	/* econtext for input expressions */
-	AggStatePerAgg curperagg;	/* identifies currently active aggregate */
+	AggStatePerAggState curperaggstate;	/* identifies currently active aggregate */
 	bool		input_done;		/* indicates end of input */
 	bool		agg_done;		/* indicates completion of Agg scan */
 	int			projected_set;	/* The last projected grouping set */
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 8852051..4dad4fe 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1580,3 +1580,171 @@ select least_agg(variadic array[q1,q2]) from int8_tbl;
  -4567890123456789
 (1 row)
 
+-- test aggregates with common transition functions share the same states
+begin work;
+create type avg_state as (total bigint, count bigint);
+create or replace function avg_transfn(state avg_state, n int) returns avg_state as
+$$
+declare new_state avg_state;
+begin
+	raise notice 'avg_transfn called with %', n;
+	if state is null then
+		if n is not null then
+			new_state.total := n;
+			new_state.count := 1;
+			return new_state;
+		end if;
+		return null;
+	elsif n is not null then
+		state.total := state.total + n;
+		state.count := state.count + 1;
+		return state;
+	end if;
+
+	return null;
+end
+$$ language plpgsql;
+create function avg_finalfn(state avg_state) returns int4 as
+$$
+begin
+	if state is null then
+		return NULL;
+	else
+		return state.total / state.count;
+	end if;
+end
+$$ language plpgsql;
+create function sum_finalfn(state avg_state) returns int4 as
+$$
+begin
+	if state is null then
+		return NULL;
+	else
+		return state.total;
+	end if;
+end
+$$ language plpgsql;
+create aggregate my_avg(int4)
+(
+   stype = avg_state,
+   sfunc = avg_transfn,
+   finalfunc = avg_finalfn
+);
+create aggregate my_sum(int4)
+(
+   stype = avg_state,
+   sfunc = avg_transfn,
+   finalfunc = sum_finalfn
+);
+-- aggregate state should be shared as transfn is the same for both aggs.
+select my_avg(one),my_sum(one) from (values(1,2),(3,4)) t(one,two);
+NOTICE:  avg_transfn called with 1
+NOTICE:  avg_transfn called with 3
+ my_avg | my_sum 
+--------+--------
+      2 |      4
+(1 row)
+
+-- shouldn't share states due to the distinctness not matching.
+select my_avg(distinct one),my_sum(one) from (values(1,2),(3,4)) t(one,two);
+NOTICE:  avg_transfn called with 1
+NOTICE:  avg_transfn called with 3
+NOTICE:  avg_transfn called with 1
+NOTICE:  avg_transfn called with 3
+ my_avg | my_sum 
+--------+--------
+      2 |      4
+(1 row)
+
+-- this should not share the state due to different input columns.
+select my_avg(one),my_sum(two) from (values(1,2),(3,4)) t(one,two);
+NOTICE:  avg_transfn called with 2
+NOTICE:  avg_transfn called with 1
+NOTICE:  avg_transfn called with 4
+NOTICE:  avg_transfn called with 3
+ my_avg | my_sum 
+--------+--------
+      2 |      6
+(1 row)
+
+create aggregate my_sum_init(int4)
+(
+   stype = avg_state,
+   sfunc = avg_transfn,
+   finalfunc = sum_finalfn,
+   initcond = '(10,0)'
+);
+create aggregate my_avg_init(int4)
+(
+   stype = avg_state,
+   sfunc = avg_transfn,
+   finalfunc = avg_finalfn,
+   initcond = '(5,0)'
+);
+-- Varying INITCONDs should cause the states not to be shared.
+select my_avg_init(one),my_sum_init(one) from (values(1,2),(3,4)) t(one,two);
+NOTICE:  avg_transfn called with 1
+NOTICE:  avg_transfn called with 1
+NOTICE:  avg_transfn called with 3
+NOTICE:  avg_transfn called with 3
+ my_avg_init | my_sum_init 
+-------------+-------------
+           4 |          14
+(1 row)
+
+rollback;
+-- test aggregate state sharing to ensure it works if one aggregate has a
+-- finalfn and the other one has none.
+begin work;
+create or replace function sum_transfn(state int4, n int4) returns int4 as
+$$
+declare new_state int4;
+begin
+	raise notice 'sum_transfn called with %', n;
+	if state is null then
+		if n is not null then
+			new_state := n;
+			return new_state;
+		end if;
+		return null;
+	elsif n is not null then
+		state := state + n;
+		return state;
+	end if;
+
+	return null;
+end
+$$ language plpgsql;
+create function halfsum_finalfn(state int4) returns int4 as
+$$
+begin
+	if state is null then
+		return NULL;
+	else
+		return state / 2;
+	end if;
+end
+$$ language plpgsql;
+create aggregate my_sum(int4)
+(
+   stype = int4,
+   sfunc = sum_transfn
+);
+create aggregate my_half_sum(int4)
+(
+   stype = int4,
+   sfunc = sum_transfn,
+   finalfunc = halfsum_finalfn
+);
+-- Agg state should be shared even though my_sum has no finalfn
+select my_sum(one),my_half_sum(one) from (values(1),(2),(3),(4)) t(one);
+NOTICE:  sum_transfn called with 1
+NOTICE:  sum_transfn called with 2
+NOTICE:  sum_transfn called with 3
+NOTICE:  sum_transfn called with 4
+ my_sum | my_half_sum 
+--------+-------------
+     10 |           5
+(1 row)
+
+rollback;
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index a84327d..42c3b3c 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -590,3 +590,151 @@ drop view aggordview1;
 -- variadic aggregates
 select least_agg(q1,q2) from int8_tbl;
 select least_agg(variadic array[q1,q2]) from int8_tbl;
+
+
+-- test aggregates with common transition functions share the same states
+begin work;
+
+create type avg_state as (total bigint, count bigint);
+
+create or replace function avg_transfn(state avg_state, n int) returns avg_state as
+$$
+declare new_state avg_state;
+begin
+	raise notice 'avg_transfn called with %', n;
+	if state is null then
+		if n is not null then
+			new_state.total := n;
+			new_state.count := 1;
+			return new_state;
+		end if;
+		return null;
+	elsif n is not null then
+		state.total := state.total + n;
+		state.count := state.count + 1;
+		return state;
+	end if;
+
+	return null;
+end
+$$ language plpgsql;
+
+create function avg_finalfn(state avg_state) returns int4 as
+$$
+begin
+	if state is null then
+		return NULL;
+	else
+		return state.total / state.count;
+	end if;
+end
+$$ language plpgsql;
+
+create function sum_finalfn(state avg_state) returns int4 as
+$$
+begin
+	if state is null then
+		return NULL;
+	else
+		return state.total;
+	end if;
+end
+$$ language plpgsql;
+
+create aggregate my_avg(int4)
+(
+   stype = avg_state,
+   sfunc = avg_transfn,
+   finalfunc = avg_finalfn
+);
+
+create aggregate my_sum(int4)
+(
+   stype = avg_state,
+   sfunc = avg_transfn,
+   finalfunc = sum_finalfn
+);
+
+-- aggregate state should be shared as transfn is the same for both aggs.
+select my_avg(one),my_sum(one) from (values(1,2),(3,4)) t(one,two);
+
+-- shouldn't share states due to the distinctness not matching.
+select my_avg(distinct one),my_sum(one) from (values(1,2),(3,4)) t(one,two);
+
+-- this should not share the state due to different input columns.
+select my_avg(one),my_sum(two) from (values(1,2),(3,4)) t(one,two);
+
+
+create aggregate my_sum_init(int4)
+(
+   stype = avg_state,
+   sfunc = avg_transfn,
+   finalfunc = sum_finalfn,
+   initcond = '(10,0)'
+);
+
+create aggregate my_avg_init(int4)
+(
+   stype = avg_state,
+   sfunc = avg_transfn,
+   finalfunc = avg_finalfn,
+   initcond = '(5,0)'
+);
+
+-- Varying INITCONDs should cause the states not to be shared.
+select my_avg_init(one),my_sum_init(one) from (values(1,2),(3,4)) t(one,two);
+
+rollback;
+
+-- test aggregate state sharing to ensure it works if one aggregate has a
+-- finalfn and the other one has none.
+begin work;
+
+create or replace function sum_transfn(state int4, n int4) returns int4 as
+$$
+declare new_state int4;
+begin
+	raise notice 'sum_transfn called with %', n;
+	if state is null then
+		if n is not null then
+			new_state := n;
+			return new_state;
+		end if;
+		return null;
+	elsif n is not null then
+		state := state + n;
+		return state;
+	end if;
+
+	return null;
+end
+$$ language plpgsql;
+
+create function halfsum_finalfn(state int4) returns int4 as
+$$
+begin
+	if state is null then
+		return NULL;
+	else
+		return state / 2;
+	end if;
+end
+$$ language plpgsql;
+
+create aggregate my_sum(int4)
+(
+   stype = int4,
+   sfunc = sum_transfn
+);
+
+create aggregate my_half_sum(int4)
+(
+   stype = int4,
+   sfunc = sum_transfn,
+   finalfunc = halfsum_finalfn
+);
+
+-- Agg state should be shared even though my_sum has no finalfn
+select my_sum(one),my_half_sum(one) from (values(1),(2),(3),(4)) t(one);
+
+rollback;
#3Heikki Linnakangas
hlinnaka@iki.fi
In reply to: David Rowley (#2)
1 attachment(s)
Re: Sharing aggregate states between different aggregate functions

On 07/09/2015 12:44 PM, David Rowley wrote:

On 15 June 2015 at 12:05, David Rowley <david.rowley@2ndquadrant.com> wrote:

This basically allows an aggregate's state to be shared between other
aggregate functions when both aggregate's transition functions (and a few
other things) match
There's quite a number of aggregates in our standard set which will
benefit from this optimisation.

After compiling the original patch with another compiler, I noticed a
couple of warnings.

The attached fixes these.

I spent some time reviewing this. I refactored the ExecInitAgg code
rather heavily to make it more readable (IMHO); see attached. What do
you think? Did I break anything?

Some comments:

* In aggref_has_compatible_states(), you give up if aggtype or aggcollid
differ. But those properties apply to the final function, so you were
leaving some money on the table by disallowing state-sharing if they differ.

* The filter and input expressions are initialized for every AggRef,
before the deduplication logic kicks in. The AggrefExprState.aggfilter,
aggdirectargs and args fields really belong to the AggStatePerAggState
struct instead. This is not a new issue, but now that we have a
convenient per-aggstate struct to put them in, let's do so.

* There was a reference-after free bug in aggref_has_compatible_states;
you cannot ReleaseSysCache and then continue pointing to the struct.

* The code was a bit fuzzy on which parts of the per-aggstate are filled
in at what time. Some of the fields were overwritten every time a match
was found. With the same values, so no harm done, but I found it
confusing. I refactored ExecInitAgg in the attached patch to clear that up.

* There API of build_aggregate_fnexprs() was a bit strange now that some
callers use it to only fill in the final function, some call it to fill
both the transition functions and the final function. I split it to two
separate functions.

* I wonder if we should do this duplicate elimination at plan time. It's
very fast, so I'm not worried about that right now, but you had grand
plans to expand this so that an aggregate could optionally use one of
many different kinds of state values. At that point, it certainly seems
like a planning decision to decide which aggregates share state. I think
we can leave it as it is for now, but it's something to perhaps consider
later.

BTW, the name of the AggStatePerAggStateData struct is pretty horrible.
The repeated "AggState" feels awkward. Now that I've stared at the patch
for a some time, it doesn't bother me anymore, but it took me quite a
while to over that. I'm sure it will for others too. And it's not just
that struct, the comments talk about "aggregate state", which could be
confused to mean "AggState", but it actually means
AggStatePerAggStateData. I don't have any great suggestions, but can you
come up a better naming scheme?

- Heikki

Attachments:

sharing_aggstate-heikki-1.patchapplication/x-patch; name=sharing_aggstate-heikki-1.patchDownload
diff --git a/src/backend/executor/execQual.c b/src/backend/executor/execQual.c
index 0f911f2..fd922bd 100644
--- a/src/backend/executor/execQual.c
+++ b/src/backend/executor/execQual.c
@@ -4485,35 +4485,15 @@ ExecInitExpr(Expr *node, PlanState *parent)
 			break;
 		case T_Aggref:
 			{
-				Aggref	   *aggref = (Aggref *) node;
 				AggrefExprState *astate = makeNode(AggrefExprState);
 
 				astate->xprstate.evalfunc = (ExprStateEvalFunc) ExecEvalAggref;
 				if (parent && IsA(parent, AggState))
 				{
 					AggState   *aggstate = (AggState *) parent;
-					int			naggs;
 
 					aggstate->aggs = lcons(astate, aggstate->aggs);
-					naggs = ++aggstate->numaggs;
-
-					astate->aggdirectargs = (List *) ExecInitExpr((Expr *) aggref->aggdirectargs,
-																  parent);
-					astate->args = (List *) ExecInitExpr((Expr *) aggref->args,
-														 parent);
-					astate->aggfilter = ExecInitExpr(aggref->aggfilter,
-													 parent);
-
-					/*
-					 * Complain if the aggregate's arguments contain any
-					 * aggregates; nested agg functions are semantically
-					 * nonsensical.  (This should have been caught earlier,
-					 * but we defend against it here anyway.)
-					 */
-					if (naggs != aggstate->numaggs)
-						ereport(ERROR,
-								(errcode(ERRCODE_GROUPING_ERROR),
-						errmsg("aggregate function calls cannot be nested")));
+					aggstate->numaggs++;
 				}
 				else
 				{
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 2bf48c5..fcc3859 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -152,17 +152,28 @@
 
 
 /*
- * AggStatePerAggData - per-aggregate working state for the Agg scan
+ * AggStatePerAggStateData - per aggregate state data for the Agg scan
+ *
+ * Working state for calculating the aggregate state, using the state
+ * transition function. This struct does not store the information needed
+ * to produce the final aggregate result from the state value; that's stored
+ * in AggStatePerAggData instead. This separation allows multiple aggregate
+ * results to be produced from a single state value.
  */
-typedef struct AggStatePerAggData
+typedef struct AggStatePerAggStateData
 {
 	/*
 	 * These values are set up during ExecInitAgg() and do not change
 	 * thereafter:
 	 */
 
-	/* Links to Aggref expr and state nodes this working state is for */
-	AggrefExprState *aggrefstate;
+	/*
+	 * Link to an Aggref expr this working state is for.
+	 *
+	 * There can actually be multiple AggRef's sharing the same working state,
+	 * as long as the inputs and transition state are identical. This points
+	 * to the first of them.
+	 */
 	Aggref	   *aggref;
 
 	/*
@@ -186,25 +197,22 @@ typedef struct AggStatePerAggData
 	 */
 	int			numTransInputs;
 
-	/*
-	 * Number of arguments to pass to the finalfn.  This is always at least 1
-	 * (the transition state value) plus any ordered-set direct args. If the
-	 * finalfn wants extra args then we pass nulls corresponding to the
-	 * aggregated input columns.
-	 */
-	int			numFinalArgs;
-
-	/* Oids of transfer functions */
+	/* Oid of the state transition function */
 	Oid			transfn_oid;
-	Oid			finalfn_oid;	/* may be InvalidOid */
+
+	/* Oid of state value's datatype */
+	Oid			aggtranstype;
+
+	/* ExprStates of the FILTER and argument expressions. */
+	ExprState  *aggfilter;		/* state of FILTER expression, if any */
+	List	   *args;			/* states of aggregated-argument expressions */
+	List	   *aggdirectargs;	/* states of direct-argument expressions */
 
 	/*
-	 * fmgr lookup data for transfer functions --- only valid when
-	 * corresponding oid is not InvalidOid.  Note in particular that fn_strict
-	 * flags are kept here.
+	 * fmgr lookup data for transfer function.  Note in particular that the
+	 * fn_strict flag is kept here.
 	 */
 	FmgrInfo	transfn;
-	FmgrInfo	finalfn;
 
 	/* Input collation derived for aggregate */
 	Oid			aggCollation;
@@ -236,17 +244,15 @@ typedef struct AggStatePerAggData
 	bool		initValueIsNull;
 
 	/*
-	 * We need the len and byval info for the agg's input, result, and
-	 * transition data types in order to know how to copy/delete values.
+	 * We need the len and byval info for the agg's input and transition data
+	 * types in order to know how to copy/delete values.
 	 *
 	 * Note that the info for the input type is used only when handling
 	 * DISTINCT aggs with just one argument, so there is only one input type.
 	 */
 	int16		inputtypeLen,
-				resulttypeLen,
 				transtypeLen;
 	bool		inputtypeByVal,
-				resulttypeByVal,
 				transtypeByVal;
 
 	/*
@@ -288,6 +294,48 @@ typedef struct AggStatePerAggData
 	 * worth the extra space consumption.
 	 */
 	FunctionCallInfoData transfn_fcinfo;
+}	AggStatePerAggStateData;
+
+/*
+ * AggStatePerAggData - per-aggregate working state
+ *
+ * This contains the information needed to produce a final aggregate result
+ * from the state value.
+ */
+typedef struct AggStatePerAggData
+{
+	/*
+	 * These values are set up during ExecInitAgg() and do not change
+	 * thereafter:
+	 */
+
+	/* index to the corresponding per-aggstate which this agg should use */
+	int			stateno;
+
+	/* Optional Oid of final function (may be InvalidOid) */
+	Oid			finalfn_oid;
+
+	/*
+	 * fmgr lookup data for final function --- only valid when finalfn_oid oid
+	 * is not InvalidOid.
+	 */
+	FmgrInfo	finalfn;
+
+	/*
+	 * Number of arguments to pass to the finalfn.  This is always at least 1
+	 * (the transition state value) plus any ordered-set direct args. If the
+	 * finalfn wants extra args then we pass nulls corresponding to the
+	 * aggregated input columns.
+	 */
+	int			numFinalArgs;
+
+	/*
+	 * We need the len and byval info for the agg's result data type in order
+	 * to know how to copy/delete values.
+	 */
+	int16		resulttypeLen;
+	bool		resulttypeByVal;
+
 }	AggStatePerAggData;
 
 /*
@@ -358,25 +406,36 @@ typedef struct AggHashEntryData
 	AggStatePerGroupData pergroup[FLEXIBLE_ARRAY_MEMBER];
 }	AggHashEntryData;
 
+/*
+ * enum states to mark compatibility between aggregate functions.
+ * These are used to enable various optimizations which are applied to similar
+ * aggregate functions. See comments for find_compatible_aggref() for details.
+ */
+typedef enum AggRefCompatibility
+{
+	AGGREF_NO_MATCH,			/* state is not compatible between aggregates */
+	AGGREF_STATE_MATCH,			/* aggregates may share state only */
+	AGGREF_EXACT_MATCH			/* aggregates may share state and finalfn */
+}	AggRefCompatibility;
 
 static void initialize_phase(AggState *aggstate, int newphase);
 static TupleTableSlot *fetch_input_tuple(AggState *aggstate);
 static void initialize_aggregates(AggState *aggstate,
-					  AggStatePerAgg peragg,
+					  AggStatePerAggState peraggstates,
 					  AggStatePerGroup pergroup,
 					  int numReset);
 static void advance_transition_function(AggState *aggstate,
-							AggStatePerAgg peraggstate,
+							AggStatePerAggState peraggstate,
 							AggStatePerGroup pergroupstate);
 static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup);
 static void process_ordered_aggregate_single(AggState *aggstate,
-								 AggStatePerAgg peraggstate,
+								 AggStatePerAggState peraggstate,
 								 AggStatePerGroup pergroupstate);
 static void process_ordered_aggregate_multi(AggState *aggstate,
-								AggStatePerAgg peraggstate,
+								AggStatePerAggState peraggstate,
 								AggStatePerGroup pergroupstate);
 static void finalize_aggregate(AggState *aggstate,
-				   AggStatePerAgg peraggstate,
+				   AggStatePerAgg peragg,
 				   AggStatePerGroup pergroupstate,
 				   Datum *resultVal, bool *resultIsNull);
 static void prepare_projection_slot(AggState *aggstate,
@@ -396,6 +455,14 @@ static TupleTableSlot *agg_retrieve_direct(AggState *aggstate);
 static void agg_fill_hash_table(AggState *aggstate);
 static TupleTableSlot *agg_retrieve_hash_table(AggState *aggstate);
 static Datum GetAggInitVal(Datum textInitVal, Oid transtype);
+static void build_peraggstate_for_aggref(AggStatePerAggState peraggstate,
+							 AggState *aggsate, EState *estate,
+							 Aggref *aggref, HeapTuple aggtuple,
+							 Oid *inputTypes, int numArguments);
+static AggRefCompatibility find_compatible_aggref(Aggref *newagg,
+					   AggState *aggstate, int lastaggno, int *foundaggno);
+static AggRefCompatibility aggref_has_compatible_states(Aggref *newagg,
+					 AggStatePerAgg peragg, AggStatePerAggState peraggstate);
 
 
 /*
@@ -498,7 +565,7 @@ fetch_input_tuple(AggState *aggstate)
  * When called, CurrentMemoryContext should be the per-query context.
  */
 static void
-initialize_aggregate(AggState *aggstate, AggStatePerAgg peraggstate,
+initialize_aggregate(AggState *aggstate, AggStatePerAggState peraggstate,
 					 AggStatePerGroup pergroupstate)
 {
 	/*
@@ -569,7 +636,7 @@ initialize_aggregate(AggState *aggstate, AggStatePerAgg peraggstate,
 }
 
 /*
- * Initialize all aggregates for a new group of input values.
+ * Initialize all aggregate states for a new group of input values.
  *
  * If there are multiple grouping sets, we initialize only the first numReset
  * of them (the grouping sets are ordered so that the most specific one, which
@@ -580,26 +647,26 @@ initialize_aggregate(AggState *aggstate, AggStatePerAgg peraggstate,
  */
 static void
 initialize_aggregates(AggState *aggstate,
-					  AggStatePerAgg peragg,
+					  AggStatePerAggState peraggstates,
 					  AggStatePerGroup pergroup,
 					  int numReset)
 {
-	int			aggno;
+	int			stateno;
 	int			numGroupingSets = Max(aggstate->phase->numsets, 1);
 	int			setno = 0;
 
 	if (numReset < 1)
 		numReset = numGroupingSets;
 
-	for (aggno = 0; aggno < aggstate->numaggs; aggno++)
+	for (stateno = 0; stateno < aggstate->numstates; stateno++)
 	{
-		AggStatePerAgg peraggstate = &peragg[aggno];
+		AggStatePerAggState peraggstate = &peraggstates[stateno];
 
 		for (setno = 0; setno < numReset; setno++)
 		{
 			AggStatePerGroup pergroupstate;
 
-			pergroupstate = &pergroup[aggno + (setno * (aggstate->numaggs))];
+			pergroupstate = &pergroup[stateno + (setno * (aggstate->numstates))];
 
 			aggstate->current_set = setno;
 
@@ -610,7 +677,7 @@ initialize_aggregates(AggState *aggstate,
 
 /*
  * Given new input value(s), advance the transition function of one aggregate
- * within one grouping set only (already set in aggstate->current_set)
+ * state within one grouping set only (already set in aggstate->current_set)
  *
  * The new values (and null flags) have been preloaded into argument positions
  * 1 and up in peraggstate->transfn_fcinfo, so that we needn't copy them again
@@ -621,7 +688,7 @@ initialize_aggregates(AggState *aggstate,
  */
 static void
 advance_transition_function(AggState *aggstate,
-							AggStatePerAgg peraggstate,
+							AggStatePerAggState peraggstate,
 							AggStatePerGroup pergroupstate)
 {
 	FunctionCallInfo fcinfo = &peraggstate->transfn_fcinfo;
@@ -678,8 +745,8 @@ advance_transition_function(AggState *aggstate,
 	/* We run the transition functions in per-input-tuple memory context */
 	oldContext = MemoryContextSwitchTo(aggstate->tmpcontext->ecxt_per_tuple_memory);
 
-	/* set up aggstate->curperagg for AggGetAggref() */
-	aggstate->curperagg = peraggstate;
+	/* set up aggstate->curperaggstate for AggGetAggref() */
+	aggstate->curperaggstate = peraggstate;
 
 	/*
 	 * OK to call the transition function
@@ -690,7 +757,7 @@ advance_transition_function(AggState *aggstate,
 
 	newVal = FunctionCallInvoke(fcinfo);
 
-	aggstate->curperagg = NULL;
+	aggstate->curperaggstate = NULL;
 
 	/*
 	 * If pass-by-ref datatype, must copy the new value into aggcontext and
@@ -718,7 +785,7 @@ advance_transition_function(AggState *aggstate,
 }
 
 /*
- * Advance all the aggregates for one input tuple.  The input tuple
+ * Advance each aggregate state for one input tuple.  The input tuple
  * has been stored in tmpcontext->ecxt_outertuple, so that it is accessible
  * to ExecEvalExpr.  pergroup is the array of per-group structs to use
  * (this might be in a hashtable entry).
@@ -728,15 +795,15 @@ advance_transition_function(AggState *aggstate,
 static void
 advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
 {
-	int			aggno;
+	int			stateno;
 	int			setno = 0;
 	int			numGroupingSets = Max(aggstate->phase->numsets, 1);
-	int			numAggs = aggstate->numaggs;
+	int			numStates = aggstate->numstates;
 
-	for (aggno = 0; aggno < numAggs; aggno++)
+	for (stateno = 0; stateno < numStates; stateno++)
 	{
-		AggStatePerAgg peraggstate = &aggstate->peragg[aggno];
-		ExprState  *filter = peraggstate->aggrefstate->aggfilter;
+		AggStatePerAggState peraggstate = &aggstate->peraggstate[stateno];
+		ExprState  *filter = peraggstate->aggfilter;
 		int			numTransInputs = peraggstate->numTransInputs;
 		int			i;
 		TupleTableSlot *slot;
@@ -806,7 +873,7 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
 
 			for (setno = 0; setno < numGroupingSets; setno++)
 			{
-				AggStatePerGroup pergroupstate = &pergroup[aggno + (setno * numAggs)];
+				AggStatePerGroup pergroupstate = &pergroup[stateno + (setno * numStates)];
 
 				aggstate->current_set = setno;
 
@@ -841,7 +908,7 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
  */
 static void
 process_ordered_aggregate_single(AggState *aggstate,
-								 AggStatePerAgg peraggstate,
+								 AggStatePerAggState peraggstate,
 								 AggStatePerGroup pergroupstate)
 {
 	Datum		oldVal = (Datum) 0;
@@ -930,7 +997,7 @@ process_ordered_aggregate_single(AggState *aggstate,
  */
 static void
 process_ordered_aggregate_multi(AggState *aggstate,
-								AggStatePerAgg peraggstate,
+								AggStatePerAggState peraggstate,
 								AggStatePerGroup pergroupstate)
 {
 	MemoryContext workcontext = aggstate->tmpcontext->ecxt_per_tuple_memory;
@@ -1009,10 +1076,14 @@ process_ordered_aggregate_multi(AggState *aggstate,
  *
  * The finalfunction will be run, and the result delivered, in the
  * output-tuple context; caller's CurrentMemoryContext does not matter.
+ *
+ * The finalfn uses the state as set in the stateno. This also might be
+ * being used by another aggregate function, so it's important that we do
+ * nothing destructive here.
  */
 static void
 finalize_aggregate(AggState *aggstate,
-				   AggStatePerAgg peraggstate,
+				   AggStatePerAgg peragg,
 				   AggStatePerGroup pergroupstate,
 				   Datum *resultVal, bool *resultIsNull)
 {
@@ -1021,6 +1092,7 @@ finalize_aggregate(AggState *aggstate,
 	MemoryContext oldContext;
 	int			i;
 	ListCell   *lc;
+	AggStatePerAggState peraggstate = &aggstate->peraggstate[peragg->stateno];
 
 	oldContext = MemoryContextSwitchTo(aggstate->ss.ps.ps_ExprContext->ecxt_per_tuple_memory);
 
@@ -1031,7 +1103,7 @@ finalize_aggregate(AggState *aggstate,
 	 * for the transition state value.
 	 */
 	i = 1;
-	foreach(lc, peraggstate->aggrefstate->aggdirectargs)
+	foreach(lc, peraggstate->aggdirectargs)
 	{
 		ExprState  *expr = (ExprState *) lfirst(lc);
 
@@ -1046,14 +1118,14 @@ finalize_aggregate(AggState *aggstate,
 	/*
 	 * Apply the agg's finalfn if one is provided, else return transValue.
 	 */
-	if (OidIsValid(peraggstate->finalfn_oid))
+	if (OidIsValid(peragg->finalfn_oid))
 	{
-		int			numFinalArgs = peraggstate->numFinalArgs;
+		int			numFinalArgs = peragg->numFinalArgs;
 
-		/* set up aggstate->curperagg for AggGetAggref() */
-		aggstate->curperagg = peraggstate;
+		/* set up aggstate->curperaggstate for AggGetAggref() */
+		aggstate->curperaggstate = peraggstate;
 
-		InitFunctionCallInfoData(fcinfo, &peraggstate->finalfn,
+		InitFunctionCallInfoData(fcinfo, &peragg->finalfn,
 								 numFinalArgs,
 								 peraggstate->aggCollation,
 								 (void *) aggstate, NULL);
@@ -1082,7 +1154,7 @@ finalize_aggregate(AggState *aggstate,
 			*resultVal = FunctionCallInvoke(&fcinfo);
 			*resultIsNull = fcinfo.isnull;
 		}
-		aggstate->curperagg = NULL;
+		aggstate->curperaggstate = NULL;
 	}
 	else
 	{
@@ -1093,12 +1165,12 @@ finalize_aggregate(AggState *aggstate,
 	/*
 	 * If result is pass-by-ref, make sure it is in the right context.
 	 */
-	if (!peraggstate->resulttypeByVal && !*resultIsNull &&
+	if (!peragg->resulttypeByVal && !*resultIsNull &&
 		!MemoryContextContains(CurrentMemoryContext,
 							   DatumGetPointer(*resultVal)))
 		*resultVal = datumCopy(*resultVal,
-							   peraggstate->resulttypeByVal,
-							   peraggstate->resulttypeLen);
+							   peragg->resulttypeByVal,
+							   peragg->resulttypeLen);
 
 	MemoryContextSwitchTo(oldContext);
 }
@@ -1173,7 +1245,7 @@ prepare_projection_slot(AggState *aggstate, TupleTableSlot *slot, int currentSet
  */
 static void
 finalize_aggregates(AggState *aggstate,
-					AggStatePerAgg peragg,
+					AggStatePerAgg peraggs,
 					AggStatePerGroup pergroup,
 					int currentSet)
 {
@@ -1189,10 +1261,12 @@ finalize_aggregates(AggState *aggstate,
 
 	for (aggno = 0; aggno < aggstate->numaggs; aggno++)
 	{
-		AggStatePerAgg peraggstate = &peragg[aggno];
+		AggStatePerAgg peragg = &peraggs[aggno];
+		int			stateno = peragg->stateno;
+		AggStatePerAggState peraggstate = &aggstate->peraggstate[stateno];
 		AggStatePerGroup pergroupstate;
 
-		pergroupstate = &pergroup[aggno + (currentSet * (aggstate->numaggs))];
+		pergroupstate = &pergroup[stateno + (currentSet * (aggstate->numstates))];
 
 		if (peraggstate->numSortCols > 0)
 		{
@@ -1208,7 +1282,7 @@ finalize_aggregates(AggState *aggstate,
 												pergroupstate);
 		}
 
-		finalize_aggregate(aggstate, peraggstate, pergroupstate,
+		finalize_aggregate(aggstate, peragg, pergroupstate,
 						   &aggvalues[aggno], &aggnulls[aggno]);
 	}
 }
@@ -1428,7 +1502,7 @@ lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot)
 	if (isnew)
 	{
 		/* initialize aggregates for new tuple group */
-		initialize_aggregates(aggstate, aggstate->peragg, entry->pergroup, 0);
+		initialize_aggregates(aggstate, aggstate->peraggstate, entry->pergroup, 0);
 	}
 
 	return entry;
@@ -1505,6 +1579,7 @@ agg_retrieve_direct(AggState *aggstate)
 	ExprContext *econtext;
 	ExprContext *tmpcontext;
 	AggStatePerAgg peragg;
+	AggStatePerAggState peraggstate;
 	AggStatePerGroup pergroup;
 	TupleTableSlot *outerslot;
 	TupleTableSlot *firstSlot;
@@ -1527,6 +1602,7 @@ agg_retrieve_direct(AggState *aggstate)
 	tmpcontext = aggstate->tmpcontext;
 
 	peragg = aggstate->peragg;
+	peraggstate = aggstate->peraggstate;
 	pergroup = aggstate->pergroup;
 	firstSlot = aggstate->ss.ss_ScanTupleSlot;
 
@@ -1716,7 +1792,7 @@ agg_retrieve_direct(AggState *aggstate)
 			/*
 			 * Initialize working state for a new input tuple group.
 			 */
-			initialize_aggregates(aggstate, peragg, pergroup, numReset);
+			initialize_aggregates(aggstate, peraggstate, pergroup, numReset);
 
 			if (aggstate->grp_firstTuple != NULL)
 			{
@@ -1945,17 +2021,18 @@ AggState *
 ExecInitAgg(Agg *node, EState *estate, int eflags)
 {
 	AggState   *aggstate;
-	AggStatePerAgg peragg;
+	AggStatePerAgg peraggs;
+	AggStatePerAggState peraggstates;
 	Plan	   *outerPlan;
 	ExprContext *econtext;
 	int			numaggs,
+				stateno,
 				aggno;
 	int			phase;
 	ListCell   *l;
 	Bitmapset  *all_grouped_cols = NULL;
 	int			numGroupingSets = 1;
 	int			numPhases;
-	int			currentsortno = 0;
 	int			i = 0;
 	int			j = 0;
 
@@ -1971,12 +2048,14 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 
 	aggstate->aggs = NIL;
 	aggstate->numaggs = 0;
+	aggstate->numstates = 0;
 	aggstate->maxsets = 0;
 	aggstate->hashfunctions = NULL;
 	aggstate->projected_set = -1;
 	aggstate->current_set = 0;
 	aggstate->peragg = NULL;
-	aggstate->curperagg = NULL;
+	aggstate->peraggstate = NULL;
+	aggstate->curperaggstate = NULL;
 	aggstate->agg_done = false;
 	aggstate->input_done = false;
 	aggstate->pergroup = NULL;
@@ -2209,8 +2288,11 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 	econtext->ecxt_aggvalues = (Datum *) palloc0(sizeof(Datum) * numaggs);
 	econtext->ecxt_aggnulls = (bool *) palloc0(sizeof(bool) * numaggs);
 
-	peragg = (AggStatePerAgg) palloc0(sizeof(AggStatePerAggData) * numaggs);
-	aggstate->peragg = peragg;
+	peraggs = (AggStatePerAgg) palloc0(sizeof(AggStatePerAggData) * numaggs);
+	peraggstates = (AggStatePerAggState) palloc0(sizeof(AggStatePerAggStateData) * numaggs);
+
+	aggstate->peragg = peraggs;
+	aggstate->peraggstate = peraggstates;
 
 	if (node->aggstrategy == AGG_HASHED)
 	{
@@ -2232,69 +2314,67 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 
 	/*
 	 * Perform lookups of aggregate function info, and initialize the
-	 * unchanging fields of the per-agg data.  We also detect duplicate
-	 * aggregates (for example, "SELECT sum(x) ... HAVING sum(x) > 0"). When
-	 * duplicates are detected, we only make an AggStatePerAgg struct for the
-	 * first one.  The clones are simply pointed at the same result entry by
-	 * giving them duplicate aggno values.
+	 * unchanging fields of the per-agg data.
 	 */
 	aggno = -1;
+	stateno = -1;
 	foreach(l, aggstate->aggs)
 	{
 		AggrefExprState *aggrefstate = (AggrefExprState *) lfirst(l);
 		Aggref	   *aggref = (Aggref *) aggrefstate->xprstate.expr;
-		AggStatePerAgg peraggstate;
+		AggStatePerAgg peragg;
+		AggStatePerAggState peraggstate;
+		AggRefCompatibility agg_match;
 		Oid			inputTypes[FUNC_MAX_ARGS];
 		int			numArguments;
 		int			numDirectArgs;
-		int			numInputs;
-		int			numSortCols;
-		int			numDistinctCols;
-		List	   *sortlist;
 		HeapTuple	aggTuple;
 		Form_pg_aggregate aggform;
-		Oid			aggtranstype;
 		AclResult	aclresult;
 		Oid			transfn_oid,
 					finalfn_oid;
-		Expr	   *transfnexpr,
-				   *finalfnexpr;
-		Datum		textInitVal;
-		int			i;
-		ListCell   *lc;
+		Expr	   *finalfnexpr;
+		int			existing_aggno;
 
 		/* Planner should have assigned aggregate to correct level */
 		Assert(aggref->agglevelsup == 0);
 
-		/* Look for a previous duplicate aggregate */
-		for (i = 0; i <= aggno; i++)
-		{
-			if (equal(aggref, peragg[i].aggref) &&
-				!contain_volatile_functions((Node *) aggref))
-				break;
-		}
-		if (i <= aggno)
+		/*
+		 * For performance reasons we detect duplicate aggregates (for
+		 * example, "SELECT sum(x) ... HAVING sum(x) > 0"). When duplicates
+		 * are detected, we only make an AggStatePerAgg struct for the first
+		 * one. The clones are simply pointed at the same result entry by
+		 * giving them duplicate aggno values. We also do our best to reuse
+		 * duplicate aggregate states. The query may use 2 or more aggregate
+		 * functions which share the same transition function and initial
+		 * value therefore would end up calculating the same state. In this
+		 * case we can just calculate the state once, however if the finalfns
+		 * do not match then we must create a new peragg to store the varying
+		 * finalfn.
+		 */
+
+		/* check if we have previous agg or state matches that can be reused */
+		agg_match = find_compatible_aggref(aggref, aggstate, aggno,
+										   &existing_aggno);
+		if (agg_match == AGGREF_EXACT_MATCH)
 		{
-			/* Found a match to an existing entry, so just mark it */
-			aggrefstate->aggno = i;
+			/*
+			 * Exact match -- this must be using same aggregate function or
+			 * have the same transfn and finalfn. Just reuse the existing agg.
+			 */
+			aggrefstate->aggno = existing_aggno;
 			continue;
 		}
 
-		/* Nope, so assign a new PerAgg record */
-		peraggstate = &peragg[++aggno];
+		/*
+		 * Otherwise set up a new Per-Agg for this, and possibly a new
+		 * per-AggState too.
+		 */
 
 		/* Mark Aggref state node with assigned index in the result array */
+		peragg = &peraggs[++aggno];
 		aggrefstate->aggno = aggno;
 
-		/* Begin filling in the peraggstate data */
-		peraggstate->aggrefstate = aggrefstate;
-		peraggstate->aggref = aggref;
-		peraggstate->sortstates = (Tuplesortstate **)
-			palloc0(sizeof(Tuplesortstate *) * numGroupingSets);
-
-		for (currentsortno = 0; currentsortno < numGroupingSets; currentsortno++)
-			peraggstate->sortstates[currentsortno] = NULL;
-
 		/* Fetch the pg_aggregate row */
 		aggTuple = SearchSysCache1(AGGFNOID,
 								   ObjectIdGetDatum(aggref->aggfnoid));
@@ -2311,8 +2391,8 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 						   get_func_name(aggref->aggfnoid));
 		InvokeFunctionExecuteHook(aggref->aggfnoid);
 
-		peraggstate->transfn_oid = transfn_oid = aggform->aggtransfn;
-		peraggstate->finalfn_oid = finalfn_oid = aggform->aggfinalfn;
+		transfn_oid = aggform->aggtransfn;
+		peragg->finalfn_oid = finalfn_oid = aggform->aggfinalfn;
 
 		/* Check that aggregate owner has permission to call component fns */
 		{
@@ -2327,12 +2407,20 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 			aggOwner = ((Form_pg_proc) GETSTRUCT(procTuple))->proowner;
 			ReleaseSysCache(procTuple);
 
-			aclresult = pg_proc_aclcheck(transfn_oid, aggOwner,
-										 ACL_EXECUTE);
-			if (aclresult != ACLCHECK_OK)
-				aclcheck_error(aclresult, ACL_KIND_PROC,
-							   get_func_name(transfn_oid));
-			InvokeFunctionExecuteHook(transfn_oid);
+			/*
+			 * If we're reusing an existing state, no need to check the
+			 * transfn permission again.
+			 */
+			if (agg_match == AGGREF_NO_MATCH)
+			{
+				aclresult = pg_proc_aclcheck(transfn_oid, aggOwner,
+											 ACL_EXECUTE);
+				if (aclresult != ACLCHECK_OK)
+					aclcheck_error(aclresult, ACL_KIND_PROC,
+								   get_func_name(transfn_oid));
+				InvokeFunctionExecuteHook(transfn_oid);
+			}
+
 			if (OidIsValid(finalfn_oid))
 			{
 				aclresult = pg_proc_aclcheck(finalfn_oid, aggOwner,
@@ -2350,236 +2438,333 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 		 * agg accepts ANY or a polymorphic type.
 		 */
 		numArguments = get_aggregate_argtypes(aggref, inputTypes);
-		peraggstate->numArguments = numArguments;
 
 		/* Count the "direct" arguments, if any */
 		numDirectArgs = list_length(aggref->aggdirectargs);
 
-		/* Count the number of aggregated input columns */
-		numInputs = list_length(aggref->args);
-		peraggstate->numInputs = numInputs;
-
-		/* Detect how many arguments to pass to the transfn */
-		if (AGGKIND_IS_ORDERED_SET(aggref->aggkind))
-			peraggstate->numTransInputs = numInputs;
+		/*
+		 * Build working state for invoking the transition function (or look
+		 * up previously initialized working state, if we can share it).
+		 */
+		if (agg_match == AGGREF_NO_MATCH)
+		{
+			peraggstate = &peraggstates[++stateno];
+			build_peraggstate_for_aggref(peraggstate, aggstate, estate,
+										 aggref,
+										 aggTuple, inputTypes, numArguments);
+			peragg->stateno = stateno;
+		}
 		else
-			peraggstate->numTransInputs = numArguments;
+		{
+			int			existing_stateno = peraggs[existing_aggno].stateno;
+
+			peraggstate = &peraggstates[existing_stateno];
+			peragg->stateno = existing_stateno;
+
+			/* when reusing the state the transfns should match! */
+			Assert(peraggstate->transfn_oid == aggform->aggtransfn);
+		}
 
 		/* Detect how many arguments to pass to the finalfn */
 		if (aggform->aggfinalextra)
-			peraggstate->numFinalArgs = numArguments + 1;
+			peragg->numFinalArgs = numArguments + 1;
 		else
-			peraggstate->numFinalArgs = numDirectArgs + 1;
-
-		/* resolve actual type of transition state, if polymorphic */
-		aggtranstype = resolve_aggregate_transtype(aggref->aggfnoid,
-												   aggform->aggtranstype,
-												   inputTypes,
-												   numArguments);
-
-		/* build expression trees using actual argument & result types */
-		build_aggregate_fnexprs(inputTypes,
-								numArguments,
-								numDirectArgs,
-								peraggstate->numFinalArgs,
-								aggref->aggvariadic,
-								aggtranstype,
-								aggref->aggtype,
-								aggref->inputcollid,
-								transfn_oid,
-								InvalidOid,		/* invtrans is not needed here */
-								finalfn_oid,
-								&transfnexpr,
-								NULL,
-								&finalfnexpr);
-
-		/* set up infrastructure for calling the transfn and finalfn */
-		fmgr_info(transfn_oid, &peraggstate->transfn);
-		fmgr_info_set_expr((Node *) transfnexpr, &peraggstate->transfn);
+			peragg->numFinalArgs = numDirectArgs + 1;
 
+		/*
+		 * build expression trees using actual argument & result types for the
+		 * finalfn, if it exists
+		 */
 		if (OidIsValid(finalfn_oid))
 		{
-			fmgr_info(finalfn_oid, &peraggstate->finalfn);
-			fmgr_info_set_expr((Node *) finalfnexpr, &peraggstate->finalfn);
+			build_aggregate_finalfn_expr(inputTypes,
+										 peragg->numFinalArgs,
+										 peraggstate->aggtranstype,
+										 aggref->aggtype,
+										 aggref->inputcollid,
+										 finalfn_oid,
+										 &finalfnexpr);
+			fmgr_info(finalfn_oid, &peragg->finalfn);
+			fmgr_info_set_expr((Node *) finalfnexpr, &peragg->finalfn);
 		}
 
-		peraggstate->aggCollation = aggref->inputcollid;
+		/* get info about the result type's datatype */
+		get_typlenbyval(aggref->aggtype,
+						&peragg->resulttypeLen,
+						&peragg->resulttypeByVal);
 
-		InitFunctionCallInfoData(peraggstate->transfn_fcinfo,
-								 &peraggstate->transfn,
-								 peraggstate->numTransInputs + 1,
-								 peraggstate->aggCollation,
-								 (void *) aggstate, NULL);
+		ReleaseSysCache(aggTuple);
+	}
 
-		/* get info about relevant datatypes */
-		get_typlenbyval(aggref->aggtype,
-						&peraggstate->resulttypeLen,
-						&peraggstate->resulttypeByVal);
-		get_typlenbyval(aggtranstype,
-						&peraggstate->transtypeLen,
-						&peraggstate->transtypeByVal);
+	/*
+	 * Update numaggs to match the number of unique aggregates found. Also set
+	 * numstates to the number of unique aggregate states found.
+	 */
+	aggstate->numaggs = aggno + 1;
+	aggstate->numstates = stateno + 1;
 
-		/*
-		 * initval is potentially null, so don't try to access it as a struct
-		 * field. Must do it the hard way with SysCacheGetAttr.
-		 */
-		textInitVal = SysCacheGetAttr(AGGFNOID, aggTuple,
-									  Anum_pg_aggregate_agginitval,
-									  &peraggstate->initValueIsNull);
+	return aggstate;
+}
 
-		if (peraggstate->initValueIsNull)
-			peraggstate->initValue = (Datum) 0;
-		else
-			peraggstate->initValue = GetAggInitVal(textInitVal,
-												   aggtranstype);
+/*
+ * Build the state needed to calculate a state value for an aggregate.
+ *
+ * This initializes all the fields in 'peraggstate'. 'aggTuple',
+ * 'inputTypes' and 'numArguments' could be derived from 'aggref', but the
+ * caller has calculated them already, so might as well pass them.
+ */
+static void
+build_peraggstate_for_aggref(AggStatePerAggState peraggstate,
+							 AggState *aggstate, EState *estate,
+							 Aggref *aggref, HeapTuple aggTuple,
+							 Oid *inputTypes, int numArguments)
+{
+	Form_pg_aggregate aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
+	int			numGroupingSets = Max(aggstate->maxsets, 1);
+	Expr	   *transfnexpr;
+	ListCell   *lc;
+	int			numInputs;
+	int			numDirectArgs;
+	List	   *sortlist;
+	int			numSortCols;
+	int			numDistinctCols;
+	int			currentsortno;
+	int			naggs;
+	int			i;
+	Datum		textInitVal;
+	Oid			transfn_oid;
 
-		/*
-		 * If the transfn is strict and the initval is NULL, make sure input
-		 * type and transtype are the same (or at least binary-compatible), so
-		 * that it's OK to use the first aggregated input value as the initial
-		 * transValue.  This should have been checked at agg definition time,
-		 * but we must check again in case the transfn's strictness property
-		 * has been changed.
-		 */
-		if (peraggstate->transfn.fn_strict && peraggstate->initValueIsNull)
-		{
-			if (numArguments <= numDirectArgs ||
-				!IsBinaryCoercible(inputTypes[numDirectArgs], aggtranstype))
-				ereport(ERROR,
-						(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
-						 errmsg("aggregate %u needs to have compatible input type and transition type",
-								aggref->aggfnoid)));
-		}
+	/* Begin filling in the peraggstate data */
+	peraggstate->aggref = aggref;
+	peraggstate->aggCollation = aggref->inputcollid;
+	peraggstate->transfn_oid = transfn_oid = aggform->aggtransfn;
 
-		/*
-		 * Get a tupledesc corresponding to the aggregated inputs (including
-		 * sort expressions) of the agg.
-		 */
-		peraggstate->evaldesc = ExecTypeFromTL(aggref->args, false);
+	/* Count the "direct" arguments, if any */
+	numDirectArgs = list_length(aggref->aggdirectargs);
+
+	/* Count the number of aggregated input columns */
+	peraggstate->numInputs = numInputs = list_length(aggref->args);
+
+	/* resolve actual type of transition state, if polymorphic */
+	peraggstate->aggtranstype =
+		resolve_aggregate_transtype(aggref->aggfnoid,
+									aggform->aggtranstype,
+									inputTypes,
+									numArguments);
+
+	/* Detect how many arguments to pass to the transfn */
+	if (AGGKIND_IS_ORDERED_SET(aggref->aggkind))
+		peraggstate->numTransInputs = numInputs;
+	else
+		peraggstate->numTransInputs = numArguments;
+
+	/*
+	 * Set up infrastructure for calling the transfn
+	 */
+	build_aggregate_transfn_expr(inputTypes,
+								 numArguments,
+								 numDirectArgs,
+								 aggref->aggvariadic,
+								 peraggstate->aggtranstype,
+								 aggref->inputcollid,
+								 transfn_oid,
+								 InvalidOid,	/* invtrans is not needed here */
+								 &transfnexpr,
+								 NULL);
+	fmgr_info(peraggstate->transfn_oid, &peraggstate->transfn);
+	fmgr_info_set_expr((Node *) transfnexpr, &peraggstate->transfn);
+
+	InitFunctionCallInfoData(peraggstate->transfn_fcinfo,
+							 &peraggstate->transfn,
+							 peraggstate->numTransInputs + 1,
+							 peraggstate->aggCollation,
+							 (void *) aggstate, NULL);
+
+
+	/*
+	 * Look up the initial value.
+	 *
+	 * initval is potentially null, so don't try to access it as a struct
+	 * field. Must do it the hard way with SysCacheGetAttr.
+	 */
+	textInitVal = SysCacheGetAttr(AGGFNOID, aggTuple,
+								  Anum_pg_aggregate_agginitval,
+								  &peraggstate->initValueIsNull);
+
+	if (peraggstate->initValueIsNull)
+		peraggstate->initValue = (Datum) 0;
+	else
+		peraggstate->initValue = GetAggInitVal(textInitVal,
+											   peraggstate->aggtranstype);
 
-		/* Create slot we're going to do argument evaluation in */
-		peraggstate->evalslot = ExecInitExtraTupleSlot(estate);
-		ExecSetSlotDescriptor(peraggstate->evalslot, peraggstate->evaldesc);
+	/*
+	 * If the transfn is strict and the initval is NULL, make sure input type
+	 * and transtype are the same (or at least binary-compatible), so that
+	 * it's OK to use the first aggregated input value as the initial
+	 * transValue.  This should have been checked at agg definition time, but
+	 * we must check again in case the transfn's strictness property has been
+	 * changed.
+	 */
+	if (peraggstate->transfn.fn_strict && peraggstate->initValueIsNull)
+	{
+		if (numArguments <= numDirectArgs ||
+			!IsBinaryCoercible(inputTypes[numDirectArgs],
+							   peraggstate->aggtranstype))
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+					 errmsg("aggregate needs to have compatible input type and transition type")));
+	}
+
+	/* get info about the state value's datatype */
+	get_typlenbyval(peraggstate->aggtranstype,
+					&peraggstate->transtypeLen,
+					&peraggstate->transtypeByVal);
 
-		/* Set up projection info for evaluation */
-		peraggstate->evalproj = ExecBuildProjectionInfo(aggrefstate->args,
-														aggstate->tmpcontext,
-														peraggstate->evalslot,
-														NULL);
+	/*
+	 * Get a tupledesc corresponding to the aggregated inputs (including sort
+	 * expressions) of the agg.
+	 */
+	peraggstate->evaldesc = ExecTypeFromTL(aggref->args, false);
 
+	/* Create slot we're going to do argument evaluation in */
+	peraggstate->evalslot = ExecInitExtraTupleSlot(estate);
+	ExecSetSlotDescriptor(peraggstate->evalslot, peraggstate->evaldesc);
+
+	/* Initialize the input and FILTER expressions */
+	naggs = aggstate->numaggs;
+	peraggstate->aggfilter = ExecInitExpr(aggref->aggfilter,
+										  (PlanState *) aggstate);
+	peraggstate->aggdirectargs = (List *) ExecInitExpr((Expr *) aggref->aggdirectargs,
+													 (PlanState *) aggstate);
+	peraggstate->args = (List *) ExecInitExpr((Expr *) aggref->args,
+											  (PlanState *) aggstate);
+
+	/*
+	 * Complain if the aggregate's arguments contain any  aggregates; nested
+	 * agg functions are semantically nonsensical.  (This should have been
+	 * caught earlier, but we defend against it here anyway.)
+	 */
+	if (naggs != aggstate->numaggs)
+		ereport(ERROR,
+				(errcode(ERRCODE_GROUPING_ERROR),
+				 errmsg("aggregate function calls cannot be nested")));
+
+	/* Set up projection info for evaluation */
+	peraggstate->evalproj = ExecBuildProjectionInfo(peraggstate->args,
+													aggstate->tmpcontext,
+													peraggstate->evalslot,
+													NULL);
+
+	/*
+	 * If we're doing either DISTINCT or ORDER BY for a plain agg, then we
+	 * have a list of SortGroupClause nodes; fish out the data in them and
+	 * stick them into arrays.  We ignore ORDER BY for an ordered-set agg,
+	 * however; the agg's transfn and finalfn are responsible for that.
+	 *
+	 * Note that by construction, if there is a DISTINCT clause then the ORDER
+	 * BY clause is a prefix of it (see transformDistinctClause).
+	 */
+	if (AGGKIND_IS_ORDERED_SET(aggref->aggkind))
+	{
+		sortlist = NIL;
+		numSortCols = numDistinctCols = 0;
+	}
+	else if (aggref->aggdistinct)
+	{
+		sortlist = aggref->aggdistinct;
+		numSortCols = numDistinctCols = list_length(sortlist);
+		Assert(numSortCols >= list_length(aggref->aggorder));
+	}
+	else
+	{
+		sortlist = aggref->aggorder;
+		numSortCols = list_length(sortlist);
+		numDistinctCols = 0;
+	}
+
+	peraggstate->numSortCols = numSortCols;
+	peraggstate->numDistinctCols = numDistinctCols;
+
+	if (numSortCols > 0)
+	{
 		/*
-		 * If we're doing either DISTINCT or ORDER BY for a plain agg, then we
-		 * have a list of SortGroupClause nodes; fish out the data in them and
-		 * stick them into arrays.  We ignore ORDER BY for an ordered-set agg,
-		 * however; the agg's transfn and finalfn are responsible for that.
-		 *
-		 * Note that by construction, if there is a DISTINCT clause then the
-		 * ORDER BY clause is a prefix of it (see transformDistinctClause).
+		 * We don't implement DISTINCT or ORDER BY aggs in the HASHED case
+		 * (yet)
 		 */
-		if (AGGKIND_IS_ORDERED_SET(aggref->aggkind))
-		{
-			sortlist = NIL;
-			numSortCols = numDistinctCols = 0;
-		}
-		else if (aggref->aggdistinct)
+		Assert(((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED);
+
+		/* If we have only one input, we need its len/byval info. */
+		if (numInputs == 1)
 		{
-			sortlist = aggref->aggdistinct;
-			numSortCols = numDistinctCols = list_length(sortlist);
-			Assert(numSortCols >= list_length(aggref->aggorder));
+			get_typlenbyval(inputTypes[numDirectArgs],
+							&peraggstate->inputtypeLen,
+							&peraggstate->inputtypeByVal);
 		}
-		else
+		else if (numDistinctCols > 0)
 		{
-			sortlist = aggref->aggorder;
-			numSortCols = list_length(sortlist);
-			numDistinctCols = 0;
+			/* we will need an extra slot to store prior values */
+			peraggstate->uniqslot = ExecInitExtraTupleSlot(estate);
+			ExecSetSlotDescriptor(peraggstate->uniqslot,
+								  peraggstate->evaldesc);
 		}
 
-		peraggstate->numSortCols = numSortCols;
-		peraggstate->numDistinctCols = numDistinctCols;
-
-		if (numSortCols > 0)
+		/* Extract the sort information for use later */
+		peraggstate->sortColIdx =
+			(AttrNumber *) palloc(numSortCols * sizeof(AttrNumber));
+		peraggstate->sortOperators =
+			(Oid *) palloc(numSortCols * sizeof(Oid));
+		peraggstate->sortCollations =
+			(Oid *) palloc(numSortCols * sizeof(Oid));
+		peraggstate->sortNullsFirst =
+			(bool *) palloc(numSortCols * sizeof(bool));
+
+		i = 0;
+		foreach(lc, sortlist)
 		{
-			/*
-			 * We don't implement DISTINCT or ORDER BY aggs in the HASHED case
-			 * (yet)
-			 */
-			Assert(node->aggstrategy != AGG_HASHED);
-
-			/* If we have only one input, we need its len/byval info. */
-			if (numInputs == 1)
-			{
-				get_typlenbyval(inputTypes[numDirectArgs],
-								&peraggstate->inputtypeLen,
-								&peraggstate->inputtypeByVal);
-			}
-			else if (numDistinctCols > 0)
-			{
-				/* we will need an extra slot to store prior values */
-				peraggstate->uniqslot = ExecInitExtraTupleSlot(estate);
-				ExecSetSlotDescriptor(peraggstate->uniqslot,
-									  peraggstate->evaldesc);
-			}
+			SortGroupClause *sortcl = (SortGroupClause *) lfirst(lc);
+			TargetEntry *tle = get_sortgroupclause_tle(sortcl, aggref->args);
 
-			/* Extract the sort information for use later */
-			peraggstate->sortColIdx =
-				(AttrNumber *) palloc(numSortCols * sizeof(AttrNumber));
-			peraggstate->sortOperators =
-				(Oid *) palloc(numSortCols * sizeof(Oid));
-			peraggstate->sortCollations =
-				(Oid *) palloc(numSortCols * sizeof(Oid));
-			peraggstate->sortNullsFirst =
-				(bool *) palloc(numSortCols * sizeof(bool));
+			/* the parser should have made sure of this */
+			Assert(OidIsValid(sortcl->sortop));
 
-			i = 0;
-			foreach(lc, sortlist)
-			{
-				SortGroupClause *sortcl = (SortGroupClause *) lfirst(lc);
-				TargetEntry *tle = get_sortgroupclause_tle(sortcl,
-														   aggref->args);
-
-				/* the parser should have made sure of this */
-				Assert(OidIsValid(sortcl->sortop));
-
-				peraggstate->sortColIdx[i] = tle->resno;
-				peraggstate->sortOperators[i] = sortcl->sortop;
-				peraggstate->sortCollations[i] = exprCollation((Node *) tle->expr);
-				peraggstate->sortNullsFirst[i] = sortcl->nulls_first;
-				i++;
-			}
-			Assert(i == numSortCols);
+			peraggstate->sortColIdx[i] = tle->resno;
+			peraggstate->sortOperators[i] = sortcl->sortop;
+			peraggstate->sortCollations[i] = exprCollation((Node *) tle->expr);
+			peraggstate->sortNullsFirst[i] = sortcl->nulls_first;
+			i++;
 		}
+		Assert(i == numSortCols);
+	}
 
-		if (aggref->aggdistinct)
-		{
-			Assert(numArguments > 0);
+	if (aggref->aggdistinct)
+	{
+		Assert(numArguments > 0);
 
-			/*
-			 * We need the equal function for each DISTINCT comparison we will
-			 * make.
-			 */
-			peraggstate->equalfns =
-				(FmgrInfo *) palloc(numDistinctCols * sizeof(FmgrInfo));
+		/*
+		 * We need the equal function for each DISTINCT comparison we will
+		 * make.
+		 */
+		peraggstate->equalfns =
+			(FmgrInfo *) palloc(numDistinctCols * sizeof(FmgrInfo));
 
-			i = 0;
-			foreach(lc, aggref->aggdistinct)
-			{
-				SortGroupClause *sortcl = (SortGroupClause *) lfirst(lc);
+		i = 0;
+		foreach(lc, aggref->aggdistinct)
+		{
+			SortGroupClause *sortcl = (SortGroupClause *) lfirst(lc);
 
-				fmgr_info(get_opcode(sortcl->eqop), &peraggstate->equalfns[i]);
-				i++;
-			}
-			Assert(i == numDistinctCols);
+			fmgr_info(get_opcode(sortcl->eqop), &peraggstate->equalfns[i]);
+			i++;
 		}
-
-		ReleaseSysCache(aggTuple);
+		Assert(i == numDistinctCols);
 	}
 
-	/* Update numaggs to match number of unique aggregates found */
-	aggstate->numaggs = aggno + 1;
-
-	return aggstate;
+	peraggstate->sortstates = (Tuplesortstate **)
+		palloc0(sizeof(Tuplesortstate *) * numGroupingSets);
+	for (currentsortno = 0; currentsortno < numGroupingSets; currentsortno++)
+		peraggstate->sortstates[currentsortno] = NULL;
 }
 
+
 static Datum
 GetAggInitVal(Datum textInitVal, Oid transtype)
 {
@@ -2596,11 +2781,199 @@ GetAggInitVal(Datum textInitVal, Oid transtype)
 	return initVal;
 }
 
+/*
+ * find_compatible_aggref
+ *		Searches the previously looked at aggregates in order to find a
+ *		compatible aggregate or aggregate state. If a positive match is found
+ *		then foundaggno is set to the aggregate which matches.
+ *		When AGGREF_STATE_MATCH is returned the caller must only use the state
+ *		of the foundaggno, not the actual aggno itself.
+ *		When AGGREF_EXACT_MATCH is returned the caller may use both the aggno
+ *		and the state which that aggno uses.
+ *
+ * Scenario 1 -- An aggregate function appears more than once in query:
+ *
+ *		SELECT SUM(x) FROM ... HAVING SUM(x) > 0
+ *
+ * Since in this case the aggregates are both the same we can optimize by
+ * only calculating aggregate state and calling the finalfn just once. This
+ * would be an AGGREF_EXACT_MATCH, meaning both the state and the final
+ * function call are shared.
+ *
+ * Scenario 2 -- Two different aggregate functions appear in the query but
+ *				 the two functions happen to share the same transfn, but have
+ *				 different finalfn.
+ *
+ *		SELECT SUM(x), AVG(x) FROM ...
+ *
+ * Since in our case these two aggregates both share the same transfn, but
+ * naturally they have different finalfns. This situation is classed as an
+ * AGGREF_STATE_MATCH. This means that the same state can be shared by both
+ * aggregates. Since the finalfn call is not the same this cannot be reused.
+ * For this case to be valid the INITCOND of the aggregate, if one exists, must
+ * also match.
+ *
+ * Scenario 3 -- The same aggregate function is called with different
+ *				 parameters.
+ *
+ *		SELECT SUM(x),SUM(DISTINCT x) FROM ...
+ *		SELECT SUM(x),SUM(y) FROM ...
+ *		SELECT SUM(x),SUM(x) FILTER(WHERE x > 0) FROM ...
+ *
+ * All three of the above queries cannot share the same state and have to be
+ * calculated independently.
+ *
+ * Scenario 4 -- Different aggregates with the same parameters and the same
+ *				 transfn and finalfn.
+ *
+ *		SELECT SUM(x),SUM2(x) FROM ...
+ *
+ * A perhaps unlikely scenario where two aggregate functions exist which have,
+ * both the same transfn and the same finalfn. In this case we can report an
+ * AGGREF_EXACT_MATCH, providing the INITCOND of both aggregates are the same.
+ */
+static AggRefCompatibility
+find_compatible_aggref(Aggref *newagg, AggState *aggstate,
+					   int lastaggno, int *foundaggno)
+{
+	int			aggno;
+	int			statematchaggno;
+	AggStatePerAggState peraggstates;
+	AggStatePerAgg peraggs;
+
+	/* we mustn't reuse the aggref if it contains volatile function calls */
+	if (contain_volatile_functions((Node *) newagg))
+		return AGGREF_NO_MATCH;
+
+	statematchaggno = -1;
+	peraggstates = aggstate->peraggstate;
+	peraggs = aggstate->peragg;
+
+	/*
+	 * Search through the list of already seen aggregates. We'll stop when we
+	 * find an exact match, but until then we'll note any state matches that
+	 * we find. We may have to fall back on these should we fail to find an
+	 * exact match.
+	 */
+	for (aggno = 0; aggno <= lastaggno; aggno++)
+	{
+		AggRefCompatibility matchtype;
+		AggStatePerAgg peragg;
+		AggStatePerAggState peraggstate;
+
+		peragg = &peraggs[aggno];
+		peraggstate = &peraggstates[peragg->stateno];
+
+		/* lookup the match type of this agg */
+		matchtype = aggref_has_compatible_states(newagg, peragg, peraggstate);
+
+		/* if it's an exact match then we're done. */
+		if (matchtype == AGGREF_EXACT_MATCH)
+		{
+			*foundaggno = aggno;
+			return AGGREF_EXACT_MATCH;
+		}
+
+		/* remember any state matches, but keep on looking... */
+		else if (matchtype == AGGREF_STATE_MATCH)
+			statematchaggno = aggno;
+	}
+
+	/* no exact match found, but did we find a state match? */
+	if (statematchaggno >= 0)
+	{
+		*foundaggno = statematchaggno;
+		return AGGREF_STATE_MATCH;
+	}
+
+	return AGGREF_NO_MATCH;
+}
+
+/*
+ * aggref_has_compatible_states
+ *		Determines match type of this aggregate. See comments in
+ *		find_compatible_aggref() for details.
+ */
+static AggRefCompatibility
+aggref_has_compatible_states(Aggref *newagg,
+							 AggStatePerAgg peragg,
+							 AggStatePerAggState peraggstate)
+{
+	Aggref	   *existingRef = peraggstate->aggref;
+
+	/* all of the following must be the same or it's no match */
+	if (newagg->inputcollid != existingRef->inputcollid ||
+		newagg->aggstar != existingRef->aggstar ||
+		newagg->aggvariadic != existingRef->aggvariadic ||
+		newagg->aggkind != existingRef->aggkind ||
+		!equal(newagg->aggdirectargs, existingRef->aggdirectargs) ||
+		!equal(newagg->args, existingRef->args) ||
+		!equal(newagg->aggorder, existingRef->aggorder) ||
+		!equal(newagg->aggdistinct, existingRef->aggdistinct) ||
+		!equal(newagg->aggfilter, existingRef->aggfilter))
+		return AGGREF_NO_MATCH;
+
+	/* if it's the same aggregate function then report exact match */
+	if (newagg->aggfnoid == existingRef->aggfnoid &&
+		newagg->aggtype == existingRef->aggtype &&
+		newagg->aggcollid == existingRef->aggcollid)
+		return AGGREF_EXACT_MATCH;
+	else
+	{
+		/*
+		 * Aggregate functions differ. We'll need to do some more analysis
+		 * before we can know what the match type will be. If the transfn
+		 * match and the initvalue is the same then we can at least let the
+		 * newagg share the state, but if the finalfn also happens to match
+		 * then we can actually still report an exact match.
+		 */
+		HeapTuple	aggTuple;
+		Form_pg_aggregate aggform;
+		bool		initValueIsNull;
+
+		/* Fetch the pg_aggregate row */
+		aggTuple = SearchSysCache1(AGGFNOID, ObjectIdGetDatum(newagg->aggfnoid));
+		if (!HeapTupleIsValid(aggTuple))
+			elog(ERROR, "cache lookup failed for aggregate %u", newagg->aggfnoid);
+		aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
+
+		/* if the transfns are not the same then the state can't be shared */
+		if (aggform->aggtransfn != peraggstate->transfn_oid)
+		{
+			ReleaseSysCache(aggTuple);
+			return AGGREF_NO_MATCH;
+		}
+
+		SysCacheGetAttr(AGGFNOID, aggTuple,
+						Anum_pg_aggregate_agginitval, &initValueIsNull);
+
+		ReleaseSysCache(aggTuple);
+
+		/*
+		 * If both INITCONDs are null then the outcome depends on if the
+		 * finalfns match.
+		 */
+		if (initValueIsNull && peraggstate->initValueIsNull)
+		{
+			if (aggform->aggfinalfn != peragg->finalfn_oid)
+				return AGGREF_STATE_MATCH;
+			else
+				return AGGREF_EXACT_MATCH;
+		}
+
+		/*
+		 * XXX perhaps we should check the value of the initValue to see if
+		 * they match?
+		 */
+		return AGGREF_NO_MATCH;
+	}
+}
+
 void
 ExecEndAgg(AggState *node)
 {
 	PlanState  *outerPlan;
-	int			aggno;
+	int			stateno;
 	int			numGroupingSets = Max(node->maxsets, 1);
 	int			setno;
 
@@ -2611,9 +2984,9 @@ ExecEndAgg(AggState *node)
 	if (node->sort_out)
 		tuplesort_end(node->sort_out);
 
-	for (aggno = 0; aggno < node->numaggs; aggno++)
+	for (stateno = 0; stateno < node->numstates; stateno++)
 	{
-		AggStatePerAgg peraggstate = &node->peragg[aggno];
+		AggStatePerAggState peraggstate = &node->peraggstate[stateno];
 
 		for (setno = 0; setno < numGroupingSets; setno++)
 		{
@@ -2646,7 +3019,7 @@ ExecReScanAgg(AggState *node)
 	ExprContext *econtext = node->ss.ps.ps_ExprContext;
 	PlanState  *outerPlan = outerPlanState(node);
 	Agg		   *aggnode = (Agg *) node->ss.ps.plan;
-	int			aggno;
+	int			stateno;
 	int			numGroupingSets = Max(node->maxsets, 1);
 	int			setno;
 
@@ -2678,11 +3051,11 @@ ExecReScanAgg(AggState *node)
 	}
 
 	/* Make sure we have closed any open tuplesorts */
-	for (aggno = 0; aggno < node->numaggs; aggno++)
+	for (stateno = 0; stateno < node->numstates; stateno++)
 	{
 		for (setno = 0; setno < numGroupingSets; setno++)
 		{
-			AggStatePerAgg peraggstate = &node->peragg[aggno];
+			AggStatePerAggState peraggstate = &node->peraggstate[stateno];
 
 			if (peraggstate->sortstates[setno])
 			{
@@ -2811,10 +3184,12 @@ AggGetAggref(FunctionCallInfo fcinfo)
 {
 	if (fcinfo->context && IsA(fcinfo->context, AggState))
 	{
-		AggStatePerAgg curperagg = ((AggState *) fcinfo->context)->curperagg;
+		AggStatePerAggState curperaggstate;
+
+		curperaggstate = ((AggState *) fcinfo->context)->curperaggstate;
 
-		if (curperagg)
-			return curperagg->aggref;
+		if (curperaggstate)
+			return curperaggstate->aggref;
 	}
 	return NULL;
 }
diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c
index ecf96f8..c371d4d 100644
--- a/src/backend/executor/nodeWindowAgg.c
+++ b/src/backend/executor/nodeWindowAgg.c
@@ -2218,20 +2218,16 @@ initialize_peragg(WindowAggState *winstate, WindowFunc *wfunc,
 											   numArguments);
 
 	/* build expression trees using actual argument & result types */
-	build_aggregate_fnexprs(inputTypes,
-							numArguments,
-							0,	/* no ordered-set window functions yet */
-							peraggstate->numFinalArgs,
-							false,		/* no variadic window functions yet */
-							aggtranstype,
-							wfunc->wintype,
-							wfunc->inputcollid,
-							transfn_oid,
-							invtransfn_oid,
-							finalfn_oid,
-							&transfnexpr,
-							&invtransfnexpr,
-							&finalfnexpr);
+	build_aggregate_transfn_expr(inputTypes,
+								 numArguments,
+								 0,	/* no ordered-set window functions yet */
+								 false,		/* no variadic window functions yet */
+								 wfunc->wintype,
+								 wfunc->inputcollid,
+								 transfn_oid,
+								 invtransfn_oid,
+								 &transfnexpr,
+								 &invtransfnexpr);
 
 	/* set up infrastructure for calling the transfn(s) and finalfn */
 	fmgr_info(transfn_oid, &peraggstate->transfn);
@@ -2245,6 +2241,13 @@ initialize_peragg(WindowAggState *winstate, WindowFunc *wfunc,
 
 	if (OidIsValid(finalfn_oid))
 	{
+		build_aggregate_finalfn_expr(inputTypes,
+									 peraggstate->numFinalArgs,
+									 aggtranstype,
+									 wfunc->wintype,
+									 wfunc->inputcollid,
+									 finalfn_oid,
+									 &finalfnexpr);
 		fmgr_info(finalfn_oid, &peraggstate->finalfn);
 		fmgr_info_set_expr((Node *) finalfnexpr, &peraggstate->finalfn);
 	}
diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c
index 478d8ca..65e6a85 100644
--- a/src/backend/parser/parse_agg.c
+++ b/src/backend/parser/parse_agg.c
@@ -1819,44 +1819,40 @@ resolve_aggregate_transtype(Oid aggfuncid,
 }
 
 /*
- * Create expression trees for the transition and final functions
+ * Create an expression tree for the transition functions
  * of an aggregate.  These are needed so that polymorphic functions
- * can be used within an aggregate --- without the expression trees,
+ * can be used within an aggregate --- without the expression tree,
  * such functions would not know the datatypes they are supposed to use.
  * (The trees will never actually be executed, however, so we can skimp
  * a bit on correctness.)
  *
- * agg_input_types, agg_state_type, agg_result_type identify the input,
- * transition, and result types of the aggregate.  These should all be
- * resolved to actual types (ie, none should ever be ANYELEMENT etc).
+ * agg_input_types identifies the input types of the aggregate.  These should
+ * be resolved to actual types (ie, none should ever be ANYELEMENT etc).
  * agg_input_collation is the aggregate function's input collation.
  *
  * For an ordered-set aggregate, remember that agg_input_types describes
  * the direct arguments followed by the aggregated arguments.
  *
- * transfn_oid, invtransfn_oid and finalfn_oid identify the funcs to be
- * called; the latter two may be InvalidOid.
+ * transfn_oid and invtransfn_oid identify the funcs to be called; the
+ * latter may be InvalidOid, however if invtransfn_oid is set then
+ * transfn_oid must also be set.
  *
  * Pointers to the constructed trees are returned into *transfnexpr,
- * *invtransfnexpr and *finalfnexpr. If there is no invtransfn or finalfn,
- * the respective pointers are set to NULL.  Since use of the invtransfn is
- * optional, NULL may be passed for invtransfnexpr.
+ * *invtransfnexpr. If there is no invtransfn, the respective pointer is set
+ * to NULL.  Since use of the invtransfn is optional, NULL may be passed for
+ * invtransfnexpr.
  */
 void
-build_aggregate_fnexprs(Oid *agg_input_types,
+build_aggregate_transfn_expr(Oid *agg_input_types,
 						int agg_num_inputs,
 						int agg_num_direct_inputs,
-						int num_finalfn_inputs,
 						bool agg_variadic,
 						Oid agg_state_type,
-						Oid agg_result_type,
 						Oid agg_input_collation,
 						Oid transfn_oid,
 						Oid invtransfn_oid,
-						Oid finalfn_oid,
 						Expr **transfnexpr,
-						Expr **invtransfnexpr,
-						Expr **finalfnexpr)
+						Expr **invtransfnexpr)
 {
 	Param	   *argp;
 	List	   *args;
@@ -1919,13 +1915,24 @@ build_aggregate_fnexprs(Oid *agg_input_types,
 		else
 			*invtransfnexpr = NULL;
 	}
+}
 
-	/* see if we have a final function */
-	if (!OidIsValid(finalfn_oid))
-	{
-		*finalfnexpr = NULL;
-		return;
-	}
+/*
+ * Like build_aggregate_transfn_expr, but creates an expression tree for
+ * the final function of an aggregate, rather than the transition function.
+ */
+void
+build_aggregate_finalfn_expr(Oid *agg_input_types,
+						int num_finalfn_inputs,
+						Oid agg_state_type,
+						Oid agg_result_type,
+						Oid agg_input_collation,
+						Oid finalfn_oid,
+						Expr **finalfnexpr)
+{
+	Param	   *argp;
+	List	   *args;
+	int			i;
 
 	/*
 	 * Build expr tree for final function
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 303fc3c..65c0f74 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -609,9 +609,6 @@ typedef struct WholeRowVarExprState
 typedef struct AggrefExprState
 {
 	ExprState	xprstate;
-	List	   *aggdirectargs;	/* states of direct-argument expressions */
-	List	   *args;			/* states of aggregated-argument expressions */
-	ExprState  *aggfilter;		/* state of FILTER expression, if any */
 	int			aggno;			/* ID number for agg within its plan node */
 } AggrefExprState;
 
@@ -1825,6 +1822,7 @@ typedef struct GroupState
  */
 /* these structs are private in nodeAgg.c: */
 typedef struct AggStatePerAggData *AggStatePerAgg;
+typedef struct AggStatePerAggStateData *AggStatePerAggState;
 typedef struct AggStatePerGroupData *AggStatePerGroup;
 typedef struct AggStatePerPhaseData *AggStatePerPhase;
 
@@ -1833,14 +1831,16 @@ typedef struct AggState
 	ScanState	ss;				/* its first field is NodeTag */
 	List	   *aggs;			/* all Aggref nodes in targetlist & quals */
 	int			numaggs;		/* length of list (could be zero!) */
+	int			numstates;		/* number of peraggstate items */
 	AggStatePerPhase phase;		/* pointer to current phase data */
 	int			numphases;		/* number of phases */
 	int			current_phase;	/* current phase number */
 	FmgrInfo   *hashfunctions;	/* per-grouping-field hash fns */
 	AggStatePerAgg peragg;		/* per-Aggref information */
+	AggStatePerAggState peraggstate; /* per-Agg State information */
 	ExprContext **aggcontexts;	/* econtexts for long-lived data (per GS) */
 	ExprContext *tmpcontext;	/* econtext for input expressions */
-	AggStatePerAgg curperagg;	/* identifies currently active aggregate */
+	AggStatePerAggState curperaggstate;	/* identifies currently active aggregate */
 	bool		input_done;		/* indicates end of input */
 	bool		agg_done;		/* indicates completion of Agg scan */
 	int			projected_set;	/* The last projected grouping set */
diff --git a/src/include/parser/parse_agg.h b/src/include/parser/parse_agg.h
index 6a5f9bb..e2b3894 100644
--- a/src/include/parser/parse_agg.h
+++ b/src/include/parser/parse_agg.h
@@ -35,19 +35,23 @@ extern Oid resolve_aggregate_transtype(Oid aggfuncid,
 							Oid *inputTypes,
 							int numArguments);
 
-extern void build_aggregate_fnexprs(Oid *agg_input_types,
+extern void build_aggregate_transfn_expr(Oid *agg_input_types,
 						int agg_num_inputs,
 						int agg_num_direct_inputs,
-						int num_finalfn_inputs,
 						bool agg_variadic,
 						Oid agg_state_type,
-						Oid agg_result_type,
 						Oid agg_input_collation,
 						Oid transfn_oid,
 						Oid invtransfn_oid,
-						Oid finalfn_oid,
 						Expr **transfnexpr,
-						Expr **invtransfnexpr,
+						Expr **invtransfnexpr);
+
+extern void build_aggregate_finalfn_expr(Oid *agg_input_types,
+						int num_finalfn_inputs,
+						Oid agg_state_type,
+						Oid agg_result_type,
+						Oid agg_input_collation,
+						Oid finalfn_oid,
 						Expr **finalfnexpr);
 
 #endif   /* PARSE_AGG_H */
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 8852051..4dad4fe 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1580,3 +1580,171 @@ select least_agg(variadic array[q1,q2]) from int8_tbl;
  -4567890123456789
 (1 row)
 
+-- test aggregates with common transition functions share the same states
+begin work;
+create type avg_state as (total bigint, count bigint);
+create or replace function avg_transfn(state avg_state, n int) returns avg_state as
+$$
+declare new_state avg_state;
+begin
+	raise notice 'avg_transfn called with %', n;
+	if state is null then
+		if n is not null then
+			new_state.total := n;
+			new_state.count := 1;
+			return new_state;
+		end if;
+		return null;
+	elsif n is not null then
+		state.total := state.total + n;
+		state.count := state.count + 1;
+		return state;
+	end if;
+
+	return null;
+end
+$$ language plpgsql;
+create function avg_finalfn(state avg_state) returns int4 as
+$$
+begin
+	if state is null then
+		return NULL;
+	else
+		return state.total / state.count;
+	end if;
+end
+$$ language plpgsql;
+create function sum_finalfn(state avg_state) returns int4 as
+$$
+begin
+	if state is null then
+		return NULL;
+	else
+		return state.total;
+	end if;
+end
+$$ language plpgsql;
+create aggregate my_avg(int4)
+(
+   stype = avg_state,
+   sfunc = avg_transfn,
+   finalfunc = avg_finalfn
+);
+create aggregate my_sum(int4)
+(
+   stype = avg_state,
+   sfunc = avg_transfn,
+   finalfunc = sum_finalfn
+);
+-- aggregate state should be shared as transfn is the same for both aggs.
+select my_avg(one),my_sum(one) from (values(1,2),(3,4)) t(one,two);
+NOTICE:  avg_transfn called with 1
+NOTICE:  avg_transfn called with 3
+ my_avg | my_sum 
+--------+--------
+      2 |      4
+(1 row)
+
+-- shouldn't share states due to the distinctness not matching.
+select my_avg(distinct one),my_sum(one) from (values(1,2),(3,4)) t(one,two);
+NOTICE:  avg_transfn called with 1
+NOTICE:  avg_transfn called with 3
+NOTICE:  avg_transfn called with 1
+NOTICE:  avg_transfn called with 3
+ my_avg | my_sum 
+--------+--------
+      2 |      4
+(1 row)
+
+-- this should not share the state due to different input columns.
+select my_avg(one),my_sum(two) from (values(1,2),(3,4)) t(one,two);
+NOTICE:  avg_transfn called with 2
+NOTICE:  avg_transfn called with 1
+NOTICE:  avg_transfn called with 4
+NOTICE:  avg_transfn called with 3
+ my_avg | my_sum 
+--------+--------
+      2 |      6
+(1 row)
+
+create aggregate my_sum_init(int4)
+(
+   stype = avg_state,
+   sfunc = avg_transfn,
+   finalfunc = sum_finalfn,
+   initcond = '(10,0)'
+);
+create aggregate my_avg_init(int4)
+(
+   stype = avg_state,
+   sfunc = avg_transfn,
+   finalfunc = avg_finalfn,
+   initcond = '(5,0)'
+);
+-- Varying INITCONDs should cause the states not to be shared.
+select my_avg_init(one),my_sum_init(one) from (values(1,2),(3,4)) t(one,two);
+NOTICE:  avg_transfn called with 1
+NOTICE:  avg_transfn called with 1
+NOTICE:  avg_transfn called with 3
+NOTICE:  avg_transfn called with 3
+ my_avg_init | my_sum_init 
+-------------+-------------
+           4 |          14
+(1 row)
+
+rollback;
+-- test aggregate state sharing to ensure it works if one aggregate has a
+-- finalfn and the other one has none.
+begin work;
+create or replace function sum_transfn(state int4, n int4) returns int4 as
+$$
+declare new_state int4;
+begin
+	raise notice 'sum_transfn called with %', n;
+	if state is null then
+		if n is not null then
+			new_state := n;
+			return new_state;
+		end if;
+		return null;
+	elsif n is not null then
+		state := state + n;
+		return state;
+	end if;
+
+	return null;
+end
+$$ language plpgsql;
+create function halfsum_finalfn(state int4) returns int4 as
+$$
+begin
+	if state is null then
+		return NULL;
+	else
+		return state / 2;
+	end if;
+end
+$$ language plpgsql;
+create aggregate my_sum(int4)
+(
+   stype = int4,
+   sfunc = sum_transfn
+);
+create aggregate my_half_sum(int4)
+(
+   stype = int4,
+   sfunc = sum_transfn,
+   finalfunc = halfsum_finalfn
+);
+-- Agg state should be shared even though my_sum has no finalfn
+select my_sum(one),my_half_sum(one) from (values(1),(2),(3),(4)) t(one);
+NOTICE:  sum_transfn called with 1
+NOTICE:  sum_transfn called with 2
+NOTICE:  sum_transfn called with 3
+NOTICE:  sum_transfn called with 4
+ my_sum | my_half_sum 
+--------+-------------
+     10 |           5
+(1 row)
+
+rollback;
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index a84327d..42c3b3c 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -590,3 +590,151 @@ drop view aggordview1;
 -- variadic aggregates
 select least_agg(q1,q2) from int8_tbl;
 select least_agg(variadic array[q1,q2]) from int8_tbl;
+
+
+-- test aggregates with common transition functions share the same states
+begin work;
+
+create type avg_state as (total bigint, count bigint);
+
+create or replace function avg_transfn(state avg_state, n int) returns avg_state as
+$$
+declare new_state avg_state;
+begin
+	raise notice 'avg_transfn called with %', n;
+	if state is null then
+		if n is not null then
+			new_state.total := n;
+			new_state.count := 1;
+			return new_state;
+		end if;
+		return null;
+	elsif n is not null then
+		state.total := state.total + n;
+		state.count := state.count + 1;
+		return state;
+	end if;
+
+	return null;
+end
+$$ language plpgsql;
+
+create function avg_finalfn(state avg_state) returns int4 as
+$$
+begin
+	if state is null then
+		return NULL;
+	else
+		return state.total / state.count;
+	end if;
+end
+$$ language plpgsql;
+
+create function sum_finalfn(state avg_state) returns int4 as
+$$
+begin
+	if state is null then
+		return NULL;
+	else
+		return state.total;
+	end if;
+end
+$$ language plpgsql;
+
+create aggregate my_avg(int4)
+(
+   stype = avg_state,
+   sfunc = avg_transfn,
+   finalfunc = avg_finalfn
+);
+
+create aggregate my_sum(int4)
+(
+   stype = avg_state,
+   sfunc = avg_transfn,
+   finalfunc = sum_finalfn
+);
+
+-- aggregate state should be shared as transfn is the same for both aggs.
+select my_avg(one),my_sum(one) from (values(1,2),(3,4)) t(one,two);
+
+-- shouldn't share states due to the distinctness not matching.
+select my_avg(distinct one),my_sum(one) from (values(1,2),(3,4)) t(one,two);
+
+-- this should not share the state due to different input columns.
+select my_avg(one),my_sum(two) from (values(1,2),(3,4)) t(one,two);
+
+
+create aggregate my_sum_init(int4)
+(
+   stype = avg_state,
+   sfunc = avg_transfn,
+   finalfunc = sum_finalfn,
+   initcond = '(10,0)'
+);
+
+create aggregate my_avg_init(int4)
+(
+   stype = avg_state,
+   sfunc = avg_transfn,
+   finalfunc = avg_finalfn,
+   initcond = '(5,0)'
+);
+
+-- Varying INITCONDs should cause the states not to be shared.
+select my_avg_init(one),my_sum_init(one) from (values(1,2),(3,4)) t(one,two);
+
+rollback;
+
+-- test aggregate state sharing to ensure it works if one aggregate has a
+-- finalfn and the other one has none.
+begin work;
+
+create or replace function sum_transfn(state int4, n int4) returns int4 as
+$$
+declare new_state int4;
+begin
+	raise notice 'sum_transfn called with %', n;
+	if state is null then
+		if n is not null then
+			new_state := n;
+			return new_state;
+		end if;
+		return null;
+	elsif n is not null then
+		state := state + n;
+		return state;
+	end if;
+
+	return null;
+end
+$$ language plpgsql;
+
+create function halfsum_finalfn(state int4) returns int4 as
+$$
+begin
+	if state is null then
+		return NULL;
+	else
+		return state / 2;
+	end if;
+end
+$$ language plpgsql;
+
+create aggregate my_sum(int4)
+(
+   stype = int4,
+   sfunc = sum_transfn
+);
+
+create aggregate my_half_sum(int4)
+(
+   stype = int4,
+   sfunc = sum_transfn,
+   finalfunc = halfsum_finalfn
+);
+
+-- Agg state should be shared even though my_sum has no finalfn
+select my_sum(one),my_half_sum(one) from (values(1),(2),(3),(4)) t(one);
+
+rollback;
#4David Rowley
david.rowley@2ndquadrant.com
In reply to: Heikki Linnakangas (#3)
1 attachment(s)
Re: Sharing aggregate states between different aggregate functions

On 27 July 2015 at 03:24, Heikki Linnakangas <hlinnaka@iki.fi> wrote:

On 07/09/2015 12:44 PM, David Rowley wrote:

On 15 June 2015 at 12:05, David Rowley <david.rowley@2ndquadrant.com>
wrote:

This basically allows an aggregate's state to be shared between other
aggregate functions when both aggregate's transition functions (and a few
other things) match
There's quite a number of aggregates in our standard set which will
benefit from this optimisation.

After compiling the original patch with another compiler, I noticed a

couple of warnings.

The attached fixes these.

I spent some time reviewing this. I refactored the ExecInitAgg code rather
heavily to make it more readable (IMHO); see attached. What do you think?
Did I break anything?

Thanks for taking the time to look at this and makes these fixes.

I'm just looking over your changes:

- ereport(ERROR,
- (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
- errmsg("aggregate %u needs to have compatible input type and transition
type",
- aggref->aggfnoid)));
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+ errmsg("aggregate needs to have compatible input type and transition
type")));

I can't quite see the reason to remove the agg OID from the error message
here. It seems to be still valid to use as build_peraggstate_for_aggref()
only is called when nothing is shared.

- * agg_input_types, agg_state_type, agg_result_type identify the input,
- * transition, and result types of the aggregate.  These should all be
- * resolved to actual types (ie, none should ever be ANYELEMENT etc).
+ * agg_input_types identifies the input types of the aggregate.  These
should
+ * be resolved to actual types (ie, none should ever be ANYELEMENT etc).

I'm not sure I understand why agg_state_type and agg_result_type have
changed here.

+ peraggstate->sortstates = (Tuplesortstate **)
+ palloc0(sizeof(Tuplesortstate *) * numGroupingSets);
+ for (currentsortno = 0; currentsortno < numGroupingSets; currentsortno++)
+ peraggstate->sortstates[currentsortno] = NULL;

This was not you, but this NULL setting looks unneeded due to the palloc0().

Some comments:

* In aggref_has_compatible_states(), you give up if aggtype or aggcollid
differ. But those properties apply to the final function, so you were
leaving some money on the table by disallowing state-sharing if they differ.

Good catch, and accurate analogy. Thanks for fixing.

* The filter and input expressions are initialized for every AggRef,
before the deduplication logic kicks in. The AggrefExprState.aggfilter,
aggdirectargs and args fields really belong to the AggStatePerAggState
struct instead. This is not a new issue, but now that we have a convenient
per-aggstate struct to put them in, let's do so.

Good idea. I failed to notice that code over there in execQual.c so I agree
that where you've moved it to is much better.

* There was a reference-after free bug in aggref_has_compatible_states;
you cannot ReleaseSysCache and then continue pointing to the struct.

Thanks for fixing.

In this function I also wasn't quite sure if it was with comparing both
non-NULL INITCOND's here. I believe my code comments may slightly
contradict what the code actually does, as the comments talk about them
having to match, but the code just bails if any are non-NULL. The reason I
didn't check them was because it seems inevitable that some duplicate work
needs to be done when setting up the INITCOND. Perhaps it's worth it?

select aggfnoid || '(' || typname || ')',aggtransfn,agginitval
from pg_aggregate
inner join pg_type on aggtranstype = oid
where aggtransfn in (select aggtransfn
from pg_aggregate
group by aggtransfn
having count(*)>1)
order by aggtransfn;

This indicates that everything using float4_accum as a transfn could
benefit from that. I just wasn't sure how far to go.

* The code was a bit fuzzy on which parts of the per-aggstate are filled
in at what time. Some of the fields were overwritten every time a match was
found. With the same values, so no harm done, but I found it confusing. I
refactored ExecInitAgg in the attached patch to clear that up.

* There API of build_aggregate_fnexprs() was a bit strange now that some
callers use it to only fill in the final function, some call it to fill
both the transition functions and the final function. I split it to two
separate functions.

That's much better.

* I wonder if we should do this duplicate elimination at plan time. It's
very fast, so I'm not worried about that right now, but you had grand plans
to expand this so that an aggregate could optionally use one of many
different kinds of state values. At that point, it certainly seems like a
planning decision to decide which aggregates share state. I think we can
leave it as it is for now, but it's something to perhaps consider later.

I don't think I'm going to get the time to work on the "supporting
aggregate" stuff you're talking about, but I think it's a good enough idea
to keep around for the future, so I think this shared aggregate states
stuff probably should go into nodeAgg.c for now. I have to say though, I
was a little surprised to find this code in the executor rather than the
planner when I first started on this.

BTW, the name of the AggStatePerAggStateData struct is pretty horrible.
The repeated "AggState" feels awkward. Now that I've stared at the patch
for a some time, it doesn't bother me anymore, but it took me quite a while
to over that. I'm sure it will for others too. And it's not just that
struct, the comments talk about "aggregate state", which could be confused
to mean "AggState", but it actually means AggStatePerAggStateData. I don't
have any great suggestions, but can you come up a better naming scheme?

I agree, they're horrible. The thing that's causing the biggest problem is
the struct named AggState, which carries state for *all* aggregates, and
has nothing to do with "transition state", so it seems there's two
different meanings if the word "state" and I've used both meanings in the
name for AggStatePerAggStateData.

Perhaps just renaming AggStatePerAggStateData to AggStateTransStateData
would be good enough?

I've attached a delta patch based on your patch, in this I've:

1. Renamed AggStatePerAggStateData to AggStateTransStateData and all
variables using that are renamed to suit better.
2. Removed surplus peraggstate->sortstates[currentsortno] = NULL; (not
related to this patch, but since we're moving that part of the code, we'd
better fix)
3. Put back the missing aggfnoid from the error message.
4. Changed initialize_aggregates() to not pass the states. They're already
in AggState and we're using aggstate->numstates to get the count of the
items in that array, so it seems wrong to allow a different array to ever
be passed in.
5. Changed wording of a few comments to try and reduce confusing of 'state'
and 'transition state'.
6. Renamed AggState.peraggstate to transstates. I pluralised this to try to
reduce confusion of the single state pointers named 'transstate' in the
functions in nodeAgg.c. I did think that peragg should also become peraggs
and pergroup should become pergroups, but didn't change those.

Anything else I changed is self explanatory.

What do you think?

Regards

David Rowley

--
David Rowley http://www.2ndQuadrant.com/
<http://www.2ndquadrant.com/&gt;
PostgreSQL Development, 24x7 Support, Training & Services

Attachments:

sharing_aggstate-heikki-1_delta1.patchapplication/octet-stream; name=sharing_aggstate-heikki-1_delta1.patchDownload
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index fcc3859..984216d 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -152,15 +152,15 @@
 
 
 /*
- * AggStatePerAggStateData - per aggregate state data for the Agg scan
+ * AggStateTransStateData - per aggregate state data for the Agg scan
  *
- * Working state for calculating the aggregate state, using the state
- * transition function. This struct does not store the information needed
- * to produce the final aggregate result from the state value; that's stored
+ * Working state for calculating the aggregate's transition state, using the
+ * state transition function. This struct does not store the information needed
+ * to produce the final aggregate result from the transition state, that's stored
  * in AggStatePerAggData instead. This separation allows multiple aggregate
  * results to be produced from a single state value.
  */
-typedef struct AggStatePerAggStateData
+typedef struct AggStateTransStateData
 {
 	/*
 	 * These values are set up during ExecInitAgg() and do not change
@@ -209,7 +209,7 @@ typedef struct AggStatePerAggStateData
 	List	   *aggdirectargs;	/* states of direct-argument expressions */
 
 	/*
-	 * fmgr lookup data for transfer function.  Note in particular that the
+	 * fmgr lookup data for transition function.  Note in particular that the
 	 * fn_strict flag is kept here.
 	 */
 	FmgrInfo	transfn;
@@ -294,7 +294,7 @@ typedef struct AggStatePerAggStateData
 	 * worth the extra space consumption.
 	 */
 	FunctionCallInfoData transfn_fcinfo;
-}	AggStatePerAggStateData;
+}	AggStateTransStateData;
 
 /*
  * AggStatePerAggData - per-aggregate working state
@@ -421,18 +421,17 @@ typedef enum AggRefCompatibility
 static void initialize_phase(AggState *aggstate, int newphase);
 static TupleTableSlot *fetch_input_tuple(AggState *aggstate);
 static void initialize_aggregates(AggState *aggstate,
-					  AggStatePerAggState peraggstates,
 					  AggStatePerGroup pergroup,
 					  int numReset);
 static void advance_transition_function(AggState *aggstate,
-							AggStatePerAggState peraggstate,
+							AggStateTransState transstate,
 							AggStatePerGroup pergroupstate);
 static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup);
 static void process_ordered_aggregate_single(AggState *aggstate,
-								 AggStatePerAggState peraggstate,
+								 AggStateTransState transstate,
 								 AggStatePerGroup pergroupstate);
 static void process_ordered_aggregate_multi(AggState *aggstate,
-								AggStatePerAggState peraggstate,
+								AggStateTransState transstate,
 								AggStatePerGroup pergroupstate);
 static void finalize_aggregate(AggState *aggstate,
 				   AggStatePerAgg peragg,
@@ -455,14 +454,14 @@ static TupleTableSlot *agg_retrieve_direct(AggState *aggstate);
 static void agg_fill_hash_table(AggState *aggstate);
 static TupleTableSlot *agg_retrieve_hash_table(AggState *aggstate);
 static Datum GetAggInitVal(Datum textInitVal, Oid transtype);
-static void build_peraggstate_for_aggref(AggStatePerAggState peraggstate,
+static void build_transstate_for_aggref(AggStateTransState transstate,
 							 AggState *aggsate, EState *estate,
 							 Aggref *aggref, HeapTuple aggtuple,
 							 Oid *inputTypes, int numArguments);
 static AggRefCompatibility find_compatible_aggref(Aggref *newagg,
 					   AggState *aggstate, int lastaggno, int *foundaggno);
 static AggRefCompatibility aggref_has_compatible_states(Aggref *newagg,
-					 AggStatePerAgg peragg, AggStatePerAggState peraggstate);
+				AggStatePerAgg peragg, AggStateTransState transstate);
 
 
 /*
@@ -565,20 +564,20 @@ fetch_input_tuple(AggState *aggstate)
  * When called, CurrentMemoryContext should be the per-query context.
  */
 static void
-initialize_aggregate(AggState *aggstate, AggStatePerAggState peraggstate,
+initialize_aggregate(AggState *aggstate, AggStateTransState transstate,
 					 AggStatePerGroup pergroupstate)
 {
 	/*
 	 * Start a fresh sort operation for each DISTINCT/ORDER BY aggregate.
 	 */
-	if (peraggstate->numSortCols > 0)
+	if (transstate->numSortCols > 0)
 	{
 		/*
 		 * In case of rescan, maybe there could be an uncompleted sort
 		 * operation?  Clean it up if so.
 		 */
-		if (peraggstate->sortstates[aggstate->current_set])
-			tuplesort_end(peraggstate->sortstates[aggstate->current_set]);
+		if (transstate->sortstates[aggstate->current_set])
+			tuplesort_end(transstate->sortstates[aggstate->current_set]);
 
 
 		/*
@@ -586,21 +585,21 @@ initialize_aggregate(AggState *aggstate, AggStatePerAggState peraggstate,
 		 * otherwise sort the full tuple.  (See comments for
 		 * process_ordered_aggregate_single.)
 		 */
-		if (peraggstate->numInputs == 1)
-			peraggstate->sortstates[aggstate->current_set] =
-				tuplesort_begin_datum(peraggstate->evaldesc->attrs[0]->atttypid,
-									  peraggstate->sortOperators[0],
-									  peraggstate->sortCollations[0],
-									  peraggstate->sortNullsFirst[0],
+		if (transstate->numInputs == 1)
+			transstate->sortstates[aggstate->current_set] =
+				tuplesort_begin_datum(transstate->evaldesc->attrs[0]->atttypid,
+									  transstate->sortOperators[0],
+									  transstate->sortCollations[0],
+									  transstate->sortNullsFirst[0],
 									  work_mem, false);
 		else
-			peraggstate->sortstates[aggstate->current_set] =
-				tuplesort_begin_heap(peraggstate->evaldesc,
-									 peraggstate->numSortCols,
-									 peraggstate->sortColIdx,
-									 peraggstate->sortOperators,
-									 peraggstate->sortCollations,
-									 peraggstate->sortNullsFirst,
+			transstate->sortstates[aggstate->current_set] =
+				tuplesort_begin_heap(transstate->evaldesc,
+									 transstate->numSortCols,
+									 transstate->sortColIdx,
+									 transstate->sortOperators,
+									 transstate->sortCollations,
+									 transstate->sortNullsFirst,
 									 work_mem, false);
 	}
 
@@ -610,20 +609,20 @@ initialize_aggregate(AggState *aggstate, AggStatePerAggState peraggstate,
 	 * Note that when the initial value is pass-by-ref, we must copy it (into
 	 * the aggcontext) since we will pfree the transValue later.
 	 */
-	if (peraggstate->initValueIsNull)
-		pergroupstate->transValue = peraggstate->initValue;
+	if (transstate->initValueIsNull)
+		pergroupstate->transValue = transstate->initValue;
 	else
 	{
 		MemoryContext oldContext;
 
 		oldContext = MemoryContextSwitchTo(
 		aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
-		pergroupstate->transValue = datumCopy(peraggstate->initValue,
-											  peraggstate->transtypeByVal,
-											  peraggstate->transtypeLen);
+		pergroupstate->transValue = datumCopy(transstate->initValue,
+											  transstate->transtypeByVal,
+											  transstate->transtypeLen);
 		MemoryContextSwitchTo(oldContext);
 	}
-	pergroupstate->transValueIsNull = peraggstate->initValueIsNull;
+	pergroupstate->transValueIsNull = transstate->initValueIsNull;
 
 	/*
 	 * If the initial value for the transition state doesn't exist in the
@@ -632,11 +631,11 @@ initialize_aggregate(AggState *aggstate, AggStatePerAggState peraggstate,
 	 * aggregates like max() and min().) The noTransValue flag signals that we
 	 * still need to do this.
 	 */
-	pergroupstate->noTransValue = peraggstate->initValueIsNull;
+	pergroupstate->noTransValue = transstate->initValueIsNull;
 }
 
 /*
- * Initialize all aggregate states for a new group of input values.
+ * Initialize all aggregate transition states for a new group of input values.
  *
  * If there are multiple grouping sets, we initialize only the first numReset
  * of them (the grouping sets are ordered so that the most specific one, which
@@ -647,20 +646,20 @@ initialize_aggregate(AggState *aggstate, AggStatePerAggState peraggstate,
  */
 static void
 initialize_aggregates(AggState *aggstate,
-					  AggStatePerAggState peraggstates,
 					  AggStatePerGroup pergroup,
 					  int numReset)
 {
-	int			stateno;
-	int			numGroupingSets = Max(aggstate->phase->numsets, 1);
-	int			setno = 0;
+	int					stateno;
+	int					numGroupingSets = Max(aggstate->phase->numsets, 1);
+	int					setno = 0;
+	AggStateTransState	transstates = aggstate->transstates;
 
 	if (numReset < 1)
 		numReset = numGroupingSets;
 
 	for (stateno = 0; stateno < aggstate->numstates; stateno++)
 	{
-		AggStatePerAggState peraggstate = &peraggstates[stateno];
+		AggStateTransState transstate = &transstates[stateno];
 
 		for (setno = 0; setno < numReset; setno++)
 		{
@@ -670,7 +669,7 @@ initialize_aggregates(AggState *aggstate,
 
 			aggstate->current_set = setno;
 
-			initialize_aggregate(aggstate, peraggstate, pergroupstate);
+			initialize_aggregate(aggstate, transstate, pergroupstate);
 		}
 	}
 }
@@ -680,7 +679,7 @@ initialize_aggregates(AggState *aggstate,
  * state within one grouping set only (already set in aggstate->current_set)
  *
  * The new values (and null flags) have been preloaded into argument positions
- * 1 and up in peraggstate->transfn_fcinfo, so that we needn't copy them again
+ * 1 and up in transstate->transfn_fcinfo, so that we needn't copy them again
  * to pass to the transition function.  We also expect that the static fields
  * of the fcinfo are already initialized; that was done by ExecInitAgg().
  *
@@ -688,20 +687,20 @@ initialize_aggregates(AggState *aggstate,
  */
 static void
 advance_transition_function(AggState *aggstate,
-							AggStatePerAggState peraggstate,
+							AggStateTransState transstate,
 							AggStatePerGroup pergroupstate)
 {
-	FunctionCallInfo fcinfo = &peraggstate->transfn_fcinfo;
+	FunctionCallInfo fcinfo = &transstate->transfn_fcinfo;
 	MemoryContext oldContext;
 	Datum		newVal;
 
-	if (peraggstate->transfn.fn_strict)
+	if (transstate->transfn.fn_strict)
 	{
 		/*
 		 * For a strict transfn, nothing happens when there's a NULL input; we
 		 * just keep the prior transValue.
 		 */
-		int			numTransInputs = peraggstate->numTransInputs;
+		int			numTransInputs = transstate->numTransInputs;
 		int			i;
 
 		for (i = 1; i <= numTransInputs; i++)
@@ -723,8 +722,8 @@ advance_transition_function(AggState *aggstate,
 			oldContext = MemoryContextSwitchTo(
 											   aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
 			pergroupstate->transValue = datumCopy(fcinfo->arg[1],
-												  peraggstate->transtypeByVal,
-												  peraggstate->transtypeLen);
+												  transstate->transtypeByVal,
+												  transstate->transtypeLen);
 			pergroupstate->transValueIsNull = false;
 			pergroupstate->noTransValue = false;
 			MemoryContextSwitchTo(oldContext);
@@ -745,8 +744,8 @@ advance_transition_function(AggState *aggstate,
 	/* We run the transition functions in per-input-tuple memory context */
 	oldContext = MemoryContextSwitchTo(aggstate->tmpcontext->ecxt_per_tuple_memory);
 
-	/* set up aggstate->curperaggstate for AggGetAggref() */
-	aggstate->curperaggstate = peraggstate;
+	/* set up aggstate->curtransstate for AggGetAggref() */
+	aggstate->curtransstate = transstate;
 
 	/*
 	 * OK to call the transition function
@@ -757,22 +756,22 @@ advance_transition_function(AggState *aggstate,
 
 	newVal = FunctionCallInvoke(fcinfo);
 
-	aggstate->curperaggstate = NULL;
+	aggstate->curtransstate = NULL;
 
 	/*
 	 * If pass-by-ref datatype, must copy the new value into aggcontext and
 	 * pfree the prior transValue.  But if transfn returned a pointer to its
 	 * first input, we don't need to do anything.
 	 */
-	if (!peraggstate->transtypeByVal &&
+	if (!transstate->transtypeByVal &&
 		DatumGetPointer(newVal) != DatumGetPointer(pergroupstate->transValue))
 	{
 		if (!fcinfo->isnull)
 		{
 			MemoryContextSwitchTo(aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
 			newVal = datumCopy(newVal,
-							   peraggstate->transtypeByVal,
-							   peraggstate->transtypeLen);
+							   transstate->transtypeByVal,
+							   transstate->transtypeLen);
 		}
 		if (!pergroupstate->transValueIsNull)
 			pfree(DatumGetPointer(pergroupstate->transValue));
@@ -785,10 +784,10 @@ advance_transition_function(AggState *aggstate,
 }
 
 /*
- * Advance each aggregate state for one input tuple.  The input tuple
- * has been stored in tmpcontext->ecxt_outertuple, so that it is accessible
- * to ExecEvalExpr.  pergroup is the array of per-group structs to use
- * (this might be in a hashtable entry).
+ * Advance each aggregate transition state for one input tuple.  The input
+ * tuple has been stored in tmpcontext->ecxt_outertuple, so that it is
+ * accessible to ExecEvalExpr.  pergroup is the array of per-group structs to
+ * use (this might be in a hashtable entry).
  *
  * When called, CurrentMemoryContext should be the per-query context.
  */
@@ -802,9 +801,9 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
 
 	for (stateno = 0; stateno < numStates; stateno++)
 	{
-		AggStatePerAggState peraggstate = &aggstate->peraggstate[stateno];
-		ExprState  *filter = peraggstate->aggfilter;
-		int			numTransInputs = peraggstate->numTransInputs;
+		AggStateTransState transstate = &aggstate->transstates[stateno];
+		ExprState  *filter = transstate->aggfilter;
+		int			numTransInputs = transstate->numTransInputs;
 		int			i;
 		TupleTableSlot *slot;
 
@@ -821,12 +820,12 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
 		}
 
 		/* Evaluate the current input expressions for this aggregate */
-		slot = ExecProject(peraggstate->evalproj, NULL);
+		slot = ExecProject(transstate->evalproj, NULL);
 
-		if (peraggstate->numSortCols > 0)
+		if (transstate->numSortCols > 0)
 		{
 			/* DISTINCT and/or ORDER BY case */
-			Assert(slot->tts_nvalid == peraggstate->numInputs);
+			Assert(slot->tts_nvalid == transstate->numInputs);
 
 			/*
 			 * If the transfn is strict, we want to check for nullity before
@@ -835,7 +834,7 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
 			 * not numInputs, since nullity in columns used only for sorting
 			 * is not relevant here.
 			 */
-			if (peraggstate->transfn.fn_strict)
+			if (transstate->transfn.fn_strict)
 			{
 				for (i = 0; i < numTransInputs; i++)
 				{
@@ -849,18 +848,18 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
 			for (setno = 0; setno < numGroupingSets; setno++)
 			{
 				/* OK, put the tuple into the tuplesort object */
-				if (peraggstate->numInputs == 1)
-					tuplesort_putdatum(peraggstate->sortstates[setno],
+				if (transstate->numInputs == 1)
+					tuplesort_putdatum(transstate->sortstates[setno],
 									   slot->tts_values[0],
 									   slot->tts_isnull[0]);
 				else
-					tuplesort_puttupleslot(peraggstate->sortstates[setno], slot);
+					tuplesort_puttupleslot(transstate->sortstates[setno], slot);
 			}
 		}
 		else
 		{
 			/* We can apply the transition function immediately */
-			FunctionCallInfo fcinfo = &peraggstate->transfn_fcinfo;
+			FunctionCallInfo fcinfo = &transstate->transfn_fcinfo;
 
 			/* Load values into fcinfo */
 			/* Start from 1, since the 0th arg will be the transition value */
@@ -877,7 +876,8 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
 
 				aggstate->current_set = setno;
 
-				advance_transition_function(aggstate, peraggstate, pergroupstate);
+				advance_transition_function(aggstate, transstate,
+											pergroupstate);
 			}
 		}
 	}
@@ -908,7 +908,7 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
  */
 static void
 process_ordered_aggregate_single(AggState *aggstate,
-								 AggStatePerAggState peraggstate,
+								 AggStateTransState transstate,
 								 AggStatePerGroup pergroupstate)
 {
 	Datum		oldVal = (Datum) 0;
@@ -916,14 +916,14 @@ process_ordered_aggregate_single(AggState *aggstate,
 	bool		haveOldVal = false;
 	MemoryContext workcontext = aggstate->tmpcontext->ecxt_per_tuple_memory;
 	MemoryContext oldContext;
-	bool		isDistinct = (peraggstate->numDistinctCols > 0);
-	FunctionCallInfo fcinfo = &peraggstate->transfn_fcinfo;
+	bool		isDistinct = (transstate->numDistinctCols > 0);
+	FunctionCallInfo fcinfo = &transstate->transfn_fcinfo;
 	Datum	   *newVal;
 	bool	   *isNull;
 
-	Assert(peraggstate->numDistinctCols < 2);
+	Assert(transstate->numDistinctCols < 2);
 
-	tuplesort_performsort(peraggstate->sortstates[aggstate->current_set]);
+	tuplesort_performsort(transstate->sortstates[aggstate->current_set]);
 
 	/* Load the column into argument 1 (arg 0 will be transition value) */
 	newVal = fcinfo->arg + 1;
@@ -935,7 +935,7 @@ process_ordered_aggregate_single(AggState *aggstate,
 	 * pfree them when they are no longer needed.
 	 */
 
-	while (tuplesort_getdatum(peraggstate->sortstates[aggstate->current_set],
+	while (tuplesort_getdatum(transstate->sortstates[aggstate->current_set],
 							  true, newVal, isNull))
 	{
 		/*
@@ -954,18 +954,18 @@ process_ordered_aggregate_single(AggState *aggstate,
 			haveOldVal &&
 			((oldIsNull && *isNull) ||
 			 (!oldIsNull && !*isNull &&
-			  DatumGetBool(FunctionCall2(&peraggstate->equalfns[0],
+			  DatumGetBool(FunctionCall2(&transstate->equalfns[0],
 										 oldVal, *newVal)))))
 		{
 			/* equal to prior, so forget this one */
-			if (!peraggstate->inputtypeByVal && !*isNull)
+			if (!transstate->inputtypeByVal && !*isNull)
 				pfree(DatumGetPointer(*newVal));
 		}
 		else
 		{
-			advance_transition_function(aggstate, peraggstate, pergroupstate);
+			advance_transition_function(aggstate, transstate, pergroupstate);
 			/* forget the old value, if any */
-			if (!oldIsNull && !peraggstate->inputtypeByVal)
+			if (!oldIsNull && !transstate->inputtypeByVal)
 				pfree(DatumGetPointer(oldVal));
 			/* and remember the new one for subsequent equality checks */
 			oldVal = *newVal;
@@ -976,11 +976,11 @@ process_ordered_aggregate_single(AggState *aggstate,
 		MemoryContextSwitchTo(oldContext);
 	}
 
-	if (!oldIsNull && !peraggstate->inputtypeByVal)
+	if (!oldIsNull && !transstate->inputtypeByVal)
 		pfree(DatumGetPointer(oldVal));
 
-	tuplesort_end(peraggstate->sortstates[aggstate->current_set]);
-	peraggstate->sortstates[aggstate->current_set] = NULL;
+	tuplesort_end(transstate->sortstates[aggstate->current_set]);
+	transstate->sortstates[aggstate->current_set] = NULL;
 }
 
 /*
@@ -997,25 +997,25 @@ process_ordered_aggregate_single(AggState *aggstate,
  */
 static void
 process_ordered_aggregate_multi(AggState *aggstate,
-								AggStatePerAggState peraggstate,
+								AggStateTransState transstate,
 								AggStatePerGroup pergroupstate)
 {
 	MemoryContext workcontext = aggstate->tmpcontext->ecxt_per_tuple_memory;
-	FunctionCallInfo fcinfo = &peraggstate->transfn_fcinfo;
-	TupleTableSlot *slot1 = peraggstate->evalslot;
-	TupleTableSlot *slot2 = peraggstate->uniqslot;
-	int			numTransInputs = peraggstate->numTransInputs;
-	int			numDistinctCols = peraggstate->numDistinctCols;
+	FunctionCallInfo fcinfo = &transstate->transfn_fcinfo;
+	TupleTableSlot *slot1 = transstate->evalslot;
+	TupleTableSlot *slot2 = transstate->uniqslot;
+	int			numTransInputs = transstate->numTransInputs;
+	int			numDistinctCols = transstate->numDistinctCols;
 	bool		haveOldValue = false;
 	int			i;
 
-	tuplesort_performsort(peraggstate->sortstates[aggstate->current_set]);
+	tuplesort_performsort(transstate->sortstates[aggstate->current_set]);
 
 	ExecClearTuple(slot1);
 	if (slot2)
 		ExecClearTuple(slot2);
 
-	while (tuplesort_gettupleslot(peraggstate->sortstates[aggstate->current_set],
+	while (tuplesort_gettupleslot(transstate->sortstates[aggstate->current_set],
 								  true, slot1))
 	{
 		/*
@@ -1029,8 +1029,8 @@ process_ordered_aggregate_multi(AggState *aggstate,
 			!haveOldValue ||
 			!execTuplesMatch(slot1, slot2,
 							 numDistinctCols,
-							 peraggstate->sortColIdx,
-							 peraggstate->equalfns,
+							 transstate->sortColIdx,
+							 transstate->equalfns,
 							 workcontext))
 		{
 			/* Load values into fcinfo */
@@ -1041,7 +1041,7 @@ process_ordered_aggregate_multi(AggState *aggstate,
 				fcinfo->argnull[i + 1] = slot1->tts_isnull[i];
 			}
 
-			advance_transition_function(aggstate, peraggstate, pergroupstate);
+			advance_transition_function(aggstate, transstate, pergroupstate);
 
 			if (numDistinctCols > 0)
 			{
@@ -1064,8 +1064,8 @@ process_ordered_aggregate_multi(AggState *aggstate,
 	if (slot2)
 		ExecClearTuple(slot2);
 
-	tuplesort_end(peraggstate->sortstates[aggstate->current_set]);
-	peraggstate->sortstates[aggstate->current_set] = NULL;
+	tuplesort_end(transstate->sortstates[aggstate->current_set]);
+	transstate->sortstates[aggstate->current_set] = NULL;
 }
 
 /*
@@ -1092,7 +1092,7 @@ finalize_aggregate(AggState *aggstate,
 	MemoryContext oldContext;
 	int			i;
 	ListCell   *lc;
-	AggStatePerAggState peraggstate = &aggstate->peraggstate[peragg->stateno];
+	AggStateTransState transstate = &aggstate->transstates[peragg->stateno];
 
 	oldContext = MemoryContextSwitchTo(aggstate->ss.ps.ps_ExprContext->ecxt_per_tuple_memory);
 
@@ -1103,7 +1103,7 @@ finalize_aggregate(AggState *aggstate,
 	 * for the transition state value.
 	 */
 	i = 1;
-	foreach(lc, peraggstate->aggdirectargs)
+	foreach(lc, transstate->aggdirectargs)
 	{
 		ExprState  *expr = (ExprState *) lfirst(lc);
 
@@ -1122,12 +1122,12 @@ finalize_aggregate(AggState *aggstate,
 	{
 		int			numFinalArgs = peragg->numFinalArgs;
 
-		/* set up aggstate->curperaggstate for AggGetAggref() */
-		aggstate->curperaggstate = peraggstate;
+		/* set up aggstate->curtransstate for AggGetAggref() */
+		aggstate->curtransstate = transstate;
 
 		InitFunctionCallInfoData(fcinfo, &peragg->finalfn,
 								 numFinalArgs,
-								 peraggstate->aggCollation,
+								 transstate->aggCollation,
 								 (void *) aggstate, NULL);
 
 		/* Fill in the transition state value */
@@ -1154,7 +1154,7 @@ finalize_aggregate(AggState *aggstate,
 			*resultVal = FunctionCallInvoke(&fcinfo);
 			*resultIsNull = fcinfo.isnull;
 		}
-		aggstate->curperaggstate = NULL;
+		aggstate->curtransstate = NULL;
 	}
 	else
 	{
@@ -1263,22 +1263,22 @@ finalize_aggregates(AggState *aggstate,
 	{
 		AggStatePerAgg peragg = &peraggs[aggno];
 		int			stateno = peragg->stateno;
-		AggStatePerAggState peraggstate = &aggstate->peraggstate[stateno];
+		AggStateTransState transstate = &aggstate->transstates[stateno];
 		AggStatePerGroup pergroupstate;
 
 		pergroupstate = &pergroup[stateno + (currentSet * (aggstate->numstates))];
 
-		if (peraggstate->numSortCols > 0)
+		if (transstate->numSortCols > 0)
 		{
 			Assert(((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED);
 
-			if (peraggstate->numInputs == 1)
+			if (transstate->numInputs == 1)
 				process_ordered_aggregate_single(aggstate,
-												 peraggstate,
+												 transstate,
 												 pergroupstate);
 			else
 				process_ordered_aggregate_multi(aggstate,
-												peraggstate,
+												transstate,
 												pergroupstate);
 		}
 
@@ -1502,7 +1502,7 @@ lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot)
 	if (isnew)
 	{
 		/* initialize aggregates for new tuple group */
-		initialize_aggregates(aggstate, aggstate->peraggstate, entry->pergroup, 0);
+		initialize_aggregates(aggstate, entry->pergroup, 0);
 	}
 
 	return entry;
@@ -1579,7 +1579,6 @@ agg_retrieve_direct(AggState *aggstate)
 	ExprContext *econtext;
 	ExprContext *tmpcontext;
 	AggStatePerAgg peragg;
-	AggStatePerAggState peraggstate;
 	AggStatePerGroup pergroup;
 	TupleTableSlot *outerslot;
 	TupleTableSlot *firstSlot;
@@ -1602,7 +1601,6 @@ agg_retrieve_direct(AggState *aggstate)
 	tmpcontext = aggstate->tmpcontext;
 
 	peragg = aggstate->peragg;
-	peraggstate = aggstate->peraggstate;
 	pergroup = aggstate->pergroup;
 	firstSlot = aggstate->ss.ss_ScanTupleSlot;
 
@@ -1792,7 +1790,7 @@ agg_retrieve_direct(AggState *aggstate)
 			/*
 			 * Initialize working state for a new input tuple group.
 			 */
-			initialize_aggregates(aggstate, peraggstate, pergroup, numReset);
+			initialize_aggregates(aggstate, pergroup, numReset);
 
 			if (aggstate->grp_firstTuple != NULL)
 			{
@@ -2022,7 +2020,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 {
 	AggState   *aggstate;
 	AggStatePerAgg peraggs;
-	AggStatePerAggState peraggstates;
+	AggStateTransState transstates;
 	Plan	   *outerPlan;
 	ExprContext *econtext;
 	int			numaggs,
@@ -2054,8 +2052,8 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 	aggstate->projected_set = -1;
 	aggstate->current_set = 0;
 	aggstate->peragg = NULL;
-	aggstate->peraggstate = NULL;
-	aggstate->curperaggstate = NULL;
+	aggstate->transstates = NULL;
+	aggstate->curtransstate = NULL;
 	aggstate->agg_done = false;
 	aggstate->input_done = false;
 	aggstate->pergroup = NULL;
@@ -2289,10 +2287,10 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 	econtext->ecxt_aggnulls = (bool *) palloc0(sizeof(bool) * numaggs);
 
 	peraggs = (AggStatePerAgg) palloc0(sizeof(AggStatePerAggData) * numaggs);
-	peraggstates = (AggStatePerAggState) palloc0(sizeof(AggStatePerAggStateData) * numaggs);
+	transstates = (AggStateTransState) palloc0(sizeof(AggStateTransStateData)* numaggs);
 
 	aggstate->peragg = peraggs;
-	aggstate->peraggstate = peraggstates;
+	aggstate->transstates = transstates;
 
 	if (node->aggstrategy == AGG_HASHED)
 	{
@@ -2323,7 +2321,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 		AggrefExprState *aggrefstate = (AggrefExprState *) lfirst(l);
 		Aggref	   *aggref = (Aggref *) aggrefstate->xprstate.expr;
 		AggStatePerAgg peragg;
-		AggStatePerAggState peraggstate;
+		AggStateTransState transstate;
 		AggRefCompatibility agg_match;
 		Oid			inputTypes[FUNC_MAX_ARGS];
 		int			numArguments;
@@ -2347,10 +2345,10 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 		 * giving them duplicate aggno values. We also do our best to reuse
 		 * duplicate aggregate states. The query may use 2 or more aggregate
 		 * functions which share the same transition function and initial
-		 * value therefore would end up calculating the same state. In this
-		 * case we can just calculate the state once, however if the finalfns
-		 * do not match then we must create a new peragg to store the varying
-		 * finalfn.
+		 * value therefore would end up building an identical transition state.
+		 * In this case we can just calculate the state once, however if the
+		 * finalfns do not match then we must create a new peragg to store the
+		 * varying finalfn.
 		 */
 
 		/* check if we have previous agg or state matches that can be reused */
@@ -2408,8 +2406,8 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 			ReleaseSysCache(procTuple);
 
 			/*
-			 * If we're reusing an existing state, no need to check the
-			 * transfn permission again.
+			 * We only need to check permissions on the transfn if we're not
+			 * reusing the transition state.
 			 */
 			if (agg_match == AGGREF_NO_MATCH)
 			{
@@ -2448,21 +2446,21 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 		 */
 		if (agg_match == AGGREF_NO_MATCH)
 		{
-			peraggstate = &peraggstates[++stateno];
-			build_peraggstate_for_aggref(peraggstate, aggstate, estate,
+			transstate = &transstates[++stateno];
+			build_transstate_for_aggref(transstate, aggstate, estate,
 										 aggref,
 										 aggTuple, inputTypes, numArguments);
 			peragg->stateno = stateno;
 		}
-		else
+		else		/* AGGREF_STATE_MATCH */
 		{
 			int			existing_stateno = peraggs[existing_aggno].stateno;
 
-			peraggstate = &peraggstates[existing_stateno];
+			transstate = &transstates[existing_stateno];
 			peragg->stateno = existing_stateno;
 
-			/* when reusing the state the transfns should match! */
-			Assert(peraggstate->transfn_oid == aggform->aggtransfn);
+			/* when reusing the state, the transfns should match! */
+			Assert(transstate->transfn_oid == aggform->aggtransfn);
 		}
 
 		/* Detect how many arguments to pass to the finalfn */
@@ -2479,7 +2477,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 		{
 			build_aggregate_finalfn_expr(inputTypes,
 										 peragg->numFinalArgs,
-										 peraggstate->aggtranstype,
+										 transstate->aggtranstype,
 										 aggref->aggtype,
 										 aggref->inputcollid,
 										 finalfn_oid,
@@ -2509,12 +2507,12 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 /*
  * Build the state needed to calculate a state value for an aggregate.
  *
- * This initializes all the fields in 'peraggstate'. 'aggTuple',
+ * This initializes all the fields in 'transstate'. 'aggTuple',
  * 'inputTypes' and 'numArguments' could be derived from 'aggref', but the
  * caller has calculated them already, so might as well pass them.
  */
 static void
-build_peraggstate_for_aggref(AggStatePerAggState peraggstate,
+build_transstate_for_aggref(AggStateTransState transstate,
 							 AggState *aggstate, EState *estate,
 							 Aggref *aggref, HeapTuple aggTuple,
 							 Oid *inputTypes, int numArguments)
@@ -2528,25 +2526,24 @@ build_peraggstate_for_aggref(AggStatePerAggState peraggstate,
 	List	   *sortlist;
 	int			numSortCols;
 	int			numDistinctCols;
-	int			currentsortno;
 	int			naggs;
 	int			i;
 	Datum		textInitVal;
 	Oid			transfn_oid;
 
-	/* Begin filling in the peraggstate data */
-	peraggstate->aggref = aggref;
-	peraggstate->aggCollation = aggref->inputcollid;
-	peraggstate->transfn_oid = transfn_oid = aggform->aggtransfn;
+	/* Begin filling in the transstate data */
+	transstate->aggref = aggref;
+	transstate->aggCollation = aggref->inputcollid;
+	transstate->transfn_oid = transfn_oid = aggform->aggtransfn;
 
 	/* Count the "direct" arguments, if any */
 	numDirectArgs = list_length(aggref->aggdirectargs);
 
 	/* Count the number of aggregated input columns */
-	peraggstate->numInputs = numInputs = list_length(aggref->args);
+	transstate->numInputs = numInputs = list_length(aggref->args);
 
 	/* resolve actual type of transition state, if polymorphic */
-	peraggstate->aggtranstype =
+	transstate->aggtranstype =
 		resolve_aggregate_transtype(aggref->aggfnoid,
 									aggform->aggtranstype,
 									inputTypes,
@@ -2554,9 +2551,9 @@ build_peraggstate_for_aggref(AggStatePerAggState peraggstate,
 
 	/* Detect how many arguments to pass to the transfn */
 	if (AGGKIND_IS_ORDERED_SET(aggref->aggkind))
-		peraggstate->numTransInputs = numInputs;
+		transstate->numTransInputs = numInputs;
 	else
-		peraggstate->numTransInputs = numArguments;
+		transstate->numTransInputs = numArguments;
 
 	/*
 	 * Set up infrastructure for calling the transfn
@@ -2565,19 +2562,19 @@ build_peraggstate_for_aggref(AggStatePerAggState peraggstate,
 								 numArguments,
 								 numDirectArgs,
 								 aggref->aggvariadic,
-								 peraggstate->aggtranstype,
+								 transstate->aggtranstype,
 								 aggref->inputcollid,
 								 transfn_oid,
 								 InvalidOid,	/* invtrans is not needed here */
 								 &transfnexpr,
 								 NULL);
-	fmgr_info(peraggstate->transfn_oid, &peraggstate->transfn);
-	fmgr_info_set_expr((Node *) transfnexpr, &peraggstate->transfn);
+	fmgr_info(transfn_oid, &transstate->transfn);
+	fmgr_info_set_expr((Node *) transfnexpr, &transstate->transfn);
 
-	InitFunctionCallInfoData(peraggstate->transfn_fcinfo,
-							 &peraggstate->transfn,
-							 peraggstate->numTransInputs + 1,
-							 peraggstate->aggCollation,
+	InitFunctionCallInfoData(transstate->transfn_fcinfo,
+							 &transstate->transfn,
+							 transstate->numTransInputs + 1,
+							 transstate->aggCollation,
 							 (void *) aggstate, NULL);
 
 
@@ -2589,13 +2586,13 @@ build_peraggstate_for_aggref(AggStatePerAggState peraggstate,
 	 */
 	textInitVal = SysCacheGetAttr(AGGFNOID, aggTuple,
 								  Anum_pg_aggregate_agginitval,
-								  &peraggstate->initValueIsNull);
+								  &transstate->initValueIsNull);
 
-	if (peraggstate->initValueIsNull)
-		peraggstate->initValue = (Datum) 0;
+	if (transstate->initValueIsNull)
+		transstate->initValue = (Datum) 0;
 	else
-		peraggstate->initValue = GetAggInitVal(textInitVal,
-											   peraggstate->aggtranstype);
+		transstate->initValue = GetAggInitVal(textInitVal,
+											  transstate->aggtranstype);
 
 	/*
 	 * If the transfn is strict and the initval is NULL, make sure input type
@@ -2605,39 +2602,40 @@ build_peraggstate_for_aggref(AggStatePerAggState peraggstate,
 	 * we must check again in case the transfn's strictness property has been
 	 * changed.
 	 */
-	if (peraggstate->transfn.fn_strict && peraggstate->initValueIsNull)
+	if (transstate->transfn.fn_strict && transstate->initValueIsNull)
 	{
 		if (numArguments <= numDirectArgs ||
 			!IsBinaryCoercible(inputTypes[numDirectArgs],
-							   peraggstate->aggtranstype))
+							   transstate->aggtranstype))
 			ereport(ERROR,
 					(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
-					 errmsg("aggregate needs to have compatible input type and transition type")));
+					 errmsg("aggregate %u needs to have compatible input type and transition type",
+							aggref->aggfnoid)));
 	}
 
 	/* get info about the state value's datatype */
-	get_typlenbyval(peraggstate->aggtranstype,
-					&peraggstate->transtypeLen,
-					&peraggstate->transtypeByVal);
+	get_typlenbyval(transstate->aggtranstype,
+					&transstate->transtypeLen,
+					&transstate->transtypeByVal);
 
 	/*
 	 * Get a tupledesc corresponding to the aggregated inputs (including sort
 	 * expressions) of the agg.
 	 */
-	peraggstate->evaldesc = ExecTypeFromTL(aggref->args, false);
+	transstate->evaldesc = ExecTypeFromTL(aggref->args, false);
 
 	/* Create slot we're going to do argument evaluation in */
-	peraggstate->evalslot = ExecInitExtraTupleSlot(estate);
-	ExecSetSlotDescriptor(peraggstate->evalslot, peraggstate->evaldesc);
+	transstate->evalslot = ExecInitExtraTupleSlot(estate);
+	ExecSetSlotDescriptor(transstate->evalslot, transstate->evaldesc);
 
 	/* Initialize the input and FILTER expressions */
 	naggs = aggstate->numaggs;
-	peraggstate->aggfilter = ExecInitExpr(aggref->aggfilter,
-										  (PlanState *) aggstate);
-	peraggstate->aggdirectargs = (List *) ExecInitExpr((Expr *) aggref->aggdirectargs,
-													 (PlanState *) aggstate);
-	peraggstate->args = (List *) ExecInitExpr((Expr *) aggref->args,
-											  (PlanState *) aggstate);
+	transstate->aggfilter = ExecInitExpr(aggref->aggfilter,
+										 (PlanState *) aggstate);
+	transstate->aggdirectargs = (List *) ExecInitExpr((Expr *)aggref->aggdirectargs,
+													  (PlanState *) aggstate);
+	transstate->args = (List *)ExecInitExpr((Expr *)aggref->args,
+											(PlanState *) aggstate);
 
 	/*
 	 * Complain if the aggregate's arguments contain any  aggregates; nested
@@ -2650,10 +2648,10 @@ build_peraggstate_for_aggref(AggStatePerAggState peraggstate,
 				 errmsg("aggregate function calls cannot be nested")));
 
 	/* Set up projection info for evaluation */
-	peraggstate->evalproj = ExecBuildProjectionInfo(peraggstate->args,
-													aggstate->tmpcontext,
-													peraggstate->evalslot,
-													NULL);
+	transstate->evalproj = ExecBuildProjectionInfo(transstate->args,
+												   aggstate->tmpcontext,
+												   transstate->evalslot,
+												   NULL);
 
 	/*
 	 * If we're doing either DISTINCT or ORDER BY for a plain agg, then we
@@ -2682,8 +2680,8 @@ build_peraggstate_for_aggref(AggStatePerAggState peraggstate,
 		numDistinctCols = 0;
 	}
 
-	peraggstate->numSortCols = numSortCols;
-	peraggstate->numDistinctCols = numDistinctCols;
+	transstate->numSortCols = numSortCols;
+	transstate->numDistinctCols = numDistinctCols;
 
 	if (numSortCols > 0)
 	{
@@ -2697,25 +2695,25 @@ build_peraggstate_for_aggref(AggStatePerAggState peraggstate,
 		if (numInputs == 1)
 		{
 			get_typlenbyval(inputTypes[numDirectArgs],
-							&peraggstate->inputtypeLen,
-							&peraggstate->inputtypeByVal);
+							&transstate->inputtypeLen,
+							&transstate->inputtypeByVal);
 		}
 		else if (numDistinctCols > 0)
 		{
 			/* we will need an extra slot to store prior values */
-			peraggstate->uniqslot = ExecInitExtraTupleSlot(estate);
-			ExecSetSlotDescriptor(peraggstate->uniqslot,
-								  peraggstate->evaldesc);
+			transstate->uniqslot = ExecInitExtraTupleSlot(estate);
+			ExecSetSlotDescriptor(transstate->uniqslot,
+								  transstate->evaldesc);
 		}
 
 		/* Extract the sort information for use later */
-		peraggstate->sortColIdx =
+		transstate->sortColIdx =
 			(AttrNumber *) palloc(numSortCols * sizeof(AttrNumber));
-		peraggstate->sortOperators =
+		transstate->sortOperators =
 			(Oid *) palloc(numSortCols * sizeof(Oid));
-		peraggstate->sortCollations =
+		transstate->sortCollations =
 			(Oid *) palloc(numSortCols * sizeof(Oid));
-		peraggstate->sortNullsFirst =
+		transstate->sortNullsFirst =
 			(bool *) palloc(numSortCols * sizeof(bool));
 
 		i = 0;
@@ -2727,10 +2725,10 @@ build_peraggstate_for_aggref(AggStatePerAggState peraggstate,
 			/* the parser should have made sure of this */
 			Assert(OidIsValid(sortcl->sortop));
 
-			peraggstate->sortColIdx[i] = tle->resno;
-			peraggstate->sortOperators[i] = sortcl->sortop;
-			peraggstate->sortCollations[i] = exprCollation((Node *) tle->expr);
-			peraggstate->sortNullsFirst[i] = sortcl->nulls_first;
+			transstate->sortColIdx[i] = tle->resno;
+			transstate->sortOperators[i] = sortcl->sortop;
+			transstate->sortCollations[i] = exprCollation((Node *)tle->expr);
+			transstate->sortNullsFirst[i] = sortcl->nulls_first;
 			i++;
 		}
 		Assert(i == numSortCols);
@@ -2744,7 +2742,7 @@ build_peraggstate_for_aggref(AggStatePerAggState peraggstate,
 		 * We need the equal function for each DISTINCT comparison we will
 		 * make.
 		 */
-		peraggstate->equalfns =
+		transstate->equalfns =
 			(FmgrInfo *) palloc(numDistinctCols * sizeof(FmgrInfo));
 
 		i = 0;
@@ -2752,16 +2750,14 @@ build_peraggstate_for_aggref(AggStatePerAggState peraggstate,
 		{
 			SortGroupClause *sortcl = (SortGroupClause *) lfirst(lc);
 
-			fmgr_info(get_opcode(sortcl->eqop), &peraggstate->equalfns[i]);
+			fmgr_info(get_opcode(sortcl->eqop), &transstate->equalfns[i]);
 			i++;
 		}
 		Assert(i == numDistinctCols);
 	}
 
-	peraggstate->sortstates = (Tuplesortstate **)
+	transstate->sortstates = (Tuplesortstate **)
 		palloc0(sizeof(Tuplesortstate *) * numGroupingSets);
-	for (currentsortno = 0; currentsortno < numGroupingSets; currentsortno++)
-		peraggstate->sortstates[currentsortno] = NULL;
 }
 
 
@@ -2838,7 +2834,7 @@ find_compatible_aggref(Aggref *newagg, AggState *aggstate,
 {
 	int			aggno;
 	int			statematchaggno;
-	AggStatePerAggState peraggstates;
+	AggStateTransState transstates;
 	AggStatePerAgg peraggs;
 
 	/* we mustn't reuse the aggref if it contains volatile function calls */
@@ -2846,7 +2842,7 @@ find_compatible_aggref(Aggref *newagg, AggState *aggstate,
 		return AGGREF_NO_MATCH;
 
 	statematchaggno = -1;
-	peraggstates = aggstate->peraggstate;
+	transstates = aggstate->transstates;
 	peraggs = aggstate->peragg;
 
 	/*
@@ -2859,13 +2855,13 @@ find_compatible_aggref(Aggref *newagg, AggState *aggstate,
 	{
 		AggRefCompatibility matchtype;
 		AggStatePerAgg peragg;
-		AggStatePerAggState peraggstate;
+		AggStateTransState transstate;
 
 		peragg = &peraggs[aggno];
-		peraggstate = &peraggstates[peragg->stateno];
+		transstate = &transstates[peragg->stateno];
 
 		/* lookup the match type of this agg */
-		matchtype = aggref_has_compatible_states(newagg, peragg, peraggstate);
+		matchtype = aggref_has_compatible_states(newagg, peragg, transstate);
 
 		/* if it's an exact match then we're done. */
 		if (matchtype == AGGREF_EXACT_MATCH)
@@ -2897,9 +2893,9 @@ find_compatible_aggref(Aggref *newagg, AggState *aggstate,
 static AggRefCompatibility
 aggref_has_compatible_states(Aggref *newagg,
 							 AggStatePerAgg peragg,
-							 AggStatePerAggState peraggstate)
+							 AggStateTransState transstate)
 {
-	Aggref	   *existingRef = peraggstate->aggref;
+	Aggref	   *existingRef = transstate->aggref;
 
 	/* all of the following must be the same or it's no match */
 	if (newagg->inputcollid != existingRef->inputcollid ||
@@ -2938,7 +2934,7 @@ aggref_has_compatible_states(Aggref *newagg,
 		aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
 
 		/* if the transfns are not the same then the state can't be shared */
-		if (aggform->aggtransfn != peraggstate->transfn_oid)
+		if (aggform->aggtransfn != transstate->transfn_oid)
 		{
 			ReleaseSysCache(aggTuple);
 			return AGGREF_NO_MATCH;
@@ -2953,7 +2949,7 @@ aggref_has_compatible_states(Aggref *newagg,
 		 * If both INITCONDs are null then the outcome depends on if the
 		 * finalfns match.
 		 */
-		if (initValueIsNull && peraggstate->initValueIsNull)
+		if (initValueIsNull && transstate->initValueIsNull)
 		{
 			if (aggform->aggfinalfn != peragg->finalfn_oid)
 				return AGGREF_STATE_MATCH;
@@ -2986,12 +2982,12 @@ ExecEndAgg(AggState *node)
 
 	for (stateno = 0; stateno < node->numstates; stateno++)
 	{
-		AggStatePerAggState peraggstate = &node->peraggstate[stateno];
+		AggStateTransState transstate = &node->transstates[stateno];
 
 		for (setno = 0; setno < numGroupingSets; setno++)
 		{
-			if (peraggstate->sortstates[setno])
-				tuplesort_end(peraggstate->sortstates[setno]);
+			if (transstate->sortstates[setno])
+				tuplesort_end(transstate->sortstates[setno]);
 		}
 	}
 
@@ -3055,12 +3051,12 @@ ExecReScanAgg(AggState *node)
 	{
 		for (setno = 0; setno < numGroupingSets; setno++)
 		{
-			AggStatePerAggState peraggstate = &node->peraggstate[stateno];
+			AggStateTransState transstate = &node->transstates[stateno];
 
-			if (peraggstate->sortstates[setno])
+			if (transstate->sortstates[setno])
 			{
-				tuplesort_end(peraggstate->sortstates[setno]);
-				peraggstate->sortstates[setno] = NULL;
+				tuplesort_end(transstate->sortstates[setno]);
+				transstate->sortstates[setno] = NULL;
 			}
 		}
 	}
@@ -3184,12 +3180,12 @@ AggGetAggref(FunctionCallInfo fcinfo)
 {
 	if (fcinfo->context && IsA(fcinfo->context, AggState))
 	{
-		AggStatePerAggState curperaggstate;
+		AggStateTransState curtransstate;
 
-		curperaggstate = ((AggState *) fcinfo->context)->curperaggstate;
+		curtransstate = ((AggState *)fcinfo->context)->curtransstate;
 
-		if (curperaggstate)
-			return curperaggstate->aggref;
+		if (curtransstate)
+			return curtransstate->aggref;
 	}
 	return NULL;
 }
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 65c0f74..3579f3b 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1822,7 +1822,7 @@ typedef struct GroupState
  */
 /* these structs are private in nodeAgg.c: */
 typedef struct AggStatePerAggData *AggStatePerAgg;
-typedef struct AggStatePerAggStateData *AggStatePerAggState;
+typedef struct AggStateTransStateData *AggStateTransState;
 typedef struct AggStatePerGroupData *AggStatePerGroup;
 typedef struct AggStatePerPhaseData *AggStatePerPhase;
 
@@ -1837,10 +1837,10 @@ typedef struct AggState
 	int			current_phase;	/* current phase number */
 	FmgrInfo   *hashfunctions;	/* per-grouping-field hash fns */
 	AggStatePerAgg peragg;		/* per-Aggref information */
-	AggStatePerAggState peraggstate; /* per-Agg State information */
+	AggStateTransState transstates; /* per-Agg State information */
 	ExprContext **aggcontexts;	/* econtexts for long-lived data (per GS) */
 	ExprContext *tmpcontext;	/* econtext for input expressions */
-	AggStatePerAggState curperaggstate;	/* identifies currently active aggregate */
+	AggStateTransState curtransstate;	/* identifies currently active aggregate */
 	bool		input_done;		/* indicates end of input */
 	bool		agg_done;		/* indicates completion of Agg scan */
 	int			projected_set;	/* The last projected grouping set */
#5Haribabu Kommi
kommi.haribabu@gmail.com
In reply to: David Rowley (#2)
Re: Sharing aggregate states between different aggregate functions

On Thu, Jul 9, 2015 at 7:44 PM, David Rowley
<david.rowley@2ndquadrant.com> wrote:

On 15 June 2015 at 12:05, David Rowley <david.rowley@2ndquadrant.com> wrote:

This basically allows an aggregate's state to be shared between other
aggregate functions when both aggregate's transition functions (and a few
other things) match
There's quite a number of aggregates in our standard set which will
benefit from this optimisation.

After compiling the original patch with another compiler, I noticed a couple
of warnings.

The attached fixes these.

I did some performance tests on the patch. This patch shown good
improvement for same column aggregates. With int or bigint datatype columns,
this patch doesn't show any visible performance difference. But with numeric
datatype it shows good improvement.

select sum(x), avg(y) from test where x < $1;

Different columns:

selectivity Head patch
(millions)
0.1 315 322
0.3 367 376
0.5 419 427
1 551 558
2 824 826

select sum(x), avg(x) from test where x < $1;

Same column:

selectivity Head patch
(millions)
0.1 314 314
0.3 363 343
0.5 412 373
1 536 440
2 795 586

Regards,
Hari Babu
Fujitsu Australia

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#6David Rowley
david.rowley@2ndquadrant.com
In reply to: Haribabu Kommi (#5)
Re: Sharing aggregate states between different aggregate functions

On 27 July 2015 at 18:15, Haribabu Kommi <kommi.haribabu@gmail.com> wrote:

On Thu, Jul 9, 2015 at 7:44 PM, David Rowley
<david.rowley@2ndquadrant.com> wrote:

On 15 June 2015 at 12:05, David Rowley <david.rowley@2ndquadrant.com>

wrote:

This basically allows an aggregate's state to be shared between other
aggregate functions when both aggregate's transition functions (and a

few

other things) match
There's quite a number of aggregates in our standard set which will
benefit from this optimisation.

After compiling the original patch with another compiler, I noticed a

couple

of warnings.

The attached fixes these.

I did some performance tests on the patch. This patch shown good
improvement for same column aggregates. With int or bigint datatype
columns,
this patch doesn't show any visible performance difference. But with
numeric
datatype it shows good improvement.

Thanks for testing this.

You should only see an improvement on aggregates listed here:

select aggfnoid::oid, aggfnoid || '(' || typname ||
')',aggtransfn,agginitval
from pg_aggregate ag
inner join pg_proc pr on aggfnoid = pr.oid
inner join pg_type tp on pr.proargtypes[0] = tp.oid
where ag.aggtransfn in (select aggtransfn
from pg_aggregate
group by aggtransfn
having count(*)>1)
and ag.agginitval is null
order by ag.aggtransfn;

Regards

David Rowley

--
David Rowley http://www.2ndQuadrant.com/
<http://www.2ndquadrant.com/&gt;
PostgreSQL Development, 24x7 Support, Training & Services

#7Heikki Linnakangas
hlinnaka@iki.fi
In reply to: David Rowley (#4)
Re: Sharing aggregate states between different aggregate functions

On 07/27/2015 08:34 AM, David Rowley wrote:

- * agg_input_types, agg_state_type, agg_result_type identify the input,
- * transition, and result types of the aggregate.  These should all be
- * resolved to actual types (ie, none should ever be ANYELEMENT etc).
+ * agg_input_types identifies the input types of the aggregate.  These
should
+ * be resolved to actual types (ie, none should ever be ANYELEMENT etc).

I'm not sure I understand why agg_state_type and agg_result_type have
changed here.

The function no longer has the agg_result_type argument, but the removal
of agg_state_type from the comment was a mistake.

+ peraggstate->sortstates = (Tuplesortstate **)
+ palloc0(sizeof(Tuplesortstate *) * numGroupingSets);
+ for (currentsortno = 0; currentsortno < numGroupingSets; currentsortno++)
+ peraggstate->sortstates[currentsortno] = NULL;

This was not you, but this NULL setting looks unneeded due to the palloc0().

Yeah, I noticed that too. Ok, let's take it out.

In this function I also wasn't quite sure if it was with comparing both
non-NULL INITCOND's here. I believe my code comments may slightly
contradict what the code actually does, as the comments talk about them
having to match, but the code just bails if any are non-NULL. The reason I
didn't check them was because it seems inevitable that some duplicate work
needs to be done when setting up the INITCOND. Perhaps it's worth it?

It would be nice to handle non-NULL initconds. I think you'll have to
check that the input function isn't volatile. Or perhaps just call the
input function, and check that the resulting Datum is byte-per-byte
identical, although that might be awkward to do with the current code
structure.

BTW, the name of the AggStatePerAggStateData struct is pretty horrible.
The repeated "AggState" feels awkward. Now that I've stared at the patch
for a some time, it doesn't bother me anymore, but it took me quite a while
to over that. I'm sure it will for others too. And it's not just that
struct, the comments talk about "aggregate state", which could be confused
to mean "AggState", but it actually means AggStatePerAggStateData. I don't
have any great suggestions, but can you come up a better naming scheme?

I agree, they're horrible. The thing that's causing the biggest problem is
the struct named AggState, which carries state for *all* aggregates, and
has nothing to do with "transition state", so it seems there's two
different meanings if the word "state" and I've used both meanings in the
name for AggStatePerAggStateData.

Perhaps just renaming AggStatePerAggStateData to AggStateTransStateData
would be good enough?

Hmm. I think it should be "AggStatePerTransData" then, to keep the same
pattern as AggStatePerAggData and AggStatePerGroupData.

I've attached a delta patch based on your patch, in this I've:

1. Renamed AggStatePerAggStateData to AggStateTransStateData and all
variables using that are renamed to suit better.
2. Removed surplus peraggstate->sortstates[currentsortno] = NULL; (not
related to this patch, but since we're moving that part of the code, we'd
better fix)
3. Put back the missing aggfnoid from the error message.
4. Changed initialize_aggregates() to not pass the states. They're already
in AggState and we're using aggstate->numstates to get the count of the
items in that array, so it seems wrong to allow a different array to ever
be passed in.
5. Changed wording of a few comments to try and reduce confusing of 'state'
and 'transition state'.
6. Renamed AggState.peraggstate to transstates. I pluralised this to try to
reduce confusion of the single state pointers named 'transstate' in the
functions in nodeAgg.c. I did think that peragg should also become peraggs
and pergroup should become pergroups, but didn't change those.

Anything else I changed is self explanatory.

What do you think?

Looks good, thanks!

- Heikki

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#8David Rowley
david.rowley@2ndquadrant.com
In reply to: Heikki Linnakangas (#7)
Re: Sharing aggregate states between different aggregate functions

On 27 July 2015 at 20:11, Heikki Linnakangas <hlinnaka@iki.fi> wrote:

On 07/27/2015 08:34 AM, David Rowley wrote:

In this function I also wasn't quite sure if it was with comparing both

non-NULL INITCOND's here. I believe my code comments may slightly
contradict what the code actually does, as the comments talk about them
having to match, but the code just bails if any are non-NULL. The reason I
didn't check them was because it seems inevitable that some duplicate work
needs to be done when setting up the INITCOND. Perhaps it's worth it?

It would be nice to handle non-NULL initconds. I think you'll have to
check that the input function isn't volatile. Or perhaps just call the
input function, and check that the resulting Datum is byte-per-byte
identical, although that might be awkward to do with the current code
structure.

Yeah, it's awkward, as I just managed to remind myself:

The aggtranstype needs to be known before we can call GetAggInitVal() on
the initval datum.

That currently happens in build_transstate_for_aggref() only when we've
decided to create a new state.

transstate->initValue = GetAggInitVal(textInitVal,
transstate->aggtranstype);

And to get the aggtranstype:

transstate->aggtranstype =
resolve_aggregate_transtype(aggref->aggfnoid,
aggform->aggtranstype,
inputTypes,
numArguments);

Of course, not impossible, but lots of code gets duplicated.

Regards

David Rowley

--
David Rowley http://www.2ndQuadrant.com/
<http://www.2ndquadrant.com/&gt;
PostgreSQL Development, 24x7 Support, Training & Services

#9David Rowley
david.rowley@2ndquadrant.com
In reply to: Heikki Linnakangas (#7)
1 attachment(s)
Re: Sharing aggregate states between different aggregate functions

On 27 July 2015 at 20:11, Heikki Linnakangas <hlinnaka@iki.fi> wrote:

On 07/27/2015 08:34 AM, David Rowley wrote:

- * agg_input_types, agg_state_type, agg_result_type identify the input,
- * transition, and result types of the aggregate.  These should all be
- * resolved to actual types (ie, none should ever be ANYELEMENT etc).
+ * agg_input_types identifies the input types of the aggregate.  These
should
+ * be resolved to actual types (ie, none should ever be ANYELEMENT etc).

I'm not sure I understand why agg_state_type and agg_result_type have
changed here.

The function no longer has the agg_result_type argument, but the removal
of agg_state_type from the comment was a mistake.

I've put agg_state_type back in the attached delta which is again based on
your version of the patch.

+ peraggstate->sortstates = (Tuplesortstate **)

+ palloc0(sizeof(Tuplesortstate *) * numGroupingSets);
+ for (currentsortno = 0; currentsortno < numGroupingSets;
currentsortno++)
+ peraggstate->sortstates[currentsortno] = NULL;

This was not you, but this NULL setting looks unneeded due to the
palloc0().

Yeah, I noticed that too. Ok, let's take it out.

Removed in attached.

In this function I also wasn't quite sure if it was with comparing both

non-NULL INITCOND's here. I believe my code comments may slightly
contradict what the code actually does, as the comments talk about them
having to match, but the code just bails if any are non-NULL. The reason I
didn't check them was because it seems inevitable that some duplicate work
needs to be done when setting up the INITCOND. Perhaps it's worth it?

It would be nice to handle non-NULL initconds. I think you'll have to
check that the input function isn't volatile. Or perhaps just call the
input function, and check that the resulting Datum is byte-per-byte
identical, although that might be awkward to do with the current code
structure.

I've not done anything with this.
I'd not thought of an input function being volatile before, but I guess
it's possible, which makes me a bit scared that we could be treading on
ground we shouldn't be. I know it's more of an output function thing than
an input function thing, but a GUC like extra_float_digits could cause
problems here.

In summary, I'm much less confident it's safe to enable the optimisation in
this case.

BTW, the name of the AggStatePerAggStateData struct is pretty horrible.

The repeated "AggState" feels awkward. Now that I've stared at the patch
for a some time, it doesn't bother me anymore, but it took me quite a
while
to over that. I'm sure it will for others too. And it's not just that
struct, the comments talk about "aggregate state", which could be
confused
to mean "AggState", but it actually means AggStatePerAggStateData. I
don't
have any great suggestions, but can you come up a better naming scheme?

I agree, they're horrible. The thing that's causing the biggest problem is
the struct named AggState, which carries state for *all* aggregates, and
has nothing to do with "transition state", so it seems there's two
different meanings if the word "state" and I've used both meanings in the
name for AggStatePerAggStateData.

Perhaps just renaming AggStatePerAggStateData to AggStateTransStateData
would be good enough?

Hmm. I think it should be "AggStatePerTransData" then, to keep the same
pattern as AggStatePerAggData and AggStatePerGroupData.

Sounds good. I've renamed it to that in the attached delta patch.

Regards

David Rowley

--
David Rowley http://www.2ndQuadrant.com/
<http://www.2ndquadrant.com/&gt;
PostgreSQL Development, 24x7 Support, Training & Services

Attachments:

sharing_aggstate-heikki-1_delta2.patchapplication/octet-stream; name=sharing_aggstate-heikki-1_delta2.patchDownload
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 984216d..d4dcb9b 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -152,15 +152,15 @@
 
 
 /*
- * AggStateTransStateData - per aggregate state data for the Agg scan
+ * AggStatePerTransData - per aggregate transition state data for the Agg scan
  *
  * Working state for calculating the aggregate's transition state, using the
  * state transition function. This struct does not store the information needed
  * to produce the final aggregate result from the transition state, that's stored
  * in AggStatePerAggData instead. This separation allows multiple aggregate
- * results to be produced from a single state value.
+ * results to be produced from a single transition state.
  */
-typedef struct AggStateTransStateData
+typedef struct AggStatePerTransData
 {
 	/*
 	 * These values are set up during ExecInitAgg() and do not change
@@ -294,7 +294,7 @@ typedef struct AggStateTransStateData
 	 * worth the extra space consumption.
 	 */
 	FunctionCallInfoData transfn_fcinfo;
-}	AggStateTransStateData;
+}	AggStatePerTransData;
 
 /*
  * AggStatePerAggData - per-aggregate working state
@@ -309,8 +309,8 @@ typedef struct AggStatePerAggData
 	 * thereafter:
 	 */
 
-	/* index to the corresponding per-aggstate which this agg should use */
-	int			stateno;
+	/* index to the corresponding per-trans state which this agg should use */
+	int			transno;
 
 	/* Optional Oid of final function (may be InvalidOid) */
 	Oid			finalfn_oid;
@@ -424,14 +424,14 @@ static void initialize_aggregates(AggState *aggstate,
 					  AggStatePerGroup pergroup,
 					  int numReset);
 static void advance_transition_function(AggState *aggstate,
-							AggStateTransState transstate,
+							AggStatePerTrans pertrans,
 							AggStatePerGroup pergroupstate);
 static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup);
 static void process_ordered_aggregate_single(AggState *aggstate,
-								 AggStateTransState transstate,
+								 AggStatePerTrans pertrans,
 								 AggStatePerGroup pergroupstate);
 static void process_ordered_aggregate_multi(AggState *aggstate,
-								AggStateTransState transstate,
+								AggStatePerTrans pertrans,
 								AggStatePerGroup pergroupstate);
 static void finalize_aggregate(AggState *aggstate,
 				   AggStatePerAgg peragg,
@@ -454,14 +454,14 @@ static TupleTableSlot *agg_retrieve_direct(AggState *aggstate);
 static void agg_fill_hash_table(AggState *aggstate);
 static TupleTableSlot *agg_retrieve_hash_table(AggState *aggstate);
 static Datum GetAggInitVal(Datum textInitVal, Oid transtype);
-static void build_transstate_for_aggref(AggStateTransState transstate,
+static void build_pertrans_for_aggref(AggStatePerTrans pertrans,
 							 AggState *aggsate, EState *estate,
 							 Aggref *aggref, HeapTuple aggtuple,
 							 Oid *inputTypes, int numArguments);
 static AggRefCompatibility find_compatible_aggref(Aggref *newagg,
 					   AggState *aggstate, int lastaggno, int *foundaggno);
 static AggRefCompatibility aggref_has_compatible_states(Aggref *newagg,
-				AggStatePerAgg peragg, AggStateTransState transstate);
+						AggStatePerAgg peragg, AggStatePerTrans pertrans);
 
 
 /*
@@ -564,20 +564,20 @@ fetch_input_tuple(AggState *aggstate)
  * When called, CurrentMemoryContext should be the per-query context.
  */
 static void
-initialize_aggregate(AggState *aggstate, AggStateTransState transstate,
+initialize_aggregate(AggState *aggstate, AggStatePerTrans pertrans,
 					 AggStatePerGroup pergroupstate)
 {
 	/*
 	 * Start a fresh sort operation for each DISTINCT/ORDER BY aggregate.
 	 */
-	if (transstate->numSortCols > 0)
+	if (pertrans->numSortCols > 0)
 	{
 		/*
 		 * In case of rescan, maybe there could be an uncompleted sort
 		 * operation?  Clean it up if so.
 		 */
-		if (transstate->sortstates[aggstate->current_set])
-			tuplesort_end(transstate->sortstates[aggstate->current_set]);
+		if (pertrans->sortstates[aggstate->current_set])
+			tuplesort_end(pertrans->sortstates[aggstate->current_set]);
 
 
 		/*
@@ -585,21 +585,21 @@ initialize_aggregate(AggState *aggstate, AggStateTransState transstate,
 		 * otherwise sort the full tuple.  (See comments for
 		 * process_ordered_aggregate_single.)
 		 */
-		if (transstate->numInputs == 1)
-			transstate->sortstates[aggstate->current_set] =
-				tuplesort_begin_datum(transstate->evaldesc->attrs[0]->atttypid,
-									  transstate->sortOperators[0],
-									  transstate->sortCollations[0],
-									  transstate->sortNullsFirst[0],
+		if (pertrans->numInputs == 1)
+			pertrans->sortstates[aggstate->current_set] =
+				tuplesort_begin_datum(pertrans->evaldesc->attrs[0]->atttypid,
+									  pertrans->sortOperators[0],
+									  pertrans->sortCollations[0],
+									  pertrans->sortNullsFirst[0],
 									  work_mem, false);
 		else
-			transstate->sortstates[aggstate->current_set] =
-				tuplesort_begin_heap(transstate->evaldesc,
-									 transstate->numSortCols,
-									 transstate->sortColIdx,
-									 transstate->sortOperators,
-									 transstate->sortCollations,
-									 transstate->sortNullsFirst,
+			pertrans->sortstates[aggstate->current_set] =
+				tuplesort_begin_heap(pertrans->evaldesc,
+									 pertrans->numSortCols,
+									 pertrans->sortColIdx,
+									 pertrans->sortOperators,
+									 pertrans->sortCollations,
+									 pertrans->sortNullsFirst,
 									 work_mem, false);
 	}
 
@@ -609,20 +609,20 @@ initialize_aggregate(AggState *aggstate, AggStateTransState transstate,
 	 * Note that when the initial value is pass-by-ref, we must copy it (into
 	 * the aggcontext) since we will pfree the transValue later.
 	 */
-	if (transstate->initValueIsNull)
-		pergroupstate->transValue = transstate->initValue;
+	if (pertrans->initValueIsNull)
+		pergroupstate->transValue = pertrans->initValue;
 	else
 	{
 		MemoryContext oldContext;
 
 		oldContext = MemoryContextSwitchTo(
 		aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
-		pergroupstate->transValue = datumCopy(transstate->initValue,
-											  transstate->transtypeByVal,
-											  transstate->transtypeLen);
+		pergroupstate->transValue = datumCopy(pertrans->initValue,
+											  pertrans->transtypeByVal,
+											  pertrans->transtypeLen);
 		MemoryContextSwitchTo(oldContext);
 	}
-	pergroupstate->transValueIsNull = transstate->initValueIsNull;
+	pergroupstate->transValueIsNull = pertrans->initValueIsNull;
 
 	/*
 	 * If the initial value for the transition state doesn't exist in the
@@ -631,7 +631,7 @@ initialize_aggregate(AggState *aggstate, AggStateTransState transstate,
 	 * aggregates like max() and min().) The noTransValue flag signals that we
 	 * still need to do this.
 	 */
-	pergroupstate->noTransValue = transstate->initValueIsNull;
+	pergroupstate->noTransValue = pertrans->initValueIsNull;
 }
 
 /*
@@ -649,27 +649,27 @@ initialize_aggregates(AggState *aggstate,
 					  AggStatePerGroup pergroup,
 					  int numReset)
 {
-	int					stateno;
+	int					transno;
 	int					numGroupingSets = Max(aggstate->phase->numsets, 1);
 	int					setno = 0;
-	AggStateTransState	transstates = aggstate->transstates;
+	AggStatePerTrans	transstates = aggstate->pertrans;
 
 	if (numReset < 1)
 		numReset = numGroupingSets;
 
-	for (stateno = 0; stateno < aggstate->numstates; stateno++)
+	for (transno = 0; transno < aggstate->numtrans; transno++)
 	{
-		AggStateTransState transstate = &transstates[stateno];
+		AggStatePerTrans pertrans = &transstates[transno];
 
 		for (setno = 0; setno < numReset; setno++)
 		{
 			AggStatePerGroup pergroupstate;
 
-			pergroupstate = &pergroup[stateno + (setno * (aggstate->numstates))];
+			pergroupstate = &pergroup[transno + (setno * (aggstate->numtrans))];
 
 			aggstate->current_set = setno;
 
-			initialize_aggregate(aggstate, transstate, pergroupstate);
+			initialize_aggregate(aggstate, pertrans, pergroupstate);
 		}
 	}
 }
@@ -679,28 +679,28 @@ initialize_aggregates(AggState *aggstate,
  * state within one grouping set only (already set in aggstate->current_set)
  *
  * The new values (and null flags) have been preloaded into argument positions
- * 1 and up in transstate->transfn_fcinfo, so that we needn't copy them again
- * to pass to the transition function.  We also expect that the static fields
- * of the fcinfo are already initialized; that was done by ExecInitAgg().
+ * 1 and up in pertrans->transfn_fcinfo, so that we needn't copy them again to
+ * pass to the transition function.  We also expect that the static fields of
+ * the fcinfo are already initialized; that was done by ExecInitAgg().
  *
  * It doesn't matter which memory context this is called in.
  */
 static void
 advance_transition_function(AggState *aggstate,
-							AggStateTransState transstate,
+AggStatePerTrans pertrans,
 							AggStatePerGroup pergroupstate)
 {
-	FunctionCallInfo fcinfo = &transstate->transfn_fcinfo;
+	FunctionCallInfo fcinfo = &pertrans->transfn_fcinfo;
 	MemoryContext oldContext;
 	Datum		newVal;
 
-	if (transstate->transfn.fn_strict)
+	if (pertrans->transfn.fn_strict)
 	{
 		/*
 		 * For a strict transfn, nothing happens when there's a NULL input; we
 		 * just keep the prior transValue.
 		 */
-		int			numTransInputs = transstate->numTransInputs;
+		int			numTransInputs = pertrans->numTransInputs;
 		int			i;
 
 		for (i = 1; i <= numTransInputs; i++)
@@ -722,8 +722,8 @@ advance_transition_function(AggState *aggstate,
 			oldContext = MemoryContextSwitchTo(
 											   aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
 			pergroupstate->transValue = datumCopy(fcinfo->arg[1],
-												  transstate->transtypeByVal,
-												  transstate->transtypeLen);
+												  pertrans->transtypeByVal,
+												  pertrans->transtypeLen);
 			pergroupstate->transValueIsNull = false;
 			pergroupstate->noTransValue = false;
 			MemoryContextSwitchTo(oldContext);
@@ -744,8 +744,8 @@ advance_transition_function(AggState *aggstate,
 	/* We run the transition functions in per-input-tuple memory context */
 	oldContext = MemoryContextSwitchTo(aggstate->tmpcontext->ecxt_per_tuple_memory);
 
-	/* set up aggstate->curtransstate for AggGetAggref() */
-	aggstate->curtransstate = transstate;
+	/* set up aggstate->curpertrans for AggGetAggref() */
+	aggstate->curpertrans = pertrans;
 
 	/*
 	 * OK to call the transition function
@@ -756,22 +756,22 @@ advance_transition_function(AggState *aggstate,
 
 	newVal = FunctionCallInvoke(fcinfo);
 
-	aggstate->curtransstate = NULL;
+	aggstate->curpertrans = NULL;
 
 	/*
 	 * If pass-by-ref datatype, must copy the new value into aggcontext and
 	 * pfree the prior transValue.  But if transfn returned a pointer to its
 	 * first input, we don't need to do anything.
 	 */
-	if (!transstate->transtypeByVal &&
+	if (!pertrans->transtypeByVal &&
 		DatumGetPointer(newVal) != DatumGetPointer(pergroupstate->transValue))
 	{
 		if (!fcinfo->isnull)
 		{
 			MemoryContextSwitchTo(aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
 			newVal = datumCopy(newVal,
-							   transstate->transtypeByVal,
-							   transstate->transtypeLen);
+							   pertrans->transtypeByVal,
+							   pertrans->transtypeLen);
 		}
 		if (!pergroupstate->transValueIsNull)
 			pfree(DatumGetPointer(pergroupstate->transValue));
@@ -794,16 +794,16 @@ advance_transition_function(AggState *aggstate,
 static void
 advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
 {
-	int			stateno;
+	int			transno;
 	int			setno = 0;
 	int			numGroupingSets = Max(aggstate->phase->numsets, 1);
-	int			numStates = aggstate->numstates;
+	int			numTrans = aggstate->numtrans;
 
-	for (stateno = 0; stateno < numStates; stateno++)
+	for (transno = 0; transno < numTrans; transno++)
 	{
-		AggStateTransState transstate = &aggstate->transstates[stateno];
-		ExprState  *filter = transstate->aggfilter;
-		int			numTransInputs = transstate->numTransInputs;
+		AggStatePerTrans pertrans = &aggstate->pertrans[transno];
+		ExprState  *filter = pertrans->aggfilter;
+		int			numTransInputs = pertrans->numTransInputs;
 		int			i;
 		TupleTableSlot *slot;
 
@@ -820,12 +820,12 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
 		}
 
 		/* Evaluate the current input expressions for this aggregate */
-		slot = ExecProject(transstate->evalproj, NULL);
+		slot = ExecProject(pertrans->evalproj, NULL);
 
-		if (transstate->numSortCols > 0)
+		if (pertrans->numSortCols > 0)
 		{
 			/* DISTINCT and/or ORDER BY case */
-			Assert(slot->tts_nvalid == transstate->numInputs);
+			Assert(slot->tts_nvalid == pertrans->numInputs);
 
 			/*
 			 * If the transfn is strict, we want to check for nullity before
@@ -834,7 +834,7 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
 			 * not numInputs, since nullity in columns used only for sorting
 			 * is not relevant here.
 			 */
-			if (transstate->transfn.fn_strict)
+			if (pertrans->transfn.fn_strict)
 			{
 				for (i = 0; i < numTransInputs; i++)
 				{
@@ -848,18 +848,18 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
 			for (setno = 0; setno < numGroupingSets; setno++)
 			{
 				/* OK, put the tuple into the tuplesort object */
-				if (transstate->numInputs == 1)
-					tuplesort_putdatum(transstate->sortstates[setno],
+				if (pertrans->numInputs == 1)
+					tuplesort_putdatum(pertrans->sortstates[setno],
 									   slot->tts_values[0],
 									   slot->tts_isnull[0]);
 				else
-					tuplesort_puttupleslot(transstate->sortstates[setno], slot);
+					tuplesort_puttupleslot(pertrans->sortstates[setno], slot);
 			}
 		}
 		else
 		{
 			/* We can apply the transition function immediately */
-			FunctionCallInfo fcinfo = &transstate->transfn_fcinfo;
+			FunctionCallInfo fcinfo = &pertrans->transfn_fcinfo;
 
 			/* Load values into fcinfo */
 			/* Start from 1, since the 0th arg will be the transition value */
@@ -872,12 +872,11 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
 
 			for (setno = 0; setno < numGroupingSets; setno++)
 			{
-				AggStatePerGroup pergroupstate = &pergroup[stateno + (setno * numStates)];
+				AggStatePerGroup pergroupstate = &pergroup[transno + (setno * numTrans)];
 
 				aggstate->current_set = setno;
 
-				advance_transition_function(aggstate, transstate,
-											pergroupstate);
+				advance_transition_function(aggstate, pertrans, pergroupstate);
 			}
 		}
 	}
@@ -908,7 +907,7 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
  */
 static void
 process_ordered_aggregate_single(AggState *aggstate,
-								 AggStateTransState transstate,
+								 AggStatePerTrans pertrans,
 								 AggStatePerGroup pergroupstate)
 {
 	Datum		oldVal = (Datum) 0;
@@ -916,14 +915,14 @@ process_ordered_aggregate_single(AggState *aggstate,
 	bool		haveOldVal = false;
 	MemoryContext workcontext = aggstate->tmpcontext->ecxt_per_tuple_memory;
 	MemoryContext oldContext;
-	bool		isDistinct = (transstate->numDistinctCols > 0);
-	FunctionCallInfo fcinfo = &transstate->transfn_fcinfo;
+	bool		isDistinct = (pertrans->numDistinctCols > 0);
+	FunctionCallInfo fcinfo = &pertrans->transfn_fcinfo;
 	Datum	   *newVal;
 	bool	   *isNull;
 
-	Assert(transstate->numDistinctCols < 2);
+	Assert(pertrans->numDistinctCols < 2);
 
-	tuplesort_performsort(transstate->sortstates[aggstate->current_set]);
+	tuplesort_performsort(pertrans->sortstates[aggstate->current_set]);
 
 	/* Load the column into argument 1 (arg 0 will be transition value) */
 	newVal = fcinfo->arg + 1;
@@ -935,7 +934,7 @@ process_ordered_aggregate_single(AggState *aggstate,
 	 * pfree them when they are no longer needed.
 	 */
 
-	while (tuplesort_getdatum(transstate->sortstates[aggstate->current_set],
+	while (tuplesort_getdatum(pertrans->sortstates[aggstate->current_set],
 							  true, newVal, isNull))
 	{
 		/*
@@ -954,18 +953,18 @@ process_ordered_aggregate_single(AggState *aggstate,
 			haveOldVal &&
 			((oldIsNull && *isNull) ||
 			 (!oldIsNull && !*isNull &&
-			  DatumGetBool(FunctionCall2(&transstate->equalfns[0],
+			  DatumGetBool(FunctionCall2(&pertrans->equalfns[0],
 										 oldVal, *newVal)))))
 		{
 			/* equal to prior, so forget this one */
-			if (!transstate->inputtypeByVal && !*isNull)
+			if (!pertrans->inputtypeByVal && !*isNull)
 				pfree(DatumGetPointer(*newVal));
 		}
 		else
 		{
-			advance_transition_function(aggstate, transstate, pergroupstate);
+			advance_transition_function(aggstate, pertrans, pergroupstate);
 			/* forget the old value, if any */
-			if (!oldIsNull && !transstate->inputtypeByVal)
+			if (!oldIsNull && !pertrans->inputtypeByVal)
 				pfree(DatumGetPointer(oldVal));
 			/* and remember the new one for subsequent equality checks */
 			oldVal = *newVal;
@@ -976,11 +975,11 @@ process_ordered_aggregate_single(AggState *aggstate,
 		MemoryContextSwitchTo(oldContext);
 	}
 
-	if (!oldIsNull && !transstate->inputtypeByVal)
+	if (!oldIsNull && !pertrans->inputtypeByVal)
 		pfree(DatumGetPointer(oldVal));
 
-	tuplesort_end(transstate->sortstates[aggstate->current_set]);
-	transstate->sortstates[aggstate->current_set] = NULL;
+	tuplesort_end(pertrans->sortstates[aggstate->current_set]);
+	pertrans->sortstates[aggstate->current_set] = NULL;
 }
 
 /*
@@ -997,25 +996,25 @@ process_ordered_aggregate_single(AggState *aggstate,
  */
 static void
 process_ordered_aggregate_multi(AggState *aggstate,
-								AggStateTransState transstate,
+AggStatePerTrans pertrans,
 								AggStatePerGroup pergroupstate)
 {
 	MemoryContext workcontext = aggstate->tmpcontext->ecxt_per_tuple_memory;
-	FunctionCallInfo fcinfo = &transstate->transfn_fcinfo;
-	TupleTableSlot *slot1 = transstate->evalslot;
-	TupleTableSlot *slot2 = transstate->uniqslot;
-	int			numTransInputs = transstate->numTransInputs;
-	int			numDistinctCols = transstate->numDistinctCols;
+	FunctionCallInfo fcinfo = &pertrans->transfn_fcinfo;
+	TupleTableSlot *slot1 = pertrans->evalslot;
+	TupleTableSlot *slot2 = pertrans->uniqslot;
+	int			numTransInputs = pertrans->numTransInputs;
+	int			numDistinctCols = pertrans->numDistinctCols;
 	bool		haveOldValue = false;
 	int			i;
 
-	tuplesort_performsort(transstate->sortstates[aggstate->current_set]);
+	tuplesort_performsort(pertrans->sortstates[aggstate->current_set]);
 
 	ExecClearTuple(slot1);
 	if (slot2)
 		ExecClearTuple(slot2);
 
-	while (tuplesort_gettupleslot(transstate->sortstates[aggstate->current_set],
+	while (tuplesort_gettupleslot(pertrans->sortstates[aggstate->current_set],
 								  true, slot1))
 	{
 		/*
@@ -1029,8 +1028,8 @@ process_ordered_aggregate_multi(AggState *aggstate,
 			!haveOldValue ||
 			!execTuplesMatch(slot1, slot2,
 							 numDistinctCols,
-							 transstate->sortColIdx,
-							 transstate->equalfns,
+							 pertrans->sortColIdx,
+							 pertrans->equalfns,
 							 workcontext))
 		{
 			/* Load values into fcinfo */
@@ -1041,7 +1040,7 @@ process_ordered_aggregate_multi(AggState *aggstate,
 				fcinfo->argnull[i + 1] = slot1->tts_isnull[i];
 			}
 
-			advance_transition_function(aggstate, transstate, pergroupstate);
+			advance_transition_function(aggstate, pertrans, pergroupstate);
 
 			if (numDistinctCols > 0)
 			{
@@ -1064,8 +1063,8 @@ process_ordered_aggregate_multi(AggState *aggstate,
 	if (slot2)
 		ExecClearTuple(slot2);
 
-	tuplesort_end(transstate->sortstates[aggstate->current_set]);
-	transstate->sortstates[aggstate->current_set] = NULL;
+	tuplesort_end(pertrans->sortstates[aggstate->current_set]);
+	pertrans->sortstates[aggstate->current_set] = NULL;
 }
 
 /*
@@ -1077,7 +1076,7 @@ process_ordered_aggregate_multi(AggState *aggstate,
  * The finalfunction will be run, and the result delivered, in the
  * output-tuple context; caller's CurrentMemoryContext does not matter.
  *
- * The finalfn uses the state as set in the stateno. This also might be
+ * The finalfn uses the state as set in the transno. This also might be
  * being used by another aggregate function, so it's important that we do
  * nothing destructive here.
  */
@@ -1092,7 +1091,7 @@ finalize_aggregate(AggState *aggstate,
 	MemoryContext oldContext;
 	int			i;
 	ListCell   *lc;
-	AggStateTransState transstate = &aggstate->transstates[peragg->stateno];
+	AggStatePerTrans pertrans = &aggstate->pertrans[peragg->transno];
 
 	oldContext = MemoryContextSwitchTo(aggstate->ss.ps.ps_ExprContext->ecxt_per_tuple_memory);
 
@@ -1103,7 +1102,7 @@ finalize_aggregate(AggState *aggstate,
 	 * for the transition state value.
 	 */
 	i = 1;
-	foreach(lc, transstate->aggdirectargs)
+	foreach(lc, pertrans->aggdirectargs)
 	{
 		ExprState  *expr = (ExprState *) lfirst(lc);
 
@@ -1122,12 +1121,12 @@ finalize_aggregate(AggState *aggstate,
 	{
 		int			numFinalArgs = peragg->numFinalArgs;
 
-		/* set up aggstate->curtransstate for AggGetAggref() */
-		aggstate->curtransstate = transstate;
+		/* set up aggstate->curpertrans for AggGetAggref() */
+		aggstate->curpertrans = pertrans;
 
 		InitFunctionCallInfoData(fcinfo, &peragg->finalfn,
 								 numFinalArgs,
-								 transstate->aggCollation,
+								 pertrans->aggCollation,
 								 (void *) aggstate, NULL);
 
 		/* Fill in the transition state value */
@@ -1154,7 +1153,7 @@ finalize_aggregate(AggState *aggstate,
 			*resultVal = FunctionCallInvoke(&fcinfo);
 			*resultIsNull = fcinfo.isnull;
 		}
-		aggstate->curtransstate = NULL;
+		aggstate->curpertrans = NULL;
 	}
 	else
 	{
@@ -1262,23 +1261,23 @@ finalize_aggregates(AggState *aggstate,
 	for (aggno = 0; aggno < aggstate->numaggs; aggno++)
 	{
 		AggStatePerAgg peragg = &peraggs[aggno];
-		int			stateno = peragg->stateno;
-		AggStateTransState transstate = &aggstate->transstates[stateno];
+		int			transno = peragg->transno;
+		AggStatePerTrans pertrans = &aggstate->pertrans[transno];
 		AggStatePerGroup pergroupstate;
 
-		pergroupstate = &pergroup[stateno + (currentSet * (aggstate->numstates))];
+		pergroupstate = &pergroup[transno + (currentSet * (aggstate->numtrans))];
 
-		if (transstate->numSortCols > 0)
+		if (pertrans->numSortCols > 0)
 		{
 			Assert(((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED);
 
-			if (transstate->numInputs == 1)
+			if (pertrans->numInputs == 1)
 				process_ordered_aggregate_single(aggstate,
-												 transstate,
+												 pertrans,
 												 pergroupstate);
 			else
 				process_ordered_aggregate_multi(aggstate,
-												transstate,
+												pertrans,
 												pergroupstate);
 		}
 
@@ -2020,11 +2019,11 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 {
 	AggState   *aggstate;
 	AggStatePerAgg peraggs;
-	AggStateTransState transstates;
+	AggStatePerTrans pertransstates;
 	Plan	   *outerPlan;
 	ExprContext *econtext;
 	int			numaggs,
-				stateno,
+				transno,
 				aggno;
 	int			phase;
 	ListCell   *l;
@@ -2046,14 +2045,14 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 
 	aggstate->aggs = NIL;
 	aggstate->numaggs = 0;
-	aggstate->numstates = 0;
+	aggstate->numtrans = 0;
 	aggstate->maxsets = 0;
 	aggstate->hashfunctions = NULL;
 	aggstate->projected_set = -1;
 	aggstate->current_set = 0;
 	aggstate->peragg = NULL;
-	aggstate->transstates = NULL;
-	aggstate->curtransstate = NULL;
+	aggstate->pertrans = NULL;
+	aggstate->curpertrans = NULL;
 	aggstate->agg_done = false;
 	aggstate->input_done = false;
 	aggstate->pergroup = NULL;
@@ -2287,10 +2286,10 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 	econtext->ecxt_aggnulls = (bool *) palloc0(sizeof(bool) * numaggs);
 
 	peraggs = (AggStatePerAgg) palloc0(sizeof(AggStatePerAggData) * numaggs);
-	transstates = (AggStateTransState) palloc0(sizeof(AggStateTransStateData)* numaggs);
+	pertransstates = (AggStatePerTrans) palloc0(sizeof(AggStatePerTransData)* numaggs);
 
 	aggstate->peragg = peraggs;
-	aggstate->transstates = transstates;
+	aggstate->pertrans = pertransstates;
 
 	if (node->aggstrategy == AGG_HASHED)
 	{
@@ -2315,13 +2314,13 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 	 * unchanging fields of the per-agg data.
 	 */
 	aggno = -1;
-	stateno = -1;
+	transno = -1;
 	foreach(l, aggstate->aggs)
 	{
 		AggrefExprState *aggrefstate = (AggrefExprState *) lfirst(l);
 		Aggref	   *aggref = (Aggref *) aggrefstate->xprstate.expr;
 		AggStatePerAgg peragg;
-		AggStateTransState transstate;
+		AggStatePerTrans pertrans;
 		AggRefCompatibility agg_match;
 		Oid			inputTypes[FUNC_MAX_ARGS];
 		int			numArguments;
@@ -2446,21 +2445,21 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 		 */
 		if (agg_match == AGGREF_NO_MATCH)
 		{
-			transstate = &transstates[++stateno];
-			build_transstate_for_aggref(transstate, aggstate, estate,
+			pertrans = &pertransstates[++transno];
+			build_pertrans_for_aggref(pertrans, aggstate, estate,
 										 aggref,
 										 aggTuple, inputTypes, numArguments);
-			peragg->stateno = stateno;
+			peragg->transno = transno;
 		}
 		else		/* AGGREF_STATE_MATCH */
 		{
-			int			existing_stateno = peraggs[existing_aggno].stateno;
+			int			existing_transno = peraggs[existing_aggno].transno;
 
-			transstate = &transstates[existing_stateno];
-			peragg->stateno = existing_stateno;
+			pertrans = &pertransstates[existing_transno];
+			peragg->transno = existing_transno;
 
 			/* when reusing the state, the transfns should match! */
-			Assert(transstate->transfn_oid == aggform->aggtransfn);
+			Assert(pertrans->transfn_oid == aggform->aggtransfn);
 		}
 
 		/* Detect how many arguments to pass to the finalfn */
@@ -2477,7 +2476,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 		{
 			build_aggregate_finalfn_expr(inputTypes,
 										 peragg->numFinalArgs,
-										 transstate->aggtranstype,
+										 pertrans->aggtranstype,
 										 aggref->aggtype,
 										 aggref->inputcollid,
 										 finalfn_oid,
@@ -2499,7 +2498,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 	 * numstates to the number of unique aggregate states found.
 	 */
 	aggstate->numaggs = aggno + 1;
-	aggstate->numstates = stateno + 1;
+	aggstate->numtrans = transno + 1;
 
 	return aggstate;
 }
@@ -2507,12 +2506,12 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 /*
  * Build the state needed to calculate a state value for an aggregate.
  *
- * This initializes all the fields in 'transstate'. 'aggTuple',
+ * This initializes all the fields in 'pertrans'. 'aggTuple',
  * 'inputTypes' and 'numArguments' could be derived from 'aggref', but the
  * caller has calculated them already, so might as well pass them.
  */
 static void
-build_transstate_for_aggref(AggStateTransState transstate,
+build_pertrans_for_aggref(AggStatePerTrans pertrans,
 							 AggState *aggstate, EState *estate,
 							 Aggref *aggref, HeapTuple aggTuple,
 							 Oid *inputTypes, int numArguments)
@@ -2531,19 +2530,19 @@ build_transstate_for_aggref(AggStateTransState transstate,
 	Datum		textInitVal;
 	Oid			transfn_oid;
 
-	/* Begin filling in the transstate data */
-	transstate->aggref = aggref;
-	transstate->aggCollation = aggref->inputcollid;
-	transstate->transfn_oid = transfn_oid = aggform->aggtransfn;
+	/* Begin filling in the pertrans data */
+	pertrans->aggref = aggref;
+	pertrans->aggCollation = aggref->inputcollid;
+	pertrans->transfn_oid = transfn_oid = aggform->aggtransfn;
 
 	/* Count the "direct" arguments, if any */
 	numDirectArgs = list_length(aggref->aggdirectargs);
 
 	/* Count the number of aggregated input columns */
-	transstate->numInputs = numInputs = list_length(aggref->args);
+	pertrans->numInputs = numInputs = list_length(aggref->args);
 
 	/* resolve actual type of transition state, if polymorphic */
-	transstate->aggtranstype =
+	pertrans->aggtranstype =
 		resolve_aggregate_transtype(aggref->aggfnoid,
 									aggform->aggtranstype,
 									inputTypes,
@@ -2551,9 +2550,9 @@ build_transstate_for_aggref(AggStateTransState transstate,
 
 	/* Detect how many arguments to pass to the transfn */
 	if (AGGKIND_IS_ORDERED_SET(aggref->aggkind))
-		transstate->numTransInputs = numInputs;
+		pertrans->numTransInputs = numInputs;
 	else
-		transstate->numTransInputs = numArguments;
+		pertrans->numTransInputs = numArguments;
 
 	/*
 	 * Set up infrastructure for calling the transfn
@@ -2562,19 +2561,19 @@ build_transstate_for_aggref(AggStateTransState transstate,
 								 numArguments,
 								 numDirectArgs,
 								 aggref->aggvariadic,
-								 transstate->aggtranstype,
+								 pertrans->aggtranstype,
 								 aggref->inputcollid,
 								 transfn_oid,
 								 InvalidOid,	/* invtrans is not needed here */
 								 &transfnexpr,
 								 NULL);
-	fmgr_info(transfn_oid, &transstate->transfn);
-	fmgr_info_set_expr((Node *) transfnexpr, &transstate->transfn);
+	fmgr_info(transfn_oid, &pertrans->transfn);
+	fmgr_info_set_expr((Node *) transfnexpr, &pertrans->transfn);
 
-	InitFunctionCallInfoData(transstate->transfn_fcinfo,
-							 &transstate->transfn,
-							 transstate->numTransInputs + 1,
-							 transstate->aggCollation,
+	InitFunctionCallInfoData(pertrans->transfn_fcinfo,
+							 &pertrans->transfn,
+							 pertrans->numTransInputs + 1,
+							 pertrans->aggCollation,
 							 (void *) aggstate, NULL);
 
 
@@ -2586,13 +2585,13 @@ build_transstate_for_aggref(AggStateTransState transstate,
 	 */
 	textInitVal = SysCacheGetAttr(AGGFNOID, aggTuple,
 								  Anum_pg_aggregate_agginitval,
-								  &transstate->initValueIsNull);
+								  &pertrans->initValueIsNull);
 
-	if (transstate->initValueIsNull)
-		transstate->initValue = (Datum) 0;
+	if (pertrans->initValueIsNull)
+		pertrans->initValue = (Datum) 0;
 	else
-		transstate->initValue = GetAggInitVal(textInitVal,
-											  transstate->aggtranstype);
+		pertrans->initValue = GetAggInitVal(textInitVal,
+											pertrans->aggtranstype);
 
 	/*
 	 * If the transfn is strict and the initval is NULL, make sure input type
@@ -2602,11 +2601,11 @@ build_transstate_for_aggref(AggStateTransState transstate,
 	 * we must check again in case the transfn's strictness property has been
 	 * changed.
 	 */
-	if (transstate->transfn.fn_strict && transstate->initValueIsNull)
+	if (pertrans->transfn.fn_strict && pertrans->initValueIsNull)
 	{
 		if (numArguments <= numDirectArgs ||
 			!IsBinaryCoercible(inputTypes[numDirectArgs],
-							   transstate->aggtranstype))
+							   pertrans->aggtranstype))
 			ereport(ERROR,
 					(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
 					 errmsg("aggregate %u needs to have compatible input type and transition type",
@@ -2614,27 +2613,27 @@ build_transstate_for_aggref(AggStateTransState transstate,
 	}
 
 	/* get info about the state value's datatype */
-	get_typlenbyval(transstate->aggtranstype,
-					&transstate->transtypeLen,
-					&transstate->transtypeByVal);
+	get_typlenbyval(pertrans->aggtranstype,
+					&pertrans->transtypeLen,
+					&pertrans->transtypeByVal);
 
 	/*
 	 * Get a tupledesc corresponding to the aggregated inputs (including sort
 	 * expressions) of the agg.
 	 */
-	transstate->evaldesc = ExecTypeFromTL(aggref->args, false);
+	pertrans->evaldesc = ExecTypeFromTL(aggref->args, false);
 
 	/* Create slot we're going to do argument evaluation in */
-	transstate->evalslot = ExecInitExtraTupleSlot(estate);
-	ExecSetSlotDescriptor(transstate->evalslot, transstate->evaldesc);
+	pertrans->evalslot = ExecInitExtraTupleSlot(estate);
+	ExecSetSlotDescriptor(pertrans->evalslot, pertrans->evaldesc);
 
 	/* Initialize the input and FILTER expressions */
 	naggs = aggstate->numaggs;
-	transstate->aggfilter = ExecInitExpr(aggref->aggfilter,
+	pertrans->aggfilter = ExecInitExpr(aggref->aggfilter,
 										 (PlanState *) aggstate);
-	transstate->aggdirectargs = (List *) ExecInitExpr((Expr *)aggref->aggdirectargs,
+	pertrans->aggdirectargs = (List *) ExecInitExpr((Expr *) aggref->aggdirectargs,
 													  (PlanState *) aggstate);
-	transstate->args = (List *)ExecInitExpr((Expr *)aggref->args,
+	pertrans->args = (List *) ExecInitExpr((Expr *) aggref->args,
 											(PlanState *) aggstate);
 
 	/*
@@ -2648,10 +2647,10 @@ build_transstate_for_aggref(AggStateTransState transstate,
 				 errmsg("aggregate function calls cannot be nested")));
 
 	/* Set up projection info for evaluation */
-	transstate->evalproj = ExecBuildProjectionInfo(transstate->args,
-												   aggstate->tmpcontext,
-												   transstate->evalslot,
-												   NULL);
+	pertrans->evalproj = ExecBuildProjectionInfo(pertrans->args,
+												 aggstate->tmpcontext,
+												 pertrans->evalslot,
+												 NULL);
 
 	/*
 	 * If we're doing either DISTINCT or ORDER BY for a plain agg, then we
@@ -2680,8 +2679,8 @@ build_transstate_for_aggref(AggStateTransState transstate,
 		numDistinctCols = 0;
 	}
 
-	transstate->numSortCols = numSortCols;
-	transstate->numDistinctCols = numDistinctCols;
+	pertrans->numSortCols = numSortCols;
+	pertrans->numDistinctCols = numDistinctCols;
 
 	if (numSortCols > 0)
 	{
@@ -2695,25 +2694,25 @@ build_transstate_for_aggref(AggStateTransState transstate,
 		if (numInputs == 1)
 		{
 			get_typlenbyval(inputTypes[numDirectArgs],
-							&transstate->inputtypeLen,
-							&transstate->inputtypeByVal);
+							&pertrans->inputtypeLen,
+							&pertrans->inputtypeByVal);
 		}
 		else if (numDistinctCols > 0)
 		{
 			/* we will need an extra slot to store prior values */
-			transstate->uniqslot = ExecInitExtraTupleSlot(estate);
-			ExecSetSlotDescriptor(transstate->uniqslot,
-								  transstate->evaldesc);
+			pertrans->uniqslot = ExecInitExtraTupleSlot(estate);
+			ExecSetSlotDescriptor(pertrans->uniqslot,
+								  pertrans->evaldesc);
 		}
 
 		/* Extract the sort information for use later */
-		transstate->sortColIdx =
+		pertrans->sortColIdx =
 			(AttrNumber *) palloc(numSortCols * sizeof(AttrNumber));
-		transstate->sortOperators =
+		pertrans->sortOperators =
 			(Oid *) palloc(numSortCols * sizeof(Oid));
-		transstate->sortCollations =
+		pertrans->sortCollations =
 			(Oid *) palloc(numSortCols * sizeof(Oid));
-		transstate->sortNullsFirst =
+		pertrans->sortNullsFirst =
 			(bool *) palloc(numSortCols * sizeof(bool));
 
 		i = 0;
@@ -2725,10 +2724,10 @@ build_transstate_for_aggref(AggStateTransState transstate,
 			/* the parser should have made sure of this */
 			Assert(OidIsValid(sortcl->sortop));
 
-			transstate->sortColIdx[i] = tle->resno;
-			transstate->sortOperators[i] = sortcl->sortop;
-			transstate->sortCollations[i] = exprCollation((Node *)tle->expr);
-			transstate->sortNullsFirst[i] = sortcl->nulls_first;
+			pertrans->sortColIdx[i] = tle->resno;
+			pertrans->sortOperators[i] = sortcl->sortop;
+			pertrans->sortCollations[i] = exprCollation((Node *) tle->expr);
+			pertrans->sortNullsFirst[i] = sortcl->nulls_first;
 			i++;
 		}
 		Assert(i == numSortCols);
@@ -2742,7 +2741,7 @@ build_transstate_for_aggref(AggStateTransState transstate,
 		 * We need the equal function for each DISTINCT comparison we will
 		 * make.
 		 */
-		transstate->equalfns =
+		pertrans->equalfns =
 			(FmgrInfo *) palloc(numDistinctCols * sizeof(FmgrInfo));
 
 		i = 0;
@@ -2750,13 +2749,13 @@ build_transstate_for_aggref(AggStateTransState transstate,
 		{
 			SortGroupClause *sortcl = (SortGroupClause *) lfirst(lc);
 
-			fmgr_info(get_opcode(sortcl->eqop), &transstate->equalfns[i]);
+			fmgr_info(get_opcode(sortcl->eqop), &pertrans->equalfns[i]);
 			i++;
 		}
 		Assert(i == numDistinctCols);
 	}
 
-	transstate->sortstates = (Tuplesortstate **)
+	pertrans->sortstates = (Tuplesortstate **)
 		palloc0(sizeof(Tuplesortstate *) * numGroupingSets);
 }
 
@@ -2834,7 +2833,7 @@ find_compatible_aggref(Aggref *newagg, AggState *aggstate,
 {
 	int			aggno;
 	int			statematchaggno;
-	AggStateTransState transstates;
+	AggStatePerTrans pertransstates;
 	AggStatePerAgg peraggs;
 
 	/* we mustn't reuse the aggref if it contains volatile function calls */
@@ -2842,7 +2841,7 @@ find_compatible_aggref(Aggref *newagg, AggState *aggstate,
 		return AGGREF_NO_MATCH;
 
 	statematchaggno = -1;
-	transstates = aggstate->transstates;
+	pertransstates = aggstate->pertrans;
 	peraggs = aggstate->peragg;
 
 	/*
@@ -2855,13 +2854,13 @@ find_compatible_aggref(Aggref *newagg, AggState *aggstate,
 	{
 		AggRefCompatibility matchtype;
 		AggStatePerAgg peragg;
-		AggStateTransState transstate;
+		AggStatePerTrans pertrans;
 
 		peragg = &peraggs[aggno];
-		transstate = &transstates[peragg->stateno];
+		pertrans = &pertransstates[peragg->transno];
 
 		/* lookup the match type of this agg */
-		matchtype = aggref_has_compatible_states(newagg, peragg, transstate);
+		matchtype = aggref_has_compatible_states(newagg, peragg, pertrans);
 
 		/* if it's an exact match then we're done. */
 		if (matchtype == AGGREF_EXACT_MATCH)
@@ -2893,9 +2892,9 @@ find_compatible_aggref(Aggref *newagg, AggState *aggstate,
 static AggRefCompatibility
 aggref_has_compatible_states(Aggref *newagg,
 							 AggStatePerAgg peragg,
-							 AggStateTransState transstate)
+							 AggStatePerTrans pertrans)
 {
-	Aggref	   *existingRef = transstate->aggref;
+	Aggref	   *existingRef = pertrans->aggref;
 
 	/* all of the following must be the same or it's no match */
 	if (newagg->inputcollid != existingRef->inputcollid ||
@@ -2934,7 +2933,7 @@ aggref_has_compatible_states(Aggref *newagg,
 		aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
 
 		/* if the transfns are not the same then the state can't be shared */
-		if (aggform->aggtransfn != transstate->transfn_oid)
+		if (aggform->aggtransfn != pertrans->transfn_oid)
 		{
 			ReleaseSysCache(aggTuple);
 			return AGGREF_NO_MATCH;
@@ -2949,7 +2948,7 @@ aggref_has_compatible_states(Aggref *newagg,
 		 * If both INITCONDs are null then the outcome depends on if the
 		 * finalfns match.
 		 */
-		if (initValueIsNull && transstate->initValueIsNull)
+		if (initValueIsNull && pertrans->initValueIsNull)
 		{
 			if (aggform->aggfinalfn != peragg->finalfn_oid)
 				return AGGREF_STATE_MATCH;
@@ -2969,7 +2968,7 @@ void
 ExecEndAgg(AggState *node)
 {
 	PlanState  *outerPlan;
-	int			stateno;
+	int			transno;
 	int			numGroupingSets = Max(node->maxsets, 1);
 	int			setno;
 
@@ -2980,14 +2979,14 @@ ExecEndAgg(AggState *node)
 	if (node->sort_out)
 		tuplesort_end(node->sort_out);
 
-	for (stateno = 0; stateno < node->numstates; stateno++)
+	for (transno = 0; transno < node->numtrans; transno++)
 	{
-		AggStateTransState transstate = &node->transstates[stateno];
+		AggStatePerTrans pertrans = &node->pertrans[transno];
 
 		for (setno = 0; setno < numGroupingSets; setno++)
 		{
-			if (transstate->sortstates[setno])
-				tuplesort_end(transstate->sortstates[setno]);
+			if (pertrans->sortstates[setno])
+				tuplesort_end(pertrans->sortstates[setno]);
 		}
 	}
 
@@ -3015,7 +3014,7 @@ ExecReScanAgg(AggState *node)
 	ExprContext *econtext = node->ss.ps.ps_ExprContext;
 	PlanState  *outerPlan = outerPlanState(node);
 	Agg		   *aggnode = (Agg *) node->ss.ps.plan;
-	int			stateno;
+	int			transno;
 	int			numGroupingSets = Max(node->maxsets, 1);
 	int			setno;
 
@@ -3047,16 +3046,16 @@ ExecReScanAgg(AggState *node)
 	}
 
 	/* Make sure we have closed any open tuplesorts */
-	for (stateno = 0; stateno < node->numstates; stateno++)
+	for (transno = 0; transno < node->numtrans; transno++)
 	{
 		for (setno = 0; setno < numGroupingSets; setno++)
 		{
-			AggStateTransState transstate = &node->transstates[stateno];
+			AggStatePerTrans pertrans = &node->pertrans[transno];
 
-			if (transstate->sortstates[setno])
+			if (pertrans->sortstates[setno])
 			{
-				tuplesort_end(transstate->sortstates[setno]);
-				transstate->sortstates[setno] = NULL;
+				tuplesort_end(pertrans->sortstates[setno]);
+				pertrans->sortstates[setno] = NULL;
 			}
 		}
 	}
@@ -3180,12 +3179,12 @@ AggGetAggref(FunctionCallInfo fcinfo)
 {
 	if (fcinfo->context && IsA(fcinfo->context, AggState))
 	{
-		AggStateTransState curtransstate;
+		AggStatePerTrans curpertrans;
 
-		curtransstate = ((AggState *)fcinfo->context)->curtransstate;
+		curpertrans = ((AggState *)fcinfo->context)->curpertrans;
 
-		if (curtransstate)
-			return curtransstate->aggref;
+		if (curpertrans)
+			return curpertrans->aggref;
 	}
 	return NULL;
 }
diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c
index 65e6a85..e0fb8fb 100644
--- a/src/backend/parser/parse_agg.c
+++ b/src/backend/parser/parse_agg.c
@@ -1826,8 +1826,9 @@ resolve_aggregate_transtype(Oid aggfuncid,
  * (The trees will never actually be executed, however, so we can skimp
  * a bit on correctness.)
  *
- * agg_input_types identifies the input types of the aggregate.  These should
- * be resolved to actual types (ie, none should ever be ANYELEMENT etc).
+ * agg_input_types and agg_state_type identifies the input types of the
+ * aggregate.  These should be resolved to actual types (ie, none should
+ * ever be ANYELEMENT etc).
  * agg_input_collation is the aggregate function's input collation.
  *
  * For an ordered-set aggregate, remember that agg_input_types describes
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 3579f3b..7091a9d 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1822,7 +1822,7 @@ typedef struct GroupState
  */
 /* these structs are private in nodeAgg.c: */
 typedef struct AggStatePerAggData *AggStatePerAgg;
-typedef struct AggStateTransStateData *AggStateTransState;
+typedef struct AggStatePerTransData *AggStatePerTrans;
 typedef struct AggStatePerGroupData *AggStatePerGroup;
 typedef struct AggStatePerPhaseData *AggStatePerPhase;
 
@@ -1831,16 +1831,16 @@ typedef struct AggState
 	ScanState	ss;				/* its first field is NodeTag */
 	List	   *aggs;			/* all Aggref nodes in targetlist & quals */
 	int			numaggs;		/* length of list (could be zero!) */
-	int			numstates;		/* number of peraggstate items */
+	int			numtrans;		/* number of pertrans items */
 	AggStatePerPhase phase;		/* pointer to current phase data */
 	int			numphases;		/* number of phases */
 	int			current_phase;	/* current phase number */
 	FmgrInfo   *hashfunctions;	/* per-grouping-field hash fns */
 	AggStatePerAgg peragg;		/* per-Aggref information */
-	AggStateTransState transstates; /* per-Agg State information */
+	AggStatePerTrans pertrans;	/* per-Agg trans state information */
 	ExprContext **aggcontexts;	/* econtexts for long-lived data (per GS) */
 	ExprContext *tmpcontext;	/* econtext for input expressions */
-	AggStateTransState curtransstate;	/* identifies currently active aggregate */
+	AggStatePerTrans curpertrans;	/* currently active trans state */
 	bool		input_done;		/* indicates end of input */
 	bool		agg_done;		/* indicates completion of Agg scan */
 	int			projected_set;	/* The last projected grouping set */
#10Heikki Linnakangas
hlinnaka@iki.fi
In reply to: David Rowley (#9)
1 attachment(s)
Re: Sharing aggregate states between different aggregate functions

On 07/28/2015 04:14 AM, David Rowley wrote:

On 27 July 2015 at 20:11, Heikki Linnakangas <hlinnaka@iki.fi> wrote:

On 07/27/2015 08:34 AM, David Rowley wrote:

In this function I also wasn't quite sure if it was with comparing both
non-NULL INITCOND's here. I believe my code comments may slightly
contradict what the code actually does, as the comments talk about them
having to match, but the code just bails if any are non-NULL. The reason I
didn't check them was because it seems inevitable that some duplicate work
needs to be done when setting up the INITCOND. Perhaps it's worth it?

It would be nice to handle non-NULL initconds. I think you'll have to
check that the input function isn't volatile. Or perhaps just call the
input function, and check that the resulting Datum is byte-per-byte
identical, although that might be awkward to do with the current code
structure.

I've not done anything with this.
I'd not thought of an input function being volatile before, but I guess
it's possible, which makes me a bit scared that we could be treading on
ground we shouldn't be. I know it's more of an output function thing than
an input function thing, but a GUC like extra_float_digits could cause
problems here.

Yeah, a volatile input function seems highly unlikely, but who knows.
BTW, we're also not checking if the transition or final functions are
volatile. But that was the same before this patch too.

It sure would be nice to support the built-in float aggregates, so I
took a stab at this. I heavily restructured the code again, so that
there are now two separate steps. First, we check for any identical
Aggrefs that could be shared. If that fails, we proceed to the
permission checks, look up the transition function and build the initial
datum. And then we call another function that tries to find an existing,
compatible per-trans structure. I think this actually looks better than
before, and checking for identical init values is now easy. This does
lose one optimization: if there are two aggregates with identical
transition functions and final functions, they are not merged into a
single per-Agg struct. They still share the same per-Trans struct,
though, and I think that's enough.

How does the attached patch look to you? The comments still need some
cleanup, in particular, the explanations of the different scenarios
don't belong where they are anymore.

BTW, the permission checks were not correct before. You cannot skip the
check on the transition function when you're sharing the per-trans
state. We check that the aggregate's owner has permission to execute the
transition function, and the previous aggregate whose state value we're
sharing might have different owner.

Hmm. I think it should be "AggStatePerTransData" then, to keep the same
pattern as AggStatePerAggData and AggStatePerGroupData.

Sounds good. I've renamed it to that in the attached delta patch.

Thanks!

- Heikki

Attachments:

sharing_aggstate-heikki-2.patchbinary/octet-stream; name=sharing_aggstate-heikki-2.patchDownload
diff --git a/src/backend/executor/execQual.c b/src/backend/executor/execQual.c
index 0f911f2..fd922bd 100644
--- a/src/backend/executor/execQual.c
+++ b/src/backend/executor/execQual.c
@@ -4485,35 +4485,15 @@ ExecInitExpr(Expr *node, PlanState *parent)
 			break;
 		case T_Aggref:
 			{
-				Aggref	   *aggref = (Aggref *) node;
 				AggrefExprState *astate = makeNode(AggrefExprState);
 
 				astate->xprstate.evalfunc = (ExprStateEvalFunc) ExecEvalAggref;
 				if (parent && IsA(parent, AggState))
 				{
 					AggState   *aggstate = (AggState *) parent;
-					int			naggs;
 
 					aggstate->aggs = lcons(astate, aggstate->aggs);
-					naggs = ++aggstate->numaggs;
-
-					astate->aggdirectargs = (List *) ExecInitExpr((Expr *) aggref->aggdirectargs,
-																  parent);
-					astate->args = (List *) ExecInitExpr((Expr *) aggref->args,
-														 parent);
-					astate->aggfilter = ExecInitExpr(aggref->aggfilter,
-													 parent);
-
-					/*
-					 * Complain if the aggregate's arguments contain any
-					 * aggregates; nested agg functions are semantically
-					 * nonsensical.  (This should have been caught earlier,
-					 * but we defend against it here anyway.)
-					 */
-					if (naggs != aggstate->numaggs)
-						ereport(ERROR,
-								(errcode(ERRCODE_GROUPING_ERROR),
-						errmsg("aggregate function calls cannot be nested")));
+					aggstate->numaggs++;
 				}
 				else
 				{
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 2bf48c5..3162980 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -152,17 +152,28 @@
 
 
 /*
- * AggStatePerAggData - per-aggregate working state for the Agg scan
+ * AggStatePerTransData - per aggregate state value information
+ *
+ * Working state for updating the aggregate's state value, by calling the
+ * transition function with an input row. This struct does not store the
+ * information needed to produce the final aggregate result from the transition
+ * state, that's stored in AggStatePerAggData instead. This separation allows
+ * multiple aggregate results to be produced from a single state value.
  */
-typedef struct AggStatePerAggData
+typedef struct AggStatePerTransData
 {
 	/*
 	 * These values are set up during ExecInitAgg() and do not change
 	 * thereafter:
 	 */
 
-	/* Links to Aggref expr and state nodes this working state is for */
-	AggrefExprState *aggrefstate;
+	/*
+	 * Link to an Aggref expr this state value is for.
+	 *
+	 * There can be multiple AggRef's sharing the same state value, as long as
+	 * the inputs and transition function are identical. This points to the
+	 * first one of them.
+	 */
 	Aggref	   *aggref;
 
 	/*
@@ -186,25 +197,22 @@ typedef struct AggStatePerAggData
 	 */
 	int			numTransInputs;
 
-	/*
-	 * Number of arguments to pass to the finalfn.  This is always at least 1
-	 * (the transition state value) plus any ordered-set direct args. If the
-	 * finalfn wants extra args then we pass nulls corresponding to the
-	 * aggregated input columns.
-	 */
-	int			numFinalArgs;
-
-	/* Oids of transfer functions */
+	/* Oid of the state transition function */
 	Oid			transfn_oid;
-	Oid			finalfn_oid;	/* may be InvalidOid */
+
+	/* Oid of state value's datatype */
+	Oid			aggtranstype;
+
+	/* ExprStates of the FILTER and argument expressions. */
+	ExprState  *aggfilter;		/* state of FILTER expression, if any */
+	List	   *args;			/* states of aggregated-argument expressions */
+	List	   *aggdirectargs;	/* states of direct-argument expressions */
 
 	/*
-	 * fmgr lookup data for transfer functions --- only valid when
-	 * corresponding oid is not InvalidOid.  Note in particular that fn_strict
-	 * flags are kept here.
+	 * fmgr lookup data for transition function.  Note in particular that the
+	 * fn_strict flag is kept here.
 	 */
 	FmgrInfo	transfn;
-	FmgrInfo	finalfn;
 
 	/* Input collation derived for aggregate */
 	Oid			aggCollation;
@@ -236,17 +244,15 @@ typedef struct AggStatePerAggData
 	bool		initValueIsNull;
 
 	/*
-	 * We need the len and byval info for the agg's input, result, and
-	 * transition data types in order to know how to copy/delete values.
+	 * We need the len and byval info for the agg's input and transition data
+	 * types in order to know how to copy/delete values.
 	 *
 	 * Note that the info for the input type is used only when handling
 	 * DISTINCT aggs with just one argument, so there is only one input type.
 	 */
 	int16		inputtypeLen,
-				resulttypeLen,
 				transtypeLen;
 	bool		inputtypeByVal,
-				resulttypeByVal,
 				transtypeByVal;
 
 	/*
@@ -288,6 +294,54 @@ typedef struct AggStatePerAggData
 	 * worth the extra space consumption.
 	 */
 	FunctionCallInfoData transfn_fcinfo;
+}	AggStatePerTransData;
+
+/*
+ * AggStatePerAggData - per-aggregate information
+ *
+ * This contains the information needed to call the final function, to produce
+ * a final aggregate result from the state value. If there are multiple
+ * identical AggRefs in the query, they can all share the same per-agg data.
+ *
+ * These values are set up during ExecInitAgg() and do not change thereafter.
+ */
+typedef struct AggStatePerAggData
+{
+	/*
+	 * Link to an Aggref expr this state value is for.
+	 *
+	 * There can be multiple identical AggRef's sharing the same per-agg. This
+	 * points to the first one of them.
+	 */
+	Aggref	   *aggref;
+
+	/* index to the state value which this agg should use */
+	int			transno;
+
+	/* Optional Oid of final function (may be InvalidOid) */
+	Oid			finalfn_oid;
+
+	/*
+	 * fmgr lookup data for final function --- only valid when finalfn_oid oid
+	 * is not InvalidOid.
+	 */
+	FmgrInfo	finalfn;
+
+	/*
+	 * Number of arguments to pass to the finalfn.  This is always at least 1
+	 * (the transition state value) plus any ordered-set direct args. If the
+	 * finalfn wants extra args then we pass nulls corresponding to the
+	 * aggregated input columns.
+	 */
+	int			numFinalArgs;
+
+	/*
+	 * We need the len and byval info for the agg's result data type in order
+	 * to know how to copy/delete values.
+	 */
+	int16		resulttypeLen;
+	bool		resulttypeByVal;
+
 }	AggStatePerAggData;
 
 /*
@@ -358,25 +412,23 @@ typedef struct AggHashEntryData
 	AggStatePerGroupData pergroup[FLEXIBLE_ARRAY_MEMBER];
 }	AggHashEntryData;
 
-
 static void initialize_phase(AggState *aggstate, int newphase);
 static TupleTableSlot *fetch_input_tuple(AggState *aggstate);
 static void initialize_aggregates(AggState *aggstate,
-					  AggStatePerAgg peragg,
 					  AggStatePerGroup pergroup,
 					  int numReset);
 static void advance_transition_function(AggState *aggstate,
-							AggStatePerAgg peraggstate,
+							AggStatePerTrans pertrans,
 							AggStatePerGroup pergroupstate);
 static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup);
 static void process_ordered_aggregate_single(AggState *aggstate,
-								 AggStatePerAgg peraggstate,
+								 AggStatePerTrans pertrans,
 								 AggStatePerGroup pergroupstate);
 static void process_ordered_aggregate_multi(AggState *aggstate,
-								AggStatePerAgg peraggstate,
+								AggStatePerTrans pertrans,
 								AggStatePerGroup pergroupstate);
 static void finalize_aggregate(AggState *aggstate,
-				   AggStatePerAgg peraggstate,
+				   AggStatePerAgg peragg,
 				   AggStatePerGroup pergroupstate,
 				   Datum *resultVal, bool *resultIsNull);
 static void prepare_projection_slot(AggState *aggstate,
@@ -396,6 +448,17 @@ static TupleTableSlot *agg_retrieve_direct(AggState *aggstate);
 static void agg_fill_hash_table(AggState *aggstate);
 static TupleTableSlot *agg_retrieve_hash_table(AggState *aggstate);
 static Datum GetAggInitVal(Datum textInitVal, Oid transtype);
+static void build_pertrans_for_aggref(AggStatePerTrans pertrans,
+							 AggState *aggsate, EState *estate,
+							 Aggref *aggref, Oid aggtransfn, Oid aggtranstype,
+						  Datum initValue, bool initValueIsNull,
+									  Oid *inputTypes, int numArguments);
+static int find_compatible_peragg(Aggref *newagg, AggState *aggstate,
+					   int lastaggno, List **same_input_transnos);
+static int find_compatible_pertrans(AggState *aggstate, Aggref *newagg,
+						 Oid aggtransfn, Oid aggtranstype,
+						 Datum initValue, bool initValueIsNull,
+						 List *possible_matches);
 
 
 /*
@@ -498,20 +561,20 @@ fetch_input_tuple(AggState *aggstate)
  * When called, CurrentMemoryContext should be the per-query context.
  */
 static void
-initialize_aggregate(AggState *aggstate, AggStatePerAgg peraggstate,
+initialize_aggregate(AggState *aggstate, AggStatePerTrans pertrans,
 					 AggStatePerGroup pergroupstate)
 {
 	/*
 	 * Start a fresh sort operation for each DISTINCT/ORDER BY aggregate.
 	 */
-	if (peraggstate->numSortCols > 0)
+	if (pertrans->numSortCols > 0)
 	{
 		/*
 		 * In case of rescan, maybe there could be an uncompleted sort
 		 * operation?  Clean it up if so.
 		 */
-		if (peraggstate->sortstates[aggstate->current_set])
-			tuplesort_end(peraggstate->sortstates[aggstate->current_set]);
+		if (pertrans->sortstates[aggstate->current_set])
+			tuplesort_end(pertrans->sortstates[aggstate->current_set]);
 
 
 		/*
@@ -519,21 +582,21 @@ initialize_aggregate(AggState *aggstate, AggStatePerAgg peraggstate,
 		 * otherwise sort the full tuple.  (See comments for
 		 * process_ordered_aggregate_single.)
 		 */
-		if (peraggstate->numInputs == 1)
-			peraggstate->sortstates[aggstate->current_set] =
-				tuplesort_begin_datum(peraggstate->evaldesc->attrs[0]->atttypid,
-									  peraggstate->sortOperators[0],
-									  peraggstate->sortCollations[0],
-									  peraggstate->sortNullsFirst[0],
+		if (pertrans->numInputs == 1)
+			pertrans->sortstates[aggstate->current_set] =
+				tuplesort_begin_datum(pertrans->evaldesc->attrs[0]->atttypid,
+									  pertrans->sortOperators[0],
+									  pertrans->sortCollations[0],
+									  pertrans->sortNullsFirst[0],
 									  work_mem, false);
 		else
-			peraggstate->sortstates[aggstate->current_set] =
-				tuplesort_begin_heap(peraggstate->evaldesc,
-									 peraggstate->numSortCols,
-									 peraggstate->sortColIdx,
-									 peraggstate->sortOperators,
-									 peraggstate->sortCollations,
-									 peraggstate->sortNullsFirst,
+			pertrans->sortstates[aggstate->current_set] =
+				tuplesort_begin_heap(pertrans->evaldesc,
+									 pertrans->numSortCols,
+									 pertrans->sortColIdx,
+									 pertrans->sortOperators,
+									 pertrans->sortCollations,
+									 pertrans->sortNullsFirst,
 									 work_mem, false);
 	}
 
@@ -543,20 +606,20 @@ initialize_aggregate(AggState *aggstate, AggStatePerAgg peraggstate,
 	 * Note that when the initial value is pass-by-ref, we must copy it (into
 	 * the aggcontext) since we will pfree the transValue later.
 	 */
-	if (peraggstate->initValueIsNull)
-		pergroupstate->transValue = peraggstate->initValue;
+	if (pertrans->initValueIsNull)
+		pergroupstate->transValue = pertrans->initValue;
 	else
 	{
 		MemoryContext oldContext;
 
 		oldContext = MemoryContextSwitchTo(
 		aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
-		pergroupstate->transValue = datumCopy(peraggstate->initValue,
-											  peraggstate->transtypeByVal,
-											  peraggstate->transtypeLen);
+		pergroupstate->transValue = datumCopy(pertrans->initValue,
+											  pertrans->transtypeByVal,
+											  pertrans->transtypeLen);
 		MemoryContextSwitchTo(oldContext);
 	}
-	pergroupstate->transValueIsNull = peraggstate->initValueIsNull;
+	pergroupstate->transValueIsNull = pertrans->initValueIsNull;
 
 	/*
 	 * If the initial value for the transition state doesn't exist in the
@@ -565,11 +628,11 @@ initialize_aggregate(AggState *aggstate, AggStatePerAgg peraggstate,
 	 * aggregates like max() and min().) The noTransValue flag signals that we
 	 * still need to do this.
 	 */
-	pergroupstate->noTransValue = peraggstate->initValueIsNull;
+	pergroupstate->noTransValue = pertrans->initValueIsNull;
 }
 
 /*
- * Initialize all aggregates for a new group of input values.
+ * Initialize all aggregate transition states for a new group of input values.
  *
  * If there are multiple grouping sets, we initialize only the first numReset
  * of them (the grouping sets are ordered so that the most specific one, which
@@ -580,61 +643,61 @@ initialize_aggregate(AggState *aggstate, AggStatePerAgg peraggstate,
  */
 static void
 initialize_aggregates(AggState *aggstate,
-					  AggStatePerAgg peragg,
 					  AggStatePerGroup pergroup,
 					  int numReset)
 {
-	int			aggno;
-	int			numGroupingSets = Max(aggstate->phase->numsets, 1);
-	int			setno = 0;
+	int					transno;
+	int					numGroupingSets = Max(aggstate->phase->numsets, 1);
+	int					setno = 0;
+	AggStatePerTrans	transstates = aggstate->pertrans;
 
 	if (numReset < 1)
 		numReset = numGroupingSets;
 
-	for (aggno = 0; aggno < aggstate->numaggs; aggno++)
+	for (transno = 0; transno < aggstate->numtrans; transno++)
 	{
-		AggStatePerAgg peraggstate = &peragg[aggno];
+		AggStatePerTrans pertrans = &transstates[transno];
 
 		for (setno = 0; setno < numReset; setno++)
 		{
 			AggStatePerGroup pergroupstate;
 
-			pergroupstate = &pergroup[aggno + (setno * (aggstate->numaggs))];
+			pergroupstate = &pergroup[transno + (setno * (aggstate->numtrans))];
 
 			aggstate->current_set = setno;
 
-			initialize_aggregate(aggstate, peraggstate, pergroupstate);
+			initialize_aggregate(aggstate, pertrans, pergroupstate);
 		}
 	}
 }
 
 /*
  * Given new input value(s), advance the transition function of one aggregate
- * within one grouping set only (already set in aggstate->current_set)
+ * state within one grouping set only (already set in aggstate->current_set)
  *
  * The new values (and null flags) have been preloaded into argument positions
- * 1 and up in peraggstate->transfn_fcinfo, so that we needn't copy them again
- * to pass to the transition function.  We also expect that the static fields
- * of the fcinfo are already initialized; that was done by ExecInitAgg().
+ * 1 and up in pertrans->transfn_fcinfo, so that we needn't copy them again to
+ * pass to the transition function.  We also expect that the static fields of
+ * the fcinfo are already initialized; that was done by ExecInitAgg().
  *
  * It doesn't matter which memory context this is called in.
  */
 static void
 advance_transition_function(AggState *aggstate,
-							AggStatePerAgg peraggstate,
+AggStatePerTrans pertrans,
 							AggStatePerGroup pergroupstate)
 {
-	FunctionCallInfo fcinfo = &peraggstate->transfn_fcinfo;
+	FunctionCallInfo fcinfo = &pertrans->transfn_fcinfo;
 	MemoryContext oldContext;
 	Datum		newVal;
 
-	if (peraggstate->transfn.fn_strict)
+	if (pertrans->transfn.fn_strict)
 	{
 		/*
 		 * For a strict transfn, nothing happens when there's a NULL input; we
 		 * just keep the prior transValue.
 		 */
-		int			numTransInputs = peraggstate->numTransInputs;
+		int			numTransInputs = pertrans->numTransInputs;
 		int			i;
 
 		for (i = 1; i <= numTransInputs; i++)
@@ -656,8 +719,8 @@ advance_transition_function(AggState *aggstate,
 			oldContext = MemoryContextSwitchTo(
 											   aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
 			pergroupstate->transValue = datumCopy(fcinfo->arg[1],
-												  peraggstate->transtypeByVal,
-												  peraggstate->transtypeLen);
+												  pertrans->transtypeByVal,
+												  pertrans->transtypeLen);
 			pergroupstate->transValueIsNull = false;
 			pergroupstate->noTransValue = false;
 			MemoryContextSwitchTo(oldContext);
@@ -678,8 +741,8 @@ advance_transition_function(AggState *aggstate,
 	/* We run the transition functions in per-input-tuple memory context */
 	oldContext = MemoryContextSwitchTo(aggstate->tmpcontext->ecxt_per_tuple_memory);
 
-	/* set up aggstate->curperagg for AggGetAggref() */
-	aggstate->curperagg = peraggstate;
+	/* set up aggstate->curpertrans for AggGetAggref() */
+	aggstate->curpertrans = pertrans;
 
 	/*
 	 * OK to call the transition function
@@ -690,22 +753,22 @@ advance_transition_function(AggState *aggstate,
 
 	newVal = FunctionCallInvoke(fcinfo);
 
-	aggstate->curperagg = NULL;
+	aggstate->curpertrans = NULL;
 
 	/*
 	 * If pass-by-ref datatype, must copy the new value into aggcontext and
 	 * pfree the prior transValue.  But if transfn returned a pointer to its
 	 * first input, we don't need to do anything.
 	 */
-	if (!peraggstate->transtypeByVal &&
+	if (!pertrans->transtypeByVal &&
 		DatumGetPointer(newVal) != DatumGetPointer(pergroupstate->transValue))
 	{
 		if (!fcinfo->isnull)
 		{
 			MemoryContextSwitchTo(aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
 			newVal = datumCopy(newVal,
-							   peraggstate->transtypeByVal,
-							   peraggstate->transtypeLen);
+							   pertrans->transtypeByVal,
+							   pertrans->transtypeLen);
 		}
 		if (!pergroupstate->transValueIsNull)
 			pfree(DatumGetPointer(pergroupstate->transValue));
@@ -718,26 +781,26 @@ advance_transition_function(AggState *aggstate,
 }
 
 /*
- * Advance all the aggregates for one input tuple.  The input tuple
- * has been stored in tmpcontext->ecxt_outertuple, so that it is accessible
- * to ExecEvalExpr.  pergroup is the array of per-group structs to use
- * (this might be in a hashtable entry).
+ * Advance each aggregate transition state for one input tuple.  The input
+ * tuple has been stored in tmpcontext->ecxt_outertuple, so that it is
+ * accessible to ExecEvalExpr.  pergroup is the array of per-group structs to
+ * use (this might be in a hashtable entry).
  *
  * When called, CurrentMemoryContext should be the per-query context.
  */
 static void
 advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
 {
-	int			aggno;
+	int			transno;
 	int			setno = 0;
 	int			numGroupingSets = Max(aggstate->phase->numsets, 1);
-	int			numAggs = aggstate->numaggs;
+	int			numTrans = aggstate->numtrans;
 
-	for (aggno = 0; aggno < numAggs; aggno++)
+	for (transno = 0; transno < numTrans; transno++)
 	{
-		AggStatePerAgg peraggstate = &aggstate->peragg[aggno];
-		ExprState  *filter = peraggstate->aggrefstate->aggfilter;
-		int			numTransInputs = peraggstate->numTransInputs;
+		AggStatePerTrans pertrans = &aggstate->pertrans[transno];
+		ExprState  *filter = pertrans->aggfilter;
+		int			numTransInputs = pertrans->numTransInputs;
 		int			i;
 		TupleTableSlot *slot;
 
@@ -754,12 +817,12 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
 		}
 
 		/* Evaluate the current input expressions for this aggregate */
-		slot = ExecProject(peraggstate->evalproj, NULL);
+		slot = ExecProject(pertrans->evalproj, NULL);
 
-		if (peraggstate->numSortCols > 0)
+		if (pertrans->numSortCols > 0)
 		{
 			/* DISTINCT and/or ORDER BY case */
-			Assert(slot->tts_nvalid == peraggstate->numInputs);
+			Assert(slot->tts_nvalid == pertrans->numInputs);
 
 			/*
 			 * If the transfn is strict, we want to check for nullity before
@@ -768,7 +831,7 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
 			 * not numInputs, since nullity in columns used only for sorting
 			 * is not relevant here.
 			 */
-			if (peraggstate->transfn.fn_strict)
+			if (pertrans->transfn.fn_strict)
 			{
 				for (i = 0; i < numTransInputs; i++)
 				{
@@ -782,18 +845,18 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
 			for (setno = 0; setno < numGroupingSets; setno++)
 			{
 				/* OK, put the tuple into the tuplesort object */
-				if (peraggstate->numInputs == 1)
-					tuplesort_putdatum(peraggstate->sortstates[setno],
+				if (pertrans->numInputs == 1)
+					tuplesort_putdatum(pertrans->sortstates[setno],
 									   slot->tts_values[0],
 									   slot->tts_isnull[0]);
 				else
-					tuplesort_puttupleslot(peraggstate->sortstates[setno], slot);
+					tuplesort_puttupleslot(pertrans->sortstates[setno], slot);
 			}
 		}
 		else
 		{
 			/* We can apply the transition function immediately */
-			FunctionCallInfo fcinfo = &peraggstate->transfn_fcinfo;
+			FunctionCallInfo fcinfo = &pertrans->transfn_fcinfo;
 
 			/* Load values into fcinfo */
 			/* Start from 1, since the 0th arg will be the transition value */
@@ -806,11 +869,11 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
 
 			for (setno = 0; setno < numGroupingSets; setno++)
 			{
-				AggStatePerGroup pergroupstate = &pergroup[aggno + (setno * numAggs)];
+				AggStatePerGroup pergroupstate = &pergroup[transno + (setno * numTrans)];
 
 				aggstate->current_set = setno;
 
-				advance_transition_function(aggstate, peraggstate, pergroupstate);
+				advance_transition_function(aggstate, pertrans, pergroupstate);
 			}
 		}
 	}
@@ -841,7 +904,7 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
  */
 static void
 process_ordered_aggregate_single(AggState *aggstate,
-								 AggStatePerAgg peraggstate,
+								 AggStatePerTrans pertrans,
 								 AggStatePerGroup pergroupstate)
 {
 	Datum		oldVal = (Datum) 0;
@@ -849,14 +912,14 @@ process_ordered_aggregate_single(AggState *aggstate,
 	bool		haveOldVal = false;
 	MemoryContext workcontext = aggstate->tmpcontext->ecxt_per_tuple_memory;
 	MemoryContext oldContext;
-	bool		isDistinct = (peraggstate->numDistinctCols > 0);
-	FunctionCallInfo fcinfo = &peraggstate->transfn_fcinfo;
+	bool		isDistinct = (pertrans->numDistinctCols > 0);
+	FunctionCallInfo fcinfo = &pertrans->transfn_fcinfo;
 	Datum	   *newVal;
 	bool	   *isNull;
 
-	Assert(peraggstate->numDistinctCols < 2);
+	Assert(pertrans->numDistinctCols < 2);
 
-	tuplesort_performsort(peraggstate->sortstates[aggstate->current_set]);
+	tuplesort_performsort(pertrans->sortstates[aggstate->current_set]);
 
 	/* Load the column into argument 1 (arg 0 will be transition value) */
 	newVal = fcinfo->arg + 1;
@@ -868,7 +931,7 @@ process_ordered_aggregate_single(AggState *aggstate,
 	 * pfree them when they are no longer needed.
 	 */
 
-	while (tuplesort_getdatum(peraggstate->sortstates[aggstate->current_set],
+	while (tuplesort_getdatum(pertrans->sortstates[aggstate->current_set],
 							  true, newVal, isNull))
 	{
 		/*
@@ -887,18 +950,18 @@ process_ordered_aggregate_single(AggState *aggstate,
 			haveOldVal &&
 			((oldIsNull && *isNull) ||
 			 (!oldIsNull && !*isNull &&
-			  DatumGetBool(FunctionCall2(&peraggstate->equalfns[0],
+			  DatumGetBool(FunctionCall2(&pertrans->equalfns[0],
 										 oldVal, *newVal)))))
 		{
 			/* equal to prior, so forget this one */
-			if (!peraggstate->inputtypeByVal && !*isNull)
+			if (!pertrans->inputtypeByVal && !*isNull)
 				pfree(DatumGetPointer(*newVal));
 		}
 		else
 		{
-			advance_transition_function(aggstate, peraggstate, pergroupstate);
+			advance_transition_function(aggstate, pertrans, pergroupstate);
 			/* forget the old value, if any */
-			if (!oldIsNull && !peraggstate->inputtypeByVal)
+			if (!oldIsNull && !pertrans->inputtypeByVal)
 				pfree(DatumGetPointer(oldVal));
 			/* and remember the new one for subsequent equality checks */
 			oldVal = *newVal;
@@ -909,11 +972,11 @@ process_ordered_aggregate_single(AggState *aggstate,
 		MemoryContextSwitchTo(oldContext);
 	}
 
-	if (!oldIsNull && !peraggstate->inputtypeByVal)
+	if (!oldIsNull && !pertrans->inputtypeByVal)
 		pfree(DatumGetPointer(oldVal));
 
-	tuplesort_end(peraggstate->sortstates[aggstate->current_set]);
-	peraggstate->sortstates[aggstate->current_set] = NULL;
+	tuplesort_end(pertrans->sortstates[aggstate->current_set]);
+	pertrans->sortstates[aggstate->current_set] = NULL;
 }
 
 /*
@@ -930,25 +993,25 @@ process_ordered_aggregate_single(AggState *aggstate,
  */
 static void
 process_ordered_aggregate_multi(AggState *aggstate,
-								AggStatePerAgg peraggstate,
+AggStatePerTrans pertrans,
 								AggStatePerGroup pergroupstate)
 {
 	MemoryContext workcontext = aggstate->tmpcontext->ecxt_per_tuple_memory;
-	FunctionCallInfo fcinfo = &peraggstate->transfn_fcinfo;
-	TupleTableSlot *slot1 = peraggstate->evalslot;
-	TupleTableSlot *slot2 = peraggstate->uniqslot;
-	int			numTransInputs = peraggstate->numTransInputs;
-	int			numDistinctCols = peraggstate->numDistinctCols;
+	FunctionCallInfo fcinfo = &pertrans->transfn_fcinfo;
+	TupleTableSlot *slot1 = pertrans->evalslot;
+	TupleTableSlot *slot2 = pertrans->uniqslot;
+	int			numTransInputs = pertrans->numTransInputs;
+	int			numDistinctCols = pertrans->numDistinctCols;
 	bool		haveOldValue = false;
 	int			i;
 
-	tuplesort_performsort(peraggstate->sortstates[aggstate->current_set]);
+	tuplesort_performsort(pertrans->sortstates[aggstate->current_set]);
 
 	ExecClearTuple(slot1);
 	if (slot2)
 		ExecClearTuple(slot2);
 
-	while (tuplesort_gettupleslot(peraggstate->sortstates[aggstate->current_set],
+	while (tuplesort_gettupleslot(pertrans->sortstates[aggstate->current_set],
 								  true, slot1))
 	{
 		/*
@@ -962,8 +1025,8 @@ process_ordered_aggregate_multi(AggState *aggstate,
 			!haveOldValue ||
 			!execTuplesMatch(slot1, slot2,
 							 numDistinctCols,
-							 peraggstate->sortColIdx,
-							 peraggstate->equalfns,
+							 pertrans->sortColIdx,
+							 pertrans->equalfns,
 							 workcontext))
 		{
 			/* Load values into fcinfo */
@@ -974,7 +1037,7 @@ process_ordered_aggregate_multi(AggState *aggstate,
 				fcinfo->argnull[i + 1] = slot1->tts_isnull[i];
 			}
 
-			advance_transition_function(aggstate, peraggstate, pergroupstate);
+			advance_transition_function(aggstate, pertrans, pergroupstate);
 
 			if (numDistinctCols > 0)
 			{
@@ -997,8 +1060,8 @@ process_ordered_aggregate_multi(AggState *aggstate,
 	if (slot2)
 		ExecClearTuple(slot2);
 
-	tuplesort_end(peraggstate->sortstates[aggstate->current_set]);
-	peraggstate->sortstates[aggstate->current_set] = NULL;
+	tuplesort_end(pertrans->sortstates[aggstate->current_set]);
+	pertrans->sortstates[aggstate->current_set] = NULL;
 }
 
 /*
@@ -1009,10 +1072,14 @@ process_ordered_aggregate_multi(AggState *aggstate,
  *
  * The finalfunction will be run, and the result delivered, in the
  * output-tuple context; caller's CurrentMemoryContext does not matter.
+ *
+ * The finalfn uses the state as set in the transno. This also might be
+ * being used by another aggregate function, so it's important that we do
+ * nothing destructive here.
  */
 static void
 finalize_aggregate(AggState *aggstate,
-				   AggStatePerAgg peraggstate,
+				   AggStatePerAgg peragg,
 				   AggStatePerGroup pergroupstate,
 				   Datum *resultVal, bool *resultIsNull)
 {
@@ -1021,6 +1088,7 @@ finalize_aggregate(AggState *aggstate,
 	MemoryContext oldContext;
 	int			i;
 	ListCell   *lc;
+	AggStatePerTrans pertrans = &aggstate->pertrans[peragg->transno];
 
 	oldContext = MemoryContextSwitchTo(aggstate->ss.ps.ps_ExprContext->ecxt_per_tuple_memory);
 
@@ -1031,7 +1099,7 @@ finalize_aggregate(AggState *aggstate,
 	 * for the transition state value.
 	 */
 	i = 1;
-	foreach(lc, peraggstate->aggrefstate->aggdirectargs)
+	foreach(lc, pertrans->aggdirectargs)
 	{
 		ExprState  *expr = (ExprState *) lfirst(lc);
 
@@ -1046,16 +1114,16 @@ finalize_aggregate(AggState *aggstate,
 	/*
 	 * Apply the agg's finalfn if one is provided, else return transValue.
 	 */
-	if (OidIsValid(peraggstate->finalfn_oid))
+	if (OidIsValid(peragg->finalfn_oid))
 	{
-		int			numFinalArgs = peraggstate->numFinalArgs;
+		int			numFinalArgs = peragg->numFinalArgs;
 
-		/* set up aggstate->curperagg for AggGetAggref() */
-		aggstate->curperagg = peraggstate;
+		/* set up aggstate->curpertrans for AggGetAggref() */
+		aggstate->curpertrans = pertrans;
 
-		InitFunctionCallInfoData(fcinfo, &peraggstate->finalfn,
+		InitFunctionCallInfoData(fcinfo, &peragg->finalfn,
 								 numFinalArgs,
-								 peraggstate->aggCollation,
+								 pertrans->aggCollation,
 								 (void *) aggstate, NULL);
 
 		/* Fill in the transition state value */
@@ -1082,7 +1150,7 @@ finalize_aggregate(AggState *aggstate,
 			*resultVal = FunctionCallInvoke(&fcinfo);
 			*resultIsNull = fcinfo.isnull;
 		}
-		aggstate->curperagg = NULL;
+		aggstate->curpertrans = NULL;
 	}
 	else
 	{
@@ -1093,12 +1161,12 @@ finalize_aggregate(AggState *aggstate,
 	/*
 	 * If result is pass-by-ref, make sure it is in the right context.
 	 */
-	if (!peraggstate->resulttypeByVal && !*resultIsNull &&
+	if (!peragg->resulttypeByVal && !*resultIsNull &&
 		!MemoryContextContains(CurrentMemoryContext,
 							   DatumGetPointer(*resultVal)))
 		*resultVal = datumCopy(*resultVal,
-							   peraggstate->resulttypeByVal,
-							   peraggstate->resulttypeLen);
+							   peragg->resulttypeByVal,
+							   peragg->resulttypeLen);
 
 	MemoryContextSwitchTo(oldContext);
 }
@@ -1173,7 +1241,7 @@ prepare_projection_slot(AggState *aggstate, TupleTableSlot *slot, int currentSet
  */
 static void
 finalize_aggregates(AggState *aggstate,
-					AggStatePerAgg peragg,
+					AggStatePerAgg peraggs,
 					AggStatePerGroup pergroup,
 					int currentSet)
 {
@@ -1189,26 +1257,28 @@ finalize_aggregates(AggState *aggstate,
 
 	for (aggno = 0; aggno < aggstate->numaggs; aggno++)
 	{
-		AggStatePerAgg peraggstate = &peragg[aggno];
+		AggStatePerAgg peragg = &peraggs[aggno];
+		int			transno = peragg->transno;
+		AggStatePerTrans pertrans = &aggstate->pertrans[transno];
 		AggStatePerGroup pergroupstate;
 
-		pergroupstate = &pergroup[aggno + (currentSet * (aggstate->numaggs))];
+		pergroupstate = &pergroup[transno + (currentSet * (aggstate->numtrans))];
 
-		if (peraggstate->numSortCols > 0)
+		if (pertrans->numSortCols > 0)
 		{
 			Assert(((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED);
 
-			if (peraggstate->numInputs == 1)
+			if (pertrans->numInputs == 1)
 				process_ordered_aggregate_single(aggstate,
-												 peraggstate,
+												 pertrans,
 												 pergroupstate);
 			else
 				process_ordered_aggregate_multi(aggstate,
-												peraggstate,
+												pertrans,
 												pergroupstate);
 		}
 
-		finalize_aggregate(aggstate, peraggstate, pergroupstate,
+		finalize_aggregate(aggstate, peragg, pergroupstate,
 						   &aggvalues[aggno], &aggnulls[aggno]);
 	}
 }
@@ -1428,7 +1498,7 @@ lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot)
 	if (isnew)
 	{
 		/* initialize aggregates for new tuple group */
-		initialize_aggregates(aggstate, aggstate->peragg, entry->pergroup, 0);
+		initialize_aggregates(aggstate, entry->pergroup, 0);
 	}
 
 	return entry;
@@ -1716,7 +1786,7 @@ agg_retrieve_direct(AggState *aggstate)
 			/*
 			 * Initialize working state for a new input tuple group.
 			 */
-			initialize_aggregates(aggstate, peragg, pergroup, numReset);
+			initialize_aggregates(aggstate, pergroup, numReset);
 
 			if (aggstate->grp_firstTuple != NULL)
 			{
@@ -1945,17 +2015,18 @@ AggState *
 ExecInitAgg(Agg *node, EState *estate, int eflags)
 {
 	AggState   *aggstate;
-	AggStatePerAgg peragg;
+	AggStatePerAgg peraggs;
+	AggStatePerTrans pertransstates;
 	Plan	   *outerPlan;
 	ExprContext *econtext;
 	int			numaggs,
+				transno,
 				aggno;
 	int			phase;
 	ListCell   *l;
 	Bitmapset  *all_grouped_cols = NULL;
 	int			numGroupingSets = 1;
 	int			numPhases;
-	int			currentsortno = 0;
 	int			i = 0;
 	int			j = 0;
 
@@ -1971,12 +2042,14 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 
 	aggstate->aggs = NIL;
 	aggstate->numaggs = 0;
+	aggstate->numtrans = 0;
 	aggstate->maxsets = 0;
 	aggstate->hashfunctions = NULL;
 	aggstate->projected_set = -1;
 	aggstate->current_set = 0;
 	aggstate->peragg = NULL;
-	aggstate->curperagg = NULL;
+	aggstate->pertrans = NULL;
+	aggstate->curpertrans = NULL;
 	aggstate->agg_done = false;
 	aggstate->input_done = false;
 	aggstate->pergroup = NULL;
@@ -2209,8 +2282,11 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 	econtext->ecxt_aggvalues = (Datum *) palloc0(sizeof(Datum) * numaggs);
 	econtext->ecxt_aggnulls = (bool *) palloc0(sizeof(bool) * numaggs);
 
-	peragg = (AggStatePerAgg) palloc0(sizeof(AggStatePerAggData) * numaggs);
-	aggstate->peragg = peragg;
+	peraggs = (AggStatePerAgg) palloc0(sizeof(AggStatePerAggData) * numaggs);
+	pertransstates = (AggStatePerTrans) palloc0(sizeof(AggStatePerTransData)* numaggs);
+
+	aggstate->peragg = peraggs;
+	aggstate->pertrans = pertransstates;
 
 	if (node->aggstrategy == AGG_HASHED)
 	{
@@ -2232,69 +2308,56 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 
 	/*
 	 * Perform lookups of aggregate function info, and initialize the
-	 * unchanging fields of the per-agg data.  We also detect duplicate
-	 * aggregates (for example, "SELECT sum(x) ... HAVING sum(x) > 0"). When
-	 * duplicates are detected, we only make an AggStatePerAgg struct for the
-	 * first one.  The clones are simply pointed at the same result entry by
-	 * giving them duplicate aggno values.
+	 * unchanging fields of the per-agg and per-trans data.
 	 */
 	aggno = -1;
+	transno = -1;
 	foreach(l, aggstate->aggs)
 	{
 		AggrefExprState *aggrefstate = (AggrefExprState *) lfirst(l);
 		Aggref	   *aggref = (Aggref *) aggrefstate->xprstate.expr;
-		AggStatePerAgg peraggstate;
+		AggStatePerAgg peragg;
+		AggStatePerTrans pertrans;
+		int			existing_aggno;
+		int			existing_transno;
+		List	   *same_input_transnos;
 		Oid			inputTypes[FUNC_MAX_ARGS];
 		int			numArguments;
 		int			numDirectArgs;
-		int			numInputs;
-		int			numSortCols;
-		int			numDistinctCols;
-		List	   *sortlist;
 		HeapTuple	aggTuple;
 		Form_pg_aggregate aggform;
-		Oid			aggtranstype;
 		AclResult	aclresult;
 		Oid			transfn_oid,
 					finalfn_oid;
-		Expr	   *transfnexpr,
-				   *finalfnexpr;
+		Expr	   *finalfnexpr;
+		Oid			aggtranstype;
 		Datum		textInitVal;
-		int			i;
-		ListCell   *lc;
+		Datum		initValue;
+		bool		initValueIsNull;
 
 		/* Planner should have assigned aggregate to correct level */
 		Assert(aggref->agglevelsup == 0);
 
-		/* Look for a previous duplicate aggregate */
-		for (i = 0; i <= aggno; i++)
-		{
-			if (equal(aggref, peragg[i].aggref) &&
-				!contain_volatile_functions((Node *) aggref))
-				break;
-		}
-		if (i <= aggno)
+		/*
+		 * For performance reasons we detect duplicate aggregates (for
+		 * example, "SELECT sum(x) ... HAVING sum(x) > 0"). When duplicates
+		 * are detected, we only make an AggStatePerAgg struct for the first
+		 * one. The clones are simply pointed at the same result entry by
+		 * giving them duplicate aggno values.
+		 */
+		existing_aggno = find_compatible_peragg(aggref, aggstate, aggno,
+												&same_input_transnos);
+		if (existing_aggno != -1)
 		{
-			/* Found a match to an existing entry, so just mark it */
-			aggrefstate->aggno = i;
+			aggrefstate->aggno = existing_aggno;
 			continue;
 		}
 
-		/* Nope, so assign a new PerAgg record */
-		peraggstate = &peragg[++aggno];
-
 		/* Mark Aggref state node with assigned index in the result array */
+		peragg = &peraggs[++aggno];
+		peragg->aggref = aggref;
 		aggrefstate->aggno = aggno;
 
-		/* Begin filling in the peraggstate data */
-		peraggstate->aggrefstate = aggrefstate;
-		peraggstate->aggref = aggref;
-		peraggstate->sortstates = (Tuplesortstate **)
-			palloc0(sizeof(Tuplesortstate *) * numGroupingSets);
-
-		for (currentsortno = 0; currentsortno < numGroupingSets; currentsortno++)
-			peraggstate->sortstates[currentsortno] = NULL;
-
 		/* Fetch the pg_aggregate row */
 		aggTuple = SearchSysCache1(AGGFNOID,
 								   ObjectIdGetDatum(aggref->aggfnoid));
@@ -2311,8 +2374,8 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 						   get_func_name(aggref->aggfnoid));
 		InvokeFunctionExecuteHook(aggref->aggfnoid);
 
-		peraggstate->transfn_oid = transfn_oid = aggform->aggtransfn;
-		peraggstate->finalfn_oid = finalfn_oid = aggform->aggfinalfn;
+		transfn_oid = aggform->aggtransfn;
+		peragg->finalfn_oid = finalfn_oid = aggform->aggfinalfn;
 
 		/* Check that aggregate owner has permission to call component fns */
 		{
@@ -2350,74 +2413,43 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 		 * agg accepts ANY or a polymorphic type.
 		 */
 		numArguments = get_aggregate_argtypes(aggref, inputTypes);
-		peraggstate->numArguments = numArguments;
 
 		/* Count the "direct" arguments, if any */
 		numDirectArgs = list_length(aggref->aggdirectargs);
 
-		/* Count the number of aggregated input columns */
-		numInputs = list_length(aggref->args);
-		peraggstate->numInputs = numInputs;
-
-		/* Detect how many arguments to pass to the transfn */
-		if (AGGKIND_IS_ORDERED_SET(aggref->aggkind))
-			peraggstate->numTransInputs = numInputs;
-		else
-			peraggstate->numTransInputs = numArguments;
-
-		/* Detect how many arguments to pass to the finalfn */
-		if (aggform->aggfinalextra)
-			peraggstate->numFinalArgs = numArguments + 1;
-		else
-			peraggstate->numFinalArgs = numDirectArgs + 1;
-
 		/* resolve actual type of transition state, if polymorphic */
 		aggtranstype = resolve_aggregate_transtype(aggref->aggfnoid,
 												   aggform->aggtranstype,
 												   inputTypes,
 												   numArguments);
 
-		/* build expression trees using actual argument & result types */
-		build_aggregate_fnexprs(inputTypes,
-								numArguments,
-								numDirectArgs,
-								peraggstate->numFinalArgs,
-								aggref->aggvariadic,
-								aggtranstype,
-								aggref->aggtype,
-								aggref->inputcollid,
-								transfn_oid,
-								InvalidOid,		/* invtrans is not needed here */
-								finalfn_oid,
-								&transfnexpr,
-								NULL,
-								&finalfnexpr);
-
-		/* set up infrastructure for calling the transfn and finalfn */
-		fmgr_info(transfn_oid, &peraggstate->transfn);
-		fmgr_info_set_expr((Node *) transfnexpr, &peraggstate->transfn);
+		/* Detect how many arguments to pass to the finalfn */
+		if (aggform->aggfinalextra)
+			peragg->numFinalArgs = numArguments + 1;
+		else
+			peragg->numFinalArgs = numDirectArgs + 1;
 
+		/*
+		 * build expression trees using actual argument & result types for the
+		 * finalfn, if it exists
+		 */
 		if (OidIsValid(finalfn_oid))
 		{
-			fmgr_info(finalfn_oid, &peraggstate->finalfn);
-			fmgr_info_set_expr((Node *) finalfnexpr, &peraggstate->finalfn);
+			build_aggregate_finalfn_expr(inputTypes,
+										 peragg->numFinalArgs,
+										 aggtranstype,
+										 aggref->aggtype,
+										 aggref->inputcollid,
+										 finalfn_oid,
+										 &finalfnexpr);
+			fmgr_info(finalfn_oid, &peragg->finalfn);
+			fmgr_info_set_expr((Node *) finalfnexpr, &peragg->finalfn);
 		}
 
-		peraggstate->aggCollation = aggref->inputcollid;
-
-		InitFunctionCallInfoData(peraggstate->transfn_fcinfo,
-								 &peraggstate->transfn,
-								 peraggstate->numTransInputs + 1,
-								 peraggstate->aggCollation,
-								 (void *) aggstate, NULL);
-
-		/* get info about relevant datatypes */
+		/* get info about the result type's datatype */
 		get_typlenbyval(aggref->aggtype,
-						&peraggstate->resulttypeLen,
-						&peraggstate->resulttypeByVal);
-		get_typlenbyval(aggtranstype,
-						&peraggstate->transtypeLen,
-						&peraggstate->transtypeByVal);
+						&peragg->resulttypeLen,
+						&peragg->resulttypeByVal);
 
 		/*
 		 * initval is potentially null, so don't try to access it as a struct
@@ -2425,161 +2457,287 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 		 */
 		textInitVal = SysCacheGetAttr(AGGFNOID, aggTuple,
 									  Anum_pg_aggregate_agginitval,
-									  &peraggstate->initValueIsNull);
-
-		if (peraggstate->initValueIsNull)
-			peraggstate->initValue = (Datum) 0;
+									  &initValueIsNull);
+		if (initValueIsNull)
+			initValue = (Datum) 0;
 		else
-			peraggstate->initValue = GetAggInitVal(textInitVal,
-												   aggtranstype);
+			initValue = GetAggInitVal(textInitVal, aggtranstype);
 
 		/*
-		 * If the transfn is strict and the initval is NULL, make sure input
-		 * type and transtype are the same (or at least binary-compatible), so
-		 * that it's OK to use the first aggregated input value as the initial
-		 * transValue.  This should have been checked at agg definition time,
-		 * but we must check again in case the transfn's strictness property
-		 * has been changed.
+		 * Build working state for invoking the transition function, or look
+		 * up previously initialized working state, if we can share it.
+		 *
+		 * find_compatible_peragg() already collected a list of per-Trans's
+		 * with the same inputs. Check if any of them have the transition
+		 * function and initial value.
 		 */
-		if (peraggstate->transfn.fn_strict && peraggstate->initValueIsNull)
+		existing_transno = find_compatible_pertrans(aggstate, aggref,
+													transfn_oid, aggtranstype,
+													initValue, initValueIsNull,
+													same_input_transnos);
+		if (existing_transno != -1)
 		{
-			if (numArguments <= numDirectArgs ||
-				!IsBinaryCoercible(inputTypes[numDirectArgs], aggtranstype))
-				ereport(ERROR,
-						(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
-						 errmsg("aggregate %u needs to have compatible input type and transition type",
-								aggref->aggfnoid)));
+			pertrans = &pertransstates[existing_transno];
+			peragg->transno = existing_transno;
 		}
+		else
+		{
+			pertrans = &pertransstates[++transno];
+			build_pertrans_for_aggref(pertrans, aggstate, estate,
+									  aggref, transfn_oid, aggtranstype,
+									  initValue, initValueIsNull,
+									  inputTypes, numArguments);
+			peragg->transno = transno;
+		}
+		ReleaseSysCache(aggTuple);
+	}
 
-		/*
-		 * Get a tupledesc corresponding to the aggregated inputs (including
-		 * sort expressions) of the agg.
-		 */
-		peraggstate->evaldesc = ExecTypeFromTL(aggref->args, false);
+	/*
+	 * Update numaggs to match the number of unique aggregates found. Also set
+	 * numstates to the number of unique aggregate states found.
+	 */
+	aggstate->numaggs = aggno + 1;
+	aggstate->numtrans = transno + 1;
+
+	return aggstate;
+}
+
+/*
+ * Build the state needed to calculate a state value for an aggregate.
+ *
+ * This initializes all the fields in 'pertrans'. 'aggTuple',
+ * 'inputTypes' and 'numArguments' could be derived from 'aggref', but the
+ * caller has calculated them already, so might as well pass them.
+ */
+static void
+build_pertrans_for_aggref(AggStatePerTrans pertrans,
+						  AggState *aggstate, EState *estate,
+						  Aggref *aggref,
+						  Oid aggtransfn, Oid aggtranstype,
+						  Datum initValue, bool initValueIsNull,
+						  Oid *inputTypes, int numArguments)
+{
+	int			numGroupingSets = Max(aggstate->maxsets, 1);
+	Expr	   *transfnexpr;
+	ListCell   *lc;
+	int			numInputs;
+	int			numDirectArgs;
+	List	   *sortlist;
+	int			numSortCols;
+	int			numDistinctCols;
+	int			naggs;
+	int			i;
+
+	/* Begin filling in the pertrans data */
+	pertrans->aggref = aggref;
+	pertrans->aggCollation = aggref->inputcollid;
+	pertrans->transfn_oid = aggtransfn;
+	pertrans->initValue = initValue;
+	pertrans->initValueIsNull = initValueIsNull;
+
+	/* Count the "direct" arguments, if any */
+	numDirectArgs = list_length(aggref->aggdirectargs);
+
+	/* Count the number of aggregated input columns */
+	pertrans->numInputs = numInputs = list_length(aggref->args);
+
+	pertrans->aggtranstype = aggtranstype;
+
+	/* Detect how many arguments to pass to the transfn */
+	if (AGGKIND_IS_ORDERED_SET(aggref->aggkind))
+		pertrans->numTransInputs = numInputs;
+	else
+		pertrans->numTransInputs = numArguments;
+
+	/*
+	 * Set up infrastructure for calling the transfn
+	 */
+	build_aggregate_transfn_expr(inputTypes,
+								 numArguments,
+								 numDirectArgs,
+								 aggref->aggvariadic,
+								 aggtranstype,
+								 aggref->inputcollid,
+								 aggtransfn,
+								 InvalidOid,	/* invtrans is not needed here */
+								 &transfnexpr,
+								 NULL);
+	fmgr_info(aggtransfn, &pertrans->transfn);
+	fmgr_info_set_expr((Node *) transfnexpr, &pertrans->transfn);
+
+	InitFunctionCallInfoData(pertrans->transfn_fcinfo,
+							 &pertrans->transfn,
+							 pertrans->numTransInputs + 1,
+							 pertrans->aggCollation,
+							 (void *) aggstate, NULL);
+
+	/*
+	 * If the transfn is strict and the initval is NULL, make sure input type
+	 * and transtype are the same (or at least binary-compatible), so that
+	 * it's OK to use the first aggregated input value as the initial
+	 * transValue.  This should have been checked at agg definition time, but
+	 * we must check again in case the transfn's strictness property has been
+	 * changed.
+	 */
+	if (pertrans->transfn.fn_strict && pertrans->initValueIsNull)
+	{
+		if (numArguments <= numDirectArgs ||
+			!IsBinaryCoercible(inputTypes[numDirectArgs],
+							   aggtranstype))
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+					 errmsg("aggregate %u needs to have compatible input type and transition type",
+							aggref->aggfnoid)));
+	}
+
+	/* get info about the state value's datatype */
+	get_typlenbyval(aggtranstype,
+					&pertrans->transtypeLen,
+					&pertrans->transtypeByVal);
+
+	/*
+	 * Get a tupledesc corresponding to the aggregated inputs (including sort
+	 * expressions) of the agg.
+	 */
+	pertrans->evaldesc = ExecTypeFromTL(aggref->args, false);
 
-		/* Create slot we're going to do argument evaluation in */
-		peraggstate->evalslot = ExecInitExtraTupleSlot(estate);
-		ExecSetSlotDescriptor(peraggstate->evalslot, peraggstate->evaldesc);
+	/* Create slot we're going to do argument evaluation in */
+	pertrans->evalslot = ExecInitExtraTupleSlot(estate);
+	ExecSetSlotDescriptor(pertrans->evalslot, pertrans->evaldesc);
+
+	/* Initialize the input and FILTER expressions */
+	naggs = aggstate->numaggs;
+	pertrans->aggfilter = ExecInitExpr(aggref->aggfilter,
+										 (PlanState *) aggstate);
+	pertrans->aggdirectargs = (List *) ExecInitExpr((Expr *) aggref->aggdirectargs,
+													  (PlanState *) aggstate);
+	pertrans->args = (List *) ExecInitExpr((Expr *) aggref->args,
+											(PlanState *) aggstate);
+
+	/*
+	 * Complain if the aggregate's arguments contain any  aggregates; nested
+	 * agg functions are semantically nonsensical.  (This should have been
+	 * caught earlier, but we defend against it here anyway.)
+	 */
+	if (naggs != aggstate->numaggs)
+		ereport(ERROR,
+				(errcode(ERRCODE_GROUPING_ERROR),
+				 errmsg("aggregate function calls cannot be nested")));
+
+	/* Set up projection info for evaluation */
+	pertrans->evalproj = ExecBuildProjectionInfo(pertrans->args,
+												 aggstate->tmpcontext,
+												 pertrans->evalslot,
+												 NULL);
+
+	/*
+	 * If we're doing either DISTINCT or ORDER BY for a plain agg, then we
+	 * have a list of SortGroupClause nodes; fish out the data in them and
+	 * stick them into arrays.  We ignore ORDER BY for an ordered-set agg,
+	 * however; the agg's transfn and finalfn are responsible for that.
+	 *
+	 * Note that by construction, if there is a DISTINCT clause then the ORDER
+	 * BY clause is a prefix of it (see transformDistinctClause).
+	 */
+	if (AGGKIND_IS_ORDERED_SET(aggref->aggkind))
+	{
+		sortlist = NIL;
+		numSortCols = numDistinctCols = 0;
+	}
+	else if (aggref->aggdistinct)
+	{
+		sortlist = aggref->aggdistinct;
+		numSortCols = numDistinctCols = list_length(sortlist);
+		Assert(numSortCols >= list_length(aggref->aggorder));
+	}
+	else
+	{
+		sortlist = aggref->aggorder;
+		numSortCols = list_length(sortlist);
+		numDistinctCols = 0;
+	}
 
-		/* Set up projection info for evaluation */
-		peraggstate->evalproj = ExecBuildProjectionInfo(aggrefstate->args,
-														aggstate->tmpcontext,
-														peraggstate->evalslot,
-														NULL);
+	pertrans->numSortCols = numSortCols;
+	pertrans->numDistinctCols = numDistinctCols;
 
+	if (numSortCols > 0)
+	{
 		/*
-		 * If we're doing either DISTINCT or ORDER BY for a plain agg, then we
-		 * have a list of SortGroupClause nodes; fish out the data in them and
-		 * stick them into arrays.  We ignore ORDER BY for an ordered-set agg,
-		 * however; the agg's transfn and finalfn are responsible for that.
-		 *
-		 * Note that by construction, if there is a DISTINCT clause then the
-		 * ORDER BY clause is a prefix of it (see transformDistinctClause).
+		 * We don't implement DISTINCT or ORDER BY aggs in the HASHED case
+		 * (yet)
 		 */
-		if (AGGKIND_IS_ORDERED_SET(aggref->aggkind))
-		{
-			sortlist = NIL;
-			numSortCols = numDistinctCols = 0;
-		}
-		else if (aggref->aggdistinct)
+		Assert(((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED);
+
+		/* If we have only one input, we need its len/byval info. */
+		if (numInputs == 1)
 		{
-			sortlist = aggref->aggdistinct;
-			numSortCols = numDistinctCols = list_length(sortlist);
-			Assert(numSortCols >= list_length(aggref->aggorder));
+			get_typlenbyval(inputTypes[numDirectArgs],
+							&pertrans->inputtypeLen,
+							&pertrans->inputtypeByVal);
 		}
-		else
+		else if (numDistinctCols > 0)
 		{
-			sortlist = aggref->aggorder;
-			numSortCols = list_length(sortlist);
-			numDistinctCols = 0;
+			/* we will need an extra slot to store prior values */
+			pertrans->uniqslot = ExecInitExtraTupleSlot(estate);
+			ExecSetSlotDescriptor(pertrans->uniqslot,
+								  pertrans->evaldesc);
 		}
 
-		peraggstate->numSortCols = numSortCols;
-		peraggstate->numDistinctCols = numDistinctCols;
-
-		if (numSortCols > 0)
+		/* Extract the sort information for use later */
+		pertrans->sortColIdx =
+			(AttrNumber *) palloc(numSortCols * sizeof(AttrNumber));
+		pertrans->sortOperators =
+			(Oid *) palloc(numSortCols * sizeof(Oid));
+		pertrans->sortCollations =
+			(Oid *) palloc(numSortCols * sizeof(Oid));
+		pertrans->sortNullsFirst =
+			(bool *) palloc(numSortCols * sizeof(bool));
+
+		i = 0;
+		foreach(lc, sortlist)
 		{
-			/*
-			 * We don't implement DISTINCT or ORDER BY aggs in the HASHED case
-			 * (yet)
-			 */
-			Assert(node->aggstrategy != AGG_HASHED);
-
-			/* If we have only one input, we need its len/byval info. */
-			if (numInputs == 1)
-			{
-				get_typlenbyval(inputTypes[numDirectArgs],
-								&peraggstate->inputtypeLen,
-								&peraggstate->inputtypeByVal);
-			}
-			else if (numDistinctCols > 0)
-			{
-				/* we will need an extra slot to store prior values */
-				peraggstate->uniqslot = ExecInitExtraTupleSlot(estate);
-				ExecSetSlotDescriptor(peraggstate->uniqslot,
-									  peraggstate->evaldesc);
-			}
+			SortGroupClause *sortcl = (SortGroupClause *) lfirst(lc);
+			TargetEntry *tle = get_sortgroupclause_tle(sortcl, aggref->args);
 
-			/* Extract the sort information for use later */
-			peraggstate->sortColIdx =
-				(AttrNumber *) palloc(numSortCols * sizeof(AttrNumber));
-			peraggstate->sortOperators =
-				(Oid *) palloc(numSortCols * sizeof(Oid));
-			peraggstate->sortCollations =
-				(Oid *) palloc(numSortCols * sizeof(Oid));
-			peraggstate->sortNullsFirst =
-				(bool *) palloc(numSortCols * sizeof(bool));
+			/* the parser should have made sure of this */
+			Assert(OidIsValid(sortcl->sortop));
 
-			i = 0;
-			foreach(lc, sortlist)
-			{
-				SortGroupClause *sortcl = (SortGroupClause *) lfirst(lc);
-				TargetEntry *tle = get_sortgroupclause_tle(sortcl,
-														   aggref->args);
-
-				/* the parser should have made sure of this */
-				Assert(OidIsValid(sortcl->sortop));
-
-				peraggstate->sortColIdx[i] = tle->resno;
-				peraggstate->sortOperators[i] = sortcl->sortop;
-				peraggstate->sortCollations[i] = exprCollation((Node *) tle->expr);
-				peraggstate->sortNullsFirst[i] = sortcl->nulls_first;
-				i++;
-			}
-			Assert(i == numSortCols);
+			pertrans->sortColIdx[i] = tle->resno;
+			pertrans->sortOperators[i] = sortcl->sortop;
+			pertrans->sortCollations[i] = exprCollation((Node *) tle->expr);
+			pertrans->sortNullsFirst[i] = sortcl->nulls_first;
+			i++;
 		}
+		Assert(i == numSortCols);
+	}
 
-		if (aggref->aggdistinct)
-		{
-			Assert(numArguments > 0);
+	if (aggref->aggdistinct)
+	{
+		Assert(numArguments > 0);
 
-			/*
-			 * We need the equal function for each DISTINCT comparison we will
-			 * make.
-			 */
-			peraggstate->equalfns =
-				(FmgrInfo *) palloc(numDistinctCols * sizeof(FmgrInfo));
+		/*
+		 * We need the equal function for each DISTINCT comparison we will
+		 * make.
+		 */
+		pertrans->equalfns =
+			(FmgrInfo *) palloc(numDistinctCols * sizeof(FmgrInfo));
 
-			i = 0;
-			foreach(lc, aggref->aggdistinct)
-			{
-				SortGroupClause *sortcl = (SortGroupClause *) lfirst(lc);
+		i = 0;
+		foreach(lc, aggref->aggdistinct)
+		{
+			SortGroupClause *sortcl = (SortGroupClause *) lfirst(lc);
 
-				fmgr_info(get_opcode(sortcl->eqop), &peraggstate->equalfns[i]);
-				i++;
-			}
-			Assert(i == numDistinctCols);
+			fmgr_info(get_opcode(sortcl->eqop), &pertrans->equalfns[i]);
+			i++;
 		}
-
-		ReleaseSysCache(aggTuple);
+		Assert(i == numDistinctCols);
 	}
 
-	/* Update numaggs to match number of unique aggregates found */
-	aggstate->numaggs = aggno + 1;
-
-	return aggstate;
+	pertrans->sortstates = (Tuplesortstate **)
+		palloc0(sizeof(Tuplesortstate *) * numGroupingSets);
 }
 
+
 static Datum
 GetAggInitVal(Datum textInitVal, Oid transtype)
 {
@@ -2596,11 +2754,174 @@ GetAggInitVal(Datum textInitVal, Oid transtype)
 	return initVal;
 }
 
+/*
+ * find_compatible_peragg - search for a previously initialized per-Agg struct
+ *
+ * Searches the previously looked at aggregates in order to find a compatible
+ * aggregate. If a positive match is found then foundaggno is set to the
+ * aggregate which matches.
+ *
+ * As a side-effect, this also collects a list of existing per-Trans structs
+ * with matching inputs. If no identical AggRef is found, the list is passed
+ * later to find_compatible_perstate, to see if we can at least reuse the
+ * state value of another aggregate.
+ *
+ * FIXME: The below examples are a good, but they don't belong here anymore.
+ *
+ * Scenario 1 -- An aggregate function appears more than once in query:
+ *
+ *		SELECT SUM(x) FROM ... HAVING SUM(x) > 0
+ *
+ * Since in this case the aggregates are both the same we can optimize by
+ * only calculating aggregate state and calling the finalfn just once. This
+ * would be an AGGREF_EXACT_MATCH, meaning both the state and the final
+ * function call are shared.
+ *
+ * Scenario 2 -- Two different aggregate functions appear in the query but
+ *				 the two functions happen to share the same transfn, but have
+ *				 different finalfn.
+ *
+ *		SELECT SUM(x), AVG(x) FROM ...
+ *
+ * Since in our case these two aggregates both share the same transfn, but
+ * naturally they have different finalfns. This situation is classed as an
+ * AGGREF_STATE_MATCH. This means that the same state can be shared by both
+ * aggregates. Since the finalfn call is not the same this cannot be reused.
+ * For this case to be valid the INITCOND of the aggregate, if one exists, must
+ * also match.
+ *
+ * Scenario 3 -- The same aggregate function is called with different
+ *				 parameters.
+ *
+ *		SELECT SUM(x),SUM(DISTINCT x) FROM ...
+ *		SELECT SUM(x),SUM(y) FROM ...
+ *		SELECT SUM(x),SUM(x) FILTER(WHERE x > 0) FROM ...
+ *
+ * All three of the above queries cannot share the same state and have to be
+ * calculated independently.
+ *
+ * Scenario 4 -- Different aggregates with the same parameters and the same
+ *				 transfn and finalfn.
+ *
+ *		SELECT SUM(x),SUM2(x) FROM ...
+ *
+ * A perhaps unlikely scenario where two aggregate functions exist which have,
+ * both the same transfn and the same finalfn. In this case we can report an
+ * AGGREF_EXACT_MATCH, providing the INITCOND of both aggregates are the same.
+ *
+ *
+ * Returns -1 if no match found.
+ */
+static int
+find_compatible_peragg(Aggref *newagg, AggState *aggstate,
+					   int lastaggno, List **same_input_transnos)
+{
+	int			aggno;
+	AggStatePerAgg peraggs;
+
+	*same_input_transnos = NIL;
+
+	/* we mustn't reuse the aggref if it contains volatile function calls */
+	if (contain_volatile_functions((Node *) newagg))
+		return -1;
+
+	peraggs = aggstate->peragg;
+
+	/*
+	 * Search through the list of already seen aggregates. We'll stop when we
+	 * find an exact match, but until then we'll note any state matches that
+	 * we find. We may have to fall back on these should we fail to find an
+	 * exact match.
+	 */
+	for (aggno = 0; aggno <= lastaggno; aggno++)
+	{
+		AggStatePerAgg peragg;
+		Aggref	   *existingRef;
+
+		peragg = &peraggs[aggno];
+		existingRef = peragg->aggref;
+
+		/* all of the following must be the same or it's no match */
+		if (newagg->inputcollid != existingRef->inputcollid ||
+			newagg->aggstar != existingRef->aggstar ||
+			newagg->aggvariadic != existingRef->aggvariadic ||
+			newagg->aggkind != existingRef->aggkind ||
+			!equal(newagg->aggdirectargs, existingRef->aggdirectargs) ||
+			!equal(newagg->args, existingRef->args) ||
+			!equal(newagg->aggorder, existingRef->aggorder) ||
+			!equal(newagg->aggdistinct, existingRef->aggdistinct) ||
+			!equal(newagg->aggfilter, existingRef->aggfilter))
+			continue;
+
+		/* if it's the same aggregate function then report exact match */
+		if (newagg->aggfnoid == existingRef->aggfnoid &&
+			newagg->aggtype == existingRef->aggtype &&
+			newagg->aggcollid == existingRef->aggcollid)
+		{
+			list_free(*same_input_transnos);
+			*same_input_transnos = NIL;
+			return aggno;
+		}
+
+		/*
+		 * Not identical, but it had the same inputs. Return it to the caller,
+		 * in case we can re-use its per-trans state.
+		 */
+		*same_input_transnos = lappend_int(*same_input_transnos,
+										   peragg->transno);
+	}
+
+	return -1;
+}
+
+/*
+ * find_compatible_pertrans - search for a previously initialized per-Trans
+ * struct
+ *
+ * Searches the list of transnos for a per-Trans struct for the same
+ * transition state and initial condition.
+ */
+static int
+find_compatible_pertrans(AggState *aggstate, Aggref *newagg,
+						 Oid aggtransfn, Oid aggtranstype,
+						 Datum initValue, bool initValueIsNull,
+						 List *transnos)
+{
+	ListCell	   *lc;
+
+	/*
+	 * If both INITCONDs are null then the outcome depends on if the
+	 * finalfns match.
+	 */
+	foreach (lc, transnos)
+	{
+		int			transno = lfirst_int(lc);
+		AggStatePerTrans pertrans = &aggstate->pertrans[transno];
+
+		/*
+		 * if the transfns or transition state types are not the same then the
+		 * state can't be shared.
+		 */
+		if (aggtransfn != pertrans->transfn_oid ||
+			aggtranstype != pertrans->aggtranstype)
+			continue;
+
+		if (initValueIsNull && pertrans->initValueIsNull)
+			return transno;
+
+		if (!initValueIsNull && !pertrans->initValueIsNull &&
+			datumIsEqual(initValue, pertrans->initValue,
+						 pertrans->transtypeByVal, pertrans->transtypeLen))
+			return transno;
+	}
+	return -1;
+}
+
 void
 ExecEndAgg(AggState *node)
 {
 	PlanState  *outerPlan;
-	int			aggno;
+	int			transno;
 	int			numGroupingSets = Max(node->maxsets, 1);
 	int			setno;
 
@@ -2611,14 +2932,14 @@ ExecEndAgg(AggState *node)
 	if (node->sort_out)
 		tuplesort_end(node->sort_out);
 
-	for (aggno = 0; aggno < node->numaggs; aggno++)
+	for (transno = 0; transno < node->numtrans; transno++)
 	{
-		AggStatePerAgg peraggstate = &node->peragg[aggno];
+		AggStatePerTrans pertrans = &node->pertrans[transno];
 
 		for (setno = 0; setno < numGroupingSets; setno++)
 		{
-			if (peraggstate->sortstates[setno])
-				tuplesort_end(peraggstate->sortstates[setno]);
+			if (pertrans->sortstates[setno])
+				tuplesort_end(pertrans->sortstates[setno]);
 		}
 	}
 
@@ -2646,7 +2967,7 @@ ExecReScanAgg(AggState *node)
 	ExprContext *econtext = node->ss.ps.ps_ExprContext;
 	PlanState  *outerPlan = outerPlanState(node);
 	Agg		   *aggnode = (Agg *) node->ss.ps.plan;
-	int			aggno;
+	int			transno;
 	int			numGroupingSets = Max(node->maxsets, 1);
 	int			setno;
 
@@ -2678,16 +2999,16 @@ ExecReScanAgg(AggState *node)
 	}
 
 	/* Make sure we have closed any open tuplesorts */
-	for (aggno = 0; aggno < node->numaggs; aggno++)
+	for (transno = 0; transno < node->numtrans; transno++)
 	{
 		for (setno = 0; setno < numGroupingSets; setno++)
 		{
-			AggStatePerAgg peraggstate = &node->peragg[aggno];
+			AggStatePerTrans pertrans = &node->pertrans[transno];
 
-			if (peraggstate->sortstates[setno])
+			if (pertrans->sortstates[setno])
 			{
-				tuplesort_end(peraggstate->sortstates[setno]);
-				peraggstate->sortstates[setno] = NULL;
+				tuplesort_end(pertrans->sortstates[setno]);
+				pertrans->sortstates[setno] = NULL;
 			}
 		}
 	}
@@ -2811,10 +3132,12 @@ AggGetAggref(FunctionCallInfo fcinfo)
 {
 	if (fcinfo->context && IsA(fcinfo->context, AggState))
 	{
-		AggStatePerAgg curperagg = ((AggState *) fcinfo->context)->curperagg;
+		AggStatePerTrans curpertrans;
+
+		curpertrans = ((AggState *)fcinfo->context)->curpertrans;
 
-		if (curperagg)
-			return curperagg->aggref;
+		if (curpertrans)
+			return curpertrans->aggref;
 	}
 	return NULL;
 }
diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c
index ecf96f8..c371d4d 100644
--- a/src/backend/executor/nodeWindowAgg.c
+++ b/src/backend/executor/nodeWindowAgg.c
@@ -2218,20 +2218,16 @@ initialize_peragg(WindowAggState *winstate, WindowFunc *wfunc,
 											   numArguments);
 
 	/* build expression trees using actual argument & result types */
-	build_aggregate_fnexprs(inputTypes,
-							numArguments,
-							0,	/* no ordered-set window functions yet */
-							peraggstate->numFinalArgs,
-							false,		/* no variadic window functions yet */
-							aggtranstype,
-							wfunc->wintype,
-							wfunc->inputcollid,
-							transfn_oid,
-							invtransfn_oid,
-							finalfn_oid,
-							&transfnexpr,
-							&invtransfnexpr,
-							&finalfnexpr);
+	build_aggregate_transfn_expr(inputTypes,
+								 numArguments,
+								 0,	/* no ordered-set window functions yet */
+								 false,		/* no variadic window functions yet */
+								 wfunc->wintype,
+								 wfunc->inputcollid,
+								 transfn_oid,
+								 invtransfn_oid,
+								 &transfnexpr,
+								 &invtransfnexpr);
 
 	/* set up infrastructure for calling the transfn(s) and finalfn */
 	fmgr_info(transfn_oid, &peraggstate->transfn);
@@ -2245,6 +2241,13 @@ initialize_peragg(WindowAggState *winstate, WindowFunc *wfunc,
 
 	if (OidIsValid(finalfn_oid))
 	{
+		build_aggregate_finalfn_expr(inputTypes,
+									 peraggstate->numFinalArgs,
+									 aggtranstype,
+									 wfunc->wintype,
+									 wfunc->inputcollid,
+									 finalfn_oid,
+									 &finalfnexpr);
 		fmgr_info(finalfn_oid, &peraggstate->finalfn);
 		fmgr_info_set_expr((Node *) finalfnexpr, &peraggstate->finalfn);
 	}
diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c
index 478d8ca..e0fb8fb 100644
--- a/src/backend/parser/parse_agg.c
+++ b/src/backend/parser/parse_agg.c
@@ -1819,44 +1819,41 @@ resolve_aggregate_transtype(Oid aggfuncid,
 }
 
 /*
- * Create expression trees for the transition and final functions
+ * Create an expression tree for the transition functions
  * of an aggregate.  These are needed so that polymorphic functions
- * can be used within an aggregate --- without the expression trees,
+ * can be used within an aggregate --- without the expression tree,
  * such functions would not know the datatypes they are supposed to use.
  * (The trees will never actually be executed, however, so we can skimp
  * a bit on correctness.)
  *
- * agg_input_types, agg_state_type, agg_result_type identify the input,
- * transition, and result types of the aggregate.  These should all be
- * resolved to actual types (ie, none should ever be ANYELEMENT etc).
+ * agg_input_types and agg_state_type identifies the input types of the
+ * aggregate.  These should be resolved to actual types (ie, none should
+ * ever be ANYELEMENT etc).
  * agg_input_collation is the aggregate function's input collation.
  *
  * For an ordered-set aggregate, remember that agg_input_types describes
  * the direct arguments followed by the aggregated arguments.
  *
- * transfn_oid, invtransfn_oid and finalfn_oid identify the funcs to be
- * called; the latter two may be InvalidOid.
+ * transfn_oid and invtransfn_oid identify the funcs to be called; the
+ * latter may be InvalidOid, however if invtransfn_oid is set then
+ * transfn_oid must also be set.
  *
  * Pointers to the constructed trees are returned into *transfnexpr,
- * *invtransfnexpr and *finalfnexpr. If there is no invtransfn or finalfn,
- * the respective pointers are set to NULL.  Since use of the invtransfn is
- * optional, NULL may be passed for invtransfnexpr.
+ * *invtransfnexpr. If there is no invtransfn, the respective pointer is set
+ * to NULL.  Since use of the invtransfn is optional, NULL may be passed for
+ * invtransfnexpr.
  */
 void
-build_aggregate_fnexprs(Oid *agg_input_types,
+build_aggregate_transfn_expr(Oid *agg_input_types,
 						int agg_num_inputs,
 						int agg_num_direct_inputs,
-						int num_finalfn_inputs,
 						bool agg_variadic,
 						Oid agg_state_type,
-						Oid agg_result_type,
 						Oid agg_input_collation,
 						Oid transfn_oid,
 						Oid invtransfn_oid,
-						Oid finalfn_oid,
 						Expr **transfnexpr,
-						Expr **invtransfnexpr,
-						Expr **finalfnexpr)
+						Expr **invtransfnexpr)
 {
 	Param	   *argp;
 	List	   *args;
@@ -1919,13 +1916,24 @@ build_aggregate_fnexprs(Oid *agg_input_types,
 		else
 			*invtransfnexpr = NULL;
 	}
+}
 
-	/* see if we have a final function */
-	if (!OidIsValid(finalfn_oid))
-	{
-		*finalfnexpr = NULL;
-		return;
-	}
+/*
+ * Like build_aggregate_transfn_expr, but creates an expression tree for
+ * the final function of an aggregate, rather than the transition function.
+ */
+void
+build_aggregate_finalfn_expr(Oid *agg_input_types,
+						int num_finalfn_inputs,
+						Oid agg_state_type,
+						Oid agg_result_type,
+						Oid agg_input_collation,
+						Oid finalfn_oid,
+						Expr **finalfnexpr)
+{
+	Param	   *argp;
+	List	   *args;
+	int			i;
 
 	/*
 	 * Build expr tree for final function
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 303fc3c..7091a9d 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -609,9 +609,6 @@ typedef struct WholeRowVarExprState
 typedef struct AggrefExprState
 {
 	ExprState	xprstate;
-	List	   *aggdirectargs;	/* states of direct-argument expressions */
-	List	   *args;			/* states of aggregated-argument expressions */
-	ExprState  *aggfilter;		/* state of FILTER expression, if any */
 	int			aggno;			/* ID number for agg within its plan node */
 } AggrefExprState;
 
@@ -1825,6 +1822,7 @@ typedef struct GroupState
  */
 /* these structs are private in nodeAgg.c: */
 typedef struct AggStatePerAggData *AggStatePerAgg;
+typedef struct AggStatePerTransData *AggStatePerTrans;
 typedef struct AggStatePerGroupData *AggStatePerGroup;
 typedef struct AggStatePerPhaseData *AggStatePerPhase;
 
@@ -1833,14 +1831,16 @@ typedef struct AggState
 	ScanState	ss;				/* its first field is NodeTag */
 	List	   *aggs;			/* all Aggref nodes in targetlist & quals */
 	int			numaggs;		/* length of list (could be zero!) */
+	int			numtrans;		/* number of pertrans items */
 	AggStatePerPhase phase;		/* pointer to current phase data */
 	int			numphases;		/* number of phases */
 	int			current_phase;	/* current phase number */
 	FmgrInfo   *hashfunctions;	/* per-grouping-field hash fns */
 	AggStatePerAgg peragg;		/* per-Aggref information */
+	AggStatePerTrans pertrans;	/* per-Agg trans state information */
 	ExprContext **aggcontexts;	/* econtexts for long-lived data (per GS) */
 	ExprContext *tmpcontext;	/* econtext for input expressions */
-	AggStatePerAgg curperagg;	/* identifies currently active aggregate */
+	AggStatePerTrans curpertrans;	/* currently active trans state */
 	bool		input_done;		/* indicates end of input */
 	bool		agg_done;		/* indicates completion of Agg scan */
 	int			projected_set;	/* The last projected grouping set */
diff --git a/src/include/parser/parse_agg.h b/src/include/parser/parse_agg.h
index 6a5f9bb..e2b3894 100644
--- a/src/include/parser/parse_agg.h
+++ b/src/include/parser/parse_agg.h
@@ -35,19 +35,23 @@ extern Oid resolve_aggregate_transtype(Oid aggfuncid,
 							Oid *inputTypes,
 							int numArguments);
 
-extern void build_aggregate_fnexprs(Oid *agg_input_types,
+extern void build_aggregate_transfn_expr(Oid *agg_input_types,
 						int agg_num_inputs,
 						int agg_num_direct_inputs,
-						int num_finalfn_inputs,
 						bool agg_variadic,
 						Oid agg_state_type,
-						Oid agg_result_type,
 						Oid agg_input_collation,
 						Oid transfn_oid,
 						Oid invtransfn_oid,
-						Oid finalfn_oid,
 						Expr **transfnexpr,
-						Expr **invtransfnexpr,
+						Expr **invtransfnexpr);
+
+extern void build_aggregate_finalfn_expr(Oid *agg_input_types,
+						int num_finalfn_inputs,
+						Oid agg_state_type,
+						Oid agg_result_type,
+						Oid agg_input_collation,
+						Oid finalfn_oid,
 						Expr **finalfnexpr);
 
 #endif   /* PARSE_AGG_H */
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 8852051..4dad4fe 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1580,3 +1580,171 @@ select least_agg(variadic array[q1,q2]) from int8_tbl;
  -4567890123456789
 (1 row)
 
+-- test aggregates with common transition functions share the same states
+begin work;
+create type avg_state as (total bigint, count bigint);
+create or replace function avg_transfn(state avg_state, n int) returns avg_state as
+$$
+declare new_state avg_state;
+begin
+	raise notice 'avg_transfn called with %', n;
+	if state is null then
+		if n is not null then
+			new_state.total := n;
+			new_state.count := 1;
+			return new_state;
+		end if;
+		return null;
+	elsif n is not null then
+		state.total := state.total + n;
+		state.count := state.count + 1;
+		return state;
+	end if;
+
+	return null;
+end
+$$ language plpgsql;
+create function avg_finalfn(state avg_state) returns int4 as
+$$
+begin
+	if state is null then
+		return NULL;
+	else
+		return state.total / state.count;
+	end if;
+end
+$$ language plpgsql;
+create function sum_finalfn(state avg_state) returns int4 as
+$$
+begin
+	if state is null then
+		return NULL;
+	else
+		return state.total;
+	end if;
+end
+$$ language plpgsql;
+create aggregate my_avg(int4)
+(
+   stype = avg_state,
+   sfunc = avg_transfn,
+   finalfunc = avg_finalfn
+);
+create aggregate my_sum(int4)
+(
+   stype = avg_state,
+   sfunc = avg_transfn,
+   finalfunc = sum_finalfn
+);
+-- aggregate state should be shared as transfn is the same for both aggs.
+select my_avg(one),my_sum(one) from (values(1,2),(3,4)) t(one,two);
+NOTICE:  avg_transfn called with 1
+NOTICE:  avg_transfn called with 3
+ my_avg | my_sum 
+--------+--------
+      2 |      4
+(1 row)
+
+-- shouldn't share states due to the distinctness not matching.
+select my_avg(distinct one),my_sum(one) from (values(1,2),(3,4)) t(one,two);
+NOTICE:  avg_transfn called with 1
+NOTICE:  avg_transfn called with 3
+NOTICE:  avg_transfn called with 1
+NOTICE:  avg_transfn called with 3
+ my_avg | my_sum 
+--------+--------
+      2 |      4
+(1 row)
+
+-- this should not share the state due to different input columns.
+select my_avg(one),my_sum(two) from (values(1,2),(3,4)) t(one,two);
+NOTICE:  avg_transfn called with 2
+NOTICE:  avg_transfn called with 1
+NOTICE:  avg_transfn called with 4
+NOTICE:  avg_transfn called with 3
+ my_avg | my_sum 
+--------+--------
+      2 |      6
+(1 row)
+
+create aggregate my_sum_init(int4)
+(
+   stype = avg_state,
+   sfunc = avg_transfn,
+   finalfunc = sum_finalfn,
+   initcond = '(10,0)'
+);
+create aggregate my_avg_init(int4)
+(
+   stype = avg_state,
+   sfunc = avg_transfn,
+   finalfunc = avg_finalfn,
+   initcond = '(5,0)'
+);
+-- Varying INITCONDs should cause the states not to be shared.
+select my_avg_init(one),my_sum_init(one) from (values(1,2),(3,4)) t(one,two);
+NOTICE:  avg_transfn called with 1
+NOTICE:  avg_transfn called with 1
+NOTICE:  avg_transfn called with 3
+NOTICE:  avg_transfn called with 3
+ my_avg_init | my_sum_init 
+-------------+-------------
+           4 |          14
+(1 row)
+
+rollback;
+-- test aggregate state sharing to ensure it works if one aggregate has a
+-- finalfn and the other one has none.
+begin work;
+create or replace function sum_transfn(state int4, n int4) returns int4 as
+$$
+declare new_state int4;
+begin
+	raise notice 'sum_transfn called with %', n;
+	if state is null then
+		if n is not null then
+			new_state := n;
+			return new_state;
+		end if;
+		return null;
+	elsif n is not null then
+		state := state + n;
+		return state;
+	end if;
+
+	return null;
+end
+$$ language plpgsql;
+create function halfsum_finalfn(state int4) returns int4 as
+$$
+begin
+	if state is null then
+		return NULL;
+	else
+		return state / 2;
+	end if;
+end
+$$ language plpgsql;
+create aggregate my_sum(int4)
+(
+   stype = int4,
+   sfunc = sum_transfn
+);
+create aggregate my_half_sum(int4)
+(
+   stype = int4,
+   sfunc = sum_transfn,
+   finalfunc = halfsum_finalfn
+);
+-- Agg state should be shared even though my_sum has no finalfn
+select my_sum(one),my_half_sum(one) from (values(1),(2),(3),(4)) t(one);
+NOTICE:  sum_transfn called with 1
+NOTICE:  sum_transfn called with 2
+NOTICE:  sum_transfn called with 3
+NOTICE:  sum_transfn called with 4
+ my_sum | my_half_sum 
+--------+-------------
+     10 |           5
+(1 row)
+
+rollback;
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index a84327d..42c3b3c 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -590,3 +590,151 @@ drop view aggordview1;
 -- variadic aggregates
 select least_agg(q1,q2) from int8_tbl;
 select least_agg(variadic array[q1,q2]) from int8_tbl;
+
+
+-- test aggregates with common transition functions share the same states
+begin work;
+
+create type avg_state as (total bigint, count bigint);
+
+create or replace function avg_transfn(state avg_state, n int) returns avg_state as
+$$
+declare new_state avg_state;
+begin
+	raise notice 'avg_transfn called with %', n;
+	if state is null then
+		if n is not null then
+			new_state.total := n;
+			new_state.count := 1;
+			return new_state;
+		end if;
+		return null;
+	elsif n is not null then
+		state.total := state.total + n;
+		state.count := state.count + 1;
+		return state;
+	end if;
+
+	return null;
+end
+$$ language plpgsql;
+
+create function avg_finalfn(state avg_state) returns int4 as
+$$
+begin
+	if state is null then
+		return NULL;
+	else
+		return state.total / state.count;
+	end if;
+end
+$$ language plpgsql;
+
+create function sum_finalfn(state avg_state) returns int4 as
+$$
+begin
+	if state is null then
+		return NULL;
+	else
+		return state.total;
+	end if;
+end
+$$ language plpgsql;
+
+create aggregate my_avg(int4)
+(
+   stype = avg_state,
+   sfunc = avg_transfn,
+   finalfunc = avg_finalfn
+);
+
+create aggregate my_sum(int4)
+(
+   stype = avg_state,
+   sfunc = avg_transfn,
+   finalfunc = sum_finalfn
+);
+
+-- aggregate state should be shared as transfn is the same for both aggs.
+select my_avg(one),my_sum(one) from (values(1,2),(3,4)) t(one,two);
+
+-- shouldn't share states due to the distinctness not matching.
+select my_avg(distinct one),my_sum(one) from (values(1,2),(3,4)) t(one,two);
+
+-- this should not share the state due to different input columns.
+select my_avg(one),my_sum(two) from (values(1,2),(3,4)) t(one,two);
+
+
+create aggregate my_sum_init(int4)
+(
+   stype = avg_state,
+   sfunc = avg_transfn,
+   finalfunc = sum_finalfn,
+   initcond = '(10,0)'
+);
+
+create aggregate my_avg_init(int4)
+(
+   stype = avg_state,
+   sfunc = avg_transfn,
+   finalfunc = avg_finalfn,
+   initcond = '(5,0)'
+);
+
+-- Varying INITCONDs should cause the states not to be shared.
+select my_avg_init(one),my_sum_init(one) from (values(1,2),(3,4)) t(one,two);
+
+rollback;
+
+-- test aggregate state sharing to ensure it works if one aggregate has a
+-- finalfn and the other one has none.
+begin work;
+
+create or replace function sum_transfn(state int4, n int4) returns int4 as
+$$
+declare new_state int4;
+begin
+	raise notice 'sum_transfn called with %', n;
+	if state is null then
+		if n is not null then
+			new_state := n;
+			return new_state;
+		end if;
+		return null;
+	elsif n is not null then
+		state := state + n;
+		return state;
+	end if;
+
+	return null;
+end
+$$ language plpgsql;
+
+create function halfsum_finalfn(state int4) returns int4 as
+$$
+begin
+	if state is null then
+		return NULL;
+	else
+		return state / 2;
+	end if;
+end
+$$ language plpgsql;
+
+create aggregate my_sum(int4)
+(
+   stype = int4,
+   sfunc = sum_transfn
+);
+
+create aggregate my_half_sum(int4)
+(
+   stype = int4,
+   sfunc = sum_transfn,
+   finalfunc = halfsum_finalfn
+);
+
+-- Agg state should be shared even though my_sum has no finalfn
+select my_sum(one),my_half_sum(one) from (values(1),(2),(3),(4)) t(one);
+
+rollback;
#11Tom Lane
tgl@sss.pgh.pa.us
In reply to: Heikki Linnakangas (#10)
Re: Sharing aggregate states between different aggregate functions

Heikki Linnakangas <hlinnaka@iki.fi> writes:

On 07/28/2015 04:14 AM, David Rowley wrote:

I'd not thought of an input function being volatile before, but I guess
it's possible, which makes me a bit scared that we could be treading on
ground we shouldn't be. I know it's more of an output function thing than
an input function thing, but a GUC like extra_float_digits could cause
problems here.

GUC dependence is considered to make a function stable not volatile.
(I realize you can probably break that if you try hard enough, but
then you get to keep both pieces.)

Yeah, a volatile input function seems highly unlikely, but who knows.

We have a project policy against volatile I/O functions. One reason why
is that it would break the assumption that record_in/record_out can be
marked stable. I think there are other reasons too.

BTW, we're also not checking if the transition or final functions are
volatile. But that was the same before this patch too.

Up to now it hasn't mattered. Possibly this patch should refuse to
combine states across volatile transition functions?

regards, tom lane

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#12Heikki Linnakangas
hlinnaka@iki.fi
In reply to: Tom Lane (#11)
Re: Sharing aggregate states between different aggregate functions

On 07/28/2015 07:18 PM, Tom Lane wrote:

Heikki Linnakangas <hlinnaka@iki.fi> writes:

On 07/28/2015 04:14 AM, David Rowley wrote:
Yeah, a volatile input function seems highly unlikely, but who knows.

We have a project policy against volatile I/O functions. One reason why
is that it would break the assumption that record_in/record_out can be
marked stable. I think there are other reasons too.

Ok. In the latest patch I'm not relying that anyway, so it doesn't
matter, but good to know.

BTW, we're also not checking if the transition or final functions are
volatile. But that was the same before this patch too.

Up to now it hasn't mattered.

Yes, it has. We combine identical aggregates even without this patch.
For example:

SELECT sum(x), sum(x) FROM foo

Sum(x) gets calculated only once. If its transition function or final
function was volatile, that could produce two different results if we
ran the aggregate twice.

No-one's complained so far, and I can't think of a use case for a
volatile transition or final function, so maybe it's not worth worrying
about. Then again, checking for the volatility of those functions would
be easy too.

- Heikki

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#13Tom Lane
tgl@sss.pgh.pa.us
In reply to: Heikki Linnakangas (#12)
Re: Sharing aggregate states between different aggregate functions

Heikki Linnakangas <hlinnaka@iki.fi> writes:

On 07/28/2015 07:18 PM, Tom Lane wrote:

Heikki Linnakangas <hlinnaka@iki.fi> writes:

BTW, we're also not checking if the transition or final functions are
volatile. But that was the same before this patch too.

Up to now it hasn't mattered.

Yes, it has. We combine identical aggregates even without this patch.

Ah, right, how'd I forget about that?

No-one's complained so far, and I can't think of a use case for a
volatile transition or final function, so maybe it's not worth worrying
about. Then again, checking for the volatility of those functions would
be easy too.

Given the lack of complaints, I tend to agree that it's not the province
of this patch to make a change in that policy.

regards, tom lane

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#14David Rowley
david.rowley@2ndquadrant.com
In reply to: Heikki Linnakangas (#10)
1 attachment(s)
Re: Sharing aggregate states between different aggregate functions

On 29 July 2015 at 03:45, Heikki Linnakangas <hlinnaka@iki.fi> wrote:

On 07/28/2015 04:14 AM, David Rowley wrote:

On 27 July 2015 at 20:11, Heikki Linnakangas <hlinnaka@iki.fi> wrote:

On 07/27/2015 08:34 AM, David Rowley wrote:

In this function I also wasn't quite sure if it was with comparing both

non-NULL INITCOND's here. I believe my code comments may slightly
contradict what the code actually does, as the comments talk about them
having to match, but the code just bails if any are non-NULL. The
reason I
didn't check them was because it seems inevitable that some duplicate
work
needs to be done when setting up the INITCOND. Perhaps it's worth it?

It would be nice to handle non-NULL initconds. I think you'll have to
check that the input function isn't volatile. Or perhaps just call the
input function, and check that the resulting Datum is byte-per-byte
identical, although that might be awkward to do with the current code
structure.

I've not done anything with this.
I'd not thought of an input function being volatile before, but I guess
it's possible, which makes me a bit scared that we could be treading on
ground we shouldn't be. I know it's more of an output function thing than
an input function thing, but a GUC like extra_float_digits could cause
problems here.

Yeah, a volatile input function seems highly unlikely, but who knows. BTW,
we're also not checking if the transition or final functions are volatile.
But that was the same before this patch too.

It sure would be nice to support the built-in float aggregates, so I took
a stab at this. I heavily restructured the code again, so that there are
now two separate steps. First, we check for any identical Aggrefs that
could be shared. If that fails, we proceed to the permission checks, look
up the transition function and build the initial datum. And then we call
another function that tries to find an existing, compatible per-trans
structure. I think this actually looks better than before, and checking for
identical init values is now easy. This does lose one optimization: if
there are two aggregates with identical transition functions and final
functions, they are not merged into a single per-Agg struct. They still
share the same per-Trans struct, though, and I think that's enough.

How does the attached patch look to you? The comments still need some
cleanup, in particular, the explanations of the different scenarios don't
belong where they are anymore.

I've read over the patch and you've managed to implement the init value
checking much more cleanly than I had imagined it to be.
I like the 2 stage checking.

Attached is a delta patched which is based
on sharing_aggstate-heikki-2.patch to fix up the out-dated comments and
also a few more test scenarios which test the sharing works with matching
INITCOND and that it does not when they don't match.

What do you think?

BTW, the permission checks were not correct before. You cannot skip the
check on the transition function when you're sharing the per-trans state.
We check that the aggregate's owner has permission to execute the
transition function, and the previous aggregate whose state value we're
sharing might have different owner.

oops, thank for noticing that and fixing.

Regards

David Rowley

--
David Rowley http://www.2ndQuadrant.com/
<http://www.2ndquadrant.com/&gt;
PostgreSQL Development, 24x7 Support, Training & Services

Attachments:

sharing_aggstate-heikki-2_delta1.patchapplication/octet-stream; name=sharing_aggstate-heikki-2_delta1.patchDownload
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 3162980..ffe7120 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -684,7 +684,7 @@ initialize_aggregates(AggState *aggstate,
  */
 static void
 advance_transition_function(AggState *aggstate,
-AggStatePerTrans pertrans,
+							AggStatePerTrans pertrans,
 							AggStatePerGroup pergroupstate)
 {
 	FunctionCallInfo fcinfo = &pertrans->transfn_fcinfo;
@@ -993,7 +993,7 @@ process_ordered_aggregate_single(AggState *aggstate,
  */
 static void
 process_ordered_aggregate_multi(AggState *aggstate,
-AggStatePerTrans pertrans,
+								AggStatePerTrans pertrans,
 								AggStatePerGroup pergroupstate)
 {
 	MemoryContext workcontext = aggstate->tmpcontext->ecxt_per_tuple_memory;
@@ -1257,8 +1257,8 @@ finalize_aggregates(AggState *aggstate,
 
 	for (aggno = 0; aggno < aggstate->numaggs; aggno++)
 	{
-		AggStatePerAgg peragg = &peraggs[aggno];
-		int			transno = peragg->transno;
+		AggStatePerAgg	peragg = &peraggs[aggno];
+		int				transno = peragg->transno;
 		AggStatePerTrans pertrans = &aggstate->pertrans[transno];
 		AggStatePerGroup pergroupstate;
 
@@ -2306,9 +2306,41 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 		aggstate->pergroup = pergroup;
 	}
 
-	/*
+	/* -----------------
 	 * Perform lookups of aggregate function info, and initialize the
 	 * unchanging fields of the per-agg and per-trans data.
+	 *
+	 * Here we perform optimization in the form of 'merging' duplicate
+	 * aggregate functions so that their state and final values are re-used
+	 * rather than needlessly being re-calculated independently. We also
+	 * perform a 'semi-merge' of aggregates which can share the same transition
+	 * state as another aggregate, but cannot share the same peragg due to
+	 * having different final functions.
+	 *
+	 * Scenarios:
+	 *
+	 * 1.	An aggregate function appears more than once in query:
+	 *
+	 *		SELECT SUM(x) FROM ... HAVING SUM(x) > 0
+	 *
+	 * Since in this case the aggregates are both the same we can optimize by
+	 * only calculating aggregate state and calling the final function just
+	 * once. In this case both aggregates will share the same 'aggno' value.
+	 *
+	 * 2.	Two different aggregate functions appear in the query but the two
+	 *		functions happen to share the same transition function and initial
+	 *		value, but have different final functions.
+	 *
+	 *		SELECT SUM(x), AVG(x) FROM ...
+	 *
+	 * In this case we must create a new peragg for the varying aggregate, but
+	 * since the transition function and initial value are the same, both
+	 * aggregate functions may share the same transition state.
+	 *
+	 * For either of these optimizations to be valid the aggregate parameters
+	 * mustn't contain any volatile functions and must be exactly the same,
+	 * including any modifiers such as ORDER BY, DISTINCT and FILTER.
+	 * -----------------
 	 */
 	aggno = -1;
 	transno = -1;
@@ -2338,17 +2370,17 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 		/* Planner should have assigned aggregate to correct level */
 		Assert(aggref->agglevelsup == 0);
 
-		/*
-		 * For performance reasons we detect duplicate aggregates (for
-		 * example, "SELECT sum(x) ... HAVING sum(x) > 0"). When duplicates
-		 * are detected, we only make an AggStatePerAgg struct for the first
-		 * one. The clones are simply pointed at the same result entry by
-		 * giving them duplicate aggno values.
-		 */
+		/* 1. check for already processed aggs which can be re-used */
 		existing_aggno = find_compatible_peragg(aggref, aggstate, aggno,
 												&same_input_transnos);
 		if (existing_aggno != -1)
 		{
+			/*
+			 * existing compatible agg found, just reuse the existing one for
+			 * this aggregate. The existing one is already initialized, so the
+			 * only thing we need to setup is to point it to the existing
+			 * aggregate's aggno which it should use.
+			 */
 			aggrefstate->aggno = existing_aggno;
 			continue;
 		}
@@ -2464,11 +2496,11 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 			initValue = GetAggInitVal(textInitVal, aggtranstype);
 
 		/*
-		 * Build working state for invoking the transition function, or look
+		 * 2. Build working state for invoking the transition function, or look
 		 * up previously initialized working state, if we can share it.
 		 *
 		 * find_compatible_peragg() already collected a list of per-Trans's
-		 * with the same inputs. Check if any of them have the transition
+		 * with the same inputs. Check if any of them have the same transition
 		 * function and initial value.
 		 */
 		existing_transno = find_compatible_pertrans(aggstate, aggref,
@@ -2477,6 +2509,12 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 													same_input_transnos);
 		if (existing_transno != -1)
 		{
+			/*
+			 * existing compatible trans found, just reuse the existing one for
+			 * this aggregate .The existing one is already initialized, so the
+			 * only thing we need to setup is to point it to the existing
+			 * 'transno'.
+			 */
 			pertrans = &pertransstates[existing_transno];
 			peragg->transno = existing_transno;
 		}
@@ -2757,60 +2795,14 @@ GetAggInitVal(Datum textInitVal, Oid transtype)
 /*
  * find_compatible_peragg - search for a previously initialized per-Agg struct
  *
- * Searches the previously looked at aggregates in order to find a compatible
- * aggregate. If a positive match is found then foundaggno is set to the
- * aggregate which matches.
+ * Searches the previously looked at aggregates to find one which is compatible
+ * with this one, with the same input parameters.
+ * When an compatible aggregate cannot be found -1 is returned.
  *
  * As a side-effect, this also collects a list of existing per-Trans structs
  * with matching inputs. If no identical AggRef is found, the list is passed
  * later to find_compatible_perstate, to see if we can at least reuse the
  * state value of another aggregate.
- *
- * FIXME: The below examples are a good, but they don't belong here anymore.
- *
- * Scenario 1 -- An aggregate function appears more than once in query:
- *
- *		SELECT SUM(x) FROM ... HAVING SUM(x) > 0
- *
- * Since in this case the aggregates are both the same we can optimize by
- * only calculating aggregate state and calling the finalfn just once. This
- * would be an AGGREF_EXACT_MATCH, meaning both the state and the final
- * function call are shared.
- *
- * Scenario 2 -- Two different aggregate functions appear in the query but
- *				 the two functions happen to share the same transfn, but have
- *				 different finalfn.
- *
- *		SELECT SUM(x), AVG(x) FROM ...
- *
- * Since in our case these two aggregates both share the same transfn, but
- * naturally they have different finalfns. This situation is classed as an
- * AGGREF_STATE_MATCH. This means that the same state can be shared by both
- * aggregates. Since the finalfn call is not the same this cannot be reused.
- * For this case to be valid the INITCOND of the aggregate, if one exists, must
- * also match.
- *
- * Scenario 3 -- The same aggregate function is called with different
- *				 parameters.
- *
- *		SELECT SUM(x),SUM(DISTINCT x) FROM ...
- *		SELECT SUM(x),SUM(y) FROM ...
- *		SELECT SUM(x),SUM(x) FILTER(WHERE x > 0) FROM ...
- *
- * All three of the above queries cannot share the same state and have to be
- * calculated independently.
- *
- * Scenario 4 -- Different aggregates with the same parameters and the same
- *				 transfn and finalfn.
- *
- *		SELECT SUM(x),SUM2(x) FROM ...
- *
- * A perhaps unlikely scenario where two aggregate functions exist which have,
- * both the same transfn and the same finalfn. In this case we can report an
- * AGGREF_EXACT_MATCH, providing the INITCOND of both aggregates are the same.
- *
- *
- * Returns -1 if no match found.
  */
 static int
 find_compatible_peragg(Aggref *newagg, AggState *aggstate,
@@ -2828,10 +2820,13 @@ find_compatible_peragg(Aggref *newagg, AggState *aggstate,
 	peraggs = aggstate->peragg;
 
 	/*
-	 * Search through the list of already seen aggregates. We'll stop when we
-	 * find an exact match, but until then we'll note any state matches that
-	 * we find. We may have to fall back on these should we fail to find an
-	 * exact match.
+	 * Search through the list of already seen aggregates. If we find an
+	 * existing aggregate with the same aggregate function and input parameters
+	 * as an existing one, then we can re-use that one. While searching we'll
+	 * collect a list of Aggrefs with the same input parameters. The caller may
+	 * use these when a matching Aggref cannot be found. Potentially this list
+	 * could contain a transno which this aggregate can re-use the transition
+	 * state from.
 	 */
 	for (aggno = 0; aggno <= lastaggno; aggno++)
 	{
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 7091a9d..5796de8 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1837,7 +1837,7 @@ typedef struct AggState
 	int			current_phase;	/* current phase number */
 	FmgrInfo   *hashfunctions;	/* per-grouping-field hash fns */
 	AggStatePerAgg peragg;		/* per-Aggref information */
-	AggStatePerTrans pertrans;	/* per-Agg trans state information */
+	AggStatePerTrans pertrans;	/* per-Trans state information */
 	ExprContext **aggcontexts;	/* econtexts for long-lived data (per GS) */
 	ExprContext *tmpcontext;	/* econtext for input expressions */
 	AggStatePerTrans curpertrans;	/* currently active trans state */
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 4dad4fe..de826b5 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1636,8 +1636,17 @@ create aggregate my_sum(int4)
    sfunc = avg_transfn,
    finalfunc = sum_finalfn
 );
+-- aggregate state should be shared as aggs are the same.
+select my_avg(one),my_avg(one) from (values(1),(3)) t(one);
+NOTICE:  avg_transfn called with 1
+NOTICE:  avg_transfn called with 3
+ my_avg | my_avg 
+--------+--------
+      2 |      2
+(1 row)
+
 -- aggregate state should be shared as transfn is the same for both aggs.
-select my_avg(one),my_sum(one) from (values(1,2),(3,4)) t(one,two);
+select my_avg(one),my_sum(one) from (values(1),(3)) t(one);
 NOTICE:  avg_transfn called with 1
 NOTICE:  avg_transfn called with 3
  my_avg | my_sum 
@@ -1646,7 +1655,7 @@ NOTICE:  avg_transfn called with 3
 (1 row)
 
 -- shouldn't share states due to the distinctness not matching.
-select my_avg(distinct one),my_sum(one) from (values(1,2),(3,4)) t(one,two);
+select my_avg(distinct one),my_sum(one) from (values(1),(3)) t(one);
 NOTICE:  avg_transfn called with 1
 NOTICE:  avg_transfn called with 3
 NOTICE:  avg_transfn called with 1
@@ -1656,6 +1665,16 @@ NOTICE:  avg_transfn called with 3
       2 |      4
 (1 row)
 
+-- shouldn't share states due to the filter clause not matching.
+select my_avg(one) filter (where one > 1),my_sum(one) from (values(1),(3)) t(one);
+NOTICE:  avg_transfn called with 1
+NOTICE:  avg_transfn called with 3
+NOTICE:  avg_transfn called with 3
+ my_avg | my_sum 
+--------+--------
+      3 |      4
+(1 row)
+
 -- this should not share the state due to different input columns.
 select my_avg(one),my_sum(two) from (values(1,2),(3,4)) t(one,two);
 NOTICE:  avg_transfn called with 2
@@ -1667,6 +1686,7 @@ NOTICE:  avg_transfn called with 3
       2 |      6
 (1 row)
 
+-- test that aggs with the same sfunc and initcond share the same agg state
 create aggregate my_sum_init(int4)
 (
    stype = avg_state,
@@ -1679,17 +1699,33 @@ create aggregate my_avg_init(int4)
    stype = avg_state,
    sfunc = avg_transfn,
    finalfunc = avg_finalfn,
-   initcond = '(5,0)'
+   initcond = '(10,0)'
+);
+create aggregate my_avg_init2(int4)
+(
+   stype = avg_state,
+   sfunc = avg_transfn,
+   finalfunc = avg_finalfn,
+   initcond = '(4,0)'
 );
+-- state should be shared if INITCONDs are matching
+select my_sum_init(one),my_avg_init(one) from (values(1),(3)) t(one);
+NOTICE:  avg_transfn called with 1
+NOTICE:  avg_transfn called with 3
+ my_sum_init | my_avg_init 
+-------------+-------------
+          14 |           7
+(1 row)
+
 -- Varying INITCONDs should cause the states not to be shared.
-select my_avg_init(one),my_sum_init(one) from (values(1,2),(3,4)) t(one,two);
+select my_sum_init(one),my_avg_init2(one) from (values(1),(3)) t(one);
 NOTICE:  avg_transfn called with 1
 NOTICE:  avg_transfn called with 1
 NOTICE:  avg_transfn called with 3
 NOTICE:  avg_transfn called with 3
- my_avg_init | my_sum_init 
--------------+-------------
-           4 |          14
+ my_sum_init | my_avg_init2 
+-------------+--------------
+          14 |            4
 (1 row)
 
 rollback;
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 42c3b3c..8d501dc 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -655,16 +655,22 @@ create aggregate my_sum(int4)
    finalfunc = sum_finalfn
 );
 
+-- aggregate state should be shared as aggs are the same.
+select my_avg(one),my_avg(one) from (values(1),(3)) t(one);
+
 -- aggregate state should be shared as transfn is the same for both aggs.
-select my_avg(one),my_sum(one) from (values(1,2),(3,4)) t(one,two);
+select my_avg(one),my_sum(one) from (values(1),(3)) t(one);
 
 -- shouldn't share states due to the distinctness not matching.
-select my_avg(distinct one),my_sum(one) from (values(1,2),(3,4)) t(one,two);
+select my_avg(distinct one),my_sum(one) from (values(1),(3)) t(one);
+
+-- shouldn't share states due to the filter clause not matching.
+select my_avg(one) filter (where one > 1),my_sum(one) from (values(1),(3)) t(one);
 
 -- this should not share the state due to different input columns.
 select my_avg(one),my_sum(two) from (values(1,2),(3,4)) t(one,two);
 
-
+-- test that aggs with the same sfunc and initcond share the same agg state
 create aggregate my_sum_init(int4)
 (
    stype = avg_state,
@@ -678,11 +684,22 @@ create aggregate my_avg_init(int4)
    stype = avg_state,
    sfunc = avg_transfn,
    finalfunc = avg_finalfn,
-   initcond = '(5,0)'
+   initcond = '(10,0)'
 );
 
+create aggregate my_avg_init2(int4)
+(
+   stype = avg_state,
+   sfunc = avg_transfn,
+   finalfunc = avg_finalfn,
+   initcond = '(4,0)'
+);
+
+-- state should be shared if INITCONDs are matching
+select my_sum_init(one),my_avg_init(one) from (values(1),(3)) t(one);
+
 -- Varying INITCONDs should cause the states not to be shared.
-select my_avg_init(one),my_sum_init(one) from (values(1,2),(3,4)) t(one,two);
+select my_sum_init(one),my_avg_init2(one) from (values(1),(3)) t(one);
 
 rollback;
 
#15Heikki Linnakangas
hlinnaka@iki.fi
In reply to: David Rowley (#14)
Re: Sharing aggregate states between different aggregate functions

On 08/03/2015 08:53 AM, David Rowley wrote:

Attached is a delta patched which is based
on sharing_aggstate-heikki-2.patch to fix up the out-dated comments and
also a few more test scenarios which test the sharing works with matching
INITCOND and that it does not when they don't match.

What do you think?

I committed this, after some more cleanup of the comments. Thanks!

- Heikki

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#16David Rowley
david.rowley@2ndquadrant.com
In reply to: Heikki Linnakangas (#15)
Re: Sharing aggregate states between different aggregate functions

On 5 August 2015 at 03:03, Heikki Linnakangas <hlinnaka@iki.fi> wrote:

On 08/03/2015 08:53 AM, David Rowley wrote:

Attached is a delta patched which is based
on sharing_aggstate-heikki-2.patch to fix up the out-dated comments and
also a few more test scenarios which test the sharing works with matching
INITCOND and that it does not when they don't match.

What do you think?

I committed this, after some more cleanup of the comments. Thanks!

Great! Thanks for doing the cleanups and committing it.

--
David Rowley http://www.2ndQuadrant.com/
<http://www.2ndquadrant.com/&gt;
PostgreSQL Development, 24x7 Support, Training & Services