Hash support for grouping sets
Herewith a patch for doing grouping sets via hashing or mixed hashing
and sorting.
The principal objective is to pick the combination of grouping sets whose
estimated combined size fits in work_mem and which minimizes the number of
sorting passes we still need to do over the data, and hash those sets.
(Yes, this is a knapsack problem.)
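To illustrate the solver's interface (a made-up caller, purely for
illustration; the function choose_sets_to_hash and the figures here are
invented, while in reality the weights would be the planner's hash table
size estimates, pre-scaled so that the weight limit corresponds to
work_mem):

#include "postgres.h"

#include "lib/knapsack.h"
#include "nodes/bitmapset.h"

/*
 * Hypothetical caller of DiscreteKnapsack() (added by this patch in
 * src/backend/lib/knapsack.c): four hashable grouping sets whose size
 * estimates are scaled to integer units where 10 units = work_mem, and
 * where hashing any one set saves one sort pass (so each has value 1.0).
 */
static Bitmapset *
choose_sets_to_hash(void)
{
    int     weights[] = {2, 3, 4, 9};   /* scaled size estimates */
    double  values[] = {1.0, 1.0, 1.0, 1.0};    /* sorts saved */

    /* returns {0, 1, 2}: total weight 9 <= 10, total value 3 */
    return DiscreteKnapsack(10, 4, weights, values);
}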
This is achieved by adding another strategy to the Agg node: AGG_MIXED
means that both hashing and (sorted or plain) grouping happen. In
addition, support for multiple hash tables in AGG_HASHED mode is added.
Sample plans look like this:
explain select two,ten from onek group by cube(two,ten);
                          QUERY PLAN
--------------------------------------------------------------
 MixedAggregate  (cost=0.00..89.33 rows=33 width=8)
   Hash Key: two, ten
   Hash Key: two
   Hash Key: ten
   Group Key: ()
   ->  Seq Scan on onek  (cost=0.00..79.00 rows=1000 width=8)
explain select two,ten from onek group by two, cube(ten,twenty);
                           QUERY PLAN
---------------------------------------------------------------
 HashAggregate  (cost=86.50..100.62 rows=162 width=12)
   Hash Key: two, ten, twenty
   Hash Key: two, ten
   Hash Key: two
   Hash Key: two, twenty
   ->  Seq Scan on onek  (cost=0.00..79.00 rows=1000 width=12)
set work_mem='64kB';
explain select count(*) from tenk1
group by grouping sets (unique1,thousand,hundred,ten,two);
                               QUERY PLAN
------------------------------------------------------------------------
 MixedAggregate  (cost=1535.39..3023.89 rows=11112 width=28)
   Hash Key: two
   Hash Key: ten
   Hash Key: hundred
   Group Key: unique1
   Sort Key: thousand
     Group Key: thousand
   ->  Sort  (cost=1535.39..1560.39 rows=10000 width=20)
         Sort Key: unique1
         ->  Seq Scan on tenk1  (cost=0.00..458.00 rows=10000 width=20)
(10 rows)
Columns with unhashable or unsortable data types are handled
appropriately, though obviously each individual grouping set must end up
containing only columns compatible with a single strategy (all hashable,
or all sortable).
There is one current weakness which I don't see a good solution for: the
planner code still has to pick a single value for group_pathkeys before
planning the input path. This means that we sometimes can't choose a
minimal set of sorts, because we may have chosen a sort order for a
grouping set that should have been hashed, for example:
explain select count(*) from tenk1
group by grouping sets (two,unique1,thousand,hundred,ten);
                               QUERY PLAN
------------------------------------------------------------------------
 MixedAggregate  (cost=1535.39..4126.28 rows=11112 width=28)
   Hash Key: ten
   Hash Key: hundred
   Group Key: two
   Sort Key: unique1
     Group Key: unique1
   Sort Key: thousand
     Group Key: thousand
   ->  Sort  (cost=1535.39..1560.39 rows=10000 width=20)
         Sort Key: two
         ->  Seq Scan on tenk1  (cost=0.00..458.00 rows=10000 width=20)
(3 sorts, vs. 2 in the previous example, which listed the same grouping
sets in a different order)
Current status of the work: probably still needs cleanup, maybe some
better regression tests, and Andres has expressed some concern over the
performance impact of additional complexity in advance_aggregates; it
would be useful if this could be tested. But it should be reviewable
as-is.
--
Andrew (irc:RhodiumToad)
Attachments:
gshash10.patch (text/x-patch)
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index c762fb0..6c787ea 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -972,6 +972,10 @@ ExplainNode(PlanState *planstate, List *ancestors,
pname = "HashAggregate";
strategy = "Hashed";
break;
+ case AGG_MIXED:
+ pname = "MixedAggregate";
+ strategy = "Mixed";
+ break;
default:
pname = "Aggregate ???";
strategy = "???";
@@ -1900,6 +1904,19 @@ show_grouping_set_keys(PlanState *planstate,
ListCell *lc;
List *gsets = aggnode->groupingSets;
AttrNumber *keycols = aggnode->grpColIdx;
+ const char *keyname;
+ const char *keysetname;
+
+ if (aggnode->aggstrategy == AGG_HASHED || aggnode->aggstrategy == AGG_MIXED)
+ {
+ keyname = "Hash Key";
+ keysetname = "Hash Keys";
+ }
+ else
+ {
+ keyname = "Group Key";
+ keysetname = "Group Keys";
+ }
ExplainOpenGroup("Grouping Set", NULL, true, es);
@@ -1914,7 +1931,7 @@ show_grouping_set_keys(PlanState *planstate,
es->indent++;
}
- ExplainOpenGroup("Group Keys", "Group Keys", false, es);
+ ExplainOpenGroup(keysetname, keysetname, false, es);
foreach(lc, gsets)
{
@@ -1938,12 +1955,12 @@ show_grouping_set_keys(PlanState *planstate,
}
if (!result && es->format == EXPLAIN_FORMAT_TEXT)
- ExplainPropertyText("Group Key", "()", es);
+ ExplainPropertyText(keyname, "()", es);
else
- ExplainPropertyListNested("Group Key", result, es);
+ ExplainPropertyListNested(keyname, result, es);
}
- ExplainCloseGroup("Group Keys", "Group Keys", false, es);
+ ExplainCloseGroup(keysetname, keysetname, false, es);
if (sortnode && es->format == EXPLAIN_FORMAT_TEXT)
es->indent--;
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index c16b624..b2c824a 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -122,12 +122,19 @@
* specific).
*
* Where more complex grouping sets are used, we break them down into
- * "phases", where each phase has a different sort order. During each
- * phase but the last, the input tuples are additionally stored in a
- * tuplesort which is keyed to the next phase's sort order; during each
- * phase but the first, the input tuples are drawn from the previously
- * sorted data. (The sorting of the data for the first phase is handled by
- * the planner, as it might be satisfied by underlying nodes.)
+ * "phases", where each phase has a different sort order (except phase 0
+ * which is reserved for hashing). During each phase but the last, the
+ * input tuples are additionally stored in a tuplesort which is keyed to the
+ * next phase's sort order; during each phase but the first, the input
+ * tuples are drawn from the previously sorted data. (The sorting of the
+ * data for the first phase is handled by the planner, as it might be
+ * satisfied by underlying nodes.)
+ *
+ * Hashing can be mixed with sorted grouping. To do this, we have an
+ * AGG_MIXED strategy that populates the hashtables during the first sorted
+ * phase, and switches to reading them out after completing all sort phases.
+ * We can also support AGG_HASHED with multiple hash tables and no sorting
+ * at all.
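+ *
+ * (Illustrative example: a mixed aggregate with two hashed sets and one
+ * two-set rollup gets a single phase 0 holding both hash tables plus a
+ * phase 1 for the rollup; execution starts in phase 1, filling the hash
+ * tables as a side effect of the sorted grouping, and phase 0 is read
+ * out last.)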
*
* From the perspective of aggregate transition and final functions, the
* only issue regarding grouping sets is this: a single call site (flinfo)
@@ -139,8 +146,6 @@
* sensitive to the grouping set for which the aggregate function is
* currently being called.
*
- * TODO: AGG_HASHED doesn't support multiple grouping sets yet.
- *
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
@@ -432,6 +437,7 @@ typedef struct AggStatePerGroupData
*/
typedef struct AggStatePerPhaseData
{
+ AggStrategy aggstrategy; /* strategy for this phase */
int numsets; /* number of grouping sets (or 0) */
int *gset_lengths; /* lengths of grouping sets */
Bitmapset **grouped_cols; /* column groupings for rollup */
@@ -440,7 +446,31 @@ typedef struct AggStatePerPhaseData
Sort *sortnode; /* Sort node for input ordering for phase */
} AggStatePerPhaseData;
+/*
+ * AggStatePerHashData - per-hashtable state
+ *
+ * When doing grouping sets with hashing, we have one of these for each
+ * grouping set. (When doing hashing without grouping sets, we have just one of
+ * them.)
+ */
+
+typedef struct AggStatePerHashData
+{
+ TupleHashTable hashtable; /* hash table with one entry per group */
+ TupleHashIterator hashiter; /* for iterating through hash table */
+ TupleTableSlot *hashslot; /* slot for loading hash table */
+ FmgrInfo *hashfunctions; /* per-grouping-field hash fns */
+ FmgrInfo *eqfunctions; /* per-grouping-field equality fns */
+ int numCols; /* number of hash key columns */
+ int numhashGrpCols; /* number of columns in hash table */
+ int largestGrpColIdx; /* largest column required for hashing */
+ AttrNumber *hashGrpColIdxInput; /* and their indices in input slot */
+ AttrNumber *hashGrpColIdxHash; /* indices for execGrouping in hashtbl */
+ Agg *aggnode; /* original Agg node, for numGroups etc. */
+} AggStatePerHashData;
+
+static void select_current_set(AggState *aggstate, int setno, bool is_hash);
static void initialize_phase(AggState *aggstate, int newphase);
static TupleTableSlot *fetch_input_tuple(AggState *aggstate);
static void initialize_aggregates(AggState *aggstate,
@@ -449,7 +479,7 @@ static void initialize_aggregates(AggState *aggstate,
static void advance_transition_function(AggState *aggstate,
AggStatePerTrans pertrans,
AggStatePerGroup pergroupstate);
-static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup);
+static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup, AggStatePerGroup *pergroups);
static void advance_combine_function(AggState *aggstate,
AggStatePerTrans pertrans,
AggStatePerGroup pergroupstate);
@@ -479,8 +509,8 @@ static TupleTableSlot *project_aggregates(AggState *aggstate);
static Bitmapset *find_unaggregated_cols(AggState *aggstate);
static bool find_unaggregated_cols_walker(Node *node, Bitmapset **colnos);
static void build_hash_table(AggState *aggstate);
-static TupleHashEntryData *lookup_hash_entry(AggState *aggstate,
- TupleTableSlot *inputslot);
+static TupleHashEntryData *lookup_hash_entry(AggState *aggstate);
+static AggStatePerGroup *lookup_hash_entries(AggState *aggstate);
static TupleTableSlot *agg_retrieve_direct(AggState *aggstate);
static void agg_fill_hash_table(AggState *aggstate);
static TupleTableSlot *agg_retrieve_hash_table(AggState *aggstate);
@@ -501,13 +531,29 @@ static int find_compatible_pertrans(AggState *aggstate, Aggref *newagg,
/*
- * Switch to phase "newphase", which must either be 0 (to reset) or
+ * Select the current grouping set; affects current_set and
+ * curaggcontext.
+ */
+
+static void
+select_current_set(AggState *aggstate, int setno, bool is_hash)
+{
+ if (is_hash)
+ aggstate->curaggcontext = aggstate->hashcontext;
+ else
+ aggstate->curaggcontext = aggstate->aggcontexts[setno];
+
+ aggstate->current_set = setno;
+}
+
+/*
+ * Switch to phase "newphase", which must either be 0 or 1 (to reset) or
* current_phase + 1. Juggle the tuplesorts accordingly.
*/
static void
initialize_phase(AggState *aggstate, int newphase)
{
- Assert(newphase == 0 || newphase == aggstate->current_phase + 1);
+ Assert(newphase <= 1 || newphase == aggstate->current_phase + 1);
/*
* Whatever the previous state, we're now done with whatever input
@@ -519,7 +565,7 @@ initialize_phase(AggState *aggstate, int newphase)
aggstate->sort_in = NULL;
}
- if (newphase == 0)
+ if (newphase <= 1)
{
/*
* Discard any existing output tuplesort.
@@ -546,7 +592,7 @@ initialize_phase(AggState *aggstate, int newphase)
* If this isn't the last phase, we need to sort appropriately for the
* next phase in sequence.
*/
- if (newphase < aggstate->numphases - 1)
+ if (newphase > 0 && newphase < aggstate->numphases - 1)
{
Sort *sortnode = aggstate->phases[newphase + 1].sortnode;
PlanState *outerNode = outerPlanState(aggstate);
@@ -567,7 +613,7 @@ initialize_phase(AggState *aggstate, int newphase)
}
/*
- * Fetch a tuple from either the outer plan (for phase 0) or from the sorter
+ * Fetch a tuple from either the outer plan (for phase 1) or from the sorter
* populated by the previous phase. Copy it to the sorter for the next phase
* if any.
*/
@@ -595,8 +641,8 @@ fetch_input_tuple(AggState *aggstate)
/*
* (Re)Initialize an individual aggregate.
*
- * This function handles only one grouping set (already set in
- * aggstate->current_set).
+ * This function handles only one grouping set, already set in
+ * aggstate->current_set.
*
* When called, CurrentMemoryContext should be the per-query context.
*/
@@ -652,8 +698,7 @@ initialize_aggregate(AggState *aggstate, AggStatePerTrans pertrans,
{
MemoryContext oldContext;
- oldContext = MemoryContextSwitchTo(
- aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+ oldContext = MemoryContextSwitchTo(aggstate->curaggcontext->ecxt_per_tuple_memory);
pergroupstate->transValue = datumCopy(pertrans->initValue,
pertrans->transtypeByVal,
pertrans->transtypeLen);
@@ -676,8 +721,9 @@ initialize_aggregate(AggState *aggstate, AggStatePerTrans pertrans,
*
* If there are multiple grouping sets, we initialize only the first numReset
* of them (the grouping sets are ordered so that the most specific one, which
- * is reset most often, is first). As a convenience, if numReset is < 1, we
- * reinitialize all sets.
+ * is reset most often, is first). As a convenience, if numReset is 0, we
+ * reinitialize all sets. Pass numReset = -1 to initialize a single
+ * hashtable entry's states; in that case the caller must have used
+ * select_current_set appropriately.
*
* When called, CurrentMemoryContext should be the per-query context.
*/
@@ -691,23 +737,34 @@ initialize_aggregates(AggState *aggstate,
int setno = 0;
AggStatePerTrans transstates = aggstate->pertrans;
- if (numReset < 1)
+ if (numReset == 0)
numReset = numGroupingSets;
for (transno = 0; transno < aggstate->numtrans; transno++)
{
AggStatePerTrans pertrans = &transstates[transno];
- for (setno = 0; setno < numReset; setno++)
+ if (numReset < 0)
{
AggStatePerGroup pergroupstate;
- pergroupstate = &pergroup[transno + (setno * (aggstate->numtrans))];
-
- aggstate->current_set = setno;
+ pergroupstate = &pergroup[transno];
initialize_aggregate(aggstate, pertrans, pergroupstate);
}
+ else
+ {
+ for (setno = 0; setno < numReset; setno++)
+ {
+ AggStatePerGroup pergroupstate;
+
+ pergroupstate = &pergroup[transno + (setno * (aggstate->numtrans))];
+
+ select_current_set(aggstate, setno, false);
+
+ initialize_aggregate(aggstate, pertrans, pergroupstate);
+ }
+ }
}
}
@@ -757,7 +814,7 @@ advance_transition_function(AggState *aggstate,
* do not need to pfree the old transValue, since it's NULL.
*/
oldContext = MemoryContextSwitchTo(
- aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+ aggstate->curaggcontext->ecxt_per_tuple_memory);
pergroupstate->transValue = datumCopy(fcinfo->arg[1],
pertrans->transtypeByVal,
pertrans->transtypeLen);
@@ -807,7 +864,7 @@ advance_transition_function(AggState *aggstate,
{
if (!fcinfo->isnull)
{
- MemoryContextSwitchTo(aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+ MemoryContextSwitchTo(aggstate->curaggcontext->ecxt_per_tuple_memory);
if (DatumIsReadWriteExpandedObject(newVal,
false,
pertrans->transtypeLen) &&
@@ -838,17 +895,21 @@ advance_transition_function(AggState *aggstate,
/*
* Advance each aggregate transition state for one input tuple. The input
* tuple has been stored in tmpcontext->ecxt_outertuple, so that it is
- * accessible to ExecEvalExpr. pergroup is the array of per-group structs to
- * use (this might be in a hashtable entry).
+ * accessible to ExecEvalExpr.
+ *
+ * We have two sets of transition states to handle: one for sorted aggregation
+ * and one for hashed; we do them both here, to avoid multiple evaluation of
+ * the inputs.
*
* When called, CurrentMemoryContext should be the per-query context.
*/
static void
-advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
+advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup, AggStatePerGroup *pergroups)
{
int transno;
int setno = 0;
int numGroupingSets = Max(aggstate->phase->numsets, 1);
+ int numHashes = aggstate->num_hashes;
int numTrans = aggstate->numtrans;
TupleTableSlot *slot = aggstate->evalslot;
@@ -880,6 +941,7 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
{
/* DISTINCT and/or ORDER BY case */
Assert(slot->tts_nvalid >= (pertrans->numInputs + inputoff));
+ Assert(!pergroups);
/*
* If the transfn is strict, we want to check for nullity before
@@ -940,13 +1002,36 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
fcinfo->argnull[i + 1] = slot->tts_isnull[i + inputoff];
}
- for (setno = 0; setno < numGroupingSets; setno++)
+ if (pergroup)
+ {
+ /* advance transition states for ordered grouping */
+
+ for (setno = 0; setno < numGroupingSets; setno++)
+ {
+ AggStatePerGroup pergroupstate;
+
+ select_current_set(aggstate, setno, false);
+
+ pergroupstate = &pergroup[transno + (setno * numTrans)];
+
+ advance_transition_function(aggstate, pertrans, pergroupstate);
+ }
+ }
+
+ if (pergroups)
{
- AggStatePerGroup pergroupstate = &pergroup[transno + (setno * numTrans)];
+ /* advance transition states for hashed grouping */
+
+ for (setno = 0; setno < numHashes; setno++)
+ {
+ AggStatePerGroup pergroupstate;
+
+ select_current_set(aggstate, setno, true);
- aggstate->current_set = setno;
+ pergroupstate = &pergroups[setno][transno];
- advance_transition_function(aggstate, pertrans, pergroupstate);
+ advance_transition_function(aggstate, pertrans, pergroupstate);
+ }
}
}
}
@@ -967,7 +1052,7 @@ combine_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
TupleTableSlot *slot;
/* combine not supported with grouping sets */
- Assert(aggstate->phase->numsets == 0);
+ Assert(aggstate->phase->numsets <= 1);
/* compute input for all aggregates */
slot = ExecProject(aggstate->evalproj, NULL);
@@ -1060,7 +1145,7 @@ advance_combine_function(AggState *aggstate,
if (!pertrans->transtypeByVal)
{
oldContext = MemoryContextSwitchTo(
- aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+ aggstate->curaggcontext->ecxt_per_tuple_memory);
pergroupstate->transValue = datumCopy(fcinfo->arg[1],
pertrans->transtypeByVal,
pertrans->transtypeLen);
@@ -1105,7 +1190,7 @@ advance_combine_function(AggState *aggstate,
{
if (!fcinfo->isnull)
{
- MemoryContextSwitchTo(aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+ MemoryContextSwitchTo(aggstate->curaggcontext->ecxt_per_tuple_memory);
if (DatumIsReadWriteExpandedObject(newVal,
false,
pertrans->transtypeLen) &&
@@ -1560,7 +1645,9 @@ prepare_projection_slot(AggState *aggstate, TupleTableSlot *slot, int currentSet
/*
* Compute the final value of all aggregates for one group.
*
- * This function handles only one grouping set at a time.
+ * This function handles only one grouping set at a time. But in the hash
+ * case, it's the caller's responsibility to have selected the set already, and
+ * we pass in -1 here to flag that and to control the indexing into pertrans.
*
* Results are stored in the output econtext aggvalues/aggnulls.
*/
@@ -1576,10 +1663,11 @@ finalize_aggregates(AggState *aggstate,
int aggno;
int transno;
- Assert(currentSet == 0 ||
- ((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED);
-
- aggstate->current_set = currentSet;
+ /* ugly hack for hash */
+ if (currentSet >= 0)
+ select_current_set(aggstate, currentSet, false);
+ else
+ currentSet = 0;
/*
* If there were any DISTINCT and/or ORDER BY aggregates, sort their
@@ -1594,7 +1682,8 @@ finalize_aggregates(AggState *aggstate,
if (pertrans->numSortCols > 0)
{
- Assert(((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED);
+ Assert(aggstate->aggstrategy != AGG_HASHED &&
+ aggstate->aggstrategy != AGG_MIXED);
if (pertrans->numInputs == 1)
process_ordered_aggregate_single(aggstate,
@@ -1708,7 +1797,7 @@ find_unaggregated_cols_walker(Node *node, Bitmapset **colnos)
}
/*
- * Initialize the hash table to empty.
+ * Initialize the hash table(s) to empty.
*
* To implement hashed aggregation, we need a hashtable that stores a
* representative tuple and an array of AggStatePerGroup structs for each
@@ -1716,29 +1805,40 @@ find_unaggregated_cols_walker(Node *node, Bitmapset **colnos)
* GROUP BY columns. The per-group data is allocated in lookup_hash_entry(),
* for each entry.
*
- * The hash table always lives in the aggcontext memory context.
+ * We have a separate hashtable and associated perhash data structure for each
+ * grouping set for which we're doing hashing.
+ *
+ * The hash tables always live in the hashcontext's per-tuple memory context
+ * (there is only one of these for all tables together, since they are all
+ * reset at the same time).
*/
static void
build_hash_table(AggState *aggstate)
{
- Agg *node = (Agg *) aggstate->ss.ps.plan;
MemoryContext tmpmem = aggstate->tmpcontext->ecxt_per_tuple_memory;
Size additionalsize;
+ int i;
- Assert(node->aggstrategy == AGG_HASHED);
- Assert(node->numGroups > 0);
+ Assert(aggstate->aggstrategy == AGG_HASHED || aggstate->aggstrategy == AGG_MIXED);
- additionalsize = aggstate->numaggs * sizeof(AggStatePerGroupData);
+ additionalsize = aggstate->numtrans * sizeof(AggStatePerGroupData);
- aggstate->hashtable = BuildTupleHashTable(node->numCols,
- aggstate->hashGrpColIdxHash,
- aggstate->phase->eqfunctions,
- aggstate->hashfunctions,
- node->numGroups,
- additionalsize,
- aggstate->aggcontexts[0]->ecxt_per_tuple_memory,
- tmpmem,
+ for (i = 0; i < aggstate->num_hashes; ++i)
+ {
+ AggStatePerHash perhash = &aggstate->perhash[i];
+
+ Assert(perhash->aggnode->numGroups > 0);
+
+ perhash->hashtable = BuildTupleHashTable(perhash->numCols,
+ perhash->hashGrpColIdxHash,
+ perhash->eqfunctions,
+ perhash->hashfunctions,
+ perhash->aggnode->numGroups,
+ additionalsize,
+ aggstate->hashcontext->ecxt_per_tuple_memory,
+ tmpmem,
!DO_AGGSPLIT_SKIPFINAL(aggstate->aggsplit));
+ }
}
/*
@@ -1761,72 +1861,98 @@ build_hash_table(AggState *aggstate)
* the array is preserved over ExecReScanAgg, so we allocate it in the
* per-query context (unlike the hash table itself).
*/
-static List *
+static void
find_hash_columns(AggState *aggstate)
{
- Agg *node = (Agg *) aggstate->ss.ps.plan;
- Bitmapset *colnos;
- List *collist;
- TupleDesc hashDesc;
+ Bitmapset *base_colnos;
List *outerTlist = outerPlanState(aggstate)->plan->targetlist;
- List *hashTlist = NIL;
- int i;
-
- aggstate->largestGrpColIdx = 0;
+ int numHashes = aggstate->num_hashes;
+ int j;
/* Find Vars that will be needed in tlist and qual */
- colnos = find_unaggregated_cols(aggstate);
- /* Add in all the grouping columns */
- for (i = 0; i < node->numCols; i++)
- colnos = bms_add_member(colnos, node->grpColIdx[i]);
- /* Convert to list, using lcons so largest element ends up first */
- collist = NIL;
-
- aggstate->hashGrpColIdxInput =
- palloc(bms_num_members(colnos) * sizeof(AttrNumber));
- aggstate->hashGrpColIdxHash =
- palloc(node->numCols * sizeof(AttrNumber));
+ base_colnos = find_unaggregated_cols(aggstate);
- /*
- * First build mapping for columns directly hashed. These are the first,
- * because they'll be accessed when computing hash values and comparing
- * tuples for exact matches. We also build simple mapping for
- * execGrouping, so it knows where to find the to-be-hashed / compared
- * columns in the input.
- */
- for (i = 0; i < node->numCols; i++)
+ for (j = 0; j < numHashes; ++j)
{
- aggstate->hashGrpColIdxInput[i] = node->grpColIdx[i];
- aggstate->hashGrpColIdxHash[i] = i + 1;
- aggstate->numhashGrpCols++;
- /* delete already mapped columns */
- bms_del_member(colnos, node->grpColIdx[i]);
- }
+ AggStatePerHash perhash = &aggstate->perhash[j];
+ Bitmapset *colnos = bms_copy(base_colnos);
+ AttrNumber *grpColIdx = perhash->aggnode->grpColIdx;
+ List *hashTlist = NIL;
+ TupleDesc hashDesc;
+ int i;
- /* and add the remaining columns */
- while ((i = bms_first_member(colnos)) >= 0)
- {
- aggstate->hashGrpColIdxInput[aggstate->numhashGrpCols] = i;
- aggstate->numhashGrpCols++;
- }
+ perhash->largestGrpColIdx = 0;
- /* and build a tuple descriptor for the hashtable */
- for (i = 0; i < aggstate->numhashGrpCols; i++)
- {
- int varNumber = aggstate->hashGrpColIdxInput[i] - 1;
+ /*
+ * If we're doing grouping sets, then some Vars might be referenced in
+ * tlist/qual for the benefit of other grouping sets, but not needed
+ * when hashing; i.e. prepare_projection_slot will null them out, so
+ * there'd be no point storing them. Use prepare_projection_slot's
+ * logic to determine which.
+ */
+ if (aggstate->phases[0].grouped_cols)
+ {
+ Bitmapset *grouped_cols = aggstate->phases[0].grouped_cols[j];
+ ListCell *lc;
- hashTlist = lappend(hashTlist, list_nth(outerTlist, varNumber));
- aggstate->largestGrpColIdx =
- Max(varNumber + 1, aggstate->largestGrpColIdx);
- }
+ foreach(lc, aggstate->all_grouped_cols)
+ {
+ int attnum = lfirst_int(lc);
- hashDesc = ExecTypeFromTL(hashTlist, false);
- ExecSetSlotDescriptor(aggstate->hashslot, hashDesc);
+ if (!bms_is_member(attnum, grouped_cols))
+ colnos = bms_del_member(colnos, attnum);
+ }
+ }
+ /* Add in all the grouping columns */
+ for (i = 0; i < perhash->numCols; i++)
+ colnos = bms_add_member(colnos, grpColIdx[i]);
+
+ perhash->hashGrpColIdxInput =
+ palloc(bms_num_members(colnos) * sizeof(AttrNumber));
+ perhash->hashGrpColIdxHash =
+ palloc(perhash->numCols * sizeof(AttrNumber));
+
+ /*
+ * First build mapping for columns directly hashed. These are the
+ * first, because they'll be accessed when computing hash values and
+ * comparing tuples for exact matches. We also build simple mapping for
+ * execGrouping, so it knows where to find the to-be-hashed / compared
+ * columns in the input.
+ */
+ for (i = 0; i < perhash->numCols; i++)
+ {
+ perhash->hashGrpColIdxInput[i] = grpColIdx[i];
+ perhash->hashGrpColIdxHash[i] = i + 1;
+ perhash->numhashGrpCols++;
+ /* delete already mapped columns */
+ bms_del_member(colnos, grpColIdx[i]);
+ }
- list_free(hashTlist);
- bms_free(colnos);
+ /* and add the remaining columns */
+ while ((i = bms_first_member(colnos)) >= 0)
+ {
+ perhash->hashGrpColIdxInput[perhash->numhashGrpCols] = i;
+ perhash->numhashGrpCols++;
+ }
- return collist;
+ /* and build a tuple descriptor for the hashtable */
+ for (i = 0; i < perhash->numhashGrpCols; i++)
+ {
+ int varNumber = perhash->hashGrpColIdxInput[i] - 1;
+
+ hashTlist = lappend(hashTlist, list_nth(outerTlist, varNumber));
+ perhash->largestGrpColIdx =
+ Max(varNumber + 1, perhash->largestGrpColIdx);
+ }
+
+ hashDesc = ExecTypeFromTL(hashTlist, false);
+ ExecSetSlotDescriptor(perhash->hashslot, hashDesc);
+
+ list_free(hashTlist);
+ bms_free(colnos);
+ }
+
+ bms_free(base_colnos);
}
/*
@@ -1851,26 +1977,30 @@ hash_agg_entry_size(int numAggs)
}
/*
- * Find or create a hashtable entry for the tuple group containing the
- * given tuple.
+ * Find or create a hashtable entry for the tuple group containing the current
+ * tuple (already set in tmpcontext's outertuple slot), in the current grouping
+ * set (which the caller must have selected - note that initialize_aggregate
+ * depends on this).
*
* When called, CurrentMemoryContext should be the per-query context.
*/
static TupleHashEntryData *
-lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot)
+lookup_hash_entry(AggState *aggstate)
{
- TupleTableSlot *hashslot = aggstate->hashslot;
+ TupleTableSlot *inputslot = aggstate->tmpcontext->ecxt_outertuple;
+ AggStatePerHash perhash = &aggstate->perhash[aggstate->current_set];
+ TupleTableSlot *hashslot = perhash->hashslot;
TupleHashEntryData *entry;
bool isnew;
int i;
/* transfer just the needed columns into hashslot */
- slot_getsomeattrs(inputslot, aggstate->largestGrpColIdx);
+ slot_getsomeattrs(inputslot, perhash->largestGrpColIdx);
ExecClearTuple(hashslot);
- for (i = 0; i < aggstate->numhashGrpCols; i++)
+ for (i = 0; i < perhash->numhashGrpCols; i++)
{
- int varNumber = aggstate->hashGrpColIdxInput[i] - 1;
+ int varNumber = perhash->hashGrpColIdxInput[i] - 1;
hashslot->tts_values[i] = inputslot->tts_values[varNumber];
hashslot->tts_isnull[i] = inputslot->tts_isnull[varNumber];
@@ -1878,22 +2008,44 @@ lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot)
ExecStoreVirtualTuple(hashslot);
/* find or create the hashtable entry using the filtered tuple */
- entry = LookupTupleHashEntry(aggstate->hashtable, hashslot, &isnew);
+ entry = LookupTupleHashEntry(perhash->hashtable, hashslot, &isnew);
if (isnew)
{
entry->additional = (AggStatePerGroup)
- MemoryContextAlloc(aggstate->hashtable->tablecxt,
+ MemoryContextAlloc(perhash->hashtable->tablecxt,
sizeof(AggStatePerGroupData) * aggstate->numtrans);
/* initialize aggregates for new tuple group */
initialize_aggregates(aggstate, (AggStatePerGroup) entry->additional,
- 0);
+ -1);
}
return entry;
}
/*
+ * Look up hash entries for the current tuple in all hashed grouping sets,
+ * returning an array of pergroup pointers suitable for advance_aggregates.
+ *
+ * Be aware that lookup_hash_entry can reset the tmpcontext.
+ */
+static AggStatePerGroup *
+lookup_hash_entries(AggState *aggstate)
+{
+ int numHashes = aggstate->num_hashes;
+ AggStatePerGroup *pergroup = aggstate->hash_pergroup;
+ int setno;
+
+ for (setno = 0; setno < numHashes; setno++)
+ {
+ select_current_set(aggstate, setno, true);
+ pergroup[setno] = lookup_hash_entry(aggstate)->additional;
+ }
+
+ return pergroup;
+}
+
+/*
* ExecAgg -
*
* ExecAgg receives tuples from its outer subplan and aggregates over
@@ -1935,14 +2087,17 @@ ExecAgg(AggState *node)
if (!node->agg_done)
{
/* Dispatch based on strategy */
- switch (node->phase->aggnode->aggstrategy)
+ switch (node->phase->aggstrategy)
{
case AGG_HASHED:
if (!node->table_filled)
agg_fill_hash_table(node);
+ /*FALLTHROUGH*/
+ case AGG_MIXED:
result = agg_retrieve_hash_table(node);
break;
- default:
+ case AGG_PLAIN:
+ case AGG_SORTED:
result = agg_retrieve_direct(node);
break;
}
@@ -1965,6 +2120,7 @@ agg_retrieve_direct(AggState *aggstate)
ExprContext *tmpcontext;
AggStatePerAgg peragg;
AggStatePerGroup pergroup;
+ AggStatePerGroup *hash_pergroups = NULL;
TupleTableSlot *outerslot;
TupleTableSlot *firstSlot;
TupleTableSlot *result;
@@ -2051,6 +2207,18 @@ agg_retrieve_direct(AggState *aggstate)
node = aggstate->phase->aggnode;
numReset = numGroupingSets;
}
+ else if (aggstate->aggstrategy == AGG_MIXED)
+ {
+ /*
+ * Mixed mode; we've output all the grouped stuff and have
+ * full hashtables, so switch to outputting those.
+ */
+ initialize_phase(aggstate, 0);
+ aggstate->table_filled = true;
+ ResetTupleHashIterator(aggstate->perhash[0].hashtable, &aggstate->perhash[0].hashiter);
+ select_current_set(aggstate, 0, true);
+ return agg_retrieve_hash_table(aggstate);
+ }
else
{
aggstate->agg_done = true;
@@ -2087,7 +2255,7 @@ agg_retrieve_direct(AggState *aggstate)
*----------
*/
if (aggstate->input_done ||
- (node->aggstrategy == AGG_SORTED &&
+ (node->aggstrategy != AGG_PLAIN &&
aggstate->projected_set != -1 &&
aggstate->projected_set < (numGroupingSets - 1) &&
nextSetSize > 0 &&
@@ -2200,10 +2368,21 @@ agg_retrieve_direct(AggState *aggstate)
*/
for (;;)
{
+ /*
+ * During phase 1 only of a mixed agg, we need to update
+ * hashtables as well in advance_aggregates.
+ */
+ if (aggstate->aggstrategy == AGG_MIXED && aggstate->current_phase == 1)
+ {
+ hash_pergroups = lookup_hash_entries(aggstate);
+ }
+ else
+ hash_pergroups = NULL;
+
if (DO_AGGSPLIT_COMBINE(aggstate->aggsplit))
combine_aggregates(aggstate, pergroup);
else
- advance_aggregates(aggstate, pergroup);
+ advance_aggregates(aggstate, pergroup, hash_pergroups);
/* Reset per-input-tuple context after each tuple */
ResetExprContext(tmpcontext);
@@ -2230,7 +2409,7 @@ agg_retrieve_direct(AggState *aggstate)
* If we are grouping, check whether we've crossed a group
* boundary.
*/
- if (node->aggstrategy == AGG_SORTED)
+ if (node->aggstrategy != AGG_PLAIN)
{
if (!execTuplesMatch(firstSlot,
outerslot,
@@ -2279,21 +2458,13 @@ agg_retrieve_direct(AggState *aggstate)
}
/*
- * ExecAgg for hashed case: phase 1, read input and build hash table
+ * ExecAgg for hashed case: read input and build hash table
*/
static void
agg_fill_hash_table(AggState *aggstate)
{
- ExprContext *tmpcontext;
- TupleHashEntryData *entry;
TupleTableSlot *outerslot;
-
- /*
- * get state info from node
- *
- * tmpcontext is the per-input-tuple expression context
- */
- tmpcontext = aggstate->tmpcontext;
+ ExprContext *tmpcontext = aggstate->tmpcontext;
/*
* Process each outer-plan tuple, and then fetch the next one, until we
@@ -2301,32 +2472,39 @@ agg_fill_hash_table(AggState *aggstate)
*/
for (;;)
{
+ AggStatePerGroup *pergroups;
+
outerslot = fetch_input_tuple(aggstate);
if (TupIsNull(outerslot))
break;
- /* set up for advance_aggregates call */
+
+ /* set up for lookup_hash_entries and advance_aggregates */
tmpcontext->ecxt_outertuple = outerslot;
- /* Find or build hashtable entry for this tuple's group */
- entry = lookup_hash_entry(aggstate, outerslot);
+ /* Find or build hashtable entries */
+ pergroups = lookup_hash_entries(aggstate);
/* Advance the aggregates */
if (DO_AGGSPLIT_COMBINE(aggstate->aggsplit))
- combine_aggregates(aggstate, (AggStatePerGroup) entry->additional);
+ combine_aggregates(aggstate, pergroups[0]);
else
- advance_aggregates(aggstate, (AggStatePerGroup) entry->additional);
+ advance_aggregates(aggstate, NULL, pergroups);
- /* Reset per-input-tuple context after each tuple */
- ResetExprContext(tmpcontext);
+ /*
+ * Reset per-input-tuple context after each tuple, but note that the
+ * hash lookups do this too
+ */
+ ResetExprContext(aggstate->tmpcontext);
}
aggstate->table_filled = true;
- /* Initialize to walk the hash table */
- ResetTupleHashIterator(aggstate->hashtable, &aggstate->hashiter);
+ /* Initialize to walk the first hash table */
+ select_current_set(aggstate, 0, true);
+ ResetTupleHashIterator(aggstate->perhash[0].hashtable, &aggstate->perhash[0].hashiter);
}
/*
- * ExecAgg for hashed case: phase 2, retrieving groups from hash table
+ * ExecAgg for hashed case: retrieving groups from hash table
*/
static TupleTableSlot *
agg_retrieve_hash_table(AggState *aggstate)
@@ -2337,17 +2515,22 @@ agg_retrieve_hash_table(AggState *aggstate)
TupleHashEntryData *entry;
TupleTableSlot *firstSlot;
TupleTableSlot *result;
- TupleTableSlot *hashslot;
+ AggStatePerHash perhash;
/*
- * get state info from node
+ * get state info from node.
+ *
+ * econtext is the per-output-tuple expression context.
*/
- /* econtext is the per-output-tuple expression context */
econtext = aggstate->ss.ps.ps_ExprContext;
peragg = aggstate->peragg;
firstSlot = aggstate->ss.ss_ScanTupleSlot;
- hashslot = aggstate->hashslot;
+ /*
+ * Note that perhash (and therefore anything accessed through it) can
+ * change inside the loop, as we change between grouping sets.
+ */
+ perhash = &aggstate->perhash[aggstate->current_set];
/*
* We loop retrieving groups until we find one satisfying
@@ -2355,17 +2538,37 @@ agg_retrieve_hash_table(AggState *aggstate)
*/
while (!aggstate->agg_done)
{
+ TupleTableSlot *hashslot = perhash->hashslot;
int i;
/*
* Find the next entry in the hash table
*/
- entry = ScanTupleHashTable(aggstate->hashtable, &aggstate->hashiter);
+ entry = ScanTupleHashTable(perhash->hashtable, &perhash->hashiter);
if (entry == NULL)
{
- /* No more entries in hashtable, so done */
- aggstate->agg_done = TRUE;
- return NULL;
+ int nextset = aggstate->current_set + 1;
+
+ if (nextset < aggstate->num_hashes)
+ {
+ /*
+ * Switch to next grouping set, reinitialize, and restart the
+ * loop.
+ */
+ select_current_set(aggstate, nextset, true);
+
+ perhash = &aggstate->perhash[aggstate->current_set];
+
+ ResetTupleHashIterator(perhash->hashtable, &perhash->hashiter);
+
+ continue;
+ }
+ else
+ {
+ /* No more hashtables, so done */
+ aggstate->agg_done = TRUE;
+ return NULL;
+ }
}
/*
@@ -2388,9 +2591,9 @@ agg_retrieve_hash_table(AggState *aggstate)
memset(firstSlot->tts_isnull, true,
firstSlot->tts_tupleDescriptor->natts * sizeof(bool));
- for (i = 0; i < aggstate->numhashGrpCols; i++)
+ for (i = 0; i < perhash->numhashGrpCols; i++)
{
- int varNumber = aggstate->hashGrpColIdxInput[i] - 1;
+ int varNumber = perhash->hashGrpColIdxInput[i] - 1;
firstSlot->tts_values[varNumber] = hashslot->tts_values[i];
firstSlot->tts_isnull[varNumber] = hashslot->tts_isnull[i];
@@ -2399,14 +2602,16 @@ agg_retrieve_hash_table(AggState *aggstate)
pergroup = (AggStatePerGroup) entry->additional;
- finalize_aggregates(aggstate, peragg, pergroup, 0);
-
/*
* Use the representative input tuple for any references to
* non-aggregated input columns in the qual and tlist.
*/
econtext->ecxt_outertuple = firstSlot;
+ prepare_projection_slot(aggstate, econtext->ecxt_outertuple, aggstate->current_set);
+
+ finalize_aggregates(aggstate, peragg, pergroup, -1);
+
result = project_aggregates(aggstate);
if (result)
return result;
@@ -2420,7 +2625,36 @@ agg_retrieve_hash_table(AggState *aggstate)
* ExecInitAgg
*
* Creates the run-time information for the agg node produced by the
- * planner and initializes its outer subtree
+ * planner and initializes its outer subtree.
+ *
+ * What we get from the planner is actually one "real" Agg node which is part
+ * of the plan tree proper, but which optionally has an additional list of Agg
+ * nodes hung off the side via the "chain" field. This is because an Agg node
+ * happens to be a convenient representation of all the data we need for
+ * grouping sets.
+ *
+ * For many purposes, we treat the "real" node as if it were just the first
+ * node in the chain. The chain must be ordered such that hashed entries come
+ * before sorted/plain entries; the real node is marked AGG_MIXED if there are
+ * mixed types (in which case the real node describes one of the hashed
+ * groupings, other AGG_HASHED nodes may optionally follow in the chain,
+ * followed in turn by AGG_SORTED or (one) AGG_PLAIN node). If the real node
+ * is marked AGG_HASHED or AGG_SORTED, then all the chained nodes must be of
+ * the same type; if it is AGG_PLAIN, there can be no chained nodes.
+ *
+ * We collect all hashed nodes into a single "phase", numbered 0, and create a
+ * sorted phase (numbered 1..n) for each AGG_SORTED or AGG_PLAIN node. Phase
+ * 0 is allocated even if there are no hashes, but remains unused in that
+ * case.
+ *
+ * AGG_HASHED nodes actually refer to only a single grouping set each, because
+ * for each hashed grouping we need a separate grpColIdx and numGroups
+ * estimate. AGG_SORTED nodes represent a "rollup", a list of grouping sets
+ * that share a sort order. Each AGG_SORTED node other than the first one has
+ * an associated Sort node which describes the sort order to be used; the
+ * first sorted node takes its input from the outer subtree, which the planner
+ * has already arranged to provide ordered data.
+ *
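+ * For example (purely illustrative, not actual planner output): given
+ * GROUP BY GROUPING SETS (a, (b,c), (b), ()) with "a" chosen for
+ * hashing, the real node would be marked AGG_MIXED and describe the hash
+ * on (a), with one chained AGG_SORTED node covering the rollup (b,c),
+ * (b), (); phase 0 then holds the single hash table and phase 1 the
+ * three sorted sets.
+ *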
* -----------------
*/
AggState *
@@ -2435,14 +2669,18 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
transno,
aggno;
int phase;
+ int phaseidx;
List *combined_inputeval;
ListCell *l;
Bitmapset *all_grouped_cols = NULL;
int numGroupingSets = 1;
int numPhases;
+ int numHashes;
int column_offset;
int i = 0;
int j = 0;
+ bool use_hashing = (node->aggstrategy == AGG_HASHED ||
+ node->aggstrategy == AGG_MIXED);
/* check for unsupported flags */
Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
@@ -2457,9 +2695,9 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->aggs = NIL;
aggstate->numaggs = 0;
aggstate->numtrans = 0;
+ aggstate->aggstrategy = node->aggstrategy;
aggstate->aggsplit = node->aggsplit;
aggstate->maxsets = 0;
- aggstate->hashfunctions = NULL;
aggstate->projected_set = -1;
aggstate->current_set = 0;
aggstate->peragg = NULL;
@@ -2469,18 +2707,22 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->agg_done = false;
aggstate->pergroup = NULL;
aggstate->grp_firstTuple = NULL;
- aggstate->hashtable = NULL;
aggstate->sort_in = NULL;
aggstate->sort_out = NULL;
/*
+ * phases[0] always exists, but is dummy in sorted/plain mode
+ */
+ numPhases = (use_hashing ? 1 : 2);
+ numHashes = (use_hashing ? 1 : 0);
+
+ /*
* Calculate the maximum number of grouping sets in any phase; this
- * determines the size of some allocations.
+ * determines the size of some allocations. Also calculate the number of
+ * phases, since all hashed/mixed nodes contribute to only a single phase.
*/
if (node->groupingSets)
{
- Assert(node->aggstrategy != AGG_HASHED);
-
numGroupingSets = list_length(node->groupingSets);
foreach(l, node->chain)
@@ -2489,22 +2731,30 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
numGroupingSets = Max(numGroupingSets,
list_length(agg->groupingSets));
+ /*
+ * additional AGG_HASHED aggs become part of phase 0,
+ * but all others add an extra phase.
+ */
+ if (agg->aggstrategy != AGG_HASHED)
+ ++numPhases;
+ else
+ ++numHashes;
}
}
aggstate->maxsets = numGroupingSets;
- aggstate->numphases = numPhases = 1 + list_length(node->chain);
+ aggstate->numphases = numPhases;
aggstate->aggcontexts = (ExprContext **)
palloc0(sizeof(ExprContext *) * numGroupingSets);
/*
* Create expression contexts. We need three or more, one for
- * per-input-tuple processing, one for per-output-tuple processing, and
- * one for each grouping set. The per-tuple memory context of the
- * per-grouping-set ExprContexts (aggcontexts) replaces the standalone
- * memory context formerly used to hold transition values. We cheat a
- * little by using ExecAssignExprContext() to build all of them.
+ * per-input-tuple processing, one for per-output-tuple processing, one for
+ * all the hashtables, and one for each grouping set. The per-tuple memory
+ * context of the per-grouping-set ExprContexts (aggcontexts) replaces the
+ * standalone memory context formerly used to hold transition values. We
+ * cheat a little by using ExecAssignExprContext() to build all of them.
*
* NOTE: the details of what is stored in aggcontexts and what is stored
* in the regular per-query memory context are driven by a simple
@@ -2520,14 +2770,21 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->aggcontexts[i] = aggstate->ss.ps.ps_ExprContext;
}
+ if (use_hashing)
+ {
+ ExecAssignExprContext(estate, &aggstate->ss.ps);
+ aggstate->hashcontext = aggstate->ss.ps.ps_ExprContext;
+ }
+
ExecAssignExprContext(estate, &aggstate->ss.ps);
/*
- * tuple table initialization
+ * tuple table initialization.
+ *
+ * For hashtables, we create some additional slots below.
*/
ExecInitScanTupleSlot(estate, &aggstate->ss);
ExecInitResultTupleSlot(estate, &aggstate->ss.ps);
- aggstate->hashslot = ExecInitExtraTupleSlot(estate);
aggstate->sort_slot = ExecInitExtraTupleSlot(estate);
/*
@@ -2594,21 +2851,26 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
* For each phase, prepare grouping set data and fmgr lookup data for
* compare functions. Accumulate all_grouped_cols in passing.
*/
-
aggstate->phases = palloc0(numPhases * sizeof(AggStatePerPhaseData));
- for (phase = 0; phase < numPhases; ++phase)
+ aggstate->num_hashes = numHashes;
+ if (numHashes)
+ {
+ aggstate->perhash = palloc0(sizeof(AggStatePerHashData) * numHashes);
+ aggstate->phases[0].numsets = 0;
+ aggstate->phases[0].gset_lengths = palloc(numHashes * sizeof(int));
+ aggstate->phases[0].grouped_cols = palloc(numHashes * sizeof(Bitmapset *));
+ }
+
+ for (phaseidx = 0, phase = 0; phaseidx <= list_length(node->chain); ++phaseidx)
{
- AggStatePerPhase phasedata = &aggstate->phases[phase];
Agg *aggnode;
Sort *sortnode;
- int num_sets;
- if (phase > 0)
+ if (phaseidx > 0)
{
- aggnode = list_nth(node->chain, phase - 1);
+ aggnode = list_nth(node->chain, phaseidx - 1);
sortnode = (Sort *) aggnode->plan.lefttree;
- Assert(IsA(sortnode, Sort));
}
else
{
@@ -2616,53 +2878,91 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
sortnode = NULL;
}
- phasedata->numsets = num_sets = list_length(aggnode->groupingSets);
+ Assert(phase <= 1 || IsA(sortnode, Sort));
- if (num_sets)
+ if (aggnode->aggstrategy == AGG_HASHED
+ || aggnode->aggstrategy == AGG_MIXED)
{
- phasedata->gset_lengths = palloc(num_sets * sizeof(int));
- phasedata->grouped_cols = palloc(num_sets * sizeof(Bitmapset *));
+ AggStatePerPhase phasedata = &aggstate->phases[0];
+ AggStatePerHash perhash;
+ Bitmapset *cols = NULL;
- i = 0;
- foreach(l, aggnode->groupingSets)
- {
- int current_length = list_length(lfirst(l));
- Bitmapset *cols = NULL;
+ Assert(phase == 0);
+ i = phasedata->numsets++;
+ perhash = &aggstate->perhash[i];
- /* planner forces this to be correct */
- for (j = 0; j < current_length; ++j)
- cols = bms_add_member(cols, aggnode->grpColIdx[j]);
+ /* phase 0 always points to the "real" Agg in the hash case */
+ phasedata->aggnode = node;
+ phasedata->aggstrategy = node->aggstrategy;
- phasedata->grouped_cols[i] = cols;
- phasedata->gset_lengths[i] = current_length;
- ++i;
- }
+ /* but the actual Agg node representing this hash is saved here */
+ perhash->aggnode = aggnode;
+
+ phasedata->gset_lengths[i] = perhash->numCols = aggnode->numCols;
+
+ for (j = 0; j < aggnode->numCols; ++j)
+ cols = bms_add_member(cols, aggnode->grpColIdx[j]);
+
+ phasedata->grouped_cols[i] = cols;
- all_grouped_cols = bms_add_members(all_grouped_cols,
- phasedata->grouped_cols[0]);
+ all_grouped_cols = bms_add_members(all_grouped_cols, cols);
+ continue;
}
else
{
- Assert(phase == 0);
+ AggStatePerPhase phasedata = &aggstate->phases[++phase];
+ int num_sets;
- phasedata->gset_lengths = NULL;
- phasedata->grouped_cols = NULL;
- }
+ phasedata->numsets = num_sets = list_length(aggnode->groupingSets);
- /*
- * If we are grouping, precompute fmgr lookup data for inner loop.
- */
- if (aggnode->aggstrategy == AGG_SORTED)
- {
- Assert(aggnode->numCols > 0);
+ if (num_sets)
+ {
+ phasedata->gset_lengths = palloc(num_sets * sizeof(int));
+ phasedata->grouped_cols = palloc(num_sets * sizeof(Bitmapset *));
- phasedata->eqfunctions =
- execTuplesMatchPrepare(aggnode->numCols,
- aggnode->grpOperators);
- }
+ i = 0;
+ foreach(l, aggnode->groupingSets)
+ {
+ int current_length = list_length(lfirst(l));
+ Bitmapset *cols = NULL;
+
+ /* planner forces this to be correct */
+ for (j = 0; j < current_length; ++j)
+ cols = bms_add_member(cols, aggnode->grpColIdx[j]);
+
+ phasedata->grouped_cols[i] = cols;
+ phasedata->gset_lengths[i] = current_length;
+
+ ++i;
+ }
+
+ all_grouped_cols = bms_add_members(all_grouped_cols,
+ phasedata->grouped_cols[0]);
+ }
+ else
+ {
+ Assert(phaseidx == 0);
+
+ phasedata->gset_lengths = NULL;
+ phasedata->grouped_cols = NULL;
+ }
+
+ /*
+ * If we are grouping, precompute fmgr lookup data for inner loop.
+ */
+ if (aggnode->aggstrategy == AGG_SORTED)
+ {
+ Assert(aggnode->numCols > 0);
+
+ phasedata->eqfunctions =
+ execTuplesMatchPrepare(aggnode->numCols,
+ aggnode->grpOperators);
+ }
- phasedata->aggnode = aggnode;
- phasedata->sortnode = sortnode;
+ phasedata->aggnode = aggnode;
+ phasedata->aggstrategy = aggnode->aggstrategy;
+ phasedata->sortnode = sortnode;
+ }
}
/*
@@ -2673,13 +2973,6 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->all_grouped_cols = lcons_int(i, aggstate->all_grouped_cols);
/*
- * Initialize current phase-dependent values to initial phase
- */
-
- aggstate->current_phase = 0;
- initialize_phase(aggstate, 0);
-
- /*
* Set up aggregate-result storage in the output expr context, and also
* allocate my private per-agg working storage
*/
@@ -2693,23 +2986,30 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->peragg = peraggs;
aggstate->pertrans = pertransstates;
-
/*
* Hashing can only appear in the initial phase.
*/
- if (node->aggstrategy == AGG_HASHED)
+ if (use_hashing)
{
- find_hash_columns(aggstate);
+ for (i = 0; i < numHashes; ++i)
+ {
+ aggstate->perhash[i].hashslot = ExecInitExtraTupleSlot(estate);
+
+ execTuplesHashPrepare(aggstate->perhash[i].numCols,
+ aggstate->perhash[i].aggnode->grpOperators,
+ &aggstate->perhash[i].eqfunctions,
+ &aggstate->perhash[i].hashfunctions);
+ }
- execTuplesHashPrepare(node->numCols,
- node->grpOperators,
- &aggstate->phases[0].eqfunctions,
- &aggstate->hashfunctions);
+ /* this is an array of pointers, not structures */
+ aggstate->hash_pergroup = palloc0(sizeof(AggStatePerGroup) * numHashes);
+ find_hash_columns(aggstate);
build_hash_table(aggstate);
aggstate->table_filled = false;
}
- else
+
+ if (node->aggstrategy != AGG_HASHED)
{
AggStatePerGroup pergroup;
@@ -2720,6 +3020,26 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->pergroup = pergroup;
}
+ /*
+ * Initialize current phase-dependent values to initial phase. The initial
+ * phase is 1 (first sort pass) for all strategies that use sorting (if
+ * hashing is being done too, then phase 0 is processed last); but if only
+ * hashing is being done, then phase 0 is all there is.
+ */
+
+ if (node->aggstrategy == AGG_HASHED)
+ {
+ aggstate->current_phase = 0;
+ initialize_phase(aggstate, 0);
+ select_current_set(aggstate, 0, true);
+ }
+ else
+ {
+ aggstate->current_phase = 1;
+ initialize_phase(aggstate, 1);
+ select_current_set(aggstate, 0, false);
+ }
+
/* -----------------
* Perform lookups of aggregate function info, and initialize the
* unchanging fields of the per-agg and per-trans data.
@@ -3299,7 +3619,7 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
* We don't implement DISTINCT or ORDER BY aggs in the HASHED case
* (yet)
*/
- Assert(((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED);
+ Assert(aggstate->aggstrategy != AGG_HASHED && aggstate->aggstrategy != AGG_MIXED);
/* If we have only one input, we need its len/byval info. */
if (numInputs == 1)
@@ -3548,6 +3868,8 @@ ExecEndAgg(AggState *node)
/* And ensure any agg shutdown callbacks have been called */
for (setno = 0; setno < numGroupingSets; setno++)
ReScanExprContext(node->aggcontexts[setno]);
+ if (node->hashcontext)
+ ReScanExprContext(node->hashcontext);
/*
* We don't actually free any ExprContexts here (see comment in
@@ -3577,7 +3899,7 @@ ExecReScanAgg(AggState *node)
node->ss.ps.ps_TupFromTlist = false;
- if (aggnode->aggstrategy == AGG_HASHED)
+ if (node->aggstrategy == AGG_HASHED)
{
/*
* In the hashed case, if we haven't yet built the hash table then we
@@ -3597,7 +3919,8 @@ ExecReScanAgg(AggState *node)
if (outerPlan->chgParam == NULL &&
!bms_overlap(node->ss.ps.chgParam, aggnode->aggParams))
{
- ResetTupleHashIterator(node->hashtable, &node->hashiter);
+ ResetTupleHashIterator(node->perhash[0].hashtable, &node->perhash[0].hashiter);
+ select_current_set(node, 0, true);
return;
}
}
@@ -3622,11 +3945,7 @@ ExecReScanAgg(AggState *node)
* ExecReScan already did it. But we do need to reset our per-grouping-set
* contexts, which may have transvalues stored in them. (We use rescan
* rather than just reset because transfns may have registered callbacks
- * that need to be run now.)
- *
- * Note that with AGG_HASHED, the hash table is allocated in a sub-context
- * of the aggcontext. This used to be an issue, but now, resetting a
- * context automatically deletes sub-contexts too.
+ * that need to be run now.) For the AGG_HASHED case, see below.
*/
for (setno = 0; setno < numGroupingSets; setno++)
@@ -3646,13 +3965,22 @@ ExecReScanAgg(AggState *node)
MemSet(econtext->ecxt_aggvalues, 0, sizeof(Datum) * node->numaggs);
MemSet(econtext->ecxt_aggnulls, 0, sizeof(bool) * node->numaggs);
- if (aggnode->aggstrategy == AGG_HASHED)
+ /*
+ * With AGG_HASHED/MIXED, the hash table is allocated in a sub-context of
+ * the hashcontext. This used to be an issue, but now, resetting a context
+ * automatically deletes sub-contexts too.
+ */
+
+ if (node->aggstrategy == AGG_HASHED || node->aggstrategy == AGG_MIXED)
{
+ ReScanExprContext(node->hashcontext);
/* Rebuild an empty hash table */
build_hash_table(node);
node->table_filled = false;
+ /* iterator will be reset when the table is filled */
}
- else
+
+ if (node->aggstrategy != AGG_HASHED)
{
/*
* Reset the per-group state (in particular, mark transvalues null)
@@ -3660,8 +3988,8 @@ ExecReScanAgg(AggState *node)
MemSet(node->pergroup, 0,
sizeof(AggStatePerGroupData) * node->numaggs * numGroupingSets);
- /* reset to phase 0 */
- initialize_phase(node, 0);
+ /* reset to phase 1 */
+ initialize_phase(node, 1);
node->input_done = false;
node->projected_set = -1;
@@ -3702,7 +4030,7 @@ AggCheckCallContext(FunctionCallInfo fcinfo, MemoryContext *aggcontext)
if (aggcontext)
{
AggState *aggstate = ((AggState *) fcinfo->context);
- ExprContext *cxt = aggstate->aggcontexts[aggstate->current_set];
+ ExprContext *cxt = aggstate->curaggcontext;
*aggcontext = cxt->ecxt_per_tuple_memory;
}
@@ -3791,7 +4119,7 @@ AggRegisterCallback(FunctionCallInfo fcinfo,
if (fcinfo->context && IsA(fcinfo->context, AggState))
{
AggState *aggstate = (AggState *) fcinfo->context;
- ExprContext *cxt = aggstate->aggcontexts[aggstate->current_set];
+ ExprContext *cxt = aggstate->curaggcontext;
RegisterExprContextCallback(cxt, func, arg);
diff --git a/src/backend/lib/Makefile b/src/backend/lib/Makefile
index 2d2ba84..f222c6c 100644
--- a/src/backend/lib/Makefile
+++ b/src/backend/lib/Makefile
@@ -12,7 +12,7 @@ subdir = src/backend/lib
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
-OBJS = binaryheap.o bipartite_match.o hyperloglog.o ilist.o pairingheap.o \
- rbtree.o stringinfo.o
+OBJS = binaryheap.o bipartite_match.o hyperloglog.o ilist.o knapsack.o \
+ pairingheap.o rbtree.o stringinfo.o
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/lib/knapsack.c b/src/backend/lib/knapsack.c
new file mode 100644
index 0000000..54301b9
--- /dev/null
+++ b/src/backend/lib/knapsack.c
@@ -0,0 +1,112 @@
+/*-------------------------------------------------------------------------
+ *
+ * knapsack.c
+ * Knapsack problem solver
+ *
+ * Given input vectors of integral item weights (must be >= 0) and values
+ * (double >= 0), compute the set of items which produces the greatest total
+ * value without exceeding a specified total weight; each item is included at
+ * most once (this is the 0/1 knapsack problem). Weight 0 items will always be
+ * included.
+ *
+ * The performance of this algorithm is pseudo-polynomial, O(nW) where W is the
+ * weight limit. To use with non-integral weights or approximate solutions,
+ * the caller should pre-scale the input weights to a suitable range. This
+ * allows approximate solutions in polynomial time (the general case of the
+ * exact problem is NP-hard).
+ *
+ * Copyright (c) 2017, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/lib/knapsack.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <math.h>
+#include <limits.h>
+
+#include "lib/knapsack.h"
+#include "miscadmin.h"
+#include "nodes/bitmapset.h"
+#include "utils/builtins.h"
+#include "utils/memutils.h"
+#include "utils/palloc.h"
+
+/*
+ * DiscreteKnapsack
+ *
+ * The item_values input is optional; if omitted, all the items are assumed to
+ * have value 1.
+ *
+ * Returns a Bitmapset of the 0..(n-1) indexes of the items chosen for
+ * inclusion in the solution.
+ *
+ * This uses the usual dynamic-programming algorithm, adapted to reuse the
+ * memory on each pass (by working from larger weights to smaller). At the
+ * start of pass number i, the values[w] array contains the largest value
+ * computed with total weight <= w, using only items with indices < i; and
+ * sets[w] contains the bitmap of items actually used for that value. (The
+ * bitmapsets are all pre-initialized with an unused high bit so that memory
+ * allocation is done only once.)
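+ *
+ * For example (illustrative figures): max_weight = 5, item_weights =
+ * {4, 3, 2} and item_values = {10.0, 7.0, 6.0} yield the set {1, 2}:
+ * items 1 and 2 together weigh 5 and are worth 13.0, beating item 0
+ * alone at 10.0.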
+ */
+Bitmapset *
+DiscreteKnapsack(int max_weight, int num_items,
+ int *item_weights, double *item_values)
+{
+ MemoryContext local_ctx = AllocSetContextCreate(CurrentMemoryContext,
+ "Knapsack",
+ ALLOCSET_SMALL_MINSIZE,
+ ALLOCSET_SMALL_INITSIZE,
+ ALLOCSET_SMALL_MAXSIZE);
+ MemoryContext oldctx = MemoryContextSwitchTo(local_ctx);
+ double *values;
+ Bitmapset **sets;
+ Bitmapset *result;
+ int i,j;
+
+ Assert(max_weight >= 0);
+ Assert(num_items > 0 && item_weights);
+
+ values = palloc((1 + max_weight) * sizeof(double));
+ sets = palloc((1 + max_weight) * sizeof(Bitmapset *));
+
+ for (i = 0; i <= max_weight; ++i)
+ {
+ values[i] = 0;
+ sets[i] = bms_make_singleton(num_items);
+ }
+
+ for (i = 0; i < num_items; ++i)
+ {
+ int iw = item_weights[i];
+ double iv = item_values ? item_values[i] : 1;
+
+ for (j = max_weight; j >= 0; --j)
+ {
+ int ow = j - iw;
+ if (iw <= j && values[j] <= values[ow] + iv)
+ {
+ /* copy sets[ow] to sets[j] without realloc */
+ if (j != ow)
+ {
+ sets[j] = bms_del_members(sets[j], sets[j]);
+ sets[j] = bms_add_members(sets[j], sets[ow]);
+ }
+
+ sets[j] = bms_add_member(sets[j], i);
+
+ values[j] = values[ow] + iv;
+ }
+ }
+ }
+
+ MemoryContextSwitchTo(oldctx);
+
+ result = bms_del_member(bms_copy(sets[max_weight]), num_items);
+
+ MemoryContextDelete(local_ctx);
+
+ return result;
+}
diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c
index 252af5c..16a357e 100644
--- a/src/backend/nodes/bitmapset.c
+++ b/src/backend/nodes/bitmapset.c
@@ -21,7 +21,7 @@
#include "postgres.h"
#include "access/hash.h"
-
+#include "nodes/pg_list.h"
#define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD)
#define BITNUM(x) ((x) % BITS_PER_BITMAPWORD)
@@ -458,6 +458,36 @@ bms_overlap(const Bitmapset *a, const Bitmapset *b)
}
/*
+ * bms_overlap_list - does a set overlap an integer list?
+ */
+bool
+bms_overlap_list(const Bitmapset *a, const List *b)
+{
+ ListCell *lc;
+ int wordnum,
+ bitnum;
+
+ if (a == NULL || b == NIL)
+ return false;
+
+ foreach(lc, b)
+ {
+ int x = lfirst_int(lc);
+
+ /* XXX better to just return false for x<0 ? */
+ if (x < 0)
+ elog(ERROR, "negative bitmapset member not allowed");
+ wordnum = WORDNUM(x);
+ bitnum = BITNUM(x);
+ if (wordnum < a->nwords)
+ if ((a->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0)
+ return true;
+ }
+
+ return false;
+}
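
(A quick usage sketch of the new primitive, with invented values: given

    Bitmapset  *refs = bms_make_singleton(3);
    List       *gset = list_make2_int(3, 7);

bms_overlap_list(refs, gset) returns true, since sortgroupref 3 appears
in both. This is how the planner tests individual grouping sets against
the unsortable_refs/unhashable_refs sets further down.)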
+
+/*
* bms_nonempty_difference - do sets have a nonempty difference?
*/
bool
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 806d0a9..e6fd88d 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -1854,6 +1854,28 @@ _outAggPath(StringInfo str, const AggPath *node)
}
static void
+_outRollupData(StringInfo str, const RollupData *node)
+{
+ WRITE_NODE_TYPE("ROLLUP");
+
+ WRITE_NODE_FIELD(groupClause);
+ WRITE_NODE_FIELD(gsets);
+ WRITE_NODE_FIELD(gsets_data);
+ WRITE_FLOAT_FIELD(numGroups, "%.0f");
+ WRITE_BOOL_FIELD(hashable);
+ WRITE_BOOL_FIELD(is_hashed);
+}
+
+static void
+_outGroupingSetData(StringInfo str, const GroupingSetData *node)
+{
+ WRITE_NODE_TYPE("GSDATA");
+
+ WRITE_NODE_FIELD(set);
+ WRITE_FLOAT_FIELD(numGroups, "%.0f");
+}
+
+static void
_outGroupingSetsPath(StringInfo str, const GroupingSetsPath *node)
{
WRITE_NODE_TYPE("GROUPINGSETSPATH");
@@ -1861,8 +1883,8 @@ _outGroupingSetsPath(StringInfo str, const GroupingSetsPath *node)
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(subpath);
- WRITE_NODE_FIELD(rollup_groupclauses);
- WRITE_NODE_FIELD(rollup_lists);
+ WRITE_ENUM_FIELD(aggstrategy, AggStrategy);
+ WRITE_NODE_FIELD(rollups);
WRITE_NODE_FIELD(qual);
}
@@ -3764,6 +3786,12 @@ outNode(StringInfo str, const void *obj)
case T_PlannerParamItem:
_outPlannerParamItem(str, obj);
break;
+ case T_RollupData:
+ _outRollupData(str, obj);
+ break;
+ case T_GroupingSetData:
+ _outGroupingSetData(str, obj);
+ break;
case T_ExtensibleNode:
_outExtensibleNode(str, obj);
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index a52eb7e..a37deda 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1750,11 +1750,16 @@ cost_agg(Path *path, PlannerInfo *root,
total_cost = startup_cost + cpu_tuple_cost;
output_tuples = 1;
}
- else if (aggstrategy == AGG_SORTED)
+ else if (aggstrategy == AGG_SORTED || aggstrategy == AGG_MIXED)
{
/* Here we are able to deliver output on-the-fly */
startup_cost = input_startup_cost;
total_cost = input_total_cost;
+ if (aggstrategy == AGG_MIXED && !enable_hashagg)
+ {
+ startup_cost += disable_cost;
+ total_cost += disable_cost;
+ }
/* calcs phrased this way to match HASHED case, see note above */
total_cost += aggcosts->transCost.startup;
total_cost += aggcosts->transCost.per_tuple * input_tuples;
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index c7bcd9b..682535a 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -1677,18 +1677,15 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
{
Agg *plan;
Plan *subplan;
- List *rollup_groupclauses = best_path->rollup_groupclauses;
- List *rollup_lists = best_path->rollup_lists;
+ List *rollups = best_path->rollups;
AttrNumber *grouping_map;
int maxref;
List *chain;
- ListCell *lc,
- *lc2;
+ ListCell *lc;
/* Shouldn't get here without grouping sets */
Assert(root->parse->groupingSets);
- Assert(rollup_lists != NIL);
- Assert(list_length(rollup_lists) == list_length(rollup_groupclauses));
+ Assert(rollups != NIL);
/*
* Agg can project, so no need to be terribly picky about child tlist, but
@@ -1740,72 +1737,86 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
* costs will be shown by EXPLAIN.
*/
chain = NIL;
- if (list_length(rollup_groupclauses) > 1)
+ if (list_length(rollups) > 1)
{
- forboth(lc, rollup_groupclauses, lc2, rollup_lists)
+ ListCell *lc2 = lnext(list_head(rollups));
+ bool is_first_sort = ((RollupData *)linitial(rollups))->is_hashed;
+
+ for_each_cell(lc, lc2)
{
- List *groupClause = (List *) lfirst(lc);
- List *gsets = (List *) lfirst(lc2);
+ RollupData *rollup = lfirst(lc);
AttrNumber *new_grpColIdx;
- Plan *sort_plan;
+ Plan *sort_plan = NULL;
Plan *agg_plan;
+ AggStrategy strat;
- /* We want to iterate over all but the last rollup list elements */
- if (lnext(lc) == NULL)
- break;
+ new_grpColIdx = remap_groupColIdx(root, rollup->groupClause);
+
+ if (!rollup->is_hashed && !is_first_sort)
+ {
+ sort_plan = (Plan *)
+ make_sort_from_groupcols(rollup->groupClause,
+ new_grpColIdx,
+ subplan);
+ }
- new_grpColIdx = remap_groupColIdx(root, groupClause);
+ if (!rollup->is_hashed)
+ is_first_sort = false;
- sort_plan = (Plan *)
- make_sort_from_groupcols(groupClause,
- new_grpColIdx,
- subplan);
+ if (rollup->is_hashed)
+ strat = AGG_HASHED;
+ else if (list_length(linitial(rollup->gsets)) == 0)
+ strat = AGG_PLAIN;
+ else
+ strat = AGG_SORTED;
agg_plan = (Plan *) make_agg(NIL,
NIL,
- AGG_SORTED,
+ strat,
AGGSPLIT_SIMPLE,
- list_length((List *) linitial(gsets)),
+ list_length((List *) linitial(rollup->gsets)),
new_grpColIdx,
- extract_grouping_ops(groupClause),
- gsets,
+ extract_grouping_ops(rollup->groupClause),
+ rollup->gsets,
NIL,
- 0, /* numGroups not needed */
+ rollup->numGroups,
sort_plan);
/*
* Nuke stuff we don't need to avoid bloating debug output.
*/
- sort_plan->targetlist = NIL;
- sort_plan->lefttree = NULL;
+ if (sort_plan)
+ {
+ sort_plan->targetlist = NIL;
+ sort_plan->lefttree = NULL;
+ }
chain = lappend(chain, agg_plan);
}
}
/*
- * Now make the final Agg node
+ * Now make the real Agg node
*/
{
- List *groupClause = (List *) llast(rollup_groupclauses);
- List *gsets = (List *) llast(rollup_lists);
+ RollupData *rollup = linitial(rollups);
AttrNumber *top_grpColIdx;
int numGroupCols;
- top_grpColIdx = remap_groupColIdx(root, groupClause);
+ top_grpColIdx = remap_groupColIdx(root, rollup->groupClause);
- numGroupCols = list_length((List *) linitial(gsets));
+ numGroupCols = list_length((List *) linitial(rollup->gsets));
plan = make_agg(build_path_tlist(root, &best_path->path),
best_path->qual,
- (numGroupCols > 0) ? AGG_SORTED : AGG_PLAIN,
+ best_path->aggstrategy,
AGGSPLIT_SIMPLE,
numGroupCols,
top_grpColIdx,
- extract_grouping_ops(groupClause),
- gsets,
+ extract_grouping_ops(rollup->groupClause),
+ rollup->gsets,
chain,
- 0, /* numGroups not needed */
+ rollup->numGroups,
subplan);
/* Copy cost data from Path to Plan */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 207290f..354002b 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -30,6 +30,7 @@
#include "foreign/fdwapi.h"
#include "miscadmin.h"
#include "lib/bipartite_match.h"
+#include "lib/knapsack.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#ifdef OPTIMIZER_DEBUG
@@ -89,12 +90,31 @@ typedef struct
List *groupClause; /* overrides parse->groupClause */
} standard_qp_extra;
+/*
+ * Data specific to grouping sets
+ */
+
+typedef struct
+{
+ List *rollups;
+ List *hash_sets_idx;
+ double dNumHashGroups;
+ bool any_hashable;
+ Bitmapset *unsortable_refs;
+ Bitmapset *unhashable_refs;
+ List *unsortable_sets;
+ int *tleref_to_colnum_map;
+} grouping_sets_data;
+
/* Local functions */
static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
static void inheritance_planner(PlannerInfo *root);
static void grouping_planner(PlannerInfo *root, bool inheritance_update,
double tuple_fraction);
+static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root);
+static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
+ int *tleref_to_colnum_map);
static void preprocess_rowmarks(PlannerInfo *root);
static double preprocess_limit(PlannerInfo *root,
double tuple_fraction,
@@ -107,8 +127,7 @@ static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
static void standard_qp_callback(PlannerInfo *root, void *extra);
static double get_number_of_groups(PlannerInfo *root,
double path_rows,
- List *rollup_lists,
- List *rollup_groupclauses);
+ grouping_sets_data *gd);
static Size estimate_hashagg_tablesize(Path *path,
const AggClauseCosts *agg_costs,
double dNumGroups);
@@ -116,8 +135,16 @@ static RelOptInfo *create_grouping_paths(PlannerInfo *root,
RelOptInfo *input_rel,
PathTarget *target,
const AggClauseCosts *agg_costs,
- List *rollup_lists,
- List *rollup_groupclauses);
+ grouping_sets_data *gd);
+static void consider_groupingsets_paths(PlannerInfo *root,
+ RelOptInfo *grouped_rel,
+ Path *path,
+ bool is_sorted,
+ bool can_hash,
+ PathTarget *target,
+ grouping_sets_data *gd,
+ const AggClauseCosts *agg_costs,
+ double dNumGroups);
static RelOptInfo *create_window_paths(PlannerInfo *root,
RelOptInfo *input_rel,
PathTarget *input_target,
@@ -1532,8 +1559,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
AggClauseCosts agg_costs;
WindowFuncLists *wflists = NULL;
List *activeWindows = NIL;
- List *rollup_lists = NIL;
- List *rollup_groupclauses = NIL;
+ grouping_sets_data *gset_data = NULL;
standard_qp_extra qp_extra;
/* A recursive query should always have setOperations */
@@ -1542,84 +1568,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
/* Preprocess grouping sets and GROUP BY clause, if any */
if (parse->groupingSets)
{
- int *tleref_to_colnum_map;
- List *sets;
- int maxref;
- ListCell *lc;
- ListCell *lc2;
- ListCell *lc_set;
-
- parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1);
-
- /* Identify max SortGroupRef in groupClause, for array sizing */
- maxref = 0;
- foreach(lc, parse->groupClause)
- {
- SortGroupClause *gc = lfirst(lc);
-
- if (gc->tleSortGroupRef > maxref)
- maxref = gc->tleSortGroupRef;
- }
-
- /* Allocate workspace array for remapping */
- tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
-
- /* Examine the rollup sets */
- sets = extract_rollup_sets(parse->groupingSets);
-
- foreach(lc_set, sets)
- {
- List *current_sets = (List *) lfirst(lc_set);
- List *groupclause;
- int ref;
-
- /*
- * Reorder the current list of grouping sets into correct
- * prefix order. If only one aggregation pass is needed, try
- * to make the list match the ORDER BY clause; if more than
- * one pass is needed, we don't bother with that.
- */
- current_sets = reorder_grouping_sets(current_sets,
- (list_length(sets) == 1
- ? parse->sortClause
- : NIL));
-
- /*
- * Order the groupClause appropriately. If the first grouping
- * set is empty, this can match regular GROUP BY
- * preprocessing, otherwise we have to force the groupClause
- * to match that grouping set's order.
- */
- groupclause = preprocess_groupclause(root,
- linitial(current_sets));
-
- /*
- * Now that we've pinned down an order for the groupClause for
- * this list of grouping sets, we need to remap the entries in
- * the grouping sets from sortgrouprefs to plain indices
- * (0-based) into the groupClause for this collection of
- * grouping sets.
- */
- ref = 0;
- foreach(lc, groupclause)
- {
- SortGroupClause *gc = lfirst(lc);
-
- tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
- }
-
- foreach(lc, current_sets)
- {
- foreach(lc2, (List *) lfirst(lc))
- {
- lfirst_int(lc2) = tleref_to_colnum_map[lfirst_int(lc2)];
- }
- }
-
- /* Save the reordered sets and corresponding groupclauses */
- rollup_lists = lcons(current_sets, rollup_lists);
- rollup_groupclauses = lcons(groupclause, rollup_groupclauses);
- }
+ gset_data = preprocess_grouping_sets(root);
}
else
{
@@ -1719,8 +1668,9 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
/* Set up data needed by standard_qp_callback */
qp_extra.tlist = tlist;
qp_extra.activeWindows = activeWindows;
- qp_extra.groupClause =
- parse->groupingSets ? llast(rollup_groupclauses) : parse->groupClause;
+ qp_extra.groupClause = (gset_data
+ ? (gset_data->rollups ? ((RollupData *) linitial(gset_data->rollups))->groupClause : NIL)
+ : parse->groupClause);
/*
* Generate the best unsorted and presorted paths for the scan/join
@@ -1872,8 +1822,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
current_rel,
grouping_target,
&agg_costs,
- rollup_lists,
- rollup_groupclauses);
+ gset_data);
}
/*
@@ -1900,7 +1849,6 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
current_rel = create_distinct_paths(root,
current_rel);
}
-
} /* end of if (setOperations) */
/*
@@ -2082,6 +2030,213 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
}
+/*
+ * preprocess_grouping_sets
+ *
+ * Do preparatory work on grouping sets before grouping_planner begins: expand
+ * the sets, classify them by sortability and hashability, and build the
+ * RollupData lists.
+ */
+static grouping_sets_data *
+preprocess_grouping_sets(PlannerInfo *root)
+{
+ Query *parse = root->parse;
+ List *sets;
+ int maxref = 0;
+ ListCell *lc;
+ ListCell *lc_set;
+ grouping_sets_data *gd = palloc0(sizeof(grouping_sets_data));
+
+ parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1);
+
+ gd->any_hashable = false;
+ gd->unhashable_refs = NULL;
+ gd->unsortable_refs = NULL;
+ gd->unsortable_sets = NIL;
+
+ if (parse->groupClause)
+ {
+ ListCell *lc;
+
+ foreach(lc, parse->groupClause)
+ {
+ SortGroupClause *gc = lfirst(lc);
+ Index ref = gc->tleSortGroupRef;
+
+ if (ref > maxref)
+ maxref = ref;
+
+ if (!gc->hashable)
+ gd->unhashable_refs = bms_add_member(gd->unhashable_refs, ref);
+
+ if (!OidIsValid(gc->sortop))
+ gd->unsortable_refs = bms_add_member(gd->unsortable_refs, ref);
+ }
+ }
+
+ /* Allocate workspace array for remapping */
+ gd->tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
+
+ /*
+ * If we have any unsortable sets, we must extract them before trying to
+ * prepare rollups. Unsortable sets don't go through reorder_grouping_sets,
+ * so we must apply the GroupingSetData annotation here.
+ */
+ if (!bms_is_empty(gd->unsortable_refs))
+ {
+ List *sortable_sets = NIL;
+
+ foreach(lc, parse->groupingSets)
+ {
+ List *gset = lfirst(lc);
+
+ if (bms_overlap_list(gd->unsortable_refs, gset))
+ {
+ GroupingSetData *gs = makeNode(GroupingSetData);
+ gs->set = gset;
+ gd->unsortable_sets = lappend(gd->unsortable_sets, gs);
+
+ /*
+ * We must enforce here that an unsortable set is hashable;
+ * later code assumes this. Parse analysis only checks that
+ * every individual column is either hashable or sortable.
+ *
+ * Note that passing this test doesn't guarantee we can
+ * generate a plan; there might be other showstoppers.
+ */
+
+ if (bms_overlap_list(gd->unhashable_refs, gset))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("could not implement GROUP BY"),
+ errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
+ }
+ else
+ sortable_sets = lappend(sortable_sets, gset);
+ }
+
+ if (sortable_sets)
+ sets = extract_rollup_sets(sortable_sets);
+ else
+ sets = NIL;
+ }
+ else
+ sets = extract_rollup_sets(parse->groupingSets);
+
+ foreach(lc_set, sets)
+ {
+ List *current_sets = (List *) lfirst(lc_set);
+ RollupData *rollup = makeNode(RollupData);
+ GroupingSetData *gs;
+
+ /*
+ * Reorder the current list of grouping sets into correct
+ * prefix order. If only one aggregation pass is needed, try
+ * to make the list match the ORDER BY clause; if more than
+ * one pass is needed, we don't bother with that.
+ *
+ * Note that this reorders the sets from smallest-member-first
+ * to largest-member-first, and applies the GroupingSetData
+ * annotations, though the data will be filled in later.
+ */
+ current_sets = reorder_grouping_sets(current_sets,
+ (list_length(sets) == 1
+ ? parse->sortClause
+ : NIL));
+
+ /*
+ * Get the initial (and therefore largest) grouping set.
+ */
+ gs = linitial(current_sets);
+
+ /*
+ * Order the groupClause appropriately. If the first grouping set is
+ * empty, then the groupClause must also be empty; otherwise we have to
+ * force the groupClause to match that grouping set's order.
+ *
+ * (The first grouping set can be empty even though parse->groupClause
+ * is not empty only if all non-empty grouping sets are unsortable. The
+ * groupClauses for hashed grouping sets are built later on.)
+ */
+ if (gs->set)
+ rollup->groupClause = preprocess_groupclause(root, gs->set);
+ else
+ rollup->groupClause = NIL;
+
+ /*
+ * Is it hashable? We pretend empty sets are hashable even though we
+ * actually force them not to be hashed later. But don't bother if
+ * there's nothing but empty sets.
+ */
+ if (gs->set &&
+ !bms_overlap_list(gd->unhashable_refs, gs->set))
+ {
+ rollup->hashable = true;
+ gd->any_hashable = true;
+ }
+
+ /*
+ * Now that we've pinned down an order for the groupClause for this
+ * list of grouping sets, we need to remap the entries in the grouping
+ * sets from sortgrouprefs to plain indices (0-based) into the
+ * groupClause for this collection of grouping sets. We keep the
+ * original form for later use, though.
+ */
+ rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
+ current_sets,
+ gd->tleref_to_colnum_map);
+ rollup->gsets_data = current_sets;
+
+ gd->rollups = lappend(gd->rollups, rollup);
+ }
+
+ if (gd->unsortable_sets)
+ {
+ /*
+ * We have not yet pinned down a groupClause for these sets, but we will
+ * need index-based lists for estimation purposes. Construct hash_sets_idx
+ * based on the entire original groupclause for now.
+ */
+ gd->hash_sets_idx = remap_to_groupclause_idx(parse->groupClause,
+ gd->unsortable_sets,
+ gd->tleref_to_colnum_map);
+ gd->any_hashable = true;
+ }
+
+ return gd;
+}
+
+/*
+ * Given a groupclause and a list of GroupingSetData, return equivalent sets
+ * (without annotation) mapped to indexes into the given groupclause.
+ */
+static List *
+remap_to_groupclause_idx(List *groupClause,
+ List *gsets,
+ int *tleref_to_colnum_map)
+{
+ int ref = 0;
+ List *result = NIL;
+ ListCell *lc;
+
+ foreach(lc, groupClause)
+ {
+ SortGroupClause *gc = lfirst(lc);
+ tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
+ }
+
+ foreach(lc, gsets)
+ {
+ List *set = NIL;
+ ListCell *lc2;
+ GroupingSetData *gs = lfirst(lc);
+
+ foreach(lc2, gs->set)
+ {
+ set = lappend_int(set, tleref_to_colnum_map[lfirst_int(lc2)]);
+ }
+
+ result = lappend(result, set);
+ }
+
+ return result;
+}
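
To make the remapping concrete, a small invented example: if the given
groupClause entries have tleSortGroupRefs (3, 1, 4), the map becomes
3->0, 1->1, 4->2, and gsets_data sets of

    ((4,3), (3), ())

come back as the plain index lists

    ((2,0), (0), ())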
+
+
+
/*
* Detect whether a plan node is a "dummy" plan created when a relation
* is deemed not to need scanning due to constraint exclusion.
@@ -3051,7 +3206,7 @@ extract_rollup_sets(List *groupingSets)
/*
* Reorder the elements of a list of grouping sets such that they have correct
- * prefix relationships.
+ * prefix relationships. Also inserts the GroupingSetData annotations.
*
* The input must be ordered with smallest sets first; the result is returned
* with largest sets first. Note that the result shares no list substructure
@@ -3074,6 +3229,7 @@ reorder_grouping_sets(List *groupingsets, List *sortclause)
{
List *candidate = lfirst(lc);
List *new_elems = list_difference_int(candidate, previous);
+ GroupingSetData *gs = makeNode(GroupingSetData);
if (list_length(new_elems) > 0)
{
@@ -3101,7 +3257,8 @@ reorder_grouping_sets(List *groupingsets, List *sortclause)
}
}
- result = lcons(list_copy(previous), result);
+ gs->set = list_copy(previous);
+ result = lcons(gs, result);
list_free(new_elems);
}
@@ -3196,15 +3353,16 @@ standard_qp_callback(PlannerInfo *root, void *extra)
* Estimate number of groups produced by grouping clauses (1 if not grouping)
*
* path_rows: number of output rows from scan/join step
- * rollup_lists: list of grouping sets, or NIL if not doing grouping sets
- * rollup_groupclauses: list of grouping clauses for grouping sets,
- * or NIL if not doing grouping sets
+ * gd: grouping sets data, or NULL if not doing grouping sets
+ *
+ * If doing grouping sets, we also annotate the gsets data with the estimates
+ * for each set and each individual rollup list, with a view to later
+ * determining whether some combination of them could be hashed instead.
*/
static double
get_number_of_groups(PlannerInfo *root,
double path_rows,
- List *rollup_lists,
- List *rollup_groupclauses)
+ grouping_sets_data *gd)
{
Query *parse = root->parse;
double dNumGroups;
@@ -3216,28 +3374,58 @@ get_number_of_groups(PlannerInfo *root,
if (parse->groupingSets)
{
/* Add up the estimates for each grouping set */
- ListCell *lc,
- *lc2;
+ ListCell *lc;
+ ListCell *lc2;
dNumGroups = 0;
- forboth(lc, rollup_groupclauses, lc2, rollup_lists)
+
+ foreach(lc, gd->rollups)
{
- List *groupClause = (List *) lfirst(lc);
- List *gsets = (List *) lfirst(lc2);
- ListCell *lc3;
+ RollupData *rollup = lfirst(lc);
+ ListCell *lc;
- groupExprs = get_sortgrouplist_exprs(groupClause,
+ groupExprs = get_sortgrouplist_exprs(rollup->groupClause,
parse->targetList);
- foreach(lc3, gsets)
+ rollup->numGroups = 0.0;
+
+ forboth(lc, rollup->gsets, lc2, rollup->gsets_data)
{
- List *gset = (List *) lfirst(lc3);
+ List *gset = (List *) lfirst(lc);
+ GroupingSetData *gs = lfirst(lc2);
+ double numGroups = estimate_num_groups(root,
+ groupExprs,
+ path_rows,
+ &gset);
+ gs->numGroups = numGroups;
+ rollup->numGroups += numGroups;
+ }
+
+ dNumGroups += rollup->numGroups;
+ }
+
+ if (gd->hash_sets_idx)
+ {
+ ListCell *lc;
- dNumGroups += estimate_num_groups(root,
- groupExprs,
- path_rows,
- &gset);
+ gd->dNumHashGroups = 0;
+
+ groupExprs = get_sortgrouplist_exprs(parse->groupClause,
+ parse->targetList);
+
+ forboth(lc, gd->hash_sets_idx, lc2, gd->unsortable_sets)
+ {
+ List *gset = (List *) lfirst(lc);
+ GroupingSetData *gs = lfirst(lc2);
+ double numGroups = estimate_num_groups(root,
+ groupExprs,
+ path_rows,
+ &gset);
+ gs->numGroups = numGroups;
+ gd->dNumHashGroups += numGroups;
}
+
+ dNumGroups += gd->dNumHashGroups;
}
}
else
@@ -3273,6 +3461,11 @@ get_number_of_groups(PlannerInfo *root,
* estimate_hashagg_tablesize
* estimate the number of bytes that a hash aggregate hashtable will
* require based on the agg_costs, path width and dNumGroups.
+ *
+ * XXX this may be over-estimating the size now that hashagg knows to omit
+ * unneeded columns from the hashtable. Also for mixed-mode grouping sets,
+ * grouping columns not in the hashed set are counted here even though hashagg
+ * won't store them. Is this a problem?
*/
static Size
estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs,
@@ -3323,8 +3516,7 @@ create_grouping_paths(PlannerInfo *root,
RelOptInfo *input_rel,
PathTarget *target,
const AggClauseCosts *agg_costs,
- List *rollup_lists,
- List *rollup_groupclauses)
+ grouping_sets_data *gd)
{
Query *parse = root->parse;
Path *cheapest_path = input_rel->cheapest_total_path;
@@ -3432,8 +3624,7 @@ create_grouping_paths(PlannerInfo *root,
*/
dNumGroups = get_number_of_groups(root,
cheapest_path->rows,
- rollup_lists,
- rollup_groupclauses);
+ gd);
/*
* Determine whether it's possible to perform sort-based implementations
@@ -3441,15 +3632,22 @@ create_grouping_paths(PlannerInfo *root,
* grouping_is_sortable() is trivially true, and all the
* pathkeys_contained_in() tests will succeed too, so that we'll consider
* every surviving input path.)
+ *
+ * If we have grouping sets, we might be able to sort some but not all of
+ * them; in that case, can_sort must be true whenever we have to consider
+ * any sorted-input plan.
*/
- can_sort = grouping_is_sortable(parse->groupClause);
+ can_sort = (gd && gd->rollups != NIL)
+ || grouping_is_sortable(parse->groupClause);
/*
* Determine whether we should consider hash-based implementations of
* grouping.
*
- * Hashed aggregation only applies if we're grouping. We currently can't
- * hash if there are grouping sets, though.
+ * Hashed aggregation only applies if we're grouping. If we have grouping
+ * sets, some groups might be hashable but others not; in this case we set
+ * can_hash true as long as there is nothing globally preventing us from
+ * hashing (and we should therefore consider plans with hashes).
*
* Executor doesn't support hashed aggregation with DISTINCT or ORDER BY
* aggregates. (Doing so would imply storing *all* the input values in
@@ -3462,9 +3660,8 @@ create_grouping_paths(PlannerInfo *root,
* other gating conditions, so we want to do it last.
*/
can_hash = (parse->groupClause != NIL &&
- parse->groupingSets == NIL &&
agg_costs->numOrderedAggs == 0 &&
- grouping_is_hashable(parse->groupClause));
+ (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause)));
/*
* If grouped_rel->consider_parallel is true, then paths that we generate
@@ -3530,8 +3727,7 @@ create_grouping_paths(PlannerInfo *root,
/* Estimate number of partial groups. */
dNumPartialGroups = get_number_of_groups(root,
cheapest_partial_path->rows,
- NIL,
- NIL);
+ gd);
/*
* Collect statistics about aggregates for estimating costs of
@@ -3664,20 +3860,9 @@ create_grouping_paths(PlannerInfo *root,
/* Now decide what to stick atop it */
if (parse->groupingSets)
{
- /*
- * We have grouping sets, possibly with aggregation. Make
- * a GroupingSetsPath.
- */
- add_path(grouped_rel, (Path *)
- create_groupingsets_path(root,
- grouped_rel,
- path,
- target,
- (List *) parse->havingQual,
- rollup_lists,
- rollup_groupclauses,
- agg_costs,
- dNumGroups));
+ consider_groupingsets_paths(root, grouped_rel,
+ path, true, can_hash, target,
+ gd, agg_costs, dNumGroups);
}
else if (parse->hasAggs)
{
@@ -3775,33 +3960,45 @@ create_grouping_paths(PlannerInfo *root,
if (can_hash)
{
- hashaggtablesize = estimate_hashagg_tablesize(cheapest_path,
- agg_costs,
- dNumGroups);
-
- /*
- * Provided that the estimated size of the hashtable does not exceed
- * work_mem, we'll generate a HashAgg Path, although if we were unable
- * to sort above, then we'd better generate a Path, so that we at
- * least have one.
- */
- if (hashaggtablesize < work_mem * 1024L ||
- grouped_rel->pathlist == NIL)
+ if (parse->groupingSets)
{
/*
- * We just need an Agg over the cheapest-total input path, since
- * input order won't matter.
+ * Try for a hash-only groupingsets path over unsorted input.
*/
- add_path(grouped_rel, (Path *)
- create_agg_path(root, grouped_rel,
- cheapest_path,
- target,
- AGG_HASHED,
- AGGSPLIT_SIMPLE,
- parse->groupClause,
- (List *) parse->havingQual,
- agg_costs,
- dNumGroups));
+ consider_groupingsets_paths(root, grouped_rel,
+ cheapest_path, false, true, target,
+ gd, agg_costs, dNumGroups);
+ }
+ else
+ {
+ hashaggtablesize = estimate_hashagg_tablesize(cheapest_path,
+ agg_costs,
+ dNumGroups);
+
+ /*
+ * Provided that the estimated size of the hashtable does not exceed
+ * work_mem, we'll generate a HashAgg Path, although if we were unable
+ * to sort above, then we'd better generate a Path, so that we at
+ * least have one.
+ */
+ if (hashaggtablesize < work_mem * 1024L ||
+ grouped_rel->pathlist == NIL)
+ {
+ /*
+ * We just need an Agg over the cheapest-total input path, since
+ * input order won't matter.
+ */
+ add_path(grouped_rel, (Path *)
+ create_agg_path(root, grouped_rel,
+ cheapest_path,
+ target,
+ AGG_HASHED,
+ AGGSPLIT_SIMPLE,
+ parse->groupClause,
+ (List *) parse->havingQual,
+ agg_costs,
+ dNumGroups));
+ }
}
/*
@@ -3870,6 +4067,310 @@ create_grouping_paths(PlannerInfo *root,
return grouped_rel;
}
+
+/*
+ * For a given input path, consider the possible ways of doing grouping sets on
+ * it, by combinations of hashing and sorting. This can be called multiple
+ * times, so it's important that it not scribble on input. No result is
+ * returned, but any generated paths are added to grouped_rel.
+ */
+
+static void
+consider_groupingsets_paths(PlannerInfo *root,
+ RelOptInfo *grouped_rel,
+ Path *path,
+ bool is_sorted,
+ bool can_hash,
+ PathTarget *target,
+ grouping_sets_data *gd,
+ const AggClauseCosts *agg_costs,
+ double dNumGroups)
+{
+ Query *parse = root->parse;
+
+ /*
+ * If we're not being offered sorted input, then only consider plans that
+ * can be done entirely by hashing.
+ *
+ * We can hash everything if it looks like it'll fit in work_mem. But if
+ * the input is actually sorted despite not being advertised as such, we
+ * prefer to make use of that in order to use less memory.
+ *
+ * If none of the grouping sets are sortable, then ignore the work_mem
+ * limit and generate a path anyway, since otherwise we'll just fail.
+ */
+ if (!is_sorted)
+ {
+ List *new_rollups = NIL;
+ RollupData *unhashed_rollup = NULL;
+ List *sets_data;
+ List *empty_sets_data = NIL;
+ List *empty_sets = NIL;
+ ListCell *lc;
+ ListCell *l_start = list_head(gd->rollups);
+ AggStrategy strat = AGG_HASHED;
+ Size hashsize;
+ double exclude_groups = 0.0;
+
+ Assert(can_hash);
+
+ if (pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
+ {
+ unhashed_rollup = lfirst(l_start);
+ exclude_groups = unhashed_rollup->numGroups;
+ l_start = lnext(l_start);
+ }
+
+ hashsize = estimate_hashagg_tablesize(path,
+ agg_costs,
+ dNumGroups - exclude_groups);
+
+ if (hashsize > work_mem * 1024L && gd->rollups)
+ return; /* nope, won't fit */
+
+ /*
+ * We need to burst the existing rollups list into individual grouping
+ * sets and recompute a groupClause for each set.
+ */
+ sets_data = list_copy(gd->unsortable_sets);
+
+ for_each_cell(lc, l_start)
+ {
+ RollupData *rollup = lfirst(lc);
+
+ /*
+ * If we find an unhashable rollup that's not been skipped by the
+ * "actually sorted" check above, we can't cope; we'd need sorted
+ * input (with a different sort order) but we can't get that here.
+ * So bail out; we'll get a valid path from the is_sorted case
+ * instead.
+ */
+ if (!rollup->hashable)
+ return;
+ else
+ sets_data = list_concat(sets_data, list_copy(rollup->gsets_data));
+ }
+ foreach(lc, sets_data)
+ {
+ GroupingSetData *gs = lfirst(lc);
+ List *gset = gs->set;
+ RollupData *rollup;
+
+ if (gset == NIL)
+ {
+ empty_sets_data = lappend(empty_sets_data, gs);
+ empty_sets = lappend(empty_sets, NIL);
+ }
+ else
+ {
+ rollup = makeNode(RollupData);
+
+ rollup->groupClause = preprocess_groupclause(root, gset);
+ rollup->gsets_data = list_make1(gs);
+ rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
+ rollup->gsets_data,
+ gd->tleref_to_colnum_map);
+ rollup->numGroups = gs->numGroups;
+ rollup->hashable = true;
+ rollup->is_hashed = true;
+ new_rollups = lappend(new_rollups, rollup);
+ }
+ }
+
+ /* If we didn't find anything nonempty to hash, then bail. */
+ if (new_rollups == NIL)
+ return;
+
+ /*
+ * If there were empty grouping sets, they should have been in the first
+ * rollup.
+ */
+ Assert(!unhashed_rollup || !empty_sets);
+
+ if (unhashed_rollup)
+ {
+ new_rollups = lappend(new_rollups, unhashed_rollup);
+ strat = AGG_MIXED;
+ }
+ else if (empty_sets)
+ {
+ RollupData *rollup = makeNode(RollupData);
+
+ rollup->groupClause = NIL;
+ rollup->gsets_data = empty_sets_data;
+ rollup->gsets = empty_sets;
+ rollup->numGroups = list_length(empty_sets);
+ rollup->hashable = false;
+ rollup->is_hashed = false;
+ new_rollups = lappend(new_rollups, rollup);
+ strat = AGG_MIXED;
+ }
+
+ add_path(grouped_rel, (Path *)
+ create_groupingsets_path(root,
+ grouped_rel,
+ path,
+ target,
+ (List *) parse->havingQual,
+ strat,
+ new_rollups,
+ agg_costs,
+ dNumGroups));
+ return;
+ }
+
+ /*
+ * If we have sorted input but nothing we can do with it, bail.
+ */
+
+ if (list_length(gd->rollups) == 0)
+ return;
+
+ /*
+ * Given sorted input, we try to make two paths: one sorted and one mixed
+ * sort/hash. (We need to try both because hashagg might be disabled, or
+ * some columns might not be sortable.)
+ *
+ * can_hash is passed in as false if some obstacle elsewhere (such as
+ * ordered aggs) means that we shouldn't consider hashing at all.
+ */
+
+ if (can_hash && gd->any_hashable)
+ {
+ List *rollups = NIL;
+ List *hash_sets = list_copy(gd->unsortable_sets);
+ double availspace = (work_mem * 1024.0);
+ ListCell *lc;
+
+ /*
+ * Account first for space needed for groups we can't sort at all.
+ */
+ availspace -= (double) estimate_hashagg_tablesize(path, agg_costs, gd->dNumHashGroups);
+
+ if (availspace > 0 && list_length(gd->rollups) > 1)
+ {
+ double scale;
+ int k_capacity;
+ int *k_weights = palloc(list_length(gd->rollups) * sizeof(int));
+ Bitmapset *hash_items = NULL;
+ int i;
+
+ /*
+ * To use the discrete knapsack, we need to scale the values to a
+ * reasonably small bounded range. We choose to allow a 5% error
+ * margin; we have no more than 4096 rollups in the worst possible
+ * case, which with a 5% error margin will require a bit over 42MB
+ * of workspace. (Anyone wanting to plan queries that complex had
+ * better have the memory for it. In more reasonable cases, with
+ * no more than a couple of dozen rollups, the memory usage will be
+ * negligible.)
+ */
+ scale = availspace / (20.0 * list_length(gd->rollups));
+ k_capacity = (int) floor(availspace / scale);
+
+ /*
+ * We leave the first rollup out of consideration since it's the
+ * one that matches the input sort order. We assign indexes "i" to
+ * only those entries considered for hashing; the second loop,
+ * below, must use the same condition.
+ */
+ i = 0;
+ for_each_cell(lc, lnext(list_head(gd->rollups)))
+ {
+ RollupData *rollup = lfirst(lc);
+ if (rollup->hashable)
+ {
+ double sz = estimate_hashagg_tablesize(path,
+ agg_costs,
+ rollup->numGroups);
+ k_weights[i] = (int) floor(sz / scale);
+ ++i;
+ }
+ }
+
+ /*
+ * Apply knapsack algorithm; compute the set of items which
+ * maximizes the value stored (in this case the number of sorts
+ * saved) while keeping the total size (approximately) within
+ * capacity.
+ */
+ if (i > 0)
+ hash_items = DiscreteKnapsack(k_capacity, i, k_weights, NULL);
+
+ if (!bms_is_empty(hash_items))
+ {
+ rollups = list_make1(linitial(gd->rollups));
+
+ i = 0;
+ for_each_cell(lc, lnext(list_head(gd->rollups)))
+ {
+ RollupData *rollup = lfirst(lc);
+ if (rollup->hashable)
+ {
+ if (bms_is_member(i, hash_items))
+ hash_sets = list_concat(hash_sets, list_copy(rollup->gsets_data));
+ else
+ rollups = lappend(rollups, rollup);
+ ++i;
+ }
+ else
+ rollups = lappend(rollups, rollup);
+ }
+ }
+ }
+
+ if (!rollups && hash_sets)
+ rollups = list_copy(gd->rollups);
+
+ foreach(lc, hash_sets)
+ {
+ GroupingSetData *gs = lfirst(lc);
+ RollupData *rollup = makeNode(RollupData);
+
+ Assert(gs->set != NIL);
+
+ rollup->groupClause = preprocess_groupclause(root, gs->set);
+ rollup->gsets_data = list_make1(gs);
+ rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
+ rollup->gsets_data,
+ gd->tleref_to_colnum_map);
+ rollup->numGroups = gs->numGroups;
+ rollup->hashable = true;
+ rollup->is_hashed = true;
+ rollups = lcons(rollup, rollups);
+ }
+
+ if (rollups)
+ {
+ add_path(grouped_rel, (Path *)
+ create_groupingsets_path(root,
+ grouped_rel,
+ path,
+ target,
+ (List *) parse->havingQual,
+ AGG_MIXED,
+ rollups,
+ agg_costs,
+ dNumGroups));
+ }
+ }
+
+ /*
+ * Now try the simple sorted case.
+ */
+ if (!gd->unsortable_sets)
+ add_path(grouped_rel, (Path *)
+ create_groupingsets_path(root,
+ grouped_rel,
+ path,
+ target,
+ (List *) parse->havingQual,
+ AGG_SORTED,
+ gd->rollups,
+ agg_costs,
+ dNumGroups));
+}
+
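
To make the knapsack scaling above concrete, a worked example with
invented numbers: say work_mem is 4MB, there are no unsortable sets, and
gd->rollups holds four rollups (the first stays sorted; the other three
are hashable, with estimated hashtable sizes of 1MB, 2.5MB and 3MB):

    availspace = 4194304 bytes
    scale      = availspace / (20.0 * 4) = 52428.8 bytes per unit
    k_capacity = floor(availspace / scale) = 80 units
    k_weights  = {20, 50, 60}      (each size divided by scale)

DiscreteKnapsack(80, 3, k_weights, NULL) then selects two items (passing
NULL values means every item counts as one sort saved), e.g. the 20- and
50-unit rollups; those two get hashed and the 60-unit one stays in the
sorted chain.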
/*
* create_window_paths
*
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 3b7c56d..3147249 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -2544,10 +2544,9 @@ create_agg_path(PlannerInfo *root,
* 'subpath' is the path representing the source of data
* 'target' is the PathTarget to be computed
* 'having_qual' is the HAVING quals if any
- * 'rollup_lists' is a list of grouping sets
- * 'rollup_groupclauses' is a list of grouping clauses for grouping sets
+ * 'rollups' is a list of RollupData nodes
* 'agg_costs' contains cost info about the aggregate functions to be computed
- * 'numGroups' is the estimated number of groups
+ * 'numGroups' is the estimated total number of groups
*/
GroupingSetsPath *
create_groupingsets_path(PlannerInfo *root,
@@ -2555,13 +2554,15 @@ create_groupingsets_path(PlannerInfo *root,
Path *subpath,
PathTarget *target,
List *having_qual,
- List *rollup_lists,
- List *rollup_groupclauses,
+ AggStrategy aggstrategy,
+ List *rollups,
const AggClauseCosts *agg_costs,
double numGroups)
{
GroupingSetsPath *pathnode = makeNode(GroupingSetsPath);
- int numGroupCols;
+ ListCell *lc;
+ bool is_first = true;
+ bool is_first_sort = true;
/* The topmost generated Plan node will be an Agg */
pathnode->path.pathtype = T_Agg;
@@ -2575,74 +2576,107 @@ create_groupingsets_path(PlannerInfo *root,
pathnode->subpath = subpath;
/*
+ * Simplify callers by downgrading AGG_SORTED to AGG_PLAIN and
+ * AGG_MIXED to AGG_HASHED here, if possible.
+ */
+ if (aggstrategy == AGG_SORTED &&
+ list_length(rollups) == 1 &&
+ ((RollupData *) linitial(rollups))->groupClause == NIL)
+ aggstrategy = AGG_PLAIN;
+
+ if (aggstrategy == AGG_MIXED &&
+ list_length(rollups) == 1)
+ aggstrategy = AGG_HASHED;
+
+ /*
* Output will be in sorted order by group_pathkeys if, and only if, there
* is a single rollup operation on a non-empty list of grouping
* expressions.
*/
- if (list_length(rollup_groupclauses) == 1 &&
- ((List *) linitial(rollup_groupclauses)) != NIL)
+ if (aggstrategy == AGG_SORTED && list_length(rollups) == 1)
pathnode->path.pathkeys = root->group_pathkeys;
else
pathnode->path.pathkeys = NIL;
- pathnode->rollup_groupclauses = rollup_groupclauses;
- pathnode->rollup_lists = rollup_lists;
+ pathnode->aggstrategy = aggstrategy;
+ pathnode->rollups = rollups;
pathnode->qual = having_qual;
- Assert(rollup_lists != NIL);
- Assert(list_length(rollup_lists) == list_length(rollup_groupclauses));
-
- /* Account for cost of the topmost Agg node */
- numGroupCols = list_length((List *) linitial((List *) llast(rollup_lists)));
-
- cost_agg(&pathnode->path, root,
- (numGroupCols > 0) ? AGG_SORTED : AGG_PLAIN,
- agg_costs,
- numGroupCols,
- numGroups,
- subpath->startup_cost,
- subpath->total_cost,
- subpath->rows);
+ Assert(rollups != NIL);
+ Assert(aggstrategy != AGG_PLAIN || list_length(rollups) == 1);
+ Assert(aggstrategy != AGG_MIXED || list_length(rollups) > 1);
- /*
- * Add in the costs and output rows of the additional sorting/aggregation
- * steps, if any. Only total costs count, since the extra sorts aren't
- * run on startup.
- */
- if (list_length(rollup_lists) > 1)
+ foreach(lc, rollups)
{
- ListCell *lc;
+ RollupData *rollup = lfirst(lc);
+ List *gsets = rollup->gsets;
+ int numGroupCols = list_length(linitial(gsets));
+
+ /*
+ * In AGG_SORTED or AGG_PLAIN mode, the first rollup takes the
+ * (already-sorted) input, and following ones do their own sort.
+ *
+ * In AGG_HASHED mode, there is one rollup for each grouping set.
+ *
+ * In AGG_MIXED mode, the leading rollups are hashed, the first
+ * non-hashed one takes the (already-sorted) input, and
+ * following ones do their own sort.
+ */
- foreach(lc, rollup_lists)
+ if (is_first)
+ {
+ cost_agg(&pathnode->path, root,
+ aggstrategy,
+ agg_costs,
+ numGroupCols,
+ rollup->numGroups,
+ subpath->startup_cost,
+ subpath->total_cost,
+ subpath->rows);
+ is_first = false;
+ if (!rollup->is_hashed)
+ is_first_sort = false;
+ }
+ else
{
- List *gsets = (List *) lfirst(lc);
Path sort_path; /* dummy for result of cost_sort */
Path agg_path; /* dummy for result of cost_agg */
- /* We must iterate over all but the last rollup_lists element */
- if (lnext(lc) == NULL)
- break;
-
- /* Account for cost of sort, but don't charge input cost again */
- cost_sort(&sort_path, root, NIL,
- 0.0,
- subpath->rows,
- subpath->pathtarget->width,
- 0.0,
- work_mem,
- -1.0);
-
- /* Account for cost of aggregation */
- numGroupCols = list_length((List *) linitial(gsets));
-
- cost_agg(&agg_path, root,
- AGG_SORTED,
- agg_costs,
- numGroupCols,
- numGroups, /* XXX surely not right for all steps? */
- sort_path.startup_cost,
- sort_path.total_cost,
- sort_path.rows);
+ if (rollup->is_hashed || is_first_sort)
+ {
+ /* Account for cost of aggregation, but don't charge input cost again */
+ cost_agg(&agg_path, root,
+ rollup->is_hashed ? AGG_HASHED : AGG_SORTED,
+ agg_costs,
+ numGroupCols,
+ rollup->numGroups,
+ 0.0, 0.0,
+ subpath->rows);
+ if (!rollup->is_hashed)
+ is_first_sort = false;
+ }
+ else
+ {
+ /* Account for cost of sort, but don't charge input cost again */
+ cost_sort(&sort_path, root, NIL,
+ 0.0,
+ subpath->rows,
+ subpath->pathtarget->width,
+ 0.0,
+ work_mem,
+ -1.0);
+
+ /* Account for cost of aggregation */
+
+ cost_agg(&agg_path, root,
+ AGG_SORTED,
+ agg_costs,
+ numGroupCols,
+ rollup->numGroups,
+ sort_path.startup_cost,
+ sort_path.total_cost,
+ sort_path.rows);
+ }
pathnode->path.total_cost += agg_path.total_cost;
pathnode->path.rows += agg_path.rows;
diff --git a/src/include/lib/knapsack.h b/src/include/lib/knapsack.h
new file mode 100644
index 0000000..1d5b3b8
--- /dev/null
+++ b/src/include/lib/knapsack.h
@@ -0,0 +1,17 @@
+/*
+ * knapsack.h
+ *
+ * Copyright (c) 2017, PostgreSQL Global Development Group
+ *
+ * src/include/lib/knapsack.h
+ */
+#ifndef KNAPSACK_H
+#define KNAPSACK_H
+
+#include "postgres.h"
+#include "nodes/bitmapset.h"
+
+extern Bitmapset *DiscreteKnapsack(int max_weight, int num_items,
+ int *item_weights, double *item_values);
+
+#endif /* KNAPSACK_H */
diff --git a/src/include/nodes/bitmapset.h b/src/include/nodes/bitmapset.h
index 4f1910e..109f7b0 100644
--- a/src/include/nodes/bitmapset.h
+++ b/src/include/nodes/bitmapset.h
@@ -21,6 +21,11 @@
#define BITMAPSET_H
/*
+ * Forward decl to save including pg_list.h
+ */
+struct List;
+
+/*
* Data representation
*/
@@ -70,6 +75,7 @@ extern bool bms_is_subset(const Bitmapset *a, const Bitmapset *b);
extern BMS_Comparison bms_subset_compare(const Bitmapset *a, const Bitmapset *b);
extern bool bms_is_member(int x, const Bitmapset *a);
extern bool bms_overlap(const Bitmapset *a, const Bitmapset *b);
+extern bool bms_overlap_list(const Bitmapset *a, const struct List *b);
extern bool bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b);
extern int bms_singleton_member(const Bitmapset *a);
extern bool bms_get_singleton_member(const Bitmapset *a, int *member);
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 0e05d47..0081ca8 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1844,6 +1844,7 @@ typedef struct AggStatePerAggData *AggStatePerAgg;
typedef struct AggStatePerTransData *AggStatePerTrans;
typedef struct AggStatePerGroupData *AggStatePerGroup;
typedef struct AggStatePerPhaseData *AggStatePerPhase;
+typedef struct AggStatePerHashData *AggStatePerHash;
typedef struct AggState
{
@@ -1851,15 +1852,17 @@ typedef struct AggState
List *aggs; /* all Aggref nodes in targetlist & quals */
int numaggs; /* length of list (could be zero!) */
int numtrans; /* number of pertrans items */
+ AggStrategy aggstrategy; /* strategy mode */
AggSplit aggsplit; /* agg-splitting mode, see nodes.h */
AggStatePerPhase phase; /* pointer to current phase data */
- int numphases; /* number of phases */
+ int numphases; /* number of phases (including phase 0) */
int current_phase; /* current phase number */
- FmgrInfo *hashfunctions; /* per-grouping-field hash fns */
AggStatePerAgg peragg; /* per-Aggref information */
AggStatePerTrans pertrans; /* per-Trans state information */
+ ExprContext *hashcontext; /* econtext for long-lived hashtable data */
ExprContext **aggcontexts; /* econtexts for long-lived data (per GS) */
ExprContext *tmpcontext; /* econtext for input expressions */
+ ExprContext *curaggcontext; /* currently active aggcontext */
AggStatePerTrans curpertrans; /* currently active trans state */
bool input_done; /* indicates end of input */
bool agg_done; /* indicates completion of Agg scan */
@@ -1871,21 +1874,17 @@ typedef struct AggState
/* These fields are for grouping set phase data */
int maxsets; /* The max number of sets in any phase */
AggStatePerPhase phases; /* array of all phases */
- Tuplesortstate *sort_in; /* sorted input to phases > 0 */
+ Tuplesortstate *sort_in; /* sorted input to phases > 1 */
Tuplesortstate *sort_out; /* input is copied here for next phase */
TupleTableSlot *sort_slot; /* slot for sort results */
/* these fields are used in AGG_PLAIN and AGG_SORTED modes: */
AggStatePerGroup pergroup; /* per-Aggref-per-group working state */
HeapTuple grp_firstTuple; /* copy of first tuple of current group */
- /* these fields are used in AGG_HASHED mode: */
- TupleHashTable hashtable; /* hash table with one entry per group */
- TupleTableSlot *hashslot; /* slot for loading hash table */
- int numhashGrpCols; /* number of columns in hash table */
- int largestGrpColIdx; /* largest column required for hashing */
- AttrNumber *hashGrpColIdxInput; /* and their indices in input slot */
- AttrNumber *hashGrpColIdxHash; /* indices for execGrouping in hashtbl */
+ /* these fields are used in AGG_HASHED and AGG_MIXED modes: */
bool table_filled; /* hash table filled yet? */
- TupleHashIterator hashiter; /* for iterating through hash table */
+ int num_hashes; /* number of hash tables active */
+ AggStatePerHash perhash; /* array of per-hashtable data */
+ AggStatePerGroup *hash_pergroup; /* array of per-group pointers */
/* support for evaluation of agg inputs */
TupleTableSlot *evalslot; /* slot for agg inputs */
ProjectionInfo *evalproj; /* projection machinery */
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index a1bb0ac..821b5c6 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -269,6 +269,8 @@ typedef enum NodeTag
T_PlaceHolderInfo,
T_MinMaxAggInfo,
T_PlannerParamItem,
+ T_RollupData,
+ T_GroupingSetData,
/*
* TAGS FOR MEMORY NODES (memnodes.h)
@@ -699,7 +701,8 @@ typedef enum AggStrategy
{
AGG_PLAIN, /* simple agg across all input rows */
AGG_SORTED, /* grouped agg, input must be sorted */
- AGG_HASHED /* grouped agg, use internal hashtable */
+ AGG_HASHED, /* grouped agg, use internal hashtable */
+ AGG_MIXED /* grouped agg, hash and sort concurrently */
} AggStrategy;
/*
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 692a626..962842c 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -717,7 +717,7 @@ typedef struct Agg
Oid *grpOperators; /* equality operators to compare with */
long numGroups; /* estimated number of groups in input */
Bitmapset *aggParams; /* IDs of Params used in Aggref inputs */
- /* Note: planner provides numGroups & aggParams only in AGG_HASHED case */
+ /* Note: planner provides numGroups & aggParams only in HASHED/MIXED case */
List *groupingSets; /* grouping sets to use */
List *chain; /* chained Agg/Sort nodes */
} Agg;
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index e1d31c7..bca7b56 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -1357,17 +1357,37 @@ typedef struct AggPath
} AggPath;
/*
+ * Various annotations used for grouping sets in the planner.
+ */
+
+typedef struct GroupingSetData
+{
+ NodeTag type;
+ List *set; /* grouping set as list of sortgrouprefs */
+ double numGroups; /* est. number of result groups */
+} GroupingSetData;
+
+typedef struct RollupData
+{
+ NodeTag type;
+ List *groupClause; /* applicable subset of parse->groupClause */
+ List *gsets; /* lists of integer indexes into groupClause */
+ List *gsets_data; /* list of GroupingSetData */
+ double numGroups; /* est. number of result groups */
+ bool hashable; /* can be hashed */
+ bool is_hashed; /* to be implemented as a hashagg */
+} RollupData;
+
+/*
* GroupingSetsPath represents a GROUPING SETS aggregation
- *
- * Currently we only support this in sorted not hashed form, so the input
- * must always be appropriately presorted.
*/
+
typedef struct GroupingSetsPath
{
Path path;
Path *subpath; /* path representing input source */
- List *rollup_groupclauses; /* list of lists of SortGroupClause's */
- List *rollup_lists; /* parallel list of lists of grouping sets */
+ AggStrategy aggstrategy; /* basic strategy */
+ List *rollups; /* list of RollupData */
List *qual; /* quals (HAVING quals), if any */
} GroupingSetsPath;
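
As an illustration of how these structures fit together: for GROUP BY
CUBE(a,b), the expanded sets {(a,b),(a),(b),()} become two RollupData
entries (matching the GroupAggregate shape in the regression output
further down):

    rollup 1: groupClause covering (a,b); gsets = ((0,1), (0), ())
              -- one sorted chain: (a,b) > (a) > ()
    rollup 2: groupClause covering (b);   gsets = ((0))
              -- needs its own sort (or its own hash table)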
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index d16f879..d5fe8a8 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -176,8 +176,8 @@ extern GroupingSetsPath *create_groupingsets_path(PlannerInfo *root,
Path *subpath,
PathTarget *target,
List *having_qual,
- List *rollup_lists,
- List *rollup_groupclauses,
+ AggStrategy aggstrategy,
+ List *rollups,
const AggClauseCosts *agg_costs,
double numGroups);
extern MinMaxAggPath *create_minmaxagg_path(PlannerInfo *root,
diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out
index 260ccd5..9f8b20e 100644
--- a/src/test/regress/expected/groupingsets.out
+++ b/src/test/regress/expected/groupingsets.out
@@ -13,6 +13,13 @@ copy gstest2 from stdin;
create temp table gstest3 (a integer, b integer, c integer, d integer);
copy gstest3 from stdin;
alter table gstest3 add primary key (a);
+create temp table gstest4(id integer, v integer,
+ unhashable_col bit(4), unsortable_col xid);
+insert into gstest4
+values (1,1,b'0000','1'), (2,2,b'0001','1'),
+ (3,4,b'0010','2'), (4,8,b'0011','2'),
+ (5,16,b'0000','2'), (6,32,b'0001','2'),
+ (7,64,b'0010','1'), (8,128,b'0011','1');
create temp table gstest_empty (a integer, b integer, v integer);
create function gstest_data(v integer, out a integer, out b integer)
returns setof record
@@ -22,6 +29,7 @@ create function gstest_data(v integer, out a integer, out b integer)
end;
$f$ language plpgsql;
-- basic functionality
+set enable_hashagg = false; -- test hashing explicitly later
-- simple rollup with multiple plain aggregates, with and without ordering
-- (and with ordering differing from grouping)
select a, b, grouping(a,b), sum(v), count(*), max(v)
@@ -462,7 +470,7 @@ select a, b from (values (1,2),(2,3)) v(a,b) group by a,b, grouping sets(a);
-- Tests for chained aggregates
select a, b, grouping(a,b), sum(v), count(*), max(v)
- from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2));
+ from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
a | b | grouping | sum | count | max
---+---+----------+-----+-------+-----
1 | 1 | 0 | 21 | 2 | 11
@@ -473,19 +481,19 @@ select a, b, grouping(a,b), sum(v), count(*), max(v)
3 | 4 | 0 | 17 | 1 | 17
4 | 1 | 0 | 37 | 2 | 19
| | 3 | 21 | 2 | 11
- | | 3 | 25 | 2 | 13
- | | 3 | 14 | 1 | 14
- | | 3 | 15 | 1 | 15
- | | 3 | 16 | 1 | 16
- | | 3 | 17 | 1 | 17
- | | 3 | 37 | 2 | 19
| | 3 | 21 | 2 | 11
| | 3 | 25 | 2 | 13
+ | | 3 | 25 | 2 | 13
+ | | 3 | 14 | 1 | 14
| | 3 | 14 | 1 | 14
| | 3 | 15 | 1 | 15
+ | | 3 | 15 | 1 | 15
| | 3 | 16 | 1 | 16
+ | | 3 | 16 | 1 | 16
+ | | 3 | 17 | 1 | 17
| | 3 | 17 | 1 | 17
| | 3 | 37 | 2 | 19
+ | | 3 | 37 | 2 | 19
(21 rows)
select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY ROLLUP((e+1),(f+1));
@@ -847,4 +855,548 @@ select sum(ten) from onek group by rollup(four::text), two order by 1;
2500
(6 rows)
+-- hashing support
+set enable_hashagg = true;
+-- failure cases
+select count(*) from gstest4 group by rollup(unhashable_col,unsortable_col);
+ERROR: could not implement GROUP BY
+DETAIL: Some of the datatypes only support hashing, while others only support sorting.
+select array_agg(v order by v) from gstest4 group by grouping sets ((id,unsortable_col),(id));
+ERROR: could not implement GROUP BY
+DETAIL: Some of the datatypes only support hashing, while others only support sorting.
+-- simple cases
+select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a),(b)) order by 3,1,2;
+ a | b | grouping | sum | count | max
+---+---+----------+-----+-------+-----
+ 1 | | 1 | 60 | 5 | 14
+ 2 | | 1 | 15 | 1 | 15
+ 3 | | 1 | 33 | 2 | 17
+ 4 | | 1 | 37 | 2 | 19
+ | 1 | 2 | 58 | 4 | 19
+ | 2 | 2 | 25 | 2 | 13
+ | 3 | 2 | 45 | 3 | 16
+ | 4 | 2 | 17 | 1 | 17
+(8 rows)
+
+explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a),(b)) order by 3,1,2;
+ QUERY PLAN
+--------------------------------------------------------------------------------------------------------
+ Sort
+ Sort Key: (GROUPING("*VALUES*".column1, "*VALUES*".column2)), "*VALUES*".column1, "*VALUES*".column2
+ -> HashAggregate
+ Hash Key: "*VALUES*".column1
+ Hash Key: "*VALUES*".column2
+ -> Values Scan on "*VALUES*"
+(6 rows)
+
+select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by cube(a,b) order by 3,1,2;
+ a | b | grouping | sum | count | max
+---+---+----------+-----+-------+-----
+ 1 | 1 | 0 | 21 | 2 | 11
+ 1 | 2 | 0 | 25 | 2 | 13
+ 1 | 3 | 0 | 14 | 1 | 14
+ 2 | 3 | 0 | 15 | 1 | 15
+ 3 | 3 | 0 | 16 | 1 | 16
+ 3 | 4 | 0 | 17 | 1 | 17
+ 4 | 1 | 0 | 37 | 2 | 19
+ 1 | | 1 | 60 | 5 | 14
+ 2 | | 1 | 15 | 1 | 15
+ 3 | | 1 | 33 | 2 | 17
+ 4 | | 1 | 37 | 2 | 19
+ | 1 | 2 | 58 | 4 | 19
+ | 2 | 2 | 25 | 2 | 13
+ | 3 | 2 | 45 | 3 | 16
+ | 4 | 2 | 17 | 1 | 17
+ | | 3 | 145 | 10 | 19
+(16 rows)
+
+explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by cube(a,b) order by 3,1,2;
+ QUERY PLAN
+--------------------------------------------------------------------------------------------------------
+ Sort
+ Sort Key: (GROUPING("*VALUES*".column1, "*VALUES*".column2)), "*VALUES*".column1, "*VALUES*".column2
+ -> MixedAggregate
+ Hash Key: "*VALUES*".column1, "*VALUES*".column2
+ Hash Key: "*VALUES*".column1
+ Hash Key: "*VALUES*".column2
+ Group Key: ()
+ -> Values Scan on "*VALUES*"
+(8 rows)
+
+-- shouldn't try and hash
+explain (costs off)
+ select a, b, grouping(a,b), array_agg(v order by v)
+ from gstest1 group by cube(a,b);
+ QUERY PLAN
+----------------------------------------------------------
+ GroupAggregate
+ Group Key: "*VALUES*".column1, "*VALUES*".column2
+ Group Key: "*VALUES*".column1
+ Group Key: ()
+ Sort Key: "*VALUES*".column2
+ Group Key: "*VALUES*".column2
+ -> Sort
+ Sort Key: "*VALUES*".column1, "*VALUES*".column2
+ -> Values Scan on "*VALUES*"
+(9 rows)
+
+-- mixed hashable/sortable cases
+select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((unhashable_col),(unsortable_col))
+ order by 3, 5;
+ unhashable_col | unsortable_col | grouping | count | sum
+----------------+----------------+----------+-------+-----
+ 0000 | | 1 | 2 | 17
+ 0001 | | 1 | 2 | 34
+ 0010 | | 1 | 2 | 68
+ 0011 | | 1 | 2 | 136
+ | 2 | 2 | 4 | 60
+ | 1 | 2 | 4 | 195
+(6 rows)
+
+explain (costs off)
+ select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((unhashable_col),(unsortable_col))
+ order by 3,5;
+ QUERY PLAN
+------------------------------------------------------------------
+ Sort
+ Sort Key: (GROUPING(unhashable_col, unsortable_col)), (sum(v))
+ -> MixedAggregate
+ Hash Key: unsortable_col
+ Group Key: unhashable_col
+ -> Sort
+ Sort Key: unhashable_col
+ -> Seq Scan on gstest4
+(8 rows)
+
+select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col))
+ order by 3,5;
+ unhashable_col | unsortable_col | grouping | count | sum
+----------------+----------------+----------+-------+-----
+ 0000 | | 1 | 1 | 1
+ 0001 | | 1 | 1 | 2
+ 0010 | | 1 | 1 | 4
+ 0011 | | 1 | 1 | 8
+ 0000 | | 1 | 1 | 16
+ 0001 | | 1 | 1 | 32
+ 0010 | | 1 | 1 | 64
+ 0011 | | 1 | 1 | 128
+ | 1 | 2 | 1 | 1
+ | 1 | 2 | 1 | 2
+ | 2 | 2 | 1 | 4
+ | 2 | 2 | 1 | 8
+ | 2 | 2 | 1 | 16
+ | 2 | 2 | 1 | 32
+ | 1 | 2 | 1 | 64
+ | 1 | 2 | 1 | 128
+(16 rows)
+
+explain (costs off)
+ select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col))
+ order by 3,5;
+ QUERY PLAN
+------------------------------------------------------------------
+ Sort
+ Sort Key: (GROUPING(unhashable_col, unsortable_col)), (sum(v))
+ -> MixedAggregate
+ Hash Key: v, unsortable_col
+ Group Key: v, unhashable_col
+ -> Sort
+ Sort Key: v, unhashable_col
+ -> Seq Scan on gstest4
+(8 rows)
+
+-- empty input: first is 0 rows, second 1, third 3 etc.
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a);
+ a | b | sum | count
+---+---+-----+-------
+(0 rows)
+
+explain (costs off)
+ select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a);
+ QUERY PLAN
+--------------------------------
+ HashAggregate
+ Hash Key: a, b
+ Hash Key: a
+ -> Seq Scan on gstest_empty
+(4 rows)
+
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),());
+ a | b | sum | count
+---+---+-----+-------
+ | | | 0
+(1 row)
+
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),());
+ a | b | sum | count
+---+---+-----+-------
+ | | | 0
+ | | | 0
+ | | | 0
+(3 rows)
+
+explain (costs off)
+ select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),());
+ QUERY PLAN
+--------------------------------
+ MixedAggregate
+ Hash Key: a, b
+ Group Key: ()
+ Group Key: ()
+ Group Key: ()
+ -> Seq Scan on gstest_empty
+(6 rows)
+
+select sum(v), count(*) from gstest_empty group by grouping sets ((),(),());
+ sum | count
+-----+-------
+ | 0
+ | 0
+ | 0
+(3 rows)
+
+explain (costs off)
+ select sum(v), count(*) from gstest_empty group by grouping sets ((),(),());
+ QUERY PLAN
+--------------------------------
+ Aggregate
+ Group Key: ()
+ Group Key: ()
+ Group Key: ()
+ -> Seq Scan on gstest_empty
+(5 rows)
+
+-- check that functionally dependent cols are not nulled
+select a, d, grouping(a,b,c)
+ from gstest3
+ group by grouping sets ((a,b), (a,c));
+ a | d | grouping
+---+---+----------
+ 1 | 1 | 1
+ 2 | 2 | 1
+ 1 | 1 | 2
+ 2 | 2 | 2
+(4 rows)
+
+explain (costs off)
+ select a, d, grouping(a,b,c)
+ from gstest3
+ group by grouping sets ((a,b), (a,c));
+ QUERY PLAN
+---------------------------
+ HashAggregate
+ Hash Key: a, b
+ Hash Key: a, c
+ -> Seq Scan on gstest3
+(4 rows)
+
+-- simple rescan tests
+select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by grouping sets (a,b);
+ a | b | sum
+---+---+-----
+ 1 | | 3
+ 2 | | 6
+ | 1 | 3
+ | 2 | 3
+ | 3 | 3
+(5 rows)
+
+explain (costs off)
+ select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by grouping sets (a,b);
+ QUERY PLAN
+------------------------------------------
+ HashAggregate
+ Hash Key: gstest_data.a
+ Hash Key: gstest_data.b
+ -> Nested Loop
+ -> Values Scan on "*VALUES*"
+ -> Function Scan on gstest_data
+(6 rows)
+
+select *
+ from (values (1),(2)) v(x),
+ lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s;
+ERROR: aggregate functions are not allowed in FROM clause of their own query level
+LINE 3: lateral (select a, b, sum(v.x) from gstest_data(v.x) ...
+ ^
+explain (costs off)
+ select *
+ from (values (1),(2)) v(x),
+ lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s;
+ERROR: aggregate functions are not allowed in FROM clause of their own query level
+LINE 4: lateral (select a, b, sum(v.x) from gstest_data(v.x...
+ ^
+-- Tests for chained aggregates
+select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
+ a | b | grouping | sum | count | max
+---+---+----------+-----+-------+-----
+ 1 | 1 | 0 | 21 | 2 | 11
+ 1 | 2 | 0 | 25 | 2 | 13
+ 1 | 3 | 0 | 14 | 1 | 14
+ 2 | 3 | 0 | 15 | 1 | 15
+ 3 | 3 | 0 | 16 | 1 | 16
+ 3 | 4 | 0 | 17 | 1 | 17
+ 4 | 1 | 0 | 37 | 2 | 19
+ | | 3 | 21 | 2 | 11
+ | | 3 | 21 | 2 | 11
+ | | 3 | 25 | 2 | 13
+ | | 3 | 25 | 2 | 13
+ | | 3 | 14 | 1 | 14
+ | | 3 | 14 | 1 | 14
+ | | 3 | 15 | 1 | 15
+ | | 3 | 15 | 1 | 15
+ | | 3 | 16 | 1 | 16
+ | | 3 | 16 | 1 | 16
+ | | 3 | 17 | 1 | 17
+ | | 3 | 17 | 1 | 17
+ | | 3 | 37 | 2 | 19
+ | | 3 | 37 | 2 | 19
+(21 rows)
+
+select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
+ from gstest2 group by cube (a,b) order by rsum, a, b;
+ a | b | sum | rsum
+---+---+-----+------
+ 1 | 1 | 8 | 8
+ 1 | 2 | 2 | 10
+ 1 | | 10 | 20
+ 2 | 2 | 2 | 22
+ 2 | | 2 | 24
+ | 1 | 8 | 32
+ | 2 | 4 | 36
+ | | 12 | 48
+(8 rows)
+
+select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by cube (a,b) order by a,b;
+ a | b | sum
+---+---+-----
+ 1 | 1 | 1
+ 1 | 2 | 1
+ 1 | 3 | 1
+ 1 | | 3
+ 2 | 1 | 2
+ 2 | 2 | 2
+ 2 | 3 | 2
+ 2 | | 6
+ | 1 | 3
+ | 2 | 3
+ | 3 | 3
+ | | 9
+(12 rows)
+
+-- More rescan tests
+select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by cube(four,ten)) s on true order by v.a,four,ten;
+ a | a | four | ten | count
+---+---+------+-----+-------
+ 1 | 1 | 0 | 0 | 50
+ 1 | 1 | 0 | 2 | 50
+ 1 | 1 | 0 | 4 | 50
+ 1 | 1 | 0 | 6 | 50
+ 1 | 1 | 0 | 8 | 50
+ 1 | 1 | 0 | | 250
+ 1 | 1 | 1 | 1 | 50
+ 1 | 1 | 1 | 3 | 50
+ 1 | 1 | 1 | 5 | 50
+ 1 | 1 | 1 | 7 | 50
+ 1 | 1 | 1 | 9 | 50
+ 1 | 1 | 1 | | 250
+ 1 | 1 | 2 | 0 | 50
+ 1 | 1 | 2 | 2 | 50
+ 1 | 1 | 2 | 4 | 50
+ 1 | 1 | 2 | 6 | 50
+ 1 | 1 | 2 | 8 | 50
+ 1 | 1 | 2 | | 250
+ 1 | 1 | 3 | 1 | 50
+ 1 | 1 | 3 | 3 | 50
+ 1 | 1 | 3 | 5 | 50
+ 1 | 1 | 3 | 7 | 50
+ 1 | 1 | 3 | 9 | 50
+ 1 | 1 | 3 | | 250
+ 1 | 1 | | 0 | 100
+ 1 | 1 | | 1 | 100
+ 1 | 1 | | 2 | 100
+ 1 | 1 | | 3 | 100
+ 1 | 1 | | 4 | 100
+ 1 | 1 | | 5 | 100
+ 1 | 1 | | 6 | 100
+ 1 | 1 | | 7 | 100
+ 1 | 1 | | 8 | 100
+ 1 | 1 | | 9 | 100
+ 1 | 1 | | | 1000
+ 2 | 2 | 0 | 0 | 50
+ 2 | 2 | 0 | 2 | 50
+ 2 | 2 | 0 | 4 | 50
+ 2 | 2 | 0 | 6 | 50
+ 2 | 2 | 0 | 8 | 50
+ 2 | 2 | 0 | | 250
+ 2 | 2 | 1 | 1 | 50
+ 2 | 2 | 1 | 3 | 50
+ 2 | 2 | 1 | 5 | 50
+ 2 | 2 | 1 | 7 | 50
+ 2 | 2 | 1 | 9 | 50
+ 2 | 2 | 1 | | 250
+ 2 | 2 | 2 | 0 | 50
+ 2 | 2 | 2 | 2 | 50
+ 2 | 2 | 2 | 4 | 50
+ 2 | 2 | 2 | 6 | 50
+ 2 | 2 | 2 | 8 | 50
+ 2 | 2 | 2 | | 250
+ 2 | 2 | 3 | 1 | 50
+ 2 | 2 | 3 | 3 | 50
+ 2 | 2 | 3 | 5 | 50
+ 2 | 2 | 3 | 7 | 50
+ 2 | 2 | 3 | 9 | 50
+ 2 | 2 | 3 | | 250
+ 2 | 2 | | 0 | 100
+ 2 | 2 | | 1 | 100
+ 2 | 2 | | 2 | 100
+ 2 | 2 | | 3 | 100
+ 2 | 2 | | 4 | 100
+ 2 | 2 | | 5 | 100
+ 2 | 2 | | 6 | 100
+ 2 | 2 | | 7 | 100
+ 2 | 2 | | 8 | 100
+ 2 | 2 | | 9 | 100
+ 2 | 2 | | | 1000
+(70 rows)
+
+select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by cube(two,four) order by two,four) s1) from (values (1),(2)) v(a);
+ array
+------------------------------------------------------------------------------------------------------------------------------------------------------
+ {"(1,0,0,250)","(1,0,2,250)","(1,0,,500)","(1,1,1,250)","(1,1,3,250)","(1,1,,500)","(1,,0,250)","(1,,1,250)","(1,,2,250)","(1,,3,250)","(1,,,1000)"}
+ {"(2,0,0,250)","(2,0,2,250)","(2,0,,500)","(2,1,1,250)","(2,1,3,250)","(2,1,,500)","(2,,0,250)","(2,,1,250)","(2,,2,250)","(2,,3,250)","(2,,,1000)"}
+(2 rows)
+
+-- Rescan logic changes when there are no empty grouping sets, so test
+-- that too:
+select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by grouping sets(four,ten)) s on true order by v.a,four,ten;
+ a | a | four | ten | count
+---+---+------+-----+-------
+ 1 | 1 | 0 | | 250
+ 1 | 1 | 1 | | 250
+ 1 | 1 | 2 | | 250
+ 1 | 1 | 3 | | 250
+ 1 | 1 | | 0 | 100
+ 1 | 1 | | 1 | 100
+ 1 | 1 | | 2 | 100
+ 1 | 1 | | 3 | 100
+ 1 | 1 | | 4 | 100
+ 1 | 1 | | 5 | 100
+ 1 | 1 | | 6 | 100
+ 1 | 1 | | 7 | 100
+ 1 | 1 | | 8 | 100
+ 1 | 1 | | 9 | 100
+ 2 | 2 | 0 | | 250
+ 2 | 2 | 1 | | 250
+ 2 | 2 | 2 | | 250
+ 2 | 2 | 3 | | 250
+ 2 | 2 | | 0 | 100
+ 2 | 2 | | 1 | 100
+ 2 | 2 | | 2 | 100
+ 2 | 2 | | 3 | 100
+ 2 | 2 | | 4 | 100
+ 2 | 2 | | 5 | 100
+ 2 | 2 | | 6 | 100
+ 2 | 2 | | 7 | 100
+ 2 | 2 | | 8 | 100
+ 2 | 2 | | 9 | 100
+(28 rows)
+
+select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by grouping sets(two,four) order by two,four) s1) from (values (1),(2)) v(a);
+ array
+---------------------------------------------------------------------------------
+ {"(1,0,,500)","(1,1,,500)","(1,,0,250)","(1,,1,250)","(1,,2,250)","(1,,3,250)"}
+ {"(2,0,,500)","(2,1,,500)","(2,,0,250)","(2,,1,250)","(2,,2,250)","(2,,3,250)"}
+(2 rows)
+
+-- test the knapsack
+set work_mem = '64kB';
+explain (costs off)
+ select unique1,
+ count(two), count(four), count(ten),
+ count(hundred), count(thousand), count(twothousand),
+ count(*)
+ from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two);
+ QUERY PLAN
+-------------------------------
+ MixedAggregate
+ Hash Key: two
+ Hash Key: four
+ Hash Key: ten
+ Hash Key: hundred
+ Group Key: unique1
+ Sort Key: twothousand
+ Group Key: twothousand
+ Sort Key: thousand
+ Group Key: thousand
+ -> Sort
+ Sort Key: unique1
+ -> Seq Scan on tenk1
+(13 rows)
+
+explain (costs off)
+ select unique1,
+ count(two), count(four), count(ten),
+ count(hundred), count(thousand), count(twothousand),
+ count(*)
+ from tenk1 group by grouping sets (unique1,hundred,ten,four,two);
+ QUERY PLAN
+-------------------------------
+ MixedAggregate
+ Hash Key: two
+ Hash Key: four
+ Hash Key: ten
+ Hash Key: hundred
+ Group Key: unique1
+ -> Sort
+ Sort Key: unique1
+ -> Seq Scan on tenk1
+(9 rows)
+
+set work_mem = '384kB';
+explain (costs off)
+ select unique1,
+ count(two), count(four), count(ten),
+ count(hundred), count(thousand), count(twothousand),
+ count(*)
+ from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two);
+ QUERY PLAN
+-------------------------------
+ MixedAggregate
+ Hash Key: two
+ Hash Key: four
+ Hash Key: ten
+ Hash Key: hundred
+ Hash Key: thousand
+ Group Key: unique1
+ Sort Key: twothousand
+ Group Key: twothousand
+ -> Sort
+ Sort Key: unique1
+ -> Seq Scan on tenk1
+(12 rows)
+
-- end
diff --git a/src/test/regress/expected/tsrf.out b/src/test/regress/expected/tsrf.out
index 8c54f71..d30b50f 100644
--- a/src/test/regress/expected/tsrf.out
+++ b/src/test/regress/expected/tsrf.out
@@ -199,6 +199,7 @@ SELECT few.dataa, count(*), min(id), max(id), generate_series(1,3) FROM few GROU
(6 rows)
-- grouping sets are a bit special, they produce NULLs in columns not actually NULL
+set enable_hashagg = false;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab);
dataa | b | g | count
-------+-----+---+-------
@@ -277,46 +278,46 @@ SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(d
b | bar | | 2
b | | | 2
| | | 6
- a | | 1 | 2
- b | | 1 | 1
- | | 1 | 3
- a | | 2 | 2
- b | | 2 | 1
- | | 2 | 3
| bar | 1 | 2
| bar | 2 | 2
| bar | | 4
| foo | 1 | 1
| foo | 2 | 1
| foo | | 2
+ a | | 1 | 2
+ b | | 1 | 1
+ | | 1 | 3
+ a | | 2 | 2
+ b | | 2 | 1
+ | | 2 | 3
(24 rows)
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g) ORDER BY dataa;
dataa | b | g | count
-------+-----+---+-------
+ a | foo | | 2
+ a | | | 4
+ a | | 2 | 2
a | bar | 1 | 1
a | bar | 2 | 1
a | bar | | 2
a | foo | 1 | 1
a | foo | 2 | 1
- a | foo | | 2
- a | | | 4
a | | 1 | 2
- a | | 2 | 2
- b | bar | 2 | 1
+ b | bar | 1 | 1
b | | | 2
b | | 1 | 1
- b | | 2 | 1
- b | bar | 1 | 1
+ b | bar | 2 | 1
b | bar | | 2
- | foo | | 2
- | foo | 1 | 1
+ b | | 2 | 1
| | 2 | 3
+ | | | 6
| bar | 1 | 2
| bar | 2 | 2
- | | | 6
- | foo | 2 | 1
| bar | | 4
+ | foo | 1 | 1
+ | foo | 2 | 1
+ | foo | | 2
| | 1 | 3
(24 rows)
@@ -326,29 +327,30 @@ SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(d
a | bar | 1 | 1
a | foo | 1 | 1
b | bar | 1 | 1
+ | bar | 1 | 2
+ | foo | 1 | 1
a | | 1 | 2
b | | 1 | 1
| | 1 | 3
- | bar | 1 | 2
- | foo | 1 | 1
- | foo | 2 | 1
- | bar | 2 | 2
a | | 2 | 2
b | | 2 | 1
- a | bar | 2 | 1
+ | bar | 2 | 2
| | 2 | 3
+ | foo | 2 | 1
+ a | bar | 2 | 1
a | foo | 2 | 1
b | bar | 2 | 1
- a | foo | | 2
+ a | | | 4
b | bar | | 2
b | | | 2
| | | 6
- a | | | 4
+ a | foo | | 2
+ a | bar | | 2
| bar | | 4
| foo | | 2
- a | bar | | 2
(24 rows)
+reset enable_hashagg;
-- data modification
CREATE TABLE fewmore AS SELECT generate_series(1,3) AS data;
INSERT INTO fewmore VALUES(generate_series(4,5));
diff --git a/src/test/regress/sql/groupingsets.sql b/src/test/regress/sql/groupingsets.sql
index 71cc0ec..3f425eb 100644
--- a/src/test/regress/sql/groupingsets.sql
+++ b/src/test/regress/sql/groupingsets.sql
@@ -31,6 +31,14 @@ copy gstest3 from stdin;
\.
alter table gstest3 add primary key (a);
+create temp table gstest4(id integer, v integer,
+ unhashable_col bit(4), unsortable_col xid);
+insert into gstest4
+values (1,1,b'0000','1'), (2,2,b'0001','1'),
+ (3,4,b'0010','2'), (4,8,b'0011','2'),
+ (5,16,b'0000','2'), (6,32,b'0001','2'),
+ (7,64,b'0010','1'), (8,128,b'0011','1');
+
create temp table gstest_empty (a integer, b integer, v integer);
create function gstest_data(v integer, out a integer, out b integer)
@@ -43,8 +51,11 @@ create function gstest_data(v integer, out a integer, out b integer)
-- basic functionality
+set enable_hashagg = false; -- test hashing explicitly later
+
-- simple rollup with multiple plain aggregates, with and without ordering
-- (and with ordering differing from grouping)
+
select a, b, grouping(a,b), sum(v), count(*), max(v)
from gstest1 group by rollup (a,b);
select a, b, grouping(a,b), sum(v), count(*), max(v)
@@ -161,7 +172,7 @@ select a, b from (values (1,2),(2,3)) v(a,b) group by a,b, grouping sets(a);
-- Tests for chained aggregates
select a, b, grouping(a,b), sum(v), count(*), max(v)
- from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2));
+ from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY ROLLUP((e+1),(f+1));
select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY CUBE((e+1),(f+1)) ORDER BY (e+1),(f+1);
select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
@@ -224,4 +235,136 @@ select array(select row(v.a,s1.*) from (select two,four, count(*) from onek grou
select sum(ten) from onek group by two, rollup(four::text) order by 1;
select sum(ten) from onek group by rollup(four::text), two order by 1;
+-- hashing support
+
+set enable_hashagg = true;
+
+-- failure cases
+
+select count(*) from gstest4 group by rollup(unhashable_col,unsortable_col);
+select array_agg(v order by v) from gstest4 group by grouping sets ((id,unsortable_col),(id));
+
+-- simple cases
+
+select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a),(b)) order by 3,1,2;
+explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a),(b)) order by 3,1,2;
+
+select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by cube(a,b) order by 3,1,2;
+explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by cube(a,b) order by 3,1,2;
+
+-- shouldn't try and hash
+explain (costs off)
+ select a, b, grouping(a,b), array_agg(v order by v)
+ from gstest1 group by cube(a,b);
+
+-- mixed hashable/sortable cases
+select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((unhashable_col),(unsortable_col))
+ order by 3, 5;
+explain (costs off)
+ select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((unhashable_col),(unsortable_col))
+ order by 3,5;
+
+select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col))
+ order by 3,5;
+explain (costs off)
+ select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col))
+ order by 3,5;
+
+-- empty input: first is 0 rows, second 1, third 3 etc.
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a);
+explain (costs off)
+ select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a);
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),());
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),());
+explain (costs off)
+ select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),());
+select sum(v), count(*) from gstest_empty group by grouping sets ((),(),());
+explain (costs off)
+ select sum(v), count(*) from gstest_empty group by grouping sets ((),(),());
+
+-- check that functionally dependent cols are not nulled
+select a, d, grouping(a,b,c)
+ from gstest3
+ group by grouping sets ((a,b), (a,c));
+explain (costs off)
+ select a, d, grouping(a,b,c)
+ from gstest3
+ group by grouping sets ((a,b), (a,c));
+
+-- simple rescan tests
+
+select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by grouping sets (a,b);
+explain (costs off)
+ select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by grouping sets (a,b);
+
+select *
+ from (values (1),(2)) v(x),
+ lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s;
+explain (costs off)
+ select *
+ from (values (1),(2)) v(x),
+ lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s;
+
+-- Tests for chained aggregates
+select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
+select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
+ from gstest2 group by cube (a,b) order by rsum, a, b;
+select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by cube (a,b) order by a,b;
+
+-- More rescan tests
+select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by cube(four,ten)) s on true order by v.a,four,ten;
+select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by cube(two,four) order by two,four) s1) from (values (1),(2)) v(a);
+
+-- Rescan logic changes when there are no empty grouping sets, so test
+-- that too:
+select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by grouping sets(four,ten)) s on true order by v.a,four,ten;
+select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by grouping sets(two,four) order by two,four) s1) from (values (1),(2)) v(a);
+
+-- test the knapsack
+
+set work_mem = '64kB';
+explain (costs off)
+ select unique1,
+ count(two), count(four), count(ten),
+ count(hundred), count(thousand), count(twothousand),
+ count(*)
+ from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two);
+explain (costs off)
+ select unique1,
+ count(two), count(four), count(ten),
+ count(hundred), count(thousand), count(twothousand),
+ count(*)
+ from tenk1 group by grouping sets (unique1,hundred,ten,four,two);
+
+set work_mem = '384kB';
+explain (costs off)
+ select unique1,
+ count(two), count(four), count(ten),
+ count(hundred), count(thousand), count(twothousand),
+ count(*)
+ from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two);
+
-- end
diff --git a/src/test/regress/sql/tsrf.sql b/src/test/regress/sql/tsrf.sql
index cf2fbe3..7875193 100644
--- a/src/test/regress/sql/tsrf.sql
+++ b/src/test/regress/sql/tsrf.sql
@@ -59,12 +59,14 @@ SELECT SUM(count(*)) OVER(PARTITION BY generate_series(1,3) ORDER BY generate_se
SELECT few.dataa, count(*), min(id), max(id), generate_series(1,3) FROM few GROUP BY few.dataa ORDER BY 5, 1;
-- grouping sets are a bit special, they produce NULLs in columns not actually NULL
+set enable_hashagg = false;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab);
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab) ORDER BY dataa;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab) ORDER BY g;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g);
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g) ORDER BY dataa;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g) ORDER BY g;
+reset enable_hashagg;
-- data modification
CREATE TABLE fewmore AS SELECT generate_series(1,3) AS data;
On Thu, Jan 5, 2017 at 10:48 PM, Andrew Gierth
<andrew@tao11.riddles.org.uk> wrote:
> Herewith a patch for doing grouping sets via hashing or mixed hashing
> and sorting.
Cool.
--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
The ability to exploit hashed aggregation within sorted groups, when the order of the input stream can be exploited this way, is potentially a useful way to improve aggregation performance more generally. This would potentially be beneficial when the input size is expected to be larger than the amount of working memory available for hashed aggregation, but where there is enough memory to hash-aggregate just the unsorted grouping key combinations, and when the cumulative cost of rebuilding the hash table for each sorted subgroup is less than the cost of sorting the entire input. In other words, if most of the grouping key combinations are already segregated by virtue of the input order, then hashing the remaining combinations within each sorted group might be done in memory, at the cost of rebuilding the hash table for each sorted subgroup.
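Sketching the loop shape in pseudo-C (illustration only; every type and function name below is hypothetical, not taken from the patch or from PostgreSQL):

HashTable  *ht = make_hash_table();
SortKeys   *cur = NULL;         /* sorted-key values of the current subgroup */
GroupEntry *entry;
Tuple      *tup;

while ((tup = fetch_next_sorted_tuple()) != NULL)
{
    if (cur == NULL || !sort_keys_match(tup, cur))
    {
        /* subgroup boundary: drain and rebuild (a no-op on the first tuple) */
        emit_all_groups(ht);
        reset_hash_table(ht);   /* the rebuild cost, paid once per subgroup */
        cur = copy_sort_keys(tup);
    }
    /* hash only on the grouping keys not covered by the sort order */
    entry = lookup_or_create_group(ht, tup);
    advance_transitions(entry, tup);    /* apply aggregate transition functions */
}
emit_all_groups(ht);            /* don't lose the final subgroup */

The table only ever holds one sorted subgroup at a time, so it can stay within work_mem even when a hash table over the whole input could not.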
I haven’t looked at the code for this change yet (I hope I will have the time to do that). Ideally the decision to choose the aggregation method as sorted, hashed, or mixed hash/sort should be integrated into the cost model, but given the notorious difficulty of estimating intermediate cardinalities accurately it would be difficult to develop a cardinality model and a cost model accurate enough to choose among these options consistently well.
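That sensitivity to the estimates is already visible without a cost model: rerunning the knapsack tests from the patch under different memory budgets changes which sets get hashed. Per the expected output in the patch, at work_mem = '64kB' both thousand and twothousand fall back to sort phases, while at '384kB' thousand becomes cheap enough to hash (this assumes the patch applied on top of the standard regression database):

set work_mem = '64kB';
explain (costs off)
  select unique1,
         count(two), count(four), count(ten),
         count(hundred), count(thousand), count(twothousand),
         count(*)
    from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two);

-- with the larger budget, "thousand" moves from a Sort Key to a Hash Key
set work_mem = '384kB';
explain (costs off)
  select unique1,
         count(two), count(four), count(ten),
         count(hundred), count(thousand), count(twothousand),
         count(*)
    from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two);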
Jim Finnerty
Amazon Corp.
On Mon, Jan 16, 2017 at 10:59 AM, Finnerty, Jim <jfinnert@amazon.com> wrote:
> The ability to exploit hashed aggregation within sorted groups, when the order of the input stream can be exploited this way, is potentially a useful way to improve aggregation performance more generally. This would potentially be beneficial when the input size is expected to be larger than the amount of working memory available for hashed aggregation, but where there is enough memory to hash-aggregate just the unsorted grouping key combinations, and when the cumulative cost of rebuilding the hash table for each sorted subgroup is less than the cost of sorting the entire input. In other words, if most of the grouping key combinations are already segregated by virtue of the input order, then hashing the remaining combinations within each sorted group might be done in memory, at the cost of rebuilding the hash table for each sorted subgroup.
Neat idea.
> I haven’t looked at the code for this change yet (I hope I will have the time to do that). Ideally the decision to choose the aggregation method as sorted, hashed, or mixed hash/sort should be integrated into the cost model, but given the notorious difficulty of estimating intermediate cardinalities accurately it would be difficult to develop a cardinality model and a cost model accurate enough to choose among these options consistently well.
Yes, that might be a little tricky.
--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
On 6 January 2017 at 03:48, Andrew Gierth <andrew@tao11.riddles.org.uk> wrote:
> Herewith a patch for doing grouping sets via hashing or mixed hashing
> and sorting.
> [...]
This doesn't apply cleanly to latest master. Could you please post a
rebased patch?
Thanks
Thom
"Thom" == Thom Brown <thom@linux.com> writes:
Thom> This doesn't apply cleanly to latest master. Could you please
Thom> post a rebased patch?
Sure.
--
Andrew (irc:RhodiumToad)
Attachments:
gshash11.patch (text/x-patch)
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index c9e0a3e..480a07e 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -996,6 +996,10 @@ ExplainNode(PlanState *planstate, List *ancestors,
pname = "HashAggregate";
strategy = "Hashed";
break;
+ case AGG_MIXED:
+ pname = "MixedAggregate";
+ strategy = "Mixed";
+ break;
default:
pname = "Aggregate ???";
strategy = "???";
@@ -1924,6 +1928,19 @@ show_grouping_set_keys(PlanState *planstate,
ListCell *lc;
List *gsets = aggnode->groupingSets;
AttrNumber *keycols = aggnode->grpColIdx;
+ const char *keyname;
+ const char *keysetname;
+
+ if (aggnode->aggstrategy == AGG_HASHED || aggnode->aggstrategy == AGG_MIXED)
+ {
+ keyname = "Hash Key";
+ keysetname = "Hash Keys";
+ }
+ else
+ {
+ keyname = "Group Key";
+ keysetname = "Group Keys";
+ }
ExplainOpenGroup("Grouping Set", NULL, true, es);
@@ -1938,7 +1955,7 @@ show_grouping_set_keys(PlanState *planstate,
es->indent++;
}
- ExplainOpenGroup("Group Keys", "Group Keys", false, es);
+ ExplainOpenGroup(keysetname, keysetname, false, es);
foreach(lc, gsets)
{
@@ -1962,12 +1979,12 @@ show_grouping_set_keys(PlanState *planstate,
}
if (!result && es->format == EXPLAIN_FORMAT_TEXT)
- ExplainPropertyText("Group Key", "()", es);
+ ExplainPropertyText(keyname, "()", es);
else
- ExplainPropertyListNested("Group Key", result, es);
+ ExplainPropertyListNested(keyname, result, es);
}
- ExplainCloseGroup("Group Keys", "Group Keys", false, es);
+ ExplainCloseGroup(keysetname, keysetname, false, es);
if (sortnode && es->format == EXPLAIN_FORMAT_TEXT)
es->indent--;
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index aa08152..f059560 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -122,12 +122,19 @@
* specific).
*
* Where more complex grouping sets are used, we break them down into
- * "phases", where each phase has a different sort order. During each
- * phase but the last, the input tuples are additionally stored in a
- * tuplesort which is keyed to the next phase's sort order; during each
- * phase but the first, the input tuples are drawn from the previously
- * sorted data. (The sorting of the data for the first phase is handled by
- * the planner, as it might be satisfied by underlying nodes.)
+ * "phases", where each phase has a different sort order (except phase 0
+ * which is reserved for hashing). During each phase but the last, the
+ * input tuples are additionally stored in a tuplesort which is keyed to the
+ * next phase's sort order; during each phase but the first, the input
+ * tuples are drawn from the previously sorted data. (The sorting of the
+ * data for the first phase is handled by the planner, as it might be
+ * satisfied by underlying nodes.)
+ *
+ * Hashing can be mixed with sorted grouping. To do this, we have an
+ * AGG_MIXED strategy that populates the hashtables during the first sorted
+ * phase, and switches to reading them out after completing all sort phases.
+ * We can also support AGG_HASHED with multiple hash tables and no sorting
+ * at all.
*
* From the perspective of aggregate transition and final functions, the
* only issue regarding grouping sets is this: a single call site (flinfo)
@@ -139,8 +146,6 @@
* sensitive to the grouping set for which the aggregate function is
* currently being called.
*
- * TODO: AGG_HASHED doesn't support multiple grouping sets yet.
- *
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
@@ -432,6 +437,7 @@ typedef struct AggStatePerGroupData
*/
typedef struct AggStatePerPhaseData
{
+ AggStrategy aggstrategy; /* strategy for this phase */
int numsets; /* number of grouping sets (or 0) */
int *gset_lengths; /* lengths of grouping sets */
Bitmapset **grouped_cols; /* column groupings for rollup */
@@ -440,7 +446,31 @@ typedef struct AggStatePerPhaseData
Sort *sortnode; /* Sort node for input ordering for phase */
} AggStatePerPhaseData;
+/*
+ * AggStatePerHashData - per-hashtable state
+ *
+ * When doing grouping sets with hashing, we have one of these for each
+ * grouping set. (When doing hashing without grouping sets, we have just one of
+ * them.)
+ */
+
+typedef struct AggStatePerHashData
+{
+ TupleHashTable hashtable; /* hash table with one entry per group */
+ TupleHashIterator hashiter; /* for iterating through hash table */
+ TupleTableSlot *hashslot; /* slot for loading hash table */
+ FmgrInfo *hashfunctions; /* per-grouping-field hash fns */
+ FmgrInfo *eqfunctions; /* per-grouping-field equality fns */
+ int numCols; /* number of hash key columns */
+ int numhashGrpCols; /* number of columns in hash table */
+ int largestGrpColIdx; /* largest column required for hashing */
+ AttrNumber *hashGrpColIdxInput; /* and their indices in input slot */
+ AttrNumber *hashGrpColIdxHash; /* indices for execGrouping in hashtbl */
+ Agg *aggnode; /* original Agg node, for numGroups etc. */
+} AggStatePerHashData;
+
+static void select_current_set(AggState *aggstate, int setno, bool is_hash);
static void initialize_phase(AggState *aggstate, int newphase);
static TupleTableSlot *fetch_input_tuple(AggState *aggstate);
static void initialize_aggregates(AggState *aggstate,
@@ -449,7 +479,7 @@ static void initialize_aggregates(AggState *aggstate,
static void advance_transition_function(AggState *aggstate,
AggStatePerTrans pertrans,
AggStatePerGroup pergroupstate);
-static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup);
+static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup, AggStatePerGroup *pergroups);
static void advance_combine_function(AggState *aggstate,
AggStatePerTrans pertrans,
AggStatePerGroup pergroupstate);
@@ -479,8 +509,8 @@ static TupleTableSlot *project_aggregates(AggState *aggstate);
static Bitmapset *find_unaggregated_cols(AggState *aggstate);
static bool find_unaggregated_cols_walker(Node *node, Bitmapset **colnos);
static void build_hash_table(AggState *aggstate);
-static TupleHashEntryData *lookup_hash_entry(AggState *aggstate,
- TupleTableSlot *inputslot);
+static TupleHashEntryData *lookup_hash_entry(AggState *aggstate);
+static AggStatePerGroup *lookup_hash_entries(AggState *aggstate);
static TupleTableSlot *agg_retrieve_direct(AggState *aggstate);
static void agg_fill_hash_table(AggState *aggstate);
static TupleTableSlot *agg_retrieve_hash_table(AggState *aggstate);
@@ -501,13 +531,29 @@ static int find_compatible_pertrans(AggState *aggstate, Aggref *newagg,
/*
- * Switch to phase "newphase", which must either be 0 (to reset) or
+ * Select the current grouping set; affects current_set and
+ * curaggcontext.
+ */
+
+static void
+select_current_set(AggState *aggstate, int setno, bool is_hash)
+{
+ if (is_hash)
+ aggstate->curaggcontext = aggstate->hashcontext;
+ else
+ aggstate->curaggcontext = aggstate->aggcontexts[setno];
+
+ aggstate->current_set = setno;
+}
+
+/*
+ * Switch to phase "newphase", which must either be 0 or 1 (to reset) or
* current_phase + 1. Juggle the tuplesorts accordingly.
*/
static void
initialize_phase(AggState *aggstate, int newphase)
{
- Assert(newphase == 0 || newphase == aggstate->current_phase + 1);
+ Assert(newphase <= 1 || newphase == aggstate->current_phase + 1);
/*
* Whatever the previous state, we're now done with whatever input
@@ -519,7 +565,7 @@ initialize_phase(AggState *aggstate, int newphase)
aggstate->sort_in = NULL;
}
- if (newphase == 0)
+ if (newphase <= 1)
{
/*
* Discard any existing output tuplesort.
@@ -546,7 +592,7 @@ initialize_phase(AggState *aggstate, int newphase)
* If this isn't the last phase, we need to sort appropriately for the
* next phase in sequence.
*/
- if (newphase < aggstate->numphases - 1)
+ if (newphase > 0 && newphase < aggstate->numphases - 1)
{
Sort *sortnode = aggstate->phases[newphase + 1].sortnode;
PlanState *outerNode = outerPlanState(aggstate);
@@ -567,7 +613,7 @@ initialize_phase(AggState *aggstate, int newphase)
}
/*
- * Fetch a tuple from either the outer plan (for phase 0) or from the sorter
+ * Fetch a tuple from either the outer plan (for phase 1) or from the sorter
* populated by the previous phase. Copy it to the sorter for the next phase
* if any.
*/
@@ -595,8 +641,8 @@ fetch_input_tuple(AggState *aggstate)
/*
* (Re)Initialize an individual aggregate.
*
- * This function handles only one grouping set (already set in
- * aggstate->current_set).
+ * This function handles only one grouping set, already set in
+ * aggstate->current_set.
*
* When called, CurrentMemoryContext should be the per-query context.
*/
@@ -652,8 +698,7 @@ initialize_aggregate(AggState *aggstate, AggStatePerTrans pertrans,
{
MemoryContext oldContext;
- oldContext = MemoryContextSwitchTo(
- aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+ oldContext = MemoryContextSwitchTo(aggstate->curaggcontext->ecxt_per_tuple_memory);
pergroupstate->transValue = datumCopy(pertrans->initValue,
pertrans->transtypeByVal,
pertrans->transtypeLen);
@@ -676,8 +721,9 @@ initialize_aggregate(AggState *aggstate, AggStatePerTrans pertrans,
*
* If there are multiple grouping sets, we initialize only the first numReset
* of them (the grouping sets are ordered so that the most specific one, which
- * is reset most often, is first). As a convenience, if numReset is < 1, we
- * reinitialize all sets.
+ * is reset most often, is first). As a convenience, if numReset is 0, we
+ * reinitialize all sets. numReset is -1 to initialize a hashtable entry, in
+ * which case the caller must have used select_current_set appropriately.
*
* When called, CurrentMemoryContext should be the per-query context.
*/
@@ -691,23 +737,34 @@ initialize_aggregates(AggState *aggstate,
int setno = 0;
AggStatePerTrans transstates = aggstate->pertrans;
- if (numReset < 1)
+ if (numReset == 0)
numReset = numGroupingSets;
for (transno = 0; transno < aggstate->numtrans; transno++)
{
AggStatePerTrans pertrans = &transstates[transno];
- for (setno = 0; setno < numReset; setno++)
+ if (numReset < 0)
{
AggStatePerGroup pergroupstate;
- pergroupstate = &pergroup[transno + (setno * (aggstate->numtrans))];
-
- aggstate->current_set = setno;
+ pergroupstate = &pergroup[transno];
initialize_aggregate(aggstate, pertrans, pergroupstate);
}
+ else
+ {
+ for (setno = 0; setno < numReset; setno++)
+ {
+ AggStatePerGroup pergroupstate;
+
+ pergroupstate = &pergroup[transno + (setno * (aggstate->numtrans))];
+
+ select_current_set(aggstate, setno, false);
+
+ initialize_aggregate(aggstate, pertrans, pergroupstate);
+ }
+ }
}
}
@@ -757,7 +814,7 @@ advance_transition_function(AggState *aggstate,
* do not need to pfree the old transValue, since it's NULL.
*/
oldContext = MemoryContextSwitchTo(
- aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+ aggstate->curaggcontext->ecxt_per_tuple_memory);
pergroupstate->transValue = datumCopy(fcinfo->arg[1],
pertrans->transtypeByVal,
pertrans->transtypeLen);
@@ -807,7 +864,7 @@ advance_transition_function(AggState *aggstate,
{
if (!fcinfo->isnull)
{
- MemoryContextSwitchTo(aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+ MemoryContextSwitchTo(aggstate->curaggcontext->ecxt_per_tuple_memory);
if (DatumIsReadWriteExpandedObject(newVal,
false,
pertrans->transtypeLen) &&
@@ -838,17 +895,21 @@ advance_transition_function(AggState *aggstate,
/*
* Advance each aggregate transition state for one input tuple. The input
* tuple has been stored in tmpcontext->ecxt_outertuple, so that it is
- * accessible to ExecEvalExpr. pergroup is the array of per-group structs to
- * use (this might be in a hashtable entry).
+ * accessible to ExecEvalExpr.
+ *
+ * We have two sets of transition states to handle: one for sorted aggregation
+ * and one for hashed; we do them both here, to avoid multiple evaluation of
+ * the inputs.
*
* When called, CurrentMemoryContext should be the per-query context.
*/
static void
-advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
+advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup, AggStatePerGroup *pergroups)
{
int transno;
int setno = 0;
int numGroupingSets = Max(aggstate->phase->numsets, 1);
+ int numHashes = aggstate->num_hashes;
int numTrans = aggstate->numtrans;
TupleTableSlot *slot = aggstate->evalslot;
@@ -880,6 +941,7 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
{
/* DISTINCT and/or ORDER BY case */
Assert(slot->tts_nvalid >= (pertrans->numInputs + inputoff));
+ Assert(!pergroups);
/*
* If the transfn is strict, we want to check for nullity before
@@ -940,13 +1002,36 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
fcinfo->argnull[i + 1] = slot->tts_isnull[i + inputoff];
}
- for (setno = 0; setno < numGroupingSets; setno++)
+ if (pergroup)
+ {
+ /* advance transition states for ordered grouping */
+
+ for (setno = 0; setno < numGroupingSets; setno++)
+ {
+ AggStatePerGroup pergroupstate;
+
+ select_current_set(aggstate, setno, false);
+
+ pergroupstate = &pergroup[transno + (setno * numTrans)];
+
+ advance_transition_function(aggstate, pertrans, pergroupstate);
+ }
+ }
+
+ if (pergroups)
{
- AggStatePerGroup pergroupstate = &pergroup[transno + (setno * numTrans)];
+ /* advance transition states for hashed grouping */
+
+ for (setno = 0; setno < numHashes; setno++)
+ {
+ AggStatePerGroup pergroupstate;
+
+ select_current_set(aggstate, setno, true);
- aggstate->current_set = setno;
+ pergroupstate = &pergroups[setno][transno];
- advance_transition_function(aggstate, pertrans, pergroupstate);
+ advance_transition_function(aggstate, pertrans, pergroupstate);
+ }
}
}
}
@@ -967,7 +1052,7 @@ combine_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
TupleTableSlot *slot;
/* combine not supported with grouping sets */
- Assert(aggstate->phase->numsets == 0);
+ Assert(aggstate->phase->numsets <= 1);
/* compute input for all aggregates */
slot = ExecProject(aggstate->evalproj);
@@ -1060,7 +1145,7 @@ advance_combine_function(AggState *aggstate,
if (!pertrans->transtypeByVal)
{
oldContext = MemoryContextSwitchTo(
- aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+ aggstate->curaggcontext->ecxt_per_tuple_memory);
pergroupstate->transValue = datumCopy(fcinfo->arg[1],
pertrans->transtypeByVal,
pertrans->transtypeLen);
@@ -1105,7 +1190,7 @@ advance_combine_function(AggState *aggstate,
{
if (!fcinfo->isnull)
{
- MemoryContextSwitchTo(aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+ MemoryContextSwitchTo(aggstate->curaggcontext->ecxt_per_tuple_memory);
if (DatumIsReadWriteExpandedObject(newVal,
false,
pertrans->transtypeLen) &&
@@ -1559,7 +1644,9 @@ prepare_projection_slot(AggState *aggstate, TupleTableSlot *slot, int currentSet
/*
* Compute the final value of all aggregates for one group.
*
- * This function handles only one grouping set at a time.
+ * This function handles only one grouping set at a time. But in the hash
+ * case, it's the caller's responsibility to have selected the set already, and
+ * we pass in -1 here to flag that and to control the indexing into pertrans.
*
* Results are stored in the output econtext aggvalues/aggnulls.
*/
@@ -1575,10 +1662,11 @@ finalize_aggregates(AggState *aggstate,
int aggno;
int transno;
- Assert(currentSet == 0 ||
- ((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED);
-
- aggstate->current_set = currentSet;
+ /* ugly hack for hash */
+ if (currentSet >= 0)
+ select_current_set(aggstate, currentSet, false);
+ else
+ currentSet = 0;
/*
* If there were any DISTINCT and/or ORDER BY aggregates, sort their
@@ -1593,7 +1681,8 @@ finalize_aggregates(AggState *aggstate,
if (pertrans->numSortCols > 0)
{
- Assert(((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED);
+ Assert(aggstate->aggstrategy != AGG_HASHED &&
+ aggstate->aggstrategy != AGG_MIXED);
if (pertrans->numInputs == 1)
process_ordered_aggregate_single(aggstate,
@@ -1697,7 +1786,7 @@ find_unaggregated_cols_walker(Node *node, Bitmapset **colnos)
}
/*
- * Initialize the hash table to empty.
+ * Initialize the hash table(s) to empty.
*
* To implement hashed aggregation, we need a hashtable that stores a
* representative tuple and an array of AggStatePerGroup structs for each
@@ -1705,29 +1794,40 @@ find_unaggregated_cols_walker(Node *node, Bitmapset **colnos)
* GROUP BY columns. The per-group data is allocated in lookup_hash_entry(),
* for each entry.
*
- * The hash table always lives in the aggcontext memory context.
+ * We have a separate hashtable and associated perhash data structure for each
+ * grouping set for which we're doing hashing.
+ *
+ * The hash tables always live in the hashcontext's per-tuple memory context
+ * (there is only one of these for all tables together, since they are all
+ * reset at the same time).
*/
static void
build_hash_table(AggState *aggstate)
{
- Agg *node = (Agg *) aggstate->ss.ps.plan;
MemoryContext tmpmem = aggstate->tmpcontext->ecxt_per_tuple_memory;
Size additionalsize;
+ int i;
- Assert(node->aggstrategy == AGG_HASHED);
- Assert(node->numGroups > 0);
+ Assert(aggstate->aggstrategy == AGG_HASHED || aggstate->aggstrategy == AGG_MIXED);
- additionalsize = aggstate->numaggs * sizeof(AggStatePerGroupData);
+ additionalsize = aggstate->numtrans * sizeof(AggStatePerGroupData);
- aggstate->hashtable = BuildTupleHashTable(node->numCols,
- aggstate->hashGrpColIdxHash,
- aggstate->phase->eqfunctions,
- aggstate->hashfunctions,
- node->numGroups,
- additionalsize,
- aggstate->aggcontexts[0]->ecxt_per_tuple_memory,
- tmpmem,
+ for (i = 0; i < aggstate->num_hashes; ++i)
+ {
+ AggStatePerHash perhash = &aggstate->perhash[i];
+
+ Assert(perhash->aggnode->numGroups > 0);
+
+ perhash->hashtable = BuildTupleHashTable(perhash->numCols,
+ perhash->hashGrpColIdxHash,
+ perhash->eqfunctions,
+ perhash->hashfunctions,
+ perhash->aggnode->numGroups,
+ additionalsize,
+ aggstate->hashcontext->ecxt_per_tuple_memory,
+ tmpmem,
DO_AGGSPLIT_SKIPFINAL(aggstate->aggsplit));
+ }
}
/*
@@ -1750,72 +1850,98 @@ build_hash_table(AggState *aggstate)
* the array is preserved over ExecReScanAgg, so we allocate it in the
* per-query context (unlike the hash table itself).
*/
-static List *
+static void
find_hash_columns(AggState *aggstate)
{
- Agg *node = (Agg *) aggstate->ss.ps.plan;
- Bitmapset *colnos;
- List *collist;
- TupleDesc hashDesc;
+ Bitmapset *base_colnos;
List *outerTlist = outerPlanState(aggstate)->plan->targetlist;
- List *hashTlist = NIL;
- int i;
-
- aggstate->largestGrpColIdx = 0;
+ int numHashes = aggstate->num_hashes;
+ int j;
/* Find Vars that will be needed in tlist and qual */
- colnos = find_unaggregated_cols(aggstate);
- /* Add in all the grouping columns */
- for (i = 0; i < node->numCols; i++)
- colnos = bms_add_member(colnos, node->grpColIdx[i]);
- /* Convert to list, using lcons so largest element ends up first */
- collist = NIL;
-
- aggstate->hashGrpColIdxInput =
- palloc(bms_num_members(colnos) * sizeof(AttrNumber));
- aggstate->hashGrpColIdxHash =
- palloc(node->numCols * sizeof(AttrNumber));
+ base_colnos = find_unaggregated_cols(aggstate);
- /*
- * First build mapping for columns directly hashed. These are the first,
- * because they'll be accessed when computing hash values and comparing
- * tuples for exact matches. We also build simple mapping for
- * execGrouping, so it knows where to find the to-be-hashed / compared
- * columns in the input.
- */
- for (i = 0; i < node->numCols; i++)
+ for (j = 0; j < numHashes; ++j)
{
- aggstate->hashGrpColIdxInput[i] = node->grpColIdx[i];
- aggstate->hashGrpColIdxHash[i] = i + 1;
- aggstate->numhashGrpCols++;
- /* delete already mapped columns */
- bms_del_member(colnos, node->grpColIdx[i]);
- }
+ AggStatePerHash perhash = &aggstate->perhash[j];
+ Bitmapset *colnos = bms_copy(base_colnos);
+ AttrNumber *grpColIdx = perhash->aggnode->grpColIdx;
+ List *hashTlist = NIL;
+ TupleDesc hashDesc;
+ int i;
- /* and add the remaining columns */
- while ((i = bms_first_member(colnos)) >= 0)
- {
- aggstate->hashGrpColIdxInput[aggstate->numhashGrpCols] = i;
- aggstate->numhashGrpCols++;
- }
+ perhash->largestGrpColIdx = 0;
- /* and build a tuple descriptor for the hashtable */
- for (i = 0; i < aggstate->numhashGrpCols; i++)
- {
- int varNumber = aggstate->hashGrpColIdxInput[i] - 1;
+ /*
+ * If we're doing grouping sets, then some Vars might be referenced in
+ * tlist/qual for the benefit of other grouping sets, but not needed
+ * when hashing; i.e. prepare_projection_slot will null them out, so
+ * there'd be no point storing them. Use prepare_projection_slot's
+ * logic to determine which.
+ */
+ if (aggstate->phases[0].grouped_cols)
+ {
+ Bitmapset *grouped_cols = aggstate->phases[0].grouped_cols[j];
+ ListCell *lc;
- hashTlist = lappend(hashTlist, list_nth(outerTlist, varNumber));
- aggstate->largestGrpColIdx =
- Max(varNumber + 1, aggstate->largestGrpColIdx);
- }
+ foreach(lc, aggstate->all_grouped_cols)
+ {
+ int attnum = lfirst_int(lc);
- hashDesc = ExecTypeFromTL(hashTlist, false);
- ExecSetSlotDescriptor(aggstate->hashslot, hashDesc);
+ if (!bms_is_member(attnum, grouped_cols))
+ colnos = bms_del_member(colnos, attnum);
+ }
+ }
+ /* Add in all the grouping columns */
+ for (i = 0; i < perhash->numCols; i++)
+ colnos = bms_add_member(colnos, grpColIdx[i]);
+
+ perhash->hashGrpColIdxInput =
+ palloc(bms_num_members(colnos) * sizeof(AttrNumber));
+ perhash->hashGrpColIdxHash =
+ palloc(perhash->numCols * sizeof(AttrNumber));
+
+ /*
+ * First build mapping for columns directly hashed. These are the
+ * first, because they'll be accessed when computing hash values and
+ * comparing tuples for exact matches. We also build simple mapping for
+ * execGrouping, so it knows where to find the to-be-hashed / compared
+ * columns in the input.
+ */
+ for (i = 0; i < perhash->numCols; i++)
+ {
+ perhash->hashGrpColIdxInput[i] = grpColIdx[i];
+ perhash->hashGrpColIdxHash[i] = i + 1;
+ perhash->numhashGrpCols++;
+ /* delete already mapped columns */
+ bms_del_member(colnos, grpColIdx[i]);
+ }
- list_free(hashTlist);
- bms_free(colnos);
+ /* and add the remaining columns */
+ while ((i = bms_first_member(colnos)) >= 0)
+ {
+ perhash->hashGrpColIdxInput[perhash->numhashGrpCols] = i;
+ perhash->numhashGrpCols++;
+ }
- return collist;
+ /* and build a tuple descriptor for the hashtable */
+ for (i = 0; i < perhash->numhashGrpCols; i++)
+ {
+ int varNumber = perhash->hashGrpColIdxInput[i] - 1;
+
+ hashTlist = lappend(hashTlist, list_nth(outerTlist, varNumber));
+ perhash->largestGrpColIdx =
+ Max(varNumber + 1, perhash->largestGrpColIdx);
+ }
+
+ hashDesc = ExecTypeFromTL(hashTlist, false);
+ ExecSetSlotDescriptor(perhash->hashslot, hashDesc);
+
+ list_free(hashTlist);
+ bms_free(colnos);
+ }
+
+ bms_free(base_colnos);
}
/*
@@ -1840,26 +1966,30 @@ hash_agg_entry_size(int numAggs)
}
/*
- * Find or create a hashtable entry for the tuple group containing the
- * given tuple.
+ * Find or create a hashtable entry for the tuple group containing the current
+ * tuple (already set in tmpcontext's outertuple slot), in the current grouping
+ * set (which the caller must have selected - note that initialize_aggregate
+ * depends on this).
*
* When called, CurrentMemoryContext should be the per-query context.
*/
static TupleHashEntryData *
-lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot)
+lookup_hash_entry(AggState *aggstate)
{
- TupleTableSlot *hashslot = aggstate->hashslot;
+ TupleTableSlot *inputslot = aggstate->tmpcontext->ecxt_outertuple;
+ AggStatePerHash perhash = &aggstate->perhash[aggstate->current_set];
+ TupleTableSlot *hashslot = perhash->hashslot;
TupleHashEntryData *entry;
bool isnew;
int i;
/* transfer just the needed columns into hashslot */
- slot_getsomeattrs(inputslot, aggstate->largestGrpColIdx);
+ slot_getsomeattrs(inputslot, perhash->largestGrpColIdx);
ExecClearTuple(hashslot);
- for (i = 0; i < aggstate->numhashGrpCols; i++)
+ for (i = 0; i < perhash->numhashGrpCols; i++)
{
- int varNumber = aggstate->hashGrpColIdxInput[i] - 1;
+ int varNumber = perhash->hashGrpColIdxInput[i] - 1;
hashslot->tts_values[i] = inputslot->tts_values[varNumber];
hashslot->tts_isnull[i] = inputslot->tts_isnull[varNumber];
@@ -1867,22 +1997,44 @@ lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot)
ExecStoreVirtualTuple(hashslot);
/* find or create the hashtable entry using the filtered tuple */
- entry = LookupTupleHashEntry(aggstate->hashtable, hashslot, &isnew);
+ entry = LookupTupleHashEntry(perhash->hashtable, hashslot, &isnew);
if (isnew)
{
entry->additional = (AggStatePerGroup)
- MemoryContextAlloc(aggstate->hashtable->tablecxt,
+ MemoryContextAlloc(perhash->hashtable->tablecxt,
sizeof(AggStatePerGroupData) * aggstate->numtrans);
/* initialize aggregates for new tuple group */
initialize_aggregates(aggstate, (AggStatePerGroup) entry->additional,
- 0);
+ -1);
}
return entry;
}
/*
+ * Look up hash entries for the current tuple in all hashed grouping sets,
+ * returning an array of pergroup pointers suitable for advance_aggregates.
+ *
+ * Be aware that lookup_hash_entry can reset the tmpcontext.
+ */
+static AggStatePerGroup *
+lookup_hash_entries(AggState *aggstate)
+{
+ int numHashes = aggstate->num_hashes;
+ AggStatePerGroup *pergroup = aggstate->hash_pergroup;
+ int setno;
+
+ for (setno = 0; setno < numHashes; setno++)
+ {
+ select_current_set(aggstate, setno, true);
+ pergroup[setno] = lookup_hash_entry(aggstate)->additional;
+ }
+
+ return pergroup;
+}
+
+/*
* ExecAgg -
*
* ExecAgg receives tuples from its outer subplan and aggregates over
@@ -1903,14 +2055,17 @@ ExecAgg(AggState *node)
if (!node->agg_done)
{
/* Dispatch based on strategy */
- switch (node->phase->aggnode->aggstrategy)
+ switch (node->phase->aggstrategy)
{
case AGG_HASHED:
if (!node->table_filled)
agg_fill_hash_table(node);
+ /*FALLTHROUGH*/
+ case AGG_MIXED:
result = agg_retrieve_hash_table(node);
break;
- default:
+ case AGG_PLAIN:
+ case AGG_SORTED:
result = agg_retrieve_direct(node);
break;
}
@@ -1933,6 +2088,7 @@ agg_retrieve_direct(AggState *aggstate)
ExprContext *tmpcontext;
AggStatePerAgg peragg;
AggStatePerGroup pergroup;
+ AggStatePerGroup *hash_pergroups = NULL;
TupleTableSlot *outerslot;
TupleTableSlot *firstSlot;
TupleTableSlot *result;
@@ -2019,6 +2175,18 @@ agg_retrieve_direct(AggState *aggstate)
node = aggstate->phase->aggnode;
numReset = numGroupingSets;
}
+ else if (aggstate->aggstrategy == AGG_MIXED)
+ {
+ /*
+ * Mixed mode; we've output all the grouped stuff and have
+ * full hashtables, so switch to outputting those.
+ */
+ initialize_phase(aggstate, 0);
+ aggstate->table_filled = true;
+ ResetTupleHashIterator(aggstate->perhash[0].hashtable, &aggstate->perhash[0].hashiter);
+ select_current_set(aggstate, 0, true);
+ return agg_retrieve_hash_table(aggstate);
+ }
else
{
aggstate->agg_done = true;
@@ -2055,7 +2223,7 @@ agg_retrieve_direct(AggState *aggstate)
*----------
*/
if (aggstate->input_done ||
- (node->aggstrategy == AGG_SORTED &&
+ (node->aggstrategy != AGG_PLAIN &&
aggstate->projected_set != -1 &&
aggstate->projected_set < (numGroupingSets - 1) &&
nextSetSize > 0 &&
@@ -2168,10 +2336,21 @@ agg_retrieve_direct(AggState *aggstate)
*/
for (;;)
{
+ /*
+ * During phase 1 only of a mixed agg, we need to update
+ * hashtables as well in advance_aggregates.
+ */
+ if (aggstate->aggstrategy == AGG_MIXED && aggstate->current_phase == 1)
+ {
+ hash_pergroups = lookup_hash_entries(aggstate);
+ }
+ else
+ hash_pergroups = NULL;
+
if (DO_AGGSPLIT_COMBINE(aggstate->aggsplit))
combine_aggregates(aggstate, pergroup);
else
- advance_aggregates(aggstate, pergroup);
+ advance_aggregates(aggstate, pergroup, hash_pergroups);
/* Reset per-input-tuple context after each tuple */
ResetExprContext(tmpcontext);
@@ -2198,7 +2377,7 @@ agg_retrieve_direct(AggState *aggstate)
* If we are grouping, check whether we've crossed a group
* boundary.
*/
- if (node->aggstrategy == AGG_SORTED)
+ if (node->aggstrategy != AGG_PLAIN)
{
if (!execTuplesMatch(firstSlot,
outerslot,
@@ -2247,21 +2426,13 @@ agg_retrieve_direct(AggState *aggstate)
}
/*
- * ExecAgg for hashed case: phase 1, read input and build hash table
+ * ExecAgg for hashed case: read input and build hash table
*/
static void
agg_fill_hash_table(AggState *aggstate)
{
- ExprContext *tmpcontext;
- TupleHashEntryData *entry;
TupleTableSlot *outerslot;
-
- /*
- * get state info from node
- *
- * tmpcontext is the per-input-tuple expression context
- */
- tmpcontext = aggstate->tmpcontext;
+ ExprContext *tmpcontext = aggstate->tmpcontext;
/*
* Process each outer-plan tuple, and then fetch the next one, until we
@@ -2269,32 +2440,39 @@ agg_fill_hash_table(AggState *aggstate)
*/
for (;;)
{
+ AggStatePerGroup *pergroups;
+
outerslot = fetch_input_tuple(aggstate);
if (TupIsNull(outerslot))
break;
- /* set up for advance_aggregates call */
+
+ /* set up for lookup_hash_entries and advance_aggregates */
tmpcontext->ecxt_outertuple = outerslot;
- /* Find or build hashtable entry for this tuple's group */
- entry = lookup_hash_entry(aggstate, outerslot);
+ /* Find or build hashtable entries */
+ pergroups = lookup_hash_entries(aggstate);
/* Advance the aggregates */
if (DO_AGGSPLIT_COMBINE(aggstate->aggsplit))
- combine_aggregates(aggstate, (AggStatePerGroup) entry->additional);
+ combine_aggregates(aggstate, pergroups[0]);
else
- advance_aggregates(aggstate, (AggStatePerGroup) entry->additional);
+ advance_aggregates(aggstate, NULL, pergroups);
- /* Reset per-input-tuple context after each tuple */
- ResetExprContext(tmpcontext);
+ /*
+ * Reset per-input-tuple context after each tuple, but note that the
+ * hash lookups do this too
+ */
+ ResetExprContext(aggstate->tmpcontext);
}
aggstate->table_filled = true;
- /* Initialize to walk the hash table */
- ResetTupleHashIterator(aggstate->hashtable, &aggstate->hashiter);
+ /* Initialize to walk the first hash table */
+ select_current_set(aggstate, 0, true);
+ ResetTupleHashIterator(aggstate->perhash[0].hashtable, &aggstate->perhash[0].hashiter);
}
/*
- * ExecAgg for hashed case: phase 2, retrieving groups from hash table
+ * ExecAgg for hashed case: retrieving groups from hash table
*/
static TupleTableSlot *
agg_retrieve_hash_table(AggState *aggstate)
@@ -2305,17 +2483,22 @@ agg_retrieve_hash_table(AggState *aggstate)
TupleHashEntryData *entry;
TupleTableSlot *firstSlot;
TupleTableSlot *result;
- TupleTableSlot *hashslot;
+ AggStatePerHash perhash;
/*
- * get state info from node
+ * get state info from node.
+ *
+ * econtext is the per-output-tuple expression context.
*/
- /* econtext is the per-output-tuple expression context */
econtext = aggstate->ss.ps.ps_ExprContext;
peragg = aggstate->peragg;
firstSlot = aggstate->ss.ss_ScanTupleSlot;
- hashslot = aggstate->hashslot;
+ /*
+ * Note that perhash (and therefore anything accessed through it) can
+ * change inside the loop, as we change between grouping sets.
+ */
+ perhash = &aggstate->perhash[aggstate->current_set];
/*
* We loop retrieving groups until we find one satisfying
@@ -2323,17 +2506,37 @@ agg_retrieve_hash_table(AggState *aggstate)
*/
while (!aggstate->agg_done)
{
+ TupleTableSlot *hashslot = perhash->hashslot;
int i;
/*
* Find the next entry in the hash table
*/
- entry = ScanTupleHashTable(aggstate->hashtable, &aggstate->hashiter);
+ entry = ScanTupleHashTable(perhash->hashtable, &perhash->hashiter);
if (entry == NULL)
{
- /* No more entries in hashtable, so done */
- aggstate->agg_done = TRUE;
- return NULL;
+ int nextset = aggstate->current_set + 1;
+
+ if (nextset < aggstate->num_hashes)
+ {
+ /*
+ * Switch to next grouping set, reinitialize, and restart the
+ * loop.
+ */
+ select_current_set(aggstate, nextset, true);
+
+ perhash = &aggstate->perhash[aggstate->current_set];
+
+ ResetTupleHashIterator(perhash->hashtable, &perhash->hashiter);
+
+ continue;
+ }
+ else
+ {
+ /* No more hashtables, so done */
+ aggstate->agg_done = TRUE;
+ return NULL;
+ }
}
/*
@@ -2356,9 +2559,9 @@ agg_retrieve_hash_table(AggState *aggstate)
memset(firstSlot->tts_isnull, true,
firstSlot->tts_tupleDescriptor->natts * sizeof(bool));
- for (i = 0; i < aggstate->numhashGrpCols; i++)
+ for (i = 0; i < perhash->numhashGrpCols; i++)
{
- int varNumber = aggstate->hashGrpColIdxInput[i] - 1;
+ int varNumber = perhash->hashGrpColIdxInput[i] - 1;
firstSlot->tts_values[varNumber] = hashslot->tts_values[i];
firstSlot->tts_isnull[varNumber] = hashslot->tts_isnull[i];
@@ -2367,14 +2570,16 @@ agg_retrieve_hash_table(AggState *aggstate)
pergroup = (AggStatePerGroup) entry->additional;
- finalize_aggregates(aggstate, peragg, pergroup, 0);
-
/*
* Use the representative input tuple for any references to
* non-aggregated input columns in the qual and tlist.
*/
econtext->ecxt_outertuple = firstSlot;
+ prepare_projection_slot(aggstate, econtext->ecxt_outertuple, aggstate->current_set);
+
+ finalize_aggregates(aggstate, peragg, pergroup, -1);
+
result = project_aggregates(aggstate);
if (result)
return result;
@@ -2388,7 +2593,36 @@ agg_retrieve_hash_table(AggState *aggstate)
* ExecInitAgg
*
* Creates the run-time information for the agg node produced by the
- * planner and initializes its outer subtree
+ * planner and initializes its outer subtree.
+ *
+ * What we get from the planner is actually one "real" Agg node which is part
+ * of the plan tree proper, but which optionally has an additional list of Agg
+ * nodes hung off the side via the "chain" field. This is because an Agg node
+ * happens to be a convenient representation of all the data we need for
+ * grouping sets.
+ *
+ * For many purposes, we treat the "real" node as if it were just the first
+ * node in the chain. The chain must be ordered such that hashed entries come
+ * before sorted/plain entries; the real node is marked AGG_MIXED if there are
+ * mixed types (in which case the real node describes one of the hashed
+ * groupings; other AGG_HASHED nodes may optionally follow in the chain,
+ * followed in turn by AGG_SORTED nodes or one AGG_PLAIN node). If the real node
+ * is marked AGG_HASHED or AGG_SORTED, then all the chained nodes must be of
+ * the same type; if it is AGG_PLAIN, there can be no chained nodes.
+ *
+ * We collect all hashed nodes into a single "phase", numbered 0, and create a
+ * sorted phase (numbered 1..n) for each AGG_SORTED or AGG_PLAIN node. Phase
+ * 0 is allocated even if there are no hashes, but remains unused in that
+ * case.
+ *
+ * AGG_HASHED nodes actually refer to only a single grouping set each, because
+ * for each hashed grouping we need a separate grpColIdx and numGroups
+ * estimate. AGG_SORTED nodes represent a "rollup", a list of grouping sets
+ * that share a sort order. Each AGG_SORTED node other than the first one has
+ * an associated Sort node which describes the sort order to be used; the
+ * first sorted node takes its input from the outer subtree, which the planner
+ * has already arranged to provide ordered data.
+ *
* -----------------
*/
AggState *
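/*
 * [Editorial sketch, not part of the patch] To make the chain/phase layout
 * described above concrete: for a query like
 *
 *     select ... group by grouping sets (a, b, (c, d));
 *
 * planned as a MixedAggregate with the sets "a" and "b" hashed, the
 * executor would see something like
 *
 *     real node: AGG_MIXED,  hashed set (a)      -- part of phase 0
 *     chain[0]:  AGG_HASHED, hashed set (b)      -- part of phase 0
 *     chain[1]:  AGG_SORTED, rollup over (c, d)  -- phase 1
 *
 * Phase 1 consumes the pre-sorted input; while it runs, the phase-0
 * hashtables are filled as a side effect (see agg_retrieve_direct), and
 * their contents are emitted only after the sorted output is exhausted.
 */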
@@ -2403,14 +2637,18 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
transno,
aggno;
int phase;
+ int phaseidx;
List *combined_inputeval;
ListCell *l;
Bitmapset *all_grouped_cols = NULL;
int numGroupingSets = 1;
int numPhases;
+ int numHashes;
int column_offset;
int i = 0;
int j = 0;
+ bool use_hashing = (node->aggstrategy == AGG_HASHED ||
+ node->aggstrategy == AGG_MIXED);
/* check for unsupported flags */
Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
@@ -2425,9 +2663,9 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->aggs = NIL;
aggstate->numaggs = 0;
aggstate->numtrans = 0;
+ aggstate->aggstrategy = node->aggstrategy;
aggstate->aggsplit = node->aggsplit;
aggstate->maxsets = 0;
- aggstate->hashfunctions = NULL;
aggstate->projected_set = -1;
aggstate->current_set = 0;
aggstate->peragg = NULL;
@@ -2437,18 +2675,22 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->agg_done = false;
aggstate->pergroup = NULL;
aggstate->grp_firstTuple = NULL;
- aggstate->hashtable = NULL;
aggstate->sort_in = NULL;
aggstate->sort_out = NULL;
/*
+ * phases[0] always exists, but is dummy in sorted/plain mode
+ */
+ numPhases = (use_hashing ? 1 : 2);
+ numHashes = (use_hashing ? 1 : 0);
+
+ /*
* Calculate the maximum number of grouping sets in any phase; this
- * determines the size of some allocations.
+ * determines the size of some allocations. Also calculate the number of
+ * phases, since all hashed/mixed nodes contribute to only a single phase.
*/
if (node->groupingSets)
{
- Assert(node->aggstrategy != AGG_HASHED);
-
numGroupingSets = list_length(node->groupingSets);
foreach(l, node->chain)
@@ -2457,22 +2699,30 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
numGroupingSets = Max(numGroupingSets,
list_length(agg->groupingSets));
+ /*
+ * additional AGG_HASHED aggs become part of phase 0,
+ * but all others add an extra phase.
+ */
+ if (agg->aggstrategy != AGG_HASHED)
+ ++numPhases;
+ else
+ ++numHashes;
}
}
aggstate->maxsets = numGroupingSets;
- aggstate->numphases = numPhases = 1 + list_length(node->chain);
+ aggstate->numphases = numPhases;
aggstate->aggcontexts = (ExprContext **)
palloc0(sizeof(ExprContext *) * numGroupingSets);
/*
* Create expression contexts. We need three or more, one for
- * per-input-tuple processing, one for per-output-tuple processing, and
- * one for each grouping set. The per-tuple memory context of the
- * per-grouping-set ExprContexts (aggcontexts) replaces the standalone
- * memory context formerly used to hold transition values. We cheat a
- * little by using ExecAssignExprContext() to build all of them.
+ * per-input-tuple processing, one for per-output-tuple processing, one for
+ * all the hashtables, and one for each grouping set. The per-tuple memory
+ * context of the per-grouping-set ExprContexts (aggcontexts) replaces the
+ * standalone memory context formerly used to hold transition values. We
+ * cheat a little by using ExecAssignExprContext() to build all of them.
*
* NOTE: the details of what is stored in aggcontexts and what is stored
* in the regular per-query memory context are driven by a simple
@@ -2488,14 +2738,21 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->aggcontexts[i] = aggstate->ss.ps.ps_ExprContext;
}
+ if (use_hashing)
+ {
+ ExecAssignExprContext(estate, &aggstate->ss.ps);
+ aggstate->hashcontext = aggstate->ss.ps.ps_ExprContext;
+ }
+
ExecAssignExprContext(estate, &aggstate->ss.ps);
/*
- * tuple table initialization
+ * tuple table initialization.
+ *
+ * For hashtables, we create some additional slots below.
*/
ExecInitScanTupleSlot(estate, &aggstate->ss);
ExecInitResultTupleSlot(estate, &aggstate->ss.ps);
- aggstate->hashslot = ExecInitExtraTupleSlot(estate);
aggstate->sort_slot = ExecInitExtraTupleSlot(estate);
/*
@@ -2560,19 +2817,25 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
* For each phase, prepare grouping set data and fmgr lookup data for
* compare functions. Accumulate all_grouped_cols in passing.
*/
-
aggstate->phases = palloc0(numPhases * sizeof(AggStatePerPhaseData));
- for (phase = 0; phase < numPhases; ++phase)
+ aggstate->num_hashes = numHashes;
+ if (numHashes)
+ {
+ aggstate->perhash = palloc0(sizeof(AggStatePerHashData) * numHashes);
+ aggstate->phases[0].numsets = 0;
+ aggstate->phases[0].gset_lengths = palloc(numHashes * sizeof(int));
+ aggstate->phases[0].grouped_cols = palloc(numHashes * sizeof(Bitmapset *));
+ }
+
+ for (phaseidx = 0, phase = 0; phaseidx <= list_length(node->chain); ++phaseidx)
{
- AggStatePerPhase phasedata = &aggstate->phases[phase];
Agg *aggnode;
Sort *sortnode;
- int num_sets;
- if (phase > 0)
+ if (phaseidx > 0)
{
- aggnode = castNode(Agg, list_nth(node->chain, phase - 1));
+ aggnode = castNode(Agg, list_nth(node->chain, phaseidx - 1));
sortnode = castNode(Sort, aggnode->plan.lefttree);
}
else
@@ -2581,53 +2844,91 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
sortnode = NULL;
}
- phasedata->numsets = num_sets = list_length(aggnode->groupingSets);
+ Assert(phase <= 1 || sortnode);
- if (num_sets)
+ if (aggnode->aggstrategy == AGG_HASHED
+ || aggnode->aggstrategy == AGG_MIXED)
{
- phasedata->gset_lengths = palloc(num_sets * sizeof(int));
- phasedata->grouped_cols = palloc(num_sets * sizeof(Bitmapset *));
+ AggStatePerPhase phasedata = &aggstate->phases[0];
+ AggStatePerHash perhash;
+ Bitmapset *cols = NULL;
- i = 0;
- foreach(l, aggnode->groupingSets)
- {
- int current_length = list_length(lfirst(l));
- Bitmapset *cols = NULL;
+ Assert(phase == 0);
+ i = phasedata->numsets++;
+ perhash = &aggstate->perhash[i];
- /* planner forces this to be correct */
- for (j = 0; j < current_length; ++j)
- cols = bms_add_member(cols, aggnode->grpColIdx[j]);
+ /* phase 0 always points to the "real" Agg in the hash case */
+ phasedata->aggnode = node;
+ phasedata->aggstrategy = node->aggstrategy;
- phasedata->grouped_cols[i] = cols;
- phasedata->gset_lengths[i] = current_length;
- ++i;
- }
+ /* but the actual Agg node representing this hash is saved here */
+ perhash->aggnode = aggnode;
+
+ phasedata->gset_lengths[i] = perhash->numCols = aggnode->numCols;
+
+ for (j = 0; j < aggnode->numCols; ++j)
+ cols = bms_add_member(cols, aggnode->grpColIdx[j]);
+
+ phasedata->grouped_cols[i] = cols;
- all_grouped_cols = bms_add_members(all_grouped_cols,
- phasedata->grouped_cols[0]);
+ all_grouped_cols = bms_add_members(all_grouped_cols, cols);
+ continue;
}
else
{
- Assert(phase == 0);
+ AggStatePerPhase phasedata = &aggstate->phases[++phase];
+ int num_sets;
- phasedata->gset_lengths = NULL;
- phasedata->grouped_cols = NULL;
- }
+ phasedata->numsets = num_sets = list_length(aggnode->groupingSets);
- /*
- * If we are grouping, precompute fmgr lookup data for inner loop.
- */
- if (aggnode->aggstrategy == AGG_SORTED)
- {
- Assert(aggnode->numCols > 0);
+ if (num_sets)
+ {
+ phasedata->gset_lengths = palloc(num_sets * sizeof(int));
+ phasedata->grouped_cols = palloc(num_sets * sizeof(Bitmapset *));
- phasedata->eqfunctions =
- execTuplesMatchPrepare(aggnode->numCols,
- aggnode->grpOperators);
- }
+ i = 0;
+ foreach(l, aggnode->groupingSets)
+ {
+ int current_length = list_length(lfirst(l));
+ Bitmapset *cols = NULL;
+
+ /* planner forces this to be correct */
+ for (j = 0; j < current_length; ++j)
+ cols = bms_add_member(cols, aggnode->grpColIdx[j]);
+
+ phasedata->grouped_cols[i] = cols;
+ phasedata->gset_lengths[i] = current_length;
+
+ ++i;
+ }
+
+ all_grouped_cols = bms_add_members(all_grouped_cols,
+ phasedata->grouped_cols[0]);
+ }
+ else
+ {
+ Assert(phaseidx == 0);
+
+ phasedata->gset_lengths = NULL;
+ phasedata->grouped_cols = NULL;
+ }
+
+ /*
+ * If we are grouping, precompute fmgr lookup data for inner loop.
+ */
+ if (aggnode->aggstrategy == AGG_SORTED)
+ {
+ Assert(aggnode->numCols > 0);
+
+ phasedata->eqfunctions =
+ execTuplesMatchPrepare(aggnode->numCols,
+ aggnode->grpOperators);
+ }
- phasedata->aggnode = aggnode;
- phasedata->sortnode = sortnode;
+ phasedata->aggnode = aggnode;
+ phasedata->aggstrategy = aggnode->aggstrategy;
+ phasedata->sortnode = sortnode;
+ }
}
/*
@@ -2638,13 +2939,6 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->all_grouped_cols = lcons_int(i, aggstate->all_grouped_cols);
/*
- * Initialize current phase-dependent values to initial phase
- */
-
- aggstate->current_phase = 0;
- initialize_phase(aggstate, 0);
-
- /*
* Set up aggregate-result storage in the output expr context, and also
* allocate my private per-agg working storage
*/
@@ -2658,23 +2952,30 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->peragg = peraggs;
aggstate->pertrans = pertransstates;
-
/*
* Hashing can only appear in the initial phase.
*/
- if (node->aggstrategy == AGG_HASHED)
+ if (use_hashing)
{
- find_hash_columns(aggstate);
+ for (i = 0; i < numHashes; ++i)
+ {
+ aggstate->perhash[i].hashslot = ExecInitExtraTupleSlot(estate);
+
+ execTuplesHashPrepare(aggstate->perhash[i].numCols,
+ aggstate->perhash[i].aggnode->grpOperators,
+ &aggstate->perhash[i].eqfunctions,
+ &aggstate->perhash[i].hashfunctions);
+ }
- execTuplesHashPrepare(node->numCols,
- node->grpOperators,
- &aggstate->phases[0].eqfunctions,
- &aggstate->hashfunctions);
+ /* this is an array of pointers, not structures */
+ aggstate->hash_pergroup = palloc0(sizeof(AggStatePerGroup) * numHashes);
+ find_hash_columns(aggstate);
build_hash_table(aggstate);
aggstate->table_filled = false;
}
- else
+
+ if (node->aggstrategy != AGG_HASHED)
{
AggStatePerGroup pergroup;
@@ -2685,6 +2986,26 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->pergroup = pergroup;
}
+ /*
+ * Initialize current phase-dependent values to initial phase. The initial
+ * phase is 1 (first sort pass) for all strategies that use sorting (if
+ * hashing is being done too, then phase 0 is processed last); but if only
+ * hashing is being done, then phase 0 is all there is.
+ */
+
+ if (node->aggstrategy == AGG_HASHED)
+ {
+ aggstate->current_phase = 0;
+ initialize_phase(aggstate, 0);
+ select_current_set(aggstate, 0, true);
+ }
+ else
+ {
+ aggstate->current_phase = 1;
+ initialize_phase(aggstate, 1);
+ select_current_set(aggstate, 0, false);
+ }
+
/* -----------------
* Perform lookups of aggregate function info, and initialize the
* unchanging fields of the per-agg and per-trans data.
@@ -3263,7 +3584,7 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
* We don't implement DISTINCT or ORDER BY aggs in the HASHED case
* (yet)
*/
- Assert(((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED);
+ Assert(aggstate->aggstrategy != AGG_HASHED && aggstate->aggstrategy != AGG_MIXED);
/* If we have only one input, we need its len/byval info. */
if (numInputs == 1)
@@ -3512,6 +3833,8 @@ ExecEndAgg(AggState *node)
/* And ensure any agg shutdown callbacks have been called */
for (setno = 0; setno < numGroupingSets; setno++)
ReScanExprContext(node->aggcontexts[setno]);
+ if (node->hashcontext)
+ ReScanExprContext(node->hashcontext);
/*
* We don't actually free any ExprContexts here (see comment in
@@ -3539,7 +3862,7 @@ ExecReScanAgg(AggState *node)
node->agg_done = false;
- if (aggnode->aggstrategy == AGG_HASHED)
+ if (node->aggstrategy == AGG_HASHED)
{
/*
* In the hashed case, if we haven't yet built the hash table then we
@@ -3559,7 +3882,8 @@ ExecReScanAgg(AggState *node)
if (outerPlan->chgParam == NULL &&
!bms_overlap(node->ss.ps.chgParam, aggnode->aggParams))
{
- ResetTupleHashIterator(node->hashtable, &node->hashiter);
+ ResetTupleHashIterator(node->perhash[0].hashtable, &node->perhash[0].hashiter);
+ select_current_set(node, 0, true);
return;
}
}
@@ -3584,11 +3908,7 @@ ExecReScanAgg(AggState *node)
* ExecReScan already did it. But we do need to reset our per-grouping-set
* contexts, which may have transvalues stored in them. (We use rescan
* rather than just reset because transfns may have registered callbacks
- * that need to be run now.)
- *
- * Note that with AGG_HASHED, the hash table is allocated in a sub-context
- * of the aggcontext. This used to be an issue, but now, resetting a
- * context automatically deletes sub-contexts too.
+ * that need to be run now.) For the AGG_HASHED case, see below.
*/
for (setno = 0; setno < numGroupingSets; setno++)
@@ -3608,13 +3928,22 @@ ExecReScanAgg(AggState *node)
MemSet(econtext->ecxt_aggvalues, 0, sizeof(Datum) * node->numaggs);
MemSet(econtext->ecxt_aggnulls, 0, sizeof(bool) * node->numaggs);
- if (aggnode->aggstrategy == AGG_HASHED)
+ /*
+ * With AGG_HASHED/MIXED, the hash table is allocated in a sub-context of
+ * the hashcontext. This used to be an issue, but now, resetting a context
+ * automatically deletes sub-contexts too.
+ */
+
+ if (node->aggstrategy == AGG_HASHED || node->aggstrategy == AGG_MIXED)
{
+ ReScanExprContext(node->hashcontext);
/* Rebuild an empty hash table */
build_hash_table(node);
node->table_filled = false;
+ /* iterator will be reset when the table is filled */
}
- else
+
+ if (node->aggstrategy != AGG_HASHED)
{
/*
* Reset the per-group state (in particular, mark transvalues null)
@@ -3622,8 +3951,8 @@ ExecReScanAgg(AggState *node)
MemSet(node->pergroup, 0,
sizeof(AggStatePerGroupData) * node->numaggs * numGroupingSets);
- /* reset to phase 0 */
- initialize_phase(node, 0);
+ /* reset to phase 1 */
+ initialize_phase(node, 1);
node->input_done = false;
node->projected_set = -1;
@@ -3664,7 +3993,7 @@ AggCheckCallContext(FunctionCallInfo fcinfo, MemoryContext *aggcontext)
if (aggcontext)
{
AggState *aggstate = ((AggState *) fcinfo->context);
- ExprContext *cxt = aggstate->aggcontexts[aggstate->current_set];
+ ExprContext *cxt = aggstate->curaggcontext;
*aggcontext = cxt->ecxt_per_tuple_memory;
}
@@ -3753,7 +4082,7 @@ AggRegisterCallback(FunctionCallInfo fcinfo,
if (fcinfo->context && IsA(fcinfo->context, AggState))
{
AggState *aggstate = (AggState *) fcinfo->context;
- ExprContext *cxt = aggstate->aggcontexts[aggstate->current_set];
+ ExprContext *cxt = aggstate->curaggcontext;
RegisterExprContextCallback(cxt, func, arg);
diff --git a/src/backend/lib/Makefile b/src/backend/lib/Makefile
index 2d2ba84..f222c6c 100644
--- a/src/backend/lib/Makefile
+++ b/src/backend/lib/Makefile
@@ -12,7 +12,7 @@ subdir = src/backend/lib
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
-OBJS = binaryheap.o bipartite_match.o hyperloglog.o ilist.o pairingheap.o \
- rbtree.o stringinfo.o
+OBJS = binaryheap.o bipartite_match.o hyperloglog.o ilist.o knapsack.o \
+ pairingheap.o rbtree.o stringinfo.o
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/lib/knapsack.c b/src/backend/lib/knapsack.c
new file mode 100644
index 0000000..54301b9
--- /dev/null
+++ b/src/backend/lib/knapsack.c
@@ -0,0 +1,112 @@
+/*-------------------------------------------------------------------------
+ *
+ * knapsack.c
+ * Knapsack problem solver
+ *
+ * Given input vectors of integral item weights (must be >= 0) and values
+ * (double >= 0), compute the set of items which produces the greatest total
+ * value without exceeding a specified total weight; each item is included at
+ * most once (this is the 0/1 knapsack problem). Weight 0 items will always be
+ * included.
+ *
+ * The performance of this algorithm is pseudo-polynomial, O(nW) where W is the
+ * weight limit. To use it with non-integral weights, the caller should
+ * pre-scale the input weights to a suitable integer range; scaling down also
+ * yields approximate solutions in polynomial time (the general case of the
+ * exact problem is NP-hard).
+ *
+ * Copyright (c) 2017, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/lib/knapsack.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <math.h>
+#include <limits.h>
+
+#include "lib/knapsack.h"
+#include "miscadmin.h"
+#include "nodes/bitmapset.h"
+#include "utils/builtins.h"
+#include "utils/memutils.h"
+#include "utils/palloc.h"
+
+/*
+ * DiscreteKnapsack
+ *
+ * The item_values input is optional; if omitted, all the items are assumed to
+ * have value 1.
+ *
+ * Returns a Bitmapset of the 0..(n-1) indexes of the items chosen for
+ * inclusion in the solution.
+ *
+ * This uses the usual dynamic-programming algorithm, adapted to reuse the
+ * memory on each pass (by working from larger weights to smaller). At the
+ * start of pass number i, the values[w] array contains the largest value
+ * computed with total weight <= w, using only items with indices < i; and
+ * sets[w] contains the bitmap of items actually used for that value. (The
+ * bitmapsets are all pre-initialized with an unused high bit so that memory
+ * allocation is done only once.)
+ */
+Bitmapset *
+DiscreteKnapsack(int max_weight, int num_items,
+ int *item_weights, double *item_values)
+{
+ MemoryContext local_ctx = AllocSetContextCreate(CurrentMemoryContext,
+ "Knapsack",
+ ALLOCSET_SMALL_MINSIZE,
+ ALLOCSET_SMALL_INITSIZE,
+ ALLOCSET_SMALL_MAXSIZE);
+ MemoryContext oldctx = MemoryContextSwitchTo(local_ctx);
+ double *values;
+ Bitmapset **sets;
+ Bitmapset *result;
+ int i, j;
+
+ Assert(max_weight >= 0);
+ Assert(num_items > 0 && item_weights);
+
+ values = palloc((1 + max_weight) * sizeof(double));
+ sets = palloc((1 + max_weight) * sizeof(Bitmapset *));
+
+ for (i = 0; i <= max_weight; ++i)
+ {
+ values[i] = 0;
+ sets[i] = bms_make_singleton(num_items);
+ }
+
+ for (i = 0; i < num_items; ++i)
+ {
+ int iw = item_weights[i];
+ double iv = item_values ? item_values[i] : 1;
+
+ for (j = max_weight; j >= 0; --j)
+ {
+ int ow = j - iw;
+ if (iw <= j && values[j] <= values[ow] + iv)
+ {
+ /* copy sets[ow] to sets[j] without realloc */
+ if (j != ow)
+ {
+ sets[j] = bms_del_members(sets[j], sets[j]);
+ sets[j] = bms_add_members(sets[j], sets[ow]);
+ }
+
+ sets[j] = bms_add_member(sets[j], i);
+
+ values[j] = values[ow] + iv;
+ }
+ }
+ }
+
+ MemoryContextSwitchTo(oldctx);
+
+ result = bms_del_member(bms_copy(sets[max_weight]), num_items);
+
+ MemoryContextDelete(local_ctx);
+
+ return result;
+}
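/*
 * [Editorial sketch, not part of the patch] A minimal standalone example of
 * how DiscreteKnapsack is intended to be called.  The weights are assumed to
 * be pre-scaled integers, as the header comment requires; passing NULL for
 * item_values gives every item value 1, so the solver simply maximizes the
 * number of items packed.
 */
#ifdef KNAPSACK_USAGE_EXAMPLE
static void
knapsack_usage_example(void)
{
	int			weights[] = {6, 4, 3, 5};	/* pre-scaled item weights */
	Bitmapset  *chosen;
	int			i = -1;

	/* capacity 10: no three of these items fit, so at most two are chosen */
	chosen = DiscreteKnapsack(10, 4, weights, NULL);

	while ((i = bms_next_member(chosen, i)) >= 0)
		elog(INFO, "chose item %d (weight %d)", i, weights[i]);

	bms_free(chosen);
}
#endif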
diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c
index 252af5c..16a357e 100644
--- a/src/backend/nodes/bitmapset.c
+++ b/src/backend/nodes/bitmapset.c
@@ -21,7 +21,7 @@
#include "postgres.h"
#include "access/hash.h"
-
+#include "nodes/pg_list.h"
#define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD)
#define BITNUM(x) ((x) % BITS_PER_BITMAPWORD)
@@ -458,6 +458,36 @@ bms_overlap(const Bitmapset *a, const Bitmapset *b)
}
/*
+ * bms_overlap_list - does a set overlap an integer list?
+ */
+bool
+bms_overlap_list(const Bitmapset *a, const List *b)
+{
+ ListCell *lc;
+ int wordnum,
+ bitnum;
+
+ if (a == NULL || b == NIL)
+ return false;
+
+ foreach(lc, b)
+ {
+ int x = lfirst_int(lc);
+
+ /* XXX better to just return false for x<0 ? */
+ if (x < 0)
+ elog(ERROR, "negative bitmapset member not allowed");
+ wordnum = WORDNUM(x);
+ bitnum = BITNUM(x);
+ if (wordnum < a->nwords)
+ if ((a->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0)
+ return true;
+ }
+
+ return false;
+}
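/*
 * [Editorial sketch, not part of the patch] The planner changes below use
 * bms_overlap_list roughly like this: given the bitmapset of unsortable
 * sortgrouprefs and a grouping set expressed as an integer list, test
 * whether the set touches any unsortable column.  The refs used here are
 * hypothetical.
 */
#ifdef BMS_OVERLAP_LIST_EXAMPLE
static bool
gset_touches_unsortable(const Bitmapset *unsortable_refs)
{
	List	   *gset = list_make2_int(3, 7);	/* sortgrouprefs 3 and 7 */

	return bms_overlap_list(unsortable_refs, gset);
}
#endif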
+
+/*
* bms_nonempty_difference - do sets have a nonempty difference?
*/
bool
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index b3802b4..59a7044 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -1875,6 +1875,28 @@ _outAggPath(StringInfo str, const AggPath *node)
}
static void
+_outRollupData(StringInfo str, const RollupData *node)
+{
+ WRITE_NODE_TYPE("ROLLUP");
+
+ WRITE_NODE_FIELD(groupClause);
+ WRITE_NODE_FIELD(gsets);
+ WRITE_NODE_FIELD(gsets_data);
+ WRITE_FLOAT_FIELD(numGroups, "%.0f");
+ WRITE_BOOL_FIELD(hashable);
+ WRITE_BOOL_FIELD(is_hashed);
+}
+
+static void
+_outGroupingSetData(StringInfo str, const GroupingSetData *node)
+{
+ WRITE_NODE_TYPE("GSDATA");
+
+ WRITE_NODE_FIELD(set);
+ WRITE_FLOAT_FIELD(numGroups, "%.0f");
+}
+
+static void
_outGroupingSetsPath(StringInfo str, const GroupingSetsPath *node)
{
WRITE_NODE_TYPE("GROUPINGSETSPATH");
@@ -1882,8 +1904,8 @@ _outGroupingSetsPath(StringInfo str, const GroupingSetsPath *node)
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(subpath);
- WRITE_NODE_FIELD(rollup_groupclauses);
- WRITE_NODE_FIELD(rollup_lists);
+ WRITE_ENUM_FIELD(aggstrategy, AggStrategy);
+ WRITE_NODE_FIELD(rollups);
WRITE_NODE_FIELD(qual);
}
@@ -3800,6 +3822,12 @@ outNode(StringInfo str, const void *obj)
case T_PlannerParamItem:
_outPlannerParamItem(str, obj);
break;
+ case T_RollupData:
+ _outRollupData(str, obj);
+ break;
+ case T_GroupingSetData:
+ _outGroupingSetData(str, obj);
+ break;
case T_ExtensibleNode:
_outExtensibleNode(str, obj);
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index c138f57..937e30d 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1739,11 +1739,16 @@ cost_agg(Path *path, PlannerInfo *root,
total_cost = startup_cost + cpu_tuple_cost;
output_tuples = 1;
}
- else if (aggstrategy == AGG_SORTED)
+ else if (aggstrategy == AGG_SORTED || aggstrategy == AGG_MIXED)
{
/* Here we are able to deliver output on-the-fly */
startup_cost = input_startup_cost;
total_cost = input_total_cost;
+ if (aggstrategy == AGG_MIXED && !enable_hashagg)
+ {
+ startup_cost += disable_cost;
+ total_cost += disable_cost;
+ }
/* calcs phrased this way to match HASHED case, see note above */
total_cost += aggcosts->transCost.startup;
total_cost += aggcosts->transCost.per_tuple * input_tuples;
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 1e953b4..7137244 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -1707,18 +1707,15 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
{
Agg *plan;
Plan *subplan;
- List *rollup_groupclauses = best_path->rollup_groupclauses;
- List *rollup_lists = best_path->rollup_lists;
+ List *rollups = best_path->rollups;
AttrNumber *grouping_map;
int maxref;
List *chain;
- ListCell *lc,
- *lc2;
+ ListCell *lc;
/* Shouldn't get here without grouping sets */
Assert(root->parse->groupingSets);
- Assert(rollup_lists != NIL);
- Assert(list_length(rollup_lists) == list_length(rollup_groupclauses));
+ Assert(rollups != NIL);
/*
* Agg can project, so no need to be terribly picky about child tlist, but
@@ -1770,72 +1767,86 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
* costs will be shown by EXPLAIN.
*/
chain = NIL;
- if (list_length(rollup_groupclauses) > 1)
+ if (list_length(rollups) > 1)
{
- forboth(lc, rollup_groupclauses, lc2, rollup_lists)
+ ListCell *lc2 = lnext(list_head(rollups));
+ bool is_first_sort = ((RollupData *) linitial(rollups))->is_hashed;
+
+ for_each_cell(lc, lc2)
{
- List *groupClause = (List *) lfirst(lc);
- List *gsets = (List *) lfirst(lc2);
+ RollupData *rollup = lfirst(lc);
AttrNumber *new_grpColIdx;
- Plan *sort_plan;
+ Plan *sort_plan = NULL;
Plan *agg_plan;
+ AggStrategy strat;
- /* We want to iterate over all but the last rollup list elements */
- if (lnext(lc) == NULL)
- break;
+ new_grpColIdx = remap_groupColIdx(root, rollup->groupClause);
+
+ if (!rollup->is_hashed && !is_first_sort)
+ {
+ sort_plan = (Plan *)
+ make_sort_from_groupcols(rollup->groupClause,
+ new_grpColIdx,
+ subplan);
+ }
- new_grpColIdx = remap_groupColIdx(root, groupClause);
+ if (!rollup->is_hashed)
+ is_first_sort = false;
- sort_plan = (Plan *)
- make_sort_from_groupcols(groupClause,
- new_grpColIdx,
- subplan);
+ if (rollup->is_hashed)
+ strat = AGG_HASHED;
+ else if (list_length(linitial(rollup->gsets)) == 0)
+ strat = AGG_PLAIN;
+ else
+ strat = AGG_SORTED;
agg_plan = (Plan *) make_agg(NIL,
NIL,
- AGG_SORTED,
+ strat,
AGGSPLIT_SIMPLE,
- list_length((List *) linitial(gsets)),
+ list_length((List *) linitial(rollup->gsets)),
new_grpColIdx,
- extract_grouping_ops(groupClause),
- gsets,
+ extract_grouping_ops(rollup->groupClause),
+ rollup->gsets,
NIL,
- 0, /* numGroups not needed */
+ rollup->numGroups,
sort_plan);
/*
* Nuke stuff we don't need to avoid bloating debug output.
*/
- sort_plan->targetlist = NIL;
- sort_plan->lefttree = NULL;
+ if (sort_plan)
+ {
+ sort_plan->targetlist = NIL;
+ sort_plan->lefttree = NULL;
+ }
chain = lappend(chain, agg_plan);
}
}
/*
- * Now make the final Agg node
+ * Now make the real Agg node
*/
{
- List *groupClause = (List *) llast(rollup_groupclauses);
- List *gsets = (List *) llast(rollup_lists);
+ RollupData *rollup = linitial(rollups);
AttrNumber *top_grpColIdx;
int numGroupCols;
- top_grpColIdx = remap_groupColIdx(root, groupClause);
+ top_grpColIdx = remap_groupColIdx(root, rollup->groupClause);
- numGroupCols = list_length((List *) linitial(gsets));
+ numGroupCols = list_length((List *) linitial(rollup->gsets));
plan = make_agg(build_path_tlist(root, &best_path->path),
best_path->qual,
- (numGroupCols > 0) ? AGG_SORTED : AGG_PLAIN,
+ best_path->aggstrategy,
AGGSPLIT_SIMPLE,
numGroupCols,
top_grpColIdx,
- extract_grouping_ops(groupClause),
- gsets,
+ extract_grouping_ops(rollup->groupClause),
+ rollup->gsets,
chain,
- 0, /* numGroups not needed */
+ rollup->numGroups,
subplan);
/* Copy cost data from Path to Plan */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index ca0ae78..c49bd82 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -30,6 +30,7 @@
#include "foreign/fdwapi.h"
#include "miscadmin.h"
#include "lib/bipartite_match.h"
+#include "lib/knapsack.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#ifdef OPTIMIZER_DEBUG
@@ -89,12 +90,31 @@ typedef struct
List *groupClause; /* overrides parse->groupClause */
} standard_qp_extra;
+/*
+ * Data specific to grouping sets
+ */
+
+typedef struct
+{
+ List *rollups;
+ List *hash_sets_idx;
+ double dNumHashGroups;
+ bool any_hashable;
+ Bitmapset *unsortable_refs;
+ Bitmapset *unhashable_refs;
+ List *unsortable_sets;
+ int *tleref_to_colnum_map;
+} grouping_sets_data;
+
/* Local functions */
static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
static void inheritance_planner(PlannerInfo *root);
static void grouping_planner(PlannerInfo *root, bool inheritance_update,
double tuple_fraction);
+static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root);
+static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
+ int *tleref_to_colnum_map);
static void preprocess_rowmarks(PlannerInfo *root);
static double preprocess_limit(PlannerInfo *root,
double tuple_fraction,
@@ -107,8 +127,7 @@ static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
static void standard_qp_callback(PlannerInfo *root, void *extra);
static double get_number_of_groups(PlannerInfo *root,
double path_rows,
- List *rollup_lists,
- List *rollup_groupclauses);
+ grouping_sets_data *gd);
static Size estimate_hashagg_tablesize(Path *path,
const AggClauseCosts *agg_costs,
double dNumGroups);
@@ -116,8 +135,16 @@ static RelOptInfo *create_grouping_paths(PlannerInfo *root,
RelOptInfo *input_rel,
PathTarget *target,
const AggClauseCosts *agg_costs,
- List *rollup_lists,
- List *rollup_groupclauses);
+ grouping_sets_data *gd);
+static void consider_groupingsets_paths(PlannerInfo *root,
+ RelOptInfo *grouped_rel,
+ Path *path,
+ bool is_sorted,
+ bool can_hash,
+ PathTarget *target,
+ grouping_sets_data *gd,
+ const AggClauseCosts *agg_costs,
+ double dNumGroups);
static RelOptInfo *create_window_paths(PlannerInfo *root,
RelOptInfo *input_rel,
PathTarget *input_target,
@@ -1494,8 +1521,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
AggClauseCosts agg_costs;
WindowFuncLists *wflists = NULL;
List *activeWindows = NIL;
- List *rollup_lists = NIL;
- List *rollup_groupclauses = NIL;
+ grouping_sets_data *gset_data = NULL;
standard_qp_extra qp_extra;
/* A recursive query should always have setOperations */
@@ -1504,84 +1530,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
/* Preprocess grouping sets and GROUP BY clause, if any */
if (parse->groupingSets)
{
- int *tleref_to_colnum_map;
- List *sets;
- int maxref;
- ListCell *lc;
- ListCell *lc2;
- ListCell *lc_set;
-
- parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1);
-
- /* Identify max SortGroupRef in groupClause, for array sizing */
- maxref = 0;
- foreach(lc, parse->groupClause)
- {
- SortGroupClause *gc = lfirst(lc);
-
- if (gc->tleSortGroupRef > maxref)
- maxref = gc->tleSortGroupRef;
- }
-
- /* Allocate workspace array for remapping */
- tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
-
- /* Examine the rollup sets */
- sets = extract_rollup_sets(parse->groupingSets);
-
- foreach(lc_set, sets)
- {
- List *current_sets = (List *) lfirst(lc_set);
- List *groupclause;
- int ref;
-
- /*
- * Reorder the current list of grouping sets into correct
- * prefix order. If only one aggregation pass is needed, try
- * to make the list match the ORDER BY clause; if more than
- * one pass is needed, we don't bother with that.
- */
- current_sets = reorder_grouping_sets(current_sets,
- (list_length(sets) == 1
- ? parse->sortClause
- : NIL));
-
- /*
- * Order the groupClause appropriately. If the first grouping
- * set is empty, this can match regular GROUP BY
- * preprocessing, otherwise we have to force the groupClause
- * to match that grouping set's order.
- */
- groupclause = preprocess_groupclause(root,
- linitial(current_sets));
-
- /*
- * Now that we've pinned down an order for the groupClause for
- * this list of grouping sets, we need to remap the entries in
- * the grouping sets from sortgrouprefs to plain indices
- * (0-based) into the groupClause for this collection of
- * grouping sets.
- */
- ref = 0;
- foreach(lc, groupclause)
- {
- SortGroupClause *gc = lfirst(lc);
-
- tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
- }
-
- foreach(lc, current_sets)
- {
- foreach(lc2, (List *) lfirst(lc))
- {
- lfirst_int(lc2) = tleref_to_colnum_map[lfirst_int(lc2)];
- }
- }
-
- /* Save the reordered sets and corresponding groupclauses */
- rollup_lists = lcons(current_sets, rollup_lists);
- rollup_groupclauses = lcons(groupclause, rollup_groupclauses);
- }
+ gset_data = preprocess_grouping_sets(root);
}
else
{
@@ -1675,8 +1624,9 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
/* Set up data needed by standard_qp_callback */
qp_extra.tlist = tlist;
qp_extra.activeWindows = activeWindows;
- qp_extra.groupClause =
- parse->groupingSets ? llast(rollup_groupclauses) : parse->groupClause;
+ qp_extra.groupClause = (gset_data
+ ? (gset_data->rollups ? ((RollupData *) linitial(gset_data->rollups))->groupClause : NIL)
+ : parse->groupClause);
/*
* Generate the best unsorted and presorted paths for the scan/join
@@ -1876,8 +1826,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
current_rel,
grouping_target,
&agg_costs,
- rollup_lists,
- rollup_groupclauses);
+ gset_data);
/* Fix things up if grouping_target contains SRFs */
if (parse->hasTargetSRFs)
adjust_paths_for_srfs(root, current_rel,
@@ -1914,7 +1863,6 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
current_rel = create_distinct_paths(root,
current_rel);
}
-
} /* end of if (setOperations) */
/*
@@ -2067,6 +2015,213 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
}
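+/*
+ * Expand the query's grouping sets, record which grouping columns are
+ * unsortable or unhashable, split off the sets that can only be hashed,
+ * and collect the remaining (sortable) sets into rollups.
+ */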
+static grouping_sets_data *
+preprocess_grouping_sets(PlannerInfo *root)
+{
+ Query *parse = root->parse;
+ List *sets;
+ int maxref = 0;
+ ListCell *lc;
+ ListCell *lc_set;
+ grouping_sets_data *gd = palloc0(sizeof(grouping_sets_data));
+
+ parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1);
+
+ gd->any_hashable = false;
+ gd->unhashable_refs = NULL;
+ gd->unsortable_refs = NULL;
+ gd->unsortable_sets = NIL;
+
+ if (parse->groupClause)
+ {
+ ListCell *lc;
+
+ foreach(lc, parse->groupClause)
+ {
+ SortGroupClause *gc = lfirst(lc);
+ Index ref = gc->tleSortGroupRef;
+
+ if (ref > maxref)
+ maxref = ref;
+
+ if (!gc->hashable)
+ gd->unhashable_refs = bms_add_member(gd->unhashable_refs, ref);
+
+ if (!OidIsValid(gc->sortop))
+ gd->unsortable_refs = bms_add_member(gd->unsortable_refs, ref);
+ }
+ }
+
+ /* Allocate workspace array for remapping */
+ gd->tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
+
+ /*
+ * If we have any unsortable sets, we must extract them before trying to
+ * prepare rollups. Unsortable sets don't go through reorder_grouping_sets,
+ * so we must apply the GroupingSetData annotation here.
+ */
+ if (!bms_is_empty(gd->unsortable_refs))
+ {
+ List *sortable_sets = NIL;
+
+ foreach(lc, parse->groupingSets)
+ {
+ List *gset = lfirst(lc);
+
+ if (bms_overlap_list(gd->unsortable_refs, gset))
+ {
+ GroupingSetData *gs = makeNode(GroupingSetData);
+ gs->set = gset;
+ gd->unsortable_sets = lappend(gd->unsortable_sets, gs);
+
+ /*
+ * We must enforce here that an unsortable set is hashable;
+ * later code assumes this. Parse analysis only checks that
+ * every individual column is either hashable or sortable.
+ *
+ * Note that passing this test doesn't guarantee we can
+ * generate a plan; there might be other showstoppers.
+ */
+
+ if (bms_overlap_list(gd->unhashable_refs, gset))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("could not implement GROUP BY"),
+ errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
+ }
+ else
+ sortable_sets = lappend(sortable_sets, gset);
+ }
+
+ if (sortable_sets)
+ sets = extract_rollup_sets(sortable_sets);
+ else
+ sets = NIL;
+ }
+ else
+ sets = extract_rollup_sets(parse->groupingSets);
+
+ foreach(lc_set, sets)
+ {
+ List *current_sets = (List *) lfirst(lc_set);
+ RollupData *rollup = makeNode(RollupData);
+ GroupingSetData *gs;
+
+ /*
+ * Reorder the current list of grouping sets into correct
+ * prefix order. If only one aggregation pass is needed, try
+ * to make the list match the ORDER BY clause; if more than
+ * one pass is needed, we don't bother with that.
+ *
+ * Note that this reorders the sets from smallest-member-first
+ * to largest-member-first, and applies the GroupingSetData
+ * annotations, though the data will be filled in later.
+ */
+ current_sets = reorder_grouping_sets(current_sets,
+ (list_length(sets) == 1
+ ? parse->sortClause
+ : NIL));
+
+ /*
+ * Get the initial (and therefore largest) grouping set.
+ */
+ gs = linitial(current_sets);
+
+ /*
+ * Order the groupClause appropriately. If the first grouping set is
+ * empty, then the groupClause must also be empty; otherwise we have to
+ * force the groupClause to match that grouping set's order.
+ *
+ * (The first grouping set can be empty even though parse->groupClause
+ * is not empty only if all non-empty grouping sets are unsortable. The
+ * groupClauses for hashed grouping sets are built later on.)
+ */
+ if (gs->set)
+ rollup->groupClause = preprocess_groupclause(root, gs->set);
+ else
+ rollup->groupClause = NIL;
+
+ /*
+ * Is it hashable? We pretend empty sets are hashable even though we
+ * actually force them not to be hashed later. But don't bother if
+ * there's nothing but empty sets.
+ */
+ if (gs->set &&
+ !bms_overlap_list(gd->unhashable_refs, gs->set))
+ {
+ rollup->hashable = true;
+ gd->any_hashable = true;
+ }
+
+ /*
+ * Now that we've pinned down an order for the groupClause for this
+ * list of grouping sets, we need to remap the entries in the grouping
+ * sets from sortgrouprefs to plain indices (0-based) into the
+ * groupClause for this collection of grouping sets. We keep the
+ * original form for later use, though.
+ */
+ rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
+ current_sets,
+ gd->tleref_to_colnum_map);
+ rollup->gsets_data = current_sets;
+
+ gd->rollups = lappend(gd->rollups, rollup);
+ }
+
+ if (gd->unsortable_sets)
+ {
+ /*
+ * we have not yet pinned down a groupclause for this, but we will need
+ * index-based lists for estimation purposes. Construct hash_sets_idx
+ * based on the entire original groupclause for now.
+ */
+ gd->hash_sets_idx = remap_to_groupclause_idx(parse->groupClause,
+ gd->unsortable_sets,
+ gd->tleref_to_colnum_map);
+ gd->any_hashable = true;
+ }
+
+ return gd;
+}
+
+/*
+ * Given a groupclause and a list of GroupingSetData, return equivalent sets
+ * (without annotation) mapped to indexes into the given groupclause.
+ */
+static List *
+remap_to_groupclause_idx(List *groupClause,
+ List *gsets,
+ int *tleref_to_colnum_map)
+{
+ int ref = 0;
+ List *result = NIL;
+ ListCell *lc;
+
+ foreach(lc, groupClause)
+ {
+ SortGroupClause *gc = lfirst(lc);
+ tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
+ }
+
+ foreach(lc, gsets)
+ {
+ List *set = NIL;
+ ListCell *lc2;
+ GroupingSetData *gs = lfirst(lc);
+
+ foreach(lc2, gs->set)
+ {
+ set = lappend_int(set, tleref_to_colnum_map[lfirst_int(lc2)]);
+ }
+
+ result = lappend(result, set);
+ }
+
+ return result;
+}
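/*
 * [Editorial example, not part of the patch] If groupClause contains
 * clauses with tleSortGroupRefs (5, 2, 7), occupying positions 0, 1 and 2
 * respectively, then a grouping set stored as the sortgroupref list (2, 7)
 * comes back as the position list (1, 2).
 */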
+
+
+
/*
* Detect whether a plan node is a "dummy" plan created when a relation
* is deemed not to need scanning due to constraint exclusion.
@@ -2981,7 +3136,7 @@ extract_rollup_sets(List *groupingSets)
/*
* Reorder the elements of a list of grouping sets such that they have correct
- * prefix relationships.
+ * prefix relationships. Also inserts the GroupingSetData annotations.
*
* The input must be ordered with smallest sets first; the result is returned
* with largest sets first. Note that the result shares no list substructure
@@ -3004,6 +3159,7 @@ reorder_grouping_sets(List *groupingsets, List *sortclause)
{
List *candidate = lfirst(lc);
List *new_elems = list_difference_int(candidate, previous);
+ GroupingSetData *gs = makeNode(GroupingSetData);
if (list_length(new_elems) > 0)
{
@@ -3031,7 +3187,8 @@ reorder_grouping_sets(List *groupingsets, List *sortclause)
}
}
- result = lcons(list_copy(previous), result);
+ gs->set = list_copy(previous);
+ result = lcons(gs, result);
list_free(new_elems);
}
@@ -3126,15 +3283,16 @@ standard_qp_callback(PlannerInfo *root, void *extra)
* Estimate number of groups produced by grouping clauses (1 if not grouping)
*
* path_rows: number of output rows from scan/join step
- * rollup_lists: list of grouping sets, or NIL if not doing grouping sets
- * rollup_groupclauses: list of grouping clauses for grouping sets,
- * or NIL if not doing grouping sets
+ * gsets: grouping set data, or NULL if not doing grouping sets
+ *
+ * If doing grouping sets, we also annotate the gsets data with the estimates
+ * for each set and each individual rollup list, with a view to later
+ * determining whether some combination of them could be hashed instead.
*/
static double
get_number_of_groups(PlannerInfo *root,
double path_rows,
- List *rollup_lists,
- List *rollup_groupclauses)
+ grouping_sets_data *gd)
{
Query *parse = root->parse;
double dNumGroups;
@@ -3146,28 +3304,58 @@ get_number_of_groups(PlannerInfo *root,
if (parse->groupingSets)
{
/* Add up the estimates for each grouping set */
- ListCell *lc,
- *lc2;
+ ListCell *lc;
+ ListCell *lc2;
dNumGroups = 0;
- forboth(lc, rollup_groupclauses, lc2, rollup_lists)
+
+ foreach(lc, gd->rollups)
{
- List *groupClause = (List *) lfirst(lc);
- List *gsets = (List *) lfirst(lc2);
- ListCell *lc3;
+ RollupData *rollup = lfirst(lc);
+ ListCell *lc;
- groupExprs = get_sortgrouplist_exprs(groupClause,
+ groupExprs = get_sortgrouplist_exprs(rollup->groupClause,
parse->targetList);
- foreach(lc3, gsets)
+ rollup->numGroups = 0.0;
+
+ forboth(lc, rollup->gsets, lc2, rollup->gsets_data)
{
- List *gset = (List *) lfirst(lc3);
+ List *gset = (List *) lfirst(lc);
+ GroupingSetData *gs = lfirst(lc2);
+ double numGroups = estimate_num_groups(root,
+ groupExprs,
+ path_rows,
+ &gset);
+ gs->numGroups = numGroups;
+ rollup->numGroups += numGroups;
+ }
+
+ dNumGroups += rollup->numGroups;
+ }
- dNumGroups += estimate_num_groups(root,
- groupExprs,
- path_rows,
- &gset);
+ if (gd->hash_sets_idx)
+ {
+ ListCell *lc;
+
+ gd->dNumHashGroups = 0;
+
+ groupExprs = get_sortgrouplist_exprs(parse->groupClause,
+ parse->targetList);
+
+ forboth(lc, gd->hash_sets_idx, lc2, gd->unsortable_sets)
+ {
+ List *gset = (List *) lfirst(lc);
+ GroupingSetData *gs = lfirst(lc2);
+ double numGroups = estimate_num_groups(root,
+ groupExprs,
+ path_rows,
+ &gset);
+ gs->numGroups = numGroups;
+ gd->dNumHashGroups += numGroups;
}
+
+ dNumGroups += gd->dNumHashGroups;
}
}
else
@@ -3203,6 +3391,11 @@ get_number_of_groups(PlannerInfo *root,
* estimate_hashagg_tablesize
* estimate the number of bytes that a hash aggregate hashtable will
* require based on the agg_costs, path width and dNumGroups.
+ *
+ * XXX this may be over-estimating the size now that hashagg knows to omit
+ * unneeded columns from the hashtable. Also for mixed-mode grouping sets,
+ * grouping columns not in the hashed set are counted here even though hashagg
+ * won't store them. Is this a problem?
*/
static Size
estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs,
@@ -3253,8 +3446,7 @@ create_grouping_paths(PlannerInfo *root,
RelOptInfo *input_rel,
PathTarget *target,
const AggClauseCosts *agg_costs,
- List *rollup_lists,
- List *rollup_groupclauses)
+ grouping_sets_data *gd)
{
Query *parse = root->parse;
Path *cheapest_path = input_rel->cheapest_total_path;
@@ -3362,8 +3554,7 @@ create_grouping_paths(PlannerInfo *root,
*/
dNumGroups = get_number_of_groups(root,
cheapest_path->rows,
- rollup_lists,
- rollup_groupclauses);
+ gd);
/*
* Determine whether it's possible to perform sort-based implementations
@@ -3371,15 +3562,22 @@ create_grouping_paths(PlannerInfo *root,
* grouping_is_sortable() is trivially true, and all the
* pathkeys_contained_in() tests will succeed too, so that we'll consider
* every surviving input path.)
+ *
+ * If we have grouping sets, we might be able to sort some but not all of
+ * them; in this case, we need can_sort to be true as long as we must
+ * consider any sorted-input plan.
*/
- can_sort = grouping_is_sortable(parse->groupClause);
+ can_sort = (gd && gd->rollups != NIL)
+ || grouping_is_sortable(parse->groupClause);
/*
* Determine whether we should consider hash-based implementations of
* grouping.
*
- * Hashed aggregation only applies if we're grouping. We currently can't
- * hash if there are grouping sets, though.
+ * Hashed aggregation only applies if we're grouping. If we have grouping
+ * sets, some groups might be hashable but others not; in this case we set
+ * can_hash true as long as there is nothing globally preventing us from
+ * hashing (and we should therefore consider plans with hashes).
*
* Executor doesn't support hashed aggregation with DISTINCT or ORDER BY
* aggregates. (Doing so would imply storing *all* the input values in
@@ -3392,9 +3590,8 @@ create_grouping_paths(PlannerInfo *root,
* other gating conditions, so we want to do it last.
*/
can_hash = (parse->groupClause != NIL &&
- parse->groupingSets == NIL &&
agg_costs->numOrderedAggs == 0 &&
- grouping_is_hashable(parse->groupClause));
+ (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause)));
/*
* If grouped_rel->consider_parallel is true, then paths that we generate
@@ -3460,8 +3657,7 @@ create_grouping_paths(PlannerInfo *root,
/* Estimate number of partial groups. */
dNumPartialGroups = get_number_of_groups(root,
cheapest_partial_path->rows,
- NIL,
- NIL);
+ gd);
/*
* Collect statistics about aggregates for estimating costs of
@@ -3594,20 +3790,9 @@ create_grouping_paths(PlannerInfo *root,
/* Now decide what to stick atop it */
if (parse->groupingSets)
{
- /*
- * We have grouping sets, possibly with aggregation. Make
- * a GroupingSetsPath.
- */
- add_path(grouped_rel, (Path *)
- create_groupingsets_path(root,
- grouped_rel,
- path,
- target,
- (List *) parse->havingQual,
- rollup_lists,
- rollup_groupclauses,
- agg_costs,
- dNumGroups));
+ consider_groupingsets_paths(root, grouped_rel,
+ path, true, can_hash, target,
+ gd, agg_costs, dNumGroups);
}
else if (parse->hasAggs)
{
@@ -3705,33 +3890,45 @@ create_grouping_paths(PlannerInfo *root,
if (can_hash)
{
- hashaggtablesize = estimate_hashagg_tablesize(cheapest_path,
- agg_costs,
- dNumGroups);
-
- /*
- * Provided that the estimated size of the hashtable does not exceed
- * work_mem, we'll generate a HashAgg Path, although if we were unable
- * to sort above, then we'd better generate a Path, so that we at
- * least have one.
- */
- if (hashaggtablesize < work_mem * 1024L ||
- grouped_rel->pathlist == NIL)
+ if (parse->groupingSets)
{
/*
- * We just need an Agg over the cheapest-total input path, since
- * input order won't matter.
+ * Try for a hash-only groupingsets path over unsorted input.
*/
- add_path(grouped_rel, (Path *)
- create_agg_path(root, grouped_rel,
- cheapest_path,
- target,
- AGG_HASHED,
- AGGSPLIT_SIMPLE,
- parse->groupClause,
- (List *) parse->havingQual,
- agg_costs,
- dNumGroups));
+ consider_groupingsets_paths(root, grouped_rel,
+ cheapest_path, false, true, target,
+ gd, agg_costs, dNumGroups);
+ }
+ else
+ {
+ hashaggtablesize = estimate_hashagg_tablesize(cheapest_path,
+ agg_costs,
+ dNumGroups);
+
+ /*
+ * Provided that the estimated size of the hashtable does not exceed
+ * work_mem, we'll generate a HashAgg Path, although if we were unable
+ * to sort above, then we'd better generate a Path, so that we at
+ * least have one.
+ */
+ if (hashaggtablesize < work_mem * 1024L ||
+ grouped_rel->pathlist == NIL)
+ {
+ /*
+ * We just need an Agg over the cheapest-total input path, since
+ * input order won't matter.
+ */
+ add_path(grouped_rel, (Path *)
+ create_agg_path(root, grouped_rel,
+ cheapest_path,
+ target,
+ AGG_HASHED,
+ AGGSPLIT_SIMPLE,
+ parse->groupClause,
+ (List *) parse->havingQual,
+ agg_costs,
+ dNumGroups));
+ }
}
/*
@@ -3800,6 +3997,310 @@ create_grouping_paths(PlannerInfo *root,
return grouped_rel;
}
+
+/*
+ * For a given input path, consider the possible ways of doing grouping sets on
+ * it, by combinations of hashing and sorting. This can be called multiple
+ * times, so it's important that it not scribble on input. No result is
+ * returned, but any generated paths are added to grouped_rel.
+ */
+
+static void
+consider_groupingsets_paths(PlannerInfo *root,
+ RelOptInfo *grouped_rel,
+ Path *path,
+ bool is_sorted,
+ bool can_hash,
+ PathTarget *target,
+ grouping_sets_data *gd,
+ const AggClauseCosts *agg_costs,
+ double dNumGroups)
+{
+ Query *parse = root->parse;
+
+ /*
+ * If we're not being offered sorted input, then only consider plans that
+ * can be done entirely by hashing.
+ *
+ * We can hash everything if it looks like it'll fit in work_mem. But if
+ * the input is actually sorted despite not being advertised as such, we
+ * prefer to make use of that in order to use less memory.
+ *
+ * If none of the grouping sets are sortable, then ignore the work_mem
+ * limit and generate a path anyway, since otherwise we'll just fail.
+ */
+ if (!is_sorted)
+ {
+ List *new_rollups = NIL;
+ RollupData *unhashed_rollup = NULL;
+ List *sets_data;
+ List *empty_sets_data = NIL;
+ List *empty_sets = NIL;
+ ListCell *lc;
+ ListCell *l_start = list_head(gd->rollups);
+ AggStrategy strat = AGG_HASHED;
+ Size hashsize;
+ double exclude_groups = 0.0;
+
+ Assert(can_hash);
+
+ /* gd->rollups can be empty if all grouping sets are unsortable */
+ if (gd->rollups &&
+ pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
+ {
+ unhashed_rollup = lfirst(l_start);
+ exclude_groups = unhashed_rollup->numGroups;
+ l_start = lnext(l_start);
+ }
+
+ hashsize = estimate_hashagg_tablesize(path,
+ agg_costs,
+ dNumGroups - exclude_groups);
+
+ if (hashsize > work_mem * 1024L && gd->rollups)
+ return; /* nope, won't fit */
+
+ /*
+ * We need to burst the existing rollups list into individual grouping
+ * sets and recompute a groupClause for each set.
+ */
+ sets_data = list_copy(gd->unsortable_sets);
+
+ for_each_cell(lc, l_start)
+ {
+ RollupData *rollup = lfirst(lc);
+
+ /*
+ * If we find an unhashable rollup that's not been skipped by the
+ * "actually sorted" check above, we can't cope; we'd need sorted
+ * input (with a different sort order) but we can't get that here.
+ * So bail out; we'll get a valid path from the is_sorted case
+ * instead.
+ */
+ if (!rollup->hashable)
+ return;
+ else
+ sets_data = list_concat(sets_data, list_copy(rollup->gsets_data));
+ }
+ foreach(lc, sets_data)
+ {
+ GroupingSetData *gs = lfirst(lc);
+ List *gset = gs->set;
+ RollupData *rollup;
+
+ if (gset == NIL)
+ {
+ empty_sets_data = lappend(empty_sets_data, gs);
+ empty_sets = lappend(empty_sets, NIL);
+ }
+ else
+ {
+ rollup = makeNode(RollupData);
+
+ rollup->groupClause = preprocess_groupclause(root, gset);
+ rollup->gsets_data = list_make1(gs);
+ rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
+ rollup->gsets_data,
+ gd->tleref_to_colnum_map);
+ rollup->numGroups = gs->numGroups;
+ rollup->hashable = true;
+ rollup->is_hashed = true;
+ new_rollups = lappend(new_rollups, rollup);
+ }
+ }
+
+ /* If we didn't find anything nonempty to hash, then bail. */
+ if (new_rollups == NIL)
+ return;
+
+ /*
+ * If there were empty grouping sets, they should have been in the first
+ * rollup.
+ */
+ Assert(!unhashed_rollup || !empty_sets);
+
+ if (unhashed_rollup)
+ {
+ new_rollups = lappend(new_rollups, unhashed_rollup);
+ strat = AGG_MIXED;
+ }
+ else if (empty_sets)
+ {
+ RollupData *rollup = makeNode(RollupData);
+
+ rollup->groupClause = NIL;
+ rollup->gsets_data = empty_sets_data;
+ rollup->gsets = empty_sets;
+ rollup->numGroups = list_length(empty_sets);
+ rollup->hashable = false;
+ rollup->is_hashed = false;
+ new_rollups = lappend(new_rollups, rollup);
+ strat = AGG_MIXED;
+ }
+
+ add_path(grouped_rel, (Path *)
+ create_groupingsets_path(root,
+ grouped_rel,
+ path,
+ target,
+ (List *) parse->havingQual,
+ strat,
+ new_rollups,
+ agg_costs,
+ dNumGroups));
+ return;
+ }
+
+ /*
+ * If we have sorted input but nothing we can do with it, bail.
+ */
+
+ if (list_length(gd->rollups) == 0)
+ return;
+
+ /*
+ * Given sorted input, we try to make two paths: one sorted and one mixed
+ * sort/hash. (We need to try both because hashagg might be disabled, or
+ * some columns might not be sortable.)
+ *
+ * can_hash is passed in as false if some obstacle elsewhere (such as
+ * ordered aggs) means that we shouldn't consider hashing at all.
+ */
+
+ if (can_hash && gd->any_hashable)
+ {
+ List *rollups = NIL;
+ List *hash_sets = list_copy(gd->unsortable_sets);
+ double availspace = (work_mem * 1024.0);
+ ListCell *lc;
+
+ /*
+ * Account first for space needed for groups we can't sort at all.
+ */
+ availspace -= (double) estimate_hashagg_tablesize(path, agg_costs, gd->dNumHashGroups);
+
+ if (availspace > 0 && list_length(gd->rollups) > 1)
+ {
+ double scale;
+ int k_capacity;
+ int *k_weights = palloc(list_length(gd->rollups) * sizeof(int));
+ Bitmapset *hash_items = NULL;
+ int i;
+
+ /*
+ * To use the discrete knapsack, we need to scale the values to a
+ * reasonably small bounded range. We choose to allow a 5% error
+ * margin; we have no more than 4096 rollups in the worst possible
+ * case, which with a 5% error margin will require a bit over 42MB
+ * of workspace. (Anyone wanting to plan queries that complex had
+ * better have the memory for it. In more reasonable cases, with
+ * no more than a couple of dozen rollups, the memory usage will be
+ * negligible.)
+ */
+ scale = availspace / (20.0 * list_length(gd->rollups));
+ k_capacity = (int) floor(availspace / scale);
+
+ /*
+ * We leave the first rollup out of consideration since it's the
+ * one that matches the input sort order. We assign indexes "i" to
+ * only those entries considered for hashing; the second loop,
+ * below, must use the same condition.
+ */
+ i = 0;
+ for_each_cell(lc, lnext(list_head(gd->rollups)))
+ {
+ RollupData *rollup = lfirst(lc);
+ if (rollup->hashable)
+ {
+ double sz = estimate_hashagg_tablesize(path,
+ agg_costs,
+ rollup->numGroups);
+ k_weights[i] = (int) floor(sz / scale);
+ ++i;
+ }
+ }
+
+ /*
+ * Apply knapsack algorithm; compute the set of items which
+ * maximizes the value stored (in this case the number of sorts
+ * saved) while keeping the total size (approximately) within
+ * capacity.
+ */
+ if (i > 0)
+ hash_items = DiscreteKnapsack(k_capacity, i, k_weights, NULL);
+
+ if (!bms_is_empty(hash_items))
+ {
+ rollups = list_make1(linitial(gd->rollups));
+
+ i = 0;
+ for_each_cell(lc, lnext(list_head(gd->rollups)))
+ {
+ RollupData *rollup = lfirst(lc);
+ if (rollup->hashable)
+ {
+ if (bms_is_member(i, hash_items))
+ hash_sets = list_concat(hash_sets, list_copy(rollup->gsets_data));
+ else
+ rollups = lappend(rollups, rollup);
+ ++i;
+ }
+ else
+ rollups = lappend(rollups, rollup);
+ }
+ }
+ }
+
+ if (!rollups && hash_sets)
+ rollups = list_copy(gd->rollups);
+
+ foreach(lc, hash_sets)
+ {
+ GroupingSetData *gs = lfirst(lc);
+ RollupData *rollup = makeNode(RollupData);
+
+ Assert(gs->set != NIL);
+
+ rollup->groupClause = preprocess_groupclause(root, gs->set);
+ rollup->gsets_data = list_make1(gs);
+ rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
+ rollup->gsets_data,
+ gd->tleref_to_colnum_map);
+ rollup->numGroups = gs->numGroups;
+ rollup->hashable = true;
+ rollup->is_hashed = true;
+ rollups = lcons(rollup, rollups);
+ }
+
+ if (rollups)
+ {
+ add_path(grouped_rel, (Path *)
+ create_groupingsets_path(root,
+ grouped_rel,
+ path,
+ target,
+ (List *) parse->havingQual,
+ AGG_MIXED,
+ rollups,
+ agg_costs,
+ dNumGroups));
+ }
+ }
+
+ /*
+ * Now try the simple sorted case.
+ */
+ if (!gd->unsortable_sets)
+ add_path(grouped_rel, (Path *)
+ create_groupingsets_path(root,
+ grouped_rel,
+ path,
+ target,
+ (List *) parse->havingQual,
+ AGG_SORTED,
+ gd->rollups,
+ agg_costs,
+ dNumGroups));
+}
+
/*
* create_window_paths
*
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 3248296..67590b8 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -2607,10 +2607,9 @@ create_agg_path(PlannerInfo *root,
* 'subpath' is the path representing the source of data
* 'target' is the PathTarget to be computed
* 'having_qual' is the HAVING quals if any
- * 'rollup_lists' is a list of grouping sets
- * 'rollup_groupclauses' is a list of grouping clauses for grouping sets
+ * 'rollups' is a list of RollupData nodes
* 'agg_costs' contains cost info about the aggregate functions to be computed
- * 'numGroups' is the estimated number of groups
+ * 'numGroups' is the estimated total number of groups
*/
GroupingSetsPath *
create_groupingsets_path(PlannerInfo *root,
@@ -2618,13 +2617,15 @@ create_groupingsets_path(PlannerInfo *root,
Path *subpath,
PathTarget *target,
List *having_qual,
- List *rollup_lists,
- List *rollup_groupclauses,
+ AggStrategy aggstrategy,
+ List *rollups,
const AggClauseCosts *agg_costs,
double numGroups)
{
GroupingSetsPath *pathnode = makeNode(GroupingSetsPath);
- int numGroupCols;
+ ListCell *lc;
+ bool is_first = true;
+ bool is_first_sort = true;
/* The topmost generated Plan node will be an Agg */
pathnode->path.pathtype = T_Agg;
@@ -2638,74 +2639,107 @@ create_groupingsets_path(PlannerInfo *root,
pathnode->subpath = subpath;
/*
+ * Simplify callers by downgrading AGG_SORTED to AGG_PLAIN and
+ * AGG_MIXED to AGG_HASHED here, if possible.
+ */
+ if (aggstrategy == AGG_SORTED &&
+ list_length(rollups) == 1 &&
+ ((RollupData *) linitial(rollups))->groupClause == NIL)
+ aggstrategy = AGG_PLAIN;
+
+ if (aggstrategy == AGG_MIXED &&
+ list_length(rollups) == 1)
+ aggstrategy = AGG_HASHED;
+
+ /*
* Output will be in sorted order by group_pathkeys if, and only if, there
* is a single rollup operation on a non-empty list of grouping
* expressions.
*/
- if (list_length(rollup_groupclauses) == 1 &&
- ((List *) linitial(rollup_groupclauses)) != NIL)
+ if (aggstrategy == AGG_SORTED && list_length(rollups) == 1)
pathnode->path.pathkeys = root->group_pathkeys;
else
pathnode->path.pathkeys = NIL;
- pathnode->rollup_groupclauses = rollup_groupclauses;
- pathnode->rollup_lists = rollup_lists;
+ pathnode->aggstrategy = aggstrategy;
+ pathnode->rollups = rollups;
pathnode->qual = having_qual;
- Assert(rollup_lists != NIL);
- Assert(list_length(rollup_lists) == list_length(rollup_groupclauses));
-
- /* Account for cost of the topmost Agg node */
- numGroupCols = list_length((List *) linitial((List *) llast(rollup_lists)));
-
- cost_agg(&pathnode->path, root,
- (numGroupCols > 0) ? AGG_SORTED : AGG_PLAIN,
- agg_costs,
- numGroupCols,
- numGroups,
- subpath->startup_cost,
- subpath->total_cost,
- subpath->rows);
+ Assert(rollups != NIL);
+ Assert(aggstrategy != AGG_PLAIN || list_length(rollups) == 1);
+ Assert(aggstrategy != AGG_MIXED || list_length(rollups) > 1);
- /*
- * Add in the costs and output rows of the additional sorting/aggregation
- * steps, if any. Only total costs count, since the extra sorts aren't
- * run on startup.
- */
- if (list_length(rollup_lists) > 1)
+ foreach(lc, rollups)
{
- ListCell *lc;
+ RollupData *rollup = lfirst(lc);
+ List *gsets = rollup->gsets;
+ int numGroupCols = list_length(linitial(gsets));
+
+ /*
+ * In AGG_SORTED or AGG_PLAIN mode, the first rollup takes the
+ * (already-sorted) input, and following ones do their own sort.
+ *
+ * In AGG_HASHED mode, there is one rollup for each grouping set.
+ *
+ * In AGG_MIXED mode, the first rollups are hashed, the first
+ * non-hashed one takes the (already-sorted) input, and
+ * following ones do their own sort.
+ */
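+
+ /*
+ * (Hypothetical walk-through: with the rollup list ordered
+ * [h1, h2, s1, s2], h1 is costed in the is_first branch together
+ * with the input; h2 is then an extra hash pass with no input
+ * charge; s1 consumes the already-sorted input with no sort charge;
+ * only s2 pays for its own sort.)
+ */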
- foreach(lc, rollup_lists)
+ if (is_first)
+ {
+ cost_agg(&pathnode->path, root,
+ aggstrategy,
+ agg_costs,
+ numGroupCols,
+ rollup->numGroups,
+ subpath->startup_cost,
+ subpath->total_cost,
+ subpath->rows);
+ is_first = false;
+ if (!rollup->is_hashed)
+ is_first_sort = false;
+ }
+ else
{
- List *gsets = (List *) lfirst(lc);
Path sort_path; /* dummy for result of cost_sort */
Path agg_path; /* dummy for result of cost_agg */
- /* We must iterate over all but the last rollup_lists element */
- if (lnext(lc) == NULL)
- break;
-
- /* Account for cost of sort, but don't charge input cost again */
- cost_sort(&sort_path, root, NIL,
- 0.0,
- subpath->rows,
- subpath->pathtarget->width,
- 0.0,
- work_mem,
- -1.0);
-
- /* Account for cost of aggregation */
- numGroupCols = list_length((List *) linitial(gsets));
-
- cost_agg(&agg_path, root,
- AGG_SORTED,
- agg_costs,
- numGroupCols,
- numGroups, /* XXX surely not right for all steps? */
- sort_path.startup_cost,
- sort_path.total_cost,
- sort_path.rows);
+ if (rollup->is_hashed || is_first_sort)
+ {
+ /* Account for cost of aggregation, but don't charge input cost again */
+ cost_agg(&agg_path, root,
+ rollup->is_hashed ? AGG_HASHED : AGG_SORTED,
+ agg_costs,
+ numGroupCols,
+ rollup->numGroups,
+ 0.0, 0.0,
+ subpath->rows);
+ if (!rollup->is_hashed)
+ is_first_sort = false;
+ }
+ else
+ {
+ /* Account for cost of sort, but don't charge input cost again */
+ cost_sort(&sort_path, root, NIL,
+ 0.0,
+ subpath->rows,
+ subpath->pathtarget->width,
+ 0.0,
+ work_mem,
+ -1.0);
+
+ /* Account for cost of aggregation */
+
+ cost_agg(&agg_path, root,
+ AGG_SORTED,
+ agg_costs,
+ numGroupCols,
+ rollup->numGroups,
+ sort_path.startup_cost,
+ sort_path.total_cost,
+ sort_path.rows);
+ }
pathnode->path.total_cost += agg_path.total_cost;
pathnode->path.rows += agg_path.rows;
diff --git a/src/include/lib/knapsack.h b/src/include/lib/knapsack.h
new file mode 100644
index 0000000..1d5b3b8
--- /dev/null
+++ b/src/include/lib/knapsack.h
@@ -0,0 +1,17 @@
+/*
+ * knapsack.h
+ *
+ * Copyright (c) 2017, PostgreSQL Global Development Group
+ *
+ * src/include/lib/knapsack.h
+ */
+#ifndef KNAPSACK_H
+#define KNAPSACK_H
+
+#include "postgres.h"
+#include "nodes/bitmapset.h"
+
+extern Bitmapset *DiscreteKnapsack(int max_weight, int num_items,
+ int *item_weights, double *item_values);
+
+#endif /* KNAPSACK_H */
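The matching DiscreteKnapsack implementation (presumably in
src/backend/lib/knapsack.c) isn't quoted in this excerpt. For intuition,
a stripped-down 0/1 knapsack of the same shape looks roughly like this;
it is an illustrative sketch only, not the patch's code. All item values
are fixed at 1, matching the planner's call with NULL item_values (each
hashed rollup saves one sort), and a 32-bit mask stands in for the
Bitmapset result:

#include <stdio.h>
#include <stdlib.h>

/*
 * Toy 0/1 knapsack: values[] holds the best item count achievable at
 * each capacity, sets[] remembers which items achieved it.
 */
static unsigned int
toy_knapsack(int max_weight, int num_items, const int *item_weights)
{
	int		   *values = calloc(max_weight + 1, sizeof(int));
	unsigned int *sets = calloc(max_weight + 1, sizeof(unsigned int));
	unsigned int result;
	int			i, w;

	for (i = 0; i < num_items; i++)
	{
		/* walk weights downward so each item is used at most once */
		for (w = max_weight; w >= item_weights[i]; w--)
		{
			if (values[w - item_weights[i]] + 1 > values[w])
			{
				values[w] = values[w - item_weights[i]] + 1;
				sets[w] = sets[w - item_weights[i]] | (1U << i);
			}
		}
	}
	result = sets[max_weight];
	free(values);
	free(sets);
	return result;
}

int
main(void)
{
	int			weights[] = {30, 20, 50, 10};

	/* capacity 60: best is items 0, 1 and 3 (30+20+10), mask 0xb */
	printf("picked: 0x%x\n", toy_knapsack(60, 4, weights));
	return 0;
}

The patch's version presumably tracks the same information, just with
Bitmapsets sized to num_items instead of a fixed-width mask.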
diff --git a/src/include/nodes/bitmapset.h b/src/include/nodes/bitmapset.h
index 4f1910e..109f7b0 100644
--- a/src/include/nodes/bitmapset.h
+++ b/src/include/nodes/bitmapset.h
@@ -21,6 +21,11 @@
#define BITMAPSET_H
/*
+ * Forward decl to save including pg_list.h
+ */
+struct List;
+
+/*
* Data representation
*/
@@ -70,6 +75,7 @@ extern bool bms_is_subset(const Bitmapset *a, const Bitmapset *b);
extern BMS_Comparison bms_subset_compare(const Bitmapset *a, const Bitmapset *b);
extern bool bms_is_member(int x, const Bitmapset *a);
extern bool bms_overlap(const Bitmapset *a, const Bitmapset *b);
+extern bool bms_overlap_list(const Bitmapset *a, const struct List *b);
extern bool bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b);
extern int bms_singleton_member(const Bitmapset *a);
extern bool bms_get_singleton_member(const Bitmapset *a, int *member);
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 6332ea0..4314447 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1861,6 +1861,7 @@ typedef struct AggStatePerAggData *AggStatePerAgg;
typedef struct AggStatePerTransData *AggStatePerTrans;
typedef struct AggStatePerGroupData *AggStatePerGroup;
typedef struct AggStatePerPhaseData *AggStatePerPhase;
+typedef struct AggStatePerHashData *AggStatePerHash;
typedef struct AggState
{
@@ -1868,15 +1869,17 @@ typedef struct AggState
List *aggs; /* all Aggref nodes in targetlist & quals */
int numaggs; /* length of list (could be zero!) */
int numtrans; /* number of pertrans items */
+ AggStrategy aggstrategy; /* strategy mode */
AggSplit aggsplit; /* agg-splitting mode, see nodes.h */
AggStatePerPhase phase; /* pointer to current phase data */
- int numphases; /* number of phases */
+ int numphases; /* number of phases (including phase 0) */
int current_phase; /* current phase number */
- FmgrInfo *hashfunctions; /* per-grouping-field hash fns */
AggStatePerAgg peragg; /* per-Aggref information */
AggStatePerTrans pertrans; /* per-Trans state information */
+ ExprContext *hashcontext; /* econtext for long-lived data (hashtable) */
ExprContext **aggcontexts; /* econtexts for long-lived data (per GS) */
ExprContext *tmpcontext; /* econtext for input expressions */
+ ExprContext *curaggcontext; /* currently active aggcontext */
AggStatePerTrans curpertrans; /* currently active trans state */
bool input_done; /* indicates end of input */
bool agg_done; /* indicates completion of Agg scan */
@@ -1888,21 +1891,17 @@ typedef struct AggState
/* These fields are for grouping set phase data */
int maxsets; /* The max number of sets in any phase */
AggStatePerPhase phases; /* array of all phases */
- Tuplesortstate *sort_in; /* sorted input to phases > 0 */
+ Tuplesortstate *sort_in; /* sorted input to phases > 1 */
Tuplesortstate *sort_out; /* input is copied here for next phase */
TupleTableSlot *sort_slot; /* slot for sort results */
/* these fields are used in AGG_PLAIN and AGG_SORTED modes: */
AggStatePerGroup pergroup; /* per-Aggref-per-group working state */
HeapTuple grp_firstTuple; /* copy of first tuple of current group */
- /* these fields are used in AGG_HASHED mode: */
- TupleHashTable hashtable; /* hash table with one entry per group */
- TupleTableSlot *hashslot; /* slot for loading hash table */
- int numhashGrpCols; /* number of columns in hash table */
- int largestGrpColIdx; /* largest column required for hashing */
- AttrNumber *hashGrpColIdxInput; /* and their indices in input slot */
- AttrNumber *hashGrpColIdxHash; /* indices for execGrouping in hashtbl */
+ /* these fields are used in AGG_HASHED and AGG_MIXED modes: */
bool table_filled; /* hash table filled yet? */
- TupleHashIterator hashiter; /* for iterating through hash table */
+ int num_hashes; /* number of hash tables active */
+ AggStatePerHash perhash; /* array of per-hashtable data */
+ AggStatePerGroup *hash_pergroup; /* array of per-group pointers */
/* support for evaluation of agg inputs */
TupleTableSlot *evalslot; /* slot for agg inputs */
ProjectionInfo *evalproj; /* projection machinery */
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 95dd8ba..111fad1 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -272,6 +272,8 @@ typedef enum NodeTag
T_PlaceHolderInfo,
T_MinMaxAggInfo,
T_PlannerParamItem,
+ T_RollupData,
+ T_GroupingSetData,
/*
* TAGS FOR MEMORY NODES (memnodes.h)
@@ -728,7 +730,8 @@ typedef enum AggStrategy
{
AGG_PLAIN, /* simple agg across all input rows */
AGG_SORTED, /* grouped agg, input must be sorted */
- AGG_HASHED /* grouped agg, use internal hashtable */
+ AGG_HASHED, /* grouped agg, use internal hashtable */
+ AGG_MIXED /* grouped agg, hash and sort concurrently */
} AggStrategy;
/*
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index f72f7a8..b3a2a03 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -737,7 +737,7 @@ typedef struct Agg
Oid *grpOperators; /* equality operators to compare with */
long numGroups; /* estimated number of groups in input */
Bitmapset *aggParams; /* IDs of Params used in Aggref inputs */
- /* Note: planner provides numGroups & aggParams only in AGG_HASHED case */
+ /* Note: planner provides numGroups & aggParams only in HASHED/MIXED case */
List *groupingSets; /* grouping sets to use */
List *chain; /* chained Agg/Sort nodes */
} Agg;
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index f7ac6f6..4d0791d 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -1378,17 +1378,37 @@ typedef struct AggPath
} AggPath;
/*
+ * Various annotations used for grouping sets in the planner.
+ */
+
+typedef struct GroupingSetData
+{
+ NodeTag type;
+ List *set; /* grouping set as list of sortgrouprefs */
+ double numGroups; /* est. number of result groups */
+} GroupingSetData;
+
+typedef struct RollupData
+{
+ NodeTag type;
+ List *groupClause; /* applicable subset of parse->groupClause */
+ List *gsets; /* lists of integer indexes into groupClause */
+ List *gsets_data; /* list of GroupingSetData */
+ double numGroups; /* est. number of result groups */
+ bool hashable; /* can be hashed */
+ bool is_hashed; /* to be implemented as a hashagg */
+} RollupData;
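+
+/*
+ * (Illustration: for GROUP BY ROLLUP(a,b), one RollupData would carry
+ * gsets_data for the sets (a,b), (a) and (), with gsets holding the
+ * corresponding lists of indexes into groupClause.)
+ */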
+
+/*
* GroupingSetsPath represents a GROUPING SETS aggregation
- *
- * Currently we only support this in sorted not hashed form, so the input
- * must always be appropriately presorted.
*/
+
typedef struct GroupingSetsPath
{
Path path;
Path *subpath; /* path representing input source */
- List *rollup_groupclauses; /* list of lists of SortGroupClause's */
- List *rollup_lists; /* parallel list of lists of grouping sets */
+ AggStrategy aggstrategy; /* basic strategy */
+ List *rollups; /* list of RollupData */
List *qual; /* quals (HAVING quals), if any */
} GroupingSetsPath;
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 53cad24..a084ba1 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -181,8 +181,8 @@ extern GroupingSetsPath *create_groupingsets_path(PlannerInfo *root,
Path *subpath,
PathTarget *target,
List *having_qual,
- List *rollup_lists,
- List *rollup_groupclauses,
+ AggStrategy aggstrategy,
+ List *rollups,
const AggClauseCosts *agg_costs,
double numGroups);
extern MinMaxAggPath *create_minmaxagg_path(PlannerInfo *root,
diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out
index 260ccd5..7c46441 100644
--- a/src/test/regress/expected/groupingsets.out
+++ b/src/test/regress/expected/groupingsets.out
@@ -13,6 +13,13 @@ copy gstest2 from stdin;
create temp table gstest3 (a integer, b integer, c integer, d integer);
copy gstest3 from stdin;
alter table gstest3 add primary key (a);
+create temp table gstest4(id integer, v integer,
+ unhashable_col bit(4), unsortable_col xid);
+insert into gstest4
+values (1,1,b'0000','1'), (2,2,b'0001','1'),
+ (3,4,b'0010','2'), (4,8,b'0011','2'),
+ (5,16,b'0000','2'), (6,32,b'0001','2'),
+ (7,64,b'0010','1'), (8,128,b'0011','1');
create temp table gstest_empty (a integer, b integer, v integer);
create function gstest_data(v integer, out a integer, out b integer)
returns setof record
@@ -22,6 +29,7 @@ create function gstest_data(v integer, out a integer, out b integer)
end;
$f$ language plpgsql;
-- basic functionality
+set enable_hashagg = false; -- test hashing explicitly later
-- simple rollup with multiple plain aggregates, with and without ordering
-- (and with ordering differing from grouping)
select a, b, grouping(a,b), sum(v), count(*), max(v)
@@ -462,7 +470,7 @@ select a, b from (values (1,2),(2,3)) v(a,b) group by a,b, grouping sets(a);
-- Tests for chained aggregates
select a, b, grouping(a,b), sum(v), count(*), max(v)
- from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2));
+ from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
a | b | grouping | sum | count | max
---+---+----------+-----+-------+-----
1 | 1 | 0 | 21 | 2 | 11
@@ -473,19 +481,19 @@ select a, b, grouping(a,b), sum(v), count(*), max(v)
3 | 4 | 0 | 17 | 1 | 17
4 | 1 | 0 | 37 | 2 | 19
| | 3 | 21 | 2 | 11
- | | 3 | 25 | 2 | 13
- | | 3 | 14 | 1 | 14
- | | 3 | 15 | 1 | 15
- | | 3 | 16 | 1 | 16
- | | 3 | 17 | 1 | 17
- | | 3 | 37 | 2 | 19
| | 3 | 21 | 2 | 11
| | 3 | 25 | 2 | 13
+ | | 3 | 25 | 2 | 13
+ | | 3 | 14 | 1 | 14
| | 3 | 14 | 1 | 14
| | 3 | 15 | 1 | 15
+ | | 3 | 15 | 1 | 15
| | 3 | 16 | 1 | 16
+ | | 3 | 16 | 1 | 16
+ | | 3 | 17 | 1 | 17
| | 3 | 17 | 1 | 17
| | 3 | 37 | 2 | 19
+ | | 3 | 37 | 2 | 19
(21 rows)
select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY ROLLUP((e+1),(f+1));
@@ -847,4 +855,548 @@ select sum(ten) from onek group by rollup(four::text), two order by 1;
2500
(6 rows)
+-- hashing support
+set enable_hashagg = true;
+-- failure cases
+select count(*) from gstest4 group by rollup(unhashable_col,unsortable_col);
+ERROR: could not implement GROUP BY
+DETAIL: Some of the datatypes only support hashing, while others only support sorting.
+select array_agg(v order by v) from gstest4 group by grouping sets ((id,unsortable_col),(id));
+ERROR: could not implement GROUP BY
+DETAIL: Some of the datatypes only support hashing, while others only support sorting.
+-- simple cases
+select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a),(b)) order by 3,1,2;
+ a | b | grouping | sum | count | max
+---+---+----------+-----+-------+-----
+ 1 | | 1 | 60 | 5 | 14
+ 2 | | 1 | 15 | 1 | 15
+ 3 | | 1 | 33 | 2 | 17
+ 4 | | 1 | 37 | 2 | 19
+ | 1 | 2 | 58 | 4 | 19
+ | 2 | 2 | 25 | 2 | 13
+ | 3 | 2 | 45 | 3 | 16
+ | 4 | 2 | 17 | 1 | 17
+(8 rows)
+
+explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a),(b)) order by 3,1,2;
+ QUERY PLAN
+--------------------------------------------------------------------------------------------------------
+ Sort
+ Sort Key: (GROUPING("*VALUES*".column1, "*VALUES*".column2)), "*VALUES*".column1, "*VALUES*".column2
+ -> HashAggregate
+ Hash Key: "*VALUES*".column1
+ Hash Key: "*VALUES*".column2
+ -> Values Scan on "*VALUES*"
+(6 rows)
+
+select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by cube(a,b) order by 3,1,2;
+ a | b | grouping | sum | count | max
+---+---+----------+-----+-------+-----
+ 1 | 1 | 0 | 21 | 2 | 11
+ 1 | 2 | 0 | 25 | 2 | 13
+ 1 | 3 | 0 | 14 | 1 | 14
+ 2 | 3 | 0 | 15 | 1 | 15
+ 3 | 3 | 0 | 16 | 1 | 16
+ 3 | 4 | 0 | 17 | 1 | 17
+ 4 | 1 | 0 | 37 | 2 | 19
+ 1 | | 1 | 60 | 5 | 14
+ 2 | | 1 | 15 | 1 | 15
+ 3 | | 1 | 33 | 2 | 17
+ 4 | | 1 | 37 | 2 | 19
+ | 1 | 2 | 58 | 4 | 19
+ | 2 | 2 | 25 | 2 | 13
+ | 3 | 2 | 45 | 3 | 16
+ | 4 | 2 | 17 | 1 | 17
+ | | 3 | 145 | 10 | 19
+(16 rows)
+
+explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by cube(a,b) order by 3,1,2;
+ QUERY PLAN
+--------------------------------------------------------------------------------------------------------
+ Sort
+ Sort Key: (GROUPING("*VALUES*".column1, "*VALUES*".column2)), "*VALUES*".column1, "*VALUES*".column2
+ -> MixedAggregate
+ Hash Key: "*VALUES*".column1, "*VALUES*".column2
+ Hash Key: "*VALUES*".column1
+ Hash Key: "*VALUES*".column2
+ Group Key: ()
+ -> Values Scan on "*VALUES*"
+(8 rows)
+
+-- shouldn't try to hash
+explain (costs off)
+ select a, b, grouping(a,b), array_agg(v order by v)
+ from gstest1 group by cube(a,b);
+ QUERY PLAN
+----------------------------------------------------------
+ GroupAggregate
+ Group Key: "*VALUES*".column1, "*VALUES*".column2
+ Group Key: "*VALUES*".column1
+ Group Key: ()
+ Sort Key: "*VALUES*".column2
+ Group Key: "*VALUES*".column2
+ -> Sort
+ Sort Key: "*VALUES*".column1, "*VALUES*".column2
+ -> Values Scan on "*VALUES*"
+(9 rows)
+
+-- mixed hashable/sortable cases
+select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((unhashable_col),(unsortable_col))
+ order by 3, 5;
+ unhashable_col | unsortable_col | grouping | count | sum
+----------------+----------------+----------+-------+-----
+ 0000 | | 1 | 2 | 17
+ 0001 | | 1 | 2 | 34
+ 0010 | | 1 | 2 | 68
+ 0011 | | 1 | 2 | 136
+ | 2 | 2 | 4 | 60
+ | 1 | 2 | 4 | 195
+(6 rows)
+
+explain (costs off)
+ select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((unhashable_col),(unsortable_col))
+ order by 3,5;
+ QUERY PLAN
+------------------------------------------------------------------
+ Sort
+ Sort Key: (GROUPING(unhashable_col, unsortable_col)), (sum(v))
+ -> MixedAggregate
+ Hash Key: unsortable_col
+ Group Key: unhashable_col
+ -> Sort
+ Sort Key: unhashable_col
+ -> Seq Scan on gstest4
+(8 rows)
+
+select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col))
+ order by 3,5;
+ unhashable_col | unsortable_col | grouping | count | sum
+----------------+----------------+----------+-------+-----
+ 0000 | | 1 | 1 | 1
+ 0001 | | 1 | 1 | 2
+ 0010 | | 1 | 1 | 4
+ 0011 | | 1 | 1 | 8
+ 0000 | | 1 | 1 | 16
+ 0001 | | 1 | 1 | 32
+ 0010 | | 1 | 1 | 64
+ 0011 | | 1 | 1 | 128
+ | 1 | 2 | 1 | 1
+ | 1 | 2 | 1 | 2
+ | 2 | 2 | 1 | 4
+ | 2 | 2 | 1 | 8
+ | 2 | 2 | 1 | 16
+ | 2 | 2 | 1 | 32
+ | 1 | 2 | 1 | 64
+ | 1 | 2 | 1 | 128
+(16 rows)
+
+explain (costs off)
+ select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col))
+ order by 3,5;
+ QUERY PLAN
+------------------------------------------------------------------
+ Sort
+ Sort Key: (GROUPING(unhashable_col, unsortable_col)), (sum(v))
+ -> MixedAggregate
+ Hash Key: v, unsortable_col
+ Group Key: v, unhashable_col
+ -> Sort
+ Sort Key: v, unhashable_col
+ -> Seq Scan on gstest4
+(8 rows)
+
+-- empty input: first is 0 rows, second 1, third 3 etc.
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a);
+ a | b | sum | count
+---+---+-----+-------
+(0 rows)
+
+explain (costs off)
+ select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a);
+ QUERY PLAN
+--------------------------------
+ HashAggregate
+ Hash Key: a, b
+ Hash Key: a
+ -> Seq Scan on gstest_empty
+(4 rows)
+
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),());
+ a | b | sum | count
+---+---+-----+-------
+ | | | 0
+(1 row)
+
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),());
+ a | b | sum | count
+---+---+-----+-------
+ | | | 0
+ | | | 0
+ | | | 0
+(3 rows)
+
+explain (costs off)
+ select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),());
+ QUERY PLAN
+--------------------------------
+ MixedAggregate
+ Hash Key: a, b
+ Group Key: ()
+ Group Key: ()
+ Group Key: ()
+ -> Seq Scan on gstest_empty
+(6 rows)
+
+select sum(v), count(*) from gstest_empty group by grouping sets ((),(),());
+ sum | count
+-----+-------
+ | 0
+ | 0
+ | 0
+(3 rows)
+
+explain (costs off)
+ select sum(v), count(*) from gstest_empty group by grouping sets ((),(),());
+ QUERY PLAN
+--------------------------------
+ Aggregate
+ Group Key: ()
+ Group Key: ()
+ Group Key: ()
+ -> Seq Scan on gstest_empty
+(5 rows)
+
+-- check that functionally dependent cols are not nulled
+select a, d, grouping(a,b,c)
+ from gstest3
+ group by grouping sets ((a,b), (a,c));
+ a | d | grouping
+---+---+----------
+ 1 | 1 | 1
+ 2 | 2 | 1
+ 1 | 1 | 2
+ 2 | 2 | 2
+(4 rows)
+
+explain (costs off)
+ select a, d, grouping(a,b,c)
+ from gstest3
+ group by grouping sets ((a,b), (a,c));
+ QUERY PLAN
+---------------------------
+ HashAggregate
+ Hash Key: a, b
+ Hash Key: a, c
+ -> Seq Scan on gstest3
+(4 rows)
+
+-- simple rescan tests
+select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by grouping sets (a,b);
+ a | b | sum
+---+---+-----
+ 2 | | 6
+ 1 | | 3
+ | 2 | 3
+ | 3 | 3
+ | 1 | 3
+(5 rows)
+
+explain (costs off)
+ select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by grouping sets (a,b);
+ QUERY PLAN
+------------------------------------------
+ HashAggregate
+ Hash Key: gstest_data.a
+ Hash Key: gstest_data.b
+ -> Nested Loop
+ -> Values Scan on "*VALUES*"
+ -> Function Scan on gstest_data
+(6 rows)
+
+select *
+ from (values (1),(2)) v(x),
+ lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s;
+ERROR: aggregate functions are not allowed in FROM clause of their own query level
+LINE 3: lateral (select a, b, sum(v.x) from gstest_data(v.x) ...
+ ^
+explain (costs off)
+ select *
+ from (values (1),(2)) v(x),
+ lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s;
+ERROR: aggregate functions are not allowed in FROM clause of their own query level
+LINE 4: lateral (select a, b, sum(v.x) from gstest_data(v.x...
+ ^
+-- Tests for chained aggregates
+select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
+ a | b | grouping | sum | count | max
+---+---+----------+-----+-------+-----
+ 1 | 1 | 0 | 21 | 2 | 11
+ 1 | 2 | 0 | 25 | 2 | 13
+ 1 | 3 | 0 | 14 | 1 | 14
+ 2 | 3 | 0 | 15 | 1 | 15
+ 3 | 3 | 0 | 16 | 1 | 16
+ 3 | 4 | 0 | 17 | 1 | 17
+ 4 | 1 | 0 | 37 | 2 | 19
+ | | 3 | 21 | 2 | 11
+ | | 3 | 21 | 2 | 11
+ | | 3 | 25 | 2 | 13
+ | | 3 | 25 | 2 | 13
+ | | 3 | 14 | 1 | 14
+ | | 3 | 14 | 1 | 14
+ | | 3 | 15 | 1 | 15
+ | | 3 | 15 | 1 | 15
+ | | 3 | 16 | 1 | 16
+ | | 3 | 16 | 1 | 16
+ | | 3 | 17 | 1 | 17
+ | | 3 | 17 | 1 | 17
+ | | 3 | 37 | 2 | 19
+ | | 3 | 37 | 2 | 19
+(21 rows)
+
+select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
+ from gstest2 group by cube (a,b) order by rsum, a, b;
+ a | b | sum | rsum
+---+---+-----+------
+ 1 | 1 | 8 | 8
+ 1 | 2 | 2 | 10
+ 1 | | 10 | 20
+ 2 | 2 | 2 | 22
+ 2 | | 2 | 24
+ | 1 | 8 | 32
+ | 2 | 4 | 36
+ | | 12 | 48
+(8 rows)
+
+select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by cube (a,b) order by a,b;
+ a | b | sum
+---+---+-----
+ 1 | 1 | 1
+ 1 | 2 | 1
+ 1 | 3 | 1
+ 1 | | 3
+ 2 | 1 | 2
+ 2 | 2 | 2
+ 2 | 3 | 2
+ 2 | | 6
+ | 1 | 3
+ | 2 | 3
+ | 3 | 3
+ | | 9
+(12 rows)
+
+-- More rescan tests
+select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by cube(four,ten)) s on true order by v.a,four,ten;
+ a | a | four | ten | count
+---+---+------+-----+-------
+ 1 | 1 | 0 | 0 | 50
+ 1 | 1 | 0 | 2 | 50
+ 1 | 1 | 0 | 4 | 50
+ 1 | 1 | 0 | 6 | 50
+ 1 | 1 | 0 | 8 | 50
+ 1 | 1 | 0 | | 250
+ 1 | 1 | 1 | 1 | 50
+ 1 | 1 | 1 | 3 | 50
+ 1 | 1 | 1 | 5 | 50
+ 1 | 1 | 1 | 7 | 50
+ 1 | 1 | 1 | 9 | 50
+ 1 | 1 | 1 | | 250
+ 1 | 1 | 2 | 0 | 50
+ 1 | 1 | 2 | 2 | 50
+ 1 | 1 | 2 | 4 | 50
+ 1 | 1 | 2 | 6 | 50
+ 1 | 1 | 2 | 8 | 50
+ 1 | 1 | 2 | | 250
+ 1 | 1 | 3 | 1 | 50
+ 1 | 1 | 3 | 3 | 50
+ 1 | 1 | 3 | 5 | 50
+ 1 | 1 | 3 | 7 | 50
+ 1 | 1 | 3 | 9 | 50
+ 1 | 1 | 3 | | 250
+ 1 | 1 | | 0 | 100
+ 1 | 1 | | 1 | 100
+ 1 | 1 | | 2 | 100
+ 1 | 1 | | 3 | 100
+ 1 | 1 | | 4 | 100
+ 1 | 1 | | 5 | 100
+ 1 | 1 | | 6 | 100
+ 1 | 1 | | 7 | 100
+ 1 | 1 | | 8 | 100
+ 1 | 1 | | 9 | 100
+ 1 | 1 | | | 1000
+ 2 | 2 | 0 | 0 | 50
+ 2 | 2 | 0 | 2 | 50
+ 2 | 2 | 0 | 4 | 50
+ 2 | 2 | 0 | 6 | 50
+ 2 | 2 | 0 | 8 | 50
+ 2 | 2 | 0 | | 250
+ 2 | 2 | 1 | 1 | 50
+ 2 | 2 | 1 | 3 | 50
+ 2 | 2 | 1 | 5 | 50
+ 2 | 2 | 1 | 7 | 50
+ 2 | 2 | 1 | 9 | 50
+ 2 | 2 | 1 | | 250
+ 2 | 2 | 2 | 0 | 50
+ 2 | 2 | 2 | 2 | 50
+ 2 | 2 | 2 | 4 | 50
+ 2 | 2 | 2 | 6 | 50
+ 2 | 2 | 2 | 8 | 50
+ 2 | 2 | 2 | | 250
+ 2 | 2 | 3 | 1 | 50
+ 2 | 2 | 3 | 3 | 50
+ 2 | 2 | 3 | 5 | 50
+ 2 | 2 | 3 | 7 | 50
+ 2 | 2 | 3 | 9 | 50
+ 2 | 2 | 3 | | 250
+ 2 | 2 | | 0 | 100
+ 2 | 2 | | 1 | 100
+ 2 | 2 | | 2 | 100
+ 2 | 2 | | 3 | 100
+ 2 | 2 | | 4 | 100
+ 2 | 2 | | 5 | 100
+ 2 | 2 | | 6 | 100
+ 2 | 2 | | 7 | 100
+ 2 | 2 | | 8 | 100
+ 2 | 2 | | 9 | 100
+ 2 | 2 | | | 1000
+(70 rows)
+
+select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by cube(two,four) order by two,four) s1) from (values (1),(2)) v(a);
+ array
+------------------------------------------------------------------------------------------------------------------------------------------------------
+ {"(1,0,0,250)","(1,0,2,250)","(1,0,,500)","(1,1,1,250)","(1,1,3,250)","(1,1,,500)","(1,,0,250)","(1,,1,250)","(1,,2,250)","(1,,3,250)","(1,,,1000)"}
+ {"(2,0,0,250)","(2,0,2,250)","(2,0,,500)","(2,1,1,250)","(2,1,3,250)","(2,1,,500)","(2,,0,250)","(2,,1,250)","(2,,2,250)","(2,,3,250)","(2,,,1000)"}
+(2 rows)
+
+-- Rescan logic changes when there are no empty grouping sets, so test
+-- that too:
+select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by grouping sets(four,ten)) s on true order by v.a,four,ten;
+ a | a | four | ten | count
+---+---+------+-----+-------
+ 1 | 1 | 0 | | 250
+ 1 | 1 | 1 | | 250
+ 1 | 1 | 2 | | 250
+ 1 | 1 | 3 | | 250
+ 1 | 1 | | 0 | 100
+ 1 | 1 | | 1 | 100
+ 1 | 1 | | 2 | 100
+ 1 | 1 | | 3 | 100
+ 1 | 1 | | 4 | 100
+ 1 | 1 | | 5 | 100
+ 1 | 1 | | 6 | 100
+ 1 | 1 | | 7 | 100
+ 1 | 1 | | 8 | 100
+ 1 | 1 | | 9 | 100
+ 2 | 2 | 0 | | 250
+ 2 | 2 | 1 | | 250
+ 2 | 2 | 2 | | 250
+ 2 | 2 | 3 | | 250
+ 2 | 2 | | 0 | 100
+ 2 | 2 | | 1 | 100
+ 2 | 2 | | 2 | 100
+ 2 | 2 | | 3 | 100
+ 2 | 2 | | 4 | 100
+ 2 | 2 | | 5 | 100
+ 2 | 2 | | 6 | 100
+ 2 | 2 | | 7 | 100
+ 2 | 2 | | 8 | 100
+ 2 | 2 | | 9 | 100
+(28 rows)
+
+select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by grouping sets(two,four) order by two,four) s1) from (values (1),(2)) v(a);
+ array
+---------------------------------------------------------------------------------
+ {"(1,0,,500)","(1,1,,500)","(1,,0,250)","(1,,1,250)","(1,,2,250)","(1,,3,250)"}
+ {"(2,0,,500)","(2,1,,500)","(2,,0,250)","(2,,1,250)","(2,,2,250)","(2,,3,250)"}
+(2 rows)
+
+-- test the knapsack
+set work_mem = '64kB';
+explain (costs off)
+ select unique1,
+ count(two), count(four), count(ten),
+ count(hundred), count(thousand), count(twothousand),
+ count(*)
+ from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two);
+ QUERY PLAN
+-------------------------------
+ MixedAggregate
+ Hash Key: two
+ Hash Key: four
+ Hash Key: ten
+ Hash Key: hundred
+ Group Key: unique1
+ Sort Key: twothousand
+ Group Key: twothousand
+ Sort Key: thousand
+ Group Key: thousand
+ -> Sort
+ Sort Key: unique1
+ -> Seq Scan on tenk1
+(13 rows)
+
+explain (costs off)
+ select unique1,
+ count(two), count(four), count(ten),
+ count(hundred), count(thousand), count(twothousand),
+ count(*)
+ from tenk1 group by grouping sets (unique1,hundred,ten,four,two);
+ QUERY PLAN
+-------------------------------
+ MixedAggregate
+ Hash Key: two
+ Hash Key: four
+ Hash Key: ten
+ Hash Key: hundred
+ Group Key: unique1
+ -> Sort
+ Sort Key: unique1
+ -> Seq Scan on tenk1
+(9 rows)
+
+set work_mem = '384kB';
+explain (costs off)
+ select unique1,
+ count(two), count(four), count(ten),
+ count(hundred), count(thousand), count(twothousand),
+ count(*)
+ from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two);
+ QUERY PLAN
+-------------------------------
+ MixedAggregate
+ Hash Key: two
+ Hash Key: four
+ Hash Key: ten
+ Hash Key: hundred
+ Hash Key: thousand
+ Group Key: unique1
+ Sort Key: twothousand
+ Group Key: twothousand
+ -> Sort
+ Sort Key: unique1
+ -> Seq Scan on tenk1
+(12 rows)
+
-- end
diff --git a/src/test/regress/expected/tsrf.out b/src/test/regress/expected/tsrf.out
index 0eeaf9e..33f370b 100644
--- a/src/test/regress/expected/tsrf.out
+++ b/src/test/regress/expected/tsrf.out
@@ -233,6 +233,7 @@ SELECT few.dataa, count(*), min(id), max(id), generate_series(1,3) FROM few GROU
(6 rows)
-- grouping sets are a bit special, they produce NULLs in columns not actually NULL
+set enable_hashagg = false;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab);
dataa | b | g | count
-------+-----+---+-------
@@ -311,46 +312,46 @@ SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(d
b | bar | | 2
b | | | 2
| | | 6
- a | | 1 | 2
- b | | 1 | 1
- | | 1 | 3
- a | | 2 | 2
- b | | 2 | 1
- | | 2 | 3
| bar | 1 | 2
| bar | 2 | 2
| bar | | 4
| foo | 1 | 1
| foo | 2 | 1
| foo | | 2
+ a | | 1 | 2
+ b | | 1 | 1
+ | | 1 | 3
+ a | | 2 | 2
+ b | | 2 | 1
+ | | 2 | 3
(24 rows)
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g) ORDER BY dataa;
dataa | b | g | count
-------+-----+---+-------
+ a | foo | | 2
+ a | | | 4
+ a | | 2 | 2
a | bar | 1 | 1
a | bar | 2 | 1
a | bar | | 2
a | foo | 1 | 1
a | foo | 2 | 1
- a | foo | | 2
- a | | | 4
a | | 1 | 2
- a | | 2 | 2
- b | bar | 2 | 1
+ b | bar | 1 | 1
b | | | 2
b | | 1 | 1
- b | | 2 | 1
- b | bar | 1 | 1
+ b | bar | 2 | 1
b | bar | | 2
- | foo | | 2
- | foo | 1 | 1
+ b | | 2 | 1
| | 2 | 3
+ | | | 6
| bar | 1 | 2
| bar | 2 | 2
- | | | 6
- | foo | 2 | 1
| bar | | 4
+ | foo | 1 | 1
+ | foo | 2 | 1
+ | foo | | 2
| | 1 | 3
(24 rows)
@@ -360,29 +361,30 @@ SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(d
a | bar | 1 | 1
a | foo | 1 | 1
b | bar | 1 | 1
+ | bar | 1 | 2
+ | foo | 1 | 1
a | | 1 | 2
b | | 1 | 1
| | 1 | 3
- | bar | 1 | 2
- | foo | 1 | 1
- | foo | 2 | 1
- | bar | 2 | 2
a | | 2 | 2
b | | 2 | 1
- a | bar | 2 | 1
+ | bar | 2 | 2
| | 2 | 3
+ | foo | 2 | 1
+ a | bar | 2 | 1
a | foo | 2 | 1
b | bar | 2 | 1
- a | foo | | 2
+ a | | | 4
b | bar | | 2
b | | | 2
| | | 6
- a | | | 4
+ a | foo | | 2
+ a | bar | | 2
| bar | | 4
| foo | | 2
- a | bar | | 2
(24 rows)
+reset enable_hashagg;
-- data modification
CREATE TABLE fewmore AS SELECT generate_series(1,3) AS data;
INSERT INTO fewmore VALUES(generate_series(4,5));
diff --git a/src/test/regress/sql/groupingsets.sql b/src/test/regress/sql/groupingsets.sql
index 71cc0ec..3f425eb 100644
--- a/src/test/regress/sql/groupingsets.sql
+++ b/src/test/regress/sql/groupingsets.sql
@@ -31,6 +31,14 @@ copy gstest3 from stdin;
\.
alter table gstest3 add primary key (a);
+create temp table gstest4(id integer, v integer,
+ unhashable_col bit(4), unsortable_col xid);
+insert into gstest4
+values (1,1,b'0000','1'), (2,2,b'0001','1'),
+ (3,4,b'0010','2'), (4,8,b'0011','2'),
+ (5,16,b'0000','2'), (6,32,b'0001','2'),
+ (7,64,b'0010','1'), (8,128,b'0011','1');
+
create temp table gstest_empty (a integer, b integer, v integer);
create function gstest_data(v integer, out a integer, out b integer)
@@ -43,8 +51,11 @@ create function gstest_data(v integer, out a integer, out b integer)
-- basic functionality
+set enable_hashagg = false; -- test hashing explicitly later
+
-- simple rollup with multiple plain aggregates, with and without ordering
-- (and with ordering differing from grouping)
+
select a, b, grouping(a,b), sum(v), count(*), max(v)
from gstest1 group by rollup (a,b);
select a, b, grouping(a,b), sum(v), count(*), max(v)
@@ -161,7 +172,7 @@ select a, b from (values (1,2),(2,3)) v(a,b) group by a,b, grouping sets(a);
-- Tests for chained aggregates
select a, b, grouping(a,b), sum(v), count(*), max(v)
- from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2));
+ from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY ROLLUP((e+1),(f+1));
select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY CUBE((e+1),(f+1)) ORDER BY (e+1),(f+1);
select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
@@ -224,4 +235,136 @@ select array(select row(v.a,s1.*) from (select two,four, count(*) from onek grou
select sum(ten) from onek group by two, rollup(four::text) order by 1;
select sum(ten) from onek group by rollup(four::text), two order by 1;
+-- hashing support
+
+set enable_hashagg = true;
+
+-- failure cases
+
+select count(*) from gstest4 group by rollup(unhashable_col,unsortable_col);
+select array_agg(v order by v) from gstest4 group by grouping sets ((id,unsortable_col),(id));
+
+-- simple cases
+
+select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a),(b)) order by 3,1,2;
+explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a),(b)) order by 3,1,2;
+
+select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by cube(a,b) order by 3,1,2;
+explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by cube(a,b) order by 3,1,2;
+
+-- shouldn't try to hash
+explain (costs off)
+ select a, b, grouping(a,b), array_agg(v order by v)
+ from gstest1 group by cube(a,b);
+
+-- mixed hashable/sortable cases
+select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((unhashable_col),(unsortable_col))
+ order by 3, 5;
+explain (costs off)
+ select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((unhashable_col),(unsortable_col))
+ order by 3,5;
+
+select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col))
+ order by 3,5;
+explain (costs off)
+ select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col))
+ order by 3,5;
+
+-- empty input: first is 0 rows, second 1, third 3 etc.
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a);
+explain (costs off)
+ select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a);
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),());
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),());
+explain (costs off)
+ select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),());
+select sum(v), count(*) from gstest_empty group by grouping sets ((),(),());
+explain (costs off)
+ select sum(v), count(*) from gstest_empty group by grouping sets ((),(),());
+
+-- check that functionally dependent cols are not nulled
+select a, d, grouping(a,b,c)
+ from gstest3
+ group by grouping sets ((a,b), (a,c));
+explain (costs off)
+ select a, d, grouping(a,b,c)
+ from gstest3
+ group by grouping sets ((a,b), (a,c));
+
+-- simple rescan tests
+
+select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by grouping sets (a,b);
+explain (costs off)
+ select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by grouping sets (a,b);
+
+select *
+ from (values (1),(2)) v(x),
+ lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s;
+explain (costs off)
+ select *
+ from (values (1),(2)) v(x),
+ lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s;
+
+-- Tests for chained aggregates
+select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
+select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
+ from gstest2 group by cube (a,b) order by rsum, a, b;
+select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by cube (a,b) order by a,b;
+
+-- More rescan tests
+select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by cube(four,ten)) s on true order by v.a,four,ten;
+select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by cube(two,four) order by two,four) s1) from (values (1),(2)) v(a);
+
+-- Rescan logic changes when there are no empty grouping sets, so test
+-- that too:
+select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by grouping sets(four,ten)) s on true order by v.a,four,ten;
+select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by grouping sets(two,four) order by two,four) s1) from (values (1),(2)) v(a);
+
+-- test the knapsack
+
+set work_mem = '64kB';
+explain (costs off)
+ select unique1,
+ count(two), count(four), count(ten),
+ count(hundred), count(thousand), count(twothousand),
+ count(*)
+ from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two);
+explain (costs off)
+ select unique1,
+ count(two), count(four), count(ten),
+ count(hundred), count(thousand), count(twothousand),
+ count(*)
+ from tenk1 group by grouping sets (unique1,hundred,ten,four,two);
+
+set work_mem = '384kB';
+explain (costs off)
+ select unique1,
+ count(two), count(four), count(ten),
+ count(hundred), count(thousand), count(twothousand),
+ count(*)
+ from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two);
+
-- end
diff --git a/src/test/regress/sql/tsrf.sql b/src/test/regress/sql/tsrf.sql
index e627bb9..417e78c 100644
--- a/src/test/regress/sql/tsrf.sql
+++ b/src/test/regress/sql/tsrf.sql
@@ -66,12 +66,14 @@ SELECT SUM(count(*)) OVER(PARTITION BY generate_series(1,3) ORDER BY generate_se
SELECT few.dataa, count(*), min(id), max(id), generate_series(1,3) FROM few GROUP BY few.dataa ORDER BY 5, 1;
-- grouping sets are a bit special, they produce NULLs in columns not actually NULL
+set enable_hashagg = false;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab);
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab) ORDER BY dataa;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab) ORDER BY g;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g);
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g) ORDER BY dataa;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g) ORDER BY g;
+reset enable_hashagg;
-- data modification
CREATE TABLE fewmore AS SELECT generate_series(1,3) AS data;
The following review has been posted through the commitfest application:
make installcheck-world: tested, failed
Implements feature: not tested
Spec compliant: not tested
Documentation: not tested
On my MacBook, `make check-world` gives differences in the contrib modules:
cat contrib/postgres_fdw/regression.diffs
*** /Users/mark/hydra/postgresql.review/contrib/postgres_fdw/expected/postgres_fdw.out 2017-03-03 13:33:47.000000000 -0800
--- /Users/mark/hydra/postgresql.review/contrib/postgres_fdw/results/postgres_fdw.out 2017-03-07 13:27:56.000000000 -0800
***************
*** 3148,3163 ****
-- Grouping sets
explain (verbose, costs off)
select c2, sum(c1) from ft1 where c2 < 3 group by rollup(c2) order by 1 nulls last;
! QUERY PLAN
! ---------------------------------------------------------------------------------------------------
! GroupAggregate
! Output: c2, sum(c1)
! Group Key: ft1.c2
! Group Key: ()
! -> Foreign Scan on public.ft1
! Output: c2, c1
! Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3)) ORDER BY c2 ASC NULLS LAST
! (7 rows)
select c2, sum(c1) from ft1 where c2 < 3 group by rollup(c2) order by 1 nulls last;
c2 | sum
--- 3148,3166 ----
-- Grouping sets
explain (verbose, costs off)
select c2, sum(c1) from ft1 where c2 < 3 group by rollup(c2) order by 1 nulls last;
! QUERY PLAN
! ------------------------------------------------------------------------------
! Sort
! Output: c2, (sum(c1))
! Sort Key: ft1.c2
! -> MixedAggregate
! Output: c2, sum(c1)
! Hash Key: ft1.c2
! Group Key: ()
! -> Foreign Scan on public.ft1
! Output: c2, c1
! Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3))
! (10 rows)
select c2, sum(c1) from ft1 where c2 < 3 group by rollup(c2) order by 1 nulls last;
c2 | sum
***************
*** 3170,3185 ****
explain (verbose, costs off)
select c2, sum(c1) from ft1 where c2 < 3 group by cube(c2) order by 1 nulls last;
! QUERY PLAN
! ---------------------------------------------------------------------------------------------------
! GroupAggregate
! Output: c2, sum(c1)
! Group Key: ft1.c2
! Group Key: ()
! -> Foreign Scan on public.ft1
! Output: c2, c1
! Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3)) ORDER BY c2 ASC NULLS LAST
! (7 rows)
select c2, sum(c1) from ft1 where c2 < 3 group by cube(c2) order by 1 nulls last;
c2 | sum
--- 3173,3191 ----
explain (verbose, costs off)
select c2, sum(c1) from ft1 where c2 < 3 group by cube(c2) order by 1 nulls last;
! QUERY PLAN
! ------------------------------------------------------------------------------
! Sort
! Output: c2, (sum(c1))
! Sort Key: ft1.c2
! -> MixedAggregate
! Output: c2, sum(c1)
! Hash Key: ft1.c2
! Group Key: ()
! -> Foreign Scan on public.ft1
! Output: c2, c1
! Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3))
! (10 rows)
select c2, sum(c1) from ft1 where c2 < 3 group by cube(c2) order by 1 nulls last;
c2 | sum
***************
*** 3192,3211 ****
explain (verbose, costs off)
select c2, c6, sum(c1) from ft1 where c2 < 3 group by grouping sets(c2, c6) order by 1 nulls last, 2 nulls last;
! QUERY PLAN
! -------------------------------------------------------------------------------------------------------------
Sort
Output: c2, c6, (sum(c1))
Sort Key: ft1.c2, ft1.c6
! -> GroupAggregate
Output: c2, c6, sum(c1)
! Group Key: ft1.c2
! Sort Key: ft1.c6
! Group Key: ft1.c6
-> Foreign Scan on public.ft1
Output: c2, c6, c1
! Remote SQL: SELECT "C 1", c2, c6 FROM "S 1"."T 1" WHERE ((c2 < 3)) ORDER BY c2 ASC NULLS LAST
! (11 rows)
select c2, c6, sum(c1) from ft1 where c2 < 3 group by grouping sets(c2, c6) order by 1 nulls last, 2 nulls last;
c2 | c6 | sum
--- 3198,3216 ----
explain (verbose, costs off)
select c2, c6, sum(c1) from ft1 where c2 < 3 group by grouping sets(c2, c6) order by 1 nulls last, 2 nulls last;
! QUERY PLAN
! ----------------------------------------------------------------------------------
Sort
Output: c2, c6, (sum(c1))
Sort Key: ft1.c2, ft1.c6
! -> HashAggregate
Output: c2, c6, sum(c1)
! Hash Key: ft1.c2
! Hash Key: ft1.c6
-> Foreign Scan on public.ft1
Output: c2, c6, c1
! Remote SQL: SELECT "C 1", c2, c6 FROM "S 1"."T 1" WHERE ((c2 < 3))
! (10 rows)
select c2, c6, sum(c1) from ft1 where c2 < 3 group by grouping sets(c2, c6) order by 1 nulls last, 2 nulls last;
c2 | c6 | sum
======================================================================
On Mar 7, 2017, at 1:43 PM, Mark Dilger <hornschnorter@gmail.com> wrote:
> On my MacBook, `make check-world` gives differences in the contrib modules:
I get the same (or similar -- didn't check) regression failure on CentOS, so this
doesn't appear to be MacBook / hardware specific.
Mark Dilger
The following review has been posted through the commitfest application:
make installcheck-world: tested, failed
Implements feature: not tested
Spec compliant: not tested
Documentation: not tested
On Linux/gcc the patch generates a warning in nodeAgg.c that is fairly easy
to fix. Using -Werror to make catching the error easier, I get:
make[3]: Entering directory `/home/mark/postgresql.clean/src/backend/executor'
gcc -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -O2 -Werror -I../../../src/include -D_GNU_SOURCE -c -o nodeAgg.o nodeAgg.c -MMD -MP -MF .deps/nodeAgg.Po
cc1: warnings being treated as errors
nodeAgg.c: In function ‘ExecAgg’:
nodeAgg.c:2053: error: ‘result’ may be used uninitialized in this function
make[3]: *** [nodeAgg.o] Error 1
make[3]: Leaving directory `/home/mark/postgresql.clean/src/backend/executor'
make[2]: *** [executor-recursive] Error 2
make[2]: Leaving directory `/home/mark/postgresql.clean/src/backend'
make[1]: *** [all-backend-recurse] Error 2
make[1]: Leaving directory `/home/mark/postgresql.clean/src'
make: *** [all-src-recurse] Error 2
There are two obvious choices to silence the warning:
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index f059560..8959f5b 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -2066,6 +2066,7 @@ ExecAgg(AggState *node)
break;
case AGG_PLAIN:
case AGG_SORTED:
+ default:
result = agg_retrieve_direct(node);
break;
}
which I like better because I don't expect it to add an extra
instruction to the compiled code; but alternatively:
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index f059560..eab2605 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -2050,7 +2050,7 @@ lookup_hash_entries(AggState *aggstate)
TupleTableSlot *
ExecAgg(AggState *node)
{
- TupleTableSlot *result;
+ TupleTableSlot *result = NULL;
if (!node->agg_done)
{
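For reference, the pattern gcc is objecting to boils down to something
like the following standalone sketch (names invented for illustration,
not code from the patch; whether it actually warns depends on the gcc
version and optimization level). Without a default case, older gcc
cannot prove the switch exhausts the enum, so it sees a path on which
result is never assigned:

#include <stdio.h>

typedef enum
{
	STRAT_PLAIN,
	STRAT_SORTED,
	STRAT_HASHED,
	STRAT_MIXED
} Strategy;

static int
fetch_result(Strategy s)
{
	int			result;		/* gcc 4.x: "may be used uninitialized" */

	switch (s)
	{
		case STRAT_HASHED:
		case STRAT_MIXED:
			result = 1;
			break;
		case STRAT_PLAIN:
		case STRAT_SORTED:
			result = 2;
			break;
			/* no default: exhaustiveness is not visible to older gcc */
	}
	return result;
}

int
main(void)
{
	printf("%d\n", fetch_result(STRAT_MIXED));
	return 0;
}

Either of the two changes above silences it; initializing to NULL has
the advantage of staying well-defined if a new AggStrategy member is
ever added without updating the switch.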
"Mark" == Mark Dilger <hornschnorter@gmail.com> writes:
Mark> On linux/gcc the patch generates a warning in nodeAgg.c that is
Mark> fairly easy to fix. Using -Werror to make catching the error
Mark> easier, I get:
what gcc version is this exactly?
--
Andrew (irc:RhodiumToad)
"Mark" == Mark Dilger <hornschnorter@gmail.com> writes:
Mark> On my MacBook, `make check-world` gives differences in the
Mark> contrib modules:
Thanks! The latest cleaned-up version of the patch is attached.
--
Andrew (irc:RhodiumToad)
Attachments:
gshash23.patch (text/x-patch)
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 0b9e3e4..91765c2 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -3148,16 +3148,19 @@ select sum(q.a), count(q.b) from ft4 left join (select 13, avg(ft1.c1), sum(ft2.
-- Grouping sets
explain (verbose, costs off)
select c2, sum(c1) from ft1 where c2 < 3 group by rollup(c2) order by 1 nulls last;
- QUERY PLAN
----------------------------------------------------------------------------------------------------
- GroupAggregate
- Output: c2, sum(c1)
- Group Key: ft1.c2
- Group Key: ()
- -> Foreign Scan on public.ft1
- Output: c2, c1
- Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3)) ORDER BY c2 ASC NULLS LAST
-(7 rows)
+ QUERY PLAN
+------------------------------------------------------------------------------
+ Sort
+ Output: c2, (sum(c1))
+ Sort Key: ft1.c2
+ -> MixedAggregate
+ Output: c2, sum(c1)
+ Hash Key: ft1.c2
+ Group Key: ()
+ -> Foreign Scan on public.ft1
+ Output: c2, c1
+ Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3))
+(10 rows)
select c2, sum(c1) from ft1 where c2 < 3 group by rollup(c2) order by 1 nulls last;
c2 | sum
@@ -3170,16 +3173,19 @@ select c2, sum(c1) from ft1 where c2 < 3 group by rollup(c2) order by 1 nulls la
explain (verbose, costs off)
select c2, sum(c1) from ft1 where c2 < 3 group by cube(c2) order by 1 nulls last;
- QUERY PLAN
----------------------------------------------------------------------------------------------------
- GroupAggregate
- Output: c2, sum(c1)
- Group Key: ft1.c2
- Group Key: ()
- -> Foreign Scan on public.ft1
- Output: c2, c1
- Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3)) ORDER BY c2 ASC NULLS LAST
-(7 rows)
+ QUERY PLAN
+------------------------------------------------------------------------------
+ Sort
+ Output: c2, (sum(c1))
+ Sort Key: ft1.c2
+ -> MixedAggregate
+ Output: c2, sum(c1)
+ Hash Key: ft1.c2
+ Group Key: ()
+ -> Foreign Scan on public.ft1
+ Output: c2, c1
+ Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3))
+(10 rows)
select c2, sum(c1) from ft1 where c2 < 3 group by cube(c2) order by 1 nulls last;
c2 | sum
@@ -3192,20 +3198,19 @@ select c2, sum(c1) from ft1 where c2 < 3 group by cube(c2) order by 1 nulls last
explain (verbose, costs off)
select c2, c6, sum(c1) from ft1 where c2 < 3 group by grouping sets(c2, c6) order by 1 nulls last, 2 nulls last;
- QUERY PLAN
--------------------------------------------------------------------------------------------------------------
+ QUERY PLAN
+----------------------------------------------------------------------------------
Sort
Output: c2, c6, (sum(c1))
Sort Key: ft1.c2, ft1.c6
- -> GroupAggregate
+ -> HashAggregate
Output: c2, c6, sum(c1)
- Group Key: ft1.c2
- Sort Key: ft1.c6
- Group Key: ft1.c6
+ Hash Key: ft1.c2
+ Hash Key: ft1.c6
-> Foreign Scan on public.ft1
Output: c2, c6, c1
- Remote SQL: SELECT "C 1", c2, c6 FROM "S 1"."T 1" WHERE ((c2 < 3)) ORDER BY c2 ASC NULLS LAST
-(11 rows)
+ Remote SQL: SELECT "C 1", c2, c6 FROM "S 1"."T 1" WHERE ((c2 < 3))
+(10 rows)
select c2, c6, sum(c1) from ft1 where c2 < 3 group by grouping sets(c2, c6) order by 1 nulls last, 2 nulls last;
c2 | c6 | sum
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index c9e0a3e..480a07e 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -996,6 +996,10 @@ ExplainNode(PlanState *planstate, List *ancestors,
pname = "HashAggregate";
strategy = "Hashed";
break;
+ case AGG_MIXED:
+ pname = "MixedAggregate";
+ strategy = "Mixed";
+ break;
default:
pname = "Aggregate ???";
strategy = "???";
@@ -1924,6 +1928,19 @@ show_grouping_set_keys(PlanState *planstate,
ListCell *lc;
List *gsets = aggnode->groupingSets;
AttrNumber *keycols = aggnode->grpColIdx;
+ const char *keyname;
+ const char *keysetname;
+
+ if (aggnode->aggstrategy == AGG_HASHED || aggnode->aggstrategy == AGG_MIXED)
+ {
+ keyname = "Hash Key";
+ keysetname = "Hash Keys";
+ }
+ else
+ {
+ keyname = "Group Key";
+ keysetname = "Group Keys";
+ }
ExplainOpenGroup("Grouping Set", NULL, true, es);
@@ -1938,7 +1955,7 @@ show_grouping_set_keys(PlanState *planstate,
es->indent++;
}
- ExplainOpenGroup("Group Keys", "Group Keys", false, es);
+ ExplainOpenGroup(keysetname, keysetname, false, es);
foreach(lc, gsets)
{
@@ -1962,12 +1979,12 @@ show_grouping_set_keys(PlanState *planstate,
}
if (!result && es->format == EXPLAIN_FORMAT_TEXT)
- ExplainPropertyText("Group Key", "()", es);
+ ExplainPropertyText(keyname, "()", es);
else
- ExplainPropertyListNested("Group Key", result, es);
+ ExplainPropertyListNested(keyname, result, es);
}
- ExplainCloseGroup("Group Keys", "Group Keys", false, es);
+ ExplainCloseGroup(keysetname, keysetname, false, es);
if (sortnode && es->format == EXPLAIN_FORMAT_TEXT)
es->indent--;
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index aa08152..b146ada 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -122,12 +122,19 @@
* specific).
*
* Where more complex grouping sets are used, we break them down into
- * "phases", where each phase has a different sort order. During each
- * phase but the last, the input tuples are additionally stored in a
- * tuplesort which is keyed to the next phase's sort order; during each
- * phase but the first, the input tuples are drawn from the previously
- * sorted data. (The sorting of the data for the first phase is handled by
- * the planner, as it might be satisfied by underlying nodes.)
+ * "phases", where each phase has a different sort order (except phase 0
+ * which is reserved for hashing). During each phase but the last, the
+ * input tuples are additionally stored in a tuplesort which is keyed to the
+ * next phase's sort order; during each phase but the first, the input
+ * tuples are drawn from the previously sorted data. (The sorting of the
+ * data for the first phase is handled by the planner, as it might be
+ * satisfied by underlying nodes.)
+ *
+ * Hashing can be mixed with sorted grouping. To do this, we have an
+ * AGG_MIXED strategy that populates the hashtables during the first sorted
+ * phase, and switches to reading them out after completing all sort phases.
+ * We can also support AGG_HASHED with multiple hash tables and no sorting
+ * at all.
*
* From the perspective of aggregate transition and final functions, the
* only issue regarding grouping sets is this: a single call site (flinfo)
@@ -139,8 +146,6 @@
* sensitive to the grouping set for which the aggregate function is
* currently being called.
*
- * TODO: AGG_HASHED doesn't support multiple grouping sets yet.
- *
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
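As a concrete illustration of the phase numbering described above (the
grouping sets here are hypothetical):

    /*
     * GROUP BY GROUPING SETS ((a), (b), (c,d), (c)), assuming that
     * (a) and (b) fit in work_mem and are hashed:
     *
     *   phase 0: AGG_HASHED for sets (a) and (b); its hash tables are
     *            populated while phase 1 runs and read out afterwards
     *   phase 1: AGG_SORTED rollup {(c,d), (c)}, input sorted by the
     *            planner
     *
     * With no hashing, phase 0 exists but stays unused and execution
     * starts at phase 1; with only hashing, phase 0 is all there is.
     */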
@@ -432,6 +437,7 @@ typedef struct AggStatePerGroupData
*/
typedef struct AggStatePerPhaseData
{
+ AggStrategy aggstrategy; /* strategy for this phase */
int numsets; /* number of grouping sets (or 0) */
int *gset_lengths; /* lengths of grouping sets */
Bitmapset **grouped_cols; /* column groupings for rollup */
@@ -440,7 +446,31 @@ typedef struct AggStatePerPhaseData
Sort *sortnode; /* Sort node for input ordering for phase */
} AggStatePerPhaseData;
+/*
+ * AggStatePerHashData - per-hashtable state
+ *
+ * When doing grouping sets with hashing, we have one of these for each
+ * grouping set. (When doing hashing without grouping sets, we have just one of
+ * them.)
+ */
+
+typedef struct AggStatePerHashData
+{
+ TupleHashTable hashtable; /* hash table with one entry per group */
+ TupleHashIterator hashiter; /* for iterating through hash table */
+ TupleTableSlot *hashslot; /* slot for loading hash table */
+ FmgrInfo *hashfunctions; /* per-grouping-field hash fns */
+ FmgrInfo *eqfunctions; /* per-grouping-field equality fns */
+ int numCols; /* number of hash key columns */
+ int numhashGrpCols; /* number of columns in hash table */
+ int largestGrpColIdx; /* largest col required for hashing */
+ AttrNumber *hashGrpColIdxInput; /* hash col indices in input slot */
+ AttrNumber *hashGrpColIdxHash; /* indices in hashtbl tuples */
+ Agg *aggnode; /* original Agg node, for numGroups etc. */
+} AggStatePerHashData;
+
+static void select_current_set(AggState *aggstate, int setno, bool is_hash);
static void initialize_phase(AggState *aggstate, int newphase);
static TupleTableSlot *fetch_input_tuple(AggState *aggstate);
static void initialize_aggregates(AggState *aggstate,
@@ -449,7 +479,8 @@ static void initialize_aggregates(AggState *aggstate,
static void advance_transition_function(AggState *aggstate,
AggStatePerTrans pertrans,
AggStatePerGroup pergroupstate);
-static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup);
+static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup,
+ AggStatePerGroup *pergroups);
static void advance_combine_function(AggState *aggstate,
AggStatePerTrans pertrans,
AggStatePerGroup pergroupstate);
@@ -479,8 +510,8 @@ static TupleTableSlot *project_aggregates(AggState *aggstate);
static Bitmapset *find_unaggregated_cols(AggState *aggstate);
static bool find_unaggregated_cols_walker(Node *node, Bitmapset **colnos);
static void build_hash_table(AggState *aggstate);
-static TupleHashEntryData *lookup_hash_entry(AggState *aggstate,
- TupleTableSlot *inputslot);
+static TupleHashEntryData *lookup_hash_entry(AggState *aggstate);
+static AggStatePerGroup *lookup_hash_entries(AggState *aggstate);
static TupleTableSlot *agg_retrieve_direct(AggState *aggstate);
static void agg_fill_hash_table(AggState *aggstate);
static TupleTableSlot *agg_retrieve_hash_table(AggState *aggstate);
@@ -501,13 +532,29 @@ static int find_compatible_pertrans(AggState *aggstate, Aggref *newagg,
/*
- * Switch to phase "newphase", which must either be 0 (to reset) or
+ * Select the current grouping set; affects current_set and
+ * curaggcontext.
+ */
+
+static void
+select_current_set(AggState *aggstate, int setno, bool is_hash)
+{
+ if (is_hash)
+ aggstate->curaggcontext = aggstate->hashcontext;
+ else
+ aggstate->curaggcontext = aggstate->aggcontexts[setno];
+
+ aggstate->current_set = setno;
+}
+
+/*
+ * Switch to phase "newphase", which must be either 0 or 1 (to reset) or
* current_phase + 1. Juggle the tuplesorts accordingly.
*/
static void
initialize_phase(AggState *aggstate, int newphase)
{
- Assert(newphase == 0 || newphase == aggstate->current_phase + 1);
+ Assert(newphase <= 1 || newphase == aggstate->current_phase + 1);
/*
* Whatever the previous state, we're now done with whatever input
@@ -519,7 +566,7 @@ initialize_phase(AggState *aggstate, int newphase)
aggstate->sort_in = NULL;
}
- if (newphase == 0)
+ if (newphase <= 1)
{
/*
* Discard any existing output tuplesort.
@@ -546,7 +593,7 @@ initialize_phase(AggState *aggstate, int newphase)
* If this isn't the last phase, we need to sort appropriately for the
* next phase in sequence.
*/
- if (newphase < aggstate->numphases - 1)
+ if (newphase > 0 && newphase < aggstate->numphases - 1)
{
Sort *sortnode = aggstate->phases[newphase + 1].sortnode;
PlanState *outerNode = outerPlanState(aggstate);
@@ -567,7 +614,7 @@ initialize_phase(AggState *aggstate, int newphase)
}
/*
- * Fetch a tuple from either the outer plan (for phase 0) or from the sorter
+ * Fetch a tuple from either the outer plan (for phase 1) or from the sorter
* populated by the previous phase. Copy it to the sorter for the next phase
* if any.
*/
@@ -595,8 +642,8 @@ fetch_input_tuple(AggState *aggstate)
/*
* (Re)Initialize an individual aggregate.
*
- * This function handles only one grouping set (already set in
- * aggstate->current_set).
+ * This function handles only one grouping set, already set in
+ * aggstate->current_set.
*
* When called, CurrentMemoryContext should be the per-query context.
*/
@@ -653,7 +700,7 @@ initialize_aggregate(AggState *aggstate, AggStatePerTrans pertrans,
MemoryContext oldContext;
oldContext = MemoryContextSwitchTo(
- aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+ aggstate->curaggcontext->ecxt_per_tuple_memory);
pergroupstate->transValue = datumCopy(pertrans->initValue,
pertrans->transtypeByVal,
pertrans->transtypeLen);
@@ -676,8 +723,9 @@ initialize_aggregate(AggState *aggstate, AggStatePerTrans pertrans,
*
* If there are multiple grouping sets, we initialize only the first numReset
* of them (the grouping sets are ordered so that the most specific one, which
- * is reset most often, is first). As a convenience, if numReset is < 1, we
- * reinitialize all sets.
+ * is reset most often, is first). As a convenience, if numReset is 0, we
+ * reinitialize all sets. A numReset of -1 means we are initializing a
+ * hashtable entry, in which case the caller must already have called
+ * select_current_set to choose the set.
*
* When called, CurrentMemoryContext should be the per-query context.
*/
@@ -689,25 +737,37 @@ initialize_aggregates(AggState *aggstate,
int transno;
int numGroupingSets = Max(aggstate->phase->numsets, 1);
int setno = 0;
+ int numTrans = aggstate->numtrans;
AggStatePerTrans transstates = aggstate->pertrans;
- if (numReset < 1)
+ if (numReset == 0)
numReset = numGroupingSets;
- for (transno = 0; transno < aggstate->numtrans; transno++)
+ for (transno = 0; transno < numTrans; transno++)
{
AggStatePerTrans pertrans = &transstates[transno];
- for (setno = 0; setno < numReset; setno++)
+ if (numReset < 0)
{
AggStatePerGroup pergroupstate;
- pergroupstate = &pergroup[transno + (setno * (aggstate->numtrans))];
-
- aggstate->current_set = setno;
+ pergroupstate = &pergroup[transno];
initialize_aggregate(aggstate, pertrans, pergroupstate);
}
+ else
+ {
+ for (setno = 0; setno < numReset; setno++)
+ {
+ AggStatePerGroup pergroupstate;
+
+ pergroupstate = &pergroup[transno + (setno * numTrans)];
+
+ select_current_set(aggstate, setno, false);
+
+ initialize_aggregate(aggstate, pertrans, pergroupstate);
+ }
+ }
}
}
@@ -757,7 +817,7 @@ advance_transition_function(AggState *aggstate,
* do not need to pfree the old transValue, since it's NULL.
*/
oldContext = MemoryContextSwitchTo(
- aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+ aggstate->curaggcontext->ecxt_per_tuple_memory);
pergroupstate->transValue = datumCopy(fcinfo->arg[1],
pertrans->transtypeByVal,
pertrans->transtypeLen);
@@ -807,7 +867,7 @@ advance_transition_function(AggState *aggstate,
{
if (!fcinfo->isnull)
{
- MemoryContextSwitchTo(aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+ MemoryContextSwitchTo(aggstate->curaggcontext->ecxt_per_tuple_memory);
if (DatumIsReadWriteExpandedObject(newVal,
false,
pertrans->transtypeLen) &&
@@ -838,17 +898,21 @@ advance_transition_function(AggState *aggstate,
/*
* Advance each aggregate transition state for one input tuple. The input
* tuple has been stored in tmpcontext->ecxt_outertuple, so that it is
- * accessible to ExecEvalExpr. pergroup is the array of per-group structs to
- * use (this might be in a hashtable entry).
+ * accessible to ExecEvalExpr.
+ *
+ * We have two sets of transition states to handle: one for sorted aggregation
+ * and one for hashed; we do them both here, to avoid multiple evaluation of
+ * the inputs.
*
* When called, CurrentMemoryContext should be the per-query context.
*/
static void
-advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
+advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup, AggStatePerGroup *pergroups)
{
int transno;
int setno = 0;
int numGroupingSets = Max(aggstate->phase->numsets, 1);
+ int numHashes = aggstate->num_hashes;
int numTrans = aggstate->numtrans;
TupleTableSlot *slot = aggstate->evalslot;
@@ -880,6 +944,7 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
{
/* DISTINCT and/or ORDER BY case */
Assert(slot->tts_nvalid >= (pertrans->numInputs + inputoff));
+ Assert(!pergroups);
/*
* If the transfn is strict, we want to check for nullity before
@@ -940,13 +1005,36 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
fcinfo->argnull[i + 1] = slot->tts_isnull[i + inputoff];
}
- for (setno = 0; setno < numGroupingSets; setno++)
+ if (pergroup)
{
- AggStatePerGroup pergroupstate = &pergroup[transno + (setno * numTrans)];
+ /* advance transition states for ordered grouping */
- aggstate->current_set = setno;
+ for (setno = 0; setno < numGroupingSets; setno++)
+ {
+ AggStatePerGroup pergroupstate;
+
+ select_current_set(aggstate, setno, false);
- advance_transition_function(aggstate, pertrans, pergroupstate);
+ pergroupstate = &pergroup[transno + (setno * numTrans)];
+
+ advance_transition_function(aggstate, pertrans, pergroupstate);
+ }
+ }
+
+ if (pergroups)
+ {
+ /* advance transition states for hashed grouping */
+
+ for (setno = 0; setno < numHashes; setno++)
+ {
+ AggStatePerGroup pergroupstate;
+
+ select_current_set(aggstate, setno, true);
+
+ pergroupstate = &pergroups[setno][transno];
+
+ advance_transition_function(aggstate, pertrans, pergroupstate);
+ }
}
}
}
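To summarize the two addressing schemes used above (a sketch reusing the
loop variables from advance_aggregates):

    /* sorted grouping sets: one flat array, indexed by set and transno */
    pergroupstate = &pergroup[transno + (setno * numTrans)];

    /* hashed grouping sets: one per-group array per hash table entry */
    pergroupstate = &pergroups[setno][transno];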
@@ -967,7 +1055,7 @@ combine_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
TupleTableSlot *slot;
/* combine not supported with grouping sets */
- Assert(aggstate->phase->numsets == 0);
+ Assert(aggstate->phase->numsets <= 1);
/* compute input for all aggregates */
slot = ExecProject(aggstate->evalproj);
@@ -1060,7 +1148,7 @@ advance_combine_function(AggState *aggstate,
if (!pertrans->transtypeByVal)
{
oldContext = MemoryContextSwitchTo(
- aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+ aggstate->curaggcontext->ecxt_per_tuple_memory);
pergroupstate->transValue = datumCopy(fcinfo->arg[1],
pertrans->transtypeByVal,
pertrans->transtypeLen);
@@ -1105,7 +1193,7 @@ advance_combine_function(AggState *aggstate,
{
if (!fcinfo->isnull)
{
- MemoryContextSwitchTo(aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+ MemoryContextSwitchTo(aggstate->curaggcontext->ecxt_per_tuple_memory);
if (DatumIsReadWriteExpandedObject(newVal,
false,
pertrans->transtypeLen) &&
@@ -1559,7 +1647,9 @@ prepare_projection_slot(AggState *aggstate, TupleTableSlot *slot, int currentSet
/*
* Compute the final value of all aggregates for one group.
*
- * This function handles only one grouping set at a time.
+ * This function handles only one grouping set at a time. But in the hash
+ * case, it's the caller's responsibility to have selected the set already;
+ * the caller passes in -1 here to flag that, which also controls the
+ * indexing into the per-group transition data.
*
* Results are stored in the output econtext aggvalues/aggnulls.
*/
@@ -1575,10 +1665,11 @@ finalize_aggregates(AggState *aggstate,
int aggno;
int transno;
- Assert(currentSet == 0 ||
- ((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED);
-
- aggstate->current_set = currentSet;
+ /* for the hash case, the caller has already selected the set */
+ if (currentSet >= 0)
+ select_current_set(aggstate, currentSet, false);
+ else
+ currentSet = 0;
/*
* If there were any DISTINCT and/or ORDER BY aggregates, sort their
@@ -1593,7 +1684,8 @@ finalize_aggregates(AggState *aggstate,
if (pertrans->numSortCols > 0)
{
- Assert(((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED);
+ Assert(aggstate->aggstrategy != AGG_HASHED &&
+ aggstate->aggstrategy != AGG_MIXED);
if (pertrans->numInputs == 1)
process_ordered_aggregate_single(aggstate,
@@ -1697,7 +1789,7 @@ find_unaggregated_cols_walker(Node *node, Bitmapset **colnos)
}
/*
- * Initialize the hash table to empty.
+ * Initialize the hash table(s) to empty.
*
* To implement hashed aggregation, we need a hashtable that stores a
* representative tuple and an array of AggStatePerGroup structs for each
@@ -1705,29 +1797,40 @@ find_unaggregated_cols_walker(Node *node, Bitmapset **colnos)
* GROUP BY columns. The per-group data is allocated in lookup_hash_entry(),
* for each entry.
*
- * The hash table always lives in the aggcontext memory context.
+ * We have a separate hashtable and associated perhash data structure for each
+ * grouping set for which we're doing hashing.
+ *
+ * The hash tables always live in the hashcontext's per-tuple memory context
+ * (there is only one of these for all tables together, since they are all
+ * reset at the same time).
*/
static void
build_hash_table(AggState *aggstate)
{
- Agg *node = (Agg *) aggstate->ss.ps.plan;
MemoryContext tmpmem = aggstate->tmpcontext->ecxt_per_tuple_memory;
Size additionalsize;
+ int i;
- Assert(node->aggstrategy == AGG_HASHED);
- Assert(node->numGroups > 0);
+ Assert(aggstate->aggstrategy == AGG_HASHED || aggstate->aggstrategy == AGG_MIXED);
- additionalsize = aggstate->numaggs * sizeof(AggStatePerGroupData);
+ additionalsize = aggstate->numtrans * sizeof(AggStatePerGroupData);
- aggstate->hashtable = BuildTupleHashTable(node->numCols,
- aggstate->hashGrpColIdxHash,
- aggstate->phase->eqfunctions,
- aggstate->hashfunctions,
- node->numGroups,
- additionalsize,
- aggstate->aggcontexts[0]->ecxt_per_tuple_memory,
- tmpmem,
+ for (i = 0; i < aggstate->num_hashes; ++i)
+ {
+ AggStatePerHash perhash = &aggstate->perhash[i];
+
+ Assert(perhash->aggnode->numGroups > 0);
+
+ perhash->hashtable = BuildTupleHashTable(perhash->numCols,
+ perhash->hashGrpColIdxHash,
+ perhash->eqfunctions,
+ perhash->hashfunctions,
+ perhash->aggnode->numGroups,
+ additionalsize,
+ aggstate->hashcontext->ecxt_per_tuple_memory,
+ tmpmem,
DO_AGGSPLIT_SKIPFINAL(aggstate->aggsplit));
+ }
}
/*
@@ -1750,72 +1853,98 @@ build_hash_table(AggState *aggstate)
* the array is preserved over ExecReScanAgg, so we allocate it in the
* per-query context (unlike the hash table itself).
*/
-static List *
+static void
find_hash_columns(AggState *aggstate)
{
- Agg *node = (Agg *) aggstate->ss.ps.plan;
- Bitmapset *colnos;
- List *collist;
- TupleDesc hashDesc;
+ Bitmapset *base_colnos;
List *outerTlist = outerPlanState(aggstate)->plan->targetlist;
- List *hashTlist = NIL;
- int i;
-
- aggstate->largestGrpColIdx = 0;
+ int numHashes = aggstate->num_hashes;
+ int j;
/* Find Vars that will be needed in tlist and qual */
- colnos = find_unaggregated_cols(aggstate);
- /* Add in all the grouping columns */
- for (i = 0; i < node->numCols; i++)
- colnos = bms_add_member(colnos, node->grpColIdx[i]);
- /* Convert to list, using lcons so largest element ends up first */
- collist = NIL;
+ base_colnos = find_unaggregated_cols(aggstate);
- aggstate->hashGrpColIdxInput =
- palloc(bms_num_members(colnos) * sizeof(AttrNumber));
- aggstate->hashGrpColIdxHash =
- palloc(node->numCols * sizeof(AttrNumber));
-
- /*
- * First build mapping for columns directly hashed. These are the first,
- * because they'll be accessed when computing hash values and comparing
- * tuples for exact matches. We also build simple mapping for
- * execGrouping, so it knows where to find the to-be-hashed / compared
- * columns in the input.
- */
- for (i = 0; i < node->numCols; i++)
+ for (j = 0; j < numHashes; ++j)
{
- aggstate->hashGrpColIdxInput[i] = node->grpColIdx[i];
- aggstate->hashGrpColIdxHash[i] = i + 1;
- aggstate->numhashGrpCols++;
- /* delete already mapped columns */
- bms_del_member(colnos, node->grpColIdx[i]);
- }
+ AggStatePerHash perhash = &aggstate->perhash[j];
+ Bitmapset *colnos = bms_copy(base_colnos);
+ AttrNumber *grpColIdx = perhash->aggnode->grpColIdx;
+ List *hashTlist = NIL;
+ TupleDesc hashDesc;
+ int i;
- /* and add the remaining columns */
- while ((i = bms_first_member(colnos)) >= 0)
- {
- aggstate->hashGrpColIdxInput[aggstate->numhashGrpCols] = i;
- aggstate->numhashGrpCols++;
- }
+ perhash->largestGrpColIdx = 0;
- /* and build a tuple descriptor for the hashtable */
- for (i = 0; i < aggstate->numhashGrpCols; i++)
- {
- int varNumber = aggstate->hashGrpColIdxInput[i] - 1;
+ /*
+ * If we're doing grouping sets, then some Vars might be referenced in
+ * tlist/qual for the benefit of other grouping sets, but not needed
+ * when hashing; i.e. prepare_projection_slot will null them out, so
+ * there'd be no point storing them. Use prepare_projection_slot's
+ * logic to determine which.
+ */
+ if (aggstate->phases[0].grouped_cols)
+ {
+ Bitmapset *grouped_cols = aggstate->phases[0].grouped_cols[j];
+ ListCell *lc;
- hashTlist = lappend(hashTlist, list_nth(outerTlist, varNumber));
- aggstate->largestGrpColIdx =
- Max(varNumber + 1, aggstate->largestGrpColIdx);
- }
+ foreach(lc, aggstate->all_grouped_cols)
+ {
+ int attnum = lfirst_int(lc);
+
+ if (!bms_is_member(attnum, grouped_cols))
+ colnos = bms_del_member(colnos, attnum);
+ }
+ }
+ /* Add in all the grouping columns */
+ for (i = 0; i < perhash->numCols; i++)
+ colnos = bms_add_member(colnos, grpColIdx[i]);
+
+ perhash->hashGrpColIdxInput =
+ palloc(bms_num_members(colnos) * sizeof(AttrNumber));
+ perhash->hashGrpColIdxHash =
+ palloc(perhash->numCols * sizeof(AttrNumber));
+
+ /*
+ * First build mapping for columns directly hashed. These are the
+ * first, because they'll be accessed when computing hash values and
+ * comparing tuples for exact matches. We also build simple mapping
+ * for execGrouping, so it knows where to find the to-be-hashed /
+ * compared columns in the input.
+ */
+ for (i = 0; i < perhash->numCols; i++)
+ {
+ perhash->hashGrpColIdxInput[i] = grpColIdx[i];
+ perhash->hashGrpColIdxHash[i] = i + 1;
+ perhash->numhashGrpCols++;
+ /* delete already mapped columns */
+ bms_del_member(colnos, grpColIdx[i]);
+ }
+
+ /* and add the remaining columns */
+ while ((i = bms_first_member(colnos)) >= 0)
+ {
+ perhash->hashGrpColIdxInput[perhash->numhashGrpCols] = i;
+ perhash->numhashGrpCols++;
+ }
- hashDesc = ExecTypeFromTL(hashTlist, false);
- ExecSetSlotDescriptor(aggstate->hashslot, hashDesc);
+ /* and build a tuple descriptor for the hashtable */
+ for (i = 0; i < perhash->numhashGrpCols; i++)
+ {
+ int varNumber = perhash->hashGrpColIdxInput[i] - 1;
+
+ hashTlist = lappend(hashTlist, list_nth(outerTlist, varNumber));
+ perhash->largestGrpColIdx =
+ Max(varNumber + 1, perhash->largestGrpColIdx);
+ }
+
+ hashDesc = ExecTypeFromTL(hashTlist, false);
+ ExecSetSlotDescriptor(perhash->hashslot, hashDesc);
- list_free(hashTlist);
- bms_free(colnos);
+ list_free(hashTlist);
+ bms_free(colnos);
+ }
- return collist;
+ bms_free(base_colnos);
}
/*
@@ -1840,26 +1969,30 @@ hash_agg_entry_size(int numAggs)
}
/*
- * Find or create a hashtable entry for the tuple group containing the
- * given tuple.
+ * Find or create a hashtable entry for the tuple group containing the current
+ * tuple (already set in tmpcontext's outertuple slot), in the current grouping
+ * set (which the caller must have selected - note that initialize_aggregate
+ * depends on this).
*
* When called, CurrentMemoryContext should be the per-query context.
*/
static TupleHashEntryData *
-lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot)
+lookup_hash_entry(AggState *aggstate)
{
- TupleTableSlot *hashslot = aggstate->hashslot;
+ TupleTableSlot *inputslot = aggstate->tmpcontext->ecxt_outertuple;
+ AggStatePerHash perhash = &aggstate->perhash[aggstate->current_set];
+ TupleTableSlot *hashslot = perhash->hashslot;
TupleHashEntryData *entry;
bool isnew;
- int i;
+ int i;
/* transfer just the needed columns into hashslot */
- slot_getsomeattrs(inputslot, aggstate->largestGrpColIdx);
+ slot_getsomeattrs(inputslot, perhash->largestGrpColIdx);
ExecClearTuple(hashslot);
- for (i = 0; i < aggstate->numhashGrpCols; i++)
+ for (i = 0; i < perhash->numhashGrpCols; i++)
{
- int varNumber = aggstate->hashGrpColIdxInput[i] - 1;
+ int varNumber = perhash->hashGrpColIdxInput[i] - 1;
hashslot->tts_values[i] = inputslot->tts_values[varNumber];
hashslot->tts_isnull[i] = inputslot->tts_isnull[varNumber];
@@ -1867,22 +2000,44 @@ lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot)
ExecStoreVirtualTuple(hashslot);
/* find or create the hashtable entry using the filtered tuple */
- entry = LookupTupleHashEntry(aggstate->hashtable, hashslot, &isnew);
+ entry = LookupTupleHashEntry(perhash->hashtable, hashslot, &isnew);
if (isnew)
{
entry->additional = (AggStatePerGroup)
- MemoryContextAlloc(aggstate->hashtable->tablecxt,
+ MemoryContextAlloc(perhash->hashtable->tablecxt,
sizeof(AggStatePerGroupData) * aggstate->numtrans);
/* initialize aggregates for new tuple group */
initialize_aggregates(aggstate, (AggStatePerGroup) entry->additional,
- 0);
+ -1);
}
return entry;
}
/*
+ * Look up hash entries for the current tuple in all hashed grouping sets,
+ * returning an array of pergroup pointers suitable for advance_aggregates.
+ *
+ * Be aware that lookup_hash_entry can reset the tmpcontext.
+ */
+static AggStatePerGroup *
+lookup_hash_entries(AggState *aggstate)
+{
+ int numHashes = aggstate->num_hashes;
+ AggStatePerGroup *pergroup = aggstate->hash_pergroup;
+ int setno;
+
+ for (setno = 0; setno < numHashes; setno++)
+ {
+ select_current_set(aggstate, setno, true);
+ pergroup[setno] = lookup_hash_entry(aggstate)->additional;
+ }
+
+ return pergroup;
+}
+
+/*
* ExecAgg -
*
* ExecAgg receives tuples from its outer subplan and aggregates over
@@ -1898,19 +2053,22 @@ lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot)
TupleTableSlot *
ExecAgg(AggState *node)
{
- TupleTableSlot *result;
+ TupleTableSlot *result = NULL;
if (!node->agg_done)
{
/* Dispatch based on strategy */
- switch (node->phase->aggnode->aggstrategy)
+ switch (node->phase->aggstrategy)
{
case AGG_HASHED:
if (!node->table_filled)
agg_fill_hash_table(node);
+ /* FALLTHROUGH */
+ case AGG_MIXED:
result = agg_retrieve_hash_table(node);
break;
- default:
+ case AGG_PLAIN:
+ case AGG_SORTED:
result = agg_retrieve_direct(node);
break;
}
@@ -1933,6 +2091,7 @@ agg_retrieve_direct(AggState *aggstate)
ExprContext *tmpcontext;
AggStatePerAgg peragg;
AggStatePerGroup pergroup;
+ AggStatePerGroup *hash_pergroups = NULL;
TupleTableSlot *outerslot;
TupleTableSlot *firstSlot;
TupleTableSlot *result;
@@ -2019,6 +2178,19 @@ agg_retrieve_direct(AggState *aggstate)
node = aggstate->phase->aggnode;
numReset = numGroupingSets;
}
+ else if (aggstate->aggstrategy == AGG_MIXED)
+ {
+ /*
+ * Mixed mode; we've output all the sorted-grouping results and
+ * the hashtables are fully populated, so switch to outputting
+ * those.
+ */
+ initialize_phase(aggstate, 0);
+ aggstate->table_filled = true;
+ ResetTupleHashIterator(aggstate->perhash[0].hashtable,
+ &aggstate->perhash[0].hashiter);
+ select_current_set(aggstate, 0, true);
+ return agg_retrieve_hash_table(aggstate);
+ }
else
{
aggstate->agg_done = true;
@@ -2055,7 +2227,7 @@ agg_retrieve_direct(AggState *aggstate)
*----------
*/
if (aggstate->input_done ||
- (node->aggstrategy == AGG_SORTED &&
+ (node->aggstrategy != AGG_PLAIN &&
aggstate->projected_set != -1 &&
aggstate->projected_set < (numGroupingSets - 1) &&
nextSetSize > 0 &&
@@ -2168,10 +2340,22 @@ agg_retrieve_direct(AggState *aggstate)
*/
for (;;)
{
+ /*
+ * During phase 1 of a mixed agg (and only then), we also need
+ * to update the hashtables in advance_aggregates.
+ */
+ if (aggstate->aggstrategy == AGG_MIXED &&
+ aggstate->current_phase == 1)
+ {
+ hash_pergroups = lookup_hash_entries(aggstate);
+ }
+ else
+ hash_pergroups = NULL;
+
if (DO_AGGSPLIT_COMBINE(aggstate->aggsplit))
combine_aggregates(aggstate, pergroup);
else
- advance_aggregates(aggstate, pergroup);
+ advance_aggregates(aggstate, pergroup, hash_pergroups);
/* Reset per-input-tuple context after each tuple */
ResetExprContext(tmpcontext);
@@ -2198,7 +2382,7 @@ agg_retrieve_direct(AggState *aggstate)
* If we are grouping, check whether we've crossed a group
* boundary.
*/
- if (node->aggstrategy == AGG_SORTED)
+ if (node->aggstrategy != AGG_PLAIN)
{
if (!execTuplesMatch(firstSlot,
outerslot,
@@ -2247,21 +2431,13 @@ agg_retrieve_direct(AggState *aggstate)
}
/*
- * ExecAgg for hashed case: phase 1, read input and build hash table
+ * ExecAgg for hashed case: read input and build hash table
*/
static void
agg_fill_hash_table(AggState *aggstate)
{
- ExprContext *tmpcontext;
- TupleHashEntryData *entry;
TupleTableSlot *outerslot;
-
- /*
- * get state info from node
- *
- * tmpcontext is the per-input-tuple expression context
- */
- tmpcontext = aggstate->tmpcontext;
+ ExprContext *tmpcontext = aggstate->tmpcontext;
/*
* Process each outer-plan tuple, and then fetch the next one, until we
@@ -2269,32 +2445,40 @@ agg_fill_hash_table(AggState *aggstate)
*/
for (;;)
{
+ AggStatePerGroup *pergroups;
+
outerslot = fetch_input_tuple(aggstate);
if (TupIsNull(outerslot))
break;
- /* set up for advance_aggregates call */
+
+ /* set up for lookup_hash_entries and advance_aggregates */
tmpcontext->ecxt_outertuple = outerslot;
- /* Find or build hashtable entry for this tuple's group */
- entry = lookup_hash_entry(aggstate, outerslot);
+ /* Find or build hashtable entries */
+ pergroups = lookup_hash_entries(aggstate);
/* Advance the aggregates */
if (DO_AGGSPLIT_COMBINE(aggstate->aggsplit))
- combine_aggregates(aggstate, (AggStatePerGroup) entry->additional);
+ combine_aggregates(aggstate, pergroups[0]);
else
- advance_aggregates(aggstate, (AggStatePerGroup) entry->additional);
+ advance_aggregates(aggstate, NULL, pergroups);
- /* Reset per-input-tuple context after each tuple */
- ResetExprContext(tmpcontext);
+ /*
+ * Reset per-input-tuple context after each tuple, but note that the
+ * hash lookups do this too.
+ */
+ ResetExprContext(aggstate->tmpcontext);
}
aggstate->table_filled = true;
- /* Initialize to walk the hash table */
- ResetTupleHashIterator(aggstate->hashtable, &aggstate->hashiter);
+ /* Initialize to walk the first hash table */
+ select_current_set(aggstate, 0, true);
+ ResetTupleHashIterator(aggstate->perhash[0].hashtable,
+ &aggstate->perhash[0].hashiter);
}
/*
- * ExecAgg for hashed case: phase 2, retrieving groups from hash table
+ * ExecAgg for hashed case: retrieving groups from hash table
*/
static TupleTableSlot *
agg_retrieve_hash_table(AggState *aggstate)
@@ -2305,17 +2489,22 @@ agg_retrieve_hash_table(AggState *aggstate)
TupleHashEntryData *entry;
TupleTableSlot *firstSlot;
TupleTableSlot *result;
- TupleTableSlot *hashslot;
+ AggStatePerHash perhash;
/*
- * get state info from node
+ * get state info from node.
+ *
+ * econtext is the per-output-tuple expression context.
*/
- /* econtext is the per-output-tuple expression context */
econtext = aggstate->ss.ps.ps_ExprContext;
peragg = aggstate->peragg;
firstSlot = aggstate->ss.ss_ScanTupleSlot;
- hashslot = aggstate->hashslot;
+ /*
+ * Note that perhash (and therefore anything accessed through it) can
+ * change inside the loop, as we change between grouping sets.
+ */
+ perhash = &aggstate->perhash[aggstate->current_set];
/*
* We loop retrieving groups until we find one satisfying
@@ -2323,17 +2512,37 @@ agg_retrieve_hash_table(AggState *aggstate)
*/
while (!aggstate->agg_done)
{
- int i;
+ TupleTableSlot *hashslot = perhash->hashslot;
+ int i;
/*
* Find the next entry in the hash table
*/
- entry = ScanTupleHashTable(aggstate->hashtable, &aggstate->hashiter);
+ entry = ScanTupleHashTable(perhash->hashtable, &perhash->hashiter);
if (entry == NULL)
{
- /* No more entries in hashtable, so done */
- aggstate->agg_done = TRUE;
- return NULL;
+ int nextset = aggstate->current_set + 1;
+
+ if (nextset < aggstate->num_hashes)
+ {
+ /*
+ * Switch to next grouping set, reinitialize, and restart the
+ * loop.
+ */
+ select_current_set(aggstate, nextset, true);
+
+ perhash = &aggstate->perhash[aggstate->current_set];
+
+ ResetTupleHashIterator(perhash->hashtable, &perhash->hashiter);
+
+ continue;
+ }
+ else
+ {
+ /* No more hashtables, so done */
+ aggstate->agg_done = TRUE;
+ return NULL;
+ }
}
/*
@@ -2356,9 +2565,9 @@ agg_retrieve_hash_table(AggState *aggstate)
memset(firstSlot->tts_isnull, true,
firstSlot->tts_tupleDescriptor->natts * sizeof(bool));
- for (i = 0; i < aggstate->numhashGrpCols; i++)
+ for (i = 0; i < perhash->numhashGrpCols; i++)
{
- int varNumber = aggstate->hashGrpColIdxInput[i] - 1;
+ int varNumber = perhash->hashGrpColIdxInput[i] - 1;
firstSlot->tts_values[varNumber] = hashslot->tts_values[i];
firstSlot->tts_isnull[varNumber] = hashslot->tts_isnull[i];
@@ -2367,14 +2576,18 @@ agg_retrieve_hash_table(AggState *aggstate)
pergroup = (AggStatePerGroup) entry->additional;
- finalize_aggregates(aggstate, peragg, pergroup, 0);
-
/*
* Use the representative input tuple for any references to
* non-aggregated input columns in the qual and tlist.
*/
econtext->ecxt_outertuple = firstSlot;
+ prepare_projection_slot(aggstate,
+ econtext->ecxt_outertuple,
+ aggstate->current_set);
+
+ finalize_aggregates(aggstate, peragg, pergroup, -1);
+
result = project_aggregates(aggstate);
if (result)
return result;
@@ -2388,7 +2601,36 @@ agg_retrieve_hash_table(AggState *aggstate)
* ExecInitAgg
*
* Creates the run-time information for the agg node produced by the
- * planner and initializes its outer subtree
+ * planner and initializes its outer subtree.
+ *
+ * What we get from the planner is actually one "real" Agg node which is part
+ * of the plan tree proper, but which optionally has an additional list of Agg
+ * nodes hung off the side via the "chain" field. This is because an Agg node
+ * happens to be a convenient representation of all the data we need for
+ * grouping sets.
+ *
+ * For many purposes, we treat the "real" node as if it were just the first
+ * node in the chain. The chain must be ordered such that hashed entries come
+ * before sorted/plain entries; the real node is marked AGG_MIXED if there are
+ * mixed types (in which case the real node describes one of the hashed
+ * groupings, other AGG_HASHED nodes may optionally follow in the chain,
+ * followed in turn by AGG_SORTED or (one) AGG_PLAIN node). If the real node
+ * is marked AGG_HASHED or AGG_SORTED, then all the chained nodes must be of
+ * the same type; if it is AGG_PLAIN, there can be no chained nodes.
+ *
+ * We collect all hashed nodes into a single "phase", numbered 0, and create a
+ * sorted phase (numbered 1..n) for each AGG_SORTED or AGG_PLAIN node. Phase
+ * 0 is allocated even if there are no hashes, but remains unused in that
+ * case.
+ *
+ * AGG_HASHED nodes actually refer to only a single grouping set each, because
+ * for each hashed grouping we need a separate grpColIdx and numGroups
+ * estimate. AGG_SORTED nodes represent a "rollup", a list of grouping sets
+ * that share a sort order. Each AGG_SORTED node other than the first one has
+ * an associated Sort node which describes the sort order to be used; the
+ * first sorted node takes its input from the outer subtree, which the planner
+ * has already arranged to provide ordered data.
+ *
* -----------------
*/
AggState *
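For instance, a hypothetical mixed plan following the rules above might be
represented as:

    real Agg:  AGG_MIXED, describing one hashed grouping, e.g. (a)
    chain[0]:  AGG_HASHED, a further hashed grouping, e.g. (b)
    chain[1]:  AGG_SORTED, rollup {(c,d), (c)}; no Sort node, since the
               planner has already ordered the input for the first
               sorted phase
    chain[2]:  AGG_SORTED, rollup {(e)}, with an attached Sort node
               giving its required sort order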
@@ -2403,14 +2645,18 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
transno,
aggno;
int phase;
+ int phaseidx;
List *combined_inputeval;
ListCell *l;
Bitmapset *all_grouped_cols = NULL;
int numGroupingSets = 1;
int numPhases;
+ int numHashes;
int column_offset;
int i = 0;
int j = 0;
+ bool use_hashing = (node->aggstrategy == AGG_HASHED ||
+ node->aggstrategy == AGG_MIXED);
/* check for unsupported flags */
Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
@@ -2425,9 +2671,9 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->aggs = NIL;
aggstate->numaggs = 0;
aggstate->numtrans = 0;
+ aggstate->aggstrategy = node->aggstrategy;
aggstate->aggsplit = node->aggsplit;
aggstate->maxsets = 0;
- aggstate->hashfunctions = NULL;
aggstate->projected_set = -1;
aggstate->current_set = 0;
aggstate->peragg = NULL;
@@ -2437,18 +2683,22 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->agg_done = false;
aggstate->pergroup = NULL;
aggstate->grp_firstTuple = NULL;
- aggstate->hashtable = NULL;
aggstate->sort_in = NULL;
aggstate->sort_out = NULL;
/*
+ * phases[0] always exists, but is dummy in sorted/plain mode
+ */
+ numPhases = (use_hashing ? 1 : 2);
+ numHashes = (use_hashing ? 1 : 0);
+
+ /*
* Calculate the maximum number of grouping sets in any phase; this
- * determines the size of some allocations.
+ * determines the size of some allocations. Also calculate the number of
+ * phases, which is not simply the chain length, since all hashed/mixed
+ * nodes contribute to only a single phase (phase 0).
*/
if (node->groupingSets)
{
- Assert(node->aggstrategy != AGG_HASHED);
-
numGroupingSets = list_length(node->groupingSets);
foreach(l, node->chain)
@@ -2457,22 +2707,32 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
numGroupingSets = Max(numGroupingSets,
list_length(agg->groupingSets));
+
+ /*
+ * additional AGG_HASHED aggs become part of phase 0, but all
+ * others add an extra phase.
+ */
+ if (agg->aggstrategy != AGG_HASHED)
+ ++numPhases;
+ else
+ ++numHashes;
}
}
aggstate->maxsets = numGroupingSets;
- aggstate->numphases = numPhases = 1 + list_length(node->chain);
+ aggstate->numphases = numPhases;
aggstate->aggcontexts = (ExprContext **)
palloc0(sizeof(ExprContext *) * numGroupingSets);
/*
* Create expression contexts. We need three or more, one for
- * per-input-tuple processing, one for per-output-tuple processing, and
- * one for each grouping set. The per-tuple memory context of the
- * per-grouping-set ExprContexts (aggcontexts) replaces the standalone
- * memory context formerly used to hold transition values. We cheat a
- * little by using ExecAssignExprContext() to build all of them.
+ * per-input-tuple processing, one for per-output-tuple processing, one
+ * for all the hashtables, and one for each grouping set. The per-tuple
+ * memory context of the per-grouping-set ExprContexts (aggcontexts)
+ * replaces the standalone memory context formerly used to hold transition
+ * values. We cheat a little by using ExecAssignExprContext() to build
+ * all of them.
*
* NOTE: the details of what is stored in aggcontexts and what is stored
* in the regular per-query memory context are driven by a simple
@@ -2488,14 +2748,21 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->aggcontexts[i] = aggstate->ss.ps.ps_ExprContext;
}
+ if (use_hashing)
+ {
+ ExecAssignExprContext(estate, &aggstate->ss.ps);
+ aggstate->hashcontext = aggstate->ss.ps.ps_ExprContext;
+ }
+
ExecAssignExprContext(estate, &aggstate->ss.ps);
/*
- * tuple table initialization
+ * tuple table initialization.
+ *
+ * For hashtables, we create some additional slots below.
*/
ExecInitScanTupleSlot(estate, &aggstate->ss);
ExecInitResultTupleSlot(estate, &aggstate->ss.ps);
- aggstate->hashslot = ExecInitExtraTupleSlot(estate);
aggstate->sort_slot = ExecInitExtraTupleSlot(estate);
/*
@@ -2560,19 +2827,26 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
* For each phase, prepare grouping set data and fmgr lookup data for
* compare functions. Accumulate all_grouped_cols in passing.
*/
-
aggstate->phases = palloc0(numPhases * sizeof(AggStatePerPhaseData));
- for (phase = 0; phase < numPhases; ++phase)
+ aggstate->num_hashes = numHashes;
+ if (numHashes)
+ {
+ aggstate->perhash = palloc0(sizeof(AggStatePerHashData) * numHashes);
+ aggstate->phases[0].numsets = 0;
+ aggstate->phases[0].gset_lengths = palloc(numHashes * sizeof(int));
+ aggstate->phases[0].grouped_cols = palloc(numHashes * sizeof(Bitmapset *));
+ }
+
+ phase = 0;
+ for (phaseidx = 0; phaseidx <= list_length(node->chain); ++phaseidx)
{
- AggStatePerPhase phasedata = &aggstate->phases[phase];
Agg *aggnode;
Sort *sortnode;
- int num_sets;
- if (phase > 0)
+ if (phaseidx > 0)
{
- aggnode = castNode(Agg, list_nth(node->chain, phase - 1));
+ aggnode = castNode(Agg, list_nth(node->chain, phaseidx - 1));
sortnode = castNode(Sort, aggnode->plan.lefttree);
}
else
@@ -2581,53 +2855,91 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
sortnode = NULL;
}
- phasedata->numsets = num_sets = list_length(aggnode->groupingSets);
+ Assert(phase <= 1 || sortnode);
- if (num_sets)
+ if (aggnode->aggstrategy == AGG_HASHED
+ || aggnode->aggstrategy == AGG_MIXED)
{
- phasedata->gset_lengths = palloc(num_sets * sizeof(int));
- phasedata->grouped_cols = palloc(num_sets * sizeof(Bitmapset *));
+ AggStatePerPhase phasedata = &aggstate->phases[0];
+ AggStatePerHash perhash;
+ Bitmapset *cols = NULL;
- i = 0;
- foreach(l, aggnode->groupingSets)
- {
- int current_length = list_length(lfirst(l));
- Bitmapset *cols = NULL;
+ Assert(phase == 0);
+ i = phasedata->numsets++;
+ perhash = &aggstate->perhash[i];
- /* planner forces this to be correct */
- for (j = 0; j < current_length; ++j)
- cols = bms_add_member(cols, aggnode->grpColIdx[j]);
+ /* phase 0 always points to the "real" Agg in the hash case */
+ phasedata->aggnode = node;
+ phasedata->aggstrategy = node->aggstrategy;
- phasedata->grouped_cols[i] = cols;
- phasedata->gset_lengths[i] = current_length;
- ++i;
- }
+ /* but the actual Agg node representing this hash is saved here */
+ perhash->aggnode = aggnode;
+
+ phasedata->gset_lengths[i] = perhash->numCols = aggnode->numCols;
- all_grouped_cols = bms_add_members(all_grouped_cols,
- phasedata->grouped_cols[0]);
+ for (j = 0; j < aggnode->numCols; ++j)
+ cols = bms_add_member(cols, aggnode->grpColIdx[j]);
+
+ phasedata->grouped_cols[i] = cols;
+
+ all_grouped_cols = bms_add_members(all_grouped_cols, cols);
+ continue;
}
else
{
- Assert(phase == 0);
+ AggStatePerPhase phasedata = &aggstate->phases[++phase];
+ int num_sets;
- phasedata->gset_lengths = NULL;
- phasedata->grouped_cols = NULL;
- }
+ phasedata->numsets = num_sets = list_length(aggnode->groupingSets);
- /*
- * If we are grouping, precompute fmgr lookup data for inner loop.
- */
- if (aggnode->aggstrategy == AGG_SORTED)
- {
- Assert(aggnode->numCols > 0);
+ if (num_sets)
+ {
+ phasedata->gset_lengths = palloc(num_sets * sizeof(int));
+ phasedata->grouped_cols = palloc(num_sets * sizeof(Bitmapset *));
- phasedata->eqfunctions =
- execTuplesMatchPrepare(aggnode->numCols,
- aggnode->grpOperators);
- }
+ i = 0;
+ foreach(l, aggnode->groupingSets)
+ {
+ int current_length = list_length(lfirst(l));
+ Bitmapset *cols = NULL;
+
+ /* planner forces this to be correct */
+ for (j = 0; j < current_length; ++j)
+ cols = bms_add_member(cols, aggnode->grpColIdx[j]);
+
+ phasedata->grouped_cols[i] = cols;
+ phasedata->gset_lengths[i] = current_length;
+
+ ++i;
+ }
- phasedata->aggnode = aggnode;
- phasedata->sortnode = sortnode;
+ all_grouped_cols = bms_add_members(all_grouped_cols,
+ phasedata->grouped_cols[0]);
+ }
+ else
+ {
+ Assert(phaseidx == 0);
+
+ phasedata->gset_lengths = NULL;
+ phasedata->grouped_cols = NULL;
+ }
+
+ /*
+ * If we are grouping, precompute fmgr lookup data for inner loop.
+ */
+ if (aggnode->aggstrategy == AGG_SORTED)
+ {
+ Assert(aggnode->numCols > 0);
+
+ phasedata->eqfunctions =
+ execTuplesMatchPrepare(aggnode->numCols,
+ aggnode->grpOperators);
+ }
+
+ phasedata->aggnode = aggnode;
+ phasedata->aggstrategy = aggnode->aggstrategy;
+ phasedata->sortnode = sortnode;
+ }
}
/*
@@ -2638,13 +2950,6 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->all_grouped_cols = lcons_int(i, aggstate->all_grouped_cols);
/*
- * Initialize current phase-dependent values to initial phase
- */
-
- aggstate->current_phase = 0;
- initialize_phase(aggstate, 0);
-
- /*
* Set up aggregate-result storage in the output expr context, and also
* allocate my private per-agg working storage
*/
@@ -2658,23 +2963,30 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->peragg = peraggs;
aggstate->pertrans = pertransstates;
-
/*
* Hashing can only appear in the initial phase.
*/
- if (node->aggstrategy == AGG_HASHED)
+ if (use_hashing)
{
- find_hash_columns(aggstate);
+ for (i = 0; i < numHashes; ++i)
+ {
+ aggstate->perhash[i].hashslot = ExecInitExtraTupleSlot(estate);
- execTuplesHashPrepare(node->numCols,
- node->grpOperators,
- &aggstate->phases[0].eqfunctions,
- &aggstate->hashfunctions);
+ execTuplesHashPrepare(aggstate->perhash[i].numCols,
+ aggstate->perhash[i].aggnode->grpOperators,
+ &aggstate->perhash[i].eqfunctions,
+ &aggstate->perhash[i].hashfunctions);
+ }
+ /* this is an array of pointers, not structures */
+ aggstate->hash_pergroup = palloc0(sizeof(AggStatePerGroup) * numHashes);
+
+ find_hash_columns(aggstate);
build_hash_table(aggstate);
aggstate->table_filled = false;
}
- else
+
+ if (node->aggstrategy != AGG_HASHED)
{
AggStatePerGroup pergroup;
@@ -2685,6 +2997,26 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->pergroup = pergroup;
}
+ /*
+ * Initialize current phase-dependent values to the initial phase. The initial
+ * phase is 1 (first sort pass) for all strategies that use sorting (if
+ * hashing is being done too, then phase 0 is processed last); but if only
+ * hashing is being done, then phase 0 is all there is.
+ */
+
+ if (node->aggstrategy == AGG_HASHED)
+ {
+ aggstate->current_phase = 0;
+ initialize_phase(aggstate, 0);
+ select_current_set(aggstate, 0, true);
+ }
+ else
+ {
+ aggstate->current_phase = 1;
+ initialize_phase(aggstate, 1);
+ select_current_set(aggstate, 0, false);
+ }
+
/* -----------------
* Perform lookups of aggregate function info, and initialize the
* unchanging fields of the per-agg and per-trans data.
@@ -3263,7 +3595,7 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
* We don't implement DISTINCT or ORDER BY aggs in the HASHED case
* (yet)
*/
- Assert(((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED);
+ Assert(aggstate->aggstrategy != AGG_HASHED && aggstate->aggstrategy != AGG_MIXED);
/* If we have only one input, we need its len/byval info. */
if (numInputs == 1)
@@ -3512,6 +3844,8 @@ ExecEndAgg(AggState *node)
/* And ensure any agg shutdown callbacks have been called */
for (setno = 0; setno < numGroupingSets; setno++)
ReScanExprContext(node->aggcontexts[setno]);
+ if (node->hashcontext)
+ ReScanExprContext(node->hashcontext);
/*
* We don't actually free any ExprContexts here (see comment in
@@ -3539,7 +3873,7 @@ ExecReScanAgg(AggState *node)
node->agg_done = false;
- if (aggnode->aggstrategy == AGG_HASHED)
+ if (node->aggstrategy == AGG_HASHED)
{
/*
* In the hashed case, if we haven't yet built the hash table then we
@@ -3559,7 +3893,9 @@ ExecReScanAgg(AggState *node)
if (outerPlan->chgParam == NULL &&
!bms_overlap(node->ss.ps.chgParam, aggnode->aggParams))
{
- ResetTupleHashIterator(node->hashtable, &node->hashiter);
+ ResetTupleHashIterator(node->perhash[0].hashtable,
+ &node->perhash[0].hashiter);
+ select_current_set(node, 0, true);
return;
}
}
@@ -3584,11 +3920,7 @@ ExecReScanAgg(AggState *node)
* ExecReScan already did it. But we do need to reset our per-grouping-set
* contexts, which may have transvalues stored in them. (We use rescan
* rather than just reset because transfns may have registered callbacks
- * that need to be run now.)
- *
- * Note that with AGG_HASHED, the hash table is allocated in a sub-context
- * of the aggcontext. This used to be an issue, but now, resetting a
- * context automatically deletes sub-contexts too.
+ * that need to be run now.) For the AGG_HASHED case, see below.
*/
for (setno = 0; setno < numGroupingSets; setno++)
@@ -3608,13 +3940,22 @@ ExecReScanAgg(AggState *node)
MemSet(econtext->ecxt_aggvalues, 0, sizeof(Datum) * node->numaggs);
MemSet(econtext->ecxt_aggnulls, 0, sizeof(bool) * node->numaggs);
- if (aggnode->aggstrategy == AGG_HASHED)
+ /*
+ * With AGG_HASHED/MIXED, the hash table is allocated in a sub-context of
+ * the hashcontext. This used to be an issue, but now, resetting a context
+ * automatically deletes sub-contexts too.
+ */
+
+ if (node->aggstrategy == AGG_HASHED || node->aggstrategy == AGG_MIXED)
{
+ ReScanExprContext(node->hashcontext);
/* Rebuild an empty hash table */
build_hash_table(node);
node->table_filled = false;
+ /* iterator will be reset when the table is filled */
}
- else
+
+ if (node->aggstrategy != AGG_HASHED)
{
/*
* Reset the per-group state (in particular, mark transvalues null)
@@ -3622,8 +3963,8 @@ ExecReScanAgg(AggState *node)
MemSet(node->pergroup, 0,
sizeof(AggStatePerGroupData) * node->numaggs * numGroupingSets);
- /* reset to phase 0 */
- initialize_phase(node, 0);
+ /* reset to phase 1 */
+ initialize_phase(node, 1);
node->input_done = false;
node->projected_set = -1;
@@ -3664,7 +4005,7 @@ AggCheckCallContext(FunctionCallInfo fcinfo, MemoryContext *aggcontext)
if (aggcontext)
{
AggState *aggstate = ((AggState *) fcinfo->context);
- ExprContext *cxt = aggstate->aggcontexts[aggstate->current_set];
+ ExprContext *cxt = aggstate->curaggcontext;
*aggcontext = cxt->ecxt_per_tuple_memory;
}
@@ -3753,7 +4094,7 @@ AggRegisterCallback(FunctionCallInfo fcinfo,
if (fcinfo->context && IsA(fcinfo->context, AggState))
{
AggState *aggstate = (AggState *) fcinfo->context;
- ExprContext *cxt = aggstate->aggcontexts[aggstate->current_set];
+ ExprContext *cxt = aggstate->curaggcontext;
RegisterExprContextCallback(cxt, func, arg);
diff --git a/src/backend/lib/Makefile b/src/backend/lib/Makefile
index 2d2ba84..f222c6c 100644
--- a/src/backend/lib/Makefile
+++ b/src/backend/lib/Makefile
@@ -12,7 +12,7 @@ subdir = src/backend/lib
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
-OBJS = binaryheap.o bipartite_match.o hyperloglog.o ilist.o pairingheap.o \
- rbtree.o stringinfo.o
+OBJS = binaryheap.o bipartite_match.o hyperloglog.o ilist.o knapsack.o \
+ pairingheap.o rbtree.o stringinfo.o
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/lib/knapsack.c b/src/backend/lib/knapsack.c
new file mode 100644
index 0000000..ddf2b9a
--- /dev/null
+++ b/src/backend/lib/knapsack.c
@@ -0,0 +1,114 @@
+/*-------------------------------------------------------------------------
+ *
+ * knapsack.c
+ * Knapsack problem solver
+ *
+ * Given input vectors of integral item weights (must be >= 0) and values
+ * (double >= 0), compute the set of items which produces the greatest total
+ * value without exceeding a specified total weight; each item is included at
+ * most once (this is the 0/1 knapsack problem). Weight 0 items will always be
+ * included.
+ *
+ * The performance of this algorithm is pseudo-polynomial, O(nW) where W is the
+ * weight limit. To use this with non-integral weights, or to bound the
+ * cost, the caller should pre-scale the input weights to a suitable range;
+ * this gives approximate solutions in polynomial time (the general case of
+ * the exact problem is NP-hard).
+ *
+ * Copyright (c) 2017, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/lib/knapsack.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <math.h>
+#include <limits.h>
+
+#include "lib/knapsack.h"
+#include "miscadmin.h"
+#include "nodes/bitmapset.h"
+#include "utils/builtins.h"
+#include "utils/memutils.h"
+#include "utils/palloc.h"
+
+/*
+ * DiscreteKnapsack
+ *
+ * The item_values input is optional; if omitted, all the items are assumed to
+ * have value 1.
+ *
+ * Returns a Bitmapset of the 0..(n-1) indexes of the items chosen for
+ * inclusion in the solution.
+ *
+ * This uses the usual dynamic-programming algorithm, adapted to reuse the
+ * memory on each pass (by working from larger weights to smaller). At the
+ * start of pass number i, the values[w] array contains the largest value
+ * computed with total weight <= w, using only items with indices < i; and
+ * sets[w] contains the bitmap of items actually used for that value. (The
+ * bitmapsets are all pre-initialized with an unused high bit so that memory
+ * allocation is done only once.)
+ */
+Bitmapset *
+DiscreteKnapsack(int max_weight, int num_items,
+ int *item_weights, double *item_values)
+{
+ MemoryContext local_ctx = AllocSetContextCreate(CurrentMemoryContext,
+ "Knapsack",
+ ALLOCSET_SMALL_MINSIZE,
+ ALLOCSET_SMALL_INITSIZE,
+ ALLOCSET_SMALL_MAXSIZE);
+ MemoryContext oldctx = MemoryContextSwitchTo(local_ctx);
+ double *values;
+ Bitmapset **sets;
+ Bitmapset *result;
+ int i,
+ j;
+
+ Assert(max_weight >= 0);
+ Assert(num_items > 0 && item_weights);
+
+ values = palloc((1 + max_weight) * sizeof(double));
+ sets = palloc((1 + max_weight) * sizeof(Bitmapset *));
+
+ for (i = 0; i <= max_weight; ++i)
+ {
+ values[i] = 0;
+ sets[i] = bms_make_singleton(num_items);
+ }
+
+ for (i = 0; i < num_items; ++i)
+ {
+ int iw = item_weights[i];
+ double iv = item_values ? item_values[i] : 1;
+
+ for (j = max_weight; j >= iw; --j)
+ {
+ int ow = j - iw;
+
+ if (values[j] <= values[ow] + iv)
+ {
+ /* copy sets[ow] to sets[j] without realloc */
+ if (j != ow)
+ {
+ sets[j] = bms_del_members(sets[j], sets[j]);
+ sets[j] = bms_add_members(sets[j], sets[ow]);
+ }
+
+ sets[j] = bms_add_member(sets[j], i);
+
+ values[j] = values[ow] + iv;
+ }
+ }
+ }
+
+ MemoryContextSwitchTo(oldctx);
+
+ result = bms_del_member(bms_copy(sets[max_weight]), num_items);
+
+ MemoryContextDelete(local_ctx);
+
+ return result;
+}
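A minimal usage sketch (the item weights and values here are made up):

    #include "postgres.h"
    #include "lib/knapsack.h"

    static Bitmapset *
    choose_items(void)
    {
        int     weights[] = {2, 3, 4};
        double  values[]  = {3.0, 4.0, 5.0};

        /*
         * Capacity 5: the solver picks items {0, 1}, for total weight 5
         * and total value 7.0, beating item 2 alone (value 5.0).
         */
        return DiscreteKnapsack(5, 3, weights, values);
    }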
diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c
index 252af5c..ea1c538 100644
--- a/src/backend/nodes/bitmapset.c
+++ b/src/backend/nodes/bitmapset.c
@@ -21,7 +21,7 @@
#include "postgres.h"
#include "access/hash.h"
-
+#include "nodes/pg_list.h"
#define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD)
#define BITNUM(x) ((x) % BITS_PER_BITMAPWORD)
@@ -458,6 +458,35 @@ bms_overlap(const Bitmapset *a, const Bitmapset *b)
}
/*
+ * bms_overlap_list - does a set overlap an integer list?
+ */
+bool
+bms_overlap_list(const Bitmapset *a, const List *b)
+{
+ ListCell *lc;
+ int wordnum,
+ bitnum;
+
+ if (a == NULL || b == NIL)
+ return false;
+
+ foreach(lc, b)
+ {
+ int x = lfirst_int(lc);
+
+ if (x < 0)
+ elog(ERROR, "negative bitmapset member not allowed");
+ wordnum = WORDNUM(x);
+ bitnum = BITNUM(x);
+ if (wordnum < a->nwords)
+ if ((a->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0)
+ return true;
+ }
+
+ return false;
+}
+
+/*
* bms_nonempty_difference - do sets have a nonempty difference?
*/
bool
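For example (illustrative values only):

    /* does the set {7} overlap the integer list (3, 7)? */
    List       *refs = list_make2_int(3, 7);
    Bitmapset  *set = bms_make_singleton(7);
    bool        hit = bms_overlap_list(set, refs);      /* true */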
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index b3802b4..59a7044 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -1875,6 +1875,28 @@ _outAggPath(StringInfo str, const AggPath *node)
}
static void
+_outRollupData(StringInfo str, const RollupData *node)
+{
+ WRITE_NODE_TYPE("ROLLUP");
+
+ WRITE_NODE_FIELD(groupClause);
+ WRITE_NODE_FIELD(gsets);
+ WRITE_NODE_FIELD(gsets_data);
+ WRITE_FLOAT_FIELD(numGroups, "%.0f");
+ WRITE_BOOL_FIELD(hashable);
+ WRITE_BOOL_FIELD(is_hashed);
+}
+
+static void
+_outGroupingSetData(StringInfo str, const GroupingSetData *node)
+{
+ WRITE_NODE_TYPE("GSDATA");
+
+ WRITE_NODE_FIELD(set);
+ WRITE_FLOAT_FIELD(numGroups, "%.0f");
+}
+
+static void
_outGroupingSetsPath(StringInfo str, const GroupingSetsPath *node)
{
WRITE_NODE_TYPE("GROUPINGSETSPATH");
@@ -1882,8 +1904,8 @@ _outGroupingSetsPath(StringInfo str, const GroupingSetsPath *node)
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(subpath);
- WRITE_NODE_FIELD(rollup_groupclauses);
- WRITE_NODE_FIELD(rollup_lists);
+ WRITE_ENUM_FIELD(aggstrategy, AggStrategy);
+ WRITE_NODE_FIELD(rollups);
WRITE_NODE_FIELD(qual);
}
@@ -3800,6 +3822,12 @@ outNode(StringInfo str, const void *obj)
case T_PlannerParamItem:
_outPlannerParamItem(str, obj);
break;
+ case T_RollupData:
+ _outRollupData(str, obj);
+ break;
+ case T_GroupingSetData:
+ _outGroupingSetData(str, obj);
+ break;
case T_ExtensibleNode:
_outExtensibleNode(str, obj);
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index c138f57..937e30d 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1739,11 +1739,16 @@ cost_agg(Path *path, PlannerInfo *root,
total_cost = startup_cost + cpu_tuple_cost;
output_tuples = 1;
}
- else if (aggstrategy == AGG_SORTED)
+ else if (aggstrategy == AGG_SORTED || aggstrategy == AGG_MIXED)
{
/* Here we are able to deliver output on-the-fly */
startup_cost = input_startup_cost;
total_cost = input_total_cost;
+ if (aggstrategy == AGG_MIXED && !enable_hashagg)
+ {
+ startup_cost += disable_cost;
+ total_cost += disable_cost;
+ }
/* calcs phrased this way to match HASHED case, see note above */
total_cost += aggcosts->transCost.startup;
total_cost += aggcosts->transCost.per_tuple * input_tuples;
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 1e953b4..d600f08 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -1707,18 +1707,15 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
{
Agg *plan;
Plan *subplan;
- List *rollup_groupclauses = best_path->rollup_groupclauses;
- List *rollup_lists = best_path->rollup_lists;
+ List *rollups = best_path->rollups;
AttrNumber *grouping_map;
int maxref;
List *chain;
- ListCell *lc,
- *lc2;
+ ListCell *lc;
/* Shouldn't get here without grouping sets */
Assert(root->parse->groupingSets);
- Assert(rollup_lists != NIL);
- Assert(list_length(rollup_lists) == list_length(rollup_groupclauses));
+ Assert(rollups != NIL);
/*
* Agg can project, so no need to be terribly picky about child tlist, but
@@ -1770,72 +1767,86 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
* costs will be shown by EXPLAIN.
*/
chain = NIL;
- if (list_length(rollup_groupclauses) > 1)
+ if (list_length(rollups) > 1)
{
- forboth(lc, rollup_groupclauses, lc2, rollup_lists)
+ ListCell *lc2 = lnext(list_head(rollups));
+ bool is_first_sort = ((RollupData *) linitial(rollups))->is_hashed;
+
+ for_each_cell(lc, lc2)
{
- List *groupClause = (List *) lfirst(lc);
- List *gsets = (List *) lfirst(lc2);
+ RollupData *rollup = lfirst(lc);
AttrNumber *new_grpColIdx;
- Plan *sort_plan;
+ Plan *sort_plan = NULL;
Plan *agg_plan;
+ AggStrategy strat;
- /* We want to iterate over all but the last rollup list elements */
- if (lnext(lc) == NULL)
- break;
+ new_grpColIdx = remap_groupColIdx(root, rollup->groupClause);
+
+ if (!rollup->is_hashed && !is_first_sort)
+ {
+ sort_plan = (Plan *)
+ make_sort_from_groupcols(rollup->groupClause,
+ new_grpColIdx,
+ subplan);
+ }
- new_grpColIdx = remap_groupColIdx(root, groupClause);
+ if (!rollup->is_hashed)
+ is_first_sort = false;
- sort_plan = (Plan *)
- make_sort_from_groupcols(groupClause,
- new_grpColIdx,
- subplan);
+ if (rollup->is_hashed)
+ strat = AGG_HASHED;
+ else if (list_length(linitial(rollup->gsets)) == 0)
+ strat = AGG_PLAIN;
+ else
+ strat = AGG_SORTED;
agg_plan = (Plan *) make_agg(NIL,
NIL,
- AGG_SORTED,
+ strat,
AGGSPLIT_SIMPLE,
- list_length((List *) linitial(gsets)),
+ list_length((List *) linitial(rollup->gsets)),
new_grpColIdx,
- extract_grouping_ops(groupClause),
- gsets,
+ extract_grouping_ops(rollup->groupClause),
+ rollup->gsets,
NIL,
- 0, /* numGroups not needed */
+ rollup->numGroups,
sort_plan);
/*
- * Nuke stuff we don't need to avoid bloating debug output.
+ * Remove stuff we don't need to avoid bloating debug output.
*/
- sort_plan->targetlist = NIL;
- sort_plan->lefttree = NULL;
+ if (sort_plan)
+ {
+ sort_plan->targetlist = NIL;
+ sort_plan->lefttree = NULL;
+ }
chain = lappend(chain, agg_plan);
}
}
/*
- * Now make the final Agg node
+ * Now make the real Agg node
*/
{
- List *groupClause = (List *) llast(rollup_groupclauses);
- List *gsets = (List *) llast(rollup_lists);
+ RollupData *rollup = linitial(rollups);
AttrNumber *top_grpColIdx;
int numGroupCols;
- top_grpColIdx = remap_groupColIdx(root, groupClause);
+ top_grpColIdx = remap_groupColIdx(root, rollup->groupClause);
- numGroupCols = list_length((List *) linitial(gsets));
+ numGroupCols = list_length((List *) linitial(rollup->gsets));
plan = make_agg(build_path_tlist(root, &best_path->path),
best_path->qual,
- (numGroupCols > 0) ? AGG_SORTED : AGG_PLAIN,
+ best_path->aggstrategy,
AGGSPLIT_SIMPLE,
numGroupCols,
top_grpColIdx,
- extract_grouping_ops(groupClause),
- gsets,
+ extract_grouping_ops(rollup->groupClause),
+ rollup->gsets,
chain,
- 0, /* numGroups not needed */
+ rollup->numGroups,
subplan);
/* Copy cost data from Path to Plan */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index ca0ae78..e5fdbe7 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -30,6 +30,7 @@
#include "foreign/fdwapi.h"
#include "miscadmin.h"
#include "lib/bipartite_match.h"
+#include "lib/knapsack.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#ifdef OPTIMIZER_DEBUG
@@ -89,12 +90,31 @@ typedef struct
List *groupClause; /* overrides parse->groupClause */
} standard_qp_extra;
+/*
+ * Data specific to grouping sets
+ */
+
+typedef struct
+{
+ List *rollups; /* list of RollupData */
+ List *hash_sets_idx; /* index-remapped copies of unsortable_sets */
+ double dNumHashGroups; /* est. number of groups for unsortable sets */
+ bool any_hashable; /* can any grouping set be hashed? */
+ Bitmapset *unsortable_refs; /* tleSortGroupRefs lacking a sort operator */
+ Bitmapset *unhashable_refs; /* tleSortGroupRefs lacking hash support */
+ List *unsortable_sets; /* GroupingSetData for sets that must be hashed */
+ int *tleref_to_colnum_map; /* workspace: sortgroupref -> column index */
+} grouping_sets_data;
+
/* Local functions */
static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
static void inheritance_planner(PlannerInfo *root);
static void grouping_planner(PlannerInfo *root, bool inheritance_update,
double tuple_fraction);
+static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root);
+static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
+ int *tleref_to_colnum_map);
static void preprocess_rowmarks(PlannerInfo *root);
static double preprocess_limit(PlannerInfo *root,
double tuple_fraction,
@@ -107,8 +127,7 @@ static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
static void standard_qp_callback(PlannerInfo *root, void *extra);
static double get_number_of_groups(PlannerInfo *root,
double path_rows,
- List *rollup_lists,
- List *rollup_groupclauses);
+ grouping_sets_data *gd);
static Size estimate_hashagg_tablesize(Path *path,
const AggClauseCosts *agg_costs,
double dNumGroups);
@@ -116,8 +135,16 @@ static RelOptInfo *create_grouping_paths(PlannerInfo *root,
RelOptInfo *input_rel,
PathTarget *target,
const AggClauseCosts *agg_costs,
- List *rollup_lists,
- List *rollup_groupclauses);
+ grouping_sets_data *gd);
+static void consider_groupingsets_paths(PlannerInfo *root,
+ RelOptInfo *grouped_rel,
+ Path *path,
+ bool is_sorted,
+ bool can_hash,
+ PathTarget *target,
+ grouping_sets_data *gd,
+ const AggClauseCosts *agg_costs,
+ double dNumGroups);
static RelOptInfo *create_window_paths(PlannerInfo *root,
RelOptInfo *input_rel,
PathTarget *input_target,
@@ -1494,8 +1521,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
AggClauseCosts agg_costs;
WindowFuncLists *wflists = NULL;
List *activeWindows = NIL;
- List *rollup_lists = NIL;
- List *rollup_groupclauses = NIL;
+ grouping_sets_data *gset_data = NULL;
standard_qp_extra qp_extra;
/* A recursive query should always have setOperations */
@@ -1504,84 +1530,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
/* Preprocess grouping sets and GROUP BY clause, if any */
if (parse->groupingSets)
{
- int *tleref_to_colnum_map;
- List *sets;
- int maxref;
- ListCell *lc;
- ListCell *lc2;
- ListCell *lc_set;
-
- parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1);
-
- /* Identify max SortGroupRef in groupClause, for array sizing */
- maxref = 0;
- foreach(lc, parse->groupClause)
- {
- SortGroupClause *gc = lfirst(lc);
-
- if (gc->tleSortGroupRef > maxref)
- maxref = gc->tleSortGroupRef;
- }
-
- /* Allocate workspace array for remapping */
- tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
-
- /* Examine the rollup sets */
- sets = extract_rollup_sets(parse->groupingSets);
-
- foreach(lc_set, sets)
- {
- List *current_sets = (List *) lfirst(lc_set);
- List *groupclause;
- int ref;
-
- /*
- * Reorder the current list of grouping sets into correct
- * prefix order. If only one aggregation pass is needed, try
- * to make the list match the ORDER BY clause; if more than
- * one pass is needed, we don't bother with that.
- */
- current_sets = reorder_grouping_sets(current_sets,
- (list_length(sets) == 1
- ? parse->sortClause
- : NIL));
-
- /*
- * Order the groupClause appropriately. If the first grouping
- * set is empty, this can match regular GROUP BY
- * preprocessing, otherwise we have to force the groupClause
- * to match that grouping set's order.
- */
- groupclause = preprocess_groupclause(root,
- linitial(current_sets));
-
- /*
- * Now that we've pinned down an order for the groupClause for
- * this list of grouping sets, we need to remap the entries in
- * the grouping sets from sortgrouprefs to plain indices
- * (0-based) into the groupClause for this collection of
- * grouping sets.
- */
- ref = 0;
- foreach(lc, groupclause)
- {
- SortGroupClause *gc = lfirst(lc);
-
- tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
- }
-
- foreach(lc, current_sets)
- {
- foreach(lc2, (List *) lfirst(lc))
- {
- lfirst_int(lc2) = tleref_to_colnum_map[lfirst_int(lc2)];
- }
- }
-
- /* Save the reordered sets and corresponding groupclauses */
- rollup_lists = lcons(current_sets, rollup_lists);
- rollup_groupclauses = lcons(groupclause, rollup_groupclauses);
- }
+ gset_data = preprocess_grouping_sets(root);
}
else
{
@@ -1675,8 +1624,9 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
/* Set up data needed by standard_qp_callback */
qp_extra.tlist = tlist;
qp_extra.activeWindows = activeWindows;
- qp_extra.groupClause =
- parse->groupingSets ? llast(rollup_groupclauses) : parse->groupClause;
+ qp_extra.groupClause = (gset_data
+ ? (gset_data->rollups ? ((RollupData *) linitial(gset_data->rollups))->groupClause : NIL)
+ : parse->groupClause);
/*
* Generate the best unsorted and presorted paths for the scan/join
@@ -1876,8 +1826,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
current_rel,
grouping_target,
&agg_costs,
- rollup_lists,
- rollup_groupclauses);
+ gset_data);
/* Fix things up if grouping_target contains SRFs */
if (parse->hasTargetSRFs)
adjust_paths_for_srfs(root, current_rel,
@@ -1914,7 +1863,6 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
current_rel = create_distinct_paths(root,
current_rel);
}
-
} /* end of if (setOperations) */
/*
@@ -2067,6 +2015,216 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
}
+static grouping_sets_data *
+preprocess_grouping_sets(PlannerInfo *root)
+{
+ Query *parse = root->parse;
+ List *sets;
+ int maxref = 0;
+ ListCell *lc;
+ ListCell *lc_set;
+ grouping_sets_data *gd = palloc0(sizeof(grouping_sets_data));
+
+ parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1);
+
+ gd->any_hashable = false;
+ gd->unhashable_refs = NULL;
+ gd->unsortable_refs = NULL;
+ gd->unsortable_sets = NIL;
+
+ if (parse->groupClause)
+ {
+ ListCell *lc;
+
+ foreach(lc, parse->groupClause)
+ {
+ SortGroupClause *gc = lfirst(lc);
+ Index ref = gc->tleSortGroupRef;
+
+ if (ref > maxref)
+ maxref = ref;
+
+ if (!gc->hashable)
+ gd->unhashable_refs = bms_add_member(gd->unhashable_refs, ref);
+
+ if (!OidIsValid(gc->sortop))
+ gd->unsortable_refs = bms_add_member(gd->unsortable_refs, ref);
+ }
+ }
+
+ /* Allocate workspace array for remapping */
+ gd->tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
+
+ /*
+ * If we have any unsortable sets, we must extract them before trying to
+ * prepare rollups. Unsortable sets don't go through
+ * reorder_grouping_sets, so we must apply the GroupingSetData annotation
+ * here.
+ */
+ if (!bms_is_empty(gd->unsortable_refs))
+ {
+ List *sortable_sets = NIL;
+
+ foreach(lc, parse->groupingSets)
+ {
+ List *gset = lfirst(lc);
+
+ if (bms_overlap_list(gd->unsortable_refs, gset))
+ {
+ GroupingSetData *gs = makeNode(GroupingSetData);
+
+ gs->set = gset;
+ gd->unsortable_sets = lappend(gd->unsortable_sets, gs);
+
+ /*
+ * We must enforce here that an unsortable set is hashable;
+ * later code assumes this. Parse analysis only checks that
+ * every individual column is either hashable or sortable.
+ *
+ * Note that passing this test doesn't guarantee we can
+ * generate a plan; there might be other showstoppers.
+ */
+
+ if (bms_overlap_list(gd->unhashable_refs, gset))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("could not implement GROUP BY"),
+ errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
+ }
+ else
+ sortable_sets = lappend(sortable_sets, gset);
+ }
+
+ if (sortable_sets)
+ sets = extract_rollup_sets(sortable_sets);
+ else
+ sets = NIL;
+ }
+ else
+ sets = extract_rollup_sets(parse->groupingSets);
+
+ foreach(lc_set, sets)
+ {
+ List *current_sets = (List *) lfirst(lc_set);
+ RollupData *rollup = makeNode(RollupData);
+ GroupingSetData *gs;
+
+ /*
+ * Reorder the current list of grouping sets into correct prefix
+ * order. If only one aggregation pass is needed, try to make the
+ * list match the ORDER BY clause; if more than one pass is needed, we
+ * don't bother with that.
+ *
+ * Note that this reorders the sets from smallest-member-first to
+ * largest-member-first, and applies the GroupingSetData annotations,
+ * though the data will be filled in later.
+ */
+ current_sets = reorder_grouping_sets(current_sets,
+ (list_length(sets) == 1
+ ? parse->sortClause
+ : NIL));
+
+ /*
+ * Get the initial (and therefore largest) grouping set.
+ */
+ gs = linitial(current_sets);
+
+ /*
+ * Order the groupClause appropriately. If the first grouping set is
+ * empty, then the groupClause must also be empty; otherwise we have
+ * to force the groupClause to match that grouping set's order.
+ *
+ * (The first grouping set can be empty even though parse->groupClause
+ * is not empty only if all non-empty grouping sets are unsortable.
+ * The groupClauses for hashed grouping sets are built later on.)
+ */
+ if (gs->set)
+ rollup->groupClause = preprocess_groupclause(root, gs->set);
+ else
+ rollup->groupClause = NIL;
+
+ /*
+ * Is it hashable? We pretend empty sets are hashable even though we
+ * actually force them not to be hashed later. But don't bother if
+ * there's nothing but empty sets.
+ */
+ if (gs->set &&
+ !bms_overlap_list(gd->unhashable_refs, gs->set))
+ {
+ rollup->hashable = true;
+ gd->any_hashable = true;
+ }
+
+ /*
+ * Now that we've pinned down an order for the groupClause for this
+ * list of grouping sets, we need to remap the entries in the grouping
+ * sets from sortgrouprefs to plain indices (0-based) into the
+ * groupClause for this collection of grouping sets. We keep the
+ * original form for later use, though.
+ */
+ rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
+ current_sets,
+ gd->tleref_to_colnum_map);
+ rollup->gsets_data = current_sets;
+
+ gd->rollups = lappend(gd->rollups, rollup);
+ }
+
+ if (gd->unsortable_sets)
+ {
+ /*
+ * We have not yet pinned down a groupclause for this, but we will
+ * need index-based lists for estimation purposes. Construct
+ * hash_sets_idx based on the entire original groupclause for now.
+ */
+ gd->hash_sets_idx = remap_to_groupclause_idx(parse->groupClause,
+ gd->unsortable_sets,
+ gd->tleref_to_colnum_map);
+ gd->any_hashable = true;
+ }
+
+ return gd;
+}
+
+/*
+ * Given a groupclause and a list of GroupingSetData, return equivalent sets
+ * (without annotation) mapped to indexes into the given groupclause.
+ */
+static List *
+remap_to_groupclause_idx(List *groupClause,
+ List *gsets,
+ int *tleref_to_colnum_map)
+{
+ int ref = 0;
+ List *result = NIL;
+ ListCell *lc;
+
+ foreach(lc, groupClause)
+ {
+ SortGroupClause *gc = lfirst(lc);
+
+ tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
+ }
+
+ foreach(lc, gsets)
+ {
+ List *set = NIL;
+ ListCell *lc2;
+ GroupingSetData *gs = lfirst(lc);
+
+ foreach(lc2, gs->set)
+ {
+ set = lappend_int(set, tleref_to_colnum_map[lfirst_int(lc2)]);
+ }
+
+ result = lappend(result, set);
+ }
+
+ return result;
+}
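+
+/*
+ * Worked example (illustrative, not part of the patch): if groupClause
+ * contains three clauses whose tleSortGroupRefs are 3, 5 and 7, the map
+ * assigns them column indexes 0, 1 and 2 respectively, so the grouping
+ * sets ((5,3),(7)) come back as ((1,0),(2)).
+ */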
+
+
+
/*
* Detect whether a plan node is a "dummy" plan created when a relation
* is deemed not to need scanning due to constraint exclusion.
@@ -2981,7 +3139,7 @@ extract_rollup_sets(List *groupingSets)
/*
* Reorder the elements of a list of grouping sets such that they have correct
- * prefix relationships.
+ * prefix relationships. Also inserts the GroupingSetData annotations.
*
* The input must be ordered with smallest sets first; the result is returned
* with largest sets first. Note that the result shares no list substructure
@@ -3004,6 +3162,7 @@ reorder_grouping_sets(List *groupingsets, List *sortclause)
{
List *candidate = lfirst(lc);
List *new_elems = list_difference_int(candidate, previous);
+ GroupingSetData *gs = makeNode(GroupingSetData);
if (list_length(new_elems) > 0)
{
@@ -3031,7 +3190,8 @@ reorder_grouping_sets(List *groupingsets, List *sortclause)
}
}
- result = lcons(list_copy(previous), result);
+ gs->set = list_copy(previous);
+ result = lcons(gs, result);
list_free(new_elems);
}
@@ -3126,15 +3286,16 @@ standard_qp_callback(PlannerInfo *root, void *extra)
* Estimate number of groups produced by grouping clauses (1 if not grouping)
*
* path_rows: number of output rows from scan/join step
- * rollup_lists: list of grouping sets, or NIL if not doing grouping sets
- * rollup_groupclauses: list of grouping clauses for grouping sets,
- * or NIL if not doing grouping sets
+ * gd: grouping set data, or NULL if not doing grouping sets
+ *
+ * If doing grouping sets, we also annotate the gsets data with the estimates
+ * for each set and each individual rollup list, with a view to later
+ * determining whether some combination of them could be hashed instead.
*/
static double
get_number_of_groups(PlannerInfo *root,
double path_rows,
- List *rollup_lists,
- List *rollup_groupclauses)
+ grouping_sets_data *gd)
{
Query *parse = root->parse;
double dNumGroups;
@@ -3146,28 +3307,60 @@ get_number_of_groups(PlannerInfo *root,
if (parse->groupingSets)
{
/* Add up the estimates for each grouping set */
- ListCell *lc,
- *lc2;
+ ListCell *lc;
+ ListCell *lc2;
dNumGroups = 0;
- forboth(lc, rollup_groupclauses, lc2, rollup_lists)
+
+ foreach(lc, gd->rollups)
{
- List *groupClause = (List *) lfirst(lc);
- List *gsets = (List *) lfirst(lc2);
- ListCell *lc3;
+ RollupData *rollup = lfirst(lc);
+ ListCell *lc;
- groupExprs = get_sortgrouplist_exprs(groupClause,
+ groupExprs = get_sortgrouplist_exprs(rollup->groupClause,
parse->targetList);
- foreach(lc3, gsets)
+ rollup->numGroups = 0.0;
+
+ forboth(lc, rollup->gsets, lc2, rollup->gsets_data)
{
- List *gset = (List *) lfirst(lc3);
+ List *gset = (List *) lfirst(lc);
+ GroupingSetData *gs = lfirst(lc2);
+ double numGroups = estimate_num_groups(root,
+ groupExprs,
+ path_rows,
+ &gset);
+
+ gs->numGroups = numGroups;
+ rollup->numGroups += numGroups;
+ }
+
+ dNumGroups += rollup->numGroups;
+ }
- dNumGroups += estimate_num_groups(root,
- groupExprs,
- path_rows,
- &gset);
+ if (gd->hash_sets_idx)
+ {
+ ListCell *lc;
+
+ gd->dNumHashGroups = 0;
+
+ groupExprs = get_sortgrouplist_exprs(parse->groupClause,
+ parse->targetList);
+
+ forboth(lc, gd->hash_sets_idx, lc2, gd->unsortable_sets)
+ {
+ List *gset = (List *) lfirst(lc);
+ GroupingSetData *gs = lfirst(lc2);
+ double numGroups = estimate_num_groups(root,
+ groupExprs,
+ path_rows,
+ &gset);
+
+ gs->numGroups = numGroups;
+ gd->dNumHashGroups += numGroups;
}
+
+ dNumGroups += gd->dNumHashGroups;
}
}
else
@@ -3203,6 +3396,11 @@ get_number_of_groups(PlannerInfo *root,
* estimate_hashagg_tablesize
* estimate the number of bytes that a hash aggregate hashtable will
* require based on the agg_costs, path width and dNumGroups.
+ *
+ * XXX this may be over-estimating the size now that hashagg knows to omit
+ * unneeded columns from the hashtable. Also for mixed-mode grouping sets,
+ * grouping columns not in the hashed set are counted here even though hashagg
+ * won't store them. Is this a problem?
*/
static Size
estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs,
@@ -3253,8 +3451,7 @@ create_grouping_paths(PlannerInfo *root,
RelOptInfo *input_rel,
PathTarget *target,
const AggClauseCosts *agg_costs,
- List *rollup_lists,
- List *rollup_groupclauses)
+ grouping_sets_data *gd)
{
Query *parse = root->parse;
Path *cheapest_path = input_rel->cheapest_total_path;
@@ -3362,8 +3559,7 @@ create_grouping_paths(PlannerInfo *root,
*/
dNumGroups = get_number_of_groups(root,
cheapest_path->rows,
- rollup_lists,
- rollup_groupclauses);
+ gd);
/*
* Determine whether it's possible to perform sort-based implementations
@@ -3371,15 +3567,22 @@ create_grouping_paths(PlannerInfo *root,
* grouping_is_sortable() is trivially true, and all the
* pathkeys_contained_in() tests will succeed too, so that we'll consider
* every surviving input path.)
+ *
+ * If we have grouping sets, we might be able to sort some but not all of
+ * them; in this case, we need can_sort to be true as long as we must
+ * consider any sorted-input plan.
*/
- can_sort = grouping_is_sortable(parse->groupClause);
+ can_sort = (gd && gd->rollups != NIL)
+ || grouping_is_sortable(parse->groupClause);
/*
* Determine whether we should consider hash-based implementations of
* grouping.
*
- * Hashed aggregation only applies if we're grouping. We currently can't
- * hash if there are grouping sets, though.
+ * Hashed aggregation only applies if we're grouping. If we have grouping
+ * sets, some groups might be hashable but others not; in this case we set
+ * can_hash true as long as there is nothing globally preventing us from
+ * hashing (and we should therefore consider plans with hashes).
*
* Executor doesn't support hashed aggregation with DISTINCT or ORDER BY
* aggregates. (Doing so would imply storing *all* the input values in
@@ -3392,9 +3595,8 @@ create_grouping_paths(PlannerInfo *root,
* other gating conditions, so we want to do it last.
*/
can_hash = (parse->groupClause != NIL &&
- parse->groupingSets == NIL &&
agg_costs->numOrderedAggs == 0 &&
- grouping_is_hashable(parse->groupClause));
+ (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause)));
/*
* If grouped_rel->consider_parallel is true, then paths that we generate
@@ -3460,8 +3662,7 @@ create_grouping_paths(PlannerInfo *root,
/* Estimate number of partial groups. */
dNumPartialGroups = get_number_of_groups(root,
cheapest_partial_path->rows,
- NIL,
- NIL);
+ gd);
/*
* Collect statistics about aggregates for estimating costs of
@@ -3594,20 +3795,9 @@ create_grouping_paths(PlannerInfo *root,
/* Now decide what to stick atop it */
if (parse->groupingSets)
{
- /*
- * We have grouping sets, possibly with aggregation. Make
- * a GroupingSetsPath.
- */
- add_path(grouped_rel, (Path *)
- create_groupingsets_path(root,
- grouped_rel,
- path,
- target,
- (List *) parse->havingQual,
- rollup_lists,
- rollup_groupclauses,
- agg_costs,
- dNumGroups));
+ consider_groupingsets_paths(root, grouped_rel,
+ path, true, can_hash, target,
+ gd, agg_costs, dNumGroups);
}
else if (parse->hasAggs)
{
@@ -3705,33 +3895,45 @@ create_grouping_paths(PlannerInfo *root,
if (can_hash)
{
- hashaggtablesize = estimate_hashagg_tablesize(cheapest_path,
- agg_costs,
- dNumGroups);
-
- /*
- * Provided that the estimated size of the hashtable does not exceed
- * work_mem, we'll generate a HashAgg Path, although if we were unable
- * to sort above, then we'd better generate a Path, so that we at
- * least have one.
- */
- if (hashaggtablesize < work_mem * 1024L ||
- grouped_rel->pathlist == NIL)
+ if (parse->groupingSets)
{
/*
- * We just need an Agg over the cheapest-total input path, since
- * input order won't matter.
+ * Try for a hash-only groupingsets path over unsorted input.
*/
- add_path(grouped_rel, (Path *)
- create_agg_path(root, grouped_rel,
- cheapest_path,
- target,
- AGG_HASHED,
- AGGSPLIT_SIMPLE,
- parse->groupClause,
- (List *) parse->havingQual,
- agg_costs,
- dNumGroups));
+ consider_groupingsets_paths(root, grouped_rel,
+ cheapest_path, false, true, target,
+ gd, agg_costs, dNumGroups);
+ }
+ else
+ {
+ hashaggtablesize = estimate_hashagg_tablesize(cheapest_path,
+ agg_costs,
+ dNumGroups);
+
+ /*
+ * Provided that the estimated size of the hashtable does not
+ * exceed work_mem, we'll generate a HashAgg Path, although if we
+ * were unable to sort above, then we'd better generate a Path, so
+ * that we at least have one.
+ */
+ if (hashaggtablesize < work_mem * 1024L ||
+ grouped_rel->pathlist == NIL)
+ {
+ /*
+ * We just need an Agg over the cheapest-total input path,
+ * since input order won't matter.
+ */
+ add_path(grouped_rel, (Path *)
+ create_agg_path(root, grouped_rel,
+ cheapest_path,
+ target,
+ AGG_HASHED,
+ AGGSPLIT_SIMPLE,
+ parse->groupClause,
+ (List *) parse->havingQual,
+ agg_costs,
+ dNumGroups));
+ }
}
/*
@@ -3800,6 +4002,317 @@ create_grouping_paths(PlannerInfo *root,
return grouped_rel;
}
+
+/*
+ * For a given input path, consider the possible ways of doing grouping sets on
+ * it, by combinations of hashing and sorting. This can be called multiple
+ * times, so it's important that it not scribble on input. No result is
+ * returned, but any generated paths are added to grouped_rel.
+ */
+
+static void
+consider_groupingsets_paths(PlannerInfo *root,
+ RelOptInfo *grouped_rel,
+ Path *path,
+ bool is_sorted,
+ bool can_hash,
+ PathTarget *target,
+ grouping_sets_data *gd,
+ const AggClauseCosts *agg_costs,
+ double dNumGroups)
+{
+ Query *parse = root->parse;
+
+ /*
+ * If we're not being offered sorted input, then only consider plans that
+ * can be done entirely by hashing.
+ *
+ * We can hash everything if it looks like it'll fit in work_mem. But if
+ * the input is actually sorted despite not being advertised as such, we
+ * prefer to make use of that in order to use less memory.
+ *
+ * If none of the grouping sets are sortable, then ignore the work_mem
+ * limit and generate a path anyway, since otherwise we'll just fail.
+ */
+ if (!is_sorted)
+ {
+ List *new_rollups = NIL;
+ RollupData *unhashable_rollup = NULL;
+ List *sets_data;
+ List *empty_sets_data = NIL;
+ List *empty_sets = NIL;
+ ListCell *lc;
+ ListCell *l_start = list_head(gd->rollups);
+ AggStrategy strat = AGG_HASHED;
+ Size hashsize;
+ double exclude_groups = 0.0;
+
+ Assert(can_hash);
+
+ if (pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
+ {
+ unhashable_rollup = lfirst(l_start);
+ exclude_groups = unhashable_rollup->numGroups;
+ l_start = lnext(l_start);
+ }
+
+ hashsize = estimate_hashagg_tablesize(path,
+ agg_costs,
+ dNumGroups - exclude_groups);
+
+ if (hashsize > work_mem * 1024L && gd->rollups)
+ return; /* nope, won't fit */
+
+ /*
+ * We need to burst the existing rollups list into individual grouping
+ * sets and recompute a groupClause for each set.
+ */
+ sets_data = list_copy(gd->unsortable_sets);
+
+ for_each_cell(lc, l_start)
+ {
+ RollupData *rollup = lfirst(lc);
+
+ /*
+ * If we find an unhashable rollup that's not been skipped by the
+ * "actually sorted" check above, we can't cope; we'd need sorted
+ * input (with a different sort order) but we can't get that here.
+ * So bail out; we'll get a valid path from the is_sorted case
+ * instead.
+ */
+ if (!rollup->hashable)
+ return;
+ else
+ sets_data = list_concat(sets_data, list_copy(rollup->gsets_data));
+ }
+ foreach(lc, sets_data)
+ {
+ GroupingSetData *gs = lfirst(lc);
+ List *gset = gs->set;
+ RollupData *rollup;
+
+ if (gset == NIL)
+ {
+ empty_sets_data = lappend(empty_sets_data, gs);
+ empty_sets = lappend(empty_sets, NIL);
+ }
+ else
+ {
+ rollup = makeNode(RollupData);
+
+ rollup->groupClause = preprocess_groupclause(root, gset);
+ rollup->gsets_data = list_make1(gs);
+ rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
+ rollup->gsets_data,
+ gd->tleref_to_colnum_map);
+ rollup->numGroups = gs->numGroups;
+ rollup->hashable = true;
+ rollup->is_hashed = true;
+ new_rollups = lappend(new_rollups, rollup);
+ }
+ }
+
+ /* If we didn't find anything nonempty to hash, then bail. */
+ if (new_rollups == NIL)
+ return;
+
+ /*
+ * If there were empty grouping sets, they should have been in the
+ * first rollup.
+ */
+ Assert(!unhashable_rollup || !empty_sets);
+
+ if (unhashable_rollup)
+ {
+ new_rollups = lappend(new_rollups, unhashable_rollup);
+ strat = AGG_MIXED;
+ }
+ else if (empty_sets)
+ {
+ RollupData *rollup = makeNode(RollupData);
+
+ rollup->groupClause = NIL;
+ rollup->gsets_data = empty_sets_data;
+ rollup->gsets = empty_sets;
+ rollup->numGroups = list_length(empty_sets);
+ rollup->hashable = false;
+ rollup->is_hashed = false;
+ new_rollups = lappend(new_rollups, rollup);
+ strat = AGG_MIXED;
+ }
+
+ add_path(grouped_rel, (Path *)
+ create_groupingsets_path(root,
+ grouped_rel,
+ path,
+ target,
+ (List *) parse->havingQual,
+ strat,
+ new_rollups,
+ agg_costs,
+ dNumGroups));
+ return;
+ }
+
+ /*
+ * If we have sorted input but nothing we can do with it, bail.
+ */
+
+ if (list_length(gd->rollups) == 0)
+ return;
+
+ /*
+ * Given sorted input, we try to make two paths: one sorted and one mixed
+ * sort/hash. (We need to try both because hashagg might be disabled, or
+ * some columns might not be sortable.)
+ *
+ * can_hash is passed in as false if some obstacle elsewhere (such as
+ * ordered aggs) means that we shouldn't consider hashing at all.
+ */
+
+ if (can_hash && gd->any_hashable)
+ {
+ List *rollups = NIL;
+ List *hash_sets = list_copy(gd->unsortable_sets);
+ double availspace = (work_mem * 1024.0);
+ ListCell *lc;
+
+ /*
+ * Account first for space needed for groups we can't sort at all.
+ */
+ availspace -= (double) estimate_hashagg_tablesize(path,
+ agg_costs,
+ gd->dNumHashGroups);
+
+ if (availspace > 0 && list_length(gd->rollups) > 1)
+ {
+ double scale;
+ int num_rollups = list_length(gd->rollups);
+ int k_capacity;
+ int *k_weights = palloc(num_rollups * sizeof(int));
+ Bitmapset *hash_items = NULL;
+ int i;
+
+ /*
+ * To use the discrete knapsack, we need to scale the values to a
+ * reasonably small bounded range. We choose to allow a 5% error
+ * margin; we have no more than 4096 rollups in the worst possible
+ * case, which with a 5% error margin will require a bit over 42MB
+ * of workspace. (Anyone wanting to plan queries that complex had
+ * better have the memory for it. In more reasonable cases, with
+ * no more than a couple of dozen rollups, the memory usage will
+ * be negligible.)
+ */
+ scale = availspace / (20.0 * num_rollups);
+ k_capacity = (int) floor(availspace / scale);
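+
+ /*
+ * Illustrative numbers (assumed, not from the patch): with availspace
+ * of 4MB and num_rollups = 10, scale comes out at about 20971 bytes
+ * per unit, so k_capacity is 200 units; each rollup's weight is its
+ * hashtable size estimate divided by scale, and the total quantization
+ * error is bounded by num_rollups * scale, i.e. the 5% margin above.
+ */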
+
+ /*
+ * We leave the first rollup out of consideration since it's the
+ * one that matches the input sort order. We assign indexes "i"
+ * to only those entries considered for hashing; the second loop,
+ * below, must use the same condition.
+ */
+ i = 0;
+ for_each_cell(lc, lnext(list_head(gd->rollups)))
+ {
+ RollupData *rollup = lfirst(lc);
+
+ if (rollup->hashable)
+ {
+ double sz = estimate_hashagg_tablesize(path,
+ agg_costs,
+ rollup->numGroups);
+
+ k_weights[i] = (int) floor(sz / scale);
+ ++i;
+ }
+ }
+
+ /*
+ * Apply knapsack algorithm; compute the set of items which
+ * maximizes the value stored (in this case the number of sorts
+ * saved) while keeping the total size (approximately) within
+ * capacity.
+ */
+ if (i > 0)
+ hash_items = DiscreteKnapsack(k_capacity, i, k_weights, NULL);
+
+ if (!bms_is_empty(hash_items))
+ {
+ rollups = list_make1(linitial(gd->rollups));
+
+ i = 0;
+ for_each_cell(lc, lnext(list_head(gd->rollups)))
+ {
+ RollupData *rollup = lfirst(lc);
+
+ if (rollup->hashable)
+ {
+ if (bms_is_member(i, hash_items))
+ hash_sets = list_concat(hash_sets,
+ list_copy(rollup->gsets_data));
+ else
+ rollups = lappend(rollups, rollup);
+ ++i;
+ }
+ else
+ rollups = lappend(rollups, rollup);
+ }
+ }
+ }
+
+ if (!rollups && hash_sets)
+ rollups = list_copy(gd->rollups);
+
+ foreach(lc, hash_sets)
+ {
+ GroupingSetData *gs = lfirst(lc);
+ RollupData *rollup = makeNode(RollupData);
+
+ Assert(gs->set != NIL);
+
+ rollup->groupClause = preprocess_groupclause(root, gs->set);
+ rollup->gsets_data = list_make1(gs);
+ rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
+ rollup->gsets_data,
+ gd->tleref_to_colnum_map);
+ rollup->numGroups = gs->numGroups;
+ rollup->hashable = true;
+ rollup->is_hashed = true;
+ rollups = lcons(rollup, rollups);
+ }
+
+ if (rollups)
+ {
+ add_path(grouped_rel, (Path *)
+ create_groupingsets_path(root,
+ grouped_rel,
+ path,
+ target,
+ (List *) parse->havingQual,
+ AGG_MIXED,
+ rollups,
+ agg_costs,
+ dNumGroups));
+ }
+ }
+
+ /*
+ * Now try the simple sorted case.
+ */
+ if (!gd->unsortable_sets)
+ add_path(grouped_rel, (Path *)
+ create_groupingsets_path(root,
+ grouped_rel,
+ path,
+ target,
+ (List *) parse->havingQual,
+ AGG_SORTED,
+ gd->rollups,
+ agg_costs,
+ dNumGroups));
+}
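+
+/*
+ * Usage sketch (both call sites are in the create_grouping_paths hunks
+ * above): once per already-sorted input path,
+ *
+ *   consider_groupingsets_paths(root, grouped_rel, path,
+ *                               true, can_hash, target,
+ *                               gd, agg_costs, dNumGroups);
+ *
+ * and, whenever hashing is possible at all, once more for the
+ * cheapest-total path with is_sorted = false to try a purely hash-based
+ * plan.
+ */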
+
/*
* create_window_paths
*
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 3248296..4f85aca 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -2607,10 +2607,9 @@ create_agg_path(PlannerInfo *root,
* 'subpath' is the path representing the source of data
* 'target' is the PathTarget to be computed
* 'having_qual' is the HAVING quals if any
- * 'rollup_lists' is a list of grouping sets
- * 'rollup_groupclauses' is a list of grouping clauses for grouping sets
+ * 'rollups' is a list of RollupData nodes
* 'agg_costs' contains cost info about the aggregate functions to be computed
- * 'numGroups' is the estimated number of groups
+ * 'numGroups' is the estimated total number of groups
*/
GroupingSetsPath *
create_groupingsets_path(PlannerInfo *root,
@@ -2618,13 +2617,15 @@ create_groupingsets_path(PlannerInfo *root,
Path *subpath,
PathTarget *target,
List *having_qual,
- List *rollup_lists,
- List *rollup_groupclauses,
+ AggStrategy aggstrategy,
+ List *rollups,
const AggClauseCosts *agg_costs,
double numGroups)
{
GroupingSetsPath *pathnode = makeNode(GroupingSetsPath);
- int numGroupCols;
+ ListCell *lc;
+ bool is_first = true;
+ bool is_first_sort = true;
/* The topmost generated Plan node will be an Agg */
pathnode->path.pathtype = T_Agg;
@@ -2638,74 +2639,110 @@ create_groupingsets_path(PlannerInfo *root,
pathnode->subpath = subpath;
/*
+ * Simplify callers by downgrading AGG_SORTED to AGG_PLAIN, and AGG_MIXED
+ * to AGG_HASHED, here if possible.
+ */
+ if (aggstrategy == AGG_SORTED &&
+ list_length(rollups) == 1 &&
+ ((RollupData *) linitial(rollups))->groupClause == NIL)
+ aggstrategy = AGG_PLAIN;
+
+ if (aggstrategy == AGG_MIXED &&
+ list_length(rollups) == 1)
+ aggstrategy = AGG_HASHED;
+
+ /*
* Output will be in sorted order by group_pathkeys if, and only if, there
* is a single rollup operation on a non-empty list of grouping
* expressions.
*/
- if (list_length(rollup_groupclauses) == 1 &&
- ((List *) linitial(rollup_groupclauses)) != NIL)
+ if (aggstrategy == AGG_SORTED && list_length(rollups) == 1)
pathnode->path.pathkeys = root->group_pathkeys;
else
pathnode->path.pathkeys = NIL;
- pathnode->rollup_groupclauses = rollup_groupclauses;
- pathnode->rollup_lists = rollup_lists;
+ pathnode->aggstrategy = aggstrategy;
+ pathnode->rollups = rollups;
pathnode->qual = having_qual;
- Assert(rollup_lists != NIL);
- Assert(list_length(rollup_lists) == list_length(rollup_groupclauses));
-
- /* Account for cost of the topmost Agg node */
- numGroupCols = list_length((List *) linitial((List *) llast(rollup_lists)));
-
- cost_agg(&pathnode->path, root,
- (numGroupCols > 0) ? AGG_SORTED : AGG_PLAIN,
- agg_costs,
- numGroupCols,
- numGroups,
- subpath->startup_cost,
- subpath->total_cost,
- subpath->rows);
+ Assert(rollups != NIL);
+ Assert(aggstrategy != AGG_PLAIN || list_length(rollups) == 1);
+ Assert(aggstrategy != AGG_MIXED || list_length(rollups) > 1);
- /*
- * Add in the costs and output rows of the additional sorting/aggregation
- * steps, if any. Only total costs count, since the extra sorts aren't
- * run on startup.
- */
- if (list_length(rollup_lists) > 1)
+ foreach(lc, rollups)
{
- ListCell *lc;
+ RollupData *rollup = lfirst(lc);
+ List *gsets = rollup->gsets;
+ int numGroupCols = list_length(linitial(gsets));
- foreach(lc, rollup_lists)
+ /*
+ * In AGG_SORTED or AGG_PLAIN mode, the first rollup takes the
+ * (already-sorted) input, and following ones do their own sort.
+ *
+ * In AGG_HASHED mode, there is one rollup for each grouping set.
+ *
+ * In AGG_MIXED mode, the first rollups are hashed, the first
+ * non-hashed one takes the (already-sorted) input, and following ones
+ * do their own sort.
+ */
+
+ if (is_first)
+ {
+ cost_agg(&pathnode->path, root,
+ aggstrategy,
+ agg_costs,
+ numGroupCols,
+ rollup->numGroups,
+ subpath->startup_cost,
+ subpath->total_cost,
+ subpath->rows);
+ is_first = false;
+ if (!rollup->is_hashed)
+ is_first_sort = false;
+ }
+ else
{
- List *gsets = (List *) lfirst(lc);
Path sort_path; /* dummy for result of cost_sort */
Path agg_path; /* dummy for result of cost_agg */
- /* We must iterate over all but the last rollup_lists element */
- if (lnext(lc) == NULL)
- break;
-
- /* Account for cost of sort, but don't charge input cost again */
- cost_sort(&sort_path, root, NIL,
- 0.0,
- subpath->rows,
- subpath->pathtarget->width,
- 0.0,
- work_mem,
- -1.0);
-
- /* Account for cost of aggregation */
- numGroupCols = list_length((List *) linitial(gsets));
-
- cost_agg(&agg_path, root,
- AGG_SORTED,
- agg_costs,
- numGroupCols,
- numGroups, /* XXX surely not right for all steps? */
- sort_path.startup_cost,
- sort_path.total_cost,
- sort_path.rows);
+ if (rollup->is_hashed || is_first_sort)
+ {
+ /*
+ * Account for cost of aggregation, but don't charge input
+ * cost again
+ */
+ cost_agg(&agg_path, root,
+ rollup->is_hashed ? AGG_HASHED : AGG_SORTED,
+ agg_costs,
+ numGroupCols,
+ rollup->numGroups,
+ 0.0, 0.0,
+ subpath->rows);
+ if (!rollup->is_hashed)
+ is_first_sort = false;
+ }
+ else
+ {
+ /* Account for cost of sort, but don't charge input cost again */
+ cost_sort(&sort_path, root, NIL,
+ 0.0,
+ subpath->rows,
+ subpath->pathtarget->width,
+ 0.0,
+ work_mem,
+ -1.0);
+
+ /* Account for cost of aggregation */
+
+ cost_agg(&agg_path, root,
+ AGG_SORTED,
+ agg_costs,
+ numGroupCols,
+ rollup->numGroups,
+ sort_path.startup_cost,
+ sort_path.total_cost,
+ sort_path.rows);
+ }
pathnode->path.total_cost += agg_path.total_cost;
pathnode->path.rows += agg_path.rows;
diff --git a/src/include/lib/knapsack.h b/src/include/lib/knapsack.h
new file mode 100644
index 0000000..8d1e6d0
--- /dev/null
+++ b/src/include/lib/knapsack.h
@@ -0,0 +1,17 @@
+/*
+ * knapsack.h
+ *
+ * Copyright (c) 2017, PostgreSQL Global Development Group
+ *
+ * src/include/lib/knapsack.h
+ */
+#ifndef KNAPSACK_H
+#define KNAPSACK_H
+
+#include "postgres.h"
+#include "nodes/bitmapset.h"
+
+extern Bitmapset *DiscreteKnapsack(int max_weight, int num_items,
+ int *item_weights, double *item_values);
+
+#endif /* KNAPSACK_H */
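For reference, the planner hunk above calls DiscreteKnapsack(k_capacity, i,
k_weights, NULL), which is the classic 0/1 knapsack dynamic program. Here is
a minimal standalone sketch of that technique (illustrative only, not the
patch's lib/knapsack.c: it assumes a NULL item_values means all items count
equally, matching the planner's call, and it returns a 64-bit mask where the
real function returns a Bitmapset *):

    #include <stdlib.h>

    /* Toy limit: at most 64 items, since the result is a bitmask. */
    unsigned long long
    knapsack_sketch(int max_weight, int num_items,
                    const int *item_weights, const double *item_values)
    {
        /* best[w] = best achievable value with total weight <= w */
        double     *best = calloc(max_weight + 1, sizeof(double));
        /* sets[w] = the item set achieving best[w] */
        unsigned long long *sets = calloc(max_weight + 1,
                                          sizeof(unsigned long long));
        unsigned long long result;
        int         i, w;

        for (i = 0; i < num_items; i++)
        {
            double      v = item_values ? item_values[i] : 1.0;

            /* scan capacities downward so each item is used at most once */
            for (w = max_weight; w >= item_weights[i]; w--)
            {
                if (best[w - item_weights[i]] + v > best[w])
                {
                    best[w] = best[w - item_weights[i]] + v;
                    sets[w] = sets[w - item_weights[i]] | (1ULL << i);
                }
            }
        }

        result = sets[max_weight];      /* chosen items, as a bitmask */
        free(best);
        free(sets);
        return result;
    }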
diff --git a/src/include/nodes/bitmapset.h b/src/include/nodes/bitmapset.h
index 4f1910e..109f7b0 100644
--- a/src/include/nodes/bitmapset.h
+++ b/src/include/nodes/bitmapset.h
@@ -21,6 +21,11 @@
#define BITMAPSET_H
/*
+ * Forward decl to save including pg_list.h
+ */
+struct List;
+
+/*
* Data representation
*/
@@ -70,6 +75,7 @@ extern bool bms_is_subset(const Bitmapset *a, const Bitmapset *b);
extern BMS_Comparison bms_subset_compare(const Bitmapset *a, const Bitmapset *b);
extern bool bms_is_member(int x, const Bitmapset *a);
extern bool bms_overlap(const Bitmapset *a, const Bitmapset *b);
+extern bool bms_overlap_list(const Bitmapset *a, const struct List *b);
extern bool bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b);
extern int bms_singleton_member(const Bitmapset *a);
extern bool bms_get_singleton_member(const Bitmapset *a, int *member);
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 6332ea0..a29f502 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1861,6 +1861,7 @@ typedef struct AggStatePerAggData *AggStatePerAgg;
typedef struct AggStatePerTransData *AggStatePerTrans;
typedef struct AggStatePerGroupData *AggStatePerGroup;
typedef struct AggStatePerPhaseData *AggStatePerPhase;
+typedef struct AggStatePerHashData *AggStatePerHash;
typedef struct AggState
{
@@ -1868,15 +1869,17 @@ typedef struct AggState
List *aggs; /* all Aggref nodes in targetlist & quals */
int numaggs; /* length of list (could be zero!) */
int numtrans; /* number of pertrans items */
+ AggStrategy aggstrategy; /* strategy mode */
AggSplit aggsplit; /* agg-splitting mode, see nodes.h */
AggStatePerPhase phase; /* pointer to current phase data */
- int numphases; /* number of phases */
+ int numphases; /* number of phases (including phase 0) */
int current_phase; /* current phase number */
- FmgrInfo *hashfunctions; /* per-grouping-field hash fns */
AggStatePerAgg peragg; /* per-Aggref information */
AggStatePerTrans pertrans; /* per-Trans state information */
+ ExprContext *hashcontext; /* econtext for long-lived data (hashtable) */
ExprContext **aggcontexts; /* econtexts for long-lived data (per GS) */
ExprContext *tmpcontext; /* econtext for input expressions */
+ ExprContext *curaggcontext; /* currently active aggcontext */
AggStatePerTrans curpertrans; /* currently active trans state */
bool input_done; /* indicates end of input */
bool agg_done; /* indicates completion of Agg scan */
@@ -1888,21 +1891,17 @@ typedef struct AggState
/* These fields are for grouping set phase data */
int maxsets; /* The max number of sets in any phase */
AggStatePerPhase phases; /* array of all phases */
- Tuplesortstate *sort_in; /* sorted input to phases > 0 */
+ Tuplesortstate *sort_in; /* sorted input to phases > 1 */
Tuplesortstate *sort_out; /* input is copied here for next phase */
TupleTableSlot *sort_slot; /* slot for sort results */
/* these fields are used in AGG_PLAIN and AGG_SORTED modes: */
AggStatePerGroup pergroup; /* per-Aggref-per-group working state */
HeapTuple grp_firstTuple; /* copy of first tuple of current group */
- /* these fields are used in AGG_HASHED mode: */
- TupleHashTable hashtable; /* hash table with one entry per group */
- TupleTableSlot *hashslot; /* slot for loading hash table */
- int numhashGrpCols; /* number of columns in hash table */
- int largestGrpColIdx; /* largest column required for hashing */
- AttrNumber *hashGrpColIdxInput; /* and their indices in input slot */
- AttrNumber *hashGrpColIdxHash; /* indices for execGrouping in hashtbl */
+ /* these fields are used in AGG_HASHED and AGG_MIXED modes: */
bool table_filled; /* hash table filled yet? */
- TupleHashIterator hashiter; /* for iterating through hash table */
+ int num_hashes; /* number of hash tables in use */
+ AggStatePerHash perhash; /* array of per-hashtable data */
+ AggStatePerGroup *hash_pergroup; /* array of per-group pointers */
/* support for evaluation of agg inputs */
TupleTableSlot *evalslot; /* slot for agg inputs */
ProjectionInfo *evalproj; /* projection machinery */
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index ede7ace..1aba477 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -272,6 +272,8 @@ typedef enum NodeTag
T_PlaceHolderInfo,
T_MinMaxAggInfo,
T_PlannerParamItem,
+ T_RollupData,
+ T_GroupingSetData,
/*
* TAGS FOR MEMORY NODES (memnodes.h)
@@ -729,7 +731,8 @@ typedef enum AggStrategy
{
AGG_PLAIN, /* simple agg across all input rows */
AGG_SORTED, /* grouped agg, input must be sorted */
- AGG_HASHED /* grouped agg, use internal hashtable */
+ AGG_HASHED, /* grouped agg, use internal hashtable */
+ AGG_MIXED /* grouped agg, hash and sort concurrently */
} AggStrategy;
/*
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index f72f7a8..b3a2a03 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -737,7 +737,7 @@ typedef struct Agg
Oid *grpOperators; /* equality operators to compare with */
long numGroups; /* estimated number of groups in input */
Bitmapset *aggParams; /* IDs of Params used in Aggref inputs */
- /* Note: planner provides numGroups & aggParams only in AGG_HASHED case */
+ /* Note: planner provides numGroups & aggParams only in HASHED/MIXED case */
List *groupingSets; /* grouping sets to use */
List *chain; /* chained Agg/Sort nodes */
} Agg;
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index f7ac6f6..4d0791d 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -1378,17 +1378,37 @@ typedef struct AggPath
} AggPath;
/*
+ * Various annotations used for grouping sets in the planner.
+ */
+
+typedef struct GroupingSetData
+{
+ NodeTag type;
+ List *set; /* grouping set as list of sortgrouprefs */
+ double numGroups; /* est. number of result groups */
+} GroupingSetData;
+
+typedef struct RollupData
+{
+ NodeTag type;
+ List *groupClause; /* applicable subset of parse->groupClause */
+ List *gsets; /* lists of integer indexes into groupClause */
+ List *gsets_data; /* list of GroupingSetData */
+ double numGroups; /* est. number of result groups */
+ bool hashable; /* can be hashed */
+ bool is_hashed; /* to be implemented as a hashagg */
+} RollupData;
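+
+/*
+ * Illustrative example (not taken from the patch text): GROUP BY CUBE(a,b)
+ * expands to the sets (a,b), (a), (b) and (), which decompose into two
+ * rollups: one whose gsets_data lists (a,b), (a) and (), and one holding
+ * just (b).  hashable is set when every column in the rollup supports
+ * hashing; is_hashed records the planner's eventual decision to implement
+ * the rollup by hashing.
+ */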
+
+/*
* GroupingSetsPath represents a GROUPING SETS aggregation
- *
- * Currently we only support this in sorted not hashed form, so the input
- * must always be appropriately presorted.
*/
+
typedef struct GroupingSetsPath
{
Path path;
Path *subpath; /* path representing input source */
- List *rollup_groupclauses; /* list of lists of SortGroupClause's */
- List *rollup_lists; /* parallel list of lists of grouping sets */
+ AggStrategy aggstrategy; /* basic strategy */
+ List *rollups; /* list of RollupData */
List *qual; /* quals (HAVING quals), if any */
} GroupingSetsPath;
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 53cad24..a084ba1 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -181,8 +181,8 @@ extern GroupingSetsPath *create_groupingsets_path(PlannerInfo *root,
Path *subpath,
PathTarget *target,
List *having_qual,
- List *rollup_lists,
- List *rollup_groupclauses,
+ AggStrategy aggstrategy,
+ List *rollups,
const AggClauseCosts *agg_costs,
double numGroups);
extern MinMaxAggPath *create_minmaxagg_path(PlannerInfo *root,
diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out
index 260ccd5..49e86da 100644
--- a/src/test/regress/expected/groupingsets.out
+++ b/src/test/regress/expected/groupingsets.out
@@ -13,6 +13,13 @@ copy gstest2 from stdin;
create temp table gstest3 (a integer, b integer, c integer, d integer);
copy gstest3 from stdin;
alter table gstest3 add primary key (a);
+create temp table gstest4(id integer, v integer,
+ unhashable_col bit(4), unsortable_col xid);
+insert into gstest4
+values (1,1,b'0000','1'), (2,2,b'0001','1'),
+ (3,4,b'0010','2'), (4,8,b'0011','2'),
+ (5,16,b'0000','2'), (6,32,b'0001','2'),
+ (7,64,b'0010','1'), (8,128,b'0011','1');
create temp table gstest_empty (a integer, b integer, v integer);
create function gstest_data(v integer, out a integer, out b integer)
returns setof record
@@ -22,6 +29,7 @@ create function gstest_data(v integer, out a integer, out b integer)
end;
$f$ language plpgsql;
-- basic functionality
+set enable_hashagg = false; -- test hashing explicitly later
-- simple rollup with multiple plain aggregates, with and without ordering
-- (and with ordering differing from grouping)
select a, b, grouping(a,b), sum(v), count(*), max(v)
@@ -462,7 +470,7 @@ select a, b from (values (1,2),(2,3)) v(a,b) group by a,b, grouping sets(a);
-- Tests for chained aggregates
select a, b, grouping(a,b), sum(v), count(*), max(v)
- from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2));
+ from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
a | b | grouping | sum | count | max
---+---+----------+-----+-------+-----
1 | 1 | 0 | 21 | 2 | 11
@@ -473,19 +481,19 @@ select a, b, grouping(a,b), sum(v), count(*), max(v)
3 | 4 | 0 | 17 | 1 | 17
4 | 1 | 0 | 37 | 2 | 19
| | 3 | 21 | 2 | 11
- | | 3 | 25 | 2 | 13
- | | 3 | 14 | 1 | 14
- | | 3 | 15 | 1 | 15
- | | 3 | 16 | 1 | 16
- | | 3 | 17 | 1 | 17
- | | 3 | 37 | 2 | 19
| | 3 | 21 | 2 | 11
| | 3 | 25 | 2 | 13
+ | | 3 | 25 | 2 | 13
+ | | 3 | 14 | 1 | 14
| | 3 | 14 | 1 | 14
| | 3 | 15 | 1 | 15
+ | | 3 | 15 | 1 | 15
| | 3 | 16 | 1 | 16
+ | | 3 | 16 | 1 | 16
+ | | 3 | 17 | 1 | 17
| | 3 | 17 | 1 | 17
| | 3 | 37 | 2 | 19
+ | | 3 | 37 | 2 | 19
(21 rows)
select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY ROLLUP((e+1),(f+1));
@@ -847,4 +855,598 @@ select sum(ten) from onek group by rollup(four::text), two order by 1;
2500
(6 rows)
+-- hashing support
+set enable_hashagg = true;
+-- failure cases
+select count(*) from gstest4 group by rollup(unhashable_col,unsortable_col);
+ERROR: could not implement GROUP BY
+DETAIL: Some of the datatypes only support hashing, while others only support sorting.
+select array_agg(v order by v) from gstest4 group by grouping sets ((id,unsortable_col),(id));
+ERROR: could not implement GROUP BY
+DETAIL: Some of the datatypes only support hashing, while others only support sorting.
+-- simple cases
+select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a),(b)) order by 3,1,2;
+ a | b | grouping | sum | count | max
+---+---+----------+-----+-------+-----
+ 1 | | 1 | 60 | 5 | 14
+ 2 | | 1 | 15 | 1 | 15
+ 3 | | 1 | 33 | 2 | 17
+ 4 | | 1 | 37 | 2 | 19
+ | 1 | 2 | 58 | 4 | 19
+ | 2 | 2 | 25 | 2 | 13
+ | 3 | 2 | 45 | 3 | 16
+ | 4 | 2 | 17 | 1 | 17
+(8 rows)
+
+explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a),(b)) order by 3,1,2;
+ QUERY PLAN
+--------------------------------------------------------------------------------------------------------
+ Sort
+ Sort Key: (GROUPING("*VALUES*".column1, "*VALUES*".column2)), "*VALUES*".column1, "*VALUES*".column2
+ -> HashAggregate
+ Hash Key: "*VALUES*".column1
+ Hash Key: "*VALUES*".column2
+ -> Values Scan on "*VALUES*"
+(6 rows)
+
+select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by cube(a,b) order by 3,1,2;
+ a | b | grouping | sum | count | max
+---+---+----------+-----+-------+-----
+ 1 | 1 | 0 | 21 | 2 | 11
+ 1 | 2 | 0 | 25 | 2 | 13
+ 1 | 3 | 0 | 14 | 1 | 14
+ 2 | 3 | 0 | 15 | 1 | 15
+ 3 | 3 | 0 | 16 | 1 | 16
+ 3 | 4 | 0 | 17 | 1 | 17
+ 4 | 1 | 0 | 37 | 2 | 19
+ 1 | | 1 | 60 | 5 | 14
+ 2 | | 1 | 15 | 1 | 15
+ 3 | | 1 | 33 | 2 | 17
+ 4 | | 1 | 37 | 2 | 19
+ | 1 | 2 | 58 | 4 | 19
+ | 2 | 2 | 25 | 2 | 13
+ | 3 | 2 | 45 | 3 | 16
+ | 4 | 2 | 17 | 1 | 17
+ | | 3 | 145 | 10 | 19
+(16 rows)
+
+explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by cube(a,b) order by 3,1,2;
+ QUERY PLAN
+--------------------------------------------------------------------------------------------------------
+ Sort
+ Sort Key: (GROUPING("*VALUES*".column1, "*VALUES*".column2)), "*VALUES*".column1, "*VALUES*".column2
+ -> MixedAggregate
+ Hash Key: "*VALUES*".column1, "*VALUES*".column2
+ Hash Key: "*VALUES*".column1
+ Hash Key: "*VALUES*".column2
+ Group Key: ()
+ -> Values Scan on "*VALUES*"
+(8 rows)
+
+-- shouldn't try to hash
+explain (costs off)
+ select a, b, grouping(a,b), array_agg(v order by v)
+ from gstest1 group by cube(a,b);
+ QUERY PLAN
+----------------------------------------------------------
+ GroupAggregate
+ Group Key: "*VALUES*".column1, "*VALUES*".column2
+ Group Key: "*VALUES*".column1
+ Group Key: ()
+ Sort Key: "*VALUES*".column2
+ Group Key: "*VALUES*".column2
+ -> Sort
+ Sort Key: "*VALUES*".column1, "*VALUES*".column2
+ -> Values Scan on "*VALUES*"
+(9 rows)
+
+-- mixed hashable/sortable cases
+select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((unhashable_col),(unsortable_col))
+ order by 3, 5;
+ unhashable_col | unsortable_col | grouping | count | sum
+----------------+----------------+----------+-------+-----
+ 0000 | | 1 | 2 | 17
+ 0001 | | 1 | 2 | 34
+ 0010 | | 1 | 2 | 68
+ 0011 | | 1 | 2 | 136
+ | 2 | 2 | 4 | 60
+ | 1 | 2 | 4 | 195
+(6 rows)
+
+explain (costs off)
+ select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((unhashable_col),(unsortable_col))
+ order by 3,5;
+ QUERY PLAN
+------------------------------------------------------------------
+ Sort
+ Sort Key: (GROUPING(unhashable_col, unsortable_col)), (sum(v))
+ -> MixedAggregate
+ Hash Key: unsortable_col
+ Group Key: unhashable_col
+ -> Sort
+ Sort Key: unhashable_col
+ -> Seq Scan on gstest4
+(8 rows)
+
+select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col))
+ order by 3,5;
+ unhashable_col | unsortable_col | grouping | count | sum
+----------------+----------------+----------+-------+-----
+ 0000 | | 1 | 1 | 1
+ 0001 | | 1 | 1 | 2
+ 0010 | | 1 | 1 | 4
+ 0011 | | 1 | 1 | 8
+ 0000 | | 1 | 1 | 16
+ 0001 | | 1 | 1 | 32
+ 0010 | | 1 | 1 | 64
+ 0011 | | 1 | 1 | 128
+ | 1 | 2 | 1 | 1
+ | 1 | 2 | 1 | 2
+ | 2 | 2 | 1 | 4
+ | 2 | 2 | 1 | 8
+ | 2 | 2 | 1 | 16
+ | 2 | 2 | 1 | 32
+ | 1 | 2 | 1 | 64
+ | 1 | 2 | 1 | 128
+(16 rows)
+
+explain (costs off)
+ select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col))
+ order by 3,5;
+ QUERY PLAN
+------------------------------------------------------------------
+ Sort
+ Sort Key: (GROUPING(unhashable_col, unsortable_col)), (sum(v))
+ -> MixedAggregate
+ Hash Key: v, unsortable_col
+ Group Key: v, unhashable_col
+ -> Sort
+ Sort Key: v, unhashable_col
+ -> Seq Scan on gstest4
+(8 rows)
+
+-- empty input: first is 0 rows, second 1, third 3 etc.
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a);
+ a | b | sum | count
+---+---+-----+-------
+(0 rows)
+
+explain (costs off)
+ select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a);
+ QUERY PLAN
+--------------------------------
+ HashAggregate
+ Hash Key: a, b
+ Hash Key: a
+ -> Seq Scan on gstest_empty
+(4 rows)
+
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),());
+ a | b | sum | count
+---+---+-----+-------
+ | | | 0
+(1 row)
+
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),());
+ a | b | sum | count
+---+---+-----+-------
+ | | | 0
+ | | | 0
+ | | | 0
+(3 rows)
+
+explain (costs off)
+ select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),());
+ QUERY PLAN
+--------------------------------
+ MixedAggregate
+ Hash Key: a, b
+ Group Key: ()
+ Group Key: ()
+ Group Key: ()
+ -> Seq Scan on gstest_empty
+(6 rows)
+
+select sum(v), count(*) from gstest_empty group by grouping sets ((),(),());
+ sum | count
+-----+-------
+ | 0
+ | 0
+ | 0
+(3 rows)
+
+explain (costs off)
+ select sum(v), count(*) from gstest_empty group by grouping sets ((),(),());
+ QUERY PLAN
+--------------------------------
+ Aggregate
+ Group Key: ()
+ Group Key: ()
+ Group Key: ()
+ -> Seq Scan on gstest_empty
+(5 rows)
+
+-- check that functionally dependent cols are not nulled
+select a, d, grouping(a,b,c)
+ from gstest3
+ group by grouping sets ((a,b), (a,c));
+ a | d | grouping
+---+---+----------
+ 1 | 1 | 1
+ 2 | 2 | 1
+ 1 | 1 | 2
+ 2 | 2 | 2
+(4 rows)
+
+explain (costs off)
+ select a, d, grouping(a,b,c)
+ from gstest3
+ group by grouping sets ((a,b), (a,c));
+ QUERY PLAN
+---------------------------
+ HashAggregate
+ Hash Key: a, b
+ Hash Key: a, c
+ -> Seq Scan on gstest3
+(4 rows)
+
+-- simple rescan tests
+select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by grouping sets (a,b);
+ a | b | sum
+---+---+-----
+ 2 | | 6
+ 1 | | 3
+ | 2 | 3
+ | 3 | 3
+ | 1 | 3
+(5 rows)
+
+explain (costs off)
+ select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by grouping sets (a,b);
+ QUERY PLAN
+------------------------------------------
+ HashAggregate
+ Hash Key: gstest_data.a
+ Hash Key: gstest_data.b
+ -> Nested Loop
+ -> Values Scan on "*VALUES*"
+ -> Function Scan on gstest_data
+(6 rows)
+
+select *
+ from (values (1),(2)) v(x),
+ lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s;
+ERROR: aggregate functions are not allowed in FROM clause of their own query level
+LINE 3: lateral (select a, b, sum(v.x) from gstest_data(v.x) ...
+ ^
+explain (costs off)
+ select *
+ from (values (1),(2)) v(x),
+ lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s;
+ERROR: aggregate functions are not allowed in FROM clause of their own query level
+LINE 4: lateral (select a, b, sum(v.x) from gstest_data(v.x...
+ ^
+-- Tests for chained aggregates
+select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
+ a | b | grouping | sum | count | max
+---+---+----------+-----+-------+-----
+ 1 | 1 | 0 | 21 | 2 | 11
+ 1 | 2 | 0 | 25 | 2 | 13
+ 1 | 3 | 0 | 14 | 1 | 14
+ 2 | 3 | 0 | 15 | 1 | 15
+ 3 | 3 | 0 | 16 | 1 | 16
+ 3 | 4 | 0 | 17 | 1 | 17
+ 4 | 1 | 0 | 37 | 2 | 19
+ | | 3 | 21 | 2 | 11
+ | | 3 | 21 | 2 | 11
+ | | 3 | 25 | 2 | 13
+ | | 3 | 25 | 2 | 13
+ | | 3 | 14 | 1 | 14
+ | | 3 | 14 | 1 | 14
+ | | 3 | 15 | 1 | 15
+ | | 3 | 15 | 1 | 15
+ | | 3 | 16 | 1 | 16
+ | | 3 | 16 | 1 | 16
+ | | 3 | 17 | 1 | 17
+ | | 3 | 17 | 1 | 17
+ | | 3 | 37 | 2 | 19
+ | | 3 | 37 | 2 | 19
+(21 rows)
+
+explain (costs off)
+ select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
+ QUERY PLAN
+-------------------------------------------------------------------------------------------
+ Sort
+ Sort Key: (GROUPING("*VALUES*".column1, "*VALUES*".column2)), (max("*VALUES*".column3))
+ -> HashAggregate
+ Hash Key: "*VALUES*".column1, "*VALUES*".column2
+ Hash Key: ("*VALUES*".column1 + 1), ("*VALUES*".column2 + 1)
+ Hash Key: ("*VALUES*".column1 + 2), ("*VALUES*".column2 + 2)
+ -> Values Scan on "*VALUES*"
+(7 rows)
+
+select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
+ from gstest2 group by cube (a,b) order by rsum, a, b;
+ a | b | sum | rsum
+---+---+-----+------
+ 1 | 1 | 8 | 8
+ 1 | 2 | 2 | 10
+ 1 | | 10 | 20
+ 2 | 2 | 2 | 22
+ 2 | | 2 | 24
+ | 1 | 8 | 32
+ | 2 | 4 | 36
+ | | 12 | 48
+(8 rows)
+
+explain (costs off)
+ select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
+ from gstest2 group by cube (a,b) order by rsum, a, b;
+ QUERY PLAN
+---------------------------------------------
+ Sort
+ Sort Key: (sum((sum(c))) OVER (?)), a, b
+ -> WindowAgg
+ -> Sort
+ Sort Key: a, b
+ -> MixedAggregate
+ Hash Key: a, b
+ Hash Key: a
+ Hash Key: b
+ Group Key: ()
+ -> Seq Scan on gstest2
+(11 rows)
+
+select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by cube (a,b) order by a,b;
+ a | b | sum
+---+---+-----
+ 1 | 1 | 1
+ 1 | 2 | 1
+ 1 | 3 | 1
+ 1 | | 3
+ 2 | 1 | 2
+ 2 | 2 | 2
+ 2 | 3 | 2
+ 2 | | 6
+ | 1 | 3
+ | 2 | 3
+ | 3 | 3
+ | | 9
+(12 rows)
+
+explain (costs off)
+ select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by cube (a,b) order by a,b;
+ QUERY PLAN
+------------------------------------------------
+ Sort
+ Sort Key: gstest_data.a, gstest_data.b
+ -> MixedAggregate
+ Hash Key: gstest_data.a, gstest_data.b
+ Hash Key: gstest_data.a
+ Hash Key: gstest_data.b
+ Group Key: ()
+ -> Nested Loop
+ -> Values Scan on "*VALUES*"
+ -> Function Scan on gstest_data
+(10 rows)
+
+-- More rescan tests
+select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by cube(four,ten)) s on true order by v.a,four,ten;
+ a | a | four | ten | count
+---+---+------+-----+-------
+ 1 | 1 | 0 | 0 | 50
+ 1 | 1 | 0 | 2 | 50
+ 1 | 1 | 0 | 4 | 50
+ 1 | 1 | 0 | 6 | 50
+ 1 | 1 | 0 | 8 | 50
+ 1 | 1 | 0 | | 250
+ 1 | 1 | 1 | 1 | 50
+ 1 | 1 | 1 | 3 | 50
+ 1 | 1 | 1 | 5 | 50
+ 1 | 1 | 1 | 7 | 50
+ 1 | 1 | 1 | 9 | 50
+ 1 | 1 | 1 | | 250
+ 1 | 1 | 2 | 0 | 50
+ 1 | 1 | 2 | 2 | 50
+ 1 | 1 | 2 | 4 | 50
+ 1 | 1 | 2 | 6 | 50
+ 1 | 1 | 2 | 8 | 50
+ 1 | 1 | 2 | | 250
+ 1 | 1 | 3 | 1 | 50
+ 1 | 1 | 3 | 3 | 50
+ 1 | 1 | 3 | 5 | 50
+ 1 | 1 | 3 | 7 | 50
+ 1 | 1 | 3 | 9 | 50
+ 1 | 1 | 3 | | 250
+ 1 | 1 | | 0 | 100
+ 1 | 1 | | 1 | 100
+ 1 | 1 | | 2 | 100
+ 1 | 1 | | 3 | 100
+ 1 | 1 | | 4 | 100
+ 1 | 1 | | 5 | 100
+ 1 | 1 | | 6 | 100
+ 1 | 1 | | 7 | 100
+ 1 | 1 | | 8 | 100
+ 1 | 1 | | 9 | 100
+ 1 | 1 | | | 1000
+ 2 | 2 | 0 | 0 | 50
+ 2 | 2 | 0 | 2 | 50
+ 2 | 2 | 0 | 4 | 50
+ 2 | 2 | 0 | 6 | 50
+ 2 | 2 | 0 | 8 | 50
+ 2 | 2 | 0 | | 250
+ 2 | 2 | 1 | 1 | 50
+ 2 | 2 | 1 | 3 | 50
+ 2 | 2 | 1 | 5 | 50
+ 2 | 2 | 1 | 7 | 50
+ 2 | 2 | 1 | 9 | 50
+ 2 | 2 | 1 | | 250
+ 2 | 2 | 2 | 0 | 50
+ 2 | 2 | 2 | 2 | 50
+ 2 | 2 | 2 | 4 | 50
+ 2 | 2 | 2 | 6 | 50
+ 2 | 2 | 2 | 8 | 50
+ 2 | 2 | 2 | | 250
+ 2 | 2 | 3 | 1 | 50
+ 2 | 2 | 3 | 3 | 50
+ 2 | 2 | 3 | 5 | 50
+ 2 | 2 | 3 | 7 | 50
+ 2 | 2 | 3 | 9 | 50
+ 2 | 2 | 3 | | 250
+ 2 | 2 | | 0 | 100
+ 2 | 2 | | 1 | 100
+ 2 | 2 | | 2 | 100
+ 2 | 2 | | 3 | 100
+ 2 | 2 | | 4 | 100
+ 2 | 2 | | 5 | 100
+ 2 | 2 | | 6 | 100
+ 2 | 2 | | 7 | 100
+ 2 | 2 | | 8 | 100
+ 2 | 2 | | 9 | 100
+ 2 | 2 | | | 1000
+(70 rows)
+
+select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by cube(two,four) order by two,four) s1) from (values (1),(2)) v(a);
+ array
+------------------------------------------------------------------------------------------------------------------------------------------------------
+ {"(1,0,0,250)","(1,0,2,250)","(1,0,,500)","(1,1,1,250)","(1,1,3,250)","(1,1,,500)","(1,,0,250)","(1,,1,250)","(1,,2,250)","(1,,3,250)","(1,,,1000)"}
+ {"(2,0,0,250)","(2,0,2,250)","(2,0,,500)","(2,1,1,250)","(2,1,3,250)","(2,1,,500)","(2,,0,250)","(2,,1,250)","(2,,2,250)","(2,,3,250)","(2,,,1000)"}
+(2 rows)
+
+-- Rescan logic changes when there are no empty grouping sets, so test
+-- that too:
+select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by grouping sets(four,ten)) s on true order by v.a,four,ten;
+ a | a | four | ten | count
+---+---+------+-----+-------
+ 1 | 1 | 0 | | 250
+ 1 | 1 | 1 | | 250
+ 1 | 1 | 2 | | 250
+ 1 | 1 | 3 | | 250
+ 1 | 1 | | 0 | 100
+ 1 | 1 | | 1 | 100
+ 1 | 1 | | 2 | 100
+ 1 | 1 | | 3 | 100
+ 1 | 1 | | 4 | 100
+ 1 | 1 | | 5 | 100
+ 1 | 1 | | 6 | 100
+ 1 | 1 | | 7 | 100
+ 1 | 1 | | 8 | 100
+ 1 | 1 | | 9 | 100
+ 2 | 2 | 0 | | 250
+ 2 | 2 | 1 | | 250
+ 2 | 2 | 2 | | 250
+ 2 | 2 | 3 | | 250
+ 2 | 2 | | 0 | 100
+ 2 | 2 | | 1 | 100
+ 2 | 2 | | 2 | 100
+ 2 | 2 | | 3 | 100
+ 2 | 2 | | 4 | 100
+ 2 | 2 | | 5 | 100
+ 2 | 2 | | 6 | 100
+ 2 | 2 | | 7 | 100
+ 2 | 2 | | 8 | 100
+ 2 | 2 | | 9 | 100
+(28 rows)
+
+select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by grouping sets(two,four) order by two,four) s1) from (values (1),(2)) v(a);
+ array
+---------------------------------------------------------------------------------
+ {"(1,0,,500)","(1,1,,500)","(1,,0,250)","(1,,1,250)","(1,,2,250)","(1,,3,250)"}
+ {"(2,0,,500)","(2,1,,500)","(2,,0,250)","(2,,1,250)","(2,,2,250)","(2,,3,250)"}
+(2 rows)
+
+-- test the knapsack
+set work_mem = '64kB';
+explain (costs off)
+ select unique1,
+ count(two), count(four), count(ten),
+ count(hundred), count(thousand), count(twothousand),
+ count(*)
+ from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two);
+ QUERY PLAN
+-------------------------------
+ MixedAggregate
+ Hash Key: two
+ Hash Key: four
+ Hash Key: ten
+ Hash Key: hundred
+ Group Key: unique1
+ Sort Key: twothousand
+ Group Key: twothousand
+ Sort Key: thousand
+ Group Key: thousand
+ -> Sort
+ Sort Key: unique1
+ -> Seq Scan on tenk1
+(13 rows)
+
+explain (costs off)
+ select unique1,
+ count(two), count(four), count(ten),
+ count(hundred), count(thousand), count(twothousand),
+ count(*)
+ from tenk1 group by grouping sets (unique1,hundred,ten,four,two);
+ QUERY PLAN
+-------------------------------
+ MixedAggregate
+ Hash Key: two
+ Hash Key: four
+ Hash Key: ten
+ Hash Key: hundred
+ Group Key: unique1
+ -> Sort
+ Sort Key: unique1
+ -> Seq Scan on tenk1
+(9 rows)
+
+set work_mem = '384kB';
+explain (costs off)
+ select unique1,
+ count(two), count(four), count(ten),
+ count(hundred), count(thousand), count(twothousand),
+ count(*)
+ from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two);
+ QUERY PLAN
+-------------------------------
+ MixedAggregate
+ Hash Key: two
+ Hash Key: four
+ Hash Key: ten
+ Hash Key: hundred
+ Hash Key: thousand
+ Group Key: unique1
+ Sort Key: twothousand
+ Group Key: twothousand
+ -> Sort
+ Sort Key: unique1
+ -> Seq Scan on tenk1
+(12 rows)
+
-- end
diff --git a/src/test/regress/expected/tsrf.out b/src/test/regress/expected/tsrf.out
index 0eeaf9e..33f370b 100644
--- a/src/test/regress/expected/tsrf.out
+++ b/src/test/regress/expected/tsrf.out
@@ -233,6 +233,7 @@ SELECT few.dataa, count(*), min(id), max(id), generate_series(1,3) FROM few GROU
(6 rows)
-- grouping sets are a bit special, they produce NULLs in columns not actually NULL
+set enable_hashagg = false;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab);
dataa | b | g | count
-------+-----+---+-------
@@ -311,46 +312,46 @@ SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(d
b | bar | | 2
b | | | 2
| | | 6
- a | | 1 | 2
- b | | 1 | 1
- | | 1 | 3
- a | | 2 | 2
- b | | 2 | 1
- | | 2 | 3
| bar | 1 | 2
| bar | 2 | 2
| bar | | 4
| foo | 1 | 1
| foo | 2 | 1
| foo | | 2
+ a | | 1 | 2
+ b | | 1 | 1
+ | | 1 | 3
+ a | | 2 | 2
+ b | | 2 | 1
+ | | 2 | 3
(24 rows)
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g) ORDER BY dataa;
dataa | b | g | count
-------+-----+---+-------
+ a | foo | | 2
+ a | | | 4
+ a | | 2 | 2
a | bar | 1 | 1
a | bar | 2 | 1
a | bar | | 2
a | foo | 1 | 1
a | foo | 2 | 1
- a | foo | | 2
- a | | | 4
a | | 1 | 2
- a | | 2 | 2
- b | bar | 2 | 1
+ b | bar | 1 | 1
b | | | 2
b | | 1 | 1
- b | | 2 | 1
- b | bar | 1 | 1
+ b | bar | 2 | 1
b | bar | | 2
- | foo | | 2
- | foo | 1 | 1
+ b | | 2 | 1
| | 2 | 3
+ | | | 6
| bar | 1 | 2
| bar | 2 | 2
- | | | 6
- | foo | 2 | 1
| bar | | 4
+ | foo | 1 | 1
+ | foo | 2 | 1
+ | foo | | 2
| | 1 | 3
(24 rows)
@@ -360,29 +361,30 @@ SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(d
a | bar | 1 | 1
a | foo | 1 | 1
b | bar | 1 | 1
+ | bar | 1 | 2
+ | foo | 1 | 1
a | | 1 | 2
b | | 1 | 1
| | 1 | 3
- | bar | 1 | 2
- | foo | 1 | 1
- | foo | 2 | 1
- | bar | 2 | 2
a | | 2 | 2
b | | 2 | 1
- a | bar | 2 | 1
+ | bar | 2 | 2
| | 2 | 3
+ | foo | 2 | 1
+ a | bar | 2 | 1
a | foo | 2 | 1
b | bar | 2 | 1
- a | foo | | 2
+ a | | | 4
b | bar | | 2
b | | | 2
| | | 6
- a | | | 4
+ a | foo | | 2
+ a | bar | | 2
| bar | | 4
| foo | | 2
- a | bar | | 2
(24 rows)
+reset enable_hashagg;
-- data modification
CREATE TABLE fewmore AS SELECT generate_series(1,3) AS data;
INSERT INTO fewmore VALUES(generate_series(4,5));
diff --git a/src/test/regress/sql/groupingsets.sql b/src/test/regress/sql/groupingsets.sql
index 71cc0ec..da1c577 100644
--- a/src/test/regress/sql/groupingsets.sql
+++ b/src/test/regress/sql/groupingsets.sql
@@ -31,6 +31,14 @@ copy gstest3 from stdin;
\.
alter table gstest3 add primary key (a);
+create temp table gstest4(id integer, v integer,
+ unhashable_col bit(4), unsortable_col xid);
+insert into gstest4
+values (1,1,b'0000','1'), (2,2,b'0001','1'),
+ (3,4,b'0010','2'), (4,8,b'0011','2'),
+ (5,16,b'0000','2'), (6,32,b'0001','2'),
+ (7,64,b'0010','1'), (8,128,b'0011','1');
+
create temp table gstest_empty (a integer, b integer, v integer);
create function gstest_data(v integer, out a integer, out b integer)
@@ -43,8 +51,11 @@ create function gstest_data(v integer, out a integer, out b integer)
-- basic functionality
+set enable_hashagg = false; -- test hashing explicitly later
+
-- simple rollup with multiple plain aggregates, with and without ordering
-- (and with ordering differing from grouping)
+
select a, b, grouping(a,b), sum(v), count(*), max(v)
from gstest1 group by rollup (a,b);
select a, b, grouping(a,b), sum(v), count(*), max(v)
@@ -161,7 +172,7 @@ select a, b from (values (1,2),(2,3)) v(a,b) group by a,b, grouping sets(a);
-- Tests for chained aggregates
select a, b, grouping(a,b), sum(v), count(*), max(v)
- from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2));
+ from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY ROLLUP((e+1),(f+1));
select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY CUBE((e+1),(f+1)) ORDER BY (e+1),(f+1);
select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
@@ -224,4 +235,146 @@ select array(select row(v.a,s1.*) from (select two,four, count(*) from onek grou
select sum(ten) from onek group by two, rollup(four::text) order by 1;
select sum(ten) from onek group by rollup(four::text), two order by 1;
+-- hashing support
+
+set enable_hashagg = true;
+
+-- failure cases
+
+select count(*) from gstest4 group by rollup(unhashable_col,unsortable_col);
+select array_agg(v order by v) from gstest4 group by grouping sets ((id,unsortable_col),(id));
+
+-- simple cases
+
+select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a),(b)) order by 3,1,2;
+explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a),(b)) order by 3,1,2;
+
+select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by cube(a,b) order by 3,1,2;
+explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by cube(a,b) order by 3,1,2;
+
+-- shouldn't try and hash
+explain (costs off)
+ select a, b, grouping(a,b), array_agg(v order by v)
+ from gstest1 group by cube(a,b);
+
+-- mixed hashable/sortable cases
+select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((unhashable_col),(unsortable_col))
+ order by 3, 5;
+explain (costs off)
+ select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((unhashable_col),(unsortable_col))
+ order by 3,5;
+
+select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col))
+ order by 3,5;
+explain (costs off)
+ select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col))
+ order by 3,5;
+
+-- empty input: first is 0 rows, second 1, third 3 etc.
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a);
+explain (costs off)
+ select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a);
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),());
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),());
+explain (costs off)
+ select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),());
+select sum(v), count(*) from gstest_empty group by grouping sets ((),(),());
+explain (costs off)
+ select sum(v), count(*) from gstest_empty group by grouping sets ((),(),());
+
+-- check that functionally dependent cols are not nulled
+select a, d, grouping(a,b,c)
+ from gstest3
+ group by grouping sets ((a,b), (a,c));
+explain (costs off)
+ select a, d, grouping(a,b,c)
+ from gstest3
+ group by grouping sets ((a,b), (a,c));
+
+-- simple rescan tests
+
+select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by grouping sets (a,b);
+explain (costs off)
+ select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by grouping sets (a,b);
+
+select *
+ from (values (1),(2)) v(x),
+ lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s;
+explain (costs off)
+ select *
+ from (values (1),(2)) v(x),
+ lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s;
+
+-- Tests for chained aggregates
+select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
+explain (costs off)
+ select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
+select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
+ from gstest2 group by cube (a,b) order by rsum, a, b;
+explain (costs off)
+ select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
+ from gstest2 group by cube (a,b) order by rsum, a, b;
+select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by cube (a,b) order by a,b;
+explain (costs off)
+ select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by cube (a,b) order by a,b;
+
+-- More rescan tests
+select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by cube(four,ten)) s on true order by v.a,four,ten;
+select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by cube(two,four) order by two,four) s1) from (values (1),(2)) v(a);
+
+-- Rescan logic changes when there are no empty grouping sets, so test
+-- that too:
+select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by grouping sets(four,ten)) s on true order by v.a,four,ten;
+select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by grouping sets(two,four) order by two,four) s1) from (values (1),(2)) v(a);
+
+-- test the knapsack
+
+set work_mem = '64kB';
+explain (costs off)
+ select unique1,
+ count(two), count(four), count(ten),
+ count(hundred), count(thousand), count(twothousand),
+ count(*)
+ from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two);
+explain (costs off)
+ select unique1,
+ count(two), count(four), count(ten),
+ count(hundred), count(thousand), count(twothousand),
+ count(*)
+ from tenk1 group by grouping sets (unique1,hundred,ten,four,two);
+
+set work_mem = '384kB';
+explain (costs off)
+ select unique1,
+ count(two), count(four), count(ten),
+ count(hundred), count(thousand), count(twothousand),
+ count(*)
+ from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two);
+
-- end
diff --git a/src/test/regress/sql/tsrf.sql b/src/test/regress/sql/tsrf.sql
index e627bb9..417e78c 100644
--- a/src/test/regress/sql/tsrf.sql
+++ b/src/test/regress/sql/tsrf.sql
@@ -66,12 +66,14 @@ SELECT SUM(count(*)) OVER(PARTITION BY generate_series(1,3) ORDER BY generate_se
SELECT few.dataa, count(*), min(id), max(id), generate_series(1,3) FROM few GROUP BY few.dataa ORDER BY 5, 1;
-- grouping sets are a bit special, they produce NULLs in columns not actually NULL
+set enable_hashagg = false;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab);
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab) ORDER BY dataa;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab) ORDER BY g;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g);
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g) ORDER BY dataa;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g) ORDER BY g;
+reset enable_hashagg;
-- data modification
CREATE TABLE fewmore AS SELECT generate_series(1,3) AS data;
On Mar 8, 2017, at 2:30 AM, Andrew Gierth <andrew@tao11.riddles.org.uk> wrote:
"Mark" == Mark Dilger <hornschnorter@gmail.com> writes:
Mark> On linux/gcc the patch generates a warning in nodeAgg.c that is
Mark> fairly easy to fix. Using -Werror to make catching the error
Mark> easier, I get:
What gcc version is this exactly?
Linux version 2.6.32-573.18.1.el6.x86_64 (mockbuild@c6b8.bsys.dev.centos.org) (gcc version 4.4.7 20120313 (Red Hat 4.4.7-16) (GCC) ) #1 SMP Tue Feb 9 22:46:17 UTC 2016
On Mar 8, 2017, at 5:47 AM, Andrew Gierth <andrew@tao11.riddles.org.uk> wrote:
"Mark" == Mark Dilger <hornschnorter@gmail.com> writes:
Mark> On my MacBook, `make check-world` gives differences in the
Mark> contrib modules:
Thanks! Latest cleaned up version of patch is attached.
This fixes both the warning and the contrib tests on linux and mac.
Hi Andrew,
Reviewing the patch a bit more, I find it hard to understand the comment about
passing -1 as a flag for finalize_aggregates. Any chance you can spend a bit
more time word-smithing that code comment?
@@ -1559,7 +1647,9 @@ prepare_projection_slot(AggState *aggstate, TupleTableSlot *slot, int currentSet
/*
* Compute the final value of all aggregates for one group.
*
- * This function handles only one grouping set at a time.
+ * This function handles only one grouping set at a time. But in the hash
+ * case, it's the caller's responsibility to have selected the set already, and
+ * we pass in -1 here to flag that and to control the indexing into pertrans.
*
* Results are stored in the output econtext aggvalues/aggnulls.
*/
@@ -1575,10 +1665,11 @@ finalize_aggregates(AggState *aggstate,
int aggno;
int transno;
- Assert(currentSet == 0 ||
- ((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED);
-
- aggstate->current_set = currentSet;
+ /* ugly hack for hash */
+ if (currentSet >= 0)
+ select_current_set(aggstate, currentSet, false);
+ else
+ currentSet = 0;
"Mark" == Mark Dilger <hornschnorter@gmail.com> writes:
Mark> Hi Andrew,
Mark> Reviewing the patch a bit more, I find it hard to understand the
Mark> comment about passing -1 as a flag for finalize_aggregates. Any
Mark> chance you can spend a bit more time word-smithing that code
Mark> comment?
Sure.
How does this look for wording (I'll incorporate it into an updated
patch later):
/*
* Compute the final value of all aggregates for one group.
*
* This function handles only one grouping set at a time. But in the hash
* case, it's the caller's responsibility to have selected the set already, and
* we pass in -1 as currentSet here to flag that; this also changes how we
* handle the indexing into AggStatePerGroup as explained below.
*
* Results are stored in the output econtext aggvalues/aggnulls.
*/
static void
finalize_aggregates(AggState *aggstate,
AggStatePerAgg peraggs,
AggStatePerGroup pergroup,
int currentSet)
{
ExprContext *econtext = aggstate->ss.ps.ps_ExprContext;
Datum *aggvalues = econtext->ecxt_aggvalues;
bool *aggnulls = econtext->ecxt_aggnulls;
int aggno;
int transno;
/*
* If currentSet >= 0, then we're doing sorted grouping, and pergroup is an
* array of size numTrans*numSets which we have to index into using
* currentSet in addition to transno. The caller may not have selected the
* set, so we do that.
*
* If currentSet < 0, then we're doing hashed grouping, and pergroup is an
* array of only numTrans items (since for hashed grouping, each grouping
* set is in a separate hashtable). We rely on the caller having done
* select_current_set, and we fudge currentSet to 0 in order to make the
* same indexing calculations work as for the grouping case.
*/
if (currentSet >= 0)
select_current_set(aggstate, currentSet, false);
else
currentSet = 0;
--
Andrew (irc:RhodiumToad)
On Mar 8, 2017, at 9:40 AM, Andrew Gierth <andrew@tao11.riddles.org.uk> wrote:
"Mark" == Mark Dilger <hornschnorter@gmail.com> writes:
Mark> Hi Andrew,
Mark> Reviewing the patch a bit more, I find it hard to understand the
Mark> comment about passing -1 as a flag for finalize_aggregates. Any
Mark> chance you can spend a bit more time word-smithing that code
Mark> comment?
Sure.
How does this look for wording (I'll incorporate it into an updated
patch later):
Much better. Thanks!
"Mark" == Mark Dilger <hornschnorter@gmail.com> writes:
Mark> Hi Andrew,
Mark> Reviewing the patch a bit more, I find it hard to understand the
Mark> comment about passing -1 as a flag for finalize_aggregates. Any
Mark> chance you can spend a bit more time word-smithing that code
Mark> comment?
Actually, ignore that prior response; I'll refactor it a bit to remove
the need for the comment at all.
--
Andrew (irc:RhodiumToad)
"Mark" == Mark Dilger <hornschnorter@gmail.com> writes:
Mark> Reviewing the patch a bit more, I find it hard to understand the
Mark> comment about passing -1 as a flag for finalize_aggregates. Any
Mark> chance you can spend a bit more time word-smithing that code
Mark> comment?
Sure.
How does this look for wording (I'll incorporate it into an updated
patch later):
Mark> Much better. Thanks!
Here's a version of the patch that obviates that comment entirely, by
moving the differences between the hashed and sorted cases out to the
separate callers of finalize_aggregates.
--
Andrew (irc:RhodiumToad)
Attachments:
gshash24.patchtext/x-patchDownload
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 0b9e3e4..91765c2 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -3148,16 +3148,19 @@ select sum(q.a), count(q.b) from ft4 left join (select 13, avg(ft1.c1), sum(ft2.
-- Grouping sets
explain (verbose, costs off)
select c2, sum(c1) from ft1 where c2 < 3 group by rollup(c2) order by 1 nulls last;
- QUERY PLAN
----------------------------------------------------------------------------------------------------
- GroupAggregate
- Output: c2, sum(c1)
- Group Key: ft1.c2
- Group Key: ()
- -> Foreign Scan on public.ft1
- Output: c2, c1
- Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3)) ORDER BY c2 ASC NULLS LAST
-(7 rows)
+ QUERY PLAN
+------------------------------------------------------------------------------
+ Sort
+ Output: c2, (sum(c1))
+ Sort Key: ft1.c2
+ -> MixedAggregate
+ Output: c2, sum(c1)
+ Hash Key: ft1.c2
+ Group Key: ()
+ -> Foreign Scan on public.ft1
+ Output: c2, c1
+ Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3))
+(10 rows)
select c2, sum(c1) from ft1 where c2 < 3 group by rollup(c2) order by 1 nulls last;
c2 | sum
@@ -3170,16 +3173,19 @@ select c2, sum(c1) from ft1 where c2 < 3 group by rollup(c2) order by 1 nulls la
explain (verbose, costs off)
select c2, sum(c1) from ft1 where c2 < 3 group by cube(c2) order by 1 nulls last;
- QUERY PLAN
----------------------------------------------------------------------------------------------------
- GroupAggregate
- Output: c2, sum(c1)
- Group Key: ft1.c2
- Group Key: ()
- -> Foreign Scan on public.ft1
- Output: c2, c1
- Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3)) ORDER BY c2 ASC NULLS LAST
-(7 rows)
+ QUERY PLAN
+------------------------------------------------------------------------------
+ Sort
+ Output: c2, (sum(c1))
+ Sort Key: ft1.c2
+ -> MixedAggregate
+ Output: c2, sum(c1)
+ Hash Key: ft1.c2
+ Group Key: ()
+ -> Foreign Scan on public.ft1
+ Output: c2, c1
+ Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3))
+(10 rows)
select c2, sum(c1) from ft1 where c2 < 3 group by cube(c2) order by 1 nulls last;
c2 | sum
@@ -3192,20 +3198,19 @@ select c2, sum(c1) from ft1 where c2 < 3 group by cube(c2) order by 1 nulls last
explain (verbose, costs off)
select c2, c6, sum(c1) from ft1 where c2 < 3 group by grouping sets(c2, c6) order by 1 nulls last, 2 nulls last;
- QUERY PLAN
--------------------------------------------------------------------------------------------------------------
+ QUERY PLAN
+----------------------------------------------------------------------------------
Sort
Output: c2, c6, (sum(c1))
Sort Key: ft1.c2, ft1.c6
- -> GroupAggregate
+ -> HashAggregate
Output: c2, c6, sum(c1)
- Group Key: ft1.c2
- Sort Key: ft1.c6
- Group Key: ft1.c6
+ Hash Key: ft1.c2
+ Hash Key: ft1.c6
-> Foreign Scan on public.ft1
Output: c2, c6, c1
- Remote SQL: SELECT "C 1", c2, c6 FROM "S 1"."T 1" WHERE ((c2 < 3)) ORDER BY c2 ASC NULLS LAST
-(11 rows)
+ Remote SQL: SELECT "C 1", c2, c6 FROM "S 1"."T 1" WHERE ((c2 < 3))
+(10 rows)
select c2, c6, sum(c1) from ft1 where c2 < 3 group by grouping sets(c2, c6) order by 1 nulls last, 2 nulls last;
c2 | c6 | sum
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 7a8d36c..132557a 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1000,6 +1000,10 @@ ExplainNode(PlanState *planstate, List *ancestors,
pname = "HashAggregate";
strategy = "Hashed";
break;
+ case AGG_MIXED:
+ pname = "MixedAggregate";
+ strategy = "Mixed";
+ break;
default:
pname = "Aggregate ???";
strategy = "???";
@@ -1943,6 +1947,19 @@ show_grouping_set_keys(PlanState *planstate,
ListCell *lc;
List *gsets = aggnode->groupingSets;
AttrNumber *keycols = aggnode->grpColIdx;
+ const char *keyname;
+ const char *keysetname;
+
+ if (aggnode->aggstrategy == AGG_HASHED || aggnode->aggstrategy == AGG_MIXED)
+ {
+ keyname = "Hash Key";
+ keysetname = "Hash Keys";
+ }
+ else
+ {
+ keyname = "Group Key";
+ keysetname = "Group Keys";
+ }
ExplainOpenGroup("Grouping Set", NULL, true, es);
@@ -1957,7 +1974,7 @@ show_grouping_set_keys(PlanState *planstate,
es->indent++;
}
- ExplainOpenGroup("Group Keys", "Group Keys", false, es);
+ ExplainOpenGroup(keysetname, keysetname, false, es);
foreach(lc, gsets)
{
@@ -1981,12 +1998,12 @@ show_grouping_set_keys(PlanState *planstate,
}
if (!result && es->format == EXPLAIN_FORMAT_TEXT)
- ExplainPropertyText("Group Key", "()", es);
+ ExplainPropertyText(keyname, "()", es);
else
- ExplainPropertyListNested("Group Key", result, es);
+ ExplainPropertyListNested(keyname, result, es);
}
- ExplainCloseGroup("Group Keys", "Group Keys", false, es);
+ ExplainCloseGroup(keysetname, keysetname, false, es);
if (sortnode && es->format == EXPLAIN_FORMAT_TEXT)
es->indent--;
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index aa08152..d255b0e 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -122,12 +122,19 @@
* specific).
*
* Where more complex grouping sets are used, we break them down into
- * "phases", where each phase has a different sort order. During each
- * phase but the last, the input tuples are additionally stored in a
- * tuplesort which is keyed to the next phase's sort order; during each
- * phase but the first, the input tuples are drawn from the previously
- * sorted data. (The sorting of the data for the first phase is handled by
- * the planner, as it might be satisfied by underlying nodes.)
+ * "phases", where each phase has a different sort order (except phase 0
+ * which is reserved for hashing). During each phase but the last, the
+ * input tuples are additionally stored in a tuplesort which is keyed to the
+ * next phase's sort order; during each phase but the first, the input
+ * tuples are drawn from the previously sorted data. (The sorting of the
+ * data for the first phase is handled by the planner, as it might be
+ * satisfied by underlying nodes.)
+ *
+ * Hashing can be mixed with sorted grouping. To do this, we have an
+ * AGG_MIXED strategy that populates the hashtables during the first sorted
+ * phase, and switches to reading them out after completing all sort phases.
+ * We can also support AGG_HASHED with multiple hash tables and no sorting
+ * at all.
*
* From the perspective of aggregate transition and final functions, the
* only issue regarding grouping sets is this: a single call site (flinfo)
@@ -139,8 +146,6 @@
* sensitive to the grouping set for which the aggregate function is
* currently being called.
*
- * TODO: AGG_HASHED doesn't support multiple grouping sets yet.
- *
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
@@ -432,6 +437,7 @@ typedef struct AggStatePerGroupData
*/
typedef struct AggStatePerPhaseData
{
+ AggStrategy aggstrategy; /* strategy for this phase */
int numsets; /* number of grouping sets (or 0) */
int *gset_lengths; /* lengths of grouping sets */
Bitmapset **grouped_cols; /* column groupings for rollup */
@@ -440,7 +446,31 @@ typedef struct AggStatePerPhaseData
Sort *sortnode; /* Sort node for input ordering for phase */
} AggStatePerPhaseData;
+/*
+ * AggStatePerHashData - per-hashtable state
+ *
+ * When doing grouping sets with hashing, we have one of these for each
+ * grouping set. (When doing hashing without grouping sets, we have just one of
+ * them.)
+ */
+
+typedef struct AggStatePerHashData
+{
+ TupleHashTable hashtable; /* hash table with one entry per group */
+ TupleHashIterator hashiter; /* for iterating through hash table */
+ TupleTableSlot *hashslot; /* slot for loading hash table */
+ FmgrInfo *hashfunctions; /* per-grouping-field hash fns */
+ FmgrInfo *eqfunctions; /* per-grouping-field equality fns */
+ int numCols; /* number of hash key columns */
+ int numhashGrpCols; /* number of columns in hash table */
+ int largestGrpColIdx; /* largest col required for hashing */
+ AttrNumber *hashGrpColIdxInput; /* hash col indices in input slot */
+ AttrNumber *hashGrpColIdxHash; /* indices in hashtbl tuples */
+ Agg *aggnode; /* original Agg node, for numGroups etc. */
+} AggStatePerHashData;
+
+static void select_current_set(AggState *aggstate, int setno, bool is_hash);
static void initialize_phase(AggState *aggstate, int newphase);
static TupleTableSlot *fetch_input_tuple(AggState *aggstate);
static void initialize_aggregates(AggState *aggstate,
@@ -449,7 +479,8 @@ static void initialize_aggregates(AggState *aggstate,
static void advance_transition_function(AggState *aggstate,
AggStatePerTrans pertrans,
AggStatePerGroup pergroupstate);
-static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup);
+static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup,
+ AggStatePerGroup *pergroups);
static void advance_combine_function(AggState *aggstate,
AggStatePerTrans pertrans,
AggStatePerGroup pergroupstate);
@@ -473,14 +504,13 @@ static void prepare_projection_slot(AggState *aggstate,
int currentSet);
static void finalize_aggregates(AggState *aggstate,
AggStatePerAgg peragg,
- AggStatePerGroup pergroup,
- int currentSet);
+ AggStatePerGroup pergroup);
static TupleTableSlot *project_aggregates(AggState *aggstate);
static Bitmapset *find_unaggregated_cols(AggState *aggstate);
static bool find_unaggregated_cols_walker(Node *node, Bitmapset **colnos);
static void build_hash_table(AggState *aggstate);
-static TupleHashEntryData *lookup_hash_entry(AggState *aggstate,
- TupleTableSlot *inputslot);
+static TupleHashEntryData *lookup_hash_entry(AggState *aggstate);
+static AggStatePerGroup *lookup_hash_entries(AggState *aggstate);
static TupleTableSlot *agg_retrieve_direct(AggState *aggstate);
static void agg_fill_hash_table(AggState *aggstate);
static TupleTableSlot *agg_retrieve_hash_table(AggState *aggstate);
@@ -501,13 +531,29 @@ static int find_compatible_pertrans(AggState *aggstate, Aggref *newagg,
/*
- * Switch to phase "newphase", which must either be 0 (to reset) or
+ * Select the current grouping set; affects current_set and
+ * curaggcontext.
+ */
+
+static void
+select_current_set(AggState *aggstate, int setno, bool is_hash)
+{
+ if (is_hash)
+ aggstate->curaggcontext = aggstate->hashcontext;
+ else
+ aggstate->curaggcontext = aggstate->aggcontexts[setno];
+
+ aggstate->current_set = setno;
+}
+
+/*
+ * Switch to phase "newphase", which must either be 0 or 1 (to reset) or
* current_phase + 1. Juggle the tuplesorts accordingly.
*/
static void
initialize_phase(AggState *aggstate, int newphase)
{
- Assert(newphase == 0 || newphase == aggstate->current_phase + 1);
+ Assert(newphase <= 1 || newphase == aggstate->current_phase + 1);
/*
* Whatever the previous state, we're now done with whatever input
@@ -519,7 +565,7 @@ initialize_phase(AggState *aggstate, int newphase)
aggstate->sort_in = NULL;
}
- if (newphase == 0)
+ if (newphase <= 1)
{
/*
* Discard any existing output tuplesort.
@@ -546,7 +592,7 @@ initialize_phase(AggState *aggstate, int newphase)
* If this isn't the last phase, we need to sort appropriately for the
* next phase in sequence.
*/
- if (newphase < aggstate->numphases - 1)
+ if (newphase > 0 && newphase < aggstate->numphases - 1)
{
Sort *sortnode = aggstate->phases[newphase + 1].sortnode;
PlanState *outerNode = outerPlanState(aggstate);
@@ -567,7 +613,7 @@ initialize_phase(AggState *aggstate, int newphase)
}
/*
- * Fetch a tuple from either the outer plan (for phase 0) or from the sorter
+ * Fetch a tuple from either the outer plan (for phase 1) or from the sorter
* populated by the previous phase. Copy it to the sorter for the next phase
* if any.
*/
@@ -595,8 +641,8 @@ fetch_input_tuple(AggState *aggstate)
/*
* (Re)Initialize an individual aggregate.
*
- * This function handles only one grouping set (already set in
- * aggstate->current_set).
+ * This function handles only one grouping set, already set in
+ * aggstate->current_set.
*
* When called, CurrentMemoryContext should be the per-query context.
*/
@@ -653,7 +699,7 @@ initialize_aggregate(AggState *aggstate, AggStatePerTrans pertrans,
MemoryContext oldContext;
oldContext = MemoryContextSwitchTo(
- aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+ aggstate->curaggcontext->ecxt_per_tuple_memory);
pergroupstate->transValue = datumCopy(pertrans->initValue,
pertrans->transtypeByVal,
pertrans->transtypeLen);
@@ -676,8 +722,9 @@ initialize_aggregate(AggState *aggstate, AggStatePerTrans pertrans,
*
* If there are multiple grouping sets, we initialize only the first numReset
* of them (the grouping sets are ordered so that the most specific one, which
- * is reset most often, is first). As a convenience, if numReset is < 1, we
- * reinitialize all sets.
+ * is reset most often, is first). As a convenience, if numReset is 0, we
+ * reinitialize all sets. numReset is -1 to initialize a hashtable entry, in
+ * which case the caller must have used select_current_set appropriately.
*
* When called, CurrentMemoryContext should be the per-query context.
*/
@@ -689,25 +736,37 @@ initialize_aggregates(AggState *aggstate,
int transno;
int numGroupingSets = Max(aggstate->phase->numsets, 1);
int setno = 0;
+ int numTrans = aggstate->numtrans;
AggStatePerTrans transstates = aggstate->pertrans;
- if (numReset < 1)
+ if (numReset == 0)
numReset = numGroupingSets;
- for (transno = 0; transno < aggstate->numtrans; transno++)
+ for (transno = 0; transno < numTrans; transno++)
{
AggStatePerTrans pertrans = &transstates[transno];
- for (setno = 0; setno < numReset; setno++)
+ if (numReset < 0)
{
AggStatePerGroup pergroupstate;
- pergroupstate = &pergroup[transno + (setno * (aggstate->numtrans))];
-
- aggstate->current_set = setno;
+ pergroupstate = &pergroup[transno];
initialize_aggregate(aggstate, pertrans, pergroupstate);
}
+ else
+ {
+ for (setno = 0; setno < numReset; setno++)
+ {
+ AggStatePerGroup pergroupstate;
+
+ pergroupstate = &pergroup[transno + (setno * numTrans)];
+
+ select_current_set(aggstate, setno, false);
+
+ initialize_aggregate(aggstate, pertrans, pergroupstate);
+ }
+ }
}
}
@@ -757,7 +816,7 @@ advance_transition_function(AggState *aggstate,
* do not need to pfree the old transValue, since it's NULL.
*/
oldContext = MemoryContextSwitchTo(
- aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+ aggstate->curaggcontext->ecxt_per_tuple_memory);
pergroupstate->transValue = datumCopy(fcinfo->arg[1],
pertrans->transtypeByVal,
pertrans->transtypeLen);
@@ -807,7 +866,7 @@ advance_transition_function(AggState *aggstate,
{
if (!fcinfo->isnull)
{
- MemoryContextSwitchTo(aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+ MemoryContextSwitchTo(aggstate->curaggcontext->ecxt_per_tuple_memory);
if (DatumIsReadWriteExpandedObject(newVal,
false,
pertrans->transtypeLen) &&
@@ -838,17 +897,21 @@ advance_transition_function(AggState *aggstate,
/*
* Advance each aggregate transition state for one input tuple. The input
* tuple has been stored in tmpcontext->ecxt_outertuple, so that it is
- * accessible to ExecEvalExpr. pergroup is the array of per-group structs to
- * use (this might be in a hashtable entry).
+ * accessible to ExecEvalExpr.
+ *
+ * We have two sets of transition states to handle: one for sorted aggregation
+ * and one for hashed; we do them both here, to avoid multiple evaluation of
+ * the inputs.
*
* When called, CurrentMemoryContext should be the per-query context.
*/
static void
-advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
+advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup, AggStatePerGroup *pergroups)
{
int transno;
int setno = 0;
int numGroupingSets = Max(aggstate->phase->numsets, 1);
+ int numHashes = aggstate->num_hashes;
int numTrans = aggstate->numtrans;
TupleTableSlot *slot = aggstate->evalslot;
@@ -880,6 +943,7 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
{
/* DISTINCT and/or ORDER BY case */
Assert(slot->tts_nvalid >= (pertrans->numInputs + inputoff));
+ Assert(!pergroups);
/*
* If the transfn is strict, we want to check for nullity before
@@ -940,13 +1004,36 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
fcinfo->argnull[i + 1] = slot->tts_isnull[i + inputoff];
}
- for (setno = 0; setno < numGroupingSets; setno++)
+ if (pergroup)
{
- AggStatePerGroup pergroupstate = &pergroup[transno + (setno * numTrans)];
+ /* advance transition states for ordered grouping */
- aggstate->current_set = setno;
+ for (setno = 0; setno < numGroupingSets; setno++)
+ {
+ AggStatePerGroup pergroupstate;
+
+ select_current_set(aggstate, setno, false);
- advance_transition_function(aggstate, pertrans, pergroupstate);
+ pergroupstate = &pergroup[transno + (setno * numTrans)];
+
+ advance_transition_function(aggstate, pertrans, pergroupstate);
+ }
+ }
+
+ if (pergroups)
+ {
+ /* advance transition states for hashed grouping */
+
+ for (setno = 0; setno < numHashes; setno++)
+ {
+ AggStatePerGroup pergroupstate;
+
+ select_current_set(aggstate, setno, true);
+
+ pergroupstate = &pergroups[setno][transno];
+
+ advance_transition_function(aggstate, pertrans, pergroupstate);
+ }
}
}
}
@@ -967,7 +1054,7 @@ combine_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
TupleTableSlot *slot;
/* combine not supported with grouping sets */
- Assert(aggstate->phase->numsets == 0);
+ Assert(aggstate->phase->numsets <= 1);
/* compute input for all aggregates */
slot = ExecProject(aggstate->evalproj);
@@ -1060,7 +1147,7 @@ advance_combine_function(AggState *aggstate,
if (!pertrans->transtypeByVal)
{
oldContext = MemoryContextSwitchTo(
- aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+ aggstate->curaggcontext->ecxt_per_tuple_memory);
pergroupstate->transValue = datumCopy(fcinfo->arg[1],
pertrans->transtypeByVal,
pertrans->transtypeLen);
@@ -1105,7 +1192,7 @@ advance_combine_function(AggState *aggstate,
{
if (!fcinfo->isnull)
{
- MemoryContextSwitchTo(aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+ MemoryContextSwitchTo(aggstate->curaggcontext->ecxt_per_tuple_memory);
if (DatumIsReadWriteExpandedObject(newVal,
false,
pertrans->transtypeLen) &&
@@ -1559,15 +1646,16 @@ prepare_projection_slot(AggState *aggstate, TupleTableSlot *slot, int currentSet
/*
* Compute the final value of all aggregates for one group.
*
- * This function handles only one grouping set at a time.
+ * This function handles only one grouping set at a time, which the caller must
+ * have selected. It's also the caller's responsibility to adjust the supplied
+ * pergroup parameter to point to the current set's transvalues.
*
* Results are stored in the output econtext aggvalues/aggnulls.
*/
static void
finalize_aggregates(AggState *aggstate,
AggStatePerAgg peraggs,
- AggStatePerGroup pergroup,
- int currentSet)
+ AggStatePerGroup pergroup)
{
ExprContext *econtext = aggstate->ss.ps.ps_ExprContext;
Datum *aggvalues = econtext->ecxt_aggvalues;
@@ -1575,11 +1663,6 @@ finalize_aggregates(AggState *aggstate,
int aggno;
int transno;
- Assert(currentSet == 0 ||
- ((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED);
-
- aggstate->current_set = currentSet;
-
/*
* If there were any DISTINCT and/or ORDER BY aggregates, sort their
* inputs and run the transition functions.
@@ -1589,11 +1672,12 @@ finalize_aggregates(AggState *aggstate,
AggStatePerTrans pertrans = &aggstate->pertrans[transno];
AggStatePerGroup pergroupstate;
- pergroupstate = &pergroup[transno + (currentSet * (aggstate->numtrans))];
+ pergroupstate = &pergroup[transno];
if (pertrans->numSortCols > 0)
{
- Assert(((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED);
+ Assert(aggstate->aggstrategy != AGG_HASHED &&
+ aggstate->aggstrategy != AGG_MIXED);
if (pertrans->numInputs == 1)
process_ordered_aggregate_single(aggstate,
@@ -1615,7 +1699,7 @@ finalize_aggregates(AggState *aggstate,
int transno = peragg->transno;
AggStatePerGroup pergroupstate;
- pergroupstate = &pergroup[transno + (currentSet * (aggstate->numtrans))];
+ pergroupstate = &pergroup[transno];
if (DO_AGGSPLIT_SKIPFINAL(aggstate->aggsplit))
finalize_partialaggregate(aggstate, peragg, pergroupstate,
@@ -1697,7 +1781,7 @@ find_unaggregated_cols_walker(Node *node, Bitmapset **colnos)
}
/*
- * Initialize the hash table to empty.
+ * Initialize the hash table(s) to empty.
*
* To implement hashed aggregation, we need a hashtable that stores a
* representative tuple and an array of AggStatePerGroup structs for each
@@ -1705,29 +1789,40 @@ find_unaggregated_cols_walker(Node *node, Bitmapset **colnos)
* GROUP BY columns. The per-group data is allocated in lookup_hash_entry(),
* for each entry.
*
- * The hash table always lives in the aggcontext memory context.
+ * We have a separate hashtable and associated perhash data structure for each
+ * grouping set for which we're doing hashing.
+ *
+ * The hash tables always live in the hashcontext's per-tuple memory context
+ * (there is only one of these for all tables together, since they are all
+ * reset at the same time).
*/
static void
build_hash_table(AggState *aggstate)
{
- Agg *node = (Agg *) aggstate->ss.ps.plan;
MemoryContext tmpmem = aggstate->tmpcontext->ecxt_per_tuple_memory;
Size additionalsize;
+ int i;
- Assert(node->aggstrategy == AGG_HASHED);
- Assert(node->numGroups > 0);
+ Assert(aggstate->aggstrategy == AGG_HASHED || aggstate->aggstrategy == AGG_MIXED);
- additionalsize = aggstate->numaggs * sizeof(AggStatePerGroupData);
+ additionalsize = aggstate->numtrans * sizeof(AggStatePerGroupData);
- aggstate->hashtable = BuildTupleHashTable(node->numCols,
- aggstate->hashGrpColIdxHash,
- aggstate->phase->eqfunctions,
- aggstate->hashfunctions,
- node->numGroups,
- additionalsize,
- aggstate->aggcontexts[0]->ecxt_per_tuple_memory,
- tmpmem,
+ for (i = 0; i < aggstate->num_hashes; ++i)
+ {
+ AggStatePerHash perhash = &aggstate->perhash[i];
+
+ Assert(perhash->aggnode->numGroups > 0);
+
+ perhash->hashtable = BuildTupleHashTable(perhash->numCols,
+ perhash->hashGrpColIdxHash,
+ perhash->eqfunctions,
+ perhash->hashfunctions,
+ perhash->aggnode->numGroups,
+ additionalsize,
+ aggstate->hashcontext->ecxt_per_tuple_memory,
+ tmpmem,
DO_AGGSPLIT_SKIPFINAL(aggstate->aggsplit));
+ }
}
/*
@@ -1750,72 +1845,98 @@ build_hash_table(AggState *aggstate)
* the array is preserved over ExecReScanAgg, so we allocate it in the
* per-query context (unlike the hash table itself).
*/
-static List *
+static void
find_hash_columns(AggState *aggstate)
{
- Agg *node = (Agg *) aggstate->ss.ps.plan;
- Bitmapset *colnos;
- List *collist;
- TupleDesc hashDesc;
+ Bitmapset *base_colnos;
List *outerTlist = outerPlanState(aggstate)->plan->targetlist;
- List *hashTlist = NIL;
- int i;
-
- aggstate->largestGrpColIdx = 0;
+ int numHashes = aggstate->num_hashes;
+ int j;
/* Find Vars that will be needed in tlist and qual */
- colnos = find_unaggregated_cols(aggstate);
- /* Add in all the grouping columns */
- for (i = 0; i < node->numCols; i++)
- colnos = bms_add_member(colnos, node->grpColIdx[i]);
- /* Convert to list, using lcons so largest element ends up first */
- collist = NIL;
+ base_colnos = find_unaggregated_cols(aggstate);
- aggstate->hashGrpColIdxInput =
- palloc(bms_num_members(colnos) * sizeof(AttrNumber));
- aggstate->hashGrpColIdxHash =
- palloc(node->numCols * sizeof(AttrNumber));
-
- /*
- * First build mapping for columns directly hashed. These are the first,
- * because they'll be accessed when computing hash values and comparing
- * tuples for exact matches. We also build simple mapping for
- * execGrouping, so it knows where to find the to-be-hashed / compared
- * columns in the input.
- */
- for (i = 0; i < node->numCols; i++)
+ for (j = 0; j < numHashes; ++j)
{
- aggstate->hashGrpColIdxInput[i] = node->grpColIdx[i];
- aggstate->hashGrpColIdxHash[i] = i + 1;
- aggstate->numhashGrpCols++;
- /* delete already mapped columns */
- bms_del_member(colnos, node->grpColIdx[i]);
- }
+ AggStatePerHash perhash = &aggstate->perhash[j];
+ Bitmapset *colnos = bms_copy(base_colnos);
+ AttrNumber *grpColIdx = perhash->aggnode->grpColIdx;
+ List *hashTlist = NIL;
+ TupleDesc hashDesc;
+ int i;
- /* and add the remaining columns */
- while ((i = bms_first_member(colnos)) >= 0)
- {
- aggstate->hashGrpColIdxInput[aggstate->numhashGrpCols] = i;
- aggstate->numhashGrpCols++;
- }
+ perhash->largestGrpColIdx = 0;
- /* and build a tuple descriptor for the hashtable */
- for (i = 0; i < aggstate->numhashGrpCols; i++)
- {
- int varNumber = aggstate->hashGrpColIdxInput[i] - 1;
+ /*
+ * If we're doing grouping sets, then some Vars might be referenced in
+ * tlist/qual for the benefit of other grouping sets, but not needed
+ * when hashing; i.e. prepare_projection_slot will null them out, so
+ * there'd be no point storing them. Use prepare_projection_slot's
+ * logic to determine which.
+ */
+ if (aggstate->phases[0].grouped_cols)
+ {
+ Bitmapset *grouped_cols = aggstate->phases[0].grouped_cols[j];
+ ListCell *lc;
- hashTlist = lappend(hashTlist, list_nth(outerTlist, varNumber));
- aggstate->largestGrpColIdx =
- Max(varNumber + 1, aggstate->largestGrpColIdx);
- }
+ foreach(lc, aggstate->all_grouped_cols)
+ {
+ int attnum = lfirst_int(lc);
- hashDesc = ExecTypeFromTL(hashTlist, false);
- ExecSetSlotDescriptor(aggstate->hashslot, hashDesc);
+ if (!bms_is_member(attnum, grouped_cols))
+ colnos = bms_del_member(colnos, attnum);
+ }
+ }
+ /* Add in all the grouping columns */
+ for (i = 0; i < perhash->numCols; i++)
+ colnos = bms_add_member(colnos, grpColIdx[i]);
- list_free(hashTlist);
- bms_free(colnos);
+ perhash->hashGrpColIdxInput =
+ palloc(bms_num_members(colnos) * sizeof(AttrNumber));
+ perhash->hashGrpColIdxHash =
+ palloc(perhash->numCols * sizeof(AttrNumber));
- return collist;
+ /*
+ * First build mapping for columns directly hashed. These are the
+ * first, because they'll be accessed when computing hash values and
+ * comparing tuples for exact matches. We also build simple mapping
+ * for execGrouping, so it knows where to find the to-be-hashed /
+ * compared columns in the input.
+ */
+ for (i = 0; i < perhash->numCols; i++)
+ {
+ perhash->hashGrpColIdxInput[i] = grpColIdx[i];
+ perhash->hashGrpColIdxHash[i] = i + 1;
+ perhash->numhashGrpCols++;
+ /* delete already mapped columns */
+ bms_del_member(colnos, grpColIdx[i]);
+ }
+
+ /* and add the remaining columns */
+ while ((i = bms_first_member(colnos)) >= 0)
+ {
+ perhash->hashGrpColIdxInput[perhash->numhashGrpCols] = i;
+ perhash->numhashGrpCols++;
+ }
+
+ /* and build a tuple descriptor for the hashtable */
+ for (i = 0; i < perhash->numhashGrpCols; i++)
+ {
+ int varNumber = perhash->hashGrpColIdxInput[i] - 1;
+
+ hashTlist = lappend(hashTlist, list_nth(outerTlist, varNumber));
+ perhash->largestGrpColIdx =
+ Max(varNumber + 1, perhash->largestGrpColIdx);
+ }
+
+ hashDesc = ExecTypeFromTL(hashTlist, false);
+ ExecSetSlotDescriptor(perhash->hashslot, hashDesc);
+
+ list_free(hashTlist);
+ bms_free(colnos);
+ }
+
+ bms_free(base_colnos);
}
/*
@@ -1840,26 +1961,30 @@ hash_agg_entry_size(int numAggs)
}
/*
- * Find or create a hashtable entry for the tuple group containing the
- * given tuple.
+ * Find or create a hashtable entry for the tuple group containing the current
+ * tuple (already set in tmpcontext's outertuple slot), in the current grouping
+ * set (which the caller must have selected - note that initialize_aggregate
+ * depends on this).
*
* When called, CurrentMemoryContext should be the per-query context.
*/
static TupleHashEntryData *
-lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot)
+lookup_hash_entry(AggState *aggstate)
{
- TupleTableSlot *hashslot = aggstate->hashslot;
+ TupleTableSlot *inputslot = aggstate->tmpcontext->ecxt_outertuple;
+ AggStatePerHash perhash = &aggstate->perhash[aggstate->current_set];
+ TupleTableSlot *hashslot = perhash->hashslot;
TupleHashEntryData *entry;
bool isnew;
- int i;
+ int i;
/* transfer just the needed columns into hashslot */
- slot_getsomeattrs(inputslot, aggstate->largestGrpColIdx);
+ slot_getsomeattrs(inputslot, perhash->largestGrpColIdx);
ExecClearTuple(hashslot);
- for (i = 0; i < aggstate->numhashGrpCols; i++)
+ for (i = 0; i < perhash->numhashGrpCols; i++)
{
- int varNumber = aggstate->hashGrpColIdxInput[i] - 1;
+ int varNumber = perhash->hashGrpColIdxInput[i] - 1;
hashslot->tts_values[i] = inputslot->tts_values[varNumber];
hashslot->tts_isnull[i] = inputslot->tts_isnull[varNumber];
@@ -1867,22 +1992,44 @@ lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot)
ExecStoreVirtualTuple(hashslot);
/* find or create the hashtable entry using the filtered tuple */
- entry = LookupTupleHashEntry(aggstate->hashtable, hashslot, &isnew);
+ entry = LookupTupleHashEntry(perhash->hashtable, hashslot, &isnew);
if (isnew)
{
entry->additional = (AggStatePerGroup)
- MemoryContextAlloc(aggstate->hashtable->tablecxt,
+ MemoryContextAlloc(perhash->hashtable->tablecxt,
sizeof(AggStatePerGroupData) * aggstate->numtrans);
/* initialize aggregates for new tuple group */
initialize_aggregates(aggstate, (AggStatePerGroup) entry->additional,
- 0);
+ -1);
}
return entry;
}
/*
+ * Look up hash entries for the current tuple in all hashed grouping sets,
+ * returning an array of pergroup pointers suitable for advance_aggregates.
+ *
+ * Be aware that lookup_hash_entry can reset the tmpcontext.
+ */
+static AggStatePerGroup *
+lookup_hash_entries(AggState *aggstate)
+{
+ int numHashes = aggstate->num_hashes;
+ AggStatePerGroup *pergroup = aggstate->hash_pergroup;
+ int setno;
+
+ for (setno = 0; setno < numHashes; setno++)
+ {
+ select_current_set(aggstate, setno, true);
+ pergroup[setno] = lookup_hash_entry(aggstate)->additional;
+ }
+
+ return pergroup;
+}
+
+/*
* ExecAgg -
*
* ExecAgg receives tuples from its outer subplan and aggregates over
@@ -1898,19 +2045,22 @@ lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot)
TupleTableSlot *
ExecAgg(AggState *node)
{
- TupleTableSlot *result;
+ TupleTableSlot *result = NULL;
if (!node->agg_done)
{
/* Dispatch based on strategy */
- switch (node->phase->aggnode->aggstrategy)
+ switch (node->phase->aggstrategy)
{
case AGG_HASHED:
if (!node->table_filled)
agg_fill_hash_table(node);
+ /* FALLTHROUGH */
+ case AGG_MIXED:
result = agg_retrieve_hash_table(node);
break;
- default:
+ case AGG_PLAIN:
+ case AGG_SORTED:
result = agg_retrieve_direct(node);
break;
}
@@ -1933,6 +2083,7 @@ agg_retrieve_direct(AggState *aggstate)
ExprContext *tmpcontext;
AggStatePerAgg peragg;
AggStatePerGroup pergroup;
+ AggStatePerGroup *hash_pergroups = NULL;
TupleTableSlot *outerslot;
TupleTableSlot *firstSlot;
TupleTableSlot *result;
@@ -2019,6 +2170,19 @@ agg_retrieve_direct(AggState *aggstate)
node = aggstate->phase->aggnode;
numReset = numGroupingSets;
}
+ else if (aggstate->aggstrategy == AGG_MIXED)
+ {
+ /*
+ * Mixed mode; we've output all the grouped stuff and have
+ * full hashtables, so switch to outputting those.
+ */
+ initialize_phase(aggstate, 0);
+ aggstate->table_filled = true;
+ ResetTupleHashIterator(aggstate->perhash[0].hashtable,
+ &aggstate->perhash[0].hashiter);
+ select_current_set(aggstate, 0, true);
+ return agg_retrieve_hash_table(aggstate);
+ }
else
{
aggstate->agg_done = true;
@@ -2055,7 +2219,7 @@ agg_retrieve_direct(AggState *aggstate)
*----------
*/
if (aggstate->input_done ||
- (node->aggstrategy == AGG_SORTED &&
+ (node->aggstrategy != AGG_PLAIN &&
aggstate->projected_set != -1 &&
aggstate->projected_set < (numGroupingSets - 1) &&
nextSetSize > 0 &&
@@ -2168,10 +2332,22 @@ agg_retrieve_direct(AggState *aggstate)
*/
for (;;)
{
+ /*
+ * During phase 1 only of a mixed agg, we need to update
+ * hashtables as well in advance_aggregates.
+ */
+ if (aggstate->aggstrategy == AGG_MIXED &&
+ aggstate->current_phase == 1)
+ {
+ hash_pergroups = lookup_hash_entries(aggstate);
+ }
+ else
+ hash_pergroups = NULL;
+
if (DO_AGGSPLIT_COMBINE(aggstate->aggsplit))
combine_aggregates(aggstate, pergroup);
else
- advance_aggregates(aggstate, pergroup);
+ advance_aggregates(aggstate, pergroup, hash_pergroups);
/* Reset per-input-tuple context after each tuple */
ResetExprContext(tmpcontext);
@@ -2198,7 +2374,7 @@ agg_retrieve_direct(AggState *aggstate)
* If we are grouping, check whether we've crossed a group
* boundary.
*/
- if (node->aggstrategy == AGG_SORTED)
+ if (node->aggstrategy != AGG_PLAIN)
{
if (!execTuplesMatch(firstSlot,
outerslot,
@@ -2231,7 +2407,11 @@ agg_retrieve_direct(AggState *aggstate)
prepare_projection_slot(aggstate, econtext->ecxt_outertuple, currentSet);
- finalize_aggregates(aggstate, peragg, pergroup, currentSet);
+ select_current_set(aggstate, currentSet, false);
+
+ finalize_aggregates(aggstate,
+ peragg,
+ pergroup + (currentSet * aggstate->numtrans));
/*
* If there's no row to project right now, we must continue rather
@@ -2247,21 +2427,13 @@ agg_retrieve_direct(AggState *aggstate)
}
/*
- * ExecAgg for hashed case: phase 1, read input and build hash table
+ * ExecAgg for hashed case: read input and build hash table
*/
static void
agg_fill_hash_table(AggState *aggstate)
{
- ExprContext *tmpcontext;
- TupleHashEntryData *entry;
TupleTableSlot *outerslot;
-
- /*
- * get state info from node
- *
- * tmpcontext is the per-input-tuple expression context
- */
- tmpcontext = aggstate->tmpcontext;
+ ExprContext *tmpcontext = aggstate->tmpcontext;
/*
* Process each outer-plan tuple, and then fetch the next one, until we
@@ -2269,32 +2441,40 @@ agg_fill_hash_table(AggState *aggstate)
*/
for (;;)
{
+ AggStatePerGroup *pergroups;
+
outerslot = fetch_input_tuple(aggstate);
if (TupIsNull(outerslot))
break;
- /* set up for advance_aggregates call */
+
+ /* set up for lookup_hash_entries and advance_aggregates */
tmpcontext->ecxt_outertuple = outerslot;
- /* Find or build hashtable entry for this tuple's group */
- entry = lookup_hash_entry(aggstate, outerslot);
+ /* Find or build hashtable entries */
+ pergroups = lookup_hash_entries(aggstate);
/* Advance the aggregates */
if (DO_AGGSPLIT_COMBINE(aggstate->aggsplit))
- combine_aggregates(aggstate, (AggStatePerGroup) entry->additional);
+ combine_aggregates(aggstate, pergroups[0]);
else
- advance_aggregates(aggstate, (AggStatePerGroup) entry->additional);
+ advance_aggregates(aggstate, NULL, pergroups);
- /* Reset per-input-tuple context after each tuple */
- ResetExprContext(tmpcontext);
+ /*
+ * Reset per-input-tuple context after each tuple, but note that the
+ * hash lookups do this too
+ */
+ ResetExprContext(aggstate->tmpcontext);
}
aggstate->table_filled = true;
- /* Initialize to walk the hash table */
- ResetTupleHashIterator(aggstate->hashtable, &aggstate->hashiter);
+ /* Initialize to walk the first hash table */
+ select_current_set(aggstate, 0, true);
+ ResetTupleHashIterator(aggstate->perhash[0].hashtable,
+ &aggstate->perhash[0].hashiter);
}
/*
- * ExecAgg for hashed case: phase 2, retrieving groups from hash table
+ * ExecAgg for hashed case: retrieving groups from hash table
*/
static TupleTableSlot *
agg_retrieve_hash_table(AggState *aggstate)
@@ -2305,17 +2485,22 @@ agg_retrieve_hash_table(AggState *aggstate)
TupleHashEntryData *entry;
TupleTableSlot *firstSlot;
TupleTableSlot *result;
- TupleTableSlot *hashslot;
+ AggStatePerHash perhash;
/*
- * get state info from node
+ * get state info from node.
+ *
+ * econtext is the per-output-tuple expression context.
*/
- /* econtext is the per-output-tuple expression context */
econtext = aggstate->ss.ps.ps_ExprContext;
peragg = aggstate->peragg;
firstSlot = aggstate->ss.ss_ScanTupleSlot;
- hashslot = aggstate->hashslot;
+ /*
+ * Note that perhash (and therefore anything accessed through it) can
+ * change inside the loop, as we change between grouping sets.
+ */
+ perhash = &aggstate->perhash[aggstate->current_set];
/*
* We loop retrieving groups until we find one satisfying
@@ -2323,17 +2508,37 @@ agg_retrieve_hash_table(AggState *aggstate)
*/
while (!aggstate->agg_done)
{
- int i;
+ TupleTableSlot *hashslot = perhash->hashslot;
+ int i;
/*
* Find the next entry in the hash table
*/
- entry = ScanTupleHashTable(aggstate->hashtable, &aggstate->hashiter);
+ entry = ScanTupleHashTable(perhash->hashtable, &perhash->hashiter);
if (entry == NULL)
{
- /* No more entries in hashtable, so done */
- aggstate->agg_done = TRUE;
- return NULL;
+ int nextset = aggstate->current_set + 1;
+
+ if (nextset < aggstate->num_hashes)
+ {
+ /*
+ * Switch to next grouping set, reinitialize, and restart the
+ * loop.
+ */
+ select_current_set(aggstate, nextset, true);
+
+ perhash = &aggstate->perhash[aggstate->current_set];
+
+ ResetTupleHashIterator(perhash->hashtable, &perhash->hashiter);
+
+ continue;
+ }
+ else
+ {
+ /* No more hashtables, so done */
+ aggstate->agg_done = TRUE;
+ return NULL;
+ }
}
/*
@@ -2356,9 +2561,9 @@ agg_retrieve_hash_table(AggState *aggstate)
memset(firstSlot->tts_isnull, true,
firstSlot->tts_tupleDescriptor->natts * sizeof(bool));
- for (i = 0; i < aggstate->numhashGrpCols; i++)
+ for (i = 0; i < perhash->numhashGrpCols; i++)
{
- int varNumber = aggstate->hashGrpColIdxInput[i] - 1;
+ int varNumber = perhash->hashGrpColIdxInput[i] - 1;
firstSlot->tts_values[varNumber] = hashslot->tts_values[i];
firstSlot->tts_isnull[varNumber] = hashslot->tts_isnull[i];
@@ -2367,14 +2572,18 @@ agg_retrieve_hash_table(AggState *aggstate)
pergroup = (AggStatePerGroup) entry->additional;
- finalize_aggregates(aggstate, peragg, pergroup, 0);
-
/*
* Use the representative input tuple for any references to
* non-aggregated input columns in the qual and tlist.
*/
econtext->ecxt_outertuple = firstSlot;
+ prepare_projection_slot(aggstate,
+ econtext->ecxt_outertuple,
+ aggstate->current_set);
+
+ finalize_aggregates(aggstate, peragg, pergroup);
+
result = project_aggregates(aggstate);
if (result)
return result;
@@ -2388,7 +2597,36 @@ agg_retrieve_hash_table(AggState *aggstate)
* ExecInitAgg
*
* Creates the run-time information for the agg node produced by the
- * planner and initializes its outer subtree
+ * planner and initializes its outer subtree.
+ *
+ * What we get from the planner is actually one "real" Agg node which is part
+ * of the plan tree proper, but which optionally has an additional list of Agg
+ * nodes hung off the side via the "chain" field. This is because an Agg node
+ * happens to be a convenient representation of all the data we need for
+ * grouping sets.
+ *
+ * For many purposes, we treat the "real" node as if it were just the first
+ * node in the chain. The chain must be ordered such that hashed entries come
+ * before sorted/plain entries; the real node is marked AGG_MIXED if both
+ * types are present (in which case the real node describes one of the hashed
+ * groupings; further AGG_HASHED nodes may optionally follow in the chain,
+ * followed in turn by AGG_SORTED nodes or one AGG_PLAIN node). If the real
+ * node is marked AGG_HASHED or AGG_SORTED, then all the chained nodes must
+ * be of the same type; if it is AGG_PLAIN, there can be no chained nodes.
+ *
+ * We collect all hashed nodes into a single "phase", numbered 0, and create a
+ * sorted phase (numbered 1..n) for each AGG_SORTED or AGG_PLAIN node. Phase
+ * 0 is allocated even if there are no hashes, but remains unused in that
+ * case.
+ *
+ * AGG_HASHED nodes actually refer to only a single grouping set each, because
+ * for each hashed grouping we need a separate grpColIdx and numGroups
+ * estimate. AGG_SORTED nodes represent a "rollup", a list of grouping sets
+ * that share a sort order. Each AGG_SORTED node other than the first one has
+ * an associated Sort node which describes the sort order to be used; the
+ * first sorted node takes its input from the outer subtree, which the planner
+ * has already arranged to provide ordered data.
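+ *
+ * (Hypothetical illustration: a MixedAggregate for the grouping sets
+ * (a,b), (a), (c), () could arrive as a real node marked AGG_MIXED whose
+ * own grpColIdx describes the hashed set (a,b), with a chain of one
+ * AGG_HASHED node for (a) and one AGG_SORTED node for the rollup
+ * {(c), ()}; phase 0 then holds both hashed sets and phase 1 the rollup.)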
+ *
* -----------------
*/
AggState *
@@ -2403,14 +2641,18 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
transno,
aggno;
int phase;
+ int phaseidx;
List *combined_inputeval;
ListCell *l;
Bitmapset *all_grouped_cols = NULL;
int numGroupingSets = 1;
int numPhases;
+ int numHashes;
int column_offset;
int i = 0;
int j = 0;
+ bool use_hashing = (node->aggstrategy == AGG_HASHED ||
+ node->aggstrategy == AGG_MIXED);
/* check for unsupported flags */
Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
@@ -2425,9 +2667,9 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->aggs = NIL;
aggstate->numaggs = 0;
aggstate->numtrans = 0;
+ aggstate->aggstrategy = node->aggstrategy;
aggstate->aggsplit = node->aggsplit;
aggstate->maxsets = 0;
- aggstate->hashfunctions = NULL;
aggstate->projected_set = -1;
aggstate->current_set = 0;
aggstate->peragg = NULL;
@@ -2437,18 +2679,22 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->agg_done = false;
aggstate->pergroup = NULL;
aggstate->grp_firstTuple = NULL;
- aggstate->hashtable = NULL;
aggstate->sort_in = NULL;
aggstate->sort_out = NULL;
/*
+ * phases[0] always exists, but is dummy in sorted/plain mode
+ */
+ numPhases = (use_hashing ? 1 : 2);
+ numHashes = (use_hashing ? 1 : 0);
+
+ /*
* Calculate the maximum number of grouping sets in any phase; this
- * determines the size of some allocations.
+ * determines the size of some allocations. Also calculate the number of
+ * phases, since all hashed/mixed nodes contribute to only a single phase.
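+ *
+ * (For instance, an AGG_MIXED node chaining one AGG_HASHED and one
+ * AGG_SORTED node ends up with numPhases = 2 and numHashes = 2: both
+ * hashed groupings share phase 0, and the sorted rollup becomes phase 1.)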
*/
if (node->groupingSets)
{
- Assert(node->aggstrategy != AGG_HASHED);
-
numGroupingSets = list_length(node->groupingSets);
foreach(l, node->chain)
@@ -2457,22 +2703,32 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
numGroupingSets = Max(numGroupingSets,
list_length(agg->groupingSets));
+
+ /*
+ * additional AGG_HASHED aggs become part of phase 0, but all
+ * others add an extra phase.
+ */
+ if (agg->aggstrategy != AGG_HASHED)
+ ++numPhases;
+ else
+ ++numHashes;
}
}
aggstate->maxsets = numGroupingSets;
- aggstate->numphases = numPhases = 1 + list_length(node->chain);
+ aggstate->numphases = numPhases;
aggstate->aggcontexts = (ExprContext **)
palloc0(sizeof(ExprContext *) * numGroupingSets);
/*
* Create expression contexts. We need three or more, one for
- * per-input-tuple processing, one for per-output-tuple processing, and
- * one for each grouping set. The per-tuple memory context of the
- * per-grouping-set ExprContexts (aggcontexts) replaces the standalone
- * memory context formerly used to hold transition values. We cheat a
- * little by using ExecAssignExprContext() to build all of them.
+ * per-input-tuple processing, one for per-output-tuple processing, one
+ * for all the hashtables, and one for each grouping set. The per-tuple
+ * memory context of the per-grouping-set ExprContexts (aggcontexts)
+ * replaces the standalone memory context formerly used to hold transition
+ * values. We cheat a little by using ExecAssignExprContext() to build
+ * all of them.
*
* NOTE: the details of what is stored in aggcontexts and what is stored
* in the regular per-query memory context are driven by a simple
@@ -2488,14 +2744,21 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->aggcontexts[i] = aggstate->ss.ps.ps_ExprContext;
}
+ if (use_hashing)
+ {
+ ExecAssignExprContext(estate, &aggstate->ss.ps);
+ aggstate->hashcontext = aggstate->ss.ps.ps_ExprContext;
+ }
+
ExecAssignExprContext(estate, &aggstate->ss.ps);
/*
- * tuple table initialization
+ * tuple table initialization.
+ *
+ * For hashtables, we create some additional slots below.
*/
ExecInitScanTupleSlot(estate, &aggstate->ss);
ExecInitResultTupleSlot(estate, &aggstate->ss.ps);
- aggstate->hashslot = ExecInitExtraTupleSlot(estate);
aggstate->sort_slot = ExecInitExtraTupleSlot(estate);
/*
@@ -2560,19 +2823,26 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
* For each phase, prepare grouping set data and fmgr lookup data for
* compare functions. Accumulate all_grouped_cols in passing.
*/
-
aggstate->phases = palloc0(numPhases * sizeof(AggStatePerPhaseData));
- for (phase = 0; phase < numPhases; ++phase)
+ aggstate->num_hashes = numHashes;
+ if (numHashes)
+ {
+ aggstate->perhash = palloc0(sizeof(AggStatePerHashData) * numHashes);
+ aggstate->phases[0].numsets = 0;
+ aggstate->phases[0].gset_lengths = palloc(numHashes * sizeof(int));
+ aggstate->phases[0].grouped_cols = palloc(numHashes * sizeof(Bitmapset *));
+ }
+
+ phase = 0;
+ for (phaseidx = 0; phaseidx <= list_length(node->chain); ++phaseidx)
{
- AggStatePerPhase phasedata = &aggstate->phases[phase];
Agg *aggnode;
Sort *sortnode;
- int num_sets;
- if (phase > 0)
+ if (phaseidx > 0)
{
- aggnode = castNode(Agg, list_nth(node->chain, phase - 1));
+ aggnode = castNode(Agg, list_nth(node->chain, phaseidx - 1));
sortnode = castNode(Sort, aggnode->plan.lefttree);
}
else
@@ -2581,53 +2851,91 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
sortnode = NULL;
}
- phasedata->numsets = num_sets = list_length(aggnode->groupingSets);
+ Assert(phase <= 1 || sortnode);
- if (num_sets)
+ if (aggnode->aggstrategy == AGG_HASHED
+ || aggnode->aggstrategy == AGG_MIXED)
{
- phasedata->gset_lengths = palloc(num_sets * sizeof(int));
- phasedata->grouped_cols = palloc(num_sets * sizeof(Bitmapset *));
+ AggStatePerPhase phasedata = &aggstate->phases[0];
+ AggStatePerHash perhash;
+ Bitmapset *cols = NULL;
- i = 0;
- foreach(l, aggnode->groupingSets)
- {
- int current_length = list_length(lfirst(l));
- Bitmapset *cols = NULL;
+ Assert(phase == 0);
+ i = phasedata->numsets++;
+ perhash = &aggstate->perhash[i];
- /* planner forces this to be correct */
- for (j = 0; j < current_length; ++j)
- cols = bms_add_member(cols, aggnode->grpColIdx[j]);
+ /* phase 0 always points to the "real" Agg in the hash case */
+ phasedata->aggnode = node;
+ phasedata->aggstrategy = node->aggstrategy;
- phasedata->grouped_cols[i] = cols;
- phasedata->gset_lengths[i] = current_length;
- ++i;
- }
+ /* but the actual Agg node representing this hash is saved here */
+ perhash->aggnode = aggnode;
+
+ phasedata->gset_lengths[i] = perhash->numCols = aggnode->numCols;
- all_grouped_cols = bms_add_members(all_grouped_cols,
- phasedata->grouped_cols[0]);
+ for (j = 0; j < aggnode->numCols; ++j)
+ cols = bms_add_member(cols, aggnode->grpColIdx[j]);
+
+ phasedata->grouped_cols[i] = cols;
+
+ all_grouped_cols = bms_add_members(all_grouped_cols, cols);
+ continue;
}
else
{
- Assert(phase == 0);
+ AggStatePerPhase phasedata = &aggstate->phases[++phase];
+ int num_sets;
- phasedata->gset_lengths = NULL;
- phasedata->grouped_cols = NULL;
- }
+ phasedata->numsets = num_sets = list_length(aggnode->groupingSets);
- /*
- * If we are grouping, precompute fmgr lookup data for inner loop.
- */
- if (aggnode->aggstrategy == AGG_SORTED)
- {
- Assert(aggnode->numCols > 0);
+ if (num_sets)
+ {
+ phasedata->gset_lengths = palloc(num_sets * sizeof(int));
+ phasedata->grouped_cols = palloc(num_sets * sizeof(Bitmapset *));
- phasedata->eqfunctions =
- execTuplesMatchPrepare(aggnode->numCols,
- aggnode->grpOperators);
- }
+ i = 0;
+ foreach(l, aggnode->groupingSets)
+ {
+ int current_length = list_length(lfirst(l));
+ Bitmapset *cols = NULL;
+
+ /* planner forces this to be correct */
+ for (j = 0; j < current_length; ++j)
+ cols = bms_add_member(cols, aggnode->grpColIdx[j]);
+
+ phasedata->grouped_cols[i] = cols;
+ phasedata->gset_lengths[i] = current_length;
+
+ ++i;
+ }
- phasedata->aggnode = aggnode;
- phasedata->sortnode = sortnode;
+ all_grouped_cols = bms_add_members(all_grouped_cols,
+ phasedata->grouped_cols[0]);
+ }
+ else
+ {
+ Assert(phaseidx == 0);
+
+ phasedata->gset_lengths = NULL;
+ phasedata->grouped_cols = NULL;
+ }
+
+ /*
+ * If we are grouping, precompute fmgr lookup data for inner loop.
+ */
+ if (aggnode->aggstrategy == AGG_SORTED)
+ {
+ Assert(aggnode->numCols > 0);
+
+ phasedata->eqfunctions =
+ execTuplesMatchPrepare(aggnode->numCols,
+ aggnode->grpOperators);
+ }
+
+ phasedata->aggnode = aggnode;
+ phasedata->aggstrategy = aggnode->aggstrategy;
+ phasedata->sortnode = sortnode;
+ }
}
/*
@@ -2638,13 +2946,6 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->all_grouped_cols = lcons_int(i, aggstate->all_grouped_cols);
/*
- * Initialize current phase-dependent values to initial phase
- */
-
- aggstate->current_phase = 0;
- initialize_phase(aggstate, 0);
-
- /*
* Set up aggregate-result storage in the output expr context, and also
* allocate my private per-agg working storage
*/
@@ -2658,23 +2959,30 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->peragg = peraggs;
aggstate->pertrans = pertransstates;
-
/*
* Hashing can only appear in the initial phase.
*/
- if (node->aggstrategy == AGG_HASHED)
+ if (use_hashing)
{
- find_hash_columns(aggstate);
+ for (i = 0; i < numHashes; ++i)
+ {
+ aggstate->perhash[i].hashslot = ExecInitExtraTupleSlot(estate);
- execTuplesHashPrepare(node->numCols,
- node->grpOperators,
- &aggstate->phases[0].eqfunctions,
- &aggstate->hashfunctions);
+ execTuplesHashPrepare(aggstate->perhash[i].numCols,
+ aggstate->perhash[i].aggnode->grpOperators,
+ &aggstate->perhash[i].eqfunctions,
+ &aggstate->perhash[i].hashfunctions);
+ }
+ /* this is an array of pointers, not structures */
+ aggstate->hash_pergroup = palloc0(sizeof(AggStatePerGroup) * numHashes);
+
+ find_hash_columns(aggstate);
build_hash_table(aggstate);
aggstate->table_filled = false;
}
- else
+
+ if (node->aggstrategy != AGG_HASHED)
{
AggStatePerGroup pergroup;
@@ -2685,6 +2993,26 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->pergroup = pergroup;
}
+ /*
+ * Initialize current phase-dependent values to initial phase. The initial
+ * phase is 1 (first sort pass) for all strategies that use sorting (if
+ * hashing is being done too, then phase 0 is processed last); but if only
+ * hashing is being done, then phase 0 is all there is.
+ */
+
+ if (node->aggstrategy == AGG_HASHED)
+ {
+ aggstate->current_phase = 0;
+ initialize_phase(aggstate, 0);
+ select_current_set(aggstate, 0, true);
+ }
+ else
+ {
+ aggstate->current_phase = 1;
+ initialize_phase(aggstate, 1);
+ select_current_set(aggstate, 0, false);
+ }
+
/* -----------------
* Perform lookups of aggregate function info, and initialize the
* unchanging fields of the per-agg and per-trans data.
@@ -3263,7 +3591,7 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
* We don't implement DISTINCT or ORDER BY aggs in the HASHED case
* (yet)
*/
- Assert(((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED);
+ Assert(aggstate->aggstrategy != AGG_HASHED && aggstate->aggstrategy != AGG_MIXED);
/* If we have only one input, we need its len/byval info. */
if (numInputs == 1)
@@ -3512,6 +3840,8 @@ ExecEndAgg(AggState *node)
/* And ensure any agg shutdown callbacks have been called */
for (setno = 0; setno < numGroupingSets; setno++)
ReScanExprContext(node->aggcontexts[setno]);
+ if (node->hashcontext)
+ ReScanExprContext(node->hashcontext);
/*
* We don't actually free any ExprContexts here (see comment in
@@ -3539,7 +3869,7 @@ ExecReScanAgg(AggState *node)
node->agg_done = false;
- if (aggnode->aggstrategy == AGG_HASHED)
+ if (node->aggstrategy == AGG_HASHED)
{
/*
* In the hashed case, if we haven't yet built the hash table then we
@@ -3559,7 +3889,9 @@ ExecReScanAgg(AggState *node)
if (outerPlan->chgParam == NULL &&
!bms_overlap(node->ss.ps.chgParam, aggnode->aggParams))
{
- ResetTupleHashIterator(node->hashtable, &node->hashiter);
+ ResetTupleHashIterator(node->perhash[0].hashtable,
+ &node->perhash[0].hashiter);
+ select_current_set(node, 0, true);
return;
}
}
@@ -3584,11 +3916,7 @@ ExecReScanAgg(AggState *node)
* ExecReScan already did it. But we do need to reset our per-grouping-set
* contexts, which may have transvalues stored in them. (We use rescan
* rather than just reset because transfns may have registered callbacks
- * that need to be run now.)
- *
- * Note that with AGG_HASHED, the hash table is allocated in a sub-context
- * of the aggcontext. This used to be an issue, but now, resetting a
- * context automatically deletes sub-contexts too.
+ * that need to be run now.) For the AGG_HASHED case, see below.
*/
for (setno = 0; setno < numGroupingSets; setno++)
@@ -3608,13 +3936,22 @@ ExecReScanAgg(AggState *node)
MemSet(econtext->ecxt_aggvalues, 0, sizeof(Datum) * node->numaggs);
MemSet(econtext->ecxt_aggnulls, 0, sizeof(bool) * node->numaggs);
- if (aggnode->aggstrategy == AGG_HASHED)
+ /*
+ * With AGG_HASHED/MIXED, the hash table is allocated in a sub-context of
+ * the hashcontext. This used to be an issue, but now, resetting a context
+ * automatically deletes sub-contexts too.
+ */
+
+ if (node->aggstrategy == AGG_HASHED || node->aggstrategy == AGG_MIXED)
{
+ ReScanExprContext(node->hashcontext);
/* Rebuild an empty hash table */
build_hash_table(node);
node->table_filled = false;
+ /* iterator will be reset when the table is filled */
}
- else
+
+ if (node->aggstrategy != AGG_HASHED)
{
/*
* Reset the per-group state (in particular, mark transvalues null)
@@ -3622,8 +3959,8 @@ ExecReScanAgg(AggState *node)
MemSet(node->pergroup, 0,
sizeof(AggStatePerGroupData) * node->numaggs * numGroupingSets);
- /* reset to phase 0 */
- initialize_phase(node, 0);
+ /* reset to phase 1 */
+ initialize_phase(node, 1);
node->input_done = false;
node->projected_set = -1;
@@ -3664,7 +4001,7 @@ AggCheckCallContext(FunctionCallInfo fcinfo, MemoryContext *aggcontext)
if (aggcontext)
{
AggState *aggstate = ((AggState *) fcinfo->context);
- ExprContext *cxt = aggstate->aggcontexts[aggstate->current_set];
+ ExprContext *cxt = aggstate->curaggcontext;
*aggcontext = cxt->ecxt_per_tuple_memory;
}
@@ -3753,7 +4090,7 @@ AggRegisterCallback(FunctionCallInfo fcinfo,
if (fcinfo->context && IsA(fcinfo->context, AggState))
{
AggState *aggstate = (AggState *) fcinfo->context;
- ExprContext *cxt = aggstate->aggcontexts[aggstate->current_set];
+ ExprContext *cxt = aggstate->curaggcontext;
RegisterExprContextCallback(cxt, func, arg);
diff --git a/src/backend/lib/Makefile b/src/backend/lib/Makefile
index 2d2ba84..f222c6c 100644
--- a/src/backend/lib/Makefile
+++ b/src/backend/lib/Makefile
@@ -12,7 +12,7 @@ subdir = src/backend/lib
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
-OBJS = binaryheap.o bipartite_match.o hyperloglog.o ilist.o pairingheap.o \
- rbtree.o stringinfo.o
+OBJS = binaryheap.o bipartite_match.o hyperloglog.o ilist.o knapsack.o \
+ pairingheap.o rbtree.o stringinfo.o
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/lib/knapsack.c b/src/backend/lib/knapsack.c
new file mode 100644
index 0000000..ddf2b9a
--- /dev/null
+++ b/src/backend/lib/knapsack.c
@@ -0,0 +1,114 @@
+/*-------------------------------------------------------------------------
+ *
+ * knapsack.c
+ * Knapsack problem solver
+ *
+ * Given input vectors of integral item weights (must be >= 0) and values
+ * (double >= 0), compute the set of items which produces the greatest total
+ * value without exceeding a specified total weight; each item is included at
+ * most once (this is the 0/1 knapsack problem). Weight 0 items will always be
+ * included.
+ *
+ * The performance of this algorithm is pseudo-polynomial, O(nW) where W is the
+ * weight limit. To use with non-integral weights or approximate solutions,
+ * the caller should pre-scale the input weights to a suitable range. This
+ * allows approximate solutions in polynomial time (the general case of the
+ * exact problem is NP-hard).
+ *
+ * Copyright (c) 2017, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/lib/knapsack.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <math.h>
+#include <limits.h>
+
+#include "lib/knapsack.h"
+#include "miscadmin.h"
+#include "nodes/bitmapset.h"
+#include "utils/builtins.h"
+#include "utils/memutils.h"
+#include "utils/palloc.h"
+
+/*
+ * DiscreteKnapsack
+ *
+ * The item_values input is optional; if omitted, all the items are assumed to
+ * have value 1.
+ *
+ * Returns a Bitmapset of the 0..(n-1) indexes of the items chosen for
+ * inclusion in the solution.
+ *
+ * This uses the usual dynamic-programming algorithm, adapted to reuse the
+ * memory on each pass (by working from larger weights to smaller). At the
+ * start of pass number i, the values[w] array contains the largest value
+ * computed with total weight <= w, using only items with indices < i; and
+ * sets[w] contains the bitmap of items actually used for that value. (The
+ * bitmapsets are all pre-initialized with an unused high bit so that memory
+ * allocation is done only once.)
+ */
+Bitmapset *
+DiscreteKnapsack(int max_weight, int num_items,
+ int *item_weights, double *item_values)
+{
+ MemoryContext local_ctx = AllocSetContextCreate(CurrentMemoryContext,
+ "Knapsack",
+ ALLOCSET_SMALL_MINSIZE,
+ ALLOCSET_SMALL_INITSIZE,
+ ALLOCSET_SMALL_MAXSIZE);
+ MemoryContext oldctx = MemoryContextSwitchTo(local_ctx);
+ double *values;
+ Bitmapset **sets;
+ Bitmapset *result;
+ int i,
+ j;
+
+ Assert(max_weight >= 0);
+ Assert(num_items > 0 && item_weights);
+
+ values = palloc((1 + max_weight) * sizeof(double));
+ sets = palloc((1 + max_weight) * sizeof(Bitmapset *));
+
+ for (i = 0; i <= max_weight; ++i)
+ {
+ values[i] = 0;
+ sets[i] = bms_make_singleton(num_items);
+ }
+
+ for (i = 0; i < num_items; ++i)
+ {
+ int iw = item_weights[i];
+ double iv = item_values ? item_values[i] : 1;
+
+ for (j = max_weight; j >= iw; --j)
+ {
+ int ow = j - iw;
+
+ if (values[j] <= values[ow] + iv)
+ {
+ /* copy sets[ow] to sets[j] without realloc */
+ if (j != ow)
+ {
+ sets[j] = bms_del_members(sets[j], sets[j]);
+ sets[j] = bms_add_members(sets[j], sets[ow]);
+ }
+
+ sets[j] = bms_add_member(sets[j], i);
+
+ values[j] = values[ow] + iv;
+ }
+ }
+ }
+
+ MemoryContextSwitchTo(oldctx);
+
+ result = bms_del_member(bms_copy(sets[max_weight]), num_items);
+
+ MemoryContextDelete(local_ctx);
+
+ return result;
+}
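+
+/*
+ * Illustrative usage (a sketch, not part of the patch proper): with
+ *
+ *	int weights[] = {3, 4, 2};
+ *	double values[] = {5.0, 6.0, 3.0};
+ *	Bitmapset *chosen = DiscreteKnapsack(6, 3, weights, values);
+ *
+ * chosen comes back as {1, 2}: items 1 and 2 fit exactly (weight 4 + 2 = 6)
+ * and their combined value 9.0 beats any other feasible combination.
+ */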
diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c
index 252af5c..ea1c538 100644
--- a/src/backend/nodes/bitmapset.c
+++ b/src/backend/nodes/bitmapset.c
@@ -21,7 +21,7 @@
#include "postgres.h"
#include "access/hash.h"
-
+#include "nodes/pg_list.h"
#define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD)
#define BITNUM(x) ((x) % BITS_PER_BITMAPWORD)
@@ -458,6 +458,35 @@ bms_overlap(const Bitmapset *a, const Bitmapset *b)
}
/*
+ * bms_overlap_list - does a set overlap an integer list?
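+ *
+ * (For example, a = {2, 5} and b = (1, 5, 9) overlap on 5, so this returns
+ * true; a NULL set or NIL list never overlaps anything.)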
+ */
+bool
+bms_overlap_list(const Bitmapset *a, const List *b)
+{
+ ListCell *lc;
+ int wordnum,
+ bitnum;
+
+ if (a == NULL || b == NIL)
+ return false;
+
+ foreach(lc, b)
+ {
+ int x = lfirst_int(lc);
+
+ if (x < 0)
+ elog(ERROR, "negative bitmapset member not allowed");
+ wordnum = WORDNUM(x);
+ bitnum = BITNUM(x);
+ if (wordnum < a->nwords)
+ if ((a->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0)
+ return true;
+ }
+
+ return false;
+}
+
+/*
* bms_nonempty_difference - do sets have a nonempty difference?
*/
bool
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 825a7b2..a5fe108 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -1907,6 +1907,28 @@ _outAggPath(StringInfo str, const AggPath *node)
}
static void
+_outRollupData(StringInfo str, const RollupData *node)
+{
+ WRITE_NODE_TYPE("ROLLUP");
+
+ WRITE_NODE_FIELD(groupClause);
+ WRITE_NODE_FIELD(gsets);
+ WRITE_NODE_FIELD(gsets_data);
+ WRITE_FLOAT_FIELD(numGroups, "%.0f");
+ WRITE_BOOL_FIELD(hashable);
+ WRITE_BOOL_FIELD(is_hashed);
+}
+
+static void
+_outGroupingSetData(StringInfo str, const GroupingSetData *node)
+{
+ WRITE_NODE_TYPE("GSDATA");
+
+ WRITE_NODE_FIELD(set);
+ WRITE_FLOAT_FIELD(numGroups, "%.0f");
+}
+
+static void
_outGroupingSetsPath(StringInfo str, const GroupingSetsPath *node)
{
WRITE_NODE_TYPE("GROUPINGSETSPATH");
@@ -1914,8 +1936,8 @@ _outGroupingSetsPath(StringInfo str, const GroupingSetsPath *node)
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(subpath);
- WRITE_NODE_FIELD(rollup_groupclauses);
- WRITE_NODE_FIELD(rollup_lists);
+ WRITE_ENUM_FIELD(aggstrategy, AggStrategy);
+ WRITE_NODE_FIELD(rollups);
WRITE_NODE_FIELD(qual);
}
@@ -3869,6 +3891,12 @@ outNode(StringInfo str, const void *obj)
case T_PlannerParamItem:
_outPlannerParamItem(str, obj);
break;
+ case T_RollupData:
+ _outRollupData(str, obj);
+ break;
+ case T_GroupingSetData:
+ _outGroupingSetData(str, obj);
+ break;
case T_ExtensibleNode:
_outExtensibleNode(str, obj);
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 627e3f1..9d8b766 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1809,11 +1809,16 @@ cost_agg(Path *path, PlannerInfo *root,
total_cost = startup_cost + cpu_tuple_cost;
output_tuples = 1;
}
- else if (aggstrategy == AGG_SORTED)
+ else if (aggstrategy == AGG_SORTED || aggstrategy == AGG_MIXED)
{
/* Here we are able to deliver output on-the-fly */
startup_cost = input_startup_cost;
total_cost = input_total_cost;
+ if (aggstrategy == AGG_MIXED && !enable_hashagg)
+ {
+ startup_cost += disable_cost;
+ total_cost += disable_cost;
+ }
/* calcs phrased this way to match HASHED case, see note above */
total_cost += aggcosts->transCost.startup;
total_cost += aggcosts->transCost.per_tuple * input_tuples;
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 8f8663c..0d951f4 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -1721,18 +1721,15 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
{
Agg *plan;
Plan *subplan;
- List *rollup_groupclauses = best_path->rollup_groupclauses;
- List *rollup_lists = best_path->rollup_lists;
+ List *rollups = best_path->rollups;
AttrNumber *grouping_map;
int maxref;
List *chain;
- ListCell *lc,
- *lc2;
+ ListCell *lc;
/* Shouldn't get here without grouping sets */
Assert(root->parse->groupingSets);
- Assert(rollup_lists != NIL);
- Assert(list_length(rollup_lists) == list_length(rollup_groupclauses));
+ Assert(rollups != NIL);
/*
* Agg can project, so no need to be terribly picky about child tlist, but
@@ -1784,72 +1781,86 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
* costs will be shown by EXPLAIN.
*/
chain = NIL;
- if (list_length(rollup_groupclauses) > 1)
+ if (list_length(rollups) > 1)
{
- forboth(lc, rollup_groupclauses, lc2, rollup_lists)
+ ListCell *lc2 = lnext(list_head(rollups));
+ bool is_first_sort = ((RollupData *) linitial(rollups))->is_hashed;
+
+ for_each_cell(lc, lc2)
{
- List *groupClause = (List *) lfirst(lc);
- List *gsets = (List *) lfirst(lc2);
+ RollupData *rollup = lfirst(lc);
AttrNumber *new_grpColIdx;
- Plan *sort_plan;
+ Plan *sort_plan = NULL;
Plan *agg_plan;
+ AggStrategy strat;
- /* We want to iterate over all but the last rollup list elements */
- if (lnext(lc) == NULL)
- break;
+ new_grpColIdx = remap_groupColIdx(root, rollup->groupClause);
+
+ if (!rollup->is_hashed && !is_first_sort)
+ {
+ sort_plan = (Plan *)
+ make_sort_from_groupcols(rollup->groupClause,
+ new_grpColIdx,
+ subplan);
+ }
- new_grpColIdx = remap_groupColIdx(root, groupClause);
+ if (!rollup->is_hashed)
+ is_first_sort = false;
- sort_plan = (Plan *)
- make_sort_from_groupcols(groupClause,
- new_grpColIdx,
- subplan);
+ if (rollup->is_hashed)
+ strat = AGG_HASHED;
+ else if (list_length(linitial(rollup->gsets)) == 0)
+ strat = AGG_PLAIN;
+ else
+ strat = AGG_SORTED;
agg_plan = (Plan *) make_agg(NIL,
NIL,
- AGG_SORTED,
+ strat,
AGGSPLIT_SIMPLE,
- list_length((List *) linitial(gsets)),
+ list_length((List *) linitial(rollup->gsets)),
new_grpColIdx,
- extract_grouping_ops(groupClause),
- gsets,
+ extract_grouping_ops(rollup->groupClause),
+ rollup->gsets,
NIL,
- 0, /* numGroups not needed */
+ rollup->numGroups,
sort_plan);
/*
- * Nuke stuff we don't need to avoid bloating debug output.
+ * Remove stuff we don't need to avoid bloating debug output.
*/
- sort_plan->targetlist = NIL;
- sort_plan->lefttree = NULL;
+ if (sort_plan)
+ {
+ sort_plan->targetlist = NIL;
+ sort_plan->lefttree = NULL;
+ }
chain = lappend(chain, agg_plan);
}
}
/*
- * Now make the final Agg node
+ * Now make the real Agg node
*/
{
- List *groupClause = (List *) llast(rollup_groupclauses);
- List *gsets = (List *) llast(rollup_lists);
+ RollupData *rollup = linitial(rollups);
AttrNumber *top_grpColIdx;
int numGroupCols;
- top_grpColIdx = remap_groupColIdx(root, groupClause);
+ top_grpColIdx = remap_groupColIdx(root, rollup->groupClause);
- numGroupCols = list_length((List *) linitial(gsets));
+ numGroupCols = list_length((List *) linitial(rollup->gsets));
plan = make_agg(build_path_tlist(root, &best_path->path),
best_path->qual,
- (numGroupCols > 0) ? AGG_SORTED : AGG_PLAIN,
+ best_path->aggstrategy,
AGGSPLIT_SIMPLE,
numGroupCols,
top_grpColIdx,
- extract_grouping_ops(groupClause),
- gsets,
+ extract_grouping_ops(rollup->groupClause),
+ rollup->gsets,
chain,
- 0, /* numGroups not needed */
+ rollup->numGroups,
subplan);
/* Copy cost data from Path to Plan */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 1636a69..060f02f 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -30,6 +30,7 @@
#include "foreign/fdwapi.h"
#include "miscadmin.h"
#include "lib/bipartite_match.h"
+#include "lib/knapsack.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#ifdef OPTIMIZER_DEBUG
@@ -91,12 +92,31 @@ typedef struct
List *groupClause; /* overrides parse->groupClause */
} standard_qp_extra;
+/*
+ * Data specific to grouping sets
+ */
+
+typedef struct
+{
+ List *rollups;
+ List *hash_sets_idx;
+ double dNumHashGroups;
+ bool any_hashable;
+ Bitmapset *unsortable_refs;
+ Bitmapset *unhashable_refs;
+ List *unsortable_sets;
+ int *tleref_to_colnum_map;
+} grouping_sets_data;
+
/* Local functions */
static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
static void inheritance_planner(PlannerInfo *root);
static void grouping_planner(PlannerInfo *root, bool inheritance_update,
double tuple_fraction);
+static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root);
+static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
+ int *tleref_to_colnum_map);
static void preprocess_rowmarks(PlannerInfo *root);
static double preprocess_limit(PlannerInfo *root,
double tuple_fraction,
@@ -109,8 +129,7 @@ static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
static void standard_qp_callback(PlannerInfo *root, void *extra);
static double get_number_of_groups(PlannerInfo *root,
double path_rows,
- List *rollup_lists,
- List *rollup_groupclauses);
+ grouping_sets_data *gd);
static Size estimate_hashagg_tablesize(Path *path,
const AggClauseCosts *agg_costs,
double dNumGroups);
@@ -118,8 +137,16 @@ static RelOptInfo *create_grouping_paths(PlannerInfo *root,
RelOptInfo *input_rel,
PathTarget *target,
const AggClauseCosts *agg_costs,
- List *rollup_lists,
- List *rollup_groupclauses);
+ grouping_sets_data *gd);
+static void consider_groupingsets_paths(PlannerInfo *root,
+ RelOptInfo *grouped_rel,
+ Path *path,
+ bool is_sorted,
+ bool can_hash,
+ PathTarget *target,
+ grouping_sets_data *gd,
+ const AggClauseCosts *agg_costs,
+ double dNumGroups);
static RelOptInfo *create_window_paths(PlannerInfo *root,
RelOptInfo *input_rel,
PathTarget *input_target,
@@ -1505,8 +1532,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
AggClauseCosts agg_costs;
WindowFuncLists *wflists = NULL;
List *activeWindows = NIL;
- List *rollup_lists = NIL;
- List *rollup_groupclauses = NIL;
+ grouping_sets_data *gset_data = NULL;
standard_qp_extra qp_extra;
/* A recursive query should always have setOperations */
@@ -1515,84 +1541,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
/* Preprocess grouping sets and GROUP BY clause, if any */
if (parse->groupingSets)
{
- int *tleref_to_colnum_map;
- List *sets;
- int maxref;
- ListCell *lc;
- ListCell *lc2;
- ListCell *lc_set;
-
- parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1);
-
- /* Identify max SortGroupRef in groupClause, for array sizing */
- maxref = 0;
- foreach(lc, parse->groupClause)
- {
- SortGroupClause *gc = lfirst(lc);
-
- if (gc->tleSortGroupRef > maxref)
- maxref = gc->tleSortGroupRef;
- }
-
- /* Allocate workspace array for remapping */
- tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
-
- /* Examine the rollup sets */
- sets = extract_rollup_sets(parse->groupingSets);
-
- foreach(lc_set, sets)
- {
- List *current_sets = (List *) lfirst(lc_set);
- List *groupclause;
- int ref;
-
- /*
- * Reorder the current list of grouping sets into correct
- * prefix order. If only one aggregation pass is needed, try
- * to make the list match the ORDER BY clause; if more than
- * one pass is needed, we don't bother with that.
- */
- current_sets = reorder_grouping_sets(current_sets,
- (list_length(sets) == 1
- ? parse->sortClause
- : NIL));
-
- /*
- * Order the groupClause appropriately. If the first grouping
- * set is empty, this can match regular GROUP BY
- * preprocessing, otherwise we have to force the groupClause
- * to match that grouping set's order.
- */
- groupclause = preprocess_groupclause(root,
- linitial(current_sets));
-
- /*
- * Now that we've pinned down an order for the groupClause for
- * this list of grouping sets, we need to remap the entries in
- * the grouping sets from sortgrouprefs to plain indices
- * (0-based) into the groupClause for this collection of
- * grouping sets.
- */
- ref = 0;
- foreach(lc, groupclause)
- {
- SortGroupClause *gc = lfirst(lc);
-
- tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
- }
-
- foreach(lc, current_sets)
- {
- foreach(lc2, (List *) lfirst(lc))
- {
- lfirst_int(lc2) = tleref_to_colnum_map[lfirst_int(lc2)];
- }
- }
-
- /* Save the reordered sets and corresponding groupclauses */
- rollup_lists = lcons(current_sets, rollup_lists);
- rollup_groupclauses = lcons(groupclause, rollup_groupclauses);
- }
+ gset_data = preprocess_grouping_sets(root);
}
else
{
@@ -1686,8 +1635,9 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
/* Set up data needed by standard_qp_callback */
qp_extra.tlist = tlist;
qp_extra.activeWindows = activeWindows;
- qp_extra.groupClause =
- parse->groupingSets ? llast(rollup_groupclauses) : parse->groupClause;
+ qp_extra.groupClause = (gset_data
+ ? (gset_data->rollups ? ((RollupData *) linitial(gset_data->rollups))->groupClause : NIL)
+ : parse->groupClause);
/*
* Generate the best unsorted and presorted paths for the scan/join
@@ -1887,8 +1837,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
current_rel,
grouping_target,
&agg_costs,
- rollup_lists,
- rollup_groupclauses);
+ gset_data);
/* Fix things up if grouping_target contains SRFs */
if (parse->hasTargetSRFs)
adjust_paths_for_srfs(root, current_rel,
@@ -1925,7 +1874,6 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
current_rel = create_distinct_paths(root,
current_rel);
}
-
} /* end of if (setOperations) */
/*
@@ -2078,6 +2026,216 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
}
+static grouping_sets_data *
+preprocess_grouping_sets(PlannerInfo *root)
+{
+ Query *parse = root->parse;
+ List *sets;
+ int maxref = 0;
+ ListCell *lc;
+ ListCell *lc_set;
+ grouping_sets_data *gd = palloc0(sizeof(grouping_sets_data));
+
+ parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1);
+
+ gd->any_hashable = false;
+ gd->unhashable_refs = NULL;
+ gd->unsortable_refs = NULL;
+ gd->unsortable_sets = NIL;
+
+ if (parse->groupClause)
+ {
+ ListCell *lc;
+
+ foreach(lc, parse->groupClause)
+ {
+ SortGroupClause *gc = lfirst(lc);
+ Index ref = gc->tleSortGroupRef;
+
+ if (ref > maxref)
+ maxref = ref;
+
+ if (!gc->hashable)
+ gd->unhashable_refs = bms_add_member(gd->unhashable_refs, ref);
+
+ if (!OidIsValid(gc->sortop))
+ gd->unsortable_refs = bms_add_member(gd->unsortable_refs, ref);
+ }
+ }
+
+ /* Allocate workspace array for remapping */
+ gd->tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
+
+ /*
+ * If we have any unsortable sets, we must extract them before trying to
+ * prepare rollups. Unsortable sets don't go through
+ * reorder_grouping_sets, so we must apply the GroupingSetData annotation
+ * here.
+ */
+ if (!bms_is_empty(gd->unsortable_refs))
+ {
+ List *sortable_sets = NIL;
+
+ foreach(lc, parse->groupingSets)
+ {
+ List *gset = lfirst(lc);
+
+ if (bms_overlap_list(gd->unsortable_refs, gset))
+ {
+ GroupingSetData *gs = makeNode(GroupingSetData);
+
+ gs->set = gset;
+ gd->unsortable_sets = lappend(gd->unsortable_sets, gs);
+
+ /*
+ * We must enforce here that an unsortable set is hashable;
+ * later code assumes this. Parse analysis only checks that
+ * every individual column is either hashable or sortable.
+ *
+ * Note that passing this test doesn't guarantee we can
+ * generate a plan; there might be other showstoppers.
+ */
+
+ if (bms_overlap_list(gd->unhashable_refs, gset))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("could not implement GROUP BY"),
+ errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
+ }
+ else
+ sortable_sets = lappend(sortable_sets, gset);
+ }
+
+ if (sortable_sets)
+ sets = extract_rollup_sets(sortable_sets);
+ else
+ sets = NIL;
+ }
+ else
+ sets = extract_rollup_sets(parse->groupingSets);
+
+ foreach(lc_set, sets)
+ {
+ List *current_sets = (List *) lfirst(lc_set);
+ RollupData *rollup = makeNode(RollupData);
+ GroupingSetData *gs;
+
+ /*
+ * Reorder the current list of grouping sets into correct prefix
+ * order. If only one aggregation pass is needed, try to make the
+ * list match the ORDER BY clause; if more than one pass is needed, we
+ * don't bother with that.
+ *
+ * Note that this reorders the sets from smallest-member-first to
+ * largest-member-first, and applies the GroupingSetData annotations,
+ * though the data will be filled in later.
+ */
+ current_sets = reorder_grouping_sets(current_sets,
+ (list_length(sets) == 1
+ ? parse->sortClause
+ : NIL));
+
+ /*
+ * Get the initial (and therefore largest) grouping set.
+ */
+ gs = linitial(current_sets);
+
+ /*
+ * Order the groupClause appropriately. If the first grouping set is
+ * empty, then the groupClause must also be empty; otherwise we have
+ * to force the groupClause to match that grouping set's order.
+ *
+ * (The first grouping set can be empty even though parse->groupClause
+ * is not empty only if all non-empty grouping sets are unsortable.
+ * The groupClauses for hashed grouping sets are built later on.)
+ */
+ if (gs->set)
+ rollup->groupClause = preprocess_groupclause(root, gs->set);
+ else
+ rollup->groupClause = NIL;
+
+ /*
+ * Is it hashable? We pretend empty sets are hashable even though we
+ * actually force them not to be hashed later. But don't bother if
+ * there's nothing but empty sets.
+ */
+ if (gs->set &&
+ !bms_overlap_list(gd->unhashable_refs, gs->set))
+ {
+ rollup->hashable = true;
+ gd->any_hashable = true;
+ }
+
+ /*
+ * Now that we've pinned down an order for the groupClause for this
+ * list of grouping sets, we need to remap the entries in the grouping
+ * sets from sortgrouprefs to plain indices (0-based) into the
+ * groupClause for this collection of grouping sets. We keep the
+ * original form for later use, though.
+ */
+ rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
+ current_sets,
+ gd->tleref_to_colnum_map);
+ rollup->gsets_data = current_sets;
+
+ gd->rollups = lappend(gd->rollups, rollup);
+ }
+
+ if (gd->unsortable_sets)
+ {
+ /*
+ * We have not yet pinned down a groupclause for this, but we will
+ * need index-based lists for estimation purposes. Construct
+ * hash_sets_idx based on the entire original groupclause for now.
+ */
+ gd->hash_sets_idx = remap_to_groupclause_idx(parse->groupClause,
+ gd->unsortable_sets,
+ gd->tleref_to_colnum_map);
+ gd->any_hashable = true;
+ }
+
+ return gd;
+}
+
+/*
+ * Given a groupclause and a list of GroupingSetData, return equivalent sets
+ * (without annotation) mapped to indexes into the given groupclause.
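+ *
+ * (Hypothetical example: if the groupClause entries have tleSortGroupRefs
+ * (4, 2, 6), a grouping set stored as refs (2 6) is returned as the index
+ * list (1 2).)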
+ */
+static List *
+remap_to_groupclause_idx(List *groupClause,
+ List *gsets,
+ int *tleref_to_colnum_map)
+{
+ int ref = 0;
+ List *result = NIL;
+ ListCell *lc;
+
+ foreach(lc, groupClause)
+ {
+ SortGroupClause *gc = lfirst(lc);
+
+ tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
+ }
+
+ foreach(lc, gsets)
+ {
+ List *set = NIL;
+ ListCell *lc2;
+ GroupingSetData *gs = lfirst(lc);
+
+ foreach(lc2, gs->set)
+ {
+ set = lappend_int(set, tleref_to_colnum_map[lfirst_int(lc2)]);
+ }
+
+ result = lappend(result, set);
+ }
+
+ return result;
+}
+
/*
* Detect whether a plan node is a "dummy" plan created when a relation
* is deemed not to need scanning due to constraint exclusion.
@@ -2992,7 +3150,7 @@ extract_rollup_sets(List *groupingSets)
/*
* Reorder the elements of a list of grouping sets such that they have correct
- * prefix relationships.
+ * prefix relationships. Also inserts the GroupingSetData annotations.
*
* The input must be ordered with smallest sets first; the result is returned
* with largest sets first. Note that the result shares no list substructure
@@ -3015,6 +3173,7 @@ reorder_grouping_sets(List *groupingsets, List *sortclause)
{
List *candidate = lfirst(lc);
List *new_elems = list_difference_int(candidate, previous);
+ GroupingSetData *gs = makeNode(GroupingSetData);
if (list_length(new_elems) > 0)
{
@@ -3042,7 +3201,8 @@ reorder_grouping_sets(List *groupingsets, List *sortclause)
}
}
- result = lcons(list_copy(previous), result);
+ gs->set = list_copy(previous);
+ result = lcons(gs, result);
list_free(new_elems);
}
@@ -3137,15 +3297,16 @@ standard_qp_callback(PlannerInfo *root, void *extra)
* Estimate number of groups produced by grouping clauses (1 if not grouping)
*
* path_rows: number of output rows from scan/join step
- * rollup_lists: list of grouping sets, or NIL if not doing grouping sets
- * rollup_groupclauses: list of grouping clauses for grouping sets,
- * or NIL if not doing grouping sets
+ * gd: grouping sets data, or NULL if not doing grouping sets
+ *
+ * If doing grouping sets, we also annotate the gsets data with the estimates
+ * for each set and each individual rollup list, with a view to later
+ * determining whether some combination of them could be hashed instead.
*/
static double
get_number_of_groups(PlannerInfo *root,
double path_rows,
- List *rollup_lists,
- List *rollup_groupclauses)
+ grouping_sets_data *gd)
{
Query *parse = root->parse;
double dNumGroups;
@@ -3157,28 +3318,60 @@ get_number_of_groups(PlannerInfo *root,
if (parse->groupingSets)
{
/* Add up the estimates for each grouping set */
- ListCell *lc,
- *lc2;
+ ListCell *lc;
+ ListCell *lc2;
dNumGroups = 0;
- forboth(lc, rollup_groupclauses, lc2, rollup_lists)
+
+ foreach(lc, gd->rollups)
{
- List *groupClause = (List *) lfirst(lc);
- List *gsets = (List *) lfirst(lc2);
- ListCell *lc3;
+ RollupData *rollup = lfirst(lc);
+ ListCell *lc;
- groupExprs = get_sortgrouplist_exprs(groupClause,
+ groupExprs = get_sortgrouplist_exprs(rollup->groupClause,
parse->targetList);
- foreach(lc3, gsets)
+ rollup->numGroups = 0.0;
+
+ forboth(lc, rollup->gsets, lc2, rollup->gsets_data)
{
- List *gset = (List *) lfirst(lc3);
+ List *gset = (List *) lfirst(lc);
+ GroupingSetData *gs = lfirst(lc2);
+ double numGroups = estimate_num_groups(root,
+ groupExprs,
+ path_rows,
+ &gset);
+
+ gs->numGroups = numGroups;
+ rollup->numGroups += numGroups;
+ }
+
+ dNumGroups += rollup->numGroups;
+ }
- dNumGroups += estimate_num_groups(root,
- groupExprs,
- path_rows,
- &gset);
+ if (gd->hash_sets_idx)
+ {
+ ListCell *lc;
+
+ gd->dNumHashGroups = 0;
+
+ groupExprs = get_sortgrouplist_exprs(parse->groupClause,
+ parse->targetList);
+
+ forboth(lc, gd->hash_sets_idx, lc2, gd->unsortable_sets)
+ {
+ List *gset = (List *) lfirst(lc);
+ GroupingSetData *gs = lfirst(lc2);
+ double numGroups = estimate_num_groups(root,
+ groupExprs,
+ path_rows,
+ &gset);
+
+ gs->numGroups = numGroups;
+ gd->dNumHashGroups += numGroups;
}
+
+ dNumGroups += gd->dNumHashGroups;
}
}
else
@@ -3214,6 +3407,11 @@ get_number_of_groups(PlannerInfo *root,
* estimate_hashagg_tablesize
* estimate the number of bytes that a hash aggregate hashtable will
* require based on the agg_costs, path width and dNumGroups.
+ *
+ * XXX this may be over-estimating the size now that hashagg knows to omit
+ * unneeded columns from the hashtable. Also for mixed-mode grouping sets,
+ * grouping columns not in the hashed set are counted here even though hashagg
+ * won't store them. Is this a problem?
*/
static Size
estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs,
@@ -3264,8 +3462,7 @@ create_grouping_paths(PlannerInfo *root,
RelOptInfo *input_rel,
PathTarget *target,
const AggClauseCosts *agg_costs,
- List *rollup_lists,
- List *rollup_groupclauses)
+ grouping_sets_data *gd)
{
Query *parse = root->parse;
Path *cheapest_path = input_rel->cheapest_total_path;
@@ -3373,8 +3570,7 @@ create_grouping_paths(PlannerInfo *root,
*/
dNumGroups = get_number_of_groups(root,
cheapest_path->rows,
- rollup_lists,
- rollup_groupclauses);
+ gd);
/*
* Determine whether it's possible to perform sort-based implementations
@@ -3382,15 +3578,22 @@ create_grouping_paths(PlannerInfo *root,
* grouping_is_sortable() is trivially true, and all the
* pathkeys_contained_in() tests will succeed too, so that we'll consider
* every surviving input path.)
+ *
+ * If we have grouping sets, we might be able to sort some but not all of
+ * them; in this case, we need can_sort to be true as long as we must
+ * consider any sorted-input plan.
*/
- can_sort = grouping_is_sortable(parse->groupClause);
+ can_sort = (gd && gd->rollups != NIL)
+ || grouping_is_sortable(parse->groupClause);
/*
* Determine whether we should consider hash-based implementations of
* grouping.
*
- * Hashed aggregation only applies if we're grouping. We currently can't
- * hash if there are grouping sets, though.
+ * Hashed aggregation only applies if we're grouping. If we have grouping
+ * sets, some groups might be hashable but others not; in this case we set
+ * can_hash true as long as there is nothing globally preventing us from
+ * hashing (and we should therefore consider plans with hashes).
*
* Executor doesn't support hashed aggregation with DISTINCT or ORDER BY
* aggregates. (Doing so would imply storing *all* the input values in
@@ -3403,9 +3606,8 @@ create_grouping_paths(PlannerInfo *root,
* other gating conditions, so we want to do it last.
*/
can_hash = (parse->groupClause != NIL &&
- parse->groupingSets == NIL &&
agg_costs->numOrderedAggs == 0 &&
- grouping_is_hashable(parse->groupClause));
+ (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause)));
/*
* If grouped_rel->consider_parallel is true, then paths that we generate
@@ -3471,8 +3673,7 @@ create_grouping_paths(PlannerInfo *root,
/* Estimate number of partial groups. */
dNumPartialGroups = get_number_of_groups(root,
cheapest_partial_path->rows,
- NIL,
- NIL);
+ gd);
/*
* Collect statistics about aggregates for estimating costs of
@@ -3605,20 +3806,9 @@ create_grouping_paths(PlannerInfo *root,
/* Now decide what to stick atop it */
if (parse->groupingSets)
{
- /*
- * We have grouping sets, possibly with aggregation. Make
- * a GroupingSetsPath.
- */
- add_path(grouped_rel, (Path *)
- create_groupingsets_path(root,
- grouped_rel,
- path,
- target,
- (List *) parse->havingQual,
- rollup_lists,
- rollup_groupclauses,
- agg_costs,
- dNumGroups));
+ consider_groupingsets_paths(root, grouped_rel,
+ path, true, can_hash, target,
+ gd, agg_costs, dNumGroups);
}
else if (parse->hasAggs)
{
@@ -3716,33 +3906,45 @@ create_grouping_paths(PlannerInfo *root,
if (can_hash)
{
- hashaggtablesize = estimate_hashagg_tablesize(cheapest_path,
- agg_costs,
- dNumGroups);
-
- /*
- * Provided that the estimated size of the hashtable does not exceed
- * work_mem, we'll generate a HashAgg Path, although if we were unable
- * to sort above, then we'd better generate a Path, so that we at
- * least have one.
- */
- if (hashaggtablesize < work_mem * 1024L ||
- grouped_rel->pathlist == NIL)
+ if (parse->groupingSets)
{
/*
- * We just need an Agg over the cheapest-total input path, since
- * input order won't matter.
+ * Try for a hash-only groupingsets path over unsorted input.
*/
- add_path(grouped_rel, (Path *)
- create_agg_path(root, grouped_rel,
- cheapest_path,
- target,
- AGG_HASHED,
- AGGSPLIT_SIMPLE,
- parse->groupClause,
- (List *) parse->havingQual,
- agg_costs,
- dNumGroups));
+ consider_groupingsets_paths(root, grouped_rel,
+ cheapest_path, false, true, target,
+ gd, agg_costs, dNumGroups);
+ }
+ else
+ {
+ hashaggtablesize = estimate_hashagg_tablesize(cheapest_path,
+ agg_costs,
+ dNumGroups);
+
+ /*
+ * Provided that the estimated size of the hashtable does not
+ * exceed work_mem, we'll generate a HashAgg Path, although if we
+ * were unable to sort above, then we'd better generate a Path, so
+ * that we at least have one.
+ */
+ if (hashaggtablesize < work_mem * 1024L ||
+ grouped_rel->pathlist == NIL)
+ {
+ /*
+ * We just need an Agg over the cheapest-total input path,
+ * since input order won't matter.
+ */
+ add_path(grouped_rel, (Path *)
+ create_agg_path(root, grouped_rel,
+ cheapest_path,
+ target,
+ AGG_HASHED,
+ AGGSPLIT_SIMPLE,
+ parse->groupClause,
+ (List *) parse->havingQual,
+ agg_costs,
+ dNumGroups));
+ }
}
/*
@@ -3811,6 +4013,317 @@ create_grouping_paths(PlannerInfo *root,
return grouped_rel;
}
+
+/*
+ * For a given input path, consider the possible ways of doing grouping sets on
+ * it, by combinations of hashing and sorting. This can be called multiple
+ * times, so it's important that it not scribble on input. No result is
+ * returned, but any generated paths are added to grouped_rel.
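+ *
+ * (As used below: called with is_sorted = true for input paths that are
+ * already suitably sorted, and once with the cheapest path and
+ * is_sorted = false to consider hash-only plans.)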
+ */
+static void
+consider_groupingsets_paths(PlannerInfo *root,
+ RelOptInfo *grouped_rel,
+ Path *path,
+ bool is_sorted,
+ bool can_hash,
+ PathTarget *target,
+ grouping_sets_data *gd,
+ const AggClauseCosts *agg_costs,
+ double dNumGroups)
+{
+ Query *parse = root->parse;
+
+ /*
+ * If we're not being offered sorted input, then only consider plans that
+ * can be done entirely by hashing.
+ *
+ * We can hash everything if it looks like it'll fit in work_mem. But if
+ * the input is actually sorted despite not being advertised as such, we
+ * prefer to make use of that in order to use less memory.
+ *
+ * If none of the grouping sets are sortable, then ignore the work_mem
+ * limit and generate a path anyway, since otherwise we'll just fail.
+ */
+ if (!is_sorted)
+ {
+ List *new_rollups = NIL;
+ RollupData *unhashable_rollup = NULL;
+ List *sets_data;
+ List *empty_sets_data = NIL;
+ List *empty_sets = NIL;
+ ListCell *lc;
+ ListCell *l_start = list_head(gd->rollups);
+ AggStrategy strat = AGG_HASHED;
+ Size hashsize;
+ double exclude_groups = 0.0;
+
+ Assert(can_hash);
+
+ if (pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
+ {
+ unhashable_rollup = lfirst(l_start);
+ exclude_groups = unhashable_rollup->numGroups;
+ l_start = lnext(l_start);
+ }
+
+ hashsize = estimate_hashagg_tablesize(path,
+ agg_costs,
+ dNumGroups - exclude_groups);
+
+ if (hashsize > work_mem * 1024L && gd->rollups)
+ return; /* nope, won't fit */
+
+ /*
+ * We need to burst the existing rollups list into individual grouping
+ * sets and recompute a groupClause for each set.
+ */
+ sets_data = list_copy(gd->unsortable_sets);
+
+ for_each_cell(lc, l_start)
+ {
+ RollupData *rollup = lfirst(lc);
+
+ /*
+ * If we find an unhashable rollup that's not been skipped by the
+ * "actually sorted" check above, we can't cope; we'd need sorted
+ * input (with a different sort order) but we can't get that here.
+ * So bail out; we'll get a valid path from the is_sorted case
+ * instead.
+ */
+ if (!rollup->hashable)
+ return;
+ else
+ sets_data = list_concat(sets_data, list_copy(rollup->gsets_data));
+ }
+ foreach(lc, sets_data)
+ {
+ GroupingSetData *gs = lfirst(lc);
+ List *gset = gs->set;
+ RollupData *rollup;
+
+ if (gset == NIL)
+ {
+ empty_sets_data = lappend(empty_sets_data, gs);
+ empty_sets = lappend(empty_sets, NIL);
+ }
+ else
+ {
+ rollup = makeNode(RollupData);
+
+ rollup->groupClause = preprocess_groupclause(root, gset);
+ rollup->gsets_data = list_make1(gs);
+ rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
+ rollup->gsets_data,
+ gd->tleref_to_colnum_map);
+ rollup->numGroups = gs->numGroups;
+ rollup->hashable = true;
+ rollup->is_hashed = true;
+ new_rollups = lappend(new_rollups, rollup);
+ }
+ }
+
+ /* If we didn't find anything nonempty to hash, then bail. */
+ if (new_rollups == NIL)
+ return;
+
+ /*
+ * If there were empty grouping sets they should have been in the
+ * first rollup.
+ */
+ Assert(!unhashable_rollup || !empty_sets);
+
+ if (unhashable_rollup)
+ {
+ new_rollups = lappend(new_rollups, unhashable_rollup);
+ strat = AGG_MIXED;
+ }
+ else if (empty_sets)
+ {
+ RollupData *rollup = makeNode(RollupData);
+
+ rollup->groupClause = NIL;
+ rollup->gsets_data = empty_sets_data;
+ rollup->gsets = empty_sets;
+ rollup->numGroups = list_length(empty_sets);
+ rollup->hashable = false;
+ rollup->is_hashed = false;
+ new_rollups = lappend(new_rollups, rollup);
+ strat = AGG_MIXED;
+ }
+
+ add_path(grouped_rel, (Path *)
+ create_groupingsets_path(root,
+ grouped_rel,
+ path,
+ target,
+ (List *) parse->havingQual,
+ strat,
+ new_rollups,
+ agg_costs,
+ dNumGroups));
+ return;
+ }
+
+ /*
+ * If we have sorted input but nothing we can do with it, bail.
+ */
+
+ if (list_length(gd->rollups) == 0)
+ return;
+
+ /*
+ * Given sorted input, we try to make two paths: one sorted and one mixed
+ * sort/hash. (We need to try both because hashagg might be disabled, or
+ * some columns might not be sortable.)
+ *
+ * can_hash is passed in as false if some obstacle elsewhere (such as
+ * ordered aggs) means that we shouldn't consider hashing at all.
+ */
+
+ if (can_hash && gd->any_hashable)
+ {
+ List *rollups = NIL;
+ List *hash_sets = list_copy(gd->unsortable_sets);
+ double availspace = (work_mem * 1024.0);
+ ListCell *lc;
+
+ /*
+ * Account first for space needed for groups we can't sort at all.
+ */
+ availspace -= (double) estimate_hashagg_tablesize(path,
+ agg_costs,
+ gd->dNumHashGroups);
+
+ if (availspace > 0 && list_length(gd->rollups) > 1)
+ {
+ double scale;
+ int num_rollups = list_length(gd->rollups);
+ int k_capacity;
+ int *k_weights = palloc(num_rollups * sizeof(int));
+ Bitmapset *hash_items = NULL;
+ int i;
+
+ /*
+ * To use the discrete knapsack, we need to scale the values to a
+ * reasonably small bounded range. We choose to allow a 5% error
+ * margin; we have no more than 4096 rollups in the worst possible
+ * case, which with a 5% error margin will require a bit over 42MB
+ * of workspace. (Anyone wanting to plan queries that complex had
+ * better have the memory for it. In more reasonable cases, with
+ * no more than a couple of dozen rollups, the memory usage will
+ * be negligible.)
+ */
+ scale = availspace / (20.0 * num_rollups);
+ k_capacity = (int) floor(availspace / scale);
+
+ /*
+ * We leave the first rollup out of consideration since it's the
+ * one that matches the input sort order. We assign indexes "i"
+ * to only those entries considered for hashing; the second loop,
+ * below, must use the same condition.
+ */
+ i = 0;
+ for_each_cell(lc, lnext(list_head(gd->rollups)))
+ {
+ RollupData *rollup = lfirst(lc);
+
+ if (rollup->hashable)
+ {
+ double sz = estimate_hashagg_tablesize(path,
+ agg_costs,
+ rollup->numGroups);
+
+ k_weights[i] = (int) floor(sz / scale);
+ ++i;
+ }
+ }
+
+ /*
+ * Apply knapsack algorithm; compute the set of items which
+ * maximizes the value stored (in this case the number of sorts
+ * saved) while keeping the total size (approximately) within
+ * capacity.
+ */
+ if (i > 0)
+ hash_items = DiscreteKnapsack(k_capacity, i, k_weights, NULL);
+
+ if (!bms_is_empty(hash_items))
+ {
+ rollups = list_make1(linitial(gd->rollups));
+
+ i = 0;
+ for_each_cell(lc, lnext(list_head(gd->rollups)))
+ {
+ RollupData *rollup = lfirst(lc);
+
+ if (rollup->hashable)
+ {
+ if (bms_is_member(i, hash_items))
+ hash_sets = list_concat(hash_sets,
+ list_copy(rollup->gsets_data));
+ else
+ rollups = lappend(rollups, rollup);
+ ++i;
+ }
+ else
+ rollups = lappend(rollups, rollup);
+ }
+ }
+ }
+
+ if (!rollups && hash_sets)
+ rollups = list_copy(gd->rollups);
+
+ foreach(lc, hash_sets)
+ {
+ GroupingSetData *gs = lfirst(lc);
+ RollupData *rollup = makeNode(RollupData);
+
+ Assert(gs->set != NIL);
+
+ rollup->groupClause = preprocess_groupclause(root, gs->set);
+ rollup->gsets_data = list_make1(gs);
+ rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
+ rollup->gsets_data,
+ gd->tleref_to_colnum_map);
+ rollup->numGroups = gs->numGroups;
+ rollup->hashable = true;
+ rollup->is_hashed = true;
+ rollups = lcons(rollup, rollups);
+ }
+
+ if (rollups)
+ {
+ add_path(grouped_rel, (Path *)
+ create_groupingsets_path(root,
+ grouped_rel,
+ path,
+ target,
+ (List *) parse->havingQual,
+ AGG_MIXED,
+ rollups,
+ agg_costs,
+ dNumGroups));
+ }
+ }
+
+ /*
+ * Now try the simple sorted case.
+ */
+ if (!gd->unsortable_sets)
+ add_path(grouped_rel, (Path *)
+ create_groupingsets_path(root,
+ grouped_rel,
+ path,
+ target,
+ (List *) parse->havingQual,
+ AGG_SORTED,
+ gd->rollups,
+ agg_costs,
+ dNumGroups));
+}
+
/*
* create_window_paths
*
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 0d925c6..9e30d97 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -2634,10 +2634,9 @@ create_agg_path(PlannerInfo *root,
* 'subpath' is the path representing the source of data
* 'target' is the PathTarget to be computed
* 'having_qual' is the HAVING quals if any
- * 'rollup_lists' is a list of grouping sets
- * 'rollup_groupclauses' is a list of grouping clauses for grouping sets
+ * 'rollups' is a list of RollupData nodes
* 'agg_costs' contains cost info about the aggregate functions to be computed
- * 'numGroups' is the estimated number of groups
+ * 'numGroups' is the estimated total number of groups
*/
GroupingSetsPath *
create_groupingsets_path(PlannerInfo *root,
@@ -2645,13 +2644,15 @@ create_groupingsets_path(PlannerInfo *root,
Path *subpath,
PathTarget *target,
List *having_qual,
- List *rollup_lists,
- List *rollup_groupclauses,
+ AggStrategy aggstrategy,
+ List *rollups,
const AggClauseCosts *agg_costs,
double numGroups)
{
GroupingSetsPath *pathnode = makeNode(GroupingSetsPath);
- int numGroupCols;
+ ListCell *lc;
+ bool is_first = true;
+ bool is_first_sort = true;
/* The topmost generated Plan node will be an Agg */
pathnode->path.pathtype = T_Agg;
@@ -2665,74 +2666,110 @@ create_groupingsets_path(PlannerInfo *root,
pathnode->subpath = subpath;
/*
+ * Simplify callers by downgrading AGG_SORTED to AGG_PLAIN, and AGG_MIXED
+ * to AGG_HASHED, here if possible.
+ */
+ if (aggstrategy == AGG_SORTED &&
+ list_length(rollups) == 1 &&
+ ((RollupData *) linitial(rollups))->groupClause == NIL)
+ aggstrategy = AGG_PLAIN;
+
+ if (aggstrategy == AGG_MIXED &&
+ list_length(rollups) == 1)
+ aggstrategy = AGG_HASHED;
+
+ /*
* Output will be in sorted order by group_pathkeys if, and only if, there
* is a single rollup operation on a non-empty list of grouping
* expressions.
*/
- if (list_length(rollup_groupclauses) == 1 &&
- ((List *) linitial(rollup_groupclauses)) != NIL)
+ if (aggstrategy == AGG_SORTED && list_length(rollups) == 1)
pathnode->path.pathkeys = root->group_pathkeys;
else
pathnode->path.pathkeys = NIL;
- pathnode->rollup_groupclauses = rollup_groupclauses;
- pathnode->rollup_lists = rollup_lists;
+ pathnode->aggstrategy = aggstrategy;
+ pathnode->rollups = rollups;
pathnode->qual = having_qual;
- Assert(rollup_lists != NIL);
- Assert(list_length(rollup_lists) == list_length(rollup_groupclauses));
-
- /* Account for cost of the topmost Agg node */
- numGroupCols = list_length((List *) linitial((List *) llast(rollup_lists)));
-
- cost_agg(&pathnode->path, root,
- (numGroupCols > 0) ? AGG_SORTED : AGG_PLAIN,
- agg_costs,
- numGroupCols,
- numGroups,
- subpath->startup_cost,
- subpath->total_cost,
- subpath->rows);
+ Assert(rollups != NIL);
+ Assert(aggstrategy != AGG_PLAIN || list_length(rollups) == 1);
+ Assert(aggstrategy != AGG_MIXED || list_length(rollups) > 1);
- /*
- * Add in the costs and output rows of the additional sorting/aggregation
- * steps, if any. Only total costs count, since the extra sorts aren't
- * run on startup.
- */
- if (list_length(rollup_lists) > 1)
+ foreach(lc, rollups)
{
- ListCell *lc;
+ RollupData *rollup = lfirst(lc);
+ List *gsets = rollup->gsets;
+ int numGroupCols = list_length(linitial(gsets));
- foreach(lc, rollup_lists)
+ /*
+ * In AGG_SORTED or AGG_PLAIN mode, the first rollup takes the
+ * (already-sorted) input, and following ones do their own sort.
+ *
+ * In AGG_HASHED mode, there is one rollup for each grouping set.
+ *
+ * In AGG_MIXED mode, the first rollups are hashed, the first
+ * non-hashed one takes the (already-sorted) input, and following ones
+ * do their own sort.
+ */
+
+ if (is_first)
+ {
+ cost_agg(&pathnode->path, root,
+ aggstrategy,
+ agg_costs,
+ numGroupCols,
+ rollup->numGroups,
+ subpath->startup_cost,
+ subpath->total_cost,
+ subpath->rows);
+ is_first = false;
+ if (!rollup->is_hashed)
+ is_first_sort = false;
+ }
+ else
{
- List *gsets = (List *) lfirst(lc);
Path sort_path; /* dummy for result of cost_sort */
Path agg_path; /* dummy for result of cost_agg */
- /* We must iterate over all but the last rollup_lists element */
- if (lnext(lc) == NULL)
- break;
-
- /* Account for cost of sort, but don't charge input cost again */
- cost_sort(&sort_path, root, NIL,
- 0.0,
- subpath->rows,
- subpath->pathtarget->width,
- 0.0,
- work_mem,
- -1.0);
-
- /* Account for cost of aggregation */
- numGroupCols = list_length((List *) linitial(gsets));
-
- cost_agg(&agg_path, root,
- AGG_SORTED,
- agg_costs,
- numGroupCols,
- numGroups, /* XXX surely not right for all steps? */
- sort_path.startup_cost,
- sort_path.total_cost,
- sort_path.rows);
+ if (rollup->is_hashed || is_first_sort)
+ {
+ /*
+ * Account for cost of aggregation, but don't charge input
+ * cost again
+ */
+ cost_agg(&agg_path, root,
+ rollup->is_hashed ? AGG_HASHED : AGG_SORTED,
+ agg_costs,
+ numGroupCols,
+ rollup->numGroups,
+ 0.0, 0.0,
+ subpath->rows);
+ if (!rollup->is_hashed)
+ is_first_sort = false;
+ }
+ else
+ {
+ /* Account for cost of sort, but don't charge input cost again */
+ cost_sort(&sort_path, root, NIL,
+ 0.0,
+ subpath->rows,
+ subpath->pathtarget->width,
+ 0.0,
+ work_mem,
+ -1.0);
+
+ /* Account for cost of aggregation */
+
+ cost_agg(&agg_path, root,
+ AGG_SORTED,
+ agg_costs,
+ numGroupCols,
+ rollup->numGroups,
+ sort_path.startup_cost,
+ sort_path.total_cost,
+ sort_path.rows);
+ }
pathnode->path.total_cost += agg_path.total_cost;
pathnode->path.rows += agg_path.rows;
diff --git a/src/include/lib/knapsack.h b/src/include/lib/knapsack.h
new file mode 100644
index 0000000..8d1e6d0
--- /dev/null
+++ b/src/include/lib/knapsack.h
@@ -0,0 +1,17 @@
+/*
+ * knapsack.h
+ *
+ * Copyright (c) 2017, PostgreSQL Global Development Group
+ *
+ * src/include/lib/knapsack.h
+ */
+#ifndef KNAPSACK_H
+#define KNAPSACK_H
+
+#include "postgres.h"
+#include "nodes/bitmapset.h"
+
+extern Bitmapset *DiscreteKnapsack(int max_weight, int num_items,
+ int *item_weights, double *item_values);
+
+#endif /* KNAPSACK_H */
diff --git a/src/include/nodes/bitmapset.h b/src/include/nodes/bitmapset.h
index 4f1910e..109f7b0 100644
--- a/src/include/nodes/bitmapset.h
+++ b/src/include/nodes/bitmapset.h
@@ -21,6 +21,11 @@
#define BITMAPSET_H
/*
+ * Forward decl to save including pg_list.h
+ */
+struct List;
+
+/*
* Data representation
*/
@@ -70,6 +75,7 @@ extern bool bms_is_subset(const Bitmapset *a, const Bitmapset *b);
extern BMS_Comparison bms_subset_compare(const Bitmapset *a, const Bitmapset *b);
extern bool bms_is_member(int x, const Bitmapset *a);
extern bool bms_overlap(const Bitmapset *a, const Bitmapset *b);
+extern bool bms_overlap_list(const Bitmapset *a, const struct List *b);
extern bool bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b);
extern int bms_singleton_member(const Bitmapset *a);
extern bool bms_get_singleton_member(const Bitmapset *a, int *member);
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 6a0d590..cfbb1e5 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1943,6 +1943,7 @@ typedef struct AggStatePerAggData *AggStatePerAgg;
typedef struct AggStatePerTransData *AggStatePerTrans;
typedef struct AggStatePerGroupData *AggStatePerGroup;
typedef struct AggStatePerPhaseData *AggStatePerPhase;
+typedef struct AggStatePerHashData *AggStatePerHash;
typedef struct AggState
{
@@ -1950,15 +1951,17 @@ typedef struct AggState
List *aggs; /* all Aggref nodes in targetlist & quals */
int numaggs; /* length of list (could be zero!) */
int numtrans; /* number of pertrans items */
+ AggStrategy aggstrategy; /* strategy mode */
AggSplit aggsplit; /* agg-splitting mode, see nodes.h */
AggStatePerPhase phase; /* pointer to current phase data */
- int numphases; /* number of phases */
+ int numphases; /* number of phases (including phase 0) */
int current_phase; /* current phase number */
- FmgrInfo *hashfunctions; /* per-grouping-field hash fns */
AggStatePerAgg peragg; /* per-Aggref information */
AggStatePerTrans pertrans; /* per-Trans state information */
+ ExprContext *hashcontext; /* econtext for long-lived hashtable data */
ExprContext **aggcontexts; /* econtexts for long-lived data (per GS) */
ExprContext *tmpcontext; /* econtext for input expressions */
+ ExprContext *curaggcontext; /* currently active aggcontext */
AggStatePerTrans curpertrans; /* currently active trans state */
bool input_done; /* indicates end of input */
bool agg_done; /* indicates completion of Agg scan */
@@ -1970,21 +1973,17 @@ typedef struct AggState
/* These fields are for grouping set phase data */
int maxsets; /* The max number of sets in any phase */
AggStatePerPhase phases; /* array of all phases */
- Tuplesortstate *sort_in; /* sorted input to phases > 0 */
+ Tuplesortstate *sort_in; /* sorted input to phases > 1 */
Tuplesortstate *sort_out; /* input is copied here for next phase */
TupleTableSlot *sort_slot; /* slot for sort results */
/* these fields are used in AGG_PLAIN and AGG_SORTED modes: */
AggStatePerGroup pergroup; /* per-Aggref-per-group working state */
HeapTuple grp_firstTuple; /* copy of first tuple of current group */
- /* these fields are used in AGG_HASHED mode: */
- TupleHashTable hashtable; /* hash table with one entry per group */
- TupleTableSlot *hashslot; /* slot for loading hash table */
- int numhashGrpCols; /* number of columns in hash table */
- int largestGrpColIdx; /* largest column required for hashing */
- AttrNumber *hashGrpColIdxInput; /* and their indices in input slot */
- AttrNumber *hashGrpColIdxHash; /* indices for execGrouping in hashtbl */
+ /* these fields are used in AGG_HASHED and AGG_MIXED modes: */
bool table_filled; /* hash table filled yet? */
- TupleHashIterator hashiter; /* for iterating through hash table */
+ int num_hashes; /* number of hash tables active */
+ AggStatePerHash perhash; /* array of per-hashtable data */
+ AggStatePerGroup *hash_pergroup; /* array of per-group pointers */
/* support for evaluation of agg inputs */
TupleTableSlot *evalslot; /* slot for agg inputs */
ProjectionInfo *evalproj; /* projection machinery */
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 49fa944..bcb071f 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -275,6 +275,8 @@ typedef enum NodeTag
T_PlaceHolderInfo,
T_MinMaxAggInfo,
T_PlannerParamItem,
+ T_RollupData,
+ T_GroupingSetData,
/*
* TAGS FOR MEMORY NODES (memnodes.h)
@@ -734,7 +736,8 @@ typedef enum AggStrategy
{
AGG_PLAIN, /* simple agg across all input rows */
AGG_SORTED, /* grouped agg, input must be sorted */
- AGG_HASHED /* grouped agg, use internal hashtable */
+ AGG_HASHED, /* grouped agg, use internal hashtable */
+ AGG_MIXED /* grouped agg, hash and sort concurrently */
} AggStrategy;
/*
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 7fbb0c2..a629876 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -749,7 +749,7 @@ typedef struct Agg
Oid *grpOperators; /* equality operators to compare with */
long numGroups; /* estimated number of groups in input */
Bitmapset *aggParams; /* IDs of Params used in Aggref inputs */
- /* Note: planner provides numGroups & aggParams only in AGG_HASHED case */
+ /* Note: planner provides numGroups & aggParams only in HASHED/MIXED case */
List *groupingSets; /* grouping sets to use */
List *chain; /* chained Agg/Sort nodes */
} Agg;
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index f7ac6f6..4d0791d 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -1378,17 +1378,37 @@ typedef struct AggPath
} AggPath;
/*
+ * Various annotations used for grouping sets in the planner.
+ */
+
+typedef struct GroupingSetData
+{
+ NodeTag type;
+ List *set; /* grouping set as list of sortgrouprefs */
+ double numGroups; /* est. number of result groups */
+} GroupingSetData;
+
+typedef struct RollupData
+{
+ NodeTag type;
+ List *groupClause; /* applicable subset of parse->groupClause */
+ List *gsets; /* lists of integer indexes into groupClause */
+ List *gsets_data; /* list of GroupingSetData */
+ double numGroups; /* est. number of result groups */
+ bool hashable; /* can be hashed */
+ bool is_hashed; /* to be implemented as a hashagg */
+} RollupData;
+
+/*
* GroupingSetsPath represents a GROUPING SETS aggregation
- *
- * Currently we only support this in sorted not hashed form, so the input
- * must always be appropriately presorted.
*/
+
typedef struct GroupingSetsPath
{
Path path;
Path *subpath; /* path representing input source */
- List *rollup_groupclauses; /* list of lists of SortGroupClause's */
- List *rollup_lists; /* parallel list of lists of grouping sets */
+ AggStrategy aggstrategy; /* basic strategy */
+ List *rollups; /* list of RollupData */
List *qual; /* quals (HAVING quals), if any */
} GroupingSetsPath;
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index f0fe830..72303b4 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -186,8 +186,8 @@ extern GroupingSetsPath *create_groupingsets_path(PlannerInfo *root,
Path *subpath,
PathTarget *target,
List *having_qual,
- List *rollup_lists,
- List *rollup_groupclauses,
+ AggStrategy aggstrategy,
+ List *rollups,
const AggClauseCosts *agg_costs,
double numGroups);
extern MinMaxAggPath *create_minmaxagg_path(PlannerInfo *root,
diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out
index 260ccd5..49e86da 100644
--- a/src/test/regress/expected/groupingsets.out
+++ b/src/test/regress/expected/groupingsets.out
@@ -13,6 +13,13 @@ copy gstest2 from stdin;
create temp table gstest3 (a integer, b integer, c integer, d integer);
copy gstest3 from stdin;
alter table gstest3 add primary key (a);
+create temp table gstest4(id integer, v integer,
+ unhashable_col bit(4), unsortable_col xid);
+insert into gstest4
+values (1,1,b'0000','1'), (2,2,b'0001','1'),
+ (3,4,b'0010','2'), (4,8,b'0011','2'),
+ (5,16,b'0000','2'), (6,32,b'0001','2'),
+ (7,64,b'0010','1'), (8,128,b'0011','1');
create temp table gstest_empty (a integer, b integer, v integer);
create function gstest_data(v integer, out a integer, out b integer)
returns setof record
@@ -22,6 +29,7 @@ create function gstest_data(v integer, out a integer, out b integer)
end;
$f$ language plpgsql;
-- basic functionality
+set enable_hashagg = false; -- test hashing explicitly later
-- simple rollup with multiple plain aggregates, with and without ordering
-- (and with ordering differing from grouping)
select a, b, grouping(a,b), sum(v), count(*), max(v)
@@ -462,7 +470,7 @@ select a, b from (values (1,2),(2,3)) v(a,b) group by a,b, grouping sets(a);
-- Tests for chained aggregates
select a, b, grouping(a,b), sum(v), count(*), max(v)
- from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2));
+ from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
a | b | grouping | sum | count | max
---+---+----------+-----+-------+-----
1 | 1 | 0 | 21 | 2 | 11
@@ -473,19 +481,19 @@ select a, b, grouping(a,b), sum(v), count(*), max(v)
3 | 4 | 0 | 17 | 1 | 17
4 | 1 | 0 | 37 | 2 | 19
| | 3 | 21 | 2 | 11
- | | 3 | 25 | 2 | 13
- | | 3 | 14 | 1 | 14
- | | 3 | 15 | 1 | 15
- | | 3 | 16 | 1 | 16
- | | 3 | 17 | 1 | 17
- | | 3 | 37 | 2 | 19
| | 3 | 21 | 2 | 11
| | 3 | 25 | 2 | 13
+ | | 3 | 25 | 2 | 13
+ | | 3 | 14 | 1 | 14
| | 3 | 14 | 1 | 14
| | 3 | 15 | 1 | 15
+ | | 3 | 15 | 1 | 15
| | 3 | 16 | 1 | 16
+ | | 3 | 16 | 1 | 16
+ | | 3 | 17 | 1 | 17
| | 3 | 17 | 1 | 17
| | 3 | 37 | 2 | 19
+ | | 3 | 37 | 2 | 19
(21 rows)
select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY ROLLUP((e+1),(f+1));
@@ -847,4 +855,598 @@ select sum(ten) from onek group by rollup(four::text), two order by 1;
2500
(6 rows)
+-- hashing support
+set enable_hashagg = true;
+-- failure cases
+select count(*) from gstest4 group by rollup(unhashable_col,unsortable_col);
+ERROR: could not implement GROUP BY
+DETAIL: Some of the datatypes only support hashing, while others only support sorting.
+select array_agg(v order by v) from gstest4 group by grouping sets ((id,unsortable_col),(id));
+ERROR: could not implement GROUP BY
+DETAIL: Some of the datatypes only support hashing, while others only support sorting.
+-- simple cases
+select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a),(b)) order by 3,1,2;
+ a | b | grouping | sum | count | max
+---+---+----------+-----+-------+-----
+ 1 | | 1 | 60 | 5 | 14
+ 2 | | 1 | 15 | 1 | 15
+ 3 | | 1 | 33 | 2 | 17
+ 4 | | 1 | 37 | 2 | 19
+ | 1 | 2 | 58 | 4 | 19
+ | 2 | 2 | 25 | 2 | 13
+ | 3 | 2 | 45 | 3 | 16
+ | 4 | 2 | 17 | 1 | 17
+(8 rows)
+
+explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a),(b)) order by 3,1,2;
+ QUERY PLAN
+--------------------------------------------------------------------------------------------------------
+ Sort
+ Sort Key: (GROUPING("*VALUES*".column1, "*VALUES*".column2)), "*VALUES*".column1, "*VALUES*".column2
+ -> HashAggregate
+ Hash Key: "*VALUES*".column1
+ Hash Key: "*VALUES*".column2
+ -> Values Scan on "*VALUES*"
+(6 rows)
+
+select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by cube(a,b) order by 3,1,2;
+ a | b | grouping | sum | count | max
+---+---+----------+-----+-------+-----
+ 1 | 1 | 0 | 21 | 2 | 11
+ 1 | 2 | 0 | 25 | 2 | 13
+ 1 | 3 | 0 | 14 | 1 | 14
+ 2 | 3 | 0 | 15 | 1 | 15
+ 3 | 3 | 0 | 16 | 1 | 16
+ 3 | 4 | 0 | 17 | 1 | 17
+ 4 | 1 | 0 | 37 | 2 | 19
+ 1 | | 1 | 60 | 5 | 14
+ 2 | | 1 | 15 | 1 | 15
+ 3 | | 1 | 33 | 2 | 17
+ 4 | | 1 | 37 | 2 | 19
+ | 1 | 2 | 58 | 4 | 19
+ | 2 | 2 | 25 | 2 | 13
+ | 3 | 2 | 45 | 3 | 16
+ | 4 | 2 | 17 | 1 | 17
+ | | 3 | 145 | 10 | 19
+(16 rows)
+
+explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by cube(a,b) order by 3,1,2;
+ QUERY PLAN
+--------------------------------------------------------------------------------------------------------
+ Sort
+ Sort Key: (GROUPING("*VALUES*".column1, "*VALUES*".column2)), "*VALUES*".column1, "*VALUES*".column2
+ -> MixedAggregate
+ Hash Key: "*VALUES*".column1, "*VALUES*".column2
+ Hash Key: "*VALUES*".column1
+ Hash Key: "*VALUES*".column2
+ Group Key: ()
+ -> Values Scan on "*VALUES*"
+(8 rows)
+
+-- shouldn't try and hash
+explain (costs off)
+ select a, b, grouping(a,b), array_agg(v order by v)
+ from gstest1 group by cube(a,b);
+ QUERY PLAN
+----------------------------------------------------------
+ GroupAggregate
+ Group Key: "*VALUES*".column1, "*VALUES*".column2
+ Group Key: "*VALUES*".column1
+ Group Key: ()
+ Sort Key: "*VALUES*".column2
+ Group Key: "*VALUES*".column2
+ -> Sort
+ Sort Key: "*VALUES*".column1, "*VALUES*".column2
+ -> Values Scan on "*VALUES*"
+(9 rows)
+
+-- mixed hashable/sortable cases
+select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((unhashable_col),(unsortable_col))
+ order by 3, 5;
+ unhashable_col | unsortable_col | grouping | count | sum
+----------------+----------------+----------+-------+-----
+ 0000 | | 1 | 2 | 17
+ 0001 | | 1 | 2 | 34
+ 0010 | | 1 | 2 | 68
+ 0011 | | 1 | 2 | 136
+ | 2 | 2 | 4 | 60
+ | 1 | 2 | 4 | 195
+(6 rows)
+
+explain (costs off)
+ select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((unhashable_col),(unsortable_col))
+ order by 3,5;
+ QUERY PLAN
+------------------------------------------------------------------
+ Sort
+ Sort Key: (GROUPING(unhashable_col, unsortable_col)), (sum(v))
+ -> MixedAggregate
+ Hash Key: unsortable_col
+ Group Key: unhashable_col
+ -> Sort
+ Sort Key: unhashable_col
+ -> Seq Scan on gstest4
+(8 rows)
+
+select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col))
+ order by 3,5;
+ unhashable_col | unsortable_col | grouping | count | sum
+----------------+----------------+----------+-------+-----
+ 0000 | | 1 | 1 | 1
+ 0001 | | 1 | 1 | 2
+ 0010 | | 1 | 1 | 4
+ 0011 | | 1 | 1 | 8
+ 0000 | | 1 | 1 | 16
+ 0001 | | 1 | 1 | 32
+ 0010 | | 1 | 1 | 64
+ 0011 | | 1 | 1 | 128
+ | 1 | 2 | 1 | 1
+ | 1 | 2 | 1 | 2
+ | 2 | 2 | 1 | 4
+ | 2 | 2 | 1 | 8
+ | 2 | 2 | 1 | 16
+ | 2 | 2 | 1 | 32
+ | 1 | 2 | 1 | 64
+ | 1 | 2 | 1 | 128
+(16 rows)
+
+explain (costs off)
+ select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col))
+ order by 3,5;
+ QUERY PLAN
+------------------------------------------------------------------
+ Sort
+ Sort Key: (GROUPING(unhashable_col, unsortable_col)), (sum(v))
+ -> MixedAggregate
+ Hash Key: v, unsortable_col
+ Group Key: v, unhashable_col
+ -> Sort
+ Sort Key: v, unhashable_col
+ -> Seq Scan on gstest4
+(8 rows)
+
+-- empty input: first is 0 rows, second 1, third 3 etc.
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a);
+ a | b | sum | count
+---+---+-----+-------
+(0 rows)
+
+explain (costs off)
+ select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a);
+ QUERY PLAN
+--------------------------------
+ HashAggregate
+ Hash Key: a, b
+ Hash Key: a
+ -> Seq Scan on gstest_empty
+(4 rows)
+
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),());
+ a | b | sum | count
+---+---+-----+-------
+ | | | 0
+(1 row)
+
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),());
+ a | b | sum | count
+---+---+-----+-------
+ | | | 0
+ | | | 0
+ | | | 0
+(3 rows)
+
+explain (costs off)
+ select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),());
+ QUERY PLAN
+--------------------------------
+ MixedAggregate
+ Hash Key: a, b
+ Group Key: ()
+ Group Key: ()
+ Group Key: ()
+ -> Seq Scan on gstest_empty
+(6 rows)
+
+select sum(v), count(*) from gstest_empty group by grouping sets ((),(),());
+ sum | count
+-----+-------
+ | 0
+ | 0
+ | 0
+(3 rows)
+
+explain (costs off)
+ select sum(v), count(*) from gstest_empty group by grouping sets ((),(),());
+ QUERY PLAN
+--------------------------------
+ Aggregate
+ Group Key: ()
+ Group Key: ()
+ Group Key: ()
+ -> Seq Scan on gstest_empty
+(5 rows)
+
+-- check that functionally dependent cols are not nulled
+select a, d, grouping(a,b,c)
+ from gstest3
+ group by grouping sets ((a,b), (a,c));
+ a | d | grouping
+---+---+----------
+ 1 | 1 | 1
+ 2 | 2 | 1
+ 1 | 1 | 2
+ 2 | 2 | 2
+(4 rows)
+
+explain (costs off)
+ select a, d, grouping(a,b,c)
+ from gstest3
+ group by grouping sets ((a,b), (a,c));
+ QUERY PLAN
+---------------------------
+ HashAggregate
+ Hash Key: a, b
+ Hash Key: a, c
+ -> Seq Scan on gstest3
+(4 rows)
+
+-- simple rescan tests
+select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by grouping sets (a,b);
+ a | b | sum
+---+---+-----
+ 2 | | 6
+ 1 | | 3
+ | 2 | 3
+ | 3 | 3
+ | 1 | 3
+(5 rows)
+
+explain (costs off)
+ select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by grouping sets (a,b);
+ QUERY PLAN
+------------------------------------------
+ HashAggregate
+ Hash Key: gstest_data.a
+ Hash Key: gstest_data.b
+ -> Nested Loop
+ -> Values Scan on "*VALUES*"
+ -> Function Scan on gstest_data
+(6 rows)
+
+select *
+ from (values (1),(2)) v(x),
+ lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s;
+ERROR: aggregate functions are not allowed in FROM clause of their own query level
+LINE 3: lateral (select a, b, sum(v.x) from gstest_data(v.x) ...
+ ^
+explain (costs off)
+ select *
+ from (values (1),(2)) v(x),
+ lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s;
+ERROR: aggregate functions are not allowed in FROM clause of their own query level
+LINE 4: lateral (select a, b, sum(v.x) from gstest_data(v.x...
+ ^
+-- Tests for chained aggregates
+select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
+ a | b | grouping | sum | count | max
+---+---+----------+-----+-------+-----
+ 1 | 1 | 0 | 21 | 2 | 11
+ 1 | 2 | 0 | 25 | 2 | 13
+ 1 | 3 | 0 | 14 | 1 | 14
+ 2 | 3 | 0 | 15 | 1 | 15
+ 3 | 3 | 0 | 16 | 1 | 16
+ 3 | 4 | 0 | 17 | 1 | 17
+ 4 | 1 | 0 | 37 | 2 | 19
+ | | 3 | 21 | 2 | 11
+ | | 3 | 21 | 2 | 11
+ | | 3 | 25 | 2 | 13
+ | | 3 | 25 | 2 | 13
+ | | 3 | 14 | 1 | 14
+ | | 3 | 14 | 1 | 14
+ | | 3 | 15 | 1 | 15
+ | | 3 | 15 | 1 | 15
+ | | 3 | 16 | 1 | 16
+ | | 3 | 16 | 1 | 16
+ | | 3 | 17 | 1 | 17
+ | | 3 | 17 | 1 | 17
+ | | 3 | 37 | 2 | 19
+ | | 3 | 37 | 2 | 19
+(21 rows)
+
+explain (costs off)
+ select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
+ QUERY PLAN
+-------------------------------------------------------------------------------------------
+ Sort
+ Sort Key: (GROUPING("*VALUES*".column1, "*VALUES*".column2)), (max("*VALUES*".column3))
+ -> HashAggregate
+ Hash Key: "*VALUES*".column1, "*VALUES*".column2
+ Hash Key: ("*VALUES*".column1 + 1), ("*VALUES*".column2 + 1)
+ Hash Key: ("*VALUES*".column1 + 2), ("*VALUES*".column2 + 2)
+ -> Values Scan on "*VALUES*"
+(7 rows)
+
+select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
+ from gstest2 group by cube (a,b) order by rsum, a, b;
+ a | b | sum | rsum
+---+---+-----+------
+ 1 | 1 | 8 | 8
+ 1 | 2 | 2 | 10
+ 1 | | 10 | 20
+ 2 | 2 | 2 | 22
+ 2 | | 2 | 24
+ | 1 | 8 | 32
+ | 2 | 4 | 36
+ | | 12 | 48
+(8 rows)
+
+explain (costs off)
+ select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
+ from gstest2 group by cube (a,b) order by rsum, a, b;
+ QUERY PLAN
+---------------------------------------------
+ Sort
+ Sort Key: (sum((sum(c))) OVER (?)), a, b
+ -> WindowAgg
+ -> Sort
+ Sort Key: a, b
+ -> MixedAggregate
+ Hash Key: a, b
+ Hash Key: a
+ Hash Key: b
+ Group Key: ()
+ -> Seq Scan on gstest2
+(11 rows)
+
+select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by cube (a,b) order by a,b;
+ a | b | sum
+---+---+-----
+ 1 | 1 | 1
+ 1 | 2 | 1
+ 1 | 3 | 1
+ 1 | | 3
+ 2 | 1 | 2
+ 2 | 2 | 2
+ 2 | 3 | 2
+ 2 | | 6
+ | 1 | 3
+ | 2 | 3
+ | 3 | 3
+ | | 9
+(12 rows)
+
+explain (costs off)
+ select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by cube (a,b) order by a,b;
+ QUERY PLAN
+------------------------------------------------
+ Sort
+ Sort Key: gstest_data.a, gstest_data.b
+ -> MixedAggregate
+ Hash Key: gstest_data.a, gstest_data.b
+ Hash Key: gstest_data.a
+ Hash Key: gstest_data.b
+ Group Key: ()
+ -> Nested Loop
+ -> Values Scan on "*VALUES*"
+ -> Function Scan on gstest_data
+(10 rows)
+
+-- More rescan tests
+select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by cube(four,ten)) s on true order by v.a,four,ten;
+ a | a | four | ten | count
+---+---+------+-----+-------
+ 1 | 1 | 0 | 0 | 50
+ 1 | 1 | 0 | 2 | 50
+ 1 | 1 | 0 | 4 | 50
+ 1 | 1 | 0 | 6 | 50
+ 1 | 1 | 0 | 8 | 50
+ 1 | 1 | 0 | | 250
+ 1 | 1 | 1 | 1 | 50
+ 1 | 1 | 1 | 3 | 50
+ 1 | 1 | 1 | 5 | 50
+ 1 | 1 | 1 | 7 | 50
+ 1 | 1 | 1 | 9 | 50
+ 1 | 1 | 1 | | 250
+ 1 | 1 | 2 | 0 | 50
+ 1 | 1 | 2 | 2 | 50
+ 1 | 1 | 2 | 4 | 50
+ 1 | 1 | 2 | 6 | 50
+ 1 | 1 | 2 | 8 | 50
+ 1 | 1 | 2 | | 250
+ 1 | 1 | 3 | 1 | 50
+ 1 | 1 | 3 | 3 | 50
+ 1 | 1 | 3 | 5 | 50
+ 1 | 1 | 3 | 7 | 50
+ 1 | 1 | 3 | 9 | 50
+ 1 | 1 | 3 | | 250
+ 1 | 1 | | 0 | 100
+ 1 | 1 | | 1 | 100
+ 1 | 1 | | 2 | 100
+ 1 | 1 | | 3 | 100
+ 1 | 1 | | 4 | 100
+ 1 | 1 | | 5 | 100
+ 1 | 1 | | 6 | 100
+ 1 | 1 | | 7 | 100
+ 1 | 1 | | 8 | 100
+ 1 | 1 | | 9 | 100
+ 1 | 1 | | | 1000
+ 2 | 2 | 0 | 0 | 50
+ 2 | 2 | 0 | 2 | 50
+ 2 | 2 | 0 | 4 | 50
+ 2 | 2 | 0 | 6 | 50
+ 2 | 2 | 0 | 8 | 50
+ 2 | 2 | 0 | | 250
+ 2 | 2 | 1 | 1 | 50
+ 2 | 2 | 1 | 3 | 50
+ 2 | 2 | 1 | 5 | 50
+ 2 | 2 | 1 | 7 | 50
+ 2 | 2 | 1 | 9 | 50
+ 2 | 2 | 1 | | 250
+ 2 | 2 | 2 | 0 | 50
+ 2 | 2 | 2 | 2 | 50
+ 2 | 2 | 2 | 4 | 50
+ 2 | 2 | 2 | 6 | 50
+ 2 | 2 | 2 | 8 | 50
+ 2 | 2 | 2 | | 250
+ 2 | 2 | 3 | 1 | 50
+ 2 | 2 | 3 | 3 | 50
+ 2 | 2 | 3 | 5 | 50
+ 2 | 2 | 3 | 7 | 50
+ 2 | 2 | 3 | 9 | 50
+ 2 | 2 | 3 | | 250
+ 2 | 2 | | 0 | 100
+ 2 | 2 | | 1 | 100
+ 2 | 2 | | 2 | 100
+ 2 | 2 | | 3 | 100
+ 2 | 2 | | 4 | 100
+ 2 | 2 | | 5 | 100
+ 2 | 2 | | 6 | 100
+ 2 | 2 | | 7 | 100
+ 2 | 2 | | 8 | 100
+ 2 | 2 | | 9 | 100
+ 2 | 2 | | | 1000
+(70 rows)
+
+select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by cube(two,four) order by two,four) s1) from (values (1),(2)) v(a);
+ array
+------------------------------------------------------------------------------------------------------------------------------------------------------
+ {"(1,0,0,250)","(1,0,2,250)","(1,0,,500)","(1,1,1,250)","(1,1,3,250)","(1,1,,500)","(1,,0,250)","(1,,1,250)","(1,,2,250)","(1,,3,250)","(1,,,1000)"}
+ {"(2,0,0,250)","(2,0,2,250)","(2,0,,500)","(2,1,1,250)","(2,1,3,250)","(2,1,,500)","(2,,0,250)","(2,,1,250)","(2,,2,250)","(2,,3,250)","(2,,,1000)"}
+(2 rows)
+
+-- Rescan logic changes when there are no empty grouping sets, so test
+-- that too:
+select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by grouping sets(four,ten)) s on true order by v.a,four,ten;
+ a | a | four | ten | count
+---+---+------+-----+-------
+ 1 | 1 | 0 | | 250
+ 1 | 1 | 1 | | 250
+ 1 | 1 | 2 | | 250
+ 1 | 1 | 3 | | 250
+ 1 | 1 | | 0 | 100
+ 1 | 1 | | 1 | 100
+ 1 | 1 | | 2 | 100
+ 1 | 1 | | 3 | 100
+ 1 | 1 | | 4 | 100
+ 1 | 1 | | 5 | 100
+ 1 | 1 | | 6 | 100
+ 1 | 1 | | 7 | 100
+ 1 | 1 | | 8 | 100
+ 1 | 1 | | 9 | 100
+ 2 | 2 | 0 | | 250
+ 2 | 2 | 1 | | 250
+ 2 | 2 | 2 | | 250
+ 2 | 2 | 3 | | 250
+ 2 | 2 | | 0 | 100
+ 2 | 2 | | 1 | 100
+ 2 | 2 | | 2 | 100
+ 2 | 2 | | 3 | 100
+ 2 | 2 | | 4 | 100
+ 2 | 2 | | 5 | 100
+ 2 | 2 | | 6 | 100
+ 2 | 2 | | 7 | 100
+ 2 | 2 | | 8 | 100
+ 2 | 2 | | 9 | 100
+(28 rows)
+
+select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by grouping sets(two,four) order by two,four) s1) from (values (1),(2)) v(a);
+ array
+---------------------------------------------------------------------------------
+ {"(1,0,,500)","(1,1,,500)","(1,,0,250)","(1,,1,250)","(1,,2,250)","(1,,3,250)"}
+ {"(2,0,,500)","(2,1,,500)","(2,,0,250)","(2,,1,250)","(2,,2,250)","(2,,3,250)"}
+(2 rows)
+
+-- test the knapsack
+set work_mem = '64kB';
+explain (costs off)
+ select unique1,
+ count(two), count(four), count(ten),
+ count(hundred), count(thousand), count(twothousand),
+ count(*)
+ from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two);
+ QUERY PLAN
+-------------------------------
+ MixedAggregate
+ Hash Key: two
+ Hash Key: four
+ Hash Key: ten
+ Hash Key: hundred
+ Group Key: unique1
+ Sort Key: twothousand
+ Group Key: twothousand
+ Sort Key: thousand
+ Group Key: thousand
+ -> Sort
+ Sort Key: unique1
+ -> Seq Scan on tenk1
+(13 rows)
+
+explain (costs off)
+ select unique1,
+ count(two), count(four), count(ten),
+ count(hundred), count(thousand), count(twothousand),
+ count(*)
+ from tenk1 group by grouping sets (unique1,hundred,ten,four,two);
+ QUERY PLAN
+-------------------------------
+ MixedAggregate
+ Hash Key: two
+ Hash Key: four
+ Hash Key: ten
+ Hash Key: hundred
+ Group Key: unique1
+ -> Sort
+ Sort Key: unique1
+ -> Seq Scan on tenk1
+(9 rows)
+
+set work_mem = '384kB';
+explain (costs off)
+ select unique1,
+ count(two), count(four), count(ten),
+ count(hundred), count(thousand), count(twothousand),
+ count(*)
+ from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two);
+ QUERY PLAN
+-------------------------------
+ MixedAggregate
+ Hash Key: two
+ Hash Key: four
+ Hash Key: ten
+ Hash Key: hundred
+ Hash Key: thousand
+ Group Key: unique1
+ Sort Key: twothousand
+ Group Key: twothousand
+ -> Sort
+ Sort Key: unique1
+ -> Seq Scan on tenk1
+(12 rows)
+
-- end
diff --git a/src/test/regress/expected/tsrf.out b/src/test/regress/expected/tsrf.out
index 0eeaf9e..33f370b 100644
--- a/src/test/regress/expected/tsrf.out
+++ b/src/test/regress/expected/tsrf.out
@@ -233,6 +233,7 @@ SELECT few.dataa, count(*), min(id), max(id), generate_series(1,3) FROM few GROU
(6 rows)
-- grouping sets are a bit special, they produce NULLs in columns not actually NULL
+set enable_hashagg = false;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab);
dataa | b | g | count
-------+-----+---+-------
@@ -311,46 +312,46 @@ SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(d
b | bar | | 2
b | | | 2
| | | 6
- a | | 1 | 2
- b | | 1 | 1
- | | 1 | 3
- a | | 2 | 2
- b | | 2 | 1
- | | 2 | 3
| bar | 1 | 2
| bar | 2 | 2
| bar | | 4
| foo | 1 | 1
| foo | 2 | 1
| foo | | 2
+ a | | 1 | 2
+ b | | 1 | 1
+ | | 1 | 3
+ a | | 2 | 2
+ b | | 2 | 1
+ | | 2 | 3
(24 rows)
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g) ORDER BY dataa;
dataa | b | g | count
-------+-----+---+-------
+ a | foo | | 2
+ a | | | 4
+ a | | 2 | 2
a | bar | 1 | 1
a | bar | 2 | 1
a | bar | | 2
a | foo | 1 | 1
a | foo | 2 | 1
- a | foo | | 2
- a | | | 4
a | | 1 | 2
- a | | 2 | 2
- b | bar | 2 | 1
+ b | bar | 1 | 1
b | | | 2
b | | 1 | 1
- b | | 2 | 1
- b | bar | 1 | 1
+ b | bar | 2 | 1
b | bar | | 2
- | foo | | 2
- | foo | 1 | 1
+ b | | 2 | 1
| | 2 | 3
+ | | | 6
| bar | 1 | 2
| bar | 2 | 2
- | | | 6
- | foo | 2 | 1
| bar | | 4
+ | foo | 1 | 1
+ | foo | 2 | 1
+ | foo | | 2
| | 1 | 3
(24 rows)
@@ -360,29 +361,30 @@ SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(d
a | bar | 1 | 1
a | foo | 1 | 1
b | bar | 1 | 1
+ | bar | 1 | 2
+ | foo | 1 | 1
a | | 1 | 2
b | | 1 | 1
| | 1 | 3
- | bar | 1 | 2
- | foo | 1 | 1
- | foo | 2 | 1
- | bar | 2 | 2
a | | 2 | 2
b | | 2 | 1
- a | bar | 2 | 1
+ | bar | 2 | 2
| | 2 | 3
+ | foo | 2 | 1
+ a | bar | 2 | 1
a | foo | 2 | 1
b | bar | 2 | 1
- a | foo | | 2
+ a | | | 4
b | bar | | 2
b | | | 2
| | | 6
- a | | | 4
+ a | foo | | 2
+ a | bar | | 2
| bar | | 4
| foo | | 2
- a | bar | | 2
(24 rows)
+reset enable_hashagg;
-- data modification
CREATE TABLE fewmore AS SELECT generate_series(1,3) AS data;
INSERT INTO fewmore VALUES(generate_series(4,5));
diff --git a/src/test/regress/sql/groupingsets.sql b/src/test/regress/sql/groupingsets.sql
index 71cc0ec..da1c577 100644
--- a/src/test/regress/sql/groupingsets.sql
+++ b/src/test/regress/sql/groupingsets.sql
@@ -31,6 +31,14 @@ copy gstest3 from stdin;
\.
alter table gstest3 add primary key (a);
+create temp table gstest4(id integer, v integer,
+ unhashable_col bit(4), unsortable_col xid);
+insert into gstest4
+values (1,1,b'0000','1'), (2,2,b'0001','1'),
+ (3,4,b'0010','2'), (4,8,b'0011','2'),
+ (5,16,b'0000','2'), (6,32,b'0001','2'),
+ (7,64,b'0010','1'), (8,128,b'0011','1');
+
create temp table gstest_empty (a integer, b integer, v integer);
create function gstest_data(v integer, out a integer, out b integer)
@@ -43,8 +51,11 @@ create function gstest_data(v integer, out a integer, out b integer)
-- basic functionality
+set enable_hashagg = false; -- test hashing explicitly later
+
-- simple rollup with multiple plain aggregates, with and without ordering
-- (and with ordering differing from grouping)
+
select a, b, grouping(a,b), sum(v), count(*), max(v)
from gstest1 group by rollup (a,b);
select a, b, grouping(a,b), sum(v), count(*), max(v)
@@ -161,7 +172,7 @@ select a, b from (values (1,2),(2,3)) v(a,b) group by a,b, grouping sets(a);
-- Tests for chained aggregates
select a, b, grouping(a,b), sum(v), count(*), max(v)
- from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2));
+ from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY ROLLUP((e+1),(f+1));
select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY CUBE((e+1),(f+1)) ORDER BY (e+1),(f+1);
select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
@@ -224,4 +235,146 @@ select array(select row(v.a,s1.*) from (select two,four, count(*) from onek grou
select sum(ten) from onek group by two, rollup(four::text) order by 1;
select sum(ten) from onek group by rollup(four::text), two order by 1;
+-- hashing support
+
+set enable_hashagg = true;
+
+-- failure cases
+
+select count(*) from gstest4 group by rollup(unhashable_col,unsortable_col);
+select array_agg(v order by v) from gstest4 group by grouping sets ((id,unsortable_col),(id));
+
+-- simple cases
+
+select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a),(b)) order by 3,1,2;
+explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a),(b)) order by 3,1,2;
+
+select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by cube(a,b) order by 3,1,2;
+explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by cube(a,b) order by 3,1,2;
+
+-- shouldn't try and hash
+explain (costs off)
+ select a, b, grouping(a,b), array_agg(v order by v)
+ from gstest1 group by cube(a,b);
+
+-- mixed hashable/sortable cases
+select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((unhashable_col),(unsortable_col))
+ order by 3, 5;
+explain (costs off)
+ select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((unhashable_col),(unsortable_col))
+ order by 3,5;
+
+select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col))
+ order by 3,5;
+explain (costs off)
+ select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col))
+ order by 3,5;
+
+-- empty input: first is 0 rows, second 1, third 3 etc.
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a);
+explain (costs off)
+ select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a);
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),());
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),());
+explain (costs off)
+ select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),());
+select sum(v), count(*) from gstest_empty group by grouping sets ((),(),());
+explain (costs off)
+ select sum(v), count(*) from gstest_empty group by grouping sets ((),(),());
+
+-- check that functionally dependent cols are not nulled
+select a, d, grouping(a,b,c)
+ from gstest3
+ group by grouping sets ((a,b), (a,c));
+explain (costs off)
+ select a, d, grouping(a,b,c)
+ from gstest3
+ group by grouping sets ((a,b), (a,c));
+
+-- simple rescan tests
+
+select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by grouping sets (a,b);
+explain (costs off)
+ select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by grouping sets (a,b);
+
+select *
+ from (values (1),(2)) v(x),
+ lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s;
+explain (costs off)
+ select *
+ from (values (1),(2)) v(x),
+ lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s;
+
+-- Tests for chained aggregates
+select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
+explain (costs off)
+ select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
+select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
+ from gstest2 group by cube (a,b) order by rsum, a, b;
+explain (costs off)
+ select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
+ from gstest2 group by cube (a,b) order by rsum, a, b;
+select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by cube (a,b) order by a,b;
+explain (costs off)
+ select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by cube (a,b) order by a,b;
+
+-- More rescan tests
+select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by cube(four,ten)) s on true order by v.a,four,ten;
+select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by cube(two,four) order by two,four) s1) from (values (1),(2)) v(a);
+
+-- Rescan logic changes when there are no empty grouping sets, so test
+-- that too:
+select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by grouping sets(four,ten)) s on true order by v.a,four,ten;
+select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by grouping sets(two,four) order by two,four) s1) from (values (1),(2)) v(a);
+
+-- test the knapsack
+
+set work_mem = '64kB';
+explain (costs off)
+ select unique1,
+ count(two), count(four), count(ten),
+ count(hundred), count(thousand), count(twothousand),
+ count(*)
+ from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two);
+explain (costs off)
+ select unique1,
+ count(two), count(four), count(ten),
+ count(hundred), count(thousand), count(twothousand),
+ count(*)
+ from tenk1 group by grouping sets (unique1,hundred,ten,four,two);
+
+set work_mem = '384kB';
+explain (costs off)
+ select unique1,
+ count(two), count(four), count(ten),
+ count(hundred), count(thousand), count(twothousand),
+ count(*)
+ from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two);
+
-- end
diff --git a/src/test/regress/sql/tsrf.sql b/src/test/regress/sql/tsrf.sql
index e627bb9..417e78c 100644
--- a/src/test/regress/sql/tsrf.sql
+++ b/src/test/regress/sql/tsrf.sql
@@ -66,12 +66,14 @@ SELECT SUM(count(*)) OVER(PARTITION BY generate_series(1,3) ORDER BY generate_se
SELECT few.dataa, count(*), min(id), max(id), generate_series(1,3) FROM few GROUP BY few.dataa ORDER BY 5, 1;
-- grouping sets are a bit special, they produce NULLs in columns not actually NULL
+set enable_hashagg = false;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab);
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab) ORDER BY dataa;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab) ORDER BY g;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g);
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g) ORDER BY dataa;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g) ORDER BY g;
+reset enable_hashagg;
-- data modification
CREATE TABLE fewmore AS SELECT generate_series(1,3) AS data;
Another small update to the patch, this time to eliminate any
possibility of integer overflow when handling extremely large
group-count estimates.
Key change:
- k_weights[i] = (int) floor(sz / scale);
+ /*
+ * If sz is enormous, but work_mem (and hence scale) is
+ * small, avoid integer overflow here.
+ */
+ k_weights[i] = (int) Min(floor(sz / scale),
+ k_capacity + 1.0);
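To see why the clamp is needed, here is a minimal standalone sketch;
the clamp_weight helper and the constants in main() are purely
illustrative, not patch code. The scaled weight sz / scale can exceed
INT_MAX when sz is huge and work_mem (hence scale) is small, and
casting such a value to int is undefined behavior. Clamping to
k_capacity + 1 first is harmless, since any weight above capacity is
equally "doesn't fit" as far as the knapsack is concerned.

#include <math.h>
#include <stdio.h>

#define Min(a, b) ((a) < (b) ? (a) : (b))

/* Illustrative only: mirrors the patched k_weights computation. */
static int
clamp_weight(double sz, double scale, int k_capacity)
{
	/*
	 * Without the Min(), (int) floor(sz / scale) overflows whenever
	 * sz / scale > INT_MAX; with it, oversized items just get a weight
	 * one past capacity, which the knapsack can never select.
	 */
	return (int) Min(floor(sz / scale), k_capacity + 1.0);
}

int
main(void)
{
	double	sz = 1e18;		/* enormous estimated hashtable size */
	double	scale = 16.0;	/* small work_mem implies small scale */
	int		k_capacity = 1280;

	printf("clamped weight = %d\n", clamp_weight(sz, scale, k_capacity));
	return 0;
}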
--
Andrew (irc:RhodiumToad)
Attachments:
gshash25.patch (text/x-patch)
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 0b9e3e4537..91765c20cf 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -3148,16 +3148,19 @@ select sum(q.a), count(q.b) from ft4 left join (select 13, avg(ft1.c1), sum(ft2.
-- Grouping sets
explain (verbose, costs off)
select c2, sum(c1) from ft1 where c2 < 3 group by rollup(c2) order by 1 nulls last;
- QUERY PLAN
----------------------------------------------------------------------------------------------------
- GroupAggregate
- Output: c2, sum(c1)
- Group Key: ft1.c2
- Group Key: ()
- -> Foreign Scan on public.ft1
- Output: c2, c1
- Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3)) ORDER BY c2 ASC NULLS LAST
-(7 rows)
+ QUERY PLAN
+------------------------------------------------------------------------------
+ Sort
+ Output: c2, (sum(c1))
+ Sort Key: ft1.c2
+ -> MixedAggregate
+ Output: c2, sum(c1)
+ Hash Key: ft1.c2
+ Group Key: ()
+ -> Foreign Scan on public.ft1
+ Output: c2, c1
+ Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3))
+(10 rows)
select c2, sum(c1) from ft1 where c2 < 3 group by rollup(c2) order by 1 nulls last;
c2 | sum
@@ -3170,16 +3173,19 @@ select c2, sum(c1) from ft1 where c2 < 3 group by rollup(c2) order by 1 nulls la
explain (verbose, costs off)
select c2, sum(c1) from ft1 where c2 < 3 group by cube(c2) order by 1 nulls last;
- QUERY PLAN
----------------------------------------------------------------------------------------------------
- GroupAggregate
- Output: c2, sum(c1)
- Group Key: ft1.c2
- Group Key: ()
- -> Foreign Scan on public.ft1
- Output: c2, c1
- Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3)) ORDER BY c2 ASC NULLS LAST
-(7 rows)
+ QUERY PLAN
+------------------------------------------------------------------------------
+ Sort
+ Output: c2, (sum(c1))
+ Sort Key: ft1.c2
+ -> MixedAggregate
+ Output: c2, sum(c1)
+ Hash Key: ft1.c2
+ Group Key: ()
+ -> Foreign Scan on public.ft1
+ Output: c2, c1
+ Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3))
+(10 rows)
select c2, sum(c1) from ft1 where c2 < 3 group by cube(c2) order by 1 nulls last;
c2 | sum
@@ -3192,20 +3198,19 @@ select c2, sum(c1) from ft1 where c2 < 3 group by cube(c2) order by 1 nulls last
explain (verbose, costs off)
select c2, c6, sum(c1) from ft1 where c2 < 3 group by grouping sets(c2, c6) order by 1 nulls last, 2 nulls last;
- QUERY PLAN
--------------------------------------------------------------------------------------------------------------
+ QUERY PLAN
+----------------------------------------------------------------------------------
Sort
Output: c2, c6, (sum(c1))
Sort Key: ft1.c2, ft1.c6
- -> GroupAggregate
+ -> HashAggregate
Output: c2, c6, sum(c1)
- Group Key: ft1.c2
- Sort Key: ft1.c6
- Group Key: ft1.c6
+ Hash Key: ft1.c2
+ Hash Key: ft1.c6
-> Foreign Scan on public.ft1
Output: c2, c6, c1
- Remote SQL: SELECT "C 1", c2, c6 FROM "S 1"."T 1" WHERE ((c2 < 3)) ORDER BY c2 ASC NULLS LAST
-(11 rows)
+ Remote SQL: SELECT "C 1", c2, c6 FROM "S 1"."T 1" WHERE ((c2 < 3))
+(10 rows)
select c2, c6, sum(c1) from ft1 where c2 < 3 group by grouping sets(c2, c6) order by 1 nulls last, 2 nulls last;
c2 | c6 | sum
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index c9b55ead3d..f0a1339702 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1015,6 +1015,10 @@ ExplainNode(PlanState *planstate, List *ancestors,
pname = "HashAggregate";
strategy = "Hashed";
break;
+ case AGG_MIXED:
+ pname = "MixedAggregate";
+ strategy = "Mixed";
+ break;
default:
pname = "Aggregate ???";
strategy = "???";
@@ -1978,6 +1982,19 @@ show_grouping_set_keys(PlanState *planstate,
ListCell *lc;
List *gsets = aggnode->groupingSets;
AttrNumber *keycols = aggnode->grpColIdx;
+ const char *keyname;
+ const char *keysetname;
+
+ if (aggnode->aggstrategy == AGG_HASHED || aggnode->aggstrategy == AGG_MIXED)
+ {
+ keyname = "Hash Key";
+ keysetname = "Hash Keys";
+ }
+ else
+ {
+ keyname = "Group Key";
+ keysetname = "Group Keys";
+ }
ExplainOpenGroup("Grouping Set", NULL, true, es);
@@ -1992,7 +2009,7 @@ show_grouping_set_keys(PlanState *planstate,
es->indent++;
}
- ExplainOpenGroup("Group Keys", "Group Keys", false, es);
+ ExplainOpenGroup(keysetname, keysetname, false, es);
foreach(lc, gsets)
{
@@ -2016,12 +2033,12 @@ show_grouping_set_keys(PlanState *planstate,
}
if (!result && es->format == EXPLAIN_FORMAT_TEXT)
- ExplainPropertyText("Group Key", "()", es);
+ ExplainPropertyText(keyname, "()", es);
else
- ExplainPropertyListNested("Group Key", result, es);
+ ExplainPropertyListNested(keyname, result, es);
}
- ExplainCloseGroup("Group Keys", "Group Keys", false, es);
+ ExplainCloseGroup(keysetname, keysetname, false, es);
if (sortnode && es->format == EXPLAIN_FORMAT_TEXT)
es->indent--;
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 3207ee460c..775849f0d9 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -122,12 +122,19 @@
* specific).
*
* Where more complex grouping sets are used, we break them down into
- * "phases", where each phase has a different sort order. During each
- * phase but the last, the input tuples are additionally stored in a
- * tuplesort which is keyed to the next phase's sort order; during each
- * phase but the first, the input tuples are drawn from the previously
- * sorted data. (The sorting of the data for the first phase is handled by
- * the planner, as it might be satisfied by underlying nodes.)
+ * "phases", where each phase has a different sort order (except phase 0
+ * which is reserved for hashing). During each phase but the last, the
+ * input tuples are additionally stored in a tuplesort which is keyed to the
+ * next phase's sort order; during each phase but the first, the input
+ * tuples are drawn from the previously sorted data. (The sorting of the
+ * data for the first phase is handled by the planner, as it might be
+ * satisfied by underlying nodes.)
+ *
+ * Hashing can be mixed with sorted grouping. To do this, we have an
+ * AGG_MIXED strategy that populates the hashtables during the first sorted
+ * phase, and switches to reading them out after completing all sort phases.
+ * We can also support AGG_HASHED with multiple hash tables and no sorting
+ * at all.
*
* From the perspective of aggregate transition and final functions, the
* only issue regarding grouping sets is this: a single call site (flinfo)
@@ -139,8 +146,6 @@
* sensitive to the grouping set for which the aggregate function is
* currently being called.
*
- * TODO: AGG_HASHED doesn't support multiple grouping sets yet.
- *
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
@@ -432,6 +437,7 @@ typedef struct AggStatePerGroupData
*/
typedef struct AggStatePerPhaseData
{
+ AggStrategy aggstrategy; /* strategy for this phase */
int numsets; /* number of grouping sets (or 0) */
int *gset_lengths; /* lengths of grouping sets */
Bitmapset **grouped_cols; /* column groupings for rollup */
@@ -440,7 +446,31 @@ typedef struct AggStatePerPhaseData
Sort *sortnode; /* Sort node for input ordering for phase */
} AggStatePerPhaseData;
+/*
+ * AggStatePerHashData - per-hashtable state
+ *
+ * When doing grouping sets with hashing, we have one of these for each
+ * grouping set. (When doing hashing without grouping sets, we have just one of
+ * them.)
+ */
+
+typedef struct AggStatePerHashData
+{
+ TupleHashTable hashtable; /* hash table with one entry per group */
+ TupleHashIterator hashiter; /* for iterating through hash table */
+ TupleTableSlot *hashslot; /* slot for loading hash table */
+ FmgrInfo *hashfunctions; /* per-grouping-field hash fns */
+ FmgrInfo *eqfunctions; /* per-grouping-field equality fns */
+ int numCols; /* number of hash key columns */
+ int numhashGrpCols; /* number of columns in hash table */
+ int largestGrpColIdx; /* largest col required for hashing */
+ AttrNumber *hashGrpColIdxInput; /* hash col indices in input slot */
+ AttrNumber *hashGrpColIdxHash; /* indices in hashtbl tuples */
+ Agg *aggnode; /* original Agg node, for numGroups etc. */
+} AggStatePerHashData;
+
+static void select_current_set(AggState *aggstate, int setno, bool is_hash);
static void initialize_phase(AggState *aggstate, int newphase);
static TupleTableSlot *fetch_input_tuple(AggState *aggstate);
static void initialize_aggregates(AggState *aggstate,
@@ -449,7 +479,8 @@ static void initialize_aggregates(AggState *aggstate,
static void advance_transition_function(AggState *aggstate,
AggStatePerTrans pertrans,
AggStatePerGroup pergroupstate);
-static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup);
+static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup,
+ AggStatePerGroup *pergroups);
static void advance_combine_function(AggState *aggstate,
AggStatePerTrans pertrans,
AggStatePerGroup pergroupstate);
@@ -473,14 +504,13 @@ static void prepare_projection_slot(AggState *aggstate,
int currentSet);
static void finalize_aggregates(AggState *aggstate,
AggStatePerAgg peragg,
- AggStatePerGroup pergroup,
- int currentSet);
+ AggStatePerGroup pergroup);
static TupleTableSlot *project_aggregates(AggState *aggstate);
static Bitmapset *find_unaggregated_cols(AggState *aggstate);
static bool find_unaggregated_cols_walker(Node *node, Bitmapset **colnos);
static void build_hash_table(AggState *aggstate);
-static TupleHashEntryData *lookup_hash_entry(AggState *aggstate,
- TupleTableSlot *inputslot);
+static TupleHashEntryData *lookup_hash_entry(AggState *aggstate);
+static AggStatePerGroup *lookup_hash_entries(AggState *aggstate);
static TupleTableSlot *agg_retrieve_direct(AggState *aggstate);
static void agg_fill_hash_table(AggState *aggstate);
static TupleTableSlot *agg_retrieve_hash_table(AggState *aggstate);
@@ -501,13 +531,29 @@ static int find_compatible_pertrans(AggState *aggstate, Aggref *newagg,
/*
- * Switch to phase "newphase", which must either be 0 (to reset) or
+ * Select the current grouping set; affects current_set and
+ * curaggcontext.
+ */
+
+static void
+select_current_set(AggState *aggstate, int setno, bool is_hash)
+{
+ if (is_hash)
+ aggstate->curaggcontext = aggstate->hashcontext;
+ else
+ aggstate->curaggcontext = aggstate->aggcontexts[setno];
+
+ aggstate->current_set = setno;
+}
+
+/*
+ * Switch to phase "newphase", which must be 0 or 1 (to reset) or
* current_phase + 1. Juggle the tuplesorts accordingly.
*/
static void
initialize_phase(AggState *aggstate, int newphase)
{
- Assert(newphase == 0 || newphase == aggstate->current_phase + 1);
+ Assert(newphase <= 1 || newphase == aggstate->current_phase + 1);
/*
* Whatever the previous state, we're now done with whatever input
@@ -519,7 +565,7 @@ initialize_phase(AggState *aggstate, int newphase)
aggstate->sort_in = NULL;
}
- if (newphase == 0)
+ if (newphase <= 1)
{
/*
* Discard any existing output tuplesort.
@@ -546,7 +592,7 @@ initialize_phase(AggState *aggstate, int newphase)
* If this isn't the last phase, we need to sort appropriately for the
* next phase in sequence.
*/
- if (newphase < aggstate->numphases - 1)
+ if (newphase > 0 && newphase < aggstate->numphases - 1)
{
Sort *sortnode = aggstate->phases[newphase + 1].sortnode;
PlanState *outerNode = outerPlanState(aggstate);
@@ -567,7 +613,7 @@ initialize_phase(AggState *aggstate, int newphase)
}
/*
- * Fetch a tuple from either the outer plan (for phase 0) or from the sorter
+ * Fetch a tuple from either the outer plan (for phase 1) or from the sorter
* populated by the previous phase. Copy it to the sorter for the next phase
* if any.
*/
@@ -595,8 +641,8 @@ fetch_input_tuple(AggState *aggstate)
/*
* (Re)Initialize an individual aggregate.
*
- * This function handles only one grouping set (already set in
- * aggstate->current_set).
+ * This function handles only one grouping set, already set in
+ * aggstate->current_set.
*
* When called, CurrentMemoryContext should be the per-query context.
*/
@@ -653,7 +699,7 @@ initialize_aggregate(AggState *aggstate, AggStatePerTrans pertrans,
MemoryContext oldContext;
oldContext = MemoryContextSwitchTo(
- aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+ aggstate->curaggcontext->ecxt_per_tuple_memory);
pergroupstate->transValue = datumCopy(pertrans->initValue,
pertrans->transtypeByVal,
pertrans->transtypeLen);
@@ -676,8 +722,9 @@ initialize_aggregate(AggState *aggstate, AggStatePerTrans pertrans,
*
* If there are multiple grouping sets, we initialize only the first numReset
* of them (the grouping sets are ordered so that the most specific one, which
- * is reset most often, is first). As a convenience, if numReset is < 1, we
- * reinitialize all sets.
+ * is reset most often, is first). As a convenience, if numReset is 0, we
+ * reinitialize all sets. A numReset of -1 means we are initializing a
+ * hashtable entry, in which case the caller must have used
+ * select_current_set appropriately.
*
* When called, CurrentMemoryContext should be the per-query context.
*/
@@ -689,25 +736,37 @@ initialize_aggregates(AggState *aggstate,
int transno;
int numGroupingSets = Max(aggstate->phase->numsets, 1);
int setno = 0;
+ int numTrans = aggstate->numtrans;
AggStatePerTrans transstates = aggstate->pertrans;
- if (numReset < 1)
+ if (numReset == 0)
numReset = numGroupingSets;
- for (transno = 0; transno < aggstate->numtrans; transno++)
+ for (transno = 0; transno < numTrans; transno++)
{
AggStatePerTrans pertrans = &transstates[transno];
- for (setno = 0; setno < numReset; setno++)
+ if (numReset < 0)
{
AggStatePerGroup pergroupstate;
- pergroupstate = &pergroup[transno + (setno * (aggstate->numtrans))];
-
- aggstate->current_set = setno;
+ pergroupstate = &pergroup[transno];
initialize_aggregate(aggstate, pertrans, pergroupstate);
}
+ else
+ {
+ for (setno = 0; setno < numReset; setno++)
+ {
+ AggStatePerGroup pergroupstate;
+
+ pergroupstate = &pergroup[transno + (setno * numTrans)];
+
+ select_current_set(aggstate, setno, false);
+
+ initialize_aggregate(aggstate, pertrans, pergroupstate);
+ }
+ }
}
}
@@ -757,7 +816,7 @@ advance_transition_function(AggState *aggstate,
* do not need to pfree the old transValue, since it's NULL.
*/
oldContext = MemoryContextSwitchTo(
- aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+ aggstate->curaggcontext->ecxt_per_tuple_memory);
pergroupstate->transValue = datumCopy(fcinfo->arg[1],
pertrans->transtypeByVal,
pertrans->transtypeLen);
@@ -807,7 +866,7 @@ advance_transition_function(AggState *aggstate,
{
if (!fcinfo->isnull)
{
- MemoryContextSwitchTo(aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+ MemoryContextSwitchTo(aggstate->curaggcontext->ecxt_per_tuple_memory);
if (DatumIsReadWriteExpandedObject(newVal,
false,
pertrans->transtypeLen) &&
@@ -838,17 +897,21 @@ advance_transition_function(AggState *aggstate,
/*
* Advance each aggregate transition state for one input tuple. The input
* tuple has been stored in tmpcontext->ecxt_outertuple, so that it is
- * accessible to ExecEvalExpr. pergroup is the array of per-group structs to
- * use (this might be in a hashtable entry).
+ * accessible to ExecEvalExpr.
+ *
+ * We have two sets of transition states to handle: one for sorted aggregation
+ * and one for hashed; we do them both here, to avoid multiple evaluation of
+ * the inputs.
*
* When called, CurrentMemoryContext should be the per-query context.
*/
static void
-advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
+advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup, AggStatePerGroup *pergroups)
{
int transno;
int setno = 0;
int numGroupingSets = Max(aggstate->phase->numsets, 1);
+ int numHashes = aggstate->num_hashes;
int numTrans = aggstate->numtrans;
TupleTableSlot *slot = aggstate->evalslot;
@@ -880,6 +943,7 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
{
/* DISTINCT and/or ORDER BY case */
Assert(slot->tts_nvalid >= (pertrans->numInputs + inputoff));
+ Assert(!pergroups);
/*
* If the transfn is strict, we want to check for nullity before
@@ -940,13 +1004,36 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
fcinfo->argnull[i + 1] = slot->tts_isnull[i + inputoff];
}
- for (setno = 0; setno < numGroupingSets; setno++)
+ if (pergroup)
{
- AggStatePerGroup pergroupstate = &pergroup[transno + (setno * numTrans)];
+ /* advance transition states for ordered grouping */
- aggstate->current_set = setno;
+ for (setno = 0; setno < numGroupingSets; setno++)
+ {
+ AggStatePerGroup pergroupstate;
+
+ select_current_set(aggstate, setno, false);
- advance_transition_function(aggstate, pertrans, pergroupstate);
+ pergroupstate = &pergroup[transno + (setno * numTrans)];
+
+ advance_transition_function(aggstate, pertrans, pergroupstate);
+ }
+ }
+
+ if (pergroups)
+ {
+ /* advance transition states for hashed grouping */
+
+ for (setno = 0; setno < numHashes; setno++)
+ {
+ AggStatePerGroup pergroupstate;
+
+ select_current_set(aggstate, setno, true);
+
+ pergroupstate = &pergroups[setno][transno];
+
+ advance_transition_function(aggstate, pertrans, pergroupstate);
+ }
}
}
}
@@ -967,7 +1054,7 @@ combine_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
TupleTableSlot *slot;
/* combine not supported with grouping sets */
- Assert(aggstate->phase->numsets == 0);
+ Assert(aggstate->phase->numsets <= 1);
/* compute input for all aggregates */
slot = ExecProject(aggstate->evalproj);
@@ -1060,7 +1147,7 @@ advance_combine_function(AggState *aggstate,
if (!pertrans->transtypeByVal)
{
oldContext = MemoryContextSwitchTo(
- aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+ aggstate->curaggcontext->ecxt_per_tuple_memory);
pergroupstate->transValue = datumCopy(fcinfo->arg[1],
pertrans->transtypeByVal,
pertrans->transtypeLen);
@@ -1105,7 +1192,7 @@ advance_combine_function(AggState *aggstate,
{
if (!fcinfo->isnull)
{
- MemoryContextSwitchTo(aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+ MemoryContextSwitchTo(aggstate->curaggcontext->ecxt_per_tuple_memory);
if (DatumIsReadWriteExpandedObject(newVal,
false,
pertrans->transtypeLen) &&
@@ -1559,15 +1646,16 @@ prepare_projection_slot(AggState *aggstate, TupleTableSlot *slot, int currentSet
/*
* Compute the final value of all aggregates for one group.
*
- * This function handles only one grouping set at a time.
+ * This function handles only one grouping set at a time, which the caller must
+ * have selected. It's also the caller's responsibility to adjust the supplied
+ * pergroup parameter to point to the current set's transvalues.
*
* Results are stored in the output econtext aggvalues/aggnulls.
*/
static void
finalize_aggregates(AggState *aggstate,
AggStatePerAgg peraggs,
- AggStatePerGroup pergroup,
- int currentSet)
+ AggStatePerGroup pergroup)
{
ExprContext *econtext = aggstate->ss.ps.ps_ExprContext;
Datum *aggvalues = econtext->ecxt_aggvalues;
@@ -1575,11 +1663,6 @@ finalize_aggregates(AggState *aggstate,
int aggno;
int transno;
- Assert(currentSet == 0 ||
- ((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED);
-
- aggstate->current_set = currentSet;
-
/*
* If there were any DISTINCT and/or ORDER BY aggregates, sort their
* inputs and run the transition functions.
@@ -1589,11 +1672,12 @@ finalize_aggregates(AggState *aggstate,
AggStatePerTrans pertrans = &aggstate->pertrans[transno];
AggStatePerGroup pergroupstate;
- pergroupstate = &pergroup[transno + (currentSet * (aggstate->numtrans))];
+ pergroupstate = &pergroup[transno];
if (pertrans->numSortCols > 0)
{
- Assert(((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED);
+ Assert(aggstate->aggstrategy != AGG_HASHED &&
+ aggstate->aggstrategy != AGG_MIXED);
if (pertrans->numInputs == 1)
process_ordered_aggregate_single(aggstate,
@@ -1615,7 +1699,7 @@ finalize_aggregates(AggState *aggstate,
int transno = peragg->transno;
AggStatePerGroup pergroupstate;
- pergroupstate = &pergroup[transno + (currentSet * (aggstate->numtrans))];
+ pergroupstate = &pergroup[transno];
if (DO_AGGSPLIT_SKIPFINAL(aggstate->aggsplit))
finalize_partialaggregate(aggstate, peragg, pergroupstate,
@@ -1697,7 +1781,7 @@ find_unaggregated_cols_walker(Node *node, Bitmapset **colnos)
}
/*
- * Initialize the hash table to empty.
+ * Initialize the hash table(s) to empty.
*
* To implement hashed aggregation, we need a hashtable that stores a
* representative tuple and an array of AggStatePerGroup structs for each
@@ -1705,29 +1789,40 @@ find_unaggregated_cols_walker(Node *node, Bitmapset **colnos)
* GROUP BY columns. The per-group data is allocated in lookup_hash_entry(),
* for each entry.
*
- * The hash table always lives in the aggcontext memory context.
+ * We have a separate hashtable and associated perhash data structure for each
+ * grouping set for which we're doing hashing.
+ *
+ * The hash tables always live in the hashcontext's per-tuple memory context
+ * (there is only one of these for all tables together, since they are all
+ * reset at the same time).
*/
static void
build_hash_table(AggState *aggstate)
{
- Agg *node = (Agg *) aggstate->ss.ps.plan;
MemoryContext tmpmem = aggstate->tmpcontext->ecxt_per_tuple_memory;
Size additionalsize;
+ int i;
- Assert(node->aggstrategy == AGG_HASHED);
- Assert(node->numGroups > 0);
+ Assert(aggstate->aggstrategy == AGG_HASHED || aggstate->aggstrategy == AGG_MIXED);
- additionalsize = aggstate->numaggs * sizeof(AggStatePerGroupData);
+ additionalsize = aggstate->numtrans * sizeof(AggStatePerGroupData);
- aggstate->hashtable = BuildTupleHashTable(node->numCols,
- aggstate->hashGrpColIdxHash,
- aggstate->phase->eqfunctions,
- aggstate->hashfunctions,
- node->numGroups,
- additionalsize,
- aggstate->aggcontexts[0]->ecxt_per_tuple_memory,
- tmpmem,
+ for (i = 0; i < aggstate->num_hashes; ++i)
+ {
+ AggStatePerHash perhash = &aggstate->perhash[i];
+
+ Assert(perhash->aggnode->numGroups > 0);
+
+ perhash->hashtable = BuildTupleHashTable(perhash->numCols,
+ perhash->hashGrpColIdxHash,
+ perhash->eqfunctions,
+ perhash->hashfunctions,
+ perhash->aggnode->numGroups,
+ additionalsize,
+ aggstate->hashcontext->ecxt_per_tuple_memory,
+ tmpmem,
DO_AGGSPLIT_SKIPFINAL(aggstate->aggsplit));
+ }
}
/*
@@ -1750,72 +1845,98 @@ build_hash_table(AggState *aggstate)
* the array is preserved over ExecReScanAgg, so we allocate it in the
* per-query context (unlike the hash table itself).
*/
-static List *
+static void
find_hash_columns(AggState *aggstate)
{
- Agg *node = (Agg *) aggstate->ss.ps.plan;
- Bitmapset *colnos;
- List *collist;
- TupleDesc hashDesc;
+ Bitmapset *base_colnos;
List *outerTlist = outerPlanState(aggstate)->plan->targetlist;
- List *hashTlist = NIL;
- int i;
-
- aggstate->largestGrpColIdx = 0;
+ int numHashes = aggstate->num_hashes;
+ int j;
/* Find Vars that will be needed in tlist and qual */
- colnos = find_unaggregated_cols(aggstate);
- /* Add in all the grouping columns */
- for (i = 0; i < node->numCols; i++)
- colnos = bms_add_member(colnos, node->grpColIdx[i]);
- /* Convert to list, using lcons so largest element ends up first */
- collist = NIL;
+ base_colnos = find_unaggregated_cols(aggstate);
- aggstate->hashGrpColIdxInput =
- palloc(bms_num_members(colnos) * sizeof(AttrNumber));
- aggstate->hashGrpColIdxHash =
- palloc(node->numCols * sizeof(AttrNumber));
-
- /*
- * First build mapping for columns directly hashed. These are the first,
- * because they'll be accessed when computing hash values and comparing
- * tuples for exact matches. We also build simple mapping for
- * execGrouping, so it knows where to find the to-be-hashed / compared
- * columns in the input.
- */
- for (i = 0; i < node->numCols; i++)
+ for (j = 0; j < numHashes; ++j)
{
- aggstate->hashGrpColIdxInput[i] = node->grpColIdx[i];
- aggstate->hashGrpColIdxHash[i] = i + 1;
- aggstate->numhashGrpCols++;
- /* delete already mapped columns */
- bms_del_member(colnos, node->grpColIdx[i]);
- }
+ AggStatePerHash perhash = &aggstate->perhash[j];
+ Bitmapset *colnos = bms_copy(base_colnos);
+ AttrNumber *grpColIdx = perhash->aggnode->grpColIdx;
+ List *hashTlist = NIL;
+ TupleDesc hashDesc;
+ int i;
- /* and add the remaining columns */
- while ((i = bms_first_member(colnos)) >= 0)
- {
- aggstate->hashGrpColIdxInput[aggstate->numhashGrpCols] = i;
- aggstate->numhashGrpCols++;
- }
+ perhash->largestGrpColIdx = 0;
- /* and build a tuple descriptor for the hashtable */
- for (i = 0; i < aggstate->numhashGrpCols; i++)
- {
- int varNumber = aggstate->hashGrpColIdxInput[i] - 1;
+ /*
+ * If we're doing grouping sets, then some Vars might be referenced in
+ * tlist/qual for the benefit of other grouping sets, but not needed
+ * when hashing; i.e. prepare_projection_slot will null them out, so
+ * there'd be no point storing them. Use prepare_projection_slot's
+ * logic to determine which.
+ */
+ if (aggstate->phases[0].grouped_cols)
+ {
+ Bitmapset *grouped_cols = aggstate->phases[0].grouped_cols[j];
+ ListCell *lc;
- hashTlist = lappend(hashTlist, list_nth(outerTlist, varNumber));
- aggstate->largestGrpColIdx =
- Max(varNumber + 1, aggstate->largestGrpColIdx);
- }
+ foreach(lc, aggstate->all_grouped_cols)
+ {
+ int attnum = lfirst_int(lc);
- hashDesc = ExecTypeFromTL(hashTlist, false);
- ExecSetSlotDescriptor(aggstate->hashslot, hashDesc);
+ if (!bms_is_member(attnum, grouped_cols))
+ colnos = bms_del_member(colnos, attnum);
+ }
+ }
+ /* Add in all the grouping columns */
+ for (i = 0; i < perhash->numCols; i++)
+ colnos = bms_add_member(colnos, grpColIdx[i]);
- list_free(hashTlist);
- bms_free(colnos);
+ perhash->hashGrpColIdxInput =
+ palloc(bms_num_members(colnos) * sizeof(AttrNumber));
+ perhash->hashGrpColIdxHash =
+ palloc(perhash->numCols * sizeof(AttrNumber));
- return collist;
+ /*
+ * First build mapping for columns directly hashed. These are the
+ * first, because they'll be accessed when computing hash values and
+ * comparing tuples for exact matches. We also build simple mapping
+ * for execGrouping, so it knows where to find the to-be-hashed /
+ * compared columns in the input.
+ */
+ for (i = 0; i < perhash->numCols; i++)
+ {
+ perhash->hashGrpColIdxInput[i] = grpColIdx[i];
+ perhash->hashGrpColIdxHash[i] = i + 1;
+ perhash->numhashGrpCols++;
+ /* delete already mapped columns */
+ bms_del_member(colnos, grpColIdx[i]);
+ }
+
+ /* and add the remaining columns */
+ while ((i = bms_first_member(colnos)) >= 0)
+ {
+ perhash->hashGrpColIdxInput[perhash->numhashGrpCols] = i;
+ perhash->numhashGrpCols++;
+ }
+
+ /* and build a tuple descriptor for the hashtable */
+ for (i = 0; i < perhash->numhashGrpCols; i++)
+ {
+ int varNumber = perhash->hashGrpColIdxInput[i] - 1;
+
+ hashTlist = lappend(hashTlist, list_nth(outerTlist, varNumber));
+ perhash->largestGrpColIdx =
+ Max(varNumber + 1, perhash->largestGrpColIdx);
+ }
+
+ hashDesc = ExecTypeFromTL(hashTlist, false);
+ ExecSetSlotDescriptor(perhash->hashslot, hashDesc);
+
+ list_free(hashTlist);
+ bms_free(colnos);
+ }
+
+ bms_free(base_colnos);
}
/*
@@ -1840,26 +1961,30 @@ hash_agg_entry_size(int numAggs)
}
/*
- * Find or create a hashtable entry for the tuple group containing the
- * given tuple.
+ * Find or create a hashtable entry for the tuple group containing the current
+ * tuple (already set in tmpcontext's outertuple slot), in the current grouping
+ * set (which the caller must have selected - note that initialize_aggregate
+ * depends on this).
*
* When called, CurrentMemoryContext should be the per-query context.
*/
static TupleHashEntryData *
-lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot)
+lookup_hash_entry(AggState *aggstate)
{
- TupleTableSlot *hashslot = aggstate->hashslot;
+ TupleTableSlot *inputslot = aggstate->tmpcontext->ecxt_outertuple;
+ AggStatePerHash perhash = &aggstate->perhash[aggstate->current_set];
+ TupleTableSlot *hashslot = perhash->hashslot;
TupleHashEntryData *entry;
bool isnew;
- int i;
+ int i;
/* transfer just the needed columns into hashslot */
- slot_getsomeattrs(inputslot, aggstate->largestGrpColIdx);
+ slot_getsomeattrs(inputslot, perhash->largestGrpColIdx);
ExecClearTuple(hashslot);
- for (i = 0; i < aggstate->numhashGrpCols; i++)
+ for (i = 0; i < perhash->numhashGrpCols; i++)
{
- int varNumber = aggstate->hashGrpColIdxInput[i] - 1;
+ int varNumber = perhash->hashGrpColIdxInput[i] - 1;
hashslot->tts_values[i] = inputslot->tts_values[varNumber];
hashslot->tts_isnull[i] = inputslot->tts_isnull[varNumber];
@@ -1867,22 +1992,44 @@ lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot)
ExecStoreVirtualTuple(hashslot);
/* find or create the hashtable entry using the filtered tuple */
- entry = LookupTupleHashEntry(aggstate->hashtable, hashslot, &isnew);
+ entry = LookupTupleHashEntry(perhash->hashtable, hashslot, &isnew);
if (isnew)
{
entry->additional = (AggStatePerGroup)
- MemoryContextAlloc(aggstate->hashtable->tablecxt,
+ MemoryContextAlloc(perhash->hashtable->tablecxt,
sizeof(AggStatePerGroupData) * aggstate->numtrans);
/* initialize aggregates for new tuple group */
initialize_aggregates(aggstate, (AggStatePerGroup) entry->additional,
- 0);
+ -1);
}
return entry;
}
/*
+ * Look up hash entries for the current tuple in all hashed grouping sets,
+ * returning an array of pergroup pointers suitable for advance_aggregates.
+ *
+ * Be aware that lookup_hash_entry can reset the tmpcontext.
+ */
+static AggStatePerGroup *
+lookup_hash_entries(AggState *aggstate)
+{
+ int numHashes = aggstate->num_hashes;
+ AggStatePerGroup *pergroup = aggstate->hash_pergroup;
+ int setno;
+
+ for (setno = 0; setno < numHashes; setno++)
+ {
+ select_current_set(aggstate, setno, true);
+ pergroup[setno] = lookup_hash_entry(aggstate)->additional;
+ }
+
+ return pergroup;
+}
+
+/*
* ExecAgg -
*
* ExecAgg receives tuples from its outer subplan and aggregates over
@@ -1898,19 +2045,22 @@ lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot)
TupleTableSlot *
ExecAgg(AggState *node)
{
- TupleTableSlot *result;
+ TupleTableSlot *result = NULL;
if (!node->agg_done)
{
/* Dispatch based on strategy */
- switch (node->phase->aggnode->aggstrategy)
+ switch (node->phase->aggstrategy)
{
case AGG_HASHED:
if (!node->table_filled)
agg_fill_hash_table(node);
+ /* FALLTHROUGH */
+ case AGG_MIXED:
result = agg_retrieve_hash_table(node);
break;
- default:
+ case AGG_PLAIN:
+ case AGG_SORTED:
result = agg_retrieve_direct(node);
break;
}
@@ -1933,6 +2083,7 @@ agg_retrieve_direct(AggState *aggstate)
ExprContext *tmpcontext;
AggStatePerAgg peragg;
AggStatePerGroup pergroup;
+ AggStatePerGroup *hash_pergroups = NULL;
TupleTableSlot *outerslot;
TupleTableSlot *firstSlot;
TupleTableSlot *result;
@@ -2019,6 +2170,19 @@ agg_retrieve_direct(AggState *aggstate)
node = aggstate->phase->aggnode;
numReset = numGroupingSets;
}
+ else if (aggstate->aggstrategy == AGG_MIXED)
+ {
+ /*
+ * Mixed mode; we've output all the grouped stuff and have
+ * full hashtables, so switch to outputting those.
+ */
+ initialize_phase(aggstate, 0);
+ aggstate->table_filled = true;
+ ResetTupleHashIterator(aggstate->perhash[0].hashtable,
+ &aggstate->perhash[0].hashiter);
+ select_current_set(aggstate, 0, true);
+ return agg_retrieve_hash_table(aggstate);
+ }
else
{
aggstate->agg_done = true;
@@ -2055,7 +2219,7 @@ agg_retrieve_direct(AggState *aggstate)
*----------
*/
if (aggstate->input_done ||
- (node->aggstrategy == AGG_SORTED &&
+ (node->aggstrategy != AGG_PLAIN &&
aggstate->projected_set != -1 &&
aggstate->projected_set < (numGroupingSets - 1) &&
nextSetSize > 0 &&
@@ -2168,10 +2332,22 @@ agg_retrieve_direct(AggState *aggstate)
*/
for (;;)
{
+ /*
+ * During phase 1 of a mixed agg (and only then), we also need to
+ * update the hashtables in advance_aggregates.
+ */
+ if (aggstate->aggstrategy == AGG_MIXED &&
+ aggstate->current_phase == 1)
+ {
+ hash_pergroups = lookup_hash_entries(aggstate);
+ }
+ else
+ hash_pergroups = NULL;
+
if (DO_AGGSPLIT_COMBINE(aggstate->aggsplit))
combine_aggregates(aggstate, pergroup);
else
- advance_aggregates(aggstate, pergroup);
+ advance_aggregates(aggstate, pergroup, hash_pergroups);
/* Reset per-input-tuple context after each tuple */
ResetExprContext(tmpcontext);
@@ -2198,7 +2374,7 @@ agg_retrieve_direct(AggState *aggstate)
* If we are grouping, check whether we've crossed a group
* boundary.
*/
- if (node->aggstrategy == AGG_SORTED)
+ if (node->aggstrategy != AGG_PLAIN)
{
if (!execTuplesMatch(firstSlot,
outerslot,
@@ -2231,7 +2407,11 @@ agg_retrieve_direct(AggState *aggstate)
prepare_projection_slot(aggstate, econtext->ecxt_outertuple, currentSet);
- finalize_aggregates(aggstate, peragg, pergroup, currentSet);
+ select_current_set(aggstate, currentSet, false);
+
+ finalize_aggregates(aggstate,
+ peragg,
+ pergroup + (currentSet * aggstate->numtrans));
/*
* If there's no row to project right now, we must continue rather
@@ -2247,21 +2427,13 @@ agg_retrieve_direct(AggState *aggstate)
}
/*
- * ExecAgg for hashed case: phase 1, read input and build hash table
+ * ExecAgg for hashed case: read input and build hash table
*/
static void
agg_fill_hash_table(AggState *aggstate)
{
- ExprContext *tmpcontext;
- TupleHashEntryData *entry;
TupleTableSlot *outerslot;
-
- /*
- * get state info from node
- *
- * tmpcontext is the per-input-tuple expression context
- */
- tmpcontext = aggstate->tmpcontext;
+ ExprContext *tmpcontext = aggstate->tmpcontext;
/*
* Process each outer-plan tuple, and then fetch the next one, until we
@@ -2269,32 +2441,40 @@ agg_fill_hash_table(AggState *aggstate)
*/
for (;;)
{
+ AggStatePerGroup *pergroups;
+
outerslot = fetch_input_tuple(aggstate);
if (TupIsNull(outerslot))
break;
- /* set up for advance_aggregates call */
+
+ /* set up for lookup_hash_entries and advance_aggregates */
tmpcontext->ecxt_outertuple = outerslot;
- /* Find or build hashtable entry for this tuple's group */
- entry = lookup_hash_entry(aggstate, outerslot);
+ /* Find or build hashtable entries */
+ pergroups = lookup_hash_entries(aggstate);
/* Advance the aggregates */
if (DO_AGGSPLIT_COMBINE(aggstate->aggsplit))
- combine_aggregates(aggstate, (AggStatePerGroup) entry->additional);
+ combine_aggregates(aggstate, pergroups[0]);
else
- advance_aggregates(aggstate, (AggStatePerGroup) entry->additional);
+ advance_aggregates(aggstate, NULL, pergroups);
- /* Reset per-input-tuple context after each tuple */
- ResetExprContext(tmpcontext);
+ /*
+ * Reset per-input-tuple context after each tuple, but note that the
+ * hash lookups do this too
+ */
+ ResetExprContext(aggstate->tmpcontext);
}
aggstate->table_filled = true;
- /* Initialize to walk the hash table */
- ResetTupleHashIterator(aggstate->hashtable, &aggstate->hashiter);
+ /* Initialize to walk the first hash table */
+ select_current_set(aggstate, 0, true);
+ ResetTupleHashIterator(aggstate->perhash[0].hashtable,
+ &aggstate->perhash[0].hashiter);
}
/*
- * ExecAgg for hashed case: phase 2, retrieving groups from hash table
+ * ExecAgg for hashed case: retrieving groups from hash table
*/
static TupleTableSlot *
agg_retrieve_hash_table(AggState *aggstate)
@@ -2305,17 +2485,22 @@ agg_retrieve_hash_table(AggState *aggstate)
TupleHashEntryData *entry;
TupleTableSlot *firstSlot;
TupleTableSlot *result;
- TupleTableSlot *hashslot;
+ AggStatePerHash perhash;
/*
- * get state info from node
+ * get state info from node.
+ *
+ * econtext is the per-output-tuple expression context.
*/
- /* econtext is the per-output-tuple expression context */
econtext = aggstate->ss.ps.ps_ExprContext;
peragg = aggstate->peragg;
firstSlot = aggstate->ss.ss_ScanTupleSlot;
- hashslot = aggstate->hashslot;
+ /*
+ * Note that perhash (and therefore anything accessed through it) can
+ * change inside the loop, as we change between grouping sets.
+ */
+ perhash = &aggstate->perhash[aggstate->current_set];
/*
* We loop retrieving groups until we find one satisfying
@@ -2323,17 +2508,37 @@ agg_retrieve_hash_table(AggState *aggstate)
*/
while (!aggstate->agg_done)
{
- int i;
+ TupleTableSlot *hashslot = perhash->hashslot;
+ int i;
/*
* Find the next entry in the hash table
*/
- entry = ScanTupleHashTable(aggstate->hashtable, &aggstate->hashiter);
+ entry = ScanTupleHashTable(perhash->hashtable, &perhash->hashiter);
if (entry == NULL)
{
- /* No more entries in hashtable, so done */
- aggstate->agg_done = TRUE;
- return NULL;
+ int nextset = aggstate->current_set + 1;
+
+ if (nextset < aggstate->num_hashes)
+ {
+ /*
+ * Switch to next grouping set, reinitialize, and restart the
+ * loop.
+ */
+ select_current_set(aggstate, nextset, true);
+
+ perhash = &aggstate->perhash[aggstate->current_set];
+
+ ResetTupleHashIterator(perhash->hashtable, &perhash->hashiter);
+
+ continue;
+ }
+ else
+ {
+ /* No more hashtables, so done */
+ aggstate->agg_done = TRUE;
+ return NULL;
+ }
}
/*
@@ -2356,9 +2561,9 @@ agg_retrieve_hash_table(AggState *aggstate)
memset(firstSlot->tts_isnull, true,
firstSlot->tts_tupleDescriptor->natts * sizeof(bool));
- for (i = 0; i < aggstate->numhashGrpCols; i++)
+ for (i = 0; i < perhash->numhashGrpCols; i++)
{
- int varNumber = aggstate->hashGrpColIdxInput[i] - 1;
+ int varNumber = perhash->hashGrpColIdxInput[i] - 1;
firstSlot->tts_values[varNumber] = hashslot->tts_values[i];
firstSlot->tts_isnull[varNumber] = hashslot->tts_isnull[i];
@@ -2367,14 +2572,18 @@ agg_retrieve_hash_table(AggState *aggstate)
pergroup = (AggStatePerGroup) entry->additional;
- finalize_aggregates(aggstate, peragg, pergroup, 0);
-
/*
* Use the representative input tuple for any references to
* non-aggregated input columns in the qual and tlist.
*/
econtext->ecxt_outertuple = firstSlot;
+ prepare_projection_slot(aggstate,
+ econtext->ecxt_outertuple,
+ aggstate->current_set);
+
+ finalize_aggregates(aggstate, peragg, pergroup);
+
result = project_aggregates(aggstate);
if (result)
return result;
@@ -2388,7 +2597,36 @@ agg_retrieve_hash_table(AggState *aggstate)
* ExecInitAgg
*
* Creates the run-time information for the agg node produced by the
- * planner and initializes its outer subtree
+ * planner and initializes its outer subtree.
+ *
+ * What we get from the planner is actually one "real" Agg node which is part
+ * of the plan tree proper, but which optionally has an additional list of Agg
+ * nodes hung off the side via the "chain" field. This is because an Agg node
+ * happens to be a convenient representation of all the data we need for
+ * grouping sets.
+ *
+ * For many purposes, we treat the "real" node as if it were just the first
+ * node in the chain. The chain must be ordered such that hashed entries come
+ * before sorted/plain entries; the real node is marked AGG_MIXED if both
+ * kinds are present. In that case the real node describes one of the hashed
+ * groupings; further AGG_HASHED nodes may optionally follow in the chain,
+ * followed in turn by AGG_SORTED nodes or by a single AGG_PLAIN node. If the
+ * real node is marked AGG_HASHED or AGG_SORTED, then all the chained nodes
+ * must be of the same type; if it is AGG_PLAIN, there can be no chained nodes.
+ *
+ * We collect all hashed nodes into a single "phase", numbered 0, and create a
+ * sorted phase (numbered 1..n) for each AGG_SORTED or AGG_PLAIN node. Phase
+ * 0 is allocated even if there are no hashes, but remains unused in that
+ * case.
+ *
+ * AGG_HASHED nodes actually refer to only a single grouping set each, because
+ * for each hashed grouping we need a separate grpColIdx and numGroups
+ * estimate. AGG_SORTED nodes represent a "rollup", a list of grouping sets
+ * that share a sort order. Each AGG_SORTED node other than the first one has
+ * an associated Sort node which describes the sort order to be used; the
+ * first sorted node takes its input from the outer subtree, which the planner
+ * has already arranged to provide ordered data.
+ *
* -----------------
*/
AggState *
@@ -2403,14 +2641,18 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
transno,
aggno;
int phase;
+ int phaseidx;
List *combined_inputeval;
ListCell *l;
Bitmapset *all_grouped_cols = NULL;
int numGroupingSets = 1;
int numPhases;
+ int numHashes;
int column_offset;
int i = 0;
int j = 0;
+ bool use_hashing = (node->aggstrategy == AGG_HASHED ||
+ node->aggstrategy == AGG_MIXED);
/* check for unsupported flags */
Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
@@ -2425,9 +2667,9 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->aggs = NIL;
aggstate->numaggs = 0;
aggstate->numtrans = 0;
+ aggstate->aggstrategy = node->aggstrategy;
aggstate->aggsplit = node->aggsplit;
aggstate->maxsets = 0;
- aggstate->hashfunctions = NULL;
aggstate->projected_set = -1;
aggstate->current_set = 0;
aggstate->peragg = NULL;
@@ -2437,18 +2679,22 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->agg_done = false;
aggstate->pergroup = NULL;
aggstate->grp_firstTuple = NULL;
- aggstate->hashtable = NULL;
aggstate->sort_in = NULL;
aggstate->sort_out = NULL;
/*
+ * phases[0] always exists, but is dummy in sorted/plain mode
+ */
+ numPhases = (use_hashing ? 1 : 2);
+ numHashes = (use_hashing ? 1 : 0);
+
+ /*
* Calculate the maximum number of grouping sets in any phase; this
- * determines the size of some allocations.
+ * determines the size of some allocations. Also calculate the number of
+ * phases, since all hashed/mixed nodes contribute to only a single phase.
*/
if (node->groupingSets)
{
- Assert(node->aggstrategy != AGG_HASHED);
-
numGroupingSets = list_length(node->groupingSets);
foreach(l, node->chain)
@@ -2457,22 +2703,32 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
numGroupingSets = Max(numGroupingSets,
list_length(agg->groupingSets));
+
+ /*
+ * additional AGG_HASHED aggs become part of phase 0, but all
+ * others add an extra phase.
+ */
+ if (agg->aggstrategy != AGG_HASHED)
+ ++numPhases;
+ else
+ ++numHashes;
}
}
aggstate->maxsets = numGroupingSets;
- aggstate->numphases = numPhases = 1 + list_length(node->chain);
+ aggstate->numphases = numPhases;
aggstate->aggcontexts = (ExprContext **)
palloc0(sizeof(ExprContext *) * numGroupingSets);
/*
* Create expression contexts. We need three or more, one for
- * per-input-tuple processing, one for per-output-tuple processing, and
- * one for each grouping set. The per-tuple memory context of the
- * per-grouping-set ExprContexts (aggcontexts) replaces the standalone
- * memory context formerly used to hold transition values. We cheat a
- * little by using ExecAssignExprContext() to build all of them.
+ * per-input-tuple processing, one for per-output-tuple processing, one
+ * for all the hashtables, and one for each grouping set. The per-tuple
+ * memory context of the per-grouping-set ExprContexts (aggcontexts)
+ * replaces the standalone memory context formerly used to hold transition
+ * values. We cheat a little by using ExecAssignExprContext() to build
+ * all of them.
*
* NOTE: the details of what is stored in aggcontexts and what is stored
* in the regular per-query memory context are driven by a simple
@@ -2488,14 +2744,21 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->aggcontexts[i] = aggstate->ss.ps.ps_ExprContext;
}
+ if (use_hashing)
+ {
+ ExecAssignExprContext(estate, &aggstate->ss.ps);
+ aggstate->hashcontext = aggstate->ss.ps.ps_ExprContext;
+ }
+
ExecAssignExprContext(estate, &aggstate->ss.ps);
/*
- * tuple table initialization
+ * tuple table initialization.
+ *
+ * For hashtables, we create some additional slots below.
*/
ExecInitScanTupleSlot(estate, &aggstate->ss);
ExecInitResultTupleSlot(estate, &aggstate->ss.ps);
- aggstate->hashslot = ExecInitExtraTupleSlot(estate);
aggstate->sort_slot = ExecInitExtraTupleSlot(estate);
/*
@@ -2560,19 +2823,26 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
* For each phase, prepare grouping set data and fmgr lookup data for
* compare functions. Accumulate all_grouped_cols in passing.
*/
-
aggstate->phases = palloc0(numPhases * sizeof(AggStatePerPhaseData));
- for (phase = 0; phase < numPhases; ++phase)
+ aggstate->num_hashes = numHashes;
+ if (numHashes)
+ {
+ aggstate->perhash = palloc0(sizeof(AggStatePerHashData) * numHashes);
+ aggstate->phases[0].numsets = 0;
+ aggstate->phases[0].gset_lengths = palloc(numHashes * sizeof(int));
+ aggstate->phases[0].grouped_cols = palloc(numHashes * sizeof(Bitmapset *));
+ }
+
+ phase = 0;
+ for (phaseidx = 0; phaseidx <= list_length(node->chain); ++phaseidx)
{
- AggStatePerPhase phasedata = &aggstate->phases[phase];
Agg *aggnode;
Sort *sortnode;
- int num_sets;
- if (phase > 0)
+ if (phaseidx > 0)
{
- aggnode = castNode(Agg, list_nth(node->chain, phase - 1));
+ aggnode = castNode(Agg, list_nth(node->chain, phaseidx - 1));
sortnode = castNode(Sort, aggnode->plan.lefttree);
}
else
@@ -2581,53 +2851,91 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
sortnode = NULL;
}
- phasedata->numsets = num_sets = list_length(aggnode->groupingSets);
+ Assert(phase <= 1 || sortnode);
- if (num_sets)
+ if (aggnode->aggstrategy == AGG_HASHED
+ || aggnode->aggstrategy == AGG_MIXED)
{
- phasedata->gset_lengths = palloc(num_sets * sizeof(int));
- phasedata->grouped_cols = palloc(num_sets * sizeof(Bitmapset *));
+ AggStatePerPhase phasedata = &aggstate->phases[0];
+ AggStatePerHash perhash;
+ Bitmapset *cols = NULL;
- i = 0;
- foreach(l, aggnode->groupingSets)
- {
- int current_length = list_length(lfirst(l));
- Bitmapset *cols = NULL;
+ Assert(phase == 0);
+ i = phasedata->numsets++;
+ perhash = &aggstate->perhash[i];
- /* planner forces this to be correct */
- for (j = 0; j < current_length; ++j)
- cols = bms_add_member(cols, aggnode->grpColIdx[j]);
+ /* phase 0 always points to the "real" Agg in the hash case */
+ phasedata->aggnode = node;
+ phasedata->aggstrategy = node->aggstrategy;
- phasedata->grouped_cols[i] = cols;
- phasedata->gset_lengths[i] = current_length;
- ++i;
- }
+ /* but the actual Agg node representing this hash is saved here */
+ perhash->aggnode = aggnode;
+
+ phasedata->gset_lengths[i] = perhash->numCols = aggnode->numCols;
- all_grouped_cols = bms_add_members(all_grouped_cols,
- phasedata->grouped_cols[0]);
+ for (j = 0; j < aggnode->numCols; ++j)
+ cols = bms_add_member(cols, aggnode->grpColIdx[j]);
+
+ phasedata->grouped_cols[i] = cols;
+
+ all_grouped_cols = bms_add_members(all_grouped_cols, cols);
+ continue;
}
else
{
- Assert(phase == 0);
+ AggStatePerPhase phasedata = &aggstate->phases[++phase];
+ int num_sets;
- phasedata->gset_lengths = NULL;
- phasedata->grouped_cols = NULL;
- }
+ phasedata->numsets = num_sets = list_length(aggnode->groupingSets);
- /*
- * If we are grouping, precompute fmgr lookup data for inner loop.
- */
- if (aggnode->aggstrategy == AGG_SORTED)
- {
- Assert(aggnode->numCols > 0);
+ if (num_sets)
+ {
+ phasedata->gset_lengths = palloc(num_sets * sizeof(int));
+ phasedata->grouped_cols = palloc(num_sets * sizeof(Bitmapset *));
- phasedata->eqfunctions =
- execTuplesMatchPrepare(aggnode->numCols,
- aggnode->grpOperators);
- }
+ i = 0;
+ foreach(l, aggnode->groupingSets)
+ {
+ int current_length = list_length(lfirst(l));
+ Bitmapset *cols = NULL;
+
+ /* planner forces this to be correct */
+ for (j = 0; j < current_length; ++j)
+ cols = bms_add_member(cols, aggnode->grpColIdx[j]);
+
+ phasedata->grouped_cols[i] = cols;
+ phasedata->gset_lengths[i] = current_length;
+
+ ++i;
+ }
- phasedata->aggnode = aggnode;
- phasedata->sortnode = sortnode;
+ all_grouped_cols = bms_add_members(all_grouped_cols,
+ phasedata->grouped_cols[0]);
+ }
+ else
+ {
+ Assert(phaseidx == 0);
+
+ phasedata->gset_lengths = NULL;
+ phasedata->grouped_cols = NULL;
+ }
+
+ /*
+ * If we are grouping, precompute fmgr lookup data for inner loop.
+ */
+ if (aggnode->aggstrategy == AGG_SORTED)
+ {
+ Assert(aggnode->numCols > 0);
+
+ phasedata->eqfunctions =
+ execTuplesMatchPrepare(aggnode->numCols,
+ aggnode->grpOperators);
+ }
+
+ phasedata->aggnode = aggnode;
+ phasedata->aggstrategy = aggnode->aggstrategy;
+ phasedata->sortnode = sortnode;
+ }
}
/*
@@ -2638,13 +2946,6 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->all_grouped_cols = lcons_int(i, aggstate->all_grouped_cols);
/*
- * Initialize current phase-dependent values to initial phase
- */
-
- aggstate->current_phase = 0;
- initialize_phase(aggstate, 0);
-
- /*
* Set up aggregate-result storage in the output expr context, and also
* allocate my private per-agg working storage
*/
@@ -2658,23 +2959,30 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->peragg = peraggs;
aggstate->pertrans = pertransstates;
-
/*
* Hashing can only appear in the initial phase.
*/
- if (node->aggstrategy == AGG_HASHED)
+ if (use_hashing)
{
- find_hash_columns(aggstate);
+ for (i = 0; i < numHashes; ++i)
+ {
+ aggstate->perhash[i].hashslot = ExecInitExtraTupleSlot(estate);
- execTuplesHashPrepare(node->numCols,
- node->grpOperators,
- &aggstate->phases[0].eqfunctions,
- &aggstate->hashfunctions);
+ execTuplesHashPrepare(aggstate->perhash[i].numCols,
+ aggstate->perhash[i].aggnode->grpOperators,
+ &aggstate->perhash[i].eqfunctions,
+ &aggstate->perhash[i].hashfunctions);
+ }
+ /* this is an array of pointers, not structures */
+ aggstate->hash_pergroup = palloc0(sizeof(AggStatePerGroup) * numHashes);
+
+ find_hash_columns(aggstate);
build_hash_table(aggstate);
aggstate->table_filled = false;
}
- else
+
+ if (node->aggstrategy != AGG_HASHED)
{
AggStatePerGroup pergroup;
@@ -2685,6 +2993,26 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->pergroup = pergroup;
}
+ /*
+ * Initialize current phase-dependent values to initial phase. The initial
+ * phase is 1 (first sort pass) for all strategies that use sorting (if
+ * hashing is being done too, then phase 0 is processed last); but if only
+ * hashing is being done, then phase 0 is all there is.
+ */
+
+ if (node->aggstrategy == AGG_HASHED)
+ {
+ aggstate->current_phase = 0;
+ initialize_phase(aggstate, 0);
+ select_current_set(aggstate, 0, true);
+ }
+ else
+ {
+ aggstate->current_phase = 1;
+ initialize_phase(aggstate, 1);
+ select_current_set(aggstate, 0, false);
+ }
+
/* -----------------
* Perform lookups of aggregate function info, and initialize the
* unchanging fields of the per-agg and per-trans data.
@@ -3263,7 +3591,7 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
* We don't implement DISTINCT or ORDER BY aggs in the HASHED case
* (yet)
*/
- Assert(((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED);
+ Assert(aggstate->aggstrategy != AGG_HASHED && aggstate->aggstrategy != AGG_MIXED);
/* If we have only one input, we need its len/byval info. */
if (numInputs == 1)
@@ -3512,6 +3840,8 @@ ExecEndAgg(AggState *node)
/* And ensure any agg shutdown callbacks have been called */
for (setno = 0; setno < numGroupingSets; setno++)
ReScanExprContext(node->aggcontexts[setno]);
+ if (node->hashcontext)
+ ReScanExprContext(node->hashcontext);
/*
* We don't actually free any ExprContexts here (see comment in
@@ -3539,7 +3869,7 @@ ExecReScanAgg(AggState *node)
node->agg_done = false;
- if (aggnode->aggstrategy == AGG_HASHED)
+ if (node->aggstrategy == AGG_HASHED)
{
/*
* In the hashed case, if we haven't yet built the hash table then we
@@ -3559,7 +3889,9 @@ ExecReScanAgg(AggState *node)
if (outerPlan->chgParam == NULL &&
!bms_overlap(node->ss.ps.chgParam, aggnode->aggParams))
{
- ResetTupleHashIterator(node->hashtable, &node->hashiter);
+ ResetTupleHashIterator(node->perhash[0].hashtable,
+ &node->perhash[0].hashiter);
+ select_current_set(node, 0, true);
return;
}
}
@@ -3584,11 +3916,7 @@ ExecReScanAgg(AggState *node)
* ExecReScan already did it. But we do need to reset our per-grouping-set
* contexts, which may have transvalues stored in them. (We use rescan
* rather than just reset because transfns may have registered callbacks
- * that need to be run now.)
- *
- * Note that with AGG_HASHED, the hash table is allocated in a sub-context
- * of the aggcontext. This used to be an issue, but now, resetting a
- * context automatically deletes sub-contexts too.
+ * that need to be run now.) For the AGG_HASHED case, see below.
*/
for (setno = 0; setno < numGroupingSets; setno++)
@@ -3608,13 +3936,22 @@ ExecReScanAgg(AggState *node)
MemSet(econtext->ecxt_aggvalues, 0, sizeof(Datum) * node->numaggs);
MemSet(econtext->ecxt_aggnulls, 0, sizeof(bool) * node->numaggs);
- if (aggnode->aggstrategy == AGG_HASHED)
+ /*
+ * With AGG_HASHED/MIXED, the hash table is allocated in a sub-context of
+ * the hashcontext. This used to be an issue, but now, resetting a context
+ * automatically deletes sub-contexts too.
+ */
+
+ if (node->aggstrategy == AGG_HASHED || node->aggstrategy == AGG_MIXED)
{
+ ReScanExprContext(node->hashcontext);
/* Rebuild an empty hash table */
build_hash_table(node);
node->table_filled = false;
+ /* iterator will be reset when the table is filled */
}
- else
+
+ if (node->aggstrategy != AGG_HASHED)
{
/*
* Reset the per-group state (in particular, mark transvalues null)
@@ -3622,8 +3959,8 @@ ExecReScanAgg(AggState *node)
MemSet(node->pergroup, 0,
sizeof(AggStatePerGroupData) * node->numaggs * numGroupingSets);
- /* reset to phase 0 */
- initialize_phase(node, 0);
+ /* reset to phase 1 */
+ initialize_phase(node, 1);
node->input_done = false;
node->projected_set = -1;
@@ -3664,7 +4001,7 @@ AggCheckCallContext(FunctionCallInfo fcinfo, MemoryContext *aggcontext)
if (aggcontext)
{
AggState *aggstate = ((AggState *) fcinfo->context);
- ExprContext *cxt = aggstate->aggcontexts[aggstate->current_set];
+ ExprContext *cxt = aggstate->curaggcontext;
*aggcontext = cxt->ecxt_per_tuple_memory;
}
@@ -3753,7 +4090,7 @@ AggRegisterCallback(FunctionCallInfo fcinfo,
if (fcinfo->context && IsA(fcinfo->context, AggState))
{
AggState *aggstate = (AggState *) fcinfo->context;
- ExprContext *cxt = aggstate->aggcontexts[aggstate->current_set];
+ ExprContext *cxt = aggstate->curaggcontext;
RegisterExprContextCallback(cxt, func, arg);
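(Reviewer aid, not part of the patch: the advance_aggregates change above
relies on two different transition-state layouts. Sorted grouping sets share
one flat array indexed by set and transition number, while each hashed set's
states live in a per-hashtable-entry array, and lookup_hash_entries collects
one pointer per set. The standalone sketch below shows just the two indexing
schemes; PerGroupState is a made-up stand-in for AggStatePerGroupData.)

#include <assert.h>

typedef struct PerGroupState { double transValue; } PerGroupState;

/* Sorted sets: one flat array, laid out [setno][transno]. */
static PerGroupState *
sorted_state(PerGroupState *pergroup, int setno, int transno, int numTrans)
{
    /* same arithmetic as pergroup[transno + (setno * numTrans)] above */
    return &pergroup[transno + (setno * numTrans)];
}

/* Hashed sets: one pointer per set, each to a [numTrans] array. */
static PerGroupState *
hashed_state(PerGroupState **pergroups, int setno, int transno)
{
    return &pergroups[setno][transno];
}

int
main(void)
{
    PerGroupState flat[2 * 3] = {{0}};  /* 2 sets x 3 transitions */
    PerGroupState set0[3] = {{0}};
    PerGroupState set1[3] = {{0}};
    PerGroupState *byset[2] = {set0, set1};

    assert(sorted_state(flat, 1, 2, 3) == &flat[5]);
    assert(hashed_state(byset, 1, 2) == &set1[2]);
    return 0;
}

Keeping the hashed states behind the extra indirection is what lets a single
advance_aggregates call update both kinds of state while evaluating the input
columns only once.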
diff --git a/src/backend/lib/Makefile b/src/backend/lib/Makefile
index 2d2ba84fe9..f222c6c20d 100644
--- a/src/backend/lib/Makefile
+++ b/src/backend/lib/Makefile
@@ -12,7 +12,7 @@ subdir = src/backend/lib
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
-OBJS = binaryheap.o bipartite_match.o hyperloglog.o ilist.o pairingheap.o \
- rbtree.o stringinfo.o
+OBJS = binaryheap.o bipartite_match.o hyperloglog.o ilist.o knapsack.o \
+ pairingheap.o rbtree.o stringinfo.o
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/lib/knapsack.c b/src/backend/lib/knapsack.c
new file mode 100644
index 0000000000..ddf2b9afa3
--- /dev/null
+++ b/src/backend/lib/knapsack.c
@@ -0,0 +1,114 @@
+/*-------------------------------------------------------------------------
+ *
+ * knapsack.c
+ * Knapsack problem solver
+ *
+ * Given input vectors of integral item weights (must be >= 0) and values
+ * (double >= 0), compute the set of items which produces the greatest total
+ * value without exceeding a specified total weight; each item is included at
+ * most once (this is the 0/1 knapsack problem). Weight 0 items will always be
+ * included.
+ *
+ * The performance of this algorithm is pseudo-polynomial, O(nW) where W is the
+ * weight limit. To use this with non-integral weights, or to obtain
+ * approximate solutions, the caller should pre-scale the input weights to a
+ * suitable range. This allows approximate solutions in polynomial time (the
+ * general case of the exact problem is NP-hard).
+ *
+ * Copyright (c) 2017, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/lib/knapsack.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <math.h>
+#include <limits.h>
+
+#include "lib/knapsack.h"
+#include "miscadmin.h"
+#include "nodes/bitmapset.h"
+#include "utils/builtins.h"
+#include "utils/memutils.h"
+#include "utils/palloc.h"
+
+/*
+ * DiscreteKnapsack
+ *
+ * The item_values input is optional; if omitted, all the items are assumed to
+ * have value 1.
+ *
+ * Returns a Bitmapset of the 0..(n-1) indexes of the items chosen for
+ * inclusion in the solution.
+ *
+ * This uses the usual dynamic-programming algorithm, adapted to reuse the
+ * memory on each pass (by working from larger weights to smaller). At the
+ * start of pass number i, the values[w] array contains the largest value
+ * computed with total weight <= w, using only items with indices < i; and
+ * sets[w] contains the bitmap of items actually used for that value. (The
+ * bitmapsets are all pre-initialized with an unused high bit so that memory
+ * allocation is done only once.)
+ */
+Bitmapset *
+DiscreteKnapsack(int max_weight, int num_items,
+ int *item_weights, double *item_values)
+{
+ MemoryContext local_ctx = AllocSetContextCreate(CurrentMemoryContext,
+ "Knapsack",
+ ALLOCSET_SMALL_MINSIZE,
+ ALLOCSET_SMALL_INITSIZE,
+ ALLOCSET_SMALL_MAXSIZE);
+ MemoryContext oldctx = MemoryContextSwitchTo(local_ctx);
+ double *values;
+ Bitmapset **sets;
+ Bitmapset *result;
+ int i,
+ j;
+
+ Assert(max_weight >= 0);
+ Assert(num_items > 0 && item_weights);
+
+ values = palloc((1 + max_weight) * sizeof(double));
+ sets = palloc((1 + max_weight) * sizeof(Bitmapset *));
+
+ for (i = 0; i <= max_weight; ++i)
+ {
+ values[i] = 0;
+ sets[i] = bms_make_singleton(num_items);
+ }
+
+ for (i = 0; i < num_items; ++i)
+ {
+ int iw = item_weights[i];
+ double iv = item_values ? item_values[i] : 1;
+
+ for (j = max_weight; j >= iw; --j)
+ {
+ int ow = j - iw;
+
+ if (values[j] <= values[ow] + iv)
+ {
+ /* copy sets[ow] to sets[j] without realloc */
+ if (j != ow)
+ {
+ sets[j] = bms_del_members(sets[j], sets[j]);
+ sets[j] = bms_add_members(sets[j], sets[ow]);
+ }
+
+ sets[j] = bms_add_member(sets[j], i);
+
+ values[j] = values[ow] + iv;
+ }
+ }
+ }
+
+ MemoryContextSwitchTo(oldctx);
+
+ result = bms_del_member(bms_copy(sets[max_weight]), num_items);
+
+ MemoryContextDelete(local_ctx);
+
+ return result;
+}
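(Not part of the patch: a hypothetical smoke test for the solver. It would
have to run inside the backend, since DiscreteKnapsack uses palloc and memory
contexts; the weights and values are invented. With capacity 4 and items of
weight/value (3, 4.0), (2, 3.0), (2, 3.0), the best choice is items 1 and 2:
together they weigh 4 and score 6.0, beating item 0's 4.0, and item 0 cannot
be combined with anything else under the limit.)

#include "postgres.h"

#include "lib/knapsack.h"
#include "nodes/bitmapset.h"

static void
knapsack_smoke_test(void)
{
    int         weights[3] = {3, 2, 2};
    double      values[3] = {4.0, 3.0, 3.0};
    Bitmapset  *sol = DiscreteKnapsack(4, 3, weights, values);

    /* expect {1, 2}: total value 6.0 within the weight limit of 4 */
    Assert(!bms_is_member(0, sol));
    Assert(bms_is_member(1, sol));
    Assert(bms_is_member(2, sol));
}

The indexes in the result refer to positions in the input arrays, so the
caller maps them back to whatever the items represent -- here, which grouping
sets end up hashed.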
diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c
index 252af5c870..ea1c538287 100644
--- a/src/backend/nodes/bitmapset.c
+++ b/src/backend/nodes/bitmapset.c
@@ -21,7 +21,7 @@
#include "postgres.h"
#include "access/hash.h"
-
+#include "nodes/pg_list.h"
#define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD)
#define BITNUM(x) ((x) % BITS_PER_BITMAPWORD)
@@ -458,6 +458,35 @@ bms_overlap(const Bitmapset *a, const Bitmapset *b)
}
/*
+ * bms_overlap_list - does a set overlap an integer list?
+ */
+bool
+bms_overlap_list(const Bitmapset *a, const List *b)
+{
+ ListCell *lc;
+ int wordnum,
+ bitnum;
+
+ if (a == NULL || b == NIL)
+ return false;
+
+ foreach(lc, b)
+ {
+ int x = lfirst_int(lc);
+
+ if (x < 0)
+ elog(ERROR, "negative bitmapset member not allowed");
+ wordnum = WORDNUM(x);
+ bitnum = BITNUM(x);
+ if (wordnum < a->nwords)
+ if ((a->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0)
+ return true;
+ }
+
+ return false;
+}
+
+/*
* bms_nonempty_difference - do sets have a nonempty difference?
*/
bool
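(Also an illustration rather than patch content: bms_overlap_list answers
whether any member of an integer List appears in a Bitmapset, without having
to build a second bitmapset first. The names below are invented.)

#include "postgres.h"

#include "nodes/bitmapset.h"
#include "nodes/pg_list.h"

static bool
gset_has_unsortable_col(void)
{
    Bitmapset  *unsortable = bms_make_singleton(5);
    List       *gset = list_make2_int(2, 5); /* a grouping set as an int list */

    return bms_overlap_list(unsortable, gset);  /* true: 5 is in both */
}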
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 7418fbeded..d2d66f915b 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -1936,6 +1936,28 @@ _outAggPath(StringInfo str, const AggPath *node)
}
static void
+_outRollupData(StringInfo str, const RollupData *node)
+{
+ WRITE_NODE_TYPE("ROLLUP");
+
+ WRITE_NODE_FIELD(groupClause);
+ WRITE_NODE_FIELD(gsets);
+ WRITE_NODE_FIELD(gsets_data);
+ WRITE_FLOAT_FIELD(numGroups, "%.0f");
+ WRITE_BOOL_FIELD(hashable);
+ WRITE_BOOL_FIELD(is_hashed);
+}
+
+static void
+_outGroupingSetData(StringInfo str, const GroupingSetData *node)
+{
+ WRITE_NODE_TYPE("GSDATA");
+
+ WRITE_NODE_FIELD(set);
+ WRITE_FLOAT_FIELD(numGroups, "%.0f");
+}
+
+static void
_outGroupingSetsPath(StringInfo str, const GroupingSetsPath *node)
{
WRITE_NODE_TYPE("GROUPINGSETSPATH");
@@ -1943,8 +1965,8 @@ _outGroupingSetsPath(StringInfo str, const GroupingSetsPath *node)
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(subpath);
- WRITE_NODE_FIELD(rollup_groupclauses);
- WRITE_NODE_FIELD(rollup_lists);
+ WRITE_ENUM_FIELD(aggstrategy, AggStrategy);
+ WRITE_NODE_FIELD(rollups);
WRITE_NODE_FIELD(qual);
}
@@ -3915,6 +3937,12 @@ outNode(StringInfo str, const void *obj)
case T_PlannerParamItem:
_outPlannerParamItem(str, obj);
break;
+ case T_RollupData:
+ _outRollupData(str, obj);
+ break;
+ case T_GroupingSetData:
+ _outGroupingSetData(str, obj);
+ break;
case T_ExtensibleNode:
_outExtensibleNode(str, obj);
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index a129d1ecb3..eb7a618341 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1884,11 +1884,16 @@ cost_agg(Path *path, PlannerInfo *root,
total_cost = startup_cost + cpu_tuple_cost;
output_tuples = 1;
}
- else if (aggstrategy == AGG_SORTED)
+ else if (aggstrategy == AGG_SORTED || aggstrategy == AGG_MIXED)
{
/* Here we are able to deliver output on-the-fly */
startup_cost = input_startup_cost;
total_cost = input_total_cost;
+ if (aggstrategy == AGG_MIXED && !enable_hashagg)
+ {
+ startup_cost += disable_cost;
+ total_cost += disable_cost;
+ }
/* calcs phrased this way to match HASHED case, see note above */
total_cost += aggcosts->transCost.startup;
total_cost += aggcosts->transCost.per_tuple * input_tuples;
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 89e1946fc2..a1776beaab 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -1782,18 +1782,15 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
{
Agg *plan;
Plan *subplan;
- List *rollup_groupclauses = best_path->rollup_groupclauses;
- List *rollup_lists = best_path->rollup_lists;
+ List *rollups = best_path->rollups;
AttrNumber *grouping_map;
int maxref;
List *chain;
- ListCell *lc,
- *lc2;
+ ListCell *lc;
/* Shouldn't get here without grouping sets */
Assert(root->parse->groupingSets);
- Assert(rollup_lists != NIL);
- Assert(list_length(rollup_lists) == list_length(rollup_groupclauses));
+ Assert(rollups != NIL);
/*
* Agg can project, so no need to be terribly picky about child tlist, but
@@ -1845,72 +1842,86 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
* costs will be shown by EXPLAIN.
*/
chain = NIL;
- if (list_length(rollup_groupclauses) > 1)
+ if (list_length(rollups) > 1)
{
- forboth(lc, rollup_groupclauses, lc2, rollup_lists)
+ ListCell *lc2 = lnext(list_head(rollups));
+ bool is_first_sort = ((RollupData *) linitial(rollups))->is_hashed;
+
+ for_each_cell(lc, lc2)
{
- List *groupClause = (List *) lfirst(lc);
- List *gsets = (List *) lfirst(lc2);
+ RollupData *rollup = lfirst(lc);
AttrNumber *new_grpColIdx;
- Plan *sort_plan;
+ Plan *sort_plan = NULL;
Plan *agg_plan;
+ AggStrategy strat;
- /* We want to iterate over all but the last rollup list elements */
- if (lnext(lc) == NULL)
- break;
+ new_grpColIdx = remap_groupColIdx(root, rollup->groupClause);
+
+ if (!rollup->is_hashed && !is_first_sort)
+ {
+ sort_plan = (Plan *)
+ make_sort_from_groupcols(rollup->groupClause,
+ new_grpColIdx,
+ subplan);
+ }
- new_grpColIdx = remap_groupColIdx(root, groupClause);
+ if (!rollup->is_hashed)
+ is_first_sort = false;
- sort_plan = (Plan *)
- make_sort_from_groupcols(groupClause,
- new_grpColIdx,
- subplan);
+ if (rollup->is_hashed)
+ strat = AGG_HASHED;
+ else if (list_length(linitial(rollup->gsets)) == 0)
+ strat = AGG_PLAIN;
+ else
+ strat = AGG_SORTED;
agg_plan = (Plan *) make_agg(NIL,
NIL,
- AGG_SORTED,
+ strat,
AGGSPLIT_SIMPLE,
- list_length((List *) linitial(gsets)),
+ list_length((List *) linitial(rollup->gsets)),
new_grpColIdx,
- extract_grouping_ops(groupClause),
- gsets,
+ extract_grouping_ops(rollup->groupClause),
+ rollup->gsets,
NIL,
- 0, /* numGroups not needed */
+ rollup->numGroups,
sort_plan);
/*
- * Nuke stuff we don't need to avoid bloating debug output.
+ * Remove stuff we don't need, to avoid bloating debug output.
*/
- sort_plan->targetlist = NIL;
- sort_plan->lefttree = NULL;
+ if (sort_plan)
+ {
+ sort_plan->targetlist = NIL;
+ sort_plan->lefttree = NULL;
+ }
chain = lappend(chain, agg_plan);
}
}
/*
- * Now make the final Agg node
+ * Now make the real Agg node
*/
{
- List *groupClause = (List *) llast(rollup_groupclauses);
- List *gsets = (List *) llast(rollup_lists);
+ RollupData *rollup = linitial(rollups);
AttrNumber *top_grpColIdx;
int numGroupCols;
- top_grpColIdx = remap_groupColIdx(root, groupClause);
+ top_grpColIdx = remap_groupColIdx(root, rollup->groupClause);
- numGroupCols = list_length((List *) linitial(gsets));
+ numGroupCols = list_length((List *) linitial(rollup->gsets));
plan = make_agg(build_path_tlist(root, &best_path->path),
best_path->qual,
- (numGroupCols > 0) ? AGG_SORTED : AGG_PLAIN,
+ best_path->aggstrategy,
AGGSPLIT_SIMPLE,
numGroupCols,
top_grpColIdx,
- extract_grouping_ops(groupClause),
- gsets,
+ extract_grouping_ops(rollup->groupClause),
+ rollup->gsets,
chain,
- 0, /* numGroups not needed */
+ rollup->numGroups,
subplan);
/* Copy cost data from Path to Plan */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 02286d9c52..3c113a075f 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -30,6 +30,7 @@
#include "foreign/fdwapi.h"
#include "miscadmin.h"
#include "lib/bipartite_match.h"
+#include "lib/knapsack.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#ifdef OPTIMIZER_DEBUG
@@ -91,12 +92,31 @@ typedef struct
List *groupClause; /* overrides parse->groupClause */
} standard_qp_extra;
+/*
+ * Data specific to grouping sets
+ */
+
+typedef struct
+{
+ List *rollups; /* list of RollupData */
+ List *hash_sets_idx; /* unsortable sets remapped to groupClause indexes */
+ double dNumHashGroups; /* est. number of groups in unsortable sets */
+ bool any_hashable; /* at least one set can use hashing */
+ Bitmapset *unsortable_refs; /* sortgrouprefs of unsortable columns */
+ Bitmapset *unhashable_refs; /* sortgrouprefs of unhashable columns */
+ List *unsortable_sets; /* GroupingSetData for each unsortable set */
+ int *tleref_to_colnum_map; /* sortgroupref -> groupClause index */
+} grouping_sets_data;
+
/* Local functions */
static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
static void inheritance_planner(PlannerInfo *root);
static void grouping_planner(PlannerInfo *root, bool inheritance_update,
double tuple_fraction);
+static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root);
+static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
+ int *tleref_to_colnum_map);
static void preprocess_rowmarks(PlannerInfo *root);
static double preprocess_limit(PlannerInfo *root,
double tuple_fraction,
@@ -109,8 +129,7 @@ static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
static void standard_qp_callback(PlannerInfo *root, void *extra);
static double get_number_of_groups(PlannerInfo *root,
double path_rows,
- List *rollup_lists,
- List *rollup_groupclauses);
+ grouping_sets_data *gd);
static Size estimate_hashagg_tablesize(Path *path,
const AggClauseCosts *agg_costs,
double dNumGroups);
@@ -118,8 +137,16 @@ static RelOptInfo *create_grouping_paths(PlannerInfo *root,
RelOptInfo *input_rel,
PathTarget *target,
const AggClauseCosts *agg_costs,
- List *rollup_lists,
- List *rollup_groupclauses);
+ grouping_sets_data *gd);
+static void consider_groupingsets_paths(PlannerInfo *root,
+ RelOptInfo *grouped_rel,
+ Path *path,
+ bool is_sorted,
+ bool can_hash,
+ PathTarget *target,
+ grouping_sets_data *gd,
+ const AggClauseCosts *agg_costs,
+ double dNumGroups);
static RelOptInfo *create_window_paths(PlannerInfo *root,
RelOptInfo *input_rel,
PathTarget *input_target,
@@ -1505,8 +1532,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
AggClauseCosts agg_costs;
WindowFuncLists *wflists = NULL;
List *activeWindows = NIL;
- List *rollup_lists = NIL;
- List *rollup_groupclauses = NIL;
+ grouping_sets_data *gset_data = NULL;
standard_qp_extra qp_extra;
/* A recursive query should always have setOperations */
@@ -1515,84 +1541,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
/* Preprocess grouping sets and GROUP BY clause, if any */
if (parse->groupingSets)
{
- int *tleref_to_colnum_map;
- List *sets;
- int maxref;
- ListCell *lc;
- ListCell *lc2;
- ListCell *lc_set;
-
- parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1);
-
- /* Identify max SortGroupRef in groupClause, for array sizing */
- maxref = 0;
- foreach(lc, parse->groupClause)
- {
- SortGroupClause *gc = lfirst(lc);
-
- if (gc->tleSortGroupRef > maxref)
- maxref = gc->tleSortGroupRef;
- }
-
- /* Allocate workspace array for remapping */
- tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
-
- /* Examine the rollup sets */
- sets = extract_rollup_sets(parse->groupingSets);
-
- foreach(lc_set, sets)
- {
- List *current_sets = (List *) lfirst(lc_set);
- List *groupclause;
- int ref;
-
- /*
- * Reorder the current list of grouping sets into correct
- * prefix order. If only one aggregation pass is needed, try
- * to make the list match the ORDER BY clause; if more than
- * one pass is needed, we don't bother with that.
- */
- current_sets = reorder_grouping_sets(current_sets,
- (list_length(sets) == 1
- ? parse->sortClause
- : NIL));
-
- /*
- * Order the groupClause appropriately. If the first grouping
- * set is empty, this can match regular GROUP BY
- * preprocessing, otherwise we have to force the groupClause
- * to match that grouping set's order.
- */
- groupclause = preprocess_groupclause(root,
- linitial(current_sets));
-
- /*
- * Now that we've pinned down an order for the groupClause for
- * this list of grouping sets, we need to remap the entries in
- * the grouping sets from sortgrouprefs to plain indices
- * (0-based) into the groupClause for this collection of
- * grouping sets.
- */
- ref = 0;
- foreach(lc, groupclause)
- {
- SortGroupClause *gc = lfirst(lc);
-
- tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
- }
-
- foreach(lc, current_sets)
- {
- foreach(lc2, (List *) lfirst(lc))
- {
- lfirst_int(lc2) = tleref_to_colnum_map[lfirst_int(lc2)];
- }
- }
-
- /* Save the reordered sets and corresponding groupclauses */
- rollup_lists = lcons(current_sets, rollup_lists);
- rollup_groupclauses = lcons(groupclause, rollup_groupclauses);
- }
+ gset_data = preprocess_grouping_sets(root);
}
else
{
@@ -1686,8 +1635,9 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
/* Set up data needed by standard_qp_callback */
qp_extra.tlist = tlist;
qp_extra.activeWindows = activeWindows;
- qp_extra.groupClause =
- parse->groupingSets ? llast(rollup_groupclauses) : parse->groupClause;
+ qp_extra.groupClause = (gset_data
+ ? (gset_data->rollups ? ((RollupData *) linitial(gset_data->rollups))->groupClause : NIL)
+ : parse->groupClause);
/*
* Generate the best unsorted and presorted paths for the scan/join
@@ -1887,8 +1837,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
current_rel,
grouping_target,
&agg_costs,
- rollup_lists,
- rollup_groupclauses);
+ gset_data);
/* Fix things up if grouping_target contains SRFs */
if (parse->hasTargetSRFs)
adjust_paths_for_srfs(root, current_rel,
@@ -1925,7 +1874,6 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
current_rel = create_distinct_paths(root,
current_rel);
}
-
} /* end of if (setOperations) */
/*
@@ -2078,6 +2026,216 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
}
+static grouping_sets_data *
+preprocess_grouping_sets(PlannerInfo *root)
+{
+ Query *parse = root->parse;
+ List *sets;
+ int maxref = 0;
+ ListCell *lc;
+ ListCell *lc_set;
+ grouping_sets_data *gd = palloc0(sizeof(grouping_sets_data));
+
+ parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1);
+
+ gd->any_hashable = false;
+ gd->unhashable_refs = NULL;
+ gd->unsortable_refs = NULL;
+ gd->unsortable_sets = NIL;
+
+ if (parse->groupClause)
+ {
+ ListCell *lc;
+
+ foreach(lc, parse->groupClause)
+ {
+ SortGroupClause *gc = lfirst(lc);
+ Index ref = gc->tleSortGroupRef;
+
+ if (ref > maxref)
+ maxref = ref;
+
+ if (!gc->hashable)
+ gd->unhashable_refs = bms_add_member(gd->unhashable_refs, ref);
+
+ if (!OidIsValid(gc->sortop))
+ gd->unsortable_refs = bms_add_member(gd->unsortable_refs, ref);
+ }
+ }
+
+ /* Allocate workspace array for remapping */
+ gd->tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
+
+ /*
+ * If we have any unsortable sets, we must extract them before trying to
+ * prepare rollups. Unsortable sets don't go through
+ * reorder_grouping_sets, so we must apply the GroupingSetData annotation
+ * here.
+ */
+ if (!bms_is_empty(gd->unsortable_refs))
+ {
+ List *sortable_sets = NIL;
+
+ foreach(lc, parse->groupingSets)
+ {
+ List *gset = lfirst(lc);
+
+ if (bms_overlap_list(gd->unsortable_refs, gset))
+ {
+ GroupingSetData *gs = makeNode(GroupingSetData);
+
+ gs->set = gset;
+ gd->unsortable_sets = lappend(gd->unsortable_sets, gs);
+
+ /*
+ * We must enforce here that an unsortable set is hashable;
+ * later code assumes this. Parse analysis only checks that
+ * every individual column is either hashable or sortable.
+ *
+ * Note that passing this test doesn't guarantee we can
+ * generate a plan; there might be other showstoppers.
+ */
+
+ if (bms_overlap_list(gd->unhashable_refs, gset))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("could not implement GROUP BY"),
+ errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
+ }
+ else
+ sortable_sets = lappend(sortable_sets, gset);
+ }
+
+ if (sortable_sets)
+ sets = extract_rollup_sets(sortable_sets);
+ else
+ sets = NIL;
+ }
+ else
+ sets = extract_rollup_sets(parse->groupingSets);
+
+ foreach(lc_set, sets)
+ {
+ List *current_sets = (List *) lfirst(lc_set);
+ RollupData *rollup = makeNode(RollupData);
+ GroupingSetData *gs;
+
+ /*
+ * Reorder the current list of grouping sets into correct prefix
+ * order. If only one aggregation pass is needed, try to make the
+ * list match the ORDER BY clause; if more than one pass is needed, we
+ * don't bother with that.
+ *
+ * Note that this reorders the sets from smallest-member-first to
+ * largest-member-first, and applies the GroupingSetData annotations,
+ * though the numGroups estimates will be filled in later.
+ */
+ current_sets = reorder_grouping_sets(current_sets,
+ (list_length(sets) == 1
+ ? parse->sortClause
+ : NIL));
+
+ /*
+ * Get the initial (and therefore largest) grouping set.
+ */
+ gs = linitial(current_sets);
+
+ /*
+ * Order the groupClause appropriately. If the first grouping set is
+ * empty, then the groupClause must also be empty; otherwise we have
+ * to force the groupClause to match that grouping set's order.
+ *
+ * (The first grouping set can be empty even though parse->groupClause
+ * is not empty only if all non-empty grouping sets are unsortable.
+ * The groupClauses for hashed grouping sets are built later on.)
+ */
+ if (gs->set)
+ rollup->groupClause = preprocess_groupclause(root, gs->set);
+ else
+ rollup->groupClause = NIL;
+
+ /*
+ * Is it hashable? We pretend empty sets are hashable even though we
+ * actually force them not to be hashed later. But don't bother if
+ * there's nothing but empty sets.
+ */
+ if (gs->set &&
+ !bms_overlap_list(gd->unhashable_refs, gs->set))
+ {
+ rollup->hashable = true;
+ gd->any_hashable = true;
+ }
+
+ /*
+ * Now that we've pinned down an order for the groupClause for this
+ * list of grouping sets, we need to remap the entries in the grouping
+ * sets from sortgrouprefs to plain indices (0-based) into the
+ * groupClause for this collection of grouping sets. We keep the
+ * original form for later use, though.
+ */
+ rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
+ current_sets,
+ gd->tleref_to_colnum_map);
+ rollup->gsets_data = current_sets;
+
+ gd->rollups = lappend(gd->rollups, rollup);
+ }
+
+ if (gd->unsortable_sets)
+ {
+ /*
+ * We have not yet pinned down a groupclause for this, but we will
+ * need index-based lists for estimation purposes. Construct
+ * hash_sets_idx based on the entire original groupclause for now.
+ */
+ gd->hash_sets_idx = remap_to_groupclause_idx(parse->groupClause,
+ gd->unsortable_sets,
+ gd->tleref_to_colnum_map);
+ gd->any_hashable = true;
+ }
+
+ return gd;
+}
+
+/*
+ * Given a groupclause and a list of GroupingSetData, return equivalent sets
+ * (without annotation) mapped to indexes into the given groupclause.
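+ *
+ * For example (hypothetical refs): if the groupClause entries carry
+ * tleSortGroupRefs 3, 5 and 1, and gsets contains the sets {3,5}, {3}
+ * and {}, the result is [0,1], [0] and [].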
+ */
+static List *
+remap_to_groupclause_idx(List *groupClause,
+ List *gsets,
+ int *tleref_to_colnum_map)
+{
+ int ref = 0;
+ List *result = NIL;
+ ListCell *lc;
+
+ foreach(lc, groupClause)
+ {
+ SortGroupClause *gc = lfirst(lc);
+
+ tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
+ }
+
+ foreach(lc, gsets)
+ {
+ List *set = NIL;
+ ListCell *lc2;
+ GroupingSetData *gs = lfirst(lc);
+
+ foreach(lc2, gs->set)
+ {
+ set = lappend_int(set, tleref_to_colnum_map[lfirst_int(lc2)]);
+ }
+
+ result = lappend(result, set);
+ }
+
+ return result;
+}
+
+
+
/*
* Detect whether a plan node is a "dummy" plan created when a relation
* is deemed not to need scanning due to constraint exclusion.
@@ -2992,7 +3150,7 @@ extract_rollup_sets(List *groupingSets)
/*
* Reorder the elements of a list of grouping sets such that they have correct
- * prefix relationships.
+ * prefix relationships. Also inserts the GroupingSetData annotations.
*
* The input must be ordered with smallest sets first; the result is returned
* with largest sets first. Note that the result shares no list substructure
@@ -3015,6 +3173,7 @@ reorder_grouping_sets(List *groupingsets, List *sortclause)
{
List *candidate = lfirst(lc);
List *new_elems = list_difference_int(candidate, previous);
+ GroupingSetData *gs = makeNode(GroupingSetData);
if (list_length(new_elems) > 0)
{
@@ -3042,7 +3201,8 @@ reorder_grouping_sets(List *groupingsets, List *sortclause)
}
}
- result = lcons(list_copy(previous), result);
+ gs->set = list_copy(previous);
+ result = lcons(gs, result);
list_free(new_elems);
}
@@ -3137,15 +3297,16 @@ standard_qp_callback(PlannerInfo *root, void *extra)
* Estimate number of groups produced by grouping clauses (1 if not grouping)
*
* path_rows: number of output rows from scan/join step
- * rollup_lists: list of grouping sets, or NIL if not doing grouping sets
- * rollup_groupclauses: list of grouping clauses for grouping sets,
- * or NIL if not doing grouping sets
+ * gd: grouping sets data, or NULL if not doing grouping sets
+ *
+ * If doing grouping sets, we also annotate the gsets data with the estimates
+ * for each set and each individual rollup list, with a view to later
+ * determining whether some combination of them could be hashed instead.
*/
static double
get_number_of_groups(PlannerInfo *root,
double path_rows,
- List *rollup_lists,
- List *rollup_groupclauses)
+ grouping_sets_data *gd)
{
Query *parse = root->parse;
double dNumGroups;
@@ -3157,28 +3318,60 @@ get_number_of_groups(PlannerInfo *root,
if (parse->groupingSets)
{
/* Add up the estimates for each grouping set */
- ListCell *lc,
- *lc2;
+ ListCell *lc;
+ ListCell *lc2;
dNumGroups = 0;
- forboth(lc, rollup_groupclauses, lc2, rollup_lists)
+
+ foreach(lc, gd->rollups)
{
- List *groupClause = (List *) lfirst(lc);
- List *gsets = (List *) lfirst(lc2);
- ListCell *lc3;
+ RollupData *rollup = lfirst(lc);
+ ListCell *lc;
- groupExprs = get_sortgrouplist_exprs(groupClause,
+ groupExprs = get_sortgrouplist_exprs(rollup->groupClause,
parse->targetList);
- foreach(lc3, gsets)
+ rollup->numGroups = 0.0;
+
+ forboth(lc, rollup->gsets, lc2, rollup->gsets_data)
{
- List *gset = (List *) lfirst(lc3);
+ List *gset = (List *) lfirst(lc);
+ GroupingSetData *gs = lfirst(lc2);
+ double numGroups = estimate_num_groups(root,
+ groupExprs,
+ path_rows,
+ &gset);
+
+ gs->numGroups = numGroups;
+ rollup->numGroups += numGroups;
+ }
+
+ dNumGroups += rollup->numGroups;
+ }
+
+ if (gd->hash_sets_idx)
+ {
+ ListCell *lc;
- dNumGroups += estimate_num_groups(root,
- groupExprs,
- path_rows,
- &gset);
+ gd->dNumHashGroups = 0;
+
+ groupExprs = get_sortgrouplist_exprs(parse->groupClause,
+ parse->targetList);
+
+ forboth(lc, gd->hash_sets_idx, lc2, gd->unsortable_sets)
+ {
+ List *gset = (List *) lfirst(lc);
+ GroupingSetData *gs = lfirst(lc2);
+ double numGroups = estimate_num_groups(root,
+ groupExprs,
+ path_rows,
+ &gset);
+
+ gs->numGroups = numGroups;
+ gd->dNumHashGroups += numGroups;
}
+
+ dNumGroups += gd->dNumHashGroups;
}
}
else
@@ -3214,6 +3407,11 @@ get_number_of_groups(PlannerInfo *root,
* estimate_hashagg_tablesize
* estimate the number of bytes that a hash aggregate hashtable will
* require based on the agg_costs, path width and dNumGroups.
+ *
+ * XXX this may be over-estimating the size now that hashagg knows to omit
+ * unneeded columns from the hashtable. Also for mixed-mode grouping sets,
+ * grouping columns not in the hashed set are counted here even though hashagg
+ * won't store them. Is this a problem?
*/
static Size
estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs,
@@ -3264,8 +3462,7 @@ create_grouping_paths(PlannerInfo *root,
RelOptInfo *input_rel,
PathTarget *target,
const AggClauseCosts *agg_costs,
- List *rollup_lists,
- List *rollup_groupclauses)
+ grouping_sets_data *gd)
{
Query *parse = root->parse;
Path *cheapest_path = input_rel->cheapest_total_path;
@@ -3373,8 +3570,7 @@ create_grouping_paths(PlannerInfo *root,
*/
dNumGroups = get_number_of_groups(root,
cheapest_path->rows,
- rollup_lists,
- rollup_groupclauses);
+ gd);
/*
* Determine whether it's possible to perform sort-based implementations
@@ -3382,15 +3578,22 @@ create_grouping_paths(PlannerInfo *root,
* grouping_is_sortable() is trivially true, and all the
* pathkeys_contained_in() tests will succeed too, so that we'll consider
* every surviving input path.)
+ *
+ * If we have grouping sets, we might be able to sort some but not all of
+ * them; in this case, can_sort must be true whenever any sorted-input
+ * plan still needs to be considered.
*/
- can_sort = grouping_is_sortable(parse->groupClause);
+ can_sort = (gd && gd->rollups != NIL)
+ || grouping_is_sortable(parse->groupClause);
/*
* Determine whether we should consider hash-based implementations of
* grouping.
*
- * Hashed aggregation only applies if we're grouping. We currently can't
- * hash if there are grouping sets, though.
+ * Hashed aggregation only applies if we're grouping. If we have grouping
+ * sets, some groups might be hashable but others not; in this case we set
+ * can_hash true as long as there is nothing globally preventing us from
+ * hashing (and we should therefore consider plans with hashes).
*
* Executor doesn't support hashed aggregation with DISTINCT or ORDER BY
* aggregates. (Doing so would imply storing *all* the input values in
@@ -3403,9 +3606,8 @@ create_grouping_paths(PlannerInfo *root,
* other gating conditions, so we want to do it last.
*/
can_hash = (parse->groupClause != NIL &&
- parse->groupingSets == NIL &&
agg_costs->numOrderedAggs == 0 &&
- grouping_is_hashable(parse->groupClause));
+ (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause)));
/*
* If grouped_rel->consider_parallel is true, then paths that we generate
@@ -3471,8 +3673,7 @@ create_grouping_paths(PlannerInfo *root,
/* Estimate number of partial groups. */
dNumPartialGroups = get_number_of_groups(root,
cheapest_partial_path->rows,
- NIL,
- NIL);
+ gd);
/*
* Collect statistics about aggregates for estimating costs of
@@ -3605,20 +3806,9 @@ create_grouping_paths(PlannerInfo *root,
/* Now decide what to stick atop it */
if (parse->groupingSets)
{
- /*
- * We have grouping sets, possibly with aggregation. Make
- * a GroupingSetsPath.
- */
- add_path(grouped_rel, (Path *)
- create_groupingsets_path(root,
- grouped_rel,
- path,
- target,
- (List *) parse->havingQual,
- rollup_lists,
- rollup_groupclauses,
- agg_costs,
- dNumGroups));
+ consider_groupingsets_paths(root, grouped_rel,
+ path, true, can_hash, target,
+ gd, agg_costs, dNumGroups);
}
else if (parse->hasAggs)
{
@@ -3779,33 +3969,45 @@ create_grouping_paths(PlannerInfo *root,
if (can_hash)
{
- hashaggtablesize = estimate_hashagg_tablesize(cheapest_path,
- agg_costs,
- dNumGroups);
-
- /*
- * Provided that the estimated size of the hashtable does not exceed
- * work_mem, we'll generate a HashAgg Path, although if we were unable
- * to sort above, then we'd better generate a Path, so that we at
- * least have one.
- */
- if (hashaggtablesize < work_mem * 1024L ||
- grouped_rel->pathlist == NIL)
+ if (parse->groupingSets)
{
/*
- * We just need an Agg over the cheapest-total input path, since
- * input order won't matter.
+ * Try for a hash-only groupingsets path over unsorted input.
*/
- add_path(grouped_rel, (Path *)
- create_agg_path(root, grouped_rel,
- cheapest_path,
- target,
- AGG_HASHED,
- AGGSPLIT_SIMPLE,
- parse->groupClause,
- (List *) parse->havingQual,
- agg_costs,
- dNumGroups));
+ consider_groupingsets_paths(root, grouped_rel,
+ cheapest_path, false, true, target,
+ gd, agg_costs, dNumGroups);
+ }
+ else
+ {
+ hashaggtablesize = estimate_hashagg_tablesize(cheapest_path,
+ agg_costs,
+ dNumGroups);
+
+ /*
+ * Provided that the estimated size of the hashtable does not
+ * exceed work_mem, we'll generate a HashAgg Path, although if we
+ * were unable to sort above, then we'd better generate a Path, so
+ * that we at least have one.
+ */
+ if (hashaggtablesize < work_mem * 1024L ||
+ grouped_rel->pathlist == NIL)
+ {
+ /*
+ * We just need an Agg over the cheapest-total input path,
+ * since input order won't matter.
+ */
+ add_path(grouped_rel, (Path *)
+ create_agg_path(root, grouped_rel,
+ cheapest_path,
+ target,
+ AGG_HASHED,
+ AGGSPLIT_SIMPLE,
+ parse->groupClause,
+ (List *) parse->havingQual,
+ agg_costs,
+ dNumGroups));
+ }
}
/*
@@ -3884,6 +4086,326 @@ create_grouping_paths(PlannerInfo *root,
return grouped_rel;
}
+
+/*
+ * For a given input path, consider the possible ways of doing grouping sets on
+ * it, by combinations of hashing and sorting. This can be called multiple
+ * times, so it's important that it not scribble on input. No result is
+ * returned, but any generated paths are added to grouped_rel.
+ */
+
+static void
+consider_groupingsets_paths(PlannerInfo *root,
+ RelOptInfo *grouped_rel,
+ Path *path,
+ bool is_sorted,
+ bool can_hash,
+ PathTarget *target,
+ grouping_sets_data *gd,
+ const AggClauseCosts *agg_costs,
+ double dNumGroups)
+{
+ Query *parse = root->parse;
+
+ /*
+ * If we're not being offered sorted input, then only consider plans that
+ * can be done entirely by hashing.
+ *
+ * We can hash everything if it looks like it'll fit in work_mem. But if
+ * the input is actually sorted despite not being advertised as such, we
+ * prefer to make use of that in order to use less memory.
+ *
+ * If none of the grouping sets are sortable, then ignore the work_mem
+ * limit and generate a path anyway, since otherwise we'll just fail.
+ */
+ if (!is_sorted)
+ {
+ List *new_rollups = NIL;
+ RollupData *unhashable_rollup = NULL;
+ List *sets_data;
+ List *empty_sets_data = NIL;
+ List *empty_sets = NIL;
+ ListCell *lc;
+ ListCell *l_start = list_head(gd->rollups);
+ AggStrategy strat = AGG_HASHED;
+ Size hashsize;
+ double exclude_groups = 0.0;
+
+ Assert(can_hash);
+
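+ /*
+ * If the input is in fact sorted compatibly with group_pathkeys, the
+ * head (sorted) rollup can consume that ordering for free; keep it
+ * out of the hash and exclude its groups from the hash-size estimate.
+ */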
+ if (pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
+ {
+ unhashable_rollup = lfirst(l_start);
+ exclude_groups = unhashable_rollup->numGroups;
+ l_start = lnext(l_start);
+ }
+
+ hashsize = estimate_hashagg_tablesize(path,
+ agg_costs,
+ dNumGroups - exclude_groups);
+
+ if (hashsize > work_mem * 1024L && gd->rollups)
+ return; /* nope, won't fit */
+
+ /*
+ * We need to burst the existing rollups list into individual grouping
+ * sets and recompute a groupClause for each set.
+ */
+ sets_data = list_copy(gd->unsortable_sets);
+
+ for_each_cell(lc, l_start)
+ {
+ RollupData *rollup = lfirst(lc);
+
+ /*
+ * If we find an unhashable rollup that's not been skipped by the
+ * "actually sorted" check above, we can't cope; we'd need sorted
+ * input (with a different sort order) but we can't get that here.
+ * So bail out; we'll get a valid path from the is_sorted case
+ * instead.
+ */
+ if (!rollup->hashable)
+ return;
+ else
+ sets_data = list_concat(sets_data, list_copy(rollup->gsets_data));
+ }
+ foreach(lc, sets_data)
+ {
+ GroupingSetData *gs = lfirst(lc);
+ List *gset = gs->set;
+ RollupData *rollup;
+
+ if (gset == NIL)
+ {
+ empty_sets_data = lappend(empty_sets_data, gs);
+ empty_sets = lappend(empty_sets, NIL);
+ }
+ else
+ {
+ rollup = makeNode(RollupData);
+
+ rollup->groupClause = preprocess_groupclause(root, gset);
+ rollup->gsets_data = list_make1(gs);
+ rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
+ rollup->gsets_data,
+ gd->tleref_to_colnum_map);
+ rollup->numGroups = gs->numGroups;
+ rollup->hashable = true;
+ rollup->is_hashed = true;
+ new_rollups = lappend(new_rollups, rollup);
+ }
+ }
+
+ /* If we didn't find anything nonempty to hash, then bail. */
+ if (new_rollups == NIL)
+ return;
+
+ /*
+ * If there were empty grouping sets, they should have been in the
+ * first rollup.
+ */
+ Assert(!unhashable_rollup || !empty_sets);
+
+ if (unhashable_rollup)
+ {
+ new_rollups = lappend(new_rollups, unhashable_rollup);
+ strat = AGG_MIXED;
+ }
+ else if (empty_sets)
+ {
+ RollupData *rollup = makeNode(RollupData);
+
+ rollup->groupClause = NIL;
+ rollup->gsets_data = empty_sets_data;
+ rollup->gsets = empty_sets;
+ rollup->numGroups = list_length(empty_sets);
+ rollup->hashable = false;
+ rollup->is_hashed = false;
+ new_rollups = lappend(new_rollups, rollup);
+ strat = AGG_MIXED;
+ }
+
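+ /*
+ * Illustration: for GROUP BY CUBE(a,b) over hashable columns and
+ * unsorted input, each of the three non-empty sets becomes its own
+ * hashed rollup here, and the empty set becomes a trailing non-hashed
+ * rollup, so EXPLAIN shows a MixedAggregate with three "Hash Key"
+ * lines and a final "Group Key: ()" (see the regression tests below).
+ */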
+ add_path(grouped_rel, (Path *)
+ create_groupingsets_path(root,
+ grouped_rel,
+ path,
+ target,
+ (List *) parse->havingQual,
+ strat,
+ new_rollups,
+ agg_costs,
+ dNumGroups));
+ return;
+ }
+
+ /*
+ * If we have sorted input but nothing we can do with it, bail.
+ */
+
+ if (list_length(gd->rollups) == 0)
+ return;
+
+ /*
+ * Given sorted input, we try to make two paths: one sorted and one mixed
+ * sort/hash. (We need to try both because hashagg might be disabled, or
+ * some columns might not be sortable.)
+ *
+ * can_hash is passed in as false if some obstacle elsewhere (such as
+ * ordered aggs) means that we shouldn't consider hashing at all.
+ */
+
+ if (can_hash && gd->any_hashable)
+ {
+ List *rollups = NIL;
+ List *hash_sets = list_copy(gd->unsortable_sets);
+ double availspace = (work_mem * 1024.0);
+ ListCell *lc;
+
+ /*
+ * Account first for space needed for groups we can't sort at all.
+ */
+ availspace -= (double) estimate_hashagg_tablesize(path,
+ agg_costs,
+ gd->dNumHashGroups);
+
+ if (availspace > 0 && list_length(gd->rollups) > 1)
+ {
+ double scale;
+ int num_rollups = list_length(gd->rollups);
+ int k_capacity;
+ int *k_weights = palloc(num_rollups * sizeof(int));
+ Bitmapset *hash_items = NULL;
+ int i;
+
+ /*
+ * To use the discrete knapsack, we need to scale the values to a
+ * reasonably small bounded range. We choose to allow a 5% error
+ * margin; we have no more than 4096 rollups in the worst possible
+ * case, which with a 5% error margin will require a bit over 42MB
+ * of workspace. (Anyone wanting to plan queries that complex had
+ * better have the memory for it. In more reasonable cases, with
+ * no more than a couple of dozen rollups, the memory usage will
+ * be negligible.)
+ *
+ * k_capacity is naturally bounded, but we clamp the values for
+ * scale and weight (below) to avoid overflows or underflows (or
+ * uselessly trying to use a scale factor less than 1 byte).
+ */
+ scale = Max(availspace / (20.0 * num_rollups), 1.0);
+ k_capacity = (int) floor(availspace / scale);
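+
+ /*
+ * Worked example with illustrative numbers: for work_mem = 4MB and 10
+ * rollups, availspace is 4194304 bytes, so scale = Max(4194304 /
+ * 200.0, 1.0) = 20971.52 and k_capacity = 200. Each weight loses less
+ * than one unit to rounding down, so choosing at most 10 items keeps
+ * the total error under 10 units out of 200, i.e. the 5% margin above.
+ */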
+
+ /*
+ * We leave the first rollup out of consideration since it's the
+ * one that matches the input sort order. We assign indexes "i"
+ * to only those entries considered for hashing; the second loop,
+ * below, must use the same condition.
+ */
+ i = 0;
+ for_each_cell(lc, lnext(list_head(gd->rollups)))
+ {
+ RollupData *rollup = lfirst(lc);
+
+ if (rollup->hashable)
+ {
+ double sz = estimate_hashagg_tablesize(path,
+ agg_costs,
+ rollup->numGroups);
+
+ /*
+ * If sz is enormous, but work_mem (and hence scale) is
+ * small, avoid integer overflow here.
+ */
+ k_weights[i] = (int) Min(floor(sz / scale),
+ k_capacity + 1.0);
+ ++i;
+ }
+ }
+
+ /*
+ * Apply knapsack algorithm; compute the set of items which
+ * maximizes the value stored (in this case the number of sorts
+ * saved) while keeping the total size (approximately) within
+ * capacity.
+ */
+ if (i > 0)
+ hash_items = DiscreteKnapsack(k_capacity, i, k_weights, NULL);
+
+ if (!bms_is_empty(hash_items))
+ {
+ rollups = list_make1(linitial(gd->rollups));
+
+ i = 0;
+ for_each_cell(lc, lnext(list_head(gd->rollups)))
+ {
+ RollupData *rollup = lfirst(lc);
+
+ if (rollup->hashable)
+ {
+ if (bms_is_member(i, hash_items))
+ hash_sets = list_concat(hash_sets,
+ list_copy(rollup->gsets_data));
+ else
+ rollups = lappend(rollups, rollup);
+ ++i;
+ }
+ else
+ rollups = lappend(rollups, rollup);
+ }
+ }
+ }
+
+ if (!rollups && hash_sets)
+ rollups = list_copy(gd->rollups);
+
+ foreach(lc, hash_sets)
+ {
+ GroupingSetData *gs = lfirst(lc);
+ RollupData *rollup = makeNode(RollupData);
+
+ Assert(gs->set != NIL);
+
+ rollup->groupClause = preprocess_groupclause(root, gs->set);
+ rollup->gsets_data = list_make1(gs);
+ rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
+ rollup->gsets_data,
+ gd->tleref_to_colnum_map);
+ rollup->numGroups = gs->numGroups;
+ rollup->hashable = true;
+ rollup->is_hashed = true;
+ rollups = lcons(rollup, rollups);
+ }
+
+ if (rollups)
+ {
+ add_path(grouped_rel, (Path *)
+ create_groupingsets_path(root,
+ grouped_rel,
+ path,
+ target,
+ (List *) parse->havingQual,
+ AGG_MIXED,
+ rollups,
+ agg_costs,
+ dNumGroups));
+ }
+ }
+
+ /*
+ * Now try the simple sorted case.
+ */
+ if (!gd->unsortable_sets)
+ add_path(grouped_rel, (Path *)
+ create_groupingsets_path(root,
+ grouped_rel,
+ path,
+ target,
+ (List *) parse->havingQual,
+ AGG_SORTED,
+ gd->rollups,
+ agg_costs,
+ dNumGroups));
+}
+
/*
* create_window_paths
*
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 8ce772d274..abe7368eab 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -2694,10 +2694,9 @@ create_agg_path(PlannerInfo *root,
* 'subpath' is the path representing the source of data
* 'target' is the PathTarget to be computed
* 'having_qual' is the HAVING quals if any
- * 'rollup_lists' is a list of grouping sets
- * 'rollup_groupclauses' is a list of grouping clauses for grouping sets
+ * 'rollups' is a list of RollupData nodes
* 'agg_costs' contains cost info about the aggregate functions to be computed
- * 'numGroups' is the estimated number of groups
+ * 'numGroups' is the estimated total number of groups
*/
GroupingSetsPath *
create_groupingsets_path(PlannerInfo *root,
@@ -2705,13 +2704,15 @@ create_groupingsets_path(PlannerInfo *root,
Path *subpath,
PathTarget *target,
List *having_qual,
- List *rollup_lists,
- List *rollup_groupclauses,
+ AggStrategy aggstrategy,
+ List *rollups,
const AggClauseCosts *agg_costs,
double numGroups)
{
GroupingSetsPath *pathnode = makeNode(GroupingSetsPath);
- int numGroupCols;
+ ListCell *lc;
+ bool is_first = true;
+ bool is_first_sort = true;
/* The topmost generated Plan node will be an Agg */
pathnode->path.pathtype = T_Agg;
@@ -2725,74 +2726,110 @@ create_groupingsets_path(PlannerInfo *root,
pathnode->subpath = subpath;
/*
+ * Simplify callers by downgrading AGG_SORTED to AGG_PLAIN, and AGG_MIXED
+ * to AGG_HASHED here, if possible.
+ */
+ if (aggstrategy == AGG_SORTED &&
+ list_length(rollups) == 1 &&
+ ((RollupData *) linitial(rollups))->groupClause == NIL)
+ aggstrategy = AGG_PLAIN;
+
+ if (aggstrategy == AGG_MIXED &&
+ list_length(rollups) == 1)
+ aggstrategy = AGG_HASHED;
+
+ /*
* Output will be in sorted order by group_pathkeys if, and only if, there
* is a single rollup operation on a non-empty list of grouping
* expressions.
*/
- if (list_length(rollup_groupclauses) == 1 &&
- ((List *) linitial(rollup_groupclauses)) != NIL)
+ if (aggstrategy == AGG_SORTED && list_length(rollups) == 1)
pathnode->path.pathkeys = root->group_pathkeys;
else
pathnode->path.pathkeys = NIL;
- pathnode->rollup_groupclauses = rollup_groupclauses;
- pathnode->rollup_lists = rollup_lists;
+ pathnode->aggstrategy = aggstrategy;
+ pathnode->rollups = rollups;
pathnode->qual = having_qual;
- Assert(rollup_lists != NIL);
- Assert(list_length(rollup_lists) == list_length(rollup_groupclauses));
-
- /* Account for cost of the topmost Agg node */
- numGroupCols = list_length((List *) linitial((List *) llast(rollup_lists)));
-
- cost_agg(&pathnode->path, root,
- (numGroupCols > 0) ? AGG_SORTED : AGG_PLAIN,
- agg_costs,
- numGroupCols,
- numGroups,
- subpath->startup_cost,
- subpath->total_cost,
- subpath->rows);
+ Assert(rollups != NIL);
+ Assert(aggstrategy != AGG_PLAIN || list_length(rollups) == 1);
+ Assert(aggstrategy != AGG_MIXED || list_length(rollups) > 1);
- /*
- * Add in the costs and output rows of the additional sorting/aggregation
- * steps, if any. Only total costs count, since the extra sorts aren't
- * run on startup.
- */
- if (list_length(rollup_lists) > 1)
+ foreach(lc, rollups)
{
- ListCell *lc;
+ RollupData *rollup = lfirst(lc);
+ List *gsets = rollup->gsets;
+ int numGroupCols = list_length(linitial(gsets));
- foreach(lc, rollup_lists)
+ /*
+ * In AGG_SORTED or AGG_PLAIN mode, the first rollup takes the
+ * (already-sorted) input, and following ones do their own sort.
+ *
+ * In AGG_HASHED mode, there is one rollup for each grouping set.
+ *
+ * In AGG_MIXED mode, the first rollups are hashed, the first
+ * non-hashed one takes the (already-sorted) input, and following ones
+ * do their own sort.
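+ *
+ * For example, with AGG_MIXED over rollups [hash1, hash2, sort1,
+ * sort2], hash1 is costed together with the input, hash2 as a hash
+ * agg with no input charge, sort1 as a sorted agg reading the
+ * presorted input (no Sort charged), and sort2 with its own Sort
+ * plus agg.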
+ */
+
+ if (is_first)
+ {
+ cost_agg(&pathnode->path, root,
+ aggstrategy,
+ agg_costs,
+ numGroupCols,
+ rollup->numGroups,
+ subpath->startup_cost,
+ subpath->total_cost,
+ subpath->rows);
+ is_first = false;
+ if (!rollup->is_hashed)
+ is_first_sort = false;
+ }
+ else
{
- List *gsets = (List *) lfirst(lc);
Path sort_path; /* dummy for result of cost_sort */
Path agg_path; /* dummy for result of cost_agg */
- /* We must iterate over all but the last rollup_lists element */
- if (lnext(lc) == NULL)
- break;
-
- /* Account for cost of sort, but don't charge input cost again */
- cost_sort(&sort_path, root, NIL,
- 0.0,
- subpath->rows,
- subpath->pathtarget->width,
- 0.0,
- work_mem,
- -1.0);
-
- /* Account for cost of aggregation */
- numGroupCols = list_length((List *) linitial(gsets));
-
- cost_agg(&agg_path, root,
- AGG_SORTED,
- agg_costs,
- numGroupCols,
- numGroups, /* XXX surely not right for all steps? */
- sort_path.startup_cost,
- sort_path.total_cost,
- sort_path.rows);
+ if (rollup->is_hashed || is_first_sort)
+ {
+ /*
+ * Account for cost of aggregation, but don't charge input
+ * cost again
+ */
+ cost_agg(&agg_path, root,
+ rollup->is_hashed ? AGG_HASHED : AGG_SORTED,
+ agg_costs,
+ numGroupCols,
+ rollup->numGroups,
+ 0.0, 0.0,
+ subpath->rows);
+ if (!rollup->is_hashed)
+ is_first_sort = false;
+ }
+ else
+ {
+ /* Account for cost of sort, but don't charge input cost again */
+ cost_sort(&sort_path, root, NIL,
+ 0.0,
+ subpath->rows,
+ subpath->pathtarget->width,
+ 0.0,
+ work_mem,
+ -1.0);
+
+ /* Account for cost of aggregation */
+
+ cost_agg(&agg_path, root,
+ AGG_SORTED,
+ agg_costs,
+ numGroupCols,
+ rollup->numGroups,
+ sort_path.startup_cost,
+ sort_path.total_cost,
+ sort_path.rows);
+ }
pathnode->path.total_cost += agg_path.total_cost;
pathnode->path.rows += agg_path.rows;
diff --git a/src/include/lib/knapsack.h b/src/include/lib/knapsack.h
new file mode 100644
index 0000000000..8d1e6d0aa0
--- /dev/null
+++ b/src/include/lib/knapsack.h
@@ -0,0 +1,17 @@
+/*
+ * knapsack.h
+ *
+ * Copyright (c) 2017, PostgreSQL Global Development Group
+ *
+ * src/include/lib/knapsack.h
+ */
+#ifndef KNAPSACK_H
+#define KNAPSACK_H
+
+#include "postgres.h"
+#include "nodes/bitmapset.h"
+
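+/*
+ * DiscreteKnapsack selects a subset of the num_items items, with the
+ * given integer weights, whose total weight fits in max_weight and
+ * whose total value is maximal. Judging from the planner call site,
+ * item_values may be passed as NULL to treat all items as equally
+ * valuable, i.e. to maximize the number of items chosen.
+ */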
+extern Bitmapset *DiscreteKnapsack(int max_weight, int num_items,
+ int *item_weights, double *item_values);
+
+#endif /* KNAPSACK_H */
diff --git a/src/include/nodes/bitmapset.h b/src/include/nodes/bitmapset.h
index 4f1910e5a9..109f7b0c14 100644
--- a/src/include/nodes/bitmapset.h
+++ b/src/include/nodes/bitmapset.h
@@ -21,6 +21,11 @@
#define BITMAPSET_H
/*
+ * Forward decl to save including pg_list.h
+ */
+struct List;
+
+/*
* Data representation
*/
@@ -70,6 +75,7 @@ extern bool bms_is_subset(const Bitmapset *a, const Bitmapset *b);
extern BMS_Comparison bms_subset_compare(const Bitmapset *a, const Bitmapset *b);
extern bool bms_is_member(int x, const Bitmapset *a);
extern bool bms_overlap(const Bitmapset *a, const Bitmapset *b);
+extern bool bms_overlap_list(const Bitmapset *a, const struct List *b);
extern bool bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b);
extern int bms_singleton_member(const Bitmapset *a);
extern bool bms_get_singleton_member(const Bitmapset *a, int *member);
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index f856f6036f..33063071b4 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1943,6 +1943,7 @@ typedef struct AggStatePerAggData *AggStatePerAgg;
typedef struct AggStatePerTransData *AggStatePerTrans;
typedef struct AggStatePerGroupData *AggStatePerGroup;
typedef struct AggStatePerPhaseData *AggStatePerPhase;
+typedef struct AggStatePerHashData *AggStatePerHash;
typedef struct AggState
{
@@ -1950,15 +1951,17 @@ typedef struct AggState
List *aggs; /* all Aggref nodes in targetlist & quals */
int numaggs; /* length of list (could be zero!) */
int numtrans; /* number of pertrans items */
+ AggStrategy aggstrategy; /* strategy mode */
AggSplit aggsplit; /* agg-splitting mode, see nodes.h */
AggStatePerPhase phase; /* pointer to current phase data */
- int numphases; /* number of phases */
+ int numphases; /* number of phases (including phase 0) */
int current_phase; /* current phase number */
- FmgrInfo *hashfunctions; /* per-grouping-field hash fns */
AggStatePerAgg peragg; /* per-Aggref information */
AggStatePerTrans pertrans; /* per-Trans state information */
+ ExprContext *hashcontext; /* econtext for long-lived data (hashtable) */
ExprContext **aggcontexts; /* econtexts for long-lived data (per GS) */
ExprContext *tmpcontext; /* econtext for input expressions */
+ ExprContext *curaggcontext; /* currently active aggcontext */
AggStatePerTrans curpertrans; /* currently active trans state */
bool input_done; /* indicates end of input */
bool agg_done; /* indicates completion of Agg scan */
@@ -1970,21 +1973,17 @@ typedef struct AggState
/* These fields are for grouping set phase data */
int maxsets; /* The max number of sets in any phase */
AggStatePerPhase phases; /* array of all phases */
- Tuplesortstate *sort_in; /* sorted input to phases > 0 */
+ Tuplesortstate *sort_in; /* sorted input to phases > 1 */
Tuplesortstate *sort_out; /* input is copied here for next phase */
TupleTableSlot *sort_slot; /* slot for sort results */
/* these fields are used in AGG_PLAIN and AGG_SORTED modes: */
AggStatePerGroup pergroup; /* per-Aggref-per-group working state */
HeapTuple grp_firstTuple; /* copy of first tuple of current group */
- /* these fields are used in AGG_HASHED mode: */
- TupleHashTable hashtable; /* hash table with one entry per group */
- TupleTableSlot *hashslot; /* slot for loading hash table */
- int numhashGrpCols; /* number of columns in hash table */
- int largestGrpColIdx; /* largest column required for hashing */
- AttrNumber *hashGrpColIdxInput; /* and their indices in input slot */
- AttrNumber *hashGrpColIdxHash; /* indices for execGrouping in hashtbl */
+ /* these fields are used in AGG_HASHED and AGG_MIXED modes: */
bool table_filled; /* hash table filled yet? */
- TupleHashIterator hashiter; /* for iterating through hash table */
+ int num_hashes; /* number of hash tables (one per hashed grouping set) */
+ AggStatePerHash perhash; /* array of num_hashes per-hashtable data */
+ AggStatePerGroup *hash_pergroup; /* array of per-group pointers */
/* support for evaluation of agg inputs */
TupleTableSlot *evalslot; /* slot for agg inputs */
ProjectionInfo *evalproj; /* projection machinery */
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 2bc7a5df11..39832c7c4b 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -278,6 +278,8 @@ typedef enum NodeTag
T_PlaceHolderInfo,
T_MinMaxAggInfo,
T_PlannerParamItem,
+ T_RollupData,
+ T_GroupingSetData,
/*
* TAGS FOR MEMORY NODES (memnodes.h)
@@ -737,7 +739,8 @@ typedef enum AggStrategy
{
AGG_PLAIN, /* simple agg across all input rows */
AGG_SORTED, /* grouped agg, input must be sorted */
- AGG_HASHED /* grouped agg, use internal hashtable */
+ AGG_HASHED, /* grouped agg, use internal hashtable */
+ AGG_MIXED /* grouped agg, hash and sort concurrently */
} AggStrategy;
/*
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index b880dc16cf..614d934dba 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -749,7 +749,7 @@ typedef struct Agg
Oid *grpOperators; /* equality operators to compare with */
long numGroups; /* estimated number of groups in input */
Bitmapset *aggParams; /* IDs of Params used in Aggref inputs */
- /* Note: planner provides numGroups & aggParams only in AGG_HASHED case */
+ /* Note: planner provides numGroups & aggParams only in HASHED/MIXED case */
List *groupingSets; /* grouping sets to use */
List *chain; /* chained Agg/Sort nodes */
} Agg;
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 05d6f07aea..e5011aa242 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -1391,17 +1391,37 @@ typedef struct AggPath
} AggPath;
/*
+ * Various annotations used for grouping sets in the planner.
+ */
+
+typedef struct GroupingSetData
+{
+ NodeTag type;
+ List *set; /* grouping set as list of sortgrouprefs */
+ double numGroups; /* est. number of result groups */
+} GroupingSetData;
+
+typedef struct RollupData
+{
+ NodeTag type;
+ List *groupClause; /* applicable subset of parse->groupClause */
+ List *gsets; /* lists of integer indexes into groupClause */
+ List *gsets_data; /* list of GroupingSetData */
+ double numGroups; /* est. number of result groups */
+ bool hashable; /* can be hashed */
+ bool is_hashed; /* to be implemented as a hashagg */
+} RollupData;
+
+/*
* GroupingSetsPath represents a GROUPING SETS aggregation
- *
- * Currently we only support this in sorted not hashed form, so the input
- * must always be appropriately presorted.
*/
+
typedef struct GroupingSetsPath
{
Path path;
Path *subpath; /* path representing input source */
- List *rollup_groupclauses; /* list of lists of SortGroupClause's */
- List *rollup_lists; /* parallel list of lists of grouping sets */
+ AggStrategy aggstrategy; /* basic strategy */
+ List *rollups; /* list of RollupData */
List *qual; /* quals (HAVING quals), if any */
} GroupingSetsPath;
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 373c7221a8..ba7aa62fbf 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -193,8 +193,8 @@ extern GroupingSetsPath *create_groupingsets_path(PlannerInfo *root,
Path *subpath,
PathTarget *target,
List *having_qual,
- List *rollup_lists,
- List *rollup_groupclauses,
+ AggStrategy aggstrategy,
+ List *rollups,
const AggClauseCosts *agg_costs,
double numGroups);
extern MinMaxAggPath *create_minmaxagg_path(PlannerInfo *root,
diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out
index b0886da8d7..1374710087 100644
--- a/src/test/regress/expected/groupingsets.out
+++ b/src/test/regress/expected/groupingsets.out
@@ -13,6 +13,13 @@ copy gstest2 from stdin;
create temp table gstest3 (a integer, b integer, c integer, d integer);
copy gstest3 from stdin;
alter table gstest3 add primary key (a);
+create temp table gstest4(id integer, v integer,
+ unhashable_col bit(4), unsortable_col xid);
+insert into gstest4
+values (1,1,b'0000','1'), (2,2,b'0001','1'),
+ (3,4,b'0010','2'), (4,8,b'0011','2'),
+ (5,16,b'0000','2'), (6,32,b'0001','2'),
+ (7,64,b'0010','1'), (8,128,b'0011','1');
create temp table gstest_empty (a integer, b integer, v integer);
create function gstest_data(v integer, out a integer, out b integer)
returns setof record
@@ -22,6 +29,7 @@ create function gstest_data(v integer, out a integer, out b integer)
end;
$f$ language plpgsql;
-- basic functionality
+set enable_hashagg = false; -- test hashing explicitly later
-- simple rollup with multiple plain aggregates, with and without ordering
-- (and with ordering differing from grouping)
select a, b, grouping(a,b), sum(v), count(*), max(v)
@@ -462,7 +470,7 @@ select a, b from (values (1,2),(2,3)) v(a,b) group by a,b, grouping sets(a);
-- Tests for chained aggregates
select a, b, grouping(a,b), sum(v), count(*), max(v)
- from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2));
+ from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
a | b | grouping | sum | count | max
---+---+----------+-----+-------+-----
1 | 1 | 0 | 21 | 2 | 11
@@ -473,19 +481,19 @@ select a, b, grouping(a,b), sum(v), count(*), max(v)
3 | 4 | 0 | 17 | 1 | 17
4 | 1 | 0 | 37 | 2 | 19
| | 3 | 21 | 2 | 11
- | | 3 | 25 | 2 | 13
- | | 3 | 14 | 1 | 14
- | | 3 | 15 | 1 | 15
- | | 3 | 16 | 1 | 16
- | | 3 | 17 | 1 | 17
- | | 3 | 37 | 2 | 19
| | 3 | 21 | 2 | 11
| | 3 | 25 | 2 | 13
+ | | 3 | 25 | 2 | 13
+ | | 3 | 14 | 1 | 14
| | 3 | 14 | 1 | 14
| | 3 | 15 | 1 | 15
+ | | 3 | 15 | 1 | 15
| | 3 | 16 | 1 | 16
+ | | 3 | 16 | 1 | 16
+ | | 3 | 17 | 1 | 17
| | 3 | 17 | 1 | 17
| | 3 | 37 | 2 | 19
+ | | 3 | 37 | 2 | 19
(21 rows)
select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY ROLLUP((e+1),(f+1));
@@ -847,4 +855,598 @@ select sum(ten) from onek group by rollup(four::text), two order by 1;
2500
(6 rows)
+-- hashing support
+set enable_hashagg = true;
+-- failure cases
+select count(*) from gstest4 group by rollup(unhashable_col,unsortable_col);
+ERROR: could not implement GROUP BY
+DETAIL: Some of the datatypes only support hashing, while others only support sorting.
+select array_agg(v order by v) from gstest4 group by grouping sets ((id,unsortable_col),(id));
+ERROR: could not implement GROUP BY
+DETAIL: Some of the datatypes only support hashing, while others only support sorting.
+-- simple cases
+select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a),(b)) order by 3,1,2;
+ a | b | grouping | sum | count | max
+---+---+----------+-----+-------+-----
+ 1 | | 1 | 60 | 5 | 14
+ 2 | | 1 | 15 | 1 | 15
+ 3 | | 1 | 33 | 2 | 17
+ 4 | | 1 | 37 | 2 | 19
+ | 1 | 2 | 58 | 4 | 19
+ | 2 | 2 | 25 | 2 | 13
+ | 3 | 2 | 45 | 3 | 16
+ | 4 | 2 | 17 | 1 | 17
+(8 rows)
+
+explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a),(b)) order by 3,1,2;
+ QUERY PLAN
+--------------------------------------------------------------------------------------------------------
+ Sort
+ Sort Key: (GROUPING("*VALUES*".column1, "*VALUES*".column2)), "*VALUES*".column1, "*VALUES*".column2
+ -> HashAggregate
+ Hash Key: "*VALUES*".column1
+ Hash Key: "*VALUES*".column2
+ -> Values Scan on "*VALUES*"
+(6 rows)
+
+select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by cube(a,b) order by 3,1,2;
+ a | b | grouping | sum | count | max
+---+---+----------+-----+-------+-----
+ 1 | 1 | 0 | 21 | 2 | 11
+ 1 | 2 | 0 | 25 | 2 | 13
+ 1 | 3 | 0 | 14 | 1 | 14
+ 2 | 3 | 0 | 15 | 1 | 15
+ 3 | 3 | 0 | 16 | 1 | 16
+ 3 | 4 | 0 | 17 | 1 | 17
+ 4 | 1 | 0 | 37 | 2 | 19
+ 1 | | 1 | 60 | 5 | 14
+ 2 | | 1 | 15 | 1 | 15
+ 3 | | 1 | 33 | 2 | 17
+ 4 | | 1 | 37 | 2 | 19
+ | 1 | 2 | 58 | 4 | 19
+ | 2 | 2 | 25 | 2 | 13
+ | 3 | 2 | 45 | 3 | 16
+ | 4 | 2 | 17 | 1 | 17
+ | | 3 | 145 | 10 | 19
+(16 rows)
+
+explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by cube(a,b) order by 3,1,2;
+ QUERY PLAN
+--------------------------------------------------------------------------------------------------------
+ Sort
+ Sort Key: (GROUPING("*VALUES*".column1, "*VALUES*".column2)), "*VALUES*".column1, "*VALUES*".column2
+ -> MixedAggregate
+ Hash Key: "*VALUES*".column1, "*VALUES*".column2
+ Hash Key: "*VALUES*".column1
+ Hash Key: "*VALUES*".column2
+ Group Key: ()
+ -> Values Scan on "*VALUES*"
+(8 rows)
+
+-- shouldn't try to hash
+explain (costs off)
+ select a, b, grouping(a,b), array_agg(v order by v)
+ from gstest1 group by cube(a,b);
+ QUERY PLAN
+----------------------------------------------------------
+ GroupAggregate
+ Group Key: "*VALUES*".column1, "*VALUES*".column2
+ Group Key: "*VALUES*".column1
+ Group Key: ()
+ Sort Key: "*VALUES*".column2
+ Group Key: "*VALUES*".column2
+ -> Sort
+ Sort Key: "*VALUES*".column1, "*VALUES*".column2
+ -> Values Scan on "*VALUES*"
+(9 rows)
+
+-- mixed hashable/sortable cases
+select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((unhashable_col),(unsortable_col))
+ order by 3, 5;
+ unhashable_col | unsortable_col | grouping | count | sum
+----------------+----------------+----------+-------+-----
+ 0000 | | 1 | 2 | 17
+ 0001 | | 1 | 2 | 34
+ 0010 | | 1 | 2 | 68
+ 0011 | | 1 | 2 | 136
+ | 2 | 2 | 4 | 60
+ | 1 | 2 | 4 | 195
+(6 rows)
+
+explain (costs off)
+ select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((unhashable_col),(unsortable_col))
+ order by 3,5;
+ QUERY PLAN
+------------------------------------------------------------------
+ Sort
+ Sort Key: (GROUPING(unhashable_col, unsortable_col)), (sum(v))
+ -> MixedAggregate
+ Hash Key: unsortable_col
+ Group Key: unhashable_col
+ -> Sort
+ Sort Key: unhashable_col
+ -> Seq Scan on gstest4
+(8 rows)
+
+select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col))
+ order by 3,5;
+ unhashable_col | unsortable_col | grouping | count | sum
+----------------+----------------+----------+-------+-----
+ 0000 | | 1 | 1 | 1
+ 0001 | | 1 | 1 | 2
+ 0010 | | 1 | 1 | 4
+ 0011 | | 1 | 1 | 8
+ 0000 | | 1 | 1 | 16
+ 0001 | | 1 | 1 | 32
+ 0010 | | 1 | 1 | 64
+ 0011 | | 1 | 1 | 128
+ | 1 | 2 | 1 | 1
+ | 1 | 2 | 1 | 2
+ | 2 | 2 | 1 | 4
+ | 2 | 2 | 1 | 8
+ | 2 | 2 | 1 | 16
+ | 2 | 2 | 1 | 32
+ | 1 | 2 | 1 | 64
+ | 1 | 2 | 1 | 128
+(16 rows)
+
+explain (costs off)
+ select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col))
+ order by 3,5;
+ QUERY PLAN
+------------------------------------------------------------------
+ Sort
+ Sort Key: (GROUPING(unhashable_col, unsortable_col)), (sum(v))
+ -> MixedAggregate
+ Hash Key: v, unsortable_col
+ Group Key: v, unhashable_col
+ -> Sort
+ Sort Key: v, unhashable_col
+ -> Seq Scan on gstest4
+(8 rows)
+
+-- empty input: first is 0 rows, second 1, third 3, etc.
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a);
+ a | b | sum | count
+---+---+-----+-------
+(0 rows)
+
+explain (costs off)
+ select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a);
+ QUERY PLAN
+--------------------------------
+ HashAggregate
+ Hash Key: a, b
+ Hash Key: a
+ -> Seq Scan on gstest_empty
+(4 rows)
+
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),());
+ a | b | sum | count
+---+---+-----+-------
+ | | | 0
+(1 row)
+
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),());
+ a | b | sum | count
+---+---+-----+-------
+ | | | 0
+ | | | 0
+ | | | 0
+(3 rows)
+
+explain (costs off)
+ select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),());
+ QUERY PLAN
+--------------------------------
+ MixedAggregate
+ Hash Key: a, b
+ Group Key: ()
+ Group Key: ()
+ Group Key: ()
+ -> Seq Scan on gstest_empty
+(6 rows)
+
+select sum(v), count(*) from gstest_empty group by grouping sets ((),(),());
+ sum | count
+-----+-------
+ | 0
+ | 0
+ | 0
+(3 rows)
+
+explain (costs off)
+ select sum(v), count(*) from gstest_empty group by grouping sets ((),(),());
+ QUERY PLAN
+--------------------------------
+ Aggregate
+ Group Key: ()
+ Group Key: ()
+ Group Key: ()
+ -> Seq Scan on gstest_empty
+(5 rows)
+
+-- check that functionally dependent cols are not nulled
+select a, d, grouping(a,b,c)
+ from gstest3
+ group by grouping sets ((a,b), (a,c));
+ a | d | grouping
+---+---+----------
+ 1 | 1 | 1
+ 2 | 2 | 1
+ 1 | 1 | 2
+ 2 | 2 | 2
+(4 rows)
+
+explain (costs off)
+ select a, d, grouping(a,b,c)
+ from gstest3
+ group by grouping sets ((a,b), (a,c));
+ QUERY PLAN
+---------------------------
+ HashAggregate
+ Hash Key: a, b
+ Hash Key: a, c
+ -> Seq Scan on gstest3
+(4 rows)
+
+-- simple rescan tests
+select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by grouping sets (a,b);
+ a | b | sum
+---+---+-----
+ 2 | | 6
+ 1 | | 3
+ | 2 | 3
+ | 3 | 3
+ | 1 | 3
+(5 rows)
+
+explain (costs off)
+ select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by grouping sets (a,b);
+ QUERY PLAN
+------------------------------------------
+ HashAggregate
+ Hash Key: gstest_data.a
+ Hash Key: gstest_data.b
+ -> Nested Loop
+ -> Values Scan on "*VALUES*"
+ -> Function Scan on gstest_data
+(6 rows)
+
+select *
+ from (values (1),(2)) v(x),
+ lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s;
+ERROR: aggregate functions are not allowed in FROM clause of their own query level
+LINE 3: lateral (select a, b, sum(v.x) from gstest_data(v.x) ...
+ ^
+explain (costs off)
+ select *
+ from (values (1),(2)) v(x),
+ lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s;
+ERROR: aggregate functions are not allowed in FROM clause of their own query level
+LINE 4: lateral (select a, b, sum(v.x) from gstest_data(v.x...
+ ^
+-- Tests for chained aggregates
+select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
+ a | b | grouping | sum | count | max
+---+---+----------+-----+-------+-----
+ 1 | 1 | 0 | 21 | 2 | 11
+ 1 | 2 | 0 | 25 | 2 | 13
+ 1 | 3 | 0 | 14 | 1 | 14
+ 2 | 3 | 0 | 15 | 1 | 15
+ 3 | 3 | 0 | 16 | 1 | 16
+ 3 | 4 | 0 | 17 | 1 | 17
+ 4 | 1 | 0 | 37 | 2 | 19
+ | | 3 | 21 | 2 | 11
+ | | 3 | 21 | 2 | 11
+ | | 3 | 25 | 2 | 13
+ | | 3 | 25 | 2 | 13
+ | | 3 | 14 | 1 | 14
+ | | 3 | 14 | 1 | 14
+ | | 3 | 15 | 1 | 15
+ | | 3 | 15 | 1 | 15
+ | | 3 | 16 | 1 | 16
+ | | 3 | 16 | 1 | 16
+ | | 3 | 17 | 1 | 17
+ | | 3 | 17 | 1 | 17
+ | | 3 | 37 | 2 | 19
+ | | 3 | 37 | 2 | 19
+(21 rows)
+
+explain (costs off)
+ select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
+ QUERY PLAN
+-------------------------------------------------------------------------------------------
+ Sort
+ Sort Key: (GROUPING("*VALUES*".column1, "*VALUES*".column2)), (max("*VALUES*".column3))
+ -> HashAggregate
+ Hash Key: "*VALUES*".column1, "*VALUES*".column2
+ Hash Key: ("*VALUES*".column1 + 1), ("*VALUES*".column2 + 1)
+ Hash Key: ("*VALUES*".column1 + 2), ("*VALUES*".column2 + 2)
+ -> Values Scan on "*VALUES*"
+(7 rows)
+
+select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
+ from gstest2 group by cube (a,b) order by rsum, a, b;
+ a | b | sum | rsum
+---+---+-----+------
+ 1 | 1 | 8 | 8
+ 1 | 2 | 2 | 10
+ 1 | | 10 | 20
+ 2 | 2 | 2 | 22
+ 2 | | 2 | 24
+ | 1 | 8 | 32
+ | 2 | 4 | 36
+ | | 12 | 48
+(8 rows)
+
+explain (costs off)
+ select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
+ from gstest2 group by cube (a,b) order by rsum, a, b;
+ QUERY PLAN
+---------------------------------------------
+ Sort
+ Sort Key: (sum((sum(c))) OVER (?)), a, b
+ -> WindowAgg
+ -> Sort
+ Sort Key: a, b
+ -> MixedAggregate
+ Hash Key: a, b
+ Hash Key: a
+ Hash Key: b
+ Group Key: ()
+ -> Seq Scan on gstest2
+(11 rows)
+
+select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by cube (a,b) order by a,b;
+ a | b | sum
+---+---+-----
+ 1 | 1 | 1
+ 1 | 2 | 1
+ 1 | 3 | 1
+ 1 | | 3
+ 2 | 1 | 2
+ 2 | 2 | 2
+ 2 | 3 | 2
+ 2 | | 6
+ | 1 | 3
+ | 2 | 3
+ | 3 | 3
+ | | 9
+(12 rows)
+
+explain (costs off)
+ select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by cube (a,b) order by a,b;
+ QUERY PLAN
+------------------------------------------------
+ Sort
+ Sort Key: gstest_data.a, gstest_data.b
+ -> MixedAggregate
+ Hash Key: gstest_data.a, gstest_data.b
+ Hash Key: gstest_data.a
+ Hash Key: gstest_data.b
+ Group Key: ()
+ -> Nested Loop
+ -> Values Scan on "*VALUES*"
+ -> Function Scan on gstest_data
+(10 rows)
+
+-- More rescan tests
+select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by cube(four,ten)) s on true order by v.a,four,ten;
+ a | a | four | ten | count
+---+---+------+-----+-------
+ 1 | 1 | 0 | 0 | 50
+ 1 | 1 | 0 | 2 | 50
+ 1 | 1 | 0 | 4 | 50
+ 1 | 1 | 0 | 6 | 50
+ 1 | 1 | 0 | 8 | 50
+ 1 | 1 | 0 | | 250
+ 1 | 1 | 1 | 1 | 50
+ 1 | 1 | 1 | 3 | 50
+ 1 | 1 | 1 | 5 | 50
+ 1 | 1 | 1 | 7 | 50
+ 1 | 1 | 1 | 9 | 50
+ 1 | 1 | 1 | | 250
+ 1 | 1 | 2 | 0 | 50
+ 1 | 1 | 2 | 2 | 50
+ 1 | 1 | 2 | 4 | 50
+ 1 | 1 | 2 | 6 | 50
+ 1 | 1 | 2 | 8 | 50
+ 1 | 1 | 2 | | 250
+ 1 | 1 | 3 | 1 | 50
+ 1 | 1 | 3 | 3 | 50
+ 1 | 1 | 3 | 5 | 50
+ 1 | 1 | 3 | 7 | 50
+ 1 | 1 | 3 | 9 | 50
+ 1 | 1 | 3 | | 250
+ 1 | 1 | | 0 | 100
+ 1 | 1 | | 1 | 100
+ 1 | 1 | | 2 | 100
+ 1 | 1 | | 3 | 100
+ 1 | 1 | | 4 | 100
+ 1 | 1 | | 5 | 100
+ 1 | 1 | | 6 | 100
+ 1 | 1 | | 7 | 100
+ 1 | 1 | | 8 | 100
+ 1 | 1 | | 9 | 100
+ 1 | 1 | | | 1000
+ 2 | 2 | 0 | 0 | 50
+ 2 | 2 | 0 | 2 | 50
+ 2 | 2 | 0 | 4 | 50
+ 2 | 2 | 0 | 6 | 50
+ 2 | 2 | 0 | 8 | 50
+ 2 | 2 | 0 | | 250
+ 2 | 2 | 1 | 1 | 50
+ 2 | 2 | 1 | 3 | 50
+ 2 | 2 | 1 | 5 | 50
+ 2 | 2 | 1 | 7 | 50
+ 2 | 2 | 1 | 9 | 50
+ 2 | 2 | 1 | | 250
+ 2 | 2 | 2 | 0 | 50
+ 2 | 2 | 2 | 2 | 50
+ 2 | 2 | 2 | 4 | 50
+ 2 | 2 | 2 | 6 | 50
+ 2 | 2 | 2 | 8 | 50
+ 2 | 2 | 2 | | 250
+ 2 | 2 | 3 | 1 | 50
+ 2 | 2 | 3 | 3 | 50
+ 2 | 2 | 3 | 5 | 50
+ 2 | 2 | 3 | 7 | 50
+ 2 | 2 | 3 | 9 | 50
+ 2 | 2 | 3 | | 250
+ 2 | 2 | | 0 | 100
+ 2 | 2 | | 1 | 100
+ 2 | 2 | | 2 | 100
+ 2 | 2 | | 3 | 100
+ 2 | 2 | | 4 | 100
+ 2 | 2 | | 5 | 100
+ 2 | 2 | | 6 | 100
+ 2 | 2 | | 7 | 100
+ 2 | 2 | | 8 | 100
+ 2 | 2 | | 9 | 100
+ 2 | 2 | | | 1000
+(70 rows)
+
+select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by cube(two,four) order by two,four) s1) from (values (1),(2)) v(a);
+ array
+------------------------------------------------------------------------------------------------------------------------------------------------------
+ {"(1,0,0,250)","(1,0,2,250)","(1,0,,500)","(1,1,1,250)","(1,1,3,250)","(1,1,,500)","(1,,0,250)","(1,,1,250)","(1,,2,250)","(1,,3,250)","(1,,,1000)"}
+ {"(2,0,0,250)","(2,0,2,250)","(2,0,,500)","(2,1,1,250)","(2,1,3,250)","(2,1,,500)","(2,,0,250)","(2,,1,250)","(2,,2,250)","(2,,3,250)","(2,,,1000)"}
+(2 rows)
+
+-- Rescan logic changes when there are no empty grouping sets, so test
+-- that too:
+select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by grouping sets(four,ten)) s on true order by v.a,four,ten;
+ a | a | four | ten | count
+---+---+------+-----+-------
+ 1 | 1 | 0 | | 250
+ 1 | 1 | 1 | | 250
+ 1 | 1 | 2 | | 250
+ 1 | 1 | 3 | | 250
+ 1 | 1 | | 0 | 100
+ 1 | 1 | | 1 | 100
+ 1 | 1 | | 2 | 100
+ 1 | 1 | | 3 | 100
+ 1 | 1 | | 4 | 100
+ 1 | 1 | | 5 | 100
+ 1 | 1 | | 6 | 100
+ 1 | 1 | | 7 | 100
+ 1 | 1 | | 8 | 100
+ 1 | 1 | | 9 | 100
+ 2 | 2 | 0 | | 250
+ 2 | 2 | 1 | | 250
+ 2 | 2 | 2 | | 250
+ 2 | 2 | 3 | | 250
+ 2 | 2 | | 0 | 100
+ 2 | 2 | | 1 | 100
+ 2 | 2 | | 2 | 100
+ 2 | 2 | | 3 | 100
+ 2 | 2 | | 4 | 100
+ 2 | 2 | | 5 | 100
+ 2 | 2 | | 6 | 100
+ 2 | 2 | | 7 | 100
+ 2 | 2 | | 8 | 100
+ 2 | 2 | | 9 | 100
+(28 rows)
+
+select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by grouping sets(two,four) order by two,four) s1) from (values (1),(2)) v(a);
+ array
+---------------------------------------------------------------------------------
+ {"(1,0,,500)","(1,1,,500)","(1,,0,250)","(1,,1,250)","(1,,2,250)","(1,,3,250)"}
+ {"(2,0,,500)","(2,1,,500)","(2,,0,250)","(2,,1,250)","(2,,2,250)","(2,,3,250)"}
+(2 rows)
+
+-- test the knapsack
+set work_mem = '64kB';
+explain (costs off)
+ select unique1,
+ count(two), count(four), count(ten),
+ count(hundred), count(thousand), count(twothousand),
+ count(*)
+ from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two);
+ QUERY PLAN
+-------------------------------
+ MixedAggregate
+ Hash Key: two
+ Hash Key: four
+ Hash Key: ten
+ Hash Key: hundred
+ Group Key: unique1
+ Sort Key: twothousand
+ Group Key: twothousand
+ Sort Key: thousand
+ Group Key: thousand
+ -> Sort
+ Sort Key: unique1
+ -> Seq Scan on tenk1
+(13 rows)
+
+explain (costs off)
+ select unique1,
+ count(two), count(four), count(ten),
+ count(hundred), count(thousand), count(twothousand),
+ count(*)
+ from tenk1 group by grouping sets (unique1,hundred,ten,four,two);
+ QUERY PLAN
+-------------------------------
+ MixedAggregate
+ Hash Key: two
+ Hash Key: four
+ Hash Key: ten
+ Hash Key: hundred
+ Group Key: unique1
+ -> Sort
+ Sort Key: unique1
+ -> Seq Scan on tenk1
+(9 rows)
+
+set work_mem = '384kB';
+explain (costs off)
+ select unique1,
+ count(two), count(four), count(ten),
+ count(hundred), count(thousand), count(twothousand),
+ count(*)
+ from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two);
+ QUERY PLAN
+-------------------------------
+ MixedAggregate
+ Hash Key: two
+ Hash Key: four
+ Hash Key: ten
+ Hash Key: hundred
+ Hash Key: thousand
+ Group Key: unique1
+ Sort Key: twothousand
+ Group Key: twothousand
+ -> Sort
+ Sort Key: unique1
+ -> Seq Scan on tenk1
+(12 rows)
+
-- end
diff --git a/src/test/regress/expected/tsrf.out b/src/test/regress/expected/tsrf.out
index 0eeaf9e830..33f370b494 100644
--- a/src/test/regress/expected/tsrf.out
+++ b/src/test/regress/expected/tsrf.out
@@ -233,6 +233,7 @@ SELECT few.dataa, count(*), min(id), max(id), generate_series(1,3) FROM few GROU
(6 rows)
-- grouping sets are a bit special, they produce NULLs in columns not actually NULL
+set enable_hashagg = false;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab);
dataa | b | g | count
-------+-----+---+-------
@@ -311,46 +312,46 @@ SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(d
b | bar | | 2
b | | | 2
| | | 6
- a | | 1 | 2
- b | | 1 | 1
- | | 1 | 3
- a | | 2 | 2
- b | | 2 | 1
- | | 2 | 3
| bar | 1 | 2
| bar | 2 | 2
| bar | | 4
| foo | 1 | 1
| foo | 2 | 1
| foo | | 2
+ a | | 1 | 2
+ b | | 1 | 1
+ | | 1 | 3
+ a | | 2 | 2
+ b | | 2 | 1
+ | | 2 | 3
(24 rows)
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g) ORDER BY dataa;
dataa | b | g | count
-------+-----+---+-------
+ a | foo | | 2
+ a | | | 4
+ a | | 2 | 2
a | bar | 1 | 1
a | bar | 2 | 1
a | bar | | 2
a | foo | 1 | 1
a | foo | 2 | 1
- a | foo | | 2
- a | | | 4
a | | 1 | 2
- a | | 2 | 2
- b | bar | 2 | 1
+ b | bar | 1 | 1
b | | | 2
b | | 1 | 1
- b | | 2 | 1
- b | bar | 1 | 1
+ b | bar | 2 | 1
b | bar | | 2
- | foo | | 2
- | foo | 1 | 1
+ b | | 2 | 1
| | 2 | 3
+ | | | 6
| bar | 1 | 2
| bar | 2 | 2
- | | | 6
- | foo | 2 | 1
| bar | | 4
+ | foo | 1 | 1
+ | foo | 2 | 1
+ | foo | | 2
| | 1 | 3
(24 rows)
@@ -360,29 +361,30 @@ SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(d
a | bar | 1 | 1
a | foo | 1 | 1
b | bar | 1 | 1
+ | bar | 1 | 2
+ | foo | 1 | 1
a | | 1 | 2
b | | 1 | 1
| | 1 | 3
- | bar | 1 | 2
- | foo | 1 | 1
- | foo | 2 | 1
- | bar | 2 | 2
a | | 2 | 2
b | | 2 | 1
- a | bar | 2 | 1
+ | bar | 2 | 2
| | 2 | 3
+ | foo | 2 | 1
+ a | bar | 2 | 1
a | foo | 2 | 1
b | bar | 2 | 1
- a | foo | | 2
+ a | | | 4
b | bar | | 2
b | | | 2
| | | 6
- a | | | 4
+ a | foo | | 2
+ a | bar | | 2
| bar | | 4
| foo | | 2
- a | bar | | 2
(24 rows)
+reset enable_hashagg;
-- data modification
CREATE TABLE fewmore AS SELECT generate_series(1,3) AS data;
INSERT INTO fewmore VALUES(generate_series(4,5));
diff --git a/src/test/regress/sql/groupingsets.sql b/src/test/regress/sql/groupingsets.sql
index cc557cc552..0e023d9ddc 100644
--- a/src/test/regress/sql/groupingsets.sql
+++ b/src/test/regress/sql/groupingsets.sql
@@ -31,6 +31,14 @@ copy gstest3 from stdin;
\.
alter table gstest3 add primary key (a);
+create temp table gstest4(id integer, v integer,
+ unhashable_col bit(4), unsortable_col xid);
+insert into gstest4
+values (1,1,b'0000','1'), (2,2,b'0001','1'),
+ (3,4,b'0010','2'), (4,8,b'0011','2'),
+ (5,16,b'0000','2'), (6,32,b'0001','2'),
+ (7,64,b'0010','1'), (8,128,b'0011','1');
+
create temp table gstest_empty (a integer, b integer, v integer);
create function gstest_data(v integer, out a integer, out b integer)
@@ -43,8 +51,11 @@ create function gstest_data(v integer, out a integer, out b integer)
-- basic functionality
+set enable_hashagg = false; -- test hashing explicitly later
+
-- simple rollup with multiple plain aggregates, with and without ordering
-- (and with ordering differing from grouping)
+
select a, b, grouping(a,b), sum(v), count(*), max(v)
from gstest1 group by rollup (a,b);
select a, b, grouping(a,b), sum(v), count(*), max(v)
@@ -161,7 +172,7 @@ select a, b from (values (1,2),(2,3)) v(a,b) group by a,b, grouping sets(a);
-- Tests for chained aggregates
select a, b, grouping(a,b), sum(v), count(*), max(v)
- from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2));
+ from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY ROLLUP((e+1),(f+1));
select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY CUBE((e+1),(f+1)) ORDER BY (e+1),(f+1);
select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
@@ -224,4 +235,146 @@ select array(select row(v.a,s1.*) from (select two,four, count(*) from onek grou
select sum(ten) from onek group by two, rollup(four::text) order by 1;
select sum(ten) from onek group by rollup(four::text), two order by 1;
+-- hashing support
+
+set enable_hashagg = true;
+
+-- failure cases
+
+select count(*) from gstest4 group by rollup(unhashable_col,unsortable_col);
+select array_agg(v order by v) from gstest4 group by grouping sets ((id,unsortable_col),(id));
+
+-- simple cases
+
+select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a),(b)) order by 3,1,2;
+explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a),(b)) order by 3,1,2;
+
+select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by cube(a,b) order by 3,1,2;
+explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by cube(a,b) order by 3,1,2;
+
+-- shouldn't try and hash
+explain (costs off)
+ select a, b, grouping(a,b), array_agg(v order by v)
+ from gstest1 group by cube(a,b);
+
+-- mixed hashable/sortable cases
+select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((unhashable_col),(unsortable_col))
+ order by 3, 5;
+explain (costs off)
+ select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((unhashable_col),(unsortable_col))
+ order by 3,5;
+
+select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col))
+ order by 3,5;
+explain (costs off)
+ select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col))
+ order by 3,5;
+
+-- empty input: first is 0 rows, second 1, third 3 etc.
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a);
+explain (costs off)
+ select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a);
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),());
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),());
+explain (costs off)
+ select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),());
+select sum(v), count(*) from gstest_empty group by grouping sets ((),(),());
+explain (costs off)
+ select sum(v), count(*) from gstest_empty group by grouping sets ((),(),());
+
+-- check that functionally dependent cols are not nulled
+select a, d, grouping(a,b,c)
+ from gstest3
+ group by grouping sets ((a,b), (a,c));
+explain (costs off)
+ select a, d, grouping(a,b,c)
+ from gstest3
+ group by grouping sets ((a,b), (a,c));
+
+-- simple rescan tests
+
+select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by grouping sets (a,b);
+explain (costs off)
+ select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by grouping sets (a,b);
+
+select *
+ from (values (1),(2)) v(x),
+ lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s;
+explain (costs off)
+ select *
+ from (values (1),(2)) v(x),
+ lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s;
+
+-- Tests for chained aggregates
+select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
+explain (costs off)
+ select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
+select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
+ from gstest2 group by cube (a,b) order by rsum, a, b;
+explain (costs off)
+ select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
+ from gstest2 group by cube (a,b) order by rsum, a, b;
+select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by cube (a,b) order by a,b;
+explain (costs off)
+ select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by cube (a,b) order by a,b;
+
+-- More rescan tests
+select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by cube(four,ten)) s on true order by v.a,four,ten;
+select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by cube(two,four) order by two,four) s1) from (values (1),(2)) v(a);
+
+-- Rescan logic changes when there are no empty grouping sets, so test
+-- that too:
+select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by grouping sets(four,ten)) s on true order by v.a,four,ten;
+select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by grouping sets(two,four) order by two,four) s1) from (values (1),(2)) v(a);
+
+-- test the knapsack
+
+set work_mem = '64kB';
+explain (costs off)
+ select unique1,
+ count(two), count(four), count(ten),
+ count(hundred), count(thousand), count(twothousand),
+ count(*)
+ from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two);
+explain (costs off)
+ select unique1,
+ count(two), count(four), count(ten),
+ count(hundred), count(thousand), count(twothousand),
+ count(*)
+ from tenk1 group by grouping sets (unique1,hundred,ten,four,two);
+
+set work_mem = '384kB';
+explain (costs off)
+ select unique1,
+ count(two), count(four), count(ten),
+ count(hundred), count(thousand), count(twothousand),
+ count(*)
+ from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two);
+
-- end
diff --git a/src/test/regress/sql/tsrf.sql b/src/test/regress/sql/tsrf.sql
index e627bb99ed..417e78c53d 100644
--- a/src/test/regress/sql/tsrf.sql
+++ b/src/test/regress/sql/tsrf.sql
@@ -66,12 +66,14 @@ SELECT SUM(count(*)) OVER(PARTITION BY generate_series(1,3) ORDER BY generate_se
SELECT few.dataa, count(*), min(id), max(id), generate_series(1,3) FROM few GROUP BY few.dataa ORDER BY 5, 1;
-- grouping sets are a bit special, they produce NULLs in columns not actually NULL
+set enable_hashagg = false;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab);
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab) ORDER BY dataa;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab) ORDER BY g;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g);
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g) ORDER BY dataa;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g) ORDER BY g;
+reset enable_hashagg;
-- data modification
CREATE TABLE fewmore AS SELECT generate_series(1,3) AS data;
[snip]
This thread seems to have gone quiet - is it time for me to just go
ahead and commit the thing anyway? Anyone else want to weigh in?
--
Andrew (irc:RhodiumToad)
Hi,
+/*
+ * Switch to phase "newphase", which must either be 0 or 1 (to reset) or
  * current_phase + 1. Juggle the tuplesorts accordingly.
  */
 static void
 initialize_phase(AggState *aggstate, int newphase)
 {
-    Assert(newphase == 0 || newphase == aggstate->current_phase + 1);
+    Assert(newphase <= 1 || newphase == aggstate->current_phase + 1);
I think this needs an expanded explanation somewhere in the file header
about what these "special" phases mean.
@@ -838,17 +897,21 @@ advance_transition_function(AggState *aggstate,
 /*
  * Advance each aggregate transition state for one input tuple. The input
  * tuple has been stored in tmpcontext->ecxt_outertuple, so that it is
- * accessible to ExecEvalExpr. pergroup is the array of per-group structs to
- * use (this might be in a hashtable entry).
+ * accessible to ExecEvalExpr.
+ *
+ * We have two sets of transition states to handle: one for sorted aggregation
+ * and one for hashed; we do them both here, to avoid multiple evaluation of
+ * the inputs.
  *
  * When called, CurrentMemoryContext should be the per-query context.
  */
Changes to advance_aggregates() are, in my experience, quite likely to
have performance effects. This needs some performance tests.
I scheduled a small TPCH comparison run:
master q01 min: 28924.766 dev-hash min: 28893.923 [diff -0.11]
master q02 min: 2448.135 dev-hash min: 2430.589 [diff -0.72]
master q03 min: 14596.292 dev-hash min: 14421.054 [diff -1.22]
master q04 min: 1999.684 dev-hash min: 1958.727 [diff -2.09]
master q05 min: 10638.148 dev-hash min: 10753.075 [diff +1.07]
master q06 min: 3679.955 dev-hash min: 3649.114 [diff -0.85]
master q07 min: 10097.272 dev-hash min: 10358.089 [diff +2.52]
master q08 min: 3103.698 dev-hash min: 3078.108 [diff -0.83]
master q09 min: 13034.87 dev-hash min: 13049.402 [diff +0.11]
master q10 min: 11702.966 dev-hash min: 11535.05 [diff -1.46]
master q11 min: 577.114 dev-hash min: 586.767 [diff +1.65]
master q12 min: 9087.413 dev-hash min: 9272.874 [diff +2.00]
master q13 min: 16353.813 dev-hash min: 16473.882 [diff +0.73]
master q14 min: 1564.321 dev-hash min: 1552.31 [diff -0.77]
master q15 min: 3958.565 dev-hash min: 3946.728 [diff -0.30]
master q16 min: 3793.345 dev-hash min: 3784.996 [diff -0.22]
master q17 min: 828.286 dev-hash min: 834.929 [diff +0.80]
master q18 min: 13473.084 dev-hash min: 13777.327 [diff +2.21]
master q19 min: 654.241 dev-hash min: 673.863 [diff +2.91]
master q20 min: 2906.811 dev-hash min: 2882.793 [diff -0.83]
master q22 min: 1226.397 dev-hash min: 1262.045 [diff +2.82]
master total min: 154649.176 dev-hash min: 155175.645 [diff +0.34]
Looks like it could all be noise, but it seems worthwhile to look into
some.
 * GROUP BY columns. The per-group data is allocated in lookup_hash_entry(),
 * for each entry.
 *
- * The hash table always lives in the aggcontext memory context.
+ * We have a separate hashtable and associated perhash data structure for each
+ * grouping set for which we're doing hashing.
+ *
+ * The hash tables always live in the hashcontext's per-tuple memory context
+ * (there is only one of these for all tables together, since they are all
+ * reset at the same time).
 */
I've not yet read up on the thread, or the whole patch, but an
explanation of the memory management for all those tables would be good
somewhere (maybe I've just not read that far).
@@ -2388,7 +2597,36 @@ agg_retrieve_hash_table(AggState *aggstate)
 * ExecInitAgg
 *
 * Creates the run-time information for the agg node produced by the
- * planner and initializes its outer subtree
+ * planner and initializes its outer subtree.
+ *
+ * What we get from the planner is actually one "real" Agg node which is part
+ * of the plan tree proper, but which optionally has an additional list of Agg
+ * nodes hung off the side via the "chain" field. This is because an Agg node
+ * happens to be a convenient representation of all the data we need for
+ * grouping sets.
+ *
+ * For many purposes, we treat the "real" node as if it were just the first
+ * node in the chain. The chain must be ordered such that hashed entries come
+ * before sorted/plain entries; the real node is marked AGG_MIXED if there are
+ * mixed types (in which case the real node describes one of the hashed
"mixed types" sounds a bit weird.
+ * We collect all hashed nodes into a single "phase", numbered 0, and create a
+ * sorted phase (numbered 1..n) for each AGG_SORTED or AGG_PLAIN node. Phase
+ * 0 is allocated even if there are no hashes, but remains unused in that
+ * case.
Ah, here's the explanation I'd been looking for ;)
+Bitmapset *
+DiscreteKnapsack(int max_weight, int num_items,
+                 int *item_weights, double *item_values)
+{
+    MemoryContext local_ctx = AllocSetContextCreate(CurrentMemoryContext,
+                                                    "Knapsack",
+                                                    ALLOCSET_SMALL_MINSIZE,
+                                                    ALLOCSET_SMALL_INITSIZE,
+                                                    ALLOCSET_SMALL_MAXSIZE);
+    MemoryContext oldctx = MemoryContextSwitchTo(local_ctx);
+    double     *values;
+    Bitmapset **sets;
+    Bitmapset  *result;
+    int         i,
+                j;
+
+    Assert(max_weight >= 0);
+    Assert(num_items > 0 && item_weights);
+
+    values = palloc((1 + max_weight) * sizeof(double));
+    sets = palloc((1 + max_weight) * sizeof(Bitmapset *));
We usually cast the result of palloc.
+    for (i = 0; i <= max_weight; ++i)
+    {
+        values[i] = 0;
+        sets[i] = bms_make_singleton(num_items);
+    }
+
+    for (i = 0; i < num_items; ++i)
+    {
+        int         iw = item_weights[i];
+        double      iv = item_values ? item_values[i] : 1;
+
+        for (j = max_weight; j >= iw; --j)
+        {
+            int         ow = j - iw;
+
+            if (values[j] <= values[ow] + iv)
+            {
+                /* copy sets[ow] to sets[j] without realloc */
+                if (j != ow)
+                {
+                    sets[j] = bms_del_members(sets[j], sets[j]);
+                    sets[j] = bms_add_members(sets[j], sets[ow]);
+                }
+
+                sets[j] = bms_add_member(sets[j], i);
+
+                values[j] = values[ow] + iv;
+            }
+        }
+    }
+
+    MemoryContextSwitchTo(oldctx);
+
+    result = bms_del_member(bms_copy(sets[max_weight]), num_items);
+
+    MemoryContextDelete(local_ctx);
+
+    return result;
+}
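A quick illustration of what this returns (example added for clarity, not
from the patch or thread): with NULL item_values every item counts as 1, so
the function maximizes the number of items packed, and iterating j downward
ensures each item is used at most once (the classic 0/1 knapsack
recurrence):

    int         weights[] = {8, 4, 5};
    Bitmapset  *chosen;

    /* capacity 10: the best equal-value packing is items 1 and 2 (4 + 5) */
    chosen = DiscreteKnapsack(10, 3, weights, NULL);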
diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c
index 252af5c870..ea1c538287 100644
--- a/src/backend/nodes/bitmapset.c
+++ b/src/backend/nodes/bitmapset.c
@@ -21,7 +21,7 @@
 #include "postgres.h"

 #include "access/hash.h"
-
+#include "nodes/pg_list.h"
Pedanticism^7: You're deleting a line here...
+/*
  * bms_nonempty_difference - do sets have a nonempty difference?
  */
 bool
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 7418fbeded..d2d66f915b 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -1936,6 +1936,28 @@ _outAggPath(StringInfo str, const AggPath *node)
 }

 static void
+_outRollupData(StringInfo str, const RollupData *node)
+{
+    WRITE_NODE_TYPE("ROLLUP");
+
+    WRITE_NODE_FIELD(groupClause);
+    WRITE_NODE_FIELD(gsets);
+    WRITE_NODE_FIELD(gsets_data);
+    WRITE_FLOAT_FIELD(numGroups, "%.0f");
+    WRITE_BOOL_FIELD(hashable);
+    WRITE_BOOL_FIELD(is_hashed);
+}
+
+static void
+_outGroupingSetData(StringInfo str, const GroupingSetData *node)
+{
+    WRITE_NODE_TYPE("GSDATA");
+
+    WRITE_NODE_FIELD(set);
+    WRITE_FLOAT_FIELD(numGroups, "%.0f");
+}
.0f?
+static grouping_sets_data *
+preprocess_grouping_sets(PlannerInfo *root)
+{
It'd not hurt to add a comment as to what this function is actually
doing.
Btw, this would be a lot easier to review if the move to
preprocess_grouping_sets were done in a separate preliminary commit.
+    if (!bms_is_empty(gd->unsortable_refs))
+    {
...
+    }
+    else
+        sets = extract_rollup_sets(parse->groupingSets);
I hope there's tests for both these branches.
+    /*
+     * Is it hashable? We pretend empty sets are hashable even though we
+     * actually force them not to be hashed later. But don't bother if
+     * there's nothing but empty sets.
+     */
Why?
@@ -3214,6 +3407,11 @@ get_number_of_groups(PlannerInfo *root,
 * estimate_hashagg_tablesize
 *    estimate the number of bytes that a hash aggregate hashtable will
 *    require based on the agg_costs, path width and dNumGroups.
+ *
+ * XXX this may be over-estimating the size now that hashagg knows to omit
+ * unneeded columns from the hashtable. Also for mixed-mode grouping sets,
+ * grouping columns not in the hashed set are counted here even though hashagg
+ * won't store them. Is this a problem?
 */
Hm. This seems like it could move us to use sorting, even if not
actually warranted.
+/*
+ * For a given input path, consider the possible ways of doing grouping sets on
+ * it, by combinations of hashing and sorting. This can be called multiple
+ * times, so it's important that it not scribble on input. No result is
+ * returned, but any generated paths are added to grouped_rel.
+ */
+
+static void
+consider_groupingsets_paths(PlannerInfo *root,
What's the newline after the comment about?
+                            RelOptInfo *grouped_rel,
+                            Path *path,
+                            bool is_sorted,
+                            bool can_hash,
+                            PathTarget *target,
+                            grouping_sets_data *gd,
+                            const AggClauseCosts *agg_costs,
+                            double dNumGroups)
+{
+    Query      *parse = root->parse;
+
+    /*
+     * If we're not being offered sorted input, then only consider plans that
+     * can be done entirely by hashing.
+     *
+     * We can hash everything if it looks like it'll fit in work_mem. But if
+     * the input is actually sorted despite not being advertised as such, we
+     * prefer to make use of that in order to use less memory.
+     * If none of the grouping sets are sortable, then ignore the work_mem
+     * limit and generate a path anyway, since otherwise we'll just fail.
+     */
+    if (!is_sorted)
+    {
+        List       *new_rollups = NIL;
+        RollupData *unhashable_rollup = NULL;
+        List       *sets_data;
+        List       *empty_sets_data = NIL;
+        List       *empty_sets = NIL;
+        ListCell   *lc;
+        ListCell   *l_start = list_head(gd->rollups);
+        AggStrategy strat = AGG_HASHED;
+        Size        hashsize;
+        double      exclude_groups = 0.0;
+
+        Assert(can_hash);
+
+        if (pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
+        {
+            unhashable_rollup = lfirst(l_start);
a) cast result of lfirst/lnext/whatnot. b) Why is this guaranteed to be
unhashable?
+            exclude_groups = unhashable_rollup->numGroups;
+            l_start = lnext(l_start);
+        }
What guarantees that such a rollup would be at the start?
+        hashsize = estimate_hashagg_tablesize(path,
+                                              agg_costs,
+                                              dNumGroups - exclude_groups);
+
+        if (hashsize > work_mem * 1024L && gd->rollups)
+            return;             /* nope, won't fit */
Didn't you above talk about generating a path anyway? Or is rollups
guaranteed not to be set in that case (if so, not obvious on a casual
read)?
+    /*
+     * If we have sorted input but nothing we can do with it, bail.
+     */
+
+    if (list_length(gd->rollups) == 0)
+        return;
Postgres style usually doesn't add a newline between a comment and the
block it concerns - you do that all over.
+    if (can_hash && gd->any_hashable)
+    {
+        List       *rollups = NIL;
+        List       *hash_sets = list_copy(gd->unsortable_sets);
+        double      availspace = (work_mem * 1024.0);
Why the parens, and why the .0?
+        ListCell   *lc;
+
+        /*
+         * Account first for space needed for groups we can't sort at all.
+         */
+        availspace -= (double) estimate_hashagg_tablesize(path,
+                                                          agg_costs,
+                                                          gd->dNumHashGroups);
+
+        if (availspace > 0 && list_length(gd->rollups) > 1)
+        {
+            double      scale;
+            int         num_rollups = list_length(gd->rollups);
+            int         k_capacity;
+            int        *k_weights = palloc(num_rollups * sizeof(int));
+            Bitmapset  *hash_items = NULL;
+            int         i;
+
+            /*
+             * To use the discrete knapsack, we need to scale the values to a
+             * reasonably small bounded range. We choose to allow a 5% error
+             * margin; we have no more than 4096 rollups in the worst possible
+             * case, which with a 5% error margin will require a bit over 42MB
+             * of workspace. (Anyone wanting to plan queries that complex had
+             * better have the memory for it. In more reasonable cases, with
+             * no more than a couple of dozen rollups, the memory usage will
+             * be negligible.)
+             *
+             * k_capacity is naturally bounded, but we clamp the values for
+             * scale and weight (below) to avoid overflows or underflows (or
+             * uselessly trying to use a scale factor less than 1 byte).
+             */
I think you need a higher level explanation of how you're mapping the
work_mem issue to knapsack somewhere.
+            /*
+             * Apply knapsack algorithm; compute the set of items which
+             * maximizes the value stored (in this case the number of sorts
+             * saved) while keeping the total size (approximately) within
+             * capacity.
+             */
Hm. So we're solely optimizing for the number of sorts, rather than the
cost of the sorts. Probably an acceptable tradeoff.
+}
Phew, this isn't simple. Need to be more awake than I am, 8 hours into a
flight.
@@ -737,7 +739,8 @@
 typedef enum AggStrategy
 {
     AGG_PLAIN,                  /* simple agg across all input rows */
     AGG_SORTED,                 /* grouped agg, input must be sorted */
-    AGG_HASHED                  /* grouped agg, use internal hashtable */
+    AGG_HASHED,                 /* grouped agg, use internal hashtable */
+    AGG_MIXED                   /* grouped agg, hash and sort concurrently */
 } AggStrategy;
Concurrently sounds a bit like using multiple processes / parallelism...
- Andres
"Andres" == Andres Freund <andres@anarazel.de> writes:
Andres> Changes to advance_aggregates() are, in my experience, quite
Andres> likely to have performance effects. This needs some
Andres> performance tests.
[...]
Andres> Looks like it could all be noise, but it seems worthwhile to
Andres> look into some.
Trying to sort out signal from noise when dealing with performance
impacts of no more than a few percent is _extremely hard_ these days.
Remember this, from a couple of years back? http://tinyurl.com/op9qg8a
That's a graph of performance results from tests where the only change
being made was adding padding bytes to a function that was never called
during the test. Performance differences on the order of 3% were being
introduced by doing nothing more than changing the actual memory
locations of functions.
My latest round of tests on this patch shows a similar effect. Here's
one representative test timing (a select count(*) with no grouping sets
involved):
master: 5727ms
gsets_hash: 5949ms
gsets_hash + 500 padding bytes: 5680ms
Since the effect of padding is going to vary over time as other,
unrelated, patches happen, I think I can safely claim that the
performance of the patch at least overlaps the noise range of the
performance of current code. To get a more definitive result, it would
be necessary to run at least some dozens of tests, with different
padding sizes, and determine whether the average changes detectably
between the two versions. I will go ahead and do this, out of sheer
curiosity if nothing else, but the preliminary results suggest there's
probably nothing worth holding up the patch for.
--
Andrew (irc:RhodiumToad)
On Mar 22, 2017, at 8:09 AM, Mark Dilger <hornschnorter@gmail.com> wrote:
On Mar 22, 2017, at 7:55 AM, Andrew Gierth <andrew@tao11.riddles.org.uk> wrote:
[snip]
This thread seems to have gone quiet - is it time for me to just go
ahead and commit the thing anyway? Anyone else want to weigh in?

Sorry for the delay. I'll try to look at this today. Others are most welcome
to weigh in.
You define DiscreteKnapsack to take integer weights and double values,
and perform the usual Dynamic Programming algorithm to solve. But
the only place you call this, you pass in NULL for the values, indicating
that all the values are equal. I'm confused why in this degenerate case
you bother with the DP at all. Can't you just load the knapsack from
lightest to heaviest after sorting, reducing the runtime complexity to O(nlogn)?
It's been a long day. Sorry if I am missing something obvious.
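For reference, the greedy loading described above could look like this (a
standalone sketch under the equal-values assumption, not code from the
patch; all names here are hypothetical):

#include <stdlib.h>

typedef struct
{
    int         index;          /* original item position */
    int         weight;
} KnapsackItem;

static int
weight_cmp(const void *a, const void *b)
{
    return ((const KnapsackItem *) a)->weight -
        ((const KnapsackItem *) b)->weight;
}

/* Fills chosen[] with original indexes of picked items; returns count. */
static int
greedy_equal_value_knapsack(int max_weight, int num_items,
                            const int *item_weights, int *chosen)
{
    KnapsackItem *items = malloc(num_items * sizeof(KnapsackItem));
    int         remaining = max_weight;
    int         n = 0;

    for (int i = 0; i < num_items; i++)
    {
        items[i].index = i;
        items[i].weight = item_weights[i];
    }
    qsort(items, num_items, sizeof(KnapsackItem), weight_cmp);

    /* lightest-first maximizes the item count when all values are equal */
    for (int i = 0; i < num_items; i++)
    {
        if (items[i].weight <= remaining)
        {
            remaining -= items[i].weight;
            chosen[n++] = items[i].index;
        }
    }
    free(items);
    return n;
}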
I'm guessing you intend to extend the code at some later date to have
different values for items. Is that right?
I reapplied your patch and reran the regression tests on my laptop and they
all pass.
mark
"Andres" == Andres Freund <andres@anarazel.de> writes:
- Assert(newphase == 0 || newphase == aggstate->current_phase + 1);
+ Assert(newphase <= 1 || newphase == aggstate->current_phase + 1);
Andres> I think this needs an expanded explanation somewhere in the file
Andres> header about what these "special" phases mean.
I could move most of the block comment for ExecInitAgg up into the file
header; would that be better?
Andres> I've not yet read up on the thread, or the whole patch, but an
Andres> explanation of the memory management for all those tables would
Andres> be good somewhere (maybe I've just not read that far).
I can expand on that.
Andres> "mixed types" sounds a bit weird.
changing to "if there are both types present"
+ values = palloc((1 + max_weight) * sizeof(double));
+ sets = palloc((1 + max_weight) * sizeof(Bitmapset *));
Andres> We usually cast the result of palloc.
Rough count in the backend has ~400 without casts to ~1350 with, so this
doesn't seem to have been consistently enforced.
+static void
+_outGroupingSetData(StringInfo str, const GroupingSetData *node)
+{
+    WRITE_NODE_TYPE("GSDATA");
+
+    WRITE_NODE_FIELD(set);
+    WRITE_FLOAT_FIELD(numGroups, "%.0f");
+}
Andres> .0f?
Copied from every other node that uses "%.0f" to write out a numGroups
value.
+static grouping_sets_data *
+preprocess_grouping_sets(PlannerInfo *root)
+{
Andres> It'd not hurt to add a comment as to what this function is
Andres> actually doing.
Sure.
Andres> I hope there's tests for both these branches.
A number of tests for both unsortable and unhashable cases are in the
regression test.
+    /*
+     * Is it hashable? We pretend empty sets are hashable even though we
+     * actually force them not to be hashed later. But don't bother if
+     * there's nothing but empty sets.
+     */
Andres> Why?
If there's no non-empty grouping set then there's nothing to hash (the
hash code assumes at least one column). I'll put that in the comment.
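Something like this, perhaps (a possible revision, not the committed
wording):

    /*
     * Is it hashable? We pretend empty sets are hashable even though we
     * actually force them not to be hashed later. But don't bother if
     * there's nothing but empty sets, since the hashing code requires at
     * least one grouping column.
     */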
@@ -3214,6 +3407,11 @@ get_number_of_groups(PlannerInfo *root,
 * estimate_hashagg_tablesize
 *    estimate the number of bytes that a hash aggregate hashtable will
 *    require based on the agg_costs, path width and dNumGroups.
+ *
+ * XXX this may be over-estimating the size now that hashagg knows to omit
+ * unneeded columns from the hashtable. Also for mixed-mode grouping sets,
+ * grouping columns not in the hashed set are counted here even though hashagg
+ * won't store them. Is this a problem?
 */
Andres> Hm. This seems like it could move us to use sorting, even if
Andres> not actually warranted.
This is largely a pre-existing issue that this patch doesn't try and
fix. That would be an issue for a separate patch, I think. I merely
added the comment to point it out.
Andres> What's the newline after the comment about?
Old style habits die hard.
+                            RelOptInfo *grouped_rel,
+                            Path *path,
+                            bool is_sorted,
+                            bool can_hash,
+                            PathTarget *target,
+                            grouping_sets_data *gd,
+                            const AggClauseCosts *agg_costs,
+                            double dNumGroups)
+{
+    Query      *parse = root->parse;
+
+    /*
+     * If we're not being offered sorted input, then only consider plans that
+     * can be done entirely by hashing.
+     *
+     * We can hash everything if it looks like it'll fit in work_mem. But if
+     * the input is actually sorted despite not being advertised as such, we
+     * prefer to make use of that in order to use less memory.
+     * If none of the grouping sets are sortable, then ignore the work_mem
+     * limit and generate a path anyway, since otherwise we'll just fail.
+     */
+    if (!is_sorted)
+    {
+        List       *new_rollups = NIL;
+        RollupData *unhashable_rollup = NULL;
+        List       *sets_data;
+        List       *empty_sets_data = NIL;
+        List       *empty_sets = NIL;
+        ListCell   *lc;
+        ListCell   *l_start = list_head(gd->rollups);
+        AggStrategy strat = AGG_HASHED;
+        Size        hashsize;
+        double      exclude_groups = 0.0;
+
+        Assert(can_hash);
+
+        if (pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
+        {
+            unhashable_rollup = lfirst(l_start);
Andres> a) cast result of lfirst/lnext/whatnot.
casting lfirst is defensible (and only about 10% of existing uses don't
cast it), but why would you ever cast the result of lnext?
Andres> b) Why is this guaranteed to be unhashable?
It might not be unhashable; that probably needs a better name. It's the
rollup that we aren't going to hash because we're being presented with
input already in correct order. Also, it's the only rollup that can
contain empty grouping sets, which the hash code can't cope with.
Obviously this needs a comment and some consideration of naming.
+        if (hashsize > work_mem * 1024L && gd->rollups)
+            return;             /* nope, won't fit */
Andres> Didn't you above talk about generating a path anyway? Or is
Andres> rollups guaranteed not to be set in that case (if so, not
Andres> obvious on a casual read)?
Unsortable groups can't participate in rollups. The presence of a rollup
implies that we're going to be offered at least one is_sorted input
path.
+    if (can_hash && gd->any_hashable)
+    {
+        List       *rollups = NIL;
+        List       *hash_sets = list_copy(gd->unsortable_sets);
+        double      availspace = (work_mem * 1024.0);
Andres> Why the parens, and why the .0?
The .0 is because the * should be done as a double, not an int. This
isn't an uncommon idiom, there are quite a few similar uses in the
planner alone.
The parens because I thought it was clearer that way.
Andres> I think you need a higher level explanation of how you're
Andres> mapping the work_mem issue to knapsack somewhere.
The general overview ("work_mem is the knapsack size, and we need to
figure out how best to pack it with hashtables"), or the specific issue
of the scale factor for discrete chunking of the size?
Andres> Hm. So we're solely optimizing for the number of sorts, rather
Andres> than the cost of the sorts. Probably an acceptable tradeoff.
The existing cost model for sorts doesn't actually seem to help us here;
all of our sorts sort the same data, just with different comparison
columns, but cost_sort doesn't actually account for that (all its
callers except the CLUSTER planning code pass in 0.0 for
comparison_cost).
If the sort costs were correct, we could compute a "value" for each
rollup that reflected the cost difference between the sort+unique
comparisons for it, and the hash functions that would be called if we
hashed instead; and just pass that to the knapsack function.
--
Andrew (irc:RhodiumToad)
"Mark" == Mark Dilger <hornschnorter@gmail.com> writes:
Mark> You define DiscreteKnapsack to take integer weights and double
Mark> values, and perform the usual Dynamic Programming algorithm to
Mark> solve. But the only place you call this, you pass in NULL for
Mark> the values, indicating that all the values are equal. I'm
Mark> confused why in this degenerate case you bother with the DP at
Mark> all. Can't you just load the knapsack from lightest to heaviest
Mark> after sorting, reducing the runtime complexity to O(nlogn)? It's
Mark> been a long day. Sorry if I am missing something obvious.
That's actually a very good question.
(Though in passing I should point out that the runtime cost is going to
be negligible in all practical cases. Even an 8-way cube (256 grouping
sets) has only 70 rollups, and a 4-way cube has only 6; the limit of
4096 grouping sets was only chosen because it was a nice round number
that was larger than comparable limits in other database products. Other
stuff in the grouping sets code has worse time bounds; the
bipartite-match code used to minimize the number of rollups is
potentially O(n^2.5) in the number of grouping sets.)
Thinking about it, it's not at all obvious what we should be optimizing
for here in the absence of accurate sort costs. Right now what matters
is saving as many sort steps as possible, since that's a saving likely
to be many orders of magnitude greater than the differences between two
sorts. The one heuristic that might be useful is to prefer the smaller
estimated size if other factors are equal, just on memory usage grounds,
but even that seems a bit tenuous.
I'm inclined to leave things as they are in the current patch, and maybe
revisit it during beta if we get any relevant feedback from people
actually using grouping sets?
Mark> I'm guessing you intend to extend the code at some later date to
Mark> have different values for items. Is that right?
Well, as I mentioned in a reply to Andres, we probably should eventually
optimize for greatest reduction in cost, and the code as it stands would
allow that to be added easily, but that would require having more
accurate cost info than we have now. cost_sort doesn't take into
consideration the number or types of sort columns or the costs of their
comparison functions, so all sorts of the same input data are costed
equally.
--
Andrew (irc:RhodiumToad)
On 2017-03-23 03:43:57 +0000, Andrew Gierth wrote:
"Andres" == Andres Freund <andres@anarazel.de> writes:
Andres> Changes to advance_aggregates() are, in my experience, quite
Andres> likely to have performance effects. This needs some
Andres> performance tests.
[...]
Andres> Looks like it could all be noise, but it seems worthwhile to
Andres> look into some.Trying to sort out signal from noise when dealing with performance
impacts of no more than a few percent is _extremely hard_ these days.
Indeed. But that doesn't mean we needn't try. With some determination
and profiling you can often separate signal from noise - I managed to
track down 0.12% regressions in the expression evaluation work...
I will go ahead and do this, out of sheer curiosity if nothing else,
but the preliminary results suggest there's probably nothing worth
holding up the patch for.
Agreed. I'd want to run one more profile, checking whether the profiles
indicate new hotspots, but other than that...
Greetings,
Andres Freund
On Mar 23, 2017, at 11:22 AM, Andrew Gierth <andrew@tao11.riddles.org.uk> wrote:
"Mark" == Mark Dilger <hornschnorter@gmail.com> writes:
Mark> You define DiscreteKnapsack to take integer weights and double
Mark> values, and perform the usual Dynamic Programming algorithm to
Mark> solve. But the only place you call this, you pass in NULL for
Mark> the values, indicating that all the values are equal. I'm
Mark> confused why in this degenerate case you bother with the DP at
Mark> all. Can't you just load the knapsack from lightest to heaviest
Mark> after sorting, reducing the runtime complexity to O(nlogn)? It's
Mark> been a long day. Sorry if I am missing something obvious.

That's actually a very good question.
(Though in passing I should point out that the runtime cost is going to
be negligible in all practical cases. Even an 8-way cube (256 grouping
sets) has only 70 rollups, and a 4-way cube has only 6; the limit of
4096 grouping sets was only chosen because it was a nice round number
that was larger than comparable limits in other database products. Other
stuff in the grouping sets code has worse time bounds; the
bipartite-match code used to minimize the number of rollups is
potentially O(n^2.5) in the number of grouping sets.)

Thinking about it, it's not at all obvious what we should be optimizing
for here in the absence of accurate sort costs.
Is there a performance test case where this patch should shine
brightest? I'd like to load a schema with lots of data, and run
a grouping sets query, both before and after applying the patch,
to see what the performance advantage is.
mark
On 2017-03-23 05:09:46 +0000, Andrew Gierth wrote:
"Andres" == Andres Freund <andres@anarazel.de> writes:
- Assert(newphase == 0 || newphase == aggstate->current_phase + 1);
+ Assert(newphase <= 1 || newphase == aggstate->current_phase + 1);

Andres> I think this needs an expanded explanation somewhere in the file
Andres> header about what these "special" phases mean.

I could move most of the block comment for ExecInitAgg up into the file
header; would that be better?
Yes, I think so.
+ values = palloc((1 + max_weight) * sizeof(double));
+ sets = palloc((1 + max_weight) * sizeof(Bitmapset *));

Andres> We usually cast the result of palloc.

Rough count in the backend has ~400 without casts to ~1350 with, so this
doesn't seem to have been consistently enforced.
Yea, but we're still trying.
+                            RelOptInfo *grouped_rel,
+                            Path *path,
+                            bool is_sorted,
+                            bool can_hash,
+                            PathTarget *target,
+                            grouping_sets_data *gd,
+                            const AggClauseCosts *agg_costs,
+                            double dNumGroups)
+{
+    Query      *parse = root->parse;
+
+    /*
+     * If we're not being offered sorted input, then only consider plans that
+     * can be done entirely by hashing.
+     *
+     * We can hash everything if it looks like it'll fit in work_mem. But if
+     * the input is actually sorted despite not being advertised as such, we
+     * prefer to make use of that in order to use less memory.
+     * If none of the grouping sets are sortable, then ignore the work_mem
+     * limit and generate a path anyway, since otherwise we'll just fail.
+     */
+    if (!is_sorted)
+    {
+        List       *new_rollups = NIL;
+        RollupData *unhashable_rollup = NULL;
+        List       *sets_data;
+        List       *empty_sets_data = NIL;
+        List       *empty_sets = NIL;
+        ListCell   *lc;
+        ListCell   *l_start = list_head(gd->rollups);
+        AggStrategy strat = AGG_HASHED;
+        Size        hashsize;
+        double      exclude_groups = 0.0;
+
+        Assert(can_hash);
+
+        if (pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
+        {
+            unhashable_rollup = lfirst(l_start);

Andres> a) cast result of lfirst/lnext/whatnot.

casting lfirst is defensible (and only about 10% of existing uses don't
cast it), but why would you ever cast the result of lnext?
Obviously lnext is bogus, was thinking of linitial...
+    if (can_hash && gd->any_hashable)
+    {
+        List       *rollups = NIL;
+        List       *hash_sets = list_copy(gd->unsortable_sets);
+        double      availspace = (work_mem * 1024.0);

Andres> Why the parens, and why the .0?

The .0 is because the * should be done as a double, not an int.
I was just missing why - but now that I've not already been awake for 20
hours, 8 of those crammed into a plane, it's obviously beneficial
because of overflow concerns.
Andres> I think you need a higher level explanation of how you're
Andres> mapping the work_mem issue to knapsack somewhere.

The general overview ("work_mem is the knapsack size, and we need to
figure out how best to pack it with hashtables"), or the specific issue
of the scale factor for discrete chunking of the size?
The general overview - the scaling thing doesn't seem that important to
understand in detail, to understand the approach. Briefly explaining
what we're trying to minimize (sort steps), what the constraint is
(memory), that the problem is representable as the classic "knapsack
problem".
Andres> Hm. So we're solely optimizing for the number of sorts, rather
Andres> than the cost of the sorts. Probably an acceptable tradeoff.

The existing cost model for sorts doesn't actually seem to help us here;
all of our sorts sort the same data, just with different comparison
columns, but cost_sort doesn't actually account for that (all its
callers except the CLUSTER planning code pass in 0.0 for
comparison_cost).

If the sort costs were correct, we could compute a "value" for each
rollup that reflected the cost difference between the sort+unique
comparisons for it, and the hash functions that would be called if we
hashed instead; and just pass that to the knapsack function.
That'd indeed be desirable - but I think we can punt on that for now.
Greetings,
Andres Freund
"Mark" == Mark Dilger <hornschnorter@gmail.com> writes:
Mark> Is there a performance test case where this patch should shine
Mark> brightest? I'd like to load a schema with lots of data, and run
Mark> a grouping sets query, both before and after applying the patch,
Mark> to see what the performance advantage is.
The area which I think is most important for performance is the handling
of small cubes; without this patch, a 2d cube needs 2 full sorts, a 3d
one needs 3, and a 4d one needs 6. In many real-world data sets these
would all hash entirely in memory.
So here's a very simple example (deliberately using integers for
grouping to minimize the advantage; grouping by text columns in a non-C
locale would show a much greater speedup for the patch):
create table sales (
id serial,
product_id integer,
store_id integer,
customer_id integer,
qty integer);
-- random integer function
create function d(integer) returns integer language sql
as $f$ select floor(random()*$1)::integer + 1; $f$;
-- 10 million random rows
insert into sales (product_id,store_id,customer_id,qty)
select d(20), d(6), d(10), d(100) from generate_series(1,10000000);
-- example 2d cube:
select product_id, store_id, count(*), sum(qty)
from sales
group by cube(product_id, store_id);
-- example 3d cube:
select product_id, store_id, customer_id, count(*), sum(qty)
from sales
group by cube(product_id, store_id, customer_id);
-- example 4d cube with a computed column:
select product_id, store_id, customer_id, (qty / 10), count(*), sum(qty)
from sales
group by cube(product_id, store_id, customer_id, (qty / 10));
On my machine, the 2d cube is about 3.6 seconds with the patch, and
about 8 seconds without it; the 4d is about 18 seconds with the patch
and about 32 seconds without it (all with work_mem=1GB, compiled with
-O2 and assertions off).
--
Andrew (irc:RhodiumToad)
"Andres" == Andres Freund <andres@anarazel.de> writes:
Andres> We usually cast the result of palloc.
Rough count in the backend has ~400 without casts to ~1350 with, so
this doesn't seem to have been consistently enforced.
Andres> Yea, but we're still trying.
Well, a lot of the uncasted ones are in fairly new code, from quite a
number of different committers.
So if this is a big deal, why don't we already have
#define palloc_array(etype,ecount) ((etype *) palloc((ecount) * sizeof(etype)))
#define palloc_object(otype) ((otype *) palloc(sizeof(otype)))
or something of that ilk?
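
For illustration, with such (so far hypothetical) macros the allocations
flagged above would read:

    values = palloc_array(double, 1 + max_weight);
    sets = palloc_array(Bitmapset *, 1 + max_weight);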
--
Andrew (irc:RhodiumToad)
"Andres" == Andres Freund <andres@anarazel.de> writes:
Andres> a) cast result of lfirst/lnext/whatnot.
Again, what we need here is something like
#define lfirst_node(_type_, l) (castNode(_type_, lfirst(l)))
etc.
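
Used at the call site flagged earlier, that would read (illustrative):

    RollupData *unhashable_rollup = lfirst_node(RollupData, l_start);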
--
Andrew (irc:RhodiumToad)
"Andres" == Andres Freund <andres@anarazel.de> writes:
I could move most of the block comment for ExecInitAgg up into the
file header; would that be better?
Andres> Yes, I think so.
Done that and some other clarifications. Attached is the latest patch;
no substantive changes, only comments, formatting and a change to a
variable name.
The performance side of things I'm still working on; I think that would
be best handled as a followup patch.
--
Andrew (irc:RhodiumToad)
Attachments:
gshash26.patch (text/x-patch)
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 059c5c309a..a466bf2079 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -3276,16 +3276,19 @@ select sum(q.a), count(q.b) from ft4 left join (select 13, avg(ft1.c1), sum(ft2.
-- Grouping sets
explain (verbose, costs off)
select c2, sum(c1) from ft1 where c2 < 3 group by rollup(c2) order by 1 nulls last;
- QUERY PLAN
----------------------------------------------------------------------------------------------------
- GroupAggregate
- Output: c2, sum(c1)
- Group Key: ft1.c2
- Group Key: ()
- -> Foreign Scan on public.ft1
- Output: c2, c1
- Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3)) ORDER BY c2 ASC NULLS LAST
-(7 rows)
+ QUERY PLAN
+------------------------------------------------------------------------------
+ Sort
+ Output: c2, (sum(c1))
+ Sort Key: ft1.c2
+ -> MixedAggregate
+ Output: c2, sum(c1)
+ Hash Key: ft1.c2
+ Group Key: ()
+ -> Foreign Scan on public.ft1
+ Output: c2, c1
+ Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3))
+(10 rows)
select c2, sum(c1) from ft1 where c2 < 3 group by rollup(c2) order by 1 nulls last;
c2 | sum
@@ -3298,16 +3301,19 @@ select c2, sum(c1) from ft1 where c2 < 3 group by rollup(c2) order by 1 nulls la
explain (verbose, costs off)
select c2, sum(c1) from ft1 where c2 < 3 group by cube(c2) order by 1 nulls last;
- QUERY PLAN
----------------------------------------------------------------------------------------------------
- GroupAggregate
- Output: c2, sum(c1)
- Group Key: ft1.c2
- Group Key: ()
- -> Foreign Scan on public.ft1
- Output: c2, c1
- Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3)) ORDER BY c2 ASC NULLS LAST
-(7 rows)
+ QUERY PLAN
+------------------------------------------------------------------------------
+ Sort
+ Output: c2, (sum(c1))
+ Sort Key: ft1.c2
+ -> MixedAggregate
+ Output: c2, sum(c1)
+ Hash Key: ft1.c2
+ Group Key: ()
+ -> Foreign Scan on public.ft1
+ Output: c2, c1
+ Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3))
+(10 rows)
select c2, sum(c1) from ft1 where c2 < 3 group by cube(c2) order by 1 nulls last;
c2 | sum
@@ -3320,20 +3326,19 @@ select c2, sum(c1) from ft1 where c2 < 3 group by cube(c2) order by 1 nulls last
explain (verbose, costs off)
select c2, c6, sum(c1) from ft1 where c2 < 3 group by grouping sets(c2, c6) order by 1 nulls last, 2 nulls last;
- QUERY PLAN
--------------------------------------------------------------------------------------------------------------
+ QUERY PLAN
+----------------------------------------------------------------------------------
Sort
Output: c2, c6, (sum(c1))
Sort Key: ft1.c2, ft1.c6
- -> GroupAggregate
+ -> HashAggregate
Output: c2, c6, sum(c1)
- Group Key: ft1.c2
- Sort Key: ft1.c6
- Group Key: ft1.c6
+ Hash Key: ft1.c2
+ Hash Key: ft1.c6
-> Foreign Scan on public.ft1
Output: c2, c6, c1
- Remote SQL: SELECT "C 1", c2, c6 FROM "S 1"."T 1" WHERE ((c2 < 3)) ORDER BY c2 ASC NULLS LAST
-(11 rows)
+ Remote SQL: SELECT "C 1", c2, c6 FROM "S 1"."T 1" WHERE ((c2 < 3))
+(10 rows)
select c2, c6, sum(c1) from ft1 where c2 < 3 group by grouping sets(c2, c6) order by 1 nulls last, 2 nulls last;
c2 | c6 | sum
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index b4c7466666..e0004fe620 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1015,6 +1015,10 @@ ExplainNode(PlanState *planstate, List *ancestors,
pname = "HashAggregate";
strategy = "Hashed";
break;
+ case AGG_MIXED:
+ pname = "MixedAggregate";
+ strategy = "Mixed";
+ break;
default:
pname = "Aggregate ???";
strategy = "???";
@@ -1978,6 +1982,19 @@ show_grouping_set_keys(PlanState *planstate,
ListCell *lc;
List *gsets = aggnode->groupingSets;
AttrNumber *keycols = aggnode->grpColIdx;
+ const char *keyname;
+ const char *keysetname;
+
+ if (aggnode->aggstrategy == AGG_HASHED || aggnode->aggstrategy == AGG_MIXED)
+ {
+ keyname = "Hash Key";
+ keysetname = "Hash Keys";
+ }
+ else
+ {
+ keyname = "Group Key";
+ keysetname = "Group Keys";
+ }
ExplainOpenGroup("Grouping Set", NULL, true, es);
@@ -1992,7 +2009,7 @@ show_grouping_set_keys(PlanState *planstate,
es->indent++;
}
- ExplainOpenGroup("Group Keys", "Group Keys", false, es);
+ ExplainOpenGroup(keysetname, keysetname, false, es);
foreach(lc, gsets)
{
@@ -2016,12 +2033,12 @@ show_grouping_set_keys(PlanState *planstate,
}
if (!result && es->format == EXPLAIN_FORMAT_TEXT)
- ExplainPropertyText("Group Key", "()", es);
+ ExplainPropertyText(keyname, "()", es);
else
- ExplainPropertyListNested("Group Key", result, es);
+ ExplainPropertyListNested(keyname, result, es);
}
- ExplainCloseGroup("Group Keys", "Group Keys", false, es);
+ ExplainCloseGroup(keysetname, keysetname, false, es);
if (sortnode && es->format == EXPLAIN_FORMAT_TEXT)
es->indent--;
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 3207ee460c..be371af99d 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -122,12 +122,19 @@
* specific).
*
* Where more complex grouping sets are used, we break them down into
- * "phases", where each phase has a different sort order. During each
- * phase but the last, the input tuples are additionally stored in a
- * tuplesort which is keyed to the next phase's sort order; during each
- * phase but the first, the input tuples are drawn from the previously
- * sorted data. (The sorting of the data for the first phase is handled by
- * the planner, as it might be satisfied by underlying nodes.)
+ * "phases", where each phase has a different sort order (except phase 0
+ * which is reserved for hashing). During each phase but the last, the
+ * input tuples are additionally stored in a tuplesort which is keyed to the
+ * next phase's sort order; during each phase but the first, the input
+ * tuples are drawn from the previously sorted data. (The sorting of the
+ * data for the first phase is handled by the planner, as it might be
+ * satisfied by underlying nodes.)
+ *
+ * Hashing can be mixed with sorted grouping. To do this, we have an
+ * AGG_MIXED strategy that populates the hashtables during the first sorted
+ * phase, and switches to reading them out after completing all sort phases.
+ * We can also support AGG_HASHED with multiple hash tables and no sorting
+ * at all.
*
* From the perspective of aggregate transition and final functions, the
* only issue regarding grouping sets is this: a single call site (flinfo)
@@ -139,7 +146,54 @@
* sensitive to the grouping set for which the aggregate function is
* currently being called.
*
- * TODO: AGG_HASHED doesn't support multiple grouping sets yet.
+ * Plan structure:
+ *
+ * What we get from the planner is actually one "real" Agg node which is
+ * part of the plan tree proper, but which optionally has an additional list
+ * of Agg nodes hung off the side via the "chain" field. This is because an
+ * Agg node happens to be a convenient representation of all the data we
+ * need for grouping sets.
+ *
+ * For many purposes, we treat the "real" node as if it were just the first
+ * node in the chain. The chain must be ordered such that hashed entries
+ * come before sorted/plain entries; the real node is marked AGG_MIXED if
+ * both types are present (in which case the real node describes one of
+ * the hashed groupings; further AGG_HASHED nodes may follow in the
+ * chain, followed in turn by AGG_SORTED nodes or one AGG_PLAIN node). If
+ * the real node is marked AGG_HASHED or AGG_SORTED, then all the chained
+ * nodes must be of the same type; if it is AGG_PLAIN, there can be no
+ * chained nodes.
+ *
+ * We collect all hashed nodes into a single "phase", numbered 0, and create
+ * a sorted phase (numbered 1..n) for each AGG_SORTED or AGG_PLAIN node.
+ * Phase 0 is allocated even if there are no hashes, but remains unused in
+ * that case.
+ *
+ * AGG_HASHED nodes actually refer to only a single grouping set each,
+ * because for each hashed grouping we need a separate grpColIdx and
+ * numGroups estimate. AGG_SORTED nodes represent a "rollup", a list of
+ * grouping sets that share a sort order. Each AGG_SORTED node other than
+ * the first one has an associated Sort node which describes the sort order
+ * to be used; the first sorted node takes its input from the outer subtree,
+ * which the planner has already arranged to provide ordered data.
+ *
+ * Memory and ExprContext usage:
+ *
+ * Because we're accumulating aggregate values across input rows, we need to
+ * use more memory contexts than just simple input/output tuple contexts.
+ * In fact, for a rollup, we need a separate context for each grouping set
+ * so that we can reset the inner (finer-grained) aggregates on their group
+ * boundaries while continuing to accumulate values for outer
+ * (coarser-grained) groupings. On top of this, we might be simultaneously
+ * populating hashtables; however, we only need one context for all the
+ * hashtables.
+ *
+ * So we create an array, aggcontexts, with an ExprContext for each grouping
+ * set in the largest rollup that we're going to process, and use the
+ * per-tuple memory context of those ExprContexts to store the aggregate
+ * transition values. hashcontext is the single context created to support
+ * all hash tables.
+ *
*
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
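To make the phase numbering concrete, here is a minimal standalone
sketch of the counting rule described above (the chain below is
hypothetical, and this mirrors, rather than is, the ExecInitAgg code):

#include <stdio.h>

typedef enum {AGG_PLAIN, AGG_SORTED, AGG_HASHED, AGG_MIXED} AggStrategy;

int
main(void)
{
	/* assumed: a MixedAggregate whose chain holds one more hashed
	 * grouping and one sorted rollup */
	AggStrategy chain[] = {AGG_HASHED, AGG_SORTED};
	int			numPhases = 1;	/* phase 0 always exists when hashing */
	int			numHashes = 1;	/* the real AGG_MIXED node is hashed */
	int			i;

	for (i = 0; i < 2; i++)
	{
		if (chain[i] == AGG_HASHED)
			++numHashes;		/* joins phase 0 */
		else
			++numPhases;		/* gets its own sorted phase */
	}

	/* prints: 2 phases (0 and 1), 2 hash tables */
	printf("%d phases (0 and %d), %d hash tables\n",
		   numPhases, numPhases - 1, numHashes);
	return 0;
}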
@@ -432,6 +486,7 @@ typedef struct AggStatePerGroupData
*/
typedef struct AggStatePerPhaseData
{
+ AggStrategy aggstrategy; /* strategy for this phase */
int numsets; /* number of grouping sets (or 0) */
int *gset_lengths; /* lengths of grouping sets */
Bitmapset **grouped_cols; /* column groupings for rollup */
@@ -440,7 +495,30 @@ typedef struct AggStatePerPhaseData
Sort *sortnode; /* Sort node for input ordering for phase */
} AggStatePerPhaseData;
+/*
+ * AggStatePerHashData - per-hashtable state
+ *
+ * When doing grouping sets with hashing, we have one of these for each
+ * grouping set. (When doing hashing without grouping sets, we have just one of
+ * them.)
+ */
+typedef struct AggStatePerHashData
+{
+ TupleHashTable hashtable; /* hash table with one entry per group */
+ TupleHashIterator hashiter; /* for iterating through hash table */
+ TupleTableSlot *hashslot; /* slot for loading hash table */
+ FmgrInfo *hashfunctions; /* per-grouping-field hash fns */
+ FmgrInfo *eqfunctions; /* per-grouping-field equality fns */
+ int numCols; /* number of hash key columns */
+ int numhashGrpCols; /* number of columns in hash table */
+ int largestGrpColIdx; /* largest col required for hashing */
+ AttrNumber *hashGrpColIdxInput; /* hash col indices in input slot */
+ AttrNumber *hashGrpColIdxHash; /* indices in hashtbl tuples */
+ Agg *aggnode; /* original Agg node, for numGroups etc. */
+} AggStatePerHashData;
+
+static void select_current_set(AggState *aggstate, int setno, bool is_hash);
static void initialize_phase(AggState *aggstate, int newphase);
static TupleTableSlot *fetch_input_tuple(AggState *aggstate);
static void initialize_aggregates(AggState *aggstate,
@@ -449,7 +527,8 @@ static void initialize_aggregates(AggState *aggstate,
static void advance_transition_function(AggState *aggstate,
AggStatePerTrans pertrans,
AggStatePerGroup pergroupstate);
-static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup);
+static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup,
+ AggStatePerGroup *pergroups);
static void advance_combine_function(AggState *aggstate,
AggStatePerTrans pertrans,
AggStatePerGroup pergroupstate);
@@ -473,14 +552,13 @@ static void prepare_projection_slot(AggState *aggstate,
int currentSet);
static void finalize_aggregates(AggState *aggstate,
AggStatePerAgg peragg,
- AggStatePerGroup pergroup,
- int currentSet);
+ AggStatePerGroup pergroup);
static TupleTableSlot *project_aggregates(AggState *aggstate);
static Bitmapset *find_unaggregated_cols(AggState *aggstate);
static bool find_unaggregated_cols_walker(Node *node, Bitmapset **colnos);
static void build_hash_table(AggState *aggstate);
-static TupleHashEntryData *lookup_hash_entry(AggState *aggstate,
- TupleTableSlot *inputslot);
+static TupleHashEntryData *lookup_hash_entry(AggState *aggstate);
+static AggStatePerGroup *lookup_hash_entries(AggState *aggstate);
static TupleTableSlot *agg_retrieve_direct(AggState *aggstate);
static void agg_fill_hash_table(AggState *aggstate);
static TupleTableSlot *agg_retrieve_hash_table(AggState *aggstate);
@@ -501,13 +579,31 @@ static int find_compatible_pertrans(AggState *aggstate, Aggref *newagg,
/*
- * Switch to phase "newphase", which must either be 0 (to reset) or
+ * Select the current grouping set; affects current_set and
+ * curaggcontext.
+ */
+static void
+select_current_set(AggState *aggstate, int setno, bool is_hash)
+{
+ if (is_hash)
+ aggstate->curaggcontext = aggstate->hashcontext;
+ else
+ aggstate->curaggcontext = aggstate->aggcontexts[setno];
+
+ aggstate->current_set = setno;
+}
+
+/*
+ * Switch to phase "newphase", which must be either 0 or 1 (to reset), or
* current_phase + 1. Juggle the tuplesorts accordingly.
+ *
+ * Phase 0 is for hashing, which we currently handle last in the AGG_MIXED
+ * case, so when entering phase 0, all we need to do is drop open sorts.
*/
static void
initialize_phase(AggState *aggstate, int newphase)
{
- Assert(newphase == 0 || newphase == aggstate->current_phase + 1);
+ Assert(newphase <= 1 || newphase == aggstate->current_phase + 1);
/*
* Whatever the previous state, we're now done with whatever input
@@ -519,7 +615,7 @@ initialize_phase(AggState *aggstate, int newphase)
aggstate->sort_in = NULL;
}
- if (newphase == 0)
+ if (newphase <= 1)
{
/*
* Discard any existing output tuplesort.
@@ -546,7 +642,7 @@ initialize_phase(AggState *aggstate, int newphase)
* If this isn't the last phase, we need to sort appropriately for the
* next phase in sequence.
*/
- if (newphase < aggstate->numphases - 1)
+ if (newphase > 0 && newphase < aggstate->numphases - 1)
{
Sort *sortnode = aggstate->phases[newphase + 1].sortnode;
PlanState *outerNode = outerPlanState(aggstate);
@@ -567,7 +663,7 @@ initialize_phase(AggState *aggstate, int newphase)
}
/*
- * Fetch a tuple from either the outer plan (for phase 0) or from the sorter
+ * Fetch a tuple from either the outer plan (for phase 1) or from the sorter
* populated by the previous phase. Copy it to the sorter for the next phase
* if any.
*/
@@ -595,8 +691,8 @@ fetch_input_tuple(AggState *aggstate)
/*
* (Re)Initialize an individual aggregate.
*
- * This function handles only one grouping set (already set in
- * aggstate->current_set).
+ * This function handles only one grouping set, already set in
+ * aggstate->current_set.
*
* When called, CurrentMemoryContext should be the per-query context.
*/
@@ -653,7 +749,7 @@ initialize_aggregate(AggState *aggstate, AggStatePerTrans pertrans,
MemoryContext oldContext;
oldContext = MemoryContextSwitchTo(
- aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+ aggstate->curaggcontext->ecxt_per_tuple_memory);
pergroupstate->transValue = datumCopy(pertrans->initValue,
pertrans->transtypeByVal,
pertrans->transtypeLen);
@@ -676,8 +772,9 @@ initialize_aggregate(AggState *aggstate, AggStatePerTrans pertrans,
*
* If there are multiple grouping sets, we initialize only the first numReset
* of them (the grouping sets are ordered so that the most specific one, which
- * is reset most often, is first). As a convenience, if numReset is < 1, we
- * reinitialize all sets.
+ * is reset most often, is first). As a convenience, if numReset is 0, we
+ * reinitialize all sets. Pass numReset as -1 to initialize a hashtable
+ * entry, in which case the caller must have used select_current_set first.
*
* When called, CurrentMemoryContext should be the per-query context.
*/
@@ -689,25 +786,37 @@ initialize_aggregates(AggState *aggstate,
int transno;
int numGroupingSets = Max(aggstate->phase->numsets, 1);
int setno = 0;
+ int numTrans = aggstate->numtrans;
AggStatePerTrans transstates = aggstate->pertrans;
- if (numReset < 1)
+ if (numReset == 0)
numReset = numGroupingSets;
- for (transno = 0; transno < aggstate->numtrans; transno++)
+ for (transno = 0; transno < numTrans; transno++)
{
AggStatePerTrans pertrans = &transstates[transno];
- for (setno = 0; setno < numReset; setno++)
+ if (numReset < 0)
{
AggStatePerGroup pergroupstate;
- pergroupstate = &pergroup[transno + (setno * (aggstate->numtrans))];
-
- aggstate->current_set = setno;
+ pergroupstate = &pergroup[transno];
initialize_aggregate(aggstate, pertrans, pergroupstate);
}
+ else
+ {
+ for (setno = 0; setno < numReset; setno++)
+ {
+ AggStatePerGroup pergroupstate;
+
+ pergroupstate = &pergroup[transno + (setno * numTrans)];
+
+ select_current_set(aggstate, setno, false);
+
+ initialize_aggregate(aggstate, pertrans, pergroupstate);
+ }
+ }
}
}
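The numReset convention just introduced, as a minimal standalone sketch
with assumed names (0 resets all sets, -1 resets the single row
belonging to a hash entry, anything else resets the first numReset
sets):

#include <stdio.h>

static void
sketch_reset(int numReset, int numGroupingSets)
{
	if (numReset < 0)
		printf("reset one hash-entry row (set pre-selected by caller)\n");
	else
	{
		if (numReset == 0)
			numReset = numGroupingSets;
		printf("reset grouping sets 0..%d\n", numReset - 1);
	}
}

int
main(void)
{
	sketch_reset(-1, 3);		/* new hashtable entry */
	sketch_reset(0, 3);			/* all three sets */
	sketch_reset(2, 3);			/* group boundary two levels up */
	return 0;
}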
@@ -757,7 +866,7 @@ advance_transition_function(AggState *aggstate,
* do not need to pfree the old transValue, since it's NULL.
*/
oldContext = MemoryContextSwitchTo(
- aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+ aggstate->curaggcontext->ecxt_per_tuple_memory);
pergroupstate->transValue = datumCopy(fcinfo->arg[1],
pertrans->transtypeByVal,
pertrans->transtypeLen);
@@ -807,7 +916,7 @@ advance_transition_function(AggState *aggstate,
{
if (!fcinfo->isnull)
{
- MemoryContextSwitchTo(aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+ MemoryContextSwitchTo(aggstate->curaggcontext->ecxt_per_tuple_memory);
if (DatumIsReadWriteExpandedObject(newVal,
false,
pertrans->transtypeLen) &&
@@ -838,17 +947,21 @@ advance_transition_function(AggState *aggstate,
/*
* Advance each aggregate transition state for one input tuple. The input
* tuple has been stored in tmpcontext->ecxt_outertuple, so that it is
- * accessible to ExecEvalExpr. pergroup is the array of per-group structs to
- * use (this might be in a hashtable entry).
+ * accessible to ExecEvalExpr.
+ *
+ * We have two sets of transition states to handle: one for sorted aggregation
+ * and one for hashed; we do them both here, to avoid multiple evaluation of
+ * the inputs.
*
* When called, CurrentMemoryContext should be the per-query context.
*/
static void
-advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
+advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup, AggStatePerGroup *pergroups)
{
int transno;
int setno = 0;
int numGroupingSets = Max(aggstate->phase->numsets, 1);
+ int numHashes = aggstate->num_hashes;
int numTrans = aggstate->numtrans;
TupleTableSlot *slot = aggstate->evalslot;
@@ -880,6 +993,7 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
{
/* DISTINCT and/or ORDER BY case */
Assert(slot->tts_nvalid >= (pertrans->numInputs + inputoff));
+ Assert(!pergroups);
/*
* If the transfn is strict, we want to check for nullity before
@@ -940,13 +1054,36 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
fcinfo->argnull[i + 1] = slot->tts_isnull[i + inputoff];
}
- for (setno = 0; setno < numGroupingSets; setno++)
+ if (pergroup)
{
- AggStatePerGroup pergroupstate = &pergroup[transno + (setno * numTrans)];
+ /* advance transition states for ordered grouping */
+
+ for (setno = 0; setno < numGroupingSets; setno++)
+ {
+ AggStatePerGroup pergroupstate;
- aggstate->current_set = setno;
+ select_current_set(aggstate, setno, false);
- advance_transition_function(aggstate, pertrans, pergroupstate);
+ pergroupstate = &pergroup[transno + (setno * numTrans)];
+
+ advance_transition_function(aggstate, pertrans, pergroupstate);
+ }
+ }
+
+ if (pergroups)
+ {
+ /* advance transition states for hashed grouping */
+
+ for (setno = 0; setno < numHashes; setno++)
+ {
+ AggStatePerGroup pergroupstate;
+
+ select_current_set(aggstate, setno, true);
+
+ pergroupstate = &pergroups[setno][transno];
+
+ advance_transition_function(aggstate, pertrans, pergroupstate);
+ }
}
}
}
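Schematically, the two targets handled above look like this (a minimal
sketch with invented types; the real code calls the transition function
rather than adding):

#include <stdio.h>

typedef struct {double transValue;} PerGroupState;

int
main(void)
{
	/* sorted side: flattened [numSets][numTrans] array */
	PerGroupState sorted[2] = {{0}, {0}};	/* 2 sets x 1 trans */

	/* hashed side: one pointer per hashed set, each aimed at the
	 * state row stored inside that set's hash-table entry */
	PerGroupState row0 = {0}, row1 = {0};
	PerGroupState *hashed[2] = {&row0, &row1};

	int			transno = 0, numTrans = 1, setno;

	/* one input tuple advances every grouping set, sorted and hashed */
	for (setno = 0; setno < 2; setno++)
		sorted[transno + setno * numTrans].transValue += 1.0;
	for (setno = 0; setno < 2; setno++)
		hashed[setno][transno].transValue += 1.0;

	printf("%g %g\n", sorted[1].transValue, hashed[1][0].transValue);
	return 0;
}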
@@ -967,7 +1104,7 @@ combine_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
TupleTableSlot *slot;
/* combine not supported with grouping sets */
- Assert(aggstate->phase->numsets == 0);
+ Assert(aggstate->phase->numsets <= 1);
/* compute input for all aggregates */
slot = ExecProject(aggstate->evalproj);
@@ -1060,7 +1197,7 @@ advance_combine_function(AggState *aggstate,
if (!pertrans->transtypeByVal)
{
oldContext = MemoryContextSwitchTo(
- aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+ aggstate->curaggcontext->ecxt_per_tuple_memory);
pergroupstate->transValue = datumCopy(fcinfo->arg[1],
pertrans->transtypeByVal,
pertrans->transtypeLen);
@@ -1105,7 +1242,7 @@ advance_combine_function(AggState *aggstate,
{
if (!fcinfo->isnull)
{
- MemoryContextSwitchTo(aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+ MemoryContextSwitchTo(aggstate->curaggcontext->ecxt_per_tuple_memory);
if (DatumIsReadWriteExpandedObject(newVal,
false,
pertrans->transtypeLen) &&
@@ -1559,15 +1696,16 @@ prepare_projection_slot(AggState *aggstate, TupleTableSlot *slot, int currentSet
/*
* Compute the final value of all aggregates for one group.
*
- * This function handles only one grouping set at a time.
+ * This function handles only one grouping set at a time, which the caller must
+ * have selected. It's also the caller's responsibility to adjust the supplied
+ * pergroup parameter to point to the current set's transvalues.
*
* Results are stored in the output econtext aggvalues/aggnulls.
*/
static void
finalize_aggregates(AggState *aggstate,
AggStatePerAgg peraggs,
- AggStatePerGroup pergroup,
- int currentSet)
+ AggStatePerGroup pergroup)
{
ExprContext *econtext = aggstate->ss.ps.ps_ExprContext;
Datum *aggvalues = econtext->ecxt_aggvalues;
@@ -1575,11 +1713,6 @@ finalize_aggregates(AggState *aggstate,
int aggno;
int transno;
- Assert(currentSet == 0 ||
- ((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED);
-
- aggstate->current_set = currentSet;
-
/*
* If there were any DISTINCT and/or ORDER BY aggregates, sort their
* inputs and run the transition functions.
@@ -1589,11 +1722,12 @@ finalize_aggregates(AggState *aggstate,
AggStatePerTrans pertrans = &aggstate->pertrans[transno];
AggStatePerGroup pergroupstate;
- pergroupstate = &pergroup[transno + (currentSet * (aggstate->numtrans))];
+ pergroupstate = &pergroup[transno];
if (pertrans->numSortCols > 0)
{
- Assert(((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED);
+ Assert(aggstate->aggstrategy != AGG_HASHED &&
+ aggstate->aggstrategy != AGG_MIXED);
if (pertrans->numInputs == 1)
process_ordered_aggregate_single(aggstate,
@@ -1615,7 +1749,7 @@ finalize_aggregates(AggState *aggstate,
int transno = peragg->transno;
AggStatePerGroup pergroupstate;
- pergroupstate = &pergroup[transno + (currentSet * (aggstate->numtrans))];
+ pergroupstate = &pergroup[transno];
if (DO_AGGSPLIT_SKIPFINAL(aggstate->aggsplit))
finalize_partialaggregate(aggstate, peragg, pergroupstate,
@@ -1697,7 +1831,7 @@ find_unaggregated_cols_walker(Node *node, Bitmapset **colnos)
}
/*
- * Initialize the hash table to empty.
+ * Initialize the hash table(s) to empty.
*
* To implement hashed aggregation, we need a hashtable that stores a
* representative tuple and an array of AggStatePerGroup structs for each
@@ -1705,29 +1839,40 @@ find_unaggregated_cols_walker(Node *node, Bitmapset **colnos)
* GROUP BY columns. The per-group data is allocated in lookup_hash_entry(),
* for each entry.
*
- * The hash table always lives in the aggcontext memory context.
+ * We have a separate hashtable and associated perhash data structure for each
+ * grouping set for which we're doing hashing.
+ *
+ * The hash tables always live in the hashcontext's per-tuple memory context
+ * (there is only one of these for all tables together, since they are all
+ * reset at the same time).
*/
static void
build_hash_table(AggState *aggstate)
{
- Agg *node = (Agg *) aggstate->ss.ps.plan;
MemoryContext tmpmem = aggstate->tmpcontext->ecxt_per_tuple_memory;
Size additionalsize;
+ int i;
- Assert(node->aggstrategy == AGG_HASHED);
- Assert(node->numGroups > 0);
+ Assert(aggstate->aggstrategy == AGG_HASHED || aggstate->aggstrategy == AGG_MIXED);
- additionalsize = aggstate->numaggs * sizeof(AggStatePerGroupData);
+ additionalsize = aggstate->numtrans * sizeof(AggStatePerGroupData);
- aggstate->hashtable = BuildTupleHashTable(node->numCols,
- aggstate->hashGrpColIdxHash,
- aggstate->phase->eqfunctions,
- aggstate->hashfunctions,
- node->numGroups,
- additionalsize,
- aggstate->aggcontexts[0]->ecxt_per_tuple_memory,
- tmpmem,
+ for (i = 0; i < aggstate->num_hashes; ++i)
+ {
+ AggStatePerHash perhash = &aggstate->perhash[i];
+
+ Assert(perhash->aggnode->numGroups > 0);
+
+ perhash->hashtable = BuildTupleHashTable(perhash->numCols,
+ perhash->hashGrpColIdxHash,
+ perhash->eqfunctions,
+ perhash->hashfunctions,
+ perhash->aggnode->numGroups,
+ additionalsize,
+ aggstate->hashcontext->ecxt_per_tuple_memory,
+ tmpmem,
DO_AGGSPLIT_SKIPFINAL(aggstate->aggsplit));
+ }
}
/*
@@ -1750,72 +1895,98 @@ build_hash_table(AggState *aggstate)
* the array is preserved over ExecReScanAgg, so we allocate it in the
* per-query context (unlike the hash table itself).
*/
-static List *
+static void
find_hash_columns(AggState *aggstate)
{
- Agg *node = (Agg *) aggstate->ss.ps.plan;
- Bitmapset *colnos;
- List *collist;
- TupleDesc hashDesc;
+ Bitmapset *base_colnos;
List *outerTlist = outerPlanState(aggstate)->plan->targetlist;
- List *hashTlist = NIL;
- int i;
-
- aggstate->largestGrpColIdx = 0;
+ int numHashes = aggstate->num_hashes;
+ int j;
/* Find Vars that will be needed in tlist and qual */
- colnos = find_unaggregated_cols(aggstate);
- /* Add in all the grouping columns */
- for (i = 0; i < node->numCols; i++)
- colnos = bms_add_member(colnos, node->grpColIdx[i]);
- /* Convert to list, using lcons so largest element ends up first */
- collist = NIL;
+ base_colnos = find_unaggregated_cols(aggstate);
- aggstate->hashGrpColIdxInput =
- palloc(bms_num_members(colnos) * sizeof(AttrNumber));
- aggstate->hashGrpColIdxHash =
- palloc(node->numCols * sizeof(AttrNumber));
-
- /*
- * First build mapping for columns directly hashed. These are the first,
- * because they'll be accessed when computing hash values and comparing
- * tuples for exact matches. We also build simple mapping for
- * execGrouping, so it knows where to find the to-be-hashed / compared
- * columns in the input.
- */
- for (i = 0; i < node->numCols; i++)
+ for (j = 0; j < numHashes; ++j)
{
- aggstate->hashGrpColIdxInput[i] = node->grpColIdx[i];
- aggstate->hashGrpColIdxHash[i] = i + 1;
- aggstate->numhashGrpCols++;
- /* delete already mapped columns */
- bms_del_member(colnos, node->grpColIdx[i]);
- }
+ AggStatePerHash perhash = &aggstate->perhash[j];
+ Bitmapset *colnos = bms_copy(base_colnos);
+ AttrNumber *grpColIdx = perhash->aggnode->grpColIdx;
+ List *hashTlist = NIL;
+ TupleDesc hashDesc;
+ int i;
- /* and add the remaining columns */
- while ((i = bms_first_member(colnos)) >= 0)
- {
- aggstate->hashGrpColIdxInput[aggstate->numhashGrpCols] = i;
- aggstate->numhashGrpCols++;
- }
+ perhash->largestGrpColIdx = 0;
- /* and build a tuple descriptor for the hashtable */
- for (i = 0; i < aggstate->numhashGrpCols; i++)
- {
- int varNumber = aggstate->hashGrpColIdxInput[i] - 1;
+ /*
+ * If we're doing grouping sets, then some Vars might be referenced in
+ * tlist/qual for the benefit of other grouping sets, but not needed
+ * when hashing; i.e. prepare_projection_slot will null them out, so
+ * there'd be no point storing them. Use prepare_projection_slot's
+ * logic to determine which.
+ */
+ if (aggstate->phases[0].grouped_cols)
+ {
+ Bitmapset *grouped_cols = aggstate->phases[0].grouped_cols[j];
+ ListCell *lc;
- hashTlist = lappend(hashTlist, list_nth(outerTlist, varNumber));
- aggstate->largestGrpColIdx =
- Max(varNumber + 1, aggstate->largestGrpColIdx);
- }
+ foreach(lc, aggstate->all_grouped_cols)
+ {
+ int attnum = lfirst_int(lc);
- hashDesc = ExecTypeFromTL(hashTlist, false);
- ExecSetSlotDescriptor(aggstate->hashslot, hashDesc);
+ if (!bms_is_member(attnum, grouped_cols))
+ colnos = bms_del_member(colnos, attnum);
+ }
+ }
+ /* Add in all the grouping columns */
+ for (i = 0; i < perhash->numCols; i++)
+ colnos = bms_add_member(colnos, grpColIdx[i]);
- list_free(hashTlist);
- bms_free(colnos);
+ perhash->hashGrpColIdxInput =
+ palloc(bms_num_members(colnos) * sizeof(AttrNumber));
+ perhash->hashGrpColIdxHash =
+ palloc(perhash->numCols * sizeof(AttrNumber));
+
+ /*
+ * First build mapping for columns directly hashed. These are the
+ * first, because they'll be accessed when computing hash values and
+ * comparing tuples for exact matches. We also build simple mapping
+ * for execGrouping, so it knows where to find the to-be-hashed /
+ * compared columns in the input.
+ */
+ for (i = 0; i < perhash->numCols; i++)
+ {
+ perhash->hashGrpColIdxInput[i] = grpColIdx[i];
+ perhash->hashGrpColIdxHash[i] = i + 1;
+ perhash->numhashGrpCols++;
+ /* delete already mapped columns */
+ bms_del_member(colnos, grpColIdx[i]);
+ }
- return collist;
+ /* and add the remaining columns */
+ while ((i = bms_first_member(colnos)) >= 0)
+ {
+ perhash->hashGrpColIdxInput[perhash->numhashGrpCols] = i;
+ perhash->numhashGrpCols++;
+ }
+
+ /* and build a tuple descriptor for the hashtable */
+ for (i = 0; i < perhash->numhashGrpCols; i++)
+ {
+ int varNumber = perhash->hashGrpColIdxInput[i] - 1;
+
+ hashTlist = lappend(hashTlist, list_nth(outerTlist, varNumber));
+ perhash->largestGrpColIdx =
+ Max(varNumber + 1, perhash->largestGrpColIdx);
+ }
+
+ hashDesc = ExecTypeFromTL(hashTlist, false);
+ ExecSetSlotDescriptor(perhash->hashslot, hashDesc);
+
+ list_free(hashTlist);
+ bms_free(colnos);
+ }
+
+ bms_free(base_colnos);
}
/*
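A worked example of the mapping this builds (column numbers assumed,
not taken from the regression tests): if the outer tlist has six
columns, the hashed grouping set is (c2, c6) so that grpColIdx = {2, 6},
and column 4 is also needed by the tlist/qual, then:

	hashGrpColIdxInput = {2, 6, 4}   input attnos, key columns first
	hashGrpColIdxHash  = {1, 2}      where the key columns sit in the
	                                 hashtable's own tuples
	numhashGrpCols     = 3
	largestGrpColIdx   = 6           how many input attrs to deform

so hashing and comparison touch only the first two hashtable columns,
while column 4 rides along for later projection.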
@@ -1840,26 +2011,30 @@ hash_agg_entry_size(int numAggs)
}
/*
- * Find or create a hashtable entry for the tuple group containing the
- * given tuple.
+ * Find or create a hashtable entry for the tuple group containing the current
+ * tuple (already set in tmpcontext's outertuple slot), in the current grouping
+ * set (which the caller must have selected - note that initialize_aggregate
+ * depends on this).
*
* When called, CurrentMemoryContext should be the per-query context.
*/
static TupleHashEntryData *
-lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot)
+lookup_hash_entry(AggState *aggstate)
{
- TupleTableSlot *hashslot = aggstate->hashslot;
+ TupleTableSlot *inputslot = aggstate->tmpcontext->ecxt_outertuple;
+ AggStatePerHash perhash = &aggstate->perhash[aggstate->current_set];
+ TupleTableSlot *hashslot = perhash->hashslot;
TupleHashEntryData *entry;
bool isnew;
- int i;
+ int i;
/* transfer just the needed columns into hashslot */
- slot_getsomeattrs(inputslot, aggstate->largestGrpColIdx);
+ slot_getsomeattrs(inputslot, perhash->largestGrpColIdx);
ExecClearTuple(hashslot);
- for (i = 0; i < aggstate->numhashGrpCols; i++)
+ for (i = 0; i < perhash->numhashGrpCols; i++)
{
- int varNumber = aggstate->hashGrpColIdxInput[i] - 1;
+ int varNumber = perhash->hashGrpColIdxInput[i] - 1;
hashslot->tts_values[i] = inputslot->tts_values[varNumber];
hashslot->tts_isnull[i] = inputslot->tts_isnull[varNumber];
@@ -1867,22 +2042,44 @@ lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot)
ExecStoreVirtualTuple(hashslot);
/* find or create the hashtable entry using the filtered tuple */
- entry = LookupTupleHashEntry(aggstate->hashtable, hashslot, &isnew);
+ entry = LookupTupleHashEntry(perhash->hashtable, hashslot, &isnew);
if (isnew)
{
entry->additional = (AggStatePerGroup)
- MemoryContextAlloc(aggstate->hashtable->tablecxt,
+ MemoryContextAlloc(perhash->hashtable->tablecxt,
sizeof(AggStatePerGroupData) * aggstate->numtrans);
/* initialize aggregates for new tuple group */
initialize_aggregates(aggstate, (AggStatePerGroup) entry->additional,
- 0);
+ -1);
}
return entry;
}
/*
+ * Look up hash entries for the current tuple in all hashed grouping sets,
+ * returning an array of pergroup pointers suitable for advance_aggregates.
+ *
+ * Be aware that lookup_hash_entry can reset the tmpcontext.
+ */
+static AggStatePerGroup *
+lookup_hash_entries(AggState *aggstate)
+{
+ int numHashes = aggstate->num_hashes;
+ AggStatePerGroup *pergroup = aggstate->hash_pergroup;
+ int setno;
+
+ for (setno = 0; setno < numHashes; setno++)
+ {
+ select_current_set(aggstate, setno, true);
+ pergroup[setno] = lookup_hash_entry(aggstate)->additional;
+ }
+
+ return pergroup;
+}
+
+/*
* ExecAgg -
*
* ExecAgg receives tuples from its outer subplan and aggregates over
@@ -1898,19 +2095,22 @@ lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot)
TupleTableSlot *
ExecAgg(AggState *node)
{
- TupleTableSlot *result;
+ TupleTableSlot *result = NULL;
if (!node->agg_done)
{
/* Dispatch based on strategy */
- switch (node->phase->aggnode->aggstrategy)
+ switch (node->phase->aggstrategy)
{
case AGG_HASHED:
if (!node->table_filled)
agg_fill_hash_table(node);
+ /* FALLTHROUGH */
+ case AGG_MIXED:
result = agg_retrieve_hash_table(node);
break;
- default:
+ case AGG_PLAIN:
+ case AGG_SORTED:
result = agg_retrieve_direct(node);
break;
}
@@ -1933,6 +2133,7 @@ agg_retrieve_direct(AggState *aggstate)
ExprContext *tmpcontext;
AggStatePerAgg peragg;
AggStatePerGroup pergroup;
+ AggStatePerGroup *hash_pergroups = NULL;
TupleTableSlot *outerslot;
TupleTableSlot *firstSlot;
TupleTableSlot *result;
@@ -2019,6 +2220,19 @@ agg_retrieve_direct(AggState *aggstate)
node = aggstate->phase->aggnode;
numReset = numGroupingSets;
}
+ else if (aggstate->aggstrategy == AGG_MIXED)
+ {
+ /*
+ * Mixed mode; we've output all the grouped stuff and have
+ * full hashtables, so switch to outputting those.
+ */
+ initialize_phase(aggstate, 0);
+ aggstate->table_filled = true;
+ ResetTupleHashIterator(aggstate->perhash[0].hashtable,
+ &aggstate->perhash[0].hashiter);
+ select_current_set(aggstate, 0, true);
+ return agg_retrieve_hash_table(aggstate);
+ }
else
{
aggstate->agg_done = true;
@@ -2055,7 +2269,7 @@ agg_retrieve_direct(AggState *aggstate)
*----------
*/
if (aggstate->input_done ||
- (node->aggstrategy == AGG_SORTED &&
+ (node->aggstrategy != AGG_PLAIN &&
aggstate->projected_set != -1 &&
aggstate->projected_set < (numGroupingSets - 1) &&
nextSetSize > 0 &&
@@ -2168,10 +2382,22 @@ agg_retrieve_direct(AggState *aggstate)
*/
for (;;)
{
+ /*
+ * During phase 1 of a mixed agg (and only then), advance_aggregates
+ * must update the hashtables as well.
+ */
+ if (aggstate->aggstrategy == AGG_MIXED &&
+ aggstate->current_phase == 1)
+ {
+ hash_pergroups = lookup_hash_entries(aggstate);
+ }
+ else
+ hash_pergroups = NULL;
+
if (DO_AGGSPLIT_COMBINE(aggstate->aggsplit))
combine_aggregates(aggstate, pergroup);
else
- advance_aggregates(aggstate, pergroup);
+ advance_aggregates(aggstate, pergroup, hash_pergroups);
/* Reset per-input-tuple context after each tuple */
ResetExprContext(tmpcontext);
@@ -2198,7 +2424,7 @@ agg_retrieve_direct(AggState *aggstate)
* If we are grouping, check whether we've crossed a group
* boundary.
*/
- if (node->aggstrategy == AGG_SORTED)
+ if (node->aggstrategy != AGG_PLAIN)
{
if (!execTuplesMatch(firstSlot,
outerslot,
@@ -2231,7 +2457,11 @@ agg_retrieve_direct(AggState *aggstate)
prepare_projection_slot(aggstate, econtext->ecxt_outertuple, currentSet);
- finalize_aggregates(aggstate, peragg, pergroup, currentSet);
+ select_current_set(aggstate, currentSet, false);
+
+ finalize_aggregates(aggstate,
+ peragg,
+ pergroup + (currentSet * aggstate->numtrans));
/*
* If there's no row to project right now, we must continue rather
@@ -2247,21 +2477,13 @@ agg_retrieve_direct(AggState *aggstate)
}
/*
- * ExecAgg for hashed case: phase 1, read input and build hash table
+ * ExecAgg for hashed case: read input and build hash table
*/
static void
agg_fill_hash_table(AggState *aggstate)
{
- ExprContext *tmpcontext;
- TupleHashEntryData *entry;
TupleTableSlot *outerslot;
-
- /*
- * get state info from node
- *
- * tmpcontext is the per-input-tuple expression context
- */
- tmpcontext = aggstate->tmpcontext;
+ ExprContext *tmpcontext = aggstate->tmpcontext;
/*
* Process each outer-plan tuple, and then fetch the next one, until we
@@ -2269,32 +2491,40 @@ agg_fill_hash_table(AggState *aggstate)
*/
for (;;)
{
+ AggStatePerGroup *pergroups;
+
outerslot = fetch_input_tuple(aggstate);
if (TupIsNull(outerslot))
break;
- /* set up for advance_aggregates call */
+
+ /* set up for lookup_hash_entries and advance_aggregates */
tmpcontext->ecxt_outertuple = outerslot;
- /* Find or build hashtable entry for this tuple's group */
- entry = lookup_hash_entry(aggstate, outerslot);
+ /* Find or build hashtable entries */
+ pergroups = lookup_hash_entries(aggstate);
/* Advance the aggregates */
if (DO_AGGSPLIT_COMBINE(aggstate->aggsplit))
- combine_aggregates(aggstate, (AggStatePerGroup) entry->additional);
+ combine_aggregates(aggstate, pergroups[0]);
else
- advance_aggregates(aggstate, (AggStatePerGroup) entry->additional);
+ advance_aggregates(aggstate, NULL, pergroups);
- /* Reset per-input-tuple context after each tuple */
- ResetExprContext(tmpcontext);
+ /*
+ * Reset per-input-tuple context after each tuple, but note that the
+ * hash lookups do this too.
+ */
+ ResetExprContext(aggstate->tmpcontext);
}
aggstate->table_filled = true;
- /* Initialize to walk the hash table */
- ResetTupleHashIterator(aggstate->hashtable, &aggstate->hashiter);
+ /* Initialize to walk the first hash table */
+ select_current_set(aggstate, 0, true);
+ ResetTupleHashIterator(aggstate->perhash[0].hashtable,
+ &aggstate->perhash[0].hashiter);
}
/*
- * ExecAgg for hashed case: phase 2, retrieving groups from hash table
+ * ExecAgg for hashed case: retrieving groups from hash table
*/
static TupleTableSlot *
agg_retrieve_hash_table(AggState *aggstate)
@@ -2305,17 +2535,22 @@ agg_retrieve_hash_table(AggState *aggstate)
TupleHashEntryData *entry;
TupleTableSlot *firstSlot;
TupleTableSlot *result;
- TupleTableSlot *hashslot;
+ AggStatePerHash perhash;
/*
- * get state info from node
+ * get state info from node.
+ *
+ * econtext is the per-output-tuple expression context.
*/
- /* econtext is the per-output-tuple expression context */
econtext = aggstate->ss.ps.ps_ExprContext;
peragg = aggstate->peragg;
firstSlot = aggstate->ss.ss_ScanTupleSlot;
- hashslot = aggstate->hashslot;
+ /*
+ * Note that perhash (and therefore anything accessed through it) can
+ * change inside the loop, as we change between grouping sets.
+ */
+ perhash = &aggstate->perhash[aggstate->current_set];
/*
* We loop retrieving groups until we find one satisfying
@@ -2323,17 +2558,37 @@ agg_retrieve_hash_table(AggState *aggstate)
*/
while (!aggstate->agg_done)
{
- int i;
+ TupleTableSlot *hashslot = perhash->hashslot;
+ int i;
/*
* Find the next entry in the hash table
*/
- entry = ScanTupleHashTable(aggstate->hashtable, &aggstate->hashiter);
+ entry = ScanTupleHashTable(perhash->hashtable, &perhash->hashiter);
if (entry == NULL)
{
- /* No more entries in hashtable, so done */
- aggstate->agg_done = TRUE;
- return NULL;
+ int nextset = aggstate->current_set + 1;
+
+ if (nextset < aggstate->num_hashes)
+ {
+ /*
+ * Switch to next grouping set, reinitialize, and restart the
+ * loop.
+ */
+ select_current_set(aggstate, nextset, true);
+
+ perhash = &aggstate->perhash[aggstate->current_set];
+
+ ResetTupleHashIterator(perhash->hashtable, &perhash->hashiter);
+
+ continue;
+ }
+ else
+ {
+ /* No more hashtables, so done */
+ aggstate->agg_done = TRUE;
+ return NULL;
+ }
}
/*
@@ -2356,9 +2611,9 @@ agg_retrieve_hash_table(AggState *aggstate)
memset(firstSlot->tts_isnull, true,
firstSlot->tts_tupleDescriptor->natts * sizeof(bool));
- for (i = 0; i < aggstate->numhashGrpCols; i++)
+ for (i = 0; i < perhash->numhashGrpCols; i++)
{
- int varNumber = aggstate->hashGrpColIdxInput[i] - 1;
+ int varNumber = perhash->hashGrpColIdxInput[i] - 1;
firstSlot->tts_values[varNumber] = hashslot->tts_values[i];
firstSlot->tts_isnull[varNumber] = hashslot->tts_isnull[i];
@@ -2367,14 +2622,18 @@ agg_retrieve_hash_table(AggState *aggstate)
pergroup = (AggStatePerGroup) entry->additional;
- finalize_aggregates(aggstate, peragg, pergroup, 0);
-
/*
* Use the representative input tuple for any references to
* non-aggregated input columns in the qual and tlist.
*/
econtext->ecxt_outertuple = firstSlot;
+ prepare_projection_slot(aggstate,
+ econtext->ecxt_outertuple,
+ aggstate->current_set);
+
+ finalize_aggregates(aggstate, peragg, pergroup);
+
result = project_aggregates(aggstate);
if (result)
return result;
@@ -2388,7 +2647,8 @@ agg_retrieve_hash_table(AggState *aggstate)
* ExecInitAgg
*
* Creates the run-time information for the agg node produced by the
- * planner and initializes its outer subtree
+ * planner and initializes its outer subtree.
+ *
* -----------------
*/
AggState *
@@ -2403,14 +2663,18 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
transno,
aggno;
int phase;
+ int phaseidx;
List *combined_inputeval;
ListCell *l;
Bitmapset *all_grouped_cols = NULL;
int numGroupingSets = 1;
int numPhases;
+ int numHashes;
int column_offset;
int i = 0;
int j = 0;
+ bool use_hashing = (node->aggstrategy == AGG_HASHED ||
+ node->aggstrategy == AGG_MIXED);
/* check for unsupported flags */
Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
@@ -2425,9 +2689,9 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->aggs = NIL;
aggstate->numaggs = 0;
aggstate->numtrans = 0;
+ aggstate->aggstrategy = node->aggstrategy;
aggstate->aggsplit = node->aggsplit;
aggstate->maxsets = 0;
- aggstate->hashfunctions = NULL;
aggstate->projected_set = -1;
aggstate->current_set = 0;
aggstate->peragg = NULL;
@@ -2437,18 +2701,22 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->agg_done = false;
aggstate->pergroup = NULL;
aggstate->grp_firstTuple = NULL;
- aggstate->hashtable = NULL;
aggstate->sort_in = NULL;
aggstate->sort_out = NULL;
/*
+ * phases[0] always exists, but is a dummy in sorted/plain mode
+ */
+ numPhases = (use_hashing ? 1 : 2);
+ numHashes = (use_hashing ? 1 : 0);
+
+ /*
* Calculate the maximum number of grouping sets in any phase; this
- * determines the size of some allocations.
+ * determines the size of some allocations. Also calculate the number of
+ * phases, since all hashed/mixed nodes contribute to only a single phase.
*/
if (node->groupingSets)
{
- Assert(node->aggstrategy != AGG_HASHED);
-
numGroupingSets = list_length(node->groupingSets);
foreach(l, node->chain)
@@ -2457,22 +2725,32 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
numGroupingSets = Max(numGroupingSets,
list_length(agg->groupingSets));
+
+ /*
+ * additional AGG_HASHED aggs become part of phase 0, but all
+ * others add an extra phase.
+ */
+ if (agg->aggstrategy != AGG_HASHED)
+ ++numPhases;
+ else
+ ++numHashes;
}
}
aggstate->maxsets = numGroupingSets;
- aggstate->numphases = numPhases = 1 + list_length(node->chain);
+ aggstate->numphases = numPhases;
aggstate->aggcontexts = (ExprContext **)
palloc0(sizeof(ExprContext *) * numGroupingSets);
/*
* Create expression contexts. We need three or more, one for
- * per-input-tuple processing, one for per-output-tuple processing, and
- * one for each grouping set. The per-tuple memory context of the
- * per-grouping-set ExprContexts (aggcontexts) replaces the standalone
- * memory context formerly used to hold transition values. We cheat a
- * little by using ExecAssignExprContext() to build all of them.
+ * per-input-tuple processing, one for per-output-tuple processing, one
+ * for all the hashtables, and one for each grouping set. The per-tuple
+ * memory context of the per-grouping-set ExprContexts (aggcontexts)
+ * replaces the standalone memory context formerly used to hold transition
+ * values. We cheat a little by using ExecAssignExprContext() to build
+ * all of them.
*
* NOTE: the details of what is stored in aggcontexts and what is stored
* in the regular per-query memory context are driven by a simple
@@ -2488,14 +2766,21 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->aggcontexts[i] = aggstate->ss.ps.ps_ExprContext;
}
+ if (use_hashing)
+ {
+ ExecAssignExprContext(estate, &aggstate->ss.ps);
+ aggstate->hashcontext = aggstate->ss.ps.ps_ExprContext;
+ }
+
ExecAssignExprContext(estate, &aggstate->ss.ps);
/*
- * tuple table initialization
+ * tuple table initialization.
+ *
+ * For hashtables, we create some additional slots below.
*/
ExecInitScanTupleSlot(estate, &aggstate->ss);
ExecInitResultTupleSlot(estate, &aggstate->ss.ps);
- aggstate->hashslot = ExecInitExtraTupleSlot(estate);
aggstate->sort_slot = ExecInitExtraTupleSlot(estate);
/*
@@ -2560,19 +2845,26 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
* For each phase, prepare grouping set data and fmgr lookup data for
* compare functions. Accumulate all_grouped_cols in passing.
*/
-
aggstate->phases = palloc0(numPhases * sizeof(AggStatePerPhaseData));
- for (phase = 0; phase < numPhases; ++phase)
+ aggstate->num_hashes = numHashes;
+ if (numHashes)
+ {
+ aggstate->perhash = palloc0(sizeof(AggStatePerHashData) * numHashes);
+ aggstate->phases[0].numsets = 0;
+ aggstate->phases[0].gset_lengths = palloc(numHashes * sizeof(int));
+ aggstate->phases[0].grouped_cols = palloc(numHashes * sizeof(Bitmapset *));
+ }
+
+ phase = 0;
+ for (phaseidx = 0; phaseidx <= list_length(node->chain); ++phaseidx)
{
- AggStatePerPhase phasedata = &aggstate->phases[phase];
Agg *aggnode;
Sort *sortnode;
- int num_sets;
- if (phase > 0)
+ if (phaseidx > 0)
{
- aggnode = castNode(Agg, list_nth(node->chain, phase - 1));
+ aggnode = castNode(Agg, list_nth(node->chain, phaseidx - 1));
sortnode = castNode(Sort, aggnode->plan.lefttree);
}
else
@@ -2581,53 +2873,91 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
sortnode = NULL;
}
- phasedata->numsets = num_sets = list_length(aggnode->groupingSets);
+ Assert(phase <= 1 || sortnode);
- if (num_sets)
+ if (aggnode->aggstrategy == AGG_HASHED
+ || aggnode->aggstrategy == AGG_MIXED)
{
- phasedata->gset_lengths = palloc(num_sets * sizeof(int));
- phasedata->grouped_cols = palloc(num_sets * sizeof(Bitmapset *));
+ AggStatePerPhase phasedata = &aggstate->phases[0];
+ AggStatePerHash perhash;
+ Bitmapset *cols = NULL;
- i = 0;
- foreach(l, aggnode->groupingSets)
- {
- int current_length = list_length(lfirst(l));
- Bitmapset *cols = NULL;
+ Assert(phase == 0);
+ i = phasedata->numsets++;
+ perhash = &aggstate->perhash[i];
- /* planner forces this to be correct */
- for (j = 0; j < current_length; ++j)
- cols = bms_add_member(cols, aggnode->grpColIdx[j]);
+ /* phase 0 always points to the "real" Agg in the hash case */
+ phasedata->aggnode = node;
+ phasedata->aggstrategy = node->aggstrategy;
- phasedata->grouped_cols[i] = cols;
- phasedata->gset_lengths[i] = current_length;
- ++i;
- }
+ /* but the actual Agg node representing this hash is saved here */
+ perhash->aggnode = aggnode;
+
+ phasedata->gset_lengths[i] = perhash->numCols = aggnode->numCols;
+
+ for (j = 0; j < aggnode->numCols; ++j)
+ cols = bms_add_member(cols, aggnode->grpColIdx[j]);
+
+ phasedata->grouped_cols[i] = cols;
- all_grouped_cols = bms_add_members(all_grouped_cols,
- phasedata->grouped_cols[0]);
+ all_grouped_cols = bms_add_members(all_grouped_cols, cols);
+ continue;
}
else
{
- Assert(phase == 0);
+ AggStatePerPhase phasedata = &aggstate->phases[++phase];
+ int num_sets;
- phasedata->gset_lengths = NULL;
- phasedata->grouped_cols = NULL;
- }
+ phasedata->numsets = num_sets = list_length(aggnode->groupingSets);
- /*
- * If we are grouping, precompute fmgr lookup data for inner loop.
- */
- if (aggnode->aggstrategy == AGG_SORTED)
- {
- Assert(aggnode->numCols > 0);
+ if (num_sets)
+ {
+ phasedata->gset_lengths = palloc(num_sets * sizeof(int));
+ phasedata->grouped_cols = palloc(num_sets * sizeof(Bitmapset *));
- phasedata->eqfunctions =
- execTuplesMatchPrepare(aggnode->numCols,
- aggnode->grpOperators);
- }
+ i = 0;
+ foreach(l, aggnode->groupingSets)
+ {
+ int current_length = list_length(lfirst(l));
+ Bitmapset *cols = NULL;
+
+ /* planner forces this to be correct */
+ for (j = 0; j < current_length; ++j)
+ cols = bms_add_member(cols, aggnode->grpColIdx[j]);
+
+ phasedata->grouped_cols[i] = cols;
+ phasedata->gset_lengths[i] = current_length;
+
+ ++i;
+ }
+
+ all_grouped_cols = bms_add_members(all_grouped_cols,
+ phasedata->grouped_cols[0]);
+ }
+ else
+ {
+ Assert(phaseidx == 0);
+
+ phasedata->gset_lengths = NULL;
+ phasedata->grouped_cols = NULL;
+ }
+
+ /*
+ * If we are grouping, precompute fmgr lookup data for inner loop.
+ */
+ if (aggnode->aggstrategy == AGG_SORTED)
+ {
+ Assert(aggnode->numCols > 0);
+
+ phasedata->eqfunctions =
+ execTuplesMatchPrepare(aggnode->numCols,
+ aggnode->grpOperators);
+ }
- phasedata->aggnode = aggnode;
- phasedata->sortnode = sortnode;
+ phasedata->aggnode = aggnode;
+ phasedata->aggstrategy = aggnode->aggstrategy;
+ phasedata->sortnode = sortnode;
+ }
}
/*
@@ -2638,13 +2968,6 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->all_grouped_cols = lcons_int(i, aggstate->all_grouped_cols);
/*
- * Initialize current phase-dependent values to initial phase
- */
-
- aggstate->current_phase = 0;
- initialize_phase(aggstate, 0);
-
- /*
* Set up aggregate-result storage in the output expr context, and also
* allocate my private per-agg working storage
*/
@@ -2658,23 +2981,30 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->peragg = peraggs;
aggstate->pertrans = pertransstates;
-
/*
* Hashing can only appear in the initial phase.
*/
- if (node->aggstrategy == AGG_HASHED)
+ if (use_hashing)
{
- find_hash_columns(aggstate);
+ for (i = 0; i < numHashes; ++i)
+ {
+ aggstate->perhash[i].hashslot = ExecInitExtraTupleSlot(estate);
+
+ execTuplesHashPrepare(aggstate->perhash[i].numCols,
+ aggstate->perhash[i].aggnode->grpOperators,
+ &aggstate->perhash[i].eqfunctions,
+ &aggstate->perhash[i].hashfunctions);
+ }
- execTuplesHashPrepare(node->numCols,
- node->grpOperators,
- &aggstate->phases[0].eqfunctions,
- &aggstate->hashfunctions);
+ /* this is an array of pointers, not structures */
+ aggstate->hash_pergroup = palloc0(sizeof(AggStatePerGroup) * numHashes);
+ find_hash_columns(aggstate);
build_hash_table(aggstate);
aggstate->table_filled = false;
}
- else
+
+ if (node->aggstrategy != AGG_HASHED)
{
AggStatePerGroup pergroup;
@@ -2685,6 +3015,25 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->pergroup = pergroup;
}
+ /*
+ * Initialize current phase-dependent values to initial phase. The initial
+ * phase is 1 (first sort pass) for all strategies that use sorting (if
+ * hashing is being done too, then phase 0 is processed last); but if only
+ * hashing is being done, then phase 0 is all there is.
+ */
+ if (node->aggstrategy == AGG_HASHED)
+ {
+ aggstate->current_phase = 0;
+ initialize_phase(aggstate, 0);
+ select_current_set(aggstate, 0, true);
+ }
+ else
+ {
+ aggstate->current_phase = 1;
+ initialize_phase(aggstate, 1);
+ select_current_set(aggstate, 0, false);
+ }
+
/* -----------------
* Perform lookups of aggregate function info, and initialize the
* unchanging fields of the per-agg and per-trans data.
@@ -3263,7 +3612,7 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
* We don't implement DISTINCT or ORDER BY aggs in the HASHED case
* (yet)
*/
- Assert(((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED);
+ Assert(aggstate->aggstrategy != AGG_HASHED && aggstate->aggstrategy != AGG_MIXED);
/* If we have only one input, we need its len/byval info. */
if (numInputs == 1)
@@ -3512,6 +3861,8 @@ ExecEndAgg(AggState *node)
/* And ensure any agg shutdown callbacks have been called */
for (setno = 0; setno < numGroupingSets; setno++)
ReScanExprContext(node->aggcontexts[setno]);
+ if (node->hashcontext)
+ ReScanExprContext(node->hashcontext);
/*
* We don't actually free any ExprContexts here (see comment in
@@ -3539,7 +3890,7 @@ ExecReScanAgg(AggState *node)
node->agg_done = false;
- if (aggnode->aggstrategy == AGG_HASHED)
+ if (node->aggstrategy == AGG_HASHED)
{
/*
* In the hashed case, if we haven't yet built the hash table then we
@@ -3559,7 +3910,9 @@ ExecReScanAgg(AggState *node)
if (outerPlan->chgParam == NULL &&
!bms_overlap(node->ss.ps.chgParam, aggnode->aggParams))
{
- ResetTupleHashIterator(node->hashtable, &node->hashiter);
+ ResetTupleHashIterator(node->perhash[0].hashtable,
+ &node->perhash[0].hashiter);
+ select_current_set(node, 0, true);
return;
}
}
@@ -3584,11 +3937,7 @@ ExecReScanAgg(AggState *node)
* ExecReScan already did it. But we do need to reset our per-grouping-set
* contexts, which may have transvalues stored in them. (We use rescan
* rather than just reset because transfns may have registered callbacks
- * that need to be run now.)
- *
- * Note that with AGG_HASHED, the hash table is allocated in a sub-context
- * of the aggcontext. This used to be an issue, but now, resetting a
- * context automatically deletes sub-contexts too.
+ * that need to be run now.) For the AGG_HASHED case, see below.
*/
for (setno = 0; setno < numGroupingSets; setno++)
@@ -3608,13 +3957,21 @@ ExecReScanAgg(AggState *node)
MemSet(econtext->ecxt_aggvalues, 0, sizeof(Datum) * node->numaggs);
MemSet(econtext->ecxt_aggnulls, 0, sizeof(bool) * node->numaggs);
- if (aggnode->aggstrategy == AGG_HASHED)
+ /*
+ * With AGG_HASHED/MIXED, the hash table is allocated in a sub-context of
+ * the hashcontext. This used to be an issue, but now, resetting a context
+ * automatically deletes sub-contexts too.
+ */
+ if (node->aggstrategy == AGG_HASHED || node->aggstrategy == AGG_MIXED)
{
+ ReScanExprContext(node->hashcontext);
/* Rebuild an empty hash table */
build_hash_table(node);
node->table_filled = false;
+ /* iterator will be reset when the table is filled */
}
- else
+
+ if (node->aggstrategy != AGG_HASHED)
{
/*
* Reset the per-group state (in particular, mark transvalues null)
@@ -3622,8 +3979,8 @@ ExecReScanAgg(AggState *node)
MemSet(node->pergroup, 0,
sizeof(AggStatePerGroupData) * node->numaggs * numGroupingSets);
- /* reset to phase 0 */
- initialize_phase(node, 0);
+ /* reset to phase 1 */
+ initialize_phase(node, 1);
node->input_done = false;
node->projected_set = -1;
@@ -3664,7 +4021,7 @@ AggCheckCallContext(FunctionCallInfo fcinfo, MemoryContext *aggcontext)
if (aggcontext)
{
AggState *aggstate = ((AggState *) fcinfo->context);
- ExprContext *cxt = aggstate->aggcontexts[aggstate->current_set];
+ ExprContext *cxt = aggstate->curaggcontext;
*aggcontext = cxt->ecxt_per_tuple_memory;
}
@@ -3753,7 +4110,7 @@ AggRegisterCallback(FunctionCallInfo fcinfo,
if (fcinfo->context && IsA(fcinfo->context, AggState))
{
AggState *aggstate = (AggState *) fcinfo->context;
- ExprContext *cxt = aggstate->aggcontexts[aggstate->current_set];
+ ExprContext *cxt = aggstate->curaggcontext;
RegisterExprContextCallback(cxt, func, arg);
diff --git a/src/backend/lib/Makefile b/src/backend/lib/Makefile
index 2d2ba84fe9..f222c6c20d 100644
--- a/src/backend/lib/Makefile
+++ b/src/backend/lib/Makefile
@@ -12,7 +12,7 @@ subdir = src/backend/lib
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
-OBJS = binaryheap.o bipartite_match.o hyperloglog.o ilist.o pairingheap.o \
- rbtree.o stringinfo.o
+OBJS = binaryheap.o bipartite_match.o hyperloglog.o ilist.o knapsack.o \
+ pairingheap.o rbtree.o stringinfo.o
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/lib/knapsack.c b/src/backend/lib/knapsack.c
new file mode 100644
index 0000000000..ddf2b9afa3
--- /dev/null
+++ b/src/backend/lib/knapsack.c
@@ -0,0 +1,114 @@
+/*-------------------------------------------------------------------------
+ *
+ * knapsack.c
+ * Knapsack problem solver
+ *
+ * Given input vectors of integral item weights (must be >= 0) and values
+ * (double >= 0), compute the set of items which produces the greatest total
+ * value without exceeding a specified total weight; each item is included at
+ * most once (this is the 0/1 knapsack problem). Weight 0 items will always be
+ * included.
+ *
+ * The performance of this algorithm is pseudo-polynomial, O(nW) where W is the
+ * weight limit. To use it with non-integral weights, the caller should
+ * pre-scale the inputs to a suitable integer range; this also allows
+ * approximate solutions in polynomial time (the general case of the exact
+ * problem is NP-hard).
+ *
+ * Copyright (c) 2017, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/lib/knapsack.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <math.h>
+#include <limits.h>
+
+#include "lib/knapsack.h"
+#include "miscadmin.h"
+#include "nodes/bitmapset.h"
+#include "utils/builtins.h"
+#include "utils/memutils.h"
+#include "utils/palloc.h"
+
+/*
+ * DiscreteKnapsack
+ *
+ * The item_values input is optional; if omitted, all the items are assumed to
+ * have value 1.
+ *
+ * Returns a Bitmapset of the 0..(n-1) indexes of the items chosen for
+ * inclusion in the solution.
+ *
+ * This uses the usual dynamic-programming algorithm, adapted to reuse the
+ * memory on each pass (by working from larger weights to smaller). At the
+ * start of pass number i, the values[w] array contains the largest value
+ * computed with total weight <= w, using only items with indices < i; and
+ * sets[w] contains the bitmap of items actually used for that value. (The
+ * bitmapsets are all pre-initialized with an unused high bit so that memory
+ * allocation is done only once.)
+ */
+Bitmapset *
+DiscreteKnapsack(int max_weight, int num_items,
+ int *item_weights, double *item_values)
+{
+ MemoryContext local_ctx = AllocSetContextCreate(CurrentMemoryContext,
+ "Knapsack",
+ ALLOCSET_SMALL_MINSIZE,
+ ALLOCSET_SMALL_INITSIZE,
+ ALLOCSET_SMALL_MAXSIZE);
+ MemoryContext oldctx = MemoryContextSwitchTo(local_ctx);
+ double *values;
+ Bitmapset **sets;
+ Bitmapset *result;
+ int i,
+ j;
+
+ Assert(max_weight >= 0);
+ Assert(num_items > 0 && item_weights);
+
+ values = palloc((1 + max_weight) * sizeof(double));
+ sets = palloc((1 + max_weight) * sizeof(Bitmapset *));
+
+ for (i = 0; i <= max_weight; ++i)
+ {
+ values[i] = 0;
+ sets[i] = bms_make_singleton(num_items);
+ }
+
+ for (i = 0; i < num_items; ++i)
+ {
+ int iw = item_weights[i];
+ double iv = item_values ? item_values[i] : 1;
+
+ for (j = max_weight; j >= iw; --j)
+ {
+ int ow = j - iw;
+
+ if (values[j] <= values[ow] + iv)
+ {
+ /* copy sets[ow] to sets[j] without realloc */
+ if (j != ow)
+ {
+ sets[j] = bms_del_members(sets[j], sets[j]);
+ sets[j] = bms_add_members(sets[j], sets[ow]);
+ }
+
+ sets[j] = bms_add_member(sets[j], i);
+
+ values[j] = values[ow] + iv;
+ }
+ }
+ }
+
+ MemoryContextSwitchTo(oldctx);
+
+ result = bms_del_member(bms_copy(sets[max_weight]), num_items);
+
+ MemoryContextDelete(local_ctx);
+
+ return result;
+}
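The same DP in a minimal standalone form (plain ints and a bitmask in
place of Bitmapsets and memory contexts; the item numbers below are
made up), with a worked example:

#include <stdio.h>

#define MAX_WEIGHT 6
#define NUM_ITEMS  3

int
main(void)
{
	int			weights[NUM_ITEMS] = {2, 3, 4};
	double		ivalues[NUM_ITEMS] = {3.0, 4.0, 5.0};
	double		values[MAX_WEIGHT + 1] = {0};
	unsigned	sets[MAX_WEIGHT + 1] = {0};		/* bitmask of items */
	int			i,
				j;

	/* reuse the arrays on each pass by scanning weights high-to-low,
	 * exactly as DiscreteKnapsack does */
	for (i = 0; i < NUM_ITEMS; i++)
		for (j = MAX_WEIGHT; j >= weights[i]; j--)
			if (values[j] <= values[j - weights[i]] + ivalues[i])
			{
				values[j] = values[j - weights[i]] + ivalues[i];
				sets[j] = sets[j - weights[i]] | (1u << i);
			}

	/* prints: best value 8.0, items mask 0x5 (items 0 and 2, weight 6) */
	printf("best value %.1f, items mask 0x%x\n",
		   values[MAX_WEIGHT], sets[MAX_WEIGHT]);
	return 0;
}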
diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c
index 252af5c870..bf8545d437 100644
--- a/src/backend/nodes/bitmapset.c
+++ b/src/backend/nodes/bitmapset.c
@@ -21,6 +21,7 @@
#include "postgres.h"
#include "access/hash.h"
+#include "nodes/pg_list.h"
#define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD)
@@ -458,6 +459,35 @@ bms_overlap(const Bitmapset *a, const Bitmapset *b)
}
/*
+ * bms_overlap_list - does a set overlap an integer list?
+ */
+bool
+bms_overlap_list(const Bitmapset *a, const List *b)
+{
+ ListCell *lc;
+ int wordnum,
+ bitnum;
+
+ if (a == NULL || b == NIL)
+ return false;
+
+ foreach(lc, b)
+ {
+ int x = lfirst_int(lc);
+
+ if (x < 0)
+ elog(ERROR, "negative bitmapset member not allowed");
+ wordnum = WORDNUM(x);
+ bitnum = BITNUM(x);
+ if (wordnum < a->nwords)
+ if ((a->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0)
+ return true;
+ }
+
+ return false;
+}
+
+/*
* bms_nonempty_difference - do sets have a nonempty difference?
*/
bool
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 541af02935..bbb63a4bfa 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -1942,6 +1942,28 @@ _outAggPath(StringInfo str, const AggPath *node)
}
static void
+_outRollupData(StringInfo str, const RollupData *node)
+{
+ WRITE_NODE_TYPE("ROLLUP");
+
+ WRITE_NODE_FIELD(groupClause);
+ WRITE_NODE_FIELD(gsets);
+ WRITE_NODE_FIELD(gsets_data);
+ WRITE_FLOAT_FIELD(numGroups, "%.0f");
+ WRITE_BOOL_FIELD(hashable);
+ WRITE_BOOL_FIELD(is_hashed);
+}
+
+static void
+_outGroupingSetData(StringInfo str, const GroupingSetData *node)
+{
+ WRITE_NODE_TYPE("GSDATA");
+
+ WRITE_NODE_FIELD(set);
+ WRITE_FLOAT_FIELD(numGroups, "%.0f");
+}
+
+static void
_outGroupingSetsPath(StringInfo str, const GroupingSetsPath *node)
{
WRITE_NODE_TYPE("GROUPINGSETSPATH");
@@ -1949,8 +1971,8 @@ _outGroupingSetsPath(StringInfo str, const GroupingSetsPath *node)
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(subpath);
- WRITE_NODE_FIELD(rollup_groupclauses);
- WRITE_NODE_FIELD(rollup_lists);
+ WRITE_ENUM_FIELD(aggstrategy, AggStrategy);
+ WRITE_NODE_FIELD(rollups);
WRITE_NODE_FIELD(qual);
}
@@ -3961,14 +3983,18 @@ outNode(StringInfo str, const void *obj)
case T_PlannerParamItem:
_outPlannerParamItem(str, obj);
break;
+ case T_RollupData:
+ _outRollupData(str, obj);
+ break;
+ case T_GroupingSetData:
+ _outGroupingSetData(str, obj);
+ break;
case T_StatisticExtInfo:
_outStatisticExtInfo(str, obj);
break;
-
case T_ExtensibleNode:
_outExtensibleNode(str, obj);
break;
-
case T_CreateStmt:
_outCreateStmt(str, obj);
break;
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index a129d1ecb3..eb7a618341 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1884,11 +1884,16 @@ cost_agg(Path *path, PlannerInfo *root,
total_cost = startup_cost + cpu_tuple_cost;
output_tuples = 1;
}
- else if (aggstrategy == AGG_SORTED)
+ else if (aggstrategy == AGG_SORTED || aggstrategy == AGG_MIXED)
{
/* Here we are able to deliver output on-the-fly */
startup_cost = input_startup_cost;
total_cost = input_total_cost;
+ if (aggstrategy == AGG_MIXED && !enable_hashagg)
+ {
+ startup_cost += disable_cost;
+ total_cost += disable_cost;
+ }
/* calcs phrased this way to match HASHED case, see note above */
total_cost += aggcosts->transCost.startup;
total_cost += aggcosts->transCost.per_tuple * input_tuples;
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index c80c9992c9..aafec58281 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -1783,18 +1783,15 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
{
Agg *plan;
Plan *subplan;
- List *rollup_groupclauses = best_path->rollup_groupclauses;
- List *rollup_lists = best_path->rollup_lists;
+ List *rollups = best_path->rollups;
AttrNumber *grouping_map;
int maxref;
List *chain;
- ListCell *lc,
- *lc2;
+ ListCell *lc;
/* Shouldn't get here without grouping sets */
Assert(root->parse->groupingSets);
- Assert(rollup_lists != NIL);
- Assert(list_length(rollup_lists) == list_length(rollup_groupclauses));
+ Assert(rollups != NIL);
/*
* Agg can project, so no need to be terribly picky about child tlist, but
@@ -1846,72 +1843,86 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
* costs will be shown by EXPLAIN.
*/
chain = NIL;
- if (list_length(rollup_groupclauses) > 1)
+ if (list_length(rollups) > 1)
{
- forboth(lc, rollup_groupclauses, lc2, rollup_lists)
+ ListCell *lc2 = lnext(list_head(rollups));
+ bool is_first_sort = ((RollupData *) linitial(rollups))->is_hashed;
+
+ for_each_cell(lc, lc2)
{
- List *groupClause = (List *) lfirst(lc);
- List *gsets = (List *) lfirst(lc2);
+ RollupData *rollup = lfirst(lc);
AttrNumber *new_grpColIdx;
- Plan *sort_plan;
+ Plan *sort_plan = NULL;
Plan *agg_plan;
+ AggStrategy strat;
- /* We want to iterate over all but the last rollup list elements */
- if (lnext(lc) == NULL)
- break;
+ new_grpColIdx = remap_groupColIdx(root, rollup->groupClause);
+
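+ /*
+ * The first non-hashed rollup overall (which may be the head rollup,
+ * handled below as the real Agg node) consumes the presorted subplan
+ * output directly; every later non-hashed rollup needs its own Sort
+ * node. Hashed rollups never sort.
+ */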
+ if (!rollup->is_hashed && !is_first_sort)
+ {
+ sort_plan = (Plan *)
+ make_sort_from_groupcols(rollup->groupClause,
+ new_grpColIdx,
+ subplan);
+ }
- new_grpColIdx = remap_groupColIdx(root, groupClause);
+ if (!rollup->is_hashed)
+ is_first_sort = false;
- sort_plan = (Plan *)
- make_sort_from_groupcols(groupClause,
- new_grpColIdx,
- subplan);
+ if (rollup->is_hashed)
+ strat = AGG_HASHED;
+ else if (list_length(linitial(rollup->gsets)) == 0)
+ strat = AGG_PLAIN;
+ else
+ strat = AGG_SORTED;
agg_plan = (Plan *) make_agg(NIL,
NIL,
- AGG_SORTED,
+ strat,
AGGSPLIT_SIMPLE,
- list_length((List *) linitial(gsets)),
+ list_length((List *) linitial(rollup->gsets)),
new_grpColIdx,
- extract_grouping_ops(groupClause),
- gsets,
+ extract_grouping_ops(rollup->groupClause),
+ rollup->gsets,
NIL,
- 0, /* numGroups not needed */
+ rollup->numGroups,
sort_plan);
/*
- * Nuke stuff we don't need to avoid bloating debug output.
+ * Remove stuff we don't need, to avoid bloating debug output.
*/
- sort_plan->targetlist = NIL;
- sort_plan->lefttree = NULL;
+ if (sort_plan)
+ {
+ sort_plan->targetlist = NIL;
+ sort_plan->lefttree = NULL;
+ }
chain = lappend(chain, agg_plan);
}
}
/*
- * Now make the final Agg node
+ * Now make the real Agg node
*/
{
- List *groupClause = (List *) llast(rollup_groupclauses);
- List *gsets = (List *) llast(rollup_lists);
+ RollupData *rollup = linitial(rollups);
AttrNumber *top_grpColIdx;
int numGroupCols;
- top_grpColIdx = remap_groupColIdx(root, groupClause);
+ top_grpColIdx = remap_groupColIdx(root, rollup->groupClause);
- numGroupCols = list_length((List *) linitial(gsets));
+ numGroupCols = list_length((List *) linitial(rollup->gsets));
plan = make_agg(build_path_tlist(root, &best_path->path),
best_path->qual,
- (numGroupCols > 0) ? AGG_SORTED : AGG_PLAIN,
+ best_path->aggstrategy,
AGGSPLIT_SIMPLE,
numGroupCols,
top_grpColIdx,
- extract_grouping_ops(groupClause),
- gsets,
+ extract_grouping_ops(rollup->groupClause),
+ rollup->gsets,
chain,
- 0, /* numGroups not needed */
+ rollup->numGroups,
subplan);
/* Copy cost data from Path to Plan */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 68d74cb432..1c1f8a3af9 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -30,6 +30,7 @@
#include "foreign/fdwapi.h"
#include "miscadmin.h"
#include "lib/bipartite_match.h"
+#include "lib/knapsack.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#ifdef OPTIMIZER_DEBUG
@@ -91,12 +92,31 @@ typedef struct
List *groupClause; /* overrides parse->groupClause */
} standard_qp_extra;
+/*
+ * Data specific to grouping sets
+ */
+
+typedef struct
+{
+ List *rollups;
+ List *hash_sets_idx;
+ double dNumHashGroups;
+ bool any_hashable;
+ Bitmapset *unsortable_refs;
+ Bitmapset *unhashable_refs;
+ List *unsortable_sets;
+ int *tleref_to_colnum_map;
+} grouping_sets_data;
+
/* Local functions */
static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
static void inheritance_planner(PlannerInfo *root);
static void grouping_planner(PlannerInfo *root, bool inheritance_update,
double tuple_fraction);
+static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root);
+static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
+ int *tleref_to_colnum_map);
static void preprocess_rowmarks(PlannerInfo *root);
static double preprocess_limit(PlannerInfo *root,
double tuple_fraction,
@@ -109,8 +129,7 @@ static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
static void standard_qp_callback(PlannerInfo *root, void *extra);
static double get_number_of_groups(PlannerInfo *root,
double path_rows,
- List *rollup_lists,
- List *rollup_groupclauses);
+ grouping_sets_data *gd);
static Size estimate_hashagg_tablesize(Path *path,
const AggClauseCosts *agg_costs,
double dNumGroups);
@@ -118,8 +137,16 @@ static RelOptInfo *create_grouping_paths(PlannerInfo *root,
RelOptInfo *input_rel,
PathTarget *target,
const AggClauseCosts *agg_costs,
- List *rollup_lists,
- List *rollup_groupclauses);
+ grouping_sets_data *gd);
+static void consider_groupingsets_paths(PlannerInfo *root,
+ RelOptInfo *grouped_rel,
+ Path *path,
+ bool is_sorted,
+ bool can_hash,
+ PathTarget *target,
+ grouping_sets_data *gd,
+ const AggClauseCosts *agg_costs,
+ double dNumGroups);
static RelOptInfo *create_window_paths(PlannerInfo *root,
RelOptInfo *input_rel,
PathTarget *input_target,
@@ -1540,8 +1567,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
AggClauseCosts agg_costs;
WindowFuncLists *wflists = NULL;
List *activeWindows = NIL;
- List *rollup_lists = NIL;
- List *rollup_groupclauses = NIL;
+ grouping_sets_data *gset_data = NULL;
standard_qp_extra qp_extra;
/* A recursive query should always have setOperations */
@@ -1550,84 +1576,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
/* Preprocess grouping sets and GROUP BY clause, if any */
if (parse->groupingSets)
{
- int *tleref_to_colnum_map;
- List *sets;
- int maxref;
- ListCell *lc;
- ListCell *lc2;
- ListCell *lc_set;
-
- parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1);
-
- /* Identify max SortGroupRef in groupClause, for array sizing */
- maxref = 0;
- foreach(lc, parse->groupClause)
- {
- SortGroupClause *gc = lfirst(lc);
-
- if (gc->tleSortGroupRef > maxref)
- maxref = gc->tleSortGroupRef;
- }
-
- /* Allocate workspace array for remapping */
- tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
-
- /* Examine the rollup sets */
- sets = extract_rollup_sets(parse->groupingSets);
-
- foreach(lc_set, sets)
- {
- List *current_sets = (List *) lfirst(lc_set);
- List *groupclause;
- int ref;
-
- /*
- * Reorder the current list of grouping sets into correct
- * prefix order. If only one aggregation pass is needed, try
- * to make the list match the ORDER BY clause; if more than
- * one pass is needed, we don't bother with that.
- */
- current_sets = reorder_grouping_sets(current_sets,
- (list_length(sets) == 1
- ? parse->sortClause
- : NIL));
-
- /*
- * Order the groupClause appropriately. If the first grouping
- * set is empty, this can match regular GROUP BY
- * preprocessing, otherwise we have to force the groupClause
- * to match that grouping set's order.
- */
- groupclause = preprocess_groupclause(root,
- linitial(current_sets));
-
- /*
- * Now that we've pinned down an order for the groupClause for
- * this list of grouping sets, we need to remap the entries in
- * the grouping sets from sortgrouprefs to plain indices
- * (0-based) into the groupClause for this collection of
- * grouping sets.
- */
- ref = 0;
- foreach(lc, groupclause)
- {
- SortGroupClause *gc = lfirst(lc);
-
- tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
- }
-
- foreach(lc, current_sets)
- {
- foreach(lc2, (List *) lfirst(lc))
- {
- lfirst_int(lc2) = tleref_to_colnum_map[lfirst_int(lc2)];
- }
- }
-
- /* Save the reordered sets and corresponding groupclauses */
- rollup_lists = lcons(current_sets, rollup_lists);
- rollup_groupclauses = lcons(groupclause, rollup_groupclauses);
- }
+ gset_data = preprocess_grouping_sets(root);
}
else
{
@@ -1721,8 +1670,9 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
/* Set up data needed by standard_qp_callback */
qp_extra.tlist = tlist;
qp_extra.activeWindows = activeWindows;
- qp_extra.groupClause =
- parse->groupingSets ? llast(rollup_groupclauses) : parse->groupClause;
+ qp_extra.groupClause = (gset_data
+ ? (gset_data->rollups ? ((RollupData *) linitial(gset_data->rollups))->groupClause : NIL)
+ : parse->groupClause);
/*
* Generate the best unsorted and presorted paths for the scan/join
@@ -1922,8 +1872,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
current_rel,
grouping_target,
&agg_costs,
- rollup_lists,
- rollup_groupclauses);
+ gset_data);
/* Fix things up if grouping_target contains SRFs */
if (parse->hasTargetSRFs)
adjust_paths_for_srfs(root, current_rel,
@@ -1960,7 +1909,6 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
current_rel = create_distinct_paths(root,
current_rel);
}
-
} /* end of if (setOperations) */
/*
@@ -2113,6 +2061,221 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
/* Note: currently, we leave it to callers to do set_cheapest() */
}
+/*
+ * Do preprocessing for groupingSets clause and related data. This handles the
+ * preliminary steps of expanding the grouping sets, organizing them into lists
+ * of rollups, and preparing annotations which will later be filled in with
+ * size estimates.
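+ *
+ * For example, GROUP BY GROUPING SETS ((a,b),(a),(c)) yields two rollups:
+ * one chaining (a,b) and (a), which can share a single sort order, and
+ * one holding just (c). (Illustrative; the chaining itself is done by
+ * extract_rollup_sets.)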
+ */
+static grouping_sets_data *
+preprocess_grouping_sets(PlannerInfo *root)
+{
+ Query *parse = root->parse;
+ List *sets;
+ int maxref = 0;
+ ListCell *lc;
+ ListCell *lc_set;
+ grouping_sets_data *gd = palloc0(sizeof(grouping_sets_data));
+
+ parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1);
+
+ gd->any_hashable = false;
+ gd->unhashable_refs = NULL;
+ gd->unsortable_refs = NULL;
+ gd->unsortable_sets = NIL;
+
+ if (parse->groupClause)
+ {
+ ListCell *lc;
+
+ foreach(lc, parse->groupClause)
+ {
+ SortGroupClause *gc = lfirst(lc);
+ Index ref = gc->tleSortGroupRef;
+
+ if (ref > maxref)
+ maxref = ref;
+
+ if (!gc->hashable)
+ gd->unhashable_refs = bms_add_member(gd->unhashable_refs, ref);
+
+ if (!OidIsValid(gc->sortop))
+ gd->unsortable_refs = bms_add_member(gd->unsortable_refs, ref);
+ }
+ }
+
+ /* Allocate workspace array for remapping */
+ gd->tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
+
+ /*
+ * If we have any unsortable sets, we must extract them before trying to
+ * prepare rollups. Unsortable sets don't go through
+ * reorder_grouping_sets, so we must apply the GroupingSetData annotation
+ * here.
+ */
+ if (!bms_is_empty(gd->unsortable_refs))
+ {
+ List *sortable_sets = NIL;
+
+ foreach(lc, parse->groupingSets)
+ {
+ List *gset = lfirst(lc);
+
+ if (bms_overlap_list(gd->unsortable_refs, gset))
+ {
+ GroupingSetData *gs = makeNode(GroupingSetData);
+
+ gs->set = gset;
+ gd->unsortable_sets = lappend(gd->unsortable_sets, gs);
+
+ /*
+ * We must enforce here that an unsortable set is hashable;
+ * later code assumes this. Parse analysis only checks that
+ * every individual column is either hashable or sortable.
+ *
+ * Note that passing this test doesn't guarantee we can
+ * generate a plan; there might be other showstoppers.
+ */
+ if (bms_overlap_list(gd->unhashable_refs, gset))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("could not implement GROUP BY"),
+ errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
+ }
+ else
+ sortable_sets = lappend(sortable_sets, gset);
+ }
+
+ if (sortable_sets)
+ sets = extract_rollup_sets(sortable_sets);
+ else
+ sets = NIL;
+ }
+ else
+ sets = extract_rollup_sets(parse->groupingSets);
+
+ foreach(lc_set, sets)
+ {
+ List *current_sets = (List *) lfirst(lc_set);
+ RollupData *rollup = makeNode(RollupData);
+ GroupingSetData *gs;
+
+ /*
+ * Reorder the current list of grouping sets into correct prefix
+ * order. If only one aggregation pass is needed, try to make the
+ * list match the ORDER BY clause; if more than one pass is needed, we
+ * don't bother with that.
+ *
+ * Note that this reorders the sets from smallest-member-first to
+ * largest-member-first, and applies the GroupingSetData annotations,
+ * though the data will be filled in later.
+ */
+ current_sets = reorder_grouping_sets(current_sets,
+ (list_length(sets) == 1
+ ? parse->sortClause
+ : NIL));
+
+ /*
+ * Get the initial (and therefore largest) grouping set.
+ */
+ gs = linitial(current_sets);
+
+ /*
+ * Order the groupClause appropriately. If the first grouping set is
+ * empty, then the groupClause must also be empty; otherwise we have
+ * to force the groupClause to match that grouping set's order.
+ *
+ * (The first grouping set can be empty even though parse->groupClause
+ * is not empty only if all non-empty grouping sets are unsortable.
+ * The groupClauses for hashed grouping sets are built later on.)
+ */
+ if (gs->set)
+ rollup->groupClause = preprocess_groupclause(root, gs->set);
+ else
+ rollup->groupClause = NIL;
+
+ /*
+ * Is it hashable? We pretend empty sets are hashable even though we
+ * actually force them not to be hashed later. But don't bother if
+ * there's nothing but empty sets (since in that case we can't hash
+ * anything).
+ */
+ if (gs->set &&
+ !bms_overlap_list(gd->unhashable_refs, gs->set))
+ {
+ rollup->hashable = true;
+ gd->any_hashable = true;
+ }
+
+ /*
+ * Now that we've pinned down an order for the groupClause for this
+ * list of grouping sets, we need to remap the entries in the grouping
+ * sets from sortgrouprefs to plain indices (0-based) into the
+ * groupClause for this collection of grouping sets. We keep the
+ * original form for later use, though.
+ */
+ rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
+ current_sets,
+ gd->tleref_to_colnum_map);
+ rollup->gsets_data = current_sets;
+
+ gd->rollups = lappend(gd->rollups, rollup);
+ }
+
+ if (gd->unsortable_sets)
+ {
+ /*
+ * We have not yet pinned down a groupclause for this, but we will
+ * need index-based lists for estimation purposes. Construct
+ * hash_sets_idx based on the entire original groupclause for now.
+ */
+ gd->hash_sets_idx = remap_to_groupclause_idx(parse->groupClause,
+ gd->unsortable_sets,
+ gd->tleref_to_colnum_map);
+ gd->any_hashable = true;
+ }
+
+ return gd;
+}
+
+/*
+ * Given a groupclause and a list of GroupingSetData, return equivalent sets
+ * (without annotation) mapped to indexes into the given groupclause.
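+ *
+ * For example (illustrative sortgrouprefs): a groupClause whose refs are
+ * (5,2,7) maps 5->0, 2->1, 7->2, so a grouping set (7,5) comes back as
+ * the index list (2,0).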
+ */
+static List *
+remap_to_groupclause_idx(List *groupClause,
+ List *gsets,
+ int *tleref_to_colnum_map)
+{
+ int ref = 0;
+ List *result = NIL;
+ ListCell *lc;
+
+ foreach(lc, groupClause)
+ {
+ SortGroupClause *gc = lfirst(lc);
+
+ tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
+ }
+
+ foreach(lc, gsets)
+ {
+ List *set = NIL;
+ ListCell *lc2;
+ GroupingSetData *gs = lfirst(lc);
+
+ foreach(lc2, gs->set)
+ {
+ set = lappend_int(set, tleref_to_colnum_map[lfirst_int(lc2)]);
+ }
+
+ result = lappend(result, set);
+ }
+
+ return result;
+}
+
+
/*
* Detect whether a plan node is a "dummy" plan created when a relation
@@ -3028,7 +3191,7 @@ extract_rollup_sets(List *groupingSets)
/*
* Reorder the elements of a list of grouping sets such that they have correct
- * prefix relationships.
+ * prefix relationships. Also inserts the GroupingSetData annotations.
*
* The input must be ordered with smallest sets first; the result is returned
* with largest sets first. Note that the result shares no list substructure
@@ -3051,6 +3214,7 @@ reorder_grouping_sets(List *groupingsets, List *sortclause)
{
List *candidate = lfirst(lc);
List *new_elems = list_difference_int(candidate, previous);
+ GroupingSetData *gs = makeNode(GroupingSetData);
if (list_length(new_elems) > 0)
{
@@ -3078,7 +3242,8 @@ reorder_grouping_sets(List *groupingsets, List *sortclause)
}
}
- result = lcons(list_copy(previous), result);
+ gs->set = list_copy(previous);
+ result = lcons(gs, result);
list_free(new_elems);
}
@@ -3173,15 +3338,16 @@ standard_qp_callback(PlannerInfo *root, void *extra)
* Estimate number of groups produced by grouping clauses (1 if not grouping)
*
* path_rows: number of output rows from scan/join step
- * rollup_lists: list of grouping sets, or NIL if not doing grouping sets
- * rollup_groupclauses: list of grouping clauses for grouping sets,
- * or NIL if not doing grouping sets
+ * gsets: grouping set data, or NULL if not doing grouping sets
+ *
+ * If doing grouping sets, we also annotate the gsets data with the estimates
+ * for each set and each individual rollup list, with a view to later
+ * determining whether some combination of them could be hashed instead.
*/
static double
get_number_of_groups(PlannerInfo *root,
double path_rows,
- List *rollup_lists,
- List *rollup_groupclauses)
+ grouping_sets_data *gd)
{
Query *parse = root->parse;
double dNumGroups;
@@ -3193,28 +3359,60 @@ get_number_of_groups(PlannerInfo *root,
if (parse->groupingSets)
{
/* Add up the estimates for each grouping set */
- ListCell *lc,
- *lc2;
+ ListCell *lc;
+ ListCell *lc2;
dNumGroups = 0;
- forboth(lc, rollup_groupclauses, lc2, rollup_lists)
+
+ foreach(lc, gd->rollups)
{
- List *groupClause = (List *) lfirst(lc);
- List *gsets = (List *) lfirst(lc2);
- ListCell *lc3;
+ RollupData *rollup = lfirst(lc);
+ ListCell *lc;
- groupExprs = get_sortgrouplist_exprs(groupClause,
+ groupExprs = get_sortgrouplist_exprs(rollup->groupClause,
parse->targetList);
- foreach(lc3, gsets)
+ rollup->numGroups = 0.0;
+
+ forboth(lc, rollup->gsets, lc2, rollup->gsets_data)
{
- List *gset = (List *) lfirst(lc3);
+ List *gset = (List *) lfirst(lc);
+ GroupingSetData *gs = lfirst(lc2);
+ double numGroups = estimate_num_groups(root,
+ groupExprs,
+ path_rows,
+ &gset);
+
+ gs->numGroups = numGroups;
+ rollup->numGroups += numGroups;
+ }
+
+ dNumGroups += rollup->numGroups;
+ }
+
+ if (gd->hash_sets_idx)
+ {
+ ListCell *lc;
+
+ gd->dNumHashGroups = 0;
- dNumGroups += estimate_num_groups(root,
- groupExprs,
- path_rows,
- &gset);
+ groupExprs = get_sortgrouplist_exprs(parse->groupClause,
+ parse->targetList);
+
+ forboth(lc, gd->hash_sets_idx, lc2, gd->unsortable_sets)
+ {
+ List *gset = (List *) lfirst(lc);
+ GroupingSetData *gs = lfirst(lc2);
+ double numGroups = estimate_num_groups(root,
+ groupExprs,
+ path_rows,
+ &gset);
+
+ gs->numGroups = numGroups;
+ gd->dNumHashGroups += numGroups;
}
+
+ dNumGroups += gd->dNumHashGroups;
}
}
else
@@ -3250,6 +3448,11 @@ get_number_of_groups(PlannerInfo *root,
* estimate_hashagg_tablesize
* estimate the number of bytes that a hash aggregate hashtable will
* require based on the agg_costs, path width and dNumGroups.
+ *
+ * XXX this may be over-estimating the size now that hashagg knows to omit
+ * unneeded columns from the hashtable. Also for mixed-mode grouping sets,
+ * grouping columns not in the hashed set are counted here even though hashagg
+ * won't store them. Is this a problem?
*/
static Size
estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs,
@@ -3300,8 +3503,7 @@ create_grouping_paths(PlannerInfo *root,
RelOptInfo *input_rel,
PathTarget *target,
const AggClauseCosts *agg_costs,
- List *rollup_lists,
- List *rollup_groupclauses)
+ grouping_sets_data *gd)
{
Query *parse = root->parse;
Path *cheapest_path = input_rel->cheapest_total_path;
@@ -3410,8 +3612,7 @@ create_grouping_paths(PlannerInfo *root,
*/
dNumGroups = get_number_of_groups(root,
cheapest_path->rows,
- rollup_lists,
- rollup_groupclauses);
+ gd);
/*
* Determine whether it's possible to perform sort-based implementations
@@ -3419,15 +3620,22 @@ create_grouping_paths(PlannerInfo *root,
* grouping_is_sortable() is trivially true, and all the
* pathkeys_contained_in() tests will succeed too, so that we'll consider
* every surviving input path.)
+ *
+ * If we have grouping sets, we might be able to sort some but not all of
+ * them; in this case, we need can_sort to be true as long as we must
+ * consider any sorted-input plan.
*/
- can_sort = grouping_is_sortable(parse->groupClause);
+ can_sort = (gd && gd->rollups != NIL)
+ || grouping_is_sortable(parse->groupClause);
/*
* Determine whether we should consider hash-based implementations of
* grouping.
*
- * Hashed aggregation only applies if we're grouping. We currently can't
- * hash if there are grouping sets, though.
+ * Hashed aggregation only applies if we're grouping. If we have grouping
+ * sets, some groups might be hashable but others not; in this case we set
+ * can_hash true as long as there is nothing globally preventing us from
+ * hashing (and we should therefore consider plans with hashes).
*
* Executor doesn't support hashed aggregation with DISTINCT or ORDER BY
* aggregates. (Doing so would imply storing *all* the input values in
@@ -3440,9 +3648,8 @@ create_grouping_paths(PlannerInfo *root,
* other gating conditions, so we want to do it last.
*/
can_hash = (parse->groupClause != NIL &&
- parse->groupingSets == NIL &&
agg_costs->numOrderedAggs == 0 &&
- grouping_is_hashable(parse->groupClause));
+ (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause)));
/*
* If grouped_rel->consider_parallel is true, then paths that we generate
@@ -3508,8 +3715,7 @@ create_grouping_paths(PlannerInfo *root,
/* Estimate number of partial groups. */
dNumPartialGroups = get_number_of_groups(root,
cheapest_partial_path->rows,
- NIL,
- NIL);
+ gd);
/*
* Collect statistics about aggregates for estimating costs of
@@ -3642,20 +3848,9 @@ create_grouping_paths(PlannerInfo *root,
/* Now decide what to stick atop it */
if (parse->groupingSets)
{
- /*
- * We have grouping sets, possibly with aggregation. Make
- * a GroupingSetsPath.
- */
- add_path(grouped_rel, (Path *)
- create_groupingsets_path(root,
- grouped_rel,
- path,
- target,
- (List *) parse->havingQual,
- rollup_lists,
- rollup_groupclauses,
- agg_costs,
- dNumGroups));
+ consider_groupingsets_paths(root, grouped_rel,
+ path, true, can_hash, target,
+ gd, agg_costs, dNumGroups);
}
else if (parse->hasAggs)
{
@@ -3816,33 +4011,45 @@ create_grouping_paths(PlannerInfo *root,
if (can_hash)
{
- hashaggtablesize = estimate_hashagg_tablesize(cheapest_path,
- agg_costs,
- dNumGroups);
-
- /*
- * Provided that the estimated size of the hashtable does not exceed
- * work_mem, we'll generate a HashAgg Path, although if we were unable
- * to sort above, then we'd better generate a Path, so that we at
- * least have one.
- */
- if (hashaggtablesize < work_mem * 1024L ||
- grouped_rel->pathlist == NIL)
+ if (parse->groupingSets)
{
/*
- * We just need an Agg over the cheapest-total input path, since
- * input order won't matter.
+ * Try for a hash-only groupingsets path over unsorted input.
*/
- add_path(grouped_rel, (Path *)
- create_agg_path(root, grouped_rel,
- cheapest_path,
- target,
- AGG_HASHED,
- AGGSPLIT_SIMPLE,
- parse->groupClause,
- (List *) parse->havingQual,
- agg_costs,
- dNumGroups));
+ consider_groupingsets_paths(root, grouped_rel,
+ cheapest_path, false, true, target,
+ gd, agg_costs, dNumGroups);
+ }
+ else
+ {
+ hashaggtablesize = estimate_hashagg_tablesize(cheapest_path,
+ agg_costs,
+ dNumGroups);
+
+ /*
+ * Provided that the estimated size of the hashtable does not
+ * exceed work_mem, we'll generate a HashAgg Path, although if we
+ * were unable to sort above, then we'd better generate a Path, so
+ * that we at least have one.
+ */
+ if (hashaggtablesize < work_mem * 1024L ||
+ grouped_rel->pathlist == NIL)
+ {
+ /*
+ * We just need an Agg over the cheapest-total input path,
+ * since input order won't matter.
+ */
+ add_path(grouped_rel, (Path *)
+ create_agg_path(root, grouped_rel,
+ cheapest_path,
+ target,
+ AGG_HASHED,
+ AGGSPLIT_SIMPLE,
+ parse->groupClause,
+ (List *) parse->havingQual,
+ agg_costs,
+ dNumGroups));
+ }
}
/*
@@ -3921,6 +4128,344 @@ create_grouping_paths(PlannerInfo *root,
return grouped_rel;
}
+
+/*
+ * For a given input path, consider the possible ways of doing grouping sets on
+ * it, by combinations of hashing and sorting. This can be called multiple
+ * times, so it's important that it not scribble on input. No result is
+ * returned, but any generated paths are added to grouped_rel.
+ */
+static void
+consider_groupingsets_paths(PlannerInfo *root,
+ RelOptInfo *grouped_rel,
+ Path *path,
+ bool is_sorted,
+ bool can_hash,
+ PathTarget *target,
+ grouping_sets_data *gd,
+ const AggClauseCosts *agg_costs,
+ double dNumGroups)
+{
+ Query *parse = root->parse;
+
+ /*
+ * If we're not being offered sorted input, then only consider plans that
+ * can be done entirely by hashing.
+ *
+ * We can hash everything if it looks like it'll fit in work_mem. But if
+ * the input is actually sorted despite not being advertised as such, we
+ * prefer to make use of that in order to use less memory.
+ *
+ * If none of the grouping sets are sortable, then ignore the work_mem
+ * limit and generate a path anyway, since otherwise we'll just fail.
+ */
+ if (!is_sorted)
+ {
+ List *new_rollups = NIL;
+ RollupData *unhashed_rollup = NULL;
+ List *sets_data;
+ List *empty_sets_data = NIL;
+ List *empty_sets = NIL;
+ ListCell *lc;
+ ListCell *l_start = list_head(gd->rollups);
+ AggStrategy strat = AGG_HASHED;
+ Size hashsize;
+ double exclude_groups = 0.0;
+
+ Assert(can_hash);
+
+ if (pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
+ {
+ unhashed_rollup = lfirst(l_start);
+ exclude_groups = unhashed_rollup->numGroups;
+ l_start = lnext(l_start);
+ }
+
+ hashsize = estimate_hashagg_tablesize(path,
+ agg_costs,
+ dNumGroups - exclude_groups);
+
+ /*
+ * gd->rollups is empty if we have only unsortable columns to work
+ * with. Override work_mem in that case; otherwise, we'll rely on the
+ * sorted-input case to generate usable mixed paths.
+ */
+ if (hashsize > work_mem * 1024L && gd->rollups)
+ return; /* nope, won't fit */
+
+ /*
+ * We need to burst the existing rollups list into individual grouping
+ * sets and recompute a groupClause for each set.
+ */
+ sets_data = list_copy(gd->unsortable_sets);
+
+ for_each_cell(lc, l_start)
+ {
+ RollupData *rollup = lfirst(lc);
+
+ /*
+ * If we find an unhashable rollup that's not been skipped by the
+ * "actually sorted" check above, we can't cope; we'd need sorted
+ * input (with a different sort order) but we can't get that here.
+ * So bail out; we'll get a valid path from the is_sorted case
+ * instead.
+ *
+ * The mere presence of empty grouping sets doesn't make a rollup
+ * unhashable (see preprocess_grouping_sets); we handle those
+ * specially below.
+ */
+ if (!rollup->hashable)
+ return;
+ else
+ sets_data = list_concat(sets_data, list_copy(rollup->gsets_data));
+ }
+ foreach(lc, sets_data)
+ {
+ GroupingSetData *gs = lfirst(lc);
+ List *gset = gs->set;
+ RollupData *rollup;
+
+ if (gset == NIL)
+ {
+ /* Empty grouping sets can't be hashed. */
+ empty_sets_data = lappend(empty_sets_data, gs);
+ empty_sets = lappend(empty_sets, NIL);
+ }
+ else
+ {
+ rollup = makeNode(RollupData);
+
+ rollup->groupClause = preprocess_groupclause(root, gset);
+ rollup->gsets_data = list_make1(gs);
+ rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
+ rollup->gsets_data,
+ gd->tleref_to_colnum_map);
+ rollup->numGroups = gs->numGroups;
+ rollup->hashable = true;
+ rollup->is_hashed = true;
+ new_rollups = lappend(new_rollups, rollup);
+ }
+ }
+
+ /*
+ * If we didn't find anything nonempty to hash, then bail. We'll
+ * generate a path from the is_sorted case.
+ */
+ if (new_rollups == NIL)
+ return;
+
+ /*
+ * If there were empty grouping sets they should have been in the
+ * first rollup.
+ */
+ Assert(!unhashed_rollup || !empty_sets);
+
+ if (unhashed_rollup)
+ {
+ new_rollups = lappend(new_rollups, unhashed_rollup);
+ strat = AGG_MIXED;
+ }
+ else if (empty_sets)
+ {
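+ /*
+ * e.g. GROUPING SETS ((a,b),(),()) hashes (a,b) and gathers both
+ * empty sets into one non-hashed, AGG_PLAIN-style rollup, making
+ * the plan a MixedAggregate.
+ */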
+ RollupData *rollup = makeNode(RollupData);
+
+ rollup->groupClause = NIL;
+ rollup->gsets_data = empty_sets_data;
+ rollup->gsets = empty_sets;
+ rollup->numGroups = list_length(empty_sets);
+ rollup->hashable = false;
+ rollup->is_hashed = false;
+ new_rollups = lappend(new_rollups, rollup);
+ strat = AGG_MIXED;
+ }
+
+ add_path(grouped_rel, (Path *)
+ create_groupingsets_path(root,
+ grouped_rel,
+ path,
+ target,
+ (List *) parse->havingQual,
+ strat,
+ new_rollups,
+ agg_costs,
+ dNumGroups));
+ return;
+ }
+
+ /*
+ * If we have sorted input but nothing we can do with it, bail.
+ */
+ if (list_length(gd->rollups) == 0)
+ return;
+
+ /*
+ * Given sorted input, we try to make two paths: one sorted and one mixed
+ * sort/hash. (We need to try both because hashagg might be disabled, or
+ * some columns might not be sortable.)
+ *
+ * can_hash is passed in as false if some obstacle elsewhere (such as
+ * ordered aggs) means that we shouldn't consider hashing at all.
+ */
+ if (can_hash && gd->any_hashable)
+ {
+ List *rollups = NIL;
+ List *hash_sets = list_copy(gd->unsortable_sets);
+ double availspace = (work_mem * 1024.0);
+ ListCell *lc;
+
+ /*
+ * Account first for space needed for groups we can't sort at all.
+ */
+ availspace -= (double) estimate_hashagg_tablesize(path,
+ agg_costs,
+ gd->dNumHashGroups);
+
+ if (availspace > 0 && list_length(gd->rollups) > 1)
+ {
+ double scale;
+ int num_rollups = list_length(gd->rollups);
+ int k_capacity;
+ int *k_weights = palloc(num_rollups * sizeof(int));
+ Bitmapset *hash_items = NULL;
+ int i;
+
+ /*
+ * We treat this as a knapsack problem: the knapsack capacity
+ * represents work_mem, the item weights are the estimated memory
+ * usage of the hashtables needed to implement a single rollup, and
+ * we really ought to use the cost saving as the item value;
+ * however, currently the costs assigned to sort nodes don't
+ * reflect the comparison costs well, and so we treat all items as
+ * of equal value (each rollup we hash instead saves us one sort).
+ *
+ * To use the discrete knapsack, we need to scale the values to a
+ * reasonably small bounded range. We choose to allow a 5% error
+ * margin; we have no more than 4096 rollups in the worst possible
+ * case, which with a 5% error margin will require a bit over 42MB
+ * of workspace. (Anyone wanting to plan queries that complex had
+ * better have the memory for it. In more reasonable cases, with
+ * no more than a couple of dozen rollups, the memory usage will
+ * be negligible.)
+ *
+ * k_capacity is naturally bounded, but we clamp the values for
+ * scale and weight (below) to avoid overflows or underflows (or
+ * uselessly trying to use a scale factor less than 1 byte).
+ */
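+
+ /*
+ * Worked example (illustrative numbers): with 4MB of availspace and
+ * 4 rollups, scale = 4194304/80 and k_capacity = 80, i.e. weights
+ * are quantized to 1/(20 * num_rollups) of the space. Rounding loses
+ * at most one unit per item, so the total error over num_rollups
+ * items is bounded by availspace/20: the 5% margin cited above.
+ */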
+ scale = Max(availspace / (20.0 * num_rollups), 1.0);
+ k_capacity = (int) floor(availspace / scale);
+
+ /*
+ * We leave the first rollup out of consideration since it's the
+ * one that matches the input sort order. We assign indexes "i"
+ * to only those entries considered for hashing; the second loop,
+ * below, must use the same condition.
+ */
+ i = 0;
+ for_each_cell(lc, lnext(list_head(gd->rollups)))
+ {
+ RollupData *rollup = lfirst(lc);
+
+ if (rollup->hashable)
+ {
+ double sz = estimate_hashagg_tablesize(path,
+ agg_costs,
+ rollup->numGroups);
+
+ /*
+ * If sz is enormous, but work_mem (and hence scale) is
+ * small, avoid integer overflow here.
+ */
+ k_weights[i] = (int) Min(floor(sz / scale),
+ k_capacity + 1.0);
+ ++i;
+ }
+ }
+
+ /*
+ * Apply knapsack algorithm; compute the set of items which
+ * maximizes the value stored (in this case the number of sorts
+ * saved) while keeping the total size (approximately) within
+ * capacity.
+ */
+ if (i > 0)
+ hash_items = DiscreteKnapsack(k_capacity, i, k_weights, NULL);
+
+ if (!bms_is_empty(hash_items))
+ {
+ rollups = list_make1(linitial(gd->rollups));
+
+ i = 0;
+ for_each_cell(lc, lnext(list_head(gd->rollups)))
+ {
+ RollupData *rollup = lfirst(lc);
+
+ if (rollup->hashable)
+ {
+ if (bms_is_member(i, hash_items))
+ hash_sets = list_concat(hash_sets,
+ list_copy(rollup->gsets_data));
+ else
+ rollups = lappend(rollups, rollup);
+ ++i;
+ }
+ else
+ rollups = lappend(rollups, rollup);
+ }
+ }
+ }
+
+ if (!rollups && hash_sets)
+ rollups = list_copy(gd->rollups);
+
+ foreach(lc, hash_sets)
+ {
+ GroupingSetData *gs = lfirst(lc);
+ RollupData *rollup = makeNode(RollupData);
+
+ Assert(gs->set != NIL);
+
+ rollup->groupClause = preprocess_groupclause(root, gs->set);
+ rollup->gsets_data = list_make1(gs);
+ rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
+ rollup->gsets_data,
+ gd->tleref_to_colnum_map);
+ rollup->numGroups = gs->numGroups;
+ rollup->hashable = true;
+ rollup->is_hashed = true;
+ rollups = lcons(rollup, rollups);
+ }
+
+ if (rollups)
+ {
+ add_path(grouped_rel, (Path *)
+ create_groupingsets_path(root,
+ grouped_rel,
+ path,
+ target,
+ (List *) parse->havingQual,
+ AGG_MIXED,
+ rollups,
+ agg_costs,
+ dNumGroups));
+ }
+ }
+
+ /*
+ * Now try the simple sorted case.
+ */
+ if (!gd->unsortable_sets)
+ add_path(grouped_rel, (Path *)
+ create_groupingsets_path(root,
+ grouped_rel,
+ path,
+ target,
+ (List *) parse->havingQual,
+ AGG_SORTED,
+ gd->rollups,
+ agg_costs,
+ dNumGroups));
+}
+
/*
* create_window_paths
*
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index fca96eb001..999ebcee70 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -2697,10 +2697,9 @@ create_agg_path(PlannerInfo *root,
* 'subpath' is the path representing the source of data
* 'target' is the PathTarget to be computed
* 'having_qual' is the HAVING quals if any
- * 'rollup_lists' is a list of grouping sets
- * 'rollup_groupclauses' is a list of grouping clauses for grouping sets
+ * 'rollups' is a list of RollupData nodes
* 'agg_costs' contains cost info about the aggregate functions to be computed
- * 'numGroups' is the estimated number of groups
+ * 'numGroups' is the estimated total number of groups
*/
GroupingSetsPath *
create_groupingsets_path(PlannerInfo *root,
@@ -2708,13 +2707,15 @@ create_groupingsets_path(PlannerInfo *root,
Path *subpath,
PathTarget *target,
List *having_qual,
- List *rollup_lists,
- List *rollup_groupclauses,
+ AggStrategy aggstrategy,
+ List *rollups,
const AggClauseCosts *agg_costs,
double numGroups)
{
GroupingSetsPath *pathnode = makeNode(GroupingSetsPath);
- int numGroupCols;
+ ListCell *lc;
+ bool is_first = true;
+ bool is_first_sort = true;
/* The topmost generated Plan node will be an Agg */
pathnode->path.pathtype = T_Agg;
@@ -2728,74 +2729,109 @@ create_groupingsets_path(PlannerInfo *root,
pathnode->subpath = subpath;
/*
+ * Simplify callers by downgrading AGG_SORTED to AGG_PLAIN, and AGG_MIXED
+ * to AGG_HASHED, here if possible.
+ */
+ if (aggstrategy == AGG_SORTED &&
+ list_length(rollups) == 1 &&
+ ((RollupData *) linitial(rollups))->groupClause == NIL)
+ aggstrategy = AGG_PLAIN;
+
+ if (aggstrategy == AGG_MIXED &&
+ list_length(rollups) == 1)
+ aggstrategy = AGG_HASHED;
+
+ /*
* Output will be in sorted order by group_pathkeys if, and only if, there
* is a single rollup operation on a non-empty list of grouping
* expressions.
*/
- if (list_length(rollup_groupclauses) == 1 &&
- ((List *) linitial(rollup_groupclauses)) != NIL)
+ if (aggstrategy == AGG_SORTED && list_length(rollups) == 1)
pathnode->path.pathkeys = root->group_pathkeys;
else
pathnode->path.pathkeys = NIL;
- pathnode->rollup_groupclauses = rollup_groupclauses;
- pathnode->rollup_lists = rollup_lists;
+ pathnode->aggstrategy = aggstrategy;
+ pathnode->rollups = rollups;
pathnode->qual = having_qual;
- Assert(rollup_lists != NIL);
- Assert(list_length(rollup_lists) == list_length(rollup_groupclauses));
-
- /* Account for cost of the topmost Agg node */
- numGroupCols = list_length((List *) linitial((List *) llast(rollup_lists)));
-
- cost_agg(&pathnode->path, root,
- (numGroupCols > 0) ? AGG_SORTED : AGG_PLAIN,
- agg_costs,
- numGroupCols,
- numGroups,
- subpath->startup_cost,
- subpath->total_cost,
- subpath->rows);
+ Assert(rollups != NIL);
+ Assert(aggstrategy != AGG_PLAIN || list_length(rollups) == 1);
+ Assert(aggstrategy != AGG_MIXED || list_length(rollups) > 1);
- /*
- * Add in the costs and output rows of the additional sorting/aggregation
- * steps, if any. Only total costs count, since the extra sorts aren't
- * run on startup.
- */
- if (list_length(rollup_lists) > 1)
+ foreach(lc, rollups)
{
- ListCell *lc;
+ RollupData *rollup = lfirst(lc);
+ List *gsets = rollup->gsets;
+ int numGroupCols = list_length(linitial(gsets));
- foreach(lc, rollup_lists)
+ /*
+ * In AGG_SORTED or AGG_PLAIN mode, the first rollup takes the
+ * (already-sorted) input, and following ones do their own sort.
+ *
+ * In AGG_HASHED mode, there is one rollup for each grouping set.
+ *
+ * In AGG_MIXED mode, the first rollups are hashed, the first
+ * non-hashed one takes the (already-sorted) input, and following ones
+ * do their own sort.
+ */
+ if (is_first)
+ {
+ cost_agg(&pathnode->path, root,
+ aggstrategy,
+ agg_costs,
+ numGroupCols,
+ rollup->numGroups,
+ subpath->startup_cost,
+ subpath->total_cost,
+ subpath->rows);
+ is_first = false;
+ if (!rollup->is_hashed)
+ is_first_sort = false;
+ }
+ else
{
- List *gsets = (List *) lfirst(lc);
Path sort_path; /* dummy for result of cost_sort */
Path agg_path; /* dummy for result of cost_agg */
- /* We must iterate over all but the last rollup_lists element */
- if (lnext(lc) == NULL)
- break;
-
- /* Account for cost of sort, but don't charge input cost again */
- cost_sort(&sort_path, root, NIL,
- 0.0,
- subpath->rows,
- subpath->pathtarget->width,
- 0.0,
- work_mem,
- -1.0);
-
- /* Account for cost of aggregation */
- numGroupCols = list_length((List *) linitial(gsets));
-
- cost_agg(&agg_path, root,
- AGG_SORTED,
- agg_costs,
- numGroupCols,
- numGroups, /* XXX surely not right for all steps? */
- sort_path.startup_cost,
- sort_path.total_cost,
- sort_path.rows);
+ if (rollup->is_hashed || is_first_sort)
+ {
+ /*
+ * Account for cost of aggregation, but don't charge input
+ * cost again
+ */
+ cost_agg(&agg_path, root,
+ rollup->is_hashed ? AGG_HASHED : AGG_SORTED,
+ agg_costs,
+ numGroupCols,
+ rollup->numGroups,
+ 0.0, 0.0,
+ subpath->rows);
+ if (!rollup->is_hashed)
+ is_first_sort = false;
+ }
+ else
+ {
+ /* Account for cost of sort, but don't charge input cost again */
+ cost_sort(&sort_path, root, NIL,
+ 0.0,
+ subpath->rows,
+ subpath->pathtarget->width,
+ 0.0,
+ work_mem,
+ -1.0);
+
+ /* Account for cost of aggregation */
+
+ cost_agg(&agg_path, root,
+ AGG_SORTED,
+ agg_costs,
+ numGroupCols,
+ rollup->numGroups,
+ sort_path.startup_cost,
+ sort_path.total_cost,
+ sort_path.rows);
+ }
pathnode->path.total_cost += agg_path.total_cost;
pathnode->path.rows += agg_path.rows;
diff --git a/src/include/lib/knapsack.h b/src/include/lib/knapsack.h
new file mode 100644
index 0000000000..8d1e6d0aa0
--- /dev/null
+++ b/src/include/lib/knapsack.h
@@ -0,0 +1,17 @@
+/*
+ * knapsack.h
+ *
+ * Copyright (c) 2017, PostgreSQL Global Development Group
+ *
+ * src/include/lib/knapsack.h
+ */
+#ifndef KNAPSACK_H
+#define KNAPSACK_H
+
+#include "postgres.h"
+#include "nodes/bitmapset.h"
+
+extern Bitmapset *DiscreteKnapsack(int max_weight, int num_items,
+ int *item_weights, double *item_values);
+
+#endif /* KNAPSACK_H */
diff --git a/src/include/nodes/bitmapset.h b/src/include/nodes/bitmapset.h
index 4f1910e5a9..109f7b0c14 100644
--- a/src/include/nodes/bitmapset.h
+++ b/src/include/nodes/bitmapset.h
@@ -21,6 +21,11 @@
#define BITMAPSET_H
/*
+ * Forward decl to save including pg_list.h
+ */
+struct List;
+
+/*
* Data representation
*/
@@ -70,6 +75,7 @@ extern bool bms_is_subset(const Bitmapset *a, const Bitmapset *b);
extern BMS_Comparison bms_subset_compare(const Bitmapset *a, const Bitmapset *b);
extern bool bms_is_member(int x, const Bitmapset *a);
extern bool bms_overlap(const Bitmapset *a, const Bitmapset *b);
+extern bool bms_overlap_list(const Bitmapset *a, const struct List *b);
extern bool bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b);
extern int bms_singleton_member(const Bitmapset *a);
extern bool bms_get_singleton_member(const Bitmapset *a, int *member);
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index f856f6036f..33063071b4 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1943,6 +1943,7 @@ typedef struct AggStatePerAggData *AggStatePerAgg;
typedef struct AggStatePerTransData *AggStatePerTrans;
typedef struct AggStatePerGroupData *AggStatePerGroup;
typedef struct AggStatePerPhaseData *AggStatePerPhase;
+typedef struct AggStatePerHashData *AggStatePerHash;
typedef struct AggState
{
@@ -1950,15 +1951,17 @@ typedef struct AggState
List *aggs; /* all Aggref nodes in targetlist & quals */
int numaggs; /* length of list (could be zero!) */
int numtrans; /* number of pertrans items */
+ AggStrategy aggstrategy; /* strategy mode */
AggSplit aggsplit; /* agg-splitting mode, see nodes.h */
AggStatePerPhase phase; /* pointer to current phase data */
- int numphases; /* number of phases */
+ int numphases; /* number of phases (including phase 0) */
int current_phase; /* current phase number */
- FmgrInfo *hashfunctions; /* per-grouping-field hash fns */
AggStatePerAgg peragg; /* per-Aggref information */
AggStatePerTrans pertrans; /* per-Trans state information */
+ ExprContext *hashcontext; /* econtext for long-lived data (hashtable) */
ExprContext **aggcontexts; /* econtexts for long-lived data (per GS) */
ExprContext *tmpcontext; /* econtext for input expressions */
+ ExprContext *curaggcontext; /* currently active aggcontext */
AggStatePerTrans curpertrans; /* currently active trans state */
bool input_done; /* indicates end of input */
bool agg_done; /* indicates completion of Agg scan */
@@ -1970,21 +1973,17 @@ typedef struct AggState
/* These fields are for grouping set phase data */
int maxsets; /* The max number of sets in any phase */
AggStatePerPhase phases; /* array of all phases */
- Tuplesortstate *sort_in; /* sorted input to phases > 0 */
+ Tuplesortstate *sort_in; /* sorted input to phases > 1 */
Tuplesortstate *sort_out; /* input is copied here for next phase */
TupleTableSlot *sort_slot; /* slot for sort results */
/* these fields are used in AGG_PLAIN and AGG_SORTED modes: */
AggStatePerGroup pergroup; /* per-Aggref-per-group working state */
HeapTuple grp_firstTuple; /* copy of first tuple of current group */
- /* these fields are used in AGG_HASHED mode: */
- TupleHashTable hashtable; /* hash table with one entry per group */
- TupleTableSlot *hashslot; /* slot for loading hash table */
- int numhashGrpCols; /* number of columns in hash table */
- int largestGrpColIdx; /* largest column required for hashing */
- AttrNumber *hashGrpColIdxInput; /* and their indices in input slot */
- AttrNumber *hashGrpColIdxHash; /* indices for execGrouping in hashtbl */
+ /* these fields are used in AGG_HASHED and AGG_MIXED modes: */
bool table_filled; /* hash table filled yet? */
- TupleHashIterator hashiter; /* for iterating through hash table */
+ int num_hashes; /* number of hashtables (one per hashed grouping set) */
+ AggStatePerHash perhash; /* array of per-hashtable data */
+ AggStatePerGroup *hash_pergroup; /* array of per-group pointers */
/* support for evaluation of agg inputs */
TupleTableSlot *evalslot; /* slot for agg inputs */
ProjectionInfo *evalproj; /* projection machinery */
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index fc883a6f3e..4584aed319 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -279,6 +279,8 @@ typedef enum NodeTag
T_PlaceHolderInfo,
T_MinMaxAggInfo,
T_PlannerParamItem,
+ T_RollupData,
+ T_GroupingSetData,
T_StatisticExtInfo,
/*
@@ -742,7 +744,8 @@ typedef enum AggStrategy
{
AGG_PLAIN, /* simple agg across all input rows */
AGG_SORTED, /* grouped agg, input must be sorted */
- AGG_HASHED /* grouped agg, use internal hashtable */
+ AGG_HASHED, /* grouped agg, use internal hashtable */
+ AGG_MIXED /* grouped agg, hash and sort both used */
} AggStrategy;
/*
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 4a95e16b69..6e531b6238 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -758,7 +758,7 @@ typedef struct Agg
Oid *grpOperators; /* equality operators to compare with */
long numGroups; /* estimated number of groups in input */
Bitmapset *aggParams; /* IDs of Params used in Aggref inputs */
- /* Note: planner provides numGroups & aggParams only in AGG_HASHED case */
+ /* Note: planner provides numGroups & aggParams only in HASHED/MIXED case */
List *groupingSets; /* grouping sets to use */
List *chain; /* chained Agg/Sort nodes */
} Agg;
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 0a5187cef3..8930edf826 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -1418,17 +1418,37 @@ typedef struct AggPath
} AggPath;
/*
+ * Various annotations used for grouping sets in the planner.
+ */
+
+typedef struct GroupingSetData
+{
+ NodeTag type;
+ List *set; /* grouping set as list of sortgrouprefs */
+ double numGroups; /* est. number of result groups */
+} GroupingSetData;
+
+typedef struct RollupData
+{
+ NodeTag type;
+ List *groupClause; /* applicable subset of parse->groupClause */
+ List *gsets; /* lists of integer indexes into groupClause */
+ List *gsets_data; /* list of GroupingSetData */
+ double numGroups; /* est. number of result groups */
+ bool hashable; /* can be hashed */
+ bool is_hashed; /* to be implemented as a hashagg */
+} RollupData;
+
+/*
* GroupingSetsPath represents a GROUPING SETS aggregation
- *
- * Currently we only support this in sorted not hashed form, so the input
- * must always be appropriately presorted.
*/
+
typedef struct GroupingSetsPath
{
Path path;
Path *subpath; /* path representing input source */
- List *rollup_groupclauses; /* list of lists of SortGroupClause's */
- List *rollup_lists; /* parallel list of lists of grouping sets */
+ AggStrategy aggstrategy; /* basic strategy */
+ List *rollups; /* list of RollupData */
List *qual; /* quals (HAVING quals), if any */
} GroupingSetsPath;
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 81640de7ab..c72c7e02cb 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -195,8 +195,8 @@ extern GroupingSetsPath *create_groupingsets_path(PlannerInfo *root,
Path *subpath,
PathTarget *target,
List *having_qual,
- List *rollup_lists,
- List *rollup_groupclauses,
+ AggStrategy aggstrategy,
+ List *rollups,
const AggClauseCosts *agg_costs,
double numGroups);
extern MinMaxAggPath *create_minmaxagg_path(PlannerInfo *root,
diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out
index b0886da8d7..1374710087 100644
--- a/src/test/regress/expected/groupingsets.out
+++ b/src/test/regress/expected/groupingsets.out
@@ -13,6 +13,13 @@ copy gstest2 from stdin;
create temp table gstest3 (a integer, b integer, c integer, d integer);
copy gstest3 from stdin;
alter table gstest3 add primary key (a);
+create temp table gstest4(id integer, v integer,
+ unhashable_col bit(4), unsortable_col xid);
+insert into gstest4
+values (1,1,b'0000','1'), (2,2,b'0001','1'),
+ (3,4,b'0010','2'), (4,8,b'0011','2'),
+ (5,16,b'0000','2'), (6,32,b'0001','2'),
+ (7,64,b'0010','1'), (8,128,b'0011','1');
create temp table gstest_empty (a integer, b integer, v integer);
create function gstest_data(v integer, out a integer, out b integer)
returns setof record
@@ -22,6 +29,7 @@ create function gstest_data(v integer, out a integer, out b integer)
end;
$f$ language plpgsql;
-- basic functionality
+set enable_hashagg = false; -- test hashing explicitly later
-- simple rollup with multiple plain aggregates, with and without ordering
-- (and with ordering differing from grouping)
select a, b, grouping(a,b), sum(v), count(*), max(v)
@@ -462,7 +470,7 @@ select a, b from (values (1,2),(2,3)) v(a,b) group by a,b, grouping sets(a);
-- Tests for chained aggregates
select a, b, grouping(a,b), sum(v), count(*), max(v)
- from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2));
+ from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
a | b | grouping | sum | count | max
---+---+----------+-----+-------+-----
1 | 1 | 0 | 21 | 2 | 11
@@ -473,19 +481,19 @@ select a, b, grouping(a,b), sum(v), count(*), max(v)
3 | 4 | 0 | 17 | 1 | 17
4 | 1 | 0 | 37 | 2 | 19
| | 3 | 21 | 2 | 11
- | | 3 | 25 | 2 | 13
- | | 3 | 14 | 1 | 14
- | | 3 | 15 | 1 | 15
- | | 3 | 16 | 1 | 16
- | | 3 | 17 | 1 | 17
- | | 3 | 37 | 2 | 19
| | 3 | 21 | 2 | 11
| | 3 | 25 | 2 | 13
+ | | 3 | 25 | 2 | 13
+ | | 3 | 14 | 1 | 14
| | 3 | 14 | 1 | 14
| | 3 | 15 | 1 | 15
+ | | 3 | 15 | 1 | 15
| | 3 | 16 | 1 | 16
+ | | 3 | 16 | 1 | 16
+ | | 3 | 17 | 1 | 17
| | 3 | 17 | 1 | 17
| | 3 | 37 | 2 | 19
+ | | 3 | 37 | 2 | 19
(21 rows)
select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY ROLLUP((e+1),(f+1));
@@ -847,4 +855,598 @@ select sum(ten) from onek group by rollup(four::text), two order by 1;
2500
(6 rows)
+-- hashing support
+set enable_hashagg = true;
+-- failure cases
+select count(*) from gstest4 group by rollup(unhashable_col,unsortable_col);
+ERROR: could not implement GROUP BY
+DETAIL: Some of the datatypes only support hashing, while others only support sorting.
+select array_agg(v order by v) from gstest4 group by grouping sets ((id,unsortable_col),(id));
+ERROR: could not implement GROUP BY
+DETAIL: Some of the datatypes only support hashing, while others only support sorting.
+-- simple cases
+select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a),(b)) order by 3,1,2;
+ a | b | grouping | sum | count | max
+---+---+----------+-----+-------+-----
+ 1 | | 1 | 60 | 5 | 14
+ 2 | | 1 | 15 | 1 | 15
+ 3 | | 1 | 33 | 2 | 17
+ 4 | | 1 | 37 | 2 | 19
+ | 1 | 2 | 58 | 4 | 19
+ | 2 | 2 | 25 | 2 | 13
+ | 3 | 2 | 45 | 3 | 16
+ | 4 | 2 | 17 | 1 | 17
+(8 rows)
+
+explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a),(b)) order by 3,1,2;
+ QUERY PLAN
+--------------------------------------------------------------------------------------------------------
+ Sort
+ Sort Key: (GROUPING("*VALUES*".column1, "*VALUES*".column2)), "*VALUES*".column1, "*VALUES*".column2
+ -> HashAggregate
+ Hash Key: "*VALUES*".column1
+ Hash Key: "*VALUES*".column2
+ -> Values Scan on "*VALUES*"
+(6 rows)
+
+select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by cube(a,b) order by 3,1,2;
+ a | b | grouping | sum | count | max
+---+---+----------+-----+-------+-----
+ 1 | 1 | 0 | 21 | 2 | 11
+ 1 | 2 | 0 | 25 | 2 | 13
+ 1 | 3 | 0 | 14 | 1 | 14
+ 2 | 3 | 0 | 15 | 1 | 15
+ 3 | 3 | 0 | 16 | 1 | 16
+ 3 | 4 | 0 | 17 | 1 | 17
+ 4 | 1 | 0 | 37 | 2 | 19
+ 1 | | 1 | 60 | 5 | 14
+ 2 | | 1 | 15 | 1 | 15
+ 3 | | 1 | 33 | 2 | 17
+ 4 | | 1 | 37 | 2 | 19
+ | 1 | 2 | 58 | 4 | 19
+ | 2 | 2 | 25 | 2 | 13
+ | 3 | 2 | 45 | 3 | 16
+ | 4 | 2 | 17 | 1 | 17
+ | | 3 | 145 | 10 | 19
+(16 rows)
+
+explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by cube(a,b) order by 3,1,2;
+ QUERY PLAN
+--------------------------------------------------------------------------------------------------------
+ Sort
+ Sort Key: (GROUPING("*VALUES*".column1, "*VALUES*".column2)), "*VALUES*".column1, "*VALUES*".column2
+ -> MixedAggregate
+ Hash Key: "*VALUES*".column1, "*VALUES*".column2
+ Hash Key: "*VALUES*".column1
+ Hash Key: "*VALUES*".column2
+ Group Key: ()
+ -> Values Scan on "*VALUES*"
+(8 rows)
+
+-- shouldn't try and hash
+explain (costs off)
+ select a, b, grouping(a,b), array_agg(v order by v)
+ from gstest1 group by cube(a,b);
+ QUERY PLAN
+----------------------------------------------------------
+ GroupAggregate
+ Group Key: "*VALUES*".column1, "*VALUES*".column2
+ Group Key: "*VALUES*".column1
+ Group Key: ()
+ Sort Key: "*VALUES*".column2
+ Group Key: "*VALUES*".column2
+ -> Sort
+ Sort Key: "*VALUES*".column1, "*VALUES*".column2
+ -> Values Scan on "*VALUES*"
+(9 rows)
+
+-- mixed hashable/sortable cases
+select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((unhashable_col),(unsortable_col))
+ order by 3, 5;
+ unhashable_col | unsortable_col | grouping | count | sum
+----------------+----------------+----------+-------+-----
+ 0000 | | 1 | 2 | 17
+ 0001 | | 1 | 2 | 34
+ 0010 | | 1 | 2 | 68
+ 0011 | | 1 | 2 | 136
+ | 2 | 2 | 4 | 60
+ | 1 | 2 | 4 | 195
+(6 rows)
+
+explain (costs off)
+ select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((unhashable_col),(unsortable_col))
+ order by 3,5;
+ QUERY PLAN
+------------------------------------------------------------------
+ Sort
+ Sort Key: (GROUPING(unhashable_col, unsortable_col)), (sum(v))
+ -> MixedAggregate
+ Hash Key: unsortable_col
+ Group Key: unhashable_col
+ -> Sort
+ Sort Key: unhashable_col
+ -> Seq Scan on gstest4
+(8 rows)
+
+select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col))
+ order by 3,5;
+ unhashable_col | unsortable_col | grouping | count | sum
+----------------+----------------+----------+-------+-----
+ 0000 | | 1 | 1 | 1
+ 0001 | | 1 | 1 | 2
+ 0010 | | 1 | 1 | 4
+ 0011 | | 1 | 1 | 8
+ 0000 | | 1 | 1 | 16
+ 0001 | | 1 | 1 | 32
+ 0010 | | 1 | 1 | 64
+ 0011 | | 1 | 1 | 128
+ | 1 | 2 | 1 | 1
+ | 1 | 2 | 1 | 2
+ | 2 | 2 | 1 | 4
+ | 2 | 2 | 1 | 8
+ | 2 | 2 | 1 | 16
+ | 2 | 2 | 1 | 32
+ | 1 | 2 | 1 | 64
+ | 1 | 2 | 1 | 128
+(16 rows)
+
+explain (costs off)
+ select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col))
+ order by 3,5;
+ QUERY PLAN
+------------------------------------------------------------------
+ Sort
+ Sort Key: (GROUPING(unhashable_col, unsortable_col)), (sum(v))
+ -> MixedAggregate
+ Hash Key: v, unsortable_col
+ Group Key: v, unhashable_col
+ -> Sort
+ Sort Key: v, unhashable_col
+ -> Seq Scan on gstest4
+(8 rows)
+
+-- empty input: first is 0 rows, second 1, third 3 etc.
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a);
+ a | b | sum | count
+---+---+-----+-------
+(0 rows)
+
+explain (costs off)
+ select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a);
+ QUERY PLAN
+--------------------------------
+ HashAggregate
+ Hash Key: a, b
+ Hash Key: a
+ -> Seq Scan on gstest_empty
+(4 rows)
+
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),());
+ a | b | sum | count
+---+---+-----+-------
+ | | | 0
+(1 row)
+
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),());
+ a | b | sum | count
+---+---+-----+-------
+ | | | 0
+ | | | 0
+ | | | 0
+(3 rows)
+
+explain (costs off)
+ select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),());
+ QUERY PLAN
+--------------------------------
+ MixedAggregate
+ Hash Key: a, b
+ Group Key: ()
+ Group Key: ()
+ Group Key: ()
+ -> Seq Scan on gstest_empty
+(6 rows)
+
+select sum(v), count(*) from gstest_empty group by grouping sets ((),(),());
+ sum | count
+-----+-------
+ | 0
+ | 0
+ | 0
+(3 rows)
+
+explain (costs off)
+ select sum(v), count(*) from gstest_empty group by grouping sets ((),(),());
+ QUERY PLAN
+--------------------------------
+ Aggregate
+ Group Key: ()
+ Group Key: ()
+ Group Key: ()
+ -> Seq Scan on gstest_empty
+(5 rows)
+
+-- check that functionally dependent cols are not nulled
+select a, d, grouping(a,b,c)
+ from gstest3
+ group by grouping sets ((a,b), (a,c));
+ a | d | grouping
+---+---+----------
+ 1 | 1 | 1
+ 2 | 2 | 1
+ 1 | 1 | 2
+ 2 | 2 | 2
+(4 rows)
+
+explain (costs off)
+ select a, d, grouping(a,b,c)
+ from gstest3
+ group by grouping sets ((a,b), (a,c));
+ QUERY PLAN
+---------------------------
+ HashAggregate
+ Hash Key: a, b
+ Hash Key: a, c
+ -> Seq Scan on gstest3
+(4 rows)
+
+-- simple rescan tests
+select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by grouping sets (a,b);
+ a | b | sum
+---+---+-----
+ 2 | | 6
+ 1 | | 3
+ | 2 | 3
+ | 3 | 3
+ | 1 | 3
+(5 rows)
+
+explain (costs off)
+ select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by grouping sets (a,b);
+ QUERY PLAN
+------------------------------------------
+ HashAggregate
+ Hash Key: gstest_data.a
+ Hash Key: gstest_data.b
+ -> Nested Loop
+ -> Values Scan on "*VALUES*"
+ -> Function Scan on gstest_data
+(6 rows)
+
+select *
+ from (values (1),(2)) v(x),
+ lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s;
+ERROR: aggregate functions are not allowed in FROM clause of their own query level
+LINE 3: lateral (select a, b, sum(v.x) from gstest_data(v.x) ...
+ ^
+explain (costs off)
+ select *
+ from (values (1),(2)) v(x),
+ lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s;
+ERROR: aggregate functions are not allowed in FROM clause of their own query level
+LINE 4: lateral (select a, b, sum(v.x) from gstest_data(v.x...
+ ^
+-- Tests for chained aggregates
+select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
+ a | b | grouping | sum | count | max
+---+---+----------+-----+-------+-----
+ 1 | 1 | 0 | 21 | 2 | 11
+ 1 | 2 | 0 | 25 | 2 | 13
+ 1 | 3 | 0 | 14 | 1 | 14
+ 2 | 3 | 0 | 15 | 1 | 15
+ 3 | 3 | 0 | 16 | 1 | 16
+ 3 | 4 | 0 | 17 | 1 | 17
+ 4 | 1 | 0 | 37 | 2 | 19
+ | | 3 | 21 | 2 | 11
+ | | 3 | 21 | 2 | 11
+ | | 3 | 25 | 2 | 13
+ | | 3 | 25 | 2 | 13
+ | | 3 | 14 | 1 | 14
+ | | 3 | 14 | 1 | 14
+ | | 3 | 15 | 1 | 15
+ | | 3 | 15 | 1 | 15
+ | | 3 | 16 | 1 | 16
+ | | 3 | 16 | 1 | 16
+ | | 3 | 17 | 1 | 17
+ | | 3 | 17 | 1 | 17
+ | | 3 | 37 | 2 | 19
+ | | 3 | 37 | 2 | 19
+(21 rows)
+
+explain (costs off)
+ select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
+ QUERY PLAN
+-------------------------------------------------------------------------------------------
+ Sort
+ Sort Key: (GROUPING("*VALUES*".column1, "*VALUES*".column2)), (max("*VALUES*".column3))
+ -> HashAggregate
+ Hash Key: "*VALUES*".column1, "*VALUES*".column2
+ Hash Key: ("*VALUES*".column1 + 1), ("*VALUES*".column2 + 1)
+ Hash Key: ("*VALUES*".column1 + 2), ("*VALUES*".column2 + 2)
+ -> Values Scan on "*VALUES*"
+(7 rows)
+
+select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
+ from gstest2 group by cube (a,b) order by rsum, a, b;
+ a | b | sum | rsum
+---+---+-----+------
+ 1 | 1 | 8 | 8
+ 1 | 2 | 2 | 10
+ 1 | | 10 | 20
+ 2 | 2 | 2 | 22
+ 2 | | 2 | 24
+ | 1 | 8 | 32
+ | 2 | 4 | 36
+ | | 12 | 48
+(8 rows)
+
+explain (costs off)
+ select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
+ from gstest2 group by cube (a,b) order by rsum, a, b;
+ QUERY PLAN
+---------------------------------------------
+ Sort
+ Sort Key: (sum((sum(c))) OVER (?)), a, b
+ -> WindowAgg
+ -> Sort
+ Sort Key: a, b
+ -> MixedAggregate
+ Hash Key: a, b
+ Hash Key: a
+ Hash Key: b
+ Group Key: ()
+ -> Seq Scan on gstest2
+(11 rows)
+
+select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by cube (a,b) order by a,b;
+ a | b | sum
+---+---+-----
+ 1 | 1 | 1
+ 1 | 2 | 1
+ 1 | 3 | 1
+ 1 | | 3
+ 2 | 1 | 2
+ 2 | 2 | 2
+ 2 | 3 | 2
+ 2 | | 6
+ | 1 | 3
+ | 2 | 3
+ | 3 | 3
+ | | 9
+(12 rows)
+
+explain (costs off)
+ select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by cube (a,b) order by a,b;
+ QUERY PLAN
+------------------------------------------------
+ Sort
+ Sort Key: gstest_data.a, gstest_data.b
+ -> MixedAggregate
+ Hash Key: gstest_data.a, gstest_data.b
+ Hash Key: gstest_data.a
+ Hash Key: gstest_data.b
+ Group Key: ()
+ -> Nested Loop
+ -> Values Scan on "*VALUES*"
+ -> Function Scan on gstest_data
+(10 rows)
+
+-- More rescan tests
+select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by cube(four,ten)) s on true order by v.a,four,ten;
+ a | a | four | ten | count
+---+---+------+-----+-------
+ 1 | 1 | 0 | 0 | 50
+ 1 | 1 | 0 | 2 | 50
+ 1 | 1 | 0 | 4 | 50
+ 1 | 1 | 0 | 6 | 50
+ 1 | 1 | 0 | 8 | 50
+ 1 | 1 | 0 | | 250
+ 1 | 1 | 1 | 1 | 50
+ 1 | 1 | 1 | 3 | 50
+ 1 | 1 | 1 | 5 | 50
+ 1 | 1 | 1 | 7 | 50
+ 1 | 1 | 1 | 9 | 50
+ 1 | 1 | 1 | | 250
+ 1 | 1 | 2 | 0 | 50
+ 1 | 1 | 2 | 2 | 50
+ 1 | 1 | 2 | 4 | 50
+ 1 | 1 | 2 | 6 | 50
+ 1 | 1 | 2 | 8 | 50
+ 1 | 1 | 2 | | 250
+ 1 | 1 | 3 | 1 | 50
+ 1 | 1 | 3 | 3 | 50
+ 1 | 1 | 3 | 5 | 50
+ 1 | 1 | 3 | 7 | 50
+ 1 | 1 | 3 | 9 | 50
+ 1 | 1 | 3 | | 250
+ 1 | 1 | | 0 | 100
+ 1 | 1 | | 1 | 100
+ 1 | 1 | | 2 | 100
+ 1 | 1 | | 3 | 100
+ 1 | 1 | | 4 | 100
+ 1 | 1 | | 5 | 100
+ 1 | 1 | | 6 | 100
+ 1 | 1 | | 7 | 100
+ 1 | 1 | | 8 | 100
+ 1 | 1 | | 9 | 100
+ 1 | 1 | | | 1000
+ 2 | 2 | 0 | 0 | 50
+ 2 | 2 | 0 | 2 | 50
+ 2 | 2 | 0 | 4 | 50
+ 2 | 2 | 0 | 6 | 50
+ 2 | 2 | 0 | 8 | 50
+ 2 | 2 | 0 | | 250
+ 2 | 2 | 1 | 1 | 50
+ 2 | 2 | 1 | 3 | 50
+ 2 | 2 | 1 | 5 | 50
+ 2 | 2 | 1 | 7 | 50
+ 2 | 2 | 1 | 9 | 50
+ 2 | 2 | 1 | | 250
+ 2 | 2 | 2 | 0 | 50
+ 2 | 2 | 2 | 2 | 50
+ 2 | 2 | 2 | 4 | 50
+ 2 | 2 | 2 | 6 | 50
+ 2 | 2 | 2 | 8 | 50
+ 2 | 2 | 2 | | 250
+ 2 | 2 | 3 | 1 | 50
+ 2 | 2 | 3 | 3 | 50
+ 2 | 2 | 3 | 5 | 50
+ 2 | 2 | 3 | 7 | 50
+ 2 | 2 | 3 | 9 | 50
+ 2 | 2 | 3 | | 250
+ 2 | 2 | | 0 | 100
+ 2 | 2 | | 1 | 100
+ 2 | 2 | | 2 | 100
+ 2 | 2 | | 3 | 100
+ 2 | 2 | | 4 | 100
+ 2 | 2 | | 5 | 100
+ 2 | 2 | | 6 | 100
+ 2 | 2 | | 7 | 100
+ 2 | 2 | | 8 | 100
+ 2 | 2 | | 9 | 100
+ 2 | 2 | | | 1000
+(70 rows)
+
+select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by cube(two,four) order by two,four) s1) from (values (1),(2)) v(a);
+ array
+------------------------------------------------------------------------------------------------------------------------------------------------------
+ {"(1,0,0,250)","(1,0,2,250)","(1,0,,500)","(1,1,1,250)","(1,1,3,250)","(1,1,,500)","(1,,0,250)","(1,,1,250)","(1,,2,250)","(1,,3,250)","(1,,,1000)"}
+ {"(2,0,0,250)","(2,0,2,250)","(2,0,,500)","(2,1,1,250)","(2,1,3,250)","(2,1,,500)","(2,,0,250)","(2,,1,250)","(2,,2,250)","(2,,3,250)","(2,,,1000)"}
+(2 rows)
+
+-- Rescan logic changes when there are no empty grouping sets, so test
+-- that too:
+select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by grouping sets(four,ten)) s on true order by v.a,four,ten;
+ a | a | four | ten | count
+---+---+------+-----+-------
+ 1 | 1 | 0 | | 250
+ 1 | 1 | 1 | | 250
+ 1 | 1 | 2 | | 250
+ 1 | 1 | 3 | | 250
+ 1 | 1 | | 0 | 100
+ 1 | 1 | | 1 | 100
+ 1 | 1 | | 2 | 100
+ 1 | 1 | | 3 | 100
+ 1 | 1 | | 4 | 100
+ 1 | 1 | | 5 | 100
+ 1 | 1 | | 6 | 100
+ 1 | 1 | | 7 | 100
+ 1 | 1 | | 8 | 100
+ 1 | 1 | | 9 | 100
+ 2 | 2 | 0 | | 250
+ 2 | 2 | 1 | | 250
+ 2 | 2 | 2 | | 250
+ 2 | 2 | 3 | | 250
+ 2 | 2 | | 0 | 100
+ 2 | 2 | | 1 | 100
+ 2 | 2 | | 2 | 100
+ 2 | 2 | | 3 | 100
+ 2 | 2 | | 4 | 100
+ 2 | 2 | | 5 | 100
+ 2 | 2 | | 6 | 100
+ 2 | 2 | | 7 | 100
+ 2 | 2 | | 8 | 100
+ 2 | 2 | | 9 | 100
+(28 rows)
+
+select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by grouping sets(two,four) order by two,four) s1) from (values (1),(2)) v(a);
+ array
+---------------------------------------------------------------------------------
+ {"(1,0,,500)","(1,1,,500)","(1,,0,250)","(1,,1,250)","(1,,2,250)","(1,,3,250)"}
+ {"(2,0,,500)","(2,1,,500)","(2,,0,250)","(2,,1,250)","(2,,2,250)","(2,,3,250)"}
+(2 rows)
+
+-- test the knapsack
+set work_mem = '64kB';
+explain (costs off)
+ select unique1,
+ count(two), count(four), count(ten),
+ count(hundred), count(thousand), count(twothousand),
+ count(*)
+ from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two);
+ QUERY PLAN
+-------------------------------
+ MixedAggregate
+ Hash Key: two
+ Hash Key: four
+ Hash Key: ten
+ Hash Key: hundred
+ Group Key: unique1
+ Sort Key: twothousand
+ Group Key: twothousand
+ Sort Key: thousand
+ Group Key: thousand
+ -> Sort
+ Sort Key: unique1
+ -> Seq Scan on tenk1
+(13 rows)
+
+explain (costs off)
+ select unique1,
+ count(two), count(four), count(ten),
+ count(hundred), count(thousand), count(twothousand),
+ count(*)
+ from tenk1 group by grouping sets (unique1,hundred,ten,four,two);
+ QUERY PLAN
+-------------------------------
+ MixedAggregate
+ Hash Key: two
+ Hash Key: four
+ Hash Key: ten
+ Hash Key: hundred
+ Group Key: unique1
+ -> Sort
+ Sort Key: unique1
+ -> Seq Scan on tenk1
+(9 rows)
+
+set work_mem = '384kB';
+explain (costs off)
+ select unique1,
+ count(two), count(four), count(ten),
+ count(hundred), count(thousand), count(twothousand),
+ count(*)
+ from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two);
+ QUERY PLAN
+-------------------------------
+ MixedAggregate
+ Hash Key: two
+ Hash Key: four
+ Hash Key: ten
+ Hash Key: hundred
+ Hash Key: thousand
+ Group Key: unique1
+ Sort Key: twothousand
+ Group Key: twothousand
+ -> Sort
+ Sort Key: unique1
+ -> Seq Scan on tenk1
+(12 rows)
+
-- end
diff --git a/src/test/regress/expected/tsrf.out b/src/test/regress/expected/tsrf.out
index 0eeaf9e830..33f370b494 100644
--- a/src/test/regress/expected/tsrf.out
+++ b/src/test/regress/expected/tsrf.out
@@ -233,6 +233,7 @@ SELECT few.dataa, count(*), min(id), max(id), generate_series(1,3) FROM few GROU
(6 rows)
-- grouping sets are a bit special, they produce NULLs in columns not actually NULL
+set enable_hashagg = false;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab);
dataa | b | g | count
-------+-----+---+-------
@@ -311,46 +312,46 @@ SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(d
b | bar | | 2
b | | | 2
| | | 6
- a | | 1 | 2
- b | | 1 | 1
- | | 1 | 3
- a | | 2 | 2
- b | | 2 | 1
- | | 2 | 3
| bar | 1 | 2
| bar | 2 | 2
| bar | | 4
| foo | 1 | 1
| foo | 2 | 1
| foo | | 2
+ a | | 1 | 2
+ b | | 1 | 1
+ | | 1 | 3
+ a | | 2 | 2
+ b | | 2 | 1
+ | | 2 | 3
(24 rows)
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g) ORDER BY dataa;
dataa | b | g | count
-------+-----+---+-------
+ a | foo | | 2
+ a | | | 4
+ a | | 2 | 2
a | bar | 1 | 1
a | bar | 2 | 1
a | bar | | 2
a | foo | 1 | 1
a | foo | 2 | 1
- a | foo | | 2
- a | | | 4
a | | 1 | 2
- a | | 2 | 2
- b | bar | 2 | 1
+ b | bar | 1 | 1
b | | | 2
b | | 1 | 1
- b | | 2 | 1
- b | bar | 1 | 1
+ b | bar | 2 | 1
b | bar | | 2
- | foo | | 2
- | foo | 1 | 1
+ b | | 2 | 1
| | 2 | 3
+ | | | 6
| bar | 1 | 2
| bar | 2 | 2
- | | | 6
- | foo | 2 | 1
| bar | | 4
+ | foo | 1 | 1
+ | foo | 2 | 1
+ | foo | | 2
| | 1 | 3
(24 rows)
@@ -360,29 +361,30 @@ SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(d
a | bar | 1 | 1
a | foo | 1 | 1
b | bar | 1 | 1
+ | bar | 1 | 2
+ | foo | 1 | 1
a | | 1 | 2
b | | 1 | 1
| | 1 | 3
- | bar | 1 | 2
- | foo | 1 | 1
- | foo | 2 | 1
- | bar | 2 | 2
a | | 2 | 2
b | | 2 | 1
- a | bar | 2 | 1
+ | bar | 2 | 2
| | 2 | 3
+ | foo | 2 | 1
+ a | bar | 2 | 1
a | foo | 2 | 1
b | bar | 2 | 1
- a | foo | | 2
+ a | | | 4
b | bar | | 2
b | | | 2
| | | 6
- a | | | 4
+ a | foo | | 2
+ a | bar | | 2
| bar | | 4
| foo | | 2
- a | bar | | 2
(24 rows)
+reset enable_hashagg;
-- data modification
CREATE TABLE fewmore AS SELECT generate_series(1,3) AS data;
INSERT INTO fewmore VALUES(generate_series(4,5));
diff --git a/src/test/regress/sql/groupingsets.sql b/src/test/regress/sql/groupingsets.sql
index cc557cc552..0e023d9ddc 100644
--- a/src/test/regress/sql/groupingsets.sql
+++ b/src/test/regress/sql/groupingsets.sql
@@ -31,6 +31,14 @@ copy gstest3 from stdin;
\.
alter table gstest3 add primary key (a);
+create temp table gstest4(id integer, v integer,
+ unhashable_col bit(4), unsortable_col xid);
+insert into gstest4
+values (1,1,b'0000','1'), (2,2,b'0001','1'),
+ (3,4,b'0010','2'), (4,8,b'0011','2'),
+ (5,16,b'0000','2'), (6,32,b'0001','2'),
+ (7,64,b'0010','1'), (8,128,b'0011','1');
+
create temp table gstest_empty (a integer, b integer, v integer);
create function gstest_data(v integer, out a integer, out b integer)
@@ -43,8 +51,11 @@ create function gstest_data(v integer, out a integer, out b integer)
-- basic functionality
+set enable_hashagg = false; -- test hashing explicitly later
+
-- simple rollup with multiple plain aggregates, with and without ordering
-- (and with ordering differing from grouping)
+
select a, b, grouping(a,b), sum(v), count(*), max(v)
from gstest1 group by rollup (a,b);
select a, b, grouping(a,b), sum(v), count(*), max(v)
@@ -161,7 +172,7 @@ select a, b from (values (1,2),(2,3)) v(a,b) group by a,b, grouping sets(a);
-- Tests for chained aggregates
select a, b, grouping(a,b), sum(v), count(*), max(v)
- from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2));
+ from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY ROLLUP((e+1),(f+1));
select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY CUBE((e+1),(f+1)) ORDER BY (e+1),(f+1);
select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
@@ -224,4 +235,146 @@ select array(select row(v.a,s1.*) from (select two,four, count(*) from onek grou
select sum(ten) from onek group by two, rollup(four::text) order by 1;
select sum(ten) from onek group by rollup(four::text), two order by 1;
+-- hashing support
+
+set enable_hashagg = true;
+
+-- failure cases
+
+select count(*) from gstest4 group by rollup(unhashable_col,unsortable_col);
+select array_agg(v order by v) from gstest4 group by grouping sets ((id,unsortable_col),(id));
+
+-- simple cases
+
+select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a),(b)) order by 3,1,2;
+explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a),(b)) order by 3,1,2;
+
+select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by cube(a,b) order by 3,1,2;
+explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by cube(a,b) order by 3,1,2;
+
+-- shouldn't try and hash
+explain (costs off)
+ select a, b, grouping(a,b), array_agg(v order by v)
+ from gstest1 group by cube(a,b);
+
+-- mixed hashable/sortable cases
+select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((unhashable_col),(unsortable_col))
+ order by 3, 5;
+explain (costs off)
+ select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((unhashable_col),(unsortable_col))
+ order by 3,5;
+
+select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col))
+ order by 3,5;
+explain (costs off)
+ select unhashable_col, unsortable_col,
+ grouping(unhashable_col, unsortable_col),
+ count(*), sum(v)
+ from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col))
+ order by 3,5;
+
+-- empty input: first query returns 0 rows, second 1, third 3, etc.
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a);
+explain (costs off)
+ select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a);
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),());
+select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),());
+explain (costs off)
+ select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),());
+select sum(v), count(*) from gstest_empty group by grouping sets ((),(),());
+explain (costs off)
+ select sum(v), count(*) from gstest_empty group by grouping sets ((),(),());
+
+-- check that functionally dependent cols are not nulled
+select a, d, grouping(a,b,c)
+ from gstest3
+ group by grouping sets ((a,b), (a,c));
+explain (costs off)
+ select a, d, grouping(a,b,c)
+ from gstest3
+ group by grouping sets ((a,b), (a,c));
+
+-- simple rescan tests
+
+select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by grouping sets (a,b);
+explain (costs off)
+ select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by grouping sets (a,b);
+
+select *
+ from (values (1),(2)) v(x),
+ lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s;
+explain (costs off)
+ select *
+ from (values (1),(2)) v(x),
+ lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s;
+
+-- Tests for chained aggregates
+select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
+explain (costs off)
+ select a, b, grouping(a,b), sum(v), count(*), max(v)
+ from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
+select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
+ from gstest2 group by cube (a,b) order by rsum, a, b;
+explain (costs off)
+ select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
+ from gstest2 group by cube (a,b) order by rsum, a, b;
+select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by cube (a,b) order by a,b;
+explain (costs off)
+ select a, b, sum(v.x)
+ from (values (1),(2)) v(x), gstest_data(v.x)
+ group by cube (a,b) order by a,b;
+
+-- More rescan tests
+select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by cube(four,ten)) s on true order by v.a,four,ten;
+select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by cube(two,four) order by two,four) s1) from (values (1),(2)) v(a);
+
+-- Rescan logic changes when there are no empty grouping sets, so test
+-- that too:
+select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by grouping sets(four,ten)) s on true order by v.a,four,ten;
+select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by grouping sets(two,four) order by two,four) s1) from (values (1),(2)) v(a);
+
+-- test the knapsack
+
+set work_mem = '64kB';
+explain (costs off)
+ select unique1,
+ count(two), count(four), count(ten),
+ count(hundred), count(thousand), count(twothousand),
+ count(*)
+ from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two);
+explain (costs off)
+ select unique1,
+ count(two), count(four), count(ten),
+ count(hundred), count(thousand), count(twothousand),
+ count(*)
+ from tenk1 group by grouping sets (unique1,hundred,ten,four,two);
+
+set work_mem = '384kB';
+explain (costs off)
+ select unique1,
+ count(two), count(four), count(ten),
+ count(hundred), count(thousand), count(twothousand),
+ count(*)
+ from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two);
+
-- end
diff --git a/src/test/regress/sql/tsrf.sql b/src/test/regress/sql/tsrf.sql
index e627bb99ed..417e78c53d 100644
--- a/src/test/regress/sql/tsrf.sql
+++ b/src/test/regress/sql/tsrf.sql
@@ -66,12 +66,14 @@ SELECT SUM(count(*)) OVER(PARTITION BY generate_series(1,3) ORDER BY generate_se
SELECT few.dataa, count(*), min(id), max(id), generate_series(1,3) FROM few GROUP BY few.dataa ORDER BY 5, 1;
-- grouping sets are a bit special, they produce NULLs in columns not actually NULL
+set enable_hashagg = false;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab);
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab) ORDER BY dataa;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab) ORDER BY g;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g);
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g) ORDER BY dataa;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g) ORDER BY g;
+reset enable_hashagg;
-- data modification
CREATE TABLE fewmore AS SELECT generate_series(1,3) AS data;