array_agg (was Re: The Axe list)
2008/10/15 Ian Caulfield <ian.caulfield@gmail.com>:
I started to look at implementing array_agg by making the existing
intagg stuff more generic
... and here's what I've come up with.
I've currently implemented this as a contrib module to make it quicker
to develop/test.
The aggregate uses the same basic approach to memory management as
intagg, but instead of putting data into the lower bound, an extra
structure is placed just before the data, after the null bitmap. This
holds a copy of the typlen, typbyval and typalign, as well as a
pointer to the next (unaligned) free data byte. In order to avoid
breakage due to initial values being set in another aggregate, the
transition function will reject an array value where extra padding
isn't present before the data values.
The ORDER BY clause of array_agg isn't implemented - it seems that
this would work best as a generic solution to pass ordered data to
aggregates, and is beyond my ability to implement currently.
Ian
Attachments:
I've been taking a look at this as well and came up with a slightly
different approach. The attached patch is intended to go in core (not
contrib) and uses some array-construction facilities that already
exist in core. I'm not sure which approach is better, so I'll throw
this out there with yours for input...
...Robert
Attachments:
array_accum-v1.patch (text/x-diff; name=array_accum-v1.patch) Download
Index: src/backend/utils/adt/array_userfuncs.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/utils/adt/array_userfuncs.c,v
retrieving revision 1.23
diff -c -r1.23 array_userfuncs.c
*** src/backend/utils/adt/array_userfuncs.c 1 Jan 2008 19:45:52 -0000 1.23
--- src/backend/utils/adt/array_userfuncs.c 16 Oct 2008 02:22:03 -0000
***************
*** 12,17 ****
--- 12,18 ----
*/
#include "postgres.h"
+ #include "nodes/execnodes.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
***************
*** 465,467 ****
--- 466,510 ----
return construct_md_array(dvalues, NULL, ndims, dims, lbs, element_type,
typlen, typbyval, typalign);
}
+
+ /*
+  * array_accum_trans - aggregate transition function.  Hands the build state
+  * (arg 0, NULL if none yet) and the next input value (arg 1, may be NULL) to
+  * accumArrayResult, using the aggregate's memory context, and returns the state.
+  */
+ Datum
+ array_accum_trans(PG_FUNCTION_ARGS)
+ {
+     Oid         arg1_typeid = get_fn_expr_argtype(fcinfo->flinfo, 1);
+     MemoryContext mctx;     /* MemoryContext is already a pointer type */
+     ArrayBuildState *state;
+
+     /* aggcontext is only reachable when called with an AggState context */
+     if (!(fcinfo->context && IsA(fcinfo->context, AggState)))
+         ereport(ERROR,
+                 (errmsg("array_accum_trans may only be used as an aggregate")));
+     mctx = ((AggState *) fcinfo->context)->aggcontext;
+     if (arg1_typeid == InvalidOid)
+         ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                         errmsg("could not determine input data type")));
+
+     /* a NULL state argument is passed through to accumArrayResult as NULL */
+     state = PG_ARGISNULL(0) ? NULL : (ArrayBuildState *) PG_GETARG_POINTER(0);
+     state = accumArrayResult(state, PG_GETARG_DATUM(1), PG_ARGISNULL(1),
+                              arg1_typeid, mctx);
+     PG_RETURN_POINTER(state);
+ }
+
+ /*
+  * array_accum_final - aggregate final function.  Converts the accumulated
+  * build state (arg 0) into an array; yields SQL NULL when the state is NULL.
+  */
+ Datum
+ array_accum_final(PG_FUNCTION_ARGS)
+ {
+     /* build the result in the caller's current memory context */
+     if (!PG_ARGISNULL(0))
+         PG_RETURN_ARRAYTYPE_P(makeArrayResult((ArrayBuildState *) PG_GETARG_POINTER(0),
+                                               CurrentMemoryContext));
+     PG_RETURN_NULL();
+ }
Index: src/include/catalog/pg_aggregate.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/catalog/pg_aggregate.h,v
retrieving revision 1.66
diff -c -r1.66 pg_aggregate.h
*** src/include/catalog/pg_aggregate.h 27 Mar 2008 03:57:34 -0000 1.66
--- src/include/catalog/pg_aggregate.h 16 Oct 2008 02:22:04 -0000
***************
*** 220,225 ****
--- 220,228 ----
/* xml */
DATA(insert ( 2901 xmlconcat2 - 0 142 _null_ ));
+ /* array */
+ DATA(insert ( 2335 array_accum_trans array_accum_final 0 2281 _null_ ));
+
/*
* prototypes for functions in pg_aggregate.c
*/
Index: src/include/catalog/pg_proc.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/catalog/pg_proc.h,v
retrieving revision 1.520
diff -c -r1.520 pg_proc.h
*** src/include/catalog/pg_proc.h 14 Oct 2008 17:12:33 -0000 1.520
--- src/include/catalog/pg_proc.h 16 Oct 2008 02:22:13 -0000
***************
*** 1017,1022 ****
--- 1017,1024 ----
DESCR("array constructor with value");
DATA(insert OID = 1286 ( array_fill PGNSP PGUID 12 1 0 0 f f f f i 3 2277 "2283 1007 1007" _null_ _null_ _null_ array_fill_with_lower_bounds _null_ _null_ _null_ ));
DESCR("array constructor with value");
+ DATA(insert OID = 2333 ( array_accum_trans PGNSP PGUID 12 1 0 0 f f f f i 2 2281 "2281 2283" _null_ _null_ _null_ array_accum_trans _null_ _null_ _null_ ));
+ DATA(insert OID = 2334 ( array_accum_final PGNSP PGUID 12 1 0 0 f f f f i 1 2277 "2281" _null_ _null_ _null_ array_accum_final _null_ _null_ _null_ ));
DATA(insert OID = 760 ( smgrin PGNSP PGUID 12 1 0 0 f f t f s 1 210 "2275" _null_ _null_ _null_ smgrin _null_ _null_ _null_ ));
DESCR("I/O");
DATA(insert OID = 761 ( smgrout PGNSP PGUID 12 1 0 0 f f t f s 1 2275 "210" _null_ _null_ _null_ smgrout _null_ _null_ _null_ ));
***************
*** 3439,3444 ****
--- 3441,3448 ----
DATA(insert OID = 2829 ( corr PGNSP PGUID 12 1 0 0 t f f f i 2 701 "701 701" _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ ));
DESCR("correlation coefficient");
+ DATA(insert OID = 2335 ( array_accum PGNSP PGUID 12 1 0 0 t f f f i 1 2277 "2283" _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ ));
+
DATA(insert OID = 2160 ( text_pattern_lt PGNSP PGUID 12 1 0 0 f f t f i 2 16 "25 25" _null_ _null_ _null_ text_pattern_lt _null_ _null_ _null_ ));
DATA(insert OID = 2161 ( text_pattern_le PGNSP PGUID 12 1 0 0 f f t f i 2 16 "25 25" _null_ _null_ _null_ text_pattern_le _null_ _null_ _null_ ));
DATA(insert OID = 2163 ( text_pattern_ge PGNSP PGUID 12 1 0 0 f f t f i 2 16 "25 25" _null_ _null_ _null_ text_pattern_ge _null_ _null_ _null_ ));
Index: src/include/utils/array.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/utils/array.h,v
retrieving revision 1.68
diff -c -r1.68 array.h
*** src/include/utils/array.h 16 Jul 2008 00:48:54 -0000 1.68
--- src/include/utils/array.h 16 Oct 2008 02:22:13 -0000
***************
*** 204,209 ****
--- 204,211 ----
extern Datum generate_subscripts_nodir(PG_FUNCTION_ARGS);
extern Datum array_fill(PG_FUNCTION_ARGS);
extern Datum array_fill_with_lower_bounds(PG_FUNCTION_ARGS);
+ extern Datum array_accum_trans(PG_FUNCTION_ARGS);
+ extern Datum array_accum_final(PG_FUNCTION_ARGS);
extern Datum array_ref(ArrayType *array, int nSubscripts, int *indx,
int arraytyplen, int elmlen, bool elmbyval, char elmalign,
Ian Caulfield wrote:
2008/10/15 Ian Caulfield <ian.caulfield@gmail.com>:
I started to look at implementing array_agg by making the existing
intagg stuff more generic... and here's what I've come up with.
Going through the commit fest listings, I think we can safely omit this
patch and work out an in-core solution somewhere between the patches of
Robert Haas and Jeff Davis.