array_agg (was Re: The Axe list)

Started by Ian Caulfield, about 17 years ago (3 messages)
#1Ian Caulfield
ian.caulfield@gmail.com
1 attachment(s)

2008/10/15 Ian Caulfield <ian.caulfield@gmail.com>:

I started to look at implementing array_agg by making the existing
intagg stuff more generic

... and here's what I've come up with.

I've currently implemented this as a contrib module to make it quicker
to develop/test.

The aggregate uses the same basic approach to memory management as
intagg, but instead of putting data into the lower bound, an extra
structure is placed just before the data, after the null bitmap. This
holds a copy of the typlen, typbyval and typalign, as well as a
pointer to the next (unaligned) free data byte. In order to avoid
breakage due to initial values being set in another aggregate, the
transition function will reject an array value where extra padding
isn't present before the data values.

The ORDER BY clause of array_agg isn't implemented - it seems that
this would work best as a generic solution to pass ordered data to
aggregates, and is beyond my ability to implement currently.

Ian

Attachments:

array_agg.tar.gz (application/x-gzip; name=array_agg.tar.gz) [Download]
#2Robert Haas
robertmhaas@gmail.com
In reply to: Ian Caulfield (#1)
1 attachment(s)
Re: array_agg (was Re: The Axe list)

I've been taking a look at this as well and came up with a slightly
different approach. The attached patch is intended to go in core (not
contrib) and uses some array-construction facilities that already
exist in core. I'm not sure which approach is better, so I'll throw
this out there with yours for input...

...Robert

Attachments:

array_accum-v1.patch (text/x-diff; name=array_accum-v1.patch) [Download]
Index: src/backend/utils/adt/array_userfuncs.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/utils/adt/array_userfuncs.c,v
retrieving revision 1.23
diff -c -r1.23 array_userfuncs.c
*** src/backend/utils/adt/array_userfuncs.c	1 Jan 2008 19:45:52 -0000	1.23
--- src/backend/utils/adt/array_userfuncs.c	16 Oct 2008 02:22:03 -0000
***************
*** 12,17 ****
--- 12,18 ----
   */
  #include "postgres.h"
  
+ #include "nodes/execnodes.h"
  #include "utils/array.h"
  #include "utils/builtins.h"
  #include "utils/lsyscache.h"
***************
*** 465,467 ****
--- 466,510 ----
  	return construct_md_array(dvalues, NULL, ndims, dims, lbs, element_type,
  							  typlen, typbyval, typalign);
  }
+ 
+ /*
+  * array_accum_trans: aggregate transition function; passes argument 1
+  * (NULL allowed) and the state in argument 0 to accumArrayResult.
+  */
+ Datum
+ array_accum_trans(PG_FUNCTION_ARGS)
+ {
+ 	ArrayBuildState *ain, *aout;
+ 	MemoryContext mctx;		/* MemoryContext is itself a pointer typedef */
+ 	Oid			arg1_typeid = get_fn_expr_argtype(fcinfo->flinfo, 1);
+ 
+ 	/* working memory comes from the AggState, so require aggregate context */
+ 	if (!(fcinfo->context && IsA(fcinfo->context, AggState)))
+ 		ereport(ERROR,
+ 			(errmsg("array_accum_trans may only be used as an aggregate")));
+ 	mctx = ((AggState *) fcinfo->context)->aggcontext;
+ 	if (arg1_typeid == InvalidOid)
+ 		ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ 			errmsg("could not determine input data type")));
+ 
+ 	/* a NULL transition state is handed to accumArrayResult as NULL */
+ 	ain = PG_ARGISNULL(0) ? NULL : (ArrayBuildState *) PG_GETARG_POINTER(0);
+ 
+ 	aout = accumArrayResult(ain, PG_GETARG_DATUM(1), PG_ARGISNULL(1),
+ 		arg1_typeid, mctx);
+ 	PG_RETURN_POINTER(aout);
+ }
+ 
+ /*
+  * array_accum_final: aggregate final function.  A NULL state yields SQL
+  * NULL; otherwise the result of makeArrayResult() is returned.
+  */
+ Datum
+ array_accum_final(PG_FUNCTION_ARGS)
+ {
+ 	if (PG_ARGISNULL(0))
+ 		PG_RETURN_NULL();
+ 	/* makeArrayResult() already returns a Datum, so don't re-wrap it */
+ 	PG_RETURN_DATUM(makeArrayResult((ArrayBuildState *) PG_GETARG_POINTER(0),
+ 									CurrentMemoryContext));
+ }
Index: src/include/catalog/pg_aggregate.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/catalog/pg_aggregate.h,v
retrieving revision 1.66
diff -c -r1.66 pg_aggregate.h
*** src/include/catalog/pg_aggregate.h	27 Mar 2008 03:57:34 -0000	1.66
--- src/include/catalog/pg_aggregate.h	16 Oct 2008 02:22:04 -0000
***************
*** 220,225 ****
--- 220,228 ----
  /* xml */
  DATA(insert ( 2901 xmlconcat2	  -					0	142		_null_ ));
  
+ /* array */
+ DATA(insert ( 2335 array_accum_trans	array_accum_final	0	2281	_null_ ));
+ 
  /*
   * prototypes for functions in pg_aggregate.c
   */
Index: src/include/catalog/pg_proc.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/catalog/pg_proc.h,v
retrieving revision 1.520
diff -c -r1.520 pg_proc.h
*** src/include/catalog/pg_proc.h	14 Oct 2008 17:12:33 -0000	1.520
--- src/include/catalog/pg_proc.h	16 Oct 2008 02:22:13 -0000
***************
*** 1017,1022 ****
--- 1017,1024 ----
  DESCR("array constructor with value");
  DATA(insert OID = 1286 (  array_fill PGNSP PGUID 12 1 0 0 f f f f i 3 2277 "2283 1007 1007" _null_ _null_ _null_ array_fill_with_lower_bounds _null_ _null_ _null_ ));
  DESCR("array constructor with value");
+ DATA(insert OID = 2333 (  array_accum_trans	PGNSP PGUID 12 1 0 0 f f f f i 2 2281 "2281 2283" _null_ _null_ _null_ array_accum_trans _null_ _null_ _null_ ));
+ DATA(insert OID = 2334 (  array_accum_final PGNSP PGUID 12 1 0 0 f f f f i 1 2277 "2281" _null_ _null_ _null_ array_accum_final _null_ _null_ _null_ ));
  DATA(insert OID = 760 (  smgrin			   PGNSP PGUID 12 1 0 0 f f t f s 1 210 "2275" _null_ _null_ _null_  smgrin _null_ _null_ _null_ ));
  DESCR("I/O");
  DATA(insert OID = 761 (  smgrout		   PGNSP PGUID 12 1 0 0 f f t f s 1 2275 "210" _null_ _null_ _null_  smgrout _null_ _null_ _null_ ));
***************
*** 3439,3444 ****
--- 3441,3448 ----
  DATA(insert OID = 2829 (  corr				PGNSP PGUID 12 1 0 0 t f f f i 2 701 "701 701" _null_ _null_ _null_  aggregate_dummy _null_ _null_ _null_ ));
  DESCR("correlation coefficient");
  
+ DATA(insert OID = 2335 (  array_accum		PGNSP PGUID 12 1 0 0 t f f f i 1 2277 "2283" _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ ));
+ 
  DATA(insert OID = 2160 ( text_pattern_lt	 PGNSP PGUID 12 1 0 0 f f t f i 2 16 "25 25" _null_ _null_ _null_ text_pattern_lt _null_ _null_ _null_ ));
  DATA(insert OID = 2161 ( text_pattern_le	 PGNSP PGUID 12 1 0 0 f f t f i 2 16 "25 25" _null_ _null_ _null_ text_pattern_le _null_ _null_ _null_ ));
  DATA(insert OID = 2163 ( text_pattern_ge	 PGNSP PGUID 12 1 0 0 f f t f i 2 16 "25 25" _null_ _null_ _null_ text_pattern_ge _null_ _null_ _null_ ));
Index: src/include/utils/array.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/utils/array.h,v
retrieving revision 1.68
diff -c -r1.68 array.h
*** src/include/utils/array.h	16 Jul 2008 00:48:54 -0000	1.68
--- src/include/utils/array.h	16 Oct 2008 02:22:13 -0000
***************
*** 204,209 ****
--- 204,211 ----
  extern Datum generate_subscripts_nodir(PG_FUNCTION_ARGS);
  extern Datum array_fill(PG_FUNCTION_ARGS);
  extern Datum array_fill_with_lower_bounds(PG_FUNCTION_ARGS);
+ extern Datum array_accum_trans(PG_FUNCTION_ARGS);
+ extern Datum array_accum_final(PG_FUNCTION_ARGS);
  
  extern Datum array_ref(ArrayType *array, int nSubscripts, int *indx,
  		  int arraytyplen, int elmlen, bool elmbyval, char elmalign,
#3Peter Eisentraut
peter_e@gmx.net
In reply to: Ian Caulfield (#1)
Re: array_agg (was Re: The Axe list)

Ian Caulfield wrote:

2008/10/15 Ian Caulfield <ian.caulfield@gmail.com>:

I started to look at implementing array_agg by making the existing
intagg stuff more generic

... and here's what I've come up with.

Going through the commit fest listings, I think we can safely omit this
patch and work out an in-core solution somewhere between the patches of
Robert Haas and Jeff Davis.