sig 11 in RC3
I have a couple functions which form the basis of an aggregate. The purpose of
the aggregate function is to be able to perform a GROUP BY on a one to many
table and produce a summary able where all the "many" integers will be packed
in a single array. If this were a text search query, rather than searching
hundreds of entries in the table, one fetch and possibly a detoast is used. It
is MUCH faster for my purpose.
It is used like this:
create table array_lookup as select id1, int_array_aggregate(id2) from lookup
group by (id1) ;
I have written a good number of functions in PGSQL, I'm not a newbe. Could
someone take a look at it? I don't think I am doing anything that would kill
the back end, so it may be a bug in RC3, I am just pulling my hair out. (FYI,
the one to many table may have thousands of rows for an entry.) One more thing:
I'm not getting any elog messages, so it should not be a memory issue.
-- Internal function for the aggregate
-- Is called for each item in an aggregation
create function int_agg_state (int4, int4)
returns int4
as 'MODULE_FILENAME','int_agg_state'
language 'c';
-- Internal function for the aggregate
-- Is called at the end of the aggregation, and returns an array.
create function int_agg_final_array (int4)
returns int4[]
as 'MODULE_FILENAME','int_agg_final_array'
language 'c';
-- The aggration funcion.
-- uses the above functions to create an array of integers from an aggregation.
create aggregate int_array_aggregate
(
BASETYPE = int4,
SFUNC = int_agg_state,
STYPE = int4,
FINALFUNC = int_agg_final_array,
INITCOND = 0
);
/* This is actually a postgres version of a one dimentional array */
typedef struct agg
{
ArrayType a;
int items; /* Number of items in array */
int lower; /* Lower bounds of array, used as max during aggregation
*/
int4 array[1];
}PGARRAY;
#define TOASTED 1
#define START_NUM 8
#define PGARRAY_SIZE(n) (sizeof(PGARRAY) + ((n-1)*sizeof(int4)))
PGARRAY * GetPGArray(int4 state, int fAdd);
Datum int_agg_state(PG_FUNCTION_ARGS);
Datum int_agg_final_array(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(int_agg_state);
PG_FUNCTION_INFO_V1(int_agg_final_array);
/* Manage the aggregation state of the array */
PGARRAY * GetPGArray(int4 state, int fAdd)
{
PGARRAY *p = (PGARRAY *) state;
if(!state)
{
/* New array */
int cb = PGARRAY_SIZE(START_NUM);
p = (PGARRAY *) palloc(cb);
if(!p)
{
elog(ERROR,"Integer aggregator, cant allocate
memory\n");
return 0;
}
p->a.size = cb;
p->a.ndim= 0;
p->a.flags = 0;
p->items = 0;
p->lower= START_NUM;
return p;
}
else if(fAdd)
{
/* Ensure array has space */
if(p->items >= p->lower)
{
PGARRAY *pn;
int n = p->lower + p->lower;
int cbNew = PGARRAY_SIZE(n);
pn = (PGARRAY *) palloc(cbNew);
if(!pn)
{
elog(ERROR,"Integer aggregator, cant allocate
memory\n");
}
else
{
memcpy(pn, p, p->a.size);
pn->a.size = cbNew;
pn->lower = n;
pfree(p);
return pn;
}
}
}
return p;
}
/* Called for each iteration during an aggregate function */
Datum int_agg_state(PG_FUNCTION_ARGS)
{
int4 state = PG_GETARG_INT32(0);
int4 value = PG_GETARG_INT32(1);
PGARRAY *p = GetPGArray(state, 1);
if(!p)
{
elog(ERROR,"No aggregate storage\n");
}
else if(p->items >= p->lower)
{
elog(ERROR,"aggregate storage too small\n");
}
else
{
p->array[p->items++]= value;
}
PG_RETURN_INT32(p);
}
/* This is the final function used for the integer aggregator. It returns all
the integers
* collected as a one dimentional integer array */
Datum int_agg_final_array(PG_FUNCTION_ARGS)
{
PGARRAY *p = GetPGArray(PG_GETARG_INT32(0),0);
if(p)
{
/* Fix up the fields in the structure, so Postgres understands
*/
p->a.size = PGARRAY_SIZE(p->items);
p->a.ndim=1;
p->a.flags = 0;
p->lower = 0;
PG_RETURN_POINTER(p);
}
PG_RETURN_NULL();
}
mlw <markw@mohawksoft.com> writes:
I have a couple functions which form the basis of an aggregate.
You can't lie to the system by claiming your state value is an integer
when it's really a pointer to palloc'd memory. The memory will get
recycled out from under you.
Try declaring the aggregate as using int4[] as the transition type,
and make sure that the intermediate states are valid at least to the
point of having a correct varlena length word. This will allow the
system to copy the values around when it needs to.
Alternatively, keep the data structure in a longer-lived context
(TransactionCommandContext should work) instead of the per-tuple
context. That's uglier but would avoid a lot of copying.
See src/backend/executor/nodeAgg.c if you are wondering why the state
values need to be copied around.
regards, tom lane
On Fri, Jan 04, 2002 at 11:02:04AM -0500, Tom Lane wrote:
mlw <markw@mohawksoft.com> writes:
Alternatively, keep the data structure in a longer-lived context
(TransactionCommandContext should work) instead of the per-tuple
context.
It depends, I had to use `TopTransactionContect' in a similar case.
If you really want to return a pointer, I would recommend to
introduce a new type `pointer', where the input and output functions
simply call `elog(ERROR,..)'. This way you can avoid to forget that
your pointer cannot be externalized.
--
Holger Krug
hkrug@rationalizer.com