onlyvalue aggregate (was: First Aggregate Funtion?)

Started by Marko Tiikkajaabout 10 years ago19 messages
#1Marko Tiikkaja
marko@joh.to
1 attachment(s)

Hi,

Here's a patch for the aggregate function outlined by Corey Huinker in
CADkLM=foA_oC_Ri23F9PbfLnfwXFbC3Lt8bBzRu3=CB77G9_qw@mail.gmail.com . I
called it "onlyvalue", which is a horrible name, but I have nothing
better to offer. (Corey called it "only", but that doesn't really work
since ONLY is a fully reserved keyword.)

I'll add this to September's commit fest, but if you want to bash me or
the patch in the meanwhile, go ahead.

.m

Attachments:

onlyvalue_v1.patchtext/x-patch; name=onlyvalue_v1.patchDownload
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 2946122..6edc220 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -12631,6 +12631,26 @@ NULL baz</literallayout>(3 rows)</entry>
      <row>
       <entry>
        <indexterm>
+        <primary>onlyvalue</primary>
+       </indexterm>
+       <function>
+         onlyvalue(<replaceable class="parameter">expression</replaceable>)
+       </function>
+      </entry>
+      <entry>
+       any type for which the equality operator has been defined
+      </entry>
+      <entry>
+       same as argument type
+      </entry>
+      <entry>returns the single distinct non-NULL value from the input
+      values; if any of the input values is NULL or more than one distinct
+      value exists, an exception is raised</entry>
+     </row>
+
+     <row>
+      <entry>
+       <indexterm>
         <primary>string_agg</primary>
        </indexterm>
        <function>
diff --git a/src/backend/utils/adt/misc.c b/src/backend/utils/adt/misc.c
index c0495d9..72bb55c 100644
--- a/src/backend/utils/adt/misc.c
+++ b/src/backend/utils/adt/misc.c
@@ -40,6 +40,7 @@
 #include "utils/acl.h"
 #include "utils/builtins.h"
 #include "utils/timestamp.h"
+#include "utils/typcache.h"
 
 #define atooid(x)  ((Oid) strtoul((x), NULL, 10))
 
@@ -598,3 +599,93 @@ pg_column_is_updatable(PG_FUNCTION_ARGS)
 
 	PG_RETURN_BOOL((events & REQ_EVENTS) == REQ_EVENTS);
 }
+
+struct onlyvalue_agg_stype
+{
+	FunctionCallInfoData fcinfo;
+	Datum datum;
+};
+
+Datum
+onlyvalue_agg_transfn(PG_FUNCTION_ARGS)
+{
+	Oid		 arg1_typeid = get_fn_expr_argtype(fcinfo->flinfo, 1);
+	MemoryContext aggcontext;
+	struct onlyvalue_agg_stype *state;
+
+	if (arg1_typeid == InvalidOid)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("could not determine input data type")));
+
+	if (!AggCheckCallContext(fcinfo, &aggcontext))
+	{
+		/* cannot be called directly because of internal-type argument */
+		elog(ERROR, "onlyvalue_agg_transfn called in non-aggregate context");
+	}
+
+	if (PG_ARGISNULL(1))
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+		errmsg("NULL value passed to onlyvalue")));
+	}
+
+	if (PG_ARGISNULL(0))
+	{
+		TypeCacheEntry *typentry;
+
+		state = (struct onlyvalue_agg_stype *) MemoryContextAlloc(aggcontext, sizeof(struct onlyvalue_agg_stype));
+		state->datum = PG_GETARG_DATUM(1);
+
+		typentry = lookup_type_cache(arg1_typeid,
+									 TYPECACHE_EQ_OPR_FINFO);
+		if (!OidIsValid(typentry->eq_opr_finfo.fn_oid))
+			ereport(ERROR,
+					(errcode(ERRCODE_UNDEFINED_FUNCTION),
+			errmsg("could not identify an equality operator for type %s",
+				   format_type_be(arg1_typeid))));
+
+		InitFunctionCallInfoData(state->fcinfo, &typentry->eq_opr_finfo, 2,
+								 InvalidOid, NULL, NULL);
+	}
+	else
+	{
+		bool oprresult;
+
+		state = (struct onlyvalue_agg_stype *) PG_GETARG_POINTER(0);
+
+		state->fcinfo.argnull[0] = false;
+		state->fcinfo.argnull[1] = false;
+		state->fcinfo.arg[0] = state->datum;
+		state->fcinfo.arg[1] = PG_GETARG_DATUM(1);
+		state->fcinfo.isnull = false;
+		oprresult = DatumGetBool(FunctionCallInvoke(&state->fcinfo));
+		if (!oprresult)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+			errmsg("more than one distinct value passed to onlyvalue")));
+	}
+
+	/*
+	 * The transition type for onlyvalue() is declared to be "internal", which
+	 * is a pass-by-value type the same size as a pointer.  So we can safely
+	 * pass the pointer through nodeAgg.c's machinations.
+	 */
+	PG_RETURN_POINTER(state);
+}
+
+Datum
+onlyvalue_agg_finalfn(PG_FUNCTION_ARGS)
+{
+	struct onlyvalue_agg_stype *state;
+
+	if (PG_ARGISNULL(0))
+		PG_RETURN_NULL();
+
+	/* cannot be called directly because of internal-type argument */
+	Assert(AggCheckCallContext(fcinfo, NULL));
+
+	state = (struct onlyvalue_agg_stype *) PG_GETARG_POINTER(0);
+	PG_RETURN_DATUM(state->datum);
+}
diff --git a/src/include/catalog/pg_aggregate.h b/src/include/catalog/pg_aggregate.h
index dd6079f..9d6c44a 100644
--- a/src/include/catalog/pg_aggregate.h
+++ b/src/include/catalog/pg_aggregate.h
@@ -292,6 +292,9 @@ DATA(insert ( 3197	n 0 json_object_agg_transfn json_object_agg_finalfn -				-
 DATA(insert ( 3267	n 0 jsonb_agg_transfn	jsonb_agg_finalfn			-				-				-				f f 0	2281	0	0		0	_null_ _null_ ));
 DATA(insert ( 3270	n 0 jsonb_object_agg_transfn jsonb_object_agg_finalfn -				-				-				f f 0	2281	0	0		0	_null_ _null_ ));
 
+/* onlyvalue */
+DATA(insert ( 4202 n 0 onlyvalue_agg_transfn    onlyvalue_agg_finalfn    -               -               -               t f 0   2281    0   0       0   _null_ _null_ ));
+
 /* ordered-set and hypothetical-set aggregates */
 DATA(insert ( 3972	o 1 ordered_set_transition			percentile_disc_final					-		-		-		t f 0	2281	0	0		0	_null_ _null_ ));
 DATA(insert ( 3974	o 1 ordered_set_transition			percentile_cont_float8_final			-		-		-		f f 0	2281	0	0		0	_null_ _null_ ));
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index f688454..f1361ee 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -944,6 +944,12 @@ DATA(insert OID = 4053 (  array_agg		   PGNSP PGUID 12 1 0 0 0 t f f f f f i s 1
 DESCR("concatenate aggregate input into an array");
 DATA(insert OID = 3218 ( width_bucket	   PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 23 "2283 2277" _null_ _null_ _null_ _null_ _null_ width_bucket_array _null_ _null_ _null_ ));
 DESCR("bucket number of operand given a sorted array of bucket lower bounds");
+DATA(insert OID = 4200 ( onlyvalue_agg_transfn   PGNSP PGUID 12 1 0 0 0 f f f f f f i s 2 0 2281 "2281 2283" _null_ _null_ _null_ _null_ _null_ onlyvalue_agg_transfn _null_ _null_ _null_ ));
+DESCR("aggregate transition function");
+DATA(insert OID = 4201 ( onlyvalue_agg_finalfn   PGNSP PGUID 12 1 0 0 0 f f f f f f i s 2 0 2283 "2281 2283" _null_ _null_ _null_ _null_ _null_ onlyvalue_agg_finalfn _null_ _null_ _null_ ));
+DESCR("aggregate final function");
+DATA(insert OID = 4202 ( onlyvalue   PGNSP PGUID 12 1 0 0 0 t f f f f f i s 1 0 2283 "2283" _null_ _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ ));
+DESCR("return the single distinct value from aggregate input");
 DATA(insert OID = 3816 (  array_typanalyze PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 16 "2281" _null_ _null_ _null_ _null_ _null_ array_typanalyze _null_ _null_ _null_ ));
 DESCR("array typanalyze");
 DATA(insert OID = 3817 (  arraycontsel	   PGNSP PGUID 12 1 0 0 0 f f f f t f s s 4 0 701 "2281 26 2281 23" _null_ _null_ _null_ _null_ _null_ arraycontsel _null_ _null_ _null_ ));
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index fc1679e..0b8e9e8 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -495,6 +495,8 @@ extern Datum pg_typeof(PG_FUNCTION_ARGS);
 extern Datum pg_collation_for(PG_FUNCTION_ARGS);
 extern Datum pg_relation_is_updatable(PG_FUNCTION_ARGS);
 extern Datum pg_column_is_updatable(PG_FUNCTION_ARGS);
+extern Datum onlyvalue_agg_transfn(PG_FUNCTION_ARGS);
+extern Datum onlyvalue_agg_finalfn(PG_FUNCTION_ARGS);
 
 /* oid.c */
 extern Datum oidin(PG_FUNCTION_ARGS);
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index de826b5..12dd519 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1784,3 +1784,38 @@ NOTICE:  sum_transfn called with 4
 (1 row)
 
 rollback;
+/* onlyvalue */
+select onlyvalue(vv.v) from (values (1), (1)) vv(v);
+ onlyvalue 
+-----------
+         1
+(1 row)
+
+select onlyvalue(vv.v) from (values (1), (2)) vv(v);
+ERROR:  more than one distinct value passed to onlyvalue
+select onlyvalue(vv.v) from (values (1), (NULL)) vv(v);
+ERROR:  NULL value passed to onlyvalue
+select onlyvalue(vv.v) from (values (json '{}')) vv(v);
+ERROR:  could not identify an equality operator for type json
+select onlyvalue(vv.v1) from (values (1,1), (2,2), (3,3)) vv(v1, v2);
+ERROR:  more than one distinct value passed to onlyvalue
+select vv.v2, onlyvalue(vv.v1) from (values (1,1), (2,2), (2,2), (3,3)) vv(v1, v2) group by vv.v2;
+ v2 | onlyvalue 
+----+-----------
+  1 |         1
+  2 |         2
+  3 |         3
+(3 rows)
+
+select onlyvalue(vv.v1) FILTER (WHERE vv.v2 = 1) from (values (1,1), (2,2), (3,3)) vv(v1, v2);
+ onlyvalue 
+-----------
+         1
+(1 row)
+
+select onlyvalue(vv.v) from (select 1) vv(V) where false;
+ onlyvalue 
+-----------
+          
+(1 row)
+
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 8d501dc..9aba5df 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -755,3 +755,13 @@ create aggregate my_half_sum(int4)
 select my_sum(one),my_half_sum(one) from (values(1),(2),(3),(4)) t(one);
 
 rollback;
+
+/* onlyvalue */
+select onlyvalue(vv.v) from (values (1), (1)) vv(v);
+select onlyvalue(vv.v) from (values (1), (2)) vv(v);
+select onlyvalue(vv.v) from (values (1), (NULL)) vv(v);
+select onlyvalue(vv.v) from (values (json '{}')) vv(v);
+select onlyvalue(vv.v1) from (values (1,1), (2,2), (3,3)) vv(v1, v2);
+select vv.v2, onlyvalue(vv.v1) from (values (1,1), (2,2), (2,2), (3,3)) vv(v1, v2) group by vv.v2;
+select onlyvalue(vv.v1) FILTER (WHERE vv.v2 = 1) from (values (1,1), (2,2), (3,3)) vv(v1, v2);
+select onlyvalue(vv.v) from (select 1) vv(V) where false;
#2Pavel Stehule
pavel.stehule@gmail.com
In reply to: Marko Tiikkaja (#1)
Re: onlyvalue aggregate (was: First Aggregate Funtion?)

2015-10-28 17:50 GMT+01:00 Marko Tiikkaja <marko@joh.to>:

Hi,

Here's a patch for the aggregate function outlined by Corey Huinker in
CADkLM=foA_oC_Ri23F9PbfLnfwXFbC3Lt8bBzRu3=CB77G9_qw@mail.gmail.com . I
called it "onlyvalue", which is a horrible name, but I have nothing better
to offer. (Corey called it "only", but that doesn't really work since ONLY
is a fully reserved keyword.)

I'll add this to September's commit fest, but if you want to bash me or
the patch in the meanwhile, go ahead.

Hi

what is use case for this function and why it should be in core?

Regards

Pavel

Show quoted text

.m

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#3Marko Tiikkaja
marko@joh.to
In reply to: Pavel Stehule (#2)
Re: onlyvalue aggregate (was: First Aggregate Funtion?)

On 10/28/15 5:53 PM, Pavel Stehule wrote:

what is use case for this function and why it should be in core?

Corey had one example in his email, but I can offer another one which
came up this week at $work. The query looked something like this:

SELECT a, sum(amount), onlyvalue(rolling_count)
FROM
(
SELECT a, amount, count(*) OVER (ORDER BY a) AS rolling_count
FROM tbl
) ss
GROUP BY a;

We know that all the values for the column are going to be the same
value for every "a", so we could use min() or max(). But the advantage
of "onlyvalue" is that it actually checks that, so if someone went and
changed the window frame to do something slightly different, the query
would blow up instead of silently returning the (now likely incorrect)
minimum or maximum value. It's also self-documenting for the reader of
such queries.

In my experience this problem comes up often enough that it would be
make sense to have this aggregate in core.

.m

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#4Pavel Stehule
pavel.stehule@gmail.com
In reply to: Marko Tiikkaja (#3)
Re: onlyvalue aggregate (was: First Aggregate Funtion?)

2015-10-28 18:03 GMT+01:00 Marko Tiikkaja <marko@joh.to>:

On 10/28/15 5:53 PM, Pavel Stehule wrote:

what is use case for this function and why it should be in core?

Corey had one example in his email, but I can offer another one which came
up this week at $work. The query looked something like this:

SELECT a, sum(amount), onlyvalue(rolling_count)
FROM
(
SELECT a, amount, count(*) OVER (ORDER BY a) AS rolling_count
FROM tbl
) ss
GROUP BY a;

We know that all the values for the column are going to be the same value
for every "a", so we could use min() or max(). But the advantage of
"onlyvalue" is that it actually checks that, so if someone went and changed
the window frame to do something slightly different, the query would blow
up instead of silently returning the (now likely incorrect) minimum or
maximum value. It's also self-documenting for the reader of such queries.

In my experience this problem comes up often enough that it would be make
sense to have this aggregate in core.

This function is pretty inconsistent with any other builtin aggregate
function, so it is one argument against to push it to core. Next, this
function can be pretty simply implemented in PLpgSQL. And my last argument
- I don't remember too often request for this functionality. It looks like
module for PGXN much more, than PG core functionality.

Regards

Pavel

Show quoted text

.m

#5Tom Lane
tgl@sss.pgh.pa.us
In reply to: Marko Tiikkaja (#1)
Re: onlyvalue aggregate (was: First Aggregate Funtion?)

Marko Tiikkaja <marko@joh.to> writes:

Here's a patch for the aggregate function outlined by Corey Huinker in
CADkLM=foA_oC_Ri23F9PbfLnfwXFbC3Lt8bBzRu3=CB77G9_qw@mail.gmail.com . I
called it "onlyvalue", which is a horrible name, but I have nothing
better to offer. (Corey called it "only", but that doesn't really work
since ONLY is a fully reserved keyword.)

On the name front, maybe think "single" rather than "only"? This might
lead to "single()" or "single_value()", or "singleton()" if you want to
sound highbrow.

On the semantics front, I'm not sure that I like excluding nulls from the
input domain. I'd rather that it acted like IS NOT DISTINCT, ie, nulls
are fine as long as all the input values are nulls. The implementation
would need some work for that.

regards, tom lane

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#6Pavel Stehule
pavel.stehule@gmail.com
In reply to: Tom Lane (#5)
Re: onlyvalue aggregate (was: First Aggregate Funtion?)

2015-10-28 18:38 GMT+01:00 Tom Lane <tgl@sss.pgh.pa.us>:

Marko Tiikkaja <marko@joh.to> writes:

Here's a patch for the aggregate function outlined by Corey Huinker in
CADkLM=foA_oC_Ri23F9PbfLnfwXFbC3Lt8bBzRu3=CB77G9_qw@mail.gmail.com . I
called it "onlyvalue", which is a horrible name, but I have nothing
better to offer. (Corey called it "only", but that doesn't really work
since ONLY is a fully reserved keyword.)

On the name front, maybe think "single" rather than "only"? This might
lead to "single()" or "single_value()", or "singleton()" if you want to
sound highbrow.

this function should to have some distinguish name than other aggregates
because important work of this func is not some calculation but some
constraint check.

Show quoted text

On the semantics front, I'm not sure that I like excluding nulls from the
input domain. I'd rather that it acted like IS NOT DISTINCT, ie, nulls
are fine as long as all the input values are nulls. The implementation
would need some work for that.

regards, tom lane

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#7David G. Johnston
david.g.johnston@gmail.com
In reply to: Tom Lane (#5)
Re: onlyvalue aggregate (was: First Aggregate Funtion?)

On Wed, Oct 28, 2015 at 1:38 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:

Marko Tiikkaja <marko@joh.to> writes:

Here's a patch for the aggregate function outlined by Corey Huinker in
CADkLM=foA_oC_Ri23F9PbfLnfwXFbC3Lt8bBzRu3=CB77G9_qw@mail.gmail.com . I
called it "onlyvalue", which is a horrible name, but I have nothing
better to offer. (Corey called it "only", but that doesn't really work
since ONLY is a fully reserved keyword.)

On the name front, maybe think "single" rather than "only"? This might
lead to "single()" or "single_value()", or "singleton()" if you want to
sound highbrow.

On the semantics front, I'm not sure that I like excluding nulls from the
input domain. I'd rather that it acted like IS NOT DISTINCT, ie, nulls
are fine as long as all the input values are nulls. The implementation
would need some work for that.

​homogeneous() ?

It is basically an assertion function since in most cases where you would
use this you needn't use a function at all but instead simply place the
column in the GROUP BY clause.

I have at various times desired having various assertion functions that
return the input value if the assertion is met but causes a query error if
it is not. That said functions can be both scalar and aggregate oriented
makes sense. Adding just this one function in seems like it adds a partial
feature to -core. I'd rather it bake more in PGXN before considering
putting it into core. There doesn't seem to be any hard need or benefit to
doing so at this time.

I would probably stick to the concept of assertion and call it something
like
"assert_nongrouping()"
​ to denote that the input does not cause multiple groups to be created due
to their being multiple values.

David J.

#8Robert Haas
robertmhaas@gmail.com
In reply to: Marko Tiikkaja (#1)
Re: onlyvalue aggregate (was: First Aggregate Funtion?)

On Wed, Oct 28, 2015 at 5:50 PM, Marko Tiikkaja <marko@joh.to> wrote:

Here's a patch for the aggregate function outlined by Corey Huinker in
CADkLM=foA_oC_Ri23F9PbfLnfwXFbC3Lt8bBzRu3=CB77G9_qw@mail.gmail.com . I
called it "onlyvalue", which is a horrible name, but I have nothing better
to offer. (Corey called it "only", but that doesn't really work since ONLY
is a fully reserved keyword.)

I've written an aggregate that does something like this a few times.
I think one time I called it "the", which is probably too clever, but
then you could query for the(project_manager) and similar. I've
usually written it to not error-check and just return the first
non-NULL value it runs across, which suggests a name like any_old() or
whatever().

I actually think by comparison with those ideas, onlyvalue() - or
maybe only_value() - is not bad.

I'll add this to September's commit fest,

November, probably.

but if you want to bash me or the
patch in the meanwhile, go ahead.

What if we want to say nice things?

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#9Dean Rasheed
dean.a.rasheed@gmail.com
In reply to: Marko Tiikkaja (#1)
Re: onlyvalue aggregate (was: First Aggregate Funtion?)

On 28 October 2015 at 16:50, Marko Tiikkaja <marko@joh.to> wrote:

Hi,

Here's a patch for the aggregate function outlined by Corey Huinker in
CADkLM=foA_oC_Ri23F9PbfLnfwXFbC3Lt8bBzRu3=CB77G9_qw@mail.gmail.com .

+1. I've wanted something like this a few times. Of the names
suggested so far, I think I prefer "single_value", and yes I think it
should work with NULLs.

Regards,
Dean

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#10Simon Riggs
simon@2ndQuadrant.com
In reply to: Dean Rasheed (#9)
Re: onlyvalue aggregate (was: First Aggregate Funtion?)

On 2 November 2015 at 09:32, Dean Rasheed <dean.a.rasheed@gmail.com> wrote:

On 28 October 2015 at 16:50, Marko Tiikkaja <marko@joh.to> wrote:

Hi,

Here's a patch for the aggregate function outlined by Corey Huinker in
CADkLM=foA_oC_Ri23F9PbfLnfwXFbC3Lt8bBzRu3=CB77G9_qw@mail.gmail.com .

+1. I've wanted something like this a few times. Of the names
suggested so far, I think I prefer "single_value", and yes I think it
should work with NULLs.

+1

I think we should avoid using ONLY or VALUE within the names since those
already have other meanings in Postgres.

--
Simon Riggs http://www.2ndQuadrant.com/
<http://www.2ndquadrant.com/&gt;
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#11Marko Tiikkaja
marko@joh.to
In reply to: Dean Rasheed (#9)
Re: onlyvalue aggregate (was: First Aggregate Funtion?)

On 11/2/15 9:32 AM, Dean Rasheed wrote:

On 28 October 2015 at 16:50, Marko Tiikkaja <marko@joh.to> wrote:

Here's a patch for the aggregate function outlined by Corey Huinker in
CADkLM=foA_oC_Ri23F9PbfLnfwXFbC3Lt8bBzRu3=CB77G9_qw@mail.gmail.com .

+1. I've wanted something like this a few times. Of the names
suggested so far, I think I prefer "single_value", and yes I think it
should work with NULLs.

This was actually a last-minute design change I made before submitting
the patch. The two times I've previously written this aggregate both
accepted NULLs and only enforced that there must not be more than one
non-NULL value, but that's only because I was thinking about the "poor
man's FILTER" case, which is obsolete since version 9.4. The reason I
changed in this version is that accepting NULLs can also hide bugs, and
it's (usually) easy to filter them out with FILTER.

Did you have some specific use case in mind where accepting NULLs would
be beneficial?

.m

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#12Dean Rasheed
dean.a.rasheed@gmail.com
In reply to: Simon Riggs (#10)
Re: onlyvalue aggregate (was: First Aggregate Funtion?)

On 2 November 2015 at 09:10, Simon Riggs <simon@2ndquadrant.com> wrote:

I think we should avoid using ONLY or VALUE within the names since those
already have other meanings in Postgres.

We already have window functions called first_value, last_value and
nth_value, so I think use of "value" in the name is OK (and
consistent). Actually thinking some more, I think the name
"unique_value" is better because it's more suggestive of the fact that
uniqueness is being enforced.

Regards,
Dean

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#13Dean Rasheed
dean.a.rasheed@gmail.com
In reply to: Marko Tiikkaja (#11)
Re: onlyvalue aggregate (was: First Aggregate Funtion?)

On 2 November 2015 at 10:59, Marko Tiikkaja <marko@joh.to> wrote:

On 11/2/15 9:32 AM, Dean Rasheed wrote:

On 28 October 2015 at 16:50, Marko Tiikkaja <marko@joh.to> wrote:

Here's a patch for the aggregate function outlined by Corey Huinker in
CADkLM=foA_oC_Ri23F9PbfLnfwXFbC3Lt8bBzRu3=CB77G9_qw@mail.gmail.com .

+1. I've wanted something like this a few times. Of the names
suggested so far, I think I prefer "single_value", and yes I think it
should work with NULLs.

This was actually a last-minute design change I made before submitting the
patch. The two times I've previously written this aggregate both accepted
NULLs and only enforced that there must not be more than one non-NULL value,
but that's only because I was thinking about the "poor man's FILTER" case,
which is obsolete since version 9.4. The reason I changed in this version
is that accepting NULLs can also hide bugs, and it's (usually) easy to
filter them out with FILTER.

Did you have some specific use case in mind where accepting NULLs would be
beneficial?

I'm not sure what you mean when you say accepting NULLs can hide bugs.
I think that if the input values to the aggregate were
1,1,1,NULL,1,1,1 then it should raise an error. ITSM that that is more
likely to reveal problems with your underlying data or the query. If
you want to ignore NULLs, you can always add a FILTER(WHERE val IS NOT
NULL) clause.

Regards,
Dean

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#14Marko Tiikkaja
marko@joh.to
In reply to: Dean Rasheed (#13)
Re: onlyvalue aggregate (was: First Aggregate Funtion?)

On 11/2/15 12:40 PM, Dean Rasheed wrote:

I'm not sure what you mean when you say accepting NULLs can hide bugs.
I think that if the input values to the aggregate were
1,1,1,NULL,1,1,1 then it should raise an error. ITSM that that is more
likely to reveal problems with your underlying data or the query. If
you want to ignore NULLs, you can always add a FILTER(WHERE val IS NOT
NULL) clause.

Ah, I see. So you're arguing that the aggregate should accept NULLs as
input, but consider them distinct from any non-NULL values. I thought
you meant accepting NULLs and *not* considering them distinct, which
could easily hide problems.

In that case, I don't oppose to changing the behavior. I'll make the
necessary changes.

.m

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#15Marko Tiikkaja
marko@joh.to
In reply to: Marko Tiikkaja (#14)
1 attachment(s)
Re: onlyvalue aggregate (was: First Aggregate Funtion?)

Hi Dean,

Here's v2 of the patch. How's this look?

.m

Attachments:

single_value_v2.patchtext/x-patch; name=single_value_v2.patchDownload
*** a/doc/src/sgml/func.sgml
--- b/doc/src/sgml/func.sgml
***************
*** 12635,12640 **** NULL baz</literallayout>(3 rows)</entry>
--- 12635,12660 ----
       <row>
        <entry>
         <indexterm>
+         <primary>single_value</primary>
+        </indexterm>
+        <function>
+          single_value(<replaceable class="parameter">expression</replaceable>)
+        </function>
+       </entry>
+       <entry>
+        any type for which the equality operator has been defined
+       </entry>
+       <entry>
+        same as argument type
+       </entry>
+       <entry>returns the single distinct value from the input values;
+       if more than one distinct value exists (while considering NULLs equal),
+       an exception is raised</entry>
+      </row>
+ 
+      <row>
+       <entry>
+        <indexterm>
          <primary>string_agg</primary>
         </indexterm>
         <function>
*** a/src/backend/utils/adt/misc.c
--- b/src/backend/utils/adt/misc.c
***************
*** 40,45 ****
--- 40,46 ----
  #include "utils/acl.h"
  #include "utils/builtins.h"
  #include "utils/timestamp.h"
+ #include "utils/typcache.h"
  
  #define atooid(x)  ((Oid) strtoul((x), NULL, 10))
  
***************
*** 598,600 **** pg_column_is_updatable(PG_FUNCTION_ARGS)
--- 599,702 ----
  
  	PG_RETURN_BOOL((events & REQ_EVENTS) == REQ_EVENTS);
  }
+ 
+ struct single_value_agg_stype
+ {
+ 	FunctionCallInfoData fcinfo;
+ 	Datum datum;
+ 	bool isnull;
+ };
+ 
+ Datum
+ single_value_agg_transfn(PG_FUNCTION_ARGS)
+ {
+ 	MemoryContext aggcontext;
+ 	struct single_value_agg_stype *state;
+ 
+ 	if (!AggCheckCallContext(fcinfo, &aggcontext))
+ 	{
+ 		/* cannot be called directly because of internal-type argument */
+ 		elog(ERROR, "single_value_agg_transfn called in non-aggregate context");
+ 	}
+ 
+ 	if (PG_ARGISNULL(0))
+ 	{
+ 		TypeCacheEntry *typentry;
+ 		Oid arg1_typeid = get_fn_expr_argtype(fcinfo->flinfo, 1);
+ 
+ 		if (arg1_typeid == InvalidOid)
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ 					 errmsg("could not determine input data type")));
+ 
+ 		typentry = lookup_type_cache(arg1_typeid,
+ 									 TYPECACHE_EQ_OPR_FINFO);
+ 		if (!OidIsValid(typentry->eq_opr_finfo.fn_oid))
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_UNDEFINED_FUNCTION),
+ 			errmsg("could not identify an equality operator for type %s",
+ 				   format_type_be(arg1_typeid))));
+ 
+ 		state = (struct single_value_agg_stype *) MemoryContextAlloc(aggcontext, sizeof(struct single_value_agg_stype));
+ 
+ 		if (PG_ARGISNULL(1))
+ 		{
+ 			state->datum = (Datum) 0;
+ 			state->isnull = true;
+ 			memset(&state->fcinfo, 0, sizeof(state->fcinfo));
+ 		}
+ 		else
+ 		{
+ 			state->datum = PG_GETARG_DATUM(1);
+ 			state->isnull = false;
+ 			InitFunctionCallInfoData(state->fcinfo, &typentry->eq_opr_finfo, 2,
+ 									 InvalidOid, NULL, NULL);
+ 		}
+ 	}
+ 	else
+ 	{
+ 		bool oprresult;
+ 
+ 		state = (struct single_value_agg_stype *) PG_GETARG_POINTER(0);
+ 
+ 		if (state->isnull)
+ 			oprresult = PG_ARGISNULL(1);
+ 		else
+ 		{
+ 			state->fcinfo.argnull[0] = false;
+ 			state->fcinfo.argnull[1] = false;
+ 			state->fcinfo.arg[0] = state->datum;
+ 			state->fcinfo.arg[1] = PG_GETARG_DATUM(1);
+ 			state->fcinfo.isnull = false;
+ 			oprresult = DatumGetBool(FunctionCallInvoke(&state->fcinfo));
+ 		}
+ 		if (!oprresult)
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ 			errmsg("more than one distinct value passed to single_value")));
+ 	}
+ 
+ 	/*
+ 	 * The transition type for single_value() is declared to be "internal",
+ 	 * which is a pass-by-value type the same size as a pointer.  So we can
+ 	 * safely pass the pointer through nodeAgg.c's machinations.
+ 	 */
+ 	PG_RETURN_POINTER(state);
+ }
+ 
+ Datum
+ single_value_agg_finalfn(PG_FUNCTION_ARGS)
+ {
+ 	struct single_value_agg_stype *state;
+ 
+ 	if (PG_ARGISNULL(0))
+ 		PG_RETURN_NULL();
+ 
+ 	/* cannot be called directly because of internal-type argument */
+ 	Assert(AggCheckCallContext(fcinfo, NULL));
+ 
+ 	state = (struct single_value_agg_stype *) PG_GETARG_POINTER(0);
+ 	if (state->isnull)
+ 		PG_RETURN_NULL();
+ 	PG_RETURN_DATUM(state->datum);
+ }
*** a/src/include/catalog/pg_aggregate.h
--- b/src/include/catalog/pg_aggregate.h
***************
*** 292,297 **** DATA(insert ( 3197	n 0 json_object_agg_transfn json_object_agg_finalfn -				-
--- 292,300 ----
  DATA(insert ( 3267	n 0 jsonb_agg_transfn	jsonb_agg_finalfn			-				-				-				f f 0	2281	0	0		0	_null_ _null_ ));
  DATA(insert ( 3270	n 0 jsonb_object_agg_transfn jsonb_object_agg_finalfn -				-				-				f f 0	2281	0	0		0	_null_ _null_ ));
  
+ /* single_value */
+ DATA(insert ( 4202 n 0 single_value_agg_transfn    single_value_agg_finalfn    -               -               -               t f 0   2281    0   0       0   _null_ _null_ ));
+ 
  /* ordered-set and hypothetical-set aggregates */
  DATA(insert ( 3972	o 1 ordered_set_transition			percentile_disc_final					-		-		-		t f 0	2281	0	0		0	_null_ _null_ ));
  DATA(insert ( 3974	o 1 ordered_set_transition			percentile_cont_float8_final			-		-		-		f f 0	2281	0	0		0	_null_ _null_ ));
*** a/src/include/catalog/pg_proc.h
--- b/src/include/catalog/pg_proc.h
***************
*** 945,950 **** DATA(insert OID = 4053 (  array_agg		   PGNSP PGUID 12 1 0 0 0 t f f f f f i s 1
--- 945,956 ----
  DESCR("concatenate aggregate input into an array");
  DATA(insert OID = 3218 ( width_bucket	   PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 23 "2283 2277" _null_ _null_ _null_ _null_ _null_ width_bucket_array _null_ _null_ _null_ ));
  DESCR("bucket number of operand given a sorted array of bucket lower bounds");
+ DATA(insert OID = 4200 ( single_value_agg_transfn   PGNSP PGUID 12 1 0 0 0 f f f f f f i s 2 0 2281 "2281 2283" _null_ _null_ _null_ _null_ _null_ single_value_agg_transfn _null_ _null_ _null_ ));
+ DESCR("aggregate transition function");
+ DATA(insert OID = 4201 ( single_value_agg_finalfn   PGNSP PGUID 12 1 0 0 0 f f f f f f i s 2 0 2283 "2281 2283" _null_ _null_ _null_ _null_ _null_ single_value_agg_finalfn _null_ _null_ _null_ ));
+ DESCR("aggregate final function");
+ DATA(insert OID = 4202 ( single_value   PGNSP PGUID 12 1 0 0 0 t f f f f f i s 1 0 2283 "2283" _null_ _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ ));
+ DESCR("return the single distinct value from aggregate input");
  DATA(insert OID = 3816 (  array_typanalyze PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 16 "2281" _null_ _null_ _null_ _null_ _null_ array_typanalyze _null_ _null_ _null_ ));
  DESCR("array typanalyze");
  DATA(insert OID = 3817 (  arraycontsel	   PGNSP PGUID 12 1 0 0 0 f f f f t f s s 4 0 701 "2281 26 2281 23" _null_ _null_ _null_ _null_ _null_ arraycontsel _null_ _null_ _null_ ));
*** a/src/include/utils/builtins.h
--- b/src/include/utils/builtins.h
***************
*** 495,500 **** extern Datum pg_typeof(PG_FUNCTION_ARGS);
--- 495,502 ----
  extern Datum pg_collation_for(PG_FUNCTION_ARGS);
  extern Datum pg_relation_is_updatable(PG_FUNCTION_ARGS);
  extern Datum pg_column_is_updatable(PG_FUNCTION_ARGS);
+ extern Datum single_value_agg_transfn(PG_FUNCTION_ARGS);
+ extern Datum single_value_agg_finalfn(PG_FUNCTION_ARGS);
  
  /* oid.c */
  extern Datum oidin(PG_FUNCTION_ARGS);
*** a/src/test/regress/expected/aggregates.out
--- b/src/test/regress/expected/aggregates.out
***************
*** 1784,1786 **** NOTICE:  sum_transfn called with 4
--- 1784,1827 ----
  (1 row)
  
  rollback;
+ /* single_value */
+ select single_value(vv.v) from (values (1), (1)) vv(v);
+  single_value 
+ --------------
+             1
+ (1 row)
+ 
+ select single_value(vv.v) from (values (1), (2)) vv(v);
+ ERROR:  more than one distinct value passed to single_value
+ select single_value(vv.v) from (values (1), (NULL)) vv(v);
+ ERROR:  more than one distinct value passed to single_value
+ select single_value(vv.v) from (values (NULL), (NULL)) vv(v);
+  single_value 
+ --------------
+  
+ (1 row)
+ 
+ select single_value(vv.v) from (values (json '{}')) vv(v);
+ ERROR:  could not identify an equality operator for type json
+ select single_value(vv.v1) from (values (1,1), (2,2), (3,3)) vv(v1, v2);
+ ERROR:  more than one distinct value passed to single_value
+ select vv.v2, single_value(vv.v1) from (values (1,1), (2,2), (2,2), (3,3)) vv(v1, v2) group by vv.v2;
+  v2 | single_value 
+ ----+--------------
+   1 |            1
+   2 |            2
+   3 |            3
+ (3 rows)
+ 
+ select single_value(vv.v1) FILTER (WHERE vv.v2 = 1) from (values (1,1), (2,2), (3,3)) vv(v1, v2);
+  single_value 
+ --------------
+             1
+ (1 row)
+ 
+ select single_value(vv.v) from (select 1) vv(V) where false;
+  single_value 
+ --------------
+              
+ (1 row)
+ 
*** a/src/test/regress/sql/aggregates.sql
--- b/src/test/regress/sql/aggregates.sql
***************
*** 755,757 **** create aggregate my_half_sum(int4)
--- 755,768 ----
  select my_sum(one),my_half_sum(one) from (values(1),(2),(3),(4)) t(one);
  
  rollback;
+ 
+ /* single_value */
+ select single_value(vv.v) from (values (1), (1)) vv(v);
+ select single_value(vv.v) from (values (1), (2)) vv(v);
+ select single_value(vv.v) from (values (1), (NULL)) vv(v);
+ select single_value(vv.v) from (values (NULL), (NULL)) vv(v);
+ select single_value(vv.v) from (values (json '{}')) vv(v);
+ select single_value(vv.v1) from (values (1,1), (2,2), (3,3)) vv(v1, v2);
+ select vv.v2, single_value(vv.v1) from (values (1,1), (2,2), (2,2), (3,3)) vv(v1, v2) group by vv.v2;
+ select single_value(vv.v1) FILTER (WHERE vv.v2 = 1) from (values (1,1), (2,2), (3,3)) vv(v1, v2);
+ select single_value(vv.v) from (select 1) vv(V) where false;
#16Corey Huinker
corey.huinker@gmail.com
In reply to: Marko Tiikkaja (#15)
Re: onlyvalue aggregate (was: First Aggregate Funtion?)

On Fri, Nov 20, 2015 at 10:54 PM, Marko Tiikkaja <marko@joh.to> wrote:

Hi Dean,

Here's v2 of the patch. How's this look?

.m

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Sorry I didn't see this thread sooner.

I haven't applied the patch, but the test cases illustrate that the
function does everything I want, and has inspired me to think a bit about
some other assert-ish functions. Thanks!

#17Dean Rasheed
dean.a.rasheed@gmail.com
In reply to: Marko Tiikkaja (#15)
Re: onlyvalue aggregate (was: First Aggregate Funtion?)

On 21 November 2015 at 03:54, Marko Tiikkaja <marko@joh.to> wrote:

Here's v2 of the patch. How's this look?

Here are some initial review comments:

* My first thought on reading this patch is that it is somewhat
under-commented. For example, I would expect at least a block comment
at the top of the new code added by this patch. Also the fields of the
new structure could use some comments -- it might be obvious what
datum and isnull are for, but fcinfo is less obvious until you read
the code that uses it. Likewise the transfn is quite long, with almost
no explanatory comments.

* There's a clear bug in the null handling in the second branch of the
transfn -- when the new value is null and the previous value wasn't.
For example:

SELECT single_value(x) FROM (VALUES ('x'), (null)) t(x);
server closed the connection unexpectedly

* In the finalfn, I think calling AggCheckCallContext() should be the
first thing it does. Compare for example array_agg_array_finalfn().

* There's another less obvious bug in the way these functions handle
complex types. For example:

SELECT single_value(t) FROM (VALUES (1,'One'), (1,'One')) t(x,y);
ERROR: cache lookup failed for type 2139062143

You might want to look at how array_agg() handles that. Also the code
behind array_position() has some elements in common with this patch.

* Consider collation handling, as illustrated by array_position().

So I'm afraid there's still some work to do, but there are plenty of
examples in existing code to borrow from.

Regards,
Dean

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#18Greg Stark
stark@mit.edu
In reply to: Marko Tiikkaja (#3)
Re: onlyvalue aggregate (was: First Aggregate Funtion?)

On Wed, Oct 28, 2015 at 5:03 PM, Marko Tiikkaja <marko@joh.to> wrote:

SELECT a, sum(amount), onlyvalue(rolling_count)
FROM
(
SELECT a, amount, count(*) OVER (ORDER BY a) AS rolling_count
FROM tbl
) ss
GROUP BY a;

The same thing would happen even in the more common case of having
functionally dependent columns if they happen to be buried in a
subquery. That might well be convenient if you have some expression
you want to use in multiple aggregates such as:

SELECT pk, acol, avg(x), min(x), max(x)
FROM (
SELECT a,pk, a,acol, b.c+b.d+b.e AS x
FROM a JOIN b ON (a.pk = b.fk)
)
GROUP BY pk

Postgres would happily accept that if you collapsed the subquery and
ran the group by directly on the join but the subquery in between is
actually enough to hide the functional dependency information so it
complains that acol is not functionally dependent on the group by
column.

--
greg

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#19Michael Paquier
michael.paquier@gmail.com
In reply to: Dean Rasheed (#17)
Re: onlyvalue aggregate (was: First Aggregate Funtion?)

On Mon, Nov 23, 2015 at 6:29 PM, Dean Rasheed <dean.a.rasheed@gmail.com> wrote:

On 21 November 2015 at 03:54, Marko Tiikkaja <marko@joh.to> wrote:

Here's v2 of the patch. How's this look?

Here are some initial review comments:

* My first thought on reading this patch is that it is somewhat
under-commented. For example, I would expect at least a block comment
at the top of the new code added by this patch. Also the fields of the
new structure could use some comments -- it might be obvious what
datum and isnull are for, but fcinfo is less obvious until you read
the code that uses it. Likewise the transfn is quite long, with almost
no explanatory comments.

* There's a clear bug in the null handling in the second branch of the
transfn -- when the new value is null and the previous value wasn't.
For example:

SELECT single_value(x) FROM (VALUES ('x'), (null)) t(x);
server closed the connection unexpectedly

* In the finalfn, I think calling AggCheckCallContext() should be the
first thing it does. Compare for example array_agg_array_finalfn().

* There's another less obvious bug in the way these functions handle
complex types. For example:

SELECT single_value(t) FROM (VALUES (1,'One'), (1,'One')) t(x,y);
ERROR: cache lookup failed for type 2139062143

You might want to look at how array_agg() handles that. Also the code
behind array_position() has some elements in common with this patch.

* Consider collation handling, as illustrated by array_position().

So I'm afraid there's still some work to do, but there are plenty of
examples in existing code to borrow from.

There is a review, but no input from the author for more than 1 month,
hence patch has been marked as "returned with feedback". Feel free to
move it to next CF if you want to post a new version.
--
Michael

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers