[PATCH] Add min/max aggregate functions to BYTEA
Hello. BYTEA type has the ability to use comparison operations. But it
is absent of min/max aggregate functions. They are nice to have to
provide consistency with the TEXT type.
--
With best regards,
Marat Bukharov
Attachments:
v1-0001-add-bytea-agg-funcs.patchtext/x-patch; charset=US-ASCII; name=v1-0001-add-bytea-agg-funcs.patchDownload
From 109b91999b7a8d8c1808658134494b91616c2825 Mon Sep 17 00:00:00 2001
From: Marat Bukharov <maratik@yandex-team.ru>
Date: Wed, 3 Jul 2024 15:53:52 +0300
Subject: [PATCH v1] add bytea agg funcs
---
doc/src/sgml/func.sgml | 4 +--
src/backend/utils/adt/varlena.c | 38 ++++++++++++++++++++++++
src/include/catalog/pg_aggregate.dat | 6 ++++
src/include/catalog/pg_proc.dat | 7 +++++
src/test/regress/expected/aggregates.out | 12 ++++++++
src/test/regress/expected/opr_sanity.out | 3 +-
src/test/regress/sql/aggregates.sql | 5 +++-
7 files changed, 71 insertions(+), 4 deletions(-)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index f1f22a1960..891f7a4259 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -21976,7 +21976,7 @@ SELECT NULLIF(value, '(none)') ...
<para>
Computes the maximum of the non-null input
values. Available for any numeric, string, date/time, or enum type,
- as well as <type>inet</type>, <type>interval</type>,
+ as well as <type>bytea</type>, <type>inet</type>, <type>interval</type>,
<type>money</type>, <type>oid</type>, <type>pg_lsn</type>,
<type>tid</type>, <type>xid8</type>,
and arrays of any of these types.
@@ -21995,7 +21995,7 @@ SELECT NULLIF(value, '(none)') ...
<para>
Computes the minimum of the non-null input
values. Available for any numeric, string, date/time, or enum type,
- as well as <type>inet</type>, <type>interval</type>,
+ as well as <type>bytea</type>, <type>inet</type>, <type>interval</type>,
<type>money</type>, <type>oid</type>, <type>pg_lsn</type>,
<type>tid</type>, <type>xid8</type>,
and arrays of any of these types.
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index d2e2e9bbba..408fff0aff 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -3956,6 +3956,44 @@ byteacmp(PG_FUNCTION_ARGS)
PG_RETURN_INT32(cmp);
}
+Datum
+bytea_larger(PG_FUNCTION_ARGS)
+{
+ bytea *arg1 = PG_GETARG_BYTEA_PP(0);
+ bytea *arg2 = PG_GETARG_BYTEA_PP(1);
+ bytea *result;
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+ result = ((cmp > 0) || ((cmp == 0) && (len1 >= len2)) ? arg1 : arg2);
+
+ PG_RETURN_BYTEA_P(result);
+}
+
+Datum
+bytea_smaller(PG_FUNCTION_ARGS)
+{
+ bytea *arg1 = PG_GETARG_TEXT_PP(0);
+ bytea *arg2 = PG_GETARG_TEXT_PP(1);
+ bytea *result;
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+ result = ((cmp < 0) || ((cmp == 0) && (len1 <= len2)) ? arg1 : arg2);
+
+ PG_RETURN_BYTEA_P(result);
+}
+
Datum
bytea_sortsupport(PG_FUNCTION_ARGS)
{
diff --git a/src/include/catalog/pg_aggregate.dat b/src/include/catalog/pg_aggregate.dat
index 5f13532abc..f0ee024b7a 100644
--- a/src/include/catalog/pg_aggregate.dat
+++ b/src/include/catalog/pg_aggregate.dat
@@ -158,6 +158,9 @@
{ aggfnoid => 'max(xid8)', aggtransfn => 'xid8_larger',
aggcombinefn => 'xid8_larger', aggsortop => '>(xid8,xid8)',
aggtranstype => 'xid8' },
+{ aggfnoid => 'max(bytea)', aggtransfn => 'bytea_larger',
+ aggcombinefn => 'bytea_larger', aggsortop => '>(bytea,bytea)',
+ aggtranstype => 'bytea' }
# min
{ aggfnoid => 'min(int8)', aggtransfn => 'int8smaller',
@@ -226,6 +229,9 @@
{ aggfnoid => 'min(xid8)', aggtransfn => 'xid8_smaller',
aggcombinefn => 'xid8_smaller', aggsortop => '<(xid8,xid8)',
aggtranstype => 'xid8' },
+{ aggfnoid => 'min(bytea)', aggtransfn => 'bytea_smaller',
+ aggcombinefn => 'bytea_smaller', aggsortop => '<(bytea,bytea)',
+ aggtranstype => 'bytea' }
# count
{ aggfnoid => 'count(any)', aggtransfn => 'int8inc_any',
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index d4ac578ae6..fd2e4e7fd6 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1278,6 +1278,13 @@
proname => 'text_smaller', proleakproof => 't', prorettype => 'text',
proargtypes => 'text text', prosrc => 'text_smaller' },
+{ oid => '6347', descr => 'larger of two',
+ proname => 'bytea_larger', proleakproof => 't', prorettype => 'bytea',
+ proargtypes => 'bytea bytea', prosrc => 'bytea_larger' },
+{ oid => '6348', descr => 'smaller of two',
+ proname => 'bytea_smaller', proleakproof => 't', prorettype => 'bytea',
+ proargtypes => 'bytea bytea', prosrc => 'bytea_smaller' },
+
{ oid => '460', descr => 'I/O',
proname => 'int8in', prorettype => 'int8', proargtypes => 'cstring',
prosrc => 'int8in' },
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 1c1ca7573a..0540ccaa9f 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1959,6 +1959,18 @@ select string_agg(v, decode('ee', 'hex')) from bytea_test_table;
\xffeeaa
(1 row)
+select min(v) from bytea_test_table;
+ min
+------
+ \xaa
+(1 row)
+
+select max(v) from bytea_test_table;
+ max
+------
+ \xff
+(1 row)
+
drop table bytea_test_table;
-- Test parallel string_agg and array_agg
create table pagg_test (x int, y int) with (autovacuum_enabled = off);
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 9d047b21b8..693f339eba 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -634,6 +634,8 @@ name(text)
name(character)
text_larger(text,text)
text_smaller(text,text)
+bytea_larger(bytea,bytea)
+bytea_smaller(bytea,bytea)
int8eq(bigint,bigint)
int8ne(bigint,bigint)
int8lt(bigint,bigint)
@@ -2300,4 +2302,3 @@ WHERE icoll != 0 AND
indexrelid | indrelid | iclass | icoll
------------+----------+--------+-------
(0 rows)
-
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 1a18ca3d8f..1f438dbf79 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -741,7 +741,7 @@ select string_agg(distinct f1::text, ',' order by f1) from varchar_tbl; -- not
select string_agg(distinct f1, ',' order by f1::text) from varchar_tbl; -- not ok
select string_agg(distinct f1::text, ',' order by f1::text) from varchar_tbl; -- ok
--- string_agg bytea tests
+-- string_agg, min, max bytea tests
create table bytea_test_table(v bytea);
select string_agg(v, '') from bytea_test_table;
@@ -756,6 +756,9 @@ select string_agg(v, '') from bytea_test_table;
select string_agg(v, NULL) from bytea_test_table;
select string_agg(v, decode('ee', 'hex')) from bytea_test_table;
+select min(v) from bytea_test_table;
+select max(v) from bytea_test_table;
+
drop table bytea_test_table;
-- Test parallel string_agg and array_agg
--
2.44.2
V2 patch with fixed tests
--
With best regards,
Marat Bukharov
ср, 3 июл. 2024 г. в 16:03, Marat Buharov <marat.buharov@gmail.com>:
Show quoted text
Hello. BYTEA type has the ability to use comparison operations. But it
is absent of min/max aggregate functions. They are nice to have to
provide consistency with the TEXT type.--
With best regards,
Marat Bukharov
Attachments:
v2-0001-add-bytea-agg-funcs.patchtext/x-patch; charset=US-ASCII; name=v2-0001-add-bytea-agg-funcs.patchDownload
From f6895fd61696f65bdf7a1b7e2508228468aa5caf Mon Sep 17 00:00:00 2001
From: Marat Bukharov <maratik@yandex-team.ru>
Date: Wed, 3 Jul 2024 17:11:11 +0300
Subject: [PATCH v2] add bytea agg funcs
---
doc/src/sgml/func.sgml | 4 +--
src/backend/utils/adt/varlena.c | 38 ++++++++++++++++++++++++
src/include/catalog/pg_aggregate.dat | 6 ++++
src/include/catalog/pg_proc.dat | 13 ++++++++
src/test/regress/expected/aggregates.out | 14 ++++++++-
src/test/regress/expected/opr_sanity.out | 2 ++
src/test/regress/sql/aggregates.sql | 5 +++-
7 files changed, 78 insertions(+), 4 deletions(-)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index f1f22a1960..891f7a4259 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -21976,7 +21976,7 @@ SELECT NULLIF(value, '(none)') ...
<para>
Computes the maximum of the non-null input
values. Available for any numeric, string, date/time, or enum type,
- as well as <type>inet</type>, <type>interval</type>,
+ as well as <type>bytea</type>, <type>inet</type>, <type>interval</type>,
<type>money</type>, <type>oid</type>, <type>pg_lsn</type>,
<type>tid</type>, <type>xid8</type>,
and arrays of any of these types.
@@ -21995,7 +21995,7 @@ SELECT NULLIF(value, '(none)') ...
<para>
Computes the minimum of the non-null input
values. Available for any numeric, string, date/time, or enum type,
- as well as <type>inet</type>, <type>interval</type>,
+ as well as <type>bytea</type>, <type>inet</type>, <type>interval</type>,
<type>money</type>, <type>oid</type>, <type>pg_lsn</type>,
<type>tid</type>, <type>xid8</type>,
and arrays of any of these types.
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index d2e2e9bbba..408fff0aff 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -3956,6 +3956,44 @@ byteacmp(PG_FUNCTION_ARGS)
PG_RETURN_INT32(cmp);
}
+Datum
+bytea_larger(PG_FUNCTION_ARGS)
+{
+ bytea *arg1 = PG_GETARG_BYTEA_PP(0);
+ bytea *arg2 = PG_GETARG_BYTEA_PP(1);
+ bytea *result;
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+ result = ((cmp > 0) || ((cmp == 0) && (len1 >= len2)) ? arg1 : arg2);
+
+ PG_RETURN_BYTEA_P(result);
+}
+
+Datum
+bytea_smaller(PG_FUNCTION_ARGS)
+{
+ bytea *arg1 = PG_GETARG_TEXT_PP(0);
+ bytea *arg2 = PG_GETARG_TEXT_PP(1);
+ bytea *result;
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+ result = ((cmp < 0) || ((cmp == 0) && (len1 <= len2)) ? arg1 : arg2);
+
+ PG_RETURN_BYTEA_P(result);
+}
+
Datum
bytea_sortsupport(PG_FUNCTION_ARGS)
{
diff --git a/src/include/catalog/pg_aggregate.dat b/src/include/catalog/pg_aggregate.dat
index 5f13532abc..970d9a70fd 100644
--- a/src/include/catalog/pg_aggregate.dat
+++ b/src/include/catalog/pg_aggregate.dat
@@ -158,6 +158,9 @@
{ aggfnoid => 'max(xid8)', aggtransfn => 'xid8_larger',
aggcombinefn => 'xid8_larger', aggsortop => '>(xid8,xid8)',
aggtranstype => 'xid8' },
+{ aggfnoid => 'max(bytea)', aggtransfn => 'bytea_larger',
+ aggcombinefn => 'bytea_larger', aggsortop => '>(bytea,bytea)',
+ aggtranstype => 'bytea' },
# min
{ aggfnoid => 'min(int8)', aggtransfn => 'int8smaller',
@@ -226,6 +229,9 @@
{ aggfnoid => 'min(xid8)', aggtransfn => 'xid8_smaller',
aggcombinefn => 'xid8_smaller', aggsortop => '<(xid8,xid8)',
aggtranstype => 'xid8' },
+{ aggfnoid => 'min(bytea)', aggtransfn => 'bytea_smaller',
+ aggcombinefn => 'bytea_smaller', aggsortop => '<(bytea,bytea)',
+ aggtranstype => 'bytea' },
# count
{ aggfnoid => 'count(any)', aggtransfn => 'int8inc_any',
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index d4ac578ae6..f16f17ef3a 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1278,6 +1278,13 @@
proname => 'text_smaller', proleakproof => 't', prorettype => 'text',
proargtypes => 'text text', prosrc => 'text_smaller' },
+{ oid => '6347', descr => 'larger of two',
+ proname => 'bytea_larger', proleakproof => 't', prorettype => 'bytea',
+ proargtypes => 'bytea bytea', prosrc => 'bytea_larger' },
+{ oid => '6348', descr => 'smaller of two',
+ proname => 'bytea_smaller', proleakproof => 't', prorettype => 'bytea',
+ proargtypes => 'bytea bytea', prosrc => 'bytea_smaller' },
+
{ oid => '460', descr => 'I/O',
proname => 'int8in', prorettype => 'int8', proargtypes => 'cstring',
prosrc => 'int8in' },
@@ -6764,6 +6771,9 @@
{ oid => '5099', descr => 'maximum value of all xid8 input values',
proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'xid8',
proargtypes => 'xid8', prosrc => 'aggregate_dummy' },
+{ oid => '6349', descr => 'maximum value of all bytea input values',
+ proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'bytea',
+ proargtypes => 'bytea', prosrc => 'aggregate_dummy' },
{ oid => '2131', descr => 'minimum value of all bigint input values',
proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'int8',
@@ -6834,6 +6844,9 @@
{ oid => '5100', descr => 'minimum value of all xid8 input values',
proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'xid8',
proargtypes => 'xid8', prosrc => 'aggregate_dummy' },
+{ oid => '6350', descr => 'minimum value of all bytea input values',
+ proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'bytea',
+ proargtypes => 'bytea', prosrc => 'aggregate_dummy' },
# count has two forms: count(any) and count(*)
{ oid => '2147',
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 1c1ca7573a..db741d0c36 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1925,7 +1925,7 @@ select string_agg(distinct f1::text, ',' order by f1::text) from varchar_tbl; -
a,ab,abcd
(1 row)
--- string_agg bytea tests
+-- string_agg, min, max bytea tests
create table bytea_test_table(v bytea);
select string_agg(v, '') from bytea_test_table;
string_agg
@@ -1959,6 +1959,18 @@ select string_agg(v, decode('ee', 'hex')) from bytea_test_table;
\xffeeaa
(1 row)
+select min(v) from bytea_test_table;
+ min
+------
+ \xaa
+(1 row)
+
+select max(v) from bytea_test_table;
+ max
+------
+ \xff
+(1 row)
+
drop table bytea_test_table;
-- Test parallel string_agg and array_agg
create table pagg_test (x int, y int) with (autovacuum_enabled = off);
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 9d047b21b8..087e349894 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -874,6 +874,8 @@ xid8ne(xid8,xid8)
xid8cmp(xid8,xid8)
uuid_extract_timestamp(uuid)
uuid_extract_version(uuid)
+bytea_larger(bytea,bytea)
+bytea_smaller(bytea,bytea)
-- restore normal output mode
\a\t
-- List of functions used by libpq's fe-lobj.c
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 1a18ca3d8f..1f438dbf79 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -741,7 +741,7 @@ select string_agg(distinct f1::text, ',' order by f1) from varchar_tbl; -- not
select string_agg(distinct f1, ',' order by f1::text) from varchar_tbl; -- not ok
select string_agg(distinct f1::text, ',' order by f1::text) from varchar_tbl; -- ok
--- string_agg bytea tests
+-- string_agg, min, max bytea tests
create table bytea_test_table(v bytea);
select string_agg(v, '') from bytea_test_table;
@@ -756,6 +756,9 @@ select string_agg(v, '') from bytea_test_table;
select string_agg(v, NULL) from bytea_test_table;
select string_agg(v, decode('ee', 'hex')) from bytea_test_table;
+select min(v) from bytea_test_table;
+select max(v) from bytea_test_table;
+
drop table bytea_test_table;
-- Test parallel string_agg and array_agg
--
2.44.2
V3 patch with fixed length comparison
--
With best regards,
Marat Bukharov
Show quoted text
V2 patch with fixed tests
Hello. BYTEA type has the ability to use comparison operations. But it
is absent of min/max aggregate functions. They are nice to have to
provide consistency with the TEXT type.
V3 patch with fixed length comparison
--
With best regards,
Marat Bukharov
Show quoted text
V2 patch with fixed tests
Hello. BYTEA type has the ability to use comparison operations. But it
is absent of min/max aggregate functions. They are nice to have to
provide consistency with the TEXT type.
Attachments:
v3-0001-add-bytea-agg-funcs.patchtext/x-patch; charset=US-ASCII; name=v3-0001-add-bytea-agg-funcs.patchDownload
From a2cf0f0a7d1063b3fc6927c0117f4d463ab9087a Mon Sep 17 00:00:00 2001
From: Marat Bukharov <maratik@yandex-team.ru>
Date: Wed, 3 Jul 2024 17:52:16 +0300
Subject: [PATCH v3] add bytea agg funcs
---
doc/src/sgml/func.sgml | 4 +--
src/backend/utils/adt/varlena.c | 38 ++++++++++++++++++++++++
src/include/catalog/pg_aggregate.dat | 6 ++++
src/include/catalog/pg_proc.dat | 13 ++++++++
src/test/regress/expected/aggregates.out | 14 ++++++++-
src/test/regress/expected/opr_sanity.out | 2 ++
src/test/regress/sql/aggregates.sql | 5 +++-
7 files changed, 78 insertions(+), 4 deletions(-)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index f1f22a1960..891f7a4259 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -21976,7 +21976,7 @@ SELECT NULLIF(value, '(none)') ...
<para>
Computes the maximum of the non-null input
values. Available for any numeric, string, date/time, or enum type,
- as well as <type>inet</type>, <type>interval</type>,
+ as well as <type>bytea</type>, <type>inet</type>, <type>interval</type>,
<type>money</type>, <type>oid</type>, <type>pg_lsn</type>,
<type>tid</type>, <type>xid8</type>,
and arrays of any of these types.
@@ -21995,7 +21995,7 @@ SELECT NULLIF(value, '(none)') ...
<para>
Computes the minimum of the non-null input
values. Available for any numeric, string, date/time, or enum type,
- as well as <type>inet</type>, <type>interval</type>,
+ as well as <type>bytea</type>, <type>inet</type>, <type>interval</type>,
<type>money</type>, <type>oid</type>, <type>pg_lsn</type>,
<type>tid</type>, <type>xid8</type>,
and arrays of any of these types.
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index d2e2e9bbba..9fd7310d09 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -3956,6 +3956,44 @@ byteacmp(PG_FUNCTION_ARGS)
PG_RETURN_INT32(cmp);
}
+Datum
+bytea_larger(PG_FUNCTION_ARGS)
+{
+ bytea *arg1 = PG_GETARG_BYTEA_PP(0);
+ bytea *arg2 = PG_GETARG_BYTEA_PP(1);
+ bytea *result;
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+ result = ((cmp > 0) || ((cmp == 0) && (len1 > len2)) ? arg1 : arg2);
+
+ PG_RETURN_BYTEA_P(result);
+}
+
+Datum
+bytea_smaller(PG_FUNCTION_ARGS)
+{
+ bytea *arg1 = PG_GETARG_TEXT_PP(0);
+ bytea *arg2 = PG_GETARG_TEXT_PP(1);
+ bytea *result;
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+ result = ((cmp < 0) || ((cmp == 0) && (len1 < len2)) ? arg1 : arg2);
+
+ PG_RETURN_BYTEA_P(result);
+}
+
Datum
bytea_sortsupport(PG_FUNCTION_ARGS)
{
diff --git a/src/include/catalog/pg_aggregate.dat b/src/include/catalog/pg_aggregate.dat
index 5f13532abc..970d9a70fd 100644
--- a/src/include/catalog/pg_aggregate.dat
+++ b/src/include/catalog/pg_aggregate.dat
@@ -158,6 +158,9 @@
{ aggfnoid => 'max(xid8)', aggtransfn => 'xid8_larger',
aggcombinefn => 'xid8_larger', aggsortop => '>(xid8,xid8)',
aggtranstype => 'xid8' },
+{ aggfnoid => 'max(bytea)', aggtransfn => 'bytea_larger',
+ aggcombinefn => 'bytea_larger', aggsortop => '>(bytea,bytea)',
+ aggtranstype => 'bytea' },
# min
{ aggfnoid => 'min(int8)', aggtransfn => 'int8smaller',
@@ -226,6 +229,9 @@
{ aggfnoid => 'min(xid8)', aggtransfn => 'xid8_smaller',
aggcombinefn => 'xid8_smaller', aggsortop => '<(xid8,xid8)',
aggtranstype => 'xid8' },
+{ aggfnoid => 'min(bytea)', aggtransfn => 'bytea_smaller',
+ aggcombinefn => 'bytea_smaller', aggsortop => '<(bytea,bytea)',
+ aggtranstype => 'bytea' },
# count
{ aggfnoid => 'count(any)', aggtransfn => 'int8inc_any',
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index d4ac578ae6..f16f17ef3a 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1278,6 +1278,13 @@
proname => 'text_smaller', proleakproof => 't', prorettype => 'text',
proargtypes => 'text text', prosrc => 'text_smaller' },
+{ oid => '6347', descr => 'larger of two',
+ proname => 'bytea_larger', proleakproof => 't', prorettype => 'bytea',
+ proargtypes => 'bytea bytea', prosrc => 'bytea_larger' },
+{ oid => '6348', descr => 'smaller of two',
+ proname => 'bytea_smaller', proleakproof => 't', prorettype => 'bytea',
+ proargtypes => 'bytea bytea', prosrc => 'bytea_smaller' },
+
{ oid => '460', descr => 'I/O',
proname => 'int8in', prorettype => 'int8', proargtypes => 'cstring',
prosrc => 'int8in' },
@@ -6764,6 +6771,9 @@
{ oid => '5099', descr => 'maximum value of all xid8 input values',
proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'xid8',
proargtypes => 'xid8', prosrc => 'aggregate_dummy' },
+{ oid => '6349', descr => 'maximum value of all bytea input values',
+ proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'bytea',
+ proargtypes => 'bytea', prosrc => 'aggregate_dummy' },
{ oid => '2131', descr => 'minimum value of all bigint input values',
proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'int8',
@@ -6834,6 +6844,9 @@
{ oid => '5100', descr => 'minimum value of all xid8 input values',
proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'xid8',
proargtypes => 'xid8', prosrc => 'aggregate_dummy' },
+{ oid => '6350', descr => 'minimum value of all bytea input values',
+ proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'bytea',
+ proargtypes => 'bytea', prosrc => 'aggregate_dummy' },
# count has two forms: count(any) and count(*)
{ oid => '2147',
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 1c1ca7573a..db741d0c36 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1925,7 +1925,7 @@ select string_agg(distinct f1::text, ',' order by f1::text) from varchar_tbl; -
a,ab,abcd
(1 row)
--- string_agg bytea tests
+-- string_agg, min, max bytea tests
create table bytea_test_table(v bytea);
select string_agg(v, '') from bytea_test_table;
string_agg
@@ -1959,6 +1959,18 @@ select string_agg(v, decode('ee', 'hex')) from bytea_test_table;
\xffeeaa
(1 row)
+select min(v) from bytea_test_table;
+ min
+------
+ \xaa
+(1 row)
+
+select max(v) from bytea_test_table;
+ max
+------
+ \xff
+(1 row)
+
drop table bytea_test_table;
-- Test parallel string_agg and array_agg
create table pagg_test (x int, y int) with (autovacuum_enabled = off);
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 9d047b21b8..087e349894 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -874,6 +874,8 @@ xid8ne(xid8,xid8)
xid8cmp(xid8,xid8)
uuid_extract_timestamp(uuid)
uuid_extract_version(uuid)
+bytea_larger(bytea,bytea)
+bytea_smaller(bytea,bytea)
-- restore normal output mode
\a\t
-- List of functions used by libpq's fe-lobj.c
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 1a18ca3d8f..1f438dbf79 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -741,7 +741,7 @@ select string_agg(distinct f1::text, ',' order by f1) from varchar_tbl; -- not
select string_agg(distinct f1, ',' order by f1::text) from varchar_tbl; -- not ok
select string_agg(distinct f1::text, ',' order by f1::text) from varchar_tbl; -- ok
--- string_agg bytea tests
+-- string_agg, min, max bytea tests
create table bytea_test_table(v bytea);
select string_agg(v, '') from bytea_test_table;
@@ -756,6 +756,9 @@ select string_agg(v, '') from bytea_test_table;
select string_agg(v, NULL) from bytea_test_table;
select string_agg(v, decode('ee', 'hex')) from bytea_test_table;
+select min(v) from bytea_test_table;
+select max(v) from bytea_test_table;
+
drop table bytea_test_table;
-- Test parallel string_agg and array_agg
--
2.44.2
V4 path with fixed usage PG_GETARG_BYTEA_PP instead of PG_GETARG_TEXT_PP
--
With best regards,
Marat Bukharov
Show quoted text
V3 patch with fixed length comparison
V2 patch with fixed tests
Hello. BYTEA type has the ability to use comparison operations. But it
is absent of min/max aggregate functions. They are nice to have to
provide consistency with the TEXT type.
Attachments:
v4-0001-add-bytea-agg-funcs.patchtext/x-patch; charset=US-ASCII; name=v4-0001-add-bytea-agg-funcs.patchDownload
From b046ef46463cda32b83ebb010a82633ff74802e3 Mon Sep 17 00:00:00 2001
From: Marat Bukharov <maratik@yandex-team.ru>
Date: Wed, 3 Jul 2024 19:02:35 +0300
Subject: [PATCH v4] add bytea agg funcs
---
doc/src/sgml/func.sgml | 4 +--
src/backend/utils/adt/varlena.c | 38 ++++++++++++++++++++++++
src/include/catalog/pg_aggregate.dat | 6 ++++
src/include/catalog/pg_proc.dat | 13 ++++++++
src/test/regress/expected/aggregates.out | 14 ++++++++-
src/test/regress/expected/opr_sanity.out | 2 ++
src/test/regress/sql/aggregates.sql | 5 +++-
7 files changed, 78 insertions(+), 4 deletions(-)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index f1f22a1960..891f7a4259 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -21976,7 +21976,7 @@ SELECT NULLIF(value, '(none)') ...
<para>
Computes the maximum of the non-null input
values. Available for any numeric, string, date/time, or enum type,
- as well as <type>inet</type>, <type>interval</type>,
+ as well as <type>bytea</type>, <type>inet</type>, <type>interval</type>,
<type>money</type>, <type>oid</type>, <type>pg_lsn</type>,
<type>tid</type>, <type>xid8</type>,
and arrays of any of these types.
@@ -21995,7 +21995,7 @@ SELECT NULLIF(value, '(none)') ...
<para>
Computes the minimum of the non-null input
values. Available for any numeric, string, date/time, or enum type,
- as well as <type>inet</type>, <type>interval</type>,
+ as well as <type>bytea</type>, <type>inet</type>, <type>interval</type>,
<type>money</type>, <type>oid</type>, <type>pg_lsn</type>,
<type>tid</type>, <type>xid8</type>,
and arrays of any of these types.
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index d2e2e9bbba..e80c789442 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -3956,6 +3956,44 @@ byteacmp(PG_FUNCTION_ARGS)
PG_RETURN_INT32(cmp);
}
+Datum
+bytea_larger(PG_FUNCTION_ARGS)
+{
+ bytea *arg1 = PG_GETARG_BYTEA_PP(0);
+ bytea *arg2 = PG_GETARG_BYTEA_PP(1);
+ bytea *result;
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+ result = ((cmp > 0) || ((cmp == 0) && (len1 > len2)) ? arg1 : arg2);
+
+ PG_RETURN_BYTEA_P(result);
+}
+
+Datum
+bytea_smaller(PG_FUNCTION_ARGS)
+{
+ bytea *arg1 = PG_GETARG_BYTEA_PP(0);
+ bytea *arg2 = PG_GETARG_BYTEA_PP(1);
+ bytea *result;
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+ result = ((cmp < 0) || ((cmp == 0) && (len1 < len2)) ? arg1 : arg2);
+
+ PG_RETURN_BYTEA_P(result);
+}
+
Datum
bytea_sortsupport(PG_FUNCTION_ARGS)
{
diff --git a/src/include/catalog/pg_aggregate.dat b/src/include/catalog/pg_aggregate.dat
index 5f13532abc..970d9a70fd 100644
--- a/src/include/catalog/pg_aggregate.dat
+++ b/src/include/catalog/pg_aggregate.dat
@@ -158,6 +158,9 @@
{ aggfnoid => 'max(xid8)', aggtransfn => 'xid8_larger',
aggcombinefn => 'xid8_larger', aggsortop => '>(xid8,xid8)',
aggtranstype => 'xid8' },
+{ aggfnoid => 'max(bytea)', aggtransfn => 'bytea_larger',
+ aggcombinefn => 'bytea_larger', aggsortop => '>(bytea,bytea)',
+ aggtranstype => 'bytea' },
# min
{ aggfnoid => 'min(int8)', aggtransfn => 'int8smaller',
@@ -226,6 +229,9 @@
{ aggfnoid => 'min(xid8)', aggtransfn => 'xid8_smaller',
aggcombinefn => 'xid8_smaller', aggsortop => '<(xid8,xid8)',
aggtranstype => 'xid8' },
+{ aggfnoid => 'min(bytea)', aggtransfn => 'bytea_smaller',
+ aggcombinefn => 'bytea_smaller', aggsortop => '<(bytea,bytea)',
+ aggtranstype => 'bytea' },
# count
{ aggfnoid => 'count(any)', aggtransfn => 'int8inc_any',
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index d4ac578ae6..f16f17ef3a 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1278,6 +1278,13 @@
proname => 'text_smaller', proleakproof => 't', prorettype => 'text',
proargtypes => 'text text', prosrc => 'text_smaller' },
+{ oid => '6347', descr => 'larger of two',
+ proname => 'bytea_larger', proleakproof => 't', prorettype => 'bytea',
+ proargtypes => 'bytea bytea', prosrc => 'bytea_larger' },
+{ oid => '6348', descr => 'smaller of two',
+ proname => 'bytea_smaller', proleakproof => 't', prorettype => 'bytea',
+ proargtypes => 'bytea bytea', prosrc => 'bytea_smaller' },
+
{ oid => '460', descr => 'I/O',
proname => 'int8in', prorettype => 'int8', proargtypes => 'cstring',
prosrc => 'int8in' },
@@ -6764,6 +6771,9 @@
{ oid => '5099', descr => 'maximum value of all xid8 input values',
proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'xid8',
proargtypes => 'xid8', prosrc => 'aggregate_dummy' },
+{ oid => '6349', descr => 'maximum value of all bytea input values',
+ proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'bytea',
+ proargtypes => 'bytea', prosrc => 'aggregate_dummy' },
{ oid => '2131', descr => 'minimum value of all bigint input values',
proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'int8',
@@ -6834,6 +6844,9 @@
{ oid => '5100', descr => 'minimum value of all xid8 input values',
proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'xid8',
proargtypes => 'xid8', prosrc => 'aggregate_dummy' },
+{ oid => '6350', descr => 'minimum value of all bytea input values',
+ proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'bytea',
+ proargtypes => 'bytea', prosrc => 'aggregate_dummy' },
# count has two forms: count(any) and count(*)
{ oid => '2147',
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 1c1ca7573a..db741d0c36 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1925,7 +1925,7 @@ select string_agg(distinct f1::text, ',' order by f1::text) from varchar_tbl; -
a,ab,abcd
(1 row)
--- string_agg bytea tests
+-- string_agg, min, max bytea tests
create table bytea_test_table(v bytea);
select string_agg(v, '') from bytea_test_table;
string_agg
@@ -1959,6 +1959,18 @@ select string_agg(v, decode('ee', 'hex')) from bytea_test_table;
\xffeeaa
(1 row)
+select min(v) from bytea_test_table;
+ min
+------
+ \xaa
+(1 row)
+
+select max(v) from bytea_test_table;
+ max
+------
+ \xff
+(1 row)
+
drop table bytea_test_table;
-- Test parallel string_agg and array_agg
create table pagg_test (x int, y int) with (autovacuum_enabled = off);
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 9d047b21b8..087e349894 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -874,6 +874,8 @@ xid8ne(xid8,xid8)
xid8cmp(xid8,xid8)
uuid_extract_timestamp(uuid)
uuid_extract_version(uuid)
+bytea_larger(bytea,bytea)
+bytea_smaller(bytea,bytea)
-- restore normal output mode
\a\t
-- List of functions used by libpq's fe-lobj.c
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 1a18ca3d8f..1f438dbf79 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -741,7 +741,7 @@ select string_agg(distinct f1::text, ',' order by f1) from varchar_tbl; -- not
select string_agg(distinct f1, ',' order by f1::text) from varchar_tbl; -- not ok
select string_agg(distinct f1::text, ',' order by f1::text) from varchar_tbl; -- ok
--- string_agg bytea tests
+-- string_agg, min, max bytea tests
create table bytea_test_table(v bytea);
select string_agg(v, '') from bytea_test_table;
@@ -756,6 +756,9 @@ select string_agg(v, '') from bytea_test_table;
select string_agg(v, NULL) from bytea_test_table;
select string_agg(v, decode('ee', 'hex')) from bytea_test_table;
+select min(v) from bytea_test_table;
+select max(v) from bytea_test_table;
+
drop table bytea_test_table;
-- Test parallel string_agg and array_agg
--
2.44.2
Hi Marat,
V4 path with fixed usage PG_GETARG_BYTEA_PP instead of PG_GETARG_TEXT_PP
Thanks for the patch.
Please add it to the nearest open commitfest [1]https://commitfest.postgresql.org/.
```
+select min(v) from bytea_test_table;
+ min
+------
+ \xaa
+(1 row)
+
+select max(v) from bytea_test_table;
+ max
+------
+ \xff
+(1 row)
```
If I understand correctly, all the v's are of the same size. If this
is the case you should add more test cases.
[1]: https://commitfest.postgresql.org/
--
Best regards,
Aleksander Alekseev
What part of commitfest should I put the current patch to: "SQL
Commands", "Miscellaneous" or something else? I can't figure it out.
--
With best regards,
Marat Bukharov
Show quoted text
Hi Marat,
V4 path with fixed usage PG_GETARG_BYTEA_PP instead of PG_GETARG_TEXT_PP
Thanks for the patch.
Please add it to the nearest open commitfest [1].
``` +select min(v) from bytea_test_table; + min +------ + \xaa +(1 row) + +select max(v) from bytea_test_table; + max +------ + \xff +(1 row) ```If I understand correctly, all the v's are of the same size. If this
is the case you should add more test cases.[1]: https://commitfest.postgresql.org/
--
Best regards,
Aleksander Alekseev
Hi,
What part of commitfest should I put the current patch to: "SQL
Commands", "Miscellaneous" or something else? I can't figure it out.
Personally I qualified a similar patch [1]https://commitfest.postgresql.org/48/4905/ as "Server Features",
although I'm not 100% sure if this was the best choice.
[1]: https://commitfest.postgresql.org/48/4905/
--
Best regards,
Aleksander Alekseev
V5 patch. I've added more tests with different bytea sizes
--
With best regards,
Marat Bukharov
чт, 4 июл. 2024 г. в 15:29, Aleksander Alekseev <aleksander@timescale.com>:
Show quoted text
Hi Marat,
V4 path with fixed usage PG_GETARG_BYTEA_PP instead of PG_GETARG_TEXT_PP
Thanks for the patch.
Please add it to the nearest open commitfest [1].
``` +select min(v) from bytea_test_table; + min +------ + \xaa +(1 row) + +select max(v) from bytea_test_table; + max +------ + \xff +(1 row) ```If I understand correctly, all the v's are of the same size. If this
is the case you should add more test cases.[1]: https://commitfest.postgresql.org/
--
Best regards,
Aleksander Alekseev
Attachments:
v5-0001-add-bytea-agg-funcs.patchtext/x-patch; charset=US-ASCII; name=v5-0001-add-bytea-agg-funcs.patchDownload
From 3bc3786ede117442b06d6cbe36a7b015d76a7cf4 Mon Sep 17 00:00:00 2001
From: Marat Bukharov <maratik@yandex-team.ru>
Date: Wed, 24 Jul 2024 17:37:31 +0300
Subject: [PATCH v5] add bytea agg funcs
---
doc/src/sgml/func.sgml | 4 +--
src/backend/utils/adt/varlena.c | 38 ++++++++++++++++++++++++
src/include/catalog/pg_aggregate.dat | 6 ++++
src/include/catalog/pg_proc.dat | 13 ++++++++
src/test/regress/expected/aggregates.out | 28 ++++++++++++++++-
src/test/regress/expected/opr_sanity.out | 2 ++
src/test/regress/sql/aggregates.sql | 11 ++++++-
7 files changed, 98 insertions(+), 4 deletions(-)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index f1f22a1960..891f7a4259 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -21976,7 +21976,7 @@ SELECT NULLIF(value, '(none)') ...
<para>
Computes the maximum of the non-null input
values. Available for any numeric, string, date/time, or enum type,
- as well as <type>inet</type>, <type>interval</type>,
+ as well as <type>bytea</type>, <type>inet</type>, <type>interval</type>,
<type>money</type>, <type>oid</type>, <type>pg_lsn</type>,
<type>tid</type>, <type>xid8</type>,
and arrays of any of these types.
@@ -21995,7 +21995,7 @@ SELECT NULLIF(value, '(none)') ...
<para>
Computes the minimum of the non-null input
values. Available for any numeric, string, date/time, or enum type,
- as well as <type>inet</type>, <type>interval</type>,
+ as well as <type>bytea</type>, <type>inet</type>, <type>interval</type>,
<type>money</type>, <type>oid</type>, <type>pg_lsn</type>,
<type>tid</type>, <type>xid8</type>,
and arrays of any of these types.
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index d2e2e9bbba..e80c789442 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -3956,6 +3956,44 @@ byteacmp(PG_FUNCTION_ARGS)
PG_RETURN_INT32(cmp);
}
+Datum
+bytea_larger(PG_FUNCTION_ARGS)
+{
+ bytea *arg1 = PG_GETARG_BYTEA_PP(0);
+ bytea *arg2 = PG_GETARG_BYTEA_PP(1);
+ bytea *result;
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+ result = ((cmp > 0) || ((cmp == 0) && (len1 > len2)) ? arg1 : arg2);
+
+ PG_RETURN_BYTEA_P(result);
+}
+
+Datum
+bytea_smaller(PG_FUNCTION_ARGS)
+{
+ bytea *arg1 = PG_GETARG_BYTEA_PP(0);
+ bytea *arg2 = PG_GETARG_BYTEA_PP(1);
+ bytea *result;
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+ result = ((cmp < 0) || ((cmp == 0) && (len1 < len2)) ? arg1 : arg2);
+
+ PG_RETURN_BYTEA_P(result);
+}
+
Datum
bytea_sortsupport(PG_FUNCTION_ARGS)
{
diff --git a/src/include/catalog/pg_aggregate.dat b/src/include/catalog/pg_aggregate.dat
index 5f13532abc..970d9a70fd 100644
--- a/src/include/catalog/pg_aggregate.dat
+++ b/src/include/catalog/pg_aggregate.dat
@@ -158,6 +158,9 @@
{ aggfnoid => 'max(xid8)', aggtransfn => 'xid8_larger',
aggcombinefn => 'xid8_larger', aggsortop => '>(xid8,xid8)',
aggtranstype => 'xid8' },
+{ aggfnoid => 'max(bytea)', aggtransfn => 'bytea_larger',
+ aggcombinefn => 'bytea_larger', aggsortop => '>(bytea,bytea)',
+ aggtranstype => 'bytea' },
# min
{ aggfnoid => 'min(int8)', aggtransfn => 'int8smaller',
@@ -226,6 +229,9 @@
{ aggfnoid => 'min(xid8)', aggtransfn => 'xid8_smaller',
aggcombinefn => 'xid8_smaller', aggsortop => '<(xid8,xid8)',
aggtranstype => 'xid8' },
+{ aggfnoid => 'min(bytea)', aggtransfn => 'bytea_smaller',
+ aggcombinefn => 'bytea_smaller', aggsortop => '<(bytea,bytea)',
+ aggtranstype => 'bytea' },
# count
{ aggfnoid => 'count(any)', aggtransfn => 'int8inc_any',
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index d4ac578ae6..f16f17ef3a 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1278,6 +1278,13 @@
proname => 'text_smaller', proleakproof => 't', prorettype => 'text',
proargtypes => 'text text', prosrc => 'text_smaller' },
+{ oid => '6347', descr => 'larger of two',
+ proname => 'bytea_larger', proleakproof => 't', prorettype => 'bytea',
+ proargtypes => 'bytea bytea', prosrc => 'bytea_larger' },
+{ oid => '6348', descr => 'smaller of two',
+ proname => 'bytea_smaller', proleakproof => 't', prorettype => 'bytea',
+ proargtypes => 'bytea bytea', prosrc => 'bytea_smaller' },
+
{ oid => '460', descr => 'I/O',
proname => 'int8in', prorettype => 'int8', proargtypes => 'cstring',
prosrc => 'int8in' },
@@ -6764,6 +6771,9 @@
{ oid => '5099', descr => 'maximum value of all xid8 input values',
proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'xid8',
proargtypes => 'xid8', prosrc => 'aggregate_dummy' },
+{ oid => '6349', descr => 'maximum value of all bytea input values',
+ proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'bytea',
+ proargtypes => 'bytea', prosrc => 'aggregate_dummy' },
{ oid => '2131', descr => 'minimum value of all bigint input values',
proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'int8',
@@ -6834,6 +6844,9 @@
{ oid => '5100', descr => 'minimum value of all xid8 input values',
proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'xid8',
proargtypes => 'xid8', prosrc => 'aggregate_dummy' },
+{ oid => '6350', descr => 'minimum value of all bytea input values',
+ proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'bytea',
+ proargtypes => 'bytea', prosrc => 'aggregate_dummy' },
# count has two forms: count(any) and count(*)
{ oid => '2147',
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 1c1ca7573a..69d91df779 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1925,7 +1925,7 @@ select string_agg(distinct f1::text, ',' order by f1::text) from varchar_tbl; -
a,ab,abcd
(1 row)
--- string_agg bytea tests
+-- string_agg, min, max bytea tests
create table bytea_test_table(v bytea);
select string_agg(v, '') from bytea_test_table;
string_agg
@@ -1959,6 +1959,32 @@ select string_agg(v, decode('ee', 'hex')) from bytea_test_table;
\xffeeaa
(1 row)
+select min(v) from bytea_test_table;
+ min
+------
+ \xaa
+(1 row)
+
+select max(v) from bytea_test_table;
+ max
+------
+ \xff
+(1 row)
+
+insert into bytea_test_table values(decode('ffff','hex'));
+insert into bytea_test_table values(decode('aaaa','hex'));
+select min(v) from bytea_test_table;
+ min
+------
+ \xaa
+(1 row)
+
+select max(v) from bytea_test_table;
+ max
+--------
+ \xffff
+(1 row)
+
drop table bytea_test_table;
-- Test parallel string_agg and array_agg
create table pagg_test (x int, y int) with (autovacuum_enabled = off);
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 9d047b21b8..087e349894 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -874,6 +874,8 @@ xid8ne(xid8,xid8)
xid8cmp(xid8,xid8)
uuid_extract_timestamp(uuid)
uuid_extract_version(uuid)
+bytea_larger(bytea,bytea)
+bytea_smaller(bytea,bytea)
-- restore normal output mode
\a\t
-- List of functions used by libpq's fe-lobj.c
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 1a18ca3d8f..935e7360c6 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -741,7 +741,7 @@ select string_agg(distinct f1::text, ',' order by f1) from varchar_tbl; -- not
select string_agg(distinct f1, ',' order by f1::text) from varchar_tbl; -- not ok
select string_agg(distinct f1::text, ',' order by f1::text) from varchar_tbl; -- ok
--- string_agg bytea tests
+-- string_agg, min, max bytea tests
create table bytea_test_table(v bytea);
select string_agg(v, '') from bytea_test_table;
@@ -756,6 +756,15 @@ select string_agg(v, '') from bytea_test_table;
select string_agg(v, NULL) from bytea_test_table;
select string_agg(v, decode('ee', 'hex')) from bytea_test_table;
+select min(v) from bytea_test_table;
+select max(v) from bytea_test_table;
+
+insert into bytea_test_table values(decode('ffff','hex'));
+insert into bytea_test_table values(decode('aaaa','hex'));
+
+select min(v) from bytea_test_table;
+select max(v) from bytea_test_table;
+
drop table bytea_test_table;
-- Test parallel string_agg and array_agg
--
2.44.2
Added patch to commitfest https://commitfest.postgresql.org/49/5138/
--
With best regards,
Marat Bukharov
Show quoted text
Hi,
What part of commitfest should I put the current patch to: "SQL
Commands", "Miscellaneous" or something else? I can't figure it out.Personally I qualified a similar patch [1] as "Server Features",
although I'm not 100% sure if this was the best choice.[1]: https://commitfest.postgresql.org/48/4905/
--
Best regards,
Aleksander Alekseev
On 24 Jul 2024, at 17:42, Marat Bukharov <marat.buharov@gmail.com> wrote:
V5 patch. I've added more tests with different bytea sizes
Hi Marat!
This looks like a nice feature to have.
I’ve took a look into the patch and have few suggestions:
0. Please write more descriptive commit message akin to [0]https://github.com/postgres/postgres/commit/a0f1fce80c03
1. Use oids from development range 8000-9999
2. Replace VARDATA_ANY\memcmp dance with a call to varstrfastcmp_c().
Thank you!
Best regards, Andrey Borodin.
[0]: https://github.com/postgres/postgres/commit/a0f1fce80c03
"Andrey M. Borodin" <x4mmm@yandex-team.ru> writes:
0. Please write more descriptive commit message akin to [0]
1. Use oids from development range 8000-9999
Yeah, we don't try anymore to manually select permanent oids [1]https://www.postgresql.org/docs/devel/system-catalog-initial-data.html#SYSTEM-CATALOG-OID-ASSIGNMENT.
2. Replace VARDATA_ANY\memcmp dance with a call to varstrfastcmp_c().
I don't agree with that recommendation in the slightest: it's a
fundamental type pun for bytea to piggyback on text/varchar functions.
It risks bugs in bytea due to somebody inserting collation dependencies
into those functions. It also creates special cases that those
functions shouldn't have to cope with, see e.g. comment for
varstr_sortsupport about how we have to allow NUL bytes there but
only if it's C locale. That's a laughably rickety bit of coding.
I see that somebody already made such a pun in bytea_sortsupport,
but that was a bad idea that we should undo not double down on.
I wonder if we shouldn't pull all the bytea support functions out
of varlena.c (say into a new file bytea.c), to discourage such
gamesmanship in the future.
regards, tom lane
[1]: https://www.postgresql.org/docs/devel/system-catalog-initial-data.html#SYSTEM-CATALOG-OID-ASSIGNMENT
On Thu, 25 Jul 2024 at 02:42, Marat Bukharov <marat.buharov@gmail.com> wrote:
V5 patch. I've added more tests with different bytea sizes
I just glanced over this patch. Are you still planning on working on
it? There's been no adjustments made since the last feedback you got
in early August.
Can you address Andrey's feedback on point #1?
Also, for bytea_larger() and bytea_smaller(), I suggest copying what's
been done in record_larger() and record_smaller() except use
byteacmp(). That'll remove all the duplicated code.
If you fix those up, I see no reason not to commit the patch.
David
Hi,
I just glanced over this patch. Are you still planning on working on
it? There's been no adjustments made since the last feedback you got
in early August.Can you address Andrey's feedback on point #1?
Also, for bytea_larger() and bytea_smaller(), I suggest copying what's
been done in record_larger() and record_smaller() except use
byteacmp(). That'll remove all the duplicated code.If you fix those up, I see no reason not to commit the patch.
Since we haven't heard from Marat since July I decided to rebase the
patch and address the feedback received so far. PFA patch v6.
--
Best regards,
Aleksander Alekseev
Attachments:
v6-0001-Add-min-and-max-aggregates-for-bytea-type.patchapplication/octet-stream; name=v6-0001-Add-min-and-max-aggregates-for-bytea-type.patchDownload
From bd6115e9d27dba3e681966ec84b2b78bd323d34d Mon Sep 17 00:00:00 2001
From: Marat Bukharov <maratik@yandex-team.ru>
Date: Wed, 24 Jul 2024 17:37:31 +0300
Subject: [PATCH v6] Add min and max aggregates for bytea type
This is a rather straightforward change:
SELECT min(x), max(x)
FROM unnest(ARRAY['\xaa' :: bytea, '\xbbbb']) AS x;
min | max
------+--------
\xaa | \xbbbb
Marat Buharov, Aleksander Alekseev. Reviewed by Andrey Borodin, Tom Lane.
Discussion: https://postgr.es/m/CAPCEVGXiASjodos4P8pgyV7ixfVn-ZgG9YyiRZRbVqbGmfuDyg@mail.gmail.com
BUMP CATVERSION
---
doc/src/sgml/func.sgml | 4 +--
src/backend/utils/adt/varlena.c | 38 ++++++++++++++++++++++++
src/include/catalog/pg_aggregate.dat | 6 ++++
src/include/catalog/pg_proc.dat | 13 ++++++++
src/test/regress/expected/aggregates.out | 28 ++++++++++++++++-
src/test/regress/expected/opr_sanity.out | 2 ++
src/test/regress/sql/aggregates.sql | 11 ++++++-
7 files changed, 98 insertions(+), 4 deletions(-)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 7b4fbb5047..b26db3b04b 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -22125,7 +22125,7 @@ SELECT NULLIF(value, '(none)') ...
<para>
Computes the maximum of the non-null input
values. Available for any numeric, string, date/time, or enum type,
- as well as <type>inet</type>, <type>interval</type>,
+ as well as <type>bytea</type>, <type>inet</type>, <type>interval</type>,
<type>money</type>, <type>oid</type>, <type>pg_lsn</type>,
<type>tid</type>, <type>xid8</type>,
and also arrays and composite types containing sortable data types.
@@ -22144,7 +22144,7 @@ SELECT NULLIF(value, '(none)') ...
<para>
Computes the minimum of the non-null input
values. Available for any numeric, string, date/time, or enum type,
- as well as <type>inet</type>, <type>interval</type>,
+ as well as <type>bytea</type>, <type>inet</type>, <type>interval</type>,
<type>money</type>, <type>oid</type>, <type>pg_lsn</type>,
<type>tid</type>, <type>xid8</type>,
and also arrays and composite types containing sortable data types.
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index d46ed3ccf9..533bebc1c7 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -3931,6 +3931,44 @@ byteacmp(PG_FUNCTION_ARGS)
PG_RETURN_INT32(cmp);
}
+Datum
+bytea_larger(PG_FUNCTION_ARGS)
+{
+ bytea *arg1 = PG_GETARG_BYTEA_PP(0);
+ bytea *arg2 = PG_GETARG_BYTEA_PP(1);
+ bytea *result;
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+ result = ((cmp > 0) || ((cmp == 0) && (len1 > len2)) ? arg1 : arg2);
+
+ PG_RETURN_BYTEA_P(result);
+}
+
+Datum
+bytea_smaller(PG_FUNCTION_ARGS)
+{
+ bytea *arg1 = PG_GETARG_BYTEA_PP(0);
+ bytea *arg2 = PG_GETARG_BYTEA_PP(1);
+ bytea *result;
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+ result = ((cmp < 0) || ((cmp == 0) && (len1 < len2)) ? arg1 : arg2);
+
+ PG_RETURN_BYTEA_P(result);
+}
+
Datum
bytea_sortsupport(PG_FUNCTION_ARGS)
{
diff --git a/src/include/catalog/pg_aggregate.dat b/src/include/catalog/pg_aggregate.dat
index b6b6352d91..6ba6a997c2 100644
--- a/src/include/catalog/pg_aggregate.dat
+++ b/src/include/catalog/pg_aggregate.dat
@@ -161,6 +161,9 @@
{ aggfnoid => 'max(xid8)', aggtransfn => 'xid8_larger',
aggcombinefn => 'xid8_larger', aggsortop => '>(xid8,xid8)',
aggtranstype => 'xid8' },
+{ aggfnoid => 'max(bytea)', aggtransfn => 'bytea_larger',
+ aggcombinefn => 'bytea_larger', aggsortop => '>(bytea,bytea)',
+ aggtranstype => 'bytea' },
# min
{ aggfnoid => 'min(int8)', aggtransfn => 'int8smaller',
@@ -232,6 +235,9 @@
{ aggfnoid => 'min(xid8)', aggtransfn => 'xid8_smaller',
aggcombinefn => 'xid8_smaller', aggsortop => '<(xid8,xid8)',
aggtranstype => 'xid8' },
+{ aggfnoid => 'min(bytea)', aggtransfn => 'bytea_smaller',
+ aggcombinefn => 'bytea_smaller', aggsortop => '<(bytea,bytea)',
+ aggtranstype => 'bytea' },
# count
{ aggfnoid => 'count(any)', aggtransfn => 'int8inc_any',
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 77f54a79e6..3ae31a614c 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1314,6 +1314,13 @@
proname => 'text_smaller', proleakproof => 't', prorettype => 'text',
proargtypes => 'text text', prosrc => 'text_smaller' },
+{ oid => '8920', descr => 'larger of two',
+ proname => 'bytea_larger', proleakproof => 't', prorettype => 'bytea',
+ proargtypes => 'bytea bytea', prosrc => 'bytea_larger' },
+{ oid => '8921', descr => 'smaller of two',
+ proname => 'bytea_smaller', proleakproof => 't', prorettype => 'bytea',
+ proargtypes => 'bytea bytea', prosrc => 'bytea_smaller' },
+
{ oid => '460', descr => 'I/O',
proname => 'int8in', prorettype => 'int8', proargtypes => 'cstring',
prosrc => 'int8in' },
@@ -6879,6 +6886,9 @@
{ oid => '5099', descr => 'maximum value of all xid8 input values',
proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'xid8',
proargtypes => 'xid8', prosrc => 'aggregate_dummy' },
+{ oid => '8922', descr => 'maximum value of all bytea input values',
+ proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'bytea',
+ proargtypes => 'bytea', prosrc => 'aggregate_dummy' },
{ oid => '2131', descr => 'minimum value of all bigint input values',
proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'int8',
@@ -6952,6 +6962,9 @@
{ oid => '5100', descr => 'minimum value of all xid8 input values',
proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'xid8',
proargtypes => 'xid8', prosrc => 'aggregate_dummy' },
+{ oid => '8923', descr => 'minimum value of all bytea input values',
+ proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'bytea',
+ proargtypes => 'bytea', prosrc => 'aggregate_dummy' },
# count has two forms: count(any) and count(*)
{ oid => '2147',
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 8ac13b562c..e14e735656 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1950,7 +1950,7 @@ select string_agg(distinct f1::text, ',' order by f1::text) from varchar_tbl; -
a,ab,abcd
(1 row)
--- string_agg bytea tests
+-- string_agg, min, max bytea tests
create table bytea_test_table(v bytea);
select string_agg(v, '') from bytea_test_table;
string_agg
@@ -1984,6 +1984,32 @@ select string_agg(v, decode('ee', 'hex')) from bytea_test_table;
\xffeeaa
(1 row)
+select min(v) from bytea_test_table;
+ min
+------
+ \xaa
+(1 row)
+
+select max(v) from bytea_test_table;
+ max
+------
+ \xff
+(1 row)
+
+insert into bytea_test_table values(decode('ffff','hex'));
+insert into bytea_test_table values(decode('aaaa','hex'));
+select min(v) from bytea_test_table;
+ min
+------
+ \xaa
+(1 row)
+
+select max(v) from bytea_test_table;
+ max
+--------
+ \xffff
+(1 row)
+
drop table bytea_test_table;
-- Test parallel string_agg and array_agg
create table pagg_test (x int, y int) with (autovacuum_enabled = off);
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 0d734169f1..34a32bd11d 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -876,6 +876,8 @@ uuid_extract_timestamp(uuid)
uuid_extract_version(uuid)
crc32(bytea)
crc32c(bytea)
+bytea_larger(bytea,bytea)
+bytea_smaller(bytea,bytea)
-- restore normal output mode
\a\t
-- List of functions used by libpq's fe-lobj.c
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index ca6d1bcfb7..ddf38bafb4 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -747,7 +747,7 @@ select string_agg(distinct f1::text, ',' order by f1) from varchar_tbl; -- not
select string_agg(distinct f1, ',' order by f1::text) from varchar_tbl; -- not ok
select string_agg(distinct f1::text, ',' order by f1::text) from varchar_tbl; -- ok
--- string_agg bytea tests
+-- string_agg, min, max bytea tests
create table bytea_test_table(v bytea);
select string_agg(v, '') from bytea_test_table;
@@ -762,6 +762,15 @@ select string_agg(v, '') from bytea_test_table;
select string_agg(v, NULL) from bytea_test_table;
select string_agg(v, decode('ee', 'hex')) from bytea_test_table;
+select min(v) from bytea_test_table;
+select max(v) from bytea_test_table;
+
+insert into bytea_test_table values(decode('ffff','hex'));
+insert into bytea_test_table values(decode('aaaa','hex'));
+
+select min(v) from bytea_test_table;
+select max(v) from bytea_test_table;
+
drop table bytea_test_table;
-- Test parallel string_agg and array_agg
--
2.46.0
On 8 Oct 2024, at 19:23, Aleksander Alekseev <aleksander@timescale.com> wrote:
PFA patch v6.
IMO the patch looks RfC.
Best regards, Andrey Borodin.
"Andrey M. Borodin" <x4mmm@yandex-team.ru> writes:
IMO the patch looks RfC.
LGTM too. Pushed.
regards, tom lane
Thank you for the corrections. I was busy recently and did not follow
conversations in the mailing list. Sorry about that.
--
With best regards,
Marat Bukharov
вт, 8 окт. 2024 г. в 17:24, Aleksander Alekseev <aleksander@timescale.com>:
Show quoted text
Hi,
I just glanced over this patch. Are you still planning on working on
it? There's been no adjustments made since the last feedback you got
in early August.Can you address Andrey's feedback on point #1?
Also, for bytea_larger() and bytea_smaller(), I suggest copying what's
been done in record_larger() and record_smaller() except use
byteacmp(). That'll remove all the duplicated code.If you fix those up, I see no reason not to commit the patch.
Since we haven't heard from Marat since July I decided to rebase the
patch and address the feedback received so far. PFA patch v6.--
Best regards,
Aleksander Alekseev