general purpose array_sort
Hi hackers,
Per David's suggestion, this patch implements a general-purpose
array sort.
We can do the following with this patch:
SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'asc');
SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž
Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[]);
SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž
Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[], 'asc', 'pg_c_utf8');
--
Regards
Junwang Zhao
Attachments:
v1-0001-general-purpose-array_sort.patchapplication/octet-stream; name=v1-0001-general-purpose-array_sort.patchDownload
From 2981a7ce87277fec51533c9d7cd501919f8909ec Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Fri, 27 Sep 2024 13:05:40 +0000
Subject: [PATCH v1] general purpose array_sort
Sorts anyarray in either ascending or descending order.
The array must be empty or one-dimensional.
Signed-off-by: Junwang Zhao <zhjwpku@gmail.com>
---
doc/src/sgml/func.sgml | 20 ++++
src/backend/utils/adt/array_userfuncs.c | 120 ++++++++++++++++++++++++
src/include/catalog/pg_proc.dat | 9 ++
src/test/regress/expected/arrays.out | 67 +++++++++++++
src/test/regress/sql/arrays.sql | 13 +++
5 files changed, 229 insertions(+)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index e39d524b6b..7441d413bd 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -20419,6 +20419,26 @@ SELECT NULLIF(value, '(none)') ...
</para></entry>
</row>
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <indexterm>
+ <primary>array_sort</primary>
+ </indexterm>
+ <function>array_sort</function> ( <type>anyarray</type> <optional>, <parameter>dir</parameter> </optional> <optional>, <parameter>collation</parameter> </optional>)
+ <returnvalue>anyarray</returnvalue>
+ </para>
+ <para>
+ Sorts the array in either ascending or descending order.
+ <parameter>dir</parameter> must be <literal>asc</literal>
+ or <literal>desc</literal>. The array must be empty or one-dimensional.
+ If the array element is text variants, specify <parameter>collation</parameter> as needed.
+ </para>
+ <para>
+ <literal>array_sort(ARRAY[1,2,5,6,3,4])</literal>
+ <returnvalue>{1,2,3,4,5,6}</returnvalue>
+ </para></entry>
+ </row>
+
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm id="function-array-to-string">
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index 6599be2ec5..e507eb3dbb 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -12,15 +12,18 @@
*/
#include "postgres.h"
+#include "catalog/namespace.h"
#include "catalog/pg_type.h"
#include "common/int.h"
#include "common/pg_prng.h"
#include "libpq/pqformat.h"
+#include "miscadmin.h"
#include "port/pg_bitutils.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/lsyscache.h"
+#include "utils/tuplesort.h"
#include "utils/typcache.h"
/*
@@ -1685,3 +1688,120 @@ array_sample(PG_FUNCTION_ARGS)
PG_RETURN_ARRAYTYPE_P(result);
}
+
+/*
+ * array_sort
+ *
+ * Sorts the array in either ascending or descending order.
+ * The array must be empty or one-dimensional.
+ */
+Datum
+array_sort(PG_FUNCTION_ARGS)
+{
+ ArrayType *array = PG_GETARG_ARRAYTYPE_P(0);
+ text *dirstr = (fcinfo->nargs > 1) ? PG_GETARG_TEXT_PP(1) : NULL;
+ int32 dc = (dirstr) ? VARSIZE_ANY_EXHDR(dirstr) : 0;
+ char *d = (dirstr) ? VARDATA_ANY(dirstr) : NULL;
+ Oid elmtyp;
+ text *collstr = (fcinfo->nargs > 2) ? PG_GETARG_TEXT_PP(2) : NULL;
+ char *collname = (collstr) ? text_to_cstring(collstr) : NULL;
+ Oid collid;
+ int dir = -1;
+ TypeCacheEntry *typentry;
+ Tuplesortstate *tuplesortstate;
+ ArrayIterator array_iterator;
+ Datum value;
+ bool isnull;
+ ArrayBuildState *astate = NULL;
+
+ if (ARR_NDIM(array) > 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("multidimensional arrays are not supported")));
+
+ if (ARR_NDIM(array) < 1)
+ PG_RETURN_ARRAYTYPE_P(array);
+
+ if (ARR_HASNULL(array))
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("array must not contain nulls")));
+
+ if (dirstr == NULL || (dc == 3
+ && (d[0] == 'a' || d[0] == 'A')
+ && (d[1] == 's' || d[1] == 'S')
+ && (d[2] == 'c' || d[2] == 'C')))
+ dir = 1;
+ else if (dc == 4
+ && (d[0] == 'd' || d[0] == 'D')
+ && (d[1] == 'e' || d[1] == 'E')
+ && (d[2] == 's' || d[2] == 'S')
+ && (d[3] == 'c' || d[3] == 'C'))
+ dir = 0;
+ if (dir == -1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("second parameter must be \"ASC\" or \"DESC\"")));
+
+ elmtyp = ARR_ELEMTYPE(array);
+ typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra;
+ if (typentry == NULL || typentry->type_id != elmtyp)
+ {
+ typentry = lookup_type_cache(elmtyp, dir == 1 ? TYPECACHE_LT_OPR : TYPECACHE_GT_OPR);
+ fcinfo->flinfo->fn_extra = (void *) typentry;
+ }
+
+ if (collname == NULL)
+ collid = typentry->typcollation;
+ else
+ {
+ collid = CollationGetCollid(collname);
+ if (!OidIsValid(collid))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("collation \"%s\" not found", collname)));
+ }
+
+ tuplesortstate = tuplesort_begin_datum(elmtyp,
+ dir == 1 ? typentry->lt_opr : typentry->gt_opr,
+ collid,
+ false, work_mem, NULL, false);
+
+ array_iterator = array_create_iterator(array, 0, NULL);
+ while (array_iterate(array_iterator, &value, &isnull))
+ {
+ Assert(!isnull);
+ tuplesort_putdatum(tuplesortstate, value, isnull);
+ }
+ array_free_iterator(array_iterator);
+
+ /*
+ * Do the sort.
+ */
+ tuplesort_performsort(tuplesortstate);
+
+ while (tuplesort_getdatum(tuplesortstate, true, false, &value, &isnull, NULL))
+ {
+ Assert(!isnull);
+ astate = accumArrayResult(astate, value, false,
+ elmtyp, CurrentMemoryContext);
+ }
+
+ tuplesort_end(tuplesortstate);
+
+ /* Avoid leaking memory when handed toasted input */
+ PG_FREE_IF_COPY(array, 0);
+ PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
+}
+
+Datum
+array_sort_order(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo);
+}
+
+Datum
+array_sort_order_collation(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo);
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 43f608d7a0..1769a5d2c5 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1734,6 +1734,15 @@
{ oid => '6216', descr => 'take samples from array',
proname => 'array_sample', provolatile => 'v', prorettype => 'anyarray',
proargtypes => 'anyarray int4', prosrc => 'array_sample' },
+{ oid => '8810', descr => 'sort array',
+ proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+ proargtypes => 'anyarray', prosrc => 'array_sort'},
+{ oid => '8811', descr => 'sort array',
+ proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+ proargtypes => 'anyarray text', prosrc => 'array_sort_order'},
+{ oid => '8812', descr => 'sort array',
+ proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+ proargtypes => 'anyarray text text', prosrc => 'array_sort_order_collation'},
{ oid => '3816', descr => 'array typanalyze',
proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool',
proargtypes => 'internal', prosrc => 'array_typanalyze' },
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index a6d81fd5f9..9326009188 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -2703,3 +2703,70 @@ SELECT array_sample('{1,2,3,4,5,6}'::int[], -1); -- fail
ERROR: sample size must be between 0 and 6
SELECT array_sample('{1,2,3,4,5,6}'::int[], 7); --fail
ERROR: sample size must be between 0 and 6
+-- array_sort
+SELECT array_sort('{}'::int[]);
+ array_sort
+------------
+ {}
+(1 row)
+
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+ array_sort
+---------------
+ {1,2,3,4,5,6}
+(1 row)
+
+SELECT array_sort('{1,3,5,2,4,6}'::int[], 'desc');
+ array_sort
+---------------
+ {6,5,4,3,2,1}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'asc');
+ array_sort
+---------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'desc');
+ array_sort
+---------------------------
+ {6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+ array_sort
+---------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[], 'desc');
+ array_sort
+---------------------------
+ {6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[]);
+ array_sort
+------------------------------------------------------------------
+ {"abc DEF 123abc","ábc sßs ßss DÉF",ⱥȺ,ⱥⱥⱥ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž"}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[], 'desc');
+ array_sort
+------------------------------------------------------------------
+ {"DŽxxDŽ džxxDž Džxxdž",ȺȺȺ,ⱥⱥⱥ,ⱥȺ,"ábc sßs ßss DÉF","abc DEF 123abc"}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[], 'asc', 'pg_c_utf8');
+ array_sort
+------------------------------------------------------------------
+ {"abc DEF 123abc","ábc sßs ßss DÉF","DŽxxDŽ džxxDž Džxxdž",ȺȺȺ,ⱥȺ,ⱥⱥⱥ}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[], 'desc', 'pg_c_utf8');
+ array_sort
+------------------------------------------------------------------
+ {ⱥⱥⱥ,ⱥȺ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž","ábc sßs ßss DÉF","abc DEF 123abc"}
+(1 row)
+
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index 47058dfde5..b9bf5fd294 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -827,3 +827,16 @@ SELECT array_dims(array_sample('[-1:2][2:3]={{1,2},{3,NULL},{5,6},{7,8}}'::int[]
SELECT array_dims(array_sample('{{{1,2},{3,NULL}},{{5,6},{7,8}},{{9,10},{11,12}}}'::int[], 2));
SELECT array_sample('{1,2,3,4,5,6}'::int[], -1); -- fail
SELECT array_sample('{1,2,3,4,5,6}'::int[], 7); --fail
+
+-- array_sort
+SELECT array_sort('{}'::int[]);
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+SELECT array_sort('{1,3,5,2,4,6}'::int[], 'desc');
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'asc');
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'desc');
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[], 'desc');
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[]);
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[], 'desc');
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[], 'asc', 'pg_c_utf8');
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[], 'desc', 'pg_c_utf8');
--
2.39.5
On Fri, Sep 27, 2024 at 9:15 PM Junwang Zhao <zhjwpku@gmail.com> wrote:
Hi hackers,
per David's suggestion, this patch implements general
purpose array sort. We can do the following with this patch:
SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'asc');
SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž
Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[]);
SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž
Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[], 'asc', 'pg_c_utf8');
--
Regards
Junwang Zhao
PFA v2, which uses the COLLATE keyword to supply the collation, as
suggested by Andreas off-list.
SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž
Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[]);
SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž
Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[] COLLATE "pg_c_utf8");
I also created a CF entry [1] so it can be easily reviewed.
[1]: https://commitfest.postgresql.org/50/5277/
--
Regards
Junwang Zhao
Attachments:
v2-0001-general-purpose-array_sort.patchapplication/octet-stream; name=v2-0001-general-purpose-array_sort.patchDownload
From 95b3b6cde4c2312d25c3b6b26f44064abc61e371 Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Fri, 27 Sep 2024 13:05:40 +0000
Subject: [PATCH v2] general purpose array_sort
Sorts anyarray in either ascending or descending order.
The array must be empty or one-dimensional.
Signed-off-by: Junwang Zhao <zhjwpku@gmail.com>
---
doc/src/sgml/func.sgml | 19 +++++
src/backend/utils/adt/array_userfuncs.c | 102 ++++++++++++++++++++++++
src/include/catalog/pg_proc.dat | 6 ++
src/test/regress/expected/arrays.out | 67 ++++++++++++++++
src/test/regress/sql/arrays.sql | 13 +++
5 files changed, 207 insertions(+)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index e39d524b6b..b517c7e35f 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -20419,6 +20419,25 @@ SELECT NULLIF(value, '(none)') ...
</para></entry>
</row>
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <indexterm>
+ <primary>array_sort</primary>
+ </indexterm>
+ <function>array_sort</function> ( <type>anyarray</type> <optional>, <parameter>dir</parameter> </optional>)
+ <returnvalue>anyarray</returnvalue>
+ </para>
+ <para>
+ Sorts the array in either ascending or descending order.
+ <parameter>dir</parameter> must be <literal>asc</literal>
+ or <literal>desc</literal>. The array must be empty or one-dimensional.
+ </para>
+ <para>
+ <literal>array_sort(ARRAY[1,2,5,6,3,4])</literal>
+ <returnvalue>{1,2,3,4,5,6}</returnvalue>
+ </para></entry>
+ </row>
+
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm id="function-array-to-string">
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index 6599be2ec5..841fe435e1 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -12,15 +12,18 @@
*/
#include "postgres.h"
+#include "catalog/namespace.h"
#include "catalog/pg_type.h"
#include "common/int.h"
#include "common/pg_prng.h"
#include "libpq/pqformat.h"
+#include "miscadmin.h"
#include "port/pg_bitutils.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/lsyscache.h"
+#include "utils/tuplesort.h"
#include "utils/typcache.h"
/*
@@ -1685,3 +1688,102 @@ array_sample(PG_FUNCTION_ARGS)
PG_RETURN_ARRAYTYPE_P(result);
}
+
+/*
+ * array_sort
+ *
+ * Sorts the array in either ascending or descending order.
+ * The array must be empty or one-dimensional.
+ */
+Datum
+array_sort(PG_FUNCTION_ARGS)
+{
+ ArrayType *array = PG_GETARG_ARRAYTYPE_P(0);
+ text *dirstr = (fcinfo->nargs > 1) ? PG_GETARG_TEXT_PP(1) : NULL;
+ int32 dc = (dirstr) ? VARSIZE_ANY_EXHDR(dirstr) : 0;
+ char *d = (dirstr) ? VARDATA_ANY(dirstr) : NULL;
+ Oid elmtyp;
+ Oid collation = PG_GET_COLLATION();
+ int dir = -1;
+ TypeCacheEntry *typentry;
+ Tuplesortstate *tuplesortstate;
+ ArrayIterator array_iterator;
+ Datum value;
+ bool isnull;
+ ArrayBuildState *astate = NULL;
+
+ if (ARR_NDIM(array) > 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("multidimensional arrays are not supported")));
+
+ if (ARR_NDIM(array) < 1)
+ PG_RETURN_ARRAYTYPE_P(array);
+
+ if (ARR_HASNULL(array))
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("array must not contain nulls")));
+
+ if (dirstr == NULL || (dc == 3
+ && (d[0] == 'a' || d[0] == 'A')
+ && (d[1] == 's' || d[1] == 'S')
+ && (d[2] == 'c' || d[2] == 'C')))
+ dir = 1;
+ else if (dc == 4
+ && (d[0] == 'd' || d[0] == 'D')
+ && (d[1] == 'e' || d[1] == 'E')
+ && (d[2] == 's' || d[2] == 'S')
+ && (d[3] == 'c' || d[3] == 'C'))
+ dir = 0;
+ if (dir == -1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("second parameter must be \"ASC\" or \"DESC\"")));
+
+ elmtyp = ARR_ELEMTYPE(array);
+ typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra;
+ if (typentry == NULL || typentry->type_id != elmtyp)
+ {
+ typentry = lookup_type_cache(elmtyp, dir == 1 ? TYPECACHE_LT_OPR : TYPECACHE_GT_OPR);
+ fcinfo->flinfo->fn_extra = (void *) typentry;
+ }
+
+ tuplesortstate = tuplesort_begin_datum(elmtyp,
+ dir == 1 ? typentry->lt_opr : typentry->gt_opr,
+ collation,
+ false, work_mem, NULL, false);
+
+ array_iterator = array_create_iterator(array, 0, NULL);
+ while (array_iterate(array_iterator, &value, &isnull))
+ {
+ Assert(!isnull);
+ tuplesort_putdatum(tuplesortstate, value, isnull);
+ }
+ array_free_iterator(array_iterator);
+
+ /*
+ * Do the sort.
+ */
+ tuplesort_performsort(tuplesortstate);
+
+ while (tuplesort_getdatum(tuplesortstate, true, false, &value, &isnull, NULL))
+ {
+ Assert(!isnull);
+ astate = accumArrayResult(astate, value, false,
+ elmtyp, CurrentMemoryContext);
+ }
+
+ tuplesort_end(tuplesortstate);
+
+ /* Avoid leaking memory when handed toasted input */
+ PG_FREE_IF_COPY(array, 0);
+ PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
+}
+
+Datum
+array_sort_order(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo);
+}
+
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 43f608d7a0..1b2d64ac39 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1734,6 +1734,12 @@
{ oid => '6216', descr => 'take samples from array',
proname => 'array_sample', provolatile => 'v', prorettype => 'anyarray',
proargtypes => 'anyarray int4', prosrc => 'array_sample' },
+{ oid => '8810', descr => 'sort array',
+ proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+ proargtypes => 'anyarray', prosrc => 'array_sort'},
+{ oid => '8811', descr => 'sort array',
+ proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+ proargtypes => 'anyarray text', prosrc => 'array_sort_order'},
{ oid => '3816', descr => 'array typanalyze',
proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool',
proargtypes => 'internal', prosrc => 'array_typanalyze' },
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index a6d81fd5f9..d9dc11d98c 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -2703,3 +2703,70 @@ SELECT array_sample('{1,2,3,4,5,6}'::int[], -1); -- fail
ERROR: sample size must be between 0 and 6
SELECT array_sample('{1,2,3,4,5,6}'::int[], 7); --fail
ERROR: sample size must be between 0 and 6
+-- array_sort
+SELECT array_sort('{}'::int[]);
+ array_sort
+------------
+ {}
+(1 row)
+
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+ array_sort
+---------------
+ {1,2,3,4,5,6}
+(1 row)
+
+SELECT array_sort('{1,3,5,2,4,6}'::int[], 'desc');
+ array_sort
+---------------
+ {6,5,4,3,2,1}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'asc');
+ array_sort
+---------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'desc');
+ array_sort
+---------------------------
+ {6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+ array_sort
+---------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[], 'desc');
+ array_sort
+---------------------------
+ {6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[]);
+ array_sort
+------------------------------------------------------------------
+ {"abc DEF 123abc","ábc sßs ßss DÉF",ⱥȺ,ⱥⱥⱥ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž"}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[], 'desc');
+ array_sort
+------------------------------------------------------------------
+ {"DŽxxDŽ džxxDž Džxxdž",ȺȺȺ,ⱥⱥⱥ,ⱥȺ,"ábc sßs ßss DÉF","abc DEF 123abc"}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[] COLLATE "pg_c_utf8", 'asc');
+ array_sort
+------------------------------------------------------------------
+ {"abc DEF 123abc","ábc sßs ßss DÉF","DŽxxDŽ džxxDž Džxxdž",ȺȺȺ,ⱥȺ,ⱥⱥⱥ}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[] COLLATE "pg_c_utf8", 'desc');
+ array_sort
+------------------------------------------------------------------
+ {ⱥⱥⱥ,ⱥȺ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž","ábc sßs ßss DÉF","abc DEF 123abc"}
+(1 row)
+
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index 47058dfde5..3bb13b49de 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -827,3 +827,16 @@ SELECT array_dims(array_sample('[-1:2][2:3]={{1,2},{3,NULL},{5,6},{7,8}}'::int[]
SELECT array_dims(array_sample('{{{1,2},{3,NULL}},{{5,6},{7,8}},{{9,10},{11,12}}}'::int[], 2));
SELECT array_sample('{1,2,3,4,5,6}'::int[], -1); -- fail
SELECT array_sample('{1,2,3,4,5,6}'::int[], 7); --fail
+
+-- array_sort
+SELECT array_sort('{}'::int[]);
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+SELECT array_sort('{1,3,5,2,4,6}'::int[], 'desc');
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'asc');
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'desc');
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[], 'desc');
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[]);
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[], 'desc');
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[] COLLATE "pg_c_utf8", 'asc');
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[] COLLATE "pg_c_utf8", 'desc');
--
2.39.5
On Sat, Sep 28, 2024 at 7:52 PM Junwang Zhao <zhjwpku@gmail.com> wrote:
PFA v2, use COLLATE keyword to supply the collation suggested by
Andreas offlist.
This is better. Otherwise we would need extra care to handle cases like:
SELECT array_sort('{1,3,5,2,4,6}'::int[] COLLATE "pg_c_utf8");
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <indexterm>
+ <primary>array_sort</primary>
+ </indexterm>
+ <function>array_sort</function> ( <type>anyarray</type>
<optional>, <parameter>dir</parameter> </optional>)
+ <returnvalue>anyarray</returnvalue>
+ </para>
+ <para>
+ Sorts the array in either ascending or descending order.
+ <parameter>dir</parameter> must be <literal>asc</literal>
+ or <literal>desc</literal>. The array must be empty or one-dimensional.
+ </para>
+ <para>
+ <literal>array_sort(ARRAY[1,2,5,6,3,4])</literal>
+ <returnvalue>{1,2,3,4,5,6}</returnvalue>
+ </para></entry>
+ </row>
I am confused by <parameter>dir</parameter>; I guess you mean
"direction". But here, I think <parameter>sort_asc</parameter> would be
more appropriate. Since <parameter>dir</parameter> can have only two
possible values, wouldn't it be easier to make it a boolean?
You didn't mention the following in the documentation: "by default, it
sorts in ascending order, and the default sort collation is the array
element type's collation".
tuplesort_begin_datum supports both nulls-first and nulls-last ordering,
so a one-dimensional array can be allowed to contain null values.
Based on the above and a few other points, I did some refactoring; feel free to take it.
My changes alter the function signature, so you will need to update the
SQL regression test file accordingly.
Attachments:
array_sort_changes.no-cfbotapplication/octet-stream; name=array_sort_changes.no-cfbotDownload
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 095b02c3b2..d485fc3b0a 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -20426,13 +20426,25 @@ SELECT NULLIF(value, '(none)') ...
<indexterm>
<primary>array_sort</primary>
</indexterm>
- <function>array_sort</function> ( <type>anyarray</type> <optional>, <parameter>dir</parameter> </optional>)
+ <function>array_sort</function> ( <type>anyarray</type> <optional> COLLATE <replaceable>collation_name</replaceable> </optional>
+ <optional>, <parameter>sort_asc</parameter> <type>boolean</type>
+ <optional>, <parameter>nulls_first</parameter> <type>boolean</type>
+ </optional></optional>)
<returnvalue>anyarray</returnvalue>
</para>
+
<para>
- Sorts the array in either ascending or descending order.
- <parameter>dir</parameter> must be <literal>asc</literal>
- or <literal>desc</literal>. The array must be empty or one-dimensional.
+ Sorts the array based on the given parameter.
+ </para>
+ <para>
+ If the <literal>COLLATE</literal> option is specified then sorting is based on <replaceable>collation_name</replaceable>, otherwise
+ using array element type's collation.
+ If <parameter>sort_asc</parameter> is true then sort by ascending order, otherwise descending order.
+ <parameter>sort_asc</parameter> defaults to true.
+ If <parameter>nulls_first</parameter> is true then nulls appear before non-null values, false means nulls appear after non-null values.
+ <parameter>nulls_first</parameter> defaults to true when <parameter>sort_asc</parameter> is false,
+ <parameter>nulls_first</parameter> defaults to false when <parameter>sort_asc</parameter> is true.
+ The array cannot be multidimensional.
</para>
<para>
<literal>array_sort(ARRAY[1,2,5,6,3,4])</literal>
diff --git a/src/backend/catalog/system_functions.sql b/src/backend/catalog/system_functions.sql
index b0d0de051e..454691f39a 100644
--- a/src/backend/catalog/system_functions.sql
+++ b/src/backend/catalog/system_functions.sql
@@ -498,6 +498,14 @@ LANGUAGE INTERNAL
STRICT IMMUTABLE PARALLEL SAFE
AS 'make_interval';
+CREATE OR REPLACE FUNCTION
+ array_sort(src_array anyarray, sort_asc boolean DEFAULT true,
+ nulls_first boolean DEFAULT false)
+RETURNS anyarray
+LANGUAGE INTERNAL
+STRICT VOLATILE
+AS 'array_sort';
+
CREATE OR REPLACE FUNCTION
jsonb_set(jsonb_in jsonb, path text[] , replacement jsonb,
create_if_missing boolean DEFAULT true)
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index 841fe435e1..b8fa7c07e7 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -1699,12 +1699,10 @@ Datum
array_sort(PG_FUNCTION_ARGS)
{
ArrayType *array = PG_GETARG_ARRAYTYPE_P(0);
- text *dirstr = (fcinfo->nargs > 1) ? PG_GETARG_TEXT_PP(1) : NULL;
- int32 dc = (dirstr) ? VARSIZE_ANY_EXHDR(dirstr) : 0;
- char *d = (dirstr) ? VARDATA_ANY(dirstr) : NULL;
+ bool sort_asc = true;
+ bool nulls_first = true;
Oid elmtyp;
Oid collation = PG_GET_COLLATION();
- int dir = -1;
TypeCacheEntry *typentry;
Tuplesortstate *tuplesortstate;
ArrayIterator array_iterator;
@@ -1715,49 +1713,43 @@ array_sort(PG_FUNCTION_ARGS)
if (ARR_NDIM(array) > 1)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("multidimensional arrays are not supported")));
+ errmsg("multidimensional arrays sorting are not supported")));
if (ARR_NDIM(array) < 1)
PG_RETURN_ARRAYTYPE_P(array);
+ /* by default, we sort array by asc, nulls last */
+ if (PG_NARGS() >= 2)
+ {
+ sort_asc = PG_GETARG_BOOL(1);
+ if (sort_asc)
+ nulls_first = false;
+ }
- if (ARR_HASNULL(array))
- ereport(ERROR,
- (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
- errmsg("array must not contain nulls")));
-
- if (dirstr == NULL || (dc == 3
- && (d[0] == 'a' || d[0] == 'A')
- && (d[1] == 's' || d[1] == 'S')
- && (d[2] == 'c' || d[2] == 'C')))
- dir = 1;
- else if (dc == 4
- && (d[0] == 'd' || d[0] == 'D')
- && (d[1] == 'e' || d[1] == 'E')
- && (d[2] == 's' || d[2] == 'S')
- && (d[3] == 'c' || d[3] == 'C'))
- dir = 0;
- if (dir == -1)
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("second parameter must be \"ASC\" or \"DESC\"")));
+ if (PG_NARGS() >= 3)
+ nulls_first = PG_GETARG_BOOL(2);
elmtyp = ARR_ELEMTYPE(array);
typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra;
if (typentry == NULL || typentry->type_id != elmtyp)
{
- typentry = lookup_type_cache(elmtyp, dir == 1 ? TYPECACHE_LT_OPR : TYPECACHE_GT_OPR);
+ typentry = lookup_type_cache(elmtyp, sort_asc ? TYPECACHE_LT_OPR : TYPECACHE_GT_OPR);
+ if ((sort_asc && typentry->lt_opr == InvalidOid) ||
+ (!sort_asc && typentry->gt_opr == InvalidOid))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("type %s has no default btree operator class",
+ format_type_be(elmtyp))));
fcinfo->flinfo->fn_extra = (void *) typentry;
}
tuplesortstate = tuplesort_begin_datum(elmtyp,
- dir == 1 ? typentry->lt_opr : typentry->gt_opr,
+ sort_asc ? typentry->lt_opr : typentry->gt_opr,
collation,
- false, work_mem, NULL, false);
+ nulls_first, work_mem, NULL, false);
array_iterator = array_create_iterator(array, 0, NULL);
while (array_iterate(array_iterator, &value, &isnull))
{
- Assert(!isnull);
tuplesort_putdatum(tuplesortstate, value, isnull);
}
array_free_iterator(array_iterator);
@@ -1769,8 +1761,7 @@ array_sort(PG_FUNCTION_ARGS)
while (tuplesort_getdatum(tuplesortstate, true, false, &value, &isnull, NULL))
{
- Assert(!isnull);
- astate = accumArrayResult(astate, value, false,
+ astate = accumArrayResult(astate, value, isnull,
elmtyp, CurrentMemoryContext);
}
@@ -1781,9 +1772,3 @@ array_sort(PG_FUNCTION_ARGS)
PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
}
-Datum
-array_sort_order(PG_FUNCTION_ARGS)
-{
- return array_sort(fcinfo);
-}
-
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index b5385fbefe..c40dd2d7e3 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1736,10 +1736,7 @@
proargtypes => 'anyarray int4', prosrc => 'array_sample' },
{ oid => '8810', descr => 'sort array',
proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
- proargtypes => 'anyarray', prosrc => 'array_sort'},
-{ oid => '8811', descr => 'sort array',
- proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
- proargtypes => 'anyarray text', prosrc => 'array_sort_order'},
+ proargtypes => 'anyarray bool bool', prosrc => 'array_sort'},
{ oid => '3816', descr => 'array typanalyze',
proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool',
proargtypes => 'internal', prosrc => 'array_typanalyze' },
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index 31345295c1..032977e790 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1338,6 +1338,19 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse
t | t
(1 row)
+-- tests with array_sort.
+SELECT array_sort('{a,B, null, null}'::text[] collate case_insensitive, false, false);
+ array_sort
+-----------------
+ {B,a,NULL,NULL}
+(1 row)
+
+SELECT array_sort('{a,B, null, null}'::text[] collate case_sensitive, false, false);
+ array_sort
+-----------------
+ {B,a,NULL,NULL}
+(1 row)
+
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index 3bb13b49de..99b3abe56f 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -830,8 +830,9 @@ SELECT array_sample('{1,2,3,4,5,6}'::int[], 7); --fail
-- array_sort
SELECT array_sort('{}'::int[]);
-SELECT array_sort('{1,3,5,2,4,6}'::int[]);
-SELECT array_sort('{1,3,5,2,4,6}'::int[], 'desc');
+SELECT array_sort('{{1}}'::int[]);
+SELECT array_sort('{1,3,5,2,4,6}'::int[] COLLATE "pg_c_utf8");
+SELECT array_sort('{1,3,5,2,4,6}'::int[], false);
SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'asc');
SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'desc');
SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
@@ -840,3 +841,10 @@ SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺ
SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[], 'desc');
SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[] COLLATE "pg_c_utf8", 'asc');
SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[] COLLATE "pg_c_utf8", 'desc');
+SELECT array_sort('{a,B, null, null}'::text[]);
+SELECT array_sort('{a,B, null, null}'::text[]) =
+ array_sort('{a,B, null, null}'::text[], true, false) as expect_true;
+SELECT array_sort('{a,B, null, null}'::text[], false);
+SELECT array_sort('{a,B, null, null}'::text[], true, false);
+SELECT array_sort('{a,B, null, null}'::text[], true, true);
+SELECT array_sort('{a,B, null, null}'::text[], false, true);
\ No newline at end of file
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index 80f28a97d7..b0190196ff 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -536,6 +536,10 @@ CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=second
SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive;
+-- tests with array_sort.
+SELECT array_sort('{a,B, null, null}'::text[] collate case_insensitive, false, false);
+SELECT array_sort('{a,B, null, null}'::text[] collate case_sensitive, false, false);
+
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
On Sat, Sep 28, 2024 at 10:41 PM jian he <jian.universality@gmail.com> wrote:
On Sat, Sep 28, 2024 at 7:52 PM Junwang Zhao <zhjwpku@gmail.com> wrote:
PFA v2, use COLLATE keyword to supply the collation suggested by
Andreas offlist.
this is better. otherwise we need extra care to handle case like:
SELECT array_sort('{1,3,5,2,4,6}'::int[] COLLATE "pg_c_utf8");
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <indexterm>
+ <primary>array_sort</primary>
+ </indexterm>
+ <function>array_sort</function> ( <type>anyarray</type> <optional>, <parameter>dir</parameter> </optional>)
+ <returnvalue>anyarray</returnvalue>
+ </para>
+ <para>
+ Sorts the array in either ascending or descending order.
+ <parameter>dir</parameter> must be <literal>asc</literal>
+ or <literal>desc</literal>. The array must be empty or one-dimensional.
+ </para>
+ <para>
+ <literal>array_sort(ARRAY[1,2,5,6,3,4])</literal>
+ <returnvalue>{1,2,3,4,5,6}</returnvalue>
+ </para></entry>
+ </row>
I am confused by <parameter>dir</parameter>. I guess you want to say "direction"?
But here, I think <parameter>sort_asc</parameter> would be more appropriate?
This doc is mostly copied and edited from intarray.sgml sort part.
And the logic is basically the same, you can check the intarray module.
<parameter>dir</parameter> can have only two potential values; making it
a boolean would be easier?
you didn't mention this information: "by default, it will sort in
ascending order; by default the sort collation is the array
element type's collation"
tuplesort_begin_datum can do nulls-first and nulls-last, so the
one-dimensional array can allow null values.
The following (create extension intarray first) will give an error; I
keep the same behavior for array_sort.
SELECT sort('{1234234,-30,234234, null}');
Based on the above and other points, I did some refactoring; feel free to take it.
My changes alter the function signature, so you will need to pay
attention to the sql test file.
Thanks for your refactor, I will take some in the next version.
--
Regards
Junwang Zhao
On Sat, Sep 28, 2024 at 7:05 PM Junwang Zhao <zhjwpku@gmail.com> wrote:
On Sat, Sep 28, 2024 at 10:41 PM jian he <jian.universality@gmail.com> wrote:
<parameter>dir</parameter> can have only two potential values; making it
a boolean would be easier?
you didn't mention this information: "by default, it will sort in
ascending order; by default the sort collation is the array
element type's collation"
tuplesort_begin_datum can do nulls-first and nulls-last, so the
one-dimensional array can allow null values.
The following (create extension intarray first) will give an error; I
keep the same behavior for array_sort.
SELECT sort('{1234234,-30,234234, null}');
I would suggest accepting:
asc
desc
asc nulls first
asc nulls last *
desc nulls first *
desc nulls last
As valid inputs for "dir" - and that the starred options are the defaults
when null position is omitted.
In short, mimic create index.
David J.
On Sun, Sep 29, 2024 at 10:51 AM David G. Johnston
<david.g.johnston@gmail.com> wrote:
On Sat, Sep 28, 2024 at 7:05 PM Junwang Zhao <zhjwpku@gmail.com> wrote:
On Sat, Sep 28, 2024 at 10:41 PM jian he <jian.universality@gmail.com> wrote:
<parameter>dir</parameter> can have only two potential values; making it
a boolean would be easier?
you didn't mention this information: "by default, it will sort in
ascending order; by default the sort collation is the array
element type's collation"
tuplesort_begin_datum can do nulls-first and nulls-last, so the
one-dimensional array can allow null values.
The following (create extension intarray first) will give an error; I
keep the same behavior for array_sort.
SELECT sort('{1234234,-30,234234, null}');
I would suggest accepting:
asc
desc
asc nulls first
asc nulls last *
desc nulls first *
desc nulls last
As valid inputs for "dir" - and that the starred options are the defaults when null position is omitted.
In short, mimic create index.
David J.
PFA v3 with David's suggestion addressed.
--
Regards
Junwang Zhao
Attachments:
v3-0001-general-purpose-array_sort.patchapplication/octet-stream; name=v3-0001-general-purpose-array_sort.patchDownload
From 8f5678e921cfc128749d8be9203d544dca033697 Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Fri, 27 Sep 2024 13:05:40 +0000
Subject: [PATCH v3] general purpose array_sort
Sorts anyarray in either ascending or descending order.
The array must be empty or one-dimensional.
Signed-off-by: Junwang Zhao <zhjwpku@gmail.com>
---
doc/src/sgml/func.sgml | 32 +++++
src/backend/utils/adt/array_userfuncs.c | 180 ++++++++++++++++++++++++
src/include/catalog/pg_proc.dat | 6 +
src/test/regress/expected/arrays.out | 122 ++++++++++++++++
src/test/regress/sql/arrays.sql | 23 +++
src/tools/pgindent/typedefs.list | 1 +
6 files changed, 364 insertions(+)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index e39d524b6b..cf30484e2a 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -20419,6 +20419,38 @@ SELECT NULLIF(value, '(none)') ...
</para></entry>
</row>
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <indexterm>
+ <primary>array_sort</primary>
+ </indexterm>
+ <function>array_sort</function> ( <type>anyarray</type> <optional> COLLATE <replaceable>collation_name</replaceable> </optional> <optional> , <parameter>dir</parameter> </optional>)
+ <returnvalue>anyarray</returnvalue>
+ </para>
+ <para>
+ Sorts the array based on the given parameter. The array must be empty or one-dimensional.
+ </para>
+ <para>
+ If the <literal>COLLATE</literal> option is specified then sorting is based on <replaceable>collation_name</replaceable>, otherwise
+ using array element type's collation.
+ <parameter>dir</parameter> must be one of the following, and that the starred options are the defaults when null position is omitted.
+<programlisting>
+ asc
+ desc
+ asc nulls first
+ asc nulls last *
+ desc nulls first *
+ desc nulls last
+ nulls first
+ nulls last
+</programlisting>
+ </para>
+ <para>
+ <literal>array_sort(ARRAY[1,2,5,6,3,4])</literal>
+ <returnvalue>{1,2,3,4,5,6}</returnvalue>
+ </para></entry>
+ </row>
+
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm id="function-array-to-string">
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index 6599be2ec5..0ff1a4f6f4 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -12,15 +12,18 @@
*/
#include "postgres.h"
+#include "catalog/namespace.h"
#include "catalog/pg_type.h"
#include "common/int.h"
#include "common/pg_prng.h"
#include "libpq/pqformat.h"
+#include "miscadmin.h"
#include "port/pg_bitutils.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/lsyscache.h"
+#include "utils/tuplesort.h"
#include "utils/typcache.h"
/*
@@ -1685,3 +1688,180 @@ array_sample(PG_FUNCTION_ARGS)
PG_RETURN_ARRAYTYPE_P(result);
}
+
+
+#define WHITESPACE " \t\n\r"
+
+typedef enum
+{
+ PARSE_SORT_ORDER_INIT,
+ PARSE_SORT_ORDER_DIRECTION_SET,
+ PARSE_SORT_ORDER_NULLS_OPTION,
+ PARSE_SORT_ORDER_ERROR,
+ PARSE_SORT_ORDER_DONE
+} ParseSortOrderState;
+
+static bool
+parse_sort_order(const char *str, bool *sort_asc, bool *nulls_first)
+{
+ char *token;
+ char *saveptr;
+ char *str_copy = pstrdup(str);
+ bool nulls_first_set = false;
+ ParseSortOrderState state = PARSE_SORT_ORDER_INIT;
+
+ token = strtok_r(str_copy, WHITESPACE, &saveptr);
+
+ while (token != NULL && state != PARSE_SORT_ORDER_ERROR)
+ {
+ switch (state)
+ {
+ case PARSE_SORT_ORDER_INIT:
+ if (pg_strcasecmp(token, "ASC") == 0)
+ {
+ *sort_asc = true;
+ state = PARSE_SORT_ORDER_DIRECTION_SET;
+ }
+ else if (pg_strcasecmp(token, "DESC") == 0)
+ {
+ *sort_asc = false;
+ state = PARSE_SORT_ORDER_DIRECTION_SET;
+ }
+ else if (pg_strcasecmp(token, "NULLS") == 0)
+ state = PARSE_SORT_ORDER_NULLS_OPTION;
+ else
+ state = PARSE_SORT_ORDER_ERROR;
+ break;
+
+ case PARSE_SORT_ORDER_DIRECTION_SET:
+ if (pg_strcasecmp(token, "NULLS") == 0)
+ state = PARSE_SORT_ORDER_NULLS_OPTION;
+ else
+ state = PARSE_SORT_ORDER_ERROR;
+ break;
+
+ case PARSE_SORT_ORDER_NULLS_OPTION:
+ if (pg_strcasecmp(token, "FIRST") == 0)
+ {
+ *nulls_first = true;
+ nulls_first_set = true;
+ state = PARSE_SORT_ORDER_DONE;
+ }
+ else if (pg_strcasecmp(token, "LAST") == 0)
+ {
+ *nulls_first = false;
+ nulls_first_set = true;
+ state = PARSE_SORT_ORDER_DONE;
+ }
+ else
+ state = PARSE_SORT_ORDER_ERROR;
+ break;
+
+ case PARSE_SORT_ORDER_DONE:
+ /* No more tokens should be processed after first/last */
+ state = PARSE_SORT_ORDER_ERROR;
+ break;
+
+ default:
+ state = PARSE_SORT_ORDER_ERROR;
+ break;
+ }
+
+ token = strtok_r(NULL, WHITESPACE, &saveptr);
+ }
+
+ if (state == PARSE_SORT_ORDER_INIT ||
+ state == PARSE_SORT_ORDER_DIRECTION_SET)
+ state = PARSE_SORT_ORDER_DONE;
+
+ if (state == PARSE_SORT_ORDER_NULLS_OPTION)
+ state = PARSE_SORT_ORDER_ERROR;
+
+ if (!nulls_first_set && state == PARSE_SORT_ORDER_DONE)
+ *nulls_first = !*sort_asc;
+
+ pfree(str_copy);
+ return state == PARSE_SORT_ORDER_DONE;
+}
+
+/*
+ * array_sort
+ *
+ * Sorts the array in either ascending or descending order.
+ * The array must be empty or one-dimensional.
+ */
+Datum
+array_sort(PG_FUNCTION_ARGS)
+{
+ ArrayType *array = PG_GETARG_ARRAYTYPE_P(0);
+ text *dirstr = (fcinfo->nargs > 1) ? PG_GETARG_TEXT_PP(1) : NULL;
+ bool sort_asc = true;
+ bool nulls_first = false;
+ Oid elmtyp;
+ Oid collation = PG_GET_COLLATION();
+ TypeCacheEntry *typentry;
+ Tuplesortstate *tuplesortstate;
+ ArrayIterator array_iterator;
+ Datum value;
+ bool isnull;
+ ArrayBuildState *astate = NULL;
+
+ if (ARR_NDIM(array) > 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("multidimensional arrays sorting are not supported")));
+
+ if (ARR_NDIM(array) < 1)
+ PG_RETURN_ARRAYTYPE_P(array);
+
+ if (dirstr != NULL)
+ {
+ if (!parse_sort_order(text_to_cstring(dirstr), &sort_asc, &nulls_first))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("second parameter must be a valid sort direction")));
+ }
+
+ elmtyp = ARR_ELEMTYPE(array);
+ typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra;
+ if (typentry == NULL || typentry->type_id != elmtyp)
+ {
+ typentry = lookup_type_cache(elmtyp, sort_asc ? TYPECACHE_LT_OPR : TYPECACHE_GT_OPR);
+ fcinfo->flinfo->fn_extra = (void *) typentry;
+ }
+
+ tuplesortstate = tuplesort_begin_datum(elmtyp,
+ sort_asc ? typentry->lt_opr : typentry->gt_opr,
+ collation,
+ nulls_first, work_mem, NULL, false);
+
+ array_iterator = array_create_iterator(array, 0, NULL);
+ while (array_iterate(array_iterator, &value, &isnull))
+ {
+ tuplesort_putdatum(tuplesortstate, value, isnull);
+ }
+ array_free_iterator(array_iterator);
+
+ /*
+ * Do the sort.
+ */
+ tuplesort_performsort(tuplesortstate);
+
+ while (tuplesort_getdatum(tuplesortstate, true, false, &value, &isnull, NULL))
+ {
+ astate = accumArrayResult(astate, value, isnull,
+ elmtyp, CurrentMemoryContext);
+ }
+
+ tuplesort_end(tuplesortstate);
+
+ /* Avoid leaking memory when handed toasted input */
+ PG_FREE_IF_COPY(array, 0);
+ PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
+}
+
+Datum
+array_sort_order(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo);
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 43f608d7a0..1b2d64ac39 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1734,6 +1734,12 @@
{ oid => '6216', descr => 'take samples from array',
proname => 'array_sample', provolatile => 'v', prorettype => 'anyarray',
proargtypes => 'anyarray int4', prosrc => 'array_sample' },
+{ oid => '8810', descr => 'sort array',
+ proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+ proargtypes => 'anyarray', prosrc => 'array_sort'},
+{ oid => '8811', descr => 'sort array',
+ proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+ proargtypes => 'anyarray text', prosrc => 'array_sort_order'},
{ oid => '3816', descr => 'array typanalyze',
proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool',
proargtypes => 'internal', prosrc => 'array_typanalyze' },
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index a6d81fd5f9..9c4116cf04 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -2703,3 +2703,125 @@ SELECT array_sample('{1,2,3,4,5,6}'::int[], -1); -- fail
ERROR: sample size must be between 0 and 6
SELECT array_sample('{1,2,3,4,5,6}'::int[], 7); --fail
ERROR: sample size must be between 0 and 6
+-- array_sort
+SELECT array_sort('{}'::int[]);
+ array_sort
+------------
+ {}
+(1 row)
+
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+ array_sort
+---------------
+ {1,2,3,4,5,6}
+(1 row)
+
+SELECT array_sort('{1,3,5,2,4,6}'::int[], 'desc');
+ array_sort
+---------------
+ {6,5,4,3,2,1}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'asc');
+ array_sort
+---------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'desc');
+ array_sort
+---------------------------
+ {6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+ array_sort
+---------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[], 'desc');
+ array_sort
+---------------------------
+ {6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[]);
+ array_sort
+------------------------------------------------------------------
+ {"abc DEF 123abc","ábc sßs ßss DÉF",ⱥȺ,ⱥⱥⱥ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž"}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[], 'desc');
+ array_sort
+------------------------------------------------------------------
+ {"DŽxxDŽ džxxDž Džxxdž",ȺȺȺ,ⱥⱥⱥ,ⱥȺ,"ábc sßs ßss DÉF","abc DEF 123abc"}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[] COLLATE "pg_c_utf8", 'asc');
+ array_sort
+------------------------------------------------------------------
+ {"abc DEF 123abc","ábc sßs ßss DÉF","DŽxxDŽ džxxDž Džxxdž",ȺȺȺ,ⱥȺ,ⱥⱥⱥ}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[] COLLATE "pg_c_utf8", 'desc');
+ array_sort
+------------------------------------------------------------------
+ {ⱥⱥⱥ,ⱥȺ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž","ábc sßs ßss DÉF","abc DEF 123abc"}
+(1 row)
+
+-- nulls first/last tests
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[]);
+ array_sort
+----------------------------------------------------------------------------
+ {"abc DEF 123abc","ábc sßs ßss DÉF",ⱥȺ,ⱥⱥⱥ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž",NULL,NULL}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'asc');
+ array_sort
+----------------------------------------------------------------------------
+ {"abc DEF 123abc","ábc sßs ßss DÉF",ⱥȺ,ⱥⱥⱥ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž",NULL,NULL}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'desc');
+ array_sort
+----------------------------------------------------------------------------
+ {NULL,NULL,"DŽxxDŽ džxxDž Džxxdž",ȺȺȺ,ⱥⱥⱥ,ⱥȺ,"ábc sßs ßss DÉF","abc DEF 123abc"}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'nulls first');
+ array_sort
+----------------------------------------------------------------------------
+ {NULL,NULL,"abc DEF 123abc","ábc sßs ßss DÉF",ⱥȺ,ⱥⱥⱥ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž"}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'nulls first');
+ array_sort
+----------------------------------------------------------------------------
+ {NULL,NULL,"abc DEF 123abc","ábc sßs ßss DÉF",ⱥȺ,ⱥⱥⱥ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž"}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'asc nulls first');
+ array_sort
+----------------------------------------------------------------------------
+ {NULL,NULL,"abc DEF 123abc","ábc sßs ßss DÉF",ⱥȺ,ⱥⱥⱥ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž"}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'asc nulls last');
+ array_sort
+----------------------------------------------------------------------------
+ {"abc DEF 123abc","ábc sßs ßss DÉF",ⱥȺ,ⱥⱥⱥ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž",NULL,NULL}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'desc nulls first');
+ array_sort
+----------------------------------------------------------------------------
+ {NULL,NULL,"DŽxxDŽ džxxDž Džxxdž",ȺȺȺ,ⱥⱥⱥ,ⱥȺ,"ábc sßs ßss DÉF","abc DEF 123abc"}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'desc nulls last');
+ array_sort
+----------------------------------------------------------------------------
+ {"DŽxxDŽ džxxDž Džxxdž",ȺȺȺ,ⱥⱥⱥ,ⱥȺ,"ábc sßs ßss DÉF","abc DEF 123abc",NULL,NULL}
+(1 row)
+
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index 47058dfde5..2cdd26386b 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -827,3 +827,26 @@ SELECT array_dims(array_sample('[-1:2][2:3]={{1,2},{3,NULL},{5,6},{7,8}}'::int[]
SELECT array_dims(array_sample('{{{1,2},{3,NULL}},{{5,6},{7,8}},{{9,10},{11,12}}}'::int[], 2));
SELECT array_sample('{1,2,3,4,5,6}'::int[], -1); -- fail
SELECT array_sample('{1,2,3,4,5,6}'::int[], 7); --fail
+
+-- array_sort
+SELECT array_sort('{}'::int[]);
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+SELECT array_sort('{1,3,5,2,4,6}'::int[], 'desc');
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'asc');
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'desc');
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[], 'desc');
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[]);
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[], 'desc');
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[] COLLATE "pg_c_utf8", 'asc');
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[] COLLATE "pg_c_utf8", 'desc');
+-- nulls first/last tests
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[]);
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'asc');
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'desc');
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'nulls first');
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'nulls first');
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'asc nulls first');
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'asc nulls last');
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'desc nulls first');
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'desc nulls last');
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index b6135f0347..99f1abe869 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -2025,6 +2025,7 @@ ParseLoc
ParseNamespaceColumn
ParseNamespaceItem
ParseParamRefHook
+ParseSortOrderState
ParseState
ParsedLex
ParsedScript
--
2.39.5
On Mon, Sep 30, 2024 at 1:01 PM Junwang Zhao <zhjwpku@gmail.com> wrote:
I would suggest accepting:
asc
desc
asc nulls first
asc nulls last *
desc nulls first *
desc nulls last
As valid inputs for "dir" - and that the starred options are the defaults when null position is omitted.
In short, mimic create index.
David J.
PFA v3 with David's suggestion addressed.
I think just adding 2 bool arguments (asc/desc, nulls last/not nulls
last) would be easier,
but either way is fine (I don't have a strong opinion).
But please document the second argument; imagine a case like
SELECT array_sort('{a,B}'::text[] , E'aSc NulLs LaST \t\r\n');
wouldn't that be tricky?
errmsg("multidimensional arrays sorting are not supported")));
Writing a sql test that triggers this error message would be great.
You can add one or two examples to collate.icu.utf8.sql to demo that it
actually works with COLLATE collation_name
like:
SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
SELECT array_sort('{a,B}'::text[] COLLATE "C");
#define WHITESPACE " \t\n\r"
you may also check function scanner_isspace
+ typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra;
+ if (typentry == NULL || typentry->type_id != elmtyp)
+ {
+ typentry = lookup_type_cache(elmtyp, sort_asc ? TYPECACHE_LT_OPR :
TYPECACHE_GT_OPR);
+ fcinfo->flinfo->fn_extra = (void *) typentry;
+ }
Don't you need a one-time check that typentry->lt_opr or typentry->gt_opr exists?
See CreateStatistics.
/* Disallow data types without a less-than operator */
type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR);
if (type->lt_opr == InvalidOid)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("column \"%s\" cannot be used in
statistics because its type %s has no default btree operator class",
attname, format_type_be(attForm->atttypid))));
Hi Jian,
On Mon, Sep 30, 2024 at 11:13 PM jian he <jian.universality@gmail.com> wrote:
On Mon, Sep 30, 2024 at 1:01 PM Junwang Zhao <zhjwpku@gmail.com> wrote:
I would suggest accepting:
asc
desc
asc nulls first
asc nulls last *
desc nulls first *
desc nulls last
As valid inputs for "dir" - and that the starred options are the defaults when null position is omitted.
In short, mimic create index.
David J.
PFA v3 with David's suggestion addressed.
I think just adding 2 bool arguments (asc/desc, nulls last/not nulls
last) would be easier.
Yeah, this would be easier; it's just that the intarray module uses
the direction parameter, so I kept it here for the same user
experience. I won't insist if some committer thinks 2 bool arguments
would be a better option.
but either way, (i don't have a huge opinion)
but document the second argument, imagine case
SELECT array_sort('{a,B}'::text[] , E'aSc NulLs LaST \t\r\n');
would be tricky?
The case you provide should give the correct results, but
I doubt users will do this.
I'm not good at document wording, so you might give me some help
with the document part.
errmsg("multidimensional arrays sorting are not supported")));
write a sql test to trigger the error message that would be great.
you can add two or one example to collate.icu.utf8.sql to demo that it
actually works with COLLATE collation_name
like:
SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
SELECT array_sort('{a,B}'::text[] COLLATE "C");
Fixed.
#define WHITESPACE " \t\n\r"
you may also check function scanner_isspace
Fixed.
+ typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra;
+ if (typentry == NULL || typentry->type_id != elmtyp)
+ {
+ typentry = lookup_type_cache(elmtyp, sort_asc ? TYPECACHE_LT_OPR : TYPECACHE_GT_OPR);
+ fcinfo->flinfo->fn_extra = (void *) typentry;
+ }
you need to one-time check typentry->lt_opr or typentry->gt_opr exists?
see CreateStatistics.
/* Disallow data types without a less-than operator */
type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR);
if (type->lt_opr == InvalidOid)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("column \"%s\" cannot be used in statistics because its type %s has no default btree operator class",
attname, format_type_be(attForm->atttypid))));
I added an Assert for this part, not sure if that is enough.
--
Regards
Junwang Zhao
Attachments:
v4-0001-general-purpose-array_sort.patchapplication/octet-stream; name=v4-0001-general-purpose-array_sort.patchDownload
From 0ca06aed04cbafe69c021bbf991b44595b77b1b9 Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Fri, 27 Sep 2024 13:05:40 +0000
Subject: [PATCH v4] general purpose array_sort
Sorts anyarray in either ascending or descending order.
The array must be empty or one-dimensional.
Signed-off-by: Junwang Zhao <zhjwpku@gmail.com>
---
doc/src/sgml/func.sgml | 32 ++++
src/backend/utils/adt/array_userfuncs.c | 181 ++++++++++++++++++
src/include/catalog/pg_proc.dat | 6 +
src/test/regress/expected/arrays.out | 125 ++++++++++++
.../regress/expected/collate.icu.utf8.out | 13 ++
src/test/regress/sql/arrays.sql | 25 +++
src/test/regress/sql/collate.icu.utf8.sql | 4 +
src/tools/pgindent/typedefs.list | 1 +
8 files changed, 387 insertions(+)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index e39d524b6b..cf30484e2a 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -20419,6 +20419,38 @@ SELECT NULLIF(value, '(none)') ...
</para></entry>
</row>
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <indexterm>
+ <primary>array_sort</primary>
+ </indexterm>
+ <function>array_sort</function> ( <type>anyarray</type> <optional> COLLATE <replaceable>collation_name</replaceable> </optional> <optional> , <parameter>dir</parameter> </optional>)
+ <returnvalue>anyarray</returnvalue>
+ </para>
+ <para>
+ Sorts the array based on the given parameter. The array must be empty or one-dimensional.
+ </para>
+ <para>
+ If the <literal>COLLATE</literal> option is specified then sorting is based on <replaceable>collation_name</replaceable>, otherwise
+ using array element type's collation.
+ <parameter>dir</parameter> must be one of the following, and that the starred options are the defaults when null position is omitted.
+<programlisting>
+ asc
+ desc
+ asc nulls first
+ asc nulls last *
+ desc nulls first *
+ desc nulls last
+ nulls first
+ nulls last
+</programlisting>
+ </para>
+ <para>
+ <literal>array_sort(ARRAY[1,2,5,6,3,4])</literal>
+ <returnvalue>{1,2,3,4,5,6}</returnvalue>
+ </para></entry>
+ </row>
+
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm id="function-array-to-string">
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index 6599be2ec5..3cb890a734 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -12,15 +12,18 @@
*/
#include "postgres.h"
+#include "catalog/namespace.h"
#include "catalog/pg_type.h"
#include "common/int.h"
#include "common/pg_prng.h"
#include "libpq/pqformat.h"
+#include "miscadmin.h"
#include "port/pg_bitutils.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/lsyscache.h"
+#include "utils/tuplesort.h"
#include "utils/typcache.h"
/*
@@ -1685,3 +1688,181 @@ array_sample(PG_FUNCTION_ARGS)
PG_RETURN_ARRAYTYPE_P(result);
}
+
+
+#define WHITESPACE " \t\n\r\v\f"
+
+typedef enum
+{
+ PARSE_SORT_ORDER_INIT,
+ PARSE_SORT_ORDER_DIRECTION_SET,
+ PARSE_SORT_ORDER_NULLS_OPTION,
+ PARSE_SORT_ORDER_ERROR,
+ PARSE_SORT_ORDER_DONE
+} ParseSortOrderState;
+
+static bool
+parse_sort_order(const char *str, bool *sort_asc, bool *nulls_first)
+{
+ char *token;
+ char *saveptr;
+ char *str_copy = pstrdup(str);
+ bool nulls_first_set = false;
+ ParseSortOrderState state = PARSE_SORT_ORDER_INIT;
+
+ token = strtok_r(str_copy, WHITESPACE, &saveptr);
+
+ while (token != NULL && state != PARSE_SORT_ORDER_ERROR)
+ {
+ switch (state)
+ {
+ case PARSE_SORT_ORDER_INIT:
+ if (pg_strcasecmp(token, "ASC") == 0)
+ {
+ *sort_asc = true;
+ state = PARSE_SORT_ORDER_DIRECTION_SET;
+ }
+ else if (pg_strcasecmp(token, "DESC") == 0)
+ {
+ *sort_asc = false;
+ state = PARSE_SORT_ORDER_DIRECTION_SET;
+ }
+ else if (pg_strcasecmp(token, "NULLS") == 0)
+ state = PARSE_SORT_ORDER_NULLS_OPTION;
+ else
+ state = PARSE_SORT_ORDER_ERROR;
+ break;
+
+ case PARSE_SORT_ORDER_DIRECTION_SET:
+ if (pg_strcasecmp(token, "NULLS") == 0)
+ state = PARSE_SORT_ORDER_NULLS_OPTION;
+ else
+ state = PARSE_SORT_ORDER_ERROR;
+ break;
+
+ case PARSE_SORT_ORDER_NULLS_OPTION:
+ if (pg_strcasecmp(token, "FIRST") == 0)
+ {
+ *nulls_first = true;
+ nulls_first_set = true;
+ state = PARSE_SORT_ORDER_DONE;
+ }
+ else if (pg_strcasecmp(token, "LAST") == 0)
+ {
+ *nulls_first = false;
+ nulls_first_set = true;
+ state = PARSE_SORT_ORDER_DONE;
+ }
+ else
+ state = PARSE_SORT_ORDER_ERROR;
+ break;
+
+ case PARSE_SORT_ORDER_DONE:
+ /* No more tokens should be processed after first/last */
+ state = PARSE_SORT_ORDER_ERROR;
+ break;
+
+ default:
+ state = PARSE_SORT_ORDER_ERROR;
+ break;
+ }
+
+ token = strtok_r(NULL, WHITESPACE, &saveptr);
+ }
+
+ if (state == PARSE_SORT_ORDER_INIT ||
+ state == PARSE_SORT_ORDER_DIRECTION_SET)
+ state = PARSE_SORT_ORDER_DONE;
+
+ if (state == PARSE_SORT_ORDER_NULLS_OPTION)
+ state = PARSE_SORT_ORDER_ERROR;
+
+ if (!nulls_first_set && state == PARSE_SORT_ORDER_DONE)
+ *nulls_first = !*sort_asc;
+
+ pfree(str_copy);
+ return state == PARSE_SORT_ORDER_DONE;
+}
+
+/*
+ * array_sort
+ *
+ * Sorts the array in either ascending or descending order.
+ * The array must be empty or one-dimensional.
+ */
+Datum
+array_sort(PG_FUNCTION_ARGS)
+{
+ ArrayType *array = PG_GETARG_ARRAYTYPE_P(0);
+ text *dirstr = (fcinfo->nargs > 1) ? PG_GETARG_TEXT_PP(1) : NULL;
+ bool sort_asc = true;
+ bool nulls_first = false;
+ Oid elmtyp;
+ Oid collation = PG_GET_COLLATION();
+ TypeCacheEntry *typentry;
+ Tuplesortstate *tuplesortstate;
+ ArrayIterator array_iterator;
+ Datum value;
+ bool isnull;
+ ArrayBuildState *astate = NULL;
+
+ if (ARR_NDIM(array) > 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("multidimensional arrays sorting are not supported")));
+
+ if (ARR_NDIM(array) < 1)
+ PG_RETURN_ARRAYTYPE_P(array);
+
+ if (dirstr != NULL)
+ {
+ if (!parse_sort_order(text_to_cstring(dirstr), &sort_asc, &nulls_first))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("second parameter must be a valid sort direction")));
+ }
+
+ elmtyp = ARR_ELEMTYPE(array);
+ typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra;
+ if (typentry == NULL || typentry->type_id != elmtyp)
+ {
+ typentry = lookup_type_cache(elmtyp, sort_asc ? TYPECACHE_LT_OPR : TYPECACHE_GT_OPR);
+ fcinfo->flinfo->fn_extra = (void *) typentry;
+ }
+
+ Assert(OidIsValid(typentry->lt_opr) || OidIsValid(typentry->gt_opr));
+ tuplesortstate = tuplesort_begin_datum(elmtyp,
+ sort_asc ? typentry->lt_opr : typentry->gt_opr,
+ collation,
+ nulls_first, work_mem, NULL, false);
+
+ array_iterator = array_create_iterator(array, 0, NULL);
+ while (array_iterate(array_iterator, &value, &isnull))
+ {
+ tuplesort_putdatum(tuplesortstate, value, isnull);
+ }
+ array_free_iterator(array_iterator);
+
+ /*
+ * Do the sort.
+ */
+ tuplesort_performsort(tuplesortstate);
+
+ while (tuplesort_getdatum(tuplesortstate, true, false, &value, &isnull, NULL))
+ {
+ astate = accumArrayResult(astate, value, isnull,
+ elmtyp, CurrentMemoryContext);
+ }
+
+ tuplesort_end(tuplesortstate);
+
+ /* Avoid leaking memory when handed toasted input */
+ PG_FREE_IF_COPY(array, 0);
+ PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
+}
+
+Datum
+array_sort_order(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo);
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 43f608d7a0..1b2d64ac39 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1734,6 +1734,12 @@
{ oid => '6216', descr => 'take samples from array',
proname => 'array_sample', provolatile => 'v', prorettype => 'anyarray',
proargtypes => 'anyarray int4', prosrc => 'array_sample' },
+{ oid => '8810', descr => 'sort array',
+ proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+ proargtypes => 'anyarray', prosrc => 'array_sort'},
+{ oid => '8811', descr => 'sort array',
+ proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+ proargtypes => 'anyarray text', prosrc => 'array_sort_order'},
{ oid => '3816', descr => 'array typanalyze',
proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool',
proargtypes => 'internal', prosrc => 'array_typanalyze' },
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index a6d81fd5f9..9c71a65b5a 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -2703,3 +2703,128 @@ SELECT array_sample('{1,2,3,4,5,6}'::int[], -1); -- fail
ERROR: sample size must be between 0 and 6
SELECT array_sample('{1,2,3,4,5,6}'::int[], 7); --fail
ERROR: sample size must be between 0 and 6
+-- array_sort
+SELECT array_sort('{}'::int[]);
+ array_sort
+------------
+ {}
+(1 row)
+
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+ array_sort
+---------------
+ {1,2,3,4,5,6}
+(1 row)
+
+SELECT array_sort('{1,3,5,2,4,6}'::int[], 'desc');
+ array_sort
+---------------
+ {6,5,4,3,2,1}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'asc');
+ array_sort
+---------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'desc');
+ array_sort
+---------------------------
+ {6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+ array_sort
+---------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[], 'desc');
+ array_sort
+---------------------------
+ {6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[]);
+ array_sort
+------------------------------------------------------------------
+ {"abc DEF 123abc","ábc sßs ßss DÉF",ⱥȺ,ⱥⱥⱥ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž"}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[], 'desc');
+ array_sort
+------------------------------------------------------------------
+ {"DŽxxDŽ džxxDž Džxxdž",ȺȺȺ,ⱥⱥⱥ,ⱥȺ,"ábc sßs ßss DÉF","abc DEF 123abc"}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[] COLLATE "pg_c_utf8", 'asc');
+ array_sort
+------------------------------------------------------------------
+ {"abc DEF 123abc","ábc sßs ßss DÉF","DŽxxDŽ džxxDž Džxxdž",ȺȺȺ,ⱥȺ,ⱥⱥⱥ}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[] COLLATE "pg_c_utf8", 'desc');
+ array_sort
+------------------------------------------------------------------
+ {ⱥⱥⱥ,ⱥȺ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž","ábc sßs ßss DÉF","abc DEF 123abc"}
+(1 row)
+
+-- nulls first/last tests
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[]);
+ array_sort
+----------------------------------------------------------------------------
+ {"abc DEF 123abc","ábc sßs ßss DÉF",ⱥȺ,ⱥⱥⱥ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž",NULL,NULL}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'asc');
+ array_sort
+----------------------------------------------------------------------------
+ {"abc DEF 123abc","ábc sßs ßss DÉF",ⱥȺ,ⱥⱥⱥ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž",NULL,NULL}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'desc');
+ array_sort
+----------------------------------------------------------------------------
+ {NULL,NULL,"DŽxxDŽ džxxDž Džxxdž",ȺȺȺ,ⱥⱥⱥ,ⱥȺ,"ábc sßs ßss DÉF","abc DEF 123abc"}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'nulls first');
+ array_sort
+----------------------------------------------------------------------------
+ {NULL,NULL,"abc DEF 123abc","ábc sßs ßss DÉF",ⱥȺ,ⱥⱥⱥ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž"}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'nulls first');
+ array_sort
+----------------------------------------------------------------------------
+ {NULL,NULL,"abc DEF 123abc","ábc sßs ßss DÉF",ⱥȺ,ⱥⱥⱥ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž"}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'asc nulls first');
+ array_sort
+----------------------------------------------------------------------------
+ {NULL,NULL,"abc DEF 123abc","ábc sßs ßss DÉF",ⱥȺ,ⱥⱥⱥ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž"}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'asc nulls last');
+ array_sort
+----------------------------------------------------------------------------
+ {"abc DEF 123abc","ábc sßs ßss DÉF",ⱥȺ,ⱥⱥⱥ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž",NULL,NULL}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'desc nulls first');
+ array_sort
+----------------------------------------------------------------------------
+ {NULL,NULL,"DŽxxDŽ džxxDž Džxxdž",ȺȺȺ,ⱥⱥⱥ,ⱥȺ,"ábc sßs ßss DÉF","abc DEF 123abc"}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'desc nulls last');
+ array_sort
+----------------------------------------------------------------------------
+ {"DŽxxDŽ džxxDž Džxxdž",ȺȺȺ,ⱥⱥⱥ,ⱥȺ,"ábc sßs ßss DÉF","abc DEF 123abc",NULL,NULL}
+(1 row)
+
+-- multidimensional array tests
+SELECT array_sort(ARRAY[[1,2],[3,4],[5,6]]); -- fail
+ERROR: multidimensional arrays sorting are not supported
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index 31345295c1..743b4d8199 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1338,6 +1338,19 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse
t | t
(1 row)
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+ array_sort
+------------
+ {a,B}
+(1 row)
+
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+ array_sort
+------------
+ {B,a}
+(1 row)
+
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index 47058dfde5..30606700ef 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -827,3 +827,28 @@ SELECT array_dims(array_sample('[-1:2][2:3]={{1,2},{3,NULL},{5,6},{7,8}}'::int[]
SELECT array_dims(array_sample('{{{1,2},{3,NULL}},{{5,6},{7,8}},{{9,10},{11,12}}}'::int[], 2));
SELECT array_sample('{1,2,3,4,5,6}'::int[], -1); -- fail
SELECT array_sample('{1,2,3,4,5,6}'::int[], 7); --fail
+
+-- array_sort
+SELECT array_sort('{}'::int[]);
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+SELECT array_sort('{1,3,5,2,4,6}'::int[], 'desc');
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'asc');
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'desc');
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[], 'desc');
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[]);
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[], 'desc');
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[] COLLATE "pg_c_utf8", 'asc');
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[] COLLATE "pg_c_utf8", 'desc');
+-- nulls first/last tests
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[]);
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'asc');
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'desc');
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'nulls first');
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'nulls first');
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'asc nulls first');
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'asc nulls last');
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'desc nulls first');
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'desc nulls last');
+-- multidimensional array tests
+SELECT array_sort(ARRAY[[1,2],[3,4],[5,6]]); -- fail
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index 80f28a97d7..3c739d332b 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -536,6 +536,10 @@ CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=second
SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive;
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index b6135f0347..99f1abe869 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -2025,6 +2025,7 @@ ParseLoc
ParseNamespaceColumn
ParseNamespaceItem
ParseParamRefHook
+ParseSortOrderState
ParseState
ParsedLex
ParsedScript
--
2.39.5
+ typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra;
+ if (typentry == NULL || typentry->type_id != elmtyp)
+ {
+     typentry = lookup_type_cache(elmtyp, sort_asc ? TYPECACHE_LT_OPR : TYPECACHE_GT_OPR);
+     fcinfo->flinfo->fn_extra = (void *) typentry;
+ }

you need to one-time check typentry->lt_opr or typentry->gt_opr exists?
see CreateStatistics.

    /* Disallow data types without a less-than operator */
    type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR);
    if (type->lt_opr == InvalidOid)
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("column \"%s\" cannot be used in statistics because its type %s has no default btree operator class",
                        attname, format_type_be(attForm->atttypid))));

I added an Assert for this part, not sure if that is enough.
i think it really should be:
if (typentry == NULL || typentry->type_id != elmtyp)
{
typentry = lookup_type_cache(elmtyp, sort_asc ? TYPECACHE_LT_OPR :
TYPECACHE_GT_OPR);
fcinfo->flinfo->fn_extra = (void *) typentry;
if ((sort_asc && !OidIsValid(typentry->lt_opr)) ||
(!sort_asc && !OidIsValid(typentry->gt_opr)))
ereport(ERROR,....)
}
Imagine a type that doesn't have a less-than or greater-than operator
(i.e. the TYPECACHE_LT_OPR / TYPECACHE_GT_OPR lookup finds nothing):
then we cannot do the sort, so we should just error out.
I just tried this colour type [1], created with:
CREATE TYPE colour (INPUT = colour_in, OUTPUT = colour_out, LIKE = pg_catalog.int4);
select array_sort('{#FF0000, #FF0000}'::colour[]);
Of course it will segfault with your new Assert.
[1]: https://github.com/hlinnaka/colour-datatype/blob/master/colour.c
On Wed, Oct 2, 2024 at 9:51 AM jian he <jian.universality@gmail.com> wrote:
+ typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra;
+ if (typentry == NULL || typentry->type_id != elmtyp)
+ {
+     typentry = lookup_type_cache(elmtyp, sort_asc ? TYPECACHE_LT_OPR : TYPECACHE_GT_OPR);
+     fcinfo->flinfo->fn_extra = (void *) typentry;
+ }

you need to one-time check typentry->lt_opr or typentry->gt_opr exists?
see CreateStatistics.

    /* Disallow data types without a less-than operator */
    type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR);
    if (type->lt_opr == InvalidOid)
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("column \"%s\" cannot be used in statistics because its type %s has no default btree operator class",
                        attname, format_type_be(attForm->atttypid))));

I added an Assert for this part, not sure if that is enough.
i think it really should be:
if (typentry == NULL || typentry->type_id != elmtyp)
{
typentry = lookup_type_cache(elmtyp, sort_asc ? TYPECACHE_LT_OPR :
TYPECACHE_GT_OPR);
fcinfo->flinfo->fn_extra = (void *) typentry;
if ((sort_asc && !OidIsValid(typentry->lt_opr)) ||
(!sort_asc && !OidIsValid(typentry->gt_opr)))
ereport(ERROR,....)
}

Imagine a type that doesn't have a less-than or greater-than operator
(i.e. the TYPECACHE_LT_OPR / TYPECACHE_GT_OPR lookup finds nothing):
then we cannot do the sort, so we should just error out.

I just tried this colour type [1], created with:
CREATE TYPE colour (INPUT = colour_in, OUTPUT = colour_out, LIKE = pg_catalog.int4);
select array_sort('{#FF0000, #FF0000}'::colour[]);
Of course it will segfault with your new Assert.

[1] https://github.com/hlinnaka/colour-datatype/blob/master/colour.c
Make sense, PFA v5 with Jian's suggestion.
--
Regards
Junwang Zhao
Attachments:
v5-0001-general-purpose-array_sort.patchapplication/octet-stream; name=v5-0001-general-purpose-array_sort.patchDownload
From e2ce82f1bc8326dd1a451b17c6df4cd61907c558 Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Fri, 27 Sep 2024 13:05:40 +0000
Subject: [PATCH v5] general purpose array_sort
Sorts anyarray in either ascending or descending order.
The array must be empty or one-dimensional.
Signed-off-by: Junwang Zhao <zhjwpku@gmail.com>
---
doc/src/sgml/func.sgml | 32 +++
src/backend/utils/adt/array_userfuncs.c | 186 ++++++++++++++++++
src/include/catalog/pg_proc.dat | 6 +
src/test/regress/expected/arrays.out | 125 ++++++++++++
.../regress/expected/collate.icu.utf8.out | 13 ++
src/test/regress/sql/arrays.sql | 25 +++
src/test/regress/sql/collate.icu.utf8.sql | 4 +
src/tools/pgindent/typedefs.list | 1 +
8 files changed, 392 insertions(+)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index e39d524b6b..cf30484e2a 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -20419,6 +20419,38 @@ SELECT NULLIF(value, '(none)') ...
</para></entry>
</row>
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <indexterm>
+ <primary>array_sort</primary>
+ </indexterm>
+ <function>array_sort</function> ( <type>anyarray</type> <optional> COLLATE <replaceable>collation_name</replaceable> </optional> <optional> , <parameter>dir</parameter> </optional>)
+ <returnvalue>anyarray</returnvalue>
+ </para>
+ <para>
+ Sorts the array based on the given parameter. The array must be empty or one-dimensional.
+ </para>
+ <para>
+ If the <literal>COLLATE</literal> option is specified then sorting is based on <replaceable>collation_name</replaceable>; otherwise
+ the array element type's collation is used.
+ <parameter>dir</parameter> must be one of the following; the starred options are the defaults that apply when the null position is omitted.
+<programlisting>
+ asc
+ desc
+ asc nulls first
+ asc nulls last *
+ desc nulls first *
+ desc nulls last
+ nulls first
+ nulls last
+</programlisting>
+ </para>
+ <para>
+ <literal>array_sort(ARRAY[1,2,5,6,3,4])</literal>
+ <returnvalue>{1,2,3,4,5,6}</returnvalue>
+ </para></entry>
+ </row>
+
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm id="function-array-to-string">
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index 6599be2ec5..9ec006d716 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -12,15 +12,18 @@
*/
#include "postgres.h"
+#include "catalog/namespace.h"
#include "catalog/pg_type.h"
#include "common/int.h"
#include "common/pg_prng.h"
#include "libpq/pqformat.h"
+#include "miscadmin.h"
#include "port/pg_bitutils.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/lsyscache.h"
+#include "utils/tuplesort.h"
#include "utils/typcache.h"
/*
@@ -1685,3 +1688,186 @@ array_sample(PG_FUNCTION_ARGS)
PG_RETURN_ARRAYTYPE_P(result);
}
+
+
+#define WHITESPACE " \t\n\r\v\f"
+
+typedef enum /* states of the tokenizer loop in parse_sort_order() */
+{
+ PARSE_SORT_ORDER_INIT, /* no token consumed yet */
+ PARSE_SORT_ORDER_DIRECTION_SET, /* ASC or DESC seen; only NULLS may follow */
+ PARSE_SORT_ORDER_NULLS_OPTION, /* NULLS seen; FIRST or LAST must follow */
+ PARSE_SORT_ORDER_ERROR, /* unexpected token; parsing stops and fails */
+ PARSE_SORT_ORDER_DONE /* FIRST or LAST seen; any further token is an error */
+} ParseSortOrderState;
+
+/*
+ * parse_sort_order
+ *
+ * Interpret a whitespace-separated, case-insensitive sort specification of
+ * the form
+ *		[ASC | DESC] [NULLS {FIRST | LAST}]
+ * and store the outcome in *sort_asc and *nulls_first.
+ *
+ * *sort_asc is written only when ASC or DESC is present, so the value the
+ * caller put there beforehand acts as the default direction.  When no NULLS
+ * clause is given, *nulls_first is set to the opposite of *sort_asc (nulls
+ * last for ascending, nulls first for descending).
+ *
+ * Returns true if the string is a valid specification (a string with no
+ * tokens at all is accepted), false otherwise.  On failure the output
+ * parameters may already have been partially updated.
+ */
+static bool
+parse_sort_order(const char *str, bool *sort_asc, bool *nulls_first)
+{
+	char	   *buf = pstrdup(str);	/* strtok_r scribbles on its input */
+	char	   *next;
+	char	   *word;
+	bool		explicit_nulls = false;
+	bool		ok;
+	ParseSortOrderState cur = PARSE_SORT_ORDER_INIT;
+
+	for (word = strtok_r(buf, WHITESPACE, &next);
+		 word != NULL && cur != PARSE_SORT_ORDER_ERROR;
+		 word = strtok_r(NULL, WHITESPACE, &next))
+	{
+		if (cur == PARSE_SORT_ORDER_DONE)
+		{
+			/* No more tokens should be processed after first/last */
+			cur = PARSE_SORT_ORDER_ERROR;
+		}
+		else if (cur == PARSE_SORT_ORDER_NULLS_OPTION)
+		{
+			bool		first = (pg_strcasecmp(word, "FIRST") == 0);
+
+			if (first || pg_strcasecmp(word, "LAST") == 0)
+			{
+				*nulls_first = first;
+				explicit_nulls = true;
+				cur = PARSE_SORT_ORDER_DONE;
+			}
+			else
+				cur = PARSE_SORT_ORDER_ERROR;
+		}
+		else if (pg_strcasecmp(word, "NULLS") == 0)
+		{
+			/* acceptable both as the first token and right after ASC/DESC */
+			cur = PARSE_SORT_ORDER_NULLS_OPTION;
+		}
+		else if (cur == PARSE_SORT_ORDER_INIT &&
+				 pg_strcasecmp(word, "ASC") == 0)
+		{
+			*sort_asc = true;
+			cur = PARSE_SORT_ORDER_DIRECTION_SET;
+		}
+		else if (cur == PARSE_SORT_ORDER_INIT &&
+				 pg_strcasecmp(word, "DESC") == 0)
+		{
+			*sort_asc = false;
+			cur = PARSE_SORT_ORDER_DIRECTION_SET;
+		}
+		else
+			cur = PARSE_SORT_ORDER_ERROR;
+	}
+
+	/*
+	 * Running out of tokens is fine in every state except after a dangling
+	 * NULLS (and, of course, after an error).
+	 */
+	ok = (cur != PARSE_SORT_ORDER_ERROR &&
+		  cur != PARSE_SORT_ORDER_NULLS_OPTION);
+
+	/* Derive the null placement from the direction when not spelled out */
+	if (ok && !explicit_nulls)
+		*nulls_first = !*sort_asc;
+
+	pfree(buf);
+	return ok;
+}
+
+/*
+ * array_sort
+ *
+ * Sorts a one-dimensional array and returns the sorted array.
+ *
+ * Argument 0 is the array.  When the function is invoked with a second
+ * argument (see array_sort_order), that argument is a text sort
+ * specification understood by parse_sort_order():
+ *		[ASC | DESC] [NULLS {FIRST | LAST}]
+ * Without it the sort is ascending with nulls last.  Elements are compared
+ * using the function's input collation.
+ *
+ * An array with no dimensions is returned unchanged.  An error is raised
+ * for an array with more than one dimension, for an invalid sort
+ * specification, and for an element type that has no ordering operator for
+ * the requested direction.
+ */
+Datum
+array_sort(PG_FUNCTION_ARGS)
+{
+	ArrayType  *array = PG_GETARG_ARRAYTYPE_P(0);
+	text	   *dirstr = (fcinfo->nargs > 1) ? PG_GETARG_TEXT_PP(1) : NULL;
+	bool		sort_asc = true;
+	bool		nulls_first = false;
+	Oid			elmtyp;
+	Oid			sortop;
+	Oid			collation = PG_GET_COLLATION();
+	TypeCacheEntry *typentry;
+	Tuplesortstate *tuplesortstate;
+	ArrayIterator array_iterator;
+	Datum		value;
+	bool		isnull;
+	ArrayBuildState *astate = NULL;
+
+	if (ARR_NDIM(array) > 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("multidimensional arrays sorting are not supported")));
+
+	/* Nothing to sort in an empty array */
+	if (ARR_NDIM(array) < 1)
+		PG_RETURN_ARRAYTYPE_P(array);
+
+	if (dirstr != NULL)
+	{
+		char	   *dir = text_to_cstring(dirstr);
+		bool		valid = parse_sort_order(dir, &sort_asc, &nulls_first);
+
+		/* parse_sort_order works on its own copy, so release ours */
+		pfree(dir);
+
+		if (!valid)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("second parameter must be a valid sort direction")));
+	}
+
+	elmtyp = ARR_ELEMTYPE(array);
+	typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra;
+	if (typentry == NULL || typentry->type_id != elmtyp)
+	{
+		/*
+		 * Ask for both ordering operators.  The entry is cached in fn_extra
+		 * and reused by later calls through the same FmgrInfo, and those
+		 * calls may request the opposite direction from this one.
+		 */
+		typentry = lookup_type_cache(elmtyp,
+									 TYPECACHE_LT_OPR | TYPECACHE_GT_OPR);
+		fcinfo->flinfo->fn_extra = (void *) typentry;
+	}
+
+	/*
+	 * Validate the operator on every call, not only on a cache miss, because
+	 * the sort direction can differ from one call to the next.
+	 */
+	sortop = sort_asc ? typentry->lt_opr : typentry->gt_opr;
+	if (!OidIsValid(sortop))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_FUNCTION),
+				 errmsg("could not identify an ordering operator for type %s",
+						format_type_be(elmtyp))));
+
+	tuplesortstate = tuplesort_begin_datum(elmtyp,
+										   sortop,
+										   collation,
+										   nulls_first, work_mem, NULL, false);
+
+	/* Feed every element, nulls included, to the sorter */
+	array_iterator = array_create_iterator(array, 0, NULL);
+	while (array_iterate(array_iterator, &value, &isnull))
+	{
+		tuplesort_putdatum(tuplesortstate, value, isnull);
+	}
+	array_free_iterator(array_iterator);
+
+	/*
+	 * Do the sort.
+	 */
+	tuplesort_performsort(tuplesortstate);
+
+	/* Read the elements back in sorted order and rebuild the array */
+	while (tuplesort_getdatum(tuplesortstate, true, false, &value, &isnull, NULL))
+	{
+		astate = accumArrayResult(astate, value, isnull,
+								  elmtyp, CurrentMemoryContext);
+	}
+
+	tuplesort_end(tuplesortstate);
+
+	/* Avoid leaking memory when handed toasted input */
+	PG_FREE_IF_COPY(array, 0);
+	PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
+}
+
+Datum
+array_sort_order(PG_FUNCTION_ARGS) /* entry point registered in pg_proc.dat for array_sort(anyarray, text) */
+{
+ return array_sort(fcinfo); /* array_sort() itself picks up the second argument when fcinfo->nargs > 1 */
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 43f608d7a0..1b2d64ac39 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1734,6 +1734,12 @@
{ oid => '6216', descr => 'take samples from array',
proname => 'array_sample', provolatile => 'v', prorettype => 'anyarray',
proargtypes => 'anyarray int4', prosrc => 'array_sample' },
+{ oid => '8810', descr => 'sort array',
+ proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+ proargtypes => 'anyarray', prosrc => 'array_sort'},
+{ oid => '8811', descr => 'sort array',
+ proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+ proargtypes => 'anyarray text', prosrc => 'array_sort_order'},
{ oid => '3816', descr => 'array typanalyze',
proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool',
proargtypes => 'internal', prosrc => 'array_typanalyze' },
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index a6d81fd5f9..9c71a65b5a 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -2703,3 +2703,128 @@ SELECT array_sample('{1,2,3,4,5,6}'::int[], -1); -- fail
ERROR: sample size must be between 0 and 6
SELECT array_sample('{1,2,3,4,5,6}'::int[], 7); --fail
ERROR: sample size must be between 0 and 6
+-- array_sort
+SELECT array_sort('{}'::int[]);
+ array_sort
+------------
+ {}
+(1 row)
+
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+ array_sort
+---------------
+ {1,2,3,4,5,6}
+(1 row)
+
+SELECT array_sort('{1,3,5,2,4,6}'::int[], 'desc');
+ array_sort
+---------------
+ {6,5,4,3,2,1}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'asc');
+ array_sort
+---------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'desc');
+ array_sort
+---------------------------
+ {6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+ array_sort
+---------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[], 'desc');
+ array_sort
+---------------------------
+ {6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[]);
+ array_sort
+------------------------------------------------------------------
+ {"abc DEF 123abc","ábc sßs ßss DÉF",ⱥȺ,ⱥⱥⱥ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž"}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[], 'desc');
+ array_sort
+------------------------------------------------------------------
+ {"DŽxxDŽ džxxDž Džxxdž",ȺȺȺ,ⱥⱥⱥ,ⱥȺ,"ábc sßs ßss DÉF","abc DEF 123abc"}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[] COLLATE "pg_c_utf8", 'asc');
+ array_sort
+------------------------------------------------------------------
+ {"abc DEF 123abc","ábc sßs ßss DÉF","DŽxxDŽ džxxDž Džxxdž",ȺȺȺ,ⱥȺ,ⱥⱥⱥ}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[] COLLATE "pg_c_utf8", 'desc');
+ array_sort
+------------------------------------------------------------------
+ {ⱥⱥⱥ,ⱥȺ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž","ábc sßs ßss DÉF","abc DEF 123abc"}
+(1 row)
+
+-- nulls first/last tests
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[]);
+ array_sort
+----------------------------------------------------------------------------
+ {"abc DEF 123abc","ábc sßs ßss DÉF",ⱥȺ,ⱥⱥⱥ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž",NULL,NULL}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'asc');
+ array_sort
+----------------------------------------------------------------------------
+ {"abc DEF 123abc","ábc sßs ßss DÉF",ⱥȺ,ⱥⱥⱥ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž",NULL,NULL}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'desc');
+ array_sort
+----------------------------------------------------------------------------
+ {NULL,NULL,"DŽxxDŽ džxxDž Džxxdž",ȺȺȺ,ⱥⱥⱥ,ⱥȺ,"ábc sßs ßss DÉF","abc DEF 123abc"}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'nulls first');
+ array_sort
+----------------------------------------------------------------------------
+ {NULL,NULL,"abc DEF 123abc","ábc sßs ßss DÉF",ⱥȺ,ⱥⱥⱥ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž"}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'nulls first');
+ array_sort
+----------------------------------------------------------------------------
+ {NULL,NULL,"abc DEF 123abc","ábc sßs ßss DÉF",ⱥȺ,ⱥⱥⱥ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž"}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'asc nulls first');
+ array_sort
+----------------------------------------------------------------------------
+ {NULL,NULL,"abc DEF 123abc","ábc sßs ßss DÉF",ⱥȺ,ⱥⱥⱥ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž"}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'asc nulls last');
+ array_sort
+----------------------------------------------------------------------------
+ {"abc DEF 123abc","ábc sßs ßss DÉF",ⱥȺ,ⱥⱥⱥ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž",NULL,NULL}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'desc nulls first');
+ array_sort
+----------------------------------------------------------------------------
+ {NULL,NULL,"DŽxxDŽ džxxDž Džxxdž",ȺȺȺ,ⱥⱥⱥ,ⱥȺ,"ábc sßs ßss DÉF","abc DEF 123abc"}
+(1 row)
+
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'desc nulls last');
+ array_sort
+----------------------------------------------------------------------------
+ {"DŽxxDŽ džxxDž Džxxdž",ȺȺȺ,ⱥⱥⱥ,ⱥȺ,"ábc sßs ßss DÉF","abc DEF 123abc",NULL,NULL}
+(1 row)
+
+-- multidimensional array tests
+SELECT array_sort(ARRAY[[1,2],[3,4],[5,6]]); -- fail
+ERROR: multidimensional arrays sorting are not supported
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index 31345295c1..743b4d8199 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1338,6 +1338,19 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse
t | t
(1 row)
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+ array_sort
+------------
+ {a,B}
+(1 row)
+
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+ array_sort
+------------
+ {B,a}
+(1 row)
+
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index 47058dfde5..30606700ef 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -827,3 +827,28 @@ SELECT array_dims(array_sample('[-1:2][2:3]={{1,2},{3,NULL},{5,6},{7,8}}'::int[]
SELECT array_dims(array_sample('{{{1,2},{3,NULL}},{{5,6},{7,8}},{{9,10},{11,12}}}'::int[], 2));
SELECT array_sample('{1,2,3,4,5,6}'::int[], -1); -- fail
SELECT array_sample('{1,2,3,4,5,6}'::int[], 7); --fail
+
+-- array_sort
+SELECT array_sort('{}'::int[]);
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+SELECT array_sort('{1,3,5,2,4,6}'::int[], 'desc');
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'asc');
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'desc');
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[], 'desc');
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[]);
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[], 'desc');
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[] COLLATE "pg_c_utf8", 'asc');
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[] COLLATE "pg_c_utf8", 'desc');
+-- nulls first/last tests
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[]);
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'asc');
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'desc');
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'nulls first');
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'nulls first');
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'asc nulls first');
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'asc nulls last');
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'desc nulls first');
+SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'desc nulls last');
+-- multidimensional array tests
+SELECT array_sort(ARRAY[[1,2],[3,4],[5,6]]); -- fail
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index 80f28a97d7..3c739d332b 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -536,6 +536,10 @@ CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=second
SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive;
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index b6135f0347..99f1abe869 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -2025,6 +2025,7 @@ ParseLoc
ParseNamespaceColumn
ParseNamespaceItem
ParseParamRefHook
+ParseSortOrderState
ParseState
ParsedLex
ParsedScript
--
2.39.5
Hi Junwang,
On Wed, Oct 2, 2024 at 11:46 PM Junwang Zhao <zhjwpku@gmail.com> wrote:
On Wed, Oct 2, 2024 at 9:51 AM jian he <jian.universality@gmail.com> wrote:
+ typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra; + if (typentry == NULL || typentry->type_id != elmtyp) + { + typentry = lookup_type_cache(elmtyp, sort_asc ? TYPECACHE_LT_OPR : TYPECACHE_GT_OPR); + fcinfo->flinfo->fn_extra = (void *) typentry; + } you need to one-time check typentry->lt_opr or typentry->gt_opr exists? see CreateStatistics. /* Disallow data types without a less-than operator */ type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR); if (type->lt_opr == InvalidOid) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("column \"%s\" cannot be used in statistics because its type %s has no default btree operator class", attname, format_type_be(attForm->atttypid))));I added an Assert for this part, not sure if that is enough.
i think it really should be:
if (typentry == NULL || typentry->type_id != elmtyp)
{
typentry = lookup_type_cache(elmtyp, sort_asc ? TYPECACHE_LT_OPR :
TYPECACHE_GT_OPR);
fcinfo->flinfo->fn_extra = (void *) typentry;
if ((sort_asc && !OidIsValid(typentry->lt_opr)) ||
(!sort_asc && !OidIsValid(typentry->gt_opr)))
ereport(ERROR,....)
}
Imagine a type that doesn't have TYPECACHE_LT_OPR or TYPECACHE_GT_OPR;
then we cannot do the sort, and we should just error out.
I just tried this colour type [1] with (CREATE TYPE colour (INPUT =
colour_in, OUTPUT = colour_out, LIKE = pg_catalog.int4);
select array_sort('{#FF0000, #FF0000}'::colour[]);
of course it will segfault with your new Assert.
[1] https://github.com/hlinnaka/colour-datatype/blob/master/colour.c
Make sense, PFA v5 with Jian's suggestion.
Have you noticed that the tests have failed on Cirrus CI runs of this patch?
https://cirrus-ci.com/github/postgresql-cfbot/postgresql/cf%2F5277
It might be related to the test machines having a different *default*
locale than your local environment, which could result in a different
sort order for the test data. You may need to add an explicit COLLATE
clause to the tests to ensure consistent sorting across systems.
--
Thanks, Amit Langote
Hi Amit,
On Thu, Oct 3, 2024 at 2:22 PM Amit Langote <amitlangote09@gmail.com> wrote:
Hi Junwang,
On Wed, Oct 2, 2024 at 11:46 PM Junwang Zhao <zhjwpku@gmail.com> wrote:
On Wed, Oct 2, 2024 at 9:51 AM jian he <jian.universality@gmail.com> wrote:
+ typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra; + if (typentry == NULL || typentry->type_id != elmtyp) + { + typentry = lookup_type_cache(elmtyp, sort_asc ? TYPECACHE_LT_OPR : TYPECACHE_GT_OPR); + fcinfo->flinfo->fn_extra = (void *) typentry; + } you need to one-time check typentry->lt_opr or typentry->gt_opr exists? see CreateStatistics. /* Disallow data types without a less-than operator */ type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR); if (type->lt_opr == InvalidOid) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("column \"%s\" cannot be used in statistics because its type %s has no default btree operator class", attname, format_type_be(attForm->atttypid))));I added an Assert for this part, not sure if that is enough.
i think it really should be:
if (typentry == NULL || typentry->type_id != elmtyp)
{
typentry = lookup_type_cache(elmtyp, sort_asc ? TYPECACHE_LT_OPR :
TYPECACHE_GT_OPR);
fcinfo->flinfo->fn_extra = (void *) typentry;
if ((sort_asc && !OidIsValid(typentry->lt_opr)) ||
(!sort_asc && !OidIsValid(typentry->gt_opr)))
ereport(ERROR,....)
}
Imagine a type that doesn't have TYPECACHE_LT_OPR or TYPECACHE_GT_OPR;
then we cannot do the sort, and we should just error out.
I just tried this colour type [1] with (CREATE TYPE colour (INPUT =
colour_in, OUTPUT = colour_out, LIKE = pg_catalog.int4);
select array_sort('{#FF0000, #FF0000}'::colour[]);
of course it will segfault with your new Assert.
[1] https://github.com/hlinnaka/colour-datatype/blob/master/colour.c
Make sense, PFA v5 with Jian's suggestion.
Have you noticed that the tests have failed on Cirrus CI runs of this patch?
https://cirrus-ci.com/github/postgresql-cfbot/postgresql/cf%2F5277
Sorry for the late reply due to my vacation. I should have paid
more attention to Cirrus CI earlier ;)
It might be related to the test machines having a different *default*
locale than your local environment, which could result in a different
sort order for the test data. You may need to add an explicit COLLATE
clause to the tests to ensure consistent sorting across systems.
I've changed the tests to use just ASCII characters, then added
*COLLATE "C"* to the tests and CI passed, PFA v6.
--
Thanks, Amit Langote
--
Regards
Junwang Zhao
Attachments:
v6-0001-general-purpose-array_sort.patchapplication/octet-stream; name=v6-0001-general-purpose-array_sort.patchDownload
From e44100b2947305cba33dec6baa6d03398daf363b Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Fri, 27 Sep 2024 13:05:40 +0000
Subject: [PATCH v6] general purpose array_sort
Sorts anyarray in either ascending or descending order.
The array must be empty or one-dimensional.
Signed-off-by: Junwang Zhao <zhjwpku@gmail.com>
---
doc/src/sgml/func.sgml | 32 +++
src/backend/utils/adt/array_userfuncs.c | 186 ++++++++++++++++++
src/include/catalog/pg_proc.dat | 6 +
src/test/regress/expected/arrays.out | 119 +++++++++++
.../regress/expected/collate.icu.utf8.out | 13 ++
src/test/regress/sql/arrays.sql | 24 +++
src/test/regress/sql/collate.icu.utf8.sql | 4 +
src/tools/pgindent/typedefs.list | 1 +
8 files changed, 385 insertions(+)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 7b4fbb5047..49d3197955 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -20421,6 +20421,38 @@ SELECT NULLIF(value, '(none)') ...
</para></entry>
</row>
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <indexterm>
+ <primary>array_sort</primary>
+ </indexterm>
+ <function>array_sort</function> ( <type>anyarray</type> <optional> COLLATE <replaceable>collation_name</replaceable> </optional> <optional> , <parameter>dir</parameter> </optional>)
+ <returnvalue>anyarray</returnvalue>
+ </para>
+ <para>
+ Sorts the array based on the given parameter. The array must be empty or one-dimensional.
+ </para>
+ <para>
+ If the <literal>COLLATE</literal> option is specified, sorting is based on <replaceable>collation_name</replaceable>; otherwise
+ the array element type's collation is used.
+ <parameter>dir</parameter> must be one of the following; the starred options are the defaults when the null position is omitted.
+<programlisting>
+ asc
+ desc
+ asc nulls first
+ asc nulls last *
+ desc nulls first *
+ desc nulls last
+ nulls first
+ nulls last
+</programlisting>
+ </para>
+ <para>
+ <literal>array_sort(ARRAY[1,2,5,6,3,4])</literal>
+ <returnvalue>{1,2,3,4,5,6}</returnvalue>
+ </para></entry>
+ </row>
+
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm id="function-array-to-string">
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index 6599be2ec5..9ec006d716 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -12,15 +12,18 @@
*/
#include "postgres.h"
+#include "catalog/namespace.h"
#include "catalog/pg_type.h"
#include "common/int.h"
#include "common/pg_prng.h"
#include "libpq/pqformat.h"
+#include "miscadmin.h"
#include "port/pg_bitutils.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/lsyscache.h"
+#include "utils/tuplesort.h"
#include "utils/typcache.h"
/*
@@ -1685,3 +1688,186 @@ array_sample(PG_FUNCTION_ARGS)
PG_RETURN_ARRAYTYPE_P(result);
}
+
+
+#define WHITESPACE " \t\n\r\v\f"
+
+typedef enum /* states of the parse_sort_order() token scanner */
+{
+ PARSE_SORT_ORDER_INIT, /* no token consumed yet */
+ PARSE_SORT_ORDER_DIRECTION_SET, /* saw ASC or DESC */
+ PARSE_SORT_ORDER_NULLS_OPTION, /* saw NULLS, expecting FIRST or LAST */
+ PARSE_SORT_ORDER_ERROR, /* malformed input */
+ PARSE_SORT_ORDER_DONE /* complete, valid specification */
+} ParseSortOrderState;
+
+/*
+ * parse_sort_order
+ *
+ * Parse "[ASC | DESC] [NULLS {FIRST | LAST}]": whitespace-separated keywords,
+ * matched case-insensitively.  An empty string is accepted.
+ *
+ * Returns true if the string is well formed.  *sort_asc is written only
+ * when a direction keyword is present, so the caller must preset it; when
+ * no NULLS clause is given, *nulls_first is set to !*sort_asc.
+ *
+ * On failure, returns false; the outputs may have been partially updated.
+ */
+static bool
+parse_sort_order(const char *str, bool *sort_asc, bool *nulls_first)
+{
+	ParseSortOrderState state = PARSE_SORT_ORDER_INIT;
+	bool		saw_nulls_clause = false;
+	char	   *buf = pstrdup(str);	/* strtok_r modifies its input */
+	char	   *pos;
+	char	   *word;
+
+	for (word = strtok_r(buf, WHITESPACE, &pos);
+		 word != NULL && state != PARSE_SORT_ORDER_ERROR;
+		 word = strtok_r(NULL, WHITESPACE, &pos))
+	{
+		if (state == PARSE_SORT_ORDER_INIT &&
+			pg_strcasecmp(word, "ASC") == 0)
+		{
+			*sort_asc = true;
+			state = PARSE_SORT_ORDER_DIRECTION_SET;
+		}
+		else if (state == PARSE_SORT_ORDER_INIT &&
+				 pg_strcasecmp(word, "DESC") == 0)
+		{
+			*sort_asc = false;
+			state = PARSE_SORT_ORDER_DIRECTION_SET;
+		}
+		else if ((state == PARSE_SORT_ORDER_INIT ||
+				  state == PARSE_SORT_ORDER_DIRECTION_SET) &&
+				 pg_strcasecmp(word, "NULLS") == 0)
+		{
+			/* NULLS may open the string or follow a direction keyword */
+			state = PARSE_SORT_ORDER_NULLS_OPTION;
+		}
+		else if (state == PARSE_SORT_ORDER_NULLS_OPTION &&
+				 pg_strcasecmp(word, "FIRST") == 0)
+		{
+			*nulls_first = true;
+			saw_nulls_clause = true;
+			state = PARSE_SORT_ORDER_DONE;
+		}
+		else if (state == PARSE_SORT_ORDER_NULLS_OPTION &&
+				 pg_strcasecmp(word, "LAST") == 0)
+		{
+			*nulls_first = false;
+			saw_nulls_clause = true;
+			state = PARSE_SORT_ORDER_DONE;
+		}
+		else
+		{
+			/*
+			 * Unknown keyword, keyword out of place, or trailing input
+			 * after a complete specification.
+			 */
+			state = PARSE_SORT_ORDER_ERROR;
+		}
+	}
+
+	/* A dangling NULLS without FIRST/LAST is malformed */
+	if (state == PARSE_SORT_ORDER_NULLS_OPTION)
+		state = PARSE_SORT_ORDER_ERROR;
+	else if (state != PARSE_SORT_ORDER_ERROR)
+		state = PARSE_SORT_ORDER_DONE;
+
+	/* Default null placement: last for ascending, first for descending */
+	if (state == PARSE_SORT_ORDER_DONE && !saw_nulls_clause)
+		*nulls_first = !*sort_asc;
+
+	pfree(buf);
+
+	return state == PARSE_SORT_ORDER_DONE;
+}
+
+/*
+ * array_sort
+ *
+ * Sorts a one-dimensional array with the element type's "<" or ">" operator
+ * under the input collation.  The optional second argument is a sort-order
+ * string (see parse_sort_order); the default is ascending with nulls last.
+ * Empty arrays are returned as-is; multidimensional arrays are rejected.
+ */
+Datum
+array_sort(PG_FUNCTION_ARGS)
+{
+	ArrayType  *array = PG_GETARG_ARRAYTYPE_P(0);
+	text	   *dirstr = (fcinfo->nargs > 1) ? PG_GETARG_TEXT_PP(1) : NULL;
+	bool		sort_asc = true;
+	bool		nulls_first = false;
+	Oid			elmtyp;
+	Oid			sortop;
+	Oid			collation = PG_GET_COLLATION();
+	TypeCacheEntry *typentry;
+	Tuplesortstate *tuplesortstate;
+	ArrayIterator array_iterator;
+	Datum		value;
+	bool		isnull;
+	ArrayBuildState *astate = NULL;
+
+	if (ARR_NDIM(array) > 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("multidimensional arrays sorting are not supported")));
+
+	/* Validate the sort order first, so bad input is rejected even for '{}' */
+	if (dirstr != NULL)
+	{
+		char	   *dircstr = text_to_cstring(dirstr);
+
+		if (!parse_sort_order(dircstr, &sort_asc, &nulls_first))
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("second parameter must be a valid sort direction")));
+		pfree(dircstr);
+	}
+
+	if (ARR_NDIM(array) < 1)
+		PG_RETURN_ARRAYTYPE_P(array);
+
+	/*
+	 * Request both operators: the direction can differ between calls that
+	 * share this cache, so the needed one is validated on every call.
+	 */
+	elmtyp = ARR_ELEMTYPE(array);
+	typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra;
+	if (typentry == NULL || typentry->type_id != elmtyp)
+	{
+		typentry = lookup_type_cache(elmtyp, TYPECACHE_LT_OPR | TYPECACHE_GT_OPR);
+		fcinfo->flinfo->fn_extra = (void *) typentry;
+	}
+	sortop = sort_asc ? typentry->lt_opr : typentry->gt_opr;
+	if (!OidIsValid(sortop))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_FUNCTION),
+				 errmsg("could not identify an ordering operator for type %s",
+						format_type_be(elmtyp))));
+
+	tuplesortstate = tuplesort_begin_datum(elmtyp, sortop, collation,
+										   nulls_first, work_mem, NULL, false);
+
+	array_iterator = array_create_iterator(array, 0, NULL);
+	while (array_iterate(array_iterator, &value, &isnull))
+		tuplesort_putdatum(tuplesortstate, value, isnull);
+	array_free_iterator(array_iterator);
+
+	tuplesort_performsort(tuplesortstate);
+	while (tuplesort_getdatum(tuplesortstate, true, false, &value, &isnull, NULL))
+		astate = accumArrayResult(astate, value, isnull,
+								  elmtyp, CurrentMemoryContext);
+	tuplesort_end(tuplesortstate);
+
+	/* Avoid leaking memory when handed toasted input */
+	PG_FREE_IF_COPY(array, 0);
+	PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
+}
+
+Datum
+array_sort_order(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo); /* two-argument entry point: array_sort() reads argument 1 itself when nargs > 1 */
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 77f54a79e6..5850ecf550 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1734,6 +1734,12 @@
{ oid => '6216', descr => 'take samples from array',
proname => 'array_sample', provolatile => 'v', prorettype => 'anyarray',
proargtypes => 'anyarray int4', prosrc => 'array_sample' },
+{ oid => '8810', descr => 'sort array',
+ proname => 'array_sort', prorettype => 'anyarray',
+ proargtypes => 'anyarray', prosrc => 'array_sort' },
+{ oid => '8811', descr => 'sort array',
+ proname => 'array_sort', prorettype => 'anyarray',
+ proargtypes => 'anyarray text', prosrc => 'array_sort_order' },
{ oid => '3816', descr => 'array typanalyze',
proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool',
proargtypes => 'internal', prosrc => 'array_typanalyze' },
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index a6d81fd5f9..426a34487e 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -2703,3 +2703,122 @@ SELECT array_sample('{1,2,3,4,5,6}'::int[], -1); -- fail
ERROR: sample size must be between 0 and 6
SELECT array_sample('{1,2,3,4,5,6}'::int[], 7); --fail
ERROR: sample size must be between 0 and 6
+-- array_sort
+SELECT array_sort('{}'::int[]);
+ array_sort
+------------
+ {}
+(1 row)
+
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+ array_sort
+---------------
+ {1,2,3,4,5,6}
+(1 row)
+
+SELECT array_sort('{1,3,5,2,4,6}'::int[], 'desc');
+ array_sort
+---------------
+ {6,5,4,3,2,1}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'asc');
+ array_sort
+---------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'desc');
+ array_sort
+---------------------------
+ {6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+ array_sort
+---------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[], 'desc');
+ array_sort
+---------------------------
+ {6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
+ array_sort
+-----------------------
+ {Abc,CCC,bar,bbc,foo}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C", 'asc');
+ array_sort
+-----------------------
+ {Abc,CCC,bar,bbc,foo}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C", 'desc');
+ array_sort
+-----------------------
+ {foo,bbc,bar,CCC,Abc}
+(1 row)
+
+-- nulls first/last tests
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C");
+ array_sort
+----------------------------
+ {Abc,CCC,bar,bbc,foo,NULL}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'asc');
+ array_sort
+----------------------------
+ {Abc,CCC,bar,bbc,foo,NULL}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'desc');
+ array_sort
+----------------------------
+ {NULL,foo,bbc,bar,CCC,Abc}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'nulls first');
+ array_sort
+----------------------------
+ {NULL,Abc,CCC,bar,bbc,foo}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'nulls first');
+ array_sort
+----------------------------
+ {NULL,Abc,CCC,bar,bbc,foo}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'asc nulls first');
+ array_sort
+----------------------------
+ {NULL,Abc,CCC,bar,bbc,foo}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'asc nulls last');
+ array_sort
+----------------------------
+ {Abc,CCC,bar,bbc,foo,NULL}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'desc nulls first');
+ array_sort
+----------------------------
+ {NULL,foo,bbc,bar,CCC,Abc}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'desc nulls last');
+ array_sort
+----------------------------
+ {foo,bbc,bar,CCC,Abc,NULL}
+(1 row)
+
+-- multidimensional array tests
+SELECT array_sort(ARRAY[[1,2],[3,4],[5,6]]); -- fail
+ERROR: multidimensional arrays sorting are not supported
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index 31345295c1..743b4d8199 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1338,6 +1338,19 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse
t | t
(1 row)
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+ array_sort
+------------
+ {a,B}
+(1 row)
+
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+ array_sort
+------------
+ {B,a}
+(1 row)
+
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index 47058dfde5..dbc0a4e23e 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -827,3 +827,27 @@ SELECT array_dims(array_sample('[-1:2][2:3]={{1,2},{3,NULL},{5,6},{7,8}}'::int[]
SELECT array_dims(array_sample('{{{1,2},{3,NULL}},{{5,6},{7,8}},{{9,10},{11,12}}}'::int[], 2));
SELECT array_sample('{1,2,3,4,5,6}'::int[], -1); -- fail
SELECT array_sample('{1,2,3,4,5,6}'::int[], 7); --fail
+
+-- array_sort
+SELECT array_sort('{}'::int[]);
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+SELECT array_sort('{1,3,5,2,4,6}'::int[], 'desc');
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'asc');
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'desc');
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[], 'desc');
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C", 'asc');
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C", 'desc');
+-- nulls first/last tests
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C");
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'asc');
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'desc');
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'nulls first');
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'nulls first');
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'asc nulls first');
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'asc nulls last');
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'desc nulls first');
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'desc nulls last');
+-- multidimensional array tests
+SELECT array_sort(ARRAY[[1,2],[3,4],[5,6]]); -- fail
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index 80f28a97d7..3c739d332b 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -536,6 +536,10 @@ CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=second
SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive;
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 5fabb127d7..c4c8a11bd3 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -2025,6 +2025,7 @@ ParseLoc
ParseNamespaceColumn
ParseNamespaceItem
ParseParamRefHook
+ParseSortOrderState
ParseState
ParsedLex
ParsedScript
--
2.39.5
On Wed, Oct 9, 2024 at 10:10 PM Junwang Zhao <zhjwpku@gmail.com> wrote:
Hi Amit,
On Thu, Oct 3, 2024 at 2:22 PM Amit Langote <amitlangote09@gmail.com> wrote:
Hi Junwang,
On Wed, Oct 2, 2024 at 11:46 PM Junwang Zhao <zhjwpku@gmail.com> wrote:
On Wed, Oct 2, 2024 at 9:51 AM jian he <jian.universality@gmail.com> wrote:
+ typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra; + if (typentry == NULL || typentry->type_id != elmtyp) + { + typentry = lookup_type_cache(elmtyp, sort_asc ? TYPECACHE_LT_OPR : TYPECACHE_GT_OPR); + fcinfo->flinfo->fn_extra = (void *) typentry; + } you need to one-time check typentry->lt_opr or typentry->gt_opr exists? see CreateStatistics. /* Disallow data types without a less-than operator */ type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR); if (type->lt_opr == InvalidOid) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("column \"%s\" cannot be used in statistics because its type %s has no default btree operator class", attname, format_type_be(attForm->atttypid))));I added an Assert for this part, not sure if that is enough.
i think it really should be:
if (typentry == NULL || typentry->type_id != elmtyp)
{
typentry = lookup_type_cache(elmtyp, sort_asc ? TYPECACHE_LT_OPR :
TYPECACHE_GT_OPR);
fcinfo->flinfo->fn_extra = (void *) typentry;
if ((sort_asc && !OidIsValid(typentry->lt_opr)) ||
(!sort_asc && !OidIsValid(typentry->gt_opr)))
ereport(ERROR,....)
}
Imagine a type that doesn't have TYPECACHE_LT_OPR or TYPECACHE_GT_OPR;
then we cannot do the sort, and we should just error out.
I just tried this colour type [1] with (CREATE TYPE colour (INPUT =
colour_in, OUTPUT = colour_out, LIKE = pg_catalog.int4);
select array_sort('{#FF0000, #FF0000}'::colour[]);
of course it will segfault with your new Assert.
[1] https://github.com/hlinnaka/colour-datatype/blob/master/colour.c
Make sense, PFA v5 with Jian's suggestion.
Have you noticed that the tests have failed on Cirrus CI runs of this patch?
https://cirrus-ci.com/github/postgresql-cfbot/postgresql/cf%2F5277
Sorry for the late reply due to my vacation. I should have paid
more attention to Cirrus CI earlier ;)It might be related to the test machines having a different *default*
locale than your local environment, which could result in a different
sort order for the test data. You may need to add an explicit COLLATE
clause to the tests to ensure consistent sorting across systems.I've changed the tests to use just ASCII characters, then added
*COLLATE "C"* to the tests and CI passed, PFA v6.
Sadly the CI only passed on my own GitHub repo; it failed on
cfbot [1]. I will dig into the reason later because I cannot open the
Cirrus CI page right now ;(
[1]: https://cirrus-ci.com/task/5815925960605696
--
Thanks, Amit Langote--
Regards
Junwang Zhao
--
Regards
Junwang Zhao
On Wed, Oct 9, 2024 at 11:46 PM Junwang Zhao <zhjwpku@gmail.com> wrote:
On Wed, Oct 9, 2024 at 10:10 PM Junwang Zhao <zhjwpku@gmail.com> wrote:
Hi Amit,
On Thu, Oct 3, 2024 at 2:22 PM Amit Langote <amitlangote09@gmail.com> wrote:
Hi Junwang,
On Wed, Oct 2, 2024 at 11:46 PM Junwang Zhao <zhjwpku@gmail.com> wrote:
On Wed, Oct 2, 2024 at 9:51 AM jian he <jian.universality@gmail.com> wrote:
+ typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra; + if (typentry == NULL || typentry->type_id != elmtyp) + { + typentry = lookup_type_cache(elmtyp, sort_asc ? TYPECACHE_LT_OPR : TYPECACHE_GT_OPR); + fcinfo->flinfo->fn_extra = (void *) typentry; + } you need to one-time check typentry->lt_opr or typentry->gt_opr exists? see CreateStatistics. /* Disallow data types without a less-than operator */ type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR); if (type->lt_opr == InvalidOid) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("column \"%s\" cannot be used in statistics because its type %s has no default btree operator class", attname, format_type_be(attForm->atttypid))));I added an Assert for this part, not sure if that is enough.
i think it really should be:
if (typentry == NULL || typentry->type_id != elmtyp)
{
typentry = lookup_type_cache(elmtyp, sort_asc ? TYPECACHE_LT_OPR :
TYPECACHE_GT_OPR);
fcinfo->flinfo->fn_extra = (void *) typentry;
if ((sort_asc && !OidIsValid(typentry->lt_opr)) ||
(!sort_asc && !OidIsValid(typentry->gt_opr)))
ereport(ERROR,....)
}
Imagine a type that doesn't have TYPECACHE_LT_OPR or TYPECACHE_GT_OPR;
then we cannot do the sort, and we should just error out.
I just tried this colour type [1] with (CREATE TYPE colour (INPUT =
colour_in, OUTPUT = colour_out, LIKE = pg_catalog.int4);
select array_sort('{#FF0000, #FF0000}'::colour[]);
of course it will segfault with your new Assert.
[1] https://github.com/hlinnaka/colour-datatype/blob/master/colour.c
Make sense, PFA v5 with Jian's suggestion.
Have you noticed that the tests have failed on Cirrus CI runs of this patch?
https://cirrus-ci.com/github/postgresql-cfbot/postgresql/cf%2F5277
Sorry for the late reply due to my vacation. I should have paid
more attention to Cirrus CI earlier ;)It might be related to the test machines having a different *default*
locale than your local environment, which could result in a different
sort order for the test data. You may need to add an explicit COLLATE
clause to the tests to ensure consistent sorting across systems.I've changed the tests to use just ASCII characters, then added
*COLLATE "C"* to the tests and CI passed, PFA v6.Sadly the CI only passed on my own github repo, it failed on
cfbot[1], will dig into the reason later because I can not open the cirrus
ci page right now ;(
It seems the failure is not related to this patch; I guess the cause is
that the stop phase doesn't release the port properly?
2024-10-09 14:53:10.079 UTC [43052][checkpointer] LOG: checkpoint
complete: wrote 5617 buffers (34.3%), wrote 3 SLRU buffers; 0 WAL
file(s) added, 0 removed, 3 recycled; write=0.107 s, sync=0.001 s,
total=0.107 s; sync files=0, longest=0.000 s, average=0.000 s;
distance=45239 kB, estimate=45239 kB; lsn=0/414A138, redo
lsn=0/414A138
2024-10-09 14:53:10.084 UTC [43050][postmaster] LOG: database system
is shut down
2024-10-09 14:53:10.215 UTC [43270][postmaster] LOG: starting
PostgreSQL 18devel on x86_64-freebsd, compiled by clang-17.0.6, 64-bit
2024-10-09 14:53:10.215 UTC [43270][postmaster] LOG: could not bind
IPv4 address "127.0.0.1": Address already in use
2024-10-09 14:53:10.215 UTC [43270][postmaster] HINT: Is another
postmaster already running on port 11643? If not, wait a few seconds
and retry.
2024-10-09 14:53:10.215 UTC [43270][postmaster] WARNING: could not
create listen socket for "127.0.0.1"
2024-10-09 14:53:10.218 UTC [43270][postmaster] FATAL: could not
create any TCP/IP sockets
2024-10-09 14:53:10.218 UTC [43270][postmaster] LOG: database system
is shut down
--
Thanks, Amit Langote--
Regards
Junwang Zhao--
Regards
Junwang Zhao
--
Regards
Junwang Zhao
tricky case:
should we allow array element type to be composite/domain?
currently seems to work fine.
create table t(b int[]);
insert into t values ('{{1,3}}'), ('{{1,2}}');
select array_sort((select array_agg(t) from t), 'desc');
array_sort
-----------------------------------
{"(\"{{1,3}}\")","(\"{{1,2}}\")"}
select array_sort((t.b)) from t;
ERROR: multidimensional arrays sorting are not supported
select array_sort((select array_agg(t.b) from t));
ERROR: multidimensional arrays sorting are not supported
Hi Jian,
On Fri, Oct 11, 2024 at 1:12 PM jian he <jian.universality@gmail.com> wrote:
tricky case:
should we allow array element type to be composite/domain?
currently seems to work fine.create table t(b int[]);
insert into t values ('{{1,3}}'), ('{{1,2}}');select array_sort((select array_agg(t) from t), 'desc');
array_sort
-----------------------------------
{"(\"{{1,3}}\")","(\"{{1,2}}\")"}select array_sort((t.b)) from t;
ERROR: multidimensional arrays sorting are not supportedselect array_sort((select array_agg(t.b) from t));
ERROR: multidimensional arrays sorting are not supported
I tried the above cases, and the first one works because it's
a one dim array of composite type, the other two fails because
they are multidimensional.
It seems there is not much meaning to sort composite type,
so are you proposing we should error on that?
--
Regards
Junwang Zhao
Junwang Zhao <zhjwpku@gmail.com> writes:
It seems there is not much meaning to sort composite type,
so are you proposing we should error on that?
It's hardly "general purpose" if it randomly refuses to
sort certain types. I would say it should be able to sort
anything that ORDER BY will handle --- and that certainly
includes the cases shown here.
regards, tom lane
Sorry for the late reply.
On Mon, Oct 14, 2024 at 4:10 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:
Junwang Zhao <zhjwpku@gmail.com> writes:
It seems there is not much meaning to sort composite type,
so are you proposing we should error on that?It's hardly "general purpose" if it randomly refuses to
sort certain types. I would say it should be able to sort
anything that ORDER BY will handle --- and that certainly
includes the cases shown here.
Yeah, agreed.
PFA v7 with multi-array support.
regards, tom lane
--
Regards
Junwang Zhao
Attachments:
v7-0001-general-purpose-array_sort.patchapplication/octet-stream; name=v7-0001-general-purpose-array_sort.patchDownload
From 783791442fdb4005cdfb4775f8aad977cbfbfce9 Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Fri, 27 Sep 2024 13:05:40 +0000
Subject: [PATCH v7] general purpose array_sort
Sorts anyarray in either ascending or descending order.
Signed-off-by: Junwang Zhao <zhjwpku@gmail.com>
---
doc/src/sgml/func.sgml | 32 +++
src/backend/utils/adt/array_userfuncs.c | 205 ++++++++++++++++++
src/include/catalog/pg_proc.dat | 6 +
src/test/regress/expected/arrays.out | 123 +++++++++++
.../regress/expected/collate.icu.utf8.out | 13 ++
src/test/regress/sql/arrays.sql | 24 ++
src/test/regress/sql/collate.icu.utf8.sql | 4 +
src/tools/pgindent/typedefs.list | 1 +
8 files changed, 408 insertions(+)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index ad663c94d7..0414b43523 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -20421,6 +20421,38 @@ SELECT NULLIF(value, '(none)') ...
</para></entry>
</row>
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <indexterm>
+ <primary>array_sort</primary>
+ </indexterm>
+ <function>array_sort</function> ( <type>anyarray</type> <optional> COLLATE <replaceable>collation_name</replaceable> </optional> <optional> , <parameter>dir</parameter> </optional>)
+ <returnvalue>anyarray</returnvalue>
+ </para>
+ <para>
+ Sorts the array based on the given parameter.
+ </para>
+ <para>
+ If the <literal>COLLATE</literal> option is specified, sorting is based on <replaceable>collation_name</replaceable>; otherwise
+ the array element type's collation is used.
+ <parameter>dir</parameter> must be one of the following; the starred options are the defaults when the null position is omitted.
+<programlisting>
+ asc
+ desc
+ asc nulls first
+ asc nulls last *
+ desc nulls first *
+ desc nulls last
+ nulls first
+ nulls last
+</programlisting>
+ </para>
+ <para>
+ <literal>array_sort(ARRAY[[2,4],[2,1],[6,5]])</literal>
+ <returnvalue>{{2,1},{2,4},{6,5}}</returnvalue>
+ </para></entry>
+ </row>
+
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm id="function-array-to-string">
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index 6599be2ec5..b6c948e60f 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -12,15 +12,18 @@
*/
#include "postgres.h"
+#include "catalog/namespace.h"
#include "catalog/pg_type.h"
#include "common/int.h"
#include "common/pg_prng.h"
#include "libpq/pqformat.h"
+#include "miscadmin.h"
#include "port/pg_bitutils.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/lsyscache.h"
+#include "utils/tuplesort.h"
#include "utils/typcache.h"
/*
@@ -1685,3 +1688,205 @@ array_sample(PG_FUNCTION_ARGS)
PG_RETURN_ARRAYTYPE_P(result);
}
+
+
+#define WHITESPACE " \t\n\r\v\f"
+/*
+ * States of the state machine that parse_sort_order() steps through while
+ * consuming the tokens of a sort specification.
+ */
+typedef enum
+{
+ PARSE_SORT_ORDER_INIT, /* no token consumed yet */
+ PARSE_SORT_ORDER_DIRECTION_SET, /* ASC or DESC seen */
+ PARSE_SORT_ORDER_NULLS_OPTION, /* NULLS seen, FIRST or LAST expected */
+ PARSE_SORT_ORDER_ERROR, /* unexpected token */
+ PARSE_SORT_ORDER_DONE /* complete specification parsed */
+} ParseSortOrderState;
+/*
+ * parse_sort_order
+ *
+ * Parse a textual sort specification of the form
+ *     [ ASC | DESC ] [ NULLS { FIRST | LAST } ]
+ * Keywords are matched case-insensitively and are separated by any of the
+ * WHITESPACE characters.  A string that contains no tokens is accepted.
+ *
+ * *sort_asc is written only when an ASC or DESC token is seen, and it is
+ * read to derive the default null placement, so the caller must initialize
+ * it before the call.  When no NULLS clause is given and parsing succeeds,
+ * *nulls_first is set to the opposite of *sort_asc.
+ *
+ * Returns true if the whole string was parsed successfully, false otherwise.
+ * On failure the output arguments may already have been modified.
+ */
+static bool
+parse_sort_order(const char *str, bool *sort_asc, bool *nulls_first)
+{
+ char *token;
+ char *saveptr;
+ char *str_copy = pstrdup(str); /* strtok_r tokenizes in place, so work on a copy */
+ bool nulls_first_set = false;
+ ParseSortOrderState state = PARSE_SORT_ORDER_INIT;
+
+ token = strtok_r(str_copy, WHITESPACE, &saveptr);
+
+ while (token != NULL && state != PARSE_SORT_ORDER_ERROR)
+ {
+ switch (state)
+ {
+ case PARSE_SORT_ORDER_INIT: /* first token: ASC, DESC or NULLS */
+ if (pg_strcasecmp(token, "ASC") == 0)
+ {
+ *sort_asc = true;
+ state = PARSE_SORT_ORDER_DIRECTION_SET;
+ }
+ else if (pg_strcasecmp(token, "DESC") == 0)
+ {
+ *sort_asc = false;
+ state = PARSE_SORT_ORDER_DIRECTION_SET;
+ }
+ else if (pg_strcasecmp(token, "NULLS") == 0)
+ state = PARSE_SORT_ORDER_NULLS_OPTION;
+ else
+ state = PARSE_SORT_ORDER_ERROR;
+ break;
+
+ case PARSE_SORT_ORDER_DIRECTION_SET: /* after ASC/DESC only NULLS may follow */
+ if (pg_strcasecmp(token, "NULLS") == 0)
+ state = PARSE_SORT_ORDER_NULLS_OPTION;
+ else
+ state = PARSE_SORT_ORDER_ERROR;
+ break;
+
+ case PARSE_SORT_ORDER_NULLS_OPTION: /* after NULLS expect FIRST or LAST */
+ if (pg_strcasecmp(token, "FIRST") == 0)
+ {
+ *nulls_first = true;
+ nulls_first_set = true;
+ state = PARSE_SORT_ORDER_DONE;
+ }
+ else if (pg_strcasecmp(token, "LAST") == 0)
+ {
+ *nulls_first = false;
+ nulls_first_set = true;
+ state = PARSE_SORT_ORDER_DONE;
+ }
+ else
+ state = PARSE_SORT_ORDER_ERROR;
+ break;
+
+ case PARSE_SORT_ORDER_DONE:
+ /* No more tokens should be processed after first/last */
+ state = PARSE_SORT_ORDER_ERROR;
+ break;
+
+ default:
+ state = PARSE_SORT_ORDER_ERROR;
+ break;
+ }
+
+ token = strtok_r(NULL, WHITESPACE, &saveptr);
+ }
+
+ if (state == PARSE_SORT_ORDER_INIT ||
+ state == PARSE_SORT_ORDER_DIRECTION_SET)
+ state = PARSE_SORT_ORDER_DONE; /* no NULLS clause given: still a valid specification */
+
+ if (state == PARSE_SORT_ORDER_NULLS_OPTION)
+ state = PARSE_SORT_ORDER_ERROR; /* NULLS without a following FIRST/LAST */
+
+ if (!nulls_first_set && state == PARSE_SORT_ORDER_DONE)
+ *nulls_first = !*sort_asc; /* default null placement follows the direction */
+
+ pfree(str_copy);
+ return state == PARSE_SORT_ORDER_DONE;
+}
+
+/*
+ * array_sort
+ *
+ * Sorts the array in either ascending or descending order.
+ *
+ * An array with no dimensions is returned unchanged.  For a one-dimensional
+ * array the elements are sorted with the element type's ordering operator.
+ * For an array with more dimensions, the iterator is created with a slice
+ * dimension of ARR_NDIM - 1 and the values it yields are sorted with the
+ * ordering operator of the corresponding array type.
+ *
+ * The optional second argument is a text sort specification handled by
+ * parse_sort_order(); without it the sort is ascending with nulls last.
+ * The collation is taken from PG_GET_COLLATION().
+ *
+ * NOTE(review): the type cache entry kept in fn_extra is looked up with only
+ * the operator flag for the current direction, and the cache check does not
+ * consider the direction -- confirm that a later call with the opposite
+ * direction cannot reuse an entry whose other operator was never requested.
+ */
+Datum
+array_sort(PG_FUNCTION_ARGS)
+{
+ ArrayType *array = PG_GETARG_ARRAYTYPE_P(0);
+ text *dirstr = (fcinfo->nargs > 1) ? PG_GETARG_TEXT_PP(1) : NULL; /* NULL when no direction argument was passed */
+ bool sort_asc = true;
+ bool nulls_first = false;
+ Oid elmtyp;
+ Oid array_type;
+ Oid collation = PG_GET_COLLATION();
+ TypeCacheEntry *typentry;
+ Tuplesortstate *tuplesortstate;
+ ArrayIterator array_iterator;
+ Datum value;
+ bool isnull;
+ ArrayBuildStateAny *astate = NULL;
+
+ if (ARR_NDIM(array) < 1)
+ PG_RETURN_ARRAYTYPE_P(array); /* no dimensions: nothing to sort */
+
+ if (dirstr != NULL)
+ {
+ if (!parse_sort_order(text_to_cstring(dirstr), &sort_asc, &nulls_first))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("second parameter must be a valid sort direction")));
+ }
+
+ elmtyp = ARR_ELEMTYPE(array);
+ typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra; /* entry saved by an earlier call, if any */
+ if (ARR_NDIM(array) == 1)
+ {
+ if (typentry == NULL || typentry->type_id != elmtyp)
+ {
+ typentry = lookup_type_cache(elmtyp, sort_asc ? TYPECACHE_LT_OPR : TYPECACHE_GT_OPR);
+ if ((sort_asc && !OidIsValid(typentry->lt_opr)) ||
+ (!sort_asc && !OidIsValid(typentry->gt_opr)))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify an ordering operator for type %s",
+ format_type_be(elmtyp))));
+ fcinfo->flinfo->fn_extra = (void *) typentry;
+ }
+ }
+ else
+ {
+ if (typentry == NULL || typentry->typelem != elmtyp)
+ {
+ array_type = get_array_type(elmtyp); /* sort key type is the array type over elmtyp */
+ if (!OidIsValid(array_type))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("could not find array type for data type %s",
+ format_type_be(elmtyp))));
+ typentry = lookup_type_cache(array_type, sort_asc ? TYPECACHE_LT_OPR : TYPECACHE_GT_OPR);
+ if ((sort_asc && !OidIsValid(typentry->lt_opr)) ||
+ (!sort_asc && !OidIsValid(typentry->gt_opr)))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify an ordering operator for type %s",
+ format_type_be(elmtyp))));
+ fcinfo->flinfo->fn_extra = (void *) typentry;
+ }
+ }
+
+ tuplesortstate = tuplesort_begin_datum(typentry->type_id,
+ sort_asc ? typentry->lt_opr : typentry->gt_opr,
+ collation,
+ nulls_first, work_mem, NULL, false);
+
+ array_iterator = array_create_iterator(array, ARR_NDIM(array) - 1, NULL);
+ while (array_iterate(array_iterator, &value, &isnull))
+ {
+ tuplesort_putdatum(tuplesortstate, value, isnull);
+ }
+ array_free_iterator(array_iterator);
+
+ /*
+ * Do the sort.
+ */
+ tuplesort_performsort(tuplesortstate);
+
+ while (tuplesort_getdatum(tuplesortstate, true, false, &value, &isnull, NULL))
+ {
+ astate = accumArrayResultAny(astate, value, isnull,
+ typentry->type_id, CurrentMemoryContext);
+ }
+
+ tuplesort_end(tuplesortstate);
+
+ /* Avoid leaking memory when handed toasted input */
+ PG_FREE_IF_COPY(array, 0);
+ PG_RETURN_DATUM(makeArrayResultAny(astate, CurrentMemoryContext, true));
+}
+/*
+ * array_sort_order
+ *
+ * Entry point that forwards its call information unchanged to array_sort().
+ * The pg_proc.dat entry for array_sort(anyarray, text) in this patch names
+ * this function as its prosrc.
+ */
+Datum
+array_sort_order(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo);
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 7c0b74fe05..a92fd3a6c2 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1741,6 +1741,12 @@
{ oid => '6216', descr => 'take samples from array',
proname => 'array_sample', provolatile => 'v', prorettype => 'anyarray',
proargtypes => 'anyarray int4', prosrc => 'array_sample' },
+{ oid => '8810', descr => 'sort array',
+ proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+ proargtypes => 'anyarray', prosrc => 'array_sort'},
+{ oid => '8811', descr => 'sort array',
+ proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+ proargtypes => 'anyarray text', prosrc => 'array_sort_order'},
{ oid => '3816', descr => 'array typanalyze',
proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool',
proargtypes => 'internal', prosrc => 'array_typanalyze' },
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index a6d81fd5f9..3c733e5aab 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -2703,3 +2703,126 @@ SELECT array_sample('{1,2,3,4,5,6}'::int[], -1); -- fail
ERROR: sample size must be between 0 and 6
SELECT array_sample('{1,2,3,4,5,6}'::int[], 7); --fail
ERROR: sample size must be between 0 and 6
+-- array_sort
+SELECT array_sort('{}'::int[]);
+ array_sort
+------------
+ {}
+(1 row)
+
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+ array_sort
+---------------
+ {1,2,3,4,5,6}
+(1 row)
+
+SELECT array_sort('{1,3,5,2,4,6}'::int[], 'desc');
+ array_sort
+---------------
+ {6,5,4,3,2,1}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'asc');
+ array_sort
+---------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'desc');
+ array_sort
+---------------------------
+ {6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+ array_sort
+---------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[], 'desc');
+ array_sort
+---------------------------
+ {6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
+ array_sort
+-----------------------
+ {Abc,CCC,bar,bbc,foo}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C", 'asc');
+ array_sort
+-----------------------
+ {Abc,CCC,bar,bbc,foo}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C", 'desc');
+ array_sort
+-----------------------
+ {foo,bbc,bar,CCC,Abc}
+(1 row)
+
+-- nulls first/last tests
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C");
+ array_sort
+----------------------------
+ {Abc,CCC,bar,bbc,foo,NULL}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'asc');
+ array_sort
+----------------------------
+ {Abc,CCC,bar,bbc,foo,NULL}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'desc');
+ array_sort
+----------------------------
+ {NULL,foo,bbc,bar,CCC,Abc}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'nulls first');
+ array_sort
+----------------------------
+ {NULL,Abc,CCC,bar,bbc,foo}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'nulls first');
+ array_sort
+----------------------------
+ {NULL,Abc,CCC,bar,bbc,foo}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'asc nulls first');
+ array_sort
+----------------------------
+ {NULL,Abc,CCC,bar,bbc,foo}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'asc nulls last');
+ array_sort
+----------------------------
+ {Abc,CCC,bar,bbc,foo,NULL}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'desc nulls first');
+ array_sort
+----------------------------
+ {NULL,foo,bbc,bar,CCC,Abc}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'desc nulls last');
+ array_sort
+----------------------------
+ {foo,bbc,bar,CCC,Abc,NULL}
+(1 row)
+
+-- multidimensional array tests
+SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
+ array_sort
+---------------------
+ {{2,1},{2,4},{6,5}}
+(1 row)
+
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index faa376e060..aa5fd75e6e 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1338,6 +1338,19 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse
t | t
(1 row)
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+ array_sort
+------------
+ {a,B}
+(1 row)
+
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+ array_sort
+------------
+ {B,a}
+(1 row)
+
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index 47058dfde5..821c4eab0b 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -827,3 +827,27 @@ SELECT array_dims(array_sample('[-1:2][2:3]={{1,2},{3,NULL},{5,6},{7,8}}'::int[]
SELECT array_dims(array_sample('{{{1,2},{3,NULL}},{{5,6},{7,8}},{{9,10},{11,12}}}'::int[], 2));
SELECT array_sample('{1,2,3,4,5,6}'::int[], -1); -- fail
SELECT array_sample('{1,2,3,4,5,6}'::int[], 7); --fail
+
+-- array_sort
+SELECT array_sort('{}'::int[]);
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+SELECT array_sort('{1,3,5,2,4,6}'::int[], 'desc');
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'asc');
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'desc');
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[], 'desc');
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C", 'asc');
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C", 'desc');
+-- nulls first/last tests
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C");
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'asc');
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'desc');
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'nulls first');
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'nulls first');
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'asc nulls first');
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'asc nulls last');
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'desc nulls first');
+SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'desc nulls last');
+-- multidimensional array tests
+SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index 80f28a97d7..3c739d332b 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -536,6 +536,10 @@ CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=second
SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive;
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 57de1acff3..f085b3cf87 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -2025,6 +2025,7 @@ ParseLoc
ParseNamespaceColumn
ParseNamespaceItem
ParseParamRefHook
+ParseSortOrderState
ParseState
ParsedLex
ParsedScript
--
2.39.5
On Wed, Oct 23, 2024 at 10:28 PM Junwang Zhao <zhjwpku@gmail.com> wrote:
PFA v7 with multi-array support.
if (ARR_NDIM(array) == 1)
{
}
else
{
}
can be simplified.
i think beginning part of array_sort can be like the following:
---------------------------------------------------------------------
if (ARR_NDIM(array) < 1)
PG_RETURN_ARRAYTYPE_P(array);
if (dirstr != NULL)
{
if (!parse_sort_order(text_to_cstring(dirstr), &sort_asc, &nulls_first))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("second parameter must be a valid sort
direction")));
}
elmtyp = ARR_ELEMTYPE(array);
if (ARR_NDIM(array) > 1)
elmtyp = get_array_type(elmtyp);
typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra;
if (typentry == NULL || typentry->type_id != elmtyp)
{
typentry = lookup_type_cache(elmtyp, sort_asc ?
TYPECACHE_LT_OPR : TYPECACHE_GT_OPR);
if ((sort_asc && !OidIsValid(typentry->lt_opr)) ||
(!sort_asc && !OidIsValid(typentry->gt_opr)))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_FUNCTION),
errmsg("could not identify an ordering operator
for type %s",
format_type_be(elmtyp))));
fcinfo->flinfo->fn_extra = (void *) typentry;
}
---------------------------------------------------------------------
/*
* array_sort
*
* Sorts the array in either ascending or descending order.
* The array must be empty or one-dimensional.
*/
comments need to be updated.
typedef enum
PARSE_SORT_ORDER_DONE
} ParseSortOrderState;
last one, should have comma, like
"PARSE_SORT_ORDER_DONE, "
On Thu, Oct 24, 2024 at 8:40 PM jian he <jian.universality@gmail.com> wrote:
On Wed, Oct 23, 2024 at 10:28 PM Junwang Zhao <zhjwpku@gmail.com> wrote:
PFA v7 with multi-array support.
if (ARR_NDIM(array) == 1)
{
}
else
{
}
can be simplified.
i think beginning part of array_sort can be like the following:
---------------------------------------------------------------------
if (ARR_NDIM(array) < 1)
PG_RETURN_ARRAYTYPE_P(array);
if (dirstr != NULL)
{
if (!parse_sort_order(text_to_cstring(dirstr), &sort_asc, &nulls_first))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("second parameter must be a valid sort
direction")));
}
elmtyp = ARR_ELEMTYPE(array);
if (ARR_NDIM(array) > 1)
elmtyp = get_array_type(elmtyp);
I'm wondering should we cache the type entry for both element type and
the corresponding array type, for example if we have a table:
create table t(b int[]);
insert into t values ('{1,3}'),('{{2,3}}'),('{{1,2},{0,2}}');
with 1-d array and m-d array interleaved, then the following query will
call lookup_type_cache multiple times.
select array_sort((t.b)) from t;
typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra;
if (typentry == NULL || typentry->type_id != elmtyp)
{
typentry = lookup_type_cache(elmtyp, sort_asc ?
TYPECACHE_LT_OPR : TYPECACHE_GT_OPR);
if ((sort_asc && !OidIsValid(typentry->lt_opr)) ||
(!sort_asc && !OidIsValid(typentry->gt_opr)))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_FUNCTION),
errmsg("could not identify an ordering operator
for type %s",
format_type_be(elmtyp))));
fcinfo->flinfo->fn_extra = (void *) typentry;
}
---------------------------------------------------------------------
/*
* array_sort
*
* Sorts the array in either ascending or descending order.
* The array must be empty or one-dimensional.
*/
comments need to be updated.
will fix it in the next version of patch.
typedef enum
PARSE_SORT_ORDER_DONE
} ParseSortOrderState;
last one, should have comma, like
"PARSE_SORT_ORDER_DONE, "
will fix it.
--
Regards
Junwang Zhao
Hi,
It's hardly "general purpose" if it randomly refuses to
sort certain types. I would say it should be able to sort
anything that ORDER BY will handle --- and that certainly
includes the cases shown here.
I wonder how useful / convenient the new function will be considering
that we already have CTEs and can do:
SELECT array_agg(x ORDER BY x) FROM unnest(ARRAY[5,1,3,2,4]) AS x;
Perhaps there are use cases I didn't consider?
--
Best regards,
Aleksander Alekseev
On Thu, Oct 24, 2024 at 7:58 AM Aleksander Alekseev <
aleksander@timescale.com> wrote:
Hi,
It's hardly "general purpose" if it randomly refuses to
sort certain types. I would say it should be able to sort
anything that ORDER BY will handle --- and that certainly
includes the cases shown here.
I wonder how useful / convenient the new function will be considering
that we already have CTEs and can do:
SELECT array_agg(x ORDER BY x) FROM unnest(ARRAY[5,1,3,2,4]) AS x;
Perhaps there are use cases I didn't consider?
Succinctness of expression. Plus I'm under the impression that a function
doing this is going to be somewhat faster than composing two functions
together within a multi-node subtree.
I feel like the same observation could have been made for array_shuffle but
we added that. This function being added feels to me like just completing
the set.
David J.
Hi David,
It's hardly "general purpose" if it randomly refuses to
sort certain types. I would say it should be able to sort
anything that ORDER BY will handle --- and that certainly
includes the cases shown here.
I wonder how useful / convenient the new function will be considering
that we already have CTEs and can do:
SELECT array_agg(x ORDER BY x) FROM unnest(ARRAY[5,1,3,2,4]) AS x;
Perhaps there are use cases I didn't consider?
Succinctness of expression. Plus I'm under the impression that a function doing this is going to be somewhat faster than composing two functions together within a multi-node subtree.
I feel like the same observation could have been made for array_shuffle but we added that. This function being added feels to me like just completing the set.
Right. To clarify, I'm not opposed to array_sort(). In fact personally
I would prefer using it instead of array_agg() + unnest().
Just making an observation / thinking out loud that the requirement to
support everything ORDER BY handles / supports (and will handle /
support?) might make this function impractical to maintain.
array_shuffle() or a recently proposed array_reverse() [1] are rather
simple since they just move the array elements without looking at
them. array_sort() does look at the elements and thus is very
different.
Particularly, does the function really need to support dir => asc |
desc | asc nulls first | etc... ? Maybe array_sort() + array_reverse(
array_sort() ) will handle most of the practical cases? I don't know.
[1]: https://commitfest.postgresql.org/50/5314/
--
Best regards,
Aleksander Alekseev
On Thu, Oct 24, 2024 at 8:27 AM Aleksander Alekseev <
aleksander@timescale.com> wrote:
Just making an observation / thinking out loud that the requirement to
support everything ORDER BY handles / supports (and will handle /
support?) might make this function impractical to maintain.
Particularly, does the function really need to support dir => asc |
desc | asc nulls first | etc... ? Maybe array_sort() + array_reverse(
array_sort() ) will handle most of the practical cases? I don't know.
Composing function calls here seems quite undesirable from a performance
standpoint, but maybe I over-estimate the cost of
exploding-manipulating-freezing an array datum. Also, while I'm not in a
good position to judge the challenge of implementing the sort parameters I
would rather have them than not since the order by API has them (plus
performance). I also, maybe unreasonably, believe that our extensible type
system has already limited the maintenance burden.
David J.
"David G. Johnston" <david.g.johnston@gmail.com> writes:
Composing function calls here seems quite undesirable from a performance
standpoint, but maybe I over-estimate the cost of
exploding-manipulating-freezing an array datum. Also, while I'm not in a
good position to judge the challenge of implementing the sort parameters I
would rather have them than not since the order by API has them (plus
performance). I also, maybe unreasonably, believe that our extensible type
system has already limited the maintenance burden.
Oh! I had not actually looked at the details of what was being
proposed here. I imagined "array_sort(anyarray)" and the sort would
use the default sort order for the array's element type. This
business with a textual representation of a sort clause seems like
over-engineering ... except that it's also under-engineered, because
the parsing is lame and incomplete. (No USING option, and the fact
that collation comes from somewhere else seems impossibly confusing.)
Let's drop that. As already noted, if you need a non-default sort
order you can build what you want with a sub-select. The ambition
here should be to make easy cases easy.
regards, tom lane
On Thu, Oct 24, 2024 at 9:06 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:
This business with a textual representation of a sort clause seems like
over-engineering ... except that it's also under-engineered, because
the parsing is lame and incomplete. (No USING option, and the fact
that collation comes from somewhere else seems impossibly confusing.)
Let's drop that.
I can accept this outcome though an optional three-valued boolean sort
order (ascending and descending only) I'd argue is worth keeping. null
value placement too I guess, three-valued boolean (nulls_first).
David J.
Hi,
I can accept this outcome though an optional three-valued boolean sort order (ascending and descending only) I'd argue is worth keeping. null value placement too I guess, three-valued boolean (nulls_first).
Perhaps these optional arguments deserve separate discussions. I
suggest merging something everyone agrees on first. This will simplify
the review process and allow us to deliver value to the users quickly.
Arguments like `reverse => true` and `nulls_first => true` can always
be implemented and added as separate patches.
--
Best regards,
Aleksander Alekseev
On Fri, Oct 25, 2024 at 1:19 AM Aleksander Alekseev
<aleksander@timescale.com> wrote:
Hi,
I can accept this outcome though an optional three-valued boolean sort order (ascending and descending only) I'd argue is worth keeping. null value placement too I guess, three-valued boolean (nulls_first).
Perhaps these optional arguments deserve separate discussions. I
suggest merging something everyone agrees on first. This will simplify
the review process and allow us to deliver value to the users quickly.
Arguments like `reverse => true` and `nulls_first => true` can always
be implemented and added as separate patches.
As this patch uses the tuplesort infrastructure, we need to supply the
sortOperator, sortCollation and nullsFirstFlag, I tend to agree with
David. I admit that the parsing part is not good, so I will remove it
by using two boolean parameters Jian suggested earlier.
Will send out another version by tomorrow.
--
Best regards,
Aleksander Alekseev
--
Regards
Junwang Zhao
On Fri, Oct 25, 2024 at 8:02 PM Junwang Zhao <zhjwpku@gmail.com> wrote:
On Fri, Oct 25, 2024 at 1:19 AM Aleksander Alekseev
<aleksander@timescale.com> wrote:
Hi,
I can accept this outcome though an optional three-valued boolean sort order (ascending and descending only) I'd argue is worth keeping. null value placement too I guess, three-valued boolean (nulls_first).
Perhaps these optional arguments deserve separate discussions. I
suggest merging something everyone agrees on first. This will simplify
the review process and allow us to deliver value to the users quickly.
Arguments like `reverse => true` and `nulls_first => true` can always
be implemented and added as separate patches.
As this patch uses the tuplesort infrastructure, we need to supply the
sortOperator, sortCollation and nullsFirstFlag, I tend to agree with
David. I admit that the parsing part is not good, so I will remove it
by using two boolean parameters Jian suggested earlier.
Will send out another version by tomorrow.
Based on the previous discussion, I split it into two patches in V8.
0001 is the general sort part without `is_ascending` or `nulls_first`,
the sort order is determined by the "<" operator of the element type.
It also cached the type entry of both eletyp and the corresponding
array type.
0002 adds the `is_ascending` and `nulls_first` part, it now uses
two boolean parameters instead of parsing one text parameter.
--
Best regards,
Aleksander Alekseev
--
Regards
Junwang Zhao
--
Regards
Junwang Zhao
Attachments:
v8-0002-support-sort-order-and-nullsfirst-flag.patchapplication/octet-stream; name=v8-0002-support-sort-order-and-nullsfirst-flag.patchDownload
From 5a89f51da4ce7adc5d26efab19b199507300dd0f Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Sat, 26 Oct 2024 03:53:17 +0000
Subject: [PATCH v8 2/2] support sort order and nullsfirst flag
Signed-off-by: Junwang Zhao <zhjwpku@gmail.com>
---
doc/src/sgml/func.sgml | 11 +++++--
src/backend/utils/adt/array_userfuncs.c | 42 ++++++++++++++++++++-----
src/include/catalog/pg_proc.dat | 6 ++++
src/test/regress/expected/arrays.out | 26 +++++++++++++++
src/test/regress/sql/arrays.sql | 8 +++++
5 files changed, 84 insertions(+), 9 deletions(-)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 668fe5d358..129699aa8a 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -20426,12 +20426,19 @@ SELECT NULLIF(value, '(none)') ...
<indexterm>
<primary>array_sort</primary>
</indexterm>
- <function>array_sort</function> ( <type>anyarray</type> <optional> COLLATE <replaceable>collation_name</replaceable> </optional>)
+ <function>array_sort</function> ( <type>anyarray</type> <optional> COLLATE <replaceable>collation_name</replaceable> </optional> <optional>, <parameter>is_ascending</parameter> <type>boolean</type> <optional>, <parameter>nulls_first</parameter> <type>boolean</type></optional></optional>)
<returnvalue>anyarray</returnvalue>
</para>
<para>
- Sorts the first dimension of the array. If the <literal>COLLATE</literal> option is specified then sorting is based on <replaceable>collation_name</replaceable>, otherwise
+ Sorts the first dimension of the array.
+ </para>
+ <para>
+ If the <literal>COLLATE</literal> option is specified then sorting is based on <replaceable>collation_name</replaceable>, otherwise
using array element type's collation.
+ If <parameter>is_ascending</parameter> is true then sort by ascending order, otherwise descending order.
+ <parameter>is_ascending</parameter> defaults to true.
+ If <parameter>nulls_first</parameter> is true then nulls appear before non-null values, otherwise nulls appear after non-null values.
+ <parameter>nulls_first</parameter> defaults to the opposite of <parameter>is_ascending</parameter> if not provided.
</para>
<para>
<literal>array_sort(ARRAY[[2,4],[2,1],[6,5]])</literal>
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index 9b775313ca..1ff61033b2 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -1698,7 +1698,6 @@ typedef struct ArraySortCachedInfo
* array_sort
*
* Sorts the first dimension of the array.
- * The sort order is determined by the "<" operator of the element type.
*/
Datum
array_sort(PG_FUNCTION_ARGS)
@@ -1714,10 +1713,23 @@ array_sort(PG_FUNCTION_ARGS)
Datum value;
bool isnull;
ArrayBuildStateAny *astate = NULL;
+ bool is_ascending = true;
+ bool nulls_first = false;
if (ARR_NDIM(array) < 1)
PG_RETURN_ARRAYTYPE_P(array);
+ if (PG_NARGS() > 1)
+ {
+ is_ascending = PG_GETARG_BOOL(1);
+
+ /*
+ * If nulls_first not provided, it defaults to the opposite of
+ * is_ascending.
+ */
+ nulls_first = PG_NARGS() > 2 ? PG_GETARG_BOOL(2) : !is_ascending;
+ }
+
elmtyp = ARR_ELEMTYPE(array);
cache_info = (ArraySortCachedInfo *) fcinfo->flinfo->fn_extra;
if (cache_info == NULL)
@@ -1735,8 +1747,10 @@ array_sort(PG_FUNCTION_ARGS)
typentry = cache_info->typentry;
if (typentry == NULL || typentry->type_id != elmtyp)
{
- typentry = lookup_type_cache(elmtyp, TYPECACHE_LT_OPR);
- if (!OidIsValid(typentry->lt_opr))
+ typentry = lookup_type_cache(elmtyp,
+ is_ascending ? TYPECACHE_LT_OPR : TYPECACHE_GT_OPR);
+ if ((is_ascending && !OidIsValid(typentry->lt_opr)) ||
+ (!is_ascending && !OidIsValid(typentry->gt_opr)))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_FUNCTION),
errmsg("could not identify ordering operator for type %s",
@@ -1756,8 +1770,10 @@ array_sort(PG_FUNCTION_ARGS)
(errcode(ERRCODE_UNDEFINED_OBJECT),
errmsg("could not find array type for data type %s",
format_type_be(elmtyp))));
- typentry = lookup_type_cache(array_type, TYPECACHE_LT_OPR);
- if (!OidIsValid(typentry->lt_opr))
+ typentry = lookup_type_cache(array_type,
+ is_ascending ? TYPECACHE_LT_OPR : TYPECACHE_GT_OPR);
+ if ((is_ascending && !OidIsValid(typentry->lt_opr)) ||
+ (!is_ascending && !OidIsValid(typentry->gt_opr)))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_FUNCTION),
errmsg("could not identify ordering operator for type %s",
@@ -1767,9 +1783,9 @@ array_sort(PG_FUNCTION_ARGS)
}
tuplesortstate = tuplesort_begin_datum(typentry->type_id,
- typentry->lt_opr,
+ is_ascending ? typentry->lt_opr : typentry->gt_opr,
collation,
- false, work_mem, NULL, false);
+ nulls_first, work_mem, NULL, false);
array_iterator = array_create_iterator(array, ARR_NDIM(array) - 1, NULL);
while (array_iterate(array_iterator, &value, &isnull))
@@ -1795,3 +1811,15 @@ array_sort(PG_FUNCTION_ARGS)
PG_FREE_IF_COPY(array, 0);
PG_RETURN_DATUM(makeArrayResultAny(astate, CurrentMemoryContext, true));
}
+/*
+ * array_sort_order
+ *
+ * Entry point that forwards its call information unchanged to array_sort().
+ * The pg_proc.dat entry for array_sort(anyarray, bool) in this patch names
+ * this function as its prosrc.
+ */
+Datum
+array_sort_order(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo);
+}
+/*
+ * array_sort_order_nulls_first
+ *
+ * Entry point that forwards its call information unchanged to array_sort().
+ * The pg_proc.dat entry for array_sort(anyarray, bool, bool) in this patch
+ * names this function as its prosrc.
+ */
+Datum
+array_sort_order_nulls_first(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo);
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 79641da6ff..9bbc97445c 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1744,6 +1744,12 @@
{ oid => '8810', descr => 'sort array',
proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
proargtypes => 'anyarray', prosrc => 'array_sort'},
+{ oid => '8811', descr => 'sort array',
+ proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+ proargtypes => 'anyarray bool', prosrc => 'array_sort_order'},
+{ oid => '8812', descr => 'sort array',
+ proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+ proargtypes => 'anyarray bool bool', prosrc => 'array_sort_order_nulls_first'},
{ oid => '3816', descr => 'array typanalyze',
proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool',
proargtypes => 'internal', prosrc => 'array_typanalyze' },
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index ba1da71da1..f4781233e0 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -2728,6 +2728,32 @@ SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
{Abc,CCC,bar,bbc,foo}
(1 row)
+-- array_sort with order specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true);
+ array_sort
+--------------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6,NULL}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false);
+ array_sort
+--------------------------------
+ {NULL,6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+-- array_sort with order and nullsfirst flag specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, true);
+ array_sort
+--------------------------------
+ {NULL,1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, false);
+ array_sort
+--------------------------------
+ {6.6,5.5,4.4,3.3,2.2,1.1,NULL}
+(1 row)
+
-- multidimensional array tests
SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
array_sort
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index b2a814d195..b98e111329 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -834,5 +834,13 @@ SELECT array_sort('{1,3,5,2,4,6}'::int[]);
SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
+-- array_sort with order specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false);
+
+-- array_sort with order and nullsfirst flag specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, true);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, false);
+
-- multidimensional array tests
SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
--
2.39.5
v8-0001-general-purpose-array_sort.patchapplication/octet-stream; name=v8-0001-general-purpose-array_sort.patchDownload
From dd5a1cdb82212374da50cc52ce69c88544011356 Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Fri, 27 Sep 2024 13:05:40 +0000
Subject: [PATCH v8 1/2] general purpose array_sort
Sorts anyarray on its first dimension.
The sort order is determined by the "<" operator.
Signed-off-by: Junwang Zhao <zhjwpku@gmail.com>
---
doc/src/sgml/func.sgml | 18 +++
src/backend/utils/adt/array_userfuncs.c | 110 ++++++++++++++++++
src/include/catalog/pg_proc.dat | 3 +
src/test/regress/expected/arrays.out | 32 +++++
.../regress/expected/collate.icu.utf8.out | 13 +++
src/test/regress/sql/arrays.sql | 9 ++
src/test/regress/sql/collate.icu.utf8.sql | 4 +
src/tools/pgindent/typedefs.list | 1 +
8 files changed, 190 insertions(+)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 7be0324ac8..668fe5d358 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -20421,6 +20421,24 @@ SELECT NULLIF(value, '(none)') ...
</para></entry>
</row>
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <indexterm>
+ <primary>array_sort</primary>
+ </indexterm>
+ <function>array_sort</function> ( <type>anyarray</type> <optional> COLLATE <replaceable>collation_name</replaceable> </optional>)
+ <returnvalue>anyarray</returnvalue>
+ </para>
+ <para>
+ Sorts the first dimension of the array. If the <literal>COLLATE</literal> option is specified then sorting is based on <replaceable>collation_name</replaceable>, otherwise
+ using array element type's collation.
+ </para>
+ <para>
+ <literal>array_sort(ARRAY[[2,4],[2,1],[6,5]])</literal>
+ <returnvalue>{{2,1},{2,4},{6,5}}</returnvalue>
+ </para></entry>
+ </row>
+
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm id="function-array-to-string">
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index 6599be2ec5..9b775313ca 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -16,11 +16,13 @@
#include "common/int.h"
#include "common/pg_prng.h"
#include "libpq/pqformat.h"
+#include "miscadmin.h"
#include "port/pg_bitutils.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/lsyscache.h"
+#include "utils/tuplesort.h"
#include "utils/typcache.h"
/*
@@ -1685,3 +1687,111 @@ array_sample(PG_FUNCTION_ARGS)
PG_RETURN_ARRAYTYPE_P(result);
}
+
+typedef struct ArraySortCachedInfo
+{
+ TypeCacheEntry *typentry;
+ TypeCacheEntry *array_typentry;
+} ArraySortCachedInfo;
+
+/*
+ * array_sort
+ *
+ * Sorts the first dimension of the array.
+ * The sort order is determined by the "<" operator of the element type.
+ */
+Datum
+array_sort(PG_FUNCTION_ARGS)
+{
+ ArrayType *array = PG_GETARG_ARRAYTYPE_P(0);
+ Oid elmtyp;
+ Oid array_type;
+ Oid collation = PG_GET_COLLATION();
+ ArraySortCachedInfo *cache_info;
+ TypeCacheEntry *typentry;
+ Tuplesortstate *tuplesortstate;
+ ArrayIterator array_iterator;
+ Datum value;
+ bool isnull;
+ ArrayBuildStateAny *astate = NULL;
+
+ if (ARR_NDIM(array) < 1)
+ PG_RETURN_ARRAYTYPE_P(array);
+
+ elmtyp = ARR_ELEMTYPE(array);
+ cache_info = (ArraySortCachedInfo *) fcinfo->flinfo->fn_extra;
+ if (cache_info == NULL)
+ {
+ cache_info = (ArraySortCachedInfo *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ sizeof(ArraySortCachedInfo));
+ cache_info->typentry = NULL;
+ cache_info->array_typentry = NULL;
+ fcinfo->flinfo->fn_extra = (void *) cache_info;
+ }
+
+ if (ARR_NDIM(array) == 1)
+ {
+ /* Finds the ordering operator for the type for 1-D arrays */
+ typentry = cache_info->typentry;
+ if (typentry == NULL || typentry->type_id != elmtyp)
+ {
+ typentry = lookup_type_cache(elmtyp, TYPECACHE_LT_OPR);
+ if (!OidIsValid(typentry->lt_opr))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify ordering operator for type %s",
+ format_type_be(elmtyp))));
+ cache_info->typentry = typentry;
+ }
+ }
+ else
+ {
+ /* Finds the ordering operator for the array type for multi-D arrays */
+ typentry = cache_info->array_typentry;
+ if (typentry == NULL || typentry->typelem != elmtyp)
+ {
+ array_type = get_array_type(elmtyp);
+ if (!OidIsValid(array_type))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("could not find array type for data type %s",
+ format_type_be(elmtyp))));
+ typentry = lookup_type_cache(array_type, TYPECACHE_LT_OPR);
+ if (!OidIsValid(typentry->lt_opr))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify ordering operator for type %s",
+ format_type_be(array_type))));
+ cache_info->array_typentry = typentry;
+ }
+ }
+
+ tuplesortstate = tuplesort_begin_datum(typentry->type_id,
+ typentry->lt_opr,
+ collation,
+ false, work_mem, NULL, false);
+
+ array_iterator = array_create_iterator(array, ARR_NDIM(array) - 1, NULL);
+ while (array_iterate(array_iterator, &value, &isnull))
+ {
+ tuplesort_putdatum(tuplesortstate, value, isnull);
+ }
+ array_free_iterator(array_iterator);
+
+ /*
+ * Do the sort.
+ */
+ tuplesort_performsort(tuplesortstate);
+
+ while (tuplesort_getdatum(tuplesortstate, true, false, &value, &isnull, NULL))
+ {
+ astate = accumArrayResultAny(astate, value, isnull,
+ typentry->type_id, CurrentMemoryContext);
+ }
+
+ tuplesort_end(tuplesortstate);
+
+ /* Avoid leaking memory when handed toasted input */
+ PG_FREE_IF_COPY(array, 0);
+ PG_RETURN_DATUM(makeArrayResultAny(astate, CurrentMemoryContext, true));
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 1ec0d6f6b5..79641da6ff 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1741,6 +1741,9 @@
{ oid => '6216', descr => 'take samples from array',
proname => 'array_sample', provolatile => 'v', prorettype => 'anyarray',
proargtypes => 'anyarray int4', prosrc => 'array_sample' },
+{ oid => '8810', descr => 'sort array',
+ proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+ proargtypes => 'anyarray', prosrc => 'array_sort'},
{ oid => '3816', descr => 'array typanalyze',
proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool',
proargtypes => 'internal', prosrc => 'array_typanalyze' },
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index a6d81fd5f9..ba1da71da1 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -2703,3 +2703,35 @@ SELECT array_sample('{1,2,3,4,5,6}'::int[], -1); -- fail
ERROR: sample size must be between 0 and 6
SELECT array_sample('{1,2,3,4,5,6}'::int[], 7); --fail
ERROR: sample size must be between 0 and 6
+-- array_sort
+SELECT array_sort('{}'::int[]);
+ array_sort
+------------
+ {}
+(1 row)
+
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+ array_sort
+---------------
+ {1,2,3,4,5,6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+ array_sort
+---------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
+ array_sort
+-----------------------
+ {Abc,CCC,bar,bbc,foo}
+(1 row)
+
+-- multidimensional array tests
+SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
+ array_sort
+---------------------
+ {{2,1},{2,4},{6,5}}
+(1 row)
+
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index faa376e060..aa5fd75e6e 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1338,6 +1338,19 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse
t | t
(1 row)
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+ array_sort
+------------
+ {a,B}
+(1 row)
+
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+ array_sort
+------------
+ {B,a}
+(1 row)
+
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index 47058dfde5..b2a814d195 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -827,3 +827,12 @@ SELECT array_dims(array_sample('[-1:2][2:3]={{1,2},{3,NULL},{5,6},{7,8}}'::int[]
SELECT array_dims(array_sample('{{{1,2},{3,NULL}},{{5,6},{7,8}},{{9,10},{11,12}}}'::int[], 2));
SELECT array_sample('{1,2,3,4,5,6}'::int[], -1); -- fail
SELECT array_sample('{1,2,3,4,5,6}'::int[], 7); --fail
+
+-- array_sort
+SELECT array_sort('{}'::int[]);
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
+
+-- multidimensional array tests
+SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index 80f28a97d7..3c739d332b 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -536,6 +536,10 @@ CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=second
SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive;
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 171a7dd5d2..eceba9605e 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -150,6 +150,7 @@ ArrayIOData
ArrayIterator
ArrayMapState
ArrayMetaState
+ArraySortCachedInfo
ArraySubWorkspace
ArrayToken
ArrayType
--
2.39.5
Hi,
Based on the previous discussion, I split it into two patches in V8.
0001 is the general sort part without `is_ascending` or `nulls_first`,
the sort order is determined by the "<" operator of the element type.
It also caches the type entries of both the element type and the corresponding
array type.
0002 adds the `is_ascending` and `nulls_first` part; it now uses
two boolean parameters instead of parsing one text parameter.
Thanks for the updated patch set. Here are some comments.
0001:
+{ oid => '8810', descr => 'sort array',
+ proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+ proargtypes => 'anyarray', prosrc => 'array_sort'},
I would expect that array_sort() should be IMMUTABLE. Is there a
reason for it to be VOLATILE?
+ <function>array_sort</function> ( <type>anyarray</type> <optional> COLLATE <replaceable>collation_name</replaceable> </optional>)
+ <returnvalue>anyarray</returnvalue>
It seems to me that the part about using COLLATE should be moved
below, to the description / examples section, since it's not part of
the function signature.
Also the description should be more specific about how NULLs are
sorted. NULLs also should be covered by tests.
0002:
<parameter>is_ascending</parameter>
I really believe this name is not the best one. I suggest using
`reverse => true`. `nulls_first` is OK.
+Datum
+array_sort_order(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo);
+}
+
+Datum
+array_sort_order_nulls_first(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo);
+}
Any reason not to specify array_sort in pg_proc.dat?
The tests cover is_ascending => true | false, which is OK, but only
(is_ascending => true, nulls_first => true) and (is_ascending => false,
nulls_first => false). For the case when both optional arguments are
specified you should test all 4 combinations.
--
Best regards,
Aleksander Alekseev
On Tue, Oct 29, 2024 at 12:48 AM Aleksander Alekseev
<aleksander@timescale.com> wrote:
0001:
+{ oid => '8810', descr => 'sort array',
+ proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+ proargtypes => 'anyarray', prosrc => 'array_sort'},
I would expect that array_sort() should be IMMUTABLE. Is there a
reason for it to be VOLATILE?
https://www.postgresql.org/docs/current/sql-createfunction.html says:
IMMUTABLE indicates that the function cannot modify the database and always
returns the same result when given the same argument values; that is, it does
not do database lookups or otherwise use information not directly present in its
argument list. If this option is given, any call of the function with
all-constant arguments can be immediately replaced with the function value.
+ {
+ typentry = lookup_type_cache(elmtyp, TYPECACHE_LT_OPR);
+ if (!OidIsValid(typentry->lt_opr))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify ordering operator for type %s",
+ format_type_be(elmtyp))));
This error can happen. I think this conflicts with the documented
description of IMMUTABLE.
Hi Jian,
IMMUTABLE indicates that the function cannot modify the database and always
returns the same result when given the same argument values; that is, it does
not do database lookups or otherwise use information not directly present in its
argument list. If this option is given, any call of the function with
all-constant arguments can be immediately replaced with the function value.
+ {
+ typentry = lookup_type_cache(elmtyp, TYPECACHE_LT_OPR);
+ if (!OidIsValid(typentry->lt_opr))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify ordering operator for type %s",
+ format_type_be(elmtyp))));
This error can happen. I think this conflicts with the documented
description of IMMUTABLE.
lookup_type_cache() is used at least by array_position() which is
marked as IMMUTABLE, so I believe this is fine. Similarly functions
dealing with timezones can return different results between the DBMS
restarts / updates, but we don't care and mark them IMMUTABLE anyway.
Otherwise we couldn't use these functions in functional indexes which
will make them rather useless.
--
Best regards,
Aleksander Alekseev
Hi Aleksander,
On Tue, Oct 29, 2024 at 12:48 AM Aleksander Alekseev
<aleksander@timescale.com> wrote:
Hi,
Based on the previous discussion, I split it into two patches in V8.
0001 is the general sort part without `is_ascending` or `nulls_first`,
the sort order is determined by the "<" operator of the element type.
It also caches the type entries of both the element type and the corresponding
array type.
0002 adds the `is_ascending` and `nulls_first` part; it now uses
two boolean parameters instead of parsing one text parameter.
Thanks for the updated patch set. Here are some comments.
0001:
+{ oid => '8810', descr => 'sort array',
+ proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+ proargtypes => 'anyarray', prosrc => 'array_sort'},
I would expect that array_sort() should be IMMUTABLE. Is there a
reason for it to be VOLATILE?
I saw Jian's reply about this, but I tend to agree with you, so I removed
provolatile => 'v'.
+ <function>array_sort</function> ( <type>anyarray</type> <optional> COLLATE <replaceable>collation_name</replaceable> </optional>)
+ <returnvalue>anyarray</returnvalue>
It seems to me that the part about using COLLATE should be moved
below, to the description / examples section, since it's not part of
the function signature.
Agreed. I fixed it in my own words, but would appreciate help with the wording.
Also the description should be more specific about how NULLs are
sorted. NULLs also should be covered by tests.
Fixed.
0002:
<parameter>is_ascending</parameter>
I really believe this name is not the best one. I suggest using
`reverse => true`. `nulls_first` is OK.
I'm not sure about this. I think `is_ascending` has a more precise
meaning, while `reverse` gives no hint about whether the result is ascending
or descending. I'll keep it for now; let's see what others think.
+Datum
+array_sort_order(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo);
+}
+
+Datum
+array_sort_order_nulls_first(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo);
+}
Any reason not to specify array_sort in pg_proc.dat?
It is specified in 0001 (see oid => '8810').
The tests cover is_ascending => true | false, which is OK, but only
(is_ascending => true, nulls_first => true) and (is_ascending => false,
nulls_first => false). For the case when both optional arguments are
specified you should test all 4 combinations.
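The two omitted combinations give the same results as the two calls that
specify only `is_ascending` (since `nulls_first` defaults to the opposite of
`is_ascending`); anyway, all 4 cases are added in v9.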
--
Best regards,
Aleksander Alekseev
--
Regards
Junwang Zhao
Attachments:
v9-0002-support-sort-order-and-nullsfirst-flag.patchapplication/octet-stream; name=v9-0002-support-sort-order-and-nullsfirst-flag.patchDownload
From a2ef7b07f26edeeac3a914f42a13a831e3df4419 Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Sat, 26 Oct 2024 03:53:17 +0000
Subject: [PATCH v9 2/2] support sort order and nullsfirst flag
Signed-off-by: Junwang Zhao <zhjwpku@gmail.com>
---
doc/src/sgml/func.sgml | 6 +++-
src/backend/utils/adt/array_userfuncs.c | 42 ++++++++++++++++++++-----
src/include/catalog/pg_proc.dat | 6 ++++
src/test/regress/expected/arrays.out | 38 ++++++++++++++++++++++
src/test/regress/sql/arrays.sql | 10 ++++++
5 files changed, 94 insertions(+), 8 deletions(-)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 4095e23567..abde23ffc3 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -20426,12 +20426,16 @@ SELECT NULLIF(value, '(none)') ...
<indexterm>
<primary>array_sort</primary>
</indexterm>
- <function>array_sort</function> ( <type>anyarray</type> )
+ <function>array_sort</function> ( <type>anyarray</type> <optional>, <parameter>is_ascending</parameter> <type>boolean</type> <optional>, <parameter>nulls_first</parameter> <type>boolean</type></optional></optional>)
<returnvalue>anyarray</returnvalue>
</para>
<para>
Sorts the first dimension of the array. The sort order is determined by the "<" operator of the element type, nulls will appear after non-null values.
The collation to use can be forced by adding a <literal>COLLATE</literal> clause to any of the arguments.
+ If <parameter>is_ascending</parameter> is true then sort by ascending order, otherwise descending order.
+ <parameter>is_ascending</parameter> defaults to true.
+ If <parameter>nulls_first</parameter> is true then nulls appear before non-null values, otherwise nulls appear after non-null values.
+ <parameter>nulls_first</parameter> defaults to the opposite of <parameter>is_ascending</parameter> if not provided.
</para>
<para>
<literal>array_sort(ARRAY[[2,4],[2,1],[6,5]])</literal>
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index 9b775313ca..1ff61033b2 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -1698,7 +1698,6 @@ typedef struct ArraySortCachedInfo
* array_sort
*
* Sorts the first dimension of the array.
- * The sort order is determined by the "<" operator of the element type.
*/
Datum
array_sort(PG_FUNCTION_ARGS)
@@ -1714,10 +1713,23 @@ array_sort(PG_FUNCTION_ARGS)
Datum value;
bool isnull;
ArrayBuildStateAny *astate = NULL;
+ bool is_ascending = true;
+ bool nulls_first = false;
if (ARR_NDIM(array) < 1)
PG_RETURN_ARRAYTYPE_P(array);
+ if (PG_NARGS() > 1)
+ {
+ is_ascending = PG_GETARG_BOOL(1);
+
+ /*
+ * If nulls_first not provided, it defaults to the opposite of
+ * is_ascending.
+ */
+ nulls_first = PG_NARGS() > 2 ? PG_GETARG_BOOL(2) : !is_ascending;
+ }
+
elmtyp = ARR_ELEMTYPE(array);
cache_info = (ArraySortCachedInfo *) fcinfo->flinfo->fn_extra;
if (cache_info == NULL)
@@ -1735,8 +1747,10 @@ array_sort(PG_FUNCTION_ARGS)
typentry = cache_info->typentry;
if (typentry == NULL || typentry->type_id != elmtyp)
{
- typentry = lookup_type_cache(elmtyp, TYPECACHE_LT_OPR);
- if (!OidIsValid(typentry->lt_opr))
+ typentry = lookup_type_cache(elmtyp,
+ is_ascending ? TYPECACHE_LT_OPR : TYPECACHE_GT_OPR);
+ if ((is_ascending && !OidIsValid(typentry->lt_opr)) ||
+ (!is_ascending && !OidIsValid(typentry->gt_opr)))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_FUNCTION),
errmsg("could not identify ordering operator for type %s",
@@ -1756,8 +1770,10 @@ array_sort(PG_FUNCTION_ARGS)
(errcode(ERRCODE_UNDEFINED_OBJECT),
errmsg("could not find array type for data type %s",
format_type_be(elmtyp))));
- typentry = lookup_type_cache(array_type, TYPECACHE_LT_OPR);
- if (!OidIsValid(typentry->lt_opr))
+ typentry = lookup_type_cache(array_type,
+ is_ascending ? TYPECACHE_LT_OPR : TYPECACHE_GT_OPR);
+ if ((is_ascending && !OidIsValid(typentry->lt_opr)) ||
+ (!is_ascending && !OidIsValid(typentry->gt_opr)))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_FUNCTION),
errmsg("could not identify ordering operator for type %s",
@@ -1767,9 +1783,9 @@ array_sort(PG_FUNCTION_ARGS)
}
tuplesortstate = tuplesort_begin_datum(typentry->type_id,
- typentry->lt_opr,
+ is_ascending ? typentry->lt_opr : typentry->gt_opr,
collation,
- false, work_mem, NULL, false);
+ nulls_first, work_mem, NULL, false);
array_iterator = array_create_iterator(array, ARR_NDIM(array) - 1, NULL);
while (array_iterate(array_iterator, &value, &isnull))
@@ -1795,3 +1811,15 @@ array_sort(PG_FUNCTION_ARGS)
PG_FREE_IF_COPY(array, 0);
PG_RETURN_DATUM(makeArrayResultAny(astate, CurrentMemoryContext, true));
}
+
+Datum
+array_sort_order(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo);
+}
+
+Datum
+array_sort_order_nulls_first(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo);
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 5609368772..1987ef249e 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1744,6 +1744,12 @@
{ oid => '8810', descr => 'sort array',
proname => 'array_sort', prorettype => 'anyarray',
proargtypes => 'anyarray', prosrc => 'array_sort'},
+{ oid => '8811', descr => 'sort array',
+ proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+ proargtypes => 'anyarray bool', prosrc => 'array_sort_order'},
+{ oid => '8812', descr => 'sort array',
+ proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+ proargtypes => 'anyarray bool bool', prosrc => 'array_sort_order_nulls_first'},
{ oid => '3816', descr => 'array typanalyze',
proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool',
proargtypes => 'internal', prosrc => 'array_typanalyze' },
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index da5ffe5eba..d4fa66dbf5 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -2734,6 +2734,44 @@ SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
{Abc,CCC,bar,bbc,foo,NULL}
(1 row)
+-- array_sort with order specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true);
+ array_sort
+--------------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6,NULL}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false);
+ array_sort
+--------------------------------
+ {NULL,6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+-- array_sort with order and nullsfirst flag specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, true);
+ array_sort
+--------------------------------
+ {NULL,1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, false);
+ array_sort
+--------------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6,NULL}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, true);
+ array_sort
+--------------------------------
+ {NULL,6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, false);
+ array_sort
+--------------------------------
+ {6.6,5.5,4.4,3.3,2.2,1.1,NULL}
+(1 row)
+
-- multidimensional array tests
SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
array_sort
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index b4cfb0b038..768332641b 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -835,5 +835,15 @@ SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
+-- array_sort with order specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false);
+
+-- array_sort with order and nullsfirst flag specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, true);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, false);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, true);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, false);
+
-- multidimensional array tests
SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
--
2.39.5
v9-0001-general-purpose-array_sort.patchapplication/octet-stream; name=v9-0001-general-purpose-array_sort.patchDownload
From 2f47117bd4d16782c9237b7459a1a535ec7f9ff9 Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Fri, 27 Sep 2024 13:05:40 +0000
Subject: [PATCH v9 1/2] general purpose array_sort
Sorts anyarray on its first dimension.
The sort order is determined by the "<" operator.
Signed-off-by: Junwang Zhao <zhjwpku@gmail.com>
---
doc/src/sgml/func.sgml | 18 +++
src/backend/utils/adt/array_userfuncs.c | 110 ++++++++++++++++++
src/include/catalog/pg_proc.dat | 3 +
src/test/regress/expected/arrays.out | 38 ++++++
.../regress/expected/collate.icu.utf8.out | 13 +++
src/test/regress/sql/arrays.sql | 10 ++
src/test/regress/sql/collate.icu.utf8.sql | 4 +
src/tools/pgindent/typedefs.list | 1 +
8 files changed, 197 insertions(+)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 7be0324ac8..4095e23567 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -20421,6 +20421,24 @@ SELECT NULLIF(value, '(none)') ...
</para></entry>
</row>
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <indexterm>
+ <primary>array_sort</primary>
+ </indexterm>
+ <function>array_sort</function> ( <type>anyarray</type> )
+ <returnvalue>anyarray</returnvalue>
+ </para>
+ <para>
+ Sorts the first dimension of the array. The sort order is determined by the "<" operator of the element type, nulls will appear after non-null values.
+ The collation to use can be forced by adding a <literal>COLLATE</literal> clause to any of the arguments.
+ </para>
+ <para>
+ <literal>array_sort(ARRAY[[2,4],[2,1],[6,5]])</literal>
+ <returnvalue>{{2,1},{2,4},{6,5}}</returnvalue>
+ </para></entry>
+ </row>
+
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm id="function-array-to-string">
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index 6599be2ec5..9b775313ca 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -16,11 +16,13 @@
#include "common/int.h"
#include "common/pg_prng.h"
#include "libpq/pqformat.h"
+#include "miscadmin.h"
#include "port/pg_bitutils.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/lsyscache.h"
+#include "utils/tuplesort.h"
#include "utils/typcache.h"
/*
@@ -1685,3 +1687,111 @@ array_sample(PG_FUNCTION_ARGS)
PG_RETURN_ARRAYTYPE_P(result);
}
+
+typedef struct ArraySortCachedInfo
+{
+ TypeCacheEntry *typentry;
+ TypeCacheEntry *array_typentry;
+} ArraySortCachedInfo;
+
+/*
+ * array_sort
+ *
+ * Sorts the first dimension of the array.
+ * The sort order is determined by the "<" operator of the element type.
+ */
+Datum
+array_sort(PG_FUNCTION_ARGS)
+{
+ ArrayType *array = PG_GETARG_ARRAYTYPE_P(0);
+ Oid elmtyp;
+ Oid array_type;
+ Oid collation = PG_GET_COLLATION();
+ ArraySortCachedInfo *cache_info;
+ TypeCacheEntry *typentry;
+ Tuplesortstate *tuplesortstate;
+ ArrayIterator array_iterator;
+ Datum value;
+ bool isnull;
+ ArrayBuildStateAny *astate = NULL;
+
+ if (ARR_NDIM(array) < 1)
+ PG_RETURN_ARRAYTYPE_P(array);
+
+ elmtyp = ARR_ELEMTYPE(array);
+ cache_info = (ArraySortCachedInfo *) fcinfo->flinfo->fn_extra;
+ if (cache_info == NULL)
+ {
+ cache_info = (ArraySortCachedInfo *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ sizeof(ArraySortCachedInfo));
+ cache_info->typentry = NULL;
+ cache_info->array_typentry = NULL;
+ fcinfo->flinfo->fn_extra = (void *) cache_info;
+ }
+
+ if (ARR_NDIM(array) == 1)
+ {
+ /* Finds the ordering operator for the type for 1-D arrays */
+ typentry = cache_info->typentry;
+ if (typentry == NULL || typentry->type_id != elmtyp)
+ {
+ typentry = lookup_type_cache(elmtyp, TYPECACHE_LT_OPR);
+ if (!OidIsValid(typentry->lt_opr))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify ordering operator for type %s",
+ format_type_be(elmtyp))));
+ cache_info->typentry = typentry;
+ }
+ }
+ else
+ {
+ /* Finds the ordering operator for the array type for multi-D arrays */
+ typentry = cache_info->array_typentry;
+ if (typentry == NULL || typentry->typelem != elmtyp)
+ {
+ array_type = get_array_type(elmtyp);
+ if (!OidIsValid(array_type))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("could not find array type for data type %s",
+ format_type_be(elmtyp))));
+ typentry = lookup_type_cache(array_type, TYPECACHE_LT_OPR);
+ if (!OidIsValid(typentry->lt_opr))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify ordering operator for type %s",
+ format_type_be(array_type))));
+ cache_info->array_typentry = typentry;
+ }
+ }
+
+ tuplesortstate = tuplesort_begin_datum(typentry->type_id,
+ typentry->lt_opr,
+ collation,
+ false, work_mem, NULL, false);
+
+ array_iterator = array_create_iterator(array, ARR_NDIM(array) - 1, NULL);
+ while (array_iterate(array_iterator, &value, &isnull))
+ {
+ tuplesort_putdatum(tuplesortstate, value, isnull);
+ }
+ array_free_iterator(array_iterator);
+
+ /*
+ * Do the sort.
+ */
+ tuplesort_performsort(tuplesortstate);
+
+ while (tuplesort_getdatum(tuplesortstate, true, false, &value, &isnull, NULL))
+ {
+ astate = accumArrayResultAny(astate, value, isnull,
+ typentry->type_id, CurrentMemoryContext);
+ }
+
+ tuplesort_end(tuplesortstate);
+
+ /* Avoid leaking memory when handed toasted input */
+ PG_FREE_IF_COPY(array, 0);
+ PG_RETURN_DATUM(makeArrayResultAny(astate, CurrentMemoryContext, true));
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 1ec0d6f6b5..5609368772 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1741,6 +1741,9 @@
{ oid => '6216', descr => 'take samples from array',
proname => 'array_sample', provolatile => 'v', prorettype => 'anyarray',
proargtypes => 'anyarray int4', prosrc => 'array_sample' },
+{ oid => '8810', descr => 'sort array',
+ proname => 'array_sort', prorettype => 'anyarray',
+ proargtypes => 'anyarray', prosrc => 'array_sort'},
{ oid => '3816', descr => 'array typanalyze',
proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool',
proargtypes => 'internal', prosrc => 'array_typanalyze' },
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index a6d81fd5f9..da5ffe5eba 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -2703,3 +2703,41 @@ SELECT array_sample('{1,2,3,4,5,6}'::int[], -1); -- fail
ERROR: sample size must be between 0 and 6
SELECT array_sample('{1,2,3,4,5,6}'::int[], 7); --fail
ERROR: sample size must be between 0 and 6
+-- array_sort
+SELECT array_sort('{}'::int[]);
+ array_sort
+------------
+ {}
+(1 row)
+
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+ array_sort
+---------------
+ {1,2,3,4,5,6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+ array_sort
+---------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
+ array_sort
+-----------------------
+ {Abc,CCC,bar,bbc,foo}
+(1 row)
+
+SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
+ array_sort
+----------------------------
+ {Abc,CCC,bar,bbc,foo,NULL}
+(1 row)
+
+-- multidimensional array tests
+SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
+ array_sort
+---------------------
+ {{2,1},{2,4},{6,5}}
+(1 row)
+
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index faa376e060..aa5fd75e6e 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1338,6 +1338,19 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse
t | t
(1 row)
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+ array_sort
+------------
+ {a,B}
+(1 row)
+
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+ array_sort
+------------
+ {B,a}
+(1 row)
+
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index 47058dfde5..b4cfb0b038 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -827,3 +827,13 @@ SELECT array_dims(array_sample('[-1:2][2:3]={{1,2},{3,NULL},{5,6},{7,8}}'::int[]
SELECT array_dims(array_sample('{{{1,2},{3,NULL}},{{5,6},{7,8}},{{9,10},{11,12}}}'::int[], 2));
SELECT array_sample('{1,2,3,4,5,6}'::int[], -1); -- fail
SELECT array_sample('{1,2,3,4,5,6}'::int[], 7); --fail
+
+-- array_sort
+SELECT array_sort('{}'::int[]);
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
+SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
+
+-- multidimensional array tests
+SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index 80f28a97d7..3c739d332b 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -536,6 +536,10 @@ CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=second
SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive;
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 171a7dd5d2..eceba9605e 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -150,6 +150,7 @@ ArrayIOData
ArrayIterator
ArrayMapState
ArrayMetaState
+ArraySortCachedInfo
ArraySubWorkspace
ArrayToken
ArrayType
--
2.39.5
Hi,
Thanks for the updated patch set.
+Datum
+array_sort_order(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo);
+}
+
+Datum
+array_sort_order_nulls_first(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo);
+}

Any reason not to specify array_sort in pg_proc.dat?
It is specified in 0001 (see oid => '8810').
What I meant was that I don't think these wrapper functions are
needed. I think you can just do:
```
+{ oid => '8811', descr => 'sort array',
+ proname => 'array_sort', prorettype => 'anyarray',
+ proargtypes => 'anyarray bool', prosrc => 'array_sort'}, <--
array_sort is used directly in `prosrc`
```
... unless I'm missing something.
--
Best regards,
Aleksander Alekseev
Hi,
On Wed, Oct 30, 2024 at 9:29 PM Aleksander Alekseev
<aleksander@timescale.com> wrote:
Hi,
Thanks for the updated patch set.
+Datum
+array_sort_order(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo);
+}
+
+Datum
+array_sort_order_nulls_first(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo);
+}

Any reason not to specify array_sort in pg_proc.dat?
It is specified in 0001 (see oid => '8810').
What I meant was that I don't think these wrapper functions are
needed. I think you can just do:
```
+{ oid => '8811', descr => 'sort array',
+ proname => 'array_sort', prorettype => 'anyarray',
+ proargtypes => 'anyarray bool', prosrc => 'array_sort'}, <-- array_sort is used directly in `prosrc`
```
... unless I'm missing something.
There is a opr sanity check for this[1], if we remove these wrapper functions,
regression test will fail with:
- oid | proname | oid | proname
------+---------+-----+---------
-(0 rows)
+ oid | proname | oid | proname
+------+------------+------+------------
+ 8811 | array_sort | 8812 | array_sort
+ 8810 | array_sort | 8811 | array_sort
+ 8810 | array_sort | 8812 | array_sort
+(3 rows)
[1]:
-- Considering only built-in procs (prolang = 12), look for multiple uses
-- of the same internal function (ie, matching prosrc fields). It's OK to
-- have several entries with different pronames for the same internal function,
-- but conflicts in the number of arguments and other critical items should
-- be complained of. (We don't check data types here; see next query.)
-- Note: ignore aggregate functions here, since they all point to the same
-- dummy built-in function.
SELECT p1.oid, p1.proname, p2.oid, p2.proname
FROM pg_proc AS p1, pg_proc AS p2
WHERE p1.oid < p2.oid AND
p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND
(p1.prokind != 'a' OR p2.prokind != 'a') AND
(p1.prolang != p2.prolang OR
p1.prokind != p2.prokind OR
p1.prosecdef != p2.prosecdef OR
p1.proleakproof != p2.proleakproof OR
p1.proisstrict != p2.proisstrict OR
p1.proretset != p2.proretset OR
p1.provolatile != p2.provolatile OR
p1.pronargs != p2.pronargs);
--
Best regards,
Aleksander Alekseev
--
Regards
Junwang Zhao
On Wed, Oct 30, 2024 at 10:17 PM Junwang Zhao <zhjwpku@gmail.com> wrote:
Hi,
On Wed, Oct 30, 2024 at 9:29 PM Aleksander Alekseev
<aleksander@timescale.com> wrote:
Hi,
Thanks for the updated patch set.
+Datum
+array_sort_order(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo);
+}
+
+Datum
+array_sort_order_nulls_first(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo);
+}

Any reason not to specify array_sort in pg_proc.dat?
It is specified in 0001 (see oid => '8810').
What I meant was that I don't think these wrapper functions are
needed. I think you can just do:
```
+{ oid => '8811', descr => 'sort array',
+ proname => 'array_sort', prorettype => 'anyarray',
+ proargtypes => 'anyarray bool', prosrc => 'array_sort'}, <-- array_sort is used directly in `prosrc`
```
... unless I'm missing something.
There is a opr sanity check for this[1], if we remove these wrapper functions,
regression test will fail with:
- oid | proname | oid | proname
------+---------+-----+---------
-(0 rows)
+ oid | proname | oid | proname
+------+------------+------+------------
+ 8811 | array_sort | 8812 | array_sort
+ 8810 | array_sort | 8811 | array_sort
+ 8810 | array_sort | 8812 | array_sort
+(3 rows)
[1]:
-- Considering only built-in procs (prolang = 12), look for multiple uses
-- of the same internal function (ie, matching prosrc fields). It's OK to
-- have several entries with different pronames for the same internal function,
-- but conflicts in the number of arguments and other critical items should
-- be complained of. (We don't check data types here; see next query.)
-- Note: ignore aggregate functions here, since they all point to the same
-- dummy built-in function.
SELECT p1.oid, p1.proname, p2.oid, p2.proname
FROM pg_proc AS p1, pg_proc AS p2
WHERE p1.oid < p2.oid AND
p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND
(p1.prokind != 'a' OR p2.prokind != 'a') AND
(p1.prolang != p2.prolang OR
p1.prokind != p2.prokind OR
p1.prosecdef != p2.prosecdef OR
p1.proleakproof != p2.proleakproof OR
p1.proisstrict != p2.proisstrict OR
p1.proretset != p2.proretset OR
p1.provolatile != p2.provolatile OR
p1.pronargs != p2.pronargs);
--
Best regards,
Aleksander Alekseev
--
Regards
Junwang Zhao
CFbot failed with doc build, v10 fixed that.
--
Regards
Junwang Zhao
Attachments:
v10-0002-support-sort-order-and-nullsfirst-flag.patchapplication/octet-stream; name=v10-0002-support-sort-order-and-nullsfirst-flag.patchDownload
From 1710c22a854b103562df3ff91a038e3fffd74ba3 Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Sat, 26 Oct 2024 03:53:17 +0000
Subject: [PATCH v10 2/2] support sort order and nullsfirst flag
Signed-off-by: Junwang Zhao <zhjwpku@gmail.com>
---
doc/src/sgml/func.sgml | 6 +++-
src/backend/utils/adt/array_userfuncs.c | 42 ++++++++++++++++++++-----
src/include/catalog/pg_proc.dat | 6 ++++
src/test/regress/expected/arrays.out | 38 ++++++++++++++++++++++
src/test/regress/sql/arrays.sql | 10 ++++++
5 files changed, 94 insertions(+), 8 deletions(-)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index bfe6b6a4de..f0133fe774 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -20426,12 +20426,16 @@ SELECT NULLIF(value, '(none)') ...
<indexterm>
<primary>array_sort</primary>
</indexterm>
- <function>array_sort</function> ( <type>anyarray</type> )
+ <function>array_sort</function> ( <type>anyarray</type> <optional>, <parameter>is_ascending</parameter> <type>boolean</type> <optional>, <parameter>nulls_first</parameter> <type>boolean</type></optional></optional>)
<returnvalue>anyarray</returnvalue>
</para>
<para>
Sorts the first dimension of the array. The sort order is determined by the <literal><</literal> operator of the element type, nulls will appear after non-null values.
The collation to use can be forced by adding a <literal>COLLATE</literal> clause to any of the arguments.
+ If <parameter>is_ascending</parameter> is true then sort by ascending order, otherwise descending order.
+ <parameter>is_ascending</parameter> defaults to true.
+ If <parameter>nulls_first</parameter> is true then nulls appear before non-null values, otherwise nulls appear after non-null values.
+ <parameter>nulls_first</parameter> defaults to the opposite of <parameter>is_ascending</parameter> if not provided.
</para>
<para>
<literal>array_sort(ARRAY[[2,4],[2,1],[6,5]])</literal>
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index 9b775313ca..1ff61033b2 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -1698,7 +1698,6 @@ typedef struct ArraySortCachedInfo
* array_sort
*
* Sorts the first dimension of the array.
- * The sort order is determined by the "<" operator of the element type.
*/
Datum
array_sort(PG_FUNCTION_ARGS)
@@ -1714,10 +1713,23 @@ array_sort(PG_FUNCTION_ARGS)
Datum value;
bool isnull;
ArrayBuildStateAny *astate = NULL;
+ bool is_ascending = true;
+ bool nulls_first = false;
if (ARR_NDIM(array) < 1)
PG_RETURN_ARRAYTYPE_P(array);
+ if (PG_NARGS() > 1)
+ {
+ is_ascending = PG_GETARG_BOOL(1);
+
+ /*
+ * If nulls_first not provided, it defaults to the opposite of
+ * is_ascending.
+ */
+ nulls_first = PG_NARGS() > 2 ? PG_GETARG_BOOL(2) : !is_ascending;
+ }
+
elmtyp = ARR_ELEMTYPE(array);
cache_info = (ArraySortCachedInfo *) fcinfo->flinfo->fn_extra;
if (cache_info == NULL)
@@ -1735,8 +1747,10 @@ array_sort(PG_FUNCTION_ARGS)
typentry = cache_info->typentry;
if (typentry == NULL || typentry->type_id != elmtyp)
{
- typentry = lookup_type_cache(elmtyp, TYPECACHE_LT_OPR);
- if (!OidIsValid(typentry->lt_opr))
+ typentry = lookup_type_cache(elmtyp,
+ is_ascending ? TYPECACHE_LT_OPR : TYPECACHE_GT_OPR);
+ if ((is_ascending && !OidIsValid(typentry->lt_opr)) ||
+ (!is_ascending && !OidIsValid(typentry->gt_opr)))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_FUNCTION),
errmsg("could not identify ordering operator for type %s",
@@ -1756,8 +1770,10 @@ array_sort(PG_FUNCTION_ARGS)
(errcode(ERRCODE_UNDEFINED_OBJECT),
errmsg("could not find array type for data type %s",
format_type_be(elmtyp))));
- typentry = lookup_type_cache(array_type, TYPECACHE_LT_OPR);
- if (!OidIsValid(typentry->lt_opr))
+ typentry = lookup_type_cache(array_type,
+ is_ascending ? TYPECACHE_LT_OPR : TYPECACHE_GT_OPR);
+ if ((is_ascending && !OidIsValid(typentry->lt_opr)) ||
+ (!is_ascending && !OidIsValid(typentry->gt_opr)))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_FUNCTION),
errmsg("could not identify ordering operator for type %s",
@@ -1767,9 +1783,9 @@ array_sort(PG_FUNCTION_ARGS)
}
tuplesortstate = tuplesort_begin_datum(typentry->type_id,
- typentry->lt_opr,
+ is_ascending ? typentry->lt_opr : typentry->gt_opr,
collation,
- false, work_mem, NULL, false);
+ nulls_first, work_mem, NULL, false);
array_iterator = array_create_iterator(array, ARR_NDIM(array) - 1, NULL);
while (array_iterate(array_iterator, &value, &isnull))
@@ -1795,3 +1811,15 @@ array_sort(PG_FUNCTION_ARGS)
PG_FREE_IF_COPY(array, 0);
PG_RETURN_DATUM(makeArrayResultAny(astate, CurrentMemoryContext, true));
}
+
+Datum
+array_sort_order(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo);
+}
+
+Datum
+array_sort_order_nulls_first(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo);
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 5609368772..1987ef249e 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1744,6 +1744,12 @@
{ oid => '8810', descr => 'sort array',
proname => 'array_sort', prorettype => 'anyarray',
proargtypes => 'anyarray', prosrc => 'array_sort'},
+{ oid => '8811', descr => 'sort array',
+ proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+ proargtypes => 'anyarray bool', prosrc => 'array_sort_order'},
+{ oid => '8812', descr => 'sort array',
+ proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+ proargtypes => 'anyarray bool bool', prosrc => 'array_sort_order_nulls_first'},
{ oid => '3816', descr => 'array typanalyze',
proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool',
proargtypes => 'internal', prosrc => 'array_typanalyze' },
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index da5ffe5eba..d4fa66dbf5 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -2734,6 +2734,44 @@ SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
{Abc,CCC,bar,bbc,foo,NULL}
(1 row)
+-- array_sort with order specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true);
+ array_sort
+--------------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6,NULL}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false);
+ array_sort
+--------------------------------
+ {NULL,6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+-- array_sort with order and nullsfirst flag specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, true);
+ array_sort
+--------------------------------
+ {NULL,1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, false);
+ array_sort
+--------------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6,NULL}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, true);
+ array_sort
+--------------------------------
+ {NULL,6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, false);
+ array_sort
+--------------------------------
+ {6.6,5.5,4.4,3.3,2.2,1.1,NULL}
+(1 row)
+
-- multidimensional array tests
SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
array_sort
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index b4cfb0b038..768332641b 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -835,5 +835,15 @@ SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
+-- array_sort with order specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false);
+
+-- array_sort with order and nullsfirst flag specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, true);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, false);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, true);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, false);
+
-- multidimensional array tests
SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
--
2.39.5
v10-0001-general-purpose-array_sort.patchapplication/octet-stream; name=v10-0001-general-purpose-array_sort.patchDownload
From d65f0cd33185dab1a663d0a344872bc0196524a0 Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Fri, 27 Sep 2024 13:05:40 +0000
Subject: [PATCH v10 1/2] general purpose array_sort
Sorts anyarray on its first dimension.
The sort order is determined by the "<" operator.
Signed-off-by: Junwang Zhao <zhjwpku@gmail.com>
---
doc/src/sgml/func.sgml | 18 +++
src/backend/utils/adt/array_userfuncs.c | 110 ++++++++++++++++++
src/include/catalog/pg_proc.dat | 3 +
src/test/regress/expected/arrays.out | 38 ++++++
.../regress/expected/collate.icu.utf8.out | 13 +++
src/test/regress/sql/arrays.sql | 10 ++
src/test/regress/sql/collate.icu.utf8.sql | 4 +
src/tools/pgindent/typedefs.list | 1 +
8 files changed, 197 insertions(+)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 7be0324ac8..bfe6b6a4de 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -20421,6 +20421,24 @@ SELECT NULLIF(value, '(none)') ...
</para></entry>
</row>
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <indexterm>
+ <primary>array_sort</primary>
+ </indexterm>
+ <function>array_sort</function> ( <type>anyarray</type> )
+ <returnvalue>anyarray</returnvalue>
+ </para>
+ <para>
+ Sorts the first dimension of the array. The sort order is determined by the <literal><</literal> operator of the element type, nulls will appear after non-null values.
+ The collation to use can be forced by adding a <literal>COLLATE</literal> clause to any of the arguments.
+ </para>
+ <para>
+ <literal>array_sort(ARRAY[[2,4],[2,1],[6,5]])</literal>
+ <returnvalue>{{2,1},{2,4},{6,5}}</returnvalue>
+ </para></entry>
+ </row>
+
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm id="function-array-to-string">
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index 6599be2ec5..9b775313ca 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -16,11 +16,13 @@
#include "common/int.h"
#include "common/pg_prng.h"
#include "libpq/pqformat.h"
+#include "miscadmin.h"
#include "port/pg_bitutils.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/lsyscache.h"
+#include "utils/tuplesort.h"
#include "utils/typcache.h"
/*
@@ -1685,3 +1687,111 @@ array_sample(PG_FUNCTION_ARGS)
PG_RETURN_ARRAYTYPE_P(result);
}
+
+typedef struct ArraySortCachedInfo
+{
+ TypeCacheEntry *typentry;
+ TypeCacheEntry *array_typentry;
+} ArraySortCachedInfo;
+
+/*
+ * array_sort
+ *
+ * Sorts the first dimension of the array.
+ * The sort order is determined by the "<" operator of the element type.
+ */
+Datum
+array_sort(PG_FUNCTION_ARGS)
+{
+ ArrayType *array = PG_GETARG_ARRAYTYPE_P(0);
+ Oid elmtyp;
+ Oid array_type;
+ Oid collation = PG_GET_COLLATION();
+ ArraySortCachedInfo *cache_info;
+ TypeCacheEntry *typentry;
+ Tuplesortstate *tuplesortstate;
+ ArrayIterator array_iterator;
+ Datum value;
+ bool isnull;
+ ArrayBuildStateAny *astate = NULL;
+
+ if (ARR_NDIM(array) < 1)
+ PG_RETURN_ARRAYTYPE_P(array);
+
+ elmtyp = ARR_ELEMTYPE(array);
+ cache_info = (ArraySortCachedInfo *) fcinfo->flinfo->fn_extra;
+ if (cache_info == NULL)
+ {
+ cache_info = (ArraySortCachedInfo *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ sizeof(ArraySortCachedInfo));
+ cache_info->typentry = NULL;
+ cache_info->array_typentry = NULL;
+ fcinfo->flinfo->fn_extra = (void *) cache_info;
+ }
+
+ if (ARR_NDIM(array) == 1)
+ {
+ /* Finds the ordering operator for the type for 1-D arrays */
+ typentry = cache_info->typentry;
+ if (typentry == NULL || typentry->type_id != elmtyp)
+ {
+ typentry = lookup_type_cache(elmtyp, TYPECACHE_LT_OPR);
+ if (!OidIsValid(typentry->lt_opr))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify ordering operator for type %s",
+ format_type_be(elmtyp))));
+ cache_info->typentry = typentry;
+ }
+ }
+ else
+ {
+ /* Finds the ordering operator for the array type for multi-D arrays */
+ typentry = cache_info->array_typentry;
+ if (typentry == NULL || typentry->typelem != elmtyp)
+ {
+ array_type = get_array_type(elmtyp);
+ if (!OidIsValid(array_type))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("could not find array type for data type %s",
+ format_type_be(elmtyp))));
+ typentry = lookup_type_cache(array_type, TYPECACHE_LT_OPR);
+ if (!OidIsValid(typentry->lt_opr))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify ordering operator for type %s",
+ format_type_be(array_type))));
+ cache_info->array_typentry = typentry;
+ }
+ }
+
+ tuplesortstate = tuplesort_begin_datum(typentry->type_id,
+ typentry->lt_opr,
+ collation,
+ false, work_mem, NULL, false);
+
+ array_iterator = array_create_iterator(array, ARR_NDIM(array) - 1, NULL);
+ while (array_iterate(array_iterator, &value, &isnull))
+ {
+ tuplesort_putdatum(tuplesortstate, value, isnull);
+ }
+ array_free_iterator(array_iterator);
+
+ /*
+ * Do the sort.
+ */
+ tuplesort_performsort(tuplesortstate);
+
+ while (tuplesort_getdatum(tuplesortstate, true, false, &value, &isnull, NULL))
+ {
+ astate = accumArrayResultAny(astate, value, isnull,
+ typentry->type_id, CurrentMemoryContext);
+ }
+
+ tuplesort_end(tuplesortstate);
+
+ /* Avoid leaking memory when handed toasted input */
+ PG_FREE_IF_COPY(array, 0);
+ PG_RETURN_DATUM(makeArrayResultAny(astate, CurrentMemoryContext, true));
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 1ec0d6f6b5..5609368772 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1741,6 +1741,9 @@
{ oid => '6216', descr => 'take samples from array',
proname => 'array_sample', provolatile => 'v', prorettype => 'anyarray',
proargtypes => 'anyarray int4', prosrc => 'array_sample' },
+{ oid => '8810', descr => 'sort array',
+ proname => 'array_sort', prorettype => 'anyarray',
+ proargtypes => 'anyarray', prosrc => 'array_sort'},
{ oid => '3816', descr => 'array typanalyze',
proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool',
proargtypes => 'internal', prosrc => 'array_typanalyze' },
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index a6d81fd5f9..da5ffe5eba 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -2703,3 +2703,41 @@ SELECT array_sample('{1,2,3,4,5,6}'::int[], -1); -- fail
ERROR: sample size must be between 0 and 6
SELECT array_sample('{1,2,3,4,5,6}'::int[], 7); --fail
ERROR: sample size must be between 0 and 6
+-- array_sort
+SELECT array_sort('{}'::int[]);
+ array_sort
+------------
+ {}
+(1 row)
+
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+ array_sort
+---------------
+ {1,2,3,4,5,6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+ array_sort
+---------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
+ array_sort
+-----------------------
+ {Abc,CCC,bar,bbc,foo}
+(1 row)
+
+SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
+ array_sort
+----------------------------
+ {Abc,CCC,bar,bbc,foo,NULL}
+(1 row)
+
+-- multidimensional array tests
+SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
+ array_sort
+---------------------
+ {{2,1},{2,4},{6,5}}
+(1 row)
+
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index faa376e060..aa5fd75e6e 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1338,6 +1338,19 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse
t | t
(1 row)
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+ array_sort
+------------
+ {a,B}
+(1 row)
+
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+ array_sort
+------------
+ {B,a}
+(1 row)
+
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index 47058dfde5..b4cfb0b038 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -827,3 +827,13 @@ SELECT array_dims(array_sample('[-1:2][2:3]={{1,2},{3,NULL},{5,6},{7,8}}'::int[]
SELECT array_dims(array_sample('{{{1,2},{3,NULL}},{{5,6},{7,8}},{{9,10},{11,12}}}'::int[], 2));
SELECT array_sample('{1,2,3,4,5,6}'::int[], -1); -- fail
SELECT array_sample('{1,2,3,4,5,6}'::int[], 7); --fail
+
+-- array_sort
+SELECT array_sort('{}'::int[]);
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
+SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
+
+-- multidimensional array tests
+SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index 80f28a97d7..3c739d332b 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -536,6 +536,10 @@ CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=second
SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive;
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 171a7dd5d2..eceba9605e 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -150,6 +150,7 @@ ArrayIOData
ArrayIterator
ArrayMapState
ArrayMetaState
+ArraySortCachedInfo
ArraySubWorkspace
ArrayToken
ArrayType
--
2.39.5
On Wed, Oct 30, 2024 at 10:41 PM Junwang Zhao <zhjwpku@gmail.com> wrote:
On Wed, Oct 30, 2024 at 10:17 PM Junwang Zhao <zhjwpku@gmail.com> wrote:
Hi,
On Wed, Oct 30, 2024 at 9:29 PM Aleksander Alekseev
<aleksander@timescale.com> wrote:
Hi,
Thanks for the updated patch set.
+Datum
+array_sort_order(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo);
+}
+
+Datum
+array_sort_order_nulls_first(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo);
+}

Any reason not to specify array_sort in pg_proc.dat?
It is specified in 0001 (see oid => '8810').
What I meant was that I don't think these wrapper functions are
needed. I think you can just do:
```
+{ oid => '8811', descr => 'sort array',
+ proname => 'array_sort', prorettype => 'anyarray',
+ proargtypes => 'anyarray bool', prosrc => 'array_sort'}, <-- array_sort is used directly in `prosrc`
```
... unless I'm missing something.
There is a opr sanity check for this[1], if we remove these wrapper functions,
regression test will fail with:
- oid | proname | oid | proname
------+---------+-----+---------
-(0 rows)
+ oid | proname | oid | proname
+------+------------+------+------------
+ 8811 | array_sort | 8812 | array_sort
+ 8810 | array_sort | 8811 | array_sort
+ 8810 | array_sort | 8812 | array_sort
+(3 rows)
[1]:
-- Considering only built-in procs (prolang = 12), look for multiple uses
-- of the same internal function (ie, matching prosrc fields). It's OK to
-- have several entries with different pronames for the same internal function,
-- but conflicts in the number of arguments and other critical items should
-- be complained of. (We don't check data types here; see next query.)
-- Note: ignore aggregate functions here, since they all point to the same
-- dummy built-in function.
SELECT p1.oid, p1.proname, p2.oid, p2.proname
FROM pg_proc AS p1, pg_proc AS p2
WHERE p1.oid < p2.oid AND
p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND
(p1.prokind != 'a' OR p2.prokind != 'a') AND
(p1.prolang != p2.prolang OR
p1.prokind != p2.prokind OR
p1.prosecdef != p2.prosecdef OR
p1.proleakproof != p2.proleakproof OR
p1.proisstrict != p2.proisstrict OR
p1.proretset != p2.proretset OR
p1.provolatile != p2.provolatile OR
p1.pronargs != p2.pronargs);
--
Best regards,
Aleksander Alekseev
--
Regards
Junwang Zhao
CFbot failed with doc build, v10 fixed that.
--
Regards
Junwang Zhao
A rebase was needed because array_reverse has been committed; PFA v11.
--
Regards
Junwang Zhao
Attachments:
v11-0002-support-sort-order-and-nullsfirst-flag.patchapplication/octet-stream; name=v11-0002-support-sort-order-and-nullsfirst-flag.patchDownload
From 76ed69b5005e8445d68da4da45994748ff83e0ca Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Sat, 26 Oct 2024 03:53:17 +0000
Subject: [PATCH v11 2/2] support sort order and nullsfirst flag
Signed-off-by: Junwang Zhao <zhjwpku@gmail.com>
---
doc/src/sgml/func.sgml | 6 +++-
src/backend/utils/adt/array_userfuncs.c | 42 ++++++++++++++++++++-----
src/include/catalog/pg_proc.dat | 6 ++++
src/test/regress/expected/arrays.out | 38 ++++++++++++++++++++++
src/test/regress/sql/arrays.sql | 10 ++++++
5 files changed, 94 insertions(+), 8 deletions(-)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 9e019e18cb..de66991d3d 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -20443,12 +20443,16 @@ SELECT NULLIF(value, '(none)') ...
<indexterm>
<primary>array_sort</primary>
</indexterm>
- <function>array_sort</function> ( <type>anyarray</type> )
+ <function>array_sort</function> ( <type>anyarray</type> <optional>, <parameter>is_ascending</parameter> <type>boolean</type> <optional>, <parameter>nulls_first</parameter> <type>boolean</type></optional></optional>)
<returnvalue>anyarray</returnvalue>
</para>
<para>
Sorts the first dimension of the array. The sort order is determined by the <literal>&lt;</literal> operator of the element type, nulls will appear after non-null values.
The collation to use can be forced by adding a <literal>COLLATE</literal> clause to any of the arguments.
+ If <parameter>is_ascending</parameter> is true then sort by ascending order, otherwise descending order.
+ <parameter>is_ascending</parameter> defaults to true.
+ If <parameter>nulls_first</parameter> is true then nulls appear before non-null values, otherwise nulls appear after non-null values.
+ <parameter>nulls_first</parameter> defaults to the opposite of <parameter>is_ascending</parameter> if not provided.
</para>
<para>
<literal>array_sort(ARRAY[[2,4],[2,1],[6,5]])</literal>
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index a45586c892..df9822d977 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -1810,7 +1810,6 @@ typedef struct ArraySortCachedInfo
* array_sort
*
* Sorts the first dimension of the array.
- * The sort order is determined by the "<" operator of the element type.
*/
Datum
array_sort(PG_FUNCTION_ARGS)
@@ -1826,10 +1825,23 @@ array_sort(PG_FUNCTION_ARGS)
Datum value;
bool isnull;
ArrayBuildStateAny *astate = NULL;
+ bool is_ascending = true;
+ bool nulls_first = false;
if (ARR_NDIM(array) < 1)
PG_RETURN_ARRAYTYPE_P(array);
+ if (PG_NARGS() > 1)
+ {
+ is_ascending = PG_GETARG_BOOL(1);
+
+ /*
+ * If nulls_first not provided, it defaults to the opposite of
+ * is_ascending.
+ */
+ nulls_first = PG_NARGS() > 2 ? PG_GETARG_BOOL(2) : !is_ascending;
+ }
+
elmtyp = ARR_ELEMTYPE(array);
cache_info = (ArraySortCachedInfo *) fcinfo->flinfo->fn_extra;
if (cache_info == NULL)
@@ -1847,8 +1859,10 @@ array_sort(PG_FUNCTION_ARGS)
typentry = cache_info->typentry;
if (typentry == NULL || typentry->type_id != elmtyp)
{
- typentry = lookup_type_cache(elmtyp, TYPECACHE_LT_OPR);
- if (!OidIsValid(typentry->lt_opr))
+ typentry = lookup_type_cache(elmtyp,
+ is_ascending ? TYPECACHE_LT_OPR : TYPECACHE_GT_OPR);
+ if ((is_ascending && !OidIsValid(typentry->lt_opr)) ||
+ (!is_ascending && !OidIsValid(typentry->gt_opr)))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_FUNCTION),
errmsg("could not identify ordering operator for type %s",
@@ -1868,8 +1882,10 @@ array_sort(PG_FUNCTION_ARGS)
(errcode(ERRCODE_UNDEFINED_OBJECT),
errmsg("could not find array type for data type %s",
format_type_be(elmtyp))));
- typentry = lookup_type_cache(array_type, TYPECACHE_LT_OPR);
- if (!OidIsValid(typentry->lt_opr))
+ typentry = lookup_type_cache(array_type,
+ is_ascending ? TYPECACHE_LT_OPR : TYPECACHE_GT_OPR);
+ if ((is_ascending && !OidIsValid(typentry->lt_opr)) ||
+ (!is_ascending && !OidIsValid(typentry->gt_opr)))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_FUNCTION),
errmsg("could not identify ordering operator for type %s",
@@ -1879,9 +1895,9 @@ array_sort(PG_FUNCTION_ARGS)
}
tuplesortstate = tuplesort_begin_datum(typentry->type_id,
- typentry->lt_opr,
+ is_ascending ? typentry->lt_opr : typentry->gt_opr,
collation,
- false, work_mem, NULL, false);
+ nulls_first, work_mem, NULL, false);
array_iterator = array_create_iterator(array, ARR_NDIM(array) - 1, NULL);
while (array_iterate(array_iterator, &value, &isnull))
@@ -1907,3 +1923,15 @@ array_sort(PG_FUNCTION_ARGS)
PG_FREE_IF_COPY(array, 0);
PG_RETURN_DATUM(makeArrayResultAny(astate, CurrentMemoryContext, true));
}
+
+Datum
+array_sort_order(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo);
+}
+
+Datum
+array_sort_order_nulls_first(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo);
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 55b52e138a..ad6de57e57 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1747,6 +1747,12 @@
{ oid => '8810', descr => 'sort array',
proname => 'array_sort', prorettype => 'anyarray',
proargtypes => 'anyarray', prosrc => 'array_sort'},
+{ oid => '8811', descr => 'sort array',
+ proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+ proargtypes => 'anyarray bool', prosrc => 'array_sort_order'},
+{ oid => '8812', descr => 'sort array',
+ proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+ proargtypes => 'anyarray bool bool', prosrc => 'array_sort_order_nulls_first'},
{ oid => '3816', descr => 'array typanalyze',
proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool',
proargtypes => 'internal', prosrc => 'array_typanalyze' },
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index 846d6acd85..b0cc38581e 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -2765,6 +2765,44 @@ SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
{Abc,CCC,bar,bbc,foo,NULL}
(1 row)
+-- array_sort with order specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true);
+ array_sort
+--------------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6,NULL}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false);
+ array_sort
+--------------------------------
+ {NULL,6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+-- array_sort with order and nullsfirst flag specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, true);
+ array_sort
+--------------------------------
+ {NULL,1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, false);
+ array_sort
+--------------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6,NULL}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, true);
+ array_sort
+--------------------------------
+ {NULL,6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, false);
+ array_sort
+--------------------------------
+ {6.6,5.5,4.4,3.3,2.2,1.1,NULL}
+(1 row)
+
-- multidimensional array tests
SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
array_sort
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index b1f9c97e5c..bc046d9fe4 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -842,5 +842,15 @@ SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
+-- array_sort with order specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false);
+
+-- array_sort with order and nullsfirst flag specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, true);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, false);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, true);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, false);
+
-- multidimensional array tests
SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
--
2.39.5
v11-0001-general-purpose-array_sort.patchapplication/octet-stream; name=v11-0001-general-purpose-array_sort.patchDownload
From 981ae3c7e4d3899c00b61b02b98559549ce3bc84 Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Fri, 27 Sep 2024 13:05:40 +0000
Subject: [PATCH v11 1/2] general purpose array_sort
Sorts anyarray on its first dimension.
The sort order is determined by the "<" operator.
Signed-off-by: Junwang Zhao <zhjwpku@gmail.com>
---
doc/src/sgml/func.sgml | 18 +++
src/backend/utils/adt/array_userfuncs.c | 110 ++++++++++++++++++
src/include/catalog/pg_proc.dat | 3 +
src/test/regress/expected/arrays.out | 38 ++++++
.../regress/expected/collate.icu.utf8.out | 13 +++
src/test/regress/sql/arrays.sql | 12 +-
src/test/regress/sql/collate.icu.utf8.sql | 4 +
src/tools/pgindent/typedefs.list | 1 +
8 files changed, 198 insertions(+), 1 deletion(-)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 223d869f8c..9e019e18cb 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -20438,6 +20438,24 @@ SELECT NULLIF(value, '(none)') ...
</para></entry>
</row>
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <indexterm>
+ <primary>array_sort</primary>
+ </indexterm>
+ <function>array_sort</function> ( <type>anyarray</type> )
+ <returnvalue>anyarray</returnvalue>
+ </para>
+ <para>
+ Sorts the first dimension of the array. The sort order is determined by the <literal>&lt;</literal> operator of the element type, nulls will appear after non-null values.
+ The collation to use can be forced by adding a <literal>COLLATE</literal> clause to any of the arguments.
+ </para>
+ <para>
+ <literal>array_sort(ARRAY[[2,4],[2,1],[6,5]])</literal>
+ <returnvalue>{{2,1},{2,4},{6,5}}</returnvalue>
+ </para></entry>
+ </row>
+
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm id="function-array-to-string">
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index d053808f6e..a45586c892 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -16,11 +16,13 @@
#include "common/int.h"
#include "common/pg_prng.h"
#include "libpq/pqformat.h"
+#include "miscadmin.h"
#include "port/pg_bitutils.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/lsyscache.h"
+#include "utils/tuplesort.h"
#include "utils/typcache.h"
/*
@@ -1797,3 +1799,111 @@ array_reverse(PG_FUNCTION_ARGS)
PG_RETURN_ARRAYTYPE_P(result);
}
+
+typedef struct ArraySortCachedInfo
+{
+ TypeCacheEntry *typentry;
+ TypeCacheEntry *array_typentry;
+} ArraySortCachedInfo;
+
+/*
+ * array_sort
+ *
+ * Sorts the first dimension of the array.
+ * The sort order is determined by the "<" operator of the element type.
+ */
+Datum
+array_sort(PG_FUNCTION_ARGS)
+{
+ ArrayType *array = PG_GETARG_ARRAYTYPE_P(0);
+ Oid elmtyp;
+ Oid array_type;
+ Oid collation = PG_GET_COLLATION();
+ ArraySortCachedInfo *cache_info;
+ TypeCacheEntry *typentry;
+ Tuplesortstate *tuplesortstate;
+ ArrayIterator array_iterator;
+ Datum value;
+ bool isnull;
+ ArrayBuildStateAny *astate = NULL;
+
+ if (ARR_NDIM(array) < 1)
+ PG_RETURN_ARRAYTYPE_P(array);
+
+ elmtyp = ARR_ELEMTYPE(array);
+ cache_info = (ArraySortCachedInfo *) fcinfo->flinfo->fn_extra;
+ if (cache_info == NULL)
+ {
+ cache_info = (ArraySortCachedInfo *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ sizeof(ArraySortCachedInfo));
+ cache_info->typentry = NULL;
+ cache_info->array_typentry = NULL;
+ fcinfo->flinfo->fn_extra = (void *) cache_info;
+ }
+
+ if (ARR_NDIM(array) == 1)
+ {
+ /* Finds the ordering operator for the type for 1-D arrays */
+ typentry = cache_info->typentry;
+ if (typentry == NULL || typentry->type_id != elmtyp)
+ {
+ typentry = lookup_type_cache(elmtyp, TYPECACHE_LT_OPR);
+ if (!OidIsValid(typentry->lt_opr))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify ordering operator for type %s",
+ format_type_be(elmtyp))));
+ cache_info->typentry = typentry;
+ }
+ }
+ else
+ {
+ /* Finds the ordering operator for the array type for multi-D arrays */
+ typentry = cache_info->array_typentry;
+ if (typentry == NULL || typentry->typelem != elmtyp)
+ {
+ array_type = get_array_type(elmtyp);
+ if (!OidIsValid(array_type))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("could not find array type for data type %s",
+ format_type_be(elmtyp))));
+ typentry = lookup_type_cache(array_type, TYPECACHE_LT_OPR);
+ if (!OidIsValid(typentry->lt_opr))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify ordering operator for type %s",
+ format_type_be(array_type))));
+ cache_info->array_typentry = typentry;
+ }
+ }
+
+ tuplesortstate = tuplesort_begin_datum(typentry->type_id,
+ typentry->lt_opr,
+ collation,
+ false, work_mem, NULL, false);
+
+ array_iterator = array_create_iterator(array, ARR_NDIM(array) - 1, NULL);
+ while (array_iterate(array_iterator, &value, &isnull))
+ {
+ tuplesort_putdatum(tuplesortstate, value, isnull);
+ }
+ array_free_iterator(array_iterator);
+
+ /*
+ * Do the sort.
+ */
+ tuplesort_performsort(tuplesortstate);
+
+ while (tuplesort_getdatum(tuplesortstate, true, false, &value, &isnull, NULL))
+ {
+ astate = accumArrayResultAny(astate, value, isnull,
+ typentry->type_id, CurrentMemoryContext);
+ }
+
+ tuplesort_end(tuplesortstate);
+
+ /* Avoid leaking memory when handed toasted input */
+ PG_FREE_IF_COPY(array, 0);
+ PG_RETURN_DATUM(makeArrayResultAny(astate, CurrentMemoryContext, true));
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index a38e20f5d9..55b52e138a 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1744,6 +1744,9 @@
{ oid => '8686', descr => 'reverse array',
proname => 'array_reverse', prorettype => 'anyarray',
proargtypes => 'anyarray', prosrc => 'array_reverse' },
+{ oid => '8810', descr => 'sort array',
+ proname => 'array_sort', prorettype => 'anyarray',
+ proargtypes => 'anyarray', prosrc => 'array_sort'},
{ oid => '3816', descr => 'array typanalyze',
proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool',
proargtypes => 'internal', prosrc => 'array_typanalyze' },
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index 0b61fb5bb7..846d6acd85 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -2734,3 +2734,41 @@ SELECT array_reverse('{{1,2},{3,4},{5,6},{7,8}}'::int[]);
{{7,8},{5,6},{3,4},{1,2}}
(1 row)
+-- array_sort
+SELECT array_sort('{}'::int[]);
+ array_sort
+------------
+ {}
+(1 row)
+
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+ array_sort
+---------------
+ {1,2,3,4,5,6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+ array_sort
+---------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
+ array_sort
+-----------------------
+ {Abc,CCC,bar,bbc,foo}
+(1 row)
+
+SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
+ array_sort
+----------------------------
+ {Abc,CCC,bar,bbc,foo,NULL}
+(1 row)
+
+-- multidimensional array tests
+SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
+ array_sort
+---------------------
+ {{2,1},{2,4},{6,5}}
+(1 row)
+
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index faa376e060..aa5fd75e6e 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1338,6 +1338,19 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse
t | t
(1 row)
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+ array_sort
+------------
+ {a,B}
+(1 row)
+
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+ array_sort
+------------
+ {B,a}
+(1 row)
+
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index 691cff4a12..b1f9c97e5c 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -833,4 +833,14 @@ SELECT array_reverse('{}'::int[]);
SELECT array_reverse('{1}'::int[]);
SELECT array_reverse('{1,2}'::int[]);
SELECT array_reverse('{1,2,3,NULL,4,5,6}'::int[]);
-SELECT array_reverse('{{1,2},{3,4},{5,6},{7,8}}'::int[]);
\ No newline at end of file
+SELECT array_reverse('{{1,2},{3,4},{5,6},{7,8}}'::int[]);
+
+-- array_sort
+SELECT array_sort('{}'::int[]);
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
+SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
+
+-- multidimensional array tests
+SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index 80f28a97d7..3c739d332b 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -536,6 +536,10 @@ CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=second
SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive;
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 171a7dd5d2..eceba9605e 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -150,6 +150,7 @@ ArrayIOData
ArrayIterator
ArrayMapState
ArrayMetaState
+ArraySortCachedInfo
ArraySubWorkspace
ArrayToken
ArrayType
--
2.39.5
On Sun, Nov 03, 2024 at 11:33:05AM +0800, Junwang Zhao wrote:
Rebase needed due to array_reverse committed, PFA v11.
There has been another conflict since you have posted this version
(noticed that after my business in 027124a872d7). I have looked at
0001.
+ if (ARR_NDIM(array) < 1)
+ PG_RETURN_ARRAYTYPE_P(array);
There is no point in doing a sort if the array has only one element.
You can add a check based on "ARR_DIMS(array)[0] < 2" to achieve that.
+ typentry = lookup_type_cache(elmtyp, TYPECACHE_LT_OPR);
+ if (!OidIsValid(typentry->lt_opr))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify ordering operator for type %s",
+ format_type_be(elmtyp))));
The patch introduces two error paths based on the fact that ordering
operators could not be found depending on a data type that lacks the
ordering operator and the array ordering operator part. It is right
to issue an error if these are lacking, like the various stats paths.
Should we have some regression tests with specific data types for
these errors, though? The stats paths don't care much about these
error cases, but it does not mean that we should not care about them.
In short, let's have negative test coverage if we can.
+typedef struct ArraySortCachedInfo
+{
+ TypeCacheEntry *typentry;
+ TypeCacheEntry *array_typentry;
+} ArraySortCachedInfo;
Let's put that at the top of the file, with a comment about how it
links to array_sort() for the caching with fn_extra. Let's also
document the meaning of the fields.
FWIW, I am confused by this implementation, where you have to allocate
the two TypeCacheEntry because of the fact that you have to deal with
the 1-dimension case and the multi-dimension case. In the context of
a single function call, why do you need both typentry and
array_typentry, actually? Wouldn't it be enough to use one typentry
that points to the typcache, meaning that you don't really need to use
the extra business with fn_mcxt, no? If you require both (because I
may be wrong), perhaps you should have a regression test that's able
to break when removing array_typentry, changing the code to only rely
on typentry. Note: I have just removed array_typentry in a quick
test, current coverage was happy about it. Feel free to prove me
wrong.
Agreed that the function should be immutable. The results are fixed
depending on the input even with the COLLATE clauses appended.
Let's add something when there is only one element in the first
dimension of the array, say two cases one with an int and one with an
array of ints like:
SELECT array_sort('{1}'::int[]);
SELECT array_sort('{{1}}'::int[]);
--
Michael
On Mon, Nov 4, 2024 at 1:46 PM Michael Paquier <michael@paquier.xyz> wrote:
+ typentry = lookup_type_cache(elmtyp, TYPECACHE_LT_OPR);
+ if (!OidIsValid(typentry->lt_opr))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify ordering operator for type %s",
+ format_type_be(elmtyp))));
The patch introduces two error paths based on the fact that ordering
operators could not be found depending on a data type that lacks the
ordering operator and the array ordering operator part. It is right
to issue an error if these are lacking, like the various stats paths.
Should we have some regression tests with specific data types for
these errors, though? The stats paths don't care much about these
error cases, but it does not mean that we should not care about them.
In short, let's have negative test coverage if we can.
select distinct oprleft::regtype from pg_operator where oprname = '='
and oprleft = oprright
except all
select distinct oprleft::regtype from pg_operator where oprname = '<'
and oprleft = oprright;
returns
hstore
cid
aclitem
xid
line
A simple test case using the xid data type would be:
SELECT array_sort('{{1,2,3}}'::xid[]);
+typedef struct ArraySortCachedInfo
+{
+ TypeCacheEntry *typentry;
+ TypeCacheEntry *array_typentry;
+} ArraySortCachedInfo;
Let's put that at the top of the file, with a comment about how it
links to array_sort() for the caching with fn_extra. Let's also
document the meaning of the fields.
FWIW, I am confused by this implementation, where you have to allocate
the two TypeCacheEntry because of the fact that you have to deal with
the 1-dimension case and the multi-dimension case. In the context of
a single function call, why do you need both typentry and
array_typentry, actually? Wouldn't it be enough to use one typentry
that points to the typcache, meaning that you don't really need to use
the extra business with fn_mcxt, no? If you require both (because I
may be wrong), perhaps you should have a regression test that's able
to break when removing array_typentry, changing the code to only rely
on typentry. Note: I have just removed array_typentry in a quick
test, current coverage was happy about it. Feel free to prove me
wrong.
drop table if exists t;
CREATE TABLE t (a int[]);
insert into t values ('{1,3}'),('{1,2,3}'),('{11}');
insert into t values ('{{1,12}}'), ('{{4,3}}');
SELECT array_sort(a) from t;
In the above case,
tuplesort_begin_datum needs both the int type information and the int[] type information.
Otherwise the cached TypeCacheEntry would be used to sort multi-dimensional arrays,
which would make the result wrong.
On Sun, 3 Nov 2024 at 03:33, Junwang Zhao <zhjwpku@gmail.com> wrote:
PFA v11.
Testing this with an array with non-default lower bounds, it fails to
preserve the array bounds, which I think it should (note:
array_reverse() and array_shuffle() do preserve the bounds):
SELECT array_reverse(a), array_shuffle(a), array_sort(a)
FROM (VALUES ('[10:12][20:21]={{1,2},{10,20},{3,4}}'::int[])) v(a);
-[ RECORD 1 ]-+-------------------------------------
array_reverse | [10:12][20:21]={{3,4},{10,20},{1,2}}
array_shuffle | [10:12][20:21]={{10,20},{3,4},{1,2}}
array_sort | [1:3][20:21]={{1,2},{3,4},{10,20}}
Regards,
Dean
On Mon, Nov 04, 2024 at 03:16:35PM +0800, jian he wrote:
drop table if exists t;
CREATE TABLE t (a int[]);
insert into t values ('{1,3}'),('{1,2,3}'),('{11}');
insert into t values ('{{1,12}}'), ('{{4,3}}');
SELECT array_sort(a) from t;
In the above case,
tuplesort_begin_datum needs the int type information and int[] type information.
otherwise the cached TypeCacheEntry is being used to sort mult-dimension array,
which will make the result false.
All these behaviors need more extensive testing.
This brings me an extra question around the caching. Would the
sorting be able to behave correctly when feeding to a single
array_sort() context array values that have multiple COLLATE clauses?
Or merge_collation_state() would be smart enough to make sure that
collation conflicts never happen to begin with? I am wondering if we
should worry about multiple VALUES, CTEs, or PL functions where
array_sort() could be fed into its cache values that lead to
unpredictable results for some values. This stuff should perhaps have
more testing around such behaviors, stressing what kind of
interactions we have between the sorting of multiple values and the
caching, in the context of a single array_sort() call.
--
Michael
On Mon, Nov 4, 2024 at 7:34 PM Dean Rasheed <dean.a.rasheed@gmail.com> wrote:
Testing this with an array with non-default lower bounds, it fails to
preserve the array bounds, which I think it should (note:
array_reverse() and array_shuffle() do preserve the bounds):SELECT array_reverse(a), array_shuffle(a), array_sort(a)
FROM (VALUES ('[10:12][20:21]={{1,2},{10,20},{3,4}}'::int[])) v(a);-[ RECORD 1 ]-+-------------------------------------
array_reverse | [10:12][20:21]={{3,4},{10,20},{1,2}}
array_shuffle | [10:12][20:21]={{10,20},{3,4},{1,2}}
array_sort | [1:3][20:21]={{1,2},{3,4},{10,20}}
If I understand it correctly,
array_create_iterator cannot cope with the top dimension's bound information.
Since the input array argument already has the dims and lbs information,
at the end of array_sort we can copy them directly
from the input array argument to astate.
tuplesort_performsort doesn't need the array bounds, so we should be safe?
v12-0001 same as v11-0001-general-purpose-array_sort.patch, only
resolve git conflict
v12-0002 preserve array bound information.
v12-0003 cache ArrayMetaState.
after v12-0003 now
typedef struct ArraySortCachedInfo
{
TypeCacheEntry *typentry;
TypeCacheEntry *array_typentry;
ArrayMetaState array_meta;
} ArraySortCachedInfo;
In array_create_iterator, get_typlenbyvalalign does a cache lookup,
so we can cache the ArrayMetaState instead.
That way, multiple array_create_iterator calls won't need to call
get_typlenbyvalalign every time.
In 0002, I also added a 3-dimensional array test.
create table t(a int[]);
insert into t values ('[-1:-0]={7,1}'::int[]),
('[-2:-0][20:21]={{1,2},{10,20},{1,-4}}'),
('[-2:-0][20:22]={{-11,2,-1},{-11,2, 1},{-11,-4, 10}}'),
('[-13:-10][0:1][20:22]={
{{1,2,112},{1,2,-123}},
{{10,-20,1},{11,123,3}},
{{10,-20,1},{11,-123,-9}},
{{1,2,-11},{1,2,211}}}'::int[]);
SELECT array_sort(t.a) from t;
SELECT array_sort((t.a) [-13:-10][0:1][21:22]) from t where array_ndims(a) = 3;
SELECT array_sort((t.a) [-13:-11][0:1][21:22]) from t where array_ndims(a) = 3;
SELECT array_sort((t.a) [-13:-11][0:0][20:21]) from t where array_ndims(a) = 3;
The test output is ok to me.
Attachments:
v12-0003-array_sort-cache-ArrayMetaState.patchapplication/x-patch; name=v12-0003-array_sort-cache-ArrayMetaState.patchDownload
From caa24ca549c01e99b3873860dcffe0d8161345d4 Mon Sep 17 00:00:00 2001
From: jian he <jian.universality@gmail.com>
Date: Tue, 5 Nov 2024 14:34:02 +0800
Subject: [PATCH v12 3/3] array_sort cache ArrayMetaState
---
src/backend/utils/adt/array_userfuncs.c | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index 436f761614..c14ef1ed90 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -1804,6 +1804,7 @@ typedef struct ArraySortCachedInfo
{
TypeCacheEntry *typentry;
TypeCacheEntry *array_typentry;
+ ArrayMetaState array_meta;
} ArraySortCachedInfo;
/*
@@ -1848,6 +1849,7 @@ array_sort(PG_FUNCTION_ARGS)
sizeof(ArraySortCachedInfo));
cache_info->typentry = NULL;
cache_info->array_typentry = NULL;
+ memset(&cache_info->array_meta, 0, sizeof(ArrayMetaState));
fcinfo->flinfo->fn_extra = (void *) cache_info;
}
@@ -1864,6 +1866,10 @@ array_sort(PG_FUNCTION_ARGS)
errmsg("could not identify ordering operator for type %s",
format_type_be(elmtyp))));
cache_info->typentry = typentry;
+ cache_info->array_meta.element_type = elmtyp;
+ cache_info->array_meta.typlen = typentry->typlen;
+ cache_info->array_meta.typbyval = typentry->typbyval;
+ cache_info->array_meta.typalign = typentry->typalign;
}
}
else
@@ -1886,6 +1892,11 @@ array_sort(PG_FUNCTION_ARGS)
format_type_be(array_type))));
cache_info->array_typentry = typentry;
}
+ cache_info->array_meta.element_type = elmtyp;
+ get_typlenbyvalalign(elmtyp,
+ &cache_info->array_meta.typlen,
+ &cache_info->array_meta.typbyval,
+ &cache_info->array_meta.typalign);
}
tuplesortstate = tuplesort_begin_datum(typentry->type_id,
@@ -1893,7 +1904,7 @@ array_sort(PG_FUNCTION_ARGS)
collation,
false, work_mem, NULL, false);
- array_iterator = array_create_iterator(array, ndim - 1, NULL);
+ array_iterator = array_create_iterator(array, ndim - 1, &cache_info->array_meta);
while (array_iterate(array_iterator, &value, &isnull))
{
tuplesort_putdatum(tuplesortstate, value, isnull);
--
2.34.1
v12-0002-array_sort-preserve-array-dimenion-and-bound-inf.patchapplication/x-patch; name=v12-0002-array_sort-preserve-array-dimenion-and-bound-inf.patchDownload
From d7c2956893222bb025822810860a01539ca3ec85 Mon Sep 17 00:00:00 2001
From: jian he <jian.universality@gmail.com>
Date: Tue, 5 Nov 2024 14:53:42 +0800
Subject: [PATCH v12 2/3] array_sort preserve array dimenion and bound info
---
src/backend/utils/adt/array_userfuncs.c | 30 +++++++++++++--
src/backend/utils/adt/arrayfuncs.c | 3 +-
src/include/utils/array.h | 1 +
src/test/regress/expected/arrays.out | 49 +++++++++++++++++++++++++
src/test/regress/sql/arrays.sql | 18 ++++++++-
5 files changed, 96 insertions(+), 5 deletions(-)
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index a45586c892..436f761614 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -1826,8 +1826,18 @@ array_sort(PG_FUNCTION_ARGS)
Datum value;
bool isnull;
ArrayBuildStateAny *astate = NULL;
+ int ndim,
+ *dims,
+ *lbs;
- if (ARR_NDIM(array) < 1)
+ ndim = ARR_NDIM(array);
+ dims = ARR_DIMS(array);
+ lbs = ARR_LBOUND(array);
+
+ if (ndim < 1)
+ PG_RETURN_ARRAYTYPE_P(array);
+
+ if(dims[0] < 2)
PG_RETURN_ARRAYTYPE_P(array);
elmtyp = ARR_ELEMTYPE(array);
@@ -1841,7 +1851,7 @@ array_sort(PG_FUNCTION_ARGS)
fcinfo->flinfo->fn_extra = (void *) cache_info;
}
- if (ARR_NDIM(array) == 1)
+ if (ndim == 1)
{
/* Finds the ordering operator for the type for 1-D arrays */
typentry = cache_info->typentry;
@@ -1883,7 +1893,7 @@ array_sort(PG_FUNCTION_ARGS)
collation,
false, work_mem, NULL, false);
- array_iterator = array_create_iterator(array, ARR_NDIM(array) - 1, NULL);
+ array_iterator = array_create_iterator(array, ndim - 1, NULL);
while (array_iterate(array_iterator, &value, &isnull))
{
tuplesort_putdatum(tuplesortstate, value, isnull);
@@ -1903,6 +1913,20 @@ array_sort(PG_FUNCTION_ARGS)
tuplesort_end(tuplesortstate);
+ if (astate->arraystate != NULL)
+ {
+ memcpy(astate->arraystate->dims, dims, ndim * sizeof(int));
+ memcpy(astate->arraystate->lbs, lbs, ndim * sizeof(int));
+ Assert(ndim == astate->arraystate->ndims);
+ }
+ else
+ {
+ astate->scalarstate->lbs = lbs[0];
+
+ /* zero dimension won't reach here */
+ Assert(ndim == 1);
+ }
+
/* Avoid leaking memory when handed toasted input */
PG_FREE_IF_COPY(array, 0);
PG_RETURN_DATUM(makeArrayResultAny(astate, CurrentMemoryContext, true));
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index a715e7e0b8..ffdf3449e9 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -5335,6 +5335,7 @@ initArrayResultWithSize(Oid element_type, MemoryContext rcontext,
&astate->typbyval,
&astate->typalign);
+ astate->lbs = 1; /* default lower-bound value set to 1, see array_in also */
return astate;
}
@@ -5867,7 +5868,7 @@ makeArrayResultAny(ArrayBuildStateAny *astate,
/* If no elements were presented, we want to create an empty array */
ndims = (astate->scalarstate->nelems > 0) ? 1 : 0;
dims[0] = astate->scalarstate->nelems;
- lbs[0] = 1;
+ lbs[0] = astate->scalarstate->lbs;
result = makeMdArrayResult(astate->scalarstate, ndims, dims, lbs,
rcontext, release);
diff --git a/src/include/utils/array.h b/src/include/utils/array.h
index 157cc0e4c6..7b7937d494 100644
--- a/src/include/utils/array.h
+++ b/src/include/utils/array.h
@@ -193,6 +193,7 @@ typedef struct ArrayBuildState
int nelems; /* number of valid entries in above arrays */
Oid element_type; /* data type of the Datums */
int16 typlen; /* needed info about datatype */
+ int lbs; /* one dimension, one lower-bound is OK*/
bool typbyval;
char typalign;
bool private_cxt; /* use private memory context */
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index 846d6acd85..9e371902d3 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -2735,6 +2735,18 @@ SELECT array_reverse('{{1,2},{3,4},{5,6},{7,8}}'::int[]);
(1 row)
-- array_sort
+SELECT array_sort('{1}'::int[]);
+ array_sort
+------------
+ {1}
+(1 row)
+
+SELECT array_sort('{{2,1}}'::int[]);
+ array_sort
+------------
+ {{2,1}}
+(1 row)
+
SELECT array_sort('{}'::int[]);
array_sort
------------
@@ -2772,3 +2784,40 @@ SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
{{2,1},{2,4},{6,5}}
(1 row)
+create table t(a int[]);
+insert into t values ('[-1:-0]={7,1}'::int[]),
+('[-2:-0][20:21]={{1,2},{10,20},{1,-4}}'),
+('[-2:-0][20:22]={{-11,2,-1},{-11,2, 1},{-11,-4, 10}}'),
+('[-13:-10][0:1][20:22]={
+{{1,2,112},{1,2,-123}},
+{{10,-20,1},{11,123,3}},
+{{10,-20,1},{11,-123,-9}},
+{{1,2,-11},{1,2,211}}}'::int[]);
+SELECT array_sort(t.a) from t;
+ array_sort
+------------------------------------------------------------------------------------------------------------------------
+ [-1:0]={1,7}
+ [-2:0][20:21]={{1,-4},{1,2},{10,20}}
+ [-2:0][20:22]={{-11,-4,10},{-11,2,-1},{-11,2,1}}
+ [-13:-10][0:1][20:22]={{{1,2,-11},{1,2,211}},{{1,2,112},{1,2,-123}},{{10,-20,1},{11,-123,-9}},{{10,-20,1},{11,123,3}}}
+(4 rows)
+
+SELECT array_sort((t.a) [-13:-10][0:1][21:22]) from t where array_ndims(a) = 3;
+ array_sort
+------------------------------------------------------------------------------
+ {{{-20,1},{-123,-9}},{{-20,1},{123,3}},{{2,-11},{2,211}},{{2,112},{2,-123}}}
+(1 row)
+
+SELECT array_sort((t.a) [-13:-11][0:1][21:22]) from t where array_ndims(a) = 3;
+ array_sort
+------------------------------------------------------------
+ {{{-20,1},{-123,-9}},{{-20,1},{123,3}},{{2,112},{2,-123}}}
+(1 row)
+
+SELECT array_sort((t.a) [-13:-11][0:0][20:21]) from t where array_ndims(a) = 3;
+ array_sort
+---------------------------------
+ {{{1,2}},{{10,-20}},{{10,-20}}}
+(1 row)
+
+drop table t;
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index 17205f483b..2a40c401b5 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -836,6 +836,8 @@ SELECT array_reverse('{1,2,3,NULL,4,5,6}'::int[]);
SELECT array_reverse('{{1,2},{3,4},{5,6},{7,8}}'::int[]);
-- array_sort
+SELECT array_sort('{1}'::int[]);
+SELECT array_sort('{{2,1}}'::int[]);
SELECT array_sort('{}'::int[]);
SELECT array_sort('{1,3,5,2,4,6}'::int[]);
SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
@@ -843,4 +845,18 @@ SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
-- multidimensional array tests
-SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
\ No newline at end of file
+SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
+create table t(a int[]);
+insert into t values ('[-1:-0]={7,1}'::int[]),
+('[-2:-0][20:21]={{1,2},{10,20},{1,-4}}'),
+('[-2:-0][20:22]={{-11,2,-1},{-11,2, 1},{-11,-4, 10}}'),
+('[-13:-10][0:1][20:22]={
+{{1,2,112},{1,2,-123}},
+{{10,-20,1},{11,123,3}},
+{{10,-20,1},{11,-123,-9}},
+{{1,2,-11},{1,2,211}}}'::int[]);
+SELECT array_sort(t.a) from t;
+SELECT array_sort((t.a) [-13:-10][0:1][21:22]) from t where array_ndims(a) = 3;
+SELECT array_sort((t.a) [-13:-11][0:1][21:22]) from t where array_ndims(a) = 3;
+SELECT array_sort((t.a) [-13:-11][0:0][20:21]) from t where array_ndims(a) = 3;
+drop table t;
\ No newline at end of file
--
2.34.1
v12-0001-general-purpose-array_sort.patchapplication/x-patch; name=v12-0001-general-purpose-array_sort.patchDownload
From ea85acab5039ef8d23e1e994c9d1d4ae79bdb29e Mon Sep 17 00:00:00 2001
From: jian he <jian.universality@gmail.com>
Date: Tue, 5 Nov 2024 14:40:56 +0800
Subject: [PATCH v12 1/3] general purpose array_sort
Sorts anyarray on its first dimension.
The sort order is determined by the "<" operator.
Signed-off-by: Junwang Zhao <zhjwpku@gmail.com>
---
doc/src/sgml/func.sgml | 18 +++
src/backend/utils/adt/array_userfuncs.c | 110 ++++++++++++++++++
src/include/catalog/pg_proc.dat | 3 +
src/test/regress/expected/arrays.out | 38 ++++++
.../regress/expected/collate.icu.utf8.out | 13 +++
src/test/regress/sql/arrays.sql | 10 ++
src/test/regress/sql/collate.icu.utf8.sql | 4 +
src/tools/pgindent/typedefs.list | 1 +
8 files changed, 197 insertions(+)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 73979f20ff..748611dee5 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -20438,6 +20438,24 @@ SELECT NULLIF(value, '(none)') ...
</para></entry>
</row>
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <indexterm>
+ <primary>array_sort</primary>
+ </indexterm>
+ <function>array_sort</function> ( <type>anyarray</type> )
+ <returnvalue>anyarray</returnvalue>
+ </para>
+ <para>
+ Sorts the first dimension of the array. The sort order is determined by the <literal>&lt;</literal> operator of the element type, nulls will appear after non-null values.
+ The collation to use can be forced by adding a <literal>COLLATE</literal> clause to any of the arguments.
+ </para>
+ <para>
+ <literal>array_sort(ARRAY[[2,4],[2,1],[6,5]])</literal>
+ <returnvalue>{{2,1},{2,4},{6,5}}</returnvalue>
+ </para></entry>
+ </row>
+
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm id="function-array-to-string">
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index d053808f6e..a45586c892 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -16,11 +16,13 @@
#include "common/int.h"
#include "common/pg_prng.h"
#include "libpq/pqformat.h"
+#include "miscadmin.h"
#include "port/pg_bitutils.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/lsyscache.h"
+#include "utils/tuplesort.h"
#include "utils/typcache.h"
/*
@@ -1797,3 +1799,111 @@ array_reverse(PG_FUNCTION_ARGS)
PG_RETURN_ARRAYTYPE_P(result);
}
+
+typedef struct ArraySortCachedInfo
+{
+ TypeCacheEntry *typentry;
+ TypeCacheEntry *array_typentry;
+} ArraySortCachedInfo;
+
+/*
+ * array_sort
+ *
+ * Sorts the first dimension of the array.
+ * The sort order is determined by the "<" operator of the element type.
+ */
+Datum
+array_sort(PG_FUNCTION_ARGS)
+{
+ ArrayType *array = PG_GETARG_ARRAYTYPE_P(0);
+ Oid elmtyp;
+ Oid array_type;
+ Oid collation = PG_GET_COLLATION();
+ ArraySortCachedInfo *cache_info;
+ TypeCacheEntry *typentry;
+ Tuplesortstate *tuplesortstate;
+ ArrayIterator array_iterator;
+ Datum value;
+ bool isnull;
+ ArrayBuildStateAny *astate = NULL;
+
+ if (ARR_NDIM(array) < 1)
+ PG_RETURN_ARRAYTYPE_P(array);
+
+ elmtyp = ARR_ELEMTYPE(array);
+ cache_info = (ArraySortCachedInfo *) fcinfo->flinfo->fn_extra;
+ if (cache_info == NULL)
+ {
+ cache_info = (ArraySortCachedInfo *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ sizeof(ArraySortCachedInfo));
+ cache_info->typentry = NULL;
+ cache_info->array_typentry = NULL;
+ fcinfo->flinfo->fn_extra = (void *) cache_info;
+ }
+
+ if (ARR_NDIM(array) == 1)
+ {
+ /* Finds the ordering operator for the type for 1-D arrays */
+ typentry = cache_info->typentry;
+ if (typentry == NULL || typentry->type_id != elmtyp)
+ {
+ typentry = lookup_type_cache(elmtyp, TYPECACHE_LT_OPR);
+ if (!OidIsValid(typentry->lt_opr))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify ordering operator for type %s",
+ format_type_be(elmtyp))));
+ cache_info->typentry = typentry;
+ }
+ }
+ else
+ {
+ /* Finds the ordering operator for the array type for multi-D arrays */
+ typentry = cache_info->array_typentry;
+ if (typentry == NULL || typentry->typelem != elmtyp)
+ {
+ array_type = get_array_type(elmtyp);
+ if (!OidIsValid(array_type))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("could not find array type for data type %s",
+ format_type_be(elmtyp))));
+ typentry = lookup_type_cache(array_type, TYPECACHE_LT_OPR);
+ if (!OidIsValid(typentry->lt_opr))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify ordering operator for type %s",
+ format_type_be(array_type))));
+ cache_info->array_typentry = typentry;
+ }
+ }
+
+ tuplesortstate = tuplesort_begin_datum(typentry->type_id,
+ typentry->lt_opr,
+ collation,
+ false, work_mem, NULL, false);
+
+ array_iterator = array_create_iterator(array, ARR_NDIM(array) - 1, NULL);
+ while (array_iterate(array_iterator, &value, &isnull))
+ {
+ tuplesort_putdatum(tuplesortstate, value, isnull);
+ }
+ array_free_iterator(array_iterator);
+
+ /*
+ * Do the sort.
+ */
+ tuplesort_performsort(tuplesortstate);
+
+ while (tuplesort_getdatum(tuplesortstate, true, false, &value, &isnull, NULL))
+ {
+ astate = accumArrayResultAny(astate, value, isnull,
+ typentry->type_id, CurrentMemoryContext);
+ }
+
+ tuplesort_end(tuplesortstate);
+
+ /* Avoid leaking memory when handed toasted input */
+ PG_FREE_IF_COPY(array, 0);
+ PG_RETURN_DATUM(makeArrayResultAny(astate, CurrentMemoryContext, true));
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index f23321a41f..54ebbbd135 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1744,6 +1744,9 @@
{ oid => '8686', descr => 'reverse array',
proname => 'array_reverse', prorettype => 'anyarray',
proargtypes => 'anyarray', prosrc => 'array_reverse' },
+{ oid => '8810', descr => 'sort array',
+ proname => 'array_sort', prorettype => 'anyarray',
+ proargtypes => 'anyarray', prosrc => 'array_sort'},
{ oid => '3816', descr => 'array typanalyze',
proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool',
proargtypes => 'internal', prosrc => 'array_typanalyze' },
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index 0b61fb5bb7..846d6acd85 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -2734,3 +2734,41 @@ SELECT array_reverse('{{1,2},{3,4},{5,6},{7,8}}'::int[]);
{{7,8},{5,6},{3,4},{1,2}}
(1 row)
+-- array_sort
+SELECT array_sort('{}'::int[]);
+ array_sort
+------------
+ {}
+(1 row)
+
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+ array_sort
+---------------
+ {1,2,3,4,5,6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+ array_sort
+---------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
+ array_sort
+-----------------------
+ {Abc,CCC,bar,bbc,foo}
+(1 row)
+
+SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
+ array_sort
+----------------------------
+ {Abc,CCC,bar,bbc,foo,NULL}
+(1 row)
+
+-- multidimensional array tests
+SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
+ array_sort
+---------------------
+ {{2,1},{2,4},{6,5}}
+(1 row)
+
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index faa376e060..aa5fd75e6e 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1338,6 +1338,19 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse
t | t
(1 row)
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+ array_sort
+------------
+ {a,B}
+(1 row)
+
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+ array_sort
+------------
+ {B,a}
+(1 row)
+
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index 03cc8cfcd9..17205f483b 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -834,3 +834,13 @@ SELECT array_reverse('{1}'::int[]);
SELECT array_reverse('{1,2}'::int[]);
SELECT array_reverse('{1,2,3,NULL,4,5,6}'::int[]);
SELECT array_reverse('{{1,2},{3,4},{5,6},{7,8}}'::int[]);
+
+-- array_sort
+SELECT array_sort('{}'::int[]);
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
+SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
+
+-- multidimensional array tests
+SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
\ No newline at end of file
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index 80f28a97d7..3c739d332b 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -536,6 +536,10 @@ CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=second
SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive;
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 1847bbfa95..e740e449f5 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -150,6 +150,7 @@ ArrayIOData
ArrayIterator
ArrayMapState
ArrayMetaState
+ArraySortCachedInfo
ArraySubWorkspace
ArrayToken
ArrayType
--
2.34.1
Hi jian,
On Tue, Nov 5, 2024 at 3:13 PM jian he <jian.universality@gmail.com> wrote:
On Mon, Nov 4, 2024 at 7:34 PM Dean Rasheed <dean.a.rasheed@gmail.com> wrote:
Testing this with an array with non-default lower bounds, it fails to
preserve the array bounds, which I think it should (note:
array_reverse() and array_shuffle() do preserve the bounds):

SELECT array_reverse(a), array_shuffle(a), array_sort(a)
FROM (VALUES ('[10:12][20:21]={{1,2},{10,20},{3,4}}'::int[])) v(a);

-[ RECORD 1 ]-+-------------------------------------
array_reverse | [10:12][20:21]={{3,4},{10,20},{1,2}}
array_shuffle | [10:12][20:21]={{10,20},{3,4},{1,2}}
array_sort | [1:3][20:21]={{1,2},{3,4},{10,20}}

If I understand it correctly,
array_create_iterator cannot cope with top dimension bound information.
Since the input array argument already has the dims and lbs information,
at the end of array_sort we directly copy it
from the input array argument to astate.
tuplesort_performsort won't need array bounds, so we should be safe?
v12-0001 same as v11-0001-general-purpose-array_sort.patch, only
resolve git conflict
v12-0002 preserve array bound information.
v12-0003 caches ArrayMetaState.

After v12-0003 we now have:
typedef struct ArraySortCachedInfo
{
TypeCacheEntry *typentry;
TypeCacheEntry *array_typentry;
ArrayMetaState array_meta;
} ArraySortCachedInfo;

In function array_create_iterator, get_typlenbyvalalign
will do a cache search, so we can cache ArrayMetaState.
That way multiple array_create_iterator calls won't need to call
get_typlenbyvalalign every time.

In 0002, I also have a 3-dimensional array test.
create table t(a int[]);
insert into t values ('[-1:-0]={7,1}'::int[]),
('[-2:-0][20:21]={{1,2},{10,20},{1,-4}}'),
('[-2:-0][20:22]={{-11,2,-1},{-11,2, 1},{-11,-4, 10}}'),
('[-13:-10][0:1][20:22]={
{{1,2,112},{1,2,-123}},
{{10,-20,1},{11,123,3}},
{{10,-20,1},{11,-123,-9}},
{{1,2,-11},{1,2,211}}}'::int[]);
SELECT array_sort(t.a) from t;
SELECT array_sort((t.a) [-13:-10][0:1][21:22]) from t where array_ndims(a) = 3;
SELECT array_sort((t.a) [-13:-11][0:1][21:22]) from t where array_ndims(a) = 3;
SELECT array_sort((t.a) [-13:-11][0:0][20:21]) from t where array_ndims(a) = 3;

The test output is ok to me.
Thanks for the bounds preserve solution, I just looked at 0002,
+ if (astate->arraystate != NULL)
+ {
+ memcpy(astate->arraystate->dims, dims, ndim * sizeof(int));
+ memcpy(astate->arraystate->lbs, lbs, ndim * sizeof(int));
+ Assert(ndim == astate->arraystate->ndims);
+ }
It seems to me we only need to set astate->arraystate->lbs[0] = lbs[0] ?
--
Regards
Junwang Zhao
On Tue, Nov 5, 2024 at 8:30 PM Junwang Zhao <zhjwpku@gmail.com> wrote:
Thanks for the bounds preserve solution, I just looked at 0002,
+ if (astate->arraystate != NULL)
+ {
+ memcpy(astate->arraystate->dims, dims, ndim * sizeof(int));
+ memcpy(astate->arraystate->lbs, lbs, ndim * sizeof(int));
+ Assert(ndim == astate->arraystate->ndims);
+ }

It seems to me we only need to set astate->arraystate->lbs[0] = lbs[0] ?
yes.
+ memcpy(astate->arraystate->dims, dims, ndim * sizeof(int));
Thinking about it, this is wrong. We should just do an Assert:
for(int i = 0; i < ndim; i++)
{
Assert(astate->arraystate->dims[i] == dims[i]);
}
or just remove
memcpy(astate->arraystate->dims, dims, ndim * sizeof(int));
Hi Michael,
On Mon, Nov 4, 2024 at 1:46 PM Michael Paquier <michael@paquier.xyz> wrote:
On Sun, Nov 03, 2024 at 11:33:05AM +0800, Junwang Zhao wrote:
Rebase needed due to array_reverse committed, PFA v11.
There has been another conflict since you have posted this version
(noticed that after my business in 027124a872d7). I have looked at
0001.+ if (ARR_NDIM(array) < 1) + PG_RETURN_ARRAYTYPE_P(array); There is no point in doing a sort if the array has only one element. You can add a check based on "ARR_DIMS(array)[0] < 2" to achieve that.
Yeah, this is reasonable, but there is one case I'm not sure about:
SELECT array_sort('{{2,3,4}}'::xid[]);
This will return the array as is, but xid doesn't have an LT_OPR. Should
I error out in this case, with something like:
could not identify ordering operator for type xid[]
+ typentry = lookup_type_cache(elmtyp, TYPECACHE_LT_OPR);
+ if (!OidIsValid(typentry->lt_opr))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify ordering operator for type %s",
+ format_type_be(elmtyp))));

The patch introduces two error paths based on the fact that ordering
operators could not be found depending on a data type that lacks the
ordering operator and the array ordering operator part. It is right
to issue an error if these are lacking, like the various stats paths.
Should we have some regression tests with specific data types for
these errors, though? The stats paths don't care much about these
error cases, but it does not mean that we should not care about them.
In short, let's have negative test coverage if we can.

+typedef struct ArraySortCachedInfo
+{
+ TypeCacheEntry *typentry;
+ TypeCacheEntry *array_typentry;
+} ArraySortCachedInfo;

Let's put that at the top of the file, with a comment about how it
links to array_sort() for the caching with fn_extra. Let's also
document the meaning of the fields.
Will fix it in the following patch set.
FWIW, I am confused by this implementation, where you have to allocate
the two TypeCacheEntry because of the fact that you have to deal with
the 1-dimension case and the multi-dimension case. In the context of
a single function call, why do you need both typentry and
array_typentry, actually? Wouldn't it be enough to use one typentry
that points to the typcache, meaning that you don't really need to use
the extra business with fn_mcxt, no? If you require both (because I
may be wrong), perhaps you should have a regression test that's able
to break when removing array_typentry, changing the code to only rely
on typentry. Note: I have just removed array_typentry in a quick
test, current coverage was happy about it. Feel free to prove me
wrong.

Agreed that the function should be immutable. The results are fixed
depending on the input even with the COLLATE clauses appended.

Let's add something when there is only one element in the first
dimension of the array, say two cases one with an int and one with an
array of ints like:
SELECT array_sort('{1}'::int[]);
SELECT array_sort('{{1}}'::int[]);
Will add.
--
Michael
--
Regards
Junwang Zhao
On Tue, Nov 5, 2024 at 9:13 AM Michael Paquier <michael@paquier.xyz> wrote:
On Mon, Nov 04, 2024 at 03:16:35PM +0800, jian he wrote:
drop table if exists t;
CREATE TABLE t (a int[]);
insert into t values ('{1,3}'),('{1,2,3}'),('{11}');
insert into t values ('{{1,12}}'), ('{{4,3}}');
SELECT array_sort(a) from t;

In the above case,
tuplesort_begin_datum needs both the int type information and the int[] type information;
otherwise the cached TypeCacheEntry would be used to sort the multi-dimensional array,
which would make the result wrong.

All these behaviors need more extensive testing.
This brings me an extra question around the caching. Would the
sorting be able to behave correctly when feeding to a single
array_sort() context array values that have multiple COLLATE clauses?
Or merge_collation_state() would be smart enough to make sure that
collation conflicts never happen to begin with? I am wondering if we
should worry about multiple VALUES, CTEs, or PL functions where
array_sort() could be fed into its cache values that lead to
unpredictible results for some values. This stuff should perhaps have
more testing around such behaviors, stressing what kind of
interactions we have between the sorting of multiple values and the
caching, in the context of a single array_sort() call.
I'm afraid this cannot be achieved in my current implementation; a simple
case is:
SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[]);
{Abc,bar,bbc,CCC,foo,NULL}
SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
{Abc,CCC,bar,bbc,foo,NULL}
SELECT array_sort(a COLLATE "C") FROM (VALUES
('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C"),
('{foo,bar,null,CCC,Abc,bbc}'::text[])) v(a);
{Abc,CCC,bar,bbc,foo,NULL}
{Abc,CCC,bar,bbc,foo,NULL}
Maybe add some documentation to specify this?
--
Michael
--
Regards
Junwang Zhao
Hi jian,
On Tue, Nov 5, 2024 at 3:13 PM jian he <jian.universality@gmail.com> wrote:
On Mon, Nov 4, 2024 at 7:34 PM Dean Rasheed <dean.a.rasheed@gmail.com> wrote:
Testing this with an array with non-default lower bounds, it fails to
preserve the array bounds, which I think it should (note:
array_reverse() and array_shuffle() do preserve the bounds):

SELECT array_reverse(a), array_shuffle(a), array_sort(a)
FROM (VALUES ('[10:12][20:21]={{1,2},{10,20},{3,4}}'::int[])) v(a);

-[ RECORD 1 ]-+-------------------------------------
array_reverse | [10:12][20:21]={{3,4},{10,20},{1,2}}
array_shuffle | [10:12][20:21]={{10,20},{3,4},{1,2}}
array_sort | [1:3][20:21]={{1,2},{3,4},{10,20}}

If I understand it correctly,
array_create_iterator cannot cope with top dimension bound information.
Since the input array argument already has the dims and lbs information,
at the end of array_sort we directly copy it
from the input array argument to astate.
tuplesort_performsort won't need array bounds, so we should be safe?
v12-0001 same as v11-0001-general-purpose-array_sort.patch, only
resolve git conflict
v12-0002 preserve array bound information.
v12-0003 caches ArrayMetaState.

After v12-0003 we now have:
typedef struct ArraySortCachedInfo
{
TypeCacheEntry *typentry;
TypeCacheEntry *array_typentry;
ArrayMetaState array_meta;
} ArraySortCachedInfo;

In function array_create_iterator, get_typlenbyvalalign
will do a cache search, so we can cache ArrayMetaState.
That way multiple array_create_iterator calls won't need to call
get_typlenbyvalalign every time.

In 0002, I also have a 3-dimensional array test.
create table t(a int[]);
insert into t values ('[-1:-0]={7,1}'::int[]),
('[-2:-0][20:21]={{1,2},{10,20},{1,-4}}'),
('[-2:-0][20:22]={{-11,2,-1},{-11,2, 1},{-11,-4, 10}}'),
('[-13:-10][0:1][20:22]={
{{1,2,112},{1,2,-123}},
{{10,-20,1},{11,123,3}},
{{10,-20,1},{11,-123,-9}},
{{1,2,-11},{1,2,211}}}'::int[]);
SELECT array_sort(t.a) from t;
SELECT array_sort((t.a) [-13:-10][0:1][21:22]) from t where array_ndims(a) = 3;
SELECT array_sort((t.a) [-13:-11][0:1][21:22]) from t where array_ndims(a) = 3;
SELECT array_sort((t.a) [-13:-11][0:0][20:21]) from t where array_ndims(a) = 3;

The test output is ok to me.
Merged into V13 with some change and added you as a co-author.
There is one issue left, as Michael pointed out: feeding array values
that have multiple COLLATE clauses to a single array_sort() context.
I have no idea how to resolve it ;(
--
Regards
Junwang Zhao
Attachments:
v13-0002-support-sort-order-and-nullsfirst-flag.patchapplication/octet-stream; name=v13-0002-support-sort-order-and-nullsfirst-flag.patchDownload
From d74ba41d298eaf7f790a1ed4c4e9702ebb519aeb Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Sat, 26 Oct 2024 03:53:17 +0000
Subject: [PATCH v13 2/2] support sort order and nullsfirst flag
Signed-off-by: Junwang Zhao <zhjwpku@gmail.com>
---
doc/src/sgml/func.sgml | 6 +++-
src/backend/utils/adt/array_userfuncs.c | 42 ++++++++++++++++++++-----
src/include/catalog/pg_proc.dat | 6 ++++
src/test/regress/expected/arrays.out | 38 ++++++++++++++++++++++
src/test/regress/sql/arrays.sql | 10 ++++++
5 files changed, 94 insertions(+), 8 deletions(-)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 748611dee5..afbda78688 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -20443,12 +20443,16 @@ SELECT NULLIF(value, '(none)') ...
<indexterm>
<primary>array_sort</primary>
</indexterm>
- <function>array_sort</function> ( <type>anyarray</type> )
+ <function>array_sort</function> ( <type>anyarray</type> <optional>, <parameter>is_ascending</parameter> <type>boolean</type> <optional>, <parameter>nulls_first</parameter> <type>boolean</type></optional></optional>)
<returnvalue>anyarray</returnvalue>
</para>
<para>
Sorts the first dimension of the array. The sort order is determined by the <literal>&lt;</literal> operator of the element type, nulls will appear after non-null values.
The collation to use can be forced by adding a <literal>COLLATE</literal> clause to any of the arguments.
+ If <parameter>is_ascending</parameter> is true then sort by ascending order, otherwise descending order.
+ <parameter>is_ascending</parameter> defaults to true.
+ If <parameter>nulls_first</parameter> is true then nulls appear before non-null values, otherwise nulls appear after non-null values.
+ <parameter>nulls_first</parameter> defaults to the opposite of <parameter>is_ascending</parameter> if not provided.
</para>
<para>
<literal>array_sort(ARRAY[[2,4],[2,1],[6,5]])</literal>
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index 3a50e5ca97..6e5125b46e 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -1816,7 +1816,6 @@ array_reverse(PG_FUNCTION_ARGS)
* array_sort
*
* Sorts the first dimension of the array.
- * The sort order is determined by the "<" operator of the element type.
*/
Datum
array_sort(PG_FUNCTION_ARGS)
@@ -1835,6 +1834,8 @@ array_sort(PG_FUNCTION_ARGS)
int ndim,
*dims,
*lbs;
+ bool is_ascending = true;
+ bool nulls_first = false;
ndim = ARR_NDIM(array);
dims = ARR_DIMS(array);
@@ -1843,6 +1844,17 @@ array_sort(PG_FUNCTION_ARGS)
if (ndim < 1 || dims[0] < 2)
PG_RETURN_ARRAYTYPE_P(array);
+ if (PG_NARGS() > 1)
+ {
+ is_ascending = PG_GETARG_BOOL(1);
+
+ /*
+ * If nulls_first not provided, it defaults to the opposite of
+ * is_ascending.
+ */
+ nulls_first = PG_NARGS() > 2 ? PG_GETARG_BOOL(2) : !is_ascending;
+ }
+
elmtyp = ARR_ELEMTYPE(array);
cache_info = (ArraySortCachedInfo *) fcinfo->flinfo->fn_extra;
if (cache_info == NULL)
@@ -1860,8 +1872,10 @@ array_sort(PG_FUNCTION_ARGS)
typentry = cache_info->typentry;
if (typentry == NULL || typentry->type_id != elmtyp)
{
- typentry = lookup_type_cache(elmtyp, TYPECACHE_LT_OPR);
- if (!OidIsValid(typentry->lt_opr))
+ typentry = lookup_type_cache(elmtyp,
+ is_ascending ? TYPECACHE_LT_OPR : TYPECACHE_GT_OPR);
+ if ((is_ascending && !OidIsValid(typentry->lt_opr)) ||
+ (!is_ascending && !OidIsValid(typentry->gt_opr)))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_FUNCTION),
errmsg("could not identify ordering operator for type %s",
@@ -1885,8 +1899,10 @@ array_sort(PG_FUNCTION_ARGS)
(errcode(ERRCODE_UNDEFINED_OBJECT),
errmsg("could not find array type for data type %s",
format_type_be(elmtyp))));
- typentry = lookup_type_cache(array_type, TYPECACHE_LT_OPR);
- if (!OidIsValid(typentry->lt_opr))
+ typentry = lookup_type_cache(array_type,
+ is_ascending ? TYPECACHE_LT_OPR : TYPECACHE_GT_OPR);
+ if ((is_ascending && !OidIsValid(typentry->lt_opr)) ||
+ (!is_ascending && !OidIsValid(typentry->gt_opr)))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_FUNCTION),
errmsg("could not identify ordering operator for type %s",
@@ -1901,9 +1917,9 @@ array_sort(PG_FUNCTION_ARGS)
}
tuplesortstate = tuplesort_begin_datum(typentry->type_id,
- typentry->lt_opr,
+ is_ascending ? typentry->lt_opr : typentry->gt_opr,
collation,
- false, work_mem, NULL, false);
+ nulls_first, work_mem, NULL, false);
array_iterator = array_create_iterator(array, ndim - 1, &cache_info->array_meta);
while (array_iterate(array_iterator, &value, &isnull))
@@ -1935,3 +1951,15 @@ array_sort(PG_FUNCTION_ARGS)
PG_FREE_IF_COPY(array, 0);
PG_RETURN_DATUM(makeArrayResultAny(astate, CurrentMemoryContext, true));
}
+
+Datum
+array_sort_order(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo);
+}
+
+Datum
+array_sort_order_nulls_first(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo);
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 54ebbbd135..c6593c63fb 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1747,6 +1747,12 @@
{ oid => '8810', descr => 'sort array',
proname => 'array_sort', prorettype => 'anyarray',
proargtypes => 'anyarray', prosrc => 'array_sort'},
+{ oid => '8811', descr => 'sort array',
+ proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+ proargtypes => 'anyarray bool', prosrc => 'array_sort_order'},
+{ oid => '8812', descr => 'sort array',
+ proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+ proargtypes => 'anyarray bool bool', prosrc => 'array_sort_order_nulls_first'},
{ oid => '3816', descr => 'array typanalyze',
proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool',
proargtypes => 'internal', prosrc => 'array_typanalyze' },
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index 9e7b444fd8..88c969ccbd 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -2771,6 +2771,44 @@ SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
{Abc,CCC,bar,bbc,foo,NULL}
(1 row)
+-- array_sort with order specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true);
+ array_sort
+--------------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6,NULL}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false);
+ array_sort
+--------------------------------
+ {NULL,6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+-- array_sort with order and nullsfirst flag specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, true);
+ array_sort
+--------------------------------
+ {NULL,1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, false);
+ array_sort
+--------------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6,NULL}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, true);
+ array_sort
+--------------------------------
+ {NULL,6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, false);
+ array_sort
+--------------------------------
+ {6.6,5.5,4.4,3.3,2.2,1.1,NULL}
+(1 row)
+
-- multidimensional array tests
SELECT array_sort('{{1}}'::int[]);
array_sort
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index 2b75a21f06..9cb73f3801 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -843,6 +843,16 @@ SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
+-- array_sort with order specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false);
+
+-- array_sort with order and nullsfirst flag specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, true);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, false);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, true);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, false);
+
-- multidimensional array tests
SELECT array_sort('{{1}}'::int[]);
SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
--
2.39.5
v13-0001-general-purpose-array_sort.patchapplication/octet-stream; name=v13-0001-general-purpose-array_sort.patchDownload
From 841fcfce37be69758a006d8645ac3ecf48677d18 Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Fri, 27 Sep 2024 13:05:40 +0000
Subject: [PATCH v13 1/2] general purpose array_sort
Sorts anyarray on its first dimension.
The sort order is determined by the "<" operator.
Author: Junwang Zhao <zhjwpku@gmail.com>
Co-authored-by: Jian He <jian.universality@gmail.com>
Signed-off-by: Junwang Zhao <zhjwpku@gmail.com>
---
doc/src/sgml/func.sgml | 18 +++
src/backend/utils/adt/array_userfuncs.c | 138 ++++++++++++++++++
src/backend/utils/adt/arrayfuncs.c | 3 +-
src/include/catalog/pg_proc.dat | 3 +
src/include/utils/array.h | 1 +
src/test/regress/expected/arrays.out | 86 +++++++++++
.../regress/expected/collate.icu.utf8.out | 13 ++
src/test/regress/sql/arrays.sql | 23 +++
src/test/regress/sql/collate.icu.utf8.sql | 4 +
src/tools/pgindent/typedefs.list | 1 +
10 files changed, 289 insertions(+), 1 deletion(-)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 73979f20ff..748611dee5 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -20438,6 +20438,24 @@ SELECT NULLIF(value, '(none)') ...
</para></entry>
</row>
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <indexterm>
+ <primary>array_sort</primary>
+ </indexterm>
+ <function>array_sort</function> ( <type>anyarray</type> )
+ <returnvalue>anyarray</returnvalue>
+ </para>
+ <para>
+ Sorts the first dimension of the array. The sort order is determined by the <literal>&lt;</literal> operator of the element type; nulls will appear after non-null values.
+ The collation to use can be forced by adding a <literal>COLLATE</literal> clause to any of the arguments.
+ </para>
+ <para>
+ <literal>array_sort(ARRAY[[2,4],[2,1],[6,5]])</literal>
+ <returnvalue>{{2,1},{2,4},{6,5}}</returnvalue>
+ </para></entry>
+ </row>
+
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm id="function-array-to-string">
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index d053808f6e..3a50e5ca97 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -16,11 +16,13 @@
#include "common/int.h"
#include "common/pg_prng.h"
#include "libpq/pqformat.h"
+#include "miscadmin.h"
#include "port/pg_bitutils.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/lsyscache.h"
+#include "utils/tuplesort.h"
#include "utils/typcache.h"
/*
@@ -42,6 +44,18 @@ typedef struct DeserialIOData
Oid typioparam;
} DeserialIOData;
+/*
+ * ArraySortCachedInfo
+ * Lookup results that array_sort stores in fcinfo->flinfo->fn_extra and reuses on later calls
+ */
+typedef struct ArraySortCachedInfo
+{
+ TypeCacheEntry *typentry; /* type cache entry for element type; used for 1-D input */
+ TypeCacheEntry *array_typentry; /* type cache entry for array type; used for multi-D input */
+ ArrayMetaState array_meta; /* element typlen/typbyval/typalign, passed to
+ * array_create_iterator for better performance */
+} ArraySortCachedInfo;
+
static Datum array_position_common(FunctionCallInfo fcinfo);
@@ -1797,3 +1811,127 @@ array_reverse(PG_FUNCTION_ARGS)
PG_RETURN_ARRAYTYPE_P(result);
}
+
+/*
+ * array_sort
+ *
+ * Sorts the first dimension of the array: the elements of a 1-D array, or the (ndim - 1)-D slices of a multi-D array.
+ * The sort order is determined by the "<" operator of the element type (1-D) or of its array type (multi-D), under the call's collation.
+ */
+Datum
+array_sort(PG_FUNCTION_ARGS)
+{
+ ArrayType *array = PG_GETARG_ARRAYTYPE_P(0);
+ Oid elmtyp;
+ Oid array_type;
+ Oid collation = PG_GET_COLLATION(); /* collation handed to the tuplesort below */
+ ArraySortCachedInfo *cache_info;
+ TypeCacheEntry *typentry;
+ Tuplesortstate *tuplesortstate;
+ ArrayIterator array_iterator;
+ Datum value;
+ bool isnull;
+ ArrayBuildStateAny *astate = NULL;
+ int ndim,
+ *dims,
+ *lbs;
+
+ ndim = ARR_NDIM(array);
+ dims = ARR_DIMS(array);
+ lbs = ARR_LBOUND(array);
+
+ if (ndim < 1 || dims[0] < 2)
+ PG_RETURN_ARRAYTYPE_P(array); /* fewer than two items: returned as-is, before any ordering operator is looked up */
+
+ elmtyp = ARR_ELEMTYPE(array);
+ cache_info = (ArraySortCachedInfo *) fcinfo->flinfo->fn_extra; /* cache left by an earlier call, if any */
+ if (cache_info == NULL)
+ {
+ cache_info = (ArraySortCachedInfo *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ sizeof(ArraySortCachedInfo));
+ cache_info->typentry = NULL;
+ cache_info->array_typentry = NULL;
+ fcinfo->flinfo->fn_extra = (void *) cache_info;
+ }
+
+ if (ndim == 1)
+ {
+ /* 1-D input: the sorted items are the elements, so use the element type's "<" operator */
+ typentry = cache_info->typentry;
+ if (typentry == NULL || typentry->type_id != elmtyp) /* nothing cached, or cached for another element type */
+ {
+ typentry = lookup_type_cache(elmtyp, TYPECACHE_LT_OPR);
+ if (!OidIsValid(typentry->lt_opr))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify ordering operator for type %s",
+ format_type_be(elmtyp))));
+ cache_info->typentry = typentry;
+ cache_info->array_meta.element_type = elmtyp; /* NOTE(review): refreshed only on a cache miss, while the multi-D branch rewrites array_meta on every call - confirm it cannot go stale */
+ cache_info->array_meta.typlen = typentry->typlen;
+ cache_info->array_meta.typbyval = typentry->typbyval;
+ cache_info->array_meta.typalign = typentry->typalign;
+ }
+ }
+ else
+ {
+ /* multi-D input: the sorted items are sub-arrays, so use the array type's "<" operator */
+ typentry = cache_info->array_typentry;
+ if (typentry == NULL || typentry->typelem != elmtyp) /* nothing cached, or cached for another element type */
+ {
+ array_type = get_array_type(elmtyp);
+ if (!OidIsValid(array_type))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("could not find array type for data type %s",
+ format_type_be(elmtyp))));
+ typentry = lookup_type_cache(array_type, TYPECACHE_LT_OPR);
+ if (!OidIsValid(typentry->lt_opr))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify ordering operator for type %s",
+ format_type_be(array_type))));
+ cache_info->array_typentry = typentry;
+ }
+ cache_info->array_meta.element_type = elmtyp; /* the iterator needs element-level metadata, not the array type's */
+ get_typlenbyvalalign(elmtyp,
+ &cache_info->array_meta.typlen,
+ &cache_info->array_meta.typbyval,
+ &cache_info->array_meta.typalign);
+ }
+
+ tuplesortstate = tuplesort_begin_datum(typentry->type_id,
+ typentry->lt_opr,
+ collation,
+ false, work_mem, NULL, false);
+
+ array_iterator = array_create_iterator(array, ndim - 1, &cache_info->array_meta); /* iterate in slices of ndim - 1 dimensions (0 for a 1-D array) */
+ while (array_iterate(array_iterator, &value, &isnull))
+ {
+ tuplesort_putdatum(tuplesortstate, value, isnull);
+ }
+ array_free_iterator(array_iterator);
+
+ /*
+ * Every item has been fed to the tuplesort; do the sort before reading results back.
+ */
+ tuplesort_performsort(tuplesortstate);
+
+ while (tuplesort_getdatum(tuplesortstate, true, false, &value, &isnull, NULL))
+ {
+ astate = accumArrayResultAny(astate, value, isnull,
+ typentry->type_id, CurrentMemoryContext);
+ }
+
+ tuplesort_end(tuplesortstate);
+
+ /* bounds preservation: carry the input's first-dimension lower bound over to the result */
+ if (ndim == 1)
+ astate->scalarstate->lb = lbs[0];
+ else
+ astate->arraystate->lbs[0] = lbs[0];
+
+ /* Avoid leaking memory when handed toasted input */
+ PG_FREE_IF_COPY(array, 0);
+ PG_RETURN_DATUM(makeArrayResultAny(astate, CurrentMemoryContext, true));
+}
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index a715e7e0b8..5d5d9b5bbf 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -5330,6 +5330,7 @@ initArrayResultWithSize(Oid element_type, MemoryContext rcontext,
MemoryContextAlloc(arr_context, astate->alen * sizeof(bool));
astate->nelems = 0;
astate->element_type = element_type;
+ astate->lb = 1; /* default lower bound */
get_typlenbyvalalign(element_type,
&astate->typlen,
&astate->typbyval,
@@ -5867,7 +5868,7 @@ makeArrayResultAny(ArrayBuildStateAny *astate,
/* If no elements were presented, we want to create an empty array */
ndims = (astate->scalarstate->nelems > 0) ? 1 : 0;
dims[0] = astate->scalarstate->nelems;
- lbs[0] = 1;
+ lbs[0] = astate->scalarstate->lb;
result = makeMdArrayResult(astate->scalarstate, ndims, dims, lbs,
rcontext, release);
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index f23321a41f..54ebbbd135 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1744,6 +1744,9 @@
{ oid => '8686', descr => 'reverse array',
proname => 'array_reverse', prorettype => 'anyarray',
proargtypes => 'anyarray', prosrc => 'array_reverse' },
+{ oid => '8810', descr => 'sort array',
+ proname => 'array_sort', prorettype => 'anyarray',
+ proargtypes => 'anyarray', prosrc => 'array_sort'},
{ oid => '3816', descr => 'array typanalyze',
proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool',
proargtypes => 'internal', prosrc => 'array_typanalyze' },
diff --git a/src/include/utils/array.h b/src/include/utils/array.h
index 157cc0e4c6..f04cbe4eeb 100644
--- a/src/include/utils/array.h
+++ b/src/include/utils/array.h
@@ -192,6 +192,7 @@ typedef struct ArrayBuildState
int alen; /* allocated length of above arrays */
int nelems; /* number of valid entries in above arrays */
Oid element_type; /* data type of the Datums */
+ int lb; /* lower bound for one dimension array */
int16 typlen; /* needed info about datatype */
bool typbyval;
char typalign;
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index 0b61fb5bb7..9e7b444fd8 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -2734,3 +2734,89 @@ SELECT array_reverse('{{1,2},{3,4},{5,6},{7,8}}'::int[]);
{{7,8},{5,6},{3,4},{1,2}}
(1 row)
+-- array_sort
+SELECT array_sort('{}'::int[]);
+ array_sort
+------------
+ {}
+(1 row)
+
+SELECT array_sort('{1}'::int[]);
+ array_sort
+------------
+ {1}
+(1 row)
+
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+ array_sort
+---------------
+ {1,2,3,4,5,6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+ array_sort
+---------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
+ array_sort
+-----------------------
+ {Abc,CCC,bar,bbc,foo}
+(1 row)
+
+SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
+ array_sort
+----------------------------
+ {Abc,CCC,bar,bbc,foo,NULL}
+(1 row)
+
+-- multidimensional array tests
+SELECT array_sort('{{1}}'::int[]);
+ array_sort
+------------
+ {{1}}
+(1 row)
+
+SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
+ array_sort
+---------------------
+ {{2,1},{2,4},{6,5}}
+(1 row)
+
+-- no ordering operator tests
+SELECT array_sort('{1,2,3}'::xid[]);
+ERROR: could not identify ordering operator for type xid
+SELECT array_sort('{{1,2,3},{2,3,4}}'::xid[]);
+ERROR: could not identify ordering operator for type xid[]
+-- bounds preservation tests
+SELECT array_sort(a) FROM (VALUES ('[10:12][20:21]={{1,2},{10,20},{3,4}}'::int[])) v(a);
+ array_sort
+--------------------------------------
+ [10:12][20:21]={{1,2},{3,4},{10,20}}
+(1 row)
+
+SELECT array_sort(a) FROM (VALUES ('[-1:0]={7,1}'::int[])) v(a);
+ array_sort
+--------------
+ [-1:0]={1,7}
+(1 row)
+
+SELECT array_sort(a) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+ array_sort
+--------------------------------------
+ [-2:0][20:21]={{1,-4},{1,2},{10,20}}
+(1 row)
+
+SELECT array_sort(a [-1:0]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+ array_sort
+------------------
+ {{1,-4},{10,20}}
+(1 row)
+
+SELECT array_sort(a [-1:0][20:20]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+ array_sort
+------------
+ {{1},{10}}
+(1 row)
+
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index faa376e060..aa5fd75e6e 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1338,6 +1338,19 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse
t | t
(1 row)
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+ array_sort
+------------
+ {a,B}
+(1 row)
+
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+ array_sort
+------------
+ {B,a}
+(1 row)
+
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index 03cc8cfcd9..2b75a21f06 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -834,3 +834,26 @@ SELECT array_reverse('{1}'::int[]);
SELECT array_reverse('{1,2}'::int[]);
SELECT array_reverse('{1,2,3,NULL,4,5,6}'::int[]);
SELECT array_reverse('{{1,2},{3,4},{5,6},{7,8}}'::int[]);
+
+-- array_sort
+SELECT array_sort('{}'::int[]);
+SELECT array_sort('{1}'::int[]);
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
+SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
+
+-- multidimensional array tests
+SELECT array_sort('{{1}}'::int[]);
+SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
+
+-- no ordering operator tests
+SELECT array_sort('{1,2,3}'::xid[]);
+SELECT array_sort('{{1,2,3},{2,3,4}}'::xid[]);
+
+-- bounds preservation tests
+SELECT array_sort(a) FROM (VALUES ('[10:12][20:21]={{1,2},{10,20},{3,4}}'::int[])) v(a);
+SELECT array_sort(a) FROM (VALUES ('[-1:0]={7,1}'::int[])) v(a);
+SELECT array_sort(a) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+SELECT array_sort(a [-1:0]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+SELECT array_sort(a [-1:0][20:20]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index 80f28a97d7..3c739d332b 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -536,6 +536,10 @@ CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=second
SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive;
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 1847bbfa95..e740e449f5 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -150,6 +150,7 @@ ArrayIOData
ArrayIterator
ArrayMapState
ArrayMetaState
+ArraySortCachedInfo
ArraySubWorkspace
ArrayToken
ArrayType
--
2.39.5
On Thu, Nov 7, 2024 at 8:56 AM Junwang Zhao <zhjwpku@gmail.com> wrote:
Yeah, this is reasonable but one case I can't be sure:
SELECT array_sort('{{2,3,4}}'::xid[]);
This will return the array as is, but xid doesn't have a LT_OPR, should
I error out in this case? Like: could not identify ordering operator for type xid[]
Yes, I think that case needs to error out. It seems best to identify
the ordering operator before you decide whether or not you have >1
element.
--
Robert Haas
EDB: http://www.enterprisedb.com
On Thu, Nov 7, 2024 at 10:29 PM Robert Haas <robertmhaas@gmail.com> wrote:
On Thu, Nov 7, 2024 at 8:56 AM Junwang Zhao <zhjwpku@gmail.com> wrote:
Yeah, this is reasonable but one case I can't be sure:
SELECT array_sort('{{2,3,4}}'::xid[]);
This will return the array as is, but xid doesn't have a LT_OPR, should
I error out in this case? Like: could not identify ordering operator for type xid[]
Yes, I think that case needs to error out. It seems best to identify
the ordering operator before you decide whether or not you have >1
element.
Got it, will do this in the next version.
--
Robert Haas
EDB: http://www.enterprisedb.com
--
Regards
Junwang Zhao
On Thu, Nov 07, 2024 at 09:29:05AM -0500, Robert Haas wrote:
Yes, I think that case needs to error out. It seems best to identify
the ordering operator before you decide whether or not you have >1
element.
+1.
--
Michael
On Thu, Nov 07, 2024 at 10:06:04PM +0800, Junwang Zhao wrote:
I'm afraid this cannot be achieved in my current implementation; a simple
case is:
SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[]);
{Abc,bar,bbc,CCC,foo,NULL}
SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
{Abc,CCC,bar,bbc,foo,NULL}
SELECT array_sort(a COLLATE "C") FROM (VALUES
('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C"),
('{foo,bar,null,CCC,Abc,bbc}'::text[])) v(a);
{Abc,CCC,bar,bbc,foo,NULL}
{Abc,CCC,bar,bbc,foo,NULL}
Maybe add some documentation to specify this?
So, if I use that:
CREATE COLLATION case_sensitive (provider = icu, locale = '');
=# SELECT array_sort('{Abc,CCC,bar,bbc,foo,NULL}'::text[]
COLLATE "case_sensitive");
array_sort
----------------------------
{Abc,bar,bbc,CCC,foo,NULL}
(1 row)
=# SELECT array_sort('{Abc,CCC,bar,bbc,foo,NULL}'::text[]
COLLATE "C");
array_sort
----------------------------
{Abc,CCC,bar,bbc,foo,NULL}
(1 row)
What takes priority is the collation defined with the array_sort,
which is fine:
=# SELECT array_sort(a collate "case_sensitive") FROM
(VALUES ('{foo,bar,null,CCC,Abc,bbc}'::text[]),
('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C" )) v(a);
array_sort
----------------------------
{Abc,bar,bbc,CCC,foo,NULL}
{Abc,bar,bbc,CCC,foo,NULL}
(2 rows)
=# SELECT array_sort(a collate "C") FROM
(VALUES ('{foo,bar,null,CCC,Abc,bbc}'::text[]),
('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "case_sensitive" )) v(a);
array_sort
----------------------------
{Abc,CCC,bar,bbc,foo,NULL}
{Abc,CCC,bar,bbc,foo,NULL}
(2 rows)
The case where the collation is defined in the set of values is a bit
more troubling to me, as it depends on what the values want to be
applied, still that's OK because the collation applied is the one
coming from the set of values:
=# SELECT array_sort(a) FROM
(VALUES ('{foo,bar,null,CCC,Abc,bbc}'::text[]),
('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "case_sensitive" )) v(a);
array_sort
----------------------------
{Abc,bar,bbc,CCC,foo,NULL}
{Abc,bar,bbc,CCC,foo,NULL}
(2 rows)
=# SELECT array_sort(a) FROM
(VALUES ('{foo,bar,null,CCC,Abc,bbc}'::text[]),
('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C" )) v(a);
array_sort
----------------------------
{Abc,CCC,bar,bbc,foo,NULL}
{Abc,CCC,bar,bbc,foo,NULL}
(2 rows)
I am wondering if there are more fancy cases where the saved cache
could force a state that would lead to puzzling results, say with
different collations that should be applied. I'd recommend to
research that more, to reflect that in the docs and to add tests that
show what we should expect in these cases within 0001 because this
new function is mimicking in the context of a function execution
multiple query clauses where restrictions are applied when analyzing
the query, close to the parser.
For example, UNION and UNION ALL require a common collation when
processing a set of expressions related to them, which would be OK.
Perhaps I lack some imagination to be able to break things.
--
Michael
Hi Michael,
On Fri, Nov 8, 2024 at 8:52 AM Michael Paquier <michael@paquier.xyz> wrote:
On Thu, Nov 07, 2024 at 10:06:04PM +0800, Junwang Zhao wrote:
I'm afraid this cannot be achieved in my current implementation; a simple
case is:
SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[]);
{Abc,bar,bbc,CCC,foo,NULL}
SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
{Abc,CCC,bar,bbc,foo,NULL}
SELECT array_sort(a COLLATE "C") FROM (VALUES
('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C"),
('{foo,bar,null,CCC,Abc,bbc}'::text[])) v(a);
{Abc,CCC,bar,bbc,foo,NULL}
{Abc,CCC,bar,bbc,foo,NULL}
Maybe add some documentation to specify this?
So, if I use that:
CREATE COLLATION case_sensitive (provider = icu, locale = '');
=# SELECT array_sort('{Abc,CCC,bar,bbc,foo,NULL}'::text[]
COLLATE "case_sensitive");
array_sort
----------------------------
{Abc,bar,bbc,CCC,foo,NULL}
(1 row)
=# SELECT array_sort('{Abc,CCC,bar,bbc,foo,NULL}'::text[]
COLLATE "C");
array_sort
----------------------------
{Abc,CCC,bar,bbc,foo,NULL}
(1 row)
What takes priority is the collation defined with the array_sort,
which is fine:
=# SELECT array_sort(a collate "case_sensitive") FROM
(VALUES ('{foo,bar,null,CCC,Abc,bbc}'::text[]),
('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C" )) v(a);
array_sort
----------------------------
{Abc,bar,bbc,CCC,foo,NULL}
{Abc,bar,bbc,CCC,foo,NULL}
(2 rows)
=# SELECT array_sort(a collate "C") FROM
(VALUES ('{foo,bar,null,CCC,Abc,bbc}'::text[]),
('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "case_sensitive" )) v(a);
array_sort
----------------------------
{Abc,CCC,bar,bbc,foo,NULL}
{Abc,CCC,bar,bbc,foo,NULL}
(2 rows)
The case where the collation is defined in the set of values is a bit
more troubling to me, as it depends on what the values want to be
applied, still that's OK because the collation applied is the one
coming from the set of values:
=# SELECT array_sort(a) FROM
(VALUES ('{foo,bar,null,CCC,Abc,bbc}'::text[]),
('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "case_sensitive" )) v(a);
array_sort
----------------------------
{Abc,bar,bbc,CCC,foo,NULL}
{Abc,bar,bbc,CCC,foo,NULL}
(2 rows)
=# SELECT array_sort(a) FROM
(VALUES ('{foo,bar,null,CCC,Abc,bbc}'::text[]),
('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C" )) v(a);
array_sort
----------------------------
{Abc,CCC,bar,bbc,foo,NULL}
{Abc,CCC,bar,bbc,foo,NULL}
(2 rows)
I am wondering if there are more fancy cases where the saved cache
could force a state that would lead to puzzling results, say with
different collations that should be applied. I'd recommend to
research that more, to reflect that in the docs and to add tests that
show what we should expect in these cases within 0001 because this
new function is mimicking in the context of a function execution
multiple query clauses where restrictions are applied when analyzing
the query, close to the parser.
For example, UNION and UNION ALL require a common collation when
processing a set of expressions related to them, which would be OK.
Perhaps I lack some imagination to be able to break things.
--
Michael
While trying to come up with more test cases, I started to wonder: hasn't
PG_GET_COLLATION() already done the work of giving array_sort
the right collation OID? Note that I pass PG_GET_COLLATION(), not
typentry->typcollation, to tuplesort_begin_datum.
I tried:
CREATE COLLATION case_sensitive (provider = icu, locale = '');
create table t1(a int, b text[] COLLATE "C");
create table t2(a int, b text[] COLLATE "case_sensitive");
insert into t1 values (1, '{foo,bar,null,CCC,Abc,bbc}'::text[]);
insert into t2 values (2, '{foo,bar,null,CCC,Abc,bbc}'::text[]);
select array_sort(b) from t1;
select array_sort(b) from t2;
I set a breakpoint to check the collation OID, and both queries give the expected result.
For the following cases:
SELECT array_sort(a) FROM
(VALUES ('{foo,bar,null,CCC,Abc,bbc}'::text[]),
('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE
"case_sensitive" )) v(a);
WITH t AS (select '{foo,bar,null,CCC,Abc,bbc}'::text[] a UNION ALL
select '{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "case_sensitive"
a) SELECT array_sort(a) from t;
The collation seems to have been decided in select_common_collation of
the transform phase.
For:
WITH t AS (select '{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C" a
UNION ALL select
'{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "case_sensitive"a)
SELECT array_sort(a) from t;
ERROR: collation mismatch between explicit collations "C" and "case_sensitive"
LINE 2: '{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "case_sens...
merge_collation_state raises an ERROR because the collations are explicit.
But for implicit collations,
select_common_collation sets context.collation to InvalidOid, so the
following works:
select b from t1 UNION ALL select b from t2;
But since the context has InvalidOid as its collation,
PG_GET_COLLATION() in array_sort
gets InvalidOid, and the following fails:
WITH t3 AS (select b from t1 UNION ALL select b from t2) select
array_sort(b) from t3;
ERROR: could not determine which collation to use for string comparison
HINT: Use the COLLATE clause to set the collation explicitly.
The error message comes from tuplesort_begin_datum's call stack; we
can add an explicit COLLATE to make it work:
WITH t3 AS (select b from t1 UNION ALL select b from t2) select
array_sort(b collate "C") from t3;
Based on the above analysis, I think it's ok to pass PG_GET_COLLATION()
to tuplesort_begin_datum.
PFA v14 with Robert's comment addressed.
--
Regards
Junwang Zhao
Attachments:
v14-0001-general-purpose-array_sort.patchapplication/octet-stream; name=v14-0001-general-purpose-array_sort.patchDownload
From 5d810740b0fbfc601a89d1132f064518f0cffc63 Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Fri, 27 Sep 2024 13:05:40 +0000
Subject: [PATCH v14 1/2] general purpose array_sort
Sorts anyarray on its first dimension.
The sort order is determined by the "<" operator.
Author: Junwang Zhao <zhjwpku@gmail.com>
Co-authored-by: Jian He <jian.universality@gmail.com>
Signed-off-by: Junwang Zhao <zhjwpku@gmail.com>
---
doc/src/sgml/func.sgml | 18 +++
src/backend/utils/adt/array_userfuncs.c | 138 ++++++++++++++++++
src/backend/utils/adt/arrayfuncs.c | 3 +-
src/include/catalog/pg_proc.dat | 3 +
src/include/utils/array.h | 1 +
src/test/regress/expected/arrays.out | 90 ++++++++++++
.../regress/expected/collate.icu.utf8.out | 13 ++
src/test/regress/sql/arrays.sql | 25 ++++
src/test/regress/sql/collate.icu.utf8.sql | 4 +
src/tools/pgindent/typedefs.list | 1 +
10 files changed, 295 insertions(+), 1 deletion(-)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 73979f20ff..748611dee5 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -20438,6 +20438,24 @@ SELECT NULLIF(value, '(none)') ...
</para></entry>
</row>
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <indexterm>
+ <primary>array_sort</primary>
+ </indexterm>
+ <function>array_sort</function> ( <type>anyarray</type> )
+ <returnvalue>anyarray</returnvalue>
+ </para>
+ <para>
+ Sorts the first dimension of the array. The sort order is determined by the <literal>&lt;</literal> operator of the element type; nulls will appear after non-null values.
+ The collation to use can be forced by adding a <literal>COLLATE</literal> clause to any of the arguments.
+ </para>
+ <para>
+ <literal>array_sort(ARRAY[[2,4],[2,1],[6,5]])</literal>
+ <returnvalue>{{2,1},{2,4},{6,5}}</returnvalue>
+ </para></entry>
+ </row>
+
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm id="function-array-to-string">
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index d053808f6e..49b8d5f534 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -16,11 +16,13 @@
#include "common/int.h"
#include "common/pg_prng.h"
#include "libpq/pqformat.h"
+#include "miscadmin.h"
#include "port/pg_bitutils.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/lsyscache.h"
+#include "utils/tuplesort.h"
#include "utils/typcache.h"
/*
@@ -42,6 +44,18 @@ typedef struct DeserialIOData
Oid typioparam;
} DeserialIOData;
+/*
+ * ArraySortCachedInfo
+ * Lookup results that array_sort stores in fcinfo->flinfo->fn_extra and reuses on later calls
+ */
+typedef struct ArraySortCachedInfo
+{
+ TypeCacheEntry *typentry; /* type cache entry for element type; used for 1-D input */
+ TypeCacheEntry *array_typentry; /* type cache entry for array type; used for multi-D input */
+ ArrayMetaState array_meta; /* element typlen/typbyval/typalign, passed to
+ * array_create_iterator for better performance */
+} ArraySortCachedInfo;
+
static Datum array_position_common(FunctionCallInfo fcinfo);
@@ -1797,3 +1811,127 @@ array_reverse(PG_FUNCTION_ARGS)
PG_RETURN_ARRAYTYPE_P(result);
}
+
+/*
+ * array_sort
+ *
+ * Sorts the first dimension of the array: the elements of a 1-D array, or the (ndim - 1)-D slices of a multi-D array.
+ * The sort order is determined by the "<" operator of the element type (1-D) or of its array type (any other ndim), under the call's collation.
+ */
+Datum
+array_sort(PG_FUNCTION_ARGS)
+{
+ ArrayType *array = PG_GETARG_ARRAYTYPE_P(0);
+ Oid elmtyp;
+ Oid array_type;
+ Oid collation = PG_GET_COLLATION(); /* collation handed to the tuplesort below */
+ ArraySortCachedInfo *cache_info;
+ TypeCacheEntry *typentry;
+ Tuplesortstate *tuplesortstate;
+ ArrayIterator array_iterator;
+ Datum value;
+ bool isnull;
+ ArrayBuildStateAny *astate = NULL;
+ int ndim,
+ *dims,
+ *lbs;
+
+ ndim = ARR_NDIM(array);
+ dims = ARR_DIMS(array);
+ lbs = ARR_LBOUND(array);
+
+ elmtyp = ARR_ELEMTYPE(array);
+ cache_info = (ArraySortCachedInfo *) fcinfo->flinfo->fn_extra; /* cache left by an earlier call, if any */
+ if (cache_info == NULL)
+ {
+ cache_info = (ArraySortCachedInfo *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ sizeof(ArraySortCachedInfo));
+ cache_info->typentry = NULL;
+ cache_info->array_typentry = NULL;
+ fcinfo->flinfo->fn_extra = (void *) cache_info;
+ }
+
+ if (ndim == 1)
+ {
+ /* 1-D input: the sorted items are the elements, so use the element type's "<" operator */
+ typentry = cache_info->typentry;
+ if (typentry == NULL || typentry->type_id != elmtyp) /* nothing cached, or cached for another element type */
+ {
+ typentry = lookup_type_cache(elmtyp, TYPECACHE_LT_OPR);
+ if (!OidIsValid(typentry->lt_opr))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify ordering operator for type %s",
+ format_type_be(elmtyp))));
+ cache_info->typentry = typentry;
+ cache_info->array_meta.element_type = elmtyp; /* NOTE(review): refreshed only on a cache miss, while the other branch rewrites array_meta on every call - confirm it cannot go stale */
+ cache_info->array_meta.typlen = typentry->typlen;
+ cache_info->array_meta.typbyval = typentry->typbyval;
+ cache_info->array_meta.typalign = typentry->typalign;
+ }
+ }
+ else
+ {
+ /* any other ndim (taken for ndim == 0 as well): use the array type's "<" operator */
+ typentry = cache_info->array_typentry;
+ if (typentry == NULL || typentry->typelem != elmtyp) /* nothing cached, or cached for another element type */
+ {
+ array_type = get_array_type(elmtyp);
+ if (!OidIsValid(array_type))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("could not find array type for data type %s",
+ format_type_be(elmtyp))));
+ typentry = lookup_type_cache(array_type, TYPECACHE_LT_OPR);
+ if (!OidIsValid(typentry->lt_opr))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify ordering operator for type %s",
+ format_type_be(array_type))));
+ cache_info->array_typentry = typentry;
+ }
+ cache_info->array_meta.element_type = elmtyp; /* the iterator needs element-level metadata, not the array type's */
+ get_typlenbyvalalign(elmtyp,
+ &cache_info->array_meta.typlen,
+ &cache_info->array_meta.typbyval,
+ &cache_info->array_meta.typalign);
+ }
+
+ if (ndim < 1 || dims[0] < 2)
+ PG_RETURN_ARRAYTYPE_P(array); /* fewer than two items: returned as-is, but only after the operator lookup above has had its chance to error out */
+
+ tuplesortstate = tuplesort_begin_datum(typentry->type_id,
+ typentry->lt_opr,
+ collation,
+ false, work_mem, NULL, false);
+
+ array_iterator = array_create_iterator(array, ndim - 1, &cache_info->array_meta); /* iterate in slices of ndim - 1 dimensions (0 for a 1-D array) */
+ while (array_iterate(array_iterator, &value, &isnull))
+ {
+ tuplesort_putdatum(tuplesortstate, value, isnull);
+ }
+ array_free_iterator(array_iterator);
+
+ /*
+ * Every item has been fed to the tuplesort; do the sort before reading results back.
+ */
+ tuplesort_performsort(tuplesortstate);
+
+ while (tuplesort_getdatum(tuplesortstate, true, false, &value, &isnull, NULL))
+ {
+ astate = accumArrayResultAny(astate, value, isnull,
+ typentry->type_id, CurrentMemoryContext);
+ }
+
+ tuplesort_end(tuplesortstate);
+
+ /* bounds preservation: carry the input's first-dimension lower bound over to the result */
+ if (ndim == 1)
+ astate->scalarstate->lb = lbs[0];
+ else
+ astate->arraystate->lbs[0] = lbs[0];
+
+ /* Avoid leaking memory when handed toasted input */
+ PG_FREE_IF_COPY(array, 0);
+ PG_RETURN_DATUM(makeArrayResultAny(astate, CurrentMemoryContext, true));
+}
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index a715e7e0b8..5d5d9b5bbf 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -5330,6 +5330,7 @@ initArrayResultWithSize(Oid element_type, MemoryContext rcontext,
MemoryContextAlloc(arr_context, astate->alen * sizeof(bool));
astate->nelems = 0;
astate->element_type = element_type;
+ astate->lb = 1; /* default lower bound */
get_typlenbyvalalign(element_type,
&astate->typlen,
&astate->typbyval,
@@ -5867,7 +5868,7 @@ makeArrayResultAny(ArrayBuildStateAny *astate,
/* If no elements were presented, we want to create an empty array */
ndims = (astate->scalarstate->nelems > 0) ? 1 : 0;
dims[0] = astate->scalarstate->nelems;
- lbs[0] = 1;
+ lbs[0] = astate->scalarstate->lb;
result = makeMdArrayResult(astate->scalarstate, ndims, dims, lbs,
rcontext, release);
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index f23321a41f..54ebbbd135 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1744,6 +1744,9 @@
{ oid => '8686', descr => 'reverse array',
proname => 'array_reverse', prorettype => 'anyarray',
proargtypes => 'anyarray', prosrc => 'array_reverse' },
+{ oid => '8810', descr => 'sort array',
+ proname => 'array_sort', prorettype => 'anyarray',
+ proargtypes => 'anyarray', prosrc => 'array_sort'},
{ oid => '3816', descr => 'array typanalyze',
proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool',
proargtypes => 'internal', prosrc => 'array_typanalyze' },
diff --git a/src/include/utils/array.h b/src/include/utils/array.h
index 157cc0e4c6..f04cbe4eeb 100644
--- a/src/include/utils/array.h
+++ b/src/include/utils/array.h
@@ -192,6 +192,7 @@ typedef struct ArrayBuildState
int alen; /* allocated length of above arrays */
int nelems; /* number of valid entries in above arrays */
Oid element_type; /* data type of the Datums */
+ int lb; /* lower bound for one dimension array */
int16 typlen; /* needed info about datatype */
bool typbyval;
char typalign;
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index 0b61fb5bb7..9844d5b81f 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -2734,3 +2734,93 @@ SELECT array_reverse('{{1,2},{3,4},{5,6},{7,8}}'::int[]);
{{7,8},{5,6},{3,4},{1,2}}
(1 row)
+-- array_sort
+SELECT array_sort('{}'::int[]);
+ array_sort
+------------
+ {}
+(1 row)
+
+SELECT array_sort('{1}'::int[]);
+ array_sort
+------------
+ {1}
+(1 row)
+
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+ array_sort
+---------------
+ {1,2,3,4,5,6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+ array_sort
+---------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
+ array_sort
+-----------------------
+ {Abc,CCC,bar,bbc,foo}
+(1 row)
+
+SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
+ array_sort
+----------------------------
+ {Abc,CCC,bar,bbc,foo,NULL}
+(1 row)
+
+-- multidimensional array tests
+SELECT array_sort('{{1}}'::int[]);
+ array_sort
+------------
+ {{1}}
+(1 row)
+
+SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
+ array_sort
+---------------------
+ {{2,1},{2,4},{6,5}}
+(1 row)
+
+-- no ordering operator tests
+SELECT array_sort('{1}'::xid[]); -- ndim < 1
+ERROR: could not identify ordering operator for type xid
+SELECT array_sort('{1,2,3}'::xid[]);
+ERROR: could not identify ordering operator for type xid
+SELECT array_sort('{{1,2,3}}'::xid[]); -- dims[0] < 2
+ERROR: could not identify ordering operator for type xid[]
+SELECT array_sort('{{1,2,3},{2,3,4}}'::xid[]);
+ERROR: could not identify ordering operator for type xid[]
+-- bounds preservation tests
+SELECT array_sort(a) FROM (VALUES ('[10:12][20:21]={{1,2},{10,20},{3,4}}'::int[])) v(a);
+ array_sort
+--------------------------------------
+ [10:12][20:21]={{1,2},{3,4},{10,20}}
+(1 row)
+
+SELECT array_sort(a) FROM (VALUES ('[-1:0]={7,1}'::int[])) v(a);
+ array_sort
+--------------
+ [-1:0]={1,7}
+(1 row)
+
+SELECT array_sort(a) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+ array_sort
+--------------------------------------
+ [-2:0][20:21]={{1,-4},{1,2},{10,20}}
+(1 row)
+
+SELECT array_sort(a [-1:0]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+ array_sort
+------------------
+ {{1,-4},{10,20}}
+(1 row)
+
+SELECT array_sort(a [-1:0][20:20]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+ array_sort
+------------
+ {{1},{10}}
+(1 row)
+
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index 6fa32ae364..6d3bbcf1fb 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1338,6 +1338,19 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse
t | t
(1 row)
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+ array_sort
+------------
+ {a,B}
+(1 row)
+
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+ array_sort
+------------
+ {B,a}
+(1 row)
+
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index 03cc8cfcd9..5a5549c8e0 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -834,3 +834,28 @@ SELECT array_reverse('{1}'::int[]);
SELECT array_reverse('{1,2}'::int[]);
SELECT array_reverse('{1,2,3,NULL,4,5,6}'::int[]);
SELECT array_reverse('{{1,2},{3,4},{5,6},{7,8}}'::int[]);
+
+-- array_sort
+SELECT array_sort('{}'::int[]);
+SELECT array_sort('{1}'::int[]);
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
+SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
+
+-- multidimensional array tests
+SELECT array_sort('{{1}}'::int[]);
+SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
+
+-- no ordering operator tests
+SELECT array_sort('{1}'::xid[]); -- ndim < 1
+SELECT array_sort('{1,2,3}'::xid[]);
+SELECT array_sort('{{1,2,3}}'::xid[]); -- dims[0] < 2
+SELECT array_sort('{{1,2,3},{2,3,4}}'::xid[]);
+
+-- bounds preservation tests
+SELECT array_sort(a) FROM (VALUES ('[10:12][20:21]={{1,2},{10,20},{3,4}}'::int[])) v(a);
+SELECT array_sort(a) FROM (VALUES ('[-1:0]={7,1}'::int[])) v(a);
+SELECT array_sort(a) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+SELECT array_sort(a [-1:0]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+SELECT array_sort(a [-1:0][20:20]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index 49fa9758b4..c1e0f6ee1d 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -536,6 +536,10 @@ CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=second
SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive;
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 1847bbfa95..e740e449f5 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -150,6 +150,7 @@ ArrayIOData
ArrayIterator
ArrayMapState
ArrayMetaState
+ArraySortCachedInfo
ArraySubWorkspace
ArrayToken
ArrayType
--
2.39.5
v14-0002-support-sort-order-and-nullsfirst-flag.patchapplication/octet-stream; name=v14-0002-support-sort-order-and-nullsfirst-flag.patchDownload
From 37b20f0a17b8fb54816c319bd474d2906a44b6ba Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Sat, 26 Oct 2024 03:53:17 +0000
Subject: [PATCH v14 2/2] support sort order and nullsfirst flag
Signed-off-by: Junwang Zhao <zhjwpku@gmail.com>
---
doc/src/sgml/func.sgml | 6 +++-
src/backend/utils/adt/array_userfuncs.c | 42 ++++++++++++++++++++-----
src/include/catalog/pg_proc.dat | 6 ++++
src/test/regress/expected/arrays.out | 38 ++++++++++++++++++++++
src/test/regress/sql/arrays.sql | 10 ++++++
5 files changed, 94 insertions(+), 8 deletions(-)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 748611dee5..afbda78688 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -20443,12 +20443,16 @@ SELECT NULLIF(value, '(none)') ...
<indexterm>
<primary>array_sort</primary>
</indexterm>
- <function>array_sort</function> ( <type>anyarray</type> )
+ <function>array_sort</function> ( <type>anyarray</type> <optional>, <parameter>is_ascending</parameter> <type>boolean</type> <optional>, <parameter>nulls_first</parameter> <type>boolean</type></optional></optional>)
<returnvalue>anyarray</returnvalue>
</para>
<para>
Sorts the first dimension of the array. The sort order is determined by the <literal>&lt;</literal> operator of the element type, nulls will appear after non-null values.
The collation to use can be forced by adding a <literal>COLLATE</literal> clause to any of the arguments.
+ If <parameter>is_ascending</parameter> is true then sort by ascending order, otherwise descending order.
+ <parameter>is_ascending</parameter> defaults to true.
+ If <parameter>nulls_first</parameter> is true then nulls appear before non-null values, otherwise nulls appear after non-null values.
+ <parameter>nulls_first</parameter> defaults to the opposite of <parameter>is_ascending</parameter> if not provided.
</para>
<para>
<literal>array_sort(ARRAY[[2,4],[2,1],[6,5]])</literal>
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index 49b8d5f534..33bf6f5243 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -1816,7 +1816,6 @@ array_reverse(PG_FUNCTION_ARGS)
* array_sort
*
* Sorts the first dimension of the array.
- * The sort order is determined by the "<" operator of the element type.
*/
Datum
array_sort(PG_FUNCTION_ARGS)
@@ -1835,11 +1834,24 @@ array_sort(PG_FUNCTION_ARGS)
int ndim,
*dims,
*lbs;
+ bool is_ascending = true;
+ bool nulls_first = false;
ndim = ARR_NDIM(array);
dims = ARR_DIMS(array);
lbs = ARR_LBOUND(array);
+ if (PG_NARGS() > 1)
+ {
+ is_ascending = PG_GETARG_BOOL(1);
+
+ /*
+ * If nulls_first not provided, it defaults to the opposite of
+ * is_ascending.
+ */
+ nulls_first = PG_NARGS() > 2 ? PG_GETARG_BOOL(2) : !is_ascending;
+ }
+
elmtyp = ARR_ELEMTYPE(array);
cache_info = (ArraySortCachedInfo *) fcinfo->flinfo->fn_extra;
if (cache_info == NULL)
@@ -1857,8 +1869,10 @@ array_sort(PG_FUNCTION_ARGS)
typentry = cache_info->typentry;
if (typentry == NULL || typentry->type_id != elmtyp)
{
- typentry = lookup_type_cache(elmtyp, TYPECACHE_LT_OPR);
- if (!OidIsValid(typentry->lt_opr))
+ typentry = lookup_type_cache(elmtyp,
+ is_ascending ? TYPECACHE_LT_OPR : TYPECACHE_GT_OPR);
+ if ((is_ascending && !OidIsValid(typentry->lt_opr)) ||
+ (!is_ascending && !OidIsValid(typentry->gt_opr)))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_FUNCTION),
errmsg("could not identify ordering operator for type %s",
@@ -1882,8 +1896,10 @@ array_sort(PG_FUNCTION_ARGS)
(errcode(ERRCODE_UNDEFINED_OBJECT),
errmsg("could not find array type for data type %s",
format_type_be(elmtyp))));
- typentry = lookup_type_cache(array_type, TYPECACHE_LT_OPR);
- if (!OidIsValid(typentry->lt_opr))
+ typentry = lookup_type_cache(array_type,
+ is_ascending ? TYPECACHE_LT_OPR : TYPECACHE_GT_OPR);
+ if ((is_ascending && !OidIsValid(typentry->lt_opr)) ||
+ (!is_ascending && !OidIsValid(typentry->gt_opr)))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_FUNCTION),
errmsg("could not identify ordering operator for type %s",
@@ -1901,9 +1917,9 @@ array_sort(PG_FUNCTION_ARGS)
PG_RETURN_ARRAYTYPE_P(array);
tuplesortstate = tuplesort_begin_datum(typentry->type_id,
- typentry->lt_opr,
+ is_ascending ? typentry->lt_opr : typentry->gt_opr,
collation,
- false, work_mem, NULL, false);
+ nulls_first, work_mem, NULL, false);
array_iterator = array_create_iterator(array, ndim - 1, &cache_info->array_meta);
while (array_iterate(array_iterator, &value, &isnull))
@@ -1935,3 +1951,15 @@ array_sort(PG_FUNCTION_ARGS)
PG_FREE_IF_COPY(array, 0);
PG_RETURN_DATUM(makeArrayResultAny(astate, CurrentMemoryContext, true));
}
+
+Datum
+array_sort_order(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo); /* array_sort reads is_ascending itself when PG_NARGS() > 1 */
+}
+
+Datum
+array_sort_order_nulls_first(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo); /* array_sort reads nulls_first itself when PG_NARGS() > 2 */
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 54ebbbd135..c6593c63fb 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1747,6 +1747,12 @@
{ oid => '8810', descr => 'sort array',
proname => 'array_sort', prorettype => 'anyarray',
proargtypes => 'anyarray', prosrc => 'array_sort'},
+{ oid => '8811', descr => 'sort array',
+ proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+ proargtypes => 'anyarray bool', prosrc => 'array_sort_order'},
+{ oid => '8812', descr => 'sort array',
+ proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+ proargtypes => 'anyarray bool bool', prosrc => 'array_sort_order_nulls_first'},
{ oid => '3816', descr => 'array typanalyze',
proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool',
proargtypes => 'internal', prosrc => 'array_typanalyze' },
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index 9844d5b81f..3f82972cdf 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -2771,6 +2771,44 @@ SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
{Abc,CCC,bar,bbc,foo,NULL}
(1 row)
+-- array_sort with order specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true);
+ array_sort
+--------------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6,NULL}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false);
+ array_sort
+--------------------------------
+ {NULL,6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+-- array_sort with order and nullsfirst flag specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, true);
+ array_sort
+--------------------------------
+ {NULL,1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, false);
+ array_sort
+--------------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6,NULL}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, true);
+ array_sort
+--------------------------------
+ {NULL,6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, false);
+ array_sort
+--------------------------------
+ {6.6,5.5,4.4,3.3,2.2,1.1,NULL}
+(1 row)
+
-- multidimensional array tests
SELECT array_sort('{{1}}'::int[]);
array_sort
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index 5a5549c8e0..bc19d84e2c 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -843,6 +843,16 @@ SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
+-- array_sort with order specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false);
+
+-- array_sort with order and nullsfirst flag specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, true);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, false);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, true);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, false);
+
-- multidimensional array tests
SELECT array_sort('{{1}}'::int[]);
SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
--
2.39.5
On Fri, Nov 8, 2024 at 8:52 AM Michael Paquier <michael@paquier.xyz> wrote:
I am wondering if there are more fancy cases where the saved cache
could force a state that would lead to puzzling results, say with
different collations that should be applied. I'd recommend to
research that more, to reflect that in the docs and to add tests that
show what we should expect in these cases within 0001 because this
new function is mimicking in the context of a function execution
multiple query clauses where restrictions are applied when analyzing
the query, close to the parser. For example, UNION and UNION ALL require a common collation when
processing a set of expressions related to them, which would be OK.
Perhaps I lack some imagination to be able to break things.
--
We had 3 error occurrences of
ERROR: could not determine which collation to use for string comparison
in collate.linux.utf8.out.
One is with UNION ALL; the other two are comparisons between two text arguments.
Here array_sort takes only one argument, so there is not that much room
for things to go wrong?
Would the potential misbehavior only involve UNION ALL?
With UNION ALL over two tables, the collations can be: both implicit; both
explicit; or one implicit and one explicit.
If both are explicit, it will error out quite easily.
If one side is explicit and the other side is implicit, then the explicit
collation is used, which is what we expect.
The tricky case is when both are implicit.
drop table t1,t2;
create table t1(a int, b text[] COLLATE "C");
create table t2(a int, b text[] COLLATE case_sensitive);
insert into t1 values (1, '{foo,bar,null,CCC,Abc,bbc}'::text[]);
insert into t2 values (2, '{foo,bar,null,CCC,Abc,bbc}'::text[]);
create domain dtxt as text[] collate case_insensitive;
CREATE OR REPLACE FUNCTION mytxt_coll(x text[]) RETURNS dtxt LANGUAGE
plpgsql AS $$
declare
xx text[] COLLATE case_insensitive;
begin
xx := x;
return xx collate case_insensitive;
end
$$;
--these three fail.
select array_sort(b) from (select b from t1 union all select b from t2) sub;
select array_sort(b) from (select mytxt_coll(b) as b from t2 union all
select b from t1 ) sub;
select array_sort(b) from (select b from
mytxt_coll('{foo,bar,null,CCC,Abc,bbc}'::text[] collate
case_insensitive) f(b) union all select b from t1) sub;
-----
select array_sort(b) from (select b from t1 union all select b from
mytxt_coll('{foo,bar,null,CCC,Abc,bbc}'::text[]) f(b)) sub;
select array_sort(b) from (select b from
mytxt_coll('{foo,bar,null,CCC,Abc,bbc}'::text[]) f(b) union all select
b from t1 ) sub;
These two queries produce the same output, which is what we expect per
this quote from the manual:
<<>>
otherwise, all input expressions must have the same implicit collation
derivation or the default collation.
If any non-default collation is present, that is the result of the
collation combination.
Otherwise, the result is the default collation.
<<>>
https://www.postgresql.org/docs/current/collation.html#COLLATION-CONCEPTS
Also, we have varstr_sortsupport->check_collation_set to make sure we
have a single valid collation for array_sort.
Overall, I think the current implementation works fine.
hi.
I made some cosmetic changes:
- Expanded the commit message.
- Removed unnecessary parentheses around errcode, per
  https://git.postgresql.org/cgit/postgresql.git/commit/?id=e3a87b4991cc2d00b7a3082abb54c5f12baedfd1
- Changed two error codes from ERRCODE_UNDEFINED_FUNCTION
  to ERRCODE_FEATURE_NOT_SUPPORTED.
- In func.sgml, some lines were too long; I wrapped them onto several lines.
Attachments:
v15-0001-general-purpose-array_sort.patchtext/x-patch; charset=US-ASCII; name=v15-0001-general-purpose-array_sort.patchDownload
From be56e368bb00f07e25b5d73a97816baceb46a92a Mon Sep 17 00:00:00 2001
From: jian he <jian.universality@gmail.com>
Date: Sun, 22 Dec 2024 12:23:14 +0800
Subject: [PATCH v15 1/2] general purpose array_sort
Introduce the SQL-callable function array_sort(anyarray). The parameter passed
to this function cannot truly be a polymorphic data type. Instead, it accepts
any array type that supports the "less than" (`<`) operator.
If the input parameter is a multidimensional array, array_sort will sort based
on the first dimension. By default, sorting is performed based on
the argument's collation. However, you can also specify a COLLATE clause if
needed. Nulls will appear after non-null values.
For example:
SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
will sort based on the "C" collation.
Author: Junwang Zhao <zhjwpku@gmail.com>
Co-authored-by: Jian He <jian.universality@gmail.com>
Reviewed-by:
Michael Paquier <michael@paquier.xyz>
Aleksander Alekseev <aleksander@timescale.com>,
Tom Lane <tgl@sss.pgh.pa.us>,
David G. Johnston <david.g.johnston@gmail.com>,
Amit Langote <amitlangote09@gmail.com>,
andreas@proxel.se <andreas@proxel.se>,
Robert Haas <robertmhaas@gmail.com>
Dean Rasheed <dean.a.rasheed@gmail.com>
discussion: https://postgr.es/m/CAEG8a3J41a4dpw_-F94fF-JPRXYxw-GfsgoGotKcjs9LVfEEvw%40mail.gmail.com
---
doc/src/sgml/func.sgml | 20 +++
src/backend/utils/adt/array_userfuncs.c | 140 ++++++++++++++++++
src/backend/utils/adt/arrayfuncs.c | 3 +-
src/include/catalog/pg_proc.dat | 3 +
src/include/utils/array.h | 1 +
src/test/regress/expected/arrays.out | 90 +++++++++++
.../regress/expected/collate.icu.utf8.out | 13 ++
src/test/regress/sql/arrays.sql | 25 ++++
src/test/regress/sql/collate.icu.utf8.sql | 4 +
src/tools/pgindent/typedefs.list | 1 +
10 files changed, 299 insertions(+), 1 deletion(-)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 47370e581a..b0b1642caf 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -20496,6 +20496,26 @@ SELECT NULLIF(value, '(none)') ...
</para></entry>
</row>
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <indexterm>
+ <primary>array_sort</primary>
+ </indexterm>
+ <function>array_sort</function> ( <type>anyarray</type> )
+ <returnvalue>anyarray</returnvalue>
+ </para>
+ <para>
+ Sorts the first dimension of the array.
+ The sort order is determined by the <literal>&lt;</literal> operator of the element type,
+ nulls will appear after non-null values.
+ The collation to use can be forced by adding a <literal>COLLATE</literal> clause to any of the arguments.
+ </para>
+ <para>
+ <literal>array_sort(ARRAY[[2,4],[2,1],[6,5]])</literal>
+ <returnvalue>{{2,1},{2,4},{6,5}}</returnvalue>
+ </para></entry>
+ </row>
+
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm id="function-array-to-string">
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index 304a93112e..dd074b1ea2 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -16,11 +16,13 @@
#include "common/int.h"
#include "common/pg_prng.h"
#include "libpq/pqformat.h"
+#include "miscadmin.h"
#include "port/pg_bitutils.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/lsyscache.h"
+#include "utils/tuplesort.h"
#include "utils/typcache.h"
/*
@@ -42,6 +44,18 @@ typedef struct DeserialIOData
Oid typioparam;
} DeserialIOData;
+/*
+ * ArraySortCachedInfo
+ * Lookup results that array_sort keeps in fn_extra between calls
+ */
+typedef struct ArraySortCachedInfo
+{
+ TypeCacheEntry *typentry; /* element type's entry, used for 1-D input */
+ TypeCacheEntry *array_typentry; /* array type's entry, used for multi-D input */
+ ArrayMetaState array_meta; /* element typlen/typbyval/typalign handed
+ * to array_create_iterator */
+} ArraySortCachedInfo;
+
static Datum array_position_common(FunctionCallInfo fcinfo);
@@ -1797,3 +1811,129 @@ array_reverse(PG_FUNCTION_ARGS)
PG_RETURN_ARRAYTYPE_P(result);
}
+
+/*
+ * array_sort
+ *
+ * Sorts the first dimension of the array; nulls sort last, lower bound kept.
+ * Ordering uses "<" of the element type (1-D) or of the array type (multi-D).
+ */
+Datum
+array_sort(PG_FUNCTION_ARGS)
+{
+ ArrayType *array = PG_GETARG_ARRAYTYPE_P(0);
+ Oid elmtyp;
+ Oid array_type = InvalidOid;
+ Oid collation = PG_GET_COLLATION();
+ ArraySortCachedInfo *cache_info;
+ TypeCacheEntry *typentry;
+ Tuplesortstate *tuplesortstate;
+ ArrayIterator array_iterator;
+ Datum value;
+ bool isnull;
+ ArrayBuildStateAny *astate = NULL;
+ int ndim,
+ *dims,
+ *lbs;
+
+ ndim = ARR_NDIM(array);
+ dims = ARR_DIMS(array);
+ lbs = ARR_LBOUND(array);
+
+ elmtyp = ARR_ELEMTYPE(array);
+ cache_info = (ArraySortCachedInfo *) fcinfo->flinfo->fn_extra;
+ if (cache_info == NULL)
+ {
+ cache_info = (ArraySortCachedInfo *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ sizeof(ArraySortCachedInfo));
+ cache_info->typentry = NULL;
+ cache_info->array_typentry = NULL;
+ fcinfo->flinfo->fn_extra = (void *) cache_info;
+ }
+
+ if (ndim == 1)
+ {
+ /* 1-D input: scalars are compared with the element type's "<" operator */
+ typentry = cache_info->typentry;
+ if (typentry == NULL || typentry->type_id != elmtyp)
+ {
+ typentry = lookup_type_cache(elmtyp, TYPECACHE_LT_OPR);
+ if (!OidIsValid(typentry->lt_opr))
+ ereport(ERROR,
+ errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("could not identify ordering operator for type %s",
+ format_type_be(elmtyp)));
+
+ cache_info->typentry = typentry;
+ cache_info->array_meta.element_type = elmtyp;
+ cache_info->array_meta.typlen = typentry->typlen;
+ cache_info->array_meta.typbyval = typentry->typbyval;
+ cache_info->array_meta.typalign = typentry->typalign;
+ }
+ }
+ else
+ {
+ /* Multi-D input (ndim == 0 also lands here): use the array type's "<" */
+ typentry = cache_info->array_typentry;
+ if (typentry == NULL || typentry->typelem != elmtyp)
+ {
+ array_type = get_array_type(elmtyp);
+ if (!OidIsValid(array_type))
+ ereport(ERROR,
+ errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("could not find array type for data type %s",
+ format_type_be(elmtyp)));
+
+ typentry = lookup_type_cache(array_type, TYPECACHE_LT_OPR);
+ if (!OidIsValid(typentry->lt_opr))
+ ereport(ERROR,
+ errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("could not identify ordering operator for type %s",
+ format_type_be(array_type)));
+ cache_info->array_typentry = typentry;
+ }
+ cache_info->array_meta.element_type = elmtyp;
+ get_typlenbyvalalign(elmtyp,
+ &cache_info->array_meta.typlen,
+ &cache_info->array_meta.typbyval,
+ &cache_info->array_meta.typalign);
+ }
+
+ if (ndim < 1 || dims[0] < 2)
+ PG_RETURN_ARRAYTYPE_P(array); /* zero or one entry: returned unchanged */
+
+ tuplesortstate = tuplesort_begin_datum(typentry->type_id,
+ typentry->lt_opr,
+ collation,
+ false, work_mem, NULL, false); /* first false: nulls sort last */
+
+ array_iterator = array_create_iterator(array, ndim - 1, &cache_info->array_meta);
+ while (array_iterate(array_iterator, &value, &isnull))
+ {
+ tuplesort_putdatum(tuplesortstate, value, isnull);
+ }
+ array_free_iterator(array_iterator);
+
+ /*
+ * Sort, then rebuild the result array from the sorted entries.
+ */
+ tuplesort_performsort(tuplesortstate);
+
+ while (tuplesort_getdatum(tuplesortstate, true, false, &value, &isnull, NULL))
+ {
+ astate = accumArrayResultAny(astate, value, isnull,
+ typentry->type_id, CurrentMemoryContext);
+ }
+
+ tuplesort_end(tuplesortstate);
+
+ /* Carry over the input's first-dimension lower bound to the result */
+ if (ndim == 1)
+ astate->scalarstate->lb = lbs[0];
+ else
+ astate->arraystate->lbs[0] = lbs[0];
+
+ /* Avoid leaking memory when handed toasted input */
+ PG_FREE_IF_COPY(array, 0);
+ PG_RETURN_DATUM(makeArrayResultAny(astate, CurrentMemoryContext, true));
+}
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index 593775c27f..7c3f8e0e6e 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -5330,6 +5330,7 @@ initArrayResultWithSize(Oid element_type, MemoryContext rcontext,
MemoryContextAlloc(arr_context, astate->alen * sizeof(bool));
astate->nelems = 0;
astate->element_type = element_type;
+ astate->lb = 1; /* default lower bound */
get_typlenbyvalalign(element_type,
&astate->typlen,
&astate->typbyval,
@@ -5867,7 +5868,7 @@ makeArrayResultAny(ArrayBuildStateAny *astate,
/* If no elements were presented, we want to create an empty array */
ndims = (astate->scalarstate->nelems > 0) ? 1 : 0;
dims[0] = astate->scalarstate->nelems;
- lbs[0] = 1;
+ lbs[0] = astate->scalarstate->lb;
result = makeMdArrayResult(astate->scalarstate, ndims, dims, lbs,
rcontext, release);
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 2dcc2d42da..536b05d35a 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1744,6 +1744,9 @@
{ oid => '8686', descr => 'reverse array',
proname => 'array_reverse', prorettype => 'anyarray',
proargtypes => 'anyarray', prosrc => 'array_reverse' },
+{ oid => '8810', descr => 'sort array',
+ proname => 'array_sort', prorettype => 'anyarray',
+ proargtypes => 'anyarray', prosrc => 'array_sort'},
{ oid => '3816', descr => 'array typanalyze',
proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool',
proargtypes => 'internal', prosrc => 'array_typanalyze' },
diff --git a/src/include/utils/array.h b/src/include/utils/array.h
index 157cc0e4c6..f04cbe4eeb 100644
--- a/src/include/utils/array.h
+++ b/src/include/utils/array.h
@@ -192,6 +192,7 @@ typedef struct ArrayBuildState
int alen; /* allocated length of above arrays */
int nelems; /* number of valid entries in above arrays */
Oid element_type; /* data type of the Datums */
+ int lb; /* lower bound for one dimension array */
int16 typlen; /* needed info about datatype */
bool typbyval;
char typalign;
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index 0b61fb5bb7..9844d5b81f 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -2734,3 +2734,93 @@ SELECT array_reverse('{{1,2},{3,4},{5,6},{7,8}}'::int[]);
{{7,8},{5,6},{3,4},{1,2}}
(1 row)
+-- array_sort
+SELECT array_sort('{}'::int[]);
+ array_sort
+------------
+ {}
+(1 row)
+
+SELECT array_sort('{1}'::int[]);
+ array_sort
+------------
+ {1}
+(1 row)
+
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+ array_sort
+---------------
+ {1,2,3,4,5,6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+ array_sort
+---------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
+ array_sort
+-----------------------
+ {Abc,CCC,bar,bbc,foo}
+(1 row)
+
+SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
+ array_sort
+----------------------------
+ {Abc,CCC,bar,bbc,foo,NULL}
+(1 row)
+
+-- multidimensional array tests
+SELECT array_sort('{{1}}'::int[]);
+ array_sort
+------------
+ {{1}}
+(1 row)
+
+SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
+ array_sort
+---------------------
+ {{2,1},{2,4},{6,5}}
+(1 row)
+
+-- no ordering operator tests
+SELECT array_sort('{1}'::xid[]); -- ndim < 1
+ERROR: could not identify ordering operator for type xid
+SELECT array_sort('{1,2,3}'::xid[]);
+ERROR: could not identify ordering operator for type xid
+SELECT array_sort('{{1,2,3}}'::xid[]); -- dims[0] < 2
+ERROR: could not identify ordering operator for type xid[]
+SELECT array_sort('{{1,2,3},{2,3,4}}'::xid[]);
+ERROR: could not identify ordering operator for type xid[]
+-- bounds preservation tests
+SELECT array_sort(a) FROM (VALUES ('[10:12][20:21]={{1,2},{10,20},{3,4}}'::int[])) v(a);
+ array_sort
+--------------------------------------
+ [10:12][20:21]={{1,2},{3,4},{10,20}}
+(1 row)
+
+SELECT array_sort(a) FROM (VALUES ('[-1:0]={7,1}'::int[])) v(a);
+ array_sort
+--------------
+ [-1:0]={1,7}
+(1 row)
+
+SELECT array_sort(a) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+ array_sort
+--------------------------------------
+ [-2:0][20:21]={{1,-4},{1,2},{10,20}}
+(1 row)
+
+SELECT array_sort(a [-1:0]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+ array_sort
+------------------
+ {{1,-4},{10,20}}
+(1 row)
+
+SELECT array_sort(a [-1:0][20:20]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+ array_sort
+------------
+ {{1},{10}}
+(1 row)
+
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index d4f327636f..13b2668e12 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1377,6 +1377,19 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse
t | t
(1 row)
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+ array_sort
+------------
+ {a,B}
+(1 row)
+
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+ array_sort
+------------
+ {B,a}
+(1 row)
+
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index 03cc8cfcd9..5a5549c8e0 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -834,3 +834,28 @@ SELECT array_reverse('{1}'::int[]);
SELECT array_reverse('{1,2}'::int[]);
SELECT array_reverse('{1,2,3,NULL,4,5,6}'::int[]);
SELECT array_reverse('{{1,2},{3,4},{5,6},{7,8}}'::int[]);
+
+-- array_sort
+SELECT array_sort('{}'::int[]);
+SELECT array_sort('{1}'::int[]);
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
+SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
+
+-- multidimensional array tests
+SELECT array_sort('{{1}}'::int[]);
+SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
+
+-- no ordering operator tests
+SELECT array_sort('{1}'::xid[]); -- ndim < 1
+SELECT array_sort('{1,2,3}'::xid[]);
+SELECT array_sort('{{1,2,3}}'::xid[]); -- dims[0] < 2
+SELECT array_sort('{{1,2,3},{2,3,4}}'::xid[]);
+
+-- bounds preservation tests
+SELECT array_sort(a) FROM (VALUES ('[10:12][20:21]={{1,2},{10,20},{3,4}}'::int[])) v(a);
+SELECT array_sort(a) FROM (VALUES ('[-1:0]={7,1}'::int[])) v(a);
+SELECT array_sort(a) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+SELECT array_sort(a [-1:0]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+SELECT array_sort(a [-1:0][20:20]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index 5ee2da4e0e..67c19f0529 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -545,6 +545,10 @@ CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=second
SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive;
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index e1c4f913f8..0d720def21 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -150,6 +150,7 @@ ArrayIOData
ArrayIterator
ArrayMapState
ArrayMetaState
+ArraySortCachedInfo
ArraySubWorkspace
ArrayToken
ArrayType
--
2.34.1
v15-0002-general-purpose-array_sort.patchtext/x-patch; charset=US-ASCII; name=v15-0002-general-purpose-array_sort.patchDownload
From 288a299a29244972e6538eb4ae81c0542814d8b1 Mon Sep 17 00:00:00 2001
From: jian he <jian.universality@gmail.com>
Date: Sun, 22 Dec 2024 12:25:12 +0800
Subject: [PATCH v15 2/2] general purpose array_sort
Add two arguments to array_sort:
1. is_ascending: If true, then the array will be sorted in ascending order; if
false, it will be sorted in descending order.
2. nulls_first: If true, null values will appear before non-null values;
otherwise, null values will appear after non-null values.
discussion: https://postgr.es/m/CAEG8a3J41a4dpw_-F94fF-JPRXYxw-GfsgoGotKcjs9LVfEEvw%40mail.gmail.com
---
doc/src/sgml/func.sgml | 12 +++++++-
src/backend/utils/adt/array_userfuncs.c | 36 +++++++++++++++++++----
src/include/catalog/pg_proc.dat | 9 ++++++
src/test/regress/expected/arrays.out | 38 +++++++++++++++++++++++++
src/test/regress/sql/arrays.sql | 10 +++++++
5 files changed, 99 insertions(+), 6 deletions(-)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index b0b1642caf..1c149d3238 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -20501,7 +20501,10 @@ SELECT NULLIF(value, '(none)') ...
<indexterm>
<primary>array_sort</primary>
</indexterm>
- <function>array_sort</function> ( <type>anyarray</type> )
+ <function>array_sort</function> (
+ <parameter>array</parameter> <type>anyarray</type>
+ <optional>, <parameter>is_ascending</parameter> <type>boolean</type>
+ <optional>, <parameter>nulls_first</parameter> <type>boolean</type></optional></optional>)
<returnvalue>anyarray</returnvalue>
</para>
<para>
@@ -20509,6 +20512,13 @@ SELECT NULLIF(value, '(none)') ...
The sort order is determined by the <literal>&lt;</literal> operator of the element type,
nulls will appear after non-null values.
The collation to use can be forced by adding a <literal>COLLATE</literal> clause to any of the arguments.
+ </para>
+ <para>
+ If <parameter>is_ascending</parameter> is true, the array is sorted in ascending order; otherwise it is sorted in descending order.
+ <parameter>is_ascending</parameter> defaults to true.
+ If <parameter>nulls_first</parameter> is true, nulls appear before non-null values;
+ otherwise nulls appear after non-null values.
+ If not provided, <parameter>nulls_first</parameter> defaults to the opposite of <parameter>is_ascending</parameter>.
</para>
<para>
<literal>array_sort(ARRAY[[2,4],[2,1],[6,5]])</literal>
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index dd074b1ea2..baa9b394fa 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -1816,7 +1816,6 @@ array_reverse(PG_FUNCTION_ARGS)
* array_sort
*
* Sorts the first dimension of the array.
- * The sort order is determined by the "<" operator of the element type.
*/
Datum
array_sort(PG_FUNCTION_ARGS)
@@ -1835,11 +1834,24 @@ array_sort(PG_FUNCTION_ARGS)
int ndim,
*dims,
*lbs;
+ bool is_ascending = true;
+ bool nulls_first = false;
ndim = ARR_NDIM(array);
dims = ARR_DIMS(array);
lbs = ARR_LBOUND(array);
+ if (PG_NARGS() > 1)
+ {
+ is_ascending = PG_GETARG_BOOL(1);
+
+ /*
+ * If nulls_first not provided, it defaults to the opposite of
+ * is_ascending.
+ */
+ nulls_first = PG_NARGS() > 2 ? PG_GETARG_BOOL(2) : !is_ascending;
+ }
+
elmtyp = ARR_ELEMTYPE(array);
cache_info = (ArraySortCachedInfo *) fcinfo->flinfo->fn_extra;
if (cache_info == NULL)
@@ -1857,8 +1869,10 @@ array_sort(PG_FUNCTION_ARGS)
typentry = cache_info->typentry;
if (typentry == NULL || typentry->type_id != elmtyp)
{
- typentry = lookup_type_cache(elmtyp, TYPECACHE_LT_OPR);
- if (!OidIsValid(typentry->lt_opr))
+ typentry = lookup_type_cache(elmtyp,
+ is_ascending ? TYPECACHE_LT_OPR : TYPECACHE_GT_OPR);
+ if ((is_ascending && !OidIsValid(typentry->lt_opr)) ||
+ (!is_ascending && !OidIsValid(typentry->gt_opr)))
ereport(ERROR,
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("could not identify ordering operator for type %s",
@@ -1903,9 +1917,9 @@ array_sort(PG_FUNCTION_ARGS)
PG_RETURN_ARRAYTYPE_P(array);
tuplesortstate = tuplesort_begin_datum(typentry->type_id,
- typentry->lt_opr,
+ is_ascending ? typentry->lt_opr : typentry->gt_opr,
collation,
- false, work_mem, NULL, false);
+ nulls_first, work_mem, NULL, false);
array_iterator = array_create_iterator(array, ndim - 1, &cache_info->array_meta);
while (array_iterate(array_iterator, &value, &isnull))
@@ -1937,3 +1951,15 @@ array_sort(PG_FUNCTION_ARGS)
PG_FREE_IF_COPY(array, 0);
PG_RETURN_DATUM(makeArrayResultAny(astate, CurrentMemoryContext, true));
}
+
+Datum
+array_sort_order(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo);
+}
+
+Datum
+array_sort_order_nulls_first(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo);
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 536b05d35a..a9d2c4110d 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1747,6 +1747,15 @@
{ oid => '8810', descr => 'sort array',
proname => 'array_sort', prorettype => 'anyarray',
proargtypes => 'anyarray', prosrc => 'array_sort'},
+{ oid => '8811', descr => 'sort array',
+ proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+ proargtypes => 'anyarray bool', proargnames => '{array,is_ascending}',
+ prosrc => 'array_sort_order'},
+{ oid => '8812', descr => 'sort array',
+ proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+ proargtypes => 'anyarray bool bool',
+ proargnames => '{array,is_ascending, nulls_first}',
+ prosrc => 'array_sort_order_nulls_first'},
{ oid => '3816', descr => 'array typanalyze',
proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool',
proargtypes => 'internal', prosrc => 'array_typanalyze' },
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index 9844d5b81f..3f82972cdf 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -2771,6 +2771,44 @@ SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
{Abc,CCC,bar,bbc,foo,NULL}
(1 row)
+-- array_sort with order specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true);
+ array_sort
+--------------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6,NULL}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false);
+ array_sort
+--------------------------------
+ {NULL,6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+-- array_sort with order and nullsfirst flag specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, true);
+ array_sort
+--------------------------------
+ {NULL,1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, false);
+ array_sort
+--------------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6,NULL}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, true);
+ array_sort
+--------------------------------
+ {NULL,6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, false);
+ array_sort
+--------------------------------
+ {6.6,5.5,4.4,3.3,2.2,1.1,NULL}
+(1 row)
+
-- multidimensional array tests
SELECT array_sort('{{1}}'::int[]);
array_sort
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index 5a5549c8e0..bc19d84e2c 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -843,6 +843,16 @@ SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
+-- array_sort with order specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false);
+
+-- array_sort with order and nullsfirst flag specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, true);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, false);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, true);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, false);
+
-- multidimensional array tests
SELECT array_sort('{{1}}'::int[]);
SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
--
2.34.1
Hi,
The patch has been rebased; I also made a few minor comment tweaks.
Attachments:
v16-0001-general-purpose-array_sort.patchtext/x-patch; charset=US-ASCII; name=v16-0001-general-purpose-array_sort.patchDownload
From c9398dfe889f23dce147db1719aa9fe4dfaa3adc Mon Sep 17 00:00:00 2001
From: jian he <jian.universality@gmail.com>
Date: Sun, 9 Mar 2025 20:45:20 +0800
Subject: [PATCH v16 1/2] general purpose array_sort
Introduce the SQL-callable function array_sort(anyarray). Although the
parameter is declared as anyarray, the function is not truly polymorphic:
it accepts only arrays whose element type (or, for multidimensional input,
whose array type) provides a "less than" (`<`) operator.
If the input is a multidimensional array, array_sort sorts along the first
dimension. By default, sorting uses the argument's collation; a different
collation can be forced with a COLLATE clause. Null values always sort
after non-null values.
For example:
SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
sorts using the "C" collation.
Author: Junwang Zhao <zhjwpku@gmail.com>
Co-authored-by: Jian He <jian.universality@gmail.com>
Reviewed-by:
Michael Paquier <michael@paquier.xyz>
Aleksander Alekseev <aleksander@timescale.com>,
Tom Lane <tgl@sss.pgh.pa.us>,
David G. Johnston <david.g.johnston@gmail.com>,
Amit Langote <amitlangote09@gmail.com>,
andreas@proxel.se <andreas@proxel.se>,
Robert Haas <robertmhaas@gmail.com>,
Dean Rasheed <dean.a.rasheed@gmail.com>
discussion: https://postgr.es/m/CAEG8a3J41a4dpw_-F94fF-JPRXYxw-GfsgoGotKcjs9LVfEEvw%40mail.gmail.com
---
doc/src/sgml/func.sgml | 20 +++
src/backend/utils/adt/array_userfuncs.c | 143 ++++++++++++++++++
src/backend/utils/adt/arrayfuncs.c | 3 +-
src/include/catalog/pg_proc.dat | 3 +
src/include/utils/array.h | 1 +
src/test/regress/expected/arrays.out | 90 +++++++++++
.../regress/expected/collate.icu.utf8.out | 13 ++
src/test/regress/sql/arrays.sql | 25 +++
src/test/regress/sql/collate.icu.utf8.sql | 4 +
src/tools/pgindent/typedefs.list | 1 +
10 files changed, 302 insertions(+), 1 deletion(-)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 4d6061a8458..e24ef42ad98 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -20669,6 +20669,26 @@ SELECT NULLIF(value, '(none)') ...
</para></entry>
</row>
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <indexterm>
+ <primary>array_sort</primary>
+ </indexterm>
+ <function>array_sort</function> ( <type>anyarray</type> )
+ <returnvalue>anyarray</returnvalue>
+ </para>
+ <para>
+ Sorts the first dimension of the array.
+ The sort order is determined by the <literal>&lt;</literal> operator of the element type,
+ nulls will appear after non-null values.
+ The collation to use can be forced by adding a <literal>COLLATE</literal> clause to any of the arguments.
+ </para>
+ <para>
+ <literal>array_sort(ARRAY[[2,4],[2,1],[6,5]])</literal>
+ <returnvalue>{{2,1},{2,4},{6,5}}</returnvalue>
+ </para></entry>
+ </row>
+
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm id="function-array-to-string">
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index 2aae2f8ed93..583e56fc805 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -15,6 +15,7 @@
#include "catalog/pg_type.h"
#include "common/int.h"
#include "common/pg_prng.h"
+#include "miscadmin.h"
#include "libpq/pqformat.h"
#include "nodes/supportnodes.h"
#include "port/pg_bitutils.h"
@@ -22,6 +23,7 @@
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/lsyscache.h"
+#include "utils/tuplesort.h"
#include "utils/typcache.h"
/*
@@ -43,6 +45,18 @@ typedef struct DeserialIOData
Oid typioparam;
} DeserialIOData;
+/*
+ * ArraySortCachedInfo
+ * Used for caching data in array_sort
+ */
+typedef struct ArraySortCachedInfo
+{
+ TypeCacheEntry *typentry; /* type cache entry for element type */
+ TypeCacheEntry *array_typentry; /* type cache entry for array type */
+ ArrayMetaState array_meta; /* array metadata for better
+ * array_create_iterator performance */
+} ArraySortCachedInfo;
+
static Datum array_position_common(FunctionCallInfo fcinfo);
@@ -1858,3 +1872,132 @@ array_reverse(PG_FUNCTION_ARGS)
PG_RETURN_ARRAYTYPE_P(result);
}
+
+/*
+ * array_sort
+ *
+ * Sorts the first dimension of the array.
+ * The sort order is determined by the "<" operator of the element type.
+ */
+Datum
+array_sort(PG_FUNCTION_ARGS)
+{
+ ArrayType *array = PG_GETARG_ARRAYTYPE_P(0);
+ Oid elmtyp;
+ Oid array_type = InvalidOid;
+ Oid collation = PG_GET_COLLATION();
+ ArraySortCachedInfo *cache_info;
+ TypeCacheEntry *typentry;
+ Tuplesortstate *tuplesortstate;
+ ArrayIterator array_iterator;
+ Datum value;
+ bool isnull;
+ ArrayBuildStateAny *astate = NULL;
+ int ndim,
+ *dims,
+ *lbs;
+
+ ndim = ARR_NDIM(array);
+ dims = ARR_DIMS(array);
+ lbs = ARR_LBOUND(array);
+
+ elmtyp = ARR_ELEMTYPE(array);
+ cache_info = (ArraySortCachedInfo *) fcinfo->flinfo->fn_extra;
+ if (cache_info == NULL)
+ {
+ cache_info = (ArraySortCachedInfo *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ sizeof(ArraySortCachedInfo));
+ cache_info->typentry = NULL;
+ cache_info->array_typentry = NULL;
+ fcinfo->flinfo->fn_extra = (void *) cache_info;
+ }
+
+ if (ndim == 1)
+ {
+ /* Finds the ordering operator for the type for 1-D arrays */
+ typentry = cache_info->typentry;
+ if (typentry == NULL || typentry->type_id != elmtyp)
+ {
+ typentry = lookup_type_cache(elmtyp, TYPECACHE_LT_OPR);
+ if (!OidIsValid(typentry->lt_opr))
+ ereport(ERROR,
+ errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("could not identify ordering operator for type %s",
+ format_type_be(elmtyp)));
+
+ cache_info->typentry = typentry;
+ cache_info->array_meta.element_type = elmtyp;
+ cache_info->array_meta.typlen = typentry->typlen;
+ cache_info->array_meta.typbyval = typentry->typbyval;
+ cache_info->array_meta.typalign = typentry->typalign;
+ }
+ }
+ else
+ {
+ /* Finds the ordering operator for the array type for multi-D arrays */
+ typentry = cache_info->array_typentry;
+ if (typentry == NULL || typentry->typelem != elmtyp)
+ {
+ array_type = get_array_type(elmtyp);
+ if (!OidIsValid(array_type))
+ ereport(ERROR,
+ errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("could not find array type for data type %s",
+ format_type_be(elmtyp)));
+
+ typentry = lookup_type_cache(array_type, TYPECACHE_LT_OPR);
+ if (!OidIsValid(typentry->lt_opr))
+ ereport(ERROR,
+ errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("could not identify ordering operator for type %s",
+ format_type_be(array_type)));
+ cache_info->array_typentry = typentry;
+ }
+ cache_info->array_meta.element_type = elmtyp;
+ get_typlenbyvalalign(elmtyp,
+ &cache_info->array_meta.typlen,
+ &cache_info->array_meta.typbyval,
+ &cache_info->array_meta.typalign);
+ }
+
+ if (ndim < 1 || dims[0] < 2)
+ PG_RETURN_ARRAYTYPE_P(array);
+
+ tuplesortstate = tuplesort_begin_datum(typentry->type_id,
+ typentry->lt_opr,
+ collation,
+ false, work_mem, NULL, false);
+
+ array_iterator = array_create_iterator(array, ndim - 1, &cache_info->array_meta);
+ while (array_iterate(array_iterator, &value, &isnull))
+ {
+ tuplesort_putdatum(tuplesortstate, value, isnull);
+ }
+ array_free_iterator(array_iterator);
+
+ /*
+ * Do the sort.
+ */
+ tuplesort_performsort(tuplesortstate);
+
+ while (tuplesort_getdatum(tuplesortstate, true, false, &value, &isnull, NULL))
+ {
+ astate = accumArrayResultAny(astate, value, isnull,
+ typentry->type_id, CurrentMemoryContext);
+ }
+
+ tuplesort_end(tuplesortstate);
+
+ /*
+ * accumArrayResultAny sets the first dimension's lower bound to 1, so we
+ * need to restore it here; see accumArrayResultArr.
+ */
+ if (ndim == 1)
+ astate->scalarstate->lb = lbs[0];
+ else
+ astate->arraystate->lbs[0] = lbs[0];
+
+ /* Avoid leaking memory when handed toasted input */
+ PG_FREE_IF_COPY(array, 0);
+ PG_RETURN_DATUM(makeArrayResultAny(astate, CurrentMemoryContext, true));
+}
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index d777f38ed99..ed4e3877e4d 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -5330,6 +5330,7 @@ initArrayResultWithSize(Oid element_type, MemoryContext rcontext,
MemoryContextAlloc(arr_context, astate->alen * sizeof(bool));
astate->nelems = 0;
astate->element_type = element_type;
+ astate->lb = 1; /* default lower bound */
get_typlenbyvalalign(element_type,
&astate->typlen,
&astate->typbyval,
@@ -5867,7 +5868,7 @@ makeArrayResultAny(ArrayBuildStateAny *astate,
/* If no elements were presented, we want to create an empty array */
ndims = (astate->scalarstate->nelems > 0) ? 1 : 0;
dims[0] = astate->scalarstate->nelems;
- lbs[0] = 1;
+ lbs[0] = astate->scalarstate->lb;
result = makeMdArrayResult(astate->scalarstate, ndims, dims, lbs,
rcontext, release);
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 134b3dd8689..b6c4f4c2786 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1750,6 +1750,9 @@
{ oid => '8686', descr => 'reverse array',
proname => 'array_reverse', prorettype => 'anyarray',
proargtypes => 'anyarray', prosrc => 'array_reverse' },
+{ oid => '8810', descr => 'sort array',
+ proname => 'array_sort', prorettype => 'anyarray',
+ proargtypes => 'anyarray', prosrc => 'array_sort'},
{ oid => '3816', descr => 'array typanalyze',
proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool',
proargtypes => 'internal', prosrc => 'array_typanalyze' },
diff --git a/src/include/utils/array.h b/src/include/utils/array.h
index 52f1fbf8d43..ce0ef4c348c 100644
--- a/src/include/utils/array.h
+++ b/src/include/utils/array.h
@@ -192,6 +192,7 @@ typedef struct ArrayBuildState
int alen; /* allocated length of above arrays */
int nelems; /* number of valid entries in above arrays */
Oid element_type; /* data type of the Datums */
+ int lb; /* lower bound for one dimension array */
int16 typlen; /* needed info about datatype */
bool typbyval;
char typalign;
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index 0b61fb5bb78..9844d5b81fa 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -2734,3 +2734,93 @@ SELECT array_reverse('{{1,2},{3,4},{5,6},{7,8}}'::int[]);
{{7,8},{5,6},{3,4},{1,2}}
(1 row)
+-- array_sort
+SELECT array_sort('{}'::int[]);
+ array_sort
+------------
+ {}
+(1 row)
+
+SELECT array_sort('{1}'::int[]);
+ array_sort
+------------
+ {1}
+(1 row)
+
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+ array_sort
+---------------
+ {1,2,3,4,5,6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+ array_sort
+---------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
+ array_sort
+-----------------------
+ {Abc,CCC,bar,bbc,foo}
+(1 row)
+
+SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
+ array_sort
+----------------------------
+ {Abc,CCC,bar,bbc,foo,NULL}
+(1 row)
+
+-- multidimensional array tests
+SELECT array_sort('{{1}}'::int[]);
+ array_sort
+------------
+ {{1}}
+(1 row)
+
+SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
+ array_sort
+---------------------
+ {{2,1},{2,4},{6,5}}
+(1 row)
+
+-- no ordering operator tests
+SELECT array_sort('{1}'::xid[]); -- ndim < 1
+ERROR: could not identify ordering operator for type xid
+SELECT array_sort('{1,2,3}'::xid[]);
+ERROR: could not identify ordering operator for type xid
+SELECT array_sort('{{1,2,3}}'::xid[]); -- dims[0] < 2
+ERROR: could not identify ordering operator for type xid[]
+SELECT array_sort('{{1,2,3},{2,3,4}}'::xid[]);
+ERROR: could not identify ordering operator for type xid[]
+-- bounds preservation tests
+SELECT array_sort(a) FROM (VALUES ('[10:12][20:21]={{1,2},{10,20},{3,4}}'::int[])) v(a);
+ array_sort
+--------------------------------------
+ [10:12][20:21]={{1,2},{3,4},{10,20}}
+(1 row)
+
+SELECT array_sort(a) FROM (VALUES ('[-1:0]={7,1}'::int[])) v(a);
+ array_sort
+--------------
+ [-1:0]={1,7}
+(1 row)
+
+SELECT array_sort(a) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+ array_sort
+--------------------------------------
+ [-2:0][20:21]={{1,-4},{1,2},{10,20}}
+(1 row)
+
+SELECT array_sort(a [-1:0]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+ array_sort
+------------------
+ {{1,-4},{10,20}}
+(1 row)
+
+SELECT array_sort(a [-1:0][20:20]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+ array_sort
+------------
+ {{1},{10}}
+(1 row)
+
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index aee4755c083..69805d4b9ec 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1471,6 +1471,19 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse
t | t
(1 row)
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+ array_sort
+------------
+ {a,B}
+(1 row)
+
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+ array_sort
+------------
+ {B,a}
+(1 row)
+
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index 03cc8cfcd91..5a5549c8e0a 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -834,3 +834,28 @@ SELECT array_reverse('{1}'::int[]);
SELECT array_reverse('{1,2}'::int[]);
SELECT array_reverse('{1,2,3,NULL,4,5,6}'::int[]);
SELECT array_reverse('{{1,2},{3,4},{5,6},{7,8}}'::int[]);
+
+-- array_sort
+SELECT array_sort('{}'::int[]);
+SELECT array_sort('{1}'::int[]);
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
+SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
+
+-- multidimensional array tests
+SELECT array_sort('{{1}}'::int[]);
+SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
+
+-- no ordering operator tests
+SELECT array_sort('{1}'::xid[]); -- ndim < 1
+SELECT array_sort('{1,2,3}'::xid[]);
+SELECT array_sort('{{1,2,3}}'::xid[]); -- dims[0] < 2
+SELECT array_sort('{{1,2,3},{2,3,4}}'::xid[]);
+
+-- bounds preservation tests
+SELECT array_sort(a) FROM (VALUES ('[10:12][20:21]={{1,2},{10,20},{3,4}}'::int[])) v(a);
+SELECT array_sort(a) FROM (VALUES ('[-1:0]={7,1}'::int[])) v(a);
+SELECT array_sort(a) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+SELECT array_sort(a [-1:0]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+SELECT array_sort(a [-1:0][20:20]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index 38ebcd99508..dbc190227d0 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -564,6 +564,10 @@ CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=second
SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive;
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 9840060997f..30f8e14d6a9 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -151,6 +151,7 @@ ArrayIOData
ArrayIterator
ArrayMapState
ArrayMetaState
+ArraySortCachedInfo
ArraySubWorkspace
ArrayToken
ArrayType
--
2.34.1
v16-0002-general-purpose-array_sort.patchtext/x-patch; charset=US-ASCII; name=v16-0002-general-purpose-array_sort.patchDownload
From 6dd5e67dd046edbc3ec6c4ec65975ab3e5f041d8 Mon Sep 17 00:00:00 2001
From: jian he <jian.universality@gmail.com>
Date: Sun, 9 Mar 2025 20:55:25 +0800
Subject: [PATCH v16 2/2] general purpose array_sort
Add two arguments to array_sort:
1. is_ascending: If true, then the array will be sorted in ascending order,
otherwise in descending order.
2. nulls_first: If true, null values will appear
before non-null values, otherwise, null values will appear after non-null
values.
discussion: https://postgr.es/m/CAEG8a3J41a4dpw_-F94fF-JPRXYxw-GfsgoGotKcjs9LVfEEvw%40mail.gmail.com
---
doc/src/sgml/func.sgml | 12 +++++++-
src/backend/utils/adt/array_userfuncs.c | 36 +++++++++++++++++++----
src/include/catalog/pg_proc.dat | 9 ++++++
src/test/regress/expected/arrays.out | 38 +++++++++++++++++++++++++
src/test/regress/sql/arrays.sql | 10 +++++++
5 files changed, 99 insertions(+), 6 deletions(-)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index e24ef42ad98..8c3c8df5f36 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -20674,7 +20674,10 @@ SELECT NULLIF(value, '(none)') ...
<indexterm>
<primary>array_sort</primary>
</indexterm>
- <function>array_sort</function> ( <type>anyarray</type> )
+ <function>array_sort</function> (
+ <parameter>array</parameter> <type>anyarray</type>
+ <optional>, <parameter>is_ascending</parameter> <type>boolean</type>
+ <optional>, <parameter>nulls_first</parameter> <type>boolean</type></optional></optional>)
<returnvalue>anyarray</returnvalue>
</para>
<para>
@@ -20682,6 +20685,13 @@ SELECT NULLIF(value, '(none)') ...
The sort order is determined by the <literal>&lt;</literal> operator of the element type,
nulls will appear after non-null values.
The collation to use can be forced by adding a <literal>COLLATE</literal> clause to any of the arguments.
+ </para>
+ <para>
+ If <parameter>is_ascending</parameter> is true, the array is sorted in ascending order; otherwise it is sorted in descending order.
+ <parameter>is_ascending</parameter> defaults to true.
+ If <parameter>nulls_first</parameter> is true, nulls appear before non-null values;
+ otherwise nulls appear after non-null values.
+ If not provided, <parameter>nulls_first</parameter> defaults to the opposite of <parameter>is_ascending</parameter>.
</para>
<para>
<literal>array_sort(ARRAY[[2,4],[2,1],[6,5]])</literal>
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index 583e56fc805..2e38ef05e48 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -1877,7 +1877,6 @@ array_reverse(PG_FUNCTION_ARGS)
* array_sort
*
* Sorts the first dimension of the array.
- * The sort order is determined by the "<" operator of the element type.
*/
Datum
array_sort(PG_FUNCTION_ARGS)
@@ -1896,11 +1895,24 @@ array_sort(PG_FUNCTION_ARGS)
int ndim,
*dims,
*lbs;
+ bool is_ascending = true;
+ bool nulls_first = false;
ndim = ARR_NDIM(array);
dims = ARR_DIMS(array);
lbs = ARR_LBOUND(array);
+ if (PG_NARGS() > 1)
+ {
+ is_ascending = PG_GETARG_BOOL(1);
+
+ /*
+ * If nulls_first not provided, it defaults to the opposite of
+ * is_ascending.
+ */
+ nulls_first = PG_NARGS() > 2 ? PG_GETARG_BOOL(2) : !is_ascending;
+ }
+
elmtyp = ARR_ELEMTYPE(array);
cache_info = (ArraySortCachedInfo *) fcinfo->flinfo->fn_extra;
if (cache_info == NULL)
@@ -1918,8 +1930,10 @@ array_sort(PG_FUNCTION_ARGS)
typentry = cache_info->typentry;
if (typentry == NULL || typentry->type_id != elmtyp)
{
- typentry = lookup_type_cache(elmtyp, TYPECACHE_LT_OPR);
- if (!OidIsValid(typentry->lt_opr))
+ typentry = lookup_type_cache(elmtyp,
+ is_ascending ? TYPECACHE_LT_OPR : TYPECACHE_GT_OPR);
+ if ((is_ascending && !OidIsValid(typentry->lt_opr)) ||
+ (!is_ascending && !OidIsValid(typentry->gt_opr)))
ereport(ERROR,
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("could not identify ordering operator for type %s",
@@ -1964,9 +1978,9 @@ array_sort(PG_FUNCTION_ARGS)
PG_RETURN_ARRAYTYPE_P(array);
tuplesortstate = tuplesort_begin_datum(typentry->type_id,
- typentry->lt_opr,
+ is_ascending ? typentry->lt_opr : typentry->gt_opr,
collation,
- false, work_mem, NULL, false);
+ nulls_first, work_mem, NULL, false);
array_iterator = array_create_iterator(array, ndim - 1, &cache_info->array_meta);
while (array_iterate(array_iterator, &value, &isnull))
@@ -2001,3 +2015,15 @@ array_sort(PG_FUNCTION_ARGS)
PG_FREE_IF_COPY(array, 0);
PG_RETURN_DATUM(makeArrayResultAny(astate, CurrentMemoryContext, true));
}
+
+Datum
+array_sort_order(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo);
+}
+
+Datum
+array_sort_order_nulls_first(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo);
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index b6c4f4c2786..c7c169366e3 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1753,6 +1753,15 @@
{ oid => '8810', descr => 'sort array',
proname => 'array_sort', prorettype => 'anyarray',
proargtypes => 'anyarray', prosrc => 'array_sort'},
+{ oid => '8811', descr => 'sort array',
+ proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+ proargtypes => 'anyarray bool', proargnames => '{array,is_ascending}',
+ prosrc => 'array_sort_order'},
+{ oid => '8812', descr => 'sort array',
+ proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+ proargtypes => 'anyarray bool bool',
+ proargnames => '{array,is_ascending, nulls_first}',
+ prosrc => 'array_sort_order_nulls_first'},
{ oid => '3816', descr => 'array typanalyze',
proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool',
proargtypes => 'internal', prosrc => 'array_typanalyze' },
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index 9844d5b81fa..3f82972cdfe 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -2771,6 +2771,44 @@ SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
{Abc,CCC,bar,bbc,foo,NULL}
(1 row)
+-- array_sort with order specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true);
+ array_sort
+--------------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6,NULL}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false);
+ array_sort
+--------------------------------
+ {NULL,6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+-- array_sort with order and nullsfirst flag specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, true);
+ array_sort
+--------------------------------
+ {NULL,1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, false);
+ array_sort
+--------------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6,NULL}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, true);
+ array_sort
+--------------------------------
+ {NULL,6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, false);
+ array_sort
+--------------------------------
+ {6.6,5.5,4.4,3.3,2.2,1.1,NULL}
+(1 row)
+
-- multidimensional array tests
SELECT array_sort('{{1}}'::int[]);
array_sort
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index 5a5549c8e0a..bc19d84e2cf 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -843,6 +843,16 @@ SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
+-- array_sort with order specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false);
+
+-- array_sort with order and nullsfirst flag specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, true);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, false);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, true);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, false);
+
-- multidimensional array tests
SELECT array_sort('{{1}}'::int[]);
SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
--
2.34.1
hi.
before commit 4618045bee4a6d3efcb489c319649d8dd9aaa738 ([0]https://git.postgresql.org/cgit/postgresql.git/commit/?id=4618045bee4a6d3efcb489c319649d8dd9aaa738)
select array_sort(array(select '1 4'::int2vector union all select '1
2'::int2vector));
array_sort
--------------------------
[1:2][0:1]={{1,2},{1,4}}
(1 row)
after
select array_sort(array(select '1 4'::int2vector union all select '1
2'::int2vector));
array_sort
---------------
{"1 2","1 4"}
(1 row)
Looking closer, the second result is the expected one.
I didn't dig into why commit 4618045bee makes this patch produce the correct result,
but I guess it would be best to include such a test case,
so I've attached a patch.
[0]: https://git.postgresql.org/cgit/postgresql.git/commit/?id=4618045bee4a6d3efcb489c319649d8dd9aaa738
Attachments:
v17-0002-general-purpose-array_sort.patchtext/x-patch; charset=US-ASCII; name=v17-0002-general-purpose-array_sort.patchDownload
From 9ab98e6470dd5f16ef6024c291e5aa99ebfbecde Mon Sep 17 00:00:00 2001
From: jian he <jian.universality@gmail.com>
Date: Sun, 16 Mar 2025 21:31:55 +0800
Subject: [PATCH v17 2/2] general purpose array_sort
Add two arguments to array_sort:
1. is_ascending: If true, then the array will be sorted in ascending order,
otherwise in descending order.
2. nulls_first: If true, null values will appear
before non-null values, otherwise, null values will appear after non-null
values.
discussion: https://postgr.es/m/CAEG8a3J41a4dpw_-F94fF-JPRXYxw-GfsgoGotKcjs9LVfEEvw%40mail.gmail.com
---
doc/src/sgml/func.sgml | 12 +++++++-
src/backend/utils/adt/array_userfuncs.c | 36 +++++++++++++++++++----
src/include/catalog/pg_proc.dat | 9 ++++++
src/test/regress/expected/arrays.out | 38 +++++++++++++++++++++++++
src/test/regress/sql/arrays.sql | 10 +++++++
5 files changed, 99 insertions(+), 6 deletions(-)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 6d901230bdc..c03a66ffb26 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -20708,7 +20708,10 @@ SELECT NULLIF(value, '(none)') ...
<indexterm>
<primary>array_sort</primary>
</indexterm>
- <function>array_sort</function> ( <type>anyarray</type> )
+ <function>array_sort</function> (
+ <parameter>array</parameter> <type>anyarray</type>
+ <optional>, <parameter>is_ascending</parameter> <type>boolean</type>
+ <optional>, <parameter>nulls_first</parameter> <type>boolean</type></optional></optional>)
<returnvalue>anyarray</returnvalue>
</para>
<para>
@@ -20716,6 +20719,13 @@ SELECT NULLIF(value, '(none)') ...
The sort order is determined by the <literal>&lt;</literal> operator of the element type,
nulls will appear after non-null values.
The collation to use can be forced by adding a <literal>COLLATE</literal> clause to any of the arguments.
+ </para>
+ <para>
+ If <parameter>is_ascending</parameter> is true then sort by ascending order, otherwise descending order.
+ <parameter>is_ascending</parameter> defaults to true.
+ If <parameter>nulls_first</parameter> is true then nulls appear before non-null values,
+ otherwise nulls appear after non-null values.
+ <parameter>nulls_first</parameter> defaults to the opposite of <parameter>is_ascending</parameter> if not provided.
</para>
<para>
<literal>array_sort(ARRAY[[2,4],[2,1],[6,5]])</literal>
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index 583e56fc805..2e38ef05e48 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -1877,7 +1877,6 @@ array_reverse(PG_FUNCTION_ARGS)
* array_sort
*
* Sorts the first dimension of the array.
- * The sort order is determined by the "<" operator of the element type.
*/
Datum
array_sort(PG_FUNCTION_ARGS)
@@ -1896,11 +1895,24 @@ array_sort(PG_FUNCTION_ARGS)
int ndim,
*dims,
*lbs;
+ bool is_ascending = true;
+ bool nulls_first = false;
ndim = ARR_NDIM(array);
dims = ARR_DIMS(array);
lbs = ARR_LBOUND(array);
+ if (PG_NARGS() > 1)
+ {
+ is_ascending = PG_GETARG_BOOL(1);
+
+ /*
+ * If nulls_first not provided, it defaults to the opposite of
+ * is_ascending.
+ */
+ nulls_first = PG_NARGS() > 2 ? PG_GETARG_BOOL(2) : !is_ascending;
+ }
+
elmtyp = ARR_ELEMTYPE(array);
cache_info = (ArraySortCachedInfo *) fcinfo->flinfo->fn_extra;
if (cache_info == NULL)
@@ -1918,8 +1930,10 @@ array_sort(PG_FUNCTION_ARGS)
typentry = cache_info->typentry;
if (typentry == NULL || typentry->type_id != elmtyp)
{
- typentry = lookup_type_cache(elmtyp, TYPECACHE_LT_OPR);
- if (!OidIsValid(typentry->lt_opr))
+ typentry = lookup_type_cache(elmtyp,
+ is_ascending ? TYPECACHE_LT_OPR : TYPECACHE_GT_OPR);
+ if ((is_ascending && !OidIsValid(typentry->lt_opr)) ||
+ (!is_ascending && !OidIsValid(typentry->gt_opr)))
ereport(ERROR,
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("could not identify ordering operator for type %s",
@@ -1964,9 +1978,9 @@ array_sort(PG_FUNCTION_ARGS)
PG_RETURN_ARRAYTYPE_P(array);
tuplesortstate = tuplesort_begin_datum(typentry->type_id,
- typentry->lt_opr,
+ is_ascending ? typentry->lt_opr : typentry->gt_opr,
collation,
- false, work_mem, NULL, false);
+ nulls_first, work_mem, NULL, false);
array_iterator = array_create_iterator(array, ndim - 1, &cache_info->array_meta);
while (array_iterate(array_iterator, &value, &isnull))
@@ -2001,3 +2015,15 @@ array_sort(PG_FUNCTION_ARGS)
PG_FREE_IF_COPY(array, 0);
PG_RETURN_DATUM(makeArrayResultAny(astate, CurrentMemoryContext, true));
}
+
+Datum
+array_sort_order(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo);
+}
+
+Datum
+array_sort_order_nulls_first(PG_FUNCTION_ARGS)
+{
+ return array_sort(fcinfo);
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 90ce03355c2..646425cf7cb 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1775,6 +1775,15 @@
{ oid => '8810', descr => 'sort array',
proname => 'array_sort', prorettype => 'anyarray',
proargtypes => 'anyarray', prosrc => 'array_sort'},
+{ oid => '8811', descr => 'sort array',
+ proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+ proargtypes => 'anyarray bool', proargnames => '{array,is_ascending}',
+ prosrc => 'array_sort_order'},
+{ oid => '8812', descr => 'sort array',
+ proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray',
+ proargtypes => 'anyarray bool bool',
+ proargnames => '{array,is_ascending, nulls_first}',
+ prosrc => 'array_sort_order_nulls_first'},
{ oid => '3816', descr => 'array typanalyze',
proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool',
proargtypes => 'internal', prosrc => 'array_typanalyze' },
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index 089cb8bc443..26346083f31 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -2903,6 +2903,44 @@ SELECT array_sort(ARRAY(SELECT '1 4'::int2vector UNION ALL SELECT '1 2'::int2vec
{"1 2","1 4"}
(1 row)
+-- array_sort with order specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true);
+ array_sort
+--------------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6,NULL}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false);
+ array_sort
+--------------------------------
+ {NULL,6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+-- array_sort with order and nullsfirst flag specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, true);
+ array_sort
+--------------------------------
+ {NULL,1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, false);
+ array_sort
+--------------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6,NULL}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, true);
+ array_sort
+--------------------------------
+ {NULL,6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, false);
+ array_sort
+--------------------------------
+ {6.6,5.5,4.4,3.3,2.2,1.1,NULL}
+(1 row)
+
-- multidimensional array tests
SELECT array_sort('{{1}}'::int[]);
array_sort
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index 9161c3aedc2..788f004945a 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -866,6 +866,16 @@ SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
SELECT array_sort(ARRAY(SELECT '1 4'::int2vector UNION ALL SELECT '1 2'::int2vector));
+-- array_sort with order specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false);
+
+-- array_sort with order and nullsfirst flag specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, true);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, false);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, true);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, false);
+
-- multidimensional array tests
SELECT array_sort('{{1}}'::int[]);
SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
--
2.34.1
v17-0001-general-purpose-array_sort.patchtext/x-patch; charset=US-ASCII; name=v17-0001-general-purpose-array_sort.patchDownload
From 7b0fc8280dd24814cfb7bab76bd0462017c2cc15 Mon Sep 17 00:00:00 2001
From: jian he <jian.universality@gmail.com>
Date: Sun, 16 Mar 2025 21:40:38 +0800
Subject: [PATCH v17 1/2] general purpose array_sort
Introduce the SQL-callable function array_sort(anyarray). The parameter passed
to this function cannot truly be a polymorphic data type. Instead, it accepts
any array type that supports the "less than" (`<`) operator.
If the input parameter is a multidimensional array, array_sort will sort based
on the first dimension. By default, sorting is performed based on
the argument's collation. However, you can also specify a COLLATE clause if
needed. NULL values sort after non-null values.
for example:
SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
will sort based on "C" collation.
Author: Junwang Zhao <zhjwpku@gmail.com>
Co-authored-by: Jian He <jian.universality@gmail.com>
Reviewed-by:
Michael Paquier <michael@paquier.xyz>
Aleksander Alekseev <aleksander@timescale.com>,
Tom Lane <tgl@sss.pgh.pa.us>,
David G. Johnston <david.g.johnston@gmail.com>,
Amit Langote <amitlangote09@gmail.com>,
andreas@proxel.se <andreas@proxel.se>,
Robert Haas <robertmhaas@gmail.com>,
Dean Rasheed <dean.a.rasheed@gmail.com>
discussion: https://postgr.es/m/CAEG8a3J41a4dpw_-F94fF-JPRXYxw-GfsgoGotKcjs9LVfEEvw%40mail.gmail.com
---
doc/src/sgml/func.sgml | 20 +++
src/backend/utils/adt/array_userfuncs.c | 143 ++++++++++++++++++
src/backend/utils/adt/arrayfuncs.c | 3 +-
src/include/catalog/pg_proc.dat | 3 +
src/include/utils/array.h | 1 +
src/test/regress/expected/arrays.out | 102 +++++++++++++
.../regress/expected/collate.icu.utf8.out | 13 ++
src/test/regress/sql/arrays.sql | 27 ++++
src/test/regress/sql/collate.icu.utf8.sql | 4 +
src/tools/pgindent/typedefs.list | 1 +
10 files changed, 316 insertions(+), 1 deletion(-)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 1c3810e1a04..6d901230bdc 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -20703,6 +20703,26 @@ SELECT NULLIF(value, '(none)') ...
</para></entry>
</row>
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <indexterm>
+ <primary>array_sort</primary>
+ </indexterm>
+ <function>array_sort</function> ( <type>anyarray</type> )
+ <returnvalue>anyarray</returnvalue>
+ </para>
+ <para>
+ Sorts the first dimension of the array.
+ The sort order is determined by the <literal>&lt;</literal> operator of the element type,
+ nulls will appear after non-null values.
+ The collation to use can be forced by adding a <literal>COLLATE</literal> clause to any of the arguments.
+ </para>
+ <para>
+ <literal>array_sort(ARRAY[[2,4],[2,1],[6,5]])</literal>
+ <returnvalue>{{2,1},{2,4},{6,5}}</returnvalue>
+ </para></entry>
+ </row>
+
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm id="function-array-to-string">
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index 2aae2f8ed93..583e56fc805 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -15,6 +15,7 @@
#include "catalog/pg_type.h"
#include "common/int.h"
#include "common/pg_prng.h"
+#include "miscadmin.h"
#include "libpq/pqformat.h"
#include "nodes/supportnodes.h"
#include "port/pg_bitutils.h"
@@ -22,6 +23,7 @@
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/lsyscache.h"
+#include "utils/tuplesort.h"
#include "utils/typcache.h"
/*
@@ -43,6 +45,18 @@ typedef struct DeserialIOData
Oid typioparam;
} DeserialIOData;
+/*
+ * ArraySortCachedInfo
+ * Used for caching data in array_sort
+ */
+typedef struct ArraySortCachedInfo
+{
+ TypeCacheEntry *typentry; /* type cache entry for element type */
+ TypeCacheEntry *array_typentry; /* type cache entry for array type */
+ ArrayMetaState array_meta; /* array metadata for better
+ * array_create_iterator performance */
+} ArraySortCachedInfo;
+
static Datum array_position_common(FunctionCallInfo fcinfo);
@@ -1858,3 +1872,132 @@ array_reverse(PG_FUNCTION_ARGS)
PG_RETURN_ARRAYTYPE_P(result);
}
+
+/*
+ * array_sort
+ *
+ * Sorts the first dimension of the array.
+ * The sort order is determined by the "<" operator of the element type.
+ */
+Datum
+array_sort(PG_FUNCTION_ARGS)
+{
+ ArrayType *array = PG_GETARG_ARRAYTYPE_P(0);
+ Oid elmtyp;
+ Oid array_type = InvalidOid;
+ Oid collation = PG_GET_COLLATION();
+ ArraySortCachedInfo *cache_info;
+ TypeCacheEntry *typentry;
+ Tuplesortstate *tuplesortstate;
+ ArrayIterator array_iterator;
+ Datum value;
+ bool isnull;
+ ArrayBuildStateAny *astate = NULL;
+ int ndim,
+ *dims,
+ *lbs;
+
+ ndim = ARR_NDIM(array);
+ dims = ARR_DIMS(array);
+ lbs = ARR_LBOUND(array);
+
+ elmtyp = ARR_ELEMTYPE(array);
+ cache_info = (ArraySortCachedInfo *) fcinfo->flinfo->fn_extra;
+ if (cache_info == NULL)
+ {
+ cache_info = (ArraySortCachedInfo *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ sizeof(ArraySortCachedInfo));
+ cache_info->typentry = NULL;
+ cache_info->array_typentry = NULL;
+ fcinfo->flinfo->fn_extra = (void *) cache_info;
+ }
+
+ if (ndim == 1)
+ {
+ /* Finds the ordering operator for the type for 1-D arrays */
+ typentry = cache_info->typentry;
+ if (typentry == NULL || typentry->type_id != elmtyp)
+ {
+ typentry = lookup_type_cache(elmtyp, TYPECACHE_LT_OPR);
+ if (!OidIsValid(typentry->lt_opr))
+ ereport(ERROR,
+ errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("could not identify ordering operator for type %s",
+ format_type_be(elmtyp)));
+
+ cache_info->typentry = typentry;
+ cache_info->array_meta.element_type = elmtyp;
+ cache_info->array_meta.typlen = typentry->typlen;
+ cache_info->array_meta.typbyval = typentry->typbyval;
+ cache_info->array_meta.typalign = typentry->typalign;
+ }
+ }
+ else
+ {
+ /* Finds the ordering operator for the array type for multi-D arrays */
+ typentry = cache_info->array_typentry;
+ if (typentry == NULL || typentry->typelem != elmtyp)
+ {
+ array_type = get_array_type(elmtyp);
+ if (!OidIsValid(array_type))
+ ereport(ERROR,
+ errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("could not find array type for data type %s",
+ format_type_be(elmtyp)));
+
+ typentry = lookup_type_cache(array_type, TYPECACHE_LT_OPR);
+ if (!OidIsValid(typentry->lt_opr))
+ ereport(ERROR,
+ errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("could not identify ordering operator for type %s",
+ format_type_be(array_type)));
+ cache_info->array_typentry = typentry;
+ }
+ cache_info->array_meta.element_type = elmtyp;
+ get_typlenbyvalalign(elmtyp,
+ &cache_info->array_meta.typlen,
+ &cache_info->array_meta.typbyval,
+ &cache_info->array_meta.typalign);
+ }
+
+ if (ndim < 1 || dims[0] < 2)
+ PG_RETURN_ARRAYTYPE_P(array);
+
+ tuplesortstate = tuplesort_begin_datum(typentry->type_id,
+ typentry->lt_opr,
+ collation,
+ false, work_mem, NULL, false);
+
+ array_iterator = array_create_iterator(array, ndim - 1, &cache_info->array_meta);
+ while (array_iterate(array_iterator, &value, &isnull))
+ {
+ tuplesort_putdatum(tuplesortstate, value, isnull);
+ }
+ array_free_iterator(array_iterator);
+
+ /*
+ * Do the sort.
+ */
+ tuplesort_performsort(tuplesortstate);
+
+ while (tuplesort_getdatum(tuplesortstate, true, false, &value, &isnull, NULL))
+ {
+ astate = accumArrayResultAny(astate, value, isnull,
+ typentry->type_id, CurrentMemoryContext);
+ }
+
+ tuplesort_end(tuplesortstate);
+
+ /*
+ * accumArrayResultAny will set the first dimension lower bound to 1, we
+ * need restore it. see accumArrayResultArr.
+ */
+ if (ndim == 1)
+ astate->scalarstate->lb = lbs[0];
+ else
+ astate->arraystate->lbs[0] = lbs[0];
+
+ /* Avoid leaking memory when handed toasted input */
+ PG_FREE_IF_COPY(array, 0);
+ PG_RETURN_DATUM(makeArrayResultAny(astate, CurrentMemoryContext, true));
+}
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index c8f53c6fbe7..b8062e64802 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -5330,6 +5330,7 @@ initArrayResultWithSize(Oid element_type, MemoryContext rcontext,
MemoryContextAlloc(arr_context, astate->alen * sizeof(bool));
astate->nelems = 0;
astate->element_type = element_type;
+ astate->lb = 1; /* default lower bound */
get_typlenbyvalalign(element_type,
&astate->typlen,
&astate->typbyval,
@@ -5869,7 +5870,7 @@ makeArrayResultAny(ArrayBuildStateAny *astate,
/* If no elements were presented, we want to create an empty array */
ndims = (astate->scalarstate->nelems > 0) ? 1 : 0;
dims[0] = astate->scalarstate->nelems;
- lbs[0] = 1;
+ lbs[0] = astate->scalarstate->lb;
result = makeMdArrayResult(astate->scalarstate, ndims, dims, lbs,
rcontext, release);
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 890822eaf79..90ce03355c2 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1772,6 +1772,9 @@
{ oid => '8686', descr => 'reverse array',
proname => 'array_reverse', prorettype => 'anyarray',
proargtypes => 'anyarray', prosrc => 'array_reverse' },
+{ oid => '8810', descr => 'sort array',
+ proname => 'array_sort', prorettype => 'anyarray',
+ proargtypes => 'anyarray', prosrc => 'array_sort'},
{ oid => '3816', descr => 'array typanalyze',
proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool',
proargtypes => 'internal', prosrc => 'array_typanalyze' },
diff --git a/src/include/utils/array.h b/src/include/utils/array.h
index 52f1fbf8d43..ce0ef4c348c 100644
--- a/src/include/utils/array.h
+++ b/src/include/utils/array.h
@@ -192,6 +192,7 @@ typedef struct ArrayBuildState
int alen; /* allocated length of above arrays */
int nelems; /* number of valid entries in above arrays */
Oid element_type; /* data type of the Datums */
+ int lb; /* lower bound for one dimension array */
int16 typlen; /* needed info about datatype */
bool typbyval;
char typalign;
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index 7afd7356bbe..089cb8bc443 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -2860,3 +2860,105 @@ SELECT array_reverse('{{1,2},{3,4},{5,6},{7,8}}'::int[]);
{{7,8},{5,6},{3,4},{1,2}}
(1 row)
+-- array_sort
+SELECT array_sort('{}'::int[]);
+ array_sort
+------------
+ {}
+(1 row)
+
+SELECT array_sort('{1}'::int[]);
+ array_sort
+------------
+ {1}
+(1 row)
+
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+ array_sort
+---------------
+ {1,2,3,4,5,6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+ array_sort
+---------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
+ array_sort
+-----------------------
+ {Abc,CCC,bar,bbc,foo}
+(1 row)
+
+SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
+ array_sort
+----------------------------
+ {Abc,CCC,bar,bbc,foo,NULL}
+(1 row)
+
+SELECT array_sort(ARRAY(SELECT '1 4'::int2vector UNION ALL SELECT '1 2'::int2vector));
+ array_sort
+---------------
+ {"1 2","1 4"}
+(1 row)
+
+-- multidimensional array tests
+SELECT array_sort('{{1}}'::int[]);
+ array_sort
+------------
+ {{1}}
+(1 row)
+
+SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
+ array_sort
+---------------------
+ {{2,1},{2,4},{6,5}}
+(1 row)
+
+SELECT array_sort('{{"1 2","3 4"}, {"1 -2","-1 4"}}'::int2vector[]);
+ array_sort
+---------------------------------
+ {{"1 -2","-1 4"},{"1 2","3 4"}}
+(1 row)
+
+-- no ordering operator tests
+SELECT array_sort('{1}'::xid[]); -- ndim < 1
+ERROR: could not identify ordering operator for type xid
+SELECT array_sort('{1,2,3}'::xid[]);
+ERROR: could not identify ordering operator for type xid
+SELECT array_sort('{{1,2,3}}'::xid[]); -- dims[0] < 2
+ERROR: could not identify ordering operator for type xid[]
+SELECT array_sort('{{1,2,3},{2,3,4}}'::xid[]);
+ERROR: could not identify ordering operator for type xid[]
+-- bounds preservation tests
+SELECT array_sort(a) FROM (VALUES ('[10:12][20:21]={{1,2},{10,20},{3,4}}'::int[])) v(a);
+ array_sort
+--------------------------------------
+ [10:12][20:21]={{1,2},{3,4},{10,20}}
+(1 row)
+
+SELECT array_sort(a) FROM (VALUES ('[-1:0]={7,1}'::int[])) v(a);
+ array_sort
+--------------
+ [-1:0]={1,7}
+(1 row)
+
+SELECT array_sort(a) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+ array_sort
+--------------------------------------
+ [-2:0][20:21]={{1,-4},{1,2},{10,20}}
+(1 row)
+
+SELECT array_sort(a [-1:0]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+ array_sort
+------------------
+ {{1,-4},{10,20}}
+(1 row)
+
+SELECT array_sort(a [-1:0][20:20]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+ array_sort
+------------
+ {{1},{10}}
+(1 row)
+
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index aee4755c083..69805d4b9ec 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1471,6 +1471,19 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse
t | t
(1 row)
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+ array_sort
+------------
+ {a,B}
+(1 row)
+
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+ array_sort
+------------
+ {B,a}
+(1 row)
+
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index 399a0797f3b..9161c3aedc2 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -856,3 +856,30 @@ SELECT array_reverse('{1}'::int[]);
SELECT array_reverse('{1,2}'::int[]);
SELECT array_reverse('{1,2,3,NULL,4,5,6}'::int[]);
SELECT array_reverse('{{1,2},{3,4},{5,6},{7,8}}'::int[]);
+
+-- array_sort
+SELECT array_sort('{}'::int[]);
+SELECT array_sort('{1}'::int[]);
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
+SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
+SELECT array_sort(ARRAY(SELECT '1 4'::int2vector UNION ALL SELECT '1 2'::int2vector));
+
+-- multidimensional array tests
+SELECT array_sort('{{1}}'::int[]);
+SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
+SELECT array_sort('{{"1 2","3 4"}, {"1 -2","-1 4"}}'::int2vector[]);
+
+-- no ordering operator tests
+SELECT array_sort('{1}'::xid[]); -- ndim < 1
+SELECT array_sort('{1,2,3}'::xid[]);
+SELECT array_sort('{{1,2,3}}'::xid[]); -- dims[0] < 2
+SELECT array_sort('{{1,2,3},{2,3,4}}'::xid[]);
+
+-- bounds preservation tests
+SELECT array_sort(a) FROM (VALUES ('[10:12][20:21]={{1,2},{10,20},{3,4}}'::int[])) v(a);
+SELECT array_sort(a) FROM (VALUES ('[-1:0]={7,1}'::int[])) v(a);
+SELECT array_sort(a) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+SELECT array_sort(a [-1:0]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+SELECT array_sort(a [-1:0][20:20]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index 38ebcd99508..dbc190227d0 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -564,6 +564,10 @@ CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=second
SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive;
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 93339ef3c58..cdde69faced 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -151,6 +151,7 @@ ArrayIOData
ArrayIterator
ArrayMapState
ArrayMetaState
+ArraySortCachedInfo
ArraySubWorkspace
ArrayToken
ArrayType
--
2.34.1
On Sun, Mar 16, 2025 at 9:50 PM jian he <jian.universality@gmail.com> wrote:
hi.
before commit 4618045bee4a6d3efcb489c319649d8dd9aaa738 ([0])
It seems that patch is treating int2vector and oidvector as scalar
types.
select array_sort(array(select '1 4'::int2vector union all select '1
2'::int2vector));
array_sort
--------------------------
[1:2][0:1]={{1,2},{1,4}}
(1 row)
This query:
select array(select '1 4'::int2vector union all select '1 2'::int2vector);
returns
[1:2][0:1]={{1,4},{1,2}}
So the behavior of array_sort is consistent.
after
select array_sort(array(select '1 4'::int2vector union all select '1
2'::int2vector));
array_sort
---------------
{"1 2","1 4"}
(1 row)
now look closer, the second is the expected result...
I didn't dig deeper why commit 4618045bee made this patch result correct,
but I guess it would be best to include such a test case,
so I've attached a patch.
No objection to the test case, thanks.
[0] https://git.postgresql.org/cgit/postgresql.git/commit/?id=4618045bee4a6d3efcb489c319649d8dd9aaa738
--
Regards
Junwang Zhao
I spent some time looking at the v17 patchset. There were some pretty
strange things in it --- why were some of the variants of array_sort()
marked as volatile, for example? But the two things I'd like to
suggest functionality-wise are:
* The second argument of the variants with booleans should be defined
as true=descending, not true=ascending. It seems a little odd to me
for the default of a boolean option not to be "false". Also, then
you don't need an inversion between the second and third arguments.
I'm not dead set on this but it just seems a little cleaner.
* I see that the code is set up to detect an unsortable input type
before it takes the fast exit for "no sort required". I think this
is poor engineering: we ought to make the fast path as fast as
possible. The can't-sort case is so rare in real-world usage that
I do not think it matters if the error isn't thrown by every possible
call. Besides which, it is inconsistent anyway: consider
SELECT array_sort(NULL::xid[]);
which will not error because it will never reach the C code. Why's
that okay but delivering an answer for "array_sort('{1}'::xid[])"
is not? I think "throw error only if we must sort and cannot" is
a perfectly fine definition.
At the code level, I didn't like the way that the multiple entry
points were set up. I think it's generally cleaner code to have
a worker function with plain C call and return coding and make
all the SQL-visible functions be wrappers around that. Also the
caching mechanism was overcomplicated, in particular because we
do not need a cache lookup to know which sort operators apply to
arrays.
So all that leads me to v18 attached. (I merged the two patches
into one, didn't see much value in splitting them.)
In v18, it's somewhat annoying that the typcache doesn't cache
the typarray field; we would not need a separate get_array_type()
lookup if it did. I doubt there is any real reason for that except
that pg_type.typarray didn't exist when the typcache was invented.
So I'm tempted to add it. But I looked at existing callers of
get_array_type() and none of them are adjacent to typcache lookups,
so only array_sort would be helped immediately. I left it alone
for the moment; wonder if anyone else has an opinion?
regards, tom lane
Attachments:
v18-general-purpose-array_sort.patchtext/x-diff; charset=us-ascii; name=v18-general-purpose-array_sort.patchDownload
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 5bf6656deca..2129d027398 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -20741,6 +20741,42 @@ SELECT NULLIF(value, '(none)') ...
</para></entry>
</row>
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <indexterm>
+ <primary>array_sort</primary>
+ </indexterm>
+ <function>array_sort</function> (
+ <parameter>array</parameter> <type>anyarray</type>
+ <optional>, <parameter>descending</parameter> <type>boolean</type>
+ <optional>, <parameter>nulls_first</parameter> <type>boolean</type>
+ </optional></optional> )
+ <returnvalue>anyarray</returnvalue>
+ </para>
+ <para>
+ Sorts the first dimension of the array.
+ The sort order is determined by the default sort ordering of the
+ array's element type; however, if the element type is collatable,
+ the collation to use can be forced by adding
+ a <literal>COLLATE</literal> clause to
+ the <parameter>array</parameter> argument.
+ </para>
+ <para>
+ If <parameter>descending</parameter> is true then sort in
+ descending order, otherwise ascending order. If omitted, the
+ default is ascending order.
+ If <parameter>nulls_first</parameter> is true then nulls appear
+ before non-null values, otherwise nulls appear after non-null
+ values.
+ If omitted, <parameter>nulls_first</parameter> is taken to have
+ the same value as <parameter>descending</parameter>.
+ </para>
+ <para>
+ <literal>array_sort(ARRAY[[2,4],[2,1],[6,5]])</literal>
+ <returnvalue>{{2,1},{2,4},{6,5}}</returnvalue>
+ </para></entry>
+ </row>
+
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm id="function-array-to-string">
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index 2aae2f8ed93..2a8ea974029 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -12,16 +12,19 @@
*/
#include "postgres.h"
+#include "catalog/pg_operator_d.h"
#include "catalog/pg_type.h"
#include "common/int.h"
#include "common/pg_prng.h"
#include "libpq/pqformat.h"
+#include "miscadmin.h"
#include "nodes/supportnodes.h"
#include "port/pg_bitutils.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/lsyscache.h"
+#include "utils/tuplesort.h"
#include "utils/typcache.h"
/*
@@ -43,6 +46,18 @@ typedef struct DeserialIOData
Oid typioparam;
} DeserialIOData;
+/*
+ * ArraySortCachedInfo
+ * Used for caching catalog data in array_sort
+ */
+typedef struct ArraySortCachedInfo
+{
+ ArrayMetaState array_meta; /* metadata for array_create_iterator */
+ Oid elem_lt_opr; /* "<" operator for element type */
+ Oid elem_gt_opr; /* ">" operator for element type */
+ Oid array_type; /* pg_type OID of array type */
+} ArraySortCachedInfo;
+
static Datum array_position_common(FunctionCallInfo fcinfo);
@@ -1858,3 +1873,171 @@ array_reverse(PG_FUNCTION_ARGS)
PG_RETURN_ARRAYTYPE_P(result);
}
+
+/*
+ * array_sort
+ *
+ * Sorts the first dimension of the array.
+ */
+static ArrayType *
+array_sort_internal(ArrayType *array, bool descending, bool nulls_first,
+ FunctionCallInfo fcinfo)
+{
+ ArrayType *newarray;
+ Oid collation = PG_GET_COLLATION();
+ int ndim,
+ *dims,
+ *lbs;
+ ArraySortCachedInfo *cache_info;
+ Oid elmtyp;
+ Oid sort_typ;
+ Oid sort_opr;
+ Tuplesortstate *tuplesortstate;
+ ArrayIterator array_iterator;
+ Datum value;
+ bool isnull;
+ ArrayBuildStateAny *astate = NULL;
+
+ ndim = ARR_NDIM(array);
+ dims = ARR_DIMS(array);
+ lbs = ARR_LBOUND(array);
+
+ /* Quick exit if we don't need to sort */
+ if (ndim < 1 || dims[0] < 2)
+ return array;
+
+ /* Set up cache area if we didn't already */
+ cache_info = (ArraySortCachedInfo *) fcinfo->flinfo->fn_extra;
+ if (cache_info == NULL)
+ {
+ cache_info = (ArraySortCachedInfo *)
+ MemoryContextAllocZero(fcinfo->flinfo->fn_mcxt,
+ sizeof(ArraySortCachedInfo));
+ fcinfo->flinfo->fn_extra = cache_info;
+ }
+
+ /* Fetch and cache required data if we don't have it */
+ elmtyp = ARR_ELEMTYPE(array);
+ if (elmtyp != cache_info->array_meta.element_type)
+ {
+ TypeCacheEntry *typentry;
+
+ typentry = lookup_type_cache(elmtyp,
+ TYPECACHE_LT_OPR | TYPECACHE_GT_OPR);
+ cache_info->array_meta.element_type = elmtyp;
+ cache_info->array_meta.typlen = typentry->typlen;
+ cache_info->array_meta.typbyval = typentry->typbyval;
+ cache_info->array_meta.typalign = typentry->typalign;
+ cache_info->elem_lt_opr = typentry->lt_opr;
+ cache_info->elem_gt_opr = typentry->gt_opr;
+ /* For some reason the typcache doesn't track array type */
+ cache_info->array_type = InvalidOid;
+ }
+
+ /* Identify the sort operator to use */
+ if (ndim == 1)
+ {
+ /* Need to sort the element type */
+ sort_typ = elmtyp;
+ sort_opr = (descending ? cache_info->elem_gt_opr : cache_info->elem_lt_opr);
+ }
+ else
+ {
+ /* Otherwise we're sorting arrays */
+ if (!OidIsValid(cache_info->array_type))
+ {
+ cache_info->array_type = get_array_type(elmtyp);
+ if (!OidIsValid(cache_info->array_type))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("could not find array type for data type %s",
+ format_type_be(elmtyp))));
+ }
+ sort_typ = cache_info->array_type;
+ /* We know what operators to use for arrays */
+ sort_opr = (descending ? ARRAY_GT_OP : ARRAY_LT_OP);
+ }
+
+ /*
+ * Fail if we don't know how to sort. The error message is chosen to
+ * match what array_lt()/array_gt() will say in the multidimensional case.
+ */
+ if (!OidIsValid(sort_opr))
+ ereport(ERROR,
+ errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("could not identify a comparison function for type %s",
+ format_type_be(elmtyp)));
+
+ /* Put the things to be sorted (elements or sub-arrays) into a tuplesort */
+ tuplesortstate = tuplesort_begin_datum(sort_typ,
+ sort_opr,
+ collation,
+ nulls_first,
+ work_mem,
+ NULL,
+ TUPLESORT_NONE);
+
+ array_iterator = array_create_iterator(array, ndim - 1,
+ &cache_info->array_meta);
+ while (array_iterate(array_iterator, &value, &isnull))
+ {
+ tuplesort_putdatum(tuplesortstate, value, isnull);
+ }
+ array_free_iterator(array_iterator);
+
+ /* Do the sort */
+ tuplesort_performsort(tuplesortstate);
+
+ /* Extract results into a new array */
+ while (tuplesort_getdatum(tuplesortstate, true, false, &value, &isnull, NULL))
+ {
+ astate = accumArrayResultAny(astate, value, isnull,
+ sort_typ, CurrentMemoryContext);
+ }
+ tuplesort_end(tuplesortstate);
+
+ newarray = DatumGetArrayTypeP(makeArrayResultAny(astate,
+ CurrentMemoryContext,
+ true));
+
+ /* Adjust lower bound to match the input */
+ ARR_LBOUND(newarray)[0] = lbs[0];
+
+ return newarray;
+}
+
+Datum
+array_sort(PG_FUNCTION_ARGS)
+{
+ ArrayType *array = PG_GETARG_ARRAYTYPE_P(0);
+
+ PG_RETURN_ARRAYTYPE_P(array_sort_internal(array,
+ false,
+ false,
+ fcinfo));
+}
+
+Datum
+array_sort_order(PG_FUNCTION_ARGS)
+{
+ ArrayType *array = PG_GETARG_ARRAYTYPE_P(0);
+ bool descending = PG_GETARG_BOOL(1);
+
+ PG_RETURN_ARRAYTYPE_P(array_sort_internal(array,
+ descending,
+ descending,
+ fcinfo));
+}
+
+Datum
+array_sort_order_nulls_first(PG_FUNCTION_ARGS)
+{
+ ArrayType *array = PG_GETARG_ARRAYTYPE_P(0);
+ bool descending = PG_GETARG_BOOL(1);
+ bool nulls_first = PG_GETARG_BOOL(2);
+
+ PG_RETURN_ARRAYTYPE_P(array_sort_internal(array,
+ descending,
+ nulls_first,
+ fcinfo));
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 8b68b16d79d..7f2426fdb3a 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1772,6 +1772,18 @@
{ oid => '8686', descr => 'reverse array',
proname => 'array_reverse', prorettype => 'anyarray',
proargtypes => 'anyarray', prosrc => 'array_reverse' },
+{ oid => '8810', descr => 'sort array',
+ proname => 'array_sort', prorettype => 'anyarray', proargtypes => 'anyarray',
+ prosrc => 'array_sort' },
+{ oid => '8811', descr => 'sort array',
+ proname => 'array_sort', prorettype => 'anyarray',
+ proargtypes => 'anyarray bool', proargnames => '{array,descending}',
+ prosrc => 'array_sort_order' },
+{ oid => '8812', descr => 'sort array',
+ proname => 'array_sort', prorettype => 'anyarray',
+ proargtypes => 'anyarray bool bool',
+ proargnames => '{array,descending,nulls_first}',
+ prosrc => 'array_sort_order_nulls_first' },
{ oid => '3816', descr => 'array typanalyze',
proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool',
proargtypes => 'internal', prosrc => 'array_typanalyze' },
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index 7afd7356bbe..b815473f414 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -2860,3 +2860,145 @@ SELECT array_reverse('{{1,2},{3,4},{5,6},{7,8}}'::int[]);
{{7,8},{5,6},{3,4},{1,2}}
(1 row)
+-- array_sort
+SELECT array_sort('{}'::int[]);
+ array_sort
+------------
+ {}
+(1 row)
+
+SELECT array_sort('{1}'::int[]);
+ array_sort
+------------
+ {1}
+(1 row)
+
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+ array_sort
+---------------
+ {1,2,3,4,5,6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+ array_sort
+---------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
+ array_sort
+-----------------------
+ {Abc,CCC,bar,bbc,foo}
+(1 row)
+
+SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
+ array_sort
+----------------------------
+ {Abc,CCC,bar,bbc,foo,NULL}
+(1 row)
+
+SELECT array_sort(ARRAY(SELECT '1 4'::int2vector UNION ALL SELECT '1 2'::int2vector));
+ array_sort
+---------------
+ {"1 2","1 4"}
+(1 row)
+
+-- array_sort with order specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true);
+ array_sort
+--------------------------------
+ {NULL,6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false);
+ array_sort
+--------------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6,NULL}
+(1 row)
+
+-- array_sort with order and nullsfirst flag specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, true);
+ array_sort
+--------------------------------
+ {NULL,6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, false);
+ array_sort
+--------------------------------
+ {6.6,5.5,4.4,3.3,2.2,1.1,NULL}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, true);
+ array_sort
+--------------------------------
+ {NULL,1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, false);
+ array_sort
+--------------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6,NULL}
+(1 row)
+
+-- multidimensional array tests
+SELECT array_sort('{{1}}'::int[]);
+ array_sort
+------------
+ {{1}}
+(1 row)
+
+SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
+ array_sort
+---------------------
+ {{2,1},{2,4},{6,5}}
+(1 row)
+
+SELECT array_sort('{{"1 2","3 4"}, {"1 -2","-1 4"}}'::int2vector[]);
+ array_sort
+---------------------------------
+ {{"1 -2","-1 4"},{"1 2","3 4"}}
+(1 row)
+
+-- no ordering operator tests
+SELECT array_sort('{1}'::xid[]); -- no error because no sort is required
+ array_sort
+------------
+ {1}
+(1 row)
+
+SELECT array_sort('{1,2,3}'::xid[]);
+ERROR: could not identify a comparison function for type xid
+SELECT array_sort('{{1,2,3},{2,3,4}}'::xid[]);
+ERROR: could not identify a comparison function for type xid
+-- bounds preservation tests
+SELECT array_sort(a) FROM (VALUES ('[10:12][20:21]={{1,2},{10,20},{3,4}}'::int[])) v(a);
+ array_sort
+--------------------------------------
+ [10:12][20:21]={{1,2},{3,4},{10,20}}
+(1 row)
+
+SELECT array_sort(a) FROM (VALUES ('[-1:0]={7,1}'::int[])) v(a);
+ array_sort
+--------------
+ [-1:0]={1,7}
+(1 row)
+
+SELECT array_sort(a) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+ array_sort
+--------------------------------------
+ [-2:0][20:21]={{1,-4},{1,2},{10,20}}
+(1 row)
+
+SELECT array_sort(a [-1:0]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+ array_sort
+------------------
+ {{1,-4},{10,20}}
+(1 row)
+
+SELECT array_sort(a [-1:0][20:20]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+ array_sort
+------------
+ {{1},{10}}
+(1 row)
+
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index aee4755c083..69805d4b9ec 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1471,6 +1471,19 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse
t | t
(1 row)
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+ array_sort
+------------
+ {a,B}
+(1 row)
+
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+ array_sort
+------------
+ {B,a}
+(1 row)
+
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index 399a0797f3b..47d62c1d38d 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -856,3 +856,39 @@ SELECT array_reverse('{1}'::int[]);
SELECT array_reverse('{1,2}'::int[]);
SELECT array_reverse('{1,2,3,NULL,4,5,6}'::int[]);
SELECT array_reverse('{{1,2},{3,4},{5,6},{7,8}}'::int[]);
+
+-- array_sort
+SELECT array_sort('{}'::int[]);
+SELECT array_sort('{1}'::int[]);
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
+SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
+SELECT array_sort(ARRAY(SELECT '1 4'::int2vector UNION ALL SELECT '1 2'::int2vector));
+
+-- array_sort with order specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false);
+
+-- array_sort with order and nullsfirst flag specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, true);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, false);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, true);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, false);
+
+-- multidimensional array tests
+SELECT array_sort('{{1}}'::int[]);
+SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
+SELECT array_sort('{{"1 2","3 4"}, {"1 -2","-1 4"}}'::int2vector[]);
+
+-- no ordering operator tests
+SELECT array_sort('{1}'::xid[]); -- no error because no sort is required
+SELECT array_sort('{1,2,3}'::xid[]);
+SELECT array_sort('{{1,2,3},{2,3,4}}'::xid[]);
+
+-- bounds preservation tests
+SELECT array_sort(a) FROM (VALUES ('[10:12][20:21]={{1,2},{10,20},{3,4}}'::int[])) v(a);
+SELECT array_sort(a) FROM (VALUES ('[-1:0]={7,1}'::int[])) v(a);
+SELECT array_sort(a) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+SELECT array_sort(a [-1:0]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+SELECT array_sort(a [-1:0][20:20]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index 38ebcd99508..dbc190227d0 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -564,6 +564,10 @@ CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=second
SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive;
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index b66cecd8799..449bafc123c 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -154,6 +154,7 @@ ArrayIOData
ArrayIterator
ArrayMapState
ArrayMetaState
+ArraySortCachedInfo
ArraySubWorkspace
ArrayToken
ArrayType
Hi Tom,
On Mon, Mar 31, 2025 at 5:58 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:
I spent some time looking at the v17 patchset. There were some pretty
strange things in it --- why were some of the variants of array_sort()
marked as volatile, for example?
I think this was due to some copy-paste of the code nearby.
But the two things I'd like to
suggest functionality-wise are:
* The second argument of the variants with booleans should be defined
as true=descending, not true=ascending. It seems a little odd to me
for the default of a boolean option not to be "false". Also, then
you don't need an inversion between the second and third arguments.
I'm not dead set on this but it just seems a little cleaner.
Agreed.
* I see that the code is set up to detect an unsortable input type
before it takes the fast exit for "no sort required". I think this
is poor engineering: we ought to make the fast path as fast as
possible. The can't-sort case is so rare in real-world usage that
I do not think it matters if the error isn't thrown by every possible
call. Besides which, it is inconsistent anyway: consider
SELECT array_sort(NULL::xid[]);
which will not error because it will never reach the C code. Why's
that okay but delivering an answer for "array_sort('{1}'::xid[])"
is not? I think "throw error only if we must sort and cannot" is
a perfectly fine definition.
Agreed.
At the code level, I didn't like the way that the multiple entry
points were set up. I think it's generally cleaner code to have
a worker function with plain C call and return coding and make
all the SQL-visible functions be wrappers around that. Also the
caching mechanism was overcomplicated, in particular because we
do not need a cache lookup to know which sort operators apply to
arrays.
Agreed, your refactor made the code cleaner.
So all that leads me to v18 attached. (I merged the two patches
into one, didn't see much value in splitting them.)
In v18, it's somewhat annoying that the typcache doesn't cache
the typarray field; we would not need a separate get_array_type()
lookup if it did. I doubt there is any real reason for that except
that pg_type.typarray didn't exist when the typcache was invented.
So I'm tempted to add it. But I looked at existing callers of
get_array_type() and none of them are adjacent to typcache lookups,
so only array_sort would be helped immediately. I left it alone
for the moment; wonder if anyone else has an opinion?
Both `elmtyp` and `array_type` are needed here because a column can
have arrays with varying dimensions. Maybe other callers don't share
this behavior?
regards, tom lane
--
Regards
Junwang Zhao
Junwang Zhao <zhjwpku@gmail.com> writes:
On Mon, Mar 31, 2025 at 5:58 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:
In v18, it's somewhat annoying that the typcache doesn't cache
the typarray field; we would not need a separate get_array_type()
lookup if it did. I doubt there is any real reason for that except
that pg_type.typarray didn't exist when the typcache was invented.
So I'm tempted to add it. But I looked at existing callers of
get_array_type() and none of them are adjacent to typcache lookups,
so only array_sort would be helped immediately. I left it alone
for the moment; wonder if anyone else has an opinion?
Both `elmtyp` and `array_type` are needed here because a column can
have arrays with varying dimensions. Maybe other callers don't share
this behavior?
Maybe. I think some of what's going on here is that because for a
long time we only had pg_type.typelem and not pg_type.typarray,
code was written to not need to look up the array type if at all
possible. So there are simply not that many users. Anyway it
seems really cheap to add this field to the typcache now.
Attached 0001 is the same as v18, and then 0002 is the proposed
addition to typcache.
regards, tom lane
Attachments:
v19-0001-Introduce-a-SQL-callable-function-array_sort-any.patchtext/x-diff; charset=us-ascii; name*0=v19-0001-Introduce-a-SQL-callable-function-array_sort-any.p; name*1=atchDownload
From 21bfc6f86a767a0ef774dbaf9b3f3b6168c15a27 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Mon, 31 Mar 2025 12:52:00 -0400
Subject: [PATCH v19 1/2] Introduce a SQL-callable function
array_sort(anyarray).
Create a function that will sort the elements of an array
according to the element type's sort order. If the array
has more than one dimension, the sub-arrays of the first
dimension are sorted per normal array-comparison rules,
leaving their contents alone.
Author: Junwang Zhao <zhjwpku@gmail.com>
Co-authored-by: Jian He <jian.universality@gmail.com>
Reviewed-by: Aleksander Alekseev <aleksander@timescale.com>
Discussion: https://postgr.es/m/CAEG8a3J41a4dpw_-F94fF-JPRXYxw-GfsgoGotKcjs9LVfEEvw@mail.gmail.com
---
doc/src/sgml/func.sgml | 36 ++++
src/backend/utils/adt/array_userfuncs.c | 183 ++++++++++++++++++
src/include/catalog/pg_proc.dat | 12 ++
src/test/regress/expected/arrays.out | 142 ++++++++++++++
.../regress/expected/collate.icu.utf8.out | 13 ++
src/test/regress/sql/arrays.sql | 36 ++++
src/test/regress/sql/collate.icu.utf8.sql | 4 +
src/tools/pgindent/typedefs.list | 1 +
8 files changed, 427 insertions(+)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 5bf6656deca..2129d027398 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -20741,6 +20741,42 @@ SELECT NULLIF(value, '(none)') ...
</para></entry>
</row>
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <indexterm>
+ <primary>array_sort</primary>
+ </indexterm>
+ <function>array_sort</function> (
+ <parameter>array</parameter> <type>anyarray</type>
+ <optional>, <parameter>descending</parameter> <type>boolean</type>
+ <optional>, <parameter>nulls_first</parameter> <type>boolean</type>
+ </optional></optional> )
+ <returnvalue>anyarray</returnvalue>
+ </para>
+ <para>
+ Sorts the first dimension of the array.
+ The sort order is determined by the default sort ordering of the
+ array's element type; however, if the element type is collatable,
+ the collation to use can be forced by adding
+ a <literal>COLLATE</literal> clause to
+ the <parameter>array</parameter> argument.
+ </para>
+ <para>
+ If <parameter>descending</parameter> is true then sort in
+ descending order, otherwise ascending order. If omitted, the
+ default is ascending order.
+ If <parameter>nulls_first</parameter> is true then nulls appear
+ before non-null values, otherwise nulls appear after non-null
+ values.
+ If omitted, <parameter>nulls_first</parameter> is taken to have
+ the same value as <parameter>descending</parameter>.
+ </para>
+ <para>
+ <literal>array_sort(ARRAY[[2,4],[2,1],[6,5]])</literal>
+ <returnvalue>{{2,1},{2,4},{6,5}}</returnvalue>
+ </para></entry>
+ </row>
+
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm id="function-array-to-string">
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index 2aae2f8ed93..2a8ea974029 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -12,16 +12,19 @@
*/
#include "postgres.h"
+#include "catalog/pg_operator_d.h"
#include "catalog/pg_type.h"
#include "common/int.h"
#include "common/pg_prng.h"
#include "libpq/pqformat.h"
+#include "miscadmin.h"
#include "nodes/supportnodes.h"
#include "port/pg_bitutils.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/lsyscache.h"
+#include "utils/tuplesort.h"
#include "utils/typcache.h"
/*
@@ -43,6 +46,18 @@ typedef struct DeserialIOData
Oid typioparam;
} DeserialIOData;
+/*
+ * ArraySortCachedInfo
+ * Used for caching catalog data in array_sort
+ */
+typedef struct ArraySortCachedInfo
+{
+ ArrayMetaState array_meta; /* metadata for array_create_iterator */
+ Oid elem_lt_opr; /* "<" operator for element type */
+ Oid elem_gt_opr; /* ">" operator for element type */
+ Oid array_type; /* pg_type OID of array type */
+} ArraySortCachedInfo;
+
static Datum array_position_common(FunctionCallInfo fcinfo);
@@ -1858,3 +1873,171 @@ array_reverse(PG_FUNCTION_ARGS)
PG_RETURN_ARRAYTYPE_P(result);
}
+
+/*
+ * array_sort
+ *
+ * Sorts the first dimension of the array.
+ */
+static ArrayType *
+array_sort_internal(ArrayType *array, bool descending, bool nulls_first,
+ FunctionCallInfo fcinfo)
+{
+ ArrayType *newarray;
+ Oid collation = PG_GET_COLLATION();
+ int ndim,
+ *dims,
+ *lbs;
+ ArraySortCachedInfo *cache_info;
+ Oid elmtyp;
+ Oid sort_typ;
+ Oid sort_opr;
+ Tuplesortstate *tuplesortstate;
+ ArrayIterator array_iterator;
+ Datum value;
+ bool isnull;
+ ArrayBuildStateAny *astate = NULL;
+
+ ndim = ARR_NDIM(array);
+ dims = ARR_DIMS(array);
+ lbs = ARR_LBOUND(array);
+
+ /* Quick exit if we don't need to sort */
+ if (ndim < 1 || dims[0] < 2)
+ return array;
+
+ /* Set up cache area if we didn't already */
+ cache_info = (ArraySortCachedInfo *) fcinfo->flinfo->fn_extra;
+ if (cache_info == NULL)
+ {
+ cache_info = (ArraySortCachedInfo *)
+ MemoryContextAllocZero(fcinfo->flinfo->fn_mcxt,
+ sizeof(ArraySortCachedInfo));
+ fcinfo->flinfo->fn_extra = cache_info;
+ }
+
+ /* Fetch and cache required data if we don't have it */
+ elmtyp = ARR_ELEMTYPE(array);
+ if (elmtyp != cache_info->array_meta.element_type)
+ {
+ TypeCacheEntry *typentry;
+
+ typentry = lookup_type_cache(elmtyp,
+ TYPECACHE_LT_OPR | TYPECACHE_GT_OPR);
+ cache_info->array_meta.element_type = elmtyp;
+ cache_info->array_meta.typlen = typentry->typlen;
+ cache_info->array_meta.typbyval = typentry->typbyval;
+ cache_info->array_meta.typalign = typentry->typalign;
+ cache_info->elem_lt_opr = typentry->lt_opr;
+ cache_info->elem_gt_opr = typentry->gt_opr;
+ /* For some reason the typcache doesn't track array type */
+ cache_info->array_type = InvalidOid;
+ }
+
+ /* Identify the sort operator to use */
+ if (ndim == 1)
+ {
+ /* Need to sort the element type */
+ sort_typ = elmtyp;
+ sort_opr = (descending ? cache_info->elem_gt_opr : cache_info->elem_lt_opr);
+ }
+ else
+ {
+ /* Otherwise we're sorting arrays */
+ if (!OidIsValid(cache_info->array_type))
+ {
+ cache_info->array_type = get_array_type(elmtyp);
+ if (!OidIsValid(cache_info->array_type))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("could not find array type for data type %s",
+ format_type_be(elmtyp))));
+ }
+ sort_typ = cache_info->array_type;
+ /* We know what operators to use for arrays */
+ sort_opr = (descending ? ARRAY_GT_OP : ARRAY_LT_OP);
+ }
+
+ /*
+ * Fail if we don't know how to sort. The error message is chosen to
+ * match what array_lt()/array_gt() will say in the multidimensional case.
+ */
+ if (!OidIsValid(sort_opr))
+ ereport(ERROR,
+ errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("could not identify a comparison function for type %s",
+ format_type_be(elmtyp)));
+
+ /* Put the things to be sorted (elements or sub-arrays) into a tuplesort */
+ tuplesortstate = tuplesort_begin_datum(sort_typ,
+ sort_opr,
+ collation,
+ nulls_first,
+ work_mem,
+ NULL,
+ TUPLESORT_NONE);
+
+ array_iterator = array_create_iterator(array, ndim - 1,
+ &cache_info->array_meta);
+ while (array_iterate(array_iterator, &value, &isnull))
+ {
+ tuplesort_putdatum(tuplesortstate, value, isnull);
+ }
+ array_free_iterator(array_iterator);
+
+ /* Do the sort */
+ tuplesort_performsort(tuplesortstate);
+
+ /* Extract results into a new array */
+ while (tuplesort_getdatum(tuplesortstate, true, false, &value, &isnull, NULL))
+ {
+ astate = accumArrayResultAny(astate, value, isnull,
+ sort_typ, CurrentMemoryContext);
+ }
+ tuplesort_end(tuplesortstate);
+
+ newarray = DatumGetArrayTypeP(makeArrayResultAny(astate,
+ CurrentMemoryContext,
+ true));
+
+ /* Adjust lower bound to match the input */
+ ARR_LBOUND(newarray)[0] = lbs[0];
+
+ return newarray;
+}
+
+Datum
+array_sort(PG_FUNCTION_ARGS)
+{
+ ArrayType *array = PG_GETARG_ARRAYTYPE_P(0);
+
+ PG_RETURN_ARRAYTYPE_P(array_sort_internal(array,
+ false,
+ false,
+ fcinfo));
+}
+
+Datum
+array_sort_order(PG_FUNCTION_ARGS)
+{
+ ArrayType *array = PG_GETARG_ARRAYTYPE_P(0);
+ bool descending = PG_GETARG_BOOL(1);
+
+ PG_RETURN_ARRAYTYPE_P(array_sort_internal(array,
+ descending,
+ descending,
+ fcinfo));
+}
+
+Datum
+array_sort_order_nulls_first(PG_FUNCTION_ARGS)
+{
+ ArrayType *array = PG_GETARG_ARRAYTYPE_P(0);
+ bool descending = PG_GETARG_BOOL(1);
+ bool nulls_first = PG_GETARG_BOOL(2);
+
+ PG_RETURN_ARRAYTYPE_P(array_sort_internal(array,
+ descending,
+ nulls_first,
+ fcinfo));
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 8b68b16d79d..7f2426fdb3a 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1772,6 +1772,18 @@
{ oid => '8686', descr => 'reverse array',
proname => 'array_reverse', prorettype => 'anyarray',
proargtypes => 'anyarray', prosrc => 'array_reverse' },
+{ oid => '8810', descr => 'sort array',
+ proname => 'array_sort', prorettype => 'anyarray', proargtypes => 'anyarray',
+ prosrc => 'array_sort' },
+{ oid => '8811', descr => 'sort array',
+ proname => 'array_sort', prorettype => 'anyarray',
+ proargtypes => 'anyarray bool', proargnames => '{array,descending}',
+ prosrc => 'array_sort_order' },
+{ oid => '8812', descr => 'sort array',
+ proname => 'array_sort', prorettype => 'anyarray',
+ proargtypes => 'anyarray bool bool',
+ proargnames => '{array,descending,nulls_first}',
+ prosrc => 'array_sort_order_nulls_first' },
{ oid => '3816', descr => 'array typanalyze',
proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool',
proargtypes => 'internal', prosrc => 'array_typanalyze' },
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index 7afd7356bbe..b815473f414 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -2860,3 +2860,145 @@ SELECT array_reverse('{{1,2},{3,4},{5,6},{7,8}}'::int[]);
{{7,8},{5,6},{3,4},{1,2}}
(1 row)
+-- array_sort
+SELECT array_sort('{}'::int[]);
+ array_sort
+------------
+ {}
+(1 row)
+
+SELECT array_sort('{1}'::int[]);
+ array_sort
+------------
+ {1}
+(1 row)
+
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+ array_sort
+---------------
+ {1,2,3,4,5,6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+ array_sort
+---------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
+ array_sort
+-----------------------
+ {Abc,CCC,bar,bbc,foo}
+(1 row)
+
+SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
+ array_sort
+----------------------------
+ {Abc,CCC,bar,bbc,foo,NULL}
+(1 row)
+
+SELECT array_sort(ARRAY(SELECT '1 4'::int2vector UNION ALL SELECT '1 2'::int2vector));
+ array_sort
+---------------
+ {"1 2","1 4"}
+(1 row)
+
+-- array_sort with order specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true);
+ array_sort
+--------------------------------
+ {NULL,6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false);
+ array_sort
+--------------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6,NULL}
+(1 row)
+
+-- array_sort with order and nullsfirst flag specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, true);
+ array_sort
+--------------------------------
+ {NULL,6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, false);
+ array_sort
+--------------------------------
+ {6.6,5.5,4.4,3.3,2.2,1.1,NULL}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, true);
+ array_sort
+--------------------------------
+ {NULL,1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, false);
+ array_sort
+--------------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6,NULL}
+(1 row)
+
+-- multidimensional array tests
+SELECT array_sort('{{1}}'::int[]);
+ array_sort
+------------
+ {{1}}
+(1 row)
+
+SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
+ array_sort
+---------------------
+ {{2,1},{2,4},{6,5}}
+(1 row)
+
+SELECT array_sort('{{"1 2","3 4"}, {"1 -2","-1 4"}}'::int2vector[]);
+ array_sort
+---------------------------------
+ {{"1 -2","-1 4"},{"1 2","3 4"}}
+(1 row)
+
+-- no ordering operator tests
+SELECT array_sort('{1}'::xid[]); -- no error because no sort is required
+ array_sort
+------------
+ {1}
+(1 row)
+
+SELECT array_sort('{1,2,3}'::xid[]);
+ERROR: could not identify a comparison function for type xid
+SELECT array_sort('{{1,2,3},{2,3,4}}'::xid[]);
+ERROR: could not identify a comparison function for type xid
+-- bounds preservation tests
+SELECT array_sort(a) FROM (VALUES ('[10:12][20:21]={{1,2},{10,20},{3,4}}'::int[])) v(a);
+ array_sort
+--------------------------------------
+ [10:12][20:21]={{1,2},{3,4},{10,20}}
+(1 row)
+
+SELECT array_sort(a) FROM (VALUES ('[-1:0]={7,1}'::int[])) v(a);
+ array_sort
+--------------
+ [-1:0]={1,7}
+(1 row)
+
+SELECT array_sort(a) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+ array_sort
+--------------------------------------
+ [-2:0][20:21]={{1,-4},{1,2},{10,20}}
+(1 row)
+
+SELECT array_sort(a [-1:0]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+ array_sort
+------------------
+ {{1,-4},{10,20}}
+(1 row)
+
+SELECT array_sort(a [-1:0][20:20]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+ array_sort
+------------
+ {{1},{10}}
+(1 row)
+
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index aee4755c083..69805d4b9ec 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1471,6 +1471,19 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse
t | t
(1 row)
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+ array_sort
+------------
+ {a,B}
+(1 row)
+
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+ array_sort
+------------
+ {B,a}
+(1 row)
+
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index 399a0797f3b..47d62c1d38d 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -856,3 +856,39 @@ SELECT array_reverse('{1}'::int[]);
SELECT array_reverse('{1,2}'::int[]);
SELECT array_reverse('{1,2,3,NULL,4,5,6}'::int[]);
SELECT array_reverse('{{1,2},{3,4},{5,6},{7,8}}'::int[]);
+
+-- array_sort
+SELECT array_sort('{}'::int[]);
+SELECT array_sort('{1}'::int[]);
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
+SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
+SELECT array_sort(ARRAY(SELECT '1 4'::int2vector UNION ALL SELECT '1 2'::int2vector));
+
+-- array_sort with order specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false);
+
+-- array_sort with order and nullsfirst flag specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, true);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, false);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, true);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, false);
+
+-- multidimensional array tests
+SELECT array_sort('{{1}}'::int[]);
+SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
+SELECT array_sort('{{"1 2","3 4"}, {"1 -2","-1 4"}}'::int2vector[]);
+
+-- no ordering operator tests
+SELECT array_sort('{1}'::xid[]); -- no error because no sort is required
+SELECT array_sort('{1,2,3}'::xid[]);
+SELECT array_sort('{{1,2,3},{2,3,4}}'::xid[]);
+
+-- bounds preservation tests
+SELECT array_sort(a) FROM (VALUES ('[10:12][20:21]={{1,2},{10,20},{3,4}}'::int[])) v(a);
+SELECT array_sort(a) FROM (VALUES ('[-1:0]={7,1}'::int[])) v(a);
+SELECT array_sort(a) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+SELECT array_sort(a [-1:0]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+SELECT array_sort(a [-1:0][20:20]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index 38ebcd99508..dbc190227d0 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -564,6 +564,10 @@ CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=second
SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive;
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index b66cecd8799..449bafc123c 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -154,6 +154,7 @@ ArrayIOData
ArrayIterator
ArrayMapState
ArrayMetaState
+ArraySortCachedInfo
ArraySubWorkspace
ArrayToken
ArrayType
--
2.43.5
v19-0002-Add-pg_type.typarray-to-the-typcache-s-collectio.patchtext/x-diff; charset=us-ascii; name*0=v19-0002-Add-pg_type.typarray-to-the-typcache-s-collectio.p; name*1=atchDownload
From 8992f94b68b4aa6205b222c8e221b0a8c0306c47 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Mon, 31 Mar 2025 13:06:40 -0400
Subject: [PATCH v19 2/2] Add pg_type.typarray to the typcache's collection of
fields.
This was not done in bc8036fc6 which added the typarray column,
but I think that was simply an oversight. We generally are
willing to cache simple columns of pg_type in the typcache.
Add it now to simplify array_sort().
Author: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/CAEG8a3J41a4dpw_-F94fF-JPRXYxw-GfsgoGotKcjs9LVfEEvw@mail.gmail.com
---
src/backend/utils/adt/array_userfuncs.c | 17 ++++++-----------
src/backend/utils/cache/typcache.c | 2 ++
src/include/utils/typcache.h | 1 +
3 files changed, 9 insertions(+), 11 deletions(-)
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index 2a8ea974029..8eb342e3382 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -1930,8 +1930,7 @@ array_sort_internal(ArrayType *array, bool descending, bool nulls_first,
cache_info->array_meta.typalign = typentry->typalign;
cache_info->elem_lt_opr = typentry->lt_opr;
cache_info->elem_gt_opr = typentry->gt_opr;
- /* For some reason the typcache doesn't track array type */
- cache_info->array_type = InvalidOid;
+ cache_info->array_type = typentry->typarray;
}
/* Identify the sort operator to use */
@@ -1944,16 +1943,12 @@ array_sort_internal(ArrayType *array, bool descending, bool nulls_first,
else
{
/* Otherwise we're sorting arrays */
- if (!OidIsValid(cache_info->array_type))
- {
- cache_info->array_type = get_array_type(elmtyp);
- if (!OidIsValid(cache_info->array_type))
- ereport(ERROR,
- (errcode(ERRCODE_UNDEFINED_OBJECT),
- errmsg("could not find array type for data type %s",
- format_type_be(elmtyp))));
- }
sort_typ = cache_info->array_type;
+ if (!OidIsValid(sort_typ))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("could not find array type for data type %s",
+ format_type_be(elmtyp))));
/* We know what operators to use for arrays */
sort_opr = (descending ? ARRAY_GT_OP : ARRAY_LT_OP);
}
diff --git a/src/backend/utils/cache/typcache.c b/src/backend/utils/cache/typcache.c
index 5a3b3788d02..ae65a1cce06 100644
--- a/src/backend/utils/cache/typcache.c
+++ b/src/backend/utils/cache/typcache.c
@@ -499,6 +499,7 @@ lookup_type_cache(Oid type_id, int flags)
typentry->typrelid = typtup->typrelid;
typentry->typsubscript = typtup->typsubscript;
typentry->typelem = typtup->typelem;
+ typentry->typarray = typtup->typarray;
typentry->typcollation = typtup->typcollation;
typentry->flags |= TCFLAGS_HAVE_PG_TYPE_DATA;
@@ -544,6 +545,7 @@ lookup_type_cache(Oid type_id, int flags)
typentry->typrelid = typtup->typrelid;
typentry->typsubscript = typtup->typsubscript;
typentry->typelem = typtup->typelem;
+ typentry->typarray = typtup->typarray;
typentry->typcollation = typtup->typcollation;
typentry->flags |= TCFLAGS_HAVE_PG_TYPE_DATA;
diff --git a/src/include/utils/typcache.h b/src/include/utils/typcache.h
index 562a581333a..1cb30f1818c 100644
--- a/src/include/utils/typcache.h
+++ b/src/include/utils/typcache.h
@@ -44,6 +44,7 @@ typedef struct TypeCacheEntry
Oid typrelid;
Oid typsubscript;
Oid typelem;
+ Oid typarray;
Oid typcollation;
/*
--
2.43.5
On Tue, Apr 1, 2025 at 1:11 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:
Junwang Zhao <zhjwpku@gmail.com> writes:
On Mon, Mar 31, 2025 at 5:58 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:
In v18, it's somewhat annoying that the typcache doesn't cache
the typarray field; we would not need a separate get_array_type()
lookup if it did. I doubt there is any real reason for that except
that pg_type.typarray didn't exist when the typcache was invented.
So I'm tempted to add it. But I looked at existing callers of
get_array_type() and none of them are adjacent to typcache lookups,
so only array_sort would be helped immediately. I left it alone
for the moment; wonder if anyone else has an opinion?
The need for `elmtyp` and `array_type` here arises because a column can
have arrays with varying dimensions. Maybe other callers don't share
this behavior?
Maybe. I think some of what's going on here is that because for a
long time we only had pg_type.typelem and not pg_type.typarray,
code was written to not need to look up the array type if at all
possible. So there are simply not that many users. Anyway it
seems really cheap to add this field to the typcache now.
Attached 0001 is the same as v18, and then 0002 is the proposed
addition to typcache.
I've applied the patches to master and regression passed.
0002 is neat, I am +1 for this improvement.
regards, tom lane
--
Regards
Junwang Zhao
Junwang Zhao <zhjwpku@gmail.com> writes:
On Tue, Apr 1, 2025 at 1:11 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:
Attached 0001 is the same as v18, and then 0002 is the proposed
addition to typcache.
I've applied the patches to master and regression passed.
0002 is neat, I am +1 for this improvement.
Hearing no further comments, pushed.
regards, tom lane
On Wed, Apr 2, 2025 at 6:05 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:
Junwang Zhao <zhjwpku@gmail.com> writes:
On Tue, Apr 1, 2025 at 1:11 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:
Attached 0001 is the same as v18, and then 0002 is the proposed
addition to typcache.
I've applied the patches to master and regression passed.
0002 is neat, I am +1 for this improvement.
Hearing no further comments, pushed.
regards, tom lane
Thanks for pushing, and I noticed the corresponding CF entry has
been marked as committed, thanks for taking care of it.
--
Regards
Junwang Zhao