From 2981a7ce87277fec51533c9d7cd501919f8909ec Mon Sep 17 00:00:00 2001 From: Junwang Zhao Date: Fri, 27 Sep 2024 13:05:40 +0000 Subject: [PATCH v1] general purpose array_sort Sorts anyarray in either ascending or descending order. The array must be empty or one-dimensional. Signed-off-by: Junwang Zhao --- doc/src/sgml/func.sgml | 20 ++++ src/backend/utils/adt/array_userfuncs.c | 120 ++++++++++++++++++++++++ src/include/catalog/pg_proc.dat | 9 ++ src/test/regress/expected/arrays.out | 67 +++++++++++++ src/test/regress/sql/arrays.sql | 13 +++ 5 files changed, 229 insertions(+) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index e39d524b6b..7441d413bd 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -20419,6 +20419,26 @@ SELECT NULLIF(value, '(none)') ... + + + + array_sort + + array_sort ( anyarray , dir , collation ) + anyarray + + + Sorts the array in either ascending or descending order. + dir must be asc + or desc. The array must be empty or one-dimensional. + If the array element is text variants, specify collation as needed. + + + array_sort(ARRAY[1,2,5,6,3,4]) + {1,2,3,4,5,6} + + + diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c index 6599be2ec5..e507eb3dbb 100644 --- a/src/backend/utils/adt/array_userfuncs.c +++ b/src/backend/utils/adt/array_userfuncs.c @@ -12,15 +12,18 @@ */ #include "postgres.h" +#include "catalog/namespace.h" #include "catalog/pg_type.h" #include "common/int.h" #include "common/pg_prng.h" #include "libpq/pqformat.h" +#include "miscadmin.h" #include "port/pg_bitutils.h" #include "utils/array.h" #include "utils/builtins.h" #include "utils/datum.h" #include "utils/lsyscache.h" +#include "utils/tuplesort.h" #include "utils/typcache.h" /* @@ -1685,3 +1688,120 @@ array_sample(PG_FUNCTION_ARGS) PG_RETURN_ARRAYTYPE_P(result); } + +/* + * array_sort + * + * Sorts the array in either ascending or descending order. + * The array must be empty or one-dimensional. + */ +Datum +array_sort(PG_FUNCTION_ARGS) +{ + ArrayType *array = PG_GETARG_ARRAYTYPE_P(0); + text *dirstr = (fcinfo->nargs > 1) ? PG_GETARG_TEXT_PP(1) : NULL; + int32 dc = (dirstr) ? VARSIZE_ANY_EXHDR(dirstr) : 0; + char *d = (dirstr) ? VARDATA_ANY(dirstr) : NULL; + Oid elmtyp; + text *collstr = (fcinfo->nargs > 2) ? PG_GETARG_TEXT_PP(2) : NULL; + char *collname = (collstr) ? text_to_cstring(collstr) : NULL; + Oid collid; + int dir = -1; + TypeCacheEntry *typentry; + Tuplesortstate *tuplesortstate; + ArrayIterator array_iterator; + Datum value; + bool isnull; + ArrayBuildState *astate = NULL; + + if (ARR_NDIM(array) > 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("multidimensional arrays are not supported"))); + + if (ARR_NDIM(array) < 1) + PG_RETURN_ARRAYTYPE_P(array); + + if (ARR_HASNULL(array)) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("array must not contain nulls"))); + + if (dirstr == NULL || (dc == 3 + && (d[0] == 'a' || d[0] == 'A') + && (d[1] == 's' || d[1] == 'S') + && (d[2] == 'c' || d[2] == 'C'))) + dir = 1; + else if (dc == 4 + && (d[0] == 'd' || d[0] == 'D') + && (d[1] == 'e' || d[1] == 'E') + && (d[2] == 's' || d[2] == 'S') + && (d[3] == 'c' || d[3] == 'C')) + dir = 0; + if (dir == -1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("second parameter must be \"ASC\" or \"DESC\""))); + + elmtyp = ARR_ELEMTYPE(array); + typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra; + if (typentry == NULL || typentry->type_id != elmtyp) + { + typentry = lookup_type_cache(elmtyp, dir == 1 ? TYPECACHE_LT_OPR : TYPECACHE_GT_OPR); + fcinfo->flinfo->fn_extra = (void *) typentry; + } + + if (collname == NULL) + collid = typentry->typcollation; + else + { + collid = CollationGetCollid(collname); + if (!OidIsValid(collid)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("collation \"%s\" not found", collname))); + } + + tuplesortstate = tuplesort_begin_datum(elmtyp, + dir == 1 ? typentry->lt_opr : typentry->gt_opr, + collid, + false, work_mem, NULL, false); + + array_iterator = array_create_iterator(array, 0, NULL); + while (array_iterate(array_iterator, &value, &isnull)) + { + Assert(!isnull); + tuplesort_putdatum(tuplesortstate, value, isnull); + } + array_free_iterator(array_iterator); + + /* + * Do the sort. + */ + tuplesort_performsort(tuplesortstate); + + while (tuplesort_getdatum(tuplesortstate, true, false, &value, &isnull, NULL)) + { + Assert(!isnull); + astate = accumArrayResult(astate, value, false, + elmtyp, CurrentMemoryContext); + } + + tuplesort_end(tuplesortstate); + + /* Avoid leaking memory when handed toasted input */ + PG_FREE_IF_COPY(array, 0); + PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext)); +} + +Datum +array_sort_order(PG_FUNCTION_ARGS) +{ + return array_sort(fcinfo); +} + +Datum +array_sort_order_collation(PG_FUNCTION_ARGS) +{ + return array_sort(fcinfo); +} diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 43f608d7a0..1769a5d2c5 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -1734,6 +1734,15 @@ { oid => '6216', descr => 'take samples from array', proname => 'array_sample', provolatile => 'v', prorettype => 'anyarray', proargtypes => 'anyarray int4', prosrc => 'array_sample' }, +{ oid => '8810', descr => 'sort array', + proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray', + proargtypes => 'anyarray', prosrc => 'array_sort'}, +{ oid => '8811', descr => 'sort array', + proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray', + proargtypes => 'anyarray text', prosrc => 'array_sort_order'}, +{ oid => '8812', descr => 'sort array', + proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray', + proargtypes => 'anyarray text text', prosrc => 'array_sort_order_collation'}, { oid => '3816', descr => 'array typanalyze', proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool', proargtypes => 'internal', prosrc => 'array_typanalyze' }, diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out index a6d81fd5f9..9326009188 100644 --- a/src/test/regress/expected/arrays.out +++ b/src/test/regress/expected/arrays.out @@ -2703,3 +2703,70 @@ SELECT array_sample('{1,2,3,4,5,6}'::int[], -1); -- fail ERROR: sample size must be between 0 and 6 SELECT array_sample('{1,2,3,4,5,6}'::int[], 7); --fail ERROR: sample size must be between 0 and 6 +-- array_sort +SELECT array_sort('{}'::int[]); + array_sort +------------ + {} +(1 row) + +SELECT array_sort('{1,3,5,2,4,6}'::int[]); + array_sort +--------------- + {1,2,3,4,5,6} +(1 row) + +SELECT array_sort('{1,3,5,2,4,6}'::int[], 'desc'); + array_sort +--------------- + {6,5,4,3,2,1} +(1 row) + +SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'asc'); + array_sort +--------------------------- + {1.1,2.2,3.3,4.4,5.5,6.6} +(1 row) + +SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'desc'); + array_sort +--------------------------- + {6.6,5.5,4.4,3.3,2.2,1.1} +(1 row) + +SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]); + array_sort +--------------------------- + {1.1,2.2,3.3,4.4,5.5,6.6} +(1 row) + +SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[], 'desc'); + array_sort +--------------------------- + {6.6,5.5,4.4,3.3,2.2,1.1} +(1 row) + +SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[]); + array_sort +------------------------------------------------------------------ + {"abc DEF 123abc","ábc sßs ßss DÉF",ⱥȺ,ⱥⱥⱥ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž"} +(1 row) + +SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[], 'desc'); + array_sort +------------------------------------------------------------------ + {"DŽxxDŽ džxxDž Džxxdž",ȺȺȺ,ⱥⱥⱥ,ⱥȺ,"ábc sßs ßss DÉF","abc DEF 123abc"} +(1 row) + +SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[], 'asc', 'pg_c_utf8'); + array_sort +------------------------------------------------------------------ + {"abc DEF 123abc","ábc sßs ßss DÉF","DŽxxDŽ džxxDž Džxxdž",ȺȺȺ,ⱥȺ,ⱥⱥⱥ} +(1 row) + +SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[], 'desc', 'pg_c_utf8'); + array_sort +------------------------------------------------------------------ + {ⱥⱥⱥ,ⱥȺ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž","ábc sßs ßss DÉF","abc DEF 123abc"} +(1 row) + diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql index 47058dfde5..b9bf5fd294 100644 --- a/src/test/regress/sql/arrays.sql +++ b/src/test/regress/sql/arrays.sql @@ -827,3 +827,16 @@ SELECT array_dims(array_sample('[-1:2][2:3]={{1,2},{3,NULL},{5,6},{7,8}}'::int[] SELECT array_dims(array_sample('{{{1,2},{3,NULL}},{{5,6},{7,8}},{{9,10},{11,12}}}'::int[], 2)); SELECT array_sample('{1,2,3,4,5,6}'::int[], -1); -- fail SELECT array_sample('{1,2,3,4,5,6}'::int[], 7); --fail + +-- array_sort +SELECT array_sort('{}'::int[]); +SELECT array_sort('{1,3,5,2,4,6}'::int[]); +SELECT array_sort('{1,3,5,2,4,6}'::int[], 'desc'); +SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'asc'); +SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'desc'); +SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]); +SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[], 'desc'); +SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[]); +SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[], 'desc'); +SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[], 'asc', 'pg_c_utf8'); +SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[], 'desc', 'pg_c_utf8'); -- 2.39.5