From 8f5678e921cfc128749d8be9203d544dca033697 Mon Sep 17 00:00:00 2001 From: Junwang Zhao Date: Fri, 27 Sep 2024 13:05:40 +0000 Subject: [PATCH v3] general purpose array_sort Sorts anyarray in either ascending or descending order. The array must be empty or one-dimensional. Signed-off-by: Junwang Zhao --- doc/src/sgml/func.sgml | 32 +++++ src/backend/utils/adt/array_userfuncs.c | 180 ++++++++++++++++++++++++ src/include/catalog/pg_proc.dat | 6 + src/test/regress/expected/arrays.out | 122 ++++++++++++++++ src/test/regress/sql/arrays.sql | 23 +++ src/tools/pgindent/typedefs.list | 1 + 6 files changed, 364 insertions(+) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index e39d524b6b..cf30484e2a 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -20419,6 +20419,38 @@ SELECT NULLIF(value, '(none)') ... + + + + array_sort + + array_sort ( anyarray COLLATE collation_name , dir ) + anyarray + + + Sorts the array based on the given parameter. The array must be empty or one-dimensional. + + + If the COLLATE option is specified then sorting is based on collation_name, otherwise + using array element type's collation. + dir must be one of the following, and that the starred options are the defaults when null position is omitted. + + asc + desc + asc nulls first + asc nulls last * + desc nulls first * + desc nulls last + nulls first + nulls last + + + + array_sort(ARRAY[1,2,5,6,3,4]) + {1,2,3,4,5,6} + + + diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c index 6599be2ec5..0ff1a4f6f4 100644 --- a/src/backend/utils/adt/array_userfuncs.c +++ b/src/backend/utils/adt/array_userfuncs.c @@ -12,15 +12,18 @@ */ #include "postgres.h" +#include "catalog/namespace.h" #include "catalog/pg_type.h" #include "common/int.h" #include "common/pg_prng.h" #include "libpq/pqformat.h" +#include "miscadmin.h" #include "port/pg_bitutils.h" #include "utils/array.h" #include "utils/builtins.h" #include "utils/datum.h" #include "utils/lsyscache.h" +#include "utils/tuplesort.h" #include "utils/typcache.h" /* @@ -1685,3 +1688,180 @@ array_sample(PG_FUNCTION_ARGS) PG_RETURN_ARRAYTYPE_P(result); } + + +#define WHITESPACE " \t\n\r" + +typedef enum +{ + PARSE_SORT_ORDER_INIT, + PARSE_SORT_ORDER_DIRECTION_SET, + PARSE_SORT_ORDER_NULLS_OPTION, + PARSE_SORT_ORDER_ERROR, + PARSE_SORT_ORDER_DONE +} ParseSortOrderState; + +static bool +parse_sort_order(const char *str, bool *sort_asc, bool *nulls_first) +{ + char *token; + char *saveptr; + char *str_copy = pstrdup(str); + bool nulls_first_set = false; + ParseSortOrderState state = PARSE_SORT_ORDER_INIT; + + token = strtok_r(str_copy, WHITESPACE, &saveptr); + + while (token != NULL && state != PARSE_SORT_ORDER_ERROR) + { + switch (state) + { + case PARSE_SORT_ORDER_INIT: + if (pg_strcasecmp(token, "ASC") == 0) + { + *sort_asc = true; + state = PARSE_SORT_ORDER_DIRECTION_SET; + } + else if (pg_strcasecmp(token, "DESC") == 0) + { + *sort_asc = false; + state = PARSE_SORT_ORDER_DIRECTION_SET; + } + else if (pg_strcasecmp(token, "NULLS") == 0) + state = PARSE_SORT_ORDER_NULLS_OPTION; + else + state = PARSE_SORT_ORDER_ERROR; + break; + + case PARSE_SORT_ORDER_DIRECTION_SET: + if (pg_strcasecmp(token, "NULLS") == 0) + state = PARSE_SORT_ORDER_NULLS_OPTION; + else + state = PARSE_SORT_ORDER_ERROR; + break; + + case PARSE_SORT_ORDER_NULLS_OPTION: + if (pg_strcasecmp(token, "FIRST") == 0) + { + *nulls_first = true; + nulls_first_set = true; + state = PARSE_SORT_ORDER_DONE; + } + else if (pg_strcasecmp(token, "LAST") == 0) + { + *nulls_first = false; + nulls_first_set = true; + state = PARSE_SORT_ORDER_DONE; + } + else + state = PARSE_SORT_ORDER_ERROR; + break; + + case PARSE_SORT_ORDER_DONE: + /* No more tokens should be processed after first/last */ + state = PARSE_SORT_ORDER_ERROR; + break; + + default: + state = PARSE_SORT_ORDER_ERROR; + break; + } + + token = strtok_r(NULL, WHITESPACE, &saveptr); + } + + if (state == PARSE_SORT_ORDER_INIT || + state == PARSE_SORT_ORDER_DIRECTION_SET) + state = PARSE_SORT_ORDER_DONE; + + if (state == PARSE_SORT_ORDER_NULLS_OPTION) + state = PARSE_SORT_ORDER_ERROR; + + if (!nulls_first_set && state == PARSE_SORT_ORDER_DONE) + *nulls_first = !*sort_asc; + + pfree(str_copy); + return state == PARSE_SORT_ORDER_DONE; +} + +/* + * array_sort + * + * Sorts the array in either ascending or descending order. + * The array must be empty or one-dimensional. + */ +Datum +array_sort(PG_FUNCTION_ARGS) +{ + ArrayType *array = PG_GETARG_ARRAYTYPE_P(0); + text *dirstr = (fcinfo->nargs > 1) ? PG_GETARG_TEXT_PP(1) : NULL; + bool sort_asc = true; + bool nulls_first = false; + Oid elmtyp; + Oid collation = PG_GET_COLLATION(); + TypeCacheEntry *typentry; + Tuplesortstate *tuplesortstate; + ArrayIterator array_iterator; + Datum value; + bool isnull; + ArrayBuildState *astate = NULL; + + if (ARR_NDIM(array) > 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("multidimensional arrays sorting are not supported"))); + + if (ARR_NDIM(array) < 1) + PG_RETURN_ARRAYTYPE_P(array); + + if (dirstr != NULL) + { + if (!parse_sort_order(text_to_cstring(dirstr), &sort_asc, &nulls_first)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("second parameter must be a valid sort direction"))); + } + + elmtyp = ARR_ELEMTYPE(array); + typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra; + if (typentry == NULL || typentry->type_id != elmtyp) + { + typentry = lookup_type_cache(elmtyp, sort_asc ? TYPECACHE_LT_OPR : TYPECACHE_GT_OPR); + fcinfo->flinfo->fn_extra = (void *) typentry; + } + + tuplesortstate = tuplesort_begin_datum(elmtyp, + sort_asc ? typentry->lt_opr : typentry->gt_opr, + collation, + nulls_first, work_mem, NULL, false); + + array_iterator = array_create_iterator(array, 0, NULL); + while (array_iterate(array_iterator, &value, &isnull)) + { + tuplesort_putdatum(tuplesortstate, value, isnull); + } + array_free_iterator(array_iterator); + + /* + * Do the sort. + */ + tuplesort_performsort(tuplesortstate); + + while (tuplesort_getdatum(tuplesortstate, true, false, &value, &isnull, NULL)) + { + astate = accumArrayResult(astate, value, isnull, + elmtyp, CurrentMemoryContext); + } + + tuplesort_end(tuplesortstate); + + /* Avoid leaking memory when handed toasted input */ + PG_FREE_IF_COPY(array, 0); + PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext)); +} + +Datum +array_sort_order(PG_FUNCTION_ARGS) +{ + return array_sort(fcinfo); +} diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 43f608d7a0..1b2d64ac39 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -1734,6 +1734,12 @@ { oid => '6216', descr => 'take samples from array', proname => 'array_sample', provolatile => 'v', prorettype => 'anyarray', proargtypes => 'anyarray int4', prosrc => 'array_sample' }, +{ oid => '8810', descr => 'sort array', + proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray', + proargtypes => 'anyarray', prosrc => 'array_sort'}, +{ oid => '8811', descr => 'sort array', + proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray', + proargtypes => 'anyarray text', prosrc => 'array_sort_order'}, { oid => '3816', descr => 'array typanalyze', proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool', proargtypes => 'internal', prosrc => 'array_typanalyze' }, diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out index a6d81fd5f9..9c4116cf04 100644 --- a/src/test/regress/expected/arrays.out +++ b/src/test/regress/expected/arrays.out @@ -2703,3 +2703,125 @@ SELECT array_sample('{1,2,3,4,5,6}'::int[], -1); -- fail ERROR: sample size must be between 0 and 6 SELECT array_sample('{1,2,3,4,5,6}'::int[], 7); --fail ERROR: sample size must be between 0 and 6 +-- array_sort +SELECT array_sort('{}'::int[]); + array_sort +------------ + {} +(1 row) + +SELECT array_sort('{1,3,5,2,4,6}'::int[]); + array_sort +--------------- + {1,2,3,4,5,6} +(1 row) + +SELECT array_sort('{1,3,5,2,4,6}'::int[], 'desc'); + array_sort +--------------- + {6,5,4,3,2,1} +(1 row) + +SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'asc'); + array_sort +--------------------------- + {1.1,2.2,3.3,4.4,5.5,6.6} +(1 row) + +SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'desc'); + array_sort +--------------------------- + {6.6,5.5,4.4,3.3,2.2,1.1} +(1 row) + +SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]); + array_sort +--------------------------- + {1.1,2.2,3.3,4.4,5.5,6.6} +(1 row) + +SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[], 'desc'); + array_sort +--------------------------- + {6.6,5.5,4.4,3.3,2.2,1.1} +(1 row) + +SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[]); + array_sort +------------------------------------------------------------------ + {"abc DEF 123abc","ábc sßs ßss DÉF",ⱥȺ,ⱥⱥⱥ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž"} +(1 row) + +SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[], 'desc'); + array_sort +------------------------------------------------------------------ + {"DŽxxDŽ džxxDž Džxxdž",ȺȺȺ,ⱥⱥⱥ,ⱥȺ,"ábc sßs ßss DÉF","abc DEF 123abc"} +(1 row) + +SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[] COLLATE "pg_c_utf8", 'asc'); + array_sort +------------------------------------------------------------------ + {"abc DEF 123abc","ábc sßs ßss DÉF","DŽxxDŽ džxxDž Džxxdž",ȺȺȺ,ⱥȺ,ⱥⱥⱥ} +(1 row) + +SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[] COLLATE "pg_c_utf8", 'desc'); + array_sort +------------------------------------------------------------------ + {ⱥⱥⱥ,ⱥȺ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž","ábc sßs ßss DÉF","abc DEF 123abc"} +(1 row) + +-- nulls first/last tests +SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[]); + array_sort +---------------------------------------------------------------------------- + {"abc DEF 123abc","ábc sßs ßss DÉF",ⱥȺ,ⱥⱥⱥ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž",NULL,NULL} +(1 row) + +SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'asc'); + array_sort +---------------------------------------------------------------------------- + {"abc DEF 123abc","ábc sßs ßss DÉF",ⱥȺ,ⱥⱥⱥ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž",NULL,NULL} +(1 row) + +SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'desc'); + array_sort +---------------------------------------------------------------------------- + {NULL,NULL,"DŽxxDŽ džxxDž Džxxdž",ȺȺȺ,ⱥⱥⱥ,ⱥȺ,"ábc sßs ßss DÉF","abc DEF 123abc"} +(1 row) + +SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'nulls first'); + array_sort +---------------------------------------------------------------------------- + {NULL,NULL,"abc DEF 123abc","ábc sßs ßss DÉF",ⱥȺ,ⱥⱥⱥ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž"} +(1 row) + +SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'nulls first'); + array_sort +---------------------------------------------------------------------------- + {NULL,NULL,"abc DEF 123abc","ábc sßs ßss DÉF",ⱥȺ,ⱥⱥⱥ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž"} +(1 row) + +SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'asc nulls first'); + array_sort +---------------------------------------------------------------------------- + {NULL,NULL,"abc DEF 123abc","ábc sßs ßss DÉF",ⱥȺ,ⱥⱥⱥ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž"} +(1 row) + +SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'asc nulls last'); + array_sort +---------------------------------------------------------------------------- + {"abc DEF 123abc","ábc sßs ßss DÉF",ⱥȺ,ⱥⱥⱥ,ȺȺȺ,"DŽxxDŽ džxxDž Džxxdž",NULL,NULL} +(1 row) + +SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'desc nulls first'); + array_sort +---------------------------------------------------------------------------- + {NULL,NULL,"DŽxxDŽ džxxDž Džxxdž",ȺȺȺ,ⱥⱥⱥ,ⱥȺ,"ábc sßs ßss DÉF","abc DEF 123abc"} +(1 row) + +SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'desc nulls last'); + array_sort +---------------------------------------------------------------------------- + {"DŽxxDŽ džxxDž Džxxdž",ȺȺȺ,ⱥⱥⱥ,ⱥȺ,"ábc sßs ßss DÉF","abc DEF 123abc",NULL,NULL} +(1 row) + diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql index 47058dfde5..2cdd26386b 100644 --- a/src/test/regress/sql/arrays.sql +++ b/src/test/regress/sql/arrays.sql @@ -827,3 +827,26 @@ SELECT array_dims(array_sample('[-1:2][2:3]={{1,2},{3,NULL},{5,6},{7,8}}'::int[] SELECT array_dims(array_sample('{{{1,2},{3,NULL}},{{5,6},{7,8}},{{9,10},{11,12}}}'::int[], 2)); SELECT array_sample('{1,2,3,4,5,6}'::int[], -1); -- fail SELECT array_sample('{1,2,3,4,5,6}'::int[], 7); --fail + +-- array_sort +SELECT array_sort('{}'::int[]); +SELECT array_sort('{1,3,5,2,4,6}'::int[]); +SELECT array_sort('{1,3,5,2,4,6}'::int[], 'desc'); +SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'asc'); +SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'desc'); +SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]); +SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[], 'desc'); +SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[]); +SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[], 'desc'); +SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[] COLLATE "pg_c_utf8", 'asc'); +SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ}'::text[] COLLATE "pg_c_utf8", 'desc'); +-- nulls first/last tests +SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[]); +SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'asc'); +SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'desc'); +SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'nulls first'); +SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'nulls first'); +SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'asc nulls first'); +SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'asc nulls last'); +SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'desc nulls first'); +SELECT array_sort('{abc DEF 123abc,ábc sßs ßss DÉF,null,DŽxxDŽ džxxDž Džxxdž,ȺȺȺ,ⱥⱥⱥ,ⱥȺ,null}'::text[], 'desc nulls last'); diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index b6135f0347..99f1abe869 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2025,6 +2025,7 @@ ParseLoc ParseNamespaceColumn ParseNamespaceItem ParseParamRefHook +ParseSortOrderState ParseState ParsedLex ParsedScript -- 2.39.5