From 783791442fdb4005cdfb4775f8aad977cbfbfce9 Mon Sep 17 00:00:00 2001 From: Junwang Zhao Date: Fri, 27 Sep 2024 13:05:40 +0000 Subject: [PATCH v7] general purpose array_sort Sorts anyarray in either ascending or descending order. Signed-off-by: Junwang Zhao --- doc/src/sgml/func.sgml | 32 +++ src/backend/utils/adt/array_userfuncs.c | 205 ++++++++++++++++++ src/include/catalog/pg_proc.dat | 6 + src/test/regress/expected/arrays.out | 123 +++++++++++ .../regress/expected/collate.icu.utf8.out | 13 ++ src/test/regress/sql/arrays.sql | 24 ++ src/test/regress/sql/collate.icu.utf8.sql | 4 + src/tools/pgindent/typedefs.list | 1 + 8 files changed, 408 insertions(+) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index ad663c94d7..0414b43523 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -20421,6 +20421,38 @@ SELECT NULLIF(value, '(none)') ... + + + + array_sort + + array_sort ( anyarray COLLATE collation_name , dir ) + anyarray + + + Sorts the array based on the given parameter. + + + If the COLLATE option is specified then sorting is based on collation_name, otherwise + using array element type's collation. + dir must be one of the following, and that the starred options are the defaults when null position is omitted. + + asc + desc + asc nulls first + asc nulls last * + desc nulls first * + desc nulls last + nulls first + nulls last + + + + array_sort(ARRAY[[2,4],[2,1],[6,5]]) + {{2,1},{2,4},{6,5}} + + + diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c index 6599be2ec5..b6c948e60f 100644 --- a/src/backend/utils/adt/array_userfuncs.c +++ b/src/backend/utils/adt/array_userfuncs.c @@ -12,15 +12,18 @@ */ #include "postgres.h" +#include "catalog/namespace.h" #include "catalog/pg_type.h" #include "common/int.h" #include "common/pg_prng.h" #include "libpq/pqformat.h" +#include "miscadmin.h" #include "port/pg_bitutils.h" #include "utils/array.h" #include "utils/builtins.h" #include "utils/datum.h" #include "utils/lsyscache.h" +#include "utils/tuplesort.h" #include "utils/typcache.h" /* @@ -1685,3 +1688,205 @@ array_sample(PG_FUNCTION_ARGS) PG_RETURN_ARRAYTYPE_P(result); } + + +#define WHITESPACE " \t\n\r\v\f" + +typedef enum +{ + PARSE_SORT_ORDER_INIT, + PARSE_SORT_ORDER_DIRECTION_SET, + PARSE_SORT_ORDER_NULLS_OPTION, + PARSE_SORT_ORDER_ERROR, + PARSE_SORT_ORDER_DONE +} ParseSortOrderState; + +static bool +parse_sort_order(const char *str, bool *sort_asc, bool *nulls_first) +{ + char *token; + char *saveptr; + char *str_copy = pstrdup(str); + bool nulls_first_set = false; + ParseSortOrderState state = PARSE_SORT_ORDER_INIT; + + token = strtok_r(str_copy, WHITESPACE, &saveptr); + + while (token != NULL && state != PARSE_SORT_ORDER_ERROR) + { + switch (state) + { + case PARSE_SORT_ORDER_INIT: + if (pg_strcasecmp(token, "ASC") == 0) + { + *sort_asc = true; + state = PARSE_SORT_ORDER_DIRECTION_SET; + } + else if (pg_strcasecmp(token, "DESC") == 0) + { + *sort_asc = false; + state = PARSE_SORT_ORDER_DIRECTION_SET; + } + else if (pg_strcasecmp(token, "NULLS") == 0) + state = PARSE_SORT_ORDER_NULLS_OPTION; + else + state = PARSE_SORT_ORDER_ERROR; + break; + + case PARSE_SORT_ORDER_DIRECTION_SET: + if (pg_strcasecmp(token, "NULLS") == 0) + state = PARSE_SORT_ORDER_NULLS_OPTION; + else + state = PARSE_SORT_ORDER_ERROR; + break; + + case PARSE_SORT_ORDER_NULLS_OPTION: + if (pg_strcasecmp(token, "FIRST") == 0) + { + *nulls_first = true; + nulls_first_set = true; + state = PARSE_SORT_ORDER_DONE; + } + else if (pg_strcasecmp(token, "LAST") == 0) + { + *nulls_first = false; + nulls_first_set = true; + state = PARSE_SORT_ORDER_DONE; + } + else + state = PARSE_SORT_ORDER_ERROR; + break; + + case PARSE_SORT_ORDER_DONE: + /* No more tokens should be processed after first/last */ + state = PARSE_SORT_ORDER_ERROR; + break; + + default: + state = PARSE_SORT_ORDER_ERROR; + break; + } + + token = strtok_r(NULL, WHITESPACE, &saveptr); + } + + if (state == PARSE_SORT_ORDER_INIT || + state == PARSE_SORT_ORDER_DIRECTION_SET) + state = PARSE_SORT_ORDER_DONE; + + if (state == PARSE_SORT_ORDER_NULLS_OPTION) + state = PARSE_SORT_ORDER_ERROR; + + if (!nulls_first_set && state == PARSE_SORT_ORDER_DONE) + *nulls_first = !*sort_asc; + + pfree(str_copy); + return state == PARSE_SORT_ORDER_DONE; +} + +/* + * array_sort + * + * Sorts the array in either ascending or descending order. + * The array must be empty or one-dimensional. + */ +Datum +array_sort(PG_FUNCTION_ARGS) +{ + ArrayType *array = PG_GETARG_ARRAYTYPE_P(0); + text *dirstr = (fcinfo->nargs > 1) ? PG_GETARG_TEXT_PP(1) : NULL; + bool sort_asc = true; + bool nulls_first = false; + Oid elmtyp; + Oid array_type; + Oid collation = PG_GET_COLLATION(); + TypeCacheEntry *typentry; + Tuplesortstate *tuplesortstate; + ArrayIterator array_iterator; + Datum value; + bool isnull; + ArrayBuildStateAny *astate = NULL; + + if (ARR_NDIM(array) < 1) + PG_RETURN_ARRAYTYPE_P(array); + + if (dirstr != NULL) + { + if (!parse_sort_order(text_to_cstring(dirstr), &sort_asc, &nulls_first)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("second parameter must be a valid sort direction"))); + } + + elmtyp = ARR_ELEMTYPE(array); + typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra; + if (ARR_NDIM(array) == 1) + { + if (typentry == NULL || typentry->type_id != elmtyp) + { + typentry = lookup_type_cache(elmtyp, sort_asc ? TYPECACHE_LT_OPR : TYPECACHE_GT_OPR); + if ((sort_asc && !OidIsValid(typentry->lt_opr)) || + (!sort_asc && !OidIsValid(typentry->gt_opr))) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify an ordering operator for type %s", + format_type_be(elmtyp)))); + fcinfo->flinfo->fn_extra = (void *) typentry; + } + } + else + { + if (typentry == NULL || typentry->typelem != elmtyp) + { + array_type = get_array_type(elmtyp); + if (!OidIsValid(array_type)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("could not find array type for data type %s", + format_type_be(elmtyp)))); + typentry = lookup_type_cache(array_type, sort_asc ? TYPECACHE_LT_OPR : TYPECACHE_GT_OPR); + if ((sort_asc && !OidIsValid(typentry->lt_opr)) || + (!sort_asc && !OidIsValid(typentry->gt_opr))) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify an ordering operator for type %s", + format_type_be(elmtyp)))); + fcinfo->flinfo->fn_extra = (void *) typentry; + } + } + + tuplesortstate = tuplesort_begin_datum(typentry->type_id, + sort_asc ? typentry->lt_opr : typentry->gt_opr, + collation, + nulls_first, work_mem, NULL, false); + + array_iterator = array_create_iterator(array, ARR_NDIM(array) - 1, NULL); + while (array_iterate(array_iterator, &value, &isnull)) + { + tuplesort_putdatum(tuplesortstate, value, isnull); + } + array_free_iterator(array_iterator); + + /* + * Do the sort. + */ + tuplesort_performsort(tuplesortstate); + + while (tuplesort_getdatum(tuplesortstate, true, false, &value, &isnull, NULL)) + { + astate = accumArrayResultAny(astate, value, isnull, + typentry->type_id, CurrentMemoryContext); + } + + tuplesort_end(tuplesortstate); + + /* Avoid leaking memory when handed toasted input */ + PG_FREE_IF_COPY(array, 0); + PG_RETURN_DATUM(makeArrayResultAny(astate, CurrentMemoryContext, true)); +} + +Datum +array_sort_order(PG_FUNCTION_ARGS) +{ + return array_sort(fcinfo); +} diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 7c0b74fe05..a92fd3a6c2 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -1741,6 +1741,12 @@ { oid => '6216', descr => 'take samples from array', proname => 'array_sample', provolatile => 'v', prorettype => 'anyarray', proargtypes => 'anyarray int4', prosrc => 'array_sample' }, +{ oid => '8810', descr => 'sort array', + proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray', + proargtypes => 'anyarray', prosrc => 'array_sort'}, +{ oid => '8811', descr => 'sort array', + proname => 'array_sort', provolatile => 'v', prorettype => 'anyarray', + proargtypes => 'anyarray text', prosrc => 'array_sort_order'}, { oid => '3816', descr => 'array typanalyze', proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool', proargtypes => 'internal', prosrc => 'array_typanalyze' }, diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out index a6d81fd5f9..3c733e5aab 100644 --- a/src/test/regress/expected/arrays.out +++ b/src/test/regress/expected/arrays.out @@ -2703,3 +2703,126 @@ SELECT array_sample('{1,2,3,4,5,6}'::int[], -1); -- fail ERROR: sample size must be between 0 and 6 SELECT array_sample('{1,2,3,4,5,6}'::int[], 7); --fail ERROR: sample size must be between 0 and 6 +-- array_sort +SELECT array_sort('{}'::int[]); + array_sort +------------ + {} +(1 row) + +SELECT array_sort('{1,3,5,2,4,6}'::int[]); + array_sort +--------------- + {1,2,3,4,5,6} +(1 row) + +SELECT array_sort('{1,3,5,2,4,6}'::int[], 'desc'); + array_sort +--------------- + {6,5,4,3,2,1} +(1 row) + +SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'asc'); + array_sort +--------------------------- + {1.1,2.2,3.3,4.4,5.5,6.6} +(1 row) + +SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'desc'); + array_sort +--------------------------- + {6.6,5.5,4.4,3.3,2.2,1.1} +(1 row) + +SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]); + array_sort +--------------------------- + {1.1,2.2,3.3,4.4,5.5,6.6} +(1 row) + +SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[], 'desc'); + array_sort +--------------------------- + {6.6,5.5,4.4,3.3,2.2,1.1} +(1 row) + +SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C"); + array_sort +----------------------- + {Abc,CCC,bar,bbc,foo} +(1 row) + +SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C", 'asc'); + array_sort +----------------------- + {Abc,CCC,bar,bbc,foo} +(1 row) + +SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C", 'desc'); + array_sort +----------------------- + {foo,bbc,bar,CCC,Abc} +(1 row) + +-- nulls first/last tests +SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C"); + array_sort +---------------------------- + {Abc,CCC,bar,bbc,foo,NULL} +(1 row) + +SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'asc'); + array_sort +---------------------------- + {Abc,CCC,bar,bbc,foo,NULL} +(1 row) + +SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'desc'); + array_sort +---------------------------- + {NULL,foo,bbc,bar,CCC,Abc} +(1 row) + +SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'nulls first'); + array_sort +---------------------------- + {NULL,Abc,CCC,bar,bbc,foo} +(1 row) + +SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'nulls first'); + array_sort +---------------------------- + {NULL,Abc,CCC,bar,bbc,foo} +(1 row) + +SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'asc nulls first'); + array_sort +---------------------------- + {NULL,Abc,CCC,bar,bbc,foo} +(1 row) + +SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'asc nulls last'); + array_sort +---------------------------- + {Abc,CCC,bar,bbc,foo,NULL} +(1 row) + +SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'desc nulls first'); + array_sort +---------------------------- + {NULL,foo,bbc,bar,CCC,Abc} +(1 row) + +SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'desc nulls last'); + array_sort +---------------------------- + {foo,bbc,bar,CCC,Abc,NULL} +(1 row) + +-- multidimensional array tests +SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]); + array_sort +--------------------- + {{2,1},{2,4},{6,5}} +(1 row) + diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out index faa376e060..aa5fd75e6e 100644 --- a/src/test/regress/expected/collate.icu.utf8.out +++ b/src/test/regress/expected/collate.icu.utf8.out @@ -1338,6 +1338,19 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse t | t (1 row) +-- tests with array_sort +SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive); + array_sort +------------ + {a,B} +(1 row) + +SELECT array_sort('{a,B}'::text[] COLLATE "C"); + array_sort +------------ + {B,a} +(1 row) + -- test language tags CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false); SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive; diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql index 47058dfde5..821c4eab0b 100644 --- a/src/test/regress/sql/arrays.sql +++ b/src/test/regress/sql/arrays.sql @@ -827,3 +827,27 @@ SELECT array_dims(array_sample('[-1:2][2:3]={{1,2},{3,NULL},{5,6},{7,8}}'::int[] SELECT array_dims(array_sample('{{{1,2},{3,NULL}},{{5,6},{7,8}},{{9,10},{11,12}}}'::int[], 2)); SELECT array_sample('{1,2,3,4,5,6}'::int[], -1); -- fail SELECT array_sample('{1,2,3,4,5,6}'::int[], 7); --fail + +-- array_sort +SELECT array_sort('{}'::int[]); +SELECT array_sort('{1,3,5,2,4,6}'::int[]); +SELECT array_sort('{1,3,5,2,4,6}'::int[], 'desc'); +SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'asc'); +SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::float8[], 'desc'); +SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]); +SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[], 'desc'); +SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C"); +SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C", 'asc'); +SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C", 'desc'); +-- nulls first/last tests +SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C"); +SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'asc'); +SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'desc'); +SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'nulls first'); +SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'nulls first'); +SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'asc nulls first'); +SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'asc nulls last'); +SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'desc nulls first'); +SELECT array_sort('{foo,bar,CCC,Abc,bbc,null}'::text[] COLLATE "C", 'desc nulls last'); +-- multidimensional array tests +SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]); diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql index 80f28a97d7..3c739d332b 100644 --- a/src/test/regress/sql/collate.icu.utf8.sql +++ b/src/test/regress/sql/collate.icu.utf8.sql @@ -536,6 +536,10 @@ CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=second SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive; SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive; +-- tests with array_sort +SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive); +SELECT array_sort('{a,B}'::text[] COLLATE "C"); + -- test language tags CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false); SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive; diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 57de1acff3..f085b3cf87 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2025,6 +2025,7 @@ ParseLoc ParseNamespaceColumn ParseNamespaceItem ParseParamRefHook +ParseSortOrderState ParseState ParsedLex ParsedScript -- 2.39.5