From 219cf386d74b979119bc75a769a937b649b969f0 Mon Sep 17 00:00:00 2001
From: Juan Jose Santamaria Flecha
Date: Fri, 9 Jun 2023 16:57:32 -0400
Subject: [PATCH] WIN32 Inconsistent results with libc utf8 sorting

---
 src/backend/utils/adt/pg_locale.c | 159 ++++++++++++++++++++++++++++------------
 src/include/utils/pg_locale.h     |   3 +
 2 files changed, 115 insertions(+), 47 deletions(-)

diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 31e3b16..7500fa5 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1534,6 +1534,24 @@ pg_newlocale_from_collation(Oid collid)
 												 NULL);
 #else
 			loc = _create_locale(LC_ALL, collcollate);
+			if (GetDatabaseEncoding() == PG_UTF8)
+			{
+				wchar_t		wcollcollate[LOCALE_NAME_MAX_LENGTH];
+				LCID		lcid;
+
+				/* LocaleNameToLCID() takes a wide-character locale name */
+				if (!MultiByteToWideChar(CP_ACP, 0, collcollate, -1, wcollcollate,
+										 LOCALE_NAME_MAX_LENGTH))
+					ereport(ERROR,
+							(errmsg("could not convert string to UTF-16: error code %lu",
+									GetLastError())));
+				lcid = LocaleNameToLCID(wcollcollate, 0);
+				if (lcid == 0)
+					ereport(ERROR,
+							(errmsg("could not convert locale name to LCID: error code %lu",
+									GetLastError())));
+				result.info.lcid = lcid;
+			}
 #endif
 			if (!loc)
 				report_newlocale_failure(collcollate);
@@ -1565,7 +1583,10 @@ pg_newlocale_from_collation(Oid collid)
 #endif
 			}
 
-			result.info.lt = loc;
+#ifdef WIN32
+			if (!result.info.lcid)
+#endif
+				result.info.lt = loc;
 #else							/* not HAVE_LOCALE_T */
 			/* platform that doesn't support locale_t */
 			ereport(ERROR,
@@ -1729,77 +1750,121 @@ get_collation_actual_version(char collprovider, const char *collcollate)
 }
 
 /*
- * pg_strncoll_libc_win32_utf8
+ * pg_strncoll_sort_key
  *
  * Win32 does not have UTF-8. Convert UTF8 arguments to wide characters and
- * invoke wcscoll() or wcscoll_l().
+ * produce a normalized sort key based on the locale. Returns a palloced
+ * string.
+ *
+ * With a locale, the result is the LCMapStringW() sort key and *sortlen is
+ * its size in bytes, including the terminating NUL.  Without a locale, the
+ * result is the NUL-terminated UTF-16 string itself and *sortlen is its
+ * length in wide characters, excluding the terminator.
  */
 #ifdef WIN32
-static int
-pg_strncoll_libc_win32_utf8(const char *arg1, size_t len1, const char *arg2,
-							size_t len2, pg_locale_t locale)
+static char *
+pg_strncoll_sort_key(const char *arg, size_t len, pg_locale_t locale,
+					 int *sortlen)
 {
-	char		sbuf[TEXTBUFLEN];
-	char	   *buf = sbuf;
-	char	   *a1p,
-			   *a2p;
-	int			a1len = len1 * 2 + 2;
-	int			a2len = len2 * 2 + 2;
-	int			r;
-	int			result;
-
-	Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
-	Assert(GetDatabaseEncoding() == PG_UTF8);
-#ifndef WIN32
-	Assert(false);
-#endif
+	char	   *ap;
+	int			alen = len * 2 + 2;
+	int			result;
 
-	if (a1len + a2len > TEXTBUFLEN)
-		buf = palloc(a1len + a2len);
-
-	a1p = buf;
-	a2p = buf + a1len;
+	ap = palloc(alen);
 
 	/* API does not work for zero-length input */
-	if (len1 == 0)
-		r = 0;
+	if (len == 0)
+		result = 0;
 	else
 	{
-		r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
-								(LPWSTR) a1p, a1len / 2);
-		if (!r)
+		result = MultiByteToWideChar(CP_UTF8, 0, arg, len,
+									 (LPWSTR) ap, alen / 2);
+		if (!result)
 			ereport(ERROR,
 					(errmsg("could not convert string to UTF-16: error code %lu",
 							GetLastError())));
 	}
-	((LPWSTR) a1p)[r] = 0;
+	((LPWSTR) ap)[result] = 0;
 
-	if (len2 == 0)
-		r = 0;
-	else
+#ifdef HAVE_LOCALE_T
+	if (locale)
 	{
-		r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
-								(LPWSTR) a2p, a2len / 2);
-		if (!r)
+		int			mapsize;
+		char	   *map;
+
+		mapsize = LCMapStringW(locale->info.lcid, LCMAP_SORTKEY, (LPWSTR) ap, -1, NULL, 0);
+		if (mapsize == 0)
 			ereport(ERROR,
-					(errmsg("could not convert string to UTF-16: error code %lu",
+					(errmsg("could not produce a normalized sort key: error code %lu",
+							GetLastError())));
+
+		map = palloc(mapsize);
+
+		result = LCMapStringW(locale->info.lcid, LCMAP_SORTKEY, (LPWSTR) ap, -1,
+							  (LPWSTR) map, mapsize);
+		if (result == 0)
+			ereport(ERROR,
+					(errmsg("could not produce a normalized sort key: error code %lu",
 							GetLastError())));
+
+		pfree(ap);
+		ap = map;
 	}
-	((LPWSTR) a2p)[r] = 0;
+#endif
+
+	*sortlen = result;
+	return ap;
+}
+#endif							/* WIN32 */
+
+/*
+ * pg_strncoll_libc_win32_utf8
+ *
+ * For Win32 UTF-8 string comparison we will use LCMapStringW() or
+ * CompareStringOrdinal().
+ *
+ * With a locale, the two LCMapStringW() sort keys are compared bytewise.
+ * The compared length includes the shorter key's terminating NUL, so a key
+ * that is a prefix of the other one compares lower.  Without a locale, the
+ * UTF-16 strings are compared ordinally; CompareStringOrdinal()'s 1/2/3
+ * result is shifted to -1/0/1, which maps its failure value 0 to -2.
+ */
+#ifdef WIN32
+static int
+pg_strncoll_libc_win32_utf8(const char *arg1, size_t len1, const char *arg2,
+							size_t len2, pg_locale_t locale)
+{
+	char	   *a1p,
+			   *a2p;
+	int			a1plen = 0,
+				a2plen = 0;
+	int			result;
+
+	Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
+	Assert(GetDatabaseEncoding() == PG_UTF8);
+#ifndef WIN32
+	Assert(false);
+#endif
+
+	a1p = pg_strncoll_sort_key(arg1, len1, locale, &a1plen);
+	a2p = pg_strncoll_sort_key(arg2, len2, locale, &a2plen);
 
-	errno = 0;
 #ifdef HAVE_LOCALE_T
 	if (locale)
-		result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt);
+		result = memcmp(a1p, a2p,
+						(a1plen < a2plen) ? a1plen : a2plen);
 	else
 #endif
-		result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
-	if (result == 2147483647)	/* _NLSCMPERROR; missing from mingw headers */
-		ereport(ERROR,
-				(errmsg("could not compare Unicode strings: %m")));
+	{
+		result = CompareStringOrdinal((LPWSTR) a1p, -1, (LPWSTR) a2p, -1, FALSE) - 2;
+		if (result == -2)
+			ereport(ERROR,
+					(errmsg("could not compare Unicode strings: error code %lu",
+							GetLastError())));
+	}
 
-	if (buf != sbuf)
-		pfree(buf);
+	pfree(a1p);
+	pfree(a2p);
 
 	return result;
 }
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index e2a7243..d642bbc 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -80,6 +80,9 @@ struct pg_locale_struct
 	{
 #ifdef HAVE_LOCALE_T
 		locale_t	lt;
+#ifdef WIN32
+		LCID		lcid;
+#endif
 #endif
 #ifdef USE_ICU
 		struct
-- 
2.11.0