diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index 755ca6e..5a57391 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -96,6 +96,7 @@
 #include "utils/memutils.h"
 #include "utils/numeric.h"
 #include "utils/pg_locale.h"
+#include "common/unicode_norm.h"
 
 /* ----------
  * Routines type
@@ -1843,6 +1844,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
 				wchar_t    *workspace;
 				size_t		curr_char;
 				size_t		result_size;
+				int			encoding;
 
 				/* Overflow paranoia */
 				if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
@@ -1850,6 +1852,9 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
 							(errcode(ERRCODE_OUT_OF_MEMORY),
 							 errmsg("out of memory")));
 
+				/* Combining-character lookups are only attempted for UTF-8 */
+				encoding = GetDatabaseEncoding();
+
 				/* Output workspace cannot have more codes than input bytes */
 				workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
 
@@ -1864,7 +1869,17 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
 							workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
 						else
 							workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
-						wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
+
+						/*
+						 * A combining mark belongs to the preceding character,
+						 * so it must not reset the word-boundary state.  The
+						 * lookup is only attempted for UTF-8; every other
+						 * encoding keeps the unconditional update.
+						 * XXX assumes wchar_t holds Unicode code points here.
+						 */
+						if (encoding != PG_UTF8 ||
+							!is_pg_wchar_combining((pg_wchar) workspace[curr_char]))
+							wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
 					}
 					else
 #endif
@@ -1873,7 +1888,11 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
 							workspace[curr_char] = towlower(workspace[curr_char]);
 						else
 							workspace[curr_char] = towupper(workspace[curr_char]);
-						wasalnum = iswalnum(workspace[curr_char]);
+
+						/* Likewise, skip UTF-8 combining marks in this path */
+						if (encoding != PG_UTF8 ||
+							!is_pg_wchar_combining((pg_wchar) workspace[curr_char]))
+							wasalnum = iswalnum(workspace[curr_char]);
 					}
 				}
 
diff --git a/src/common/unicode_norm.c b/src/common/unicode_norm.c
index 89c5533..25b149b 100644
--- a/src/common/unicode_norm.c
+++ b/src/common/unicode_norm.c
@@ -435,3 +435,24 @@ unicode_normalize_kc(const pg_wchar *input)
 
 	return recomp_chars;
 }
+
+/*
+ * is_pg_wchar_combining
+ *		Report whether a code point is a combining character.
+ *
+ * Looks up the decomposition entry for 'current' via get_code_entry() and
+ * returns true only when an entry exists and its combining class is
+ * nonzero.  Code points for which no entry is found are treated as
+ * non-combining.
+ *
+ * The argument is taken to be a Unicode code point, so callers should only
+ * rely on the result when their input really is one (for example, when the
+ * database encoding is UTF-8).
+ */
+bool
+is_pg_wchar_combining(const pg_wchar current)
+{
+	pg_unicode_decomposition *currEntry = get_code_entry(current);
+
+	return currEntry != NULL && currEntry->comb_class != 0x0;
+}
diff --git a/src/include/common/unicode_norm.h b/src/include/common/unicode_norm.h
index 99167d2..bdcf02e 100644
--- a/src/include/common/unicode_norm.h
+++ b/src/include/common/unicode_norm.h
@@ -17,5 +17,6 @@
 #include "mb/pg_wchar.h"
 
 extern pg_wchar *unicode_normalize_kc(const pg_wchar *input);
+extern bool is_pg_wchar_combining(const pg_wchar current);
 
 #endif							/* UNICODE_NORM_H */