From 337b2049fae0ac614ae3ca0c5f93f6cfcbf8dbbf Mon Sep 17 00:00:00 2001 From: Daniel Gustafsson Date: Wed, 15 May 2019 11:47:56 +0200 Subject: [PATCH] Handle lowercase sigma in word-final position According to ISO/IEC 30112 WD10, when GREEK SMALL LETTER SIGMA is at word-final position it should instead become GREEK SMALL LETTER FINAL SIGMA. Since towlower only sees a single wchar and not its context, we need to handle word-final positions separately. Reference: http://www.open-std.org/JTC1/SC35/WG5/docs/30112d10.pdf --- src/backend/tsearch/ts_locale.c | 8 ++++++++ src/backend/utils/adt/formatting.c | 17 +++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c index 309f7c0459..c49b980843 100644 --- a/src/backend/tsearch/ts_locale.c +++ b/src/backend/tsearch/ts_locale.c @@ -283,6 +283,14 @@ lowerstr_with_len(const char *str, int len) while (*wptr) { *wptr = towlower((wint_t) *wptr); + /* + * According to ISO/IEC 30112 WD10, when GREEK SMALL LETTER SIGMA + * is at word-final position it should instead become GREEK SMALL + * LETTER FINAL SIGMA. Since towlower only sees a single wchar and + * not its context, we need to handle word-final positions separately. + */ + if (*wptr == 0x03c3 && (!*(wptr + 1) || *(wptr + 1) < 0x0041)) + *wptr = (wint_t) 0x03c2; wptr++; } diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index 69a691f18e..4cbac9561b 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -962,6 +962,18 @@ typedef struct NUMProc *L_currency_symbol; } NUMProc; +/* + * According to ISO/IEC 30112 WD10, when GREEK SMALL LETTER SIGMA is at word- + * final position it should instead become GREEK SMALL LETTER FINAL SIGMA. + * Since towlower only sees a single wchar and not its context, we need to handle + * word-final positions separately. 
+ */ +#define ADJUST_WORDFINAL \ +do { \ + if ((workspace[curr_char] == 0x03c3) && \ + (!workspace[curr_char + 1] || workspace[curr_char + 1] < 0x0041)) \ + workspace[curr_char] = (wchar_t) 0x03c2; \ +} while(0) /* ---------- * Functions @@ -1615,6 +1627,8 @@ str_tolower(const char *buff, size_t nbytes, Oid collid) else #endif workspace[curr_char] = towlower(workspace[curr_char]); + + ADJUST_WORDFINAL; } /* @@ -1869,7 +1883,10 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) #endif { if (wasalnum) + { workspace[curr_char] = towlower(workspace[curr_char]); + ADJUST_WORDFINAL; + } else workspace[curr_char] = towupper(workspace[curr_char]); wasalnum = iswalnum(workspace[curr_char]); -- 2.14.1.145.gb3622a4ee