From c9ace91726c2889fe96dec28fd9f3c655e13afd7 Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Thu, 19 Sep 2024 11:12:41 -0700
Subject: [PATCH v4 7/7] Use method table for collation.

---
 src/backend/regex/regc_pg_locale.c     |  376 ++-----
 src/backend/utils/adt/Makefile         |    2 +
 src/backend/utils/adt/meson.build      |    2 +
 src/backend/utils/adt/pg_locale.c      | 1338 +++---------------------
 src/backend/utils/adt/pg_locale_icu.c  |  873 ++++++++++++++++
 src/backend/utils/adt/pg_locale_libc.c |  604 +++++++++++
 src/include/utils/pg_locale.h          |   44 +-
 7 files changed, 1727 insertions(+), 1512 deletions(-)
 create mode 100644 src/backend/utils/adt/pg_locale_icu.c
 create mode 100644 src/backend/utils/adt/pg_locale_libc.c

diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c
index b75784b6ce5..f7cd3f1787c 100644
--- a/src/backend/regex/regc_pg_locale.c
+++ b/src/backend/regex/regc_pg_locale.c
@@ -63,32 +63,17 @@
  * NB: the coding here assumes pg_wchar is an unsigned type.
  */
 
-typedef enum
-{
-	PG_REGEX_STRATEGY_C,		/* C locale (encoding independent) */
-	PG_REGEX_STRATEGY_BUILTIN,	/* built-in Unicode semantics */
-	PG_REGEX_STRATEGY_LIBC_WIDE,	/* Use locale_t <wctype.h> functions */
-	PG_REGEX_STRATEGY_LIBC_1BYTE,	/* Use locale_t <ctype.h> functions */
-	PG_REGEX_STRATEGY_ICU,		/* Use ICU uchar.h functions */
-} PG_Locale_Strategy;
-
-static PG_Locale_Strategy pg_regex_strategy;
 static pg_locale_t pg_regex_locale;
 static Oid	pg_regex_collation;
 
+static struct pg_locale_struct dummy_c_locale = {
+	.collate_is_c = true,
+	.ctype_is_c = true,
+};
+
 /*
  * Hard-wired character properties for C locale
  */
-#define PG_ISDIGIT	0x01
-#define PG_ISALPHA	0x02
-#define PG_ISALNUM	(PG_ISDIGIT | PG_ISALPHA)
-#define PG_ISUPPER	0x04
-#define PG_ISLOWER	0x08
-#define PG_ISGRAPH	0x10
-#define PG_ISPRINT	0x20
-#define PG_ISPUNCT	0x40
-#define PG_ISSPACE	0x80
-
 static const unsigned char pg_char_properties[128] = {
 	 /* NUL */ 0,
 	 /* ^A */ 0,
@@ -232,7 +217,6 @@ void
 pg_set_regex_collation(Oid collation)
 {
 	pg_locale_t locale = 0;
-	PG_Locale_Strategy strategy;
 
 	if (!OidIsValid(collation))
 	{
@@ -253,8 +237,8 @@ pg_set_regex_collation(Oid collation)
 		 * catalog access is available, so we can't call
 		 * pg_newlocale_from_collation().
 		 */
-		strategy = PG_REGEX_STRATEGY_C;
 		collation = C_COLLATION_OID;
+		locale = &dummy_c_locale;
 	}
 	else
 	{
@@ -271,32 +255,11 @@ pg_set_regex_collation(Oid collation)
 			 * C/POSIX collations use this path regardless of database
 			 * encoding
 			 */
-			strategy = PG_REGEX_STRATEGY_C;
-			locale = 0;
+			locale = &dummy_c_locale;
 			collation = C_COLLATION_OID;
 		}
-		else if (locale->provider == COLLPROVIDER_BUILTIN)
-		{
-			Assert(GetDatabaseEncoding() == PG_UTF8);
-			strategy = PG_REGEX_STRATEGY_BUILTIN;
-		}
-#ifdef USE_ICU
-		else if (locale->provider == COLLPROVIDER_ICU)
-		{
-			strategy = PG_REGEX_STRATEGY_ICU;
-		}
-#endif
-		else
-		{
-			Assert(locale->provider == COLLPROVIDER_LIBC);
-			if (GetDatabaseEncoding() == PG_UTF8)
-				strategy = PG_REGEX_STRATEGY_LIBC_WIDE;
-			else
-				strategy = PG_REGEX_STRATEGY_LIBC_1BYTE;
-		}
 	}
 
-	pg_regex_strategy = strategy;
 	pg_regex_locale = locale;
 	pg_regex_collation = collation;
 }
@@ -304,82 +267,31 @@ pg_set_regex_collation(Oid collation)
 static int
 pg_wc_isdigit(pg_wchar c)
 {
-	switch (pg_regex_strategy)
-	{
-		case PG_REGEX_STRATEGY_C:
-			return (c <= (pg_wchar) 127 &&
-					(pg_char_properties[c] & PG_ISDIGIT));
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return pg_u_isdigit(c, true);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswdigit_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					isdigit_l((unsigned char) c, pg_regex_locale->info.lt));
-			break;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_isdigit(c);
-#endif
-			break;
-	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	if (pg_regex_locale->ctype_is_c)
+		return (c <= (pg_wchar) 127 &&
+				(pg_char_properties[c] & PG_ISDIGIT));
+	else
+		return char_props(c, PG_ISDIGIT, pg_regex_locale) != 0;
 }
 
 static int
 pg_wc_isalpha(pg_wchar c)
 {
-	switch (pg_regex_strategy)
-	{
-		case PG_REGEX_STRATEGY_C:
-			return (c <= (pg_wchar) 127 &&
-					(pg_char_properties[c] & PG_ISALPHA));
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return pg_u_isalpha(c);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswalpha_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					isalpha_l((unsigned char) c, pg_regex_locale->info.lt));
-			break;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_isalpha(c);
-#endif
-			break;
-	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	if (pg_regex_locale->ctype_is_c)
+		return (c <= (pg_wchar) 127 &&
+				(pg_char_properties[c] & PG_ISALPHA));
+	else
+		return char_props(c, PG_ISALPHA, pg_regex_locale) != 0;
 }
 
 static int
 pg_wc_isalnum(pg_wchar c)
 {
-	switch (pg_regex_strategy)
-	{
-		case PG_REGEX_STRATEGY_C:
-			return (c <= (pg_wchar) 127 &&
-					(pg_char_properties[c] & PG_ISALNUM));
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return pg_u_isalnum(c, true);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswalnum_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					isalnum_l((unsigned char) c, pg_regex_locale->info.lt));
-			break;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_isalnum(c);
-#endif
-			break;
-	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	if (pg_regex_locale->ctype_is_c)
+		return (c <= (pg_wchar) 127 &&
+				(pg_char_properties[c] & PG_ISALNUM));
+	else
+		return char_props(c, PG_ISDIGIT|PG_ISALPHA, pg_regex_locale) != 0;
 }
 
 static int
@@ -394,219 +306,87 @@ pg_wc_isword(pg_wchar c)
 static int
 pg_wc_isupper(pg_wchar c)
 {
-	switch (pg_regex_strategy)
-	{
-		case PG_REGEX_STRATEGY_C:
-			return (c <= (pg_wchar) 127 &&
-					(pg_char_properties[c] & PG_ISUPPER));
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return pg_u_isupper(c);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswupper_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					isupper_l((unsigned char) c, pg_regex_locale->info.lt));
-			break;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_isupper(c);
-#endif
-			break;
-	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	if (pg_regex_locale->ctype_is_c)
+		return (c <= (pg_wchar) 127 &&
+				(pg_char_properties[c] & PG_ISUPPER));
+	else
+		return char_props(c, PG_ISUPPER, pg_regex_locale) != 0;
 }
 
 static int
 pg_wc_islower(pg_wchar c)
 {
-	switch (pg_regex_strategy)
-	{
-		case PG_REGEX_STRATEGY_C:
-			return (c <= (pg_wchar) 127 &&
-					(pg_char_properties[c] & PG_ISLOWER));
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return pg_u_islower(c);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswlower_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					islower_l((unsigned char) c, pg_regex_locale->info.lt));
-			break;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_islower(c);
-#endif
-			break;
-	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	if (pg_regex_locale->ctype_is_c)
+		return (c <= (pg_wchar) 127 &&
+				(pg_char_properties[c] & PG_ISLOWER));
+	else
+		return char_props(c, PG_ISLOWER, pg_regex_locale) != 0;
 }
 
 static int
 pg_wc_isgraph(pg_wchar c)
 {
-	switch (pg_regex_strategy)
-	{
-		case PG_REGEX_STRATEGY_C:
-			return (c <= (pg_wchar) 127 &&
-					(pg_char_properties[c] & PG_ISGRAPH));
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return pg_u_isgraph(c);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswgraph_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					isgraph_l((unsigned char) c, pg_regex_locale->info.lt));
-			break;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_isgraph(c);
-#endif
-			break;
-	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	if (pg_regex_locale->ctype_is_c)
+		return (c <= (pg_wchar) 127 &&
+				(pg_char_properties[c] & PG_ISGRAPH));
+	else
+		return char_props(c, PG_ISGRAPH, pg_regex_locale) != 0;
 }
 
 static int
 pg_wc_isprint(pg_wchar c)
 {
-	switch (pg_regex_strategy)
-	{
-		case PG_REGEX_STRATEGY_C:
-			return (c <= (pg_wchar) 127 &&
-					(pg_char_properties[c] & PG_ISPRINT));
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return pg_u_isprint(c);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswprint_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					isprint_l((unsigned char) c, pg_regex_locale->info.lt));
-			break;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_isprint(c);
-#endif
-			break;
-	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	if (pg_regex_locale->ctype_is_c)
+		return (c <= (pg_wchar) 127 &&
+				(pg_char_properties[c] & PG_ISPRINT));
+	else
+		return char_props(c, PG_ISPRINT, pg_regex_locale) != 0;
 }
 
 static int
 pg_wc_ispunct(pg_wchar c)
 {
-	switch (pg_regex_strategy)
-	{
-		case PG_REGEX_STRATEGY_C:
-			return (c <= (pg_wchar) 127 &&
-					(pg_char_properties[c] & PG_ISPUNCT));
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return pg_u_ispunct(c, true);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswpunct_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					ispunct_l((unsigned char) c, pg_regex_locale->info.lt));
-			break;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_ispunct(c);
-#endif
-			break;
-	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	if (pg_regex_locale->ctype_is_c)
+		return (c <= (pg_wchar) 127 &&
+				(pg_char_properties[c] & PG_ISPUNCT));
+	else
+		return char_props(c, PG_ISPUNCT, pg_regex_locale) != 0;
 }
 
 static int
 pg_wc_isspace(pg_wchar c)
 {
-	switch (pg_regex_strategy)
-	{
-		case PG_REGEX_STRATEGY_C:
-			return (c <= (pg_wchar) 127 &&
-					(pg_char_properties[c] & PG_ISSPACE));
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return pg_u_isspace(c);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswspace_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					isspace_l((unsigned char) c, pg_regex_locale->info.lt));
-			break;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_isspace(c);
-#endif
-			break;
-	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	if (pg_regex_locale->ctype_is_c)
+		return (c <= (pg_wchar) 127 &&
+				(pg_char_properties[c] & PG_ISSPACE));
+	else
+		return char_props(c, PG_ISSPACE, pg_regex_locale) != 0;
 }
 
 static pg_wchar
 pg_wc_toupper(pg_wchar c)
 {
-	switch (pg_regex_strategy)
+	if (pg_regex_locale->ctype_is_c)
 	{
-		case PG_REGEX_STRATEGY_C:
-			if (c <= (pg_wchar) 127)
-				return pg_ascii_toupper((unsigned char) c);
-			return c;
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return unicode_uppercase_simple(c);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return towupper_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			if (c <= (pg_wchar) UCHAR_MAX)
-				return toupper_l((unsigned char) c, pg_regex_locale->info.lt);
-			return c;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_toupper(c);
-#endif
-			break;
+		if (c <= (pg_wchar) 127)
+			return pg_ascii_toupper((unsigned char) c);
+		return c;
 	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	else
+		return pg_regex_locale->ctype->wc_toupper(c, pg_regex_locale);
 }
 
 static pg_wchar
 pg_wc_tolower(pg_wchar c)
 {
-	switch (pg_regex_strategy)
+	if (pg_regex_locale->ctype_is_c)
 	{
-		case PG_REGEX_STRATEGY_C:
-			if (c <= (pg_wchar) 127)
-				return pg_ascii_tolower((unsigned char) c);
-			return c;
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return unicode_lowercase_simple(c);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return towlower_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			if (c <= (pg_wchar) UCHAR_MAX)
-				return tolower_l((unsigned char) c, pg_regex_locale->info.lt);
-			return c;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_tolower(c);
-#endif
-			break;
+		if (c <= (pg_wchar) 127)
+			return pg_ascii_tolower((unsigned char) c);
+		return c;
 	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	else
+		return pg_regex_locale->ctype->wc_tolower(c, pg_regex_locale);
 }
 
 
@@ -732,37 +512,27 @@ pg_ctype_get_cache(pg_wc_probefunc probefunc, int cclasscode)
 	 * would always be true for production values of MAX_SIMPLE_CHR, but it's
 	 * useful to allow it to be small for testing purposes.)
 	 */
-	switch (pg_regex_strategy)
+	if (pg_regex_locale->ctype_is_c)
 	{
-		case PG_REGEX_STRATEGY_C:
 #if MAX_SIMPLE_CHR >= 127
 			max_chr = (pg_wchar) 127;
 			pcc->cv.cclasscode = -1;
 #else
 			max_chr = (pg_wchar) MAX_SIMPLE_CHR;
 #endif
-			break;
-		case PG_REGEX_STRATEGY_BUILTIN:
-			max_chr = (pg_wchar) MAX_SIMPLE_CHR;
-			break;
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			max_chr = (pg_wchar) MAX_SIMPLE_CHR;
-			break;
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
+	}
+	else
+	{
 #if MAX_SIMPLE_CHR >= UCHAR_MAX
+		if (pg_regex_locale->provider == COLLPROVIDER_LIBC &&
+			GetDatabaseEncoding() != PG_UTF8)
+		{
 			max_chr = (pg_wchar) UCHAR_MAX;
 			pcc->cv.cclasscode = -1;
-#else
-			max_chr = (pg_wchar) MAX_SIMPLE_CHR;
+		}
+		else
 #endif
-			break;
-		case PG_REGEX_STRATEGY_ICU:
 			max_chr = (pg_wchar) MAX_SIMPLE_CHR;
-			break;
-		default:
-			Assert(false);
-			max_chr = 0;		/* can't get here, but keep compiler quiet */
-			break;
 	}
 
 	/*
diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile
index edb09d4e356..85e5eaf32eb 100644
--- a/src/backend/utils/adt/Makefile
+++ b/src/backend/utils/adt/Makefile
@@ -79,6 +79,8 @@ OBJS = \
 	orderedsetaggs.o \
 	partitionfuncs.o \
 	pg_locale.o \
+	pg_locale_icu.o \
+	pg_locale_libc.o \
 	pg_lsn.o \
 	pg_upgrade_support.o \
 	pgstatfuncs.o \
diff --git a/src/backend/utils/adt/meson.build b/src/backend/utils/adt/meson.build
index 8c6fc80c373..f73f294b8f5 100644
--- a/src/backend/utils/adt/meson.build
+++ b/src/backend/utils/adt/meson.build
@@ -66,6 +66,8 @@ backend_sources += files(
   'orderedsetaggs.c',
   'partitionfuncs.c',
   'pg_locale.c',
+  'pg_locale_icu.c',
+  'pg_locale_libc.c',
   'pg_lsn.c',
   'pg_upgrade_support.c',
   'pgstatfuncs.c',
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index cfba55a6e31..1802b7a1589 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -58,6 +58,8 @@
 #include "catalog/pg_collation.h"
 #include "catalog/pg_database.h"
 #include "common/hashfn.h"
+#include "common/unicode_case.h"
+#include "common/unicode_category.h"
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
 #include "utils/builtins.h"
@@ -87,12 +89,6 @@
 #define		PGLOCALE_SUPPORT_ERROR(provider) \
 	elog(ERROR, "unsupported collprovider for %s: %c", __func__, provider)
 
-/*
- * This should be large enough that most strings will fit, but small enough
- * that we feel comfortable putting it on the stack
- */
-#define		TEXTBUFLEN			1024
-
 #define		MAX_L10N_DATA		80
 
 
@@ -119,7 +115,21 @@ char	   *localized_full_months[12 + 1];
 /* is the databases's LC_CTYPE the C locale? */
 bool		database_ctype_is_c = false;
 
-static struct pg_locale_struct default_locale;
+#ifdef USE_ICU
+extern pg_locale_t icu_dat_create_locale(HeapTuple dattuple);
+extern pg_locale_t icu_coll_create_locale(MemoryContext context,
+										  ResourceOwner resowner,
+										  HeapTuple colltuple);
+extern UCollator *pg_ucol_open(const char *loc_str);
+#endif
+
+
+extern pg_locale_t libc_dat_create_locale(HeapTuple dattuple);
+extern pg_locale_t libc_coll_create_locale(MemoryContext context,
+										   ResourceOwner resowner,
+										   HeapTuple colltuple);
+
+static pg_locale_t default_locale = NULL;
 
 /* indicates whether locale information cache is valid */
 static bool CurrentLocaleConvValid = false;
@@ -170,51 +180,48 @@ static pg_locale_t last_collation_cache_locale = NULL;
 static char *IsoLocaleName(const char *);
 #endif
 
-#ifdef USE_ICU
-/*
- * Converter object for converting between ICU's UChar strings and C strings
- * in database encoding.  Since the database encoding doesn't change, we only
- * need one of these per session.
- */
-static UConverter *icu_converter = NULL;
-
-static UCollator *pg_ucol_open(const char *loc_str);
-static void init_icu_converter(void);
-static size_t uchar_length(UConverter *converter,
-						   const char *str, int32_t len);
-static int32_t uchar_convert(UConverter *converter,
-							 UChar *dest, int32_t destlen,
-							 const char *src, int32_t srclen);
-static void icu_set_collation_attributes(UCollator *collator, const char *loc,
-										 UErrorCode *status);
-
-static void ResourceOwnerRememberUCollator(ResourceOwner owner,
-										   UCollator *collator);
-static void ResOwnerReleaseUCollator(Datum val);
-
-static const ResourceOwnerDesc UCollatorResourceKind =
-{
-	.name = "UCollator reference",
-	.release_phase = RESOURCE_RELEASE_AFTER_LOCKS,
-	.release_priority = RELEASE_PRIO_LAST,
-	.ReleaseResource = ResOwnerReleaseUCollator,
-	.DebugPrint = NULL                      /* the default message is fine */
-};
-#endif
+static int
+char_props_builtin(pg_wchar wc, int mask, pg_locale_t locale)
+{
+	int result = 0;
+
+	if ((mask & PG_ISDIGIT) && pg_u_isdigit(wc, true))
+		result |= PG_ISDIGIT;
+	if ((mask & PG_ISALPHA) && pg_u_isalpha(wc))
+		result |= PG_ISALPHA;
+	if ((mask & PG_ISUPPER) && pg_u_isupper(wc))
+		result |= PG_ISUPPER;
+	if ((mask & PG_ISLOWER) && pg_u_islower(wc))
+		result |= PG_ISLOWER;
+	if ((mask & PG_ISGRAPH) && pg_u_isgraph(wc))
+		result |= PG_ISGRAPH;
+	if ((mask & PG_ISPRINT) && pg_u_isprint(wc))
+		result |= PG_ISPRINT;
+	if ((mask & PG_ISPUNCT) && pg_u_ispunct(wc, true))
+		result |= PG_ISPUNCT;
+	if ((mask & PG_ISSPACE) && pg_u_isspace(wc))
+		result |= PG_ISSPACE;
+
+	return result;
+}
 
-static void ResourceOwnerRememberLocaleT(ResourceOwner owner,
-										 locale_t locale);
-static void ResOwnerReleaseLocaleT(Datum val);
+static pg_wchar
+toupper_builtin(pg_wchar wc, pg_locale_t locale)
+{
+	return unicode_uppercase_simple(wc);
+}
 
-static const ResourceOwnerDesc LocaleTResourceKind =
+static pg_wchar
+tolower_builtin(pg_wchar wc, pg_locale_t locale)
 {
-	.name = "locale_t reference",
-	.release_phase = RESOURCE_RELEASE_AFTER_LOCKS,
-	.release_priority = RELEASE_PRIO_LAST,
-	.ReleaseResource = ResOwnerReleaseLocaleT,
-	.DebugPrint = NULL                      /* the default message is fine */
-};
+	return unicode_lowercase_simple(wc);
+}
 
+struct ctype_methods builtin_ctype_methods = {
+	.char_props = char_props_builtin,
+	.wc_toupper = toupper_builtin,
+	.wc_tolower = tolower_builtin,
+};
 
 /*
  * POSIX doesn't define _l-variants of these functions, but several systems
@@ -1262,206 +1269,6 @@ IsoLocaleName(const char *winlocname)
 #endif							/* WIN32 && LC_MESSAGES */
 
 
-/* simple subroutine for reporting errors from newlocale() */
-static void
-report_newlocale_failure(const char *localename)
-{
-	int			save_errno;
-
-	/*
-	 * Windows doesn't provide any useful error indication from
-	 * _create_locale(), and BSD-derived platforms don't seem to feel they
-	 * need to set errno either (even though POSIX is pretty clear that
-	 * newlocale should do so).  So, if errno hasn't been set, assume ENOENT
-	 * is what to report.
-	 */
-	if (errno == 0)
-		errno = ENOENT;
-
-	/*
-	 * ENOENT means "no such locale", not "no such file", so clarify that
-	 * errno with an errdetail message.
-	 */
-	save_errno = errno;			/* auxiliary funcs might change errno */
-	ereport(ERROR,
-			(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-			 errmsg("could not create locale \"%s\": %m",
-					localename),
-			 (save_errno == ENOENT ?
-			  errdetail("The operating system could not find any locale data for the locale name \"%s\".",
-						localename) : 0)));
-}
-
-static void
-ResourceOwnerRememberLocaleT(ResourceOwner owner, locale_t locale)
-{
-	ResourceOwnerRemember(owner, PointerGetDatum(locale),
-						  &LocaleTResourceKind);
-}
-
-static void
-ResOwnerReleaseLocaleT(Datum val)
-{
-	locale_t locale = (locale_t) DatumGetPointer(val);
-	freelocale(locale);
-}
-
-/*
- * Create a locale_t with the given collation and ctype.
- *
- * The "C" and "POSIX" locales are not actually handled by libc, so return
- * NULL.
- *
- * Ensure that no path leaks a locale_t.
- */
-static locale_t
-make_libc_collator(const char *collate, const char *ctype)
-{
-	locale_t	loc = 0;
-
-	if (strcmp(collate, ctype) == 0)
-	{
-		if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
-		{
-			/* Normal case where they're the same */
-			errno = 0;
-#ifndef WIN32
-			loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collate,
-							NULL);
-#else
-			loc = _create_locale(LC_ALL, collate);
-#endif
-			if (!loc)
-				report_newlocale_failure(collate);
-		}
-	}
-	else
-	{
-#ifndef WIN32
-		/* We need two newlocale() steps */
-		locale_t	loc1 = 0;
-
-		if (strcmp(collate, "C") != 0 && strcmp(collate, "POSIX") != 0)
-		{
-			errno = 0;
-			loc1 = newlocale(LC_COLLATE_MASK, collate, NULL);
-			if (!loc1)
-				report_newlocale_failure(collate);
-		}
-
-		if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
-		{
-			errno = 0;
-			loc = newlocale(LC_CTYPE_MASK, ctype, loc1);
-			if (!loc)
-			{
-				if (loc1)
-					freelocale(loc1);
-				report_newlocale_failure(ctype);
-			}
-		}
-		else
-			loc = loc1;
-#else
-
-		/*
-		 * XXX The _create_locale() API doesn't appear to support this. Could
-		 * perhaps be worked around by changing pg_locale_t to contain two
-		 * separate fields.
-		 */
-		ereport(ERROR,
-				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-				 errmsg("collations with different collate and ctype values are not supported on this platform")));
-#endif
-	}
-
-	return loc;
-}
-
-/*
- * Create a UCollator with the given locale string and rules.
- *
- * Ensure that no path leaks a UCollator.
- */
-#ifdef USE_ICU
-static void
-ResourceOwnerRememberUCollator(ResourceOwner owner, UCollator *collator)
-{
-	ResourceOwnerRemember(owner, PointerGetDatum(collator),
-						  &UCollatorResourceKind);
-}
-
-static void
-ResOwnerReleaseUCollator(Datum val)
-{
-	UCollator *collator = (UCollator *) DatumGetPointer(val);
-	ucol_close(collator);
-}
-
-static UCollator *
-make_icu_collator(const char *iculocstr, const char *icurules)
-{
-	if (!icurules)
-	{
-		/* simple case without rules */
-		return pg_ucol_open(iculocstr);
-	}
-	else
-	{
-		UCollator  *collator_std_rules;
-		UCollator  *collator_all_rules;
-		const UChar *std_rules;
-		UChar	   *my_rules;
-		UChar	   *all_rules;
-		int32_t		length;
-		int32_t		total;
-		UErrorCode	status;
-
-		/*
-		 * If rules are specified, we extract the rules of the standard
-		 * collation, add our own rules, and make a new collator with the
-		 * combined rules.
-		 */
-		icu_to_uchar(&my_rules, icurules, strlen(icurules));
-
-		collator_std_rules = pg_ucol_open(iculocstr);
-
-		std_rules = ucol_getRules(collator_std_rules, &length);
-
-		total = u_strlen(std_rules) + u_strlen(my_rules) + 1;
-
-		/* avoid leaking collator on OOM */
-		all_rules = palloc_extended(sizeof(UChar) * total, MCXT_ALLOC_NO_OOM);
-		if (!all_rules)
-		{
-			ucol_close(collator_std_rules);
-			ereport(ERROR,
-					(errcode(ERRCODE_OUT_OF_MEMORY),
-					 errmsg("out of memory")));
-		}
-
-		u_strcpy(all_rules, std_rules);
-		u_strcat(all_rules, my_rules);
-
-		ucol_close(collator_std_rules);
-
-		status = U_ZERO_ERROR;
-		collator_all_rules = ucol_openRules(all_rules, u_strlen(all_rules),
-											UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH,
-											NULL, &status);
-		if (U_FAILURE(status))
-		{
-			ereport(ERROR,
-					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-					 errmsg("could not open collator for locale \"%s\" with rules \"%s\": %s",
-							iculocstr, icurules, u_errorName(status))));
-		}
-
-		return collator_all_rules;
-	}
-}
-#endif							/* not USE_ICU */
-
 /*
  * Initialize default_locale with database locale settings.
  */
@@ -1471,6 +1278,7 @@ init_database_collation(void)
 	HeapTuple	tup;
 	Form_pg_database dbform;
 	Datum		datum;
+	pg_locale_t	result = NULL;
 
 	/* Fetch our pg_database row normally, via syscache */
 	tup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
@@ -1487,70 +1295,38 @@ init_database_collation(void)
 
 		builtin_validate_locale(dbform->encoding, datlocale);
 
-		default_locale.collate_is_c = true;
-		default_locale.ctype_is_c = (strcmp(datlocale, "C") == 0);
-
-		default_locale.info.builtin.locale = MemoryContextStrdup(
+		result = MemoryContextAllocZero(TopMemoryContext,
+										sizeof(struct pg_locale_struct));
+		result->info.builtin.locale = MemoryContextStrdup(
 																 TopMemoryContext, datlocale);
-	}
-	else if (dbform->datlocprovider == COLLPROVIDER_ICU)
-	{
-#ifdef USE_ICU
-		char	   *datlocale;
-		char	   *icurules;
-		bool		isnull;
-
-		datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datlocale);
-		datlocale = TextDatumGetCString(datum);
-
-		default_locale.collate_is_c = false;
-		default_locale.ctype_is_c = false;
+		result->collate_is_c = true;
+		result->ctype_is_c = (strcmp(datlocale, "C") == 0);
 
-		datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_daticurules, &isnull);
-		if (!isnull)
-			icurules = TextDatumGetCString(datum);
-		else
-			icurules = NULL;
+		if (!result->ctype_is_c)
+			result->ctype = &builtin_ctype_methods;
 
-		default_locale.info.icu.locale = MemoryContextStrdup(TopMemoryContext, datlocale);
-		default_locale.info.icu.ucol = make_icu_collator(datlocale, icurules);
-#else							/* not USE_ICU */
-		/* could get here if a collation was created by a build with ICU */
-		ereport(ERROR,
-				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-				 errmsg("ICU is not supported in this build")));
-#endif							/* not USE_ICU */
 	}
+#ifdef USE_ICU
+	else if (dbform->datlocprovider == COLLPROVIDER_ICU)
+		result = icu_dat_create_locale(tup);
+#endif							/* not USE_ICU */
+	else if (dbform->datlocprovider == COLLPROVIDER_LIBC)
+		result = libc_dat_create_locale(tup);
 	else
-	{
-		const char *datcollate;
-		const char *datctype;
-
-		Assert(dbform->datlocprovider == COLLPROVIDER_LIBC);
-
-		datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datcollate);
-		datcollate = TextDatumGetCString(datum);
-		datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datctype);
-		datctype = TextDatumGetCString(datum);
-
-		default_locale.collate_is_c = (strcmp(datcollate, "C") == 0) ||
-			(strcmp(datcollate, "POSIX") == 0);
-		default_locale.ctype_is_c = (strcmp(datctype, "C") == 0) ||
-			(strcmp(datctype, "POSIX") == 0);
+		PGLOCALE_SUPPORT_ERROR(dbform->datlocprovider);
 
-		default_locale.info.lt = make_libc_collator(datcollate, datctype);
-	}
-
-	default_locale.provider = dbform->datlocprovider;
+	result->provider = dbform->datlocprovider;
 
 	/*
 	 * Default locale is currently always deterministic.  Nondeterministic
 	 * locales currently don't support pattern matching, which would break a
 	 * lot of things if applied globally.
 	 */
-	default_locale.deterministic = true;
+	result->deterministic = true;
 
 	ReleaseSysCache(tup);
+
+	default_locale = result;
 }
 
 /*
@@ -1558,12 +1334,12 @@ init_database_collation(void)
  * allocating memory.
  */
 static pg_locale_t
-create_pg_locale(MemoryContext context, ResourceOwner owner, Oid collid)
+create_pg_locale(MemoryContext context, ResourceOwner resowner, Oid collid)
 {
 	/* We haven't computed this yet in this session, so do it */
 	HeapTuple	tp;
 	Form_pg_collation collform;
-	pg_locale_t result;
+	pg_locale_t result = NULL;
 	Datum		datum;
 	bool		isnull;
 
@@ -1631,65 +1407,19 @@ create_pg_locale(MemoryContext context, ResourceOwner owner, Oid collid)
 		result->deterministic = collform->collisdeterministic;
 		result->collate_is_c = true;
 		result->ctype_is_c = (strcmp(locstr, "C") == 0);
+		if (!result->ctype_is_c)
+			result->ctype = &builtin_ctype_methods;
 		result->info.builtin.locale = MemoryContextStrdup(context,
 														  locstr);
 	}
-	else if (collform->collprovider == COLLPROVIDER_LIBC)
-	{
-		const char *collcollate;
-		const char *collctype;
-		locale_t	locale;
-
-		datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collcollate);
-		collcollate = TextDatumGetCString(datum);
-		datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collctype);
-		collctype = TextDatumGetCString(datum);
-
-		ResourceOwnerEnlarge(owner);
-		locale = make_libc_collator(collcollate, collctype);
-		if (locale)
-			ResourceOwnerRememberLocaleT(owner, locale);
-
-		result = MemoryContextAllocZero(context,
-										sizeof(struct pg_locale_struct));
-
-		result->provider = collform->collprovider;
-		result->deterministic = collform->collisdeterministic;
-		result->collate_is_c = (strcmp(collcollate, "C") == 0) ||
-			(strcmp(collcollate, "POSIX") == 0);
-		result->ctype_is_c = (strcmp(collctype, "C") == 0) ||
-			(strcmp(collctype, "POSIX") == 0);
-		result->info.lt = locale;
-	}
+#ifdef USE_ICU
 	else if (collform->collprovider == COLLPROVIDER_ICU)
-	{
-		const char *iculocstr;
-		const char *icurules;
-		UCollator  *collator;
-
-		datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
-		iculocstr = TextDatumGetCString(datum);
-
-		datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull);
-		if (!isnull)
-			icurules = TextDatumGetCString(datum);
-		else
-			icurules = NULL;
-
-		ResourceOwnerEnlarge(owner);
-		collator = make_icu_collator(iculocstr, icurules);
-		ResourceOwnerRememberUCollator(owner, collator);
-
-		result = MemoryContextAllocZero(context,
-										sizeof(struct pg_locale_struct));
-
-		result->provider = collform->collprovider;
-		result->deterministic = collform->collisdeterministic;
-		result->collate_is_c = false;
-		result->ctype_is_c = false;
-		result->info.icu.locale = MemoryContextStrdup(context, iculocstr);
-		result->info.icu.ucol = collator;
-	}
+		result = icu_coll_create_locale(context, resowner, tp);
+#endif
+	else if (collform->collprovider == COLLPROVIDER_LIBC)
+		result = libc_coll_create_locale(context, resowner, tp);
+	else
+		PGLOCALE_SUPPORT_ERROR(collform->collprovider);
 
 	ReleaseSysCache(tp);
 
@@ -1735,7 +1465,7 @@ pg_newlocale_from_collation(Oid collid)
 	bool					 found;
 
 	if (collid == DEFAULT_COLLATION_OID)
-		return &default_locale;
+		return default_locale;
 
 	if (!OidIsValid(collid))
 		elog(ERROR, "cache lookup failed for collation %u", collid);
@@ -1886,483 +1616,48 @@ get_collation_actual_version(char collprovider, const char *collcollate)
 }
 
 /*
- * strncoll_libc_win32_utf8
- *
- * Win32 does not have UTF-8. Convert UTF8 arguments to wide characters and
- * invoke wcscoll_l().
+ * pg_strcoll
  *
- * An input string length of -1 means that it's NUL-terminated.
+ * Like pg_strncoll for NUL-terminated input strings.
  */
-#ifdef WIN32
-static int
-strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2,
-						 ssize_t len2, pg_locale_t locale)
+int
+pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale)
 {
-	char		sbuf[TEXTBUFLEN];
-	char	   *buf = sbuf;
-	char	   *a1p,
-			   *a2p;
-	int			a1len;
-	int			a2len;
-	int			r;
-	int			result;
-
-	Assert(locale->provider == COLLPROVIDER_LIBC);
-	Assert(GetDatabaseEncoding() == PG_UTF8);
-#ifndef WIN32
-	Assert(false);
-#endif
-
-	if (len1 == -1)
-		len1 = strlen(arg1);
-	if (len2 == -1)
-		len2 = strlen(arg2);
-
-	a1len = len1 * 2 + 2;
-	a2len = len2 * 2 + 2;
-
-	if (a1len + a2len > TEXTBUFLEN)
-		buf = palloc(a1len + a2len);
-
-	a1p = buf;
-	a2p = buf + a1len;
-
-	/* API does not work for zero-length input */
-	if (len1 == 0)
-		r = 0;
-	else
-	{
-		r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
-								(LPWSTR) a1p, a1len / 2);
-		if (!r)
-			ereport(ERROR,
-					(errmsg("could not convert string to UTF-16: error code %lu",
-							GetLastError())));
-	}
-	((LPWSTR) a1p)[r] = 0;
-
-	if (len2 == 0)
-		r = 0;
-	else
-	{
-		r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
-								(LPWSTR) a2p, a2len / 2);
-		if (!r)
-			ereport(ERROR,
-					(errmsg("could not convert string to UTF-16: error code %lu",
-							GetLastError())));
-	}
-	((LPWSTR) a2p)[r] = 0;
-
-	errno = 0;
-	result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt);
-	if (result == 2147483647)	/* _NLSCMPERROR; missing from mingw headers */
-		ereport(ERROR,
-				(errmsg("could not compare Unicode strings: %m")));
-
-	if (buf != sbuf)
-		pfree(buf);
-
-	return result;
+	return locale->collate->strncoll(arg1, -1, arg2, -1, locale);
 }
-#endif							/* WIN32 */
 
 /*
- * strncoll_libc
+ * pg_strncoll
+ *
+ * Call ucol_strcollUTF8(), ucol_strcoll(), strcoll_l() or wcscoll_l() as
+ * appropriate for the given locale, platform, and database encoding. If the
+ * locale is not specified, use the database collation.
+ *
+ * The input strings must be encoded in the database encoding. If an input
+ * string is NUL-terminated, its length may be specified as -1.
  *
- * An input string length of -1 means that it's NUL-terminated.
+ * The caller is responsible for breaking ties if the collation is
+ * deterministic; this maintains consistency with pg_strnxfrm(), which cannot
+ * easily account for deterministic collations.
  */
-static int
-strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
-			  pg_locale_t locale)
+int
+pg_strncoll(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
+			pg_locale_t locale)
 {
-	char		sbuf[TEXTBUFLEN];
-	char	   *buf = sbuf;
-	size_t		bufsize1 = (len1 == -1) ? 0 : len1 + 1;
-	size_t		bufsize2 = (len2 == -1) ? 0 : len2 + 1;
-	const char *arg1n;
-	const char *arg2n;
-	int			result;
-
-	Assert(locale->provider == COLLPROVIDER_LIBC);
-
-#ifdef WIN32
-	/* check for this case before doing the work for nul-termination */
-	if (GetDatabaseEncoding() == PG_UTF8)
-		return strncoll_libc_win32_utf8(arg1, len1, arg2, len2, locale);
-#endif							/* WIN32 */
-
-	if (bufsize1 + bufsize2 > TEXTBUFLEN)
-		buf = palloc(bufsize1 + bufsize2);
-
-	/* nul-terminate arguments if necessary */
-	if (len1 == -1)
-	{
-		arg1n = arg1;
-	}
-	else
-	{
-		char *buf1 = buf;
-		memcpy(buf1, arg1, len1);
-		buf1[len1] = '\0';
-		arg1n = buf1;
-	}
-
-	if (len2 == -1)
-	{
-		arg2n = arg2;
-	}
-	else
-	{
-		char *buf2 = buf + bufsize1;
-		memcpy(buf2, arg2, len2);
-		buf2[len2] = '\0';
-		arg2n = buf2;
-	}
-
-	result = strcoll_l(arg1n, arg2n, locale->info.lt);
-
-	if (buf != sbuf)
-		pfree(buf);
-
-	return result;
+	return locale->collate->strncoll(arg1, len1, arg2, len2, locale);
 }
 
-#ifdef USE_ICU
-
 /*
- * strncoll_icu_no_utf8
+ * Return true if the collation provider supports pg_strxfrm() and
+ * pg_strnxfrm(); otherwise false.
  *
- * Convert the arguments from the database encoding to UChar strings, then
- * call ucol_strcoll(). An argument length of -1 means that the string is
- * NUL-terminated.
- *
- * When the database encoding is UTF-8, and ICU supports ucol_strcollUTF8(),
- * caller should call that instead.
- */
-static int
-strncoll_icu_no_utf8(const char *arg1, ssize_t len1,
-					 const char *arg2, ssize_t len2, pg_locale_t locale)
-{
-	char		sbuf[TEXTBUFLEN];
-	char	   *buf = sbuf;
-	int32_t		ulen1;
-	int32_t		ulen2;
-	size_t		bufsize1;
-	size_t		bufsize2;
-	UChar	   *uchar1,
-			   *uchar2;
-	int			result;
-
-	Assert(locale->provider == COLLPROVIDER_ICU);
-#ifdef HAVE_UCOL_STRCOLLUTF8
-	Assert(GetDatabaseEncoding() != PG_UTF8);
-#endif
-
-	init_icu_converter();
-
-	ulen1 = uchar_length(icu_converter, arg1, len1);
-	ulen2 = uchar_length(icu_converter, arg2, len2);
-
-	bufsize1 = (ulen1 + 1) * sizeof(UChar);
-	bufsize2 = (ulen2 + 1) * sizeof(UChar);
-
-	if (bufsize1 + bufsize2 > TEXTBUFLEN)
-		buf = palloc(bufsize1 + bufsize2);
-
-	uchar1 = (UChar *) buf;
-	uchar2 = (UChar *) (buf + bufsize1);
-
-	ulen1 = uchar_convert(icu_converter, uchar1, ulen1 + 1, arg1, len1);
-	ulen2 = uchar_convert(icu_converter, uchar2, ulen2 + 1, arg2, len2);
-
-	result = ucol_strcoll(locale->info.icu.ucol,
-						  uchar1, ulen1,
-						  uchar2, ulen2);
-
-	if (buf != sbuf)
-		pfree(buf);
-
-	return result;
-}
-
-/*
- * strncoll_icu
- *
- * Call ucol_strcollUTF8() or ucol_strcoll() as appropriate for the given
- * database encoding. An argument length of -1 means the string is
- * NUL-terminated.
- */
-static int
-strncoll_icu(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
-			 pg_locale_t locale)
-{
-	int			result;
-
-	Assert(locale->provider == COLLPROVIDER_ICU);
-
-#ifdef HAVE_UCOL_STRCOLLUTF8
-	if (GetDatabaseEncoding() == PG_UTF8)
-	{
-		UErrorCode	status;
-
-		status = U_ZERO_ERROR;
-		result = ucol_strcollUTF8(locale->info.icu.ucol,
-								  arg1, len1,
-								  arg2, len2,
-								  &status);
-		if (U_FAILURE(status))
-			ereport(ERROR,
-					(errmsg("collation failed: %s", u_errorName(status))));
-	}
-	else
-#endif
-	{
-		result = strncoll_icu_no_utf8(arg1, len1, arg2, len2, locale);
-	}
-
-	return result;
-}
-
-#endif							/* USE_ICU */
-
-/*
- * pg_strcoll
- *
- * Like pg_strncoll for NUL-terminated input strings.
- */
-int
-pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale)
-{
-	int			result;
-
-	if (locale->provider == COLLPROVIDER_LIBC)
-		result = strncoll_libc(arg1, -1, arg2, -1, locale);
-#ifdef USE_ICU
-	else if (locale->provider == COLLPROVIDER_ICU)
-		result = strncoll_icu(arg1, -1, arg2, -1, locale);
-#endif
-	else
-		/* shouldn't happen */
-		PGLOCALE_SUPPORT_ERROR(locale->provider);
-
-	return result;
-}
-
-/*
- * pg_strncoll
- *
- * Call ucol_strcollUTF8(), ucol_strcoll(), strcoll_l() or wcscoll_l() as
- * appropriate for the given locale, platform, and database encoding. If the
- * locale is not specified, use the database collation.
- *
- * The input strings must be encoded in the database encoding. If an input
- * string is NUL-terminated, its length may be specified as -1.
- *
- * The caller is responsible for breaking ties if the collation is
- * deterministic; this maintains consistency with pg_strnxfrm(), which cannot
- * easily account for deterministic collations.
- */
-int
-pg_strncoll(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
-			pg_locale_t locale)
-{
-	int			result;
-
-	if (locale->provider == COLLPROVIDER_LIBC)
-		result = strncoll_libc(arg1, len1, arg2, len2, locale);
-#ifdef USE_ICU
-	else if (locale->provider == COLLPROVIDER_ICU)
-		result = strncoll_icu(arg1, len1, arg2, len2, locale);
-#endif
-	else
-		/* shouldn't happen */
-		PGLOCALE_SUPPORT_ERROR(locale->provider);
-
-	return result;
-}
-
-
-static size_t
-strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen,
-			  pg_locale_t locale)
-{
-	char		sbuf[TEXTBUFLEN];
-	char	   *buf = sbuf;
-	size_t		bufsize = srclen + 1;
-	size_t		result;
-
-	Assert(locale->provider == COLLPROVIDER_LIBC);
-
-	if (srclen == -1)
-		return strxfrm_l(dest, src, destsize, locale->info.lt);
-
-	if (bufsize > TEXTBUFLEN)
-		buf = palloc(bufsize);
-
-	/* nul-terminate argument */
-	memcpy(buf, src, srclen);
-	buf[srclen] = '\0';
-
-	result = strxfrm_l(dest, buf, destsize, locale->info.lt);
-
-	if (buf != sbuf)
-		pfree(buf);
-
-	/* if dest is defined, it should be nul-terminated */
-	Assert(result >= destsize || dest[result] == '\0');
-
-	return result;
-}
-
-#ifdef USE_ICU
-
-/* 'srclen' of -1 means the strings are NUL-terminated */
-static size_t
-strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
-			 pg_locale_t locale)
-{
-	char		sbuf[TEXTBUFLEN];
-	char	   *buf = sbuf;
-	UChar	   *uchar;
-	int32_t		ulen;
-	size_t		uchar_bsize;
-	Size		result_bsize;
-
-	Assert(locale->provider == COLLPROVIDER_ICU);
-
-	init_icu_converter();
-
-	ulen = uchar_length(icu_converter, src, srclen);
-
-	uchar_bsize = (ulen + 1) * sizeof(UChar);
-
-	if (uchar_bsize > TEXTBUFLEN)
-		buf = palloc(uchar_bsize);
-
-	uchar = (UChar *) buf;
-
-	ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
-
-	result_bsize = ucol_getSortKey(locale->info.icu.ucol,
-								   uchar, ulen,
-								   (uint8_t *) dest, destsize);
-
-	/*
-	 * ucol_getSortKey() counts the nul-terminator in the result length, but
-	 * this function should not.
-	 */
-	Assert(result_bsize > 0);
-	result_bsize--;
-
-	if (buf != sbuf)
-		pfree(buf);
-
-	/* if dest is defined, it should be nul-terminated */
-	Assert(result_bsize >= destsize || dest[result_bsize] == '\0');
-
-	return result_bsize;
-}
-
-/* 'srclen' of -1 means the strings are NUL-terminated */
-static size_t
-strnxfrm_prefix_icu_no_utf8(char *dest,size_t destsize,
-							const char *src, ssize_t srclen,
-							pg_locale_t locale)
-{
-	char		sbuf[TEXTBUFLEN];
-	char	   *buf = sbuf;
-	UCharIterator iter;
-	uint32_t	state[2];
-	UErrorCode	status;
-	int32_t		ulen = -1;
-	UChar	   *uchar = NULL;
-	size_t		uchar_bsize;
-	Size		result_bsize;
-
-	Assert(locale->provider == COLLPROVIDER_ICU);
-	Assert(GetDatabaseEncoding() != PG_UTF8);
-
-	init_icu_converter();
-
-	ulen = uchar_length(icu_converter, src, srclen);
-
-	uchar_bsize = (ulen + 1) * sizeof(UChar);
-
-	if (uchar_bsize > TEXTBUFLEN)
-		buf = palloc(uchar_bsize);
-
-	uchar = (UChar *) buf;
-
-	ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
-
-	uiter_setString(&iter, uchar, ulen);
-	state[0] = state[1] = 0;	/* won't need that again */
-	status = U_ZERO_ERROR;
-	result_bsize = ucol_nextSortKeyPart(locale->info.icu.ucol,
-										&iter,
-										state,
-										(uint8_t *) dest,
-										destsize,
-										&status);
-	if (U_FAILURE(status))
-		ereport(ERROR,
-				(errmsg("sort key generation failed: %s",
-						u_errorName(status))));
-
-	return result_bsize;
-}
-
-/* 'srclen' of -1 means the strings are NUL-terminated */
-static size_t
-strnxfrm_prefix_icu(char *dest, size_t destsize,
-					const char *src, ssize_t srclen,
-					pg_locale_t locale)
-{
-	size_t		result;
-
-	Assert(locale->provider == COLLPROVIDER_ICU);
-
-	if (GetDatabaseEncoding() == PG_UTF8)
-	{
-		UCharIterator iter;
-		uint32_t	state[2];
-		UErrorCode	status;
-
-		uiter_setUTF8(&iter, src, srclen);
-		state[0] = state[1] = 0;	/* won't need that again */
-		status = U_ZERO_ERROR;
-		result = ucol_nextSortKeyPart(locale->info.icu.ucol,
-									  &iter,
-									  state,
-									  (uint8_t *) dest,
-									  destsize,
-									  &status);
-		if (U_FAILURE(status))
-			ereport(ERROR,
-					(errmsg("sort key generation failed: %s",
-							u_errorName(status))));
-	}
-	else
-		result = strnxfrm_prefix_icu_no_utf8(dest, destsize, src, srclen,
-											 locale);
-
-	return result;
-}
-
-#endif
-
-/*
- * Return true if the collation provider supports pg_strxfrm() and
- * pg_strnxfrm(); otherwise false.
- *
- * Unfortunately, it seems that strxfrm() for non-C collations is broken on
- * many common platforms; testing of multiple versions of glibc reveals that,
- * for many locales, strcoll() and strxfrm() do not return consistent
- * results. While no other libc other than Cygwin has so far been shown to
- * have a problem, we take the conservative course of action for right now and
- * disable this categorically.  (Users who are certain this isn't a problem on
- * their system can define TRUST_STRXFRM.)
+ * Unfortunately, it seems that strxfrm() for non-C collations is broken on
+ * many common platforms; testing of multiple versions of glibc reveals that,
+ * for many locales, strcoll() and strxfrm() do not return consistent
+ * results. While no other libc other than Cygwin has so far been shown to
+ * have a problem, we take the conservative course of action for right now and
+ * disable this categorically.  (Users who are certain this isn't a problem on
+ * their system can define TRUST_STRXFRM.)
  *
  * No similar problem is known for the ICU provider.
  */
@@ -2392,19 +1687,7 @@ pg_strxfrm_enabled(pg_locale_t locale)
 size_t
 pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale)
 {
-	size_t		result = 0;		/* keep compiler quiet */
-
-	if (locale->provider == COLLPROVIDER_LIBC)
-		result = strnxfrm_libc(dest, destsize, src, -1, locale);
-#ifdef USE_ICU
-	else if (locale->provider == COLLPROVIDER_ICU)
-		result = strnxfrm_icu(dest, destsize, src, -1, locale);
-#endif
-	else
-		/* shouldn't happen */
-		PGLOCALE_SUPPORT_ERROR(locale->provider);
-
-	return result;
+	return locale->collate->strnxfrm(dest, destsize, src, -1, locale);
 }
 
 /*
@@ -2430,19 +1713,7 @@ size_t
 pg_strnxfrm(char *dest, size_t destsize, const char *src, ssize_t srclen,
 			pg_locale_t locale)
 {
-	size_t		result = 0;		/* keep compiler quiet */
-
-	if (locale->provider == COLLPROVIDER_LIBC)
-		result = strnxfrm_libc(dest, src, srclen, destsize, locale);
-#ifdef USE_ICU
-	else if (locale->provider == COLLPROVIDER_ICU)
-		result = strnxfrm_icu(dest, src, srclen, destsize, locale);
-#endif
-	else
-		/* shouldn't happen */
-		PGLOCALE_SUPPORT_ERROR(locale->provider);
-
-	return result;
+	return locale->collate->strnxfrm(dest, destsize, src, srclen, locale);
 }
 
 /*
@@ -2472,7 +1743,7 @@ size_t
 pg_strxfrm_prefix(char *dest, const char *src, size_t destsize,
 				  pg_locale_t locale)
 {
-	return pg_strnxfrm_prefix(dest, destsize, src, -1, locale);
+	return locale->collate->strnxfrm_prefix(dest, destsize, src, -1, locale);
 }
 
 /*
@@ -2497,16 +1768,9 @@ size_t
 pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
 				   ssize_t srclen, pg_locale_t locale)
 {
-	size_t		result = 0;		/* keep compiler quiet */
-
-#ifdef USE_ICU
-	if (locale->provider == COLLPROVIDER_ICU)
-		result = strnxfrm_prefix_icu(dest, src, -1, destsize, locale);
-	else
-#endif
-		PGLOCALE_SUPPORT_ERROR(locale->provider);
-
-	return result;
+	return locale->collate->strnxfrm_prefix(dest, destsize,
+											src, srclen,
+											locale);
 }
 
 /*
@@ -2561,356 +1825,6 @@ builtin_validate_locale(int encoding, const char *locale)
 	return canonical_name;
 }
 
-
-#ifdef USE_ICU
-
-/*
- * Wrapper around ucol_open() to handle API differences for older ICU
- * versions.
- *
- * Ensure that no path leaks a UCollator.
- */
-static UCollator *
-pg_ucol_open(const char *loc_str)
-{
-	UCollator  *collator;
-	UErrorCode	status;
-	const char *orig_str = loc_str;
-	char	   *fixed_str = NULL;
-
-	/*
-	 * Must never open default collator, because it depends on the environment
-	 * and may change at any time. Should not happen, but check here to catch
-	 * bugs that might be hard to catch otherwise.
-	 *
-	 * NB: the default collator is not the same as the collator for the root
-	 * locale. The root locale may be specified as the empty string, "und", or
-	 * "root". The default collator is opened by passing NULL to ucol_open().
-	 */
-	if (loc_str == NULL)
-		elog(ERROR, "opening default collator is not supported");
-
-	/*
-	 * In ICU versions 54 and earlier, "und" is not a recognized spelling of
-	 * the root locale. If the first component of the locale is "und", replace
-	 * with "root" before opening.
-	 */
-	if (U_ICU_VERSION_MAJOR_NUM < 55)
-	{
-		char		lang[ULOC_LANG_CAPACITY];
-
-		status = U_ZERO_ERROR;
-		uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
-		if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
-		{
-			ereport(ERROR,
-					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-					 errmsg("could not get language from locale \"%s\": %s",
-							loc_str, u_errorName(status))));
-		}
-
-		if (strcmp(lang, "und") == 0)
-		{
-			const char *remainder = loc_str + strlen("und");
-
-			fixed_str = palloc(strlen("root") + strlen(remainder) + 1);
-			strcpy(fixed_str, "root");
-			strcat(fixed_str, remainder);
-
-			loc_str = fixed_str;
-		}
-	}
-
-	status = U_ZERO_ERROR;
-	collator = ucol_open(loc_str, &status);
-	if (U_FAILURE(status))
-		ereport(ERROR,
-		/* use original string for error report */
-				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-				 errmsg("could not open collator for locale \"%s\": %s",
-						orig_str, u_errorName(status))));
-
-	if (U_ICU_VERSION_MAJOR_NUM < 54)
-	{
-		status = U_ZERO_ERROR;
-		icu_set_collation_attributes(collator, loc_str, &status);
-
-		/*
-		 * Pretend the error came from ucol_open(), for consistent error
-		 * message across ICU versions.
-		 */
-		if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
-		{
-			ucol_close(collator);
-			ereport(ERROR,
-					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-					 errmsg("could not open collator for locale \"%s\": %s",
-							orig_str, u_errorName(status))));
-		}
-	}
-
-	if (fixed_str != NULL)
-		pfree(fixed_str);
-
-	return collator;
-}
-
-static void
-init_icu_converter(void)
-{
-	const char *icu_encoding_name;
-	UErrorCode	status;
-	UConverter *conv;
-
-	if (icu_converter)
-		return;					/* already done */
-
-	icu_encoding_name = get_encoding_name_for_icu(GetDatabaseEncoding());
-	if (!icu_encoding_name)
-		ereport(ERROR,
-				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-				 errmsg("encoding \"%s\" not supported by ICU",
-						pg_encoding_to_char(GetDatabaseEncoding()))));
-
-	status = U_ZERO_ERROR;
-	conv = ucnv_open(icu_encoding_name, &status);
-	if (U_FAILURE(status))
-		ereport(ERROR,
-				(errmsg("could not open ICU converter for encoding \"%s\": %s",
-						icu_encoding_name, u_errorName(status))));
-
-	icu_converter = conv;
-}
-
-/*
- * Find length, in UChars, of given string if converted to UChar string.
- *
- * A length of -1 indicates that the input string is NUL-terminated.
- */
-static size_t
-uchar_length(UConverter *converter, const char *str, int32_t len)
-{
-	UErrorCode	status = U_ZERO_ERROR;
-	int32_t		ulen;
-
-	ulen = ucnv_toUChars(converter, NULL, 0, str, len, &status);
-	if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
-		ereport(ERROR,
-				(errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
-	return ulen;
-}
-
-/*
- * Convert the given source string into a UChar string, stored in dest, and
- * return the length (in UChars).
- *
- * A srclen of -1 indicates that the input string is NUL-terminated.
- */
-static int32_t
-uchar_convert(UConverter *converter, UChar *dest, int32_t destlen,
-			  const char *src, int32_t srclen)
-{
-	UErrorCode	status = U_ZERO_ERROR;
-	int32_t		ulen;
-
-	status = U_ZERO_ERROR;
-	ulen = ucnv_toUChars(converter, dest, destlen, src, srclen, &status);
-	if (U_FAILURE(status))
-		ereport(ERROR,
-				(errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
-	return ulen;
-}
-
-/*
- * Convert a string in the database encoding into a string of UChars.
- *
- * The source string at buff is of length nbytes
- * (it needn't be nul-terminated)
- *
- * *buff_uchar receives a pointer to the palloc'd result string, and
- * the function's result is the number of UChars generated.
- *
- * The result string is nul-terminated, though most callers rely on the
- * result length instead.
- */
-int32_t
-icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
-{
-	int32_t		len_uchar;
-
-	init_icu_converter();
-
-	len_uchar = uchar_length(icu_converter, buff, nbytes);
-
-	*buff_uchar = palloc((len_uchar + 1) * sizeof(**buff_uchar));
-	len_uchar = uchar_convert(icu_converter,
-							  *buff_uchar, len_uchar + 1, buff, nbytes);
-
-	return len_uchar;
-}
-
-/*
- * Convert a string of UChars into the database encoding.
- *
- * The source string at buff_uchar is of length len_uchar
- * (it needn't be nul-terminated)
- *
- * *result receives a pointer to the palloc'd result string, and the
- * function's result is the number of bytes generated (not counting nul).
- *
- * The result string is nul-terminated.
- */
-int32_t
-icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
-{
-	UErrorCode	status;
-	int32_t		len_result;
-
-	init_icu_converter();
-
-	status = U_ZERO_ERROR;
-	len_result = ucnv_fromUChars(icu_converter, NULL, 0,
-								 buff_uchar, len_uchar, &status);
-	if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
-		ereport(ERROR,
-				(errmsg("%s failed: %s", "ucnv_fromUChars",
-						u_errorName(status))));
-
-	*result = palloc(len_result + 1);
-
-	status = U_ZERO_ERROR;
-	len_result = ucnv_fromUChars(icu_converter, *result, len_result + 1,
-								 buff_uchar, len_uchar, &status);
-	if (U_FAILURE(status) ||
-		status == U_STRING_NOT_TERMINATED_WARNING)
-		ereport(ERROR,
-				(errmsg("%s failed: %s", "ucnv_fromUChars",
-						u_errorName(status))));
-
-	return len_result;
-}
-
-/*
- * Parse collation attributes from the given locale string and apply them to
- * the open collator.
- *
- * First, the locale string is canonicalized to an ICU format locale ID such
- * as "und@colStrength=primary;colCaseLevel=yes". Then, it parses and applies
- * the key-value arguments.
- *
- * Starting with ICU version 54, the attributes are processed automatically by
- * ucol_open(), so this is only necessary for emulating this behavior on older
- * versions.
- */
-pg_attribute_unused()
-static void
-icu_set_collation_attributes(UCollator *collator, const char *loc,
-							 UErrorCode *status)
-{
-	int32_t		len;
-	char	   *icu_locale_id;
-	char	   *lower_str;
-	char	   *str;
-	char	   *token;
-
-	/*
-	 * The input locale may be a BCP 47 language tag, e.g.
-	 * "und-u-kc-ks-level1", which expresses the same attributes in a
-	 * different form. It will be converted to the equivalent ICU format
-	 * locale ID, e.g. "und@colcaselevel=yes;colstrength=primary", by
-	 * uloc_canonicalize().
-	 */
-	*status = U_ZERO_ERROR;
-	len = uloc_canonicalize(loc, NULL, 0, status);
-	icu_locale_id = palloc(len + 1);
-	*status = U_ZERO_ERROR;
-	len = uloc_canonicalize(loc, icu_locale_id, len + 1, status);
-	if (U_FAILURE(*status) || *status == U_STRING_NOT_TERMINATED_WARNING)
-		return;
-
-	lower_str = asc_tolower(icu_locale_id, strlen(icu_locale_id));
-
-	pfree(icu_locale_id);
-
-	str = strchr(lower_str, '@');
-	if (!str)
-		return;
-	str++;
-
-	while ((token = strsep(&str, ";")))
-	{
-		char	   *e = strchr(token, '=');
-
-		if (e)
-		{
-			char	   *name;
-			char	   *value;
-			UColAttribute uattr;
-			UColAttributeValue uvalue;
-
-			*status = U_ZERO_ERROR;
-
-			*e = '\0';
-			name = token;
-			value = e + 1;
-
-			/*
-			 * See attribute name and value lists in ICU i18n/coll.cpp
-			 */
-			if (strcmp(name, "colstrength") == 0)
-				uattr = UCOL_STRENGTH;
-			else if (strcmp(name, "colbackwards") == 0)
-				uattr = UCOL_FRENCH_COLLATION;
-			else if (strcmp(name, "colcaselevel") == 0)
-				uattr = UCOL_CASE_LEVEL;
-			else if (strcmp(name, "colcasefirst") == 0)
-				uattr = UCOL_CASE_FIRST;
-			else if (strcmp(name, "colalternate") == 0)
-				uattr = UCOL_ALTERNATE_HANDLING;
-			else if (strcmp(name, "colnormalization") == 0)
-				uattr = UCOL_NORMALIZATION_MODE;
-			else if (strcmp(name, "colnumeric") == 0)
-				uattr = UCOL_NUMERIC_COLLATION;
-			else
-				/* ignore if unknown */
-				continue;
-
-			if (strcmp(value, "primary") == 0)
-				uvalue = UCOL_PRIMARY;
-			else if (strcmp(value, "secondary") == 0)
-				uvalue = UCOL_SECONDARY;
-			else if (strcmp(value, "tertiary") == 0)
-				uvalue = UCOL_TERTIARY;
-			else if (strcmp(value, "quaternary") == 0)
-				uvalue = UCOL_QUATERNARY;
-			else if (strcmp(value, "identical") == 0)
-				uvalue = UCOL_IDENTICAL;
-			else if (strcmp(value, "no") == 0)
-				uvalue = UCOL_OFF;
-			else if (strcmp(value, "yes") == 0)
-				uvalue = UCOL_ON;
-			else if (strcmp(value, "shifted") == 0)
-				uvalue = UCOL_SHIFTED;
-			else if (strcmp(value, "non-ignorable") == 0)
-				uvalue = UCOL_NON_IGNORABLE;
-			else if (strcmp(value, "lower") == 0)
-				uvalue = UCOL_LOWER_FIRST;
-			else if (strcmp(value, "upper") == 0)
-				uvalue = UCOL_UPPER_FIRST;
-			else
-			{
-				*status = U_ILLEGAL_ARGUMENT_ERROR;
-				break;
-			}
-
-			ucol_setAttribute(collator, uattr, uvalue, status);
-		}
-	}
-
-	pfree(lower_str);
-}
-#endif
-
 /*
  * Return the BCP47 language tag representation of the requested locale.
  *
@@ -3049,6 +1963,16 @@ icu_validate_locale(const char *loc_str)
 #endif							/* not USE_ICU */
 }
 
+/*
+ *
+ *TODO: add caching?
+ */
+int
+char_props(pg_wchar wc, int mask, pg_locale_t locale)
+{
+	return locale->ctype->char_props(wc, mask, locale);
+}
+
 /*
  * These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
  * Therefore we keep them here rather than with the mbutils code.
diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c
new file mode 100644
index 00000000000..a9e8b4b642b
--- /dev/null
+++ b/src/backend/utils/adt/pg_locale_icu.c
@@ -0,0 +1,873 @@
+/*-----------------------------------------------------------------------
+ *
+ * PostgreSQL locale utilities for ICU
+ *
+ * Portions Copyright (c) 2002-2024, PostgreSQL Global Development Group
+ *
+ * src/backend/utils/adt/pg_locale_libc.c
+ *
+ *-----------------------------------------------------------------------
+ */
+
+
+#include "postgres.h"
+
+#ifdef USE_ICU
+
+#include <unicode/ucnv.h>
+#include <unicode/ustring.h>
+
+#include "access/htup_details.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_database.h"
+#include "utils/builtins.h"
+#include "utils/formatting.h"
+#include "utils/memutils.h"
+#include "utils/pg_locale.h"
+#include "utils/resowner.h"
+#include "utils/syscache.h"
+
+/*
+ * This should be large enough that most strings will fit, but small enough
+ * that we feel comfortable putting it on the stack
+ */
+#define		TEXTBUFLEN			1024
+
+extern pg_locale_t icu_dat_create_locale(HeapTuple dattuple);
+extern pg_locale_t icu_coll_create_locale(MemoryContext context,
+										  ResourceOwner resowner,
+										  HeapTuple colltuple);
+extern UCollator *pg_ucol_open(const char *loc_str);
+
+
+static UCollator * make_icu_collator(const char *iculocstr,
+									 const char *icurules);
+
+static int strncoll_icu(const char *arg1, ssize_t len1,
+						const char *arg2, ssize_t len2,
+						pg_locale_t locale);
+static size_t strnxfrm_icu(char *dest, size_t destsize,
+						   const char *src, ssize_t srclen,
+						   pg_locale_t locale);
+static size_t strnxfrm_prefix_icu(char *dest, size_t destsize,
+								  const char *src, ssize_t srclen,
+								  pg_locale_t locale);
+
+static void ResourceOwnerRememberUCollator(ResourceOwner owner,
+										   UCollator *collator);
+static void ResOwnerReleaseUCollator(Datum val);
+
+static void init_icu_converter(void);
+static size_t uchar_length(UConverter *converter,
+						   const char *str, int32_t len);
+static int32_t uchar_convert(UConverter *converter,
+							 UChar *dest, int32_t destlen,
+							 const char *src, int32_t srclen);
+static void icu_set_collation_attributes(UCollator *collator, const char *loc,
+										 UErrorCode *status);
+
+/*
+ * Converter object for converting between ICU's UChar strings and C strings
+ * in database encoding.  Since the database encoding doesn't change, we only
+ * need one of these per session.
+ */
+static UConverter *icu_converter = NULL;
+
+static const ResourceOwnerDesc UCollatorResourceKind =
+{
+	.name = "UCollator reference",
+	.release_phase = RESOURCE_RELEASE_AFTER_LOCKS,
+	.release_priority = RELEASE_PRIO_LAST,
+	.ReleaseResource = ResOwnerReleaseUCollator,
+	.DebugPrint = NULL                      /* the default message is fine */
+};
+
+
+static int
+char_props_icu(pg_wchar wc, int mask, pg_locale_t locale)
+{
+	int result = 0;
+
+	if ((mask & PG_ISDIGIT) && u_isdigit(wc))
+		result |= PG_ISDIGIT;
+	if ((mask & PG_ISALPHA) && u_isalpha(wc))
+		result |= PG_ISALPHA;
+	if ((mask & PG_ISUPPER) && u_isupper(wc))
+		result |= PG_ISUPPER;
+	if ((mask & PG_ISLOWER) && u_islower(wc))
+		result |= PG_ISLOWER;
+	if ((mask & PG_ISGRAPH) && u_isgraph(wc))
+		result |= PG_ISGRAPH;
+	if ((mask & PG_ISPRINT) && u_isprint(wc))
+		result |= PG_ISPRINT;
+	if ((mask & PG_ISPUNCT) && u_ispunct(wc))
+		result |= PG_ISPUNCT;
+	if ((mask & PG_ISSPACE) && u_isspace(wc))
+		result |= PG_ISSPACE;
+
+	return result;
+}
+
+static pg_wchar
+toupper_icu(pg_wchar wc, pg_locale_t locale)
+{
+	return u_toupper(wc);
+}
+
+static pg_wchar
+tolower_icu(pg_wchar wc, pg_locale_t locale)
+{
+	return u_tolower(wc);
+}
+
+struct collate_methods icu_collate_methods = {
+	.strncoll = strncoll_icu,
+	.strnxfrm = strnxfrm_icu,
+	.strnxfrm_prefix = strnxfrm_prefix_icu,
+};
+
+struct ctype_methods icu_ctype_methods = {
+	.char_props = char_props_icu,
+	.wc_toupper = toupper_icu,
+	.wc_tolower = tolower_icu,
+};
+
+static void
+ResourceOwnerRememberUCollator(ResourceOwner owner, UCollator *collator)
+{
+	ResourceOwnerRemember(owner, PointerGetDatum(collator),
+						  &UCollatorResourceKind);
+}
+
+static void
+ResOwnerReleaseUCollator(Datum val)
+{
+	UCollator *collator = (UCollator *) DatumGetPointer(val);
+	ucol_close(collator);
+}
+
+pg_locale_t
+icu_dat_create_locale(HeapTuple dattuple)
+{
+	Form_pg_database dbform;
+	Datum		datum;
+	char	   *datlocale;
+	char	   *icurules;
+	bool		isnull;
+	pg_locale_t result;
+
+	dbform = (Form_pg_database) GETSTRUCT(dattuple);
+
+	datum = SysCacheGetAttrNotNull(DATABASEOID, dattuple, Anum_pg_database_datlocale);
+	datlocale = TextDatumGetCString(datum);
+
+	datum = SysCacheGetAttr(DATABASEOID, dattuple, Anum_pg_database_daticurules, &isnull);
+	if (!isnull)
+		icurules = TextDatumGetCString(datum);
+	else
+		icurules = NULL;
+
+	result = MemoryContextAllocZero(TopMemoryContext,
+									sizeof(struct pg_locale_struct));
+
+	result->info.icu.locale = MemoryContextStrdup(TopMemoryContext,
+												  datlocale);
+	result->provider = dbform->datlocprovider;
+	result->deterministic = true;
+	result->collate_is_c = false;
+	result->ctype_is_c = false;
+	result->collate = &icu_collate_methods;
+	result->ctype = &icu_ctype_methods;
+	result->info.icu.ucol = make_icu_collator(datlocale, icurules);
+
+	return result;
+}
+
+pg_locale_t
+icu_coll_create_locale(MemoryContext context, ResourceOwner resowner,
+					   HeapTuple colltuple)
+{
+	Form_pg_collation collform;
+	Datum		datum;
+	bool		isnull;
+	const char *iculocstr;
+	const char *icurules;
+	UCollator  *collator;
+	pg_locale_t result;
+
+	collform = (Form_pg_collation) GETSTRUCT(colltuple);
+
+	Assert(collform->collprovider == COLLPROVIDER_ICU);
+
+	datum = SysCacheGetAttrNotNull(COLLOID, colltuple, Anum_pg_collation_colllocale);
+	iculocstr = TextDatumGetCString(datum);
+
+	datum = SysCacheGetAttr(COLLOID, colltuple, Anum_pg_collation_collicurules, &isnull);
+	if (!isnull)
+		icurules = TextDatumGetCString(datum);
+	else
+		icurules = NULL;
+
+	ResourceOwnerEnlarge(resowner);
+	collator = make_icu_collator(iculocstr, icurules);
+	ResourceOwnerRememberUCollator(resowner, collator);
+
+	result = MemoryContextAllocZero(context,
+									sizeof(struct pg_locale_struct));
+
+	result->info.icu.locale = MemoryContextStrdup(context, iculocstr);
+	result->provider = collform->collprovider;
+	result->deterministic = collform->collisdeterministic;
+	result->collate_is_c = false;
+	result->ctype_is_c = false;
+	result->collate = &icu_collate_methods;
+	result->ctype = &icu_ctype_methods;
+	result->info.icu.ucol = collator;
+
+	return result;
+}
+
+/*
+ * Create a UCollator with the given locale string and rules.
+ *
+ * Ensure that no path leaks a UCollator.
+ */
+static UCollator *
+make_icu_collator(const char *iculocstr, const char *icurules)
+{
+	if (!icurules)
+	{
+		/* simple case without rules */
+		return pg_ucol_open(iculocstr);
+	}
+	else
+	{
+		UCollator  *collator_std_rules;
+		UCollator  *collator_all_rules;
+		const UChar *std_rules;
+		UChar	   *my_rules;
+		UChar	   *all_rules;
+		int32_t		length;
+		int32_t		total;
+		UErrorCode	status;
+
+		/*
+		 * If rules are specified, we extract the rules of the standard
+		 * collation, add our own rules, and make a new collator with the
+		 * combined rules.
+		 */
+		icu_to_uchar(&my_rules, icurules, strlen(icurules));
+
+		collator_std_rules = pg_ucol_open(iculocstr);
+
+		std_rules = ucol_getRules(collator_std_rules, &length);
+
+		total = u_strlen(std_rules) + u_strlen(my_rules) + 1;
+
+		/* avoid leaking collator on OOM */
+		all_rules = palloc_extended(sizeof(UChar) * total, MCXT_ALLOC_NO_OOM);
+		if (!all_rules)
+		{
+			ucol_close(collator_std_rules);
+			ereport(ERROR,
+					(errcode(ERRCODE_OUT_OF_MEMORY),
+					 errmsg("out of memory")));
+		}
+
+		u_strcpy(all_rules, std_rules);
+		u_strcat(all_rules, my_rules);
+
+		ucol_close(collator_std_rules);
+
+		status = U_ZERO_ERROR;
+		collator_all_rules = ucol_openRules(all_rules, u_strlen(all_rules),
+											UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH,
+											NULL, &status);
+		if (U_FAILURE(status))
+		{
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("could not open collator for locale \"%s\" with rules \"%s\": %s",
+							iculocstr, icurules, u_errorName(status))));
+		}
+
+		return collator_all_rules;
+	}
+}
+
+
+/*
+ * strncoll_icu_no_utf8
+ *
+ * Convert the arguments from the database encoding to UChar strings, then
+ * call ucol_strcoll(). An argument length of -1 means that the string is
+ * NUL-terminated.
+ *
+ * When the database encoding is UTF-8, and ICU supports ucol_strcollUTF8(),
+ * caller should call that instead.
+ */
+static int
+strncoll_icu_no_utf8(const char *arg1, ssize_t len1,
+					 const char *arg2, ssize_t len2, pg_locale_t locale)
+{
+	char		sbuf[TEXTBUFLEN];
+	char	   *buf = sbuf;
+	int32_t		ulen1;
+	int32_t		ulen2;
+	size_t		bufsize1;
+	size_t		bufsize2;
+	UChar	   *uchar1,
+			   *uchar2;
+	int			result;
+
+	Assert(locale->provider == COLLPROVIDER_ICU);
+#ifdef HAVE_UCOL_STRCOLLUTF8
+	Assert(GetDatabaseEncoding() != PG_UTF8);
+#endif
+
+	init_icu_converter();
+
+	ulen1 = uchar_length(icu_converter, arg1, len1);
+	ulen2 = uchar_length(icu_converter, arg2, len2);
+
+	bufsize1 = (ulen1 + 1) * sizeof(UChar);
+	bufsize2 = (ulen2 + 1) * sizeof(UChar);
+
+	if (bufsize1 + bufsize2 > TEXTBUFLEN)
+		buf = palloc(bufsize1 + bufsize2);
+
+	uchar1 = (UChar *) buf;
+	uchar2 = (UChar *) (buf + bufsize1);
+
+	ulen1 = uchar_convert(icu_converter, uchar1, ulen1 + 1, arg1, len1);
+	ulen2 = uchar_convert(icu_converter, uchar2, ulen2 + 1, arg2, len2);
+
+	result = ucol_strcoll(locale->info.icu.ucol,
+						  uchar1, ulen1,
+						  uchar2, ulen2);
+
+	if (buf != sbuf)
+		pfree(buf);
+
+	return result;
+}
+
+/*
+ * strncoll_icu
+ *
+ * Call ucol_strcollUTF8() or ucol_strcoll() as appropriate for the given
+ * database encoding. An argument length of -1 means the string is
+ * NUL-terminated.
+ */
+static int
+strncoll_icu(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
+			 pg_locale_t locale)
+{
+	int			result;
+
+	Assert(locale->provider == COLLPROVIDER_ICU);
+
+#ifdef HAVE_UCOL_STRCOLLUTF8
+	if (GetDatabaseEncoding() == PG_UTF8)
+	{
+		UErrorCode	status;
+
+		status = U_ZERO_ERROR;
+		result = ucol_strcollUTF8(locale->info.icu.ucol,
+								  arg1, len1,
+								  arg2, len2,
+								  &status);
+		if (U_FAILURE(status))
+			ereport(ERROR,
+					(errmsg("collation failed: %s", u_errorName(status))));
+	}
+	else
+#endif
+	{
+		result = strncoll_icu_no_utf8(arg1, len1, arg2, len2, locale);
+	}
+
+	return result;
+}
+
+
+/* 'srclen' of -1 means the strings are NUL-terminated */
+static size_t
+strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
+			 pg_locale_t locale)
+{
+	char		sbuf[TEXTBUFLEN];
+	char	   *buf = sbuf;
+	UChar	   *uchar;
+	int32_t		ulen;
+	size_t		uchar_bsize;
+	Size		result_bsize;
+
+	Assert(locale->provider == COLLPROVIDER_ICU);
+
+	init_icu_converter();
+
+	ulen = uchar_length(icu_converter, src, srclen);
+
+	uchar_bsize = (ulen + 1) * sizeof(UChar);
+
+	if (uchar_bsize > TEXTBUFLEN)
+		buf = palloc(uchar_bsize);
+
+	uchar = (UChar *) buf;
+
+	ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
+
+	result_bsize = ucol_getSortKey(locale->info.icu.ucol,
+								   uchar, ulen,
+								   (uint8_t *) dest, destsize);
+
+	/*
+	 * ucol_getSortKey() counts the nul-terminator in the result length, but
+	 * this function should not.
+	 */
+	Assert(result_bsize > 0);
+	result_bsize--;
+
+	if (buf != sbuf)
+		pfree(buf);
+
+	/* if dest is defined, it should be nul-terminated */
+	Assert(result_bsize >= destsize || dest[result_bsize] == '\0');
+
+	return result_bsize;
+}
+
+/* 'srclen' of -1 means the strings are NUL-terminated */
+static size_t
+strnxfrm_prefix_icu_no_utf8(char *dest,size_t destsize,
+							const char *src, ssize_t srclen,
+							pg_locale_t locale)
+{
+	char		sbuf[TEXTBUFLEN];
+	char	   *buf = sbuf;
+	UCharIterator iter;
+	uint32_t	state[2];
+	UErrorCode	status;
+	int32_t		ulen = -1;
+	UChar	   *uchar = NULL;
+	size_t		uchar_bsize;
+	Size		result_bsize;
+
+	Assert(locale->provider == COLLPROVIDER_ICU);
+	Assert(GetDatabaseEncoding() != PG_UTF8);
+
+	init_icu_converter();
+
+	ulen = uchar_length(icu_converter, src, srclen);
+
+	uchar_bsize = (ulen + 1) * sizeof(UChar);
+
+	if (uchar_bsize > TEXTBUFLEN)
+		buf = palloc(uchar_bsize);
+
+	uchar = (UChar *) buf;
+
+	ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
+
+	uiter_setString(&iter, uchar, ulen);
+	state[0] = state[1] = 0;	/* won't need that again */
+	status = U_ZERO_ERROR;
+	result_bsize = ucol_nextSortKeyPart(locale->info.icu.ucol,
+										&iter,
+										state,
+										(uint8_t *) dest,
+										destsize,
+										&status);
+	if (U_FAILURE(status))
+		ereport(ERROR,
+				(errmsg("sort key generation failed: %s",
+						u_errorName(status))));
+
+	return result_bsize;
+}
+
+/* 'srclen' of -1 means the strings are NUL-terminated */
+static size_t
+strnxfrm_prefix_icu(char *dest, size_t destsize,
+					const char *src, ssize_t srclen,
+					pg_locale_t locale)
+{
+	size_t		result;
+
+	Assert(locale->provider == COLLPROVIDER_ICU);
+
+	if (GetDatabaseEncoding() == PG_UTF8)
+	{
+		UCharIterator iter;
+		uint32_t	state[2];
+		UErrorCode	status;
+
+		uiter_setUTF8(&iter, src, srclen);
+		state[0] = state[1] = 0;	/* won't need that again */
+		status = U_ZERO_ERROR;
+		result = ucol_nextSortKeyPart(locale->info.icu.ucol,
+									  &iter,
+									  state,
+									  (uint8_t *) dest,
+									  destsize,
+									  &status);
+		if (U_FAILURE(status))
+			ereport(ERROR,
+					(errmsg("sort key generation failed: %s",
+							u_errorName(status))));
+	}
+	else
+		result = strnxfrm_prefix_icu_no_utf8(dest, destsize, src, srclen,
+											 locale);
+
+	return result;
+}
+
+/*
+ * Wrapper around ucol_open() to handle API differences for older ICU
+ * versions.
+ *
+ * Ensure that no path leaks a UCollator.
+ */
+UCollator *
+pg_ucol_open(const char *loc_str)
+{
+	UCollator  *collator;
+	UErrorCode	status;
+	const char *orig_str = loc_str;
+	char	   *fixed_str = NULL;
+
+	/*
+	 * Must never open default collator, because it depends on the environment
+	 * and may change at any time. Should not happen, but check here to catch
+	 * bugs that might be hard to catch otherwise.
+	 *
+	 * NB: the default collator is not the same as the collator for the root
+	 * locale. The root locale may be specified as the empty string, "und", or
+	 * "root". The default collator is opened by passing NULL to ucol_open().
+	 */
+	if (loc_str == NULL)
+		elog(ERROR, "opening default collator is not supported");
+
+	/*
+	 * In ICU versions 54 and earlier, "und" is not a recognized spelling of
+	 * the root locale. If the first component of the locale is "und", replace
+	 * with "root" before opening.
+	 */
+	if (U_ICU_VERSION_MAJOR_NUM < 55)
+	{
+		char		lang[ULOC_LANG_CAPACITY];
+
+		status = U_ZERO_ERROR;
+		uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
+		if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
+		{
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("could not get language from locale \"%s\": %s",
+							loc_str, u_errorName(status))));
+		}
+
+		if (strcmp(lang, "und") == 0)
+		{
+			const char *remainder = loc_str + strlen("und");
+
+			fixed_str = palloc(strlen("root") + strlen(remainder) + 1);
+			strcpy(fixed_str, "root");
+			strcat(fixed_str, remainder);
+
+			loc_str = fixed_str;
+		}
+	}
+
+	status = U_ZERO_ERROR;
+	collator = ucol_open(loc_str, &status);
+	if (U_FAILURE(status))
+		ereport(ERROR,
+		/* use original string for error report */
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("could not open collator for locale \"%s\": %s",
+						orig_str, u_errorName(status))));
+
+	if (U_ICU_VERSION_MAJOR_NUM < 54)
+	{
+		status = U_ZERO_ERROR;
+		icu_set_collation_attributes(collator, loc_str, &status);
+
+		/*
+		 * Pretend the error came from ucol_open(), for consistent error
+		 * message across ICU versions.
+		 */
+		if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
+		{
+			ucol_close(collator);
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("could not open collator for locale \"%s\": %s",
+							orig_str, u_errorName(status))));
+		}
+	}
+
+	if (fixed_str != NULL)
+		pfree(fixed_str);
+
+	return collator;
+}
+
+static void
+init_icu_converter(void)
+{
+	const char *icu_encoding_name;
+	UErrorCode	status;
+	UConverter *conv;
+
+	if (icu_converter)
+		return;					/* already done */
+
+	icu_encoding_name = get_encoding_name_for_icu(GetDatabaseEncoding());
+	if (!icu_encoding_name)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("encoding \"%s\" not supported by ICU",
+						pg_encoding_to_char(GetDatabaseEncoding()))));
+
+	status = U_ZERO_ERROR;
+	conv = ucnv_open(icu_encoding_name, &status);
+	if (U_FAILURE(status))
+		ereport(ERROR,
+				(errmsg("could not open ICU converter for encoding \"%s\": %s",
+						icu_encoding_name, u_errorName(status))));
+
+	icu_converter = conv;
+}
+
+/*
+ * Find length, in UChars, of given string if converted to UChar string.
+ *
+ * A length of -1 indicates that the input string is NUL-terminated.
+ */
+static size_t
+uchar_length(UConverter *converter, const char *str, int32_t len)
+{
+	UErrorCode	status = U_ZERO_ERROR;
+	int32_t		ulen;
+
+	ulen = ucnv_toUChars(converter, NULL, 0, str, len, &status);
+	if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
+		ereport(ERROR,
+				(errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
+	return ulen;
+}
+
+/*
+ * Convert the given source string into a UChar string, stored in dest, and
+ * return the length (in UChars).
+ *
+ * A srclen of -1 indicates that the input string is NUL-terminated.
+ */
+static int32_t
+uchar_convert(UConverter *converter, UChar *dest, int32_t destlen,
+			  const char *src, int32_t srclen)
+{
+	UErrorCode	status = U_ZERO_ERROR;
+	int32_t		ulen;
+
+	status = U_ZERO_ERROR;
+	ulen = ucnv_toUChars(converter, dest, destlen, src, srclen, &status);
+	if (U_FAILURE(status))
+		ereport(ERROR,
+				(errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
+	return ulen;
+}
+
+/*
+ * Convert a string in the database encoding into a string of UChars.
+ *
+ * The source string at buff is of length nbytes
+ * (it needn't be nul-terminated)
+ *
+ * *buff_uchar receives a pointer to the palloc'd result string, and
+ * the function's result is the number of UChars generated.
+ *
+ * The result string is nul-terminated, though most callers rely on the
+ * result length instead.
+ */
+int32_t
+icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
+{
+	int32_t		len_uchar;
+
+	init_icu_converter();
+
+	len_uchar = uchar_length(icu_converter, buff, nbytes);
+
+	*buff_uchar = palloc((len_uchar + 1) * sizeof(**buff_uchar));
+	len_uchar = uchar_convert(icu_converter,
+							  *buff_uchar, len_uchar + 1, buff, nbytes);
+
+	return len_uchar;
+}
+
+/*
+ * Convert a string of UChars into the database encoding.
+ *
+ * The source string at buff_uchar is of length len_uchar
+ * (it needn't be nul-terminated)
+ *
+ * *result receives a pointer to the palloc'd result string, and the
+ * function's result is the number of bytes generated (not counting nul).
+ *
+ * The result string is nul-terminated.
+ */
+int32_t
+icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
+{
+	UErrorCode	status;
+	int32_t		len_result;
+
+	init_icu_converter();
+
+	status = U_ZERO_ERROR;
+	len_result = ucnv_fromUChars(icu_converter, NULL, 0,
+								 buff_uchar, len_uchar, &status);
+	if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
+		ereport(ERROR,
+				(errmsg("%s failed: %s", "ucnv_fromUChars",
+						u_errorName(status))));
+
+	*result = palloc(len_result + 1);
+
+	status = U_ZERO_ERROR;
+	len_result = ucnv_fromUChars(icu_converter, *result, len_result + 1,
+								 buff_uchar, len_uchar, &status);
+	if (U_FAILURE(status) ||
+		status == U_STRING_NOT_TERMINATED_WARNING)
+		ereport(ERROR,
+				(errmsg("%s failed: %s", "ucnv_fromUChars",
+						u_errorName(status))));
+
+	return len_result;
+}
+
+/*
+ * Parse collation attributes from the given locale string and apply them to
+ * the open collator.
+ *
+ * First, the locale string is canonicalized to an ICU format locale ID such
+ * as "und@colStrength=primary;colCaseLevel=yes". Then, it parses and applies
+ * the key-value arguments.
+ *
+ * Starting with ICU version 54, the attributes are processed automatically by
+ * ucol_open(), so this is only necessary for emulating this behavior on older
+ * versions.
+ */
+pg_attribute_unused()
+static void
+icu_set_collation_attributes(UCollator *collator, const char *loc,
+							 UErrorCode *status)
+{
+	int32_t		len;
+	char	   *icu_locale_id;
+	char	   *lower_str;
+	char	   *str;
+	char	   *token;
+
+	/*
+	 * The input locale may be a BCP 47 language tag, e.g.
+	 * "und-u-kc-ks-level1", which expresses the same attributes in a
+	 * different form. It will be converted to the equivalent ICU format
+	 * locale ID, e.g. "und@colcaselevel=yes;colstrength=primary", by
+	 * uloc_canonicalize().
+	 */
+	*status = U_ZERO_ERROR;
+	len = uloc_canonicalize(loc, NULL, 0, status);
+	icu_locale_id = palloc(len + 1);
+	*status = U_ZERO_ERROR;
+	len = uloc_canonicalize(loc, icu_locale_id, len + 1, status);
+	if (U_FAILURE(*status) || *status == U_STRING_NOT_TERMINATED_WARNING)
+		return;
+
+	lower_str = asc_tolower(icu_locale_id, strlen(icu_locale_id));
+
+	pfree(icu_locale_id);
+
+	str = strchr(lower_str, '@');
+	if (!str)
+		return;
+	str++;
+
+	while ((token = strsep(&str, ";")))
+	{
+		char	   *e = strchr(token, '=');
+
+		if (e)
+		{
+			char	   *name;
+			char	   *value;
+			UColAttribute uattr;
+			UColAttributeValue uvalue;
+
+			*status = U_ZERO_ERROR;
+
+			*e = '\0';
+			name = token;
+			value = e + 1;
+
+			/*
+			 * See attribute name and value lists in ICU i18n/coll.cpp
+			 */
+			if (strcmp(name, "colstrength") == 0)
+				uattr = UCOL_STRENGTH;
+			else if (strcmp(name, "colbackwards") == 0)
+				uattr = UCOL_FRENCH_COLLATION;
+			else if (strcmp(name, "colcaselevel") == 0)
+				uattr = UCOL_CASE_LEVEL;
+			else if (strcmp(name, "colcasefirst") == 0)
+				uattr = UCOL_CASE_FIRST;
+			else if (strcmp(name, "colalternate") == 0)
+				uattr = UCOL_ALTERNATE_HANDLING;
+			else if (strcmp(name, "colnormalization") == 0)
+				uattr = UCOL_NORMALIZATION_MODE;
+			else if (strcmp(name, "colnumeric") == 0)
+				uattr = UCOL_NUMERIC_COLLATION;
+			else
+				/* ignore if unknown */
+				continue;
+
+			if (strcmp(value, "primary") == 0)
+				uvalue = UCOL_PRIMARY;
+			else if (strcmp(value, "secondary") == 0)
+				uvalue = UCOL_SECONDARY;
+			else if (strcmp(value, "tertiary") == 0)
+				uvalue = UCOL_TERTIARY;
+			else if (strcmp(value, "quaternary") == 0)
+				uvalue = UCOL_QUATERNARY;
+			else if (strcmp(value, "identical") == 0)
+				uvalue = UCOL_IDENTICAL;
+			else if (strcmp(value, "no") == 0)
+				uvalue = UCOL_OFF;
+			else if (strcmp(value, "yes") == 0)
+				uvalue = UCOL_ON;
+			else if (strcmp(value, "shifted") == 0)
+				uvalue = UCOL_SHIFTED;
+			else if (strcmp(value, "non-ignorable") == 0)
+				uvalue = UCOL_NON_IGNORABLE;
+			else if (strcmp(value, "lower") == 0)
+				uvalue = UCOL_LOWER_FIRST;
+			else if (strcmp(value, "upper") == 0)
+				uvalue = UCOL_UPPER_FIRST;
+			else
+			{
+				*status = U_ILLEGAL_ARGUMENT_ERROR;
+				break;
+			}
+
+			ucol_setAttribute(collator, uattr, uvalue, status);
+		}
+	}
+
+	pfree(lower_str);
+}
+
+#endif					/* USE_ICU */
diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
new file mode 100644
index 00000000000..6eb8b80fdf9
--- /dev/null
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -0,0 +1,604 @@
+/*-----------------------------------------------------------------------
+ *
+ * PostgreSQL locale utilities for libc
+ *
+ * Portions Copyright (c) 2002-2024, PostgreSQL Global Development Group
+ *
+ * src/backend/utils/adt/pg_locale_libc.c
+ *
+ *-----------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <limits.h>
+#include <wctype.h>
+
+#include "access/htup_details.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_database.h"
+#include "utils/builtins.h"
+#include "utils/memutils.h"
+#include "utils/pg_locale.h"
+#include "utils/resowner.h"
+#include "utils/syscache.h"
+
+/*
+ * This should be large enough that most strings will fit, but small enough
+ * that we feel comfortable putting it on the stack
+ */
+#define		TEXTBUFLEN			1024
+
+extern pg_locale_t libc_dat_create_locale(HeapTuple dattuple);
+extern pg_locale_t libc_coll_create_locale(MemoryContext context,
+										   ResourceOwner resowner,
+										   HeapTuple colltuple);
+
+static int strncoll_libc(const char *arg1, ssize_t len1,
+						 const char *arg2, ssize_t len2,
+						 pg_locale_t locale);
+static size_t strnxfrm_libc(char *dest, size_t destsize,
+							const char *src, ssize_t srclen,
+							pg_locale_t locale);
+
+static int char_props_libc(pg_wchar wc, int mask, pg_locale_t locale);
+static pg_wchar toupper_libc(pg_wchar wc, pg_locale_t locale);
+static pg_wchar tolower_libc(pg_wchar wc, pg_locale_t locale);
+
+static void ResourceOwnerRememberLocaleT(ResourceOwner resowner,
+										 locale_t locale);
+static void ResOwnerReleaseLocaleT(Datum val);
+
+static locale_t make_libc_collator(const char *collate, const char *ctype);
+
+static void report_newlocale_failure(const char *localename);
+
+static const ResourceOwnerDesc LocaleTResourceKind =
+{
+	.name = "locale_t reference",
+	.release_phase = RESOURCE_RELEASE_AFTER_LOCKS,
+	.release_priority = RELEASE_PRIO_LAST,
+	.ReleaseResource = ResOwnerReleaseLocaleT,
+	.DebugPrint = NULL                      /* the default message is fine */
+};
+
+struct collate_methods libc_collate_methods = {
+	.strncoll = strncoll_libc,
+	.strnxfrm = strnxfrm_libc,
+	.strnxfrm_prefix = NULL,
+};
+
+struct ctype_methods libc_ctype_methods = {
+	.char_props = char_props_libc,
+	.wc_toupper = toupper_libc,
+	.wc_tolower = tolower_libc,
+};
+
+pg_locale_t
+libc_dat_create_locale(HeapTuple dattuple)
+{
+	Form_pg_database dbform;
+	Datum		datum;
+	const char *datcollate;
+	const char *datctype;
+	pg_locale_t	result;
+
+	dbform = (Form_pg_database) GETSTRUCT(dattuple);
+	
+	Assert(dbform->datlocprovider == COLLPROVIDER_LIBC);
+
+	datum = SysCacheGetAttrNotNull(DATABASEOID, dattuple, Anum_pg_database_datcollate);
+	datcollate = TextDatumGetCString(datum);
+	datum = SysCacheGetAttrNotNull(DATABASEOID, dattuple, Anum_pg_database_datctype);
+	datctype = TextDatumGetCString(datum);
+
+	result = MemoryContextAllocZero(TopMemoryContext,
+									sizeof(struct pg_locale_struct));
+
+	result->provider = dbform->datlocprovider;
+	result->deterministic = true;
+	result->collate_is_c = (strcmp(datcollate, "C") == 0) ||
+		(strcmp(datcollate, "POSIX") == 0);
+	result->ctype_is_c = (strcmp(datctype, "C") == 0) ||
+		(strcmp(datctype, "POSIX") == 0);
+
+	if (!result->collate_is_c)
+		result->collate = &libc_collate_methods;
+	if (!result->ctype_is_c)
+		result->ctype = &libc_ctype_methods;
+	result->info.lt = make_libc_collator(datcollate, datctype);
+
+	return result;
+}
+
+pg_locale_t
+libc_coll_create_locale(MemoryContext context, ResourceOwner resowner,
+						HeapTuple colltuple)
+{
+	Form_pg_collation collform;
+	Datum		datum;
+	const char *collcollate;
+	const char *collctype;
+	locale_t	locale;
+	pg_locale_t result;
+
+	collform = (Form_pg_collation) GETSTRUCT(colltuple);
+
+	datum = SysCacheGetAttrNotNull(COLLOID, colltuple, Anum_pg_collation_collcollate);
+	collcollate = TextDatumGetCString(datum);
+	datum = SysCacheGetAttrNotNull(COLLOID, colltuple, Anum_pg_collation_collctype);
+	collctype = TextDatumGetCString(datum);
+
+	ResourceOwnerEnlarge(resowner);
+	locale = make_libc_collator(collcollate, collctype);
+	if (locale)
+		ResourceOwnerRememberLocaleT(resowner, locale);
+
+	result = MemoryContextAllocZero(context,
+									sizeof(struct pg_locale_struct));
+
+	result->provider = collform->collprovider;
+	result->deterministic = collform->collisdeterministic;
+	result->collate_is_c = (strcmp(collcollate, "C") == 0) ||
+		(strcmp(collcollate, "POSIX") == 0);
+	result->ctype_is_c = (strcmp(collctype, "C") == 0) ||
+		(strcmp(collctype, "POSIX") == 0);
+	if (!result->collate_is_c)
+		result->collate = &libc_collate_methods;
+	if (!result->ctype_is_c)
+		result->ctype = &libc_ctype_methods;
+	result->info.lt = locale;
+
+	return result;
+}
+
+static void
+ResourceOwnerRememberLocaleT(ResourceOwner resowner, locale_t locale)
+{
+	ResourceOwnerRemember(resowner, PointerGetDatum(locale),
+						  &LocaleTResourceKind);
+}
+
+static void
+ResOwnerReleaseLocaleT(Datum val)
+{
+	locale_t locale = (locale_t) DatumGetPointer(val);
+#ifndef WIN32
+	freelocale(locale);
+#else
+	_free_locale(locale);
+#endif
+}
+
+static int
+char_props_libc(pg_wchar wc, int mask, pg_locale_t locale)
+{
+	int result = 0;
+
+	Assert(!locale->ctype_is_c);
+
+	if (mask & PG_ISDIGIT)
+	{
+		if (GetDatabaseEncoding() == PG_UTF8 &&
+			(sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF))
+		{
+			if (iswdigit_l((wint_t) wc, locale->info.lt))
+				result |= PG_ISDIGIT;
+		}
+		else
+		{
+			if (wc <= (pg_wchar) UCHAR_MAX &&
+				isdigit_l((unsigned char) wc, locale->info.lt))
+				result |= PG_ISDIGIT;
+		}
+	}
+	if (mask & PG_ISALPHA)
+	{
+		if (GetDatabaseEncoding() == PG_UTF8 &&
+			(sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF))
+		{
+			if (iswalpha_l((wint_t) wc, locale->info.lt))
+				result |= PG_ISALPHA;
+		}
+		else
+		{
+			if (wc <= (pg_wchar) UCHAR_MAX &&
+				isalpha_l((unsigned char) wc, locale->info.lt))
+				result |= PG_ISALPHA;
+		}
+	}
+	if (mask & PG_ISUPPER)
+	{
+		if (GetDatabaseEncoding() == PG_UTF8 &&
+			(sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF))
+		{
+			if (iswupper_l((wint_t) wc, locale->info.lt))
+				result |= PG_ISUPPER;
+		}
+		else
+		{
+			if (wc <= (pg_wchar) UCHAR_MAX &&
+				isupper_l((unsigned char) wc, locale->info.lt))
+				result |= PG_ISUPPER;
+		}
+	}
+	if (mask & PG_ISLOWER)
+	{
+		if (GetDatabaseEncoding() == PG_UTF8 &&
+			(sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF))
+		{
+			if (iswlower_l((wint_t) wc, locale->info.lt))
+				result |= PG_ISLOWER;
+		}
+		else
+		{
+			if (wc <= (pg_wchar) UCHAR_MAX &&
+				islower_l((unsigned char) wc, locale->info.lt))
+				result |= PG_ISLOWER;
+		}
+	}
+	if (mask & PG_ISGRAPH)
+	{
+		if (GetDatabaseEncoding() == PG_UTF8 &&
+			(sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF))
+		{
+			if (iswgraph_l((wint_t) wc, locale->info.lt))
+				result |= PG_ISGRAPH;
+		}
+		else
+		{
+			if (wc <= (pg_wchar) UCHAR_MAX &&
+				isgraph_l((unsigned char) wc, locale->info.lt))
+				result |= PG_ISGRAPH;
+		}
+	}
+	if (mask & PG_ISPRINT)
+	{
+		if (GetDatabaseEncoding() == PG_UTF8 &&
+			(sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF))
+		{
+			if (iswprint_l((wint_t) wc, locale->info.lt))
+				result |= PG_ISPRINT;
+		}
+		else
+		{
+			if (wc <= (pg_wchar) UCHAR_MAX &&
+				isprint_l((unsigned char) wc, locale->info.lt))
+				result |= PG_ISPRINT;
+		}
+	}
+	if (mask & PG_ISPUNCT)
+	{
+		if (GetDatabaseEncoding() == PG_UTF8 &&
+			(sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF))
+		{
+			if (iswpunct_l((wint_t) wc, locale->info.lt))
+				result |= PG_ISPUNCT;
+		}
+		else
+		{
+			if (wc <= (pg_wchar) UCHAR_MAX &&
+				ispunct_l((unsigned char) wc, locale->info.lt))
+				result |= PG_ISPUNCT;
+		}
+	}
+	if (mask & PG_ISSPACE)
+	{
+		if (GetDatabaseEncoding() == PG_UTF8 &&
+			(sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF))
+		{
+			if (iswspace_l((wint_t) wc, locale->info.lt))
+				result |= PG_ISSPACE;
+		}
+		else
+		{
+			if (wc <= (pg_wchar) UCHAR_MAX &&
+				isspace_l((unsigned char) wc, locale->info.lt))
+				result |= PG_ISSPACE;
+		}
+	}
+
+	return result;
+}
+
+static pg_wchar
+toupper_libc(pg_wchar wc, pg_locale_t locale)
+{
+	if (GetDatabaseEncoding() == PG_UTF8 &&
+		(sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF))
+		return towupper_l((wint_t) wc, locale->info.lt);
+	else if (wc <= (pg_wchar) UCHAR_MAX)
+		return toupper_l((unsigned char) wc, locale->info.lt);
+	else
+		return wc;
+}
+
+static pg_wchar
+tolower_libc(pg_wchar wc, pg_locale_t locale)
+{
+	if (GetDatabaseEncoding() == PG_UTF8 &&
+		(sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF))
+		return towlower_l((wint_t) wc, locale->info.lt);
+	else if (wc <= (pg_wchar) UCHAR_MAX)
+		return tolower_l((unsigned char) wc, locale->info.lt);
+	else
+		return wc;
+}
+
+/*
+ * Create a locale_t with the given collation and ctype.
+ *
+ * The "C" and "POSIX" locales are not actually handled by libc, so return
+ * NULL.
+ *
+ * Ensure that no path leaks a locale_t.
+ */
+static locale_t
+make_libc_collator(const char *collate, const char *ctype)
+{
+	locale_t	loc = 0;
+
+	if (strcmp(collate, ctype) == 0)
+	{
+		if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
+		{
+			/* Normal case where they're the same */
+			errno = 0;
+#ifndef WIN32
+			loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collate,
+							NULL);
+#else
+			loc = _create_locale(LC_ALL, collate);
+#endif
+			if (!loc)
+				report_newlocale_failure(collate);
+		}
+	}
+	else
+	{
+#ifndef WIN32
+		/* We need two newlocale() steps */
+		locale_t	loc1 = 0;
+
+		if (strcmp(collate, "C") != 0 && strcmp(collate, "POSIX") != 0)
+		{
+			errno = 0;
+			loc1 = newlocale(LC_COLLATE_MASK, collate, NULL);
+			if (!loc1)
+				report_newlocale_failure(collate);
+		}
+
+		if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
+		{
+			errno = 0;
+			loc = newlocale(LC_CTYPE_MASK, ctype, loc1);
+			if (!loc)
+			{
+				if (loc1)
+					freelocale(loc1);
+				report_newlocale_failure(ctype);
+			}
+		}
+		else
+			loc = loc1;
+#else
+
+		/*
+		 * XXX The _create_locale() API doesn't appear to support this. Could
+		 * perhaps be worked around by changing pg_locale_t to contain two
+		 * separate fields.
+		 */
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("collations with different collate and ctype values are not supported on this platform")));
+#endif
+	}
+
+	return loc;
+}
+
+/*
+ * strncoll_libc_win32_utf8
+ *
+ * Win32 does not have UTF-8. Convert UTF8 arguments to wide characters and
+ * invoke wcscoll_l().
+ *
+ * An input string length of -1 means that it's NUL-terminated.
+ */
+#ifdef WIN32
+static int
+strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2,
+						 ssize_t len2, pg_locale_t locale)
+{
+	char		sbuf[TEXTBUFLEN];
+	char	   *buf = sbuf;
+	char	   *a1p,
+			   *a2p;
+	int			a1len;
+	int			a2len;
+	int			r;
+	int			result;
+
+	Assert(locale->provider == COLLPROVIDER_LIBC);
+	Assert(GetDatabaseEncoding() == PG_UTF8);
+#ifndef WIN32
+	Assert(false);
+#endif
+
+	if (len1 == -1)
+		len1 = strlen(arg1);
+	if (len2 == -1)
+		len2 = strlen(arg2);
+
+	a1len = len1 * 2 + 2;
+	a2len = len2 * 2 + 2;
+
+	if (a1len + a2len > TEXTBUFLEN)
+		buf = palloc(a1len + a2len);
+
+	a1p = buf;
+	a2p = buf + a1len;
+
+	/* API does not work for zero-length input */
+	if (len1 == 0)
+		r = 0;
+	else
+	{
+		r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
+								(LPWSTR) a1p, a1len / 2);
+		if (!r)
+			ereport(ERROR,
+					(errmsg("could not convert string to UTF-16: error code %lu",
+							GetLastError())));
+	}
+	((LPWSTR) a1p)[r] = 0;
+
+	if (len2 == 0)
+		r = 0;
+	else
+	{
+		r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
+								(LPWSTR) a2p, a2len / 2);
+		if (!r)
+			ereport(ERROR,
+					(errmsg("could not convert string to UTF-16: error code %lu",
+							GetLastError())));
+	}
+	((LPWSTR) a2p)[r] = 0;
+
+	errno = 0;
+	result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt);
+	if (result == 2147483647)	/* _NLSCMPERROR; missing from mingw headers */
+		ereport(ERROR,
+				(errmsg("could not compare Unicode strings: %m")));
+
+	if (buf != sbuf)
+		pfree(buf);
+
+	return result;
+}
+#endif							/* WIN32 */
+
+/*
+ * strncoll_libc
+ *
+ * An input string length of -1 means that it's NUL-terminated.
+ */
+static int
+strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
+			  pg_locale_t locale)
+{
+	char		sbuf[TEXTBUFLEN];
+	char	   *buf = sbuf;
+	size_t		bufsize1 = (len1 == -1) ? 0 : len1 + 1;
+	size_t		bufsize2 = (len2 == -1) ? 0 : len2 + 1;
+	const char *arg1n;
+	const char *arg2n;
+	int			result;
+
+	Assert(locale->provider == COLLPROVIDER_LIBC);
+
+#ifdef WIN32
+	/* check for this case before doing the work for nul-termination */
+	if (GetDatabaseEncoding() == PG_UTF8)
+		return strncoll_libc_win32_utf8(arg1, len1, arg2, len2, locale);
+#endif							/* WIN32 */
+
+	if (bufsize1 + bufsize2 > TEXTBUFLEN)
+		buf = palloc(bufsize1 + bufsize2);
+
+	/* nul-terminate arguments if necessary */
+	if (len1 == -1)
+	{
+		arg1n = arg1;
+	}
+	else
+	{
+		char *buf1 = buf;
+		memcpy(buf1, arg1, len1);
+		buf1[len1] = '\0';
+		arg1n = buf1;
+	}
+
+	if (len2 == -1)
+	{
+		arg2n = arg2;
+	}
+	else
+	{
+		char *buf2 = buf + bufsize1;
+		memcpy(buf2, arg2, len2);
+		buf2[len2] = '\0';
+		arg2n = buf2;
+	}
+
+	result = strcoll_l(arg1n, arg2n, locale->info.lt);
+
+	if (buf != sbuf)
+		pfree(buf);
+
+	return result;
+}
+
+static size_t
+strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen,
+			  pg_locale_t locale)
+{
+	char		sbuf[TEXTBUFLEN];
+	char	   *buf = sbuf;
+	size_t		bufsize = srclen + 1;
+	size_t		result;
+
+	Assert(locale->provider == COLLPROVIDER_LIBC);
+
+	if (srclen == -1)
+		return strxfrm_l(dest, src, destsize, locale->info.lt);
+
+	if (bufsize > TEXTBUFLEN)
+		buf = palloc(bufsize);
+
+	/* nul-terminate argument */
+	memcpy(buf, src, srclen);
+	buf[srclen] = '\0';
+
+	result = strxfrm_l(dest, buf, destsize, locale->info.lt);
+
+	if (buf != sbuf)
+		pfree(buf);
+
+	/* if dest is defined, it should be nul-terminated */
+	Assert(result >= destsize || dest[result] == '\0');
+
+	return result;
+}
+
+/* simple subroutine for reporting errors from newlocale() */
+static void
+report_newlocale_failure(const char *localename)
+{
+	int			save_errno;
+
+	/*
+	 * Windows doesn't provide any useful error indication from
+	 * _create_locale(), and BSD-derived platforms don't seem to feel they
+	 * need to set errno either (even though POSIX is pretty clear that
+	 * newlocale should do so).  So, if errno hasn't been set, assume ENOENT
+	 * is what to report.
+	 */
+	if (errno == 0)
+		errno = ENOENT;
+
+	/*
+	 * ENOENT means "no such locale", not "no such file", so clarify that
+	 * errno with an errdetail message.
+	 */
+	save_errno = errno;			/* auxiliary funcs might change errno */
+	ereport(ERROR,
+			(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+			 errmsg("could not create locale \"%s\": %m",
+					localename),
+			 (save_errno == ENOENT ?
+			  errdetail("The operating system could not find any locale data for the locale name \"%s\".",
+						localename) : 0)));
+}
+
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 3b443df8014..95ba7940b95 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -12,6 +12,7 @@
 #ifndef _PG_LOCALE_
 #define _PG_LOCALE_
 
+#include "mb/pg_wchar.h"
 #if defined(LOCALE_T_IN_XLOCALE) || defined(WCSTOMBS_L_IN_XLOCALE)
 #include <xlocale.h>
 #endif
@@ -19,6 +20,19 @@
 #include <unicode/ucol.h>
 #endif
 
+/*
+ * Character properties for regular expressions.
+ */
+#define PG_ISDIGIT	0x01
+#define PG_ISALPHA	0x02
+#define PG_ISALNUM	(PG_ISDIGIT | PG_ISALPHA)
+#define PG_ISUPPER	0x04
+#define PG_ISLOWER	0x08
+#define PG_ISGRAPH	0x10
+#define PG_ISPRINT	0x20
+#define PG_ISPUNCT	0x40
+#define PG_ISSPACE	0x80
+
 #ifdef USE_ICU
 /*
  * ucol_strcollUTF8() was introduced in ICU 50, but it is buggy before ICU 53.
@@ -62,6 +76,28 @@ extern struct lconv *PGLC_localeconv(void);
 
 extern void cache_locale_time(void);
 
+struct pg_locale_struct;
+typedef struct pg_locale_struct *pg_locale_t;
+
+struct collate_methods
+{
+	int	(*strncoll)(const char *arg1, ssize_t len1,
+					const char *arg2, ssize_t len2,
+					pg_locale_t locale);
+	size_t (*strnxfrm)(char *dest, size_t destsize,
+					   const char *src, ssize_t srclen,
+					   pg_locale_t locale);
+	size_t (*strnxfrm_prefix)(char *dest, size_t destsize,
+							  const char *src, ssize_t srclen,
+							  pg_locale_t locale);
+};
+
+struct ctype_methods
+{
+	int (*char_props)(pg_wchar wc, int mask, pg_locale_t locale);
+	pg_wchar (*wc_toupper)(pg_wchar wc, pg_locale_t locale);
+	pg_wchar (*wc_tolower)(pg_wchar wc, pg_locale_t locale);
+};
 
 /*
  * We use a discriminated union to hold either a locale_t or an ICU collator.
@@ -85,6 +121,10 @@ struct pg_locale_struct
 	bool		deterministic;
 	bool		collate_is_c;
 	bool		ctype_is_c;
+
+	struct collate_methods *collate;
+	struct ctype_methods *ctype;
+
 	union
 	{
 		struct
@@ -102,8 +142,6 @@ struct pg_locale_struct
 	}			info;
 };
 
-typedef struct pg_locale_struct *pg_locale_t;
-
 extern void init_database_collation(void);
 extern pg_locale_t pg_newlocale_from_collation(Oid collid);
 
@@ -132,6 +170,8 @@ extern int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
 extern int32_t icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar);
 #endif
 
+extern int char_props(pg_wchar wc, int mask, pg_locale_t locale);
+
 /* These functions convert from/to libc's wchar_t, *not* pg_wchar_t */
 extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen,
 						 pg_locale_t locale);
-- 
2.34.1