From 15243bd82e44d83e90751c51e30dcce4e9c3a39f Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Wed, 7 Dec 2022 11:07:31 -0800
Subject: [PATCH v4 4/6] Support multiple ICU collation provider libraries.

Introduce structure pg_icu_library, which holds pointers to each
required ICU method, and store this as part of pg_locale_t. Each call
to an ICU function instead goes through this structure, so that it can
more easily be replaced by a non-builtin ICU library.

Offer a hook to allow an extension to control which ICU library is
loaded for a given locale and version.

Author: Thomas Munro, Jeff Davis
---
 src/backend/commands/collationcmds.c   |  17 +-
 src/backend/utils/adt/formatting.c     |  67 ++++--
 src/backend/utils/adt/pg_locale.c      | 299 ++++++++++++++++++-------
 src/include/utils/pg_locale_internal.h | 121 +++++++++-
 4 files changed, 398 insertions(+), 106 deletions(-)

diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c
index 9e84da4891..4e90bfa65e 100644
--- a/src/backend/commands/collationcmds.c
+++ b/src/backend/commands/collationcmds.c
@@ -560,13 +560,14 @@ get_icu_language_tag(const char *localename)
 {
 	char		buf[ULOC_FULLNAME_CAPACITY];
 	UErrorCode	status;
+	pg_icu_library *iculib = get_default_icu_library();
 
 	status = U_ZERO_ERROR;
-	uloc_toLanguageTag(localename, buf, sizeof(buf), true, &status);
+	iculib->toLanguageTag(localename, buf, sizeof(buf), true, &status);
 	if (U_FAILURE(status))
 		ereport(ERROR,
 				(errmsg("could not convert locale name \"%s\" to language tag: %s",
-						localename, u_errorName(status))));
+						localename, iculib->errorName(status))));
 
 	return pstrdup(buf);
 }
@@ -585,11 +586,12 @@ get_icu_locale_comment(const char *localename)
 	int32		len_uchar;
 	int32		i;
 	char	   *result;
+	pg_icu_library *iculib = get_default_icu_library();
 
 	status = U_ZERO_ERROR;
-	len_uchar = uloc_getDisplayName(localename, "en",
-									displayname, lengthof(displayname),
-									&status);
+	len_uchar = iculib->getDisplayName(localename, "en",
+									   displayname, lengthof(displayname),
+									   &status);
 	if (U_FAILURE(status))
 		return NULL;			/* no good reason to raise an error */
 
@@ -809,12 +811,13 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
 #ifdef USE_ICU
 	{
 		int			i;
+		pg_icu_library *iculib = get_default_icu_library();
 
 		/*
 		 * Start the loop at -1 to sneak in the root locale without too much
 		 * code duplication.
 		 */
-		for (i = -1; i < uloc_countAvailable(); i++)
+		for (i = -1; i < iculib->countAvailable(); i++)
 		{
 			const char *name;
 			char	   *langtag;
@@ -825,7 +828,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
 			if (i == -1)
 				name = "";		/* ICU root locale */
 			else
-				name = uloc_getAvailable(i);
+				name = iculib->getAvailable(i);
 
 			langtag = get_icu_language_tag(name);
 			iculocstr = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : name;
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index 3067431cbe..7f318fe2c7 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -1565,6 +1565,11 @@ typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
 									 const UChar *src, int32_t srcLength,
 									 const char *locale,
 									 UErrorCode *pErrorCode);
+typedef int32_t (*ICU_Convert_BI_Func) (UChar *dest, int32_t destCapacity,
+										const UChar *src, int32_t srcLength,
+										UBreakIterator *bi,
+										const char *locale,
+										UErrorCode *pErrorCode);
 
 static int32_t
 icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
@@ -1572,6 +1577,7 @@ icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
 {
 	UErrorCode	status;
 	int32_t		len_dest;
+	pg_icu_library *iculib = PG_ICU_LIB(mylocale);
 
 	len_dest = len_source;		/* try first with same length */
 	*buff_dest = palloc(len_dest * sizeof(**buff_dest));
@@ -1589,18 +1595,42 @@ icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
 	}
 	if (U_FAILURE(status))
 		ereport(ERROR,
-				(errmsg("case conversion failed: %s", u_errorName(status))));
+				(errmsg("case conversion failed: %s",
+						iculib->errorName(status))));
 	return len_dest;
 }
 
+/*
+ * Like icu_convert_case, but for a func that also takes a UBreakIterator; we
+ * always pass NULL for it.  *buff_dest is palloc'd; failure raises ERROR.
+ */
 static int32_t
-u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
-						const UChar *src, int32_t srcLength,
-						const char *locale,
-						UErrorCode *pErrorCode)
+icu_convert_case_bi(ICU_Convert_BI_Func func, pg_locale_t mylocale,
+					UChar **buff_dest, UChar *buff_source, int32_t len_source)
 {
-	return u_strToTitle(dest, destCapacity, src, srcLength,
-						NULL, locale, pErrorCode);
+	UErrorCode	status;
+	int32_t		len_dest;
+	pg_icu_library *iculib = PG_ICU_LIB(mylocale);
+
+	len_dest = len_source;		/* first attempt: same length as source */
+	*buff_dest = palloc(len_dest * sizeof(**buff_dest));
+	status = U_ZERO_ERROR;
+	len_dest = func(*buff_dest, len_dest, buff_source, len_source, NULL,
+					mylocale->ctype, &status);
+	if (status == U_BUFFER_OVERFLOW_ERROR)
+	{
+		/* too small: retry with the length func returned */
+		pfree(*buff_dest);
+		*buff_dest = palloc(len_dest * sizeof(**buff_dest));
+		status = U_ZERO_ERROR;
+		len_dest = func(*buff_dest, len_dest, buff_source, len_source, NULL,
+						mylocale->ctype, &status);
+	}
+	if (U_FAILURE(status))
+		ereport(ERROR,
+				(errmsg("case conversion failed: %s",
+						iculib->errorName(status))));
+	return len_dest;
 }
 
 #endif							/* USE_ICU */
@@ -1666,11 +1696,12 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
 			int32_t		len_conv;
 			UChar	   *buff_uchar;
 			UChar	   *buff_conv;
+			pg_icu_library *iculib = PG_ICU_LIB(mylocale);
 
-			len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
-			len_conv = icu_convert_case(u_strToLower, mylocale,
+			len_uchar = icu_to_uchar(iculib, &buff_uchar, buff, nbytes);
+			len_conv = icu_convert_case(iculib->strToLower, mylocale,
 										&buff_conv, buff_uchar, len_uchar);
-			icu_from_uchar(&result, buff_conv, len_conv);
+			icu_from_uchar(iculib, &result, buff_conv, len_conv);
 			pfree(buff_uchar);
 			pfree(buff_conv);
 		}
@@ -1788,11 +1819,12 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
 						len_conv;
 			UChar	   *buff_uchar;
 			UChar	   *buff_conv;
+			pg_icu_library *iculib = PG_ICU_LIB(mylocale);
 
-			len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
-			len_conv = icu_convert_case(u_strToUpper, mylocale,
+			len_uchar = icu_to_uchar(iculib, &buff_uchar, buff, nbytes);
+			len_conv = icu_convert_case(iculib->strToUpper, mylocale,
 										&buff_conv, buff_uchar, len_uchar);
-			icu_from_uchar(&result, buff_conv, len_conv);
+			icu_from_uchar(iculib, &result, buff_conv, len_conv);
 			pfree(buff_uchar);
 			pfree(buff_conv);
 		}
@@ -1911,11 +1943,12 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
 						len_conv;
 			UChar	   *buff_uchar;
 			UChar	   *buff_conv;
+			pg_icu_library *iculib = PG_ICU_LIB(mylocale);
 
-			len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
-			len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale,
-										&buff_conv, buff_uchar, len_uchar);
-			icu_from_uchar(&result, buff_conv, len_conv);
+			len_uchar = icu_to_uchar(iculib, &buff_uchar, buff, nbytes);
+			len_conv = icu_convert_case_bi(iculib->strToTitle, mylocale,
+										   &buff_conv, buff_uchar, len_uchar);
+			icu_from_uchar(iculib, &result, buff_conv, len_conv);
 			pfree(buff_uchar);
 			pfree(buff_conv);
 		}
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index d8df2a3832..5521fe11a5 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -70,6 +70,8 @@
 
 #ifdef USE_ICU
 #include <unicode/ucnv.h>
+#include <unicode/ulocdata.h>
+#include <unicode/ustring.h>
 #endif
 
 #ifdef __GLIBC__
@@ -110,6 +112,41 @@ char	   *localized_full_days[7 + 1];
 char	   *localized_abbrev_months[12 + 1];
 char	   *localized_full_months[12 + 1];
 
+/*
+ * The precise version of a collation provider library is important, because
+ * subtle changes in collation between library versions can corrupt
+ * indexes. This hook allows control over how collation provider libraries are
+ * loaded.
+ *
+ * If the hook is not set, or if it returns NULL, Postgres constructs the
+ * pg_icu_library structure from the symbols Postgres is linked with at build
+ * time.
+ *
+ * If the hook is set, it can instead construct the pg_icu_library structure
+ * using custom logic. Ordinarily, this custom logic would involve finding a
+ * specific known version of the collation provider library, and dynamically
+ * loading the necessary symbols. If the collation version obtained from the
+ * library does not match the collation version recorded in the catalog,
+ * Postgres will issue a WARNING.
+ *
+ * The returned structure must be allocated in TopMemoryContext, and the
+ * associated symbols must remain valid permanently. It's expected that the
+ * hook will maintain a small finite list of libraries and return a pointer to
+ * the appropriate one, so that resource use remains bounded.
+ *
+ * XXX: For now, the only information the hook has access to is the ICU locale
+ * name, and the collation version as recorded in the catalog. We should
+ * consider what other information can be provided to allow for greater
+ * control over which library is loaded.
+ */
+#ifdef USE_ICU
+get_icu_library_hook_type get_icu_library_hook = NULL;
+#endif
+
+#ifdef USE_ICU
+static pg_icu_library *builtin_icu_library = NULL;
+#endif
+
 /* indicates whether locale information cache is valid */
 static bool CurrentLocaleConvValid = false;
 static bool CurrentLCTimeValid = false;
@@ -136,8 +173,12 @@ static char *IsoLocaleName(const char *);
  * Database default locale.
  */
 static pg_locale_t default_locale = NULL;
+#ifdef USE_ICU
+static pg_icu_library *default_icu = NULL;
+#endif
 
 #ifdef USE_ICU
+
 /*
  * Converter object for converting between ICU's UChar strings and C strings
  * in database encoding.  Since the database encoding doesn't change, we only
@@ -145,13 +186,17 @@ static pg_locale_t default_locale = NULL;
  */
 static UConverter *icu_converter = NULL;
 
-static void init_icu_converter(void);
-static size_t uchar_length(UConverter *converter,
+static void init_icu_converter(pg_icu_library *iculib);
+static size_t uchar_length(pg_icu_library *iculib,
+						   UConverter *converter,
 						   const char *str, size_t len);
-static int32_t uchar_convert(UConverter *converter,
+static int32_t uchar_convert(pg_icu_library *iculib,
+							 UConverter *converter,
 							 UChar *dest, int32_t destlen,
 							 const char *str, size_t srclen);
-static void icu_set_collation_attributes(UCollator *collator, const char *loc);
+static void icu_set_collation_attributes(pg_icu_library *iculib,
+										 UCollator *collator,
+										 const char *loc);
 #endif
 
 /*
@@ -1458,6 +1503,78 @@ report_newlocale_failure(const char *localename)
 }
 #endif							/* HAVE_LOCALE_T */
 
+#ifdef USE_ICU
+/* Return the cached pg_icu_library for the ICU linked at build time. */
+static pg_icu_library *
+get_builtin_icu_library(void)
+{
+	pg_icu_library *lib;
+
+	if (builtin_icu_library != NULL)
+		return builtin_icu_library;
+
+	/*
+	 * Zeroed so no member is indeterminate.  The assignments below fail to
+	 * compile on an incompatible ICU API change; trampolines may then be needed.
+	 */
+	lib = MemoryContextAllocZero(TopMemoryContext, sizeof(pg_icu_library));
+	lib->major_version = U_ICU_VERSION_MAJOR_NUM;
+	lib->minor_version = U_ICU_VERSION_MINOR_NUM;
+	lib->getICUVersion = u_getVersion;
+	lib->getUnicodeVersion = u_getUnicodeVersion;
+	lib->getCLDRVersion = ulocdata_getCLDRVersion;
+	lib->openCollator = ucol_open;
+	lib->closeCollator = ucol_close;
+	lib->getCollatorVersion = ucol_getVersion;
+	lib->getUCAVersion = ucol_getUCAVersion;
+	lib->versionToString = u_versionToString;
+	lib->strcoll = ucol_strcoll;
+	lib->strcollUTF8 = ucol_strcollUTF8;
+	lib->getSortKey = ucol_getSortKey;
+	lib->nextSortKeyPart = ucol_nextSortKeyPart;
+	lib->setUTF8 = uiter_setUTF8;
+	lib->errorName = u_errorName;
+	lib->strToUpper = u_strToUpper;
+	lib->strToLower = u_strToLower;
+	lib->strToTitle = u_strToTitle;
+	lib->setAttribute = ucol_setAttribute;
+	lib->openConverter = ucnv_open;
+	lib->closeConverter = ucnv_close;
+	lib->fromUChars = ucnv_fromUChars;
+	lib->toUChars = ucnv_toUChars;
+	lib->toLanguageTag = uloc_toLanguageTag;
+	lib->getDisplayName = uloc_getDisplayName;
+	lib->countAvailable = uloc_countAvailable;
+	lib->getAvailable = uloc_getAvailable;
+
+	/*
+	 * Canary: if a later ICU release shrinks these output buffer sizes, we
+	 * will need to add extra padding.
+	 */
+	StaticAssertStmt(U_MAX_VERSION_STRING_LENGTH == 20,
+					 "u_versionToString output buffer size changed incompatibly");
+	StaticAssertStmt(U_MAX_VERSION_LENGTH == 4,
+					 "ucol_getVersion output buffer size changed incompatibly");
+
+	builtin_icu_library = lib;
+	return lib;
+}
+
+static pg_icu_library *
+get_icu_library(const char *locale, const char *version)
+{
+	pg_icu_library *chosen;
+
+	/* No hook installed: use the ICU library we were linked against. */
+	if (get_icu_library_hook == NULL)
+		return get_builtin_icu_library();
+
+	/* A hook may decline by returning NULL; fall back to the builtin. */
+	chosen = get_icu_library_hook(locale, version);
+	return chosen ? chosen : get_builtin_icu_library();
+}
+
+#endif
 
 /*
  * Construct a new pg_locale_t object.
@@ -1554,20 +1671,24 @@ pg_newlocale(char provider, bool deterministic, const char *collate,
 	{
 		UCollator  *collator;
 		UErrorCode	status;
+		pg_icu_library *iculib = get_icu_library(collate, version);
+
+		Assert(strcmp(collate, ctype) == 0);
 
 		/* collator may be leaked if we encounter an error */
 
 		status = U_ZERO_ERROR;
-		collator = ucol_open(collate, &status);
+		collator = iculib->openCollator(collate, &status);
 		if (U_FAILURE(status))
 			ereport(ERROR,
 					(errmsg("could not open collator for locale \"%s\": %s",
-							collate, u_errorName(status))));
+							collate, iculib->errorName(status))));
 
 		if (U_ICU_VERSION_MAJOR_NUM < 54)
-			icu_set_collation_attributes(collator, collate);
+			icu_set_collation_attributes(iculib, collator, collate);
 
 		result->info.icu.ucol = collator;
+		result->info.icu.lib = iculib;
 	}
 #endif
 	else
@@ -1589,6 +1710,14 @@ pg_locale_deterministic(pg_locale_t locale)
 		return locale->deterministic;
 }
 
+#ifdef USE_ICU
+pg_icu_library *
+get_default_icu_library(void)
+{
+	return default_icu;			/* NULL until init_default_locale() runs */
+}
+#endif
+
 /*
  * Initialize default database locale.
  */
@@ -1596,6 +1725,10 @@ void
 init_default_locale(char provider, const char *collate, const char *ctype,
 					const char *iculocale, const char *version)
 {
+#ifdef USE_ICU
+	default_icu = get_icu_library(iculocale, version);
+#endif
+
 	/*
 	 * For the purposes of pg_locale_t, if the provider is ICU, we use
 	 * iculocale for both collate and ctype.
@@ -1759,17 +1892,18 @@ get_collation_actual_version(char collprovider, const char *collcollate)
 		UErrorCode	status;
 		UVersionInfo versioninfo;
 		char		buf[U_MAX_VERSION_STRING_LENGTH];
+		pg_icu_library	*iculib = get_icu_library(collcollate, NULL);
 
 		status = U_ZERO_ERROR;
-		collator = ucol_open(collcollate, &status);
+		collator = iculib->openCollator(collcollate, &status);
 		if (U_FAILURE(status))
 			ereport(ERROR,
 					(errmsg("could not open collator for locale \"%s\": %s",
-							collcollate, u_errorName(status))));
-		ucol_getVersion(collator, versioninfo);
-		ucol_close(collator);
+							collcollate, iculib->errorName(status))));
+		iculib->getCollatorVersion(collator, versioninfo);
+		iculib->closeCollator(collator);
 
-		u_versionToString(versioninfo, buf);
+		iculib->versionToString(versioninfo, buf);
 		collversion = pstrdup(buf);
 	}
 	else
@@ -2021,16 +2155,17 @@ pg_strncoll_icu_no_utf8(const char *arg1, size_t len1,
 	UChar	*uchar1,
 			*uchar2;
 	int		 result;
+	pg_icu_library *iculib = PG_ICU_LIB(locale);
 
 	Assert(locale->provider == COLLPROVIDER_ICU);
 #ifdef HAVE_UCOL_STRCOLLUTF8
 	Assert(GetDatabaseEncoding() != PG_UTF8);
 #endif
 
-	init_icu_converter();
+	init_icu_converter(iculib);
 
-	ulen1 = uchar_length(icu_converter, arg1, len1);
-	ulen2 = uchar_length(icu_converter, arg2, len2);
+	ulen1 = uchar_length(iculib, icu_converter, arg1, len1);
+	ulen2 = uchar_length(iculib, icu_converter, arg2, len2);
 
 	bufsize1 = (ulen1 + 1) * sizeof(UChar);
 	bufsize2 = (ulen2 + 1) * sizeof(UChar);
@@ -2041,12 +2176,12 @@ pg_strncoll_icu_no_utf8(const char *arg1, size_t len1,
 	uchar1 = (UChar *) buf;
 	uchar2 = (UChar *) (buf + bufsize1);
 
-	ulen1 = uchar_convert(icu_converter, uchar1, ulen1 + 1, arg1, len1);
-	ulen2 = uchar_convert(icu_converter, uchar2, ulen2 + 1, arg2, len2);
+	ulen1 = uchar_convert(iculib, icu_converter, uchar1, ulen1 + 1, arg1, len1);
+	ulen2 = uchar_convert(iculib, icu_converter, uchar2, ulen2 + 1, arg2, len2);
 
-	result = ucol_strcoll(locale->info.icu.ucol,
-						  uchar1, ulen1,
-						  uchar2, ulen2);
+	result = iculib->strcoll(locale->info.icu.ucol,
+							 uchar1, ulen1,
+							 uchar2, ulen2);
 
 	if (buf != sbuf)
 		pfree(buf);
@@ -2067,6 +2202,7 @@ pg_strncoll_icu(const char *arg1, size_t len1, const char *arg2, size_t len2,
 				pg_locale_t locale)
 {
 	int result;
+	pg_icu_library *iculib = PG_ICU_LIB(locale);
 
 	Assert(locale->provider == COLLPROVIDER_ICU);
 
@@ -2076,13 +2212,14 @@ pg_strncoll_icu(const char *arg1, size_t len1, const char *arg2, size_t len2,
 		UErrorCode	status;
 
 		status = U_ZERO_ERROR;
-		result = ucol_strcollUTF8(locale->info.icu.ucol,
-								  arg1, len1,
-								  arg2, len2,
-								  &status);
+		result = iculib->strcollUTF8(locale->info.icu.ucol,
+									 arg1, len1,
+									 arg2, len2,
+									 &status);
 		if (U_FAILURE(status))
 			ereport(ERROR,
-					(errmsg("collation failed: %s", u_errorName(status))));
+					(errmsg("collation failed: %s",
+							iculib->errorName(status))));
 	}
 	else
 #endif
@@ -2261,12 +2398,13 @@ pg_strnxfrm_icu(char *dest, const char *src, size_t srclen, size_t destsize,
 	int32_t	 ulen;
 	size_t   uchar_bsize;
 	Size	 result_bsize;
+	pg_icu_library *iculib = PG_ICU_LIB(locale);
 
 	Assert(locale->provider == COLLPROVIDER_ICU);
 
-	init_icu_converter();
+	init_icu_converter(iculib);
 
-	ulen = uchar_length(icu_converter, src, srclen);
+	ulen = uchar_length(iculib, icu_converter, src, srclen);
 
 	uchar_bsize = (ulen + 1) * sizeof(UChar);
 
@@ -2275,11 +2413,11 @@ pg_strnxfrm_icu(char *dest, const char *src, size_t srclen, size_t destsize,
 
 	uchar = (UChar *) buf;
 
-	ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
+	ulen = uchar_convert(iculib, icu_converter, uchar, ulen + 1, src, srclen);
 
-	result_bsize = ucol_getSortKey(locale->info.icu.ucol,
-								   uchar, ulen,
-								   (uint8_t *) dest, destsize);
+	result_bsize = iculib->getSortKey(locale->info.icu.ucol,
+									  uchar, ulen,
+									  (uint8_t *) dest, destsize);
 
 	if (buf != sbuf)
 		pfree(buf);
@@ -2308,13 +2446,14 @@ pg_strnxfrm_prefix_icu_no_utf8(char *dest, const char *src, size_t srclen,
 	UChar			*uchar = NULL;
 	size_t			 uchar_bsize;
 	Size			 result_bsize;
+	pg_icu_library	*iculib = PG_ICU_LIB(locale);
 
 	Assert(locale->provider == COLLPROVIDER_ICU);
 	Assert(GetDatabaseEncoding() != PG_UTF8);
 
-	init_icu_converter();
+	init_icu_converter(iculib);
 
-	ulen = uchar_length(icu_converter, src, srclen);
+	ulen = uchar_length(iculib, icu_converter, src, srclen);
 
 	uchar_bsize = (ulen + 1) * sizeof(UChar);
 
@@ -2323,21 +2462,19 @@ pg_strnxfrm_prefix_icu_no_utf8(char *dest, const char *src, size_t srclen,
 
 	uchar = (UChar *) buf;
 
-	ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
+	ulen = uchar_convert(iculib, icu_converter, uchar, ulen + 1, src, srclen);
 
 	uiter_setString(&iter, uchar, ulen);
 	state[0] = state[1] = 0;	/* won't need that again */
 	status = U_ZERO_ERROR;
-	result_bsize = ucol_nextSortKeyPart(locale->info.icu.ucol,
-										&iter,
-										state,
-										(uint8_t *) dest,
-										destsize,
-										&status);
+	result_bsize = iculib->nextSortKeyPart(
+		locale->info.icu.ucol, &iter, state,
+		(uint8_t *) dest, destsize, &status);
+
 	if (U_FAILURE(status))
 		ereport(ERROR,
 				(errmsg("sort key generation failed: %s",
-						u_errorName(status))));
+						iculib->errorName(status))));
 
 	return result_bsize;
 }
@@ -2346,6 +2483,7 @@ static size_t
 pg_strnxfrm_prefix_icu(char *dest, const char *src, size_t srclen,
 					   size_t destsize, pg_locale_t locale)
 {
+	pg_icu_library *iculib = PG_ICU_LIB(locale);
 	size_t result;
 
 	Assert(locale->provider == COLLPROVIDER_ICU);
@@ -2356,19 +2494,17 @@ pg_strnxfrm_prefix_icu(char *dest, const char *src, size_t srclen,
 		uint32_t	state[2];
 		UErrorCode	status;
 
-		uiter_setUTF8(&iter, src, srclen);
+		iculib->setUTF8(&iter, src, srclen);
 		state[0] = state[1] = 0;	/* won't need that again */
 		status = U_ZERO_ERROR;
-		result = ucol_nextSortKeyPart(locale->info.icu.ucol,
-									  &iter,
-									  state,
-									  (uint8_t *) dest,
-									  destsize,
-									  &status);
+		result = iculib->nextSortKeyPart(
+			locale->info.icu.ucol, &iter, state,
+			(uint8_t *) dest, destsize, &status);
+
 		if (U_FAILURE(status))
 			ereport(ERROR,
 					(errmsg("sort key generation failed: %s",
-							u_errorName(status))));
+							iculib->errorName(status))));
 	}
 	else
 		result = pg_strnxfrm_prefix_icu_no_utf8(dest, src, srclen, destsize,
@@ -2564,7 +2700,7 @@ pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
 
 #ifdef USE_ICU
 static void
-init_icu_converter(void)
+init_icu_converter(pg_icu_library *iculib)
 {
 	const char *icu_encoding_name;
 	UErrorCode	status;
@@ -2581,11 +2717,11 @@ init_icu_converter(void)
 						pg_encoding_to_char(GetDatabaseEncoding()))));
 
 	status = U_ZERO_ERROR;
-	conv = ucnv_open(icu_encoding_name, &status);
+	conv = iculib->openConverter(icu_encoding_name, &status);
 	if (U_FAILURE(status))
 		ereport(ERROR,
 				(errmsg("could not open ICU converter for encoding \"%s\": %s",
-						icu_encoding_name, u_errorName(status))));
+						icu_encoding_name, iculib->errorName(status))));
 
 	icu_converter = conv;
 }
@@ -2594,14 +2730,15 @@ init_icu_converter(void)
  * Find length, in UChars, of given string if converted to UChar string.
  */
 static size_t
-uchar_length(UConverter *converter, const char *str, size_t len)
+uchar_length(pg_icu_library *iculib, UConverter *converter, const char *str, size_t len)
 {
 	UErrorCode	status = U_ZERO_ERROR;
 	int32_t		ulen;
-	ulen = ucnv_toUChars(converter, NULL, 0, str, len, &status);
+	ulen = iculib->toUChars(converter, NULL, 0, str, len, &status);
 	if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
 		ereport(ERROR,
-				(errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
+				(errmsg("%s failed: %s", "ucnv_toUChars",
+						iculib->errorName(status))));
 	return ulen;
 }
 
@@ -2610,16 +2747,17 @@ uchar_length(UConverter *converter, const char *str, size_t len)
  * return the length (in UChars).
  */
 static int32_t
-uchar_convert(UConverter *converter, UChar *dest, int32_t destlen,
-			  const char *src, size_t srclen)
+uchar_convert(pg_icu_library *iculib, UConverter *converter, UChar *dest,
+			  int32_t destlen, const char *src, size_t srclen)
 {
 	UErrorCode	status = U_ZERO_ERROR;
 	int32_t		ulen;
 	status = U_ZERO_ERROR;
-	ulen = ucnv_toUChars(converter, dest, destlen, src, srclen, &status);
+	ulen = iculib->toUChars(converter, dest, destlen, src, srclen, &status);
 	if (U_FAILURE(status))
 		ereport(ERROR,
-				(errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
+				(errmsg("%s failed: %s", "ucnv_toUChars",
+						iculib->errorName(status))));
 	return ulen;
 }
 
@@ -2636,16 +2774,17 @@ uchar_convert(UConverter *converter, UChar *dest, int32_t destlen,
  * result length instead.
  */
 int32_t
-icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
+icu_to_uchar(pg_icu_library *iculib, UChar **buff_uchar, const char *buff,
+			 size_t nbytes)
 {
 	int32_t len_uchar;
 
-	init_icu_converter();
+	init_icu_converter(iculib);
 
-	len_uchar = uchar_length(icu_converter, buff, nbytes);
+	len_uchar = uchar_length(iculib, icu_converter, buff, nbytes);
 
 	*buff_uchar = palloc((len_uchar + 1) * sizeof(**buff_uchar));
-	len_uchar = uchar_convert(icu_converter,
+	len_uchar = uchar_convert(iculib, icu_converter,
 							  *buff_uchar, len_uchar + 1, buff, nbytes);
 
 	return len_uchar;
@@ -2663,30 +2802,32 @@ icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
  * The result string is nul-terminated.
  */
 int32_t
-icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
+icu_from_uchar(pg_icu_library *iculib, char **result, const UChar *buff_uchar,
+			   int32_t len_uchar)
 {
 	UErrorCode	status;
 	int32_t		len_result;
 
-	init_icu_converter();
+	init_icu_converter(iculib);
 
 	status = U_ZERO_ERROR;
-	len_result = ucnv_fromUChars(icu_converter, NULL, 0,
-								 buff_uchar, len_uchar, &status);
+	len_result = iculib->fromUChars(icu_converter, NULL, 0,
+									buff_uchar, len_uchar, &status);
 	if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
 		ereport(ERROR,
 				(errmsg("%s failed: %s", "ucnv_fromUChars",
-						u_errorName(status))));
+						iculib->errorName(status))));
 
 	*result = palloc(len_result + 1);
 
 	status = U_ZERO_ERROR;
-	len_result = ucnv_fromUChars(icu_converter, *result, len_result + 1,
-								 buff_uchar, len_uchar, &status);
+	len_result = iculib->fromUChars(icu_converter, *result,
+									len_result + 1, buff_uchar,
+									len_uchar, &status);
 	if (U_FAILURE(status))
 		ereport(ERROR,
 				(errmsg("%s failed: %s", "ucnv_fromUChars",
-						u_errorName(status))));
+						iculib->errorName(status))));
 
 	return len_result;
 }
@@ -2702,7 +2843,8 @@ icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
  */
 pg_attribute_unused()
 static void
-icu_set_collation_attributes(UCollator *collator, const char *loc)
+icu_set_collation_attributes(pg_icu_library *iculib, UCollator *collator,
+							 const char *loc)
 {
 	char	   *str = asc_tolower(loc, strlen(loc));
 
@@ -2776,7 +2918,7 @@ icu_set_collation_attributes(UCollator *collator, const char *loc)
 				status = U_ILLEGAL_ARGUMENT_ERROR;
 
 			if (status == U_ZERO_ERROR)
-				ucol_setAttribute(collator, uattr, uvalue, &status);
+				iculib->setAttribute(collator, uattr, uvalue, &status);
 
 			/*
 			 * Pretend the error came from ucol_open(), for consistent error
@@ -2785,7 +2927,7 @@ icu_set_collation_attributes(UCollator *collator, const char *loc)
 			if (U_FAILURE(status))
 				ereport(ERROR,
 						(errmsg("could not open collator for locale \"%s\": %s",
-								loc, u_errorName(status))));
+								loc, iculib->errorName(status))));
 		}
 	}
 }
@@ -2801,17 +2943,18 @@ check_icu_locale(const char *icu_locale)
 #ifdef USE_ICU
 	UCollator	*collator;
 	UErrorCode   status;
+	pg_icu_library *iculib = get_icu_library(icu_locale, NULL);
 
 	status = U_ZERO_ERROR;
-	collator = ucol_open(icu_locale, &status);
+	collator = iculib->openCollator(icu_locale, &status);
 	if (U_FAILURE(status))
 		ereport(ERROR,
 				(errmsg("could not open collator for locale \"%s\": %s",
-						icu_locale, u_errorName(status))));
+						icu_locale, iculib->errorName(status))));
 
 	if (U_ICU_VERSION_MAJOR_NUM < 54)
-		icu_set_collation_attributes(collator, icu_locale);
-	ucol_close(collator);
+		icu_set_collation_attributes(iculib, collator, icu_locale);
+	iculib->closeCollator(collator);
 #else
 	ereport(ERROR,
 			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
diff --git a/src/include/utils/pg_locale_internal.h b/src/include/utils/pg_locale_internal.h
index 33465ad92d..afb6c24d64 100644
--- a/src/include/utils/pg_locale_internal.h
+++ b/src/include/utils/pg_locale_internal.h
@@ -14,6 +14,8 @@
 #define _PG_LOCALE_INTERNAL_
 
 #ifdef USE_ICU
+#include <unicode/ubrk.h>
+#include <unicode/ucnv.h>
 #include <unicode/ucol.h>
 #endif
 
@@ -30,6 +32,106 @@
 #endif
 #endif
 
+#ifdef USE_ICU
+/*
+ * Table of entry points for one ICU library version: either the one we are
+ * linked against or one loaded at runtime.  Callers go through these pointers.
+ */
+typedef struct pg_icu_library
+{
+	int			major_version;	/* ICU major version of this library */
+	int			minor_version;	/* ICU minor version of this library */
+	void		(*getICUVersion) (UVersionInfo info);
+	void		(*getUnicodeVersion) (UVersionInfo info);
+	void		(*getCLDRVersion) (UVersionInfo info, UErrorCode *status);
+	UCollator  *(*openCollator) (const char *loc, UErrorCode *status);
+	void		(*closeCollator) (UCollator *coll);
+	void		(*getCollatorVersion) (const UCollator *coll, UVersionInfo info);
+	void		(*getUCAVersion) (const UCollator *coll, UVersionInfo info);
+	void		(*versionToString) (const UVersionInfo versionArray,
+									char *versionString);
+	UCollationResult (*strcoll) (const UCollator *coll,
+								 const UChar *source,
+								 int32_t sourceLength,
+								 const UChar *target,
+								 int32_t targetLength);
+	UCollationResult (*strcollUTF8) (const UCollator *coll,
+									 const char *source,
+									 int32_t sourceLength,
+									 const char *target,
+									 int32_t targetLength,
+									 UErrorCode *status);
+	int32_t		(*getSortKey) (const UCollator *coll,
+							   const UChar *source,
+							   int32_t sourceLength,
+							   uint8_t *result,
+							   int32_t resultLength);
+	int32_t		(*nextSortKeyPart) (const UCollator *coll,
+									UCharIterator *iter,
+									uint32_t state[2],
+									uint8_t *dest,
+									int32_t count,
+									UErrorCode *status);
+	void		(*setUTF8) (UCharIterator *iter,
+							const char *s,
+							int32_t length);
+	const char *(*errorName) (UErrorCode code);
+	int32_t		(*strToUpper) (UChar *dest,
+							   int32_t destCapacity,
+							   const UChar *src,
+							   int32_t srcLength,
+							   const char *locale,
+							   UErrorCode *pErrorCode);
+	int32_t		(*strToLower) (UChar *dest,
+							   int32_t destCapacity,
+							   const UChar *src,
+							   int32_t srcLength,
+							   const char *locale,
+							   UErrorCode *pErrorCode);
+	int32_t		(*strToTitle) (UChar *dest,
+							   int32_t destCapacity,
+							   const UChar *src,
+							   int32_t srcLength,
+							   UBreakIterator *titleIter,
+							   const char *locale,
+							   UErrorCode *pErrorCode);
+	void		(*setAttribute) (UCollator *coll,
+								 UColAttribute attr,
+								 UColAttributeValue value,
+								 UErrorCode *status);
+	UConverter *(*openConverter) (const char *converterName,
+								  UErrorCode *err);
+	void		(*closeConverter) (UConverter *converter);
+	int32_t		(*fromUChars) (UConverter *cnv,
+							   char *dest,
+							   int32_t destCapacity,
+							   const UChar *src,
+							   int32_t srcLength,
+							   UErrorCode *pErrorCode);
+	int32_t		(*toUChars) (UConverter *cnv,
+							 UChar *dest,
+							 int32_t destCapacity,
+							 const char *src,
+							 int32_t srcLength,
+							 UErrorCode *pErrorCode);
+	int32_t		(*toLanguageTag) (const char *localeID,
+								  char *langtag,
+								  int32_t langtagCapacity,
+								  UBool strict,
+								  UErrorCode *err);
+	int32_t		(*getDisplayName) (const char *localeID,
+								   const char *inLocaleID,
+								   UChar *result,
+								   int32_t maxResultSize,
+								   UErrorCode *err);
+	int32_t		(*countAvailable) (void);
+	const char *(*getAvailable) (int32_t n);
+} pg_icu_library;
+
+#define PG_ICU_LIB(x) ((x)->info.icu.lib)
+
+#endif
+
 /*
  * We define our own wrapper around locale_t so we can keep the same
  * function signatures for all builds, while not having to create a
@@ -53,7 +155,8 @@ struct pg_locale_struct
 #ifdef USE_ICU
 		struct
 		{
-			UCollator	*ucol;
+			UCollator		*ucol;
+			pg_icu_library	*lib;
 		}			icu;
 #endif
 		int			dummy;		/* in case we have neither LOCALE_T nor ICU */
@@ -61,8 +164,18 @@ struct pg_locale_struct
 };
 
 #ifdef USE_ICU
-extern int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes);
-extern int32_t icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar);
-#endif
+
+typedef pg_icu_library *(*get_icu_library_hook_type)(
+	const char *locale, const char *version);
+
+extern PGDLLIMPORT get_icu_library_hook_type get_icu_library_hook;
+
+extern pg_icu_library *get_default_icu_library(void);
+extern int32_t icu_to_uchar(pg_icu_library *lib, UChar **buff_uchar,
+							const char *buff, size_t nbytes);
+extern int32_t icu_from_uchar(pg_icu_library *lib, char **result,
+							  const UChar *buff_uchar, int32_t len_uchar);
+
+#endif							/* USE_ICU */
 
 #endif							/* _PG_LOCALE_INTERNAL_ */
-- 
2.34.1

