From 6613da1cb99dfa19d45fb11073ba82ebda2c6242 Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Thu, 26 Sep 2024 11:27:29 -0700
Subject: [PATCH v5 5/8] Control collation behavior with a method table.

Previously, behavior branched based on the provider.

A method table is less error prone and easier to hook.
---
 src/backend/utils/adt/pg_locale.c      | 147 +++++--------------------
 src/backend/utils/adt/pg_locale_icu.c  |  55 +++++----
 src/backend/utils/adt/pg_locale_libc.c |  64 +++++++----
 src/include/utils/pg_locale.h          |  33 ++++++
 4 files changed, 136 insertions(+), 163 deletions(-)

diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index f49c89c833e..dfb9c3bd952 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -100,24 +100,8 @@ extern pg_locale_t coll_create_locale_libc(HeapTuple colltuple,
 
 #ifdef USE_ICU
 extern UCollator *pg_ucol_open(const char *loc_str);
-extern int strncoll_icu(const char *arg1, ssize_t len1,
-						const char *arg2, ssize_t len2,
-						pg_locale_t locale);
-extern size_t strnxfrm_icu(char *dest, size_t destsize,
-						   const char *src, ssize_t srclen,
-						   pg_locale_t locale);
-extern size_t strnxfrm_prefix_icu(char *dest, size_t destsize,
-								  const char *src, ssize_t srclen,
-								  pg_locale_t locale);
 #endif
 
-extern int strncoll_libc(const char *arg1, ssize_t len1,
-						 const char *arg2, ssize_t len2,
-						 pg_locale_t locale);
-extern size_t strnxfrm_libc(char *dest, size_t destsize,
-							const char *src, ssize_t srclen,
-							pg_locale_t locale);
-
 /* GUC settings */
 char	   *locale_messages;
 char	   *locale_monetary;
@@ -1234,10 +1218,10 @@ IsoLocaleName(const char *winlocname)
 static pg_locale_t
 dat_create_locale_builtin(HeapTuple dattuple)
 {
-	Form_pg_database	 dbform;
-	Datum				 datum;
-	const char			*locstr;
-	pg_locale_t			 result;
+	Form_pg_database dbform;
+	Datum		datum;
+	const char *locstr;
+	pg_locale_t result;
 
 	dbform = (Form_pg_database) GETSTRUCT(dattuple);
 	datum = SysCacheGetAttrNotNull(DATABASEOID, dattuple,
@@ -1262,10 +1246,10 @@ dat_create_locale_builtin(HeapTuple dattuple)
 static pg_locale_t
 coll_create_locale_builtin(HeapTuple colltuple, MemoryContext context)
 {
-	Form_pg_collation	 collform;
-	Datum				 datum;
-	const char			*locstr;
-	pg_locale_t			 result;
+	Form_pg_collation collform;
+	Datum		datum;
+	const char *locstr;
+	pg_locale_t result;
 
 	collform = (Form_pg_collation) GETSTRUCT(colltuple);
 	datum = SysCacheGetAttrNotNull(COLLOID, colltuple,
@@ -1293,8 +1277,8 @@ create_pg_locale(Oid collid, MemoryContext context)
 {
 	/* We haven't computed this yet in this session, so do it */
 	HeapTuple	tp;
-	Datum				 datum;
-	bool				 isnull;
+	Datum		datum;
+	bool		isnull;
 	Form_pg_collation collform;
 	pg_locale_t	result;
 
@@ -1332,9 +1316,9 @@ create_pg_locale(Oid collid, MemoryContext context)
 		if (!actual_versionstr)
 		{
 			/*
-			 * This could happen when specifying a version in CREATE
-			 * COLLATION but the provider does not support versioning, or
-			 * manually creating a mess in the catalogs.
+			 * This could happen when specifying a version in CREATE COLLATION
+			 * but the provider does not support versioning, or manually
+			 * creating a mess in the catalogs.
 			 */
 			ereport(ERROR,
 					(errmsg("collation \"%s\" has no actual version, but a version was recorded",
@@ -1561,19 +1545,7 @@ get_collation_actual_version(char collprovider, const char *collcollate)
 int
 pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale)
 {
-	int			result;
-
-	if (locale->provider == COLLPROVIDER_LIBC)
-		result = strncoll_libc(arg1, -1, arg2, -1, locale);
-#ifdef USE_ICU
-	else if (locale->provider == COLLPROVIDER_ICU)
-		result = strncoll_icu(arg1, -1, arg2, -1, locale);
-#endif
-	else
-		/* shouldn't happen */
-		PGLOCALE_SUPPORT_ERROR(locale->provider);
-
-	return result;
+	return locale->collate->strncoll(arg1, -1, arg2, -1, locale);
 }
 
 /*
@@ -1594,51 +1566,25 @@ int
 pg_strncoll(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
 			pg_locale_t locale)
 {
-	int			result;
-
-	if (locale->provider == COLLPROVIDER_LIBC)
-		result = strncoll_libc(arg1, len1, arg2, len2, locale);
-#ifdef USE_ICU
-	else if (locale->provider == COLLPROVIDER_ICU)
-		result = strncoll_icu(arg1, len1, arg2, len2, locale);
-#endif
-	else
-		/* shouldn't happen */
-		PGLOCALE_SUPPORT_ERROR(locale->provider);
-
-	return result;
+	return locale->collate->strncoll(arg1, len1, arg2, len2, locale);
 }
 
 /*
  * Return true if the collation provider supports pg_strxfrm() and
  * pg_strnxfrm(); otherwise false.
  *
- * Unfortunately, it seems that strxfrm() for non-C collations is broken on
- * many common platforms; testing of multiple versions of glibc reveals that,
- * for many locales, strcoll() and strxfrm() do not return consistent
- * results. While no other libc other than Cygwin has so far been shown to
- * have a problem, we take the conservative course of action for right now and
- * disable this categorically.  (Users who are certain this isn't a problem on
- * their system can define TRUST_STRXFRM.)
  *
  * No similar problem is known for the ICU provider.
  */
 bool
 pg_strxfrm_enabled(pg_locale_t locale)
 {
-	if (locale->provider == COLLPROVIDER_LIBC)
-#ifdef TRUST_STRXFRM
-		return true;
-#else
-		return false;
-#endif
-	else if (locale->provider == COLLPROVIDER_ICU)
-		return true;
-	else
-		/* shouldn't happen */
-		PGLOCALE_SUPPORT_ERROR(locale->provider);
-
-	return false;				/* keep compiler quiet */
+	/*
+	 * locale->collate->strnxfrm is still a required method, even if it may
+	 * have the wrong behavior, because the planner uses it for estimates in
+	 * some cases.
+	 */
+	return locale->collate->strxfrm_is_safe;
 }
 
 /*
@@ -1649,19 +1595,7 @@ pg_strxfrm_enabled(pg_locale_t locale)
 size_t
 pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale)
 {
-	size_t		result = 0;		/* keep compiler quiet */
-
-	if (locale->provider == COLLPROVIDER_LIBC)
-		result = strnxfrm_libc(dest, destsize, src, -1, locale);
-#ifdef USE_ICU
-	else if (locale->provider == COLLPROVIDER_ICU)
-		result = strnxfrm_icu(dest, destsize, src, -1, locale);
-#endif
-	else
-		/* shouldn't happen */
-		PGLOCALE_SUPPORT_ERROR(locale->provider);
-
-	return result;
+	return locale->collate->strnxfrm(dest, destsize, src, -1, locale);
 }
 
 /*
@@ -1687,19 +1621,7 @@ size_t
 pg_strnxfrm(char *dest, size_t destsize, const char *src, ssize_t srclen,
 			pg_locale_t locale)
 {
-	size_t		result = 0;		/* keep compiler quiet */
-
-	if (locale->provider == COLLPROVIDER_LIBC)
-		result = strnxfrm_libc(dest, destsize, src, srclen, locale);
-#ifdef USE_ICU
-	else if (locale->provider == COLLPROVIDER_ICU)
-		result = strnxfrm_icu(dest, destsize, src, srclen, locale);
-#endif
-	else
-		/* shouldn't happen */
-		PGLOCALE_SUPPORT_ERROR(locale->provider);
-
-	return result;
+	return locale->collate->strnxfrm(dest, destsize, src, srclen, locale);
 }
 
 /*
@@ -1709,15 +1631,7 @@ pg_strnxfrm(char *dest, size_t destsize, const char *src, ssize_t srclen,
 bool
 pg_strxfrm_prefix_enabled(pg_locale_t locale)
 {
-	if (locale->provider == COLLPROVIDER_LIBC)
-		return false;
-	else if (locale->provider == COLLPROVIDER_ICU)
-		return true;
-	else
-		/* shouldn't happen */
-		PGLOCALE_SUPPORT_ERROR(locale->provider);
-
-	return false;				/* keep compiler quiet */
+	return (locale->collate->strnxfrm_prefix != NULL);
 }
 
 /*
@@ -1729,7 +1643,7 @@ size_t
 pg_strxfrm_prefix(char *dest, const char *src, size_t destsize,
 				  pg_locale_t locale)
 {
-	return pg_strnxfrm_prefix(dest, destsize, src, -1, locale);
+	return locale->collate->strnxfrm_prefix(dest, destsize, src, -1, locale);
 }
 
 /*
@@ -1754,16 +1668,7 @@ size_t
 pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
 				   ssize_t srclen, pg_locale_t locale)
 {
-	size_t		result = 0;		/* keep compiler quiet */
-
-#ifdef USE_ICU
-	if (locale->provider == COLLPROVIDER_ICU)
-		result = strnxfrm_prefix_icu(dest, destsize, src, -1, locale);
-	else
-#endif
-		PGLOCALE_SUPPORT_ERROR(locale->provider);
-
-	return result;
+	return locale->collate->strnxfrm_prefix(dest, destsize, src, srclen, locale);
 }
 
 /*
diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c
index 2df1a8226e6..e53bf2d4b33 100644
--- a/src/backend/utils/adt/pg_locale_icu.c
+++ b/src/backend/utils/adt/pg_locale_icu.c
@@ -40,13 +40,14 @@ extern pg_locale_t coll_create_locale_icu(HeapTuple colltuple,
 #ifdef USE_ICU
 
 extern UCollator *pg_ucol_open(const char *loc_str);
-extern int strncoll_icu(const char *arg1, ssize_t len1,
-						const char *arg2, ssize_t len2,
-						pg_locale_t locale);
-extern size_t strnxfrm_icu(char *dest, size_t destsize,
+
+static int	strncoll_icu(const char *arg1, ssize_t len1,
+						 const char *arg2, ssize_t len2,
+						 pg_locale_t locale);
+static size_t strnxfrm_icu(char *dest, size_t destsize,
 						   const char *src, ssize_t srclen,
 						   pg_locale_t locale);
-extern size_t strnxfrm_prefix_icu(char *dest, size_t destsize,
+static size_t strnxfrm_prefix_icu(char *dest, size_t destsize,
 								  const char *src, ssize_t srclen,
 								  pg_locale_t locale);
 
@@ -59,9 +60,9 @@ static UConverter *icu_converter = NULL;
 
 static UCollator *make_icu_collator(const char *iculocstr,
 									const char *icurules);
-static int strncoll_icu_no_utf8(const char *arg1, ssize_t len1,
-								const char *arg2, ssize_t len2,
-								pg_locale_t locale);
+static int	strncoll_icu_no_utf8(const char *arg1, ssize_t len1,
+								 const char *arg2, ssize_t len2,
+								 pg_locale_t locale);
 static size_t strnxfrm_prefix_icu_no_utf8(char *dest, size_t destsize,
 										  const char *src, ssize_t srclen,
 										  pg_locale_t locale);
@@ -73,19 +74,27 @@ static int32_t uchar_convert(UConverter *converter,
 							 const char *src, int32_t srclen);
 static void icu_set_collation_attributes(UCollator *collator, const char *loc,
 										 UErrorCode *status);
+
+static struct collate_methods collate_methods_icu = {
+	.strncoll = strncoll_icu,
+	.strnxfrm = strnxfrm_icu,
+	.strnxfrm_prefix = strnxfrm_prefix_icu,
+	.strxfrm_is_safe = true,
+};
+
 #endif
 
 pg_locale_t
 dat_create_locale_icu(HeapTuple dattuple)
 {
 #ifdef USE_ICU
-	Form_pg_database	 dbform;
-	Datum				 datum;
-	bool				 isnull;
-	const char			*iculocstr;
-	const char			*icurules = NULL;
-	UCollator			*collator;
-	pg_locale_t			 result;
+	Form_pg_database dbform;
+	Datum		datum;
+	bool		isnull;
+	const char *iculocstr;
+	const char *icurules = NULL;
+	UCollator  *collator;
+	pg_locale_t result;
 
 	dbform = (Form_pg_database) GETSTRUCT(dattuple);
 
@@ -108,6 +117,7 @@ dat_create_locale_icu(HeapTuple dattuple)
 	result->deterministic = true;
 	result->collate_is_c = false;
 	result->ctype_is_c = false;
+	result->collate = &collate_methods_icu;
 
 	return result;
 #else
@@ -124,13 +134,13 @@ pg_locale_t
 coll_create_locale_icu(HeapTuple colltuple, MemoryContext context)
 {
 #ifdef USE_ICU
-	Form_pg_collation	 collform;
-	Datum				 datum;
-	bool				 isnull;
-	const char			*iculocstr;
-	const char			*icurules = NULL;
-	UCollator			*collator;
-	pg_locale_t			 result;
+	Form_pg_collation collform;
+	Datum		datum;
+	bool		isnull;
+	const char *iculocstr;
+	const char *icurules = NULL;
+	UCollator  *collator;
+	pg_locale_t result;
 
 	collform = (Form_pg_collation) GETSTRUCT(colltuple);
 
@@ -152,6 +162,7 @@ coll_create_locale_icu(HeapTuple colltuple, MemoryContext context)
 	result->deterministic = collform->collisdeterministic;
 	result->collate_is_c = false;
 	result->ctype_is_c = false;
+	result->collate = &collate_methods_icu;
 
 	return result;
 #else
diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
index 5481fd3b802..b8ccd24715d 100644
--- a/src/backend/utils/adt/pg_locale_libc.c
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -31,10 +31,10 @@ extern pg_locale_t dat_create_locale_libc(HeapTuple dattuple);
 extern pg_locale_t coll_create_locale_libc(HeapTuple colltuple,
 										   MemoryContext context);
 
-extern int strncoll_libc(const char *arg1, ssize_t len1,
-						 const char *arg2, ssize_t len2,
-						 pg_locale_t locale);
-extern size_t strnxfrm_libc(char *dest, size_t destsize,
+static int	strncoll_libc(const char *arg1, ssize_t len1,
+						  const char *arg2, ssize_t len2,
+						  pg_locale_t locale);
+static size_t strnxfrm_libc(char *dest, size_t destsize,
 							const char *src, ssize_t srclen,
 							pg_locale_t locale);
 
@@ -43,20 +43,41 @@ static locale_t make_libc_collator(const char *collate,
 static void report_newlocale_failure(const char *localename);
 
 #ifdef WIN32
-static int strncoll_libc_win32_utf8(const char *arg1, ssize_t len1,
-									const char *arg2, ssize_t len2,
-									pg_locale_t locale);
+static int	strncoll_libc_win32_utf8(const char *arg1, ssize_t len1,
+									 const char *arg2, ssize_t len2,
+									 pg_locale_t locale);
 #endif
 
+static struct collate_methods collate_methods_libc = {
+	.strncoll = strncoll_libc,
+	.strnxfrm = strnxfrm_libc,
+	.strnxfrm_prefix = NULL,
+
+	/*
+	 * Unfortunately, it seems that strxfrm() for non-C collations is broken
+	 * on many common platforms; testing of multiple versions of glibc reveals
+	 * that, for many locales, strcoll() and strxfrm() do not return
+	 * consistent results. While no other libc other than Cygwin has so far
+	 * been shown to have a problem, we take the conservative course of action
+	 * for right now and disable this categorically.  (Users who are certain
+	 * this isn't a problem on their system can define TRUST_STRXFRM.)
+	 */
+#ifdef TRUST_STRXFRM
+	.strxfrm_is_safe = true,
+#else
+	.strxfrm_is_safe = false,
+#endif
+};
+
 pg_locale_t
 dat_create_locale_libc(HeapTuple dattuple)
 {
-	Form_pg_database	 dbform;
-	Datum				 datum;
-	const char			*datcollate;
-	const char			*datctype;
-	locale_t			 loc;
-	pg_locale_t			 result;
+	Form_pg_database dbform;
+	Datum		datum;
+	const char *datcollate;
+	const char *datctype;
+	locale_t	loc;
+	pg_locale_t result;
 
 	dbform = (Form_pg_database) GETSTRUCT(dattuple);
 
@@ -79,6 +100,8 @@ dat_create_locale_libc(HeapTuple dattuple)
 	result->ctype_is_c = (strcmp(datctype, "C") == 0) ||
 		(strcmp(datctype, "POSIX") == 0);
 	result->info.lt = loc;
+	if (!result->collate_is_c)
+		result->collate = &collate_methods_libc;
 
 	return result;
 }
@@ -86,12 +109,12 @@ dat_create_locale_libc(HeapTuple dattuple)
 pg_locale_t
 coll_create_locale_libc(HeapTuple colltuple, MemoryContext context)
 {
-	Form_pg_collation	 collform;
-	Datum				 datum;
-	const char			*collcollate;
-	const char			*collctype;
-	locale_t			 loc;
-	pg_locale_t			 result;
+	Form_pg_collation collform;
+	Datum		datum;
+	const char *collcollate;
+	const char *collctype;
+	locale_t	loc;
+	pg_locale_t result;
 
 	collform = (Form_pg_collation) GETSTRUCT(colltuple);
 
@@ -112,6 +135,8 @@ coll_create_locale_libc(HeapTuple colltuple, MemoryContext context)
 	result->ctype_is_c = (strcmp(collctype, "C") == 0) ||
 		(strcmp(collctype, "POSIX") == 0);
 	result->info.lt = loc;
+	if (!result->collate_is_c)
+		result->collate = &collate_methods_libc;
 
 	return result;
 }
@@ -400,4 +425,3 @@ report_newlocale_failure(const char *localename)
 			  errdetail("The operating system could not find any locale data for the locale name \"%s\".",
 						localename) : 0)));
 }
-
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 3b443df8014..6c2a0456f22 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -63,6 +63,36 @@ extern struct lconv *PGLC_localeconv(void);
 extern void cache_locale_time(void);
 
 
+struct pg_locale_struct;
+typedef struct pg_locale_struct *pg_locale_t;
+
+/* methods that define collation behavior */
+struct collate_methods
+{
+	/* required */
+	int			(*strncoll) (const char *arg1, ssize_t len1,
+							 const char *arg2, ssize_t len2,
+							 pg_locale_t locale);
+
+	/* required */
+	size_t		(*strnxfrm) (char *dest, size_t destsize,
+							 const char *src, ssize_t srclen,
+							 pg_locale_t locale);
+
+	/* optional */
+	size_t		(*strnxfrm_prefix) (char *dest, size_t destsize,
+									const char *src, ssize_t srclen,
+									pg_locale_t locale);
+
+	/*
+	 * If the strnxfrm method is not trusted to return the correct results,
+	 * set strxfrm_is_safe to false. It set to false, the method will not be
+	 * used in most cases, but the planner still expects it to be there for
+	 * estimation purposes (where incorrect results are acceptable).
+	 */
+	bool		strxfrm_is_safe;
+};
+
 /*
  * We use a discriminated union to hold either a locale_t or an ICU collator.
  * pg_locale_t is occasionally checked for truth, so make it a pointer.
@@ -85,6 +115,9 @@ struct pg_locale_struct
 	bool		deterministic;
 	bool		collate_is_c;
 	bool		ctype_is_c;
+
+	struct collate_methods *collate;	/* NULL if collate_is_c */
+
 	union
 	{
 		struct
-- 
2.34.1

