From 5b903c82f34f5da9cab58ecd0a2683454d6ac9ed Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Wed, 5 Jun 2024 14:48:07 -0700
Subject: [PATCH v6 1/3] Make datcollate/datctype accessible as a pg_locale_t.

get_db_env_locale() returns a libc locale representing the LC_COLLATE
/ LC_CTYPE environment, which is the same as the database default
collation if and only if the datlocprovider is libc.

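Update callers in ts_locale.c and wparser_def.c to use
get_db_env_locale() instead of a NULL pg_locale_t, and replace the
database_ctype_is_c global with the ctype_is_c field of that locale.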

Discussion: https://postgr.es/m/cfd9eb85-c52a-4ec9-a90e-a5e4de56e57d@eisentraut.org
Reviewed-by: Peter Eisentraut, Andreas Karlsson
---
 src/backend/tsearch/ts_locale.c   | 37 ++++++++++++++++---------
 src/backend/tsearch/wparser_def.c |  6 +++--
 src/backend/utils/adt/pg_locale.c | 45 ++++++++++++++++++++++++++++---
 src/backend/utils/init/postinit.c |  5 +---
 src/include/utils/pg_locale.h     |  5 ++--
 5 files changed, 74 insertions(+), 24 deletions(-)

diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c
index bc44599de6a..6befd8e82d6 100644
--- a/src/backend/tsearch/ts_locale.c
+++ b/src/backend/tsearch/ts_locale.c
@@ -13,6 +13,7 @@
  */
 #include "postgres.h"
 
+#include "catalog/pg_collation.h"
 #include "common/string.h"
 #include "storage/fd.h"
 #include "tsearch/ts_locale.h"
@@ -36,9 +37,11 @@ t_isdigit(const char *ptr)
 {
 	int			clen = pg_mblen(ptr);
 	wchar_t		character[WC_BUF_LEN];
-	pg_locale_t mylocale = 0;	/* TODO */
 
-	if (clen == 1 || database_ctype_is_c)
+	/* TODO: determine collation properly */
+	pg_locale_t mylocale = get_db_env_locale();
+
+	if (clen == 1 || mylocale->ctype_is_c)
 		return isdigit(TOUCHAR(ptr));
 
 	char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
@@ -51,9 +54,11 @@ t_isspace(const char *ptr)
 {
 	int			clen = pg_mblen(ptr);
 	wchar_t		character[WC_BUF_LEN];
-	pg_locale_t mylocale = 0;	/* TODO */
 
-	if (clen == 1 || database_ctype_is_c)
+	/* TODO: determine collation properly */
+	pg_locale_t mylocale = get_db_env_locale();
+
+	if (clen == 1 || mylocale->ctype_is_c)
 		return isspace(TOUCHAR(ptr));
 
 	char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
@@ -66,9 +71,11 @@ t_isalpha(const char *ptr)
 {
 	int			clen = pg_mblen(ptr);
 	wchar_t		character[WC_BUF_LEN];
-	pg_locale_t mylocale = 0;	/* TODO */
 
-	if (clen == 1 || database_ctype_is_c)
+	/* TODO: determine collation properly */
+	pg_locale_t mylocale = get_db_env_locale();
+
+	if (clen == 1 || mylocale->ctype_is_c)
 		return isalpha(TOUCHAR(ptr));
 
 	char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
@@ -81,9 +88,11 @@ t_isalnum(const char *ptr)
 {
 	int			clen = pg_mblen(ptr);
 	wchar_t		character[WC_BUF_LEN];
-	pg_locale_t mylocale = 0;	/* TODO */
 
-	if (clen == 1 || database_ctype_is_c)
+	/* TODO: determine collation properly */
+	pg_locale_t mylocale = get_db_env_locale();
+
+	if (clen == 1 || mylocale->ctype_is_c)
 		return isalnum(TOUCHAR(ptr));
 
 	char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
@@ -96,9 +105,11 @@ t_isprint(const char *ptr)
 {
 	int			clen = pg_mblen(ptr);
 	wchar_t		character[WC_BUF_LEN];
-	pg_locale_t mylocale = 0;	/* TODO */
 
-	if (clen == 1 || database_ctype_is_c)
+	/* TODO: determine collation properly */
+	pg_locale_t mylocale = get_db_env_locale();
+
+	if (clen == 1 || mylocale->ctype_is_c)
 		return isprint(TOUCHAR(ptr));
 
 	char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
@@ -266,7 +277,9 @@ char *
 lowerstr_with_len(const char *str, int len)
 {
 	char	   *out;
-	pg_locale_t mylocale = 0;	/* TODO */
+
+	/* TODO: determine collation properly */
+	pg_locale_t mylocale = get_db_env_locale();
 
 	if (len == 0)
 		return pstrdup("");
@@ -277,7 +290,7 @@ lowerstr_with_len(const char *str, int len)
 	 * Also, for a C locale there is no need to process as multibyte. From
 	 * backend/utils/adt/oracle_compat.c Teodor
 	 */
-	if (pg_database_encoding_max_length() > 1 && !database_ctype_is_c)
+	if (pg_database_encoding_max_length() > 1 && !mylocale->ctype_is_c)
 	{
 		wchar_t    *wstr,
 				   *wptr;
diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c
index 3919ef27b57..45caec0c4f0 100644
--- a/src/backend/tsearch/wparser_def.c
+++ b/src/backend/tsearch/wparser_def.c
@@ -17,6 +17,7 @@
 #include <limits.h>
 #include <wctype.h>
 
+#include "catalog/pg_collation.h"
 #include "commands/defrem.h"
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
@@ -299,10 +300,11 @@ TParserInit(char *str, int len)
 	 */
 	if (prs->charmaxlen > 1)
 	{
-		pg_locale_t mylocale = 0;	/* TODO */
+		/* TODO: determine collation properly */
+		pg_locale_t mylocale = get_db_env_locale();
 
 		prs->usewide = true;
-		if (database_ctype_is_c)
+		if (mylocale->ctype_is_c)
 		{
 			/*
 			 * char2wchar doesn't work for C-locale and sizeof(pg_wchar) could
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 627ab89d7cc..0295d834cc5 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -114,10 +114,8 @@ char	   *localized_full_days[7 + 1];
 char	   *localized_abbrev_months[12 + 1];
 char	   *localized_full_months[12 + 1];
 
-/* is the databases's LC_CTYPE the C locale? */
-bool		database_ctype_is_c = false;
-
 static struct pg_locale_struct default_locale;
+static struct pg_locale_struct database_env_locale;
 
 /* indicates whether locale information cache is valid */
 static bool CurrentLocaleConvValid = false;
@@ -1471,6 +1469,42 @@ pg_locale_deterministic(pg_locale_t locale)
 		return locale->deterministic;
 }
 
+/*
+ * Build the libc pg_locale_t for datcollate/datctype; must run only once.
+ */
+void
+init_db_env_locale(const char *datcollate, const char *datctype)
+{
+	Assert(database_env_locale.provider == (char) 0);
+
+	database_env_locale.provider = COLLPROVIDER_LIBC;
+	database_env_locale.deterministic = true;
+	database_env_locale.collate_is_c = (strcmp(datcollate, "C") == 0) ||
+		(strcmp(datcollate, "POSIX") == 0);
+	database_env_locale.ctype_is_c = (strcmp(datctype, "C") == 0) ||
+		(strcmp(datctype, "POSIX") == 0);
+
+	make_libc_collator(datcollate, datctype, &database_env_locale);
+}
+
+/*
+ * Return the static pg_locale_t representing the database environment locale.
+ *
+ * The provider is always libc, and it represents the server environment
+ * LC_COLLATE and LC_CTYPE. Asserts that init_db_env_locale() has run.
+ *
+ * Most callers should use pg_newlocale_from_collation(DEFAULT_COLLATION_OID)
+ * instead to get a pg_locale_t representing the database default collation
+ * (which might be any provider). Use get_db_env_locale() only if the libc
+ * provider is needed, such as with wchar2char()/char2wchar().
+ */
+pg_locale_t
+get_db_env_locale(void)
+{
+	Assert(database_env_locale.provider != (char) 0);
+	return &database_env_locale;
+}
+
 /*
  * Initialize default_locale with database locale settings.
  */
@@ -1482,6 +1516,8 @@ init_database_collation(void)
 	Datum		datum;
 	bool		isnull;
 
+	Assert(default_locale.provider == (char) 0);
+
 	/* Fetch our pg_database row normally, via syscache */
 	tup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
 	if (!HeapTupleIsValid(tup))
@@ -1571,7 +1607,10 @@ pg_newlocale_from_collation(Oid collid)
 	Assert(OidIsValid(collid));
 
 	if (collid == DEFAULT_COLLATION_OID)
+	{
+		Assert(default_locale.provider != (char) 0);
 		return &default_locale;
+	}
 
 	cache_entry = lookup_collation_cache(collid);
 
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 13524ea488a..23ac403e390 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -418,10 +418,7 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect
 						   " which is not recognized by setlocale().", ctype),
 				 errhint("Recreate the database with another locale or install the missing locale.")));
 
-	if (strcmp(ctype, "C") == 0 ||
-		strcmp(ctype, "POSIX") == 0)
-		database_ctype_is_c = true;
-
+	init_db_env_locale(collate, ctype);
 	init_database_collation();
 
 	/*
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index f41d33975be..47b2942c9d8 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -48,9 +48,6 @@ extern PGDLLIMPORT char *localized_full_days[];
 extern PGDLLIMPORT char *localized_abbrev_months[];
 extern PGDLLIMPORT char *localized_full_months[];
 
-/* is the databases's LC_CTYPE the C locale? */
-extern PGDLLIMPORT bool database_ctype_is_c;
-
 extern bool check_locale(int category, const char *locale, char **canonname);
 extern char *pg_perm_setlocale(int category, const char *locale);
 
@@ -112,6 +109,8 @@ extern void make_icu_collator(const char *iculocstr,
 							  struct pg_locale_struct *resultp);
 
 extern bool pg_locale_deterministic(pg_locale_t locale);
+extern void init_db_env_locale(const char *datcollate, const char *datctype);
+extern pg_locale_t get_db_env_locale(void);
 extern void init_database_collation(void);
 extern pg_locale_t pg_newlocale_from_collation(Oid collid);
 
-- 
2.34.1

