From d71a6a017e4eb794438d52a7ff30ee1e24af257d Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Tue, 7 Mar 2023 21:25:54 -0800
Subject: [PATCH v1] Fix t_isspace(), etc., when datlocprovider=i and
 datctype=C.

Check whether the datctype is C to determine whether t_isspace() and
related functions use isspace() or iswspace().

Previously, t_isspace() checked whether the database default collation
was C, which is incorrect when the default collation uses the ICU
provider.

Backpatch-through: 15
---
 src/backend/tsearch/ts_locale.c   | 18 ++++++------------
 src/backend/tsearch/wparser_def.c |  3 +--
 src/backend/utils/adt/pg_locale.c |  3 +++
 src/backend/utils/init/postinit.c |  4 ++++
 src/include/utils/pg_locale.h     |  2 ++
 5 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c
index 0c03170990..f1150d30b7 100644
--- a/src/backend/tsearch/ts_locale.c
+++ b/src/backend/tsearch/ts_locale.c
@@ -38,10 +38,9 @@ t_isdigit(const char *ptr)
 {
 	int			clen = pg_mblen(ptr);
 	wchar_t		character[WC_BUF_LEN];
-	Oid			collation = DEFAULT_COLLATION_OID;	/* TODO */
 	pg_locale_t mylocale = 0;	/* TODO */
 
-	if (clen == 1 || lc_ctype_is_c(collation))
+	if (clen == 1 || database_ctype_is_c)
 		return isdigit(TOUCHAR(ptr));
 
 	char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
@@ -54,10 +53,9 @@ t_isspace(const char *ptr)
 {
 	int			clen = pg_mblen(ptr);
 	wchar_t		character[WC_BUF_LEN];
-	Oid			collation = DEFAULT_COLLATION_OID;	/* TODO */
 	pg_locale_t mylocale = 0;	/* TODO */
 
-	if (clen == 1 || lc_ctype_is_c(collation))
+	if (clen == 1 || database_ctype_is_c)
 		return isspace(TOUCHAR(ptr));
 
 	char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
@@ -70,10 +68,9 @@ t_isalpha(const char *ptr)
 {
 	int			clen = pg_mblen(ptr);
 	wchar_t		character[WC_BUF_LEN];
-	Oid			collation = DEFAULT_COLLATION_OID;	/* TODO */
 	pg_locale_t mylocale = 0;	/* TODO */
 
-	if (clen == 1 || lc_ctype_is_c(collation))
+	if (clen == 1 || database_ctype_is_c)
 		return isalpha(TOUCHAR(ptr));
 
 	char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
@@ -86,10 +83,9 @@ t_isalnum(const char *ptr)
 {
 	int			clen = pg_mblen(ptr);
 	wchar_t		character[WC_BUF_LEN];
-	Oid			collation = DEFAULT_COLLATION_OID;	/* TODO */
 	pg_locale_t mylocale = 0;	/* TODO */
 
-	if (clen == 1 || lc_ctype_is_c(collation))
+	if (clen == 1 || database_ctype_is_c)
 		return isalnum(TOUCHAR(ptr));
 
 	char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
@@ -102,10 +98,9 @@ t_isprint(const char *ptr)
 {
 	int			clen = pg_mblen(ptr);
 	wchar_t		character[WC_BUF_LEN];
-	Oid			collation = DEFAULT_COLLATION_OID;	/* TODO */
 	pg_locale_t mylocale = 0;	/* TODO */
 
-	if (clen == 1 || lc_ctype_is_c(collation))
+	if (clen == 1 || database_ctype_is_c)
 		return isprint(TOUCHAR(ptr));
 
 	char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
@@ -273,7 +268,6 @@ char *
 lowerstr_with_len(const char *str, int len)
 {
 	char	   *out;
-	Oid			collation = DEFAULT_COLLATION_OID;	/* TODO */
 	pg_locale_t mylocale = 0;	/* TODO */
 
 	if (len == 0)
@@ -285,7 +279,7 @@ lowerstr_with_len(const char *str, int len)
 	 * Also, for a C locale there is no need to process as multibyte. From
 	 * backend/utils/adt/oracle_compat.c Teodor
 	 */
-	if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c(collation))
+	if (pg_database_encoding_max_length() > 1 && !database_ctype_is_c)
 	{
 		wchar_t    *wstr,
 				   *wptr;
diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c
index cc3736454e..840a44ec00 100644
--- a/src/backend/tsearch/wparser_def.c
+++ b/src/backend/tsearch/wparser_def.c
@@ -297,11 +297,10 @@ TParserInit(char *str, int len)
 	 */
 	if (prs->charmaxlen > 1)
 	{
-		Oid			collation = DEFAULT_COLLATION_OID;	/* TODO */
 		pg_locale_t mylocale = 0;	/* TODO */
 
 		prs->usewide = true;
-		if (lc_ctype_is_c(collation))
+		if (database_ctype_is_c)
 		{
 			/*
 			 * char2wchar doesn't work for C-locale and sizeof(pg_wchar) could
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 4aa5eaa984..646c4b0de1 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -106,6 +106,9 @@ char	   *localized_full_days[7 + 1];
 char	   *localized_abbrev_months[12 + 1];
 char	   *localized_full_months[12 + 1];
 
+/* is the database's LC_CTYPE the C locale? */
+bool		database_ctype_is_c = false;
+
 /* indicates whether locale information cache is valid */
 static bool CurrentLocaleConvValid = false;
 static bool CurrentLCTimeValid = false;
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 2f07ca7a0e..563c254f42 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -419,6 +419,10 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect
 						   " which is not recognized by setlocale().", ctype),
 				 errhint("Recreate the database with another locale or install the missing locale.")));
 
+	if (strcmp(ctype, "C") == 0 ||
+		strcmp(ctype, "POSIX") == 0)
+		database_ctype_is_c = true;
+
 	if (dbform->datlocprovider == COLLPROVIDER_ICU)
 	{
 		datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_daticulocale, &isnull);
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index b8f22875a8..d61622eb31 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -47,6 +47,8 @@ extern PGDLLIMPORT char *localized_full_days[];
 extern PGDLLIMPORT char *localized_abbrev_months[];
 extern PGDLLIMPORT char *localized_full_months[];
 
+/* is the database's LC_CTYPE the C locale? */
+extern PGDLLIMPORT bool	database_ctype_is_c;
 
 extern bool check_locale(int category, const char *locale, char **canonname);
 extern char *pg_perm_setlocale(int category, const char *locale);
-- 
2.34.1

