From 35e24316222ea444f9f8bb96656fecf102b7812a Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Thu, 15 Aug 2024 16:45:27 +1200
Subject: [PATCH v4 3/3] Remove setlocale() from check_locale().

Validate locale names with newlocale() or _create_locale() instead, to
avoid clobbering global state.

This also removes the "canonicalization" of the locale name, which
previously had two user-visible effects:

1.  CREATE DATABASE ... LOCALE = '' would look up the locale from
environment variables, but this was not documented behavior and doesn't
seem too useful.  A default will normally be inherited from the template
if you just leave the option off.  (Note that initdb still chooses
default values from the server environment, and that would often be the
original source of the template database's values.)

2.  On Windows only, the setlocale() step reached by CREATE DATABASE ...
LOCALE = '...' did accept a wide range of formats and do some
canonicalization, when using (deprecated) pre-BCP 47 locale names, but
again it seems enough to let that happen in the initdb phase only.

Now, CREATE DATABASE and SET lc_XXX reject ''.  CREATE COLLATION
continues to accept it, as it never recorded canonicalized values so
there may be system catalogs containing verbatim '' in the wild.  We'll
need to research the upgrade implications before banning it there.

Thanks to Peter Eisentraut and Tom Lane for the suggestion that we might
not really need to preserve those behaviors in the backend, in our hunt
for non-thread-safe code.

Reviewed-by: Heikki Linnakangas <hlinnaka@iki.fi>
Discussion: https://postgr.es/m/CA%2BhUKGJqVe0%2BPv9dvC9dSums_PXxGo9SWcxYAMBguWJUGbWz-A%40mail.gmail.com
Discussion: https://postgr.es/m/CA%2BhUKGK57sgUYKO03jB4VarTsswfMyScFAyJpVnYD8c%2Bg12_mg%40mail.gmail.com
---
 src/backend/commands/dbcommands.c |   7 +-
 src/backend/utils/adt/pg_locale.c | 167 ++++++++++++++++++------------
 src/bin/initdb/initdb.c           |   2 -
 src/include/port/win32_port.h     |   1 -
 src/include/utils/pg_locale.h     |   2 +-
 5 files changed, 104 insertions(+), 75 deletions(-)

diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 7c92c3463b6..eaf072fff90 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -728,7 +728,6 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 	const char *dblocale = NULL;
 	char	   *dbicurules = NULL;
 	char		dblocprovider = '\0';
-	char	   *canonname;
 	int			encoding = -1;
 	bool		dbistemplate = false;
 	bool		dballowconnections = true;
@@ -1062,18 +1061,16 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 				 errmsg("invalid server encoding %d", encoding)));
 
 	/* Check that the chosen locales are valid, and get canonical spellings */
-	if (!check_locale(LC_COLLATE, dbcollate, &canonname))
+	if (!check_locale(LC_COLLATE, dbcollate))
 		ereport(ERROR,
 				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 				 errmsg("invalid LC_COLLATE locale name: \"%s\"", dbcollate),
 				 errhint("If the locale name is specific to ICU, use ICU_LOCALE.")));
-	dbcollate = canonname;
-	if (!check_locale(LC_CTYPE, dbctype, &canonname))
+	if (!check_locale(LC_CTYPE, dbctype))
 		ereport(ERROR,
 				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 				 errmsg("invalid LC_CTYPE locale name: \"%s\"", dbctype),
 				 errhint("If the locale name is specific to ICU, use ICU_LOCALE.")));
-	dbctype = canonname;
 
 	check_encoding_locale_matches(encoding, dbcollate, dbctype);
 
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 1cf8efcc7b7..383ae21dab0 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -190,6 +190,65 @@ wcstombs_l(char *dest, const wchar_t *src, size_t n, locale_t loc)
 }
 #endif
 
+/*
+ * The category names as strings.  These are the names of the environment
+ * variables that define the server locale environment.  We always unset
+ * LC_ALL, so we only need the actual categories.
+ */
+static const char *
+get_lc_category_name(int category)
+{
+	switch (category)
+	{
+		case LC_COLLATE:
+			return "LC_COLLATE";
+		case LC_CTYPE:
+			return "LC_CTYPE";
+#ifdef LC_MESSAGES
+		case LC_MESSAGES:
+			return "LC_MESSAGES";
+#endif
+		case LC_MONETARY:
+			return "LC_MONETARY";
+		case LC_NUMERIC:
+			return "LC_NUMERIC";
+		case LC_TIME:
+			return "LC_TIME";
+		default:
+			return NULL;
+	};
+};
+
+#ifndef WIN32
+/*
+ * The newlocale() function needs LC_xxx_MASK, but sometimes we have LC_xxx,
+ * and POSIX doesn't offer a way to translate.
+ */
+static int
+get_lc_category_mask(int category)
+{
+	switch (category)
+	{
+		case LC_COLLATE:
+			return LC_COLLATE_MASK;
+		case LC_CTYPE:
+			return LC_CTYPE_MASK;
+#ifdef LC_MESSAGES
+		case LC_MESSAGES:
+			return LC_MESSAGES_MASK;
+#endif
+		case LC_MONETARY:
+			return LC_MONETARY_MASK;
+		case LC_NUMERIC:
+			return LC_NUMERIC_MASK;
+		case LC_TIME:
+			return LC_TIME_MASK;
+		default:
+			return 0;
+	};
+}
+#endif
+
 /*
  * pg_perm_setlocale
  *
@@ -257,38 +316,9 @@ pg_perm_setlocale(int category, const char *locale)
 #endif
 	}
 
-	switch (category)
-	{
-		case LC_COLLATE:
-			envvar = "LC_COLLATE";
-			break;
-		case LC_CTYPE:
-			envvar = "LC_CTYPE";
-			break;
-#ifdef LC_MESSAGES
-		case LC_MESSAGES:
-			envvar = "LC_MESSAGES";
-#ifdef WIN32
-			result = IsoLocaleName(locale);
-			if (result == NULL)
-				result = (char *) locale;
-			elog(DEBUG3, "IsoLocaleName() executed; locale: \"%s\"", result);
-#endif							/* WIN32 */
-			break;
-#endif							/* LC_MESSAGES */
-		case LC_MONETARY:
-			envvar = "LC_MONETARY";
-			break;
-		case LC_NUMERIC:
-			envvar = "LC_NUMERIC";
-			break;
-		case LC_TIME:
-			envvar = "LC_TIME";
-			break;
-		default:
-			elog(FATAL, "unrecognized LC category: %d", category);
-			return NULL;		/* keep compiler quiet */
-	}
+	envvar = get_lc_category_name(category);
+	if (!envvar)
+		elog(FATAL, "unrecognized LC category: %d", category);
 
 	if (setenv(envvar, result, 1) != 0)
 		return NULL;
@@ -299,43 +329,48 @@ pg_perm_setlocale(int category, const char *locale)
 
 /*
  * Is the locale name valid for the locale category?
- *
- * If successful, and canonname isn't NULL, a palloc'd copy of the locale's
- * canonical name is stored there.  This is especially useful for figuring out
- * what locale name "" means (ie, the server environment value).  (Actually,
- * it seems that on most implementations that's the only thing it's good for;
- * we could wish that setlocale gave back a canonically spelled version of
- * the locale name, but typically it doesn't.)
  */
 bool
-check_locale(int category, const char *locale, char **canonname)
+check_locale(int category, const char *locale)
 {
-	char	   *save;
-	char	   *res;
-
-	if (canonname)
-		*canonname = NULL;		/* in case of failure */
-
-	save = setlocale(category, NULL);
-	if (!save)
-		return false;			/* won't happen, we hope */
+	locale_t	loc;
 
-	/* save may be pointing at a modifiable scratch variable, see above. */
-	save = pstrdup(save);
-
-	/* set the locale with setlocale, to see if it accepts it. */
-	res = setlocale(category, locale);
+	if (locale[0] == 0)
+	{
+		/*
+		 * We only accept explicit locale names, not "".  We don't want to
+		 * rely on environment variables in the backend.
+		 */
+		return false;
+	}
 
-	/* save canonical name if requested. */
-	if (res && canonname)
-		*canonname = pstrdup(res);
+	/*
+	 * See if we can open it.  Unfortunately we can't always distinguish
+	 * out-of-memory from invalid locale name.
+	 */
+	errno = ENOENT;
+#ifdef WIN32
+	loc = _create_locale(category, locale);
+	if (loc == (locale_t) 0)
+		_dosmaperr(GetLastError());
+#else
+	loc = newlocale(get_lc_category_mask(category), locale, (locale_t) 0);
+#endif
+	if (loc == (locale_t) 0)
+	{
+		if (errno == ENOMEM)
+			elog(ERROR, "out of memory");
 
-	/* restore old value. */
-	if (!setlocale(category, save))
-		elog(WARNING, "failed to restore old locale \"%s\"", save);
-	pfree(save);
+		/* Otherwise assume the locale doesn't exist. */
+		return false;
+	}
+#ifdef WIN32
+	_free_locale(loc);
+#else
+	freelocale(loc);
+#endif
 
-	return (res != NULL);
+	return true;
 }
 
 
@@ -353,7 +388,7 @@ check_locale(int category, const char *locale, char **canonname)
 bool
 check_locale_monetary(char **newval, void **extra, GucSource source)
 {
-	return check_locale(LC_MONETARY, *newval, NULL);
+	return check_locale(LC_MONETARY, *newval);
 }
 
 void
@@ -365,7 +400,7 @@ assign_locale_monetary(const char *newval, void *extra)
 bool
 check_locale_numeric(char **newval, void **extra, GucSource source)
 {
-	return check_locale(LC_NUMERIC, *newval, NULL);
+	return check_locale(LC_NUMERIC, *newval);
 }
 
 void
@@ -377,7 +412,7 @@ assign_locale_numeric(const char *newval, void *extra)
 bool
 check_locale_time(char **newval, void **extra, GucSource source)
 {
-	return check_locale(LC_TIME, *newval, NULL);
+	return check_locale(LC_TIME, *newval);
 }
 
 void
@@ -413,7 +448,7 @@ check_locale_messages(char **newval, void **extra, GucSource source)
 	 * On Windows, we can't even check the value, so accept blindly
 	 */
 #if defined(LC_MESSAGES) && !defined(WIN32)
-	return check_locale(LC_MESSAGES, *newval, NULL);
+	return check_locale(LC_MESSAGES, *newval);
 #else
 	return true;
 #endif
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index f00718a0150..1aa1c26785c 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -2126,8 +2126,6 @@ locale_date_order(const char *locale)
  * it seems that on most implementations that's the only thing it's good for;
  * we could wish that setlocale gave back a canonically spelled version of
  * the locale name, but typically it doesn't.)
- *
- * this should match the backend's check_locale() function
  */
 static void
 check_locale_name(int category, const char *locale, char **canonname)
diff --git a/src/include/port/win32_port.h b/src/include/port/win32_port.h
index 7ffe5891c69..f2c92525b61 100644
--- a/src/include/port/win32_port.h
+++ b/src/include/port/win32_port.h
@@ -471,7 +471,6 @@ extern char *pgwin32_setlocale(int category, const char *locale);
 
 #define setlocale(a,b) pgwin32_setlocale(a,b)
 
-
 /* In backend/port/win32/signal.c */
 extern PGDLLIMPORT volatile int pg_signal_queue;
 extern PGDLLIMPORT int pg_signal_mask;
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index f41d33975be..5b73526c92c 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -51,7 +51,7 @@ extern PGDLLIMPORT char *localized_full_months[];
 /* is the databases's LC_CTYPE the C locale? */
 extern PGDLLIMPORT bool database_ctype_is_c;
 
-extern bool check_locale(int category, const char *locale, char **canonname);
+extern bool check_locale(int category, const char *locale);
 extern char *pg_perm_setlocale(int category, const char *locale);
 
 extern bool lc_collate_is_c(Oid collation);
-- 
2.46.0

