From 49dc1ce52adb3c2865da81a577e02aa5b46c22b4 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Tue, 13 Aug 2024 14:15:54 +1200
Subject: [PATCH v1 3/3] Provide thread-safe pg_localeconv_r().

This involves four different implementation strategies:

1.  For Windows, we now require _configthreadlocale() to be available
and work, and the documentation says that the object returned by
localeconv() is in thread-local memory.

2.  For glibc, we translate to nl_langinfo_l() calls, because it offers
the same information that way as an extension, and that API is
thread-safe.

3.  For macOS/*BSD, use localeconv_l(), which is thread-safe.

4.  For everything else, use uselocale() to set the locale for the
thread, and use a big ugly lock to defend against the returned object
being concurrently clobbered.  In practice this currently means only
Solaris.

The new call is used in pg_locale.c, replacing calls to setlocale() and
localeconv().
---
 configure                         |   2 +-
 configure.ac                      |   1 +
 meson.build                       |   1 +
 src/backend/utils/adt/pg_locale.c | 128 +++-----------
 src/include/pg_config.h.in        |   3 +
 src/include/port.h                |   6 +
 src/port/Makefile                 |   1 +
 src/port/meson.build              |   1 +
 src/port/pg_localeconv_r.c        | 275 ++++++++++++++++++++++++++++++
 9 files changed, 310 insertions(+), 108 deletions(-)
 create mode 100644 src/port/pg_localeconv_r.c

diff --git a/configure b/configure
index 2abbeb27944..60dcf1e436e 100755
--- a/configure
+++ b/configure
@@ -15232,7 +15232,7 @@ fi
 LIBS_including_readline="$LIBS"
 LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
 
-for ac_func in backtrace_symbols copyfile copy_file_range getifaddrs getpeerucred inet_pton kqueue mbstowcs_l memset_s posix_fallocate ppoll pthread_is_threaded_np setproctitle setproctitle_fast strchrnul strsignal syncfs sync_file_range uselocale wcstombs_l
+for ac_func in backtrace_symbols copyfile copy_file_range getifaddrs getpeerucred inet_pton localeconv_l kqueue mbstowcs_l memset_s posix_fallocate ppoll pthread_is_threaded_np setproctitle setproctitle_fast strchrnul strsignal syncfs sync_file_range uselocale wcstombs_l
 do :
   as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
 ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
diff --git a/configure.ac b/configure.ac
index c46ed2c591a..59e51a74629 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1735,6 +1735,7 @@ AC_CHECK_FUNCS(m4_normalize([
 	getifaddrs
 	getpeerucred
 	inet_pton
+	localeconv_l
 	kqueue
 	mbstowcs_l
 	memset_s
diff --git a/meson.build b/meson.build
index cd711c6d018..028a14547aa 100644
--- a/meson.build
+++ b/meson.build
@@ -2675,6 +2675,7 @@ func_checks = [
   ['inet_aton'],
   ['inet_pton'],
   ['kqueue'],
+  ['localeconv_l'],
   ['mbstowcs_l'],
   ['memset_s'],
   ['mkdtemp'],
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index cd3661e7279..dd4ba9e0e89 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -543,12 +543,8 @@ PGLC_localeconv(void)
 	static struct lconv CurrentLocaleConv;
 	static bool CurrentLocaleConvAllocated = false;
 	struct lconv *extlconv;
-	struct lconv worklconv;
-	char	   *save_lc_monetary;
-	char	   *save_lc_numeric;
-#ifdef WIN32
-	char	   *save_lc_ctype;
-#endif
+	struct lconv tmp;
+	struct lconv worklconv = {0};
 
 	/* Did we do it already? */
 	if (CurrentLocaleConvValid)
@@ -562,77 +558,21 @@ PGLC_localeconv(void)
 	}
 
 	/*
-	 * This is tricky because we really don't want to risk throwing error
-	 * while the locale is set to other than our usual settings.  Therefore,
-	 * the process is: collect the usual settings, set locale to special
-	 * setting, copy relevant data into worklconv using strdup(), restore
-	 * normal settings, convert data to desired encoding, and finally stash
-	 * the collected data in CurrentLocaleConv.  This makes it safe if we
-	 * throw an error during encoding conversion or run out of memory anywhere
-	 * in the process.  All data pointed to by struct lconv members is
-	 * allocated with strdup, to avoid premature elog(ERROR) and to allow
-	 * using a single cleanup routine.
+	 * Use thread-safe method of obtaining a copy of lconv from the operating
+	 * system.
 	 */
-	memset(&worklconv, 0, sizeof(worklconv));
-
-	/* Save prevailing values of monetary and numeric locales */
-	save_lc_monetary = setlocale(LC_MONETARY, NULL);
-	if (!save_lc_monetary)
-		elog(ERROR, "setlocale(NULL) failed");
-	save_lc_monetary = pstrdup(save_lc_monetary);
-
-	save_lc_numeric = setlocale(LC_NUMERIC, NULL);
-	if (!save_lc_numeric)
-		elog(ERROR, "setlocale(NULL) failed");
-	save_lc_numeric = pstrdup(save_lc_numeric);
-
-#ifdef WIN32
-
-	/*
-	 * The POSIX standard explicitly says that it is undefined what happens if
-	 * LC_MONETARY or LC_NUMERIC imply an encoding (codeset) different from
-	 * that implied by LC_CTYPE.  In practice, all Unix-ish platforms seem to
-	 * believe that localeconv() should return strings that are encoded in the
-	 * codeset implied by the LC_MONETARY or LC_NUMERIC locale name.  Hence,
-	 * once we have successfully collected the localeconv() results, we will
-	 * convert them from that codeset to the desired server encoding.
-	 *
-	 * Windows, of course, resolutely does things its own way; on that
-	 * platform LC_CTYPE has to match LC_MONETARY/LC_NUMERIC to get sane
-	 * results.  Hence, we must temporarily set that category as well.
-	 */
-
-	/* Save prevailing value of ctype locale */
-	save_lc_ctype = setlocale(LC_CTYPE, NULL);
-	if (!save_lc_ctype)
-		elog(ERROR, "setlocale(NULL) failed");
-	save_lc_ctype = pstrdup(save_lc_ctype);
-
-	/* Here begins the critical section where we must not throw error */
-
-	/* use numeric to set the ctype */
-	setlocale(LC_CTYPE, locale_numeric);
-#endif
-
-	/* Get formatting information for numeric */
-	setlocale(LC_NUMERIC, locale_numeric);
-	extlconv = localeconv();
-
-	/* Must copy data now in case setlocale() overwrites it */
+	if (pg_localeconv_r(locale_monetary,
+						locale_numeric,
+						&tmp) != 0)
+		elog(ERROR,
+			 "could not get lconv for LC_MONETARY = \"%s\", LC_NUMERIC = \"%s\": %m",
+			 locale_monetary, locale_numeric);
+
+	/* Must copy data now so we can re-encode it. */
+	extlconv = &tmp;
 	worklconv.decimal_point = strdup(extlconv->decimal_point);
 	worklconv.thousands_sep = strdup(extlconv->thousands_sep);
 	worklconv.grouping = strdup(extlconv->grouping);
-
-#ifdef WIN32
-	/* use monetary to set the ctype */
-	setlocale(LC_CTYPE, locale_monetary);
-#endif
-
-	/* Get formatting information for monetary */
-	setlocale(LC_MONETARY, locale_monetary);
-	extlconv = localeconv();
-
-	/* Must copy data now in case setlocale() overwrites it */
 	worklconv.int_curr_symbol = strdup(extlconv->int_curr_symbol);
 	worklconv.currency_symbol = strdup(extlconv->currency_symbol);
 	worklconv.mon_decimal_point = strdup(extlconv->mon_decimal_point);
@@ -650,45 +590,19 @@ PGLC_localeconv(void)
 	worklconv.p_sign_posn = extlconv->p_sign_posn;
 	worklconv.n_sign_posn = extlconv->n_sign_posn;
 
-	/*
-	 * Restore the prevailing locale settings; failure to do so is fatal.
-	 * Possibly we could limp along with nondefault LC_MONETARY or LC_NUMERIC,
-	 * but proceeding with the wrong value of LC_CTYPE would certainly be bad
-	 * news; and considering that the prevailing LC_MONETARY and LC_NUMERIC
-	 * are almost certainly "C", there's really no reason that restoring those
-	 * should fail.
-	 */
-#ifdef WIN32
-	if (!setlocale(LC_CTYPE, save_lc_ctype))
-		elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype);
-#endif
-	if (!setlocale(LC_MONETARY, save_lc_monetary))
-		elog(FATAL, "failed to restore LC_MONETARY to \"%s\"", save_lc_monetary);
-	if (!setlocale(LC_NUMERIC, save_lc_numeric))
-		elog(FATAL, "failed to restore LC_NUMERIC to \"%s\"", save_lc_numeric);
+	/* Free the contents of the object populated by pg_localeconv_r(). */
+	pg_localeconv_free(&tmp);
+
+	/* If any of the preceding strdup calls failed, complain now. */
+	if (!struct_lconv_is_valid(&worklconv))
+		ereport(ERROR,
+				(errcode(ERRCODE_OUT_OF_MEMORY),
+				 errmsg("out of memory")));
 
-	/*
-	 * At this point we've done our best to clean up, and can call functions
-	 * that might possibly throw errors with a clean conscience.  But let's
-	 * make sure we don't leak any already-strdup'd fields in worklconv.
-	 */
 	PG_TRY();
 	{
 		int			encoding;
 
-		/* Release the pstrdup'd locale names */
-		pfree(save_lc_monetary);
-		pfree(save_lc_numeric);
-#ifdef WIN32
-		pfree(save_lc_ctype);
-#endif
-
-		/* If any of the preceding strdup calls failed, complain now. */
-		if (!struct_lconv_is_valid(&worklconv))
-			ereport(ERROR,
-					(errcode(ERRCODE_OUT_OF_MEMORY),
-					 errmsg("out of memory")));
-
 		/*
 		 * Now we must perform encoding conversion from whatever's associated
 		 * with the locales into the database encoding.  If we can't identify
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 979925cc2e2..f3db06d155f 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -280,6 +280,9 @@
 /* Define to 1 if you have the `zstd' library (-lzstd). */
 #undef HAVE_LIBZSTD
 
+/* Define to 1 if you have the `localeconv_l' function. */
+#undef HAVE_LOCALECONV_L
+
 /* Define to 1 if `long int' works and is 64 bits. */
 #undef HAVE_LONG_INT_64
 
diff --git a/src/include/port.h b/src/include/port.h
index c7400052675..ac0cff79fc6 100644
--- a/src/include/port.h
+++ b/src/include/port.h
@@ -465,6 +465,12 @@ extern void *bsearch_arg(const void *key, const void *base0,
 						 int (*compar) (const void *, const void *, void *),
 						 void *arg);
 
+/* port/pg_localeconv_r.c */
+extern int	pg_localeconv_r(const char *lc_monetary,
+							const char *lc_numeric,
+							struct lconv *output);
+extern void pg_localeconv_free(struct lconv *lconv);
+
 /* port/chklocale.c */
 extern int	pg_get_encoding_from_locale(const char *ctype, bool write_message);
 
diff --git a/src/port/Makefile b/src/port/Makefile
index db7c02117b0..f24d2dbc138 100644
--- a/src/port/Makefile
+++ b/src/port/Makefile
@@ -45,6 +45,7 @@ OBJS = \
 	noblock.o \
 	path.o \
 	pg_bitutils.o \
+	pg_localeconv_r.o \
 	pg_strong_random.o \
 	pgcheckdir.o \
 	pgmkdirp.o \
diff --git a/src/port/meson.build b/src/port/meson.build
index ff54b7b53e9..9d4c4018523 100644
--- a/src/port/meson.build
+++ b/src/port/meson.build
@@ -7,6 +7,7 @@ pgport_sources = [
   'noblock.c',
   'path.c',
   'pg_bitutils.c',
+  'pg_localeconv_r.c',
   'pg_strong_random.c',
   'pgcheckdir.c',
   'pgmkdirp.c',
diff --git a/src/port/pg_localeconv_r.c b/src/port/pg_localeconv_r.c
new file mode 100644
index 00000000000..01f5e97deba
--- /dev/null
+++ b/src/port/pg_localeconv_r.c
@@ -0,0 +1,275 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_localeconv_r.c
+ *    Thread-safe implementations of localeconv()
+ *
+ * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    src/port/pg_localeconv_r.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "c.h"
+
+#if !defined(WIN32)
+#include <langinfo.h>
+#include <pthread.h>
+#endif
+
+#include <limits.h>
+
+#ifdef MON_THOUSANDS_SEP
+/*
+ * One of glibc's extended langinfo items detected.  Assume that the full set
+ * is present, which means we can use nl_langinfo_l() instead of localeconv().
+ */
+#define TRANSLATE_FROM_LANGINFO
+#endif
+
+struct lconv_member_info
+{
+	size_t		offset;
+#ifdef TRANSLATE_FROM_LANGINFO
+	nl_item		langinfo_item;
+#endif
+};
+
+#ifdef TRANSLATE_FROM_LANGINFO
+#define LCONV_MEMBER(name, langinfo_item) { (offsetof(struct lconv, name)), langinfo_item }
+#else
+#define LCONV_MEMBER(name, langinfo_item) { (offsetof(struct lconv, name)) }
+#endif
+
+/* The C string members that we have to strdup() and free(). */
+static const struct lconv_member_info lconv_string_members[] = {
+	LCONV_MEMBER(decimal_point, DECIMAL_POINT),
+	LCONV_MEMBER(thousands_sep, THOUSANDS_SEP),
+	LCONV_MEMBER(grouping, GROUPING),
+	LCONV_MEMBER(int_curr_symbol, INT_CURR_SYMBOL),
+	LCONV_MEMBER(currency_symbol, CURRENCY_SYMBOL),
+	LCONV_MEMBER(mon_decimal_point, MON_DECIMAL_POINT),
+	LCONV_MEMBER(mon_thousands_sep, MON_THOUSANDS_SEP),
+	LCONV_MEMBER(mon_grouping, MON_GROUPING),
+	LCONV_MEMBER(positive_sign, POSITIVE_SIGN),
+	LCONV_MEMBER(negative_sign, NEGATIVE_SIGN),
+};
+
+/* The char members we can just copy. */
+static const struct lconv_member_info lconv_char_members[] = {
+	LCONV_MEMBER(int_frac_digits, INT_FRAC_DIGITS),
+	LCONV_MEMBER(frac_digits, FRAC_DIGITS),
+	LCONV_MEMBER(p_cs_precedes, P_CS_PRECEDES),
+	LCONV_MEMBER(p_sep_by_space, P_SEP_BY_SPACE),
+	LCONV_MEMBER(n_cs_precedes, N_CS_PRECEDES),
+	LCONV_MEMBER(n_sep_by_space, N_SEP_BY_SPACE),
+	LCONV_MEMBER(p_sign_posn, P_SIGN_POSN),
+	LCONV_MEMBER(n_sign_posn, N_SIGN_POSN),
+};
+
+static inline char **
+lconv_string_member(struct lconv *lconv, int i)
+{
+	return (char **) ((char *) lconv + lconv_string_members[i].offset);
+}
+
+static inline char *
+lconv_char_member(struct lconv *lconv, int i)
+{
+	return (char *) lconv + lconv_char_members[i].offset;
+}
+
+/*
+ * Free the members of a struct lconv populated by pg_localeconv_r().  The
+ * struct itself is in storage provided by the caller of pg_localeconv_r().
+ */
+void
+pg_localeconv_free(struct lconv *lconv)
+{
+	for (int i = 0; i < lengthof(lconv_string_members); ++i)
+		free(*lconv_string_member(lconv, i));
+}
+
+#ifdef TRANSLATE_FROM_LANGINFO
+/*
+ * Copy the members we know about from nl_langinfo_l() into a caller-supplied
+ * struct lconv.
+ */
+static int
+pg_localeconv_from_langinfo(struct lconv *dst, locale_t loc)
+{
+	memset(dst, 0, sizeof(*dst));
+
+	/* Translate and copy the string members. */
+	for (int i = 0; i < lengthof(lconv_string_members); ++i)
+	{
+		char	   *string;
+
+		string = nl_langinfo_l(lconv_string_members[i].langinfo_item, loc);
+		if ((string = strdup(string)) == NULL)
+		{
+			pg_localeconv_free(dst);
+			errno = ENOMEM;
+			return -1;
+		}
+		*lconv_string_member(dst, i) = string;
+	}
+
+	/* Translate the char members. */
+	for (int i = 0; i < lengthof(lconv_char_members); ++i)
+		*lconv_char_member(dst, i) =
+			*nl_langinfo_l(lconv_char_members[i].langinfo_item, loc);
+
+	return 0;
+}
+#else
+/*
+ * Copy the members we know about from a system-provided struct lconv into a
+ * caller-supplied struct lconv.
+ */
+static int
+pg_localeconv_copy(struct lconv *dst, struct lconv *src)
+{
+	memset(dst, 0, sizeof(*dst));
+
+	/* Copy the string members. */
+	for (int i = 0; i < lengthof(lconv_string_members); ++i)
+	{
+		char	   *string = *lconv_string_member(src, i);
+
+		if (string && (string = strdup(string)) == NULL)
+		{
+			pg_localeconv_free(dst);
+			errno = ENOMEM;
+			return -1;
+		}
+		*lconv_string_member(dst, i) = string;
+	}
+
+	/* Copy the char members. */
+	for (int i = 0; i < lengthof(lconv_char_members); ++i)
+		*lconv_char_member(dst, i) = *lconv_char_member(src, i);
+
+	return 0;
+}
+#endif
+
+/*
+ * A thread-safe routine to get a copy of the lconv struct for a given
+ * LC_MONETARY and LC_NUMERIC.  We have four different strategies:
+ *
+ * 1.  On Windows, there is no uselocale(), but there is a way to put
+ * setlocale() into a thread-local mode temporarily.  Its localeconv() is
+ * documented as returning a pointer to thread-local storage, so we don't have
+ * to worry about concurrent callers.
+ *
+ * 2.  On Glibc, as an extension, all the information required to populate
+ * struct lconv is also available via nl_langinfo_l(), which is thread-safe.
+ *
+ * 3.  On macOS and *BSD, there is localeconv_l(), so we can create a temporary
+ * locale_t to pass in, and the result is a pointer to storage associated with
+ * the locale_t so we control its lifetime and we don't have to worry about
+ * concurrent calls clobbering it.
+ *
+ * 4.  Otherwise, we wrap plain old localeconv() in uselocale() to avoid
+ * touching the global locale, but the output buffer is allowed by the standard
+ * to be overwritten by concurrent calls to localeconv().  We protect against
+ * _this_ function doing that with a Big Lock, but there isn't much we can do
+ * about code outside our tree that might call localeconv(), given such a poor
+ * interface.
+ *
+ * Returns 0 on success.  Returns non-zero on failure, and sets errno.  On
+ * success, the caller is responsible for calling pg_localeconv_free() on the
+ * output struct to free the string members it contains.
+ */
+int
+pg_localeconv_r(const char *lc_monetary,
+				const char *lc_numeric,
+				struct lconv *output)
+{
+#ifdef WIN32
+	int			save_config_thread_locale;
+	char	   *save_lc_ctype = NULL;
+	char	   *save_lc_monetary = NULL;
+	char	   *save_lc_numeric = NULL;
+	int			result = -1;
+
+	/* Put setlocale() into thread-local mode. */
+	save_config_thread_locale = _configthreadlocale(_ENABLE_PER_THREAD_LOCALE);
+
+	/* Capture current settings first; setlocale(cat, NULL) only queries. */
+	save_lc_ctype = setlocale(LC_CTYPE, NULL);
+	if (save_lc_ctype && !(save_lc_ctype = strdup(save_lc_ctype)))
+		goto exit;
+	save_lc_monetary = setlocale(LC_MONETARY, NULL);
+	if (save_lc_monetary && !(save_lc_monetary = strdup(save_lc_monetary)))
+		goto exit;
+	save_lc_numeric = setlocale(LC_NUMERIC, NULL);
+	if (save_lc_numeric && !(save_lc_numeric = strdup(save_lc_numeric)))
+		goto exit;
+
+	/* Windows needs LC_CTYPE's encoding to match, so use lc_numeric for it. */
+	if (!setlocale(LC_CTYPE, lc_numeric) ||
+		!setlocale(LC_MONETARY, lc_monetary) ||
+		!setlocale(LC_NUMERIC, lc_numeric))
+		errno = EINVAL;		/* a requested locale could not be applied */
+	else
+		result = pg_localeconv_copy(output, localeconv());
+
+exit:
+	/* Restore everything we changed. */
+	if (save_lc_ctype)
+	{
+		setlocale(LC_CTYPE, save_lc_ctype);
+		free(save_lc_ctype);
+	}
+	if (save_lc_monetary)
+	{
+		setlocale(LC_MONETARY, save_lc_monetary);
+		free(save_lc_monetary);
+	}
+	if (save_lc_numeric)
+	{
+		setlocale(LC_NUMERIC, save_lc_numeric);
+		free(save_lc_numeric);
+	}
+	_configthreadlocale(save_config_thread_locale);
+	return result;
+#else
+	locale_t	tmp;
+	locale_t	loc;
+	int			result;
+
+	tmp = newlocale(LC_MONETARY_MASK, lc_monetary, 0);
+	if (tmp == 0)
+		return -1;
+	loc = newlocale(LC_NUMERIC_MASK, lc_numeric, tmp);
+	if (loc == 0)
+	{
+		freelocale(tmp);
+		return -1;
+	}
+#if defined(TRANSLATE_FROM_LANGINFO)
+	result = pg_localeconv_from_langinfo(output, loc);
+#elif defined(HAVE_LOCALECONV_L)
+	result = pg_localeconv_copy(output, localeconv_l(loc));
+#else
+	{
+		static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;
+		locale_t	save = uselocale(loc);
+
+		pthread_mutex_lock(&big_lock);
+		result = pg_localeconv_copy(output, localeconv());
+		pthread_mutex_unlock(&big_lock);
+
+		uselocale(save);
+	}
+#endif
+
+	freelocale(loc);
+	return result;
+#endif
+}
-- 
2.46.0

