From 95f2684150e2938f2e555d16bbed4295a6dad279 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Tue, 19 Jul 2022 06:31:17 +1200
Subject: [PATCH v2 1/2] Default to BCP 47 locale in initdb on Windows.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Avoid selecting traditional Windows locale names written with English
words, because they are unstable and not recommended for use in
databases.  Since setlocale() returns such names, on Windows use
GetUserDefaultLocaleName() if the user didn't provide an explicit
locale.

Also update the documentation to recommend BCP 47 over the traditional
names when providing explicit values to initdb.

Reviewed-by: Juan José Santamaría Flecha <juanjo.santamaria@gmail.com>
Discussion: https://postgr.es/m/CA%2BhUKGJ%3DXThErgAQRoqfCy1bKPxXVuF0%3D2zDbB%2BSxDs59pv7Fw%40mail.gmail.com
---
 doc/src/sgml/charset.sgml | 10 ++++++++--
 src/bin/initdb/initdb.c   | 31 +++++++++++++++++++++++++++++--
 2 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml
index 445fd175d8..b656ca489f 100644
--- a/doc/src/sgml/charset.sgml
+++ b/doc/src/sgml/charset.sgml
@@ -83,8 +83,14 @@ initdb --locale=sv_SE
     system under what names depends on what was provided by the operating
     system vendor and what was installed.  On most Unix systems, the command
     <literal>locale -a</literal> will provide a list of available locales.
-    Windows uses more verbose locale names, such as <literal>German_Germany</literal>
-    or <literal>Swedish_Sweden.1252</literal>, but the principles are the same.
+   </para>
+
+   <para>
+    Like ICU, Windows uses BCP 47 language tags.
+    For example, <literal>sv-SE</literal> represents Swedish as spoken in Sweden.
+    Windows also supports more verbose locale names based on English words,
+    such as <literal>German_Germany</literal> or <literal>Swedish_Sweden.1252</literal>,
+    but these are not recommended.
    </para>
 
    <para>
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 89b888eaa5..3af08b7b99 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -59,6 +59,10 @@
 #include "sys/mman.h"
 #endif
 
+#ifdef WIN32
+#include <winnls.h>
+#endif
+
 #include "access/xlog_internal.h"
 #include "catalog/pg_authid_d.h"
 #include "catalog/pg_class_d.h" /* pgrminclude ignore */
@@ -2007,6 +2011,7 @@ locale_date_order(const char *locale)
 static void
 check_locale_name(int category, const char *locale, char **canonname)
 {
+	char	   *locale_copy;
 	char	   *save;
 	char	   *res;
 
@@ -2022,10 +2027,30 @@ check_locale_name(int category, const char *locale, char **canonname)
 
 	/* for setlocale() call */
 	if (!locale)
-		locale = "";
+	{
+#ifdef WIN32
+		wchar_t		wide_name[LOCALE_NAME_MAX_LENGTH];
+		char		name[LOCALE_NAME_MAX_LENGTH];
+
+		/* use Windows API to find the default in BCP47 format */
+		if (GetUserDefaultLocaleName(wide_name, LOCALE_NAME_MAX_LENGTH) == 0)
+			pg_fatal("failed to get default locale name: error code %lu",
+					 GetLastError());
+		if (WideCharToMultiByte(CP_ACP, 0, wide_name, -1, name,
+								LOCALE_NAME_MAX_LENGTH, NULL, NULL) == 0)
+			pg_fatal("failed to convert locale name: error code %lu",
+					 GetLastError());
+		locale_copy = pg_strdup(name);
+#else
+		/* use environment to find the default */
+		locale_copy = pg_strdup("");
+#endif
+	}
+	else
+		locale_copy = pg_strdup(locale);
 
 	/* set the locale with setlocale, to see if it accepts it. */
-	res = setlocale(category, locale);
+	res = setlocale(category, locale_copy);
 
 	/* save canonical name if requested. */
 	if (res && canonname)
@@ -2054,6 +2079,8 @@ check_locale_name(int category, const char *locale, char **canonname)
 			pg_fatal("invalid locale settings; check LANG and LC_* environment variables");
 		}
 	}
+
+	free(locale_copy);
 }
 
 /*
-- 
2.35.1

