From e5876ac466d5158d3aafa1cf92dc54ff45a6b996 Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Thu, 17 Jul 2025 13:07:50 -0700
Subject: [PATCH v1] initdb: new environment variable INITDB_LOCALE_PROVIDER

Add the INITDB_LOCALE_PROVIDER environment variable, which selects the
locale provider used by initdb when --locale-provider is not given.

Also specify default locales for both the builtin provider (C.UTF-8)
and ICU (und), so that plain initdb (without locale arguments
specified) will succeed for any provider. For the builtin provider's
UTF-8 based locales, if LC_CTYPE is not compatible with UTF-8, force
LC_CTYPE=C (with a warning) to avoid an encoding mismatch error.
---
 src/backend/commands/dbcommands.c             |  2 +-
 src/bin/initdb/initdb.c                       | 71 +++++++++++++++----
 src/bin/initdb/t/001_initdb.pl                | 11 +--
 src/bin/scripts/t/020_createdb.pl             | 69 ++++++++++--------
 .../modules/test_escape/t/001_test_escape.pl  |  2 +-
 5 files changed, 104 insertions(+), 51 deletions(-)

diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 502a45163c8..92a396b8406 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -1052,7 +1052,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 		dbctype = src_ctype;
 	if (dblocprovider == '\0')
 		dblocprovider = src_locprovider;
-	if (dblocale == NULL)
+	if (dblocale == NULL && dblocprovider == src_locprovider)
 		dblocale = src_locale;
 	if (dbicurules == NULL)
 		dbicurules = src_icurules;
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 62bbd08d9f6..60e5c9d4a31 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -82,6 +82,9 @@
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
 
+#define DEFAULT_LOCALE_PROVIDER		COLLPROVIDER_LIBC
+#define DEFAULT_BUILTIN_LOCALE		"C.UTF-8"
+#define DEFAULT_ICU_LOCALE			"und"
 
 /* Ideally this would be in a .h file, but it hardly seems worth the trouble */
 extern const char *select_default_timezone(const char *share_path);
@@ -144,7 +147,7 @@ static char *lc_monetary = NULL;
 static char *lc_numeric = NULL;
 static char *lc_time = NULL;
 static char *lc_messages = NULL;
-static char locale_provider = COLLPROVIDER_LIBC;
+static char locale_provider = '\0';
 static bool builtin_locale_specified = false;
 static char *datlocale = NULL;
 static bool icu_locale_specified = false;
@@ -2468,12 +2471,11 @@ setlocales(void)
 	lc_messages = canonname;
 #endif
 
-	if (locale_provider != COLLPROVIDER_LIBC && datlocale == NULL)
-		pg_fatal("locale must be specified if provider is %s",
-				 collprovider_name(locale_provider));
-
 	if (locale_provider == COLLPROVIDER_BUILTIN)
 	{
+		if (!datlocale)
+			datlocale = DEFAULT_BUILTIN_LOCALE;
+
 		if (strcmp(datlocale, "C") == 0)
 			canonname = "C";
 		else if (strcmp(datlocale, "C.UTF-8") == 0 ||
@@ -2491,11 +2493,13 @@ setlocales(void)
 	{
 		char	   *langtag;
 
+		if (!datlocale)
+			datlocale = DEFAULT_ICU_LOCALE;
+
 		/* canonicalize to a language tag */
 		langtag = icu_language_tag(datlocale);
 		printf(_("Using language tag \"%s\" for ICU locale \"%s\".\n"),
 			   langtag, datlocale);
-		pg_free(datlocale);
 		datlocale = langtag;
 
 		icu_validate_locale(datlocale);
@@ -2686,6 +2690,30 @@ setup_locale_encoding(void)
 {
 	setlocales();
 
+	/*
+	 * For the builtin provider (other than the "C" locale), default encoding
+	 * to UTF-8. If lc_ctype is not compatible with UTF-8, also force lc_ctype
+	 * to "C". On Windows, all locales are compatible with UTF-8.
+	 */
+	if (!encoding && locale_provider == COLLPROVIDER_BUILTIN &&
+		strcmp(datlocale, "C") != 0)
+	{
+#ifndef WIN32
+		int			ctype_enc = pg_get_encoding_from_locale(lc_ctype, false);
+
+		if (!(ctype_enc == PG_UTF8 ||
+			  ctype_enc == PG_SQL_ASCII))
+		{
+			pg_log_warning("setting LC_CTYPE to \"C\"");
+			pg_log_warning_detail("Encoding of LC_CTYPE locale \"%s\" does not match encoding required by builtin locale \"%s\".",
+								  lc_ctype, datlocale);
+			pg_log_warning_hint("Specify a UTF-8 compatible locale with --lc-ctype, or choose a different locale provider.");
+			lc_ctype = "C";
+		}
+#endif
+		encoding = "UTF-8";
+	}
+
 	if (locale_provider == COLLPROVIDER_LIBC &&
 		strcmp(lc_ctype, lc_collate) == 0 &&
 		strcmp(lc_ctype, lc_time) == 0 &&
@@ -2721,10 +2749,11 @@ setup_locale_encoding(void)
 		ctype_enc = pg_get_encoding_from_locale(lc_ctype, true);
 
 		/*
-		 * If ctype_enc=SQL_ASCII, it's compatible with any encoding. ICU does
-		 * not support SQL_ASCII, so select UTF-8 instead.
+		 * If ctype_enc=SQL_ASCII, it's compatible with any encoding. Neither
+		 * ICU nor the builtin provider supports SQL_ASCII, so select UTF-8
+		 * instead.
 		 */
-		if (locale_provider == COLLPROVIDER_ICU && ctype_enc == PG_SQL_ASCII)
+		if (locale_provider != COLLPROVIDER_LIBC && ctype_enc == PG_SQL_ASCII)
 			ctype_enc = PG_UTF8;
 
 		if (ctype_enc == -1)
@@ -2773,11 +2802,10 @@ setup_locale_encoding(void)
 		!check_locale_encoding(lc_collate, encodingid))
 		exit(1);				/* check_locale_encoding printed the error */
 
-	if (locale_provider == COLLPROVIDER_BUILTIN)
+	if (locale_provider == COLLPROVIDER_BUILTIN &&
+		strcmp(datlocale, "C") != 0)
 	{
-		if ((strcmp(datlocale, "C.UTF-8") == 0 ||
-			 strcmp(datlocale, "PG_UNICODE_FAST") == 0) &&
-			encodingid != PG_UTF8)
+		if (encodingid != PG_UTF8)
 			pg_fatal("builtin provider locale \"%s\" requires encoding \"%s\"",
 					 datlocale, "UTF-8");
 	}
@@ -3402,7 +3430,6 @@ main(int argc, char *argv[])
 		}
 	}
 
-
 	/*
 	 * Non-option argument specifies data directory as long as it wasn't
 	 * already specified with -D / --pgdata
@@ -3421,6 +3448,22 @@ main(int argc, char *argv[])
 		exit(1);
 	}
 
+	if (locale_provider == '\0')
+	{
+		char	   *provider_name = getenv("INITDB_LOCALE_PROVIDER");
+
+		if (!provider_name)
+			locale_provider = DEFAULT_LOCALE_PROVIDER;
+		else if (strcmp(provider_name, "builtin") == 0)
+			locale_provider = COLLPROVIDER_BUILTIN;
+		else if (strcmp(provider_name, "icu") == 0)
+			locale_provider = COLLPROVIDER_ICU;
+		else if (strcmp(provider_name, "libc") == 0)
+			locale_provider = COLLPROVIDER_LIBC;
+		else
+			pg_fatal("unrecognized locale provider: %s", provider_name);
+	}
+
 	if (builtin_locale_specified && locale_provider != COLLPROVIDER_BUILTIN)
 		pg_fatal("%s cannot be specified unless locale provider \"%s\" is chosen",
 				 "--builtin-locale", "builtin");
diff --git a/src/bin/initdb/t/001_initdb.pl b/src/bin/initdb/t/001_initdb.pl
index b7ef7ed8d06..ba3211a4aa6 100644
--- a/src/bin/initdb/t/001_initdb.pl
+++ b/src/bin/initdb/t/001_initdb.pl
@@ -113,14 +113,13 @@ SKIP:
 
 if ($ENV{with_icu} eq 'yes')
 {
-	command_fails_like(
+	command_ok(
 		[
 			'initdb', '--no-sync',
 			'--locale-provider' => 'icu',
 			"$tempdir/data2"
 		],
-		qr/initdb: error: locale must be specified if provider is icu/,
-		'locale provider ICU requires --icu-locale');
+		'locale provider ICU default locale');
 
 	command_ok(
 		[
@@ -200,13 +199,15 @@ else
 		'locale provider ICU fails since no ICU support');
 }
 
-command_fails(
+command_like(
 	[
 		'initdb', '--no-sync',
+		'--auth' => 'trust',
 		'--locale-provider' => 'builtin',
 		"$tempdir/data6"
 	],
-	'locale provider builtin fails without --locale');
+	qr/^\s+default collation:\s+C.UTF-8\n/ms,
+	'locale provider builtin defaults to C.UTF-8');
 
 command_ok(
 	[
diff --git a/src/bin/scripts/t/020_createdb.pl b/src/bin/scripts/t/020_createdb.pl
index a8293390ede..6003d213e89 100644
--- a/src/bin/scripts/t/020_createdb.pl
+++ b/src/bin/scripts/t/020_createdb.pl
@@ -16,6 +16,9 @@ my $node = PostgreSQL::Test::Cluster->new('main');
 $node->init;
 $node->start;
 
+my $datlocprovider = $node->safe_psql('postgres',
+	"SELECT datlocprovider FROM pg_database WHERE datname='template1'");
+
 $node->issues_sql_like(
 	[ 'createdb', 'foobar1' ],
 	qr/statement: CREATE DATABASE foobar1/,
@@ -33,19 +36,6 @@ $node->issues_sql_like(
 
 if ($ENV{with_icu} eq 'yes')
 {
-	# This fails because template0 uses libc provider and has no ICU
-	# locale set.  It would succeed if template0 used the icu
-	# provider.  XXX Maybe split into multiple tests?
-	$node->command_fails(
-		[
-			'createdb',
-			'--template' => 'template0',
-			'--encoding' => 'UTF8',
-			'--locale-provider' => 'icu',
-			'foobar4',
-		],
-		'create database with ICU fails without ICU locale specified');
-
 	$node->issues_sql_like(
 		[
 			'createdb',
@@ -130,14 +120,18 @@ else
 		'create database with ICU fails since no ICU support');
 }
 
-$node->command_fails(
-	[
-		'createdb',
-		'--template' => 'template0',
-		'--locale-provider' => 'builtin',
-		'tbuiltin1',
-	],
-	'create database with provider "builtin" fails without --locale');
+if ($datlocprovider eq 'c')
+{
+	$node->command_fails(
+		[
+			'createdb',
+			'--template' => 'template0',
+			'--encoding' => 'UTF8',
+			'--locale-provider' => 'builtin',
+			'foobar4',
+		],
+		'create database with builtin provider fails without locale specified');
+}
 
 $node->command_ok(
 	[
@@ -219,15 +213,30 @@ $node->command_fails(
 	],
 	'create database with provider "builtin" and ICU_RULES=""');
 
-$node->command_fails(
-	[
-		'createdb',
-		'--template' => 'template1',
-		'--locale-provider' => 'builtin',
-		'--locale' => 'C',
-		'tbuiltin9',
-	],
-	'create database with provider "builtin" not matching template');
+if ($datlocprovider eq 'b')
+{
+	$node->command_fails(
+		[
+			'createdb',
+			'--template' => 'template1',
+			'--locale-provider' => 'libc',
+			'--locale' => 'C',
+			'tbuiltin9',
+		],
+		'create database with provider "libc" not matching template');
+}
+else
+{
+	$node->command_fails(
+		[
+			'createdb',
+			'--template' => 'template1',
+			'--locale-provider' => 'builtin',
+			'--locale' => 'C',
+			'tbuiltin9',
+		],
+		'create database with provider "builtin" not matching template');
+}
 
 $node->command_fails([ 'createdb', 'foobar1' ],
 	'fails if database already exists');
diff --git a/src/test/modules/test_escape/t/001_test_escape.pl b/src/test/modules/test_escape/t/001_test_escape.pl
index 0d5aec3ed74..b29f093db28 100644
--- a/src/test/modules/test_escape/t/001_test_escape.pl
+++ b/src/test/modules/test_escape/t/001_test_escape.pl
@@ -12,7 +12,7 @@ $node->init();
 $node->start();
 
 $node->safe_psql('postgres',
-	q(CREATE DATABASE db_sql_ascii ENCODING "sql_ascii" TEMPLATE template0;));
+	q(CREATE DATABASE db_sql_ascii LOCALE_PROVIDER "builtin" LOCALE "C" ENCODING "sql_ascii" TEMPLATE template0;));
 
 my $cmd =
   [ 'test_escape', '--conninfo', $node->connstr . " dbname=db_sql_ascii" ];
-- 
2.43.0

