From b2e42cef9d8080ad27ef76444b74a72e5cda922c Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Wed, 15 Feb 2023 23:05:08 -0800
Subject: [PATCH v2] ICU locale string canonicalization and validation.

Before storing the locale name in the catalog, convert to a BCP47
language tag. The language tag should hold all of the necessary
information and also be an unambiguous representation of the locale.

Also, add a new GUC icu_locale_validation. When set to true, it raises
an ERROR if the locale string is malformed or if it is not a valid
locale in ICU.

During pg_upgrade, the previous locale string may need to be preserved
if the language tag resolves to a different actual locale.

Discussion: https://postgr.es/m/11b1eeb7e7667fdd4178497aeb796c48d26e69b9.camel@j-davis.com
---
 doc/src/sgml/config.sgml                      |  16 ++
 src/backend/commands/collationcmds.c          |  63 +++--
 src/backend/commands/dbcommands.c             |  81 +++++++
 src/backend/utils/adt/pg_locale.c             | 226 +++++++++++++++++-
 src/backend/utils/misc/guc_tables.c           |  10 +
 src/backend/utils/misc/postgresql.conf.sample |   2 +
 src/include/commands/dbcommands.h             |   1 +
 src/include/utils/pg_locale.h                 |   4 +
 .../regress/expected/collate.icu.utf8.out     |  16 ++
 src/test/regress/sql/collate.icu.utf8.sql     |   5 +
 10 files changed, 395 insertions(+), 29 deletions(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index ecd9aa73ef..d137159532 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -9775,6 +9775,22 @@ SET XML OPTION { DOCUMENT | CONTENT };
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-icu-locale-validation" xreflabel="icu_locale_validation">
+      <term><varname>icu_locale_validation</varname> (<type>boolean</type>)
+      <indexterm>
+       <primary><varname>icu_locale_validation</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        If set to <literal>true</literal>, raises an error when an ICU locale
+        string is malformed or does not name a valid ICU locale; otherwise only
+        a warning is issued. Never causes a locale string to be rejected during
+        <xref linkend="pgupgrade"/>. The default is <literal>false</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="guc-default-text-search-config" xreflabel="default_text_search_config">
       <term><varname>default_text_search_config</varname> (<type>string</type>)
       <indexterm>
diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c
index eb62d285ea..d1fc46777c 100644
--- a/src/backend/commands/collationcmds.c
+++ b/src/backend/commands/collationcmds.c
@@ -47,6 +47,8 @@ typedef struct
 	int			enc;			/* encoding */
 } CollAliasData;
 
+extern bool icu_locale_validation;
+
 
 /*
  * CREATE COLLATION
@@ -240,10 +242,45 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
 		}
 		else if (collprovider == COLLPROVIDER_ICU)
 		{
+#ifdef USE_ICU
+			char	*langtag;
+			int		 elevel = WARNING;
+
+			/* can't reject previously-accepted locales during upgrade */
+			if (!IsBinaryUpgrade && icu_locale_validation)
+				elevel = ERROR;
+
 			if (!colliculocale)
 				ereport(ERROR,
 						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
 						 errmsg("parameter \"locale\" must be specified")));
+
+			check_icu_locale(colliculocale);
+			langtag = get_icu_locale(colliculocale);
+			if (langtag)
+			{
+				ereport(NOTICE,
+						(errmsg("using language tag \"%s\" for locale \"%s\"",
+								langtag, colliculocale)));
+
+				if (!icu_collator_exists(langtag))
+					ereport(elevel,
+							(errmsg("ICU collator for language tag \"%s\" not found",
+									langtag)));
+
+				colliculocale = langtag;
+			}
+			else
+			{
+				ereport(elevel,
+						(errmsg("could not convert locale \"%s\" to language tag",
+								colliculocale)));
+			}
+#else
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("ICU is not supported in this build")));
+#endif
 		}
 
 		/*
@@ -556,26 +593,6 @@ cmpaliases(const void *a, const void *b)
 
 
 #ifdef USE_ICU
-/*
- * Get the ICU language tag for a locale name.
- * The result is a palloc'd string.
- */
-static char *
-get_icu_language_tag(const char *localename)
-{
-	char		buf[ULOC_FULLNAME_CAPACITY];
-	UErrorCode	status;
-
-	status = U_ZERO_ERROR;
-	uloc_toLanguageTag(localename, buf, sizeof(buf), true, &status);
-	if (U_FAILURE(status))
-		ereport(ERROR,
-				(errmsg("could not convert locale name \"%s\" to language tag: %s",
-						localename, u_errorName(status))));
-
-	return pstrdup(buf);
-}
-
 /*
  * Get a comment (specifically, the display name) for an ICU locale.
  * The result is a palloc'd string, or NULL if we can't get a comment
@@ -938,7 +955,11 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
 			else
 				name = uloc_getAvailable(i);
 
-			langtag = get_icu_language_tag(name);
+			langtag = icu_language_tag(name);
+			if (langtag == NULL)
+				ereport(ERROR,
+						(errmsg("could not convert locale name \"%s\" to language tag",
+								name)));
 			iculocstr = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : name;
 
 			/*
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index ef05633bb0..805e754dcb 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -109,6 +109,7 @@ typedef struct CreateDBRelInfo
 	bool		permanent;		/* relation is permanent or unlogged */
 } CreateDBRelInfo;
 
+extern bool icu_locale_validation;
 
 /* non-export function prototypes */
 static void createdb_failure_callback(int code, Datum arg);
@@ -1029,6 +1030,14 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 
 	if (dblocprovider == COLLPROVIDER_ICU)
 	{
+#ifdef USE_ICU
+		char	*langtag;
+		int		 elevel = WARNING;
+
+		/* can't reject previously-accepted locales during upgrade */
+		if (!IsBinaryUpgrade && icu_locale_validation)
+			elevel = ERROR;
+
 		if (!(is_encoding_supported_by_icu(encoding)))
 			ereport(ERROR,
 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
@@ -1045,6 +1054,31 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 					 errmsg("ICU locale must be specified")));
 
 		check_icu_locale(dbiculocale);
+		langtag = get_icu_locale(dbiculocale);
+		if (langtag)
+		{
+			ereport(NOTICE,
+					(errmsg("using language tag \"%s\" for locale \"%s\"",
+							langtag, dbiculocale)));
+
+			if (!icu_collator_exists(langtag))
+				ereport(elevel,
+						(errmsg("ICU collator for language tag \"%s\" not found",
+								langtag)));
+
+			dbiculocale = langtag;
+		}
+		else
+		{
+			ereport(elevel,
+					(errmsg("could not convert locale \"%s\" to language tag",
+							dbiculocale)));
+		}
+#else
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("ICU is not supported in this build")));
+#endif
 	}
 	else
 	{
@@ -1463,6 +1497,53 @@ check_encoding_locale_matches(int encoding, const char *collate, const char *cty
 						   pg_encoding_to_char(collate_encoding))));
 }
 
+/*
+ * Given the input ICU locale string, return a new string in a form suitable
+ * for storing in the catalog.
+ *
+ * Ordinarily this just converts to a language tag, but we need to make an
+ * allowance for invalid locale strings that come from earlier versions of
+ * Postgres while upgrading.
+ *
+ * Converting to a language tag performs "level 2 canonicalization". In
+ * addition to producing a consistent result format, level 2 canonicalization
+ * is able to more accurately interpret different input locale string formats,
+ * such as POSIX and .NET IDs. But prior to Postgres version 16, input locale
+ * strings were not canonicalized; the raw string provided by the user was
+ * stored in the catalog and passed directly to ucol_open().
+ *
+ * The raw string may resolve to the wrong actual collator when passed
+ * directly to ucol_open(), but indexes in older versions may depend on that
+ * actual collator. Therefore, during binary upgrade, we preserve the invalid
+ * raw string if it resolves to a different actual collator than the language
+ * tag. If it resolves to the same actual collator, then we proceed using the
+ * language tag.
+ */
+char *
+get_icu_locale(const char *requested_locale)
+{
+#ifdef USE_ICU
+	char	   *lang_tag = icu_language_tag(requested_locale);
+
+	if (lang_tag != NULL && IsBinaryUpgrade &&
+		!check_equivalent_icu_locales(requested_locale, lang_tag))
+	{
+		ereport(WARNING,
+				(errmsg("language tag \"%s\" resolves to different actual collator "
+						"than raw locale string \"%s\"",
+						lang_tag, requested_locale)));
+		pfree(lang_tag);
+		return NULL;
+	}
+	return lang_tag;
+#else
+	ereport(ERROR,
+			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+			 errmsg("ICU is not supported in this build")));
+	return NULL;				/* keep compiler quiet */
+#endif
+}
+
 /* Error cleanup callback for createdb */
 static void
 createdb_failure_callback(int code, Datum arg)
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 059e4fd79f..1764e51645 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1945,15 +1945,12 @@ icu_set_collation_attributes(UCollator *collator, const char *loc)
 	}
 }
 
-#endif							/* USE_ICU */
-
 /*
  * Check if the given locale ID is valid, and ereport(ERROR) if it isn't.
  */
 void
 check_icu_locale(const char *icu_locale)
 {
-#ifdef USE_ICU
 	UCollator  *collator;
 	UErrorCode	status;
 
@@ -1967,13 +1964,226 @@ check_icu_locale(const char *icu_locale)
 	if (U_ICU_VERSION_MAJOR_NUM < 54)
 		icu_set_collation_attributes(collator, icu_locale);
 	ucol_close(collator);
-#else
-	ereport(ERROR,
-			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-			 errmsg("ICU is not supported in this build")));
-#endif
 }
 
+/*
+ * Test if the given locales resolve to the same actual collator with the same
+ * attributes and version.
+ */
+bool
+check_equivalent_icu_locales(const char *locale1, const char *locale2)
+{
+	const UColAttribute collAtt[]	   = {
+		UCOL_FRENCH_COLLATION,
+		UCOL_ALTERNATE_HANDLING,
+		UCOL_CASE_FIRST,
+		UCOL_CASE_LEVEL,
+		UCOL_NORMALIZATION_MODE,
+		UCOL_DECOMPOSITION_MODE,
+		UCOL_STRENGTH,
+		UCOL_HIRAGANA_QUATERNARY_MODE,
+		UCOL_NUMERIC_COLLATION};
+	int              n_collAtt		   = sizeof(collAtt)/sizeof(*collAtt);
+	const char		*actual1, *actual2;
+	UVersionInfo     versionInfo1;
+	UVersionInfo	 versionInfo2;
+	char             version1[U_MAX_VERSION_STRING_LENGTH];
+	char             version2[U_MAX_VERSION_STRING_LENGTH];
+	UCollator		*collator1		   = NULL;
+	UCollator		*collator2		   = NULL;
+	UErrorCode		 status;
+	bool			 result			   = false;
+
+	/*
+	 * Be careful not to return without closing the collators.
+	 */
+
+	status = U_ZERO_ERROR;
+	collator1 = ucol_open(locale1, &status);
+	if (U_FAILURE(status))
+		goto cleanup;
+
+	status = U_ZERO_ERROR;
+	collator2 = ucol_open(locale2, &status);
+	if (U_FAILURE(status))
+		goto cleanup;
+
+	/* actual locale */
+	status = U_ZERO_ERROR;
+	actual1 = ucol_getLocaleByType(collator1, ULOC_ACTUAL_LOCALE, &status);
+	if (U_FAILURE(status))
+		goto cleanup;
+
+	status = U_ZERO_ERROR;
+	actual2 = ucol_getLocaleByType(collator2, ULOC_ACTUAL_LOCALE, &status);
+	if (U_FAILURE(status))
+		goto cleanup;
+
+	if (strcmp(actual1, actual2) != 0)
+		goto cleanup;
+
+	/* version */
+	ucol_getVersion(collator1, versionInfo1);
+	u_versionToString(versionInfo1, version1);
+	ucol_getVersion(collator2, versionInfo2);
+	u_versionToString(versionInfo2, version2);
+	if (strcmp(version1, version2) != 0)
+		goto cleanup;
+
+	/* attributes */
+	for (int i = 0; i < n_collAtt; i++)
+	{
+		UColAttributeValue val1, val2;
+
+		status = U_ZERO_ERROR;
+		val1 = ucol_getAttribute(collator1, collAtt[i], &status);
+		if (U_FAILURE(status))
+			goto cleanup;
+
+		status = U_ZERO_ERROR;
+		val2 = ucol_getAttribute(collator2, collAtt[i], &status);
+		if (U_FAILURE(status))
+			goto cleanup;
+
+		if (val1 != val2)
+			goto cleanup;
+	}
+
+	/* passed all the best-effort checks for equivalence */
+	result = true;
+
+cleanup:
+	if (collator2)
+		ucol_close(collator2);
+	if (collator1)
+		ucol_close(collator1);
+
+	return result;
+}
+
+static char *
+get_lang_part(const char *locale)
+{
+	UErrorCode	 status;
+	char		*lang_part;
+	int32_t		 len;
+
+	status = U_ZERO_ERROR;
+	len  = uloc_getLanguage(locale, NULL, 0, &status);
+	lang_part = palloc(len + 1);
+	status = U_ZERO_ERROR;
+	uloc_getLanguage(locale, lang_part, len + 1, &status);
+	if (U_FAILURE(status))
+		ereport(ERROR,
+				(errmsg("could not get language name from locale string \"%s\": %s",
+						locale, u_errorName(status))));
+	return lang_part;
+}
+
+/*
+ * Check if the locale string represents the root locale, i.e. its language
+ * part is "und", "root", or the empty string (compared case-insensitively).
+ */
+static bool
+icu_is_root_locale(const char *locale)
+{
+	char	   *lang_part = get_lang_part(locale);
+	bool		result;
+
+	/* pg_strcasecmp: libc strcasecmp's case folding depends on LC_CTYPE */
+	result = (lang_part[0] == '\0' ||
+			  pg_strcasecmp(lang_part, "root") == 0 ||
+			  pg_strcasecmp(lang_part, "und") == 0);
+
+	pfree(lang_part);
+	return result;
+}
+
+/*
+ * Check if the locale string represents the C/POSIX locale, which is not
+ * handled by ICU level 2 canonicalization.
+ */
+static bool
+icu_is_c_posix_locale(const char *locale)
+{
+	char	   *lang_part = get_lang_part(locale);
+	bool		result;
+
+	/* pg_strcasecmp folds ASCII regardless of LC_CTYPE (e.g. Turkish "I") */
+	result = (pg_strcasecmp(lang_part, "c") == 0 ||
+			  pg_strcasecmp(lang_part, "posix") == 0);
+
+	pfree(lang_part);
+	return result;
+}
+
+bool
+icu_collator_exists(const char *requested_locale)
+{
+	UCollator	*collator;
+	const char	*valid_locale = NULL;
+	UErrorCode	 status;
+	bool		 result		  = false;
+
+	status = U_ZERO_ERROR;
+	collator = ucol_open(requested_locale, &status);
+	if (U_FAILURE(status))
+		return false;
+
+	status = U_ZERO_ERROR;
+	valid_locale = ucol_getLocaleByType(collator, ULOC_VALID_LOCALE, &status);
+	if (U_FAILURE(status))
+		goto cleanup;
+
+	if (icu_is_root_locale(requested_locale) ||
+		!icu_is_root_locale(valid_locale))
+		result = true;
+
+cleanup:
+	ucol_close(collator);
+	return result;
+}
+
+/*
+ * Return the BCP47 language tag representation of the requested locale; or
+ * NULL if a problem is encountered.
+ *
+ * This function should be called before passing the string to ucol_open(),
+ * because conversion to a language tag also performs "level 2
+ * canonicalization". In addition to producing a consistent result format,
+ * level 2 canonicalization is able to more accurately interpret different
+ * input locale string formats, such as POSIX and .NET IDs.
+ */
+char *
+icu_language_tag(const char *requested_locale)
+{
+	UErrorCode	 status;
+	char		*result;
+	int32_t		 len;
+	const bool	 strict = true;
+
+	/* C/POSIX locales aren't handled by uloc_toLanguageTag() */
+	if (icu_is_c_posix_locale(requested_locale))
+		return pstrdup("en-US-u-va-posix");
+
+	status = U_ZERO_ERROR;
+	len = uloc_toLanguageTag(requested_locale, NULL, 0, strict, &status);
+
+	result = palloc(len + 1);
+
+	status = U_ZERO_ERROR;
+	uloc_toLanguageTag(requested_locale, result, len + 1, strict, &status);
+	if (U_FAILURE(status))
+	{
+		pfree(result);
+		return NULL;
+	}
+
+	return result;
+}
+
+#endif							/* USE_ICU */
+
 /*
  * These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
  * Therefore we keep them here rather than with the mbutils code.
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 1c0583fe26..4aa53259dc 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -481,6 +481,7 @@ char	   *event_source;
 
 bool		row_security;
 bool		check_function_bodies = true;
+bool		icu_locale_validation = false;
 
 /*
  * This GUC exists solely for backward compatibility, check its definition for
@@ -1586,6 +1587,15 @@ struct config_bool ConfigureNamesBool[] =
 		true,
 		NULL, NULL, NULL
 	},
+	{
+		{"icu_locale_validation", PGC_SUSET, CLIENT_CONN_LOCALE,
+			gettext_noop("Validate ICU locale strings."),
+			NULL
+		},
+		&icu_locale_validation,
+		false,
+		NULL, NULL, NULL
+	},
 	{
 		{"array_nulls", PGC_USERSET, COMPAT_OPTIONS_PREVIOUS,
 			gettext_noop("Enable input of NULL elements in arrays."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index d06074b86f..cff927e8be 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -730,6 +730,8 @@
 #lc_numeric = 'C'			# locale for number formatting
 #lc_time = 'C'				# locale for time formatting
 
+#icu_locale_validation = off		# validate ICU locale strings
+
 # default configuration for text search
 #default_text_search_config = 'pg_catalog.simple'
 
diff --git a/src/include/commands/dbcommands.h b/src/include/commands/dbcommands.h
index 5fbc3ca752..0f0e827ff2 100644
--- a/src/include/commands/dbcommands.h
+++ b/src/include/commands/dbcommands.h
@@ -33,5 +33,6 @@ extern char *get_database_name(Oid dbid);
 extern bool have_createdb_privilege(void);
 
 extern void check_encoding_locale_matches(int encoding, const char *collate, const char *ctype);
+extern char *get_icu_locale(const char *requested_locale);
 
 #endif							/* DBCOMMANDS_H */
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index cede43440b..e1bb017a54 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -104,8 +104,12 @@ extern char *get_collation_actual_version(char collprovider, const char *collcol
 #ifdef USE_ICU
 extern int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes);
 extern int32_t icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar);
+extern bool check_equivalent_icu_locales(const char *locale1,
+										 const char *locale2);
 #endif
 extern void check_icu_locale(const char *icu_locale);
+extern bool icu_collator_exists(const char *requested_locale);
+extern char *icu_language_tag(const char *requested_locale);
 
 /* These functions convert from/to libc's wchar_t, *not* pg_wchar_t */
 extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen,
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index 4354dc07b8..4d18ca8a85 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1019,6 +1019,7 @@ reset enable_seqscan;
 CREATE ROLE regress_test_role;
 CREATE SCHEMA test_schema;
 -- We need to do this this way to cope with varying names for encodings:
+SET client_min_messages TO WARNING;
 do $$
 BEGIN
   EXECUTE 'CREATE COLLATION test0 (provider = icu, locale = ' ||
@@ -1033,9 +1034,12 @@ BEGIN
           quote_literal(current_setting('lc_collate')) || ');';
 END
 $$;
+RESET client_min_messages;
 CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, needs "locale"
 ERROR:  parameter "locale" must be specified
 CREATE COLLATION testx (provider = icu, locale = 'nonsense'); /* never fails with ICU */  DROP COLLATION testx;
+NOTICE:  using language tag "nonsense" for locale "nonsense"
+WARNING:  ICU collator for language tag "nonsense" not found
 CREATE COLLATION test4 FROM nonsense;
 ERROR:  collation "nonsense" for encoding "UTF8" does not exist
 CREATE COLLATION test5 FROM test0;
@@ -1144,6 +1148,7 @@ drop type textrange_en_us;
 -- test ICU collation customization
 -- test the attributes handled by icu_set_collation_attributes()
 CREATE COLLATION testcoll_ignore_accents (provider = icu, locale = '@colStrength=primary;colCaseLevel=yes');
+NOTICE:  using language tag "und-u-kc-ks-level1" for locale "@colStrength=primary;colCaseLevel=yes"
 SELECT 'aaá' > 'AAA' COLLATE "und-x-icu", 'aaá' < 'AAA' COLLATE testcoll_ignore_accents;
  ?column? | ?column? 
 ----------+----------
@@ -1151,6 +1156,7 @@ SELECT 'aaá' > 'AAA' COLLATE "und-x-icu", 'aaá' < 'AAA' COLLATE testcoll_ignor
 (1 row)
 
 CREATE COLLATION testcoll_backwards (provider = icu, locale = '@colBackwards=yes');
+NOTICE:  using language tag "und-u-kb" for locale "@colBackwards=yes"
 SELECT 'coté' < 'côte' COLLATE "und-x-icu", 'coté' > 'côte' COLLATE testcoll_backwards;
  ?column? | ?column? 
 ----------+----------
@@ -1158,7 +1164,9 @@ SELECT 'coté' < 'côte' COLLATE "und-x-icu", 'coté' > 'côte' COLLATE testcoll
 (1 row)
 
 CREATE COLLATION testcoll_lower_first (provider = icu, locale = '@colCaseFirst=lower');
+NOTICE:  using language tag "und-u-kf-lower" for locale "@colCaseFirst=lower"
 CREATE COLLATION testcoll_upper_first (provider = icu, locale = '@colCaseFirst=upper');
+NOTICE:  using language tag "und-u-kf-upper" for locale "@colCaseFirst=upper"
 SELECT 'aaa' < 'AAA' COLLATE testcoll_lower_first, 'aaa' > 'AAA' COLLATE testcoll_upper_first;
  ?column? | ?column? 
 ----------+----------
@@ -1166,6 +1174,7 @@ SELECT 'aaa' < 'AAA' COLLATE testcoll_lower_first, 'aaa' > 'AAA' COLLATE testcol
 (1 row)
 
 CREATE COLLATION testcoll_shifted (provider = icu, locale = '@colAlternate=shifted');
+NOTICE:  using language tag "und-u-ka-shifted" for locale "@colAlternate=shifted"
 SELECT 'de-luge' < 'deanza' COLLATE "und-x-icu", 'de-luge' > 'deanza' COLLATE testcoll_shifted;
  ?column? | ?column? 
 ----------+----------
@@ -1173,6 +1182,7 @@ SELECT 'de-luge' < 'deanza' COLLATE "und-x-icu", 'de-luge' > 'deanza' COLLATE te
 (1 row)
 
 CREATE COLLATION testcoll_numeric (provider = icu, locale = '@colNumeric=yes');
+NOTICE:  using language tag "und-u-kn" for locale "@colNumeric=yes"
 SELECT 'A-21' > 'A-123' COLLATE "und-x-icu", 'A-21' < 'A-123' COLLATE testcoll_numeric;
  ?column? | ?column? 
 ----------+----------
@@ -1184,6 +1194,7 @@ ERROR:  could not open collator for locale "@colNumeric=lower": U_ILLEGAL_ARGUME
 -- test that attributes not handled by icu_set_collation_attributes()
 -- (handled by ucol_open() directly) also work
 CREATE COLLATION testcoll_de_phonebook (provider = icu, locale = 'de@collation=phonebook');
+NOTICE:  using language tag "de-u-co-phonebk" for locale "de@collation=phonebook"
 SELECT 'Goldmann' < 'Götz' COLLATE "de-x-icu", 'Goldmann' > 'Götz' COLLATE testcoll_de_phonebook;
  ?column? | ?column? 
 ----------+----------
@@ -1192,7 +1203,9 @@ SELECT 'Goldmann' < 'Götz' COLLATE "de-x-icu", 'Goldmann' > 'Götz' COLLATE tes
 
 -- nondeterministic collations
 CREATE COLLATION ctest_det (provider = icu, locale = '', deterministic = true);
+NOTICE:  using language tag "und" for locale ""
 CREATE COLLATION ctest_nondet (provider = icu, locale = '', deterministic = false);
+NOTICE:  using language tag "und" for locale ""
 CREATE TABLE test6 (a int, b text);
 -- same string in different normal forms
 INSERT INTO test6 VALUES (1, U&'\00E4bc');
@@ -1242,7 +1255,9 @@ SELECT * FROM test6a WHERE b = ARRAY['äbc'] COLLATE ctest_nondet;
 (2 rows)
 
 CREATE COLLATION case_sensitive (provider = icu, locale = '');
+NOTICE:  using language tag "und" for locale ""
 CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=secondary', deterministic = false);
+NOTICE:  using language tag "und-u-ks-level2" for locale "@colStrength=secondary"
 SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
  ?column? | ?column? 
 ----------+----------
@@ -1710,6 +1725,7 @@ SELECT * FROM outer_text WHERE (f1, f2) NOT IN (SELECT * FROM inner_text);
 
 -- accents
 CREATE COLLATION ignore_accents (provider = icu, locale = '@colStrength=primary;colCaseLevel=yes', deterministic = false);
+NOTICE:  using language tag "und-u-kc-ks-level1" for locale "@colStrength=primary;colCaseLevel=yes"
 CREATE TABLE test4 (a int, b text);
 INSERT INTO test4 VALUES (1, 'cote'), (2, 'côte'), (3, 'coté'), (4, 'côté');
 SELECT * FROM test4 WHERE b = 'cote';
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index b0ddc7db44..3a7e7202af 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -357,6 +357,8 @@ CREATE ROLE regress_test_role;
 CREATE SCHEMA test_schema;
 
 -- We need to do this this way to cope with varying names for encodings:
+SET client_min_messages TO WARNING;
+
 do $$
 BEGIN
   EXECUTE 'CREATE COLLATION test0 (provider = icu, locale = ' ||
@@ -370,6 +372,9 @@ BEGIN
           quote_literal(current_setting('lc_collate')) || ');';
 END
 $$;
+
+RESET client_min_messages;
+
 CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, needs "locale"
 CREATE COLLATION testx (provider = icu, locale = 'nonsense'); /* never fails with ICU */  DROP COLLATION testx;
 
-- 
2.34.1

