From fc66f02976bb11b629bcf71346c2858eccbcf1a3 Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Thu, 11 May 2023 10:36:04 -0700
Subject: [PATCH v5 1/7] For user-defined collations, never set
 collencoding=-1.

For new user-defined collations, always set collencoding to the
current database encoding so that it is never shadowed by a built-in
collation.

Built in collations that work with any encoding may have
collencoding=-1, and if a user defines a collation with the same name,
it will shadow the built-in collation.

Previously it was possible to create an ICU collation (which was
assigned collencoding=-1) that was shadowed by a built-in collation
and completely inaccessible.
---
 src/backend/commands/collationcmds.c          | 28 +++++++++++++------
 .../regress/expected/collate.icu.utf8.out     |  2 +-
 2 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c
index c91fe66d9b..a53700256b 100644
--- a/src/backend/commands/collationcmds.c
+++ b/src/backend/commands/collationcmds.c
@@ -302,16 +302,29 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
 					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
 					 errmsg("ICU rules cannot be specified unless locale provider is ICU")));
 
+		/*
+		 * The collencoding is used to hide built-in collations that are
+		 * incompatible with the current database encoding, allowing users to
+		 * define a compatible collation with the same name if
+		 * desired. Built-in collations that work with any encoding have
+		 * collencoding=-1.
+		 *
+		 * A collation that's a match to the current database encoding will
+		 * shadow a collation with the same name and collencoding=-1. We never
+		 * want a user-created collation to be shadowed by a built-in
+		 * collation, so for user-created collations, always set collencoding
+		 * to the current database encoding.
+		 */
+		collencoding = GetDatabaseEncoding();
+
 		if (collprovider == COLLPROVIDER_ICU)
 		{
 #ifdef USE_ICU
 			/*
-			 * We could create ICU collations with collencoding == database
-			 * encoding, but it seems better to use -1 so that it matches the
-			 * way initdb would create ICU collations.  However, only allow
-			 * one to be created when the current database's encoding is
-			 * supported.  Otherwise the collation is useless, plus we get
-			 * surprising behaviors like not being able to drop the collation.
+			 * Only allow an ICU collation to be created when the current
+			 * database's encoding is supported.  Otherwise the collation is
+			 * useless, plus we get surprising behaviors like not being able
+			 * to drop the collation.
 			 *
 			 * Skip this test when !USE_ICU, because the error we want to
 			 * throw for that isn't thrown till later.
@@ -321,11 +334,10 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 						 errmsg("current database's encoding is not supported with this provider")));
 #endif
-			collencoding = -1;
 		}
 		else
 		{
-			collencoding = GetDatabaseEncoding();
+			Assert(collprovider == COLLPROVIDER_LIBC);
 			check_encoding_locale_matches(collencoding, collcollate, collctype);
 		}
 	}
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index b5a221b030..9c9e1e4f48 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1062,7 +1062,7 @@ SELECT collname FROM pg_collation WHERE collname LIKE 'test%' ORDER BY 1;
 
 ALTER COLLATION test1 RENAME TO test11;
 ALTER COLLATION test0 RENAME TO test11; -- fail
-ERROR:  collation "test11" already exists in schema "collate_tests"
+ERROR:  collation "test11" for encoding "UTF8" already exists in schema "collate_tests"
 ALTER COLLATION test1 RENAME TO test22; -- fail
 ERROR:  collation "test1" for encoding "UTF8" does not exist
 ALTER COLLATION test11 OWNER TO regress_test_role;
-- 
2.34.1

