From 498a98c009a92c4d625a97452d24c4f9eeb1f67b Mon Sep 17 00:00:00 2001
From: Jehan-Guillaume de Rorthais <jgdr@dalibo.com>
Date: Thu, 11 Jun 2020 18:08:31 +0200
Subject: [PATCH] Replace buggy ucol_strcoll* funcs with ucol_strcollIter

Functions ucol_strcoll and ucol_strcollUTF8 are breaking some collation rules.
This leads to wrong sort order or wrong result from index using such
collations. See bug report #15285 for details:
http://postgr.es/m/153201618542.1404.3611626898935613264%40wrigleys.postgresql.org

This fix replace ucol_strcoll* functions with ucol_strcollIter() which is not
affected by this bug.

Reported-by: Roman Lytovchenko
Analysed-by: Peter Geoghegan
Author: Jehan-Guillaume de Rorthais
---
 src/backend/utils/adt/varlena.c | 54 ++++++++++++++++++++++-----------
 src/include/utils/pg_locale.h   | 14 ---------
 2 files changed, 36 insertions(+), 32 deletions(-)

diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 2eaabd6231..f156c00a1a 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -1638,34 +1638,43 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
 			if (mylocale->provider == COLLPROVIDER_ICU)
 			{
 #ifdef USE_ICU
-#ifdef HAVE_UCOL_STRCOLLUTF8
 				if (GetDatabaseEncoding() == PG_UTF8)
 				{
 					UErrorCode	status;
+					UCharIterator	iter1,
+									iter2;
 
 					status = U_ZERO_ERROR;
-					result = ucol_strcollUTF8(mylocale->info.icu.ucol,
-											  arg1, len1,
-											  arg2, len2,
-											  &status);
+
+					uiter_setUTF8(&iter1, arg1, len1);
+					uiter_setUTF8(&iter2, arg2, len2);
+
+					result = ucol_strcollIter(mylocale->info.icu.ucol,
+											  &iter1, &iter2, &status);
 					if (U_FAILURE(status))
 						ereport(ERROR,
 								(errmsg("collation failed: %s", u_errorName(status))));
 				}
 				else
-#endif
 				{
+					UErrorCode	status;
+					UCharIterator	iter1,
+									iter2;
 					int32_t		ulen1,
 								ulen2;
 					UChar	   *uchar1,
 							   *uchar2;
 
+					status = U_ZERO_ERROR;
+
 					ulen1 = icu_to_uchar(&uchar1, arg1, len1);
 					ulen2 = icu_to_uchar(&uchar2, arg2, len2);
 
-					result = ucol_strcoll(mylocale->info.icu.ucol,
-										  uchar1, ulen1,
-										  uchar2, ulen2);
+					uiter_setString(&iter1, uchar1, ulen1);
+					uiter_setString(&iter2, uchar2, ulen2);
+
+					result = ucol_strcollIter(mylocale->info.icu.ucol,
+											  &iter1, &iter2, &status);
 
 					pfree(uchar1);
 					pfree(uchar2);
@@ -2352,34 +2361,43 @@ varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup)
 		if (sss->locale->provider == COLLPROVIDER_ICU)
 		{
 #ifdef USE_ICU
-#ifdef HAVE_UCOL_STRCOLLUTF8
 			if (GetDatabaseEncoding() == PG_UTF8)
 			{
 				UErrorCode	status;
+				UCharIterator	iter1,
+								iter2;
 
 				status = U_ZERO_ERROR;
-				result = ucol_strcollUTF8(sss->locale->info.icu.ucol,
-										  a1p, len1,
-										  a2p, len2,
-										  &status);
+
+				uiter_setUTF8(&iter1, a1p, len1);
+				uiter_setUTF8(&iter2, a2p, len2);
+
+				result = ucol_strcollIter(sss->locale->info.icu.ucol,
+										  &iter1, &iter2, &status);
 				if (U_FAILURE(status))
 					ereport(ERROR,
 							(errmsg("collation failed: %s", u_errorName(status))));
 			}
 			else
-#endif
 			{
+				UErrorCode	status;
+				UCharIterator	iter1,
+								iter2;
 				int32_t		ulen1,
 							ulen2;
 				UChar	   *uchar1,
 						   *uchar2;
 
+				status = U_ZERO_ERROR;
+
 				ulen1 = icu_to_uchar(&uchar1, a1p, len1);
 				ulen2 = icu_to_uchar(&uchar2, a2p, len2);
 
-				result = ucol_strcoll(sss->locale->info.icu.ucol,
-									  uchar1, ulen1,
-									  uchar2, ulen2);
+				uiter_setString(&iter1, uchar1, ulen1);
+				uiter_setString(&iter2, uchar2, ulen2);
+
+				result = ucol_strcollIter(sss->locale->info.icu.ucol,
+										  &iter1, &iter2, &status);
 
 				pfree(uchar1);
 				pfree(uchar2);
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 9cb7d91ddf..2b3ec2c597 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -21,20 +21,6 @@
 
 #include "utils/guc.h"
 
-#ifdef USE_ICU
-/*
- * ucol_strcollUTF8() was introduced in ICU 50, but it is buggy before ICU 53.
- * (see
- * <https://www.postgresql.org/message-id/flat/f1438ec6-22aa-4029-9a3b-26f79d330e72%40manitou-mail.org>)
- */
-#if U_ICU_VERSION_MAJOR_NUM >= 53
-#define HAVE_UCOL_STRCOLLUTF8 1
-#else
-#undef HAVE_UCOL_STRCOLLUTF8
-#endif
-#endif
-
-
 /* GUC settings */
 extern char *locale_messages;
 extern char *locale_monetary;
-- 
2.20.1

