From 8aea65b0a32a0bee9432ccd537d2852e4cc74087 Mon Sep 17 00:00:00 2001
From: Jeff Davis <jdavis@postgresql.org>
Date: Wed, 9 Nov 2022 17:00:01 -0800
Subject: [PATCH v3 4/5] Export pg_collate_libc() and pg_collate_icu().

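Minor optimization for callers such as varstrfastcmp_locale(), which
already has both arguments nul-terminated and also knows their lengths.

The tie-breaking for deterministic collations moves from pg_strcoll()
and pg_strncoll() into pg_collate_libc() and pg_collate_icu(), so that
it also applies to callers that use the exported functions directly.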
---
 src/backend/utils/adt/pg_locale.c | 40 ++++++++++++++++---------------
 src/backend/utils/adt/varlena.c   |  6 ++++-
 src/include/utils/pg_locale.h     |  4 ++++
 3 files changed, 30 insertions(+), 20 deletions(-)

diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 7c29519214..df809891ac 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1824,8 +1824,10 @@ win32_utf8_wcscoll(const char *arg1, size_t len1, const char *arg2, size_t len2,
 
 /*
  * Collate using the libc provider. Arguments must be nul-terminated.
+ *
+ * If the collation is deterministic, break ties with strcmp().
  */
-static int
+int
 pg_collate_libc(const char *arg1, const char *arg2, pg_locale_t locale)
 {
 	int result;
@@ -1852,6 +1854,10 @@ pg_collate_libc(const char *arg1, const char *arg2, pg_locale_t locale)
 	else
 		result = strcoll(arg1, arg2);
 
+	/* Break tie if necessary. */
+	if (result == 0 && (!locale || locale->deterministic))
+		result = strcmp(arg1, arg2);
+
 	return result;
 }
 
@@ -1865,8 +1871,8 @@ pg_collate_icu_no_utf8(const char *arg1, size_t len1,
 {
 	char	 sbuf[TEXTBUFLEN];
 	char	*buf = sbuf;
-	int32_t	 ulen1;
-	int32_t	 ulen2;
+	int32_t	 ulen1 = len1 * 2;
+	int32_t	 ulen2 = len2 * 2;
 	size_t   bufsize1;
 	size_t   bufsize2;
 	UChar	*uchar1,
@@ -1875,9 +1881,6 @@ pg_collate_icu_no_utf8(const char *arg1, size_t len1,
 
 	init_icu_converter();
 
-	ulen1 = uchar_length(icu_converter, arg1, len1);
-	ulen2 = uchar_length(icu_converter, arg2, len2);
-
 	bufsize1 = (ulen1 + 1) * sizeof(UChar);
 	bufsize2 = (ulen2 + 1) * sizeof(UChar);
 
@@ -1903,8 +1906,11 @@ pg_collate_icu_no_utf8(const char *arg1, size_t len1,
 
 /*
  * Collate using the icu provider.
+ *
+ * If the collation is deterministic, break ties with memcmp(), and then with
+ * the string length.
  */
-static int
+int
 pg_collate_icu(const char *arg1, size_t len1, const char *arg2, size_t len2,
 			   pg_locale_t locale)
 {
@@ -1933,6 +1939,14 @@ pg_collate_icu(const char *arg1, size_t len1, const char *arg2, size_t len2,
 		result = pg_collate_icu_no_utf8(arg1, len1, arg2, len2, locale);
 	}
 
+	/* Break tie if necessary. */
+	if (result == 0 && (!locale || locale->deterministic))
+	{
+		result = memcmp(arg1, arg2, Min(len1, len2));
+		if ((result == 0) && (len1 != len2))
+			result = (len1 < len2) ? -1 : 1;
+	}
+
 	return result;
 #else							/* not USE_ICU */
 	/* shouldn't happen */
@@ -1967,10 +1981,6 @@ pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale)
 		result = pg_collate_libc(arg1, arg2, locale);
 	}
 
-	/* Break tie if necessary. */
-	if (result == 0 && (!locale || locale->deterministic))
-		result = strcmp(arg1, arg2);
-
 	return result;
 }
 
@@ -2023,14 +2033,6 @@ pg_strncoll(const char *arg1, size_t len1, const char *arg2, size_t len2,
 			pfree(buf);
 	}
 
-	/* Break tie if necessary. */
-	if (result == 0 && (!locale || locale->deterministic))
-	{
-		result = memcmp(arg1, arg2, Min(len1, len2));
-		if ((result == 0) && (len1 != len2))
-			result = (len1 < len2) ? -1 : 1;
-	}
-
 	return result;
 }
 
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index c904bc0825..5ebfe3acb7 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -2209,7 +2209,11 @@ varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup)
 		return sss->last_returned;
 	}
 
-	result = pg_strcoll(sss->buf1, sss->buf2, sss->locale);
+	if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU)
+		result = pg_collate_icu(sss->buf1, len1, sss->buf2, len2,
+								sss->locale);
+	else
+		result = pg_collate_libc(sss->buf1, sss->buf2, sss->locale);
 
 	/* Cache result, perhaps saving an expensive strcoll() call next time */
 	sss->cache_blob = false;
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index bf70ae08ca..5f7a8ea435 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -103,6 +103,10 @@ extern char *get_collation_actual_version(char collprovider, const char *collcol
 extern int pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale);
 extern int pg_strncoll(const char *arg1, size_t len1,
 					   const char *arg2, size_t len2, pg_locale_t locale);
+extern int pg_collate_libc(const char *arg1, const char *arg2,
+						   pg_locale_t locale);
+extern int pg_collate_icu(const char *arg1, size_t len1,
+						  const char *arg2, size_t len2, pg_locale_t locale);
 
 #ifdef USE_ICU
 extern int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes);
-- 
2.34.1

