>From e54180b146edba871a9a4389f7cada0df1674587 Mon Sep 17 00:00:00 2001
From: Pavel Raiskup <praiskup@redhat.com>
Date: Sat, 21 Dec 2013 01:27:01 +0100
Subject: [PATCH] pg_upgrade: more tolerant locale comparison

Locale strings specified like 'cs_CZ.utf8' and 'cs_CZ.UTF-8'
should be treated as equivalent.
This patch switches from case-insensitive comparison of the whole
locale strings to a more tolerant comparison:  it separately
compares the right side of the locale string (after the dot
character; the encoding part).  That string is first decoded by
pg_valid_server_encoding() from pg_wchar.h and then the
corresponding encoding codes are compared.  The remaining part of
the locale string is compared case-insensitively.  If the encoding
detection is not possible (e.g. no encoding part of the locale
string), keep the complete case-insensitive comparison.

This resolves yet another complication of in-place upgrading.

---
 contrib/pg_upgrade/check.c | 37 ++++++++++++++++++++++++++++++++++---
 1 file changed, 34 insertions(+), 3 deletions(-)

diff --git a/contrib/pg_upgrade/check.c b/contrib/pg_upgrade/check.c
index 411689a..697a16a 100644
--- a/contrib/pg_upgrade/check.c
+++ b/contrib/pg_upgrade/check.c
@@ -9,6 +9,7 @@
 
 #include "postgres_fe.h"
 
+#include "mb/pg_wchar.h"
 #include "pg_upgrade.h"
 
 
@@ -393,6 +394,36 @@ set_locale_and_encoding(ClusterInfo *cluster)
 	PQfinish(conn);
 }
 
+/*
+ * equivalent_locale()
+ *
+ * Best-effort comparison of two locale names.  If both names end in a
+ * ".encoding" suffix that pg_valid_server_encoding() maps to the same code,
+ * only the parts before the last dot are compared; otherwise the whole names
+ * are.  Comparisons ignore case.  Returns false unless surely equivalent.
+ */
+static bool
+equivalent_locale(const char *loca, const char *locb)
+{
+	const char *dota = strrchr(loca, '.');
+	const char *dotb = strrchr(locb, '.');
+	int			enca, encb;
+
+	/* without an encoding part on both sides, compare the whole names */
+	if (dota == NULL || dotb == NULL)
+		return (pg_strcasecmp(loca, locb) == 0);
+
+	enca = pg_valid_server_encoding(dota + 1);
+	encb = pg_valid_server_encoding(dotb + 1);
+
+	/* unknown or differing encodings: fall back to whole-name comparison */
+	if (enca < 0 || encb < 0 || enca != encb)
+		return (pg_strcasecmp(loca, locb) == 0);
+
+	/* prefixes must have equal length, else "a.utf8" would match "a.b.utf8" */
+	return (dota - loca == dotb - locb &&
+			pg_strncasecmp(loca, locb, dota - loca) == 0);
+}
 
 /*
  * check_locale_and_encoding()
@@ -409,13 +440,13 @@ check_locale_and_encoding(ControlData *oldctrl,
 	 * They also often use inconsistent hyphenation, which we cannot fix, e.g.
 	 * UTF-8 vs. UTF8, so at least we display the mismatching values.
 	 */
-	if (pg_strcasecmp(oldctrl->lc_collate, newctrl->lc_collate) != 0)
+	if (!equivalent_locale(oldctrl->lc_collate, newctrl->lc_collate))
 		pg_fatal("lc_collate cluster values do not match:  old \"%s\", new \"%s\"\n",
 			   oldctrl->lc_collate, newctrl->lc_collate);
-	if (pg_strcasecmp(oldctrl->lc_ctype, newctrl->lc_ctype) != 0)
+	if (!equivalent_locale(oldctrl->lc_ctype, newctrl->lc_ctype))
 		pg_fatal("lc_ctype cluster values do not match:  old \"%s\", new \"%s\"\n",
 			   oldctrl->lc_ctype, newctrl->lc_ctype);
-	if (pg_strcasecmp(oldctrl->encoding, newctrl->encoding) != 0)
+	if (!equivalent_locale(oldctrl->encoding, newctrl->encoding))
 		pg_fatal("encoding cluster values do not match:  old \"%s\", new \"%s\"\n",
 			   oldctrl->encoding, newctrl->encoding);
 }
-- 
1.8.4.2

