using Core Foundation locale functions

Started by Peter Eisentraut, about 11 years ago, 6 messages
#1 Peter Eisentraut
peter_e@gmx.net
1 attachment(s)

In light of the recent discussions about using ICU on OS X, I looked
into the Core Foundation locale functions (Core Foundation = traditional
Mac API in OS X, as opposed to the Unix/POSIX APIs).

Attached is a proof of concept patch that just about works for the
sorting aspects. (The ctype aspects aren't there yet and will crash,
but they could be done similarly.) It passes an appropriately adjusted
collate.linux.utf8 test, meaning that it does produce language-aware
sort orders that are equivalent to what glibc produces.

At the moment, this is probably just an experiment that shows where
refactoring and better abstractions might be suitable if we want to
support multiple locale libraries. If we want to pursue ICU, I think
this could be a useful third option.
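
For readers who do not know the Core Foundation side, here is a minimal standalone sketch of the calls that the attached cf_locale.c wraps (illustration only, not taken from the patch; the locale identifier "de_DE", the file name, and the test strings are arbitrary). It builds a CFLocaleRef and compares two UTF-8 strings with CFStringCompareWithOptionsAndLocale(); build with "cc cf_demo.c -framework CoreFoundation -o cf_demo".

/*
 * Illustration only: locale-aware string comparison via Core Foundation.
 */
#include <stdio.h>
#include <CoreFoundation/CFLocale.h>
#include <CoreFoundation/CFString.h>

static int
compare_with_locale(const char *a, const char *b, const char *locname)
{
	CFStringRef	locid = CFStringCreateWithCString(NULL, locname, kCFStringEncodingASCII);
	CFLocaleRef	loc = CFLocaleCreate(NULL, locid);
	CFStringRef	s1 = CFStringCreateWithCString(NULL, a, kCFStringEncodingUTF8);
	CFStringRef	s2 = CFStringCreateWithCString(NULL, b, kCFStringEncodingUTF8);
	CFComparisonResult r;

	r = CFStringCompareWithOptionsAndLocale(s1, s2,
											CFRangeMake(0, CFStringGetLength(s1)),
											0, loc);

	CFRelease(s1);
	CFRelease(s2);
	CFRelease(loc);
	CFRelease(locid);

	/* kCFCompareLessThan = -1, kCFCompareEqualTo = 0, kCFCompareGreaterThan = 1 */
	return (int) r;
}

int
main(void)
{
	/* Under German collation "äb" sorts before "z"; bytewise it would not. */
	printf("%d\n", compare_with_locale("\xc3\xa4" "b", "z", "de_DE"));
	return 0;
}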

Attachments:

cf-locale.patch (application/x-patch)
diff --git a/configure b/configure
index 7594401..371cbe0 100755
--- a/configure
+++ b/configure
@@ -708,6 +708,8 @@ with_libxml
 XML2_CONFIG
 UUID_EXTRA_OBJS
 with_uuid
+LOCALE_EXTRA_LIBS
+with_locale
 with_selinux
 with_openssl
 krb_srvtab
@@ -831,6 +833,7 @@ with_openssl
 with_selinux
 with_readline
 with_libedit_preferred
+with_locale
 with_uuid
 with_ossp_uuid
 with_libxml
@@ -1520,6 +1523,7 @@ Optional Packages:
   --without-readline      do not use GNU Readline nor BSD Libedit for editing
   --with-libedit-preferred
                           prefer BSD Libedit over GNU Readline
+  --with-locale=LIB       use locale library LIB (posix,cf)
   --with-uuid=LIB         build contrib/uuid-ossp using LIB (bsd,e2fs,ossp)
   --with-ossp-uuid        obsolete spelling of --with-uuid=ossp
   --with-libxml           build with XML support
@@ -5677,6 +5681,51 @@ fi
 
 
 #
+# collation library
+#
+
+
+
+# Check whether --with-locale was given.
+if test "${with_locale+set}" = set; then :
+  withval=$with_locale;
+  case $withval in
+    yes)
+      as_fn_error $? "argument required for --with-locale option" "$LINENO" 5
+      ;;
+    no)
+      as_fn_error $? "argument required for --with-locale option" "$LINENO" 5
+      ;;
+    *)
+
+      ;;
+  esac
+
+else
+  with_locale=posix
+fi
+
+
+case $with_locale in
+  posix)
+
+$as_echo "#define USE_LOCALE_POSIX 1" >>confdefs.h
+
+    ;;
+  cf)
+
+$as_echo "#define USE_LOCALE_CF 1" >>confdefs.h
+
+    LOCALE_EXTRA_LIBS='-framework CoreFoundation'
+    ;;
+  *)
+    as_fn_error $? "--with-locale must specify one of posix or cf" "$LINENO" 5
+esac
+
+
+
+
+#
 # UUID library
 #
 # There are at least three UUID libraries in common use: the FreeBSD/NetBSD
diff --git a/configure.in b/configure.in
index 0dc3f18..16b97a1 100644
--- a/configure.in
+++ b/configure.in
@@ -706,6 +706,25 @@ PGAC_ARG_BOOL(with, libedit-preferred, no,
 
 
 #
+# collation library
+#
+PGAC_ARG_REQ(with, locale, [LIB], [use locale library LIB (posix,cf)], [], [with_locale=posix])
+case $with_locale in
+  posix)
+    AC_DEFINE([USE_LOCALE_POSIX], 1, [Define to 1 to use POSIX locale functions.])
+    ;;
+  cf)
+    AC_DEFINE([USE_LOCALE_CF], 1, [Define to 1 to use Core Foundation locale functions.])
+    LOCALE_EXTRA_LIBS='-framework CoreFoundation'
+    ;;
+  *)
+    AC_MSG_ERROR([--with-locale must specify one of posix or cf])
+esac
+AC_SUBST(with_locale)
+AC_SUBST(LOCALE_EXTRA_LIBS)
+
+
+#
 # UUID library
 #
 # There are at least three UUID libraries in common use: the FreeBSD/NetBSD
diff --git a/src/Makefile.global.in b/src/Makefile.global.in
index 63ff50b..fa5a60e 100644
--- a/src/Makefile.global.in
+++ b/src/Makefile.global.in
@@ -166,6 +166,7 @@ with_openssl	= @with_openssl@
 with_selinux	= @with_selinux@
 with_libxml	= @with_libxml@
 with_libxslt	= @with_libxslt@
+with_locale	= @with_locale@
 with_system_tzdata = @with_system_tzdata@
 with_uuid	= @with_uuid@
 with_zlib	= @with_zlib@
@@ -241,6 +242,7 @@ DLLWRAP = @DLLWRAP@
 LIBS = @LIBS@
 LDAP_LIBS_FE = @LDAP_LIBS_FE@
 LDAP_LIBS_BE = @LDAP_LIBS_BE@
+LOCALE_EXTRA_LIBS = @LOCALE_EXTRA_LIBS@
 UUID_LIBS = @UUID_LIBS@
 UUID_EXTRA_OBJS = @UUID_EXTRA_OBJS@
 LD = @LD@
diff --git a/src/backend/Makefile b/src/backend/Makefile
index 870a022..f793e76 100644
--- a/src/backend/Makefile
+++ b/src/backend/Makefile
@@ -54,7 +54,7 @@ ifneq ($(PORTNAME), win32)
 ifneq ($(PORTNAME), aix)
 
 postgres: $(OBJS)
-	$(CC) $(CFLAGS) $(LDFLAGS) $(LDFLAGS_EX) $(export_dynamic) $(call expand_subsys,$^) $(LIBS) -o $@
+	$(CC) $(CFLAGS) $(LDFLAGS) $(LDFLAGS_EX) $(export_dynamic) $(call expand_subsys,$^) $(LIBS) $(LOCALE_EXTRA_LIBS) -o $@
 
 endif
 endif
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 94bb5a4..7f441b4 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -63,6 +63,10 @@
 #include "utils/pg_locale.h"
 #include "utils/syscache.h"
 
+#ifdef USE_LOCALE_CF
+#include "cf_locale.h"
+#endif
+
 #ifdef WIN32
 /*
  * This Windows file defines StrNCpy. We don't need it here, so we undefine
@@ -1023,7 +1027,6 @@ lc_ctype_is_c(Oid collation)
 
 
 /* simple subroutine for reporting errors from newlocale() */
-#ifdef HAVE_LOCALE_T
 static void
 report_newlocale_failure(const char *localename)
 {
@@ -1042,7 +1045,6 @@ report_newlocale_failure(const char *localename)
 			  errdetail("The operating system could not find any locale data for the locale name \"%s\".",
 						localename) : 0)));
 }
-#endif   /* HAVE_LOCALE_T */
 
 
 /*
@@ -1078,7 +1080,7 @@ pg_newlocale_from_collation(Oid collid)
 	if (cache_entry->locale == 0)
 	{
 		/* We haven't computed this yet in this session, so do it */
-#ifdef HAVE_LOCALE_T
+#if defined(USE_LOCALE_CF) || defined(USE_LOCALE_POSIX)
 		HeapTuple	tp;
 		Form_pg_collation collform;
 		const char *collcollate;
@@ -1096,18 +1098,22 @@ pg_newlocale_from_collation(Oid collid)
 		if (strcmp(collcollate, collctype) == 0)
 		{
 			/* Normal case where they're the same */
+#if defined(USE_LOCALE_CF)
+			result = cf_newlocale(collcollate);
+#elif defined(USE_LOCALE_POSIX)
 #ifndef WIN32
 			result = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate,
 							   NULL);
 #else
 			result = _create_locale(LC_ALL, collcollate);
 #endif
+#endif
 			if (!result)
 				report_newlocale_failure(collcollate);
 		}
 		else
 		{
-#ifndef WIN32
+#if defined(USE_LOCALE_POSIX) && defined(HAVE_LOCALE_T) && !defined(WIN32)
 			/* We need two newlocale() steps */
 			locale_t	loc1;
 
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index b3f397e..dbea64d 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -32,6 +32,10 @@
 #include "utils/pg_locale.h"
 #include "utils/sortsupport.h"
 
+#ifdef USE_LOCALE_CF
+#include "cf_locale.h"
+#endif
+
 
 /* GUC variable */
 int			bytea_output = BYTEA_OUTPUT_HEX;
@@ -58,9 +62,7 @@ typedef struct
 	char			   *buf2;		/* 2nd string */
 	int					buflen1;
 	int					buflen2;
-#ifdef HAVE_LOCALE_T
 	pg_locale_t locale;
-#endif
 } TextSortSupport;
 
 /*
@@ -1382,10 +1384,7 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
 		char		a2buf[TEXTBUFLEN];
 		char	   *a1p,
 				   *a2p;
-
-#ifdef HAVE_LOCALE_T
 		pg_locale_t mylocale = 0;
-#endif
 
 		if (collid != DEFAULT_COLLATION_OID)
 		{
@@ -1400,9 +1399,7 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
 						 errmsg("could not determine which collation to use for string comparison"),
 						 errhint("Use the COLLATE clause to set the collation explicitly.")));
 			}
-#ifdef HAVE_LOCALE_T
 			mylocale = pg_newlocale_from_collation(collid);
-#endif
 		}
 
 		/*
@@ -1521,12 +1518,16 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
 		memcpy(a2p, arg2, len2);
 		a2p[len2] = '\0';
 
+#if defined(USE_LOCALE_CF)
+		result = cf_strcoll_l(a1p, a2p, mylocale);
+#elif defined(USE_LOCALE_POSIX)
 #ifdef HAVE_LOCALE_T
 		if (mylocale)
 			result = strcoll_l(a1p, a2p, mylocale);
 		else
 #endif
 			result = strcoll(a1p, a2p);
+#endif
 
 		/*
 		 * In some locales strcoll() can claim that nonidentical strings are
@@ -1766,9 +1767,7 @@ btsortsupport_worker(SortSupport ssup, Oid collid)
 	 * allocate them once and use them unconditionally.
 	 */
 	tss = palloc(sizeof(TextSortSupport));
-#ifdef HAVE_LOCALE_T
 	tss->locale = 0;
-#endif
 
 	if (collid != DEFAULT_COLLATION_OID)
 	{
@@ -1783,9 +1782,7 @@ btsortsupport_worker(SortSupport ssup, Oid collid)
 					 errmsg("could not determine which collation to use for string comparison"),
 					 errhint("Use the COLLATE clause to set the collation explicitly.")));
 		}
-#ifdef HAVE_LOCALE_T
 		tss->locale = pg_newlocale_from_collation(collid);
-#endif
 	}
 
 	tss->buf1 = palloc(TEXTBUFLEN);
@@ -1878,12 +1875,16 @@ bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup)
 	memcpy(tss->buf2, a2p, len2);
 	tss->buf2[len2] = '\0';
 
+#if defined(USE_LOCALE_CF)
+	result = cf_strcoll_l(tss->buf1, tss->buf2, tss->locale);
+#elif defined(USE_LOCALE_POSIX)
 #ifdef HAVE_LOCALE_T
 	if (tss->locale)
 		result = strcoll_l(tss->buf1, tss->buf2, tss->locale);
 	else
 #endif
 		result = strcoll(tss->buf1, tss->buf2);
+#endif
 
 	/*
 	 * In some locales strcoll() can claim that nonidentical strings are equal.
diff --git a/src/bin/initdb/Makefile b/src/bin/initdb/Makefile
index 28c2678..1e09abe 100644
--- a/src/bin/initdb/Makefile
+++ b/src/bin/initdb/Makefile
@@ -28,7 +28,7 @@ OBJS=	initdb.o findtimezone.o localtime.o encnames.o $(WIN32RES)
 all: initdb
 
 initdb: $(OBJS) | submake-libpgport
-	$(CC) $(CFLAGS) $(OBJS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X)
+	$(CC) $(CFLAGS) $(OBJS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) $(LOCALE_EXTRA_LIBS) -o $@$(X)
 
 # We used to pull in all of libpq to get encnames.c, but that
 # exposes us to risks of version skew if we link to a shared library.
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 3b52867..093e128 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -67,6 +67,10 @@
 #include "getopt_long.h"
 #include "miscadmin.h"
 
+#if defined(USE_LOCALE_CF)
+#include "cf_locale.h"
+#endif
+
 /* Ideally this would be in a .h file, but it hardly seems worth the trouble */
 extern const char *select_default_timezone(const char *share_path);
 
@@ -1894,7 +1898,7 @@ setup_description(void)
 	check_ok();
 }
 
-#ifdef HAVE_LOCALE_T
+#if defined(USE_LOCALE_POSIX) && defined(HAVE_LOCALE_T)
 /*
  * "Normalize" a locale name, stripping off encoding tags such as
  * ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro"
@@ -1936,23 +1940,51 @@ normalize_locale_name(char *new, const char *old)
 static void
 setup_collation(void)
 {
-#if defined(HAVE_LOCALE_T) && !defined(WIN32)
+#if defined(USE_LOCALE_POSIX) && defined(HAVE_LOCALE_T) && !defined(WIN32)
 	int			i;
 	FILE	   *locale_a_handle;
 	char		localebuf[NAMEDATALEN]; /* we assume ASCII so this is fine */
 	int			count = 0;
-
-	PG_CMD_DECL;
 #endif
+#if defined(USE_LOCALE_CF)
+	const char **locales;
+	const char **loc;
+#endif
+	PG_CMD_DECL;
 
 	fputs(_("creating collations ... "), stdout);
 	fflush(stdout);
 
-#if defined(HAVE_LOCALE_T) && !defined(WIN32)
-	snprintf(cmd, sizeof(cmd),
-			 "\"%s\" %s template1 >%s",
-			 backend_exec, backend_options,
-			 DEVNULL);
+#if defined(USE_LOCALE_CF)
+
+	locales = cf_available_locales();
+
+	PG_CMD_OPEN;
+
+	PG_CMD_PUTS("CREATE TEMP TABLE tmp_pg_collation ( "
+				"	collname name, "
+				"	locale name ) WITHOUT OIDS;\n");
+
+	for (loc = locales; *loc; loc++)
+	{
+		PG_CMD_PRINTF2("INSERT INTO tmp_pg_collation VALUES (E'%s', E'%s');\n",
+					   *loc, *loc);
+	}
+
+	PG_CMD_PUTS("INSERT INTO pg_collation (collname, collnamespace, collowner, collencoding, collcollate, collctype) "
+				" SELECT "
+				"   collname, "
+				"   (SELECT oid FROM pg_namespace WHERE nspname = 'pg_catalog') AS collnamespace, "
+				"   (SELECT relowner FROM pg_class WHERE relname = 'pg_collation') AS collowner, "
+				"   -1, locale, locale "
+				"  FROM tmp_pg_collation"
+	   "  ORDER BY collname, locale;\n");
+
+	PG_CMD_CLOSE;
+
+	check_ok();
+
+#elif defined(USE_LOCALE_POSIX) && defined(HAVE_LOCALE_T) && !defined(WIN32)
 
 	locale_a_handle = popen_check("locale -a", "r");
 	if (!locale_a_handle)
diff --git a/src/include/cf_locale.h b/src/include/cf_locale.h
new file mode 100644
index 0000000..dfbdf7e
--- /dev/null
+++ b/src/include/cf_locale.h
@@ -0,0 +1,3 @@
+void *cf_newlocale(const char *locale);
+int cf_strcoll_l(const char *s1, const char *s2, void *locale);
+const char **cf_available_locales(void);
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 465281c..0eb5628 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -800,6 +800,12 @@
    (--with-libxslt) */
 #undef USE_LIBXSLT
 
+/* Define to 1 to use Core Foundation locale functions. */
+#undef USE_LOCALE_CF
+
+/* Define to 1 to use POSIX locale functions. */
+#undef USE_LOCALE_POSIX
+
 /* Define to select named POSIX semaphores. */
 #undef USE_NAMED_POSIX_SEMAPHORES
 
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 2b6f7b8..d1462ca 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -64,7 +64,10 @@ extern void cache_locale_time(void);
  * The fake version of pg_locale_t can be checked for truth; that's
  * about all it will be needed for.
  */
-#ifdef HAVE_LOCALE_T
+#if defined(USE_LOCALE_CF)
+/* really CFLocaleRef, but we don't want to include the CF headers everywhere */
+typedef void *pg_locale_t;
+#elif defined(USE_LOCALE_POSIX) && defined(HAVE_LOCALE_T)
 typedef locale_t pg_locale_t;
 #else
 typedef int pg_locale_t;
diff --git a/src/port/Makefile b/src/port/Makefile
index 1be4ff5..3b931db 100644
--- a/src/port/Makefile
+++ b/src/port/Makefile
@@ -35,6 +35,10 @@ OBJS = $(LIBOBJS) chklocale.o dirmod.o erand48.o fls.o inet_net_ntop.o \
 	pgstrcasecmp.o pqsignal.o \
 	qsort.o qsort_arg.o quotes.o sprompt.o tar.o thread.o
 
+ifeq ($(with_locale),cf)
+OBJS += cf_locale.o
+endif
+
 # foo_srv.o and foo.o are both built from foo.c, but only foo.o has -DFRONTEND
 OBJS_SRV = $(OBJS:%.o=%_srv.o)
 
diff --git a/src/port/cf_locale.c b/src/port/cf_locale.c
new file mode 100644
index 0000000..7a16746
--- /dev/null
+++ b/src/port/cf_locale.c
@@ -0,0 +1,92 @@
+#include "cf_locale.h"
+
+extern const char *GetDatabaseEncodingName();
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Including CF headers ends up defining some conflicting symbols such as
+ * Size, so it's wise to keep these headers contained in this one file.
+ */
+#include <CoreFoundation/CFLocale.h>
+#include <CoreFoundation/CFString.h>
+
+void *
+cf_newlocale(const char *locale)
+{
+	CFStringRef	s = CFStringCreateWithCString(NULL, locale, kCFStringEncodingASCII);
+	CFLocaleRef	l = CFLocaleCreate(NULL, s);
+	CFRelease(s);
+	return (void *) l;
+}
+
+#ifndef FRONTEND
+static CFStringEncoding
+cf_database_encoding(void)
+{
+	static int	done = 0;
+	static CFStringEncoding res = kCFStringEncodingInvalidId;
+	CFStringRef	s;
+
+	if (done)
+		return res;
+
+	s = CFStringCreateWithCString(NULL, GetDatabaseEncodingName(), kCFStringEncodingASCII);
+	res = CFStringConvertIANACharSetNameToEncoding(s);
+	if (res == kCFStringEncodingInvalidId)
+	{
+		fprintf(stderr, "could not convert database encoding to CFStringEncoding\n");
+		abort();
+	}
+	CFRelease(s);
+	done = 1;
+
+	return res;
+}
+
+int
+cf_strcoll_l(const char *s1, const char *s2, void *locale)
+{
+	CFStringRef	cfs1 = CFStringCreateWithCString(NULL, s1, cf_database_encoding());
+	CFStringRef	cfs2 = CFStringCreateWithCString(NULL, s2, cf_database_encoding());
+	CFLocaleRef	cfloc;
+	int			ret;
+
+	if (locale)
+		cfloc = locale;
+	else
+		cfloc = CFLocaleGetSystem();
+
+	ret = CFStringCompareWithOptionsAndLocale(cfs1, cfs2,
+											  CFRangeMake(0, CFStringGetLength(cfs1)),
+											  0, cfloc);
+
+	CFRelease(cfs1);
+	CFRelease(cfs2);
+
+	return ret;
+}
+#endif
+
+const char **
+cf_available_locales(void)
+{
+	CFArrayRef	a = CFLocaleCopyAvailableLocaleIdentifiers();
+	const char **ret;
+	int			i;
+
+	ret = malloc((CFArrayGetCount(a) + 1) * sizeof(*ret));	/* +1 for NULL terminator */
+
+	for (i = 0; i < CFArrayGetCount(a); i++)
+	{
+		CFStringRef el = CFArrayGetValueAtIndex(a, i);
+		ret[i] = strdup(CFStringGetCStringPtr(el, kCFStringEncodingASCII));
+	}
+	ret[i] = NULL;
+
+	CFRelease(a);
+
+	return ret;
+}
#2 David E. Wheeler
david@justatheory.com
In reply to: Peter Eisentraut (#1)
1 attachment(s)
Re: using Core Foundation locale functions

On Nov 28, 2014, at 8:43 AM, Peter Eisentraut <peter_e@gmx.net> wrote:

At the moment, this is probably just an experiment that shows where
refactoring and better abstractions might be suitable if we want to
support multiple locale libraries. If we want to pursue ICU, I think
this could be a useful third option.

Gotta say, I’m thrilled to see movement on this front, and especially pleased to see how consensus seems to be building around an abstracted interface to keep options open. This platform-specific example really highlights the need for it (I had no idea that there was separate and more up-to-date collation support in Core Foundation than in the UNIX layer of OS X).

Really looking forward to seeing where we end up.

Best,

David

Attachments:

smime.p7s (application/pkcs7-signature)
#3 Peter Geoghegan
pg@heroku.com
In reply to: Peter Eisentraut (#1)
Re: using Core Foundation locale functions

On Fri, Nov 28, 2014 at 8:43 AM, Peter Eisentraut <peter_e@gmx.net> wrote:

At the moment, this is probably just an experiment that shows where
refactoring and better abstractions might be suitable if we want to
support multiple locale libraries. If we want to pursue ICU, I think
this could be a useful third option.

FWIW, I think that the richer API that ICU provides for string
transformations could be handy in optimizing sorting using abbreviated
keys. For example, ICU will happily only produce parts of sort keys
(the equivalent of strxfrm() blobs) if that is all that is required [1].

I think that ICU also allows clients to parse individual primary
weights in a principled way (primary weights tend to be isomorphic to
the Unicode code points in the original string). I think that this
will enable order-preserving compression of the type anticipated by
the Unicode collation algorithm [2]. That could be useful for certain
languages, like Russian, where the primary weight level usually
contains multi-byte code points with glibc's strxfrm() (this is
generally not true of languages that use the Latin alphabet, or of
East Asian languages).

Note that there is already naturally a form of what you might call
compression with strxfrm() [3]. This is very useful for abbreviated
keys.

[1]: http://userguide.icu-project.org/collation/architecture
[2]: http://www.unicode.org/reports/tr10/#Run-length_Compression
[3]: /messages/by-id/CAM3SWZTyWe5J69TaPvZf2CM7mhSKKE3UhHnK9gLuQckkWqoL5w@mail.gmail.com
--
Peter Geoghegan
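
To make the abbreviated-key idea concrete, here is a minimal sketch of the strxfrm()-based scheme described above (this is not PostgreSQL's actual sortsupport code; the 8-byte abbreviation width, the buffer size, and the helper names are made up for illustration): transform each string once, keep a fixed-size, zero-padded prefix of the transformed blob, compare prefixes with memcmp(), and fall back to a full strcoll() when the prefixes tie.

#include <locale.h>
#include <stdio.h>
#include <string.h>

#define ABBREV_BYTES 8

typedef struct
{
	char		abbrev[ABBREV_BYTES];	/* truncated, zero-padded strxfrm() blob */
	const char *full;					/* original string, for tie-breaking */
} abbrev_key;

static void
make_abbrev_key(abbrev_key *key, const char *s)
{
	char		buf[1024];
	size_t		len;

	/*
	 * A real implementation would retry with a larger buffer if the
	 * transformed string does not fit; this sketch just gives up on the
	 * abbreviation, which is always safe because equal prefixes fall back
	 * to strcoll() below.
	 */
	len = strxfrm(buf, s, sizeof(buf));
	if (len >= sizeof(buf))
		len = 0;

	/* zero-padding keeps memcmp() consistent with strcmp() on full blobs */
	memset(key->abbrev, 0, ABBREV_BYTES);
	memcpy(key->abbrev, buf, len < ABBREV_BYTES ? len : ABBREV_BYTES);
	key->full = s;
}

static int
abbrev_cmp(const abbrev_key *a, const abbrev_key *b)
{
	int			r = memcmp(a->abbrev, b->abbrev, ABBREV_BYTES);

	/* equal prefixes prove nothing about the full strings: break the tie */
	return r != 0 ? r : strcoll(a->full, b->full);
}

int
main(void)
{
	abbrev_key	k1, k2;

	setlocale(LC_COLLATE, "");		/* e.g. run under LC_COLLATE=de_DE.UTF-8 */
	make_abbrev_key(&k1, "stra\xc3\x9f" "e");	/* "straße" */
	make_abbrev_key(&k2, "strasse");
	printf("%d\n", abbrev_cmp(&k1, &k2));
	return 0;
}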


#4 Noah Misch
noah@leadboat.com
In reply to: Peter Eisentraut (#1)
Re: using Core Foundation locale functions

On Fri, Nov 28, 2014 at 11:43:28AM -0500, Peter Eisentraut wrote:

In light of the recent discussions about using ICU on OS X, I looked
into the Core Foundation locale functions (Core Foundation = traditional
Mac API in OS X, as opposed to the Unix/POSIX APIs).

Attached is a proof of concept patch that just about works for the
sorting aspects. (The ctype aspects aren't there yet and will crash,
but they could be done similarly.) It passes an appropriately adjusted
collate.linux.utf8 test, meaning that it does produce language-aware
sort orders that are equivalent to what glibc produces.

At the moment, this is probably just an experiment that shows where
refactoring and better abstractions might be suitable if we want to
support multiple locale libraries. If we want to pursue ICU, I think
this could be a useful third option.

Does this make the backend multi-threaded?


#5 Craig Ringer
craig@2ndquadrant.com
In reply to: David E. Wheeler (#2)
Re: using Core Foundation locale functions

On 12/02/2014 12:52 AM, David E. Wheeler wrote:

Gotta say, I’m thrilled to see movement on this front, and especially pleased to see how consensus seems to be building around an abstracted interface to keep options open. This platform-specific example really highlights the need for it (I had no idea that there was separate and more up-to-date collation support in Core Foundation than in the UNIX layer of OS X).

It'd also potentially let us make use of Windows' native locale APIs,
which AFAIK receive considerably more love on that platform than their
POSIX back-country cousins.

--
Craig Ringer http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
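
As a rough idea of what that could look like, the sketch below routes a comparison through CompareStringEx() with an explicit locale name. This is not part of the patch or of PostgreSQL: it assumes UTF-16 inputs (which PostgreSQL currently produces via MultiByteToWideChar()) and a Vista-or-later locale name, e.g. win32_strcoll_l(s1, s2, L"de-DE").

#include <windows.h>

static int
win32_strcoll_l(const wchar_t *s1, const wchar_t *s2, const wchar_t *locname)
{
	int			r;

	/* -1 lengths: treat both strings as NUL-terminated */
	r = CompareStringEx(locname, 0, s1, -1, s2, -1, NULL, NULL, 0);
	if (r == 0)
		return 0;				/* error; a real caller would report it */

	/* CompareStringEx returns CSTR_LESS_THAN/EQUAL/GREATER_THAN (1/2/3) */
	return r - CSTR_EQUAL;
}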


#6 Peter Geoghegan
pg@heroku.com
In reply to: Craig Ringer (#5)
Re: using Core Foundation locale functions

On Tue, Dec 2, 2014 at 10:07 PM, Craig Ringer <craig@2ndquadrant.com> wrote:

On 12/02/2014 12:52 AM, David E. Wheeler wrote:

Gotta say, I’m thrilled to see movement on this front, and especially pleased to see how consensus seems to be building around an abstracted interface to keep options open. This platform-specific example really highlights the need for it (I had no idea that there was separate and more up-to-date collation support in Core Foundation than in the UNIX layer of OS X).

It'd also potentially let us make use of Windows' native locale APIs,
which AFAIK receive considerably more love on that platform than their
POSIX back-country cousins.

Not to mention the fact that a MultiByteToWideChar() call could be
saved, and sortsupport for text would just work on Windows.

--
Peter Geoghegan
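
On the sortsupport angle, the Windows counterpart of strxfrm() would be LCMapStringEx() with LCMAP_SORTKEY, which yields a byte-comparable sort key. A hedged sketch (not PostgreSQL code; the helper name is made up and error handling is minimal):

#include <windows.h>
#include <stdlib.h>

/* Returns a malloc'd sort key for a NUL-terminated UTF-16 string, or NULL. */
static unsigned char *
win32_sortkey(const wchar_t *s, const wchar_t *locname, int *keylen)
{
	int			n;
	unsigned char *key;

	/* First call: ask for the required sort-key size in bytes. */
	n = LCMapStringEx(locname, LCMAP_SORTKEY, s, -1, NULL, 0, NULL, NULL, 0);
	if (n == 0)
		return NULL;

	key = malloc(n);
	if (key == NULL)
		return NULL;

	/* With LCMAP_SORTKEY the destination is a byte buffer despite its type. */
	n = LCMapStringEx(locname, LCMAP_SORTKEY, s, -1,
					  (LPWSTR) key, n, NULL, NULL, 0);
	if (n == 0)
	{
		free(key);
		return NULL;
	}

	*keylen = n;
	return key;					/* the resulting blobs compare with memcmp() */
}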
