Index: configure.in
===================================================================
RCS file: /projects/cvsroot/pgsql/configure.in,v
retrieving revision 1.417
diff -c -r1.417 configure.in
*** configure.in	6 Jul 2005 21:04:13 -0000	1.417
--- configure.in	28 Jul 2005 17:01:47 -0000
***************
*** 467,472 ****
--- 467,480 ----
  AC_MSG_RESULT([$with_openssl])
  AC_SUBST(with_openssl)
+ #
+ # ICU
+ #
+ AC_MSG_CHECKING([whether to build with ICU support])
+ PGAC_ARG_BOOL(with, icu, no, [  --with-icu              build with ICU support],
+               [AC_DEFINE([USE_ICU], 1, [Define to build with ICU support. (--with-icu)])])
+ AC_MSG_RESULT([$with_icu])
+ AC_SUBST(with_icu)
  #
  # Readline
***************
*** 674,679 ****
--- 682,698 ----
      fi
    fi
+ if test "$with_icu" = yes ; then
+   if test "$PORTNAME" != "win32"; then
+     AC_CHECK_LIB(icui18n, ucol_open_3_2, [], [AC_MSG_ERROR([library 'icui18n' is required for ICU])])
+     AC_CHECK_LIB(icuuc, u_tolower_3_2, [], [AC_MSG_ERROR([library 'icuuc' is required for ICU])])
+     AC_CHECK_LIB(icudata, icudt32_dat, [], [AC_MSG_ERROR([library 'icudata' is required for ICU])])
+   else
+     AC_CHECK_LIB(icuin, ucol_open_3_2, [], [AC_MSG_ERROR([library 'icuin' is required for ICU])])
+     AC_CHECK_LIB(icuuc, u_tolower_3_2, [], [AC_MSG_ERROR([library 'icuuc' is required for ICU])])
+   fi
+ fi
+ 
+ 
  if test "$with_pam" = yes ; then
    AC_CHECK_LIB(pam, pam_start, [], [AC_MSG_ERROR([library 'pam' is required for PAM])])
  fi
***************
*** 748,753 ****
--- 767,776 ----
    AC_CHECK_HEADER(openssl/err.h, [], [AC_MSG_ERROR([header file <openssl/err.h> is required for OpenSSL])])
  fi
+ if test "$with_icu" = yes ; then
+   AC_CHECK_HEADER(unicode/utypes.h, [], [AC_MSG_ERROR([header file <unicode/utypes.h> is required for ICU])])
+ fi
+ 
+ 
  if test "$with_pam" = yes ; then
    AC_CHECK_HEADERS(security/pam_appl.h, [],
                     [AC_CHECK_HEADERS(pam/pam_appl.h, [],
Index: src/backend/port/win32/Makefile
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/port/win32/Makefile,v
retrieving revision 1.6
diff -c -r1.6 Makefile
*** src/backend/port/win32/Makefile	29 Aug 2004 00:38:03 -0000	1.6
--- src/backend/port/win32/Makefile	30 Jul 2005 17:52:47 -0000
***************
*** 12,29 ****
  top_builddir = ../../../..
  include $(top_builddir)/src/Makefile.global
! OBJS = sema.o shmem.o timer.o socket.o signal.o security.o error.o
  all: SUBSYS.o
  SUBSYS.o: $(OBJS)
  	$(LD) $(LDREL) $(LDOUT) SUBSYS.o $(OBJS)
  depend dep:
  	$(CC) -MM $(CFLAGS) *.c >depend
  clean:
! 	rm -f SUBSYS.o $(OBJS)
  ifeq (depend,$(wildcard depend))
  include depend
--- 12,34 ----
  top_builddir = ../../../..
  include $(top_builddir)/src/Makefile.global
! OBJS = sema.o shmem.o timer.o socket.o signal.o security.o error.o localemap.o
  all: SUBSYS.o
  SUBSYS.o: $(OBJS)
  	$(LD) $(LDREL) $(LDOUT) SUBSYS.o $(OBJS)
+ localemap.o: localemap.c localemap.h
+ 
+ localemap.h: iso639 iso3166 localemap.pl
+ 	$(PERL) localemap.pl > localemap.h
+ 
  depend dep:
  	$(CC) -MM $(CFLAGS) *.c >depend
  clean:
! 	rm -f SUBSYS.o $(OBJS) localemap.h
  ifeq (depend,$(wildcard depend))
  include depend
Index: src/backend/utils/adt/oracle_compat.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/utils/adt/oracle_compat.c,v
retrieving revision 1.60
diff -c -r1.60 oracle_compat.c
*** src/backend/utils/adt/oracle_compat.c	7 May 2005 15:18:17 -0000	1.60
--- src/backend/utils/adt/oracle_compat.c	30 Jul 2005 14:59:29 -0000
***************
*** 32,37 ****
--- 32,43 ----
  #include "utils/pg_locale.h"
  #include "mb/pg_wchar.h"
+ #ifdef USE_ICU
+ #include <unicode/utypes.h>	/* Basic ICU data types */
+ #include <unicode/ucnv.h>	/* C Converter API */
+ #include <unicode/ustring.h>	/* u_strToLower, u_strToUpper, u_strToTitle */
+ #endif /* USE_ICU */
+ 
  /*
   * If the system provides the needed functions for wide-character manipulation
***************
*** 53,58 ****
--- 59,106 ----
  			bool doltrim, bool dortrim);
+ #ifdef USE_ICU
+ static UConverter *conv = NULL;
+ 
+ static text *
+ UChartotext(const UChar *str, int ncodes)
+ {
+ 	text	   *result;
+ 	size_t		nbytes, resultsize;
+ 
+ 	UErrorCode status = 
U_ZERO_ERROR;
+ 
+ 	/* Overflow paranoia: (ncodes + 10) * maxCharSize + VARHDRSZ must fit an int */
+ 	if (ncodes < 0 ||
+ 		ncodes > (INT_MAX - VARHDRSZ) / ucnv_getMaxCharSize(conv) - 10)
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_OUT_OF_MEMORY),
+ 				 errmsg("out of memory")));
+ 
+ 	/* Make workspace certainly large enough for result */
+ 	resultsize = UCNV_GET_MAX_BYTES_FOR_STRING(ncodes, ucnv_getMaxCharSize(conv));
+ 	result = (text *) palloc(resultsize + VARHDRSZ);
+ 
+ 	nbytes = ucnv_fromUChars(conv, VARDATA(result), resultsize,
+ 							 str, ncodes, &status);
+ 
+ 	if (U_FAILURE(status))
+ 	{
+ 		/* Invalid multibyte character encountered ... shouldn't happen */
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
+ 				 errmsg("invalid multibyte character for locale")));
+ 	}
+ 
+ 	Assert(nbytes <= resultsize);	/* one UChar may need more than sizeof(UChar) bytes */
+ 
+ 	VARATT_SIZEP(result) = nbytes + VARHDRSZ;
+ 
+ 	return result;
+ }
+ 
+ 
+ #else
  #ifdef USE_WIDE_UPPER_LOWER
  /*
***************
*** 147,152 ****
--- 195,201 ----
  	return result;
  }
  #endif   /* USE_WIDE_UPPER_LOWER */
+ #endif /* USE_ICU */
  /********************************************************************
***************
*** 166,171 ****
--- 215,286 ----
  Datum
  lower(PG_FUNCTION_ARGS)
  {
+ #ifdef USE_ICU
+ #define STACKBUFLEN		(1024 / sizeof(UChar))	/* stack buffer size, in UChars */
+ 	/* use ICU only when max encoding length > one */
+ 	if (pg_database_encoding_max_length() > 1)
+ 	{
+ 		text	   *string = PG_GETARG_TEXT_P(0);
+ 		text	   *result;
+ 		UChar		sourcebuf[STACKBUFLEN], destbuf[STACKBUFLEN];
+ 		UChar	   *source, *dest;
+ 		int			buflen,
+ 					arglen = VARSIZE(string) - VARHDRSZ;
+ 		UErrorCode	status = U_ZERO_ERROR;
+ 
+ 		if (conv == NULL)
+ 		{
+ 			conv = ucnv_open(NULL, &status);
+ 			if (U_FAILURE(status))
+ 			{
+ 				ereport(ERROR,
+ 						(errcode(ERRCODE_INTERNAL_ERROR),	/* a UErrorCode is not a SQLSTATE */
+ 						 errmsg("ICU error: oracle_compat.c, could not get converter for \"%s\"", ucnv_getDefaultName())));
+ 			}
+ 		}
+ 
+ 		if (arglen >= STACKBUFLEN / sizeof(UChar))	/* must stay in step with the pfree() test below */
+ 		{
+ 			buflen = arglen * sizeof(UChar) + 1;	/* capacity in UChars, as ICU expects */
+ 			source = palloc(buflen * sizeof(UChar));
+ 			dest = palloc(buflen * sizeof(UChar));
+ 		}
+ 		else
+ 		{
+ 			buflen = STACKBUFLEN;
+ 			source = sourcebuf;
+ 			dest = destbuf;
+ 		}
+ 
/* convert to UTF-16 */
+ 		ucnv_toUChars(conv, source, buflen, VARDATA(string), arglen, &status);
+ 		if (U_FAILURE(status))
+ 		{
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_INTERNAL_ERROR),
+ 					 errmsg("ICU error: Could not convert string")));
+ 		}
+ 
+ 		/* run desired function */
+ 		buflen = u_strToLower(dest, buflen, source, -1, NULL, &status);
+ 		if (U_FAILURE(status))
+ 		{
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_INTERNAL_ERROR),
+ 					 errmsg("ICU error: Could not modify case")));
+ 		}
+ 
+ 		/* and convert modified utf-16 string back to text */
+ 		result = UChartotext(dest, buflen);
+ 
+ 		if (arglen >= STACKBUFLEN / sizeof(UChar))
+ 		{
+ 			pfree(source);
+ 			pfree(dest);
+ 		}
+ 		PG_RETURN_TEXT_P(result);
+ 	}
+ 	else
+ #else
  #ifdef USE_WIDE_UPPER_LOWER
  	/*
  	 * Use wide char code only when max encoding length > 1 and ctype != C.
***************
*** 192,197 ****
--- 307,313 ----
  	}
  	else
  #endif   /* USE_WIDE_UPPER_LOWER */
+ #endif /* USE_ICU */
  	{
  		text	   *string = PG_GETARG_TEXT_P_COPY(0);
  		char	   *ptr;
***************
*** 232,237 ****
--- 348,418 ----
  Datum
  upper(PG_FUNCTION_ARGS)
  {
+ #ifdef USE_ICU
+ 	/* use ICU only when max encoding length > one */
+ 	if (pg_database_encoding_max_length() > 1)
+ 	{
+ 		text	   *string = PG_GETARG_TEXT_P(0);
+ 		text	   *result;
+ 		UChar		sourcebuf[STACKBUFLEN], destbuf[STACKBUFLEN];
+ 		UChar	   *source, *dest;
+ 		int32_t		buflen, arglen;
+ 
+ 		UErrorCode	status = U_ZERO_ERROR;
+ 
+ 		if (conv == NULL)
+ 		{
+ 			conv = ucnv_open(NULL, &status);
+ 			if (U_FAILURE(status))
+ 			{
+ 				ereport(ERROR,
+ 						(errcode(ERRCODE_INTERNAL_ERROR),	/* a UErrorCode is not a SQLSTATE */
+ 						 errmsg("ICU error: Could not get converter for \"%s\"", ucnv_getDefaultName())));
+ 			}
+ 		}
+ 		arglen = VARSIZE(string) - VARHDRSZ;
+ 		if (arglen * sizeof(UChar) >= STACKBUFLEN)	/* must stay in step with the pfree() test below */
+ 		{
+ 			buflen = arglen * sizeof(UChar) + 1;	/* capacity in UChars, as ICU expects */
+ 			source = palloc(buflen * sizeof(UChar));
+ 			dest = palloc(buflen * sizeof(UChar));
+ 		}
+ 		else
+ 		{
+ 			buflen = STACKBUFLEN;
+ 			source = sourcebuf;
+ 			dest = destbuf;
+ 		}
+ 		/* convert to UTF-16 */
+ 		ucnv_toUChars(conv, source, buflen, VARDATA(string), arglen, &status);
+ 		if (U_FAILURE(status))
+ 		{
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_INTERNAL_ERROR),
+ 					 errmsg("ICU error: Could not 
convert string")));
+ 		}
+ 
+ 		/* run desired function */
+ 		buflen = u_strToUpper(dest, buflen, source, -1, NULL, &status);
+ 		if (U_FAILURE(status))
+ 		{
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_INTERNAL_ERROR),
+ 					 errmsg("ICU error: Could not modify case")));
+ 		}
+ 
+ 		/* and convert modified utf-16 string back to text */
+ 		result = UChartotext(dest, buflen);
+ 
+ 		if (arglen * sizeof(UChar) >= STACKBUFLEN)
+ 		{
+ 			pfree(source);
+ 			pfree(dest);
+ 		}
+ 		PG_RETURN_TEXT_P(result);
+ 	}
+ 	else
+ #else
  #ifdef USE_WIDE_UPPER_LOWER
  	/*
  	 * Use wide char code only when max encoding length > 1 and ctype != C.
***************
*** 258,263 ****
--- 439,445 ----
  	}
  	else
  #endif   /* USE_WIDE_UPPER_LOWER */
+ #endif /* USE_ICU */
  	{
  		text	   *string = PG_GETARG_TEXT_P_COPY(0);
  		char	   *ptr;
***************
*** 301,306 ****
--- 483,553 ----
  Datum
  initcap(PG_FUNCTION_ARGS)
  {
+ #ifdef USE_ICU
+ 	/* use ICU only when max encoding length > one */
+ 	if (pg_database_encoding_max_length() > 1)
+ 	{
+ 		text	   *string = PG_GETARG_TEXT_P(0);
+ 		text	   *result;
+ 		UChar		sourcebuf[STACKBUFLEN], destbuf[STACKBUFLEN];
+ 		UChar	   *source, *dest;
+ 		int32_t		buflen, arglen;
+ 
+ 		UErrorCode	status = U_ZERO_ERROR;
+ 
+ 		if (conv == NULL)
+ 		{
+ 			conv = ucnv_open(NULL, &status);
+ 			if (U_FAILURE(status))
+ 			{
+ 				ereport(ERROR,
+ 						(errcode(ERRCODE_INTERNAL_ERROR),	/* a UErrorCode is not a SQLSTATE */
+ 						 errmsg("ICU error: Could not get converter for \"%s\"", ucnv_getDefaultName())));
+ 			}
+ 		}
+ 		arglen = VARSIZE(string) - VARHDRSZ;
+ 		if (arglen * sizeof(UChar) >= STACKBUFLEN)	/* must stay in step with the pfree() test below */
+ 		{
+ 			buflen = arglen * sizeof(UChar) + 1;	/* capacity in UChars, as ICU expects */
+ 			source = palloc(buflen * sizeof(UChar));
+ 			dest = palloc(buflen * sizeof(UChar));
+ 		}
+ 		else
+ 		{
+ 			buflen = STACKBUFLEN;
+ 			source = sourcebuf;
+ 			dest = destbuf;
+ 		}
+ 		/* convert to UTF-16 */
+ 		ucnv_toUChars(conv, source, buflen, VARDATA(string), arglen, &status);
+ 		if (U_FAILURE(status))
+ 		{
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_INTERNAL_ERROR),
+ 					 errmsg("ICU error: Could not convert string")));
+ 		}
+ 
+ 		/* run desired function */
+ 		buflen = u_strToTitle(dest, buflen, source, -1, NULL, NULL, &status);
+ 		if (U_FAILURE(status))
+ 		{
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_INTERNAL_ERROR),
+ 
errmsg("ICU error: Could not modify case")));
+ 		}
+ 
+ 		/* and convert modified utf-16 string back to text */
+ 		result = UChartotext(dest, buflen);
+ 
+ 		if (arglen * sizeof(UChar) >= STACKBUFLEN)
+ 		{
+ 			pfree(source);
+ 			pfree(dest);
+ 		}
+ 		PG_RETURN_TEXT_P(result);
+ 	}
+ 	else
+ #else
  #ifdef USE_WIDE_UPPER_LOWER
  	/*
  	 * Use wide char code only when max encoding length > 1 and ctype != C.
***************
*** 334,339 ****
--- 581,587 ----
  	}
  	else
  #endif   /* USE_WIDE_UPPER_LOWER */
+ #endif /* USE_ICU */
  	{
  		text	   *string = PG_GETARG_TEXT_P_COPY(0);
  		int			wasalnum = 0;
Index: src/backend/utils/adt/varlena.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/utils/adt/varlena.c,v
retrieving revision 1.130
diff -c -r1.130 varlena.c
*** src/backend/utils/adt/varlena.c	29 Jul 2005 03:17:55 -0000	1.130
--- src/backend/utils/adt/varlena.c	30 Jul 2005 17:46:34 -0000
***************
*** 30,35 ****
--- 30,41 ----
  #include "utils/pg_locale.h"
  #include "regex/regex.h"
+ #ifdef USE_ICU
+ #include <unicode/utypes.h>	/* Basic ICU data types */
+ #include <unicode/ucnv.h>	/* C Converter API */
+ #include <unicode/ucol.h>	/* ucol_open, ucol_strcoll */
+ #include <unicode/uloc.h>	/* uloc_setDefault */
+ #endif /* USE_ICU */
  typedef struct varlena unknown;
***************
*** 844,874 ****
  	if (!lc_collate_is_c())
  	{
! 		char		a1buf[STACKBUFLEN];
! 		char		a2buf[STACKBUFLEN];
! 		char	   *a1p,
! 				   *a2p;
! 		if (len1 >= STACKBUFLEN)
! 			a1p = (char *) palloc(len1 + 1);
! 		else
! 			a1p = a1buf;
! 		if (len2 >= STACKBUFLEN)
! 			a2p = (char *) palloc(len2 + 1);
  		else
! 			a2p = a2buf;
! 		memcpy(a1p, arg1, len1);
! 		a1p[len1] = '\0';
! 		memcpy(a2p, arg2, len2);
! 		a2p[len2] = '\0';
! 
! 		result = strcoll(a1p, a2p);
! 
! 		if (len1 >= STACKBUFLEN)
! 			pfree(a1p);
! 		if (len2 >= STACKBUFLEN)
! 			pfree(a2p);
  	}
  	else
  	{
--- 850,986 ----
  	if (!lc_collate_is_c())
  	{
! #ifdef USE_ICU
! #define USTACKBUFLEN		(STACKBUFLEN / sizeof(UChar))	/* stack buffer size, in UChars */
! 		if (pg_database_encoding_max_length() > 1)
! 		{
! 			UChar		a1buf[USTACKBUFLEN],
! 						a2buf[USTACKBUFLEN];
! 			int			a1len = USTACKBUFLEN,
! 						a2len = USTACKBUFLEN;
! 			UChar	   *a1p,
! 					   *a2p;
! 
! 
static UCollator * collator = NULL; ! static UConverter * conv = NULL; ! UErrorCode status = U_ZERO_ERROR; ! if (conv == NULL) ! { ! conv = ucnv_open(NULL, &status); ! if (U_FAILURE(status) || conv == NULL) ! { ! ereport(ERROR, ! (errcode(status), ! errmsg("ICU error: varlena.c, could not get converter for \"%s\"", ucnv_getDefaultName()))); ! } ! } ! ! /* We keep a static collator "forever", since it is hard ! * coded into the database cluster at initdb time ! * anyway. Create it first time we get here. */ ! if (collator == NULL) ! { ! /* Expect LC_COLLATE to be set to something that ICU ! * will understand. This is quite probable, since ICU ! * does a lot of heuristics with this argument. I'd ! * rather set this in xlog.c, but it seems ICU forgets ! * it??? */ ! #ifndef WIN32 ! uloc_setDefault(setlocale(LC_COLLATE, NULL), &status); ! #else ! /* Win32 locale names are completely different from what ICU expects, so ! we need to do some conversion */ ! uloc_setDefault(pgwin32_localemap(setlocale(LC_COLLATE, NULL)), &status); ! #endif ! if(U_FAILURE(status)) ! { ! ereport(WARNING, ! (errcode(status), ! errmsg("ICU Error: varlena.c, could not set default lc_collate"))); ! } ! collator = ucol_open(NULL, &status); ! if (U_FAILURE(status)) ! { ! ereport(WARNING, ! (errcode(status), ! errmsg("ICU Error: varlena.c, could not open collator"))); ! } ! } ! ! if (len1 >= USTACKBUFLEN / sizeof(UChar)) ! { ! a1len = len1 * sizeof(UChar) + 2; ! a1p = (UChar *) palloc(a1len); ! } ! else ! a1p = a1buf; ! ! if (len2 >= USTACKBUFLEN / sizeof(UChar)) ! { ! a2len = len2 * sizeof(UChar) + 2; ! a2p = (UChar *) palloc(a2len); ! } ! else ! a2p = a2buf; ! ! ucnv_toUChars(conv, a1p, a1len, arg1, len1, &status); ! if(U_FAILURE(status)) ! { ! ereport(WARNING, ! (errcode(status), ! errmsg("ICU Error: varlena.c, could not convert to UChars"))); ! } ! ucnv_toUChars(conv, a2p, a2len, arg2, len2, &status); ! if(U_FAILURE(status)) ! { ! ereport(WARNING, ! (errcode(status), ! 
errmsg("ICU Error: varlena.c, could not convert to UChars"))); ! } ! ! result = ucol_strcoll(collator, a1p, -1, a2p, -1); ! if(U_FAILURE(status)) ! { ! ereport(WARNING, ! (errcode(status), ! errmsg("ICU Error: varlena.c, could not collate"))); ! } ! ! if (len1 * sizeof(UChar) >= USTACKBUFLEN) ! pfree(a1p); ! if (len2 * sizeof(UChar) >= USTACKBUFLEN) ! pfree(a2p); ! } else ! #endif /* USE_ICU */ ! { ! char a1buf[STACKBUFLEN]; ! char a2buf[STACKBUFLEN]; ! char *a1p, ! *a2p; ! if (len1 >= STACKBUFLEN) ! a1p = (char *) palloc(len1 + 1); ! else ! a1p = a1buf; ! if (len2 >= STACKBUFLEN) ! a2p = (char *) palloc(len2 + 1); ! else ! a2p = a2buf; ! ! memcpy(a1p, arg1, len1); ! a1p[len1] = '\0'; ! memcpy(a2p, arg2, len2); ! a2p[len2] = '\0'; ! ! result = strcoll(a1p, a2p); ! ! if (len1 >= STACKBUFLEN) ! pfree(a1p); ! if (len2 >= STACKBUFLEN) ! pfree(a2p); ! } } else { Index: src/backend/utils/mb/encnames.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/utils/mb/encnames.c,v retrieving revision 1.25 diff -c -r1.25 encnames.c *** src/backend/utils/mb/encnames.c 14 Mar 2005 18:31:20 -0000 1.25 --- src/backend/utils/mb/encnames.c 28 Jul 2005 17:16:52 -0000 *************** *** 375,380 **** --- 375,496 ---- } }; + #ifdef USE_ICU + /* + * Try to map most internal character encodings to the proper and + * preferred IANA string. Use this in mbutils.c to feed ICU info about + * the database's character encoding. + * + * Palle Girgensohn, 2005 + */ + + pg_enc2name pg_enc2iananame_tbl[] = + { + { + "US-ASCII", PG_SQL_ASCII + }, + { + "EUC-JP", PG_EUC_JP + }, + { + "GB2312", PG_EUC_CN + }, + { + "EUC-KR", PG_EUC_KR + }, + { + "ISO-2022-CN", PG_EUC_TW + }, + { + "KS_C_5601-1987", PG_JOHAB /* either KS_C_5601-1987 or ISO-2022-KR ??? 
*/
+ 	},
+ 	{
+ 		"UTF-8", PG_UTF8
+ 	},
+ 	{
+ 		"MULE_INTERNAL", PG_MULE_INTERNAL	/* is not for real */
+ 	},
+ 	{
+ 		"ISO-8859-1", PG_LATIN1
+ 	},
+ 	{
+ 		"ISO-8859-2", PG_LATIN2
+ 	},
+ 	{
+ 		"ISO-8859-3", PG_LATIN3
+ 	},
+ 	{
+ 		"ISO-8859-4", PG_LATIN4
+ 	},
+ 	{
+ 		"ISO-8859-9", PG_LATIN5
+ 	},
+ 	{
+ 		"ISO-8859-10", PG_LATIN6
+ 	},
+ 	{
+ 		"ISO-8859-13", PG_LATIN7
+ 	},
+ 	{
+ 		"ISO-8859-14", PG_LATIN8
+ 	},
+ 	{
+ 		"ISO-8859-15", PG_LATIN9
+ 	},
+ 	{
+ 		"ISO-8859-16", PG_LATIN10
+ 	},
+ 	{
+ 		"windows-1256", PG_WIN1256
+ 	},
+ 	{
+ 		"windows-1258", PG_WIN1258
+ 	},
+ 	{
+ 		"windows-874", PG_WIN874
+ 	},
+ 	{
+ 		"KOI8-R", PG_KOI8R
+ 	},
+ 	{
+ 		"windows-1251", PG_WIN1251
+ 	},
+ 	{
+ 		"IBM866", PG_WIN866
+ 	},
+ 	{
+ 		"ISO-8859-5", PG_ISO_8859_5
+ 	},
+ 	{
+ 		"ISO-8859-6", PG_ISO_8859_6
+ 	},
+ 	{
+ 		"ISO-8859-7", PG_ISO_8859_7
+ 	},
+ 	{
+ 		"ISO-8859-8", PG_ISO_8859_8
+ 	},
+ 	{
+ 		"windows-1250", PG_WIN1250
+ 	},
+ 	{
+ 		"Shift_JIS", PG_SJIS
+ 	},
+ 	{
+ 		"Big5", PG_BIG5
+ 	},
+ 	{
+ 		"GBK", PG_GBK
+ 	},
+ 	{
+ 		"cp949", PG_UHC
+ 	},
+ 	{
+ 		"GB18030", PG_GB18030
+ 	}
+ };
+ #endif /* USE_ICU */
+ 
  /* ----------
   * Encoding checks, for error returns -1 else encoding id
   * ----------
Index: src/backend/utils/mb/mbutils.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/utils/mb/mbutils.c,v
retrieving revision 1.50
diff -c -r1.50 mbutils.c
*** src/backend/utils/mb/mbutils.c	10 Jul 2005 21:13:59 -0000	1.50
--- src/backend/utils/mb/mbutils.c	28 Jul 2005 16:36:54 -0000
***************
*** 15,20 ****
--- 15,23 ----
  #include "utils/memutils.h"
  #include "utils/syscache.h"
  #include "catalog/namespace.h"
+ #ifdef USE_ICU
+ #include <unicode/ucnv.h>	/* ucnv_setDefaultName */
+ #endif /* USE_ICU */
  /*
   * We handle for actual FE and BE encoding setting encoding-identificator
***************
*** 576,581 ****
--- 579,588 ----
  	DatabaseEncoding = &pg_enc2name_tbl[encoding];
  	Assert(DatabaseEncoding->encoding == encoding);
+ #ifdef USE_ICU
+ 	Assert(pg_enc2iananame_tbl[encoding].encoding == encoding);	/* table is indexed by encoding id */
+ 	ucnv_setDefaultName(pg_enc2iananame_tbl[encoding].name);
+ #endif
  }
  void
Index: 
src/include/pg_config.h.in =================================================================== RCS file: /projects/cvsroot/pgsql/src/include/pg_config.h.in,v retrieving revision 1.86 diff -c -r1.86 pg_config.h.in *** src/include/pg_config.h.in 1 Jul 2005 18:17:31 -0000 1.86 --- src/include/pg_config.h.in 28 Jul 2005 16:36:54 -0000 *************** *** 642,647 **** --- 642,650 ---- /* Define to build with (Open)SSL support. (--with-openssl) */ #undef USE_SSL + /* Define to build with ICU support. (--with-icu) */ + #undef USE_ICU + /* Define to select SysV-style semaphores. */ #undef USE_SYSV_SEMAPHORES Index: src/include/mb/pg_wchar.h =================================================================== RCS file: /projects/cvsroot/pgsql/src/include/mb/pg_wchar.h,v retrieving revision 1.59 diff -c -r1.59 pg_wchar.h *** src/include/mb/pg_wchar.h 15 Jun 2005 00:15:08 -0000 1.59 --- src/include/mb/pg_wchar.h 28 Jul 2005 16:36:54 -0000 *************** *** 235,240 **** --- 235,241 ---- } pg_enc2name; extern pg_enc2name pg_enc2name_tbl[]; + extern pg_enc2name pg_enc2iananame_tbl[]; extern pg_encname *pg_char_to_encname_struct(const char *name); Index: src/include/port/win32.h =================================================================== RCS file: /projects/cvsroot/pgsql/src/include/port/win32.h,v retrieving revision 1.46 diff -c -r1.46 win32.h *** src/include/port/win32.h 16 Jun 2005 17:53:54 -0000 1.46 --- src/include/port/win32.h 30 Jul 2005 17:39:36 -0000 *************** *** 256,258 **** --- 256,261 ---- /* in backend/port/win32/error.c */ extern void _dosmaperr(unsigned long); + + /* in backend/port/win32/localemap.c */ + extern char *pgwin32_localemap(char *winlocale);