diff --git a/src/backend/utils/mb/wchar.c b/src/backend/utils/mb/wchar.c index 4b98c8b..d1c75db 100644 --- a/src/backend/utils/mb/wchar.c +++ b/src/backend/utils/mb/wchar.c @@ -462,8 +462,8 @@ unicode_to_utf8(pg_wchar c, unsigned char *utf8string) * We return "1" for any leading byte that is either flat-out illegal or * indicates a length larger than we support. * - * pg_utf2wchar_with_len(), utf2ucs(), pg_utf8_islegal(), and perhaps - * other places would need to be fixed to change this. + * pg_utf2wchar_with_len(), utf8_to_unicode(), pg_utf8_islegal(), + * and perhaps other places would need to be fixed to change this. */ int pg_utf_mblen(const unsigned char *s) @@ -632,36 +632,10 @@ ucs_wcwidth(pg_wchar ucs) (ucs >= 0x20000 && ucs <= 0x2ffff))); } -static pg_wchar -utf2ucs(const unsigned char *c) -{ - /* - * one char version of pg_utf2wchar_with_len. no control here, c must - * point to a large enough string - */ - if ((*c & 0x80) == 0) - return (pg_wchar) c[0]; - else if ((*c & 0xe0) == 0xc0) - return (pg_wchar) (((c[0] & 0x1f) << 6) | - (c[1] & 0x3f)); - else if ((*c & 0xf0) == 0xe0) - return (pg_wchar) (((c[0] & 0x0f) << 12) | - ((c[1] & 0x3f) << 6) | - (c[2] & 0x3f)); - else if ((*c & 0xf8) == 0xf0) - return (pg_wchar) (((c[0] & 0x07) << 18) | - ((c[1] & 0x3f) << 12) | - ((c[2] & 0x3f) << 6) | - (c[3] & 0x3f)); - else - /* that is an invalid code on purpose */ - return 0xffffffff; -} - static int pg_utf_dsplen(const unsigned char *s) { - return ucs_wcwidth(utf2ucs(s)); + return ucs_wcwidth(utf8_to_unicode(s)); } /* diff --git a/src/bin/psql/mbprint.c b/src/bin/psql/mbprint.c index ad66ed8..96f76ff 100644 --- a/src/bin/psql/mbprint.c +++ b/src/bin/psql/mbprint.c @@ -43,41 +43,6 @@ pg_get_utf8_id(void) #define PG_UTF8 pg_get_utf8_id() -static pg_wchar -utf2ucs(const unsigned char *c) -{ - /* - * one char version of pg_utf2wchar_with_len. 
no control here, c must - * point to a large enough string - */ - if ((*c & 0x80) == 0) - return (pg_wchar) c[0]; - else if ((*c & 0xe0) == 0xc0) - { - return (pg_wchar) (((c[0] & 0x1f) << 6) | - (c[1] & 0x3f)); - } - else if ((*c & 0xf0) == 0xe0) - { - return (pg_wchar) (((c[0] & 0x0f) << 12) | - ((c[1] & 0x3f) << 6) | - (c[2] & 0x3f)); - } - else if ((*c & 0xf0) == 0xf0) - { - return (pg_wchar) (((c[0] & 0x07) << 18) | - ((c[1] & 0x3f) << 12) | - ((c[2] & 0x3f) << 6) | - (c[3] & 0x3f)); - } - else - { - /* that is an invalid code on purpose */ - return 0xffffffff; - } -} - - /* * Unicode 3.1 compliant validation : for each category, it checks the * combination of each byte to make sure it maps to a valid range. It also @@ -354,7 +319,7 @@ pg_wcsformat(unsigned char *pwcs, size_t len, int encoding, else if (w < 0) /* Non-ascii control char */ { if (encoding == PG_UTF8) - sprintf((char *) ptr, "\\u%04X", utf2ucs(pwcs)); + sprintf((char *) ptr, "\\u%04X", utf8_to_unicode(pwcs)); else { /* diff --git a/src/include/port.h b/src/include/port.h index 291a3e7..9a3614f 100644 --- a/src/include/port.h +++ b/src/include/port.h @@ -439,4 +439,7 @@ extern void qsort_arg(void *base, size_t nel, size_t elsize, /* port/chklocale.c */ extern int pg_get_encoding_from_locale(const char *ctype); +/* port/utf8.c */ +extern unsigned int utf8_to_unicode(const unsigned char *utf8char); + #endif /* PG_PORT_H */ diff --git a/src/port/Makefile b/src/port/Makefile index 9ef9491..2622ca4 100644 --- a/src/port/Makefile +++ b/src/port/Makefile @@ -31,7 +31,7 @@ override CPPFLAGS := -I$(top_builddir)/src/port -DFRONTEND $(CPPFLAGS) LIBS += $(PTHREAD_LIBS) OBJS = $(LIBOBJS) chklocale.o dirmod.o exec.o noblock.o path.o \ - pgsleep.o pgstrcasecmp.o qsort.o qsort_arg.o sprompt.o thread.o + pgsleep.o pgstrcasecmp.o qsort.o qsort_arg.o sprompt.o thread.o utf8.o ifneq (,$(filter $(PORTNAME),cygwin win32)) OBJS += pipe.o endif diff --git a/src/port/utf8.c b/src/port/utf8.c new file mode 100644 index 
/*-------------------------------------------------------------------------
 *
 * utf8.c
 *	  The utf8_to_unicode function
 *
 * This lives in src/port instead of src/backend/utils/mb so that it
 * doesn't have to be duplicated in src/bin/psql/mbprint.c.
 *
 * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL$
 *
 *-------------------------------------------------------------------------
 */

/*
 * NOTE(review): no header is included in this file, so the compiler never
 * checks the definition below against the prototype declared in
 * src/include/port.h.  Consider including the appropriate project header
 * here -- confirm which one the other src/port files use.
 */

/*
 * utf8_to_unicode
 *		Convert one UTF-8 encoded character to its Unicode code point.
 *
 * This is a one-character version of pg_utf2wchar_with_len.
 *
 * c points at the leading byte of the character.  The sequence length
 * (1 to 4 bytes) is derived from that leading byte alone and no bounds
 * checking is done, so the caller must guarantee that that many bytes are
 * readable.  Continuation bytes are not validated; only their low six bits
 * are used.
 *
 * Returns the decoded code point.  If the leading byte does not introduce
 * a 1- to 4-byte sequence (that is, it is a continuation byte 0x80-0xBF or
 * is 0xF8 or above), the deliberately invalid code 0xffffffff is returned.
 */
unsigned int
utf8_to_unicode(const unsigned char *c)
{
	/* 0xxxxxxx: plain ASCII, one byte */
	if ((*c & 0x80) == 0)
		return (unsigned int) c[0];

	/* 110xxxxx 10xxxxxx: two bytes, 5 + 6 payload bits */
	if ((*c & 0xe0) == 0xc0)
		return (unsigned int) (((c[0] & 0x1f) << 6) |
							   (c[1] & 0x3f));

	/* 1110xxxx 10xxxxxx 10xxxxxx: three bytes, 4 + 6 + 6 payload bits */
	if ((*c & 0xf0) == 0xe0)
		return (unsigned int) (((c[0] & 0x0f) << 12) |
							   ((c[1] & 0x3f) << 6) |
							   (c[2] & 0x3f));

	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx: four bytes, 3 + 6 + 6 + 6 bits */
	if ((*c & 0xf8) == 0xf0)
		return (unsigned int) (((c[0] & 0x07) << 18) |
							   ((c[1] & 0x3f) << 12) |
							   ((c[2] & 0x3f) << 6) |
							   (c[3] & 0x3f));

	/* that is an invalid code on purpose */
	return 0xffffffff;
}