diff -cpr HEAD/src/backend/utils/adt/like.c patched/src/backend/utils/adt/like.c *** HEAD/src/backend/utils/adt/like.c Thu Mar 1 09:40:18 2007 --- patched/src/backend/utils/adt/like.c Mon Apr 2 13:36:25 2007 *************** static int MBMatchText(char *t, int tlen *** 37,48 **** static int MBMatchTextIC(char *t, int tlen, char *p, int plen); static text *MB_do_like_escape(text *, text *); /*-------------------- * Support routine for MatchText. Compares given multibyte streams * as wide characters. If they match, returns 1 otherwise returns 0. *-------------------- */ ! static int wchareq(char *p1, char *p2) { int p1_len; --- 37,54 ---- static int MBMatchTextIC(char *t, int tlen, char *p, int plen); static text *MB_do_like_escape(text *, text *); + static int UTF8MatchText(char *t, int tlen, char *p, int plen); + static int UTF8MatchTextIC(char *t, int tlen, char *p, int plen); + + static int GenericMatchText(char *s, int slen, char* p, int plen); + static int mbtexticlike(text *str, text *pat); + /*-------------------- * Support routine for MatchText. Compares given multibyte streams * as wide characters. If they match, returns 1 otherwise returns 0. *-------------------- */ ! static __inline__ int wchareq(char *p1, char *p2) { int p1_len; *************** wchareq(char *p1, char *p2) *** 78,83 **** --- 84,92 ---- * different again in the future. */ + #define NextByte(p, plen) ((p)++, (plen)--) + #define BYTEEQ(p1, p2) (*(p1) == *(p2)) + /* Set up to compile like_match.c for multibyte characters */ #define CHAREQ(p1, p2) wchareq(p1, p2) #define ICHAREQ(p1, p2) wchareq(p1, p2) *************** wchareq(char *p1, char *p2) *** 96,123 **** #include "like_match.c" - #undef CHAREQ - #undef ICHAREQ - #undef NextChar - #undef CopyAdvChar - #undef MatchText - #undef MatchTextIC - #undef do_like_escape - /* Set up to compile like_match.c for single-byte characters */ ! #define CHAREQ(p1, p2) (*(p1) == *(p2)) #define ICHAREQ(p1, p2) (tolower((unsigned char) *(p1)) == tolower((unsigned char) *(p2))) ! #define NextChar(p, plen) ((p)++, (plen)--) #define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--) #include "like_match.c" /* And some support for BYTEA */ #define BYTEA_CHAREQ(p1, p2) (*(p1) == *(p2)) #define BYTEA_NextChar(p, plen) ((p)++, (plen)--) #define BYTEA_CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--) /* * interface routines called by the function manager */ --- 105,350 ---- #include "like_match.c" /* Set up to compile like_match.c for single-byte characters */ ! #define CHAREQ(p1, p2) BYTEEQ(p1, p2) #define ICHAREQ(p1, p2) (tolower((unsigned char) *(p1)) == tolower((unsigned char) *(p2))) ! #define NextChar(p, plen) NextByte(p, plen) #define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--) #include "like_match.c" + /* Set up for utf8 characters */ + #define CHAREQ(p1, p2) wchareq(p1, p2) + #define ICHAREQ(p1, p2) wchareq(p1, p2) + #define IBYTEEQ(p1, p2) BYTEEQ(p1, p2) + #define NextChar(p, plen) \ + do { int __l = pg_utf_mblen(p); (p) +=__l; (plen) -=__l; } while (0) + + /* + * UTF8MatchText -- specialized version of MBMatchText for UTF8 + */ + static int + UTF8MatchText(char *t, int tlen, char *p, int plen) + { + /* Fast path for match-everything pattern */ + if ((plen == 1) && (*p == '%')) + return LIKE_TRUE; + + while ((tlen > 0) && (plen > 0)) + { + if (*p == '\\') + { + /* Next pattern char must match literally, whatever it is */ + NextByte(p, plen); + if ((plen <= 0) || !CHAREQ(t, p)) + return LIKE_FALSE; + } + else if (*p == '%') + { + /* %% is the same as % according to the SQL standard */ + /* Advance past all %'s */ + while ((plen > 0) && (*p == '%')) + NextByte(p, plen); + /* Trailing percent matches everything. */ + if (plen <= 0) + return LIKE_TRUE; + + /* + * Otherwise, scan for a text position at which we can match the + * rest of the pattern. + */ + while (tlen > 0) + { + /* + * Optimization to prevent most recursion: don't recurse + * unless first pattern char might match this text char. + */ + if (CHAREQ(t, p) || (*p == '\\') || (*p == '_')) + { + int matched = UTF8MatchText(t, tlen, p, plen); + + if (matched != LIKE_FALSE) + return matched; /* TRUE or ABORT */ + } + + NextChar(t, tlen); + } + + /* + * End of text with no match, so no point in trying later places + * to start matching this pattern. + */ + return LIKE_ABORT; + } + else if (*p == '_') + { + NextChar(t, tlen); + NextByte(p, plen); + continue; + } + else if (!BYTEEQ(t, p)) + { + /* + * Not the single-character wildcard and no explicit match? Then + * time to quit... + */ + return LIKE_FALSE; + } + + NextByte(t, tlen); + NextByte(p, plen); + } + + if (tlen > 0) + return LIKE_FALSE; /* end of pattern, but not of text */ + + /* End of input string. Do we have matching pattern remaining? */ + while ((plen > 0) && (*p == '%')) /* allow multiple %'s at end of + * pattern */ + NextByte(p, plen); + if (plen <= 0) + return LIKE_TRUE; + + /* + * End of text with no match, so no point in trying later places to start + * matching this pattern. + */ + return LIKE_ABORT; + } + + /* + * Same as above, but ignore case + */ + static int + UTF8MatchTextIC(char *t, int tlen, char *p, int plen) + { + /* Fast path for match-everything pattern */ + if ((plen == 1) && (*p == '%')) + return LIKE_TRUE; + + while ((tlen > 0) && (plen > 0)) + { + if (*p == '\\') + { + /* Next pattern char must match literally, whatever it is */ + NextByte(p, plen); + if ((plen <= 0) || !ICHAREQ(t, p)) + return LIKE_FALSE; + } + else if (*p == '%') + { + /* %% is the same as % according to the SQL standard */ + /* Advance past all %'s */ + while ((plen > 0) && (*p == '%')) + NextByte(p, plen); + /* Trailing percent matches everything. */ + if (plen <= 0) + return LIKE_TRUE; + + /* + * Otherwise, scan for a text position at which we can match the + * rest of the pattern. + */ + while (tlen > 0) + { + /* + * Optimization to prevent most recursion: don't recurse + * unless first pattern char might match this text char. + */ + if (ICHAREQ(t, p) || (*p == '\\') || (*p == '_')) + { + int matched = UTF8MatchTextIC(t, tlen, p, plen); + + if (matched != LIKE_FALSE) + return matched; /* TRUE or ABORT */ + } + + NextChar(t, tlen); + } + + /* + * End of text with no match, so no point in trying later places + * to start matching this pattern. + */ + return LIKE_ABORT; + } + else if (*p == '_') + { + NextChar(t, tlen); + NextByte(p, plen); + continue; + } + else if (!IBYTEEQ(t, p)) + { + /* + * Not the single-character wildcard and no explicit match? Then + * time to quit... + */ + return LIKE_FALSE; + } + + NextByte(t, tlen); + NextByte(p, plen); + } + + if (tlen > 0) + return LIKE_FALSE; /* end of pattern, but not of text */ + + /* End of input string. Do we have matching pattern remaining? */ + while ((plen > 0) && (*p == '%')) /* allow multiple %'s at end of + * pattern */ + NextByte(p, plen); + if (plen <= 0) + return LIKE_TRUE; + + /* + * End of text with no match, so no point in trying later places to start + * matching this pattern. + */ + return LIKE_ABORT; + } + /* And some support for BYTEA */ #define BYTEA_CHAREQ(p1, p2) (*(p1) == *(p2)) #define BYTEA_NextChar(p, plen) ((p)++, (plen)--) #define BYTEA_CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--) + static __inline__ int + GenericMatchText(char *s, int slen, char* p, int plen) + { + if (pg_database_encoding_max_length() == 1) + return MatchText(s, slen, p, plen); + else if (GetDatabaseEncoding() == PG_UTF8) + return UTF8MatchText(s, slen, p, plen); + else + return MBMatchText(s, slen, p, plen); + } + + static __inline__ int + mbtexticlike(text *str, text *pat) + { + char *s, + *p; + int slen, + plen; + + /* Force inputs to lower case to achieve case insensitivity */ + str = DatumGetTextP(DirectFunctionCall1(lower, + PointerGetDatum(str))); + pat = DatumGetTextP(DirectFunctionCall1(lower, + PointerGetDatum(pat))); + s = VARDATA(str); + slen = (VARSIZE(str) - VARHDRSZ); + p = VARDATA(pat); + plen = (VARSIZE(pat) - VARHDRSZ); + + if (GetDatabaseEncoding() == PG_UTF8) + return UTF8MatchTextIC(s, slen, p, plen); + else + return MBMatchTextIC(s, slen, p, plen); + } + /* * interface routines called by the function manager */ *************** namelike(PG_FUNCTION_ARGS) *** 138,147 **** p = VARDATA(pat); plen = (VARSIZE(pat) - VARHDRSZ); ! if (pg_database_encoding_max_length() == 1) ! result = (MatchText(s, slen, p, plen) == LIKE_TRUE); ! else ! result = (MBMatchText(s, slen, p, plen) == LIKE_TRUE); PG_RETURN_BOOL(result); } --- 365,371 ---- p = VARDATA(pat); plen = (VARSIZE(pat) - VARHDRSZ); ! result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE); PG_RETURN_BOOL(result); } *************** namenlike(PG_FUNCTION_ARGS) *** 162,171 **** p = VARDATA(pat); plen = (VARSIZE(pat) - VARHDRSZ); ! if (pg_database_encoding_max_length() == 1) ! result = (MatchText(s, slen, p, plen) != LIKE_TRUE); ! else ! result = (MBMatchText(s, slen, p, plen) != LIKE_TRUE); PG_RETURN_BOOL(result); } --- 386,392 ---- p = VARDATA(pat); plen = (VARSIZE(pat) - VARHDRSZ); ! result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE); PG_RETURN_BOOL(result); } *************** textlike(PG_FUNCTION_ARGS) *** 186,195 **** p = VARDATA(pat); plen = (VARSIZE(pat) - VARHDRSZ); ! if (pg_database_encoding_max_length() == 1) ! result = (MatchText(s, slen, p, plen) == LIKE_TRUE); ! else ! result = (MBMatchText(s, slen, p, plen) == LIKE_TRUE); PG_RETURN_BOOL(result); } --- 407,413 ---- p = VARDATA(pat); plen = (VARSIZE(pat) - VARHDRSZ); ! result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE); PG_RETURN_BOOL(result); } *************** textnlike(PG_FUNCTION_ARGS) *** 210,219 **** p = VARDATA(pat); plen = (VARSIZE(pat) - VARHDRSZ); ! if (pg_database_encoding_max_length() == 1) ! result = (MatchText(s, slen, p, plen) != LIKE_TRUE); ! else ! result = (MBMatchText(s, slen, p, plen) != LIKE_TRUE); PG_RETURN_BOOL(result); } --- 428,434 ---- p = VARDATA(pat); plen = (VARSIZE(pat) - VARHDRSZ); ! result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE); PG_RETURN_BOOL(result); } *************** nameiclike(PG_FUNCTION_ARGS) *** 285,305 **** } else { - /* Force inputs to lower case to achieve case insensitivity */ text *strtext; strtext = DatumGetTextP(DirectFunctionCall1(name_text, NameGetDatum(str))); ! strtext = DatumGetTextP(DirectFunctionCall1(lower, ! PointerGetDatum(strtext))); ! pat = DatumGetTextP(DirectFunctionCall1(lower, ! PointerGetDatum(pat))); ! ! s = VARDATA(strtext); ! slen = (VARSIZE(strtext) - VARHDRSZ); ! p = VARDATA(pat); ! plen = (VARSIZE(pat) - VARHDRSZ); ! result = (MBMatchTextIC(s, slen, p, plen) == LIKE_TRUE); } PG_RETURN_BOOL(result); --- 500,510 ---- } else { text *strtext; strtext = DatumGetTextP(DirectFunctionCall1(name_text, NameGetDatum(str))); ! result = (mbtexticlike(strtext, pat) == LIKE_TRUE); } PG_RETURN_BOOL(result); *************** nameicnlike(PG_FUNCTION_ARGS) *** 326,346 **** } else { - /* Force inputs to lower case to achieve case insensitivity */ text *strtext; strtext = DatumGetTextP(DirectFunctionCall1(name_text, NameGetDatum(str))); ! strtext = DatumGetTextP(DirectFunctionCall1(lower, ! PointerGetDatum(strtext))); ! pat = DatumGetTextP(DirectFunctionCall1(lower, ! PointerGetDatum(pat))); ! ! s = VARDATA(strtext); ! slen = (VARSIZE(strtext) - VARHDRSZ); ! p = VARDATA(pat); ! plen = (VARSIZE(pat) - VARHDRSZ); ! result = (MBMatchTextIC(s, slen, p, plen) != LIKE_TRUE); } PG_RETURN_BOOL(result); --- 531,541 ---- } else { text *strtext; strtext = DatumGetTextP(DirectFunctionCall1(name_text, NameGetDatum(str))); ! result = (mbtexticlike(strtext, pat) != LIKE_TRUE); } PG_RETURN_BOOL(result); *************** texticlike(PG_FUNCTION_ARGS) *** 367,382 **** } else { ! /* Force inputs to lower case to achieve case insensitivity */ ! str = DatumGetTextP(DirectFunctionCall1(lower, ! PointerGetDatum(str))); ! pat = DatumGetTextP(DirectFunctionCall1(lower, ! PointerGetDatum(pat))); ! s = VARDATA(str); ! slen = (VARSIZE(str) - VARHDRSZ); ! p = VARDATA(pat); ! plen = (VARSIZE(pat) - VARHDRSZ); ! result = (MBMatchTextIC(s, slen, p, plen) == LIKE_TRUE); } PG_RETURN_BOOL(result); --- 562,568 ---- } else { ! result = (mbtexticlike(str, pat) == LIKE_TRUE); } PG_RETURN_BOOL(result); *************** texticnlike(PG_FUNCTION_ARGS) *** 403,418 **** } else { ! /* Force inputs to lower case to achieve case insensitivity */ ! str = DatumGetTextP(DirectFunctionCall1(lower, ! PointerGetDatum(str))); ! pat = DatumGetTextP(DirectFunctionCall1(lower, ! PointerGetDatum(pat))); ! s = VARDATA(str); ! slen = (VARSIZE(str) - VARHDRSZ); ! p = VARDATA(pat); ! plen = (VARSIZE(pat) - VARHDRSZ); ! result = (MBMatchTextIC(s, slen, p, plen) != LIKE_TRUE); } PG_RETURN_BOOL(result); --- 589,595 ---- } else { ! result = (mbtexticlike(str, pat) != LIKE_TRUE); } PG_RETURN_BOOL(result); diff -cpr HEAD/src/backend/utils/adt/like_match.c patched/src/backend/utils/adt/like_match.c *** HEAD/src/backend/utils/adt/like_match.c Thu Mar 1 09:40:18 2007 --- patched/src/backend/utils/adt/like_match.c Mon Apr 2 13:36:25 2007 *************** MatchText(char *t, int tlen, char *p, in *** 82,88 **** if (*p == '\\') { /* Next pattern char must match literally, whatever it is */ ! NextChar(p, plen); if ((plen <= 0) || !CHAREQ(t, p)) return LIKE_FALSE; } --- 82,88 ---- if (*p == '\\') { /* Next pattern char must match literally, whatever it is */ ! NextByte(p, plen); if ((plen <= 0) || !CHAREQ(t, p)) return LIKE_FALSE; } *************** MatchText(char *t, int tlen, char *p, in *** 91,97 **** /* %% is the same as % according to the SQL standard */ /* Advance past all %'s */ while ((plen > 0) && (*p == '%')) ! NextChar(p, plen); /* Trailing percent matches everything. */ if (plen <= 0) return LIKE_TRUE; --- 91,97 ---- /* %% is the same as % according to the SQL standard */ /* Advance past all %'s */ while ((plen > 0) && (*p == '%')) ! NextByte(p, plen); /* Trailing percent matches everything. */ if (plen <= 0) return LIKE_TRUE; *************** MatchText(char *t, int tlen, char *p, in *** 142,148 **** /* End of input string. Do we have matching pattern remaining? */ while ((plen > 0) && (*p == '%')) /* allow multiple %'s at end of * pattern */ ! NextChar(p, plen); if (plen <= 0) return LIKE_TRUE; --- 142,148 ---- /* End of input string. Do we have matching pattern remaining? */ while ((plen > 0) && (*p == '%')) /* allow multiple %'s at end of * pattern */ ! NextByte(p, plen); if (plen <= 0) return LIKE_TRUE; *************** MatchTextIC(char *t, int tlen, char *p, *** 168,174 **** if (*p == '\\') { /* Next pattern char must match literally, whatever it is */ ! NextChar(p, plen); if ((plen <= 0) || !ICHAREQ(t, p)) return LIKE_FALSE; } --- 168,174 ---- if (*p == '\\') { /* Next pattern char must match literally, whatever it is */ ! NextByte(p, plen); if ((plen <= 0) || !ICHAREQ(t, p)) return LIKE_FALSE; } *************** MatchTextIC(char *t, int tlen, char *p, *** 177,183 **** /* %% is the same as % according to the SQL standard */ /* Advance past all %'s */ while ((plen > 0) && (*p == '%')) ! NextChar(p, plen); /* Trailing percent matches everything. */ if (plen <= 0) return LIKE_TRUE; --- 177,183 ---- /* %% is the same as % according to the SQL standard */ /* Advance past all %'s */ while ((plen > 0) && (*p == '%')) ! NextByte(p, plen); /* Trailing percent matches everything. */ if (plen <= 0) return LIKE_TRUE; *************** MatchTextIC(char *t, int tlen, char *p, *** 228,234 **** /* End of input string. Do we have matching pattern remaining? */ while ((plen > 0) && (*p == '%')) /* allow multiple %'s at end of * pattern */ ! NextChar(p, plen); if (plen <= 0) return LIKE_TRUE; --- 228,234 ---- /* End of input string. Do we have matching pattern remaining? */ while ((plen > 0) && (*p == '%')) /* allow multiple %'s at end of * pattern */ ! NextByte(p, plen); if (plen <= 0) return LIKE_TRUE; *************** do_like_escape(text *pat, text *esc) *** 336,338 **** --- 336,346 ---- return result; } + + #undef CHAREQ + #undef ICHAREQ + #undef NextChar + #undef CopyAdvChar + #undef MatchText + #undef MatchTextIC + #undef do_like_escape