regex + locale bug

Started by Angelos Karageorgiou, almost 27 years ago, 1 message
#1Angelos Karageorgiou
angelos@awesome.incredible.com

I am using postgres 6.4.2 on BSD/OS 3.1 with a Greek locale that I
have developed. I believed that regexes with postgres would not work because
of something I did, but a posting from another fellow from Sweden gave me a
clue that the problem must be with the regex package and not the locale.

So I investigated the code and found out that pg_isdigit(int ch),
pg_isalpha(int ch) and the associated functions do a comparison of
characters as ints. I changed a few crucial points with a cast to
(unsigned char) and voila, regexes in Greek with full locale support. My
guess is that an int does not compare equal to an unsigned char for
characters above 127 (such as the Greek letters in an 8-bit character set);
the sign bit is probably the culprit.

Please test the patch on some other language too; Swedish or Finnish
would be a nice touch.

Patch follows, but it is trivial really.

--------------------------------------------------------------------------------
*** regcomp.c	Tue Sep  1 07:31:25 1998
--- regcomp.c.patched	Wed Feb 10 19:57:11 1999
***************
*** 1038,1046 ****
  {
  	assert(pg_isalpha(ch));
  	if (pg_isupper(ch))
! 		return tolower(ch);
  	else if (pg_islower(ch))
! 		return toupper(ch);
  	else
  /* peculiar, but could happen */
  		return ch;
--- 1038,1046 ----
  {
  	assert(pg_isalpha(ch));
  	if (pg_isupper(ch))
! 		return tolower((unsigned char)ch);
  	else if (pg_islower(ch))
! 		return toupper((unsigned char)ch);
  	else
  /* peculiar, but could happen */
  		return ch;
***************
*** 1055,1067 ****
  static void
  bothcases(p, ch)
  struct parse *p;
! int			ch;
  {
  	pg_wchar   *oldnext = p->next;
  	pg_wchar   *oldend = p->end;
  	pg_wchar	bracket[3];
! 	assert(othercase(ch) != ch);/* p_bracket() would recurse */
  	p->next = bracket;
  	p->end = bracket + 2;
  	bracket[0] = ch;
--- 1055,1067 ----
  static void
  bothcases(p, ch)
  struct parse *p;
! int		ch;
  {
  	pg_wchar   *oldnext = p->next;
  	pg_wchar   *oldend = p->end;
  	pg_wchar	bracket[3];

! assert(othercase(ch) != (unsigned char)ch);/* p_bracket() would recurse */
p->next = bracket;
p->end = bracket + 2;
bracket[0] = ch;
***************
*** 1084,1090 ****
{
cat_t *cap = p->g->categories;

! 	if ((p->g->cflags & REG_ICASE) && pg_isalpha(ch) && othercase(ch) != ch)
  		bothcases(p, ch);
  	else
  	{
--- 1084,1090 ----
  {
  	cat_t	   *cap = p->g->categories;

! if ((p->g->cflags & REG_ICASE) && pg_isalpha(ch) && othercase(ch) != (unsigned char)ch)
bothcases(p, ch);
else
{
***************
*** 1862,1868 ****
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && isdigit(c));
#else
! return (isdigit(c));
#endif
}

--- 1862,1868 ----
  #ifdef MULTIBYTE
  	return (c >= 0 && c <= UCHAR_MAX && isdigit(c));
  #else
! 	return (isdigit((unsigned char)c));
  #endif
  }

***************
*** 1872,1878 ****
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && isalpha(c));
#else
! return (isalpha(c));
#endif
}

--- 1872,1878 ----
  #ifdef MULTIBYTE
  	return (c >= 0 && c <= UCHAR_MAX && isalpha(c));
  #else
! 	return (isalpha((unsigned char)c));
  #endif
  }

***************
*** 1882,1888 ****
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && isupper(c));
#else
! return (isupper(c));
#endif
}

--- 1882,1888 ----
  #ifdef MULTIBYTE
  	return (c >= 0 && c <= UCHAR_MAX && isupper(c));
  #else
! 	return (isupper((unsigned char)c));
  #endif
  }
***************
*** 1892,1897 ****
  #ifdef MULTIBYTE
  	return (c >= 0 && c <= UCHAR_MAX && islower(c));
  #else
! 	return (islower(c));
  #endif
  }
--- 1892,1897 ----
  #ifdef MULTIBYTE
  	return (c >= 0 && c <= UCHAR_MAX && islower(c));
  #else
! 	return (islower((unsigned char)c));
  #endif
  }

--
Incredible Networks LTD Angelos Karageorgiou
20 Karea st, +30.1.92.12.312 (voice)
116 36 Athens, Greece. +30.1.92.12.314 (fax)
http://www.incredible.com angelos@incredible.com (e-mail)