No title

Started by Angelos Karageorgiou almost 27 years ago, 12 messages
#1Angelos Karageorgiou
angelos@incredible.com

I am using postgres 6.4.2 on BSD/OS 3.1 with a Greek locale that I
have developed. I knew that regexes with postgres would not work because
of something I did, but a posting from another fellow from Sweden gave me a
clue that the problem must be with the regex package and not the locale.

So I investigated the code and found out that pg_isdigit(int ch),
pg_isalpha(int ch) and the associated functions do a comparison of
characters as ints. I changed a few crucial points with a cast to
(unsigned char) and voila, regexes in Greek with full locale support. My
guess is that an int != unsigned char when comparing; the sign bit is
probably the culprit.

Please test the patch on some other language too; Swedish or Finnish
would be a nice touch.

Patch follows, but it is trivial really.
---------------------------------------------------------------------------------
*** regcomp.c	Tue Sep  1 07:31:25 1998
--- regcomp.c.patched	Wed Feb 10 19:57:11 1999
***************
*** 1038,1046 ****
  {
  	assert(pg_isalpha(ch));
  	if (pg_isupper(ch))
! 		return tolower(ch);
  	else if (pg_islower(ch))
! 		return toupper(ch);
  	else
  /* peculiar, but could happen */
  		return ch;
--- 1038,1046 ----
  {
  	assert(pg_isalpha(ch));
  	if (pg_isupper(ch))
! 		return tolower((unsigned char)ch);
  	else if (pg_islower(ch))
! 		return toupper((unsigned char)ch);
  	else
  /* peculiar, but could happen */
  		return ch;
***************
*** 1055,1067 ****
  static void
  bothcases(p, ch)
  struct parse *p;
! int			ch;
  {
  	pg_wchar   *oldnext = p->next;
  	pg_wchar   *oldend = p->end;
  	pg_wchar	bracket[3];
! 	assert(othercase(ch) != ch);/* p_bracket() would recurse */
  	p->next = bracket;
  	p->end = bracket + 2;
  	bracket[0] = ch;
--- 1055,1067 ----
  static void
  bothcases(p, ch)
  struct parse *p;
! int		ch;
  {
  	pg_wchar   *oldnext = p->next;
  	pg_wchar   *oldend = p->end;
  	pg_wchar	bracket[3];

! assert(othercase(ch) != (unsigned char)ch);/* p_bracket() would recurse */
p->next = bracket;
p->end = bracket + 2;
bracket[0] = ch;
***************
*** 1084,1090 ****
{
cat_t *cap = p->g->categories;

! 	if ((p->g->cflags & REG_ICASE) && pg_isalpha(ch) && othercase(ch) != ch)
  		bothcases(p, ch);
  	else
  	{
--- 1084,1090 ----
  {
  	cat_t	   *cap = p->g->categories;

! if ((p->g->cflags & REG_ICASE) && pg_isalpha(ch) && othercase(ch) != (unsigned char)ch)
bothcases(p, ch);
else
{
***************
*** 1862,1868 ****
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && isdigit(c));
#else
! return (isdigit(c));
#endif
}

--- 1862,1868 ----
  #ifdef MULTIBYTE
  	return (c >= 0 && c <= UCHAR_MAX && isdigit(c));
  #else
! 	return (isdigit((unsigned char)c));
  #endif
  }

***************
*** 1872,1878 ****
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && isalpha(c));
#else
! return (isalpha(c));
#endif
}

--- 1872,1878 ----
  #ifdef MULTIBYTE
  	return (c >= 0 && c <= UCHAR_MAX && isalpha(c));
  #else
! 	return (isalpha((unsigned char)c));
  #endif
  }

***************
*** 1882,1888 ****
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && isupper(c));
#else
! return (isupper(c));
#endif
}

--- 1882,1888 ----
  #ifdef MULTIBYTE
  	return (c >= 0 && c <= UCHAR_MAX && isupper(c));
  #else
! 	return (isupper((unsigned char)c));
  #endif
  }
***************
*** 1892,1897 ****
  #ifdef MULTIBYTE
  	return (c >= 0 && c <= UCHAR_MAX && islower(c));
  #else
! 	return (islower(c));
  #endif
  }
--- 1892,1897 ----
  #ifdef MULTIBYTE
  	return (c >= 0 && c <= UCHAR_MAX && islower(c));
  #else
! 	return (islower((unsigned char)c));
  #endif
  }
#2Oleg Broytmann
phd@sun.med.ru
In reply to: Angelos Karageorgiou (#1)
Re: your mail

Hello!

Next time you send a patch, could you use the tools in
.../src/tools/make_diff?

I've applied the patch to 6.4.2 on Debian 2.0 and ran the locale test on
the koi8-r locale. The locale test passed before the patch and passed after
the patch as well. I didn't notice any difference. What difference did you expect?

Please supply data for locale test (look into .../src/test/locale). This
is not related to your patch, we're just collecting test data.

On Wed, 10 Feb 1999, Angelos Karageorgiou wrote:

I am using postgres 6.4.2 on BSD/OS 3.1 with a Greek locale that I
have developed. I knew that regexes with postgres would not work because
of something I did, but a posting from another fellow from Sweden gave me a
clue that the problem must be with the regex package and not the locale.

So I investigated the code and found out that pg_isdigit(int ch),
pg_isalpha(int ch) and the associated functions do a comparison of
characters as ints. I changed a few crucial points with a cast to
(unsigned char) and voila, regexes in Greek with full locale support. My
guess is that an int != unsigned char when comparing; the sign bit is
probably the culprit.

Please test the patch on some other language too; Swedish or Finnish
would be a nice touch.

Patch follows, but it is trivial really.
---------------------------------------------------------------------------------
*** regcomp.c	Tue Sep  1 07:31:25 1998
--- regcomp.c.patched	Wed Feb 10 19:57:11 1999
***************
*** 1038,1046 ****
{
assert(pg_isalpha(ch));
if (pg_isupper(ch))
! 		return tolower(ch);
else if (pg_islower(ch))
! 		return toupper(ch);
else
/* peculiar, but could happen */
return ch;
--- 1038,1046 ----
{
assert(pg_isalpha(ch));
if (pg_isupper(ch))
! 		return tolower((unsigned char)ch);
else if (pg_islower(ch))
! 		return toupper((unsigned char)ch);
else
/* peculiar, but could happen */
return ch;
***************
*** 1055,1067 ****
static void
bothcases(p, ch)
struct parse *p;
! int			ch;
{
pg_wchar   *oldnext = p->next;
pg_wchar   *oldend = p->end;
pg_wchar	bracket[3];
! 	assert(othercase(ch) != ch);/* p_bracket() would recurse */
p->next = bracket;
p->end = bracket + 2;
bracket[0] = ch;
--- 1055,1067 ----
static void
bothcases(p, ch)
struct parse *p;
! int		ch;
{
pg_wchar   *oldnext = p->next;
pg_wchar   *oldend = p->end;
pg_wchar	bracket[3];

! assert(othercase(ch) != (unsigned char)ch);/* p_bracket() would recurse */
p->next = bracket;
p->end = bracket + 2;
bracket[0] = ch;
***************
*** 1084,1090 ****
{
cat_t *cap = p->g->categories;

! 	if ((p->g->cflags & REG_ICASE) && pg_isalpha(ch) && othercase(ch) != ch)
bothcases(p, ch);
else
{
--- 1084,1090 ----
{
cat_t	   *cap = p->g->categories;

! if ((p->g->cflags & REG_ICASE) && pg_isalpha(ch) && othercase(ch) != (unsigned char)ch)
bothcases(p, ch);
else
{
***************
*** 1862,1868 ****
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && isdigit(c));
#else
! return (isdigit(c));
#endif
}

--- 1862,1868 ----
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && isdigit(c));
#else
! 	return (isdigit((unsigned char)c));
#endif
}

***************
*** 1872,1878 ****
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && isalpha(c));
#else
! return (isalpha(c));
#endif
}

--- 1872,1878 ----
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && isalpha(c));
#else
! 	return (isalpha((unsigned char)c));
#endif
}

***************
*** 1882,1888 ****
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && isupper(c));
#else
! return (isupper(c));
#endif
}

--- 1882,1888 ----
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && isupper(c));
#else
! 	return (isupper((unsigned char)c));
#endif
}
***************
*** 1892,1897 ****
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && islower(c));
#else
! 	return (islower(c));
#endif
}
--- 1892,1897 ----
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && islower(c));
#else
! 	return (islower((unsigned char)c));
#endif
}

Oleg.
----
Oleg Broytmann http://members.xoom.com/phd2/ phd2@earthling.net
Programmers don't die, they just GOSUB without RETURN.

#3Oleg Broytmann
phd@sun.med.ru
In reply to: Oleg Broytmann (#2)
Re: your mail

Hello!

Next time you send a patch, could you use the tools in
.../src/tools/make_diff?

I've applied the patch to 6.4.2 on Debian 2.0 and ran the locale test on
the koi8-r locale. The locale test passed before the patch and passed after
the patch as well. I didn't notice any difference. What difference did you expect?

Please supply data for locale test (look into .../src/test/locale). This
is not related to your patch, we're just collecting test data.

On Wed, 10 Feb 1999, Angelos Karageorgiou wrote:

I am using postgres 6.4.2 on BSD/OS 3.1 with a Greek locale that I
have developed. I knew that regexes with postgres would not work because
of something I did, but a posting from another fellow from Sweden gave me a
clue that the problem must be with the regex package and not the locale.

So I investigated the code and found out that pg_isdigit(int ch),
pg_isalpha(int ch) and the associated functions do a comparison of
characters as ints. I changed a few crucial points with a cast to
(unsigned char) and voila, regexes in Greek with full locale support. My
guess is that an int != unsigned char when comparing; the sign bit is
probably the culprit.

Please test the patch on some other language too; Swedish or Finnish
would be a nice touch.

Patch follows, but it is trivial really.
---------------------------------------------------------------------------------
*** regcomp.c	Tue Sep  1 07:31:25 1998
--- regcomp.c.patched	Wed Feb 10 19:57:11 1999
***************
*** 1038,1046 ****
{
assert(pg_isalpha(ch));
if (pg_isupper(ch))
! 		return tolower(ch);
else if (pg_islower(ch))
! 		return toupper(ch);
else
/* peculiar, but could happen */
return ch;
--- 1038,1046 ----
{
assert(pg_isalpha(ch));
if (pg_isupper(ch))
! 		return tolower((unsigned char)ch);
else if (pg_islower(ch))
! 		return toupper((unsigned char)ch);
else
/* peculiar, but could happen */
return ch;
***************
*** 1055,1067 ****
static void
bothcases(p, ch)
struct parse *p;
! int			ch;
{
pg_wchar   *oldnext = p->next;
pg_wchar   *oldend = p->end;
pg_wchar	bracket[3];
! 	assert(othercase(ch) != ch);/* p_bracket() would recurse */
p->next = bracket;
p->end = bracket + 2;
bracket[0] = ch;
--- 1055,1067 ----
static void
bothcases(p, ch)
struct parse *p;
! int		ch;
{
pg_wchar   *oldnext = p->next;
pg_wchar   *oldend = p->end;
pg_wchar	bracket[3];

! assert(othercase(ch) != (unsigned char)ch);/* p_bracket() would recurse */
p->next = bracket;
p->end = bracket + 2;
bracket[0] = ch;
***************
*** 1084,1090 ****
{
cat_t *cap = p->g->categories;

! 	if ((p->g->cflags & REG_ICASE) && pg_isalpha(ch) && othercase(ch) != ch)
bothcases(p, ch);
else
{
--- 1084,1090 ----
{
cat_t	   *cap = p->g->categories;

! if ((p->g->cflags & REG_ICASE) && pg_isalpha(ch) && othercase(ch) != (unsigned char)ch)
bothcases(p, ch);
else
{
***************
*** 1862,1868 ****
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && isdigit(c));
#else
! return (isdigit(c));
#endif
}

--- 1862,1868 ----
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && isdigit(c));
#else
! 	return (isdigit((unsigned char)c));
#endif
}

***************
*** 1872,1878 ****
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && isalpha(c));
#else
! return (isalpha(c));
#endif
}

--- 1872,1878 ----
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && isalpha(c));
#else
! 	return (isalpha((unsigned char)c));
#endif
}

***************
*** 1882,1888 ****
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && isupper(c));
#else
! return (isupper(c));
#endif
}

--- 1882,1888 ----
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && isupper(c));
#else
! 	return (isupper((unsigned char)c));
#endif
}
***************
*** 1892,1897 ****
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && islower(c));
#else
! 	return (islower(c));
#endif
}
--- 1892,1897 ----
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && islower(c));
#else
! 	return (islower((unsigned char)c));
#endif
}

Oleg.
----
Oleg Broytmann http://members.xoom.com/phd2/ phd2@earthling.net
Programmers don't die, they just GOSUB without RETURN.

#4Oleg Broytmann
phd@sun.med.ru
In reply to: Oleg Broytmann (#3)
Re: your mail

Hi!

On Thu, 11 Feb 1999, Angelos Karageorgiou wrote:

I've applied the patch to 6.4.2 on Debian 2.0 and ran the locale test on
the koi8-r locale. The locale test passed before the patch and passed after
the patch as well. I didn't notice any difference. What difference did you expect?

Are you using the multibyte character set or the single byte? I was having

Single byte.

problems with the single byte char set where select * where message ~* "os"
would give me different results than select * where message ~* "OS". Of
course "OS" is the iso-8859-7 Greek letters omicron and sigma; I just used
the English letters here to demonstrate the problem.

If you look into .../src/test/locale/koi8-r, you'll find there exactly
the same tests. These tests are working right in my locale without your
patch.
For me it seems like a compiler (or compiler option) problem - signed
vs. unsigned chars.

Please supply data for locale test (look into .../src/test/locale). This
is not related to your patch, we're just collecting test data.

I could post some strings in Greek, but it would be meaningless to you I
am afraid, and worse, without a font you would not be able to see them;
that is why I called upon a Swedish or Finnish fellow to test the
differences out.

Tests in .../src/test/locale/koi8-r are meaningless to non-Russians, yet
they are in the test suite. :)
We are collecting tests to help people test their native locales, not
foreign locales, really.

Oleg.
----
Oleg Broytmann http://members.xoom.com/phd2/ phd2@earthling.net
Programmers don't die, they just GOSUB without RETURN.

#5Oleg Broytmann
phd@sun.med.ru
In reply to: Oleg Broytmann (#4)
Re: your mail

Hi!

On Thu, 11 Feb 1999, Angelos Karageorgiou wrote:

For me it seems like a compiler (or compiler option) problem - signed
vs. unsigned chars.

Yes, you are right, the problem is BSD/OS specific, and indeed it has to
do with unsigned chars vs signed chars. I just did not know if others had
the problem too, and since a cast to (unsigned char) has no effect on
an 8-bit char I decided to post the patch.

Even test-ctype gives out different results when cp is cast as unsigned
char and not a plain char. Would you like the output from test-ctype for
unsigned chars?

I am not sure. This should be discussed among other developers. What we
should use: signed or unsigned chars, anyone has an idea?

BTW i appreciate the work on postgres it is an awesome package

Welcome!

Oleg.
----
Oleg Broytmann http://members.xoom.com/phd2/ phd2@earthling.net
Programmers don't die, they just GOSUB without RETURN.

#6J.M.
darcy@druid.net
In reply to: Oleg Broytmann (#5)
Re: [HACKERS] Re: your mail

Thus spake Oleg Broytmann

On Thu, 11 Feb 1999, Angelos Karageorgiou wrote:

For me it seems like a compiler (or compiler option) problem - signed
vs. unsigned chars.

Yes, you are right, the problem is BSD/OS specific, and indeed it has to
do with unsigned chars vs signed chars. I just did not know if others had
the problem too, and since a cast to (unsigned char) has no effect on
an 8-bit char I decided to post the patch.

Even test-ctype gives out different results when cp is cast as unsigned
char and not a plain char. Would you like the output from test-ctype for
unsigned chars?

I am not sure. This should be discussed among other developers. What we
should use: signed or unsigned chars, anyone has an idea?

In all my own code, I always set the compiler option to make char an
unsigned type. For portability I like to know that the behaviour
won't change as long as I carry over my compiler options. I like
that way better than casting since I don't get conflict warnings
for sending unsigned (or signed) char to library functions. Remember,
char, signed char and unsigned char are 3 distinct types even though
char has to behave exactly like one of the other two. Setting it up on
the compiler command line gets around that.

As for signed vs. unsigned, I don't think it matters that much. I chose
unsigned since I never do signed arithmetic on char and if I ever did I
would like to have the extra keyword to draw attention to it.

-- 
D'Arcy J.M. Cain <darcy@{druid|vex}.net>   |  Democracy is three wolves
http://www.druid.net/darcy/                |  and a sheep voting on
+1 416 424 2871     (DoD#0082)    (eNTP)   |  what's for dinner.
#7Oleg Broytmann
phd@sun.med.ru
In reply to: J.M. (#6)
Re: [HACKERS] Re: your mail

On Thu, 11 Feb 1999, D'Arcy J.M. Cain wrote:

should use: signed or unsigned chars, anyone has an idea?

In all my own code, I always set the compiler option to make char an
unsigned type. For portability I like to know that the behaviour
won't change as long as I carry over my compiler options. I like
that way better than casting since I don't get conflict warnings
for sending unsigned (or signed) char to library functions. Remember,
char, signed char and unsigned char are 3 distinct types even though
char has to behave exactly like one of the other two. Setting it up on
the compiler command line gets around that.

As for signed vs. unsigned, I don't think it matters that much. I chose
unsigned since I never do signed arithmetic on char and if I ever did I
would like to have the extra keyword to draw attention to it.

That is what I think of, and what I usually use - tweak compiler options
to unsigned char.
So, my conclusion - reject the patch and teach people to change compiler
options.

Oleg.
----
Oleg Broytmann http://members.xoom.com/phd2/ phd2@earthling.net
Programmers don't die, they just GOSUB without RETURN.

#8Bruce Momjian
maillist@candle.pha.pa.us
In reply to: Angelos Karageorgiou (#1)
Re: your mail

Did we reject this 'unsigned' patch, folks? I seem to remember someone
objecting to it.

I am using postgres 6.4.2 on BSD/OS 3.1 with a Greek locale that I
have developed. I knew that regexes with postgres would not work because
of something I did, but a posting from another fellow from Sweden gave me a
clue that the problem must be with the regex package and not the locale.

So I investigated the code and found out that pg_isdigit(int ch),
pg_isalpha(int ch) and the associated functions do a comparison of
characters as ints. I changed a few crucial points with a cast to
(unsigned char) and voila, regexes in Greek with full locale support. My
guess is that an int != unsigned char when comparing; the sign bit is
probably the culprit.

Please test the patch on some other language too; Swedish or Finnish
would be a nice touch.

Patch follows, but it is trivial really.
---------------------------------------------------------------------------------
*** regcomp.c	Tue Sep  1 07:31:25 1998
--- regcomp.c.patched	Wed Feb 10 19:57:11 1999
***************
*** 1038,1046 ****
{
assert(pg_isalpha(ch));
if (pg_isupper(ch))
! 		return tolower(ch);
else if (pg_islower(ch))
! 		return toupper(ch);
else
/* peculiar, but could happen */
return ch;
--- 1038,1046 ----
{
assert(pg_isalpha(ch));
if (pg_isupper(ch))
! 		return tolower((unsigned char)ch);
else if (pg_islower(ch))
! 		return toupper((unsigned char)ch);
else
/* peculiar, but could happen */
return ch;
***************
*** 1055,1067 ****
static void
bothcases(p, ch)
struct parse *p;
! int			ch;
{
pg_wchar   *oldnext = p->next;
pg_wchar   *oldend = p->end;
pg_wchar	bracket[3];
! 	assert(othercase(ch) != ch);/* p_bracket() would recurse */
p->next = bracket;
p->end = bracket + 2;
bracket[0] = ch;
--- 1055,1067 ----
static void
bothcases(p, ch)
struct parse *p;
! int		ch;
{
pg_wchar   *oldnext = p->next;
pg_wchar   *oldend = p->end;
pg_wchar	bracket[3];

! assert(othercase(ch) != (unsigned char)ch);/* p_bracket() would recurse */
p->next = bracket;
p->end = bracket + 2;
bracket[0] = ch;
***************
*** 1084,1090 ****
{
cat_t *cap = p->g->categories;

! 	if ((p->g->cflags & REG_ICASE) && pg_isalpha(ch) && othercase(ch) != ch)
bothcases(p, ch);
else
{
--- 1084,1090 ----
{
cat_t	   *cap = p->g->categories;

! if ((p->g->cflags & REG_ICASE) && pg_isalpha(ch) && othercase(ch) != (unsigned char)ch)
bothcases(p, ch);
else
{
***************
*** 1862,1868 ****
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && isdigit(c));
#else
! return (isdigit(c));
#endif
}

--- 1862,1868 ----
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && isdigit(c));
#else
! 	return (isdigit((unsigned char)c));
#endif
}

***************
*** 1872,1878 ****
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && isalpha(c));
#else
! return (isalpha(c));
#endif
}

--- 1872,1878 ----
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && isalpha(c));
#else
! 	return (isalpha((unsigned char)c));
#endif
}

***************
*** 1882,1888 ****
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && isupper(c));
#else
! return (isupper(c));
#endif
}

--- 1882,1888 ----
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && isupper(c));
#else
! 	return (isupper((unsigned char)c));
#endif
}
***************
*** 1892,1897 ****
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && islower(c));
#else
! 	return (islower(c));
#endif
}
--- 1892,1897 ----
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && islower(c));
#else
! 	return (islower((unsigned char)c));
#endif
}
-- 
  Bruce Momjian                        |  http://www.op.net/~candle
  maillist@candle.pha.pa.us            |  (610) 853-3000
  +  If your life is a hard drive,     |  830 Blythe Avenue
  +  Christ can be your backup.        |  Drexel Hill, Pennsylvania 19026
#9Bruce Momjian
maillist@candle.pha.pa.us
In reply to: Oleg Broytmann (#5)
Re: [HACKERS] Re: your mail

Ah, here is the e-mail objecting to the unsigned patch.

Hi!

On Thu, 11 Feb 1999, Angelos Karageorgiou wrote:

For me it seems like a compiler (or compiler option) problem - signed
vs. unsigned chars.

Yes, you are right, the problem is BSD/OS specific, and indeed it has to
do with unsigned chars vs signed chars. I just did not know if others had
the problem too, and since a cast to (unsigned char) has no effect on
an 8-bit char I decided to post the patch.

Even test-ctype gives out different results when cp is cast as unsigned
char and not a plain char. Would you like the output from test-ctype for
unsigned chars?

I am not sure. This should be discussed among other developers. What we
should use: signed or unsigned chars, anyone has an idea?

BTW i appreciate the work on postgres it is an awesome package

Welcome!

Oleg.
----
Oleg Broytmann http://members.xoom.com/phd2/ phd2@earthling.net
Programmers don't die, they just GOSUB without RETURN.

-- 
  Bruce Momjian                        |  http://www.op.net/~candle
  maillist@candle.pha.pa.us            |  (610) 853-3000
  +  If your life is a hard drive,     |  830 Blythe Avenue
  +  Christ can be your backup.        |  Drexel Hill, Pennsylvania 19026
#10Bruce Momjian
maillist@candle.pha.pa.us
In reply to: Oleg Broytmann (#7)
Re: [HACKERS] Re: your mail

Ah, here is an even clearer statement on unsigned.

On Thu, 11 Feb 1999, D'Arcy J.M. Cain wrote:

should use: signed or unsigned chars, anyone has an idea?

In all my own code, I always set the compiler option to make char an
unsigned type. For portability I like to know that the behaviour
won't change as long as I carry over my compiler options. I like
that way better than casting since I don't get conflict warnings
for sending unsigned (or signed) char to library functions. Remember,
char, signed char and unsigned char are 3 distinct types even though
char has to behave exactly like one of the other two. Setting it up on
the compiler command line gets around that.

As for signed vs. unsigned, I don't think it matters that much. I chose
unsigned since I never do signed arithmetic on char and if I ever did I
would like to have the extra keyword to draw attention to it.

That is what I think of, and what I usually use - tweak compiler options
to unsigned char.
So, my conclusion - reject the patch and teach people to change compiler
options.

Oleg.
----
Oleg Broytmann http://members.xoom.com/phd2/ phd2@earthling.net
Programmers don't die, they just GOSUB without RETURN.

-- 
  Bruce Momjian                        |  http://www.op.net/~candle
  maillist@candle.pha.pa.us            |  (610) 853-3000
  +  If your life is a hard drive,     |  830 Blythe Avenue
  +  Christ can be your backup.        |  Drexel Hill, Pennsylvania 19026
#11Tatsuo Ishii
t-ishii@sra.co.jp
In reply to: Bruce Momjian (#10)
Re: [HACKERS] Re: your mail

Did we reject this 'unsigned' patch, folks? I seem to remember someone
objecting to it.

[snip]

***************
*** 1862,1868 ****
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && isdigit(c));
#else
! return (isdigit(c));
#endif
}

--- 1862,1868 ----
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && isdigit(c));
#else
! 	return (isdigit((unsigned char)c));
#endif
}

According to the ANSI/C standard the argument to isdigit (or some
other friends) must have the value of either an unsigned char or
*EOF*. That's why the argument is typed to int, I guess. This patch
seems to break the rule?

BTW, I would like to propose yet another patch for the problem. This
seems to work on FreeBSD and Linux. Angelos, can you test it on your
platform (is it BSD/OS?)?
--
Tatsuo Ishii

*** regcomp.c~	Tue Sep  1 13:31:25 1998
--- regcomp.c	Thu Mar 11 16:51:28 1999
***************
*** 95,101 ****
  	static void p_b_eclass(struct parse * p, cset *cs);
  	static pg_wchar p_b_symbol(struct parse * p);
  	static char p_b_coll_elem(struct parse * p, int endc);
! 	static char othercase(int ch);
  	static void bothcases(struct parse * p, int ch);
  	static void ordinary(struct parse * p, int ch);
  	static void nonnewline(struct parse * p);
--- 95,101 ----
  	static void p_b_eclass(struct parse * p, cset *cs);
  	static pg_wchar p_b_symbol(struct parse * p);
  	static char p_b_coll_elem(struct parse * p, int endc);
! 	static unsigned char othercase(int ch);
  	static void bothcases(struct parse * p, int ch);
  	static void ordinary(struct parse * p, int ch);
  	static void nonnewline(struct parse * p);
***************
*** 1032,1049 ****
   - othercase - return the case counterpart of an alphabetic
   == static char othercase(int ch);
   */
! static char						/* if no counterpart, return ch */
  othercase(ch)
  int			ch;
  {
  	assert(pg_isalpha(ch));
  	if (pg_isupper(ch))
! 		return tolower(ch);
  	else if (pg_islower(ch))
! 		return toupper(ch);
  	else
  /* peculiar, but could happen */
! 		return ch;
  }
  /*
--- 1032,1049 ----
   - othercase - return the case counterpart of an alphabetic
   == static char othercase(int ch);
   */
! static unsigned char		/* if no counterpart, return ch */
  othercase(ch)
  int			ch;
  {
  	assert(pg_isalpha(ch));
  	if (pg_isupper(ch))
! 		return (unsigned char)tolower(ch);
  	else if (pg_islower(ch))
! 		return (unsigned char)toupper(ch);
  	else
  /* peculiar, but could happen */
! 		return (unsigned char)ch;
  }

/*

#12Bruce Momjian
maillist@candle.pha.pa.us
In reply to: Tatsuo Ishii (#11)
Re: [HACKERS] Re: your mail

I think we decided against this, right?

Did we reject this 'unsigned' patch, folks? I seem to remember someone
objecting to it.

[snip]

***************
*** 1862,1868 ****
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && isdigit(c));
#else
! return (isdigit(c));
#endif
}

--- 1862,1868 ----
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && isdigit(c));
#else
! 	return (isdigit((unsigned char)c));
#endif
}

According to the ANSI/C standard the argument to isdigit (or some
other friends) must have the value of either an unsigned char or
*EOF*. That's why the argument is typed to int, I guess. This patch
seems to break the rule?

BTW, I would like to propose yet another patch for the problem. This
seems to work on FreeBSD and Linux. Angelos, can you test it on your
platform (is it BSD/OS?)?
--
Tatsuo Ishii

*** regcomp.c~	Tue Sep  1 13:31:25 1998
--- regcomp.c	Thu Mar 11 16:51:28 1999
***************
*** 95,101 ****
static void p_b_eclass(struct parse * p, cset *cs);
static pg_wchar p_b_symbol(struct parse * p);
static char p_b_coll_elem(struct parse * p, int endc);
! 	static char othercase(int ch);
static void bothcases(struct parse * p, int ch);
static void ordinary(struct parse * p, int ch);
static void nonnewline(struct parse * p);
--- 95,101 ----
static void p_b_eclass(struct parse * p, cset *cs);
static pg_wchar p_b_symbol(struct parse * p);
static char p_b_coll_elem(struct parse * p, int endc);
! 	static unsigned char othercase(int ch);
static void bothcases(struct parse * p, int ch);
static void ordinary(struct parse * p, int ch);
static void nonnewline(struct parse * p);
***************
*** 1032,1049 ****
- othercase - return the case counterpart of an alphabetic
== static char othercase(int ch);
*/
! static char						/* if no counterpart, return ch */
othercase(ch)
int			ch;
{
assert(pg_isalpha(ch));
if (pg_isupper(ch))
! 		return tolower(ch);
else if (pg_islower(ch))
! 		return toupper(ch);
else
/* peculiar, but could happen */
! 		return ch;
}
/*
--- 1032,1049 ----
- othercase - return the case counterpart of an alphabetic
== static char othercase(int ch);
*/
! static unsigned char		/* if no counterpart, return ch */
othercase(ch)
int			ch;
{
assert(pg_isalpha(ch));
if (pg_isupper(ch))
! 		return (unsigned char)tolower(ch);
else if (pg_islower(ch))
! 		return (unsigned char)toupper(ch);
else
/* peculiar, but could happen */
! 		return (unsigned char)ch;
}

/*

-- 
  Bruce Momjian                        |  http://www.op.net/~candle
  maillist@candle.pha.pa.us            |  (610) 853-3000
  +  If your life is a hard drive,     |  830 Blythe Avenue
  +  Christ can be your backup.        |  Drexel Hill, Pennsylvania 19026