Fixes bug in strlower_libc_sb()
Hi Hackers,
While reviewing Jeff's patch [1]/messages/by-id/450ceb6260cad30d7afdf155d991a9caafee7c0d.camel@j-davis.com, I found this bug in strlower_libc_sb():
```
static size_t
strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t
srclen,
pg_locale_t locale)
{
if (srclen < 0)
srclen = strlen(src);
if (srclen + 1 <= destsize)
{
locale_t loc = locale->lt;
char *p;
if (srclen + 1 > destsize) <== This check will never be true
return srclen;
```
So I removed the useless check in the patch.
I also noticed strlower_libc_sb’s behavior is different
from unicode_strlower(), where the function comment says:
```
* Result string is stored in dst, truncating if larger than dstsize. If
* dstsize is greater than the result length, dst will be NUL-terminated;
* otherwise not.
```
And Jeff’s new code in [1]/messages/by-id/450ceb6260cad30d7afdf155d991a9caafee7c0d.camel@j-davis.com matches the description. The behavior is like
trying the best to copy as many chars as possible, if not enough space in
dest, then NULL-terminator can be omitted. But the current behavior
of strlower_libc_sb that only copies data to dest when dest has enough
space for src and NULL-terminator.
So I updated strlower_libc_sb(), strtitle_libc_sb() and strupper_libc_sb()
to comply with the behavior in this patch as well. “Make check” passed from
my side, so the patch doesn't seem to break anything.
[1]: /messages/by-id/450ceb6260cad30d7afdf155d991a9caafee7c0d.camel@j-davis.com
/messages/by-id/450ceb6260cad30d7afdf155d991a9caafee7c0d.camel@j-davis.com
Best regards,
--
Chao Li (Evan)
HighGo Software Co., Ltd.
https://www.highgo.com/
Attachments:
v1-0001-Fixes-bug-in-strlower_libc_sb.patchapplication/octet-stream; name=v1-0001-Fixes-bug-in-strlower_libc_sb.patchDownload
From 1d8015ef6adebc113e99aa9cd972381dc2048b3e Mon Sep 17 00:00:00 2001
From: "Chao Li (Evan)" <lic@highgo.com>
Date: Tue, 25 Nov 2025 10:26:16 +0800
Subject: [PATCH v1] Fixes bug in strlower_libc_sb()
Fixed a no-impact bug in strlower_libc_sb(), where is lenght check
is useless, thus removed the check.
Author: Chao Li <lic@highgo.com>
---
src/backend/utils/adt/pg_locale_libc.c | 163 +++++++++++++------------
1 file changed, 82 insertions(+), 81 deletions(-)
diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
index 716f005066a..877fdedf079 100644
--- a/src/backend/utils/adt/pg_locale_libc.c
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -426,39 +426,37 @@ static size_t
strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
+ ssize_t reallen;
+ locale_t loc = locale->lt;
+
if (srclen < 0)
srclen = strlen(src);
- if (srclen + 1 <= destsize)
- {
- locale_t loc = locale->lt;
- char *p;
-
- if (srclen + 1 > destsize)
- return srclen;
+ reallen = srclen + 1;
+ if (reallen > destsize)
+ reallen = destsize;
- memcpy(dest, src, srclen);
- dest[srclen] = '\0';
+ memcpy(dest, src, reallen);
+ if (reallen < destsize)
+ dest[reallen] = '\0';
- /*
- * Note: we assume that tolower_l() will not be so broken as to need
- * an isupper_l() guard test. When using the default collation, we
- * apply the traditional Postgres behavior that forces ASCII-style
- * treatment of I/i, but in non-default collations you get exactly
- * what the collation says.
- */
- for (p = dest; *p; p++)
+ /*
+ * Note: we assume that tolower_l() will not be so broken as to need an
+ * isupper_l() guard test. When using the default collation, we apply the
+ * traditional Postgres behavior that forces ASCII-style treatment of I/i,
+ * but in non-default collations you get exactly what the collation says.
+ */
+ for (char *p = dest; *p; p++)
+ {
+ if (locale->is_default)
{
- if (locale->is_default)
- {
- if (*p >= 'A' && *p <= 'Z')
- *p += 'a' - 'A';
- else if (IS_HIGHBIT_SET(*p) && isupper_l(*p, loc))
- *p = tolower_l((unsigned char) *p, loc);
- }
- else
+ if (*p >= 'A' && *p <= 'Z')
+ *p += 'a' - 'A';
+ else if (IS_HIGHBIT_SET(*p) && isupper_l(*p, loc))
*p = tolower_l((unsigned char) *p, loc);
}
+ else
+ *p = tolower_l((unsigned char) *p, loc);
}
return srclen;
@@ -516,55 +514,57 @@ static size_t
strtitle_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
+ ssize_t reallen;
+ locale_t loc = locale->lt;
+ int wasalnum = false;
+
if (srclen < 0)
srclen = strlen(src);
- if (srclen + 1 <= destsize)
- {
- locale_t loc = locale->lt;
- int wasalnum = false;
- char *p;
+ reallen = srclen + 1;
+ if (reallen > destsize)
+ reallen = destsize;
- memcpy(dest, src, srclen);
- dest[srclen] = '\0';
+ memcpy(dest, src, reallen);
+ if (reallen < destsize)
+ dest[reallen] = '\0';
- /*
- * Note: we assume that toupper_l()/tolower_l() will not be so broken
- * as to need guard tests. When using the default collation, we apply
- * the traditional Postgres behavior that forces ASCII-style treatment
- * of I/i, but in non-default collations you get exactly what the
- * collation says.
- */
- for (p = dest; *p; p++)
+ /*
+ * Note: we assume that toupper_l()/tolower_l() will not be so broken as
+ * to need guard tests. When using the default collation, we apply the
+ * traditional Postgres behavior that forces ASCII-style treatment of I/i,
+ * but in non-default collations you get exactly what the collation says.
+ */
+ for (char *p = dest; *p; p++)
+ {
+ if (locale->is_default)
{
- if (locale->is_default)
+ if (wasalnum)
{
- if (wasalnum)
- {
- if (*p >= 'A' && *p <= 'Z')
- *p += 'a' - 'A';
- else if (IS_HIGHBIT_SET(*p) && isupper_l(*p, loc))
- *p = tolower_l((unsigned char) *p, loc);
- }
- else
- {
- if (*p >= 'a' && *p <= 'z')
- *p -= 'a' - 'A';
- else if (IS_HIGHBIT_SET(*p) && islower_l(*p, loc))
- *p = toupper_l((unsigned char) *p, loc);
- }
+ if (*p >= 'A' && *p <= 'Z')
+ *p += 'a' - 'A';
+ else if (IS_HIGHBIT_SET(*p) && isupper_l(*p, loc))
+ *p = tolower_l((unsigned char) *p, loc);
}
else
{
- if (wasalnum)
- *p = tolower_l((unsigned char) *p, loc);
- else
+ if (*p >= 'a' && *p <= 'z')
+ *p -= 'a' - 'A';
+ else if (IS_HIGHBIT_SET(*p) && islower_l(*p, loc))
*p = toupper_l((unsigned char) *p, loc);
}
- wasalnum = isalnum_l((unsigned char) *p, loc);
}
+ else
+ {
+ if (wasalnum)
+ *p = tolower_l((unsigned char) *p, loc);
+ else
+ *p = toupper_l((unsigned char) *p, loc);
+ }
+ wasalnum = isalnum_l((unsigned char) *p, loc);
}
+
return srclen;
}
@@ -627,36 +627,37 @@ static size_t
strupper_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
+ ssize_t reallen;
+ locale_t loc = locale->lt;
+
if (srclen < 0)
srclen = strlen(src);
- if (srclen + 1 <= destsize)
- {
- locale_t loc = locale->lt;
- char *p;
+ reallen = srclen + 1;
+ if (reallen > destsize)
+ reallen = destsize;
- memcpy(dest, src, srclen);
- dest[srclen] = '\0';
+ memcpy(dest, src, reallen);
+ if (reallen < destsize)
+ dest[reallen] = '\0';
- /*
- * Note: we assume that toupper_l() will not be so broken as to need
- * an islower_l() guard test. When using the default collation, we
- * apply the traditional Postgres behavior that forces ASCII-style
- * treatment of I/i, but in non-default collations you get exactly
- * what the collation says.
- */
- for (p = dest; *p; p++)
+ /*
+ * Note: we assume that toupper_l() will not be so broken as to need an
+ * islower_l() guard test. When using the default collation, we apply the
+ * traditional Postgres behavior that forces ASCII-style treatment of I/i,
+ * but in non-default collations you get exactly what the collation says.
+ */
+ for (char *p = dest; *p; p++)
+ {
+ if (locale->is_default)
{
- if (locale->is_default)
- {
- if (*p >= 'a' && *p <= 'z')
- *p -= 'a' - 'A';
- else if (IS_HIGHBIT_SET(*p) && islower_l(*p, loc))
- *p = toupper_l((unsigned char) *p, loc);
- }
- else
+ if (*p >= 'a' && *p <= 'z')
+ *p -= 'a' - 'A';
+ else if (IS_HIGHBIT_SET(*p) && islower_l(*p, loc))
*p = toupper_l((unsigned char) *p, loc);
}
+ else
+ *p = toupper_l((unsigned char) *p, loc);
}
return srclen;
--
2.39.5 (Apple Git-154)
On Tue, Nov 25, 2025 at 11:03 AM Chao Li <li.evan.chao@gmail.com> wrote:
Hi Hackers,
While reviewing Jeff's patch [1], I found this bug in strlower_libc_sb():
```
static size_t
strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t
srclen,
pg_locale_t locale)
{
if (srclen < 0)
srclen = strlen(src);if (srclen + 1 <= destsize)
{
locale_t loc = locale->lt;
char *p;if (srclen + 1 > destsize) <== This check will never be true
return srclen;
```So I removed the useless check in the patch.
I also noticed strlower_libc_sb’s behavior is different
from unicode_strlower(), where the function comment says:```
* Result string is stored in dst, truncating if larger than dstsize. If
* dstsize is greater than the result length, dst will be NUL-terminated;
* otherwise not.
```And Jeff’s new code in [1] matches the description. The behavior is like
trying the best to copy as many chars as possible, if not enough space in
dest, then NULL-terminator can be omitted. But the current behavior
of strlower_libc_sb that only copies data to dest when dest has enough
space for src and NULL-terminator.So I updated strlower_libc_sb(), strtitle_libc_sb() and strupper_libc_sb()
to comply with the behavior in this patch as well. “Make check” passed from
my side, so the patch doesn't seem to break anything.[1]
/messages/by-id/450ceb6260cad30d7afdf155d991a9caafee7c0d.camel@j-davis.com
I thought over and splitted the patch into two commits:
0001 - Fixes the obvious bug by simply deleting the useless length check.
0002 - Updates strlower_lic_sb, strtitle_lic_sb, strupper_lic_sb to comply
with unicode_strlower's truncation behavior
Best regards,
Chao Li (Evan)
---------------------
HighGo Software Co., Ltd.
https://www.highgo.com/
Attachments:
v2-0002-Make-libc-based-case-folding-functions-match-unic.patchapplication/octet-stream; name=v2-0002-Make-libc-based-case-folding-functions-match-unic.patchDownload
From 7f3218da7d1206664522d41ff247b1e96815a757 Mon Sep 17 00:00:00 2001
From: "Chao Li (Evan)" <lic@highgo.com>
Date: Tue, 25 Nov 2025 13:40:13 +0800
Subject: [PATCH v2 2/2] Make libc-based case-folding functions match
unicode_strlower()'s behavior
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The comments for unicode_strlower() state that the destination buffer is
filled with as many characters as will fit, truncating the result if needed;
the string is NUL-terminated only if there is sufficient space. In contrast,
the libc variants (strlower_libc_sb(), strupper_libc_sb(), and
strtitle_libc_sb()) previously refused to copy anything unless the destination
was large enough for the full result plus the terminating NULL.
This patch updates all three libc-based functions to follow the same
“best effort copy + optional NULL terminator” model as unicode_strlower().
This ensures consistent behavior across all case-folding routines, regardless
of whether a Unicode locale is in use.
No existing regression tests required changes, and “make check” passes.
Author: Chao Li <lic@highgo.com>
Discussion: https://postgr.es/m/CAEoWx2mW0P8CByavV58zm3=eb2MQHaKOcDEF5B2UJYRyC2c3ig@mail.gmail.com
---
src/backend/utils/adt/pg_locale_libc.c | 159 +++++++++++++------------
1 file changed, 81 insertions(+), 78 deletions(-)
diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
index abf27283a33..c5ff2a0b681 100644
--- a/src/backend/utils/adt/pg_locale_libc.c
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -426,36 +426,37 @@ static size_t
strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
+ ssize_t reallen;
+ locale_t loc = locale->lt;
+
if (srclen < 0)
srclen = strlen(src);
- if (srclen + 1 <= destsize)
- {
- locale_t loc = locale->lt;
- char *p;
+ reallen = srclen + 1;
+ if (reallen > destsize)
+ reallen = destsize;
- memcpy(dest, src, srclen);
- dest[srclen] = '\0';
+ memcpy(dest, src, reallen);
+ if (reallen < destsize)
+ dest[reallen] = '\0';
- /*
- * Note: we assume that tolower_l() will not be so broken as to need
- * an isupper_l() guard test. When using the default collation, we
- * apply the traditional Postgres behavior that forces ASCII-style
- * treatment of I/i, but in non-default collations you get exactly
- * what the collation says.
- */
- for (p = dest; *p; p++)
+ /*
+ * Note: we assume that tolower_l() will not be so broken as to need an
+ * isupper_l() guard test. When using the default collation, we apply the
+ * traditional Postgres behavior that forces ASCII-style treatment of I/i,
+ * but in non-default collations you get exactly what the collation says.
+ */
+ for (char *p = dest; *p; p++)
+ {
+ if (locale->is_default)
{
- if (locale->is_default)
- {
- if (*p >= 'A' && *p <= 'Z')
- *p += 'a' - 'A';
- else if (IS_HIGHBIT_SET(*p) && isupper_l(*p, loc))
- *p = tolower_l((unsigned char) *p, loc);
- }
- else
+ if (*p >= 'A' && *p <= 'Z')
+ *p += 'a' - 'A';
+ else if (IS_HIGHBIT_SET(*p) && isupper_l(*p, loc))
*p = tolower_l((unsigned char) *p, loc);
}
+ else
+ *p = tolower_l((unsigned char) *p, loc);
}
return srclen;
@@ -513,53 +514,54 @@ static size_t
strtitle_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
+ ssize_t reallen;
+ locale_t loc = locale->lt;
+ int wasalnum = false;
+
if (srclen < 0)
srclen = strlen(src);
- if (srclen + 1 <= destsize)
- {
- locale_t loc = locale->lt;
- int wasalnum = false;
- char *p;
+ reallen = srclen + 1;
+ if (reallen > destsize)
+ reallen = destsize;
- memcpy(dest, src, srclen);
- dest[srclen] = '\0';
+ memcpy(dest, src, reallen);
+ if (reallen < destsize)
+ dest[reallen] = '\0';
- /*
- * Note: we assume that toupper_l()/tolower_l() will not be so broken
- * as to need guard tests. When using the default collation, we apply
- * the traditional Postgres behavior that forces ASCII-style treatment
- * of I/i, but in non-default collations you get exactly what the
- * collation says.
- */
- for (p = dest; *p; p++)
+ /*
+ * Note: we assume that toupper_l()/tolower_l() will not be so broken as
+ * to need guard tests. When using the default collation, we apply the
+ * traditional Postgres behavior that forces ASCII-style treatment of I/i,
+ * but in non-default collations you get exactly what the collation says.
+ */
+ for (char *p = dest; *p; p++)
+ {
+ if (locale->is_default)
{
- if (locale->is_default)
+ if (wasalnum)
{
- if (wasalnum)
- {
- if (*p >= 'A' && *p <= 'Z')
- *p += 'a' - 'A';
- else if (IS_HIGHBIT_SET(*p) && isupper_l(*p, loc))
- *p = tolower_l((unsigned char) *p, loc);
- }
- else
- {
- if (*p >= 'a' && *p <= 'z')
- *p -= 'a' - 'A';
- else if (IS_HIGHBIT_SET(*p) && islower_l(*p, loc))
- *p = toupper_l((unsigned char) *p, loc);
- }
+ if (*p >= 'A' && *p <= 'Z')
+ *p += 'a' - 'A';
+ else if (IS_HIGHBIT_SET(*p) && isupper_l(*p, loc))
+ *p = tolower_l((unsigned char) *p, loc);
}
else
{
- if (wasalnum)
- *p = tolower_l((unsigned char) *p, loc);
- else
+ if (*p >= 'a' && *p <= 'z')
+ *p -= 'a' - 'A';
+ else if (IS_HIGHBIT_SET(*p) && islower_l(*p, loc))
*p = toupper_l((unsigned char) *p, loc);
}
- wasalnum = isalnum_l((unsigned char) *p, loc);
}
+ else
+ {
+ if (wasalnum)
+ *p = tolower_l((unsigned char) *p, loc);
+ else
+ *p = toupper_l((unsigned char) *p, loc);
+ }
+ wasalnum = isalnum_l((unsigned char) *p, loc);
}
return srclen;
@@ -624,36 +626,37 @@ static size_t
strupper_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
+ ssize_t reallen;
+ locale_t loc = locale->lt;
+
if (srclen < 0)
srclen = strlen(src);
- if (srclen + 1 <= destsize)
- {
- locale_t loc = locale->lt;
- char *p;
+ reallen = srclen + 1;
+ if (reallen > destsize)
+ reallen = destsize;
- memcpy(dest, src, srclen);
- dest[srclen] = '\0';
+ memcpy(dest, src, reallen);
+ if (reallen < destsize)
+ dest[reallen] = '\0';
- /*
- * Note: we assume that toupper_l() will not be so broken as to need
- * an islower_l() guard test. When using the default collation, we
- * apply the traditional Postgres behavior that forces ASCII-style
- * treatment of I/i, but in non-default collations you get exactly
- * what the collation says.
- */
- for (p = dest; *p; p++)
+ /*
+ * Note: we assume that toupper_l() will not be so broken as to need an
+ * islower_l() guard test. When using the default collation, we apply the
+ * traditional Postgres behavior that forces ASCII-style treatment of I/i,
+ * but in non-default collations you get exactly what the collation says.
+ */
+ for (char *p = dest; *p; p++)
+ {
+ if (locale->is_default)
{
- if (locale->is_default)
- {
- if (*p >= 'a' && *p <= 'z')
- *p -= 'a' - 'A';
- else if (IS_HIGHBIT_SET(*p) && islower_l(*p, loc))
- *p = toupper_l((unsigned char) *p, loc);
- }
- else
+ if (*p >= 'a' && *p <= 'z')
+ *p -= 'a' - 'A';
+ else if (IS_HIGHBIT_SET(*p) && islower_l(*p, loc))
*p = toupper_l((unsigned char) *p, loc);
}
+ else
+ *p = toupper_l((unsigned char) *p, loc);
}
return srclen;
--
2.39.5 (Apple Git-154)
v2-0001-Fixes-a-bug-in-strlower_libc_sb.patchapplication/octet-stream; name=v2-0001-Fixes-a-bug-in-strlower_libc_sb.patchDownload
From e3fa9f6de184a5057d865b89be640d9cf35f4698 Mon Sep 17 00:00:00 2001
From: "Chao Li (Evan)" <lic@highgo.com>
Date: Tue, 25 Nov 2025 13:26:19 +0800
Subject: [PATCH v2 1/2] Fixes a bug in strlower_libc_sb
Removed a useless length check.
Author: Chao Li <lic@highgo.com>
Discussion: https://postgr.es/m/CAEoWx2mW0P8CByavV58zm3=eb2MQHaKOcDEF5B2UJYRyC2c3ig@mail.gmail.com
---
src/backend/utils/adt/pg_locale_libc.c | 3 ---
1 file changed, 3 deletions(-)
diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
index 716f005066a..abf27283a33 100644
--- a/src/backend/utils/adt/pg_locale_libc.c
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -434,9 +434,6 @@ strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
locale_t loc = locale->lt;
char *p;
- if (srclen + 1 > destsize)
- return srclen;
-
memcpy(dest, src, srclen);
dest[srclen] = '\0';
--
2.39.5 (Apple Git-154)
Hi,
On Tue, Nov 25, 2025 at 01:50:42PM +0800, Chao Li wrote:
On Tue, Nov 25, 2025 at 11:03 AM Chao Li <li.evan.chao@gmail.com> wrote:
Hi Hackers,
While reviewing Jeff's patch [1], I found this bug in strlower_libc_sb():
```
static size_t
strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t
srclen,
pg_locale_t locale)
{
if (srclen < 0)
srclen = strlen(src);
Nice catch!
FWIW this thread gave me the idea to create a coccinelle script to detect dead
branches [1]https://github.com/bdrouvot/coccinelle_on_pg/blob/main/misc/dead_branches.cocci. It reports 4 more that are false positives, so I don't think there
is more dead branches in the code tree.
[1]: https://github.com/bdrouvot/coccinelle_on_pg/blob/main/misc/dead_branches.cocci
Regards,
--
Bertrand Drouvot
PostgreSQL Contributors Team
RDS Open Source Databases
Amazon Web Services: https://aws.amazon.com