strncmp->memcmp when we know the shorter length
When the caller knows the smaller string length, memcmp and strncmp are
functionally equivalent. Since memcmp need not watch each byte for a NULL
terminator, it often compares a CPU word at a time for better performance. The
attached patch changes use of strncmp to memcmp where we have the length of the
shorter string. I was most interested in the varlena.c instances, but I tried
to find all applicable call sites. To benchmark it, I used the attached
"bench-texteq.sql". This patch improved my 5-run average timing of the SELECT
from 65.8s to 56.9s, a 13% improvement. I can't think of a case where the
change should be pessimal.
Thanks,
nm
Attachments:
strncmp-memcmp.patchtext/plain; charset=us-asciiDownload
*** a/contrib/hstore/hstore_io.c
--- b/contrib/hstore/hstore_io.c
***************
*** 280,288 **** comparePairs(const void *a, const void *b)
{
if (((Pairs *) a)->keylen == ((Pairs *) b)->keylen)
{
! int res = strncmp(((Pairs *) a)->key,
! ((Pairs *) b)->key,
! ((Pairs *) a)->keylen);
if (res)
return res;
--- 280,288 ----
{
if (((Pairs *) a)->keylen == ((Pairs *) b)->keylen)
{
! int res = memcmp(((Pairs *) a)->key,
! ((Pairs *) b)->key,
! ((Pairs *) a)->keylen);
if (res)
return res;
***************
*** 324,330 **** hstoreUniquePairs(Pairs *a, int4 l, int4 *buflen)
while (ptr - a < l)
{
if (ptr->keylen == res->keylen &&
! strncmp(ptr->key, res->key, res->keylen) == 0)
{
if (ptr->needfree)
{
--- 324,330 ----
while (ptr - a < l)
{
if (ptr->keylen == res->keylen &&
! memcmp(ptr->key, res->key, res->keylen) == 0)
{
if (ptr->needfree)
{
*** a/contrib/hstore/hstore_op.c
--- b/contrib/hstore/hstore_op.c
***************
*** 49,55 **** hstoreFindKey(HStore *hs, int *lowbound, char *key, int keylen)
stopMiddle = stopLow + (stopHigh - stopLow) / 2;
if (HS_KEYLEN(entries, stopMiddle) == keylen)
! difference = strncmp(HS_KEY(entries, base, stopMiddle), key, keylen);
else
difference = (HS_KEYLEN(entries, stopMiddle) > keylen) ? 1 : -1;
--- 49,55 ----
stopMiddle = stopLow + (stopHigh - stopLow) / 2;
if (HS_KEYLEN(entries, stopMiddle) == keylen)
! difference = memcmp(HS_KEY(entries, base, stopMiddle), key, keylen);
else
difference = (HS_KEYLEN(entries, stopMiddle) > keylen) ? 1 : -1;
***************
*** 263,269 **** hstore_delete(PG_FUNCTION_ARGS)
int len = HS_KEYLEN(es, i);
char *ptrs = HS_KEY(es, bufs, i);
! if (!(len == keylen && strncmp(ptrs, keyptr, keylen) == 0))
{
int vallen = HS_VALLEN(es, i);
--- 263,269 ----
int len = HS_KEYLEN(es, i);
char *ptrs = HS_KEY(es, bufs, i);
! if (!(len == keylen && memcmp(ptrs, keyptr, keylen) == 0))
{
int vallen = HS_VALLEN(es, i);
***************
*** 331,339 **** hstore_delete_array(PG_FUNCTION_ARGS)
int skeylen = HS_KEYLEN(es, i);
if (skeylen == key_pairs[j].keylen)
! difference = strncmp(HS_KEY(es, ps, i),
! key_pairs[j].key,
! key_pairs[j].keylen);
else
difference = (skeylen > key_pairs[j].keylen) ? 1 : -1;
}
--- 331,339 ----
int skeylen = HS_KEYLEN(es, i);
if (skeylen == key_pairs[j].keylen)
! difference = memcmp(HS_KEY(es, ps, i),
! key_pairs[j].key,
! key_pairs[j].keylen);
else
difference = (skeylen > key_pairs[j].keylen) ? 1 : -1;
}
***************
*** 416,424 **** hstore_delete_hstore(PG_FUNCTION_ARGS)
int s2keylen = HS_KEYLEN(es2, j);
if (skeylen == s2keylen)
! difference = strncmp(HS_KEY(es, ps, i),
! HS_KEY(es2, ps2, j),
! skeylen);
else
difference = (skeylen > s2keylen) ? 1 : -1;
}
--- 416,424 ----
int s2keylen = HS_KEYLEN(es2, j);
if (skeylen == s2keylen)
! difference = memcmp(HS_KEY(es, ps, i),
! HS_KEY(es2, ps2, j),
! skeylen);
else
difference = (skeylen > s2keylen) ? 1 : -1;
}
***************
*** 433,439 **** hstore_delete_hstore(PG_FUNCTION_ARGS)
if (snullval != HS_VALISNULL(es2, j)
|| (!snullval
&& (svallen != HS_VALLEN(es2, j)
! || strncmp(HS_VAL(es, ps, i), HS_VAL(es2, ps2, j), svallen) != 0)))
{
HS_COPYITEM(ed, bufd, pd,
HS_KEY(es, ps, i), HS_KEYLEN(es, i),
--- 433,439 ----
if (snullval != HS_VALISNULL(es2, j)
|| (!snullval
&& (svallen != HS_VALLEN(es2, j)
! || memcmp(HS_VAL(es, ps, i), HS_VAL(es2, ps2, j), svallen) != 0)))
{
HS_COPYITEM(ed, bufd, pd,
HS_KEY(es, ps, i), HS_KEYLEN(es, i),
***************
*** 526,534 **** hstore_concat(PG_FUNCTION_ARGS)
int s2keylen = HS_KEYLEN(es2, s2idx);
if (s1keylen == s2keylen)
! difference = strncmp(HS_KEY(es1, ps1, s1idx),
! HS_KEY(es2, ps2, s2idx),
! s1keylen);
else
difference = (s1keylen > s2keylen) ? 1 : -1;
}
--- 526,534 ----
int s2keylen = HS_KEYLEN(es2, s2idx);
if (s1keylen == s2keylen)
! difference = memcmp(HS_KEY(es1, ps1, s1idx),
! HS_KEY(es2, ps2, s2idx),
! s1keylen);
else
difference = (s1keylen > s2keylen) ? 1 : -1;
}
***************
*** 996,1002 **** hstore_contains(PG_FUNCTION_ARGS)
if (nullval != HS_VALISNULL(ve, idx)
|| (!nullval
&& (vallen != HS_VALLEN(ve, idx)
! || strncmp(HS_VAL(te, tstr, i), HS_VAL(ve, vstr, idx), vallen))))
res = false;
}
else
--- 996,1002 ----
if (nullval != HS_VALISNULL(ve, idx)
|| (!nullval
&& (vallen != HS_VALLEN(ve, idx)
! || memcmp(HS_VAL(te, tstr, i), HS_VAL(ve, vstr, idx), vallen))))
res = false;
}
else
*** a/contrib/ltree/lquery_op.c
--- b/contrib/ltree/lquery_op.c
***************
*** 111,117 **** checkLevel(lquery_level *curq, ltree_level *curt)
for (i = 0; i < curq->numvar; i++)
{
! cmpptr = (curvar->flag & LVAR_INCASE) ? ltree_strncasecmp : strncmp;
if (curvar->flag & LVAR_SUBLEXEME)
{
--- 111,117 ----
for (i = 0; i < curq->numvar; i++)
{
! cmpptr = (curvar->flag & LVAR_INCASE) ? ltree_strncasecmp : memcmp;
if (curvar->flag & LVAR_SUBLEXEME)
{
*** a/contrib/ltree/ltree_gist.c
--- b/contrib/ltree/ltree_gist.c
***************
*** 546,552 **** gist_tqcmp(ltree *t, lquery *q)
while (an > 0 && bn > 0)
{
bl = LQL_FIRST(ql);
! if ((res = strncmp(al->name, bl->name, Min(al->len, bl->len))) == 0)
{
if (al->len != bl->len)
return al->len - bl->len;
--- 546,552 ----
while (an > 0 && bn > 0)
{
bl = LQL_FIRST(ql);
! if ((res = memcmp(al->name, bl->name, Min(al->len, bl->len))) == 0)
{
if (al->len != bl->len)
return al->len - bl->len;
*** a/contrib/ltree/ltree_op.c
--- b/contrib/ltree/ltree_op.c
***************
*** 68,74 **** ltree_compare(const ltree *a, const ltree *b)
while (an > 0 && bn > 0)
{
! if ((res = strncmp(al->name, bl->name, Min(al->len, bl->len))) == 0)
{
if (al->len != bl->len)
return (al->len - bl->len) * 10 * (an + 1);
--- 68,74 ----
while (an > 0 && bn > 0)
{
! if ((res = memcmp(al->name, bl->name, Min(al->len, bl->len))) == 0)
{
if (al->len != bl->len)
return (al->len - bl->len) * 10 * (an + 1);
***************
*** 165,171 **** inner_isparent(const ltree *c, const ltree *p)
{
if (cl->len != pl->len)
return false;
! if (strncmp(cl->name, pl->name, cl->len))
return false;
pn--;
--- 165,171 ----
{
if (cl->len != pl->len)
return false;
! if (memcmp(cl->name, pl->name, cl->len))
return false;
pn--;
***************
*** 373,379 **** ltree_index(PG_FUNCTION_ARGS)
bptr = LTREE_FIRST(b);
for (j = 0; j < b->numlevel; j++)
{
! if (!(aptr->len == bptr->len && strncmp(aptr->name, bptr->name, aptr->len) == 0))
break;
aptr = LEVEL_NEXT(aptr);
bptr = LEVEL_NEXT(bptr);
--- 373,379 ----
bptr = LTREE_FIRST(b);
for (j = 0; j < b->numlevel; j++)
{
! if (!(aptr->len == bptr->len && memcmp(aptr->name, bptr->name, aptr->len) == 0))
break;
aptr = LEVEL_NEXT(aptr);
bptr = LEVEL_NEXT(bptr);
***************
*** 451,457 **** lca_inner(ltree **a, int len)
num = 0;
for (i = 0; i < Min(tmp, (*ptr)->numlevel - 1); i++)
{
! if (l1->len == l2->len && strncmp(l1->name, l2->name, l1->len) == 0)
num = i + 1;
else
break;
--- 451,457 ----
num = 0;
for (i = 0; i < Min(tmp, (*ptr)->numlevel - 1); i++)
{
! if (l1->len == l2->len && memcmp(l1->name, l2->name, l1->len) == 0)
num = i + 1;
else
break;
*** a/contrib/ltree/ltxtquery_op.c
--- b/contrib/ltree/ltxtquery_op.c
***************
*** 57,63 **** checkcondition_str(void *checkval, ITEM *val)
char *op = ((CHKVAL *) checkval)->operand + val->distance;
int (*cmpptr) (const char *, const char *, size_t);
! cmpptr = (val->flag & LVAR_INCASE) ? ltree_strncasecmp : strncmp;
while (tlen > 0)
{
if (val->flag & LVAR_SUBLEXEME)
--- 57,63 ----
char *op = ((CHKVAL *) checkval)->operand + val->distance;
int (*cmpptr) (const char *, const char *, size_t);
! cmpptr = (val->flag & LVAR_INCASE) ? ltree_strncasecmp : memcmp;
while (tlen > 0)
{
if (val->flag & LVAR_SUBLEXEME)
*** a/src/backend/nodes/readfuncs.c
--- b/src/backend/nodes/readfuncs.c
***************
*** 1196,1202 **** parseNodeString(void)
token = pg_strtok(&length);
#define MATCH(tokname, namelen) \
! (length == namelen && strncmp(token, tokname, namelen) == 0)
if (MATCH("QUERY", 5))
return_value = _readQuery();
--- 1196,1202 ----
token = pg_strtok(&length);
#define MATCH(tokname, namelen) \
! (length == namelen && memcmp(token, tokname, namelen) == 0)
if (MATCH("QUERY", 5))
return_value = _readQuery();
*** a/src/backend/tsearch/spell.c
--- b/src/backend/tsearch/spell.c
***************
*** 1502,1508 **** CheckCompoundAffixes(CMPDAffix **ptr, char *word, int len, bool CheckInPlace)
{
while ((*ptr)->affix)
{
! if (len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len) == 0)
{
len = (*ptr)->len;
issuffix = (*ptr)->issuffix;
--- 1502,1508 ----
{
while ((*ptr)->affix)
{
! if (len > (*ptr)->len && memcmp((*ptr)->affix, word, (*ptr)->len) == 0)
{
len = (*ptr)->len;
issuffix = (*ptr)->issuffix;
*** a/src/backend/tsearch/to_tsany.c
--- b/src/backend/tsearch/to_tsany.c
***************
*** 90,96 **** uniqueWORD(ParsedWord *a, int4 l)
while (ptr - a < l)
{
if (!(ptr->len == res->len &&
! strncmp(ptr->word, res->word, res->len) == 0))
{
/*
* Got a new word, so put it in result
--- 90,96 ----
while (ptr - a < l)
{
if (!(ptr->len == res->len &&
! memcmp(ptr->word, res->word, res->len) == 0))
{
/*
* Got a new word, so put it in result
*** a/src/backend/tsearch/ts_selfuncs.c
--- b/src/backend/tsearch/ts_selfuncs.c
***************
*** 316,323 **** tsquery_opr_selec(QueryItem *item, char *operand,
int tlen = VARSIZE_ANY_EXHDR(t->element);
if (tlen >= key.length &&
! strncmp(key.lexeme, VARDATA_ANY(t->element),
! key.length) == 0)
matched += t->frequency;
allmcvs += t->frequency;
}
--- 316,323 ----
int tlen = VARSIZE_ANY_EXHDR(t->element);
if (tlen >= key.length &&
! memcmp(key.lexeme, VARDATA_ANY(t->element),
! key.length) == 0)
matched += t->frequency;
allmcvs += t->frequency;
}
***************
*** 430,434 **** compare_lexeme_textfreq(const void *e1, const void *e2)
return -1;
/* Fall back on byte-for-byte comparison */
! return strncmp(key->lexeme, VARDATA_ANY(t->element), len1);
}
--- 430,434 ----
return -1;
/* Fall back on byte-for-byte comparison */
! return memcmp(key->lexeme, VARDATA_ANY(t->element), len1);
}
*** a/src/backend/tsearch/ts_typanalyze.c
--- b/src/backend/tsearch/ts_typanalyze.c
***************
*** 488,494 **** lexeme_compare(const void *key1, const void *key2)
else if (d1->length < d2->length)
return -1;
/* Lengths are equal, do a byte-by-byte comparison */
! return strncmp(d1->lexeme, d2->lexeme, d1->length);
}
/*
--- 488,494 ----
else if (d1->length < d2->length)
return -1;
/* Lengths are equal, do a byte-by-byte comparison */
! return memcmp(d1->lexeme, d2->lexeme, d1->length);
}
/*
*** a/src/backend/utils/adt/varchar.c
--- b/src/backend/utils/adt/varchar.c
***************
*** 690,696 **** bpchareq(PG_FUNCTION_ARGS)
if (len1 != len2)
result = false;
else
! result = (strncmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) == 0);
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
--- 690,696 ----
if (len1 != len2)
result = false;
else
! result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) == 0);
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
***************
*** 717,723 **** bpcharne(PG_FUNCTION_ARGS)
if (len1 != len2)
result = true;
else
! result = (strncmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) != 0);
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
--- 717,723 ----
if (len1 != len2)
result = true;
else
! result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) != 0);
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
***************
*** 905,911 **** internal_bpchar_pattern_compare(BpChar *arg1, BpChar *arg2)
len1 = bcTruelen(arg1);
len2 = bcTruelen(arg2);
! result = strncmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
if (result != 0)
return result;
else if (len1 < len2)
--- 905,911 ----
len1 = bcTruelen(arg1);
len2 = bcTruelen(arg2);
! result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
if (result != 0)
return result;
else if (len1 < len2)
*** a/src/backend/utils/adt/varlena.c
--- b/src/backend/utils/adt/varlena.c
***************
*** 1286,1292 **** varstr_cmp(char *arg1, int len1, char *arg2, int len2)
*/
if (lc_collate_is_c())
{
! result = strncmp(arg1, arg2, Min(len1, len2));
if ((result == 0) && (len1 != len2))
result = (len1 < len2) ? -1 : 1;
}
--- 1286,1292 ----
*/
if (lc_collate_is_c())
{
! result = memcmp(arg1, arg2, Min(len1, len2));
if ((result == 0) && (len1 != len2))
result = (len1 < len2) ? -1 : 1;
}
***************
*** 1370,1376 **** varstr_cmp(char *arg1, int len1, char *arg2, int len2)
*/
if (result == 0)
{
! result = strncmp(arg1, arg2, Min(len1, len2));
if ((result == 0) && (len1 != len2))
result = (len1 < len2) ? -1 : 1;
}
--- 1370,1376 ----
*/
if (result == 0)
{
! result = memcmp(arg1, arg2, Min(len1, len2));
if ((result == 0) && (len1 != len2))
result = (len1 < len2) ? -1 : 1;
}
***************
*** 1462,1469 **** texteq(PG_FUNCTION_ARGS)
if (VARSIZE_ANY_EXHDR(arg1) != VARSIZE_ANY_EXHDR(arg2))
result = false;
else
! result = (strncmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2),
! VARSIZE_ANY_EXHDR(arg1)) == 0);
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
--- 1462,1469 ----
if (VARSIZE_ANY_EXHDR(arg1) != VARSIZE_ANY_EXHDR(arg2))
result = false;
else
! result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2),
! VARSIZE_ANY_EXHDR(arg1)) == 0);
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
***************
*** 1485,1492 **** textne(PG_FUNCTION_ARGS)
if (VARSIZE_ANY_EXHDR(arg1) != VARSIZE_ANY_EXHDR(arg2))
result = true;
else
! result = (strncmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2),
! VARSIZE_ANY_EXHDR(arg1)) != 0);
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
--- 1485,1492 ----
if (VARSIZE_ANY_EXHDR(arg1) != VARSIZE_ANY_EXHDR(arg2))
result = true;
else
! result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2),
! VARSIZE_ANY_EXHDR(arg1)) != 0);
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
***************
*** 1612,1618 **** internal_text_pattern_compare(text *arg1, text *arg2)
len1 = VARSIZE_ANY_EXHDR(arg1);
len2 = VARSIZE_ANY_EXHDR(arg2);
! result = strncmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
if (result != 0)
return result;
else if (len1 < len2)
--- 1612,1618 ----
len1 = VARSIZE_ANY_EXHDR(arg1);
len2 = VARSIZE_ANY_EXHDR(arg2);
! result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
if (result != 0)
return result;
else if (len1 < len2)
On Mon, Dec 20, 2010 at 1:10 PM, Noah Misch <noah@leadboat.com> wrote:
When the caller knows the smaller string length, memcmp and strncmp are
functionally equivalent. Since memcmp need not watch each byte for a NULL
terminator, it often compares a CPU word at a time for better performance. The
attached patch changes use of strncmp to memcmp where we have the length of the
shorter string. I was most interested in the varlena.c instances, but I tried
to find all applicable call sites. To benchmark it, I used the attached
"bench-texteq.sql". This patch improved my 5-run average timing of the SELECT
from 65.8s to 56.9s, a 13% improvement. I can't think of a case where the
change should be pessimal.
This is a good idea. I will check this over and commit it.
--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
On Tue, Dec 21, 2010 at 6:24 PM, Robert Haas <robertmhaas@gmail.com> wrote:
On Mon, Dec 20, 2010 at 1:10 PM, Noah Misch <noah@leadboat.com> wrote:
When the caller knows the smaller string length, memcmp and strncmp are
functionally equivalent. Since memcmp need not watch each byte for aNULL
terminator, it often compares a CPU word at a time for better
performance. The
attached patch changes use of strncmp to memcmp where we have the length
of the
shorter string. I was most interested in the varlena.c instances, but I
tried
to find all applicable call sites. To benchmark it, I used the attached
"bench-texteq.sql". This patch improved my 5-run average timing of theSELECT
from 65.8s to 56.9s, a 13% improvement. I can't think of a case where
the
change should be pessimal.
This is a good idea. I will check this over and commit it.
Doesn't this risk accessing bytes beyond the shorter string? Look at the
warning above the StrNCpy(), for example.
Regards,
--
gurjeet.singh
@ EnterpriseDB - The Enterprise Postgres Company
http://www.EnterpriseDB.com
singh.gurjeet@{ gmail | yahoo }.com
Twitter/Skype: singh_gurjeet
Mail sent from my BlackLaptop device
On Tue, Dec 21, 2010 at 8:29 PM, Gurjeet Singh <singh.gurjeet@gmail.com> wrote:
On Tue, Dec 21, 2010 at 6:24 PM, Robert Haas <robertmhaas@gmail.com> wrote:
On Mon, Dec 20, 2010 at 1:10 PM, Noah Misch <noah@leadboat.com> wrote:
When the caller knows the smaller string length, memcmp and strncmp are
functionally equivalent. Since memcmp need not watch each byte for a
NULL
terminator, it often compares a CPU word at a time for better
performance. The
attached patch changes use of strncmp to memcmp where we have the length
of the
shorter string. I was most interested in the varlena.c instances, but I
tried
to find all applicable call sites. To benchmark it, I used the attached
"bench-texteq.sql". This patch improved my 5-run average timing of the
SELECT
from 65.8s to 56.9s, a 13% improvement. I can't think of a case where
the
change should be pessimal.This is a good idea. I will check this over and commit it.
Doesn't this risk accessing bytes beyond the shorter string?
If it's done properly, I don't see how this would be a risk.
Look at the
warning above the StrNCpy(), for example.
If you're talking about this comment:
* BTW: when you need to copy a non-null-terminated string (like a text
* datum) and add a null, do not do it with StrNCpy(..., len+1). That
* might seem to work, but it fetches one byte more than there is in the
* text object.
...then that's not applicable here. It's perfectly safe to compare to
strings of length n using an n-byte memcmp(). The bytes being
compared are 0 through n - 1; the terminating null is in byte n, or
else it isn't, but memcmp() certainly isn't going to look at it.
--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
On Tue, Dec 21, 2010 at 9:01 PM, Robert Haas <robertmhaas@gmail.com> wrote:
On Tue, Dec 21, 2010 at 8:29 PM, Gurjeet Singh <singh.gurjeet@gmail.com>
wrote:On Tue, Dec 21, 2010 at 6:24 PM, Robert Haas <robertmhaas@gmail.com>
wrote:
On Mon, Dec 20, 2010 at 1:10 PM, Noah Misch <noah@leadboat.com> wrote:
When the caller knows the smaller string length, memcmp and strncmp
are
functionally equivalent. Since memcmp need not watch each byte for a
NULL
terminator, it often compares a CPU word at a time for better
performance. The
attached patch changes use of strncmp to memcmp where we have thelength
of the
shorter string. I was most interested in the varlena.c instances, butI
tried
to find all applicable call sites. To benchmark it, I used theattached
"bench-texteq.sql". This patch improved my 5-run average timing of
the
SELECT
from 65.8s to 56.9s, a 13% improvement. I can't think of a case where
the
change should be pessimal.This is a good idea. I will check this over and commit it.
Doesn't this risk accessing bytes beyond the shorter string?
If it's done properly, I don't see how this would be a risk.
Look at the
warning above the StrNCpy(), for example.If you're talking about this comment:
* BTW: when you need to copy a non-null-terminated string (like a
text
* datum) and add a null, do not do it with StrNCpy(..., len+1). That
* might seem to work, but it fetches one byte more than there is in
the
* text object....then that's not applicable here. It's perfectly safe to compare to
strings of length n using an n-byte memcmp(). The bytes being
compared are 0 through n - 1; the terminating null is in byte n, or
else it isn't, but memcmp() certainly isn't going to look at it.
I missed the part where Noah said "... where we have the length of the *
_shorter_* string". I agree we are safe here.
Regards,
--
gurjeet.singh
@ EnterpriseDB - The Enterprise Postgres Company
http://www.EnterpriseDB.com
singh.gurjeet@{ gmail | yahoo }.com
Twitter/Skype: singh_gurjeet
Mail sent from my BlackLaptop device
On Tue, Dec 21, 2010 at 6:24 PM, Robert Haas <robertmhaas@gmail.com> wrote:
On Mon, Dec 20, 2010 at 1:10 PM, Noah Misch <noah@leadboat.com> wrote:
When the caller knows the smaller string length, memcmp and strncmp are
functionally equivalent. Since memcmp need not watch each byte for a NULL
terminator, it often compares a CPU word at a time for better performance. The
attached patch changes use of strncmp to memcmp where we have the length of the
shorter string. I was most interested in the varlena.c instances, but I tried
to find all applicable call sites. To benchmark it, I used the attached
"bench-texteq.sql". This patch improved my 5-run average timing of the SELECT
from 65.8s to 56.9s, a 13% improvement. I can't think of a case where the
change should be pessimal.This is a good idea. I will check this over and commit it.
A little benchmarking reveals that on my system (MacOS X 10.6.5) it
appears that strncmp() is faster for a 4 character string, but
memcmp() is faster for a 5+ character string. So I think most of
these are pretty clear wins, but I have reverted the changes to
src/backend/tsearch because I'm not entirely confident that lexemes
and affixes will be long enough on average for this to be a win there.
Please feel free to resubmit that part with performance results
showing that it works out to a win. Some of the ltree changes
produced compiler warnings, so I omitted those also. Committed the
rest.
--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
Robert Haas <robertmhaas@gmail.com> writes:
If it's done properly, I don't see how this would be a risk.
I'm fairly uncomfortable about the broad swath and low return of this
patch. Noah is assuming that none of these places are relying on
strncmp to stop short upon finding a null, and I don't believe that
that's a safe assumption in every single place. Nor do I believe that
it's worth the effort of trying to prove it safe in most of those
places.
I think this might be a good idea in the varchar.c and varlena.c calls,
but I'd be inclined to leave the rest of the calls alone.
regards, tom lane
On Tue, Dec 21, 2010 at 10:24 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:
Robert Haas <robertmhaas@gmail.com> writes:
If it's done properly, I don't see how this would be a risk.
I'm fairly uncomfortable about the broad swath and low return of this
patch. Noah is assuming that none of these places are relying on
strncmp to stop short upon finding a null, and I don't believe that
that's a safe assumption in every single place. Nor do I believe that
it's worth the effort of trying to prove it safe in most of those
places.I think this might be a good idea in the varchar.c and varlena.c calls,
but I'd be inclined to leave the rest of the calls alone.
Eh, I already committed somewhat more than that. I did think about
the concern which you raise. It seems pretty clear that's not a
danger in readfuncs.c. In the hstore and ltree cases, at least at
first blush, it appears to me that it would be downright broken for
someone to be counting on a null to terminate the comparison. The
intent of these bits of code appears to be to do equality comparison a
string stored as a byte count + a byte string, rather than a
null-terminated cstring, so unless I'm misunderstanding something it's
more likely that the use of strncmp() would lead to a bug; the prior
coding doesn't look like it would be correct if NUL bytes were
possible. The tsearch cases also appear to be safe in this regard,
but since I decided against committing those on other grounds I
haven't looked at them as carefully.
--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
Robert Haas <robertmhaas@gmail.com> writes:
On Tue, Dec 21, 2010 at 10:24 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:
I'm fairly uncomfortable about the broad swath and low return of this
patch. �Noah is assuming that none of these places are relying on
strncmp to stop short upon finding a null, and I don't believe that
that's a safe assumption in every single place. �Nor do I believe that
it's worth the effort of trying to prove it safe in most of those
places.
Eh, I already committed somewhat more than that. I did think about
the concern which you raise.
Okay ... I was arguing for not bothering to expend that effort, but
since you already did, it's a moot point.
regards, tom lane
On Tue, Dec 21, 2010 at 10:15:41PM -0500, Robert Haas wrote:
A little benchmarking reveals that on my system (MacOS X 10.6.5) it
appears that strncmp() is faster for a 4 character string, but
memcmp() is faster for a 5+ character string.
Good call; I hadn't considered that possibility.
So I think most of
these are pretty clear wins, but I have reverted the changes to
src/backend/tsearch because I'm not entirely confident that lexemes
and affixes will be long enough on average for this to be a win there.
Please feel free to resubmit that part with performance results
showing that it works out to a win. Some of the ltree changes
produced compiler warnings, so I omitted those also. Committed the
rest.
Thanks for the quick review and commit. I'm not acquainted with the performance
significance of the tsearch and ltree call sites. Leaving those as-is makes
sense to me.
nm