strncmp->memcmp when we know the shorter length

Started by Noah Mischabout 15 years ago10 messages
#1Noah Misch
noah@leadboat.com
2 attachment(s)

When the caller knows the smaller string length, memcmp and strncmp are
functionally equivalent. Since memcmp need not watch each byte for a NULL
terminator, it often compares a CPU word at a time for better performance. The
attached patch changes use of strncmp to memcmp where we have the length of the
shorter string. I was most interested in the varlena.c instances, but I tried
to find all applicable call sites. To benchmark it, I used the attached
"bench-texteq.sql". This patch improved my 5-run average timing of the SELECT
from 65.8s to 56.9s, a 13% improvement. I can't think of a case where the
change should be pessimal.

Thanks,
nm

Attachments:

strncmp-memcmp.patchtext/plain; charset=us-asciiDownload
*** a/contrib/hstore/hstore_io.c
--- b/contrib/hstore/hstore_io.c
***************
*** 280,288 **** comparePairs(const void *a, const void *b)
  {
  	if (((Pairs *) a)->keylen == ((Pairs *) b)->keylen)
  	{
! 		int			res = strncmp(((Pairs *) a)->key,
! 								  ((Pairs *) b)->key,
! 								  ((Pairs *) a)->keylen);
  
  		if (res)
  			return res;
--- 280,288 ----
  {
  	if (((Pairs *) a)->keylen == ((Pairs *) b)->keylen)
  	{
! 		int			res = memcmp(((Pairs *) a)->key,
! 								 ((Pairs *) b)->key,
! 								 ((Pairs *) a)->keylen);
  
  		if (res)
  			return res;
***************
*** 324,330 **** hstoreUniquePairs(Pairs *a, int4 l, int4 *buflen)
  	while (ptr - a < l)
  	{
  		if (ptr->keylen == res->keylen &&
! 			strncmp(ptr->key, res->key, res->keylen) == 0)
  		{
  			if (ptr->needfree)
  			{
--- 324,330 ----
  	while (ptr - a < l)
  	{
  		if (ptr->keylen == res->keylen &&
! 			memcmp(ptr->key, res->key, res->keylen) == 0)
  		{
  			if (ptr->needfree)
  			{
*** a/contrib/hstore/hstore_op.c
--- b/contrib/hstore/hstore_op.c
***************
*** 49,55 **** hstoreFindKey(HStore *hs, int *lowbound, char *key, int keylen)
  		stopMiddle = stopLow + (stopHigh - stopLow) / 2;
  
  		if (HS_KEYLEN(entries, stopMiddle) == keylen)
! 			difference = strncmp(HS_KEY(entries, base, stopMiddle), key, keylen);
  		else
  			difference = (HS_KEYLEN(entries, stopMiddle) > keylen) ? 1 : -1;
  
--- 49,55 ----
  		stopMiddle = stopLow + (stopHigh - stopLow) / 2;
  
  		if (HS_KEYLEN(entries, stopMiddle) == keylen)
! 			difference = memcmp(HS_KEY(entries, base, stopMiddle), key, keylen);
  		else
  			difference = (HS_KEYLEN(entries, stopMiddle) > keylen) ? 1 : -1;
  
***************
*** 263,269 **** hstore_delete(PG_FUNCTION_ARGS)
  		int			len = HS_KEYLEN(es, i);
  		char	   *ptrs = HS_KEY(es, bufs, i);
  
! 		if (!(len == keylen && strncmp(ptrs, keyptr, keylen) == 0))
  		{
  			int			vallen = HS_VALLEN(es, i);
  
--- 263,269 ----
  		int			len = HS_KEYLEN(es, i);
  		char	   *ptrs = HS_KEY(es, bufs, i);
  
! 		if (!(len == keylen && memcmp(ptrs, keyptr, keylen) == 0))
  		{
  			int			vallen = HS_VALLEN(es, i);
  
***************
*** 331,339 **** hstore_delete_array(PG_FUNCTION_ARGS)
  			int			skeylen = HS_KEYLEN(es, i);
  
  			if (skeylen == key_pairs[j].keylen)
! 				difference = strncmp(HS_KEY(es, ps, i),
! 									 key_pairs[j].key,
! 									 key_pairs[j].keylen);
  			else
  				difference = (skeylen > key_pairs[j].keylen) ? 1 : -1;
  		}
--- 331,339 ----
  			int			skeylen = HS_KEYLEN(es, i);
  
  			if (skeylen == key_pairs[j].keylen)
! 				difference = memcmp(HS_KEY(es, ps, i),
! 									key_pairs[j].key,
! 									key_pairs[j].keylen);
  			else
  				difference = (skeylen > key_pairs[j].keylen) ? 1 : -1;
  		}
***************
*** 416,424 **** hstore_delete_hstore(PG_FUNCTION_ARGS)
  			int			s2keylen = HS_KEYLEN(es2, j);
  
  			if (skeylen == s2keylen)
! 				difference = strncmp(HS_KEY(es, ps, i),
! 									 HS_KEY(es2, ps2, j),
! 									 skeylen);
  			else
  				difference = (skeylen > s2keylen) ? 1 : -1;
  		}
--- 416,424 ----
  			int			s2keylen = HS_KEYLEN(es2, j);
  
  			if (skeylen == s2keylen)
! 				difference = memcmp(HS_KEY(es, ps, i),
! 									HS_KEY(es2, ps2, j),
! 									skeylen);
  			else
  				difference = (skeylen > s2keylen) ? 1 : -1;
  		}
***************
*** 433,439 **** hstore_delete_hstore(PG_FUNCTION_ARGS)
  			if (snullval != HS_VALISNULL(es2, j)
  				|| (!snullval
  					&& (svallen != HS_VALLEN(es2, j)
! 						|| strncmp(HS_VAL(es, ps, i), HS_VAL(es2, ps2, j), svallen) != 0)))
  			{
  				HS_COPYITEM(ed, bufd, pd,
  							HS_KEY(es, ps, i), HS_KEYLEN(es, i),
--- 433,439 ----
  			if (snullval != HS_VALISNULL(es2, j)
  				|| (!snullval
  					&& (svallen != HS_VALLEN(es2, j)
! 						|| memcmp(HS_VAL(es, ps, i), HS_VAL(es2, ps2, j), svallen) != 0)))
  			{
  				HS_COPYITEM(ed, bufd, pd,
  							HS_KEY(es, ps, i), HS_KEYLEN(es, i),
***************
*** 526,534 **** hstore_concat(PG_FUNCTION_ARGS)
  			int			s2keylen = HS_KEYLEN(es2, s2idx);
  
  			if (s1keylen == s2keylen)
! 				difference = strncmp(HS_KEY(es1, ps1, s1idx),
! 									 HS_KEY(es2, ps2, s2idx),
! 									 s1keylen);
  			else
  				difference = (s1keylen > s2keylen) ? 1 : -1;
  		}
--- 526,534 ----
  			int			s2keylen = HS_KEYLEN(es2, s2idx);
  
  			if (s1keylen == s2keylen)
! 				difference = memcmp(HS_KEY(es1, ps1, s1idx),
! 									HS_KEY(es2, ps2, s2idx),
! 									s1keylen);
  			else
  				difference = (s1keylen > s2keylen) ? 1 : -1;
  		}
***************
*** 996,1002 **** hstore_contains(PG_FUNCTION_ARGS)
  			if (nullval != HS_VALISNULL(ve, idx)
  				|| (!nullval
  					&& (vallen != HS_VALLEN(ve, idx)
! 			|| strncmp(HS_VAL(te, tstr, i), HS_VAL(ve, vstr, idx), vallen))))
  				res = false;
  		}
  		else
--- 996,1002 ----
  			if (nullval != HS_VALISNULL(ve, idx)
  				|| (!nullval
  					&& (vallen != HS_VALLEN(ve, idx)
! 			|| memcmp(HS_VAL(te, tstr, i), HS_VAL(ve, vstr, idx), vallen))))
  				res = false;
  		}
  		else
*** a/contrib/ltree/lquery_op.c
--- b/contrib/ltree/lquery_op.c
***************
*** 111,117 **** checkLevel(lquery_level *curq, ltree_level *curt)
  
  	for (i = 0; i < curq->numvar; i++)
  	{
! 		cmpptr = (curvar->flag & LVAR_INCASE) ? ltree_strncasecmp : strncmp;
  
  		if (curvar->flag & LVAR_SUBLEXEME)
  		{
--- 111,117 ----
  
  	for (i = 0; i < curq->numvar; i++)
  	{
! 		cmpptr = (curvar->flag & LVAR_INCASE) ? ltree_strncasecmp : memcmp;
  
  		if (curvar->flag & LVAR_SUBLEXEME)
  		{
*** a/contrib/ltree/ltree_gist.c
--- b/contrib/ltree/ltree_gist.c
***************
*** 546,552 **** gist_tqcmp(ltree *t, lquery *q)
  	while (an > 0 && bn > 0)
  	{
  		bl = LQL_FIRST(ql);
! 		if ((res = strncmp(al->name, bl->name, Min(al->len, bl->len))) == 0)
  		{
  			if (al->len != bl->len)
  				return al->len - bl->len;
--- 546,552 ----
  	while (an > 0 && bn > 0)
  	{
  		bl = LQL_FIRST(ql);
! 		if ((res = memcmp(al->name, bl->name, Min(al->len, bl->len))) == 0)
  		{
  			if (al->len != bl->len)
  				return al->len - bl->len;
*** a/contrib/ltree/ltree_op.c
--- b/contrib/ltree/ltree_op.c
***************
*** 68,74 **** ltree_compare(const ltree *a, const ltree *b)
  
  	while (an > 0 && bn > 0)
  	{
! 		if ((res = strncmp(al->name, bl->name, Min(al->len, bl->len))) == 0)
  		{
  			if (al->len != bl->len)
  				return (al->len - bl->len) * 10 * (an + 1);
--- 68,74 ----
  
  	while (an > 0 && bn > 0)
  	{
! 		if ((res = memcmp(al->name, bl->name, Min(al->len, bl->len))) == 0)
  		{
  			if (al->len != bl->len)
  				return (al->len - bl->len) * 10 * (an + 1);
***************
*** 165,171 **** inner_isparent(const ltree *c, const ltree *p)
  	{
  		if (cl->len != pl->len)
  			return false;
! 		if (strncmp(cl->name, pl->name, cl->len))
  			return false;
  
  		pn--;
--- 165,171 ----
  	{
  		if (cl->len != pl->len)
  			return false;
! 		if (memcmp(cl->name, pl->name, cl->len))
  			return false;
  
  		pn--;
***************
*** 373,379 **** ltree_index(PG_FUNCTION_ARGS)
  			bptr = LTREE_FIRST(b);
  			for (j = 0; j < b->numlevel; j++)
  			{
! 				if (!(aptr->len == bptr->len && strncmp(aptr->name, bptr->name, aptr->len) == 0))
  					break;
  				aptr = LEVEL_NEXT(aptr);
  				bptr = LEVEL_NEXT(bptr);
--- 373,379 ----
  			bptr = LTREE_FIRST(b);
  			for (j = 0; j < b->numlevel; j++)
  			{
! 				if (!(aptr->len == bptr->len && memcmp(aptr->name, bptr->name, aptr->len) == 0))
  					break;
  				aptr = LEVEL_NEXT(aptr);
  				bptr = LEVEL_NEXT(bptr);
***************
*** 451,457 **** lca_inner(ltree **a, int len)
  			num = 0;
  			for (i = 0; i < Min(tmp, (*ptr)->numlevel - 1); i++)
  			{
! 				if (l1->len == l2->len && strncmp(l1->name, l2->name, l1->len) == 0)
  					num = i + 1;
  				else
  					break;
--- 451,457 ----
  			num = 0;
  			for (i = 0; i < Min(tmp, (*ptr)->numlevel - 1); i++)
  			{
! 				if (l1->len == l2->len && memcmp(l1->name, l2->name, l1->len) == 0)
  					num = i + 1;
  				else
  					break;
*** a/contrib/ltree/ltxtquery_op.c
--- b/contrib/ltree/ltxtquery_op.c
***************
*** 57,63 **** checkcondition_str(void *checkval, ITEM *val)
  	char	   *op = ((CHKVAL *) checkval)->operand + val->distance;
  	int			(*cmpptr) (const char *, const char *, size_t);
  
! 	cmpptr = (val->flag & LVAR_INCASE) ? ltree_strncasecmp : strncmp;
  	while (tlen > 0)
  	{
  		if (val->flag & LVAR_SUBLEXEME)
--- 57,63 ----
  	char	   *op = ((CHKVAL *) checkval)->operand + val->distance;
  	int			(*cmpptr) (const char *, const char *, size_t);
  
! 	cmpptr = (val->flag & LVAR_INCASE) ? ltree_strncasecmp : memcmp;
  	while (tlen > 0)
  	{
  		if (val->flag & LVAR_SUBLEXEME)
*** a/src/backend/nodes/readfuncs.c
--- b/src/backend/nodes/readfuncs.c
***************
*** 1196,1202 **** parseNodeString(void)
  	token = pg_strtok(&length);
  
  #define MATCH(tokname, namelen) \
! 	(length == namelen && strncmp(token, tokname, namelen) == 0)
  
  	if (MATCH("QUERY", 5))
  		return_value = _readQuery();
--- 1196,1202 ----
  	token = pg_strtok(&length);
  
  #define MATCH(tokname, namelen) \
! 	(length == namelen && memcmp(token, tokname, namelen) == 0)
  
  	if (MATCH("QUERY", 5))
  		return_value = _readQuery();
*** a/src/backend/tsearch/spell.c
--- b/src/backend/tsearch/spell.c
***************
*** 1502,1508 **** CheckCompoundAffixes(CMPDAffix **ptr, char *word, int len, bool CheckInPlace)
  	{
  		while ((*ptr)->affix)
  		{
! 			if (len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len) == 0)
  			{
  				len = (*ptr)->len;
  				issuffix = (*ptr)->issuffix;
--- 1502,1508 ----
  	{
  		while ((*ptr)->affix)
  		{
! 			if (len > (*ptr)->len && memcmp((*ptr)->affix, word, (*ptr)->len) == 0)
  			{
  				len = (*ptr)->len;
  				issuffix = (*ptr)->issuffix;
*** a/src/backend/tsearch/to_tsany.c
--- b/src/backend/tsearch/to_tsany.c
***************
*** 90,96 **** uniqueWORD(ParsedWord *a, int4 l)
  	while (ptr - a < l)
  	{
  		if (!(ptr->len == res->len &&
! 			  strncmp(ptr->word, res->word, res->len) == 0))
  		{
  			/*
  			 * Got a new word, so put it in result
--- 90,96 ----
  	while (ptr - a < l)
  	{
  		if (!(ptr->len == res->len &&
! 			  memcmp(ptr->word, res->word, res->len) == 0))
  		{
  			/*
  			 * Got a new word, so put it in result
*** a/src/backend/tsearch/ts_selfuncs.c
--- b/src/backend/tsearch/ts_selfuncs.c
***************
*** 316,323 **** tsquery_opr_selec(QueryItem *item, char *operand,
  				int			tlen = VARSIZE_ANY_EXHDR(t->element);
  
  				if (tlen >= key.length &&
! 					strncmp(key.lexeme, VARDATA_ANY(t->element),
! 							key.length) == 0)
  					matched += t->frequency;
  				allmcvs += t->frequency;
  			}
--- 316,323 ----
  				int			tlen = VARSIZE_ANY_EXHDR(t->element);
  
  				if (tlen >= key.length &&
! 					memcmp(key.lexeme, VARDATA_ANY(t->element),
! 						   key.length) == 0)
  					matched += t->frequency;
  				allmcvs += t->frequency;
  			}
***************
*** 430,434 **** compare_lexeme_textfreq(const void *e1, const void *e2)
  		return -1;
  
  	/* Fall back on byte-for-byte comparison */
! 	return strncmp(key->lexeme, VARDATA_ANY(t->element), len1);
  }
--- 430,434 ----
  		return -1;
  
  	/* Fall back on byte-for-byte comparison */
! 	return memcmp(key->lexeme, VARDATA_ANY(t->element), len1);
  }
*** a/src/backend/tsearch/ts_typanalyze.c
--- b/src/backend/tsearch/ts_typanalyze.c
***************
*** 488,494 **** lexeme_compare(const void *key1, const void *key2)
  	else if (d1->length < d2->length)
  		return -1;
  	/* Lengths are equal, do a byte-by-byte comparison */
! 	return strncmp(d1->lexeme, d2->lexeme, d1->length);
  }
  
  /*
--- 488,494 ----
  	else if (d1->length < d2->length)
  		return -1;
  	/* Lengths are equal, do a byte-by-byte comparison */
! 	return memcmp(d1->lexeme, d2->lexeme, d1->length);
  }
  
  /*
*** a/src/backend/utils/adt/varchar.c
--- b/src/backend/utils/adt/varchar.c
***************
*** 690,696 **** bpchareq(PG_FUNCTION_ARGS)
  	if (len1 != len2)
  		result = false;
  	else
! 		result = (strncmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) == 0);
  
  	PG_FREE_IF_COPY(arg1, 0);
  	PG_FREE_IF_COPY(arg2, 1);
--- 690,696 ----
  	if (len1 != len2)
  		result = false;
  	else
! 		result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) == 0);
  
  	PG_FREE_IF_COPY(arg1, 0);
  	PG_FREE_IF_COPY(arg2, 1);
***************
*** 717,723 **** bpcharne(PG_FUNCTION_ARGS)
  	if (len1 != len2)
  		result = true;
  	else
! 		result = (strncmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) != 0);
  
  	PG_FREE_IF_COPY(arg1, 0);
  	PG_FREE_IF_COPY(arg2, 1);
--- 717,723 ----
  	if (len1 != len2)
  		result = true;
  	else
! 		result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) != 0);
  
  	PG_FREE_IF_COPY(arg1, 0);
  	PG_FREE_IF_COPY(arg2, 1);
***************
*** 905,911 **** internal_bpchar_pattern_compare(BpChar *arg1, BpChar *arg2)
  	len1 = bcTruelen(arg1);
  	len2 = bcTruelen(arg2);
  
! 	result = strncmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
  	if (result != 0)
  		return result;
  	else if (len1 < len2)
--- 905,911 ----
  	len1 = bcTruelen(arg1);
  	len2 = bcTruelen(arg2);
  
! 	result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
  	if (result != 0)
  		return result;
  	else if (len1 < len2)
*** a/src/backend/utils/adt/varlena.c
--- b/src/backend/utils/adt/varlena.c
***************
*** 1286,1292 **** varstr_cmp(char *arg1, int len1, char *arg2, int len2)
  	 */
  	if (lc_collate_is_c())
  	{
! 		result = strncmp(arg1, arg2, Min(len1, len2));
  		if ((result == 0) && (len1 != len2))
  			result = (len1 < len2) ? -1 : 1;
  	}
--- 1286,1292 ----
  	 */
  	if (lc_collate_is_c())
  	{
! 		result = memcmp(arg1, arg2, Min(len1, len2));
  		if ((result == 0) && (len1 != len2))
  			result = (len1 < len2) ? -1 : 1;
  	}
***************
*** 1370,1376 **** varstr_cmp(char *arg1, int len1, char *arg2, int len2)
  			 */
  			if (result == 0)
  			{
! 				result = strncmp(arg1, arg2, Min(len1, len2));
  				if ((result == 0) && (len1 != len2))
  					result = (len1 < len2) ? -1 : 1;
  			}
--- 1370,1376 ----
  			 */
  			if (result == 0)
  			{
! 				result = memcmp(arg1, arg2, Min(len1, len2));
  				if ((result == 0) && (len1 != len2))
  					result = (len1 < len2) ? -1 : 1;
  			}
***************
*** 1462,1469 **** texteq(PG_FUNCTION_ARGS)
  	if (VARSIZE_ANY_EXHDR(arg1) != VARSIZE_ANY_EXHDR(arg2))
  		result = false;
  	else
! 		result = (strncmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2),
! 						  VARSIZE_ANY_EXHDR(arg1)) == 0);
  
  	PG_FREE_IF_COPY(arg1, 0);
  	PG_FREE_IF_COPY(arg2, 1);
--- 1462,1469 ----
  	if (VARSIZE_ANY_EXHDR(arg1) != VARSIZE_ANY_EXHDR(arg2))
  		result = false;
  	else
! 		result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2),
! 						 VARSIZE_ANY_EXHDR(arg1)) == 0);
  
  	PG_FREE_IF_COPY(arg1, 0);
  	PG_FREE_IF_COPY(arg2, 1);
***************
*** 1485,1492 **** textne(PG_FUNCTION_ARGS)
  	if (VARSIZE_ANY_EXHDR(arg1) != VARSIZE_ANY_EXHDR(arg2))
  		result = true;
  	else
! 		result = (strncmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2),
! 						  VARSIZE_ANY_EXHDR(arg1)) != 0);
  
  	PG_FREE_IF_COPY(arg1, 0);
  	PG_FREE_IF_COPY(arg2, 1);
--- 1485,1492 ----
  	if (VARSIZE_ANY_EXHDR(arg1) != VARSIZE_ANY_EXHDR(arg2))
  		result = true;
  	else
! 		result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2),
! 						 VARSIZE_ANY_EXHDR(arg1)) != 0);
  
  	PG_FREE_IF_COPY(arg1, 0);
  	PG_FREE_IF_COPY(arg2, 1);
***************
*** 1612,1618 **** internal_text_pattern_compare(text *arg1, text *arg2)
  	len1 = VARSIZE_ANY_EXHDR(arg1);
  	len2 = VARSIZE_ANY_EXHDR(arg2);
  
! 	result = strncmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
  	if (result != 0)
  		return result;
  	else if (len1 < len2)
--- 1612,1618 ----
  	len1 = VARSIZE_ANY_EXHDR(arg1);
  	len2 = VARSIZE_ANY_EXHDR(arg2);
  
! 	result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
  	if (result != 0)
  		return result;
  	else if (len1 < len2)
bench-texteq.sqltext/plain; charset=us-asciiDownload
#2Robert Haas
robertmhaas@gmail.com
In reply to: Noah Misch (#1)
Re: strncmp->memcmp when we know the shorter length

On Mon, Dec 20, 2010 at 1:10 PM, Noah Misch <noah@leadboat.com> wrote:

When the caller knows the smaller string length, memcmp and strncmp are
functionally equivalent.  Since memcmp need not watch each byte for a NULL
terminator, it often compares a CPU word at a time for better performance.  The
attached patch changes use of strncmp to memcmp where we have the length of the
shorter string.  I was most interested in the varlena.c instances, but I tried
to find all applicable call sites.  To benchmark it, I used the attached
"bench-texteq.sql".  This patch improved my 5-run average timing of the SELECT
from 65.8s to 56.9s, a 13% improvement.  I can't think of a case where the
change should be pessimal.

This is a good idea. I will check this over and commit it.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#3Gurjeet Singh
singh.gurjeet@gmail.com
In reply to: Robert Haas (#2)
Re: strncmp->memcmp when we know the shorter length

On Tue, Dec 21, 2010 at 6:24 PM, Robert Haas <robertmhaas@gmail.com> wrote:

On Mon, Dec 20, 2010 at 1:10 PM, Noah Misch <noah@leadboat.com> wrote:

When the caller knows the smaller string length, memcmp and strncmp are
functionally equivalent. Since memcmp need not watch each byte for a

NULL

terminator, it often compares a CPU word at a time for better

performance. The

attached patch changes use of strncmp to memcmp where we have the length

of the

shorter string. I was most interested in the varlena.c instances, but I

tried

to find all applicable call sites. To benchmark it, I used the attached
"bench-texteq.sql". This patch improved my 5-run average timing of the

SELECT

from 65.8s to 56.9s, a 13% improvement. I can't think of a case where

the

change should be pessimal.

This is a good idea. I will check this over and commit it.

Doesn't this risk accessing bytes beyond the shorter string? Look at the
warning above the StrNCpy(), for example.

Regards,
--
gurjeet.singh
@ EnterpriseDB - The Enterprise Postgres Company
http://www.EnterpriseDB.com

singh.gurjeet@{ gmail | yahoo }.com
Twitter/Skype: singh_gurjeet

Mail sent from my BlackLaptop device

#4Robert Haas
robertmhaas@gmail.com
In reply to: Gurjeet Singh (#3)
Re: strncmp->memcmp when we know the shorter length

On Tue, Dec 21, 2010 at 8:29 PM, Gurjeet Singh <singh.gurjeet@gmail.com> wrote:

On Tue, Dec 21, 2010 at 6:24 PM, Robert Haas <robertmhaas@gmail.com> wrote:

On Mon, Dec 20, 2010 at 1:10 PM, Noah Misch <noah@leadboat.com> wrote:

When the caller knows the smaller string length, memcmp and strncmp are
functionally equivalent.  Since memcmp need not watch each byte for a
NULL
terminator, it often compares a CPU word at a time for better
performance.  The
attached patch changes use of strncmp to memcmp where we have the length
of the
shorter string.  I was most interested in the varlena.c instances, but I
tried
to find all applicable call sites.  To benchmark it, I used the attached
"bench-texteq.sql".  This patch improved my 5-run average timing of the
SELECT
from 65.8s to 56.9s, a 13% improvement.  I can't think of a case where
the
change should be pessimal.

This is a good idea.  I will check this over and commit it.

Doesn't this risk accessing bytes beyond the shorter string?

If it's done properly, I don't see how this would be a risk.

Look at the
warning above the StrNCpy(), for example.

If you're talking about this comment:

* BTW: when you need to copy a non-null-terminated string (like a text
* datum) and add a null, do not do it with StrNCpy(..., len+1). That
* might seem to work, but it fetches one byte more than there is in the
* text object.

...then that's not applicable here. It's perfectly safe to compare to
strings of length n using an n-byte memcmp(). The bytes being
compared are 0 through n - 1; the terminating null is in byte n, or
else it isn't, but memcmp() certainly isn't going to look at it.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#5Gurjeet Singh
singh.gurjeet@gmail.com
In reply to: Robert Haas (#4)
Re: strncmp->memcmp when we know the shorter length

On Tue, Dec 21, 2010 at 9:01 PM, Robert Haas <robertmhaas@gmail.com> wrote:

On Tue, Dec 21, 2010 at 8:29 PM, Gurjeet Singh <singh.gurjeet@gmail.com>
wrote:

On Tue, Dec 21, 2010 at 6:24 PM, Robert Haas <robertmhaas@gmail.com>

wrote:

On Mon, Dec 20, 2010 at 1:10 PM, Noah Misch <noah@leadboat.com> wrote:

When the caller knows the smaller string length, memcmp and strncmp

are

functionally equivalent. Since memcmp need not watch each byte for a
NULL
terminator, it often compares a CPU word at a time for better
performance. The
attached patch changes use of strncmp to memcmp where we have the

length

of the
shorter string. I was most interested in the varlena.c instances, but

I

tried
to find all applicable call sites. To benchmark it, I used the

attached

"bench-texteq.sql". This patch improved my 5-run average timing of

the

SELECT
from 65.8s to 56.9s, a 13% improvement. I can't think of a case where
the
change should be pessimal.

This is a good idea. I will check this over and commit it.

Doesn't this risk accessing bytes beyond the shorter string?

If it's done properly, I don't see how this would be a risk.

Look at the
warning above the StrNCpy(), for example.

If you're talking about this comment:

* BTW: when you need to copy a non-null-terminated string (like a
text
* datum) and add a null, do not do it with StrNCpy(..., len+1). That
* might seem to work, but it fetches one byte more than there is in
the
* text object.

...then that's not applicable here. It's perfectly safe to compare to
strings of length n using an n-byte memcmp(). The bytes being
compared are 0 through n - 1; the terminating null is in byte n, or
else it isn't, but memcmp() certainly isn't going to look at it.

I missed the part where Noah said "... where we have the length of the *
_shorter_* string". I agree we are safe here.

Regards,
--
gurjeet.singh
@ EnterpriseDB - The Enterprise Postgres Company
http://www.EnterpriseDB.com

singh.gurjeet@{ gmail | yahoo }.com
Twitter/Skype: singh_gurjeet

Mail sent from my BlackLaptop device

#6Robert Haas
robertmhaas@gmail.com
In reply to: Robert Haas (#2)
Re: strncmp->memcmp when we know the shorter length

On Tue, Dec 21, 2010 at 6:24 PM, Robert Haas <robertmhaas@gmail.com> wrote:

On Mon, Dec 20, 2010 at 1:10 PM, Noah Misch <noah@leadboat.com> wrote:

When the caller knows the smaller string length, memcmp and strncmp are
functionally equivalent.  Since memcmp need not watch each byte for a NULL
terminator, it often compares a CPU word at a time for better performance.  The
attached patch changes use of strncmp to memcmp where we have the length of the
shorter string.  I was most interested in the varlena.c instances, but I tried
to find all applicable call sites.  To benchmark it, I used the attached
"bench-texteq.sql".  This patch improved my 5-run average timing of the SELECT
from 65.8s to 56.9s, a 13% improvement.  I can't think of a case where the
change should be pessimal.

This is a good idea.  I will check this over and commit it.

A little benchmarking reveals that on my system (MacOS X 10.6.5) it
appears that strncmp() is faster for a 4 character string, but
memcmp() is faster for a 5+ character string. So I think most of
these are pretty clear wins, but I have reverted the changes to
src/backend/tsearch because I'm not entirely confident that lexemes
and affixes will be long enough on average for this to be a win there.
Please feel free to resubmit that part with performance results
showing that it works out to a win. Some of the ltree changes
produced compiler warnings, so I omitted those also. Committed the
rest.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#7Tom Lane
tgl@sss.pgh.pa.us
In reply to: Robert Haas (#4)
Re: strncmp->memcmp when we know the shorter length

Robert Haas <robertmhaas@gmail.com> writes:

If it's done properly, I don't see how this would be a risk.

I'm fairly uncomfortable about the broad swath and low return of this
patch. Noah is assuming that none of these places are relying on
strncmp to stop short upon finding a null, and I don't believe that
that's a safe assumption in every single place. Nor do I believe that
it's worth the effort of trying to prove it safe in most of those
places.

I think this might be a good idea in the varchar.c and varlena.c calls,
but I'd be inclined to leave the rest of the calls alone.

regards, tom lane

#8Robert Haas
robertmhaas@gmail.com
In reply to: Tom Lane (#7)
Re: strncmp->memcmp when we know the shorter length

On Tue, Dec 21, 2010 at 10:24 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:

Robert Haas <robertmhaas@gmail.com> writes:

If it's done properly, I don't see how this would be a risk.

I'm fairly uncomfortable about the broad swath and low return of this
patch.  Noah is assuming that none of these places are relying on
strncmp to stop short upon finding a null, and I don't believe that
that's a safe assumption in every single place.  Nor do I believe that
it's worth the effort of trying to prove it safe in most of those
places.

I think this might be a good idea in the varchar.c and varlena.c calls,
but I'd be inclined to leave the rest of the calls alone.

Eh, I already committed somewhat more than that. I did think about
the concern which you raise. It seems pretty clear that's not a
danger in readfuncs.c. In the hstore and ltree cases, at least at
first blush, it appears to me that it would be downright broken for
someone to be counting on a null to terminate the comparison. The
intent of these bits of code appears to be to do equality comparison a
string stored as a byte count + a byte string, rather than a
null-terminated cstring, so unless I'm misunderstanding something it's
more likely that the use of strncmp() would lead to a bug; the prior
coding doesn't look like it would be correct if NUL bytes were
possible. The tsearch cases also appear to be safe in this regard,
but since I decided against committing those on other grounds I
haven't looked at them as carefully.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#9Tom Lane
tgl@sss.pgh.pa.us
In reply to: Robert Haas (#8)
Re: strncmp->memcmp when we know the shorter length

Robert Haas <robertmhaas@gmail.com> writes:

On Tue, Dec 21, 2010 at 10:24 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:

I'm fairly uncomfortable about the broad swath and low return of this
patch. �Noah is assuming that none of these places are relying on
strncmp to stop short upon finding a null, and I don't believe that
that's a safe assumption in every single place. �Nor do I believe that
it's worth the effort of trying to prove it safe in most of those
places.

Eh, I already committed somewhat more than that. I did think about
the concern which you raise.

Okay ... I was arguing for not bothering to expend that effort, but
since you already did, it's a moot point.

regards, tom lane

#10Noah Misch
noah@leadboat.com
In reply to: Robert Haas (#6)
Re: strncmp->memcmp when we know the shorter length

On Tue, Dec 21, 2010 at 10:15:41PM -0500, Robert Haas wrote:

A little benchmarking reveals that on my system (MacOS X 10.6.5) it
appears that strncmp() is faster for a 4 character string, but
memcmp() is faster for a 5+ character string.

Good call; I hadn't considered that possibility.

So I think most of
these are pretty clear wins, but I have reverted the changes to
src/backend/tsearch because I'm not entirely confident that lexemes
and affixes will be long enough on average for this to be a win there.
Please feel free to resubmit that part with performance results
showing that it works out to a win. Some of the ltree changes
produced compiler warnings, so I omitted those also. Committed the
rest.

Thanks for the quick review and commit. I'm not acquainted with the performance
significance of the tsearch and ltree call sites. Leaving those as-is makes
sense to me.

nm