appendBinaryStringInfo stuff

Started by Peter Eisentrautabout 3 years ago23 messages

peter.eisentraut@enterprisedb.com

about 3 years ago

2 attachment(s)

I found a couple of adjacent weird things:

There are a bunch of places in the json code that use
appendBinaryStringInfo() where appendStringInfoString() could be used, e.g.,

appendBinaryStringInfo(buf, ".size()", 7);

Is there a reason for this? Are we that stretched for performance? I
find this kind of code very fragile.

Also, the argument type of appendBinaryStringInfo() is char *. There is
some code that uses this function to assemble some kind of packed binary
layout, which requires a bunch of casts because of this. I think
functions taking binary data plus length should take void * instead,
like memcpy() for example.

Attached are two patches that illustrate these issues and show proposed
changes.

Attachments:

0001-Use-appendStringInfoString-instead-of-appendBinarySt.patchtext/plain; charset=UTF-8; name=0001-Use-appendStringInfoString-instead-of-appendBinarySt.patchDownload

From 15b103edec2f89a63596d112fd62cfc2367576bf Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <peter@eisentraut.org>
Date: Mon, 19 Dec 2022 07:01:27 +0100
Subject: [PATCH 1/2] Use appendStringInfoString instead of
 appendBinaryStringInfo where possible

---
 src/backend/utils/adt/jsonb.c    | 10 ++++----
 src/backend/utils/adt/jsonpath.c | 42 ++++++++++++++++----------------
 2 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/src/backend/utils/adt/jsonb.c b/src/backend/utils/adt/jsonb.c
index 7c1e5e6144..c153c87ba6 100644
--- a/src/backend/utils/adt/jsonb.c
+++ b/src/backend/utils/adt/jsonb.c
@@ -361,7 +361,7 @@ jsonb_put_escaped_value(StringInfo out, JsonbValue *scalarVal)
 	switch (scalarVal->type)
 	{
 		case jbvNull:
-			appendBinaryStringInfo(out, "null", 4);
+			appendStringInfoString(out, "null");
 			break;
 		case jbvString:
 			escape_json(out, pnstrdup(scalarVal->val.string.val, scalarVal->val.string.len));
@@ -373,9 +373,9 @@ jsonb_put_escaped_value(StringInfo out, JsonbValue *scalarVal)
 			break;
 		case jbvBool:
 			if (scalarVal->val.boolean)
-				appendBinaryStringInfo(out, "true", 4);
+				appendStringInfoString(out, "true");
 			else
-				appendBinaryStringInfo(out, "false", 5);
+				appendStringInfoString(out, "false");
 			break;
 		default:
 			elog(ERROR, "unknown jsonb scalar type");
@@ -565,7 +565,7 @@ JsonbToCStringWorker(StringInfo out, JsonbContainer *in, int estimated_len, bool
 
 				/* json rules guarantee this is a string */
 				jsonb_put_escaped_value(out, &v);
-				appendBinaryStringInfo(out, ": ", 2);
+				appendStringInfoString(out, ": ");
 
 				type = JsonbIteratorNext(&it, &v, false);
 				if (type == WJB_VALUE)
@@ -630,7 +630,7 @@ add_indent(StringInfo out, bool indent, int level)
 
 		appendStringInfoCharMacro(out, '\n');
 		for (i = 0; i < level; i++)
-			appendBinaryStringInfo(out, "    ", 4);
+			appendStringInfoString(out, "    ");
 	}
 }
 
diff --git a/src/backend/utils/adt/jsonpath.c b/src/backend/utils/adt/jsonpath.c
index 91af030095..a39eab9c20 100644
--- a/src/backend/utils/adt/jsonpath.c
+++ b/src/backend/utils/adt/jsonpath.c
@@ -213,7 +213,7 @@ jsonPathToCstring(StringInfo out, JsonPath *in, int estimated_len)
 	enlargeStringInfo(out, estimated_len);
 
 	if (!(in->header & JSONPATH_LAX))
-		appendBinaryStringInfo(out, "strict ", 7);
+		appendStringInfoString(out, "strict ");
 
 	jspInit(&v, in);
 	printJsonPathItem(out, &v, false, true);
@@ -510,9 +510,9 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey,
 			break;
 		case jpiBool:
 			if (jspGetBool(v))
-				appendBinaryStringInfo(buf, "true", 4);
+				appendStringInfoString(buf, "true");
 			else
-				appendBinaryStringInfo(buf, "false", 5);
+				appendStringInfoString(buf, "false");
 			break;
 		case jpiAnd:
 		case jpiOr:
@@ -553,13 +553,13 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey,
 							  operationPriority(elem.type) <=
 							  operationPriority(v->type));
 
-			appendBinaryStringInfo(buf, " like_regex ", 12);
+			appendStringInfoString(buf, " like_regex ");
 
 			escape_json(buf, v->content.like_regex.pattern);
 
 			if (v->content.like_regex.flags)
 			{
-				appendBinaryStringInfo(buf, " flag \"", 7);
+				appendStringInfoString(buf, " flag \"");
 
 				if (v->content.like_regex.flags & JSP_REGEX_ICASE)
 					appendStringInfoChar(buf, 'i');
@@ -591,13 +591,13 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey,
 				appendStringInfoChar(buf, ')');
 			break;
 		case jpiFilter:
-			appendBinaryStringInfo(buf, "?(", 2);
+			appendStringInfoString(buf, "?(");
 			jspGetArg(v, &elem);
 			printJsonPathItem(buf, &elem, false, false);
 			appendStringInfoChar(buf, ')');
 			break;
 		case jpiNot:
-			appendBinaryStringInfo(buf, "!(", 2);
+			appendStringInfoString(buf, "!(");
 			jspGetArg(v, &elem);
 			printJsonPathItem(buf, &elem, false, false);
 			appendStringInfoChar(buf, ')');
@@ -606,10 +606,10 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey,
 			appendStringInfoChar(buf, '(');
 			jspGetArg(v, &elem);
 			printJsonPathItem(buf, &elem, false, false);
-			appendBinaryStringInfo(buf, ") is unknown", 12);
+			appendStringInfoString(buf, ") is unknown");
 			break;
 		case jpiExists:
-			appendBinaryStringInfo(buf, "exists (", 8);
+			appendStringInfoString(buf, "exists (");
 			jspGetArg(v, &elem);
 			printJsonPathItem(buf, &elem, false, false);
 			appendStringInfoChar(buf, ')');
@@ -623,10 +623,10 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey,
 			appendStringInfoChar(buf, '$');
 			break;
 		case jpiLast:
-			appendBinaryStringInfo(buf, "last", 4);
+			appendStringInfoString(buf, "last");
 			break;
 		case jpiAnyArray:
-			appendBinaryStringInfo(buf, "[*]", 3);
+			appendStringInfoString(buf, "[*]");
 			break;
 		case jpiAnyKey:
 			if (inKey)
@@ -648,7 +648,7 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey,
 
 				if (range)
 				{
-					appendBinaryStringInfo(buf, " to ", 4);
+					appendStringInfoString(buf, " to ");
 					printJsonPathItem(buf, &to, false, false);
 				}
 			}
@@ -660,7 +660,7 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey,
 
 			if (v->content.anybounds.first == 0 &&
 				v->content.anybounds.last == PG_UINT32_MAX)
-				appendBinaryStringInfo(buf, "**", 2);
+				appendStringInfoString(buf, "**");
 			else if (v->content.anybounds.first == v->content.anybounds.last)
 			{
 				if (v->content.anybounds.first == PG_UINT32_MAX)
@@ -681,25 +681,25 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey,
 								 v->content.anybounds.last);
 			break;
 		case jpiType:
-			appendBinaryStringInfo(buf, ".type()", 7);
+			appendStringInfoString(buf, ".type()");
 			break;
 		case jpiSize:
-			appendBinaryStringInfo(buf, ".size()", 7);
+			appendStringInfoString(buf, ".size()");
 			break;
 		case jpiAbs:
-			appendBinaryStringInfo(buf, ".abs()", 6);
+			appendStringInfoString(buf, ".abs()");
 			break;
 		case jpiFloor:
-			appendBinaryStringInfo(buf, ".floor()", 8);
+			appendStringInfoString(buf, ".floor()");
 			break;
 		case jpiCeiling:
-			appendBinaryStringInfo(buf, ".ceiling()", 10);
+			appendStringInfoString(buf, ".ceiling()");
 			break;
 		case jpiDouble:
-			appendBinaryStringInfo(buf, ".double()", 9);
+			appendStringInfoString(buf, ".double()");
 			break;
 		case jpiDatetime:
-			appendBinaryStringInfo(buf, ".datetime(", 10);
+			appendStringInfoString(buf, ".datetime(");
 			if (v->content.arg)
 			{
 				jspGetArg(v, &elem);
@@ -708,7 +708,7 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey,
 			appendStringInfoChar(buf, ')');
 			break;
 		case jpiKeyValue:
-			appendBinaryStringInfo(buf, ".keyvalue()", 11);
+			appendStringInfoString(buf, ".keyvalue()");
 			break;
 		default:
 			elog(ERROR, "unrecognized jsonpath item type: %d", v->type);

base-commit: 7122f9d5437789312cb0a7e26e853bb8d2e57add
-- 
2.39.0

0002-Change-argument-of-appendBinaryStringInfo-from-char-.patchtext/plain; charset=UTF-8; name=0002-Change-argument-of-appendBinaryStringInfo-from-char-.patchDownload

From 2e765008d1314eb6d8a3bf5e7fb9c00e93a86461 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <peter@eisentraut.org>
Date: Mon, 19 Dec 2022 07:01:27 +0100
Subject: [PATCH 2/2] Change argument of appendBinaryStringInfo from char * to
 void *

---
 src/backend/utils/adt/jsonpath.c  | 18 +++++++++---------
 src/backend/utils/adt/xid8funcs.c |  4 ++--
 src/common/stringinfo.c           |  4 ++--
 src/include/lib/stringinfo.h      |  4 ++--
 4 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/src/backend/utils/adt/jsonpath.c b/src/backend/utils/adt/jsonpath.c
index a39eab9c20..acc8fd97f1 100644
--- a/src/backend/utils/adt/jsonpath.c
+++ b/src/backend/utils/adt/jsonpath.c
@@ -258,18 +258,18 @@ flattenJsonPathParseItem(StringInfo buf, JsonPathParseItem *item,
 		case jpiString:
 		case jpiVariable:
 		case jpiKey:
-			appendBinaryStringInfo(buf, (char *) &item->value.string.len,
+			appendBinaryStringInfo(buf, &item->value.string.len,
 								   sizeof(item->value.string.len));
 			appendBinaryStringInfo(buf, item->value.string.val,
 								   item->value.string.len);
 			appendStringInfoChar(buf, '\0');
 			break;
 		case jpiNumeric:
-			appendBinaryStringInfo(buf, (char *) item->value.numeric,
+			appendBinaryStringInfo(buf, item->value.numeric,
 								   VARSIZE(item->value.numeric));
 			break;
 		case jpiBool:
-			appendBinaryStringInfo(buf, (char *) &item->value.boolean,
+			appendBinaryStringInfo(buf, &item->value.boolean,
 								   sizeof(item->value.boolean));
 			break;
 		case jpiAnd:
@@ -313,11 +313,11 @@ flattenJsonPathParseItem(StringInfo buf, JsonPathParseItem *item,
 				int32		offs;
 
 				appendBinaryStringInfo(buf,
-									   (char *) &item->value.like_regex.flags,
+									   &item->value.like_regex.flags,
 									   sizeof(item->value.like_regex.flags));
 				offs = reserveSpaceForItemPointer(buf);
 				appendBinaryStringInfo(buf,
-									   (char *) &item->value.like_regex.patternlen,
+									   &item->value.like_regex.patternlen,
 									   sizeof(item->value.like_regex.patternlen));
 				appendBinaryStringInfo(buf, item->value.like_regex.pattern,
 									   item->value.like_regex.patternlen);
@@ -373,7 +373,7 @@ flattenJsonPathParseItem(StringInfo buf, JsonPathParseItem *item,
 				int			offset;
 				int			i;
 
-				appendBinaryStringInfo(buf, (char *) &nelems, sizeof(nelems));
+				appendBinaryStringInfo(buf, &nelems, sizeof(nelems));
 
 				offset = buf->len;
 
@@ -404,10 +404,10 @@ flattenJsonPathParseItem(StringInfo buf, JsonPathParseItem *item,
 			break;
 		case jpiAny:
 			appendBinaryStringInfo(buf,
-								   (char *) &item->value.anybounds.first,
+								   &item->value.anybounds.first,
 								   sizeof(item->value.anybounds.first));
 			appendBinaryStringInfo(buf,
-								   (char *) &item->value.anybounds.last,
+								   &item->value.anybounds.last,
 								   sizeof(item->value.anybounds.last));
 			break;
 		case jpiType:
@@ -464,7 +464,7 @@ reserveSpaceForItemPointer(StringInfo buf)
 	int32		pos = buf->len;
 	int32		ptr = 0;
 
-	appendBinaryStringInfo(buf, (char *) &ptr, sizeof(ptr));
+	appendBinaryStringInfo(buf, &ptr, sizeof(ptr));
 
 	return pos;
 }
diff --git a/src/backend/utils/adt/xid8funcs.c b/src/backend/utils/adt/xid8funcs.c
index 3baf5f7d90..15d4db4b70 100644
--- a/src/backend/utils/adt/xid8funcs.c
+++ b/src/backend/utils/adt/xid8funcs.c
@@ -260,7 +260,7 @@ buf_init(FullTransactionId xmin, FullTransactionId xmax)
 	snap.nxip = 0;
 
 	buf = makeStringInfo();
-	appendBinaryStringInfo(buf, (char *) &snap, PG_SNAPSHOT_SIZE(0));
+	appendBinaryStringInfo(buf, &snap, PG_SNAPSHOT_SIZE(0));
 	return buf;
 }
 
@@ -272,7 +272,7 @@ buf_add_txid(StringInfo buf, FullTransactionId fxid)
 	/* do this before possible realloc */
 	snap->nxip++;
 
-	appendBinaryStringInfo(buf, (char *) &fxid, sizeof(fxid));
+	appendBinaryStringInfo(buf, &fxid, sizeof(fxid));
 }
 
 static pg_snapshot *
diff --git a/src/common/stringinfo.c b/src/common/stringinfo.c
index 76ff4d3e24..66a64180c9 100644
--- a/src/common/stringinfo.c
+++ b/src/common/stringinfo.c
@@ -224,7 +224,7 @@ appendStringInfoSpaces(StringInfo str, int count)
  * if necessary. Ensures that a trailing null byte is present.
  */
 void
-appendBinaryStringInfo(StringInfo str, const char *data, int datalen)
+appendBinaryStringInfo(StringInfo str, const void *data, int datalen)
 {
 	Assert(str != NULL);
 
@@ -250,7 +250,7 @@ appendBinaryStringInfo(StringInfo str, const char *data, int datalen)
  * if necessary. Does not ensure a trailing null-byte exists.
  */
 void
-appendBinaryStringInfoNT(StringInfo str, const char *data, int datalen)
+appendBinaryStringInfoNT(StringInfo str, const void *data, int datalen)
 {
 	Assert(str != NULL);
 
diff --git a/src/include/lib/stringinfo.h b/src/include/lib/stringinfo.h
index 9b755c4883..63ea1eca63 100644
--- a/src/include/lib/stringinfo.h
+++ b/src/include/lib/stringinfo.h
@@ -142,7 +142,7 @@ extern void appendStringInfoSpaces(StringInfo str, int count);
  * if necessary.
  */
 extern void appendBinaryStringInfo(StringInfo str,
-								   const char *data, int datalen);
+								   const void *data, int datalen);
 
 /*------------------------
  * appendBinaryStringInfoNT
@@ -150,7 +150,7 @@ extern void appendBinaryStringInfo(StringInfo str,
  * if necessary. Does not ensure a trailing null-byte exists.
  */
 extern void appendBinaryStringInfoNT(StringInfo str,
-									 const char *data, int datalen);
+									 const void *data, int datalen);
 
 /*------------------------
  * enlargeStringInfo
-- 
2.39.0

Andres Freund

andres@anarazel.de

about 3 years ago

In reply to: Peter Eisentraut (#1)

Re: appendBinaryStringInfo stuff

Hi,

On 2022-12-19 07:13:40 +0100, Peter Eisentraut wrote:

I found a couple of adjacent weird things:

There are a bunch of places in the json code that use
appendBinaryStringInfo() where appendStringInfoString() could be used, e.g.,

appendBinaryStringInfo(buf, ".size()", 7);

Is there a reason for this? Are we that stretched for performance?

strlen() isn't that cheap, so it doesn't generally seem unreasonable. I
don't think we should add the strlen overhead in places that can
conceivably be a bottleneck - and some of the jsonb code clearly can be
that.

I find this kind of code very fragile.

But this is obviously an issue.

Perhaps we should make appendStringInfoString() a static inline function
- most compilers can compute strlen() of a constant string at compile
time.

Greetings,

Andres Freund

David Rowley

dgrowleyml@gmail.com

about 3 years ago

In reply to: Andres Freund (#2)

Re: appendBinaryStringInfo stuff

On Mon, 19 Dec 2022 at 21:12, Andres Freund <andres@anarazel.de> wrote:

Perhaps we should make appendStringInfoString() a static inline function
- most compilers can compute strlen() of a constant string at compile
time.

I had wondered about that, but last time I looked into it there was a
small increase in the size of the binary from doing it. Perhaps it
does not matter, but it's something to consider.

Re-thinking, I wonder if we could use the same macro trick used in
ereport_domain(). Something like:

#ifdef HAVE__BUILTIN_CONSTANT_P
#define appendStringInfoString(str, s) \
__builtin_constant_p(s) ? \
appendBinaryStringInfo(str, s, sizeof(s) - 1) : \
appendStringInfoStringInternal(str, s)
#else
#define appendStringInfoString(str, s) \
appendStringInfoStringInternal(str, s)
#endif

and rename the existing function to appendStringInfoStringInternal.

Because __builtin_constant_p is a known compile-time constant, it
should be folded to just call the corresponding function during
compilation.

Just looking at the binary sizes for postgres. I see:

unpatched = 9972128 bytes
inline function = 9990064 bytes
macro trick = 9984968 bytes

I'm currently not sure why the macro trick increases the binary at
all. I understand why the inline function does.

David

Tom Lane

tgl@sss.pgh.pa.us

about 3 years ago

In reply to: David Rowley (#3)

Re: appendBinaryStringInfo stuff

David Rowley <dgrowleyml@gmail.com> writes:

I'm currently not sure why the macro trick increases the binary at
all. I understand why the inline function does.

In the places where it changes the code at all, you're replacing

appendStringInfoString(buf, s);

with

appendBinaryStringInfo(buf, s, n);

Even if n is a constant, the latter surely requires more instructions
per call site.

Whether this is a win seems to depend on how many of these are
performance-critical.

regards, tom lane

Peter Eisentraut

peter.eisentraut@enterprisedb.com

about 3 years ago

In reply to: Andres Freund (#2)

Re: appendBinaryStringInfo stuff

On 19.12.22 09:12, Andres Freund wrote:

There are a bunch of places in the json code that use
appendBinaryStringInfo() where appendStringInfoString() could be used, e.g.,

appendBinaryStringInfo(buf, ".size()", 7);

Is there a reason for this? Are we that stretched for performance?

strlen() isn't that cheap, so it doesn't generally seem unreasonable. I
don't think we should add the strlen overhead in places that can
conceivably be a bottleneck - and some of the jsonb code clearly can be
that.

AFAICT, the code in question is for the text output of the jsonpath
type, which is used ... for barely anything?

David Rowley

dgrowleyml@gmail.com

about 3 years ago

In reply to: Peter Eisentraut (#5)

Re: appendBinaryStringInfo stuff

On Tue, 20 Dec 2022 at 09:23, Peter Eisentraut
<peter.eisentraut@enterprisedb.com> wrote:

AFAICT, the code in question is for the text output of the jsonpath
type, which is used ... for barely anything?

I think the performance of a type's output function is quite critical.
I've seen huge performance gains in COPY TO performance from
optimising output functions in the past (see dad75eb4a and aa2387e2f).
It would be good to see some measurements to find out how much adding
the strlen calls back in would cost us. If we're unable to measure the
change, then maybe the cleanup patch would be nice. If it's going to
slow COPY TO down 10-20%, then we need to leave this or consider the
inline function mentioned by Andres or the macro trick mentioned by
me.

David

Tom Lane

tgl@sss.pgh.pa.us

about 3 years ago

In reply to: David Rowley (#6)

Re: appendBinaryStringInfo stuff

David Rowley <dgrowleyml@gmail.com> writes:

On Tue, 20 Dec 2022 at 09:23, Peter Eisentraut
<peter.eisentraut@enterprisedb.com> wrote:

AFAICT, the code in question is for the text output of the jsonpath
type, which is used ... for barely anything?

I think the performance of a type's output function is quite critical.

I think Peter is entirely right to question whether *this* type's
output function is performance-critical. Who's got large tables with
jsonpath columns? It seems to me the type would mostly only exist
as constants within queries.

regards, tom lane

David Rowley

dgrowleyml@gmail.com

about 3 years ago

In reply to: Tom Lane (#7)

Re: appendBinaryStringInfo stuff

On Tue, 20 Dec 2022 at 11:42, Tom Lane <tgl@sss.pgh.pa.us> wrote:

I think Peter is entirely right to question whether *this* type's
output function is performance-critical. Who's got large tables with
jsonpath columns? It seems to me the type would mostly only exist
as constants within queries.

The patch touches code in the path of jsonb's output function too. I
don't think you could claim the same for that.

David

Andrew Dunstan

andrew@dunslane.net

about 3 years ago

In reply to: David Rowley (#8)

Re: appendBinaryStringInfo stuff

On 2022-12-19 Mo 17:48, David Rowley wrote:

On Tue, 20 Dec 2022 at 11:42, Tom Lane <tgl@sss.pgh.pa.us> wrote:

I think Peter is entirely right to question whether *this* type's
output function is performance-critical. Who's got large tables with
jsonpath columns? It seems to me the type would mostly only exist
as constants within queries.

The patch touches code in the path of jsonb's output function too. I
don't think you could claim the same for that.

I agree that some of the uses in the jsonpath code could reasonably just
be converted to use appendStringInfoString()

There are 5 uses in the jsonb code where the length param is a compile
time constant:

andrew@ub22:adt $ grep appendBinary.*[0-9] jsonb*
jsonb.c:            appendBinaryStringInfo(out, "null", 4);
jsonb.c:                appendBinaryStringInfo(out, "true", 4);
jsonb.c:                appendBinaryStringInfo(out, "false", 5);
jsonb.c:                appendBinaryStringInfo(out, ": ", 2);
jsonb.c:            appendBinaryStringInfo(out, "    ", 4);

None of these really bother me much, TBH. In fact the last one is
arguably nicer because it tells you without counting how many spaces
there are.

Changing the type of the second argument to appendBinaryStringInfo to
void* seems reasonable.

cheers

andrew

--
Andrew Dunstan
EDB: https://www.enterprisedb.com

#10

Andres Freund

andres@anarazel.de

about 3 years ago

In reply to: David Rowley (#3)

Re: appendBinaryStringInfo stuff

Hi,

On 2022-12-19 21:29:10 +1300, David Rowley wrote:

On Mon, 19 Dec 2022 at 21:12, Andres Freund <andres@anarazel.de> wrote:

Perhaps we should make appendStringInfoString() a static inline function
- most compilers can compute strlen() of a constant string at compile
time.

I had wondered about that, but last time I looked into it there was a
small increase in the size of the binary from doing it. Perhaps it
does not matter, but it's something to consider.

I'd not be too worried about that in this case.

Re-thinking, I wonder if we could use the same macro trick used in
ereport_domain(). Something like:

#ifdef HAVE__BUILTIN_CONSTANT_P
#define appendStringInfoString(str, s) \
__builtin_constant_p(s) ? \
appendBinaryStringInfo(str, s, sizeof(s) - 1) : \
appendStringInfoStringInternal(str, s)
#else
#define appendStringInfoString(str, s) \
appendStringInfoStringInternal(str, s)
#endif

and rename the existing function to appendStringInfoStringInternal.

Because __builtin_constant_p is a known compile-time constant, it
should be folded to just call the corresponding function during
compilation.

Several compilers can optimize away repeated strlen() calls, even if the
string isn't a compile-time constant. So I'm not really convinced that
tying inlining-strlen to __builtin_constant_p() is a good ida.

Just looking at the binary sizes for postgres. I see:

unpatched = 9972128 bytes
inline function = 9990064 bytes

That seems acceptable to me.

macro trick = 9984968 bytes

I'm currently not sure why the macro trick increases the binary at
all. I understand why the inline function does.

I think Tom's explanation is on point.

I've in the past looked at stringinfo.c being the bottleneck in a bunch
of places and concluded that we really need to remove the function call
in the happy path entirely - we should have an enlargeStringInfo() that
we can call externally iff needed and then implement the rest of
appendBinaryStringInfo() etc in an inline function. That allows the
compiler to e.g. optimize out the repeated maintenance of the \0 write
etc.

Greetings,

Andres Freund

#11

David Rowley

dgrowleyml@gmail.com

about 3 years ago

In reply to: Andrew Dunstan (#9)

Re: appendBinaryStringInfo stuff

On Wed, 21 Dec 2022 at 04:47, Andrew Dunstan <andrew@dunslane.net> wrote:

jsonb.c: appendBinaryStringInfo(out, " ", 4);

None of these really bother me much, TBH. In fact the last one is
arguably nicer because it tells you without counting how many spaces
there are.

appendStringInfoSpaces() might be even better.

David

#12

Robert Haas

robertmhaas@gmail.com

about 3 years ago

In reply to: Andrew Dunstan (#9)

Re: appendBinaryStringInfo stuff

On Tue, Dec 20, 2022 at 10:47 AM Andrew Dunstan <andrew@dunslane.net> wrote:

There are 5 uses in the jsonb code where the length param is a compile
time constant:

andrew@ub22:adt $ grep appendBinary.*[0-9] jsonb*
jsonb.c: appendBinaryStringInfo(out, "null", 4);
jsonb.c: appendBinaryStringInfo(out, "true", 4);
jsonb.c: appendBinaryStringInfo(out, "false", 5);
jsonb.c: appendBinaryStringInfo(out, ": ", 2);
jsonb.c: appendBinaryStringInfo(out, " ", 4);

None of these really bother me much, TBH. In fact the last one is
arguably nicer because it tells you without counting how many spaces
there are.

+1. There are certainly cases where this kind of style can create
confusion, but I have a hard time putting any of these instances into
that category. It's obvious at a glance that null is 4 bytes, false is
5, etc.

--
Robert Haas
EDB: http://www.enterprisedb.com

#13

David Rowley

dgrowleyml@gmail.com

about 3 years ago

In reply to: David Rowley (#6)

Re: appendBinaryStringInfo stuff

On Tue, 20 Dec 2022 at 11:26, David Rowley <dgrowleyml@gmail.com> wrote:

It would be good to see some measurements to find out how much adding
the strlen calls back in would cost us.

I tried this out. I'm not pretending I found the best test which
highlights how much the performance will change in a real-world case.
I just wanted to try to get an indication of if changing jsonb's
output function to make more use of appendStringInfoString instead of
appendBinaryStringInfo is likely to affect performance.

Also, in test 2, I picked a use case that makes quite a bit of use of
appendStringInfoString already and checked if inlining that function
would help improve things. I imagine test 2 really is not
bottlenecked on appendStringInfoString enough to get a true idea of
how much inlining appendStringInfoString could really help (spoiler,
it helps quite a bit)

Test 1: See if using appendStringInfoString instead of
appendBinaryStringInfo hinders jsonb output performance.

setup:
create table jb (j jsonb);
insert into jb select row_to_json(pg_class) from pg_class;
vacuum freeze analyze jb;

bench.sql:
select sum(length(j::text)) from jb;

master (@3f28bd73):
$ pgbench -n -T 60 -f bench.sql -M prepared postgres | grep latency
latency average = 1.896 ms
latency average = 1.885 ms
latency average = 1.899 ms

22.57% postgres [.] escape_json
21.83% postgres [.] pg_utf_mblen
9.23% postgres [.] JsonbIteratorNext.part.0
7.12% postgres [.] AllocSetAlloc
4.07% postgres [.] pg_mbstrlen_with_len
3.71% postgres [.] JsonbToCStringWorker
3.70% postgres [.] fillJsonbValue
3.17% postgres [.] appendBinaryStringInfo
2.95% postgres [.] enlargeStringInfo
2.09% postgres [.] jsonb_put_escaped_value
1.89% postgres [.] palloc

master + 0001-Use-appendStringInfoString-instead-of-appendBinarySt.patch

$ pgbench -n -T 60 -f bench.sql -M prepared postgres | grep latency
latency average = 1.912 ms
latency average = 1.912 ms
latency average = 1.912 ms (~1% slower)

22.38% postgres [.] escape_json
21.98% postgres [.] pg_utf_mblen
9.07% postgres [.] JsonbIteratorNext.part.0
5.93% postgres [.] AllocSetAlloc
4.11% postgres [.] pg_mbstrlen_with_len
3.87% postgres [.] fillJsonbValue
3.66% postgres [.] JsonbToCStringWorker
2.28% postgres [.] enlargeStringInfo
2.15% postgres [.] appendStringInfoString
1.98% postgres [.] jsonb_put_escaped_value
1.92% postgres [.] palloc
1.58% postgres [.] appendBinaryStringInfo
1.42% postgres [.] pnstrdup

Test 2: Test if inlining appendStringInfoString helps

bench.sql:
select sum(length(pg_get_ruledef(oid))) from pg_rewrite;

master (@3f28bd73):
$ pgbench -n -T 60 -f bench.sql postgres | grep latency
latency average = 16.355 ms
latency average = 16.290 ms
latency average = 16.303 ms

static inline appendStringInfoString
$ pgbench -n -T 60 -f bench.sql postgres | grep latency
latency average = 15.690 ms
latency average = 15.575 ms
latency average = 15.604 ms (~4.4% faster)

David

#14

Tom Lane

tgl@sss.pgh.pa.us

about 3 years ago

In reply to: David Rowley (#13)

Re: appendBinaryStringInfo stuff

David Rowley <dgrowleyml@gmail.com> writes:

22.57% postgres [.] escape_json

Hmm ... shouldn't we do something like

-                appendStringInfoString(buf, "\\b");
+                appendStringInfoCharMacro(buf, '\\');
+                appendStringInfoCharMacro(buf, 'b');

and so on in that function? I'm not convinced that this one
hotspot justifies inlining appendStringInfoString everywhere.

regards, tom lane

#15

David Rowley

dgrowleyml@gmail.com

about 3 years ago

In reply to: Tom Lane (#14)

Re: appendBinaryStringInfo stuff

On Thu, 22 Dec 2022 at 20:56, Tom Lane <tgl@sss.pgh.pa.us> wrote:

David Rowley <dgrowleyml@gmail.com> writes:

22.57% postgres [.] escape_json

Hmm ... shouldn't we do something like
-                appendStringInfoString(buf, "\\b");
+                appendStringInfoCharMacro(buf, '\\');
+                appendStringInfoCharMacro(buf, 'b');
and so on in that function? I'm not convinced that this one
hotspot justifies inlining appendStringInfoString everywhere.

It improves things slightly:

Test 1 (from earlier)

master + escape_json using appendStringInfoCharMacro
$ pgbench -n -T 60 -f bench.sql -M prepared postgres | grep latency
latency average = 1.807 ms
latency average = 1.800 ms
latency average = 1.812 ms (~4.8% faster than master)

23.05% postgres [.] pg_utf_mblen
22.55% postgres [.] escape_json
8.58% postgres [.] JsonbIteratorNext.part.0
6.80% postgres [.] AllocSetAlloc
4.23% postgres [.] pg_mbstrlen_with_len
3.88% postgres [.] JsonbToCStringWorker
3.79% postgres [.] fillJsonbValue
3.18% postgres [.] appendBinaryStringInfo
2.43% postgres [.] enlargeStringInfo
2.02% postgres [.] palloc
1.61% postgres [.] jsonb_put_escaped_value

David

#16

John Naylor

john.naylor@enterprisedb.com

about 3 years ago

In reply to: David Rowley (#15)

Re: appendBinaryStringInfo stuff

On Thu, Dec 22, 2022 at 4:19 PM David Rowley <dgrowleyml@gmail.com> wrote:

Test 1 (from earlier)

master + escape_json using appendStringInfoCharMacro
$ pgbench -n -T 60 -f bench.sql -M prepared postgres | grep latency
latency average = 1.807 ms
latency average = 1.800 ms
latency average = 1.812 ms (~4.8% faster than master)

23.05% postgres [.] pg_utf_mblen

I get about 20% improvement by adding an ascii fast path in
pg_mbstrlen_with_len, which I think would work with all encodings we
support:

@@ -1064,7 +1064,12 @@ pg_mbstrlen_with_len(const char *mbstr, int limit)

        while (limit > 0 && *mbstr)
        {
-               int                     l = pg_mblen(mbstr);
+               int                     l;
+
+               if (!IS_HIGHBIT_SET(*mbstr))
+                       l = 1;
+               else
+                       l = pg_mblen(mbstr);

--
John Naylor
EDB: http://www.enterprisedb.com

#17

Peter Eisentraut

peter.eisentraut@enterprisedb.com

about 3 years ago

In reply to: David Rowley (#8)

2 attachment(s)

Re: appendBinaryStringInfo stuff

On 19.12.22 23:48, David Rowley wrote:

On Tue, 20 Dec 2022 at 11:42, Tom Lane <tgl@sss.pgh.pa.us> wrote:

I think Peter is entirely right to question whether *this* type's
output function is performance-critical. Who's got large tables with
jsonpath columns? It seems to me the type would mostly only exist
as constants within queries.

The patch touches code in the path of jsonb's output function too. I
don't think you could claim the same for that.

Ok, let's leave the jsonb output alone. The jsonb output code also
won't change a lot, but there is a bunch of stuff for jsonpath on the
horizon, so having some more robust coding style to imitate there seems
useful. Here is another patch set with the jsonb changes omitted.

Attachments:

v2-0001-Use-appendStringInfoString-instead-of-appendBinar.patchtext/plain; charset=UTF-8; name=v2-0001-Use-appendStringInfoString-instead-of-appendBinar.patchDownload

From 319c206bec80d326808a211f9edd50346edbeb8c Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <peter@eisentraut.org>
Date: Mon, 19 Dec 2022 07:01:27 +0100
Subject: [PATCH v2 1/2] Use appendStringInfoString instead of
 appendBinaryStringInfo where possible

For the jsonpath output, we don't need to squeeze out every bit of
performance, so instead use a more robust coding style.  There are
similar calls in jsonb.c, which we leave alone here since there is
indeed a performance impact for bulk exports.

Discussion: https://www.postgresql.org/message-id/flat/a0086cfc-ff0f-2827-20fe-52b591d2666c%40enterprisedb.com
---
 src/backend/utils/adt/jsonpath.c | 42 ++++++++++++++++----------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/src/backend/utils/adt/jsonpath.c b/src/backend/utils/adt/jsonpath.c
index 91af030095..a39eab9c20 100644
--- a/src/backend/utils/adt/jsonpath.c
+++ b/src/backend/utils/adt/jsonpath.c
@@ -213,7 +213,7 @@ jsonPathToCstring(StringInfo out, JsonPath *in, int estimated_len)
 	enlargeStringInfo(out, estimated_len);
 
 	if (!(in->header & JSONPATH_LAX))
-		appendBinaryStringInfo(out, "strict ", 7);
+		appendStringInfoString(out, "strict ");
 
 	jspInit(&v, in);
 	printJsonPathItem(out, &v, false, true);
@@ -510,9 +510,9 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey,
 			break;
 		case jpiBool:
 			if (jspGetBool(v))
-				appendBinaryStringInfo(buf, "true", 4);
+				appendStringInfoString(buf, "true");
 			else
-				appendBinaryStringInfo(buf, "false", 5);
+				appendStringInfoString(buf, "false");
 			break;
 		case jpiAnd:
 		case jpiOr:
@@ -553,13 +553,13 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey,
 							  operationPriority(elem.type) <=
 							  operationPriority(v->type));
 
-			appendBinaryStringInfo(buf, " like_regex ", 12);
+			appendStringInfoString(buf, " like_regex ");
 
 			escape_json(buf, v->content.like_regex.pattern);
 
 			if (v->content.like_regex.flags)
 			{
-				appendBinaryStringInfo(buf, " flag \"", 7);
+				appendStringInfoString(buf, " flag \"");
 
 				if (v->content.like_regex.flags & JSP_REGEX_ICASE)
 					appendStringInfoChar(buf, 'i');
@@ -591,13 +591,13 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey,
 				appendStringInfoChar(buf, ')');
 			break;
 		case jpiFilter:
-			appendBinaryStringInfo(buf, "?(", 2);
+			appendStringInfoString(buf, "?(");
 			jspGetArg(v, &elem);
 			printJsonPathItem(buf, &elem, false, false);
 			appendStringInfoChar(buf, ')');
 			break;
 		case jpiNot:
-			appendBinaryStringInfo(buf, "!(", 2);
+			appendStringInfoString(buf, "!(");
 			jspGetArg(v, &elem);
 			printJsonPathItem(buf, &elem, false, false);
 			appendStringInfoChar(buf, ')');
@@ -606,10 +606,10 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey,
 			appendStringInfoChar(buf, '(');
 			jspGetArg(v, &elem);
 			printJsonPathItem(buf, &elem, false, false);
-			appendBinaryStringInfo(buf, ") is unknown", 12);
+			appendStringInfoString(buf, ") is unknown");
 			break;
 		case jpiExists:
-			appendBinaryStringInfo(buf, "exists (", 8);
+			appendStringInfoString(buf, "exists (");
 			jspGetArg(v, &elem);
 			printJsonPathItem(buf, &elem, false, false);
 			appendStringInfoChar(buf, ')');
@@ -623,10 +623,10 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey,
 			appendStringInfoChar(buf, '$');
 			break;
 		case jpiLast:
-			appendBinaryStringInfo(buf, "last", 4);
+			appendStringInfoString(buf, "last");
 			break;
 		case jpiAnyArray:
-			appendBinaryStringInfo(buf, "[*]", 3);
+			appendStringInfoString(buf, "[*]");
 			break;
 		case jpiAnyKey:
 			if (inKey)
@@ -648,7 +648,7 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey,
 
 				if (range)
 				{
-					appendBinaryStringInfo(buf, " to ", 4);
+					appendStringInfoString(buf, " to ");
 					printJsonPathItem(buf, &to, false, false);
 				}
 			}
@@ -660,7 +660,7 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey,
 
 			if (v->content.anybounds.first == 0 &&
 				v->content.anybounds.last == PG_UINT32_MAX)
-				appendBinaryStringInfo(buf, "**", 2);
+				appendStringInfoString(buf, "**");
 			else if (v->content.anybounds.first == v->content.anybounds.last)
 			{
 				if (v->content.anybounds.first == PG_UINT32_MAX)
@@ -681,25 +681,25 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey,
 								 v->content.anybounds.last);
 			break;
 		case jpiType:
-			appendBinaryStringInfo(buf, ".type()", 7);
+			appendStringInfoString(buf, ".type()");
 			break;
 		case jpiSize:
-			appendBinaryStringInfo(buf, ".size()", 7);
+			appendStringInfoString(buf, ".size()");
 			break;
 		case jpiAbs:
-			appendBinaryStringInfo(buf, ".abs()", 6);
+			appendStringInfoString(buf, ".abs()");
 			break;
 		case jpiFloor:
-			appendBinaryStringInfo(buf, ".floor()", 8);
+			appendStringInfoString(buf, ".floor()");
 			break;
 		case jpiCeiling:
-			appendBinaryStringInfo(buf, ".ceiling()", 10);
+			appendStringInfoString(buf, ".ceiling()");
 			break;
 		case jpiDouble:
-			appendBinaryStringInfo(buf, ".double()", 9);
+			appendStringInfoString(buf, ".double()");
 			break;
 		case jpiDatetime:
-			appendBinaryStringInfo(buf, ".datetime(", 10);
+			appendStringInfoString(buf, ".datetime(");
 			if (v->content.arg)
 			{
 				jspGetArg(v, &elem);
@@ -708,7 +708,7 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey,
 			appendStringInfoChar(buf, ')');
 			break;
 		case jpiKeyValue:
-			appendBinaryStringInfo(buf, ".keyvalue()", 11);
+			appendStringInfoString(buf, ".keyvalue()");
 			break;
 		default:
 			elog(ERROR, "unrecognized jsonpath item type: %d", v->type);

base-commit: f4f2f2b84a0bbf9edbfc4a8684040a941cd6d085
-- 
2.39.0

v2-0002-Change-argument-of-appendBinaryStringInfo-from-ch.patchtext/plain; charset=UTF-8; name=v2-0002-Change-argument-of-appendBinaryStringInfo-from-ch.patchDownload

From a56c77066e750ca9d7afcf1f16679d8dcd9af06f Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <peter@eisentraut.org>
Date: Mon, 19 Dec 2022 07:01:27 +0100
Subject: [PATCH v2 2/2] Change argument of appendBinaryStringInfo from char *
 to void *

There is some code that uses this function to assemble some kind of
packed binary layout, which requires a bunch of casts because of this.
Functions taking binary data plus length should take void * instead,
like memcpy() for example.

Discussion: https://www.postgresql.org/message-id/flat/a0086cfc-ff0f-2827-20fe-52b591d2666c%40enterprisedb.com
---
 src/backend/utils/adt/jsonpath.c  | 18 +++++++++---------
 src/backend/utils/adt/xid8funcs.c |  4 ++--
 src/common/stringinfo.c           |  4 ++--
 src/include/lib/stringinfo.h      |  4 ++--
 4 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/src/backend/utils/adt/jsonpath.c b/src/backend/utils/adt/jsonpath.c
index a39eab9c20..acc8fd97f1 100644
--- a/src/backend/utils/adt/jsonpath.c
+++ b/src/backend/utils/adt/jsonpath.c
@@ -258,18 +258,18 @@ flattenJsonPathParseItem(StringInfo buf, JsonPathParseItem *item,
 		case jpiString:
 		case jpiVariable:
 		case jpiKey:
-			appendBinaryStringInfo(buf, (char *) &item->value.string.len,
+			appendBinaryStringInfo(buf, &item->value.string.len,
 								   sizeof(item->value.string.len));
 			appendBinaryStringInfo(buf, item->value.string.val,
 								   item->value.string.len);
 			appendStringInfoChar(buf, '\0');
 			break;
 		case jpiNumeric:
-			appendBinaryStringInfo(buf, (char *) item->value.numeric,
+			appendBinaryStringInfo(buf, item->value.numeric,
 								   VARSIZE(item->value.numeric));
 			break;
 		case jpiBool:
-			appendBinaryStringInfo(buf, (char *) &item->value.boolean,
+			appendBinaryStringInfo(buf, &item->value.boolean,
 								   sizeof(item->value.boolean));
 			break;
 		case jpiAnd:
@@ -313,11 +313,11 @@ flattenJsonPathParseItem(StringInfo buf, JsonPathParseItem *item,
 				int32		offs;
 
 				appendBinaryStringInfo(buf,
-									   (char *) &item->value.like_regex.flags,
+									   &item->value.like_regex.flags,
 									   sizeof(item->value.like_regex.flags));
 				offs = reserveSpaceForItemPointer(buf);
 				appendBinaryStringInfo(buf,
-									   (char *) &item->value.like_regex.patternlen,
+									   &item->value.like_regex.patternlen,
 									   sizeof(item->value.like_regex.patternlen));
 				appendBinaryStringInfo(buf, item->value.like_regex.pattern,
 									   item->value.like_regex.patternlen);
@@ -373,7 +373,7 @@ flattenJsonPathParseItem(StringInfo buf, JsonPathParseItem *item,
 				int			offset;
 				int			i;
 
-				appendBinaryStringInfo(buf, (char *) &nelems, sizeof(nelems));
+				appendBinaryStringInfo(buf, &nelems, sizeof(nelems));
 
 				offset = buf->len;
 
@@ -404,10 +404,10 @@ flattenJsonPathParseItem(StringInfo buf, JsonPathParseItem *item,
 			break;
 		case jpiAny:
 			appendBinaryStringInfo(buf,
-								   (char *) &item->value.anybounds.first,
+								   &item->value.anybounds.first,
 								   sizeof(item->value.anybounds.first));
 			appendBinaryStringInfo(buf,
-								   (char *) &item->value.anybounds.last,
+								   &item->value.anybounds.last,
 								   sizeof(item->value.anybounds.last));
 			break;
 		case jpiType:
@@ -464,7 +464,7 @@ reserveSpaceForItemPointer(StringInfo buf)
 	int32		pos = buf->len;
 	int32		ptr = 0;
 
-	appendBinaryStringInfo(buf, (char *) &ptr, sizeof(ptr));
+	appendBinaryStringInfo(buf, &ptr, sizeof(ptr));
 
 	return pos;
 }
diff --git a/src/backend/utils/adt/xid8funcs.c b/src/backend/utils/adt/xid8funcs.c
index 3baf5f7d90..15d4db4b70 100644
--- a/src/backend/utils/adt/xid8funcs.c
+++ b/src/backend/utils/adt/xid8funcs.c
@@ -260,7 +260,7 @@ buf_init(FullTransactionId xmin, FullTransactionId xmax)
 	snap.nxip = 0;
 
 	buf = makeStringInfo();
-	appendBinaryStringInfo(buf, (char *) &snap, PG_SNAPSHOT_SIZE(0));
+	appendBinaryStringInfo(buf, &snap, PG_SNAPSHOT_SIZE(0));
 	return buf;
 }
 
@@ -272,7 +272,7 @@ buf_add_txid(StringInfo buf, FullTransactionId fxid)
 	/* do this before possible realloc */
 	snap->nxip++;
 
-	appendBinaryStringInfo(buf, (char *) &fxid, sizeof(fxid));
+	appendBinaryStringInfo(buf, &fxid, sizeof(fxid));
 }
 
 static pg_snapshot *
diff --git a/src/common/stringinfo.c b/src/common/stringinfo.c
index 76ff4d3e24..66a64180c9 100644
--- a/src/common/stringinfo.c
+++ b/src/common/stringinfo.c
@@ -224,7 +224,7 @@ appendStringInfoSpaces(StringInfo str, int count)
  * if necessary. Ensures that a trailing null byte is present.
  */
 void
-appendBinaryStringInfo(StringInfo str, const char *data, int datalen)
+appendBinaryStringInfo(StringInfo str, const void *data, int datalen)
 {
 	Assert(str != NULL);
 
@@ -250,7 +250,7 @@ appendBinaryStringInfo(StringInfo str, const char *data, int datalen)
  * if necessary. Does not ensure a trailing null-byte exists.
  */
 void
-appendBinaryStringInfoNT(StringInfo str, const char *data, int datalen)
+appendBinaryStringInfoNT(StringInfo str, const void *data, int datalen)
 {
 	Assert(str != NULL);
 
diff --git a/src/include/lib/stringinfo.h b/src/include/lib/stringinfo.h
index 9b755c4883..63ea1eca63 100644
--- a/src/include/lib/stringinfo.h
+++ b/src/include/lib/stringinfo.h
@@ -142,7 +142,7 @@ extern void appendStringInfoSpaces(StringInfo str, int count);
  * if necessary.
  */
 extern void appendBinaryStringInfo(StringInfo str,
-								   const char *data, int datalen);
+								   const void *data, int datalen);
 
 /*------------------------
  * appendBinaryStringInfoNT
@@ -150,7 +150,7 @@ extern void appendBinaryStringInfo(StringInfo str,
  * if necessary. Does not ensure a trailing null-byte exists.
  */
 extern void appendBinaryStringInfoNT(StringInfo str,
-									 const char *data, int datalen);
+									 const void *data, int datalen);
 
 /*------------------------
  * enlargeStringInfo
-- 
2.39.0

#18

David Rowley

dgrowleyml@gmail.com

about 3 years ago

In reply to: Peter Eisentraut (#17)

Re: appendBinaryStringInfo stuff

On Fri, 23 Dec 2022 at 22:04, Peter Eisentraut
<peter.eisentraut@enterprisedb.com> wrote:

On 19.12.22 23:48, David Rowley wrote:

On Tue, 20 Dec 2022 at 11:42, Tom Lane <tgl@sss.pgh.pa.us> wrote:

I think Peter is entirely right to question whether *this* type's
output function is performance-critical. Who's got large tables with
jsonpath columns? It seems to me the type would mostly only exist
as constants within queries.

The patch touches code in the path of jsonb's output function too. I
don't think you could claim the same for that.

Ok, let's leave the jsonb output alone. The jsonb output code also
won't change a lot, but there is a bunch of stuff for jsonpath on the
horizon, so having some more robust coding style to imitate there seems
useful. Here is another patch set with the jsonb changes omitted.

Maybe if there's concern that inlining appendStringInfoString is going
to bloat the binary too much, then how about we just invent an inlined
version of it using some other name that we can use when performance
matters? We could then safely replace the offending
appendBinaryStringInfos from both places without any concern for
regressing performance.

FWIW, I just did a few compilation runs of our supported versions to
see how much postgres binary grew release to release:

branch postgres binary size growth bytes
REL_10_STABLE 8230232 0
REL_11_STABLE 8586024 355792
REL_12_STABLE 8831664 245640
REL_13_STABLE 8990824 159160
REL_14_STABLE 9484848 494024
REL_15_STABLE 9744680 259832
master 9977896 233216
inline_asis 10004032 26136

(inlined_asis = inlined appendStringInfoString)

On the other hand, if we went with inlining the existing function,
then it looks to be about 10% of the growth we saw between v14 and
v15. That seems quite large.

David

#19

Peter Eisentraut

peter.eisentraut@enterprisedb.com

about 3 years ago

In reply to: Peter Eisentraut (#17)

Re: appendBinaryStringInfo stuff

On 23.12.22 10:04, Peter Eisentraut wrote:

On 19.12.22 23:48, David Rowley wrote:

On Tue, 20 Dec 2022 at 11:42, Tom Lane <tgl@sss.pgh.pa.us> wrote:

I think Peter is entirely right to question whether *this* type's
output function is performance-critical. Who's got large tables with
jsonpath columns? It seems to me the type would mostly only exist
as constants within queries.

The patch touches code in the path of jsonb's output function too. I
don't think you could claim the same for that.

Ok, let's leave the jsonb output alone. The jsonb output code also
won't change a lot, but there is a bunch of stuff for jsonpath on the
horizon, so having some more robust coding style to imitate there seems
useful. Here is another patch set with the jsonb changes omitted.

I have committed these.

#20

Peter Eisentraut

peter.eisentraut@enterprisedb.com

about 3 years ago

In reply to: David Rowley (#18)

Re: appendBinaryStringInfo stuff

On 23.12.22 14:01, David Rowley wrote:

Maybe if there's concern that inlining appendStringInfoString is going
to bloat the binary too much, then how about we just invent an inlined
version of it using some other name that we can use when performance
matters? We could then safely replace the offending
appendBinaryStringInfos from both places without any concern for
regressing performance.

The jsonpath output routines don't appear to be written with deep
concern about performance now, so I'm not sure this is the place to
start tweaking. For the jsonb parts, there are only a handful of
strings this affects ("true", "false", "null"), so using
appendBinaryStringInfo() there a few times doesn't seem so bad. So I'm
not too worried about this altogether.

#21

Peter Eisentraut

peter.eisentraut@enterprisedb.com

almost 3 years ago

In reply to: Peter Eisentraut (#1)

1 attachment(s)

Re: appendBinaryStringInfo stuff

On 19.12.22 07:13, Peter Eisentraut wrote:

Also, the argument type of appendBinaryStringInfo() is char *. There is
some code that uses this function to assemble some kind of packed binary
layout, which requires a bunch of casts because of this. I think
functions taking binary data plus length should take void * instead,
like memcpy() for example.

I found a little follow-up for this one: Make the same change to
pq_sendbytes(), which is a thin wrapper around appendBinaryStringInfo().
This would allow getting rid of further casts at call sites.

Attachments:

0001-Change-argument-type-of-pq_sendbytes-from-char-to-vo.patchtext/plain; charset=UTF-8; name=0001-Change-argument-type-of-pq_sendbytes-from-char-to-vo.patchDownload

From 0aed766e046ba71b01093b44f0cae3a4098f1e5b Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <peter@eisentraut.org>
Date: Fri, 10 Feb 2023 13:11:09 +0100
Subject: [PATCH] Change argument type of pq_sendbytes from char * to void *

---
 src/backend/libpq/pqformat.c            |  2 +-
 src/backend/utils/adt/array_userfuncs.c | 11 +++++------
 src/backend/utils/adt/uuid.c            |  2 +-
 src/backend/utils/adt/varbit.c          |  2 +-
 src/include/libpq/pqformat.h            |  2 +-
 5 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/src/backend/libpq/pqformat.c b/src/backend/libpq/pqformat.c
index b7e2c7b6c9..d712c80b5e 100644
--- a/src/backend/libpq/pqformat.c
+++ b/src/backend/libpq/pqformat.c
@@ -123,7 +123,7 @@ pq_beginmessage_reuse(StringInfo buf, char msgtype)
  * --------------------------------
  */
 void
-pq_sendbytes(StringInfo buf, const char *data, int datalen)
+pq_sendbytes(StringInfo buf, const void *data, int datalen)
 {
 	/* use variant that maintains a trailing null-byte, out of caution */
 	appendBinaryStringInfo(buf, data, datalen);
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index c8a8d33ca3..80750191d8 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -654,7 +654,7 @@ array_agg_serialize(PG_FUNCTION_ARGS)
 	pq_sendbyte(&buf, state->typalign);
 
 	/* dnulls */
-	pq_sendbytes(&buf, (char *) state->dnulls, sizeof(bool) * state->nelems);
+	pq_sendbytes(&buf, state->dnulls, sizeof(bool) * state->nelems);
 
 	/*
 	 * dvalues.  By agreement with array_agg_deserialize, when the element
@@ -664,8 +664,7 @@ array_agg_serialize(PG_FUNCTION_ARGS)
 	 * must be sent first).
 	 */
 	if (state->typbyval)
-		pq_sendbytes(&buf, (char *) state->dvalues,
-					 sizeof(Datum) * state->nelems);
+		pq_sendbytes(&buf, state->dvalues, sizeof(Datum) * state->nelems);
 	else
 	{
 		SerialIOData *iodata;
@@ -1097,7 +1096,7 @@ array_agg_array_serialize(PG_FUNCTION_ARGS)
 	if (state->nullbitmap)
 	{
 		Assert(state->aitems > 0);
-		pq_sendbytes(&buf, (char *) state->nullbitmap, (state->aitems + 7) / 8);
+		pq_sendbytes(&buf, state->nullbitmap, (state->aitems + 7) / 8);
 	}
 
 	/* nitems */
@@ -1107,10 +1106,10 @@ array_agg_array_serialize(PG_FUNCTION_ARGS)
 	pq_sendint32(&buf, state->ndims);
 
 	/* dims: XXX should we just send ndims elements? */
-	pq_sendbytes(&buf, (char *) state->dims, sizeof(state->dims));
+	pq_sendbytes(&buf, state->dims, sizeof(state->dims));
 
 	/* lbs */
-	pq_sendbytes(&buf, (char *) state->lbs, sizeof(state->lbs));
+	pq_sendbytes(&buf, state->lbs, sizeof(state->lbs));
 
 	result = pq_endtypsend(&buf);
 
diff --git a/src/backend/utils/adt/uuid.c b/src/backend/utils/adt/uuid.c
index 76ff6c533f..4f7aa768fd 100644
--- a/src/backend/utils/adt/uuid.c
+++ b/src/backend/utils/adt/uuid.c
@@ -154,7 +154,7 @@ uuid_send(PG_FUNCTION_ARGS)
 	StringInfoData buffer;
 
 	pq_begintypsend(&buffer);
-	pq_sendbytes(&buffer, (char *) uuid->data, UUID_LEN);
+	pq_sendbytes(&buffer, uuid->data, UUID_LEN);
 	PG_RETURN_BYTEA_P(pq_endtypsend(&buffer));
 }
 
diff --git a/src/backend/utils/adt/varbit.c b/src/backend/utils/adt/varbit.c
index 4fde0c1b14..3dbbd1207f 100644
--- a/src/backend/utils/adt/varbit.c
+++ b/src/backend/utils/adt/varbit.c
@@ -685,7 +685,7 @@ varbit_send(PG_FUNCTION_ARGS)
 
 	pq_begintypsend(&buf);
 	pq_sendint32(&buf, VARBITLEN(s));
-	pq_sendbytes(&buf, (char *) VARBITS(s), VARBITBYTES(s));
+	pq_sendbytes(&buf, VARBITS(s), VARBITBYTES(s));
 	PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
 }
 
diff --git a/src/include/libpq/pqformat.h b/src/include/libpq/pqformat.h
index c5644c0061..0d2f958af3 100644
--- a/src/include/libpq/pqformat.h
+++ b/src/include/libpq/pqformat.h
@@ -22,7 +22,7 @@ extern void pq_beginmessage_reuse(StringInfo buf, char msgtype);
 extern void pq_endmessage(StringInfo buf);
 extern void pq_endmessage_reuse(StringInfo buf);
 
-extern void pq_sendbytes(StringInfo buf, const char *data, int datalen);
+extern void pq_sendbytes(StringInfo buf, const void *data, int datalen);
 extern void pq_sendcountedtext(StringInfo buf, const char *str, int slen,
 							   bool countincludesself);
 extern void pq_sendtext(StringInfo buf, const char *str, int slen);
-- 
2.39.1

#22

Corey Huinker

corey.huinker@gmail.com

almost 3 years ago

In reply to: Peter Eisentraut (#21)

Re: appendBinaryStringInfo stuff

On Fri, Feb 10, 2023 at 7:16 AM Peter Eisentraut <
peter.eisentraut@enterprisedb.com> wrote:

On 19.12.22 07:13, Peter Eisentraut wrote:

Also, the argument type of appendBinaryStringInfo() is char *. There is
some code that uses this function to assemble some kind of packed binary
layout, which requires a bunch of casts because of this. I think
functions taking binary data plus length should take void * instead,
like memcpy() for example.

I found a little follow-up for this one: Make the same change to
pq_sendbytes(), which is a thin wrapper around appendBinaryStringInfo().
This would allow getting rid of further casts at call sites.

Has all the benefits that 54a177a948b0a773c25c6737d1cc3cc49222a526 had.

Passes make check-world.

#23

Peter Eisentraut

peter.eisentraut@enterprisedb.com

almost 3 years ago

In reply to: Corey Huinker (#22)

Re: appendBinaryStringInfo stuff

On 10.02.23 20:08, Corey Huinker wrote:

On Fri, Feb 10, 2023 at 7:16 AM Peter Eisentraut
<peter.eisentraut@enterprisedb.com
<mailto:peter.eisentraut@enterprisedb.com>> wrote:

On 19.12.22 07:13, Peter Eisentraut wrote:

Also, the argument type of appendBinaryStringInfo() is char *.

There is

some code that uses this function to assemble some kind of packed

binary

layout, which requires a bunch of casts because of this. I think
functions taking binary data plus length should take void * instead,
like memcpy() for example.

I found a little follow-up for this one: Make the same change to
pq_sendbytes(), which is a thin wrapper around
appendBinaryStringInfo().
This would allow getting rid of further casts at call sites.

+1

Has all the benefits that 54a177a948b0a773c25c6737d1cc3cc49222a526 had.

Passes make check-world.

committed, thanks