From 8f8aa638ee2b6dfa85fa8bc0ec5788f44768e92f Mon Sep 17 00:00:00 2001 From: Jelte Fennema Date: Fri, 24 Jun 2022 09:19:13 +0200 Subject: [PATCH] Optimize json_lex_string by batching character copies When parsing JSON, strings need to be converted from the JSON string format to a C-style string. A simple copy of the buffer does not suffice because of the various escape sequences that JSON supports. Because of this our JSON parser wrote characters into the C-style string buffer one at a time. However, this is only necessary for those escape sequences that map to another character. This patch changes the behaviour for non-escaped characters. These are now copied in batches instead of one character at a time. To test performance of this change I used COPY BINARY from a JSONB table into another, containing fairly large JSONB values of ~15kB. The JSONB values are a JSON object with a single level. They contain a few small keys and values, but one very big value that's a stringified JSON blob. So this JSON blob contains a relatively high number of escape characters, to escape all the " characters. This change improves performance for this workload on my machine by ~18% (going from 1m24s to 1m09s). 
--- src/common/jsonapi.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c index 98e4ef0942..219ecb9df9 100644 --- a/src/common/jsonapi.c +++ b/src/common/jsonapi.c @@ -674,6 +674,7 @@ json_lex_string(JsonLexContext *lex) char *s; int len; int hi_surrogate = -1; + int copyable_characters_length = 0; if (lex->strval != NULL) resetStringInfo(lex->strval); @@ -692,7 +693,18 @@ json_lex_string(JsonLexContext *lex) return JSON_INVALID_TOKEN; } else if (*s == '"') + { + if (copyable_characters_length) + { + /* flush copyable characters */ + appendBinaryStringInfo( + lex->strval, + s - copyable_characters_length, + copyable_characters_length); + + } break; + } else if ((unsigned char) *s < 32) { /* Per RFC4627, these characters MUST be escaped. */ @@ -702,6 +714,16 @@ json_lex_string(JsonLexContext *lex) } else if (*s == '\\') { + if (copyable_characters_length) + { + /* flush copyable characters */ + appendBinaryStringInfo( + lex->strval, + s - copyable_characters_length, + copyable_characters_length); + copyable_characters_length = 0; + + } /* OK, we have an escape character. */ s++; len++; @@ -818,7 +840,7 @@ json_lex_string(JsonLexContext *lex) case '"': case '\\': case '/': - appendStringInfoChar(lex->strval, *s); + copyable_characters_length++; break; case 'b': appendStringInfoChar(lex->strval, '\b'); @@ -861,7 +883,7 @@ json_lex_string(JsonLexContext *lex) if (hi_surrogate != -1) return JSON_UNICODE_LOW_SURROGATE; - appendStringInfoChar(lex->strval, *s); + copyable_characters_length++; } } -- 2.34.1