From ef486287090daa24d51735ba9fa9585341b6e8ec Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Wed, 6 Jul 2022 15:35:33 +0700
Subject: [PATCH v4 3/4] Use lookahead path in json string lexing for the
 non-escape case too

Previously the lookahead loop ran only when lex->strval was set. Running
it unconditionally removes some duplicated code and enables the no-escape
path to be optimized in the same way.

Per suggestion from Andres Freund
---
 src/common/jsonapi.c | 46 +++++++++++++++++++++++---------------------
 1 file changed, 24 insertions(+), 22 deletions(-)

diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c
index ad4858c623..81e176ad8d 100644
--- a/src/common/jsonapi.c
+++ b/src/common/jsonapi.c
@@ -686,15 +686,6 @@ json_lex_string(JsonLexContext *lex)
 			lex->token_terminator = s;
 			return JSON_INVALID_TOKEN;
 		}
-		else if (*s == '"')
-			break;
-		else if ((unsigned char) *s < 32)
-		{
-			/* Per RFC4627, these characters MUST be escaped. */
-			/* Since *s isn't printable, exclude it from the context string */
-			lex->token_terminator = s;
-			return JSON_ESCAPING_REQUIRED;
-		}
 		else if (*s == '\\')
 		{
 			/* OK, we have an escape character. */
@@ -849,22 +840,41 @@ json_lex_string(JsonLexContext *lex)
 				return JSON_ESCAPING_INVALID;
 			}
 		}
-		else if (lex->strval != NULL)
+		else
 		{
-			/* start lookahead at next byte */
-			char	   *p = s + 1;
+			/* start lookahead at current byte */
+			char	   *p = s;
 
 			if (hi_surrogate != -1)
 				return JSON_UNICODE_LOW_SURROGATE;
 
 			while (p < end)
 			{
-				if (*p == '\\' || *p == '"' || (unsigned char) *p < 32)
+				if (*p == '\\' || *p == '"')
 					break;
+				else if ((unsigned char) *p < 32)
+				{
+					/* Per RFC4627, these characters MUST be escaped. */
+					/*
+					 * Since *p isn't printable, exclude it from the context
+					 * string
+					 */
+					lex->token_terminator = p;
+					return JSON_ESCAPING_REQUIRED;
+				}
 				p++;
 			}
 
-			appendBinaryStringInfo(lex->strval, s, p - s);
+			if (lex->strval != NULL)
+				appendBinaryStringInfo(lex->strval, s, p - s);
+
+			if (*p == '"')
+			{
+				/* Hooray, we found the end of the string! */
+				lex->prev_token_terminator = lex->token_terminator;
+				lex->token_terminator = p + 1;
+				return JSON_SUCCESS;
+			}
 
 			/*
 			 * s will be incremented at the top of the loop, so set it to just
@@ -873,14 +883,6 @@ json_lex_string(JsonLexContext *lex)
 			s = p - 1;
 		}
 	}
-
-	if (hi_surrogate != -1)
-		return JSON_UNICODE_LOW_SURROGATE;
-
-	/* Hooray, we found the end of the string! */
-	lex->prev_token_terminator = lex->token_terminator;
-	lex->token_terminator = s + 1;
-	return JSON_SUCCESS;
 }
 
 /*
-- 
2.36.1