From 1484b7541b9ed3f0c476e31f99340d36895bc629 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Tue, 5 Jul 2022 18:57:37 +0700
Subject: [PATCH v3 3/3] Use vectorized lookahead in json_lex_string on x86

---
 src/common/jsonapi.c | 48 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c
index ad4858c623..978f18b129 100644
--- a/src/common/jsonapi.c
+++ b/src/common/jsonapi.c
@@ -24,6 +24,12 @@
 #include "miscadmin.h"
 #endif
 
+/* WIP: move this somewhere sensible, and consider removing "CRC" from the macro names */
+#if (defined (__x86_64__) || defined(_M_AMD64)) && (defined(USE_SSE42_CRC32C) || defined(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK))
+#include <nmmintrin.h>
+#define USE_SSE2
+#endif
+
 /*
  * The context of the parser is maintained by the recursive descent
  * mechanism, but is passed explicitly to the error reporting routine
@@ -851,12 +857,54 @@ json_lex_string(JsonLexContext *lex)
 		}
 		else if (lex->strval != NULL)
 		{
+#ifdef USE_SSE2
+			__m128i		block,
+						has_backslash,
+						has_doublequote,
+						control,
+						has_control,
+						error_cum = _mm_setzero_si128();
+			const		__m128i backslash = _mm_set1_epi8('\\');
+			const		__m128i doublequote = _mm_set1_epi8('"');
+			const		__m128i max_control = _mm_set1_epi8(0x1F);
+#endif
 			/* start lookahead at next byte */
 			char	   *p = s + 1;
 
 			if (hi_surrogate != -1)
 				return JSON_UNICODE_LOW_SURROGATE;
 
+#ifdef USE_SSE2
+			while (p < end - sizeof(__m128i))
+			{
+				block = _mm_loadu_si128((const __m128i *) p);
+
+				/* direct comparison to quotes and backslashes */
+				has_backslash = _mm_cmpeq_epi8(block, backslash);
+				has_doublequote = _mm_cmpeq_epi8(block, doublequote);
+
+				/*
+				 * Unsigned saturating subtraction leaves zero exactly in the
+				 * lanes whose byte is <= 0x1F, i.e. a control character.
+				 */
+				control = _mm_subs_epu8(block, max_control);
+				has_control = _mm_cmpeq_epi8(control, _mm_setzero_si128());
+
+				/*
+				 * set bits in error_cum where the corresponding lanes in has_*
+				 * are set
+				 */
+				error_cum = _mm_or_si128(error_cum, has_backslash);
+				error_cum = _mm_or_si128(error_cum, has_doublequote);
+				error_cum = _mm_or_si128(error_cum, has_control);
+
+				if (_mm_movemask_epi8(error_cum))
+					break;
+
+				p += sizeof(__m128i);
+			}
+#endif							/* USE_SSE2 */
+
 			while (p < end)
 			{
 				if (*p == '\\' || *p == '"' || (unsigned char) *p < 32)
-- 
2.36.1

