From b589d728b54de071b8d4383a3a51de5f7c2e2293 Mon Sep 17 00:00:00 2001
From: Steven Niu <niushiji@highgo.com>
Date: Wed, 26 Mar 2025 14:43:43 +0800
Subject: [PATCH v2 1/2] Optimize function byteain() to avoid double scanning

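Previously, byteain() scanned escape-format input twice: a first pass
validated the input and counted the output bytes so the result could be
allocated at its exact size, and a second pass decoded it.

Allocate strlen(input) + VARHDRSZ bytes up front instead, which is an
upper bound on the decoded length because every escape sequence consumes
more input bytes than it produces. Then validate and decode in a single
pass, and set the actual varlena size once decoding is complete.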

Author: Steven Niu <niushiji@gmail.com>
---
 src/backend/utils/adt/varlena.c | 66 +++++++++++----------------------
 1 file changed, 22 insertions(+), 44 deletions(-)

diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 3e4d5568bde..f1f1efba053 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -291,7 +291,6 @@ text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
  *		ereport(ERROR, ...) if bad form.
  *
  *		BUGS:
- *				The input is scanned twice.
  *				The error checking of input is minimal.
  */
 Datum
@@ -302,6 +301,7 @@ byteain(PG_FUNCTION_ARGS)
 	char	   *tp;
 	char	   *rp;
 	int			bc;
+	size_t	   input_len;
 	bytea	   *result;
 
 	/* Recognize hex input */
@@ -318,45 +318,28 @@ byteain(PG_FUNCTION_ARGS)
 		PG_RETURN_BYTEA_P(result);
 	}
 
-	/* Else, it's the traditional escaped style */
-	for (bc = 0, tp = inputText; *tp != '\0'; bc++)
-	{
-		if (tp[0] != '\\')
-			tp++;
-		else if ((tp[0] == '\\') &&
-				 (tp[1] >= '0' && tp[1] <= '3') &&
-				 (tp[2] >= '0' && tp[2] <= '7') &&
-				 (tp[3] >= '0' && tp[3] <= '7'))
-			tp += 4;
-		else if ((tp[0] == '\\') &&
-				 (tp[1] == '\\'))
-			tp += 2;
-		else
-		{
-			/*
-			 * one backslash, not followed by another or ### valid octal
-			 */
-			ereturn(escontext, (Datum) 0,
-					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-					 errmsg("invalid input syntax for type %s", "bytea")));
-		}
-	}
-
-	bc += VARHDRSZ;
-
-	result = (bytea *) palloc(bc);
-	SET_VARSIZE(result, bc);
-
-	tp = inputText;
+	/* Handle traditional escaped style in a single pass */
+	input_len = strlen(inputText);
+	result = palloc(input_len + VARHDRSZ);  /* Allocate max possible size */
 	rp = VARDATA(result);
+	tp = inputText;
+
 	while (*tp != '\0')
 	{
 		if (tp[0] != '\\')
+		{
 			*rp++ = *tp++;
-		else if ((tp[0] == '\\') &&
-				 (tp[1] >= '0' && tp[1] <= '3') &&
-				 (tp[2] >= '0' && tp[2] <= '7') &&
-				 (tp[3] >= '0' && tp[3] <= '7'))
+			continue;
+		}
+
+		if (tp[1] == '\\')
+		{
+			*rp++ = '\\';
+			tp += 2;
+		}
+		else if ((tp[1] >= '0' && tp[1] <= '3') && 
+			 (tp[2] >= '0' && tp[2] <= '7') && 
+			 (tp[3] >= '0' && tp[3] <= '7'))
 		{
 			bc = VAL(tp[1]);
 			bc <<= 3;
@@ -366,23 +349,18 @@ byteain(PG_FUNCTION_ARGS)
 
 			tp += 4;
 		}
-		else if ((tp[0] == '\\') &&
-				 (tp[1] == '\\'))
-		{
-			*rp++ = '\\';
-			tp += 2;
-		}
 		else
 		{
-			/*
-			 * We should never get here. The first pass should not allow it.
-			 */
+			/* Invalid escape sequence: report error */
 			ereturn(escontext, (Datum) 0,
 					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 					 errmsg("invalid input syntax for type %s", "bytea")));
 		}
 	}
 
+	/* Set the actual size of the bytea */
+	SET_VARSIZE(result, (rp - VARDATA(result)) + VARHDRSZ);
+
 	PG_RETURN_BYTEA_P(result);
 }
 
-- 
2.43.0

