From ad050583d3c14bdec44266d8d2110b384fa9d7dc Mon Sep 17 00:00:00 2001
From: Nazir Bilal Yavuz <byavuz81@gmail.com>
Date: Tue, 14 Oct 2025 13:18:13 +0300
Subject: [PATCH v3 2/2] COPY SIMD per-line heuristic

---
 src/include/commands/copyfrom_internal.h |  7 ++
 src/backend/commands/copyfrom.c          |  6 ++
 src/backend/commands/copyfromparse.c     | 82 ++++++++++++++++++++++--
 3 files changed, 89 insertions(+), 6 deletions(-)

diff --git a/src/include/commands/copyfrom_internal.h b/src/include/commands/copyfrom_internal.h
index c8b22af22d8..9dd31320f52 100644
--- a/src/include/commands/copyfrom_internal.h
+++ b/src/include/commands/copyfrom_internal.h
@@ -89,6 +89,13 @@ typedef struct CopyFromStateData
 	const char *cur_attval;		/* current att value for error messages */
 	bool		relname_only;	/* don't output line number, att, etc. */
 
+	/* SIMD variables */
+	bool		simd_continue;
+	bool		simd_initialized;
+	uint16		simd_last_sleep_cycle;
+	uint16		simd_current_sleep_cycle;
+
+
 	/*
 	 * Working state
 	 */
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index 12781963b4f..4bdfd96c244 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -1721,6 +1721,12 @@ BeginCopyFrom(ParseState *pstate,
 	cstate->cur_attval = NULL;
 	cstate->relname_only = false;
 
+	/* Initialize SIMD variables */
+	cstate->simd_continue = false;
+	cstate->simd_initialized = false;
+	cstate->simd_current_sleep_cycle = 0;
+	cstate->simd_last_sleep_cycle = 0;
+
 	/*
 	 * Allocate buffers for the input pipeline.
 	 *
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index 99959a40fab..24cef54e5e4 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -143,12 +143,14 @@ static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
 
 /* non-export function prototypes */
 static bool CopyReadLine(CopyFromState cstate, bool is_csv);
-static bool CopyReadLineText(CopyFromState cstate, bool is_csv);
 static int	CopyReadAttributesText(CopyFromState cstate);
 static int	CopyReadAttributesCSV(CopyFromState cstate);
 static Datum CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo,
 									 Oid typioparam, int32 typmod,
 									 bool *isnull);
+static pg_attribute_always_inline bool CopyReadLineText(CopyFromState cstate,
+														bool is_csv,
+														bool simd_continue);
 static pg_attribute_always_inline bool CopyFromTextLikeOneRow(CopyFromState cstate,
 															  ExprContext *econtext,
 															  Datum *values,
@@ -1173,8 +1175,23 @@ CopyReadLine(CopyFromState cstate, bool is_csv)
 	resetStringInfo(&cstate->line_buf);
 	cstate->line_buf_valid = false;
 
-	/* Parse data and transfer into line_buf */
-	result = CopyReadLineText(cstate, is_csv);
+	/* If that is the first time we do read, initalize the SIMD */
+	if (unlikely(!cstate->simd_initialized))
+	{
+		cstate->simd_initialized = true;
+		cstate->simd_continue = true;
+		cstate->simd_current_sleep_cycle = 0;
+		cstate->simd_last_sleep_cycle = 0;
+	}
+
+	/*
+	 * Parse data and transfer into line_buf. To get benefit from inlining,
+	 * call CopyReadLineText() with the constant boolean variables.
+	 */
+	if (cstate->simd_continue)
+		result = CopyReadLineText(cstate, is_csv, true);
+	else
+		result = CopyReadLineText(cstate, is_csv, false);
 
 	if (result)
 	{
@@ -1241,8 +1258,8 @@ CopyReadLine(CopyFromState cstate, bool is_csv)
 /*
  * CopyReadLineText - inner loop of CopyReadLine for text mode
  */
-static bool
-CopyReadLineText(CopyFromState cstate, bool is_csv)
+static pg_attribute_always_inline bool
+CopyReadLineText(CopyFromState cstate, bool is_csv, bool simd_continue)
 {
 	char	   *copy_input_buf;
 	int			input_buf_ptr;
@@ -1258,11 +1275,16 @@ CopyReadLineText(CopyFromState cstate, bool is_csv)
 	char		escapec = '\0';
 
 #ifndef USE_NO_SIMD
+#define SIMD_SLEEP_MAX 1024
+#define SIMD_ADVANCE_AT_LEAST 5
 	Vector8		nl = vector8_broadcast('\n');
 	Vector8		cr = vector8_broadcast('\r');
 	Vector8		bs = vector8_broadcast('\\');
 	Vector8		quote = vector8_broadcast(0);
 	Vector8		escape = vector8_broadcast(0);
+
+	uint64		simd_total_cycle = 0;
+	uint64		simd_total_advance = 0;
 #endif
 
 	if (is_csv)
@@ -1358,12 +1380,14 @@ CopyReadLineText(CopyFromState cstate, bool is_csv)
 		 * sequentially. - The remaining buffer is smaller than one vector
 		 * width (sizeof(Vector8)); SIMD operates on fixed-size chunks.
 		 */
-		if (!last_was_esc && copy_buf_len - input_buf_ptr >= sizeof(Vector8))
+		if (simd_continue && !last_was_esc && copy_buf_len - input_buf_ptr >= sizeof(Vector8))
 		{
 			Vector8		chunk;
 			Vector8		match = vector8_broadcast(0);
 			uint32		mask;
 
+			simd_total_cycle++;
+
 			/* Load a chunk of data into a vector register */
 			vector8_load(&chunk, (const uint8 *) &copy_input_buf[input_buf_ptr]);
 
@@ -1391,11 +1415,13 @@ CopyReadLineText(CopyFromState cstate, bool is_csv)
 				int			advance = pg_rightmost_one_pos32(mask);
 
 				input_buf_ptr += advance;
+				simd_total_advance += advance;
 			}
 			else
 			{
 				/* No special characters found, so skip the entire chunk */
 				input_buf_ptr += sizeof(Vector8);
+				simd_total_advance += sizeof(Vector8);
 				continue;
 			}
 		}
@@ -1603,6 +1629,50 @@ CopyReadLineText(CopyFromState cstate, bool is_csv)
 		}
 	}							/* end of outer loop */
 
+#ifndef USE_NO_SIMD
+
+	/* SIMD was enabled */
+	if (simd_continue)
+	{
+		/* SIMD is worth */
+		if (simd_total_cycle && simd_total_advance / simd_total_cycle >= SIMD_ADVANCE_AT_LEAST)
+		{
+			Assert(cstate->simd_current_sleep_cycle == 0);
+			cstate->simd_last_sleep_cycle >>= 1;
+		}
+		/* SIMD was enabled but it isn't worth */
+		else
+		{
+			uint16		simd_last_sleep_cycle = cstate->simd_last_sleep_cycle;
+
+			cstate->simd_continue = false;
+
+			if (simd_last_sleep_cycle == 0)
+				simd_last_sleep_cycle = 1;
+			else if (simd_last_sleep_cycle >= SIMD_SLEEP_MAX / 2)
+				simd_last_sleep_cycle = SIMD_SLEEP_MAX;
+			else
+				simd_last_sleep_cycle <<= 1;
+			cstate->simd_current_sleep_cycle = simd_last_sleep_cycle;
+			cstate->simd_last_sleep_cycle = simd_last_sleep_cycle;
+		}
+	}
+	/* SIMD was disabled */
+	else
+	{
+		/*
+		 * We should come here with decrementing
+		 * cstate->simd_current_sleep_cycle from a positive number.
+		 */
+		Assert(cstate->simd_current_sleep_cycle != 0);
+		cstate->simd_current_sleep_cycle--;
+
+		if (cstate->simd_current_sleep_cycle == 0)
+			cstate->simd_continue = true;
+	}
+
+#endif
+
 	/*
 	 * Transfer any still-uncopied data to line_buf.
 	 */
-- 
2.51.0

