From 2060f79bd3ee29809788a466700a9dc49459ca8e Mon Sep 17 00:00:00 2001
From: AyoubKAZ <kazarayoub2004@gmail.com>
Date: Sat, 14 Feb 2026 15:12:42 +0100
Subject: [PATCH] Speed up COPY TO text/CSV using SIMD

Use SIMD to scan for special characters in COPY TO, processing 16+ bytes
at a time instead of byte-by-byte. This speeds up export of fields that
contain few characters requiring escaping. The SIMD scan only skips the
leading run of safe bytes in a field; once a special character is found,
the code falls back to the existing scalar path for the rest of the field.
---
 src/backend/commands/copyto.c | 159 +++++++++++++++++++++++++++++++++-
 1 file changed, 157 insertions(+), 2 deletions(-)

diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index 9ceeff6d99e..49198137531 100644
--- a/src/backend/commands/copyto.c
+++ b/src/backend/commands/copyto.c
@@ -31,6 +31,8 @@
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
 #include "pgstat.h"
+#include "port/pg_bitutils.h"
+#include "port/simd.h"
 #include "storage/fd.h"
 #include "tcop/tcopprot.h"
 #include "utils/lsyscache.h"
@@ -121,6 +123,142 @@ static void CopyAttributeOutCSV(CopyToState cstate, const char *string,
 								bool use_quote);
 static void CopyRelationTo(CopyToState cstate, Relation rel, Relation root_rel,
 						   uint64 *processed);
+
+/* SIMD helpers that skip leading bytes needing no escaping or quoting */
+static pg_attribute_always_inline void CopySkipTextSIMD(const char **ptr,
+							 size_t len, char delimc);
+static pg_attribute_always_inline void CopyCheckCSVQuoteNeedSIMD(const char **ptr,
+							 size_t len, char delimc, char quotec);
+static pg_attribute_always_inline void CopySkipCSVEscapeSIMD(const char **ptr,
+							 size_t len, char escapec, char quotec);
+
+/*
+ * CopySkipTextSIMD - skip leading bytes needing no escape in TEXT mode
+ *
+ * Scans len bytes from *ptr one Vector8 at a time, leaving *ptr on the first
+ * flagged byte (backslash, delimc, or below 0x20 as judged by vector8_gt) or,
+ * if none, at the unscanned tail, which the caller's scalar loop handles.
+ */
+static pg_attribute_always_inline void
+CopySkipTextSIMD(const char **ptr, size_t len, char delimc)
+{
+#ifndef USE_NO_SIMD
+	const char *p = *ptr;
+	const char *end = p + len;
+
+	/* Comparison vectors are loop-invariant; build them once */
+	Vector8		control_limit = vector8_broadcast(0x20);
+	Vector8		backslash_vec = vector8_broadcast('\\');
+	Vector8		delim_vec = vector8_broadcast(delimc);
+
+	while (p + sizeof(Vector8) <= end)
+	{
+		Vector8		chunk;
+		Vector8		special_mask;
+		uint32		mask;
+
+		vector8_load(&chunk, (const uint8 *) p);
+		/* NOTE(review): a signed vector8_gt would also flag bytes >= 0x80 */
+		special_mask = vector8_or(vector8_gt(control_limit, chunk),
+								  vector8_or(vector8_eq(backslash_vec, chunk),
+											 vector8_eq(delim_vec, chunk)));
+
+		mask = vector8_highbit_mask(special_mask);
+		if (mask != 0)
+		{
+			*ptr = p + pg_rightmost_one_pos32(mask);
+			return;
+		}
+
+		p += sizeof(Vector8);
+	}
+	*ptr = p;
+#endif
+}
+
+/*
+ * CopyCheckCSVQuoteNeedSIMD - SIMD pre-scan for the CSV "needs quoting" test
+ *
+ * Moves *ptr forward over whole Vector8 chunks (within len bytes) that hold
+ * no delimc, quotec, '\n' or '\r'.  On a hit, *ptr is set to the hit byte.
+ */
+static pg_attribute_always_inline void
+CopyCheckCSVQuoteNeedSIMD(const char **ptr, size_t len, char delimc, char quotec)
+{
+#ifndef USE_NO_SIMD
+	const char *cur = *ptr;
+	const char *limit = cur + len;
+	Vector8		delim_vec = vector8_broadcast(delimc);
+	Vector8		quote_vec = vector8_broadcast(quotec);
+	Vector8		lf_vec = vector8_broadcast('\n');
+	Vector8		cr_vec = vector8_broadcast('\r');
+
+	for (; cur + sizeof(Vector8) <= limit; cur += sizeof(Vector8))
+	{
+		Vector8		chunk;
+		Vector8		delim_or_quote;
+		Vector8		line_break;
+		uint32		hits;
+
+		vector8_load(&chunk, (const uint8 *) cur);
+
+		/* one compare per character of interest, then merge the results */
+		delim_or_quote = vector8_or(vector8_eq(chunk, delim_vec),
+									vector8_eq(chunk, quote_vec));
+		line_break = vector8_or(vector8_eq(chunk, lf_vec),
+								vector8_eq(chunk, cr_vec));
+
+		hits = vector8_highbit_mask(vector8_or(delim_or_quote, line_break));
+		if (hits != 0)
+		{
+			/* stop on the hit byte; the caller takes over from there */
+			cur += pg_rightmost_one_pos32(hits);
+			break;
+		}
+	}
+
+	*ptr = cur;
+#endif
+}
+
+/*
+ * CopySkipCSVEscapeSIMD - SIMD pre-scan for the CSV escaping loop
+ *
+ * Moves *ptr forward over whole Vector8 chunks (within len bytes) that hold
+ * neither quotec nor escapec.  On a hit, *ptr is set to the hit byte.
+ */
+static pg_attribute_always_inline void
+CopySkipCSVEscapeSIMD(const char **ptr, size_t len, char escapec, char quotec)
+{
+#ifndef USE_NO_SIMD
+	const char *cur = *ptr;
+	const char *limit = cur + len;
+	Vector8		escape_vec = vector8_broadcast(escapec);
+	Vector8		quote_vec = vector8_broadcast(quotec);
+
+	for (; cur + sizeof(Vector8) <= limit; cur += sizeof(Vector8))
+	{
+		Vector8		chunk;
+		Vector8		is_escape;
+		Vector8		is_quote;
+		uint32		hits;
+
+		vector8_load(&chunk, (const uint8 *) cur);
+		is_escape = vector8_eq(chunk, escape_vec);
+		is_quote = vector8_eq(chunk, quote_vec);
+
+		hits = vector8_highbit_mask(vector8_or(is_escape, is_quote));
+		if (hits != 0)
+		{
+			/* stop on the hit byte; the caller takes over from there */
+			cur += pg_rightmost_one_pos32(hits);
+			break;
+		}
+	}
+
+	*ptr = cur;
+#endif
+}
 
 /* built-in format-specific routines */
 static void CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc);
@@ -1245,9 +1383,14 @@ CopyAttributeOutText(CopyToState cstate, const char *string)
 	const char *start;
 	char		c;
 	char		delimc = cstate->opts.delim[0];
+	size_t len = strlen(string);
 
 	if (cstate->need_transcoding)
-		ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding);
+	{
+		ptr = pg_server_to_any(string, len, cstate->file_encoding);
+		/* Transcoding can change the byte length, so recompute it */
+		len = strlen(ptr);
+	}
 	else
 		ptr = string;
 
@@ -1268,6 +1411,8 @@ CopyAttributeOutText(CopyToState cstate, const char *string)
 	if (cstate->encoding_embeds_ascii)
 	{
 		start = ptr;
+		CopySkipTextSIMD(&ptr, len, delimc);
+
 		while ((c = *ptr) != '\0')
 		{
 			if ((unsigned char) c < (unsigned char) 0x20)
@@ -1328,6 +1473,8 @@ CopyAttributeOutText(CopyToState cstate, const char *string)
 	else
 	{
 		start = ptr;
+		CopySkipTextSIMD(&ptr, len, delimc);
+
 		while ((c = *ptr) != '\0')
 		{
 			if ((unsigned char) c < (unsigned char) 0x20)
@@ -1402,13 +1549,18 @@ CopyAttributeOutCSV(CopyToState cstate, const char *string,
 	char		quotec = cstate->opts.quote[0];
 	char		escapec = cstate->opts.escape[0];
 	bool		single_attr = (list_length(cstate->attnumlist) == 1);
+	size_t 	len = strlen(string);
 
 	/* force quoting if it matches null_print (before conversion!) */
 	if (!use_quote && strcmp(string, cstate->opts.null_print) == 0)
 		use_quote = true;
 
 	if (cstate->need_transcoding)
-		ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding);
+	{
+		ptr = pg_server_to_any(string, len, cstate->file_encoding);
+		/* Transcoding can change the byte length, so recompute it */
+		len = strlen(ptr);
+	}
 	else
 		ptr = string;
 
@@ -1429,6 +1581,7 @@ CopyAttributeOutCSV(CopyToState cstate, const char *string,
 		else
 		{
 			const char *tptr = ptr;
+			CopyCheckCSVQuoteNeedSIMD(&tptr, len, delimc, quotec);
 
 			while ((c = *tptr) != '\0')
 			{
@@ -1453,6 +1606,8 @@ CopyAttributeOutCSV(CopyToState cstate, const char *string,
 		 * We adopt the same optimization strategy as in CopyAttributeOutText
 		 */
 		start = ptr;
+		CopySkipCSVEscapeSIMD(&ptr, len, escapec, quotec);
+
 		while ((c = *ptr) != '\0')
 		{
 			if (c == quotec || c == escapec)
-- 
2.34.1

