From e49334de4eb51f5ba061bf48d1979a7ab8ef0de9 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Sun, 7 Feb 2021 00:10:05 +0200
Subject: [PATCH 1/1] In psql \copy from, send data to server in larger chunks.

Previously, we would send each line as a separate CopyData message.
That's pretty wasteful if the table is narrow, as each CopyData message
has 5 bytes of overhead. For efficiency, buffer up and pack 8 kB of input
data into each CopyData message.
---
 src/bin/psql/copy.c | 114 +++++++++++++++++++++++++++-----------------
 1 file changed, 69 insertions(+), 45 deletions(-)

diff --git a/src/bin/psql/copy.c b/src/bin/psql/copy.c
index 78f0dc5a507..8c56f2470e2 100644
--- a/src/bin/psql/copy.c
+++ b/src/bin/psql/copy.c
@@ -581,77 +581,101 @@ handleCopyIn(PGconn *conn, FILE *copystream, bool isbinary, PGresult **res)
 	else
 	{
 		bool		copydone = false;
+		int			buflen;
+		bool		at_line_begin = true;
 
+		if (showprompt)
+		{
+			const char *prompt = get_prompt(PROMPT_COPY, NULL);
+
+			fputs(prompt, stdout);
+			fflush(stdout);
+		}
+
+		/*
+		 * In text mode, We have to read the input one line at a time, so that
+		 * we can stop reading at the EOF marker (\.).  We mustn't read beyond
+		 * the EOF marker, because if the data is inlined in a SQL script, we
+		 * would eat up the commands after the EOF marker.
+		 */
+		buflen = 0;
 		while (!copydone)
-		{						/* for each input line ... */
-			bool		firstload;
-			bool		linedone;
+		{
+			int			linelen;
+			char	   *fgresult;
 
-			if (showprompt)
+			if (at_line_begin && showprompt)
 			{
-				const char *prompt = get_prompt(PROMPT_COPY, NULL);
+				const char *prompt;
 
+				sigint_interrupt_enabled = false;
+
+				prompt = get_prompt(PROMPT_COPY, NULL);
 				fputs(prompt, stdout);
 				fflush(stdout);
-			}
 
-			firstload = true;
-			linedone = false;
-
-			while (!linedone)
-			{					/* for each bufferload in line ... */
-				int			linelen;
-				char	   *fgresult;
-
-				/* enable longjmp while waiting for input */
 				sigint_interrupt_enabled = true;
+			}
 
-				fgresult = fgets(buf, sizeof(buf), copystream);
-
-				sigint_interrupt_enabled = false;
+			/* enable longjmp while waiting for input */
+			sigint_interrupt_enabled = true;
 
-				if (!fgresult)
-				{
-					copydone = true;
-					break;
-				}
+			fgresult = fgets(&buf[buflen], COPYBUFSIZ - buflen, copystream);
 
-				linelen = strlen(buf);
+			sigint_interrupt_enabled = false;
 
-				/* current line is done? */
-				if (linelen > 0 && buf[linelen - 1] == '\n')
-					linedone = true;
+			if (!fgresult)
+				copydone = true;
+			else
+			{
+				linelen = strlen(fgresult);
+				buflen += linelen;
 
-				/* check for EOF marker, but not on a partial line */
-				if (firstload)
+				if (buf[buflen - 1] == '\n')
 				{
-					/*
-					 * This code erroneously assumes '\.' on a line alone
-					 * inside a quoted CSV string terminates the \copy.
-					 * https://www.postgresql.org/message-id/E1TdNVQ-0001ju-GO@wrigleys.postgresql.org
-					 */
-					if (strcmp(buf, "\\.\n") == 0 ||
-						strcmp(buf, "\\.\r\n") == 0)
+					/* check for EOF marker, but not on a partial line */
+					if (at_line_begin)
 					{
-						copydone = true;
-						break;
+						/*
+						 * This code erroneously assumes '\.' on a line alone
+						 * inside a quoted CSV string terminates the \copy.
+						 * https://www.postgresql.org/message-id/E1TdNVQ-0001ju-GO@wrigleys.postgresql.org
+						 */
+						if ((linelen == 3 && memcmp(fgresult, "\\.\n", 3) == 0) ||
+							(linelen == 4 && memcmp(fgresult, "\\.\r\n", 4) == 0))
+						{
+							copydone = true;
+						}
 					}
 
-					firstload = false;
+					if (copystream == pset.cur_cmd_source)
+					{
+						pset.lineno++;
+						pset.stmt_lineno++;
+					}
+					at_line_begin = true;
 				}
+				else
+					at_line_begin = false;
+			}
 
-				if (PQputCopyData(conn, buf, linelen) <= 0)
+			/*
+			 * If the buffer is full, or we've reached the EOF, flush it.
+			 *
+			 * Make sure there's always space for four more bytes in the buffer,
+			 * plus a NUL terminator. That way, an EOF marker is never split
+			 * across two fgets() calls, which simplies the logic.
+			 */
+			if (buflen >= COPYBUFSIZ - 5 || (copydone && buflen > 0))
+			{
+				if (PQputCopyData(conn, buf, buflen) <= 0)
 				{
 					OK = false;
 					copydone = true;
 					break;
 				}
-			}
 
-			if (copystream == pset.cur_cmd_source)
-			{
-				pset.lineno++;
-				pset.stmt_lineno++;
+				buflen = 0;
 			}
 		}
 	}
-- 
2.30.0

