From 502f4c64babf6049181edb25bbf1f7445f916fd8 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Thu, 28 Jan 2021 18:42:25 +0200
Subject: [PATCH v3 5/5] Do COPY FROM encoding conversion/verification in
 larger chunks.

NOTE: This changes behavior in one corner-case: if client and server
encodings are the same single-byte encoding (e.g. latin1), previously the
input would not be checked for zero bytes ('\0'). Any fields containing
zero bytes would be truncated at the zero. But if encoding conversion was
needed, the conversion routine would throw an error on the zero. After
this commit, the input is always checked for zeros.
---
 src/backend/commands/copyfrom.c           |  58 +--
 src/backend/commands/copyfromparse.c      | 473 ++++++++++++++++------
 src/include/commands/copyfrom_internal.h  |  53 +--
 src/test/regress/expected/copycorners.out | 202 +++++++++
 src/test/regress/sql/copycorners.sql      |  90 ++++
 5 files changed, 715 insertions(+), 161 deletions(-)
 create mode 100644 src/test/regress/expected/copycorners.out
 create mode 100644 src/test/regress/sql/copycorners.sql
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index c39cc736ed2..3f787b885ae 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -3,6 +3,12 @@
  * copyfrom.c
  *		COPY <table> FROM file/program/client
  *
+ * This file contains routines needed to efficiently load tuples into a
+ * table. That includes looking up the correct partition, firing triggers,
+ * calling the table AM function to insert the data, and updating indexes.
+ * Reading data from the input file or client and parsing it into Datums
+ * is handled in copyfromparse.c.
+ *
  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
@@ -23,6 +29,7 @@
 #include "access/tableam.h"
 #include "access/xact.h"
 #include "access/xlog.h"
+#include "catalog/namespace.h"
 #include "commands/copy.h"
 #include "commands/copyfrom_internal.h"
 #include "commands/progress.h"
@@ -149,15 +156,9 @@ CopyFromErrorCallback(void *arg)
 			/*
 			 * Error is relevant to a particular line.
 			 *
-			 * If line_buf still contains the correct line, and it's already
-			 * transcoded, print it. If it's still in a foreign encoding, it's
-			 * quite likely that the error is precisely a failure to do
-			 * encoding conversion (ie, bad data). We dare not try to convert
-			 * it, and at present there's no way to regurgitate it without
-			 * conversion. So we have to punt and just report the line number.
+			 * If line_buf still contains the correct line, print it.
 			 */
-			if (cstate->line_buf_valid &&
-				(cstate->line_buf_converted || !cstate->need_transcoding))
+			if (cstate->line_buf_valid)
 			{
 				char	   *lineval;
 
@@ -1305,15 +1306,20 @@ BeginCopyFrom(ParseState *pstate,
 		cstate->file_encoding = cstate->opts.file_encoding;
 
 	/*
-	 * Set up encoding conversion info.  Even if the file and server encodings
-	 * are the same, we must apply pg_any_to_server() to validate data in
-	 * multibyte encodings.
+	 * Look up encoding conversion function.
 	 */
-	cstate->need_transcoding =
-		(cstate->file_encoding != GetDatabaseEncoding() ||
-		 pg_database_encoding_max_length() > 1);
-	/* See Multibyte encoding comment above */
-	cstate->encoding_embeds_ascii = PG_ENCODING_IS_CLIENT_ONLY(cstate->file_encoding);
+	if (cstate->file_encoding == GetDatabaseEncoding() ||
+		cstate->file_encoding == PG_SQL_ASCII ||
+		GetDatabaseEncoding() == PG_SQL_ASCII)
+	{
+		cstate->need_transcoding = false;
+	}
+	else
+	{
+		cstate->need_transcoding = true;
+		cstate->conversion_proc = FindDefaultConversionProc(cstate->file_encoding,
+															GetDatabaseEncoding());
+	}
 
 	cstate->copy_src = COPY_FILE;	/* default */
 
@@ -1332,17 +1338,23 @@ BeginCopyFrom(ParseState *pstate,
 	cstate->cur_attval = NULL;
 
 	/*
-	 * Set up variables to avoid per-attribute overhead.  attribute_buf and
-	 * raw_buf are used in both text and binary modes, but we use line_buf
-	 * only in text mode.
+	 * Allocate buffers for the input pipeline.
+	 *
+	 * attribute_buf and input_buf are used in both text and binary modes, but
+	 * we use line_buf only in text mode.
 	 */
 	initStringInfo(&cstate->attribute_buf);
-	cstate->raw_buf = (char *) palloc(RAW_BUF_SIZE + 1);
-	cstate->raw_buf_index = cstate->raw_buf_len = 0;
+
 	if (!cstate->opts.binary)
-	{
 		initStringInfo(&cstate->line_buf);
-		cstate->line_buf_converted = false;
+
+	cstate->input_buf = (char *) palloc(INPUT_BUF_SIZE + 1);
+	cstate->input_buf_index = cstate->input_buf_len = 0;
+
+	if (!cstate->opts.binary && cstate->need_transcoding)
+	{
+		cstate->raw_buf = palloc(RAW_BUF_SIZE);
+		cstate->raw_buf_index = cstate->raw_buf_len = 0;
 	}
 
 	/* Assign range table, we'll need it in CopyFrom. */
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index 4c74067f849..03d1c621792 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -3,6 +3,48 @@
  * copyfromparse.c
  *		Parse CSV/text/binary format for COPY FROM.
  *
+ * This file contains routines to parse the text, CSV and binary input
+ * formats.  The main entry point is NextCopyFrom(), which parses the
+ * next input line and returns it as Datums.
+ *
+ * In text/CSV mode, the parsing happens in multiple stages:
+ *
+ * [data source] --> raw_buf --> input_buf --> line_buf --> attribute_buf
+ *                1.          2.            3.           4.
+ *
+ * 1. CopyLoadRawBuf() reads raw data from the input file or client, and
+ * places it into 'raw_buf'.
+ *
+ * 2. CopyLoadInputBufText() calls the encoding conversion function to
+ * convert the data in 'raw_buf' from client to server encoding, placing the
+ * converted result in 'input_buf'.
+ *
+ * 3. CopyReadLine() parses the data in 'input_buf', one line at a time.
+ * It is responsible for finding the next newline marker, taking quote and
+ * escape characters into account according to the COPY options.  The line
+ * is copied into 'line_buf', with quotes and escape characters still intact.
+ *
+ * 4. CopyReadAttributesText/CSV() takes the input line from
+ * 'line_buf', and splits it into fields, unescaping the data as required.
+ * The fields are stored in 'attribute_buf', and 'raw_fields' array holds
+ * pointers to each field.
+ *
+ * If encoding conversion is not required, a shortcut is taken in step 2
+ * to avoid copying the raw data unnecessarily.  The 'input_buf' pointer is
+ * set to point directly to 'raw_buf', so that CopyLoadRawBuf() loads the
+ * raw data directly into 'input_buf'.  CopyLoadInputBufText() then merely
+ * validates that the data is valid in the current encoding.
+ *
+ * In binary mode, the pipeline is much simpler.  Input is loaded directly
+ * into 'input_buf', and encoding conversion is done in the datatype-specific
+ * receive functions, if required.  'line_buf' is not used, but
+ * 'attribute_buf' is used as a temporary buffer to hold one attribute's data
+ * when it's passed to the receive function.
+ *
+ * input_buf is always 64 kB in size.  'raw_buf' is also 64 kB, if encoding
+ * conversion is required.  'line_buf' and 'attribute_buf' are expanded on
+ * demand, to hold the longest line encountered so far.
+ *
  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
@@ -35,7 +77,7 @@
 #define OCTVALUE(c) ((c) - '0')
 
 /*
- * These macros centralize code used to process line_buf and raw_buf buffers.
+ * These macros centralize code used to process line_buf and input_buf buffers.
  * They are macros because they often do continue/break control and to avoid
  * function call overhead in tight COPY loops.
  *
@@ -53,9 +95,9 @@
 #define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen) \
 if (1) \
 { \
-	if (raw_buf_ptr + (extralen) >= copy_buf_len && !hit_eof) \
+	if (input_buf_ptr + (extralen) >= copy_buf_len && !hit_eof) \
 	{ \
-		raw_buf_ptr = prev_raw_ptr; /* undo fetch */ \
+		input_buf_ptr = prev_raw_ptr; /* undo fetch */ \
 		need_data = true; \
 		continue; \
 	} \
@@ -65,10 +107,10 @@ if (1) \
 #define IF_NEED_REFILL_AND_EOF_BREAK(extralen) \
 if (1) \
 { \
-	if (raw_buf_ptr + (extralen) >= copy_buf_len && hit_eof) \
+	if (input_buf_ptr + (extralen) >= copy_buf_len && hit_eof) \
 	{ \
 		if (extralen) \
-			raw_buf_ptr = copy_buf_len; /* consume the partial character */ \
+			input_buf_ptr = copy_buf_len; /* consume the partial character */ \
 		/* backslash just before EOF, treat as data char */ \
 		result = true; \
 		break; \
@@ -77,17 +119,17 @@ if (1) \
 
 /*
  * Transfer any approved data to line_buf; must do this to be sure
- * there is some room in raw_buf.
+ * there is some room in input_buf.
  */
 #define REFILL_LINEBUF \
 if (1) \
 { \
-	if (raw_buf_ptr > cstate->raw_buf_index) \
+	if (input_buf_ptr > cstate->input_buf_index) \
 	{ \
 		appendBinaryStringInfo(&cstate->line_buf, \
-							 cstate->raw_buf + cstate->raw_buf_index, \
-							   raw_buf_ptr - cstate->raw_buf_index); \
-		cstate->raw_buf_index = raw_buf_ptr; \
+							 cstate->input_buf + cstate->input_buf_index, \
+							   input_buf_ptr - cstate->input_buf_index); \
+		cstate->input_buf_index = input_buf_ptr; \
 	} \
 } else ((void) 0)
 
@@ -95,7 +137,7 @@ if (1) \
 #define NO_END_OF_COPY_GOTO \
 if (1) \
 { \
-	raw_buf_ptr = prev_raw_ptr + 1; \
+	input_buf_ptr = prev_raw_ptr + 1; \
 	goto not_end_of_copy; \
 } else ((void) 0)
 
@@ -118,7 +160,8 @@ static int	CopyGetData(CopyFromState cstate, void *databuf,
 						int minread, int maxread);
 static inline bool CopyGetInt32(CopyFromState cstate, int32 *val);
 static inline bool CopyGetInt16(CopyFromState cstate, int16 *val);
-static bool CopyLoadRawBuf(CopyFromState cstate);
+static bool CopyLoadInputBufText(CopyFromState cstate);
+static bool CopyLoadInputBufBinary(CopyFromState cstate);
 static int	CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes);
 
 void
@@ -359,42 +402,286 @@ CopyGetInt16(CopyFromState cstate, int16 *val)
 	return true;
 }
 
+/*
+ * Perform encoding conversion on data in 'raw_buf', writing the converted
+ * data into 'input_buf'.
+ *
+ * On entry, there must be some data to convert in 'raw_buf'.
+ */
+static void
+CopyConvertBuf(CopyFromState cstate)
+{
+	int			convertedbytes;
+	unsigned char *src;
+	int			srclen;
+	unsigned char *dst;
+	int			dstlen;
+
+	Assert(cstate->raw_buf_len > 0);
+	/*
+	 * we assume that the caller has moved any remaining data in the
+	 * buffer to the beginning.
+	 */
+	Assert(cstate->input_buf_index == 0);
+
+	src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
+	srclen = cstate->raw_buf_len - cstate->raw_buf_index;
+	dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
+	dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;
+
+	/*
+	 * Do the conversion. This might stop short, if there is an invalid byte
+	 * sequence in the input. We'll convert as much as we can in that case.
+	 *
+	 * Note: Even if we hit an invalid byte sequence, we don't report the error
+	 * until all the valid bytes have been consumed.  The input might contain
+	 * an end-of-input marker (\.), and we don't want to report an error if
+	 * the invalid byte sequence is after the end-of-input marker.  We might
+	 * unnecessarily convert some data after the end-of-input marker as long
+	 * as it's valid for the encoding, but that's harmless.
+	 */
+	convertedbytes = pg_do_encoding_conversion_buf(cstate->conversion_proc,
+												   cstate->file_encoding,
+												   GetDatabaseEncoding(),
+												   src, srclen,
+												   dst, dstlen,
+												   true);
+	if (convertedbytes == 0)
+	{
+		/*
+		 * No more valid input in the buffer, and we have hit an invalid or
+		 * untranslatable byte sequence. Call the conversion function again,
+		 * with noError=false, to let it throw an appropriate error.
+		 */
+		(void) pg_do_encoding_conversion_buf(cstate->conversion_proc,
+											 cstate->file_encoding,
+											 GetDatabaseEncoding(),
+											 src, srclen,
+											 dst, dstlen,
+											 false);
+		/*
+		 * Should not get here, because if the input contained invalid data on the
+		 * first call, the second pg_do_encoding_conversion_buf with noError = false
+		 * should've reported an error. But error out here anyway, just in case
+		 * the conversion function messed up.
+		 */
+		elog(ERROR, "encoding conversion failed without error");
+	}
+	cstate->raw_buf_index += convertedbytes;
+	cstate->input_buf_len += strlen((char *) dst);
+}
 
 /*
- * CopyLoadRawBuf loads some more data into raw_buf
+ * Load more data from data source to raw_buf.
  *
- * Returns true if able to obtain at least one more byte, else false.
+ * Returns true if at least one more byte was loaded, or false if EOF was reached.
  *
  * If RAW_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the start
- * of the buffer and then we load more data after that.  This case occurs only
- * when a multibyte character crosses a bufferload boundary.
+ * of the buffer and then we load more data after that.
  */
 static bool
 CopyLoadRawBuf(CopyFromState cstate)
 {
-	int			nbytes = RAW_BUF_BYTES(cstate);
+	int			nbytes;
 	int			inbytes;
 
+	/*
+	 * If encoding conversion is not required, raw_buf and input_buf point
+	 * to the same buffer. Their len/index should agree, too, otherwise
+	 * we are confused.
+	 */
+	if (cstate->raw_buf == cstate->input_buf)
+	{
+		Assert(!cstate->need_transcoding);
+		Assert(cstate->raw_buf_index == cstate->input_buf_index);
+		Assert(cstate->input_buf_len <= cstate->raw_buf_len);
+	}
+
 	/* Copy down the unprocessed data if any. */
-	if (nbytes > 0)
+	nbytes = RAW_BUF_BYTES(cstate);
+	if (nbytes > 0 && cstate->raw_buf_index > 0)
 		memmove(cstate->raw_buf, cstate->raw_buf + cstate->raw_buf_index,
 				nbytes);
+	cstate->raw_buf_len -= cstate->raw_buf_index;
+	cstate->raw_buf_index = 0;
+
+	/*
+	 * If raw_buf and input_buf are in fact the same buffer, adjust the
+	 * input_buf variables, too.
+	 */
+	if (cstate->raw_buf == cstate->input_buf)
+	{
+		cstate->input_buf_len -= cstate->input_buf_index;
+		cstate->input_buf_index = 0;
+	}
+
+	/* Load more data */
+	inbytes = CopyGetData(cstate, cstate->raw_buf + cstate->raw_buf_len,
+						  1, RAW_BUF_SIZE - cstate->raw_buf_len);
+	cstate->raw_buf_len += inbytes;
+
+	cstate->bytes_processed += inbytes;
+	pgstat_progress_update_param(PROGRESS_COPY_BYTES_PROCESSED, cstate->bytes_processed);
+
+	return (inbytes > 0);
+}
+
+/*
+ * CopyLoadInputBufText loads some more data into input_buf
+ *
+ * Returns true if able to obtain at least one more byte, else false.
+ *
+ * If INPUT_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the start
+ * of the buffer and then we load more data after that.
+ */
+static bool
+CopyLoadInputBufText(CopyFromState cstate)
+{
+	int			nbytes;
+
+	if (!cstate->need_transcoding)
+	{
+		/*
+		 * If the file and server encoding are the same, no encoding conversion
+		 * is required, and we can load the input data directly into 'input_buf'.
+		 * However, we still need to verify that the input is valid for the encoding.
+		 *
+		 * FIXME: for single-byte encoding, the verification only needs to check
+		 * that the input doesn't contain any zero bytes. Could we skip that
+		 * altogether?
+		 *
+		 * On entry, input_buf_len indicates how many bytes in the buffer have
+		 * already been validated.  raw_buf_len can be larger, if there was an
+		 * incomplete multi-byte character at the bufferload boundary, or if the
+		 * input contains an invalid character.
+		 */
+		for (;;)
+		{
+			int			verified_bytes = INPUT_BUF_BYTES(cstate);
+			int			unverified_bytes = cstate->raw_buf_len - cstate->input_buf_len;
+			int			nvalidated;
+
+			/* Load more bytes to the buffer */
+			cstate->raw_buf_index = cstate->input_buf_index;
+			cstate->raw_buf = cstate->input_buf;
+			if (!CopyLoadRawBuf(cstate))
+			{
+				/*
+				 * EOF reached. If we have any unverified bytes left, it means
+				 * that there was an incomplete multi-byte character at the end.
+				 */
+				if (unverified_bytes > 0)
+					report_invalid_encoding(cstate->file_encoding,
+											cstate->raw_buf + verified_bytes,
+											unverified_bytes);
 
-	inbytes = CopyGetData(cstate, cstate->raw_buf + nbytes,
-						  1, RAW_BUF_SIZE - nbytes);
+				/* truly hit EOF */
+				return false;
+			}
+			Assert(cstate->raw_buf_index == 0);
+			Assert(cstate->input_buf_index == 0);
+			unverified_bytes = cstate->raw_buf_len - verified_bytes;
+			Assert(unverified_bytes > 0);
+
+			/* Verify the new data (including any unverified bytes from previous round) */
+			nvalidated = pg_encoding_verifymbstr(cstate->file_encoding,
+												 cstate->raw_buf + verified_bytes,
+												 unverified_bytes);
+			if (nvalidated == 0)
+			{
+				/*
+				 * No valid characters in the buffer.  It could be because
+				 * there are only a few bytes in the buffer, and they don't form
+				 * any whole characters. In that case, load more data.  But if
+				 * we have enough data, then it must be an invalid byte
+				 * sequence.
+				 */
+				if (unverified_bytes < pg_database_encoding_max_length())
+					continue;
+				else
+					report_invalid_encoding(cstate->file_encoding, cstate->raw_buf + verified_bytes,
+											unverified_bytes);
+			}
+			verified_bytes += nvalidated;
+
+			cstate->input_buf_index = 0;
+			cstate->input_buf_len = verified_bytes;
+			return true;
+		}
+	}
+	else
+	{
+		/*
+		 * Encoding conversion is needed. First, copy down the unprocessed data
+		 * if any.
+		 */
+		nbytes = INPUT_BUF_BYTES(cstate);
+		if (nbytes > 0 && cstate->input_buf_index > 0)
+			memmove(cstate->input_buf, cstate->input_buf + cstate->input_buf_index,
+					nbytes);
+		cstate->input_buf_index = 0;
+		cstate->input_buf_len = nbytes;
+
+		for (;;)
+		{
+			/* If we still have a good amount of unconverted data left, convert it. */
+			nbytes = cstate->raw_buf_len - cstate->raw_buf_index;
+			if (nbytes >= MAX_CONVERSION_GROWTH)
+			{
+				CopyConvertBuf(cstate);
+				break;
+			}
+
+			/* Load more bytes to the raw buffer */
+			if (!CopyLoadRawBuf(cstate))
+			{
+				/* Hit EOF. If we have any unconverted bytes left, convert them now */
+				if (cstate->raw_buf_index < cstate->raw_buf_len)
+				{
+					CopyConvertBuf(cstate);
+					break;
+				}
+
+				/*
+				 * No more input data, and no unconverted data remains in raw_buf.
+				 * Report the EOF to the caller.
+				 */
+				return false;
+			}
+		}
+	}
+
+	return true;
+}
+
+static bool
+CopyLoadInputBufBinary(CopyFromState cstate)
+{
+	int			nbytes = INPUT_BUF_BYTES(cstate);
+	int			inbytes;
+
+	/* Copy down the unprocessed data if any. */
+	if (nbytes > 0)
+		memmove(cstate->input_buf, cstate->input_buf + cstate->input_buf_index,
+				nbytes);
+
+	inbytes = CopyGetData(cstate, cstate->input_buf + nbytes,
+						  1, INPUT_BUF_SIZE - nbytes);
 	nbytes += inbytes;
-	cstate->raw_buf[nbytes] = '\0';
-	cstate->raw_buf_index = 0;
-	cstate->raw_buf_len = nbytes;
+	cstate->input_buf[nbytes] = '\0';
+	cstate->input_buf_index = 0;
+	cstate->input_buf_len = nbytes;
+
 	cstate->bytes_processed += nbytes;
 	pgstat_progress_update_param(PROGRESS_COPY_BYTES_PROCESSED, cstate->bytes_processed);
+
 	return (inbytes > 0);
 }
 
 /*
  * CopyReadBinaryData
  *
- * Reads up to 'nbytes' bytes from cstate->copy_file via cstate->raw_buf
+ * Reads up to 'nbytes' bytes from cstate->copy_file via cstate->input_buf
  * and writes them to 'dest'.  Returns the number of bytes read (which
  * would be less than 'nbytes' only if we reach EOF).
  */
@@ -403,11 +690,11 @@ CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes)
 {
 	int			copied_bytes = 0;
 
-	if (RAW_BUF_BYTES(cstate) >= nbytes)
+	if (INPUT_BUF_BYTES(cstate) >= nbytes)
 	{
 		/* Enough bytes are present in the buffer. */
-		memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, nbytes);
-		cstate->raw_buf_index += nbytes;
+		memcpy(dest, cstate->input_buf + cstate->input_buf_index, nbytes);
+		cstate->input_buf_index += nbytes;
 		copied_bytes = nbytes;
 	}
 	else
@@ -421,16 +708,16 @@ CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes)
 			int			copy_bytes;
 
 			/* Load more data if buffer is empty. */
-			if (RAW_BUF_BYTES(cstate) == 0)
+			if (INPUT_BUF_BYTES(cstate) == 0)
 			{
-				if (!CopyLoadRawBuf(cstate))
+				if (!CopyLoadInputBufBinary(cstate))
 					break;		/* EOF */
 			}
 
 			/* Transfer some bytes. */
-			copy_bytes = Min(nbytes - copied_bytes, RAW_BUF_BYTES(cstate));
-			memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, copy_bytes);
-			cstate->raw_buf_index += copy_bytes;
+			copy_bytes = Min(nbytes - copied_bytes, INPUT_BUF_BYTES(cstate));
+			memcpy(dest, cstate->input_buf + cstate->input_buf_index, copy_bytes);
+			cstate->input_buf_index += copy_bytes;
 			dest += copy_bytes;
 			copied_bytes += copy_bytes;
 		} while (copied_bytes < nbytes);
@@ -699,9 +986,6 @@ CopyReadLine(CopyFromState cstate)
 	resetStringInfo(&cstate->line_buf);
 	cstate->line_buf_valid = true;
 
-	/* Mark that encoding conversion hasn't occurred yet */
-	cstate->line_buf_converted = false;
-
 	/* Parse data and transfer into line_buf */
 	result = CopyReadLineText(cstate);
 
@@ -714,10 +998,13 @@ CopyReadLine(CopyFromState cstate)
 		 */
 		if (cstate->copy_src == COPY_NEW_FE)
 		{
+			int			inbytes;
+
 			do
 			{
-				cstate->raw_buf_index = cstate->raw_buf_len;
-			} while (CopyLoadRawBuf(cstate));
+				inbytes = CopyGetData(cstate, cstate->input_buf,
+									  1, INPUT_BUF_SIZE);
+			} while (inbytes > 0);
 		}
 	}
 	else
@@ -754,26 +1041,6 @@ CopyReadLine(CopyFromState cstate)
 		}
 	}
 
-	/* Done reading the line.  Convert it to server encoding. */
-	if (cstate->need_transcoding)
-	{
-		char	   *cvt;
-
-		cvt = pg_any_to_server(cstate->line_buf.data,
-							   cstate->line_buf.len,
-							   cstate->file_encoding);
-		if (cvt != cstate->line_buf.data)
-		{
-			/* transfer converted data back to line_buf */
-			resetStringInfo(&cstate->line_buf);
-			appendBinaryStringInfo(&cstate->line_buf, cvt, strlen(cvt));
-			pfree(cvt);
-		}
-	}
-
-	/* Now it's safe to use the buffer in error messages */
-	cstate->line_buf_converted = true;
-
 	return result;
 }
 
@@ -783,13 +1050,12 @@ CopyReadLine(CopyFromState cstate)
 static bool
 CopyReadLineText(CopyFromState cstate)
 {
-	char	   *copy_raw_buf;
-	int			raw_buf_ptr;
+	char	   *copy_input_buf;
+	int			input_buf_ptr;
 	int			copy_buf_len;
 	bool		need_data = false;
 	bool		hit_eof = false;
 	bool		result = false;
-	char		mblen_str[2];
 
 	/* CSV variables */
 	bool		first_char_in_line = true;
@@ -807,8 +1073,6 @@ CopyReadLineText(CopyFromState cstate)
 			escapec = '\0';
 	}
 
-	mblen_str[1] = '\0';
-
 	/*
 	 * The objective of this loop is to transfer the entire next input line
 	 * into line_buf.  Hence, we only care for detecting newlines (\r and/or
@@ -821,18 +1085,25 @@ CopyReadLineText(CopyFromState cstate)
 	 * These four characters, and the CSV escape and quote characters, are
 	 * assumed the same in frontend and backend encodings.
 	 *
-	 * For speed, we try to move data from raw_buf to line_buf in chunks
-	 * rather than one character at a time.  raw_buf_ptr points to the next
-	 * character to examine; any characters from raw_buf_index to raw_buf_ptr
+	 * The input has already been converted to the database encoding. All
+	 * supported server encodings have the property that all bytes in a
+	 * multi-byte sequence have the high bit set, so a multibyte character
+	 * cannot contain any newline or escape characters embedded in the
+	 * multibyte sequence. Therefore, we can process the input byte-by-byte,
+	 * regardless of the encoding.
+	 *
+	 * For speed, we try to move data from input_buf to line_buf in chunks
+	 * rather than one character at a time.  input_buf_ptr points to the next
+	 * character to examine; any characters from input_buf_index to input_buf_ptr
 	 * have been determined to be part of the line, but not yet transferred to
 	 * line_buf.
 	 *
-	 * For a little extra speed within the loop, we copy raw_buf and
-	 * raw_buf_len into local variables.
+	 * For a little extra speed within the loop, we copy input_buf and
+	 * input_buf_len into local variables.
 	 */
-	copy_raw_buf = cstate->raw_buf;
-	raw_buf_ptr = cstate->raw_buf_index;
-	copy_buf_len = cstate->raw_buf_len;
+	copy_input_buf = cstate->input_buf;
+	input_buf_ptr = cstate->input_buf_index;
+	copy_buf_len = cstate->input_buf_len;
 
 	for (;;)
 	{
@@ -849,18 +1120,18 @@ CopyReadLineText(CopyFromState cstate)
 		 * cstate->copy_src != COPY_OLD_FE, but it hardly seems worth it,
 		 * considering the size of the buffer.
 		 */
-		if (raw_buf_ptr >= copy_buf_len || need_data)
+		if (input_buf_ptr >= copy_buf_len || need_data)
 		{
 			REFILL_LINEBUF;
 
 			/*
 			 * Try to read some more data.  This will certainly reset
-			 * raw_buf_index to zero, and raw_buf_ptr must go with it.
+			 * input_buf_index to zero, and input_buf_ptr must go with it.
 			 */
-			if (!CopyLoadRawBuf(cstate))
+			if (!CopyLoadInputBufText(cstate))
 				hit_eof = true;
-			raw_buf_ptr = 0;
-			copy_buf_len = cstate->raw_buf_len;
+			input_buf_ptr = 0;
+			copy_buf_len = cstate->input_buf_len;
 
 			/*
 			 * If we are completely out of data, break out of the loop,
@@ -875,8 +1146,8 @@ CopyReadLineText(CopyFromState cstate)
 		}
 
 		/* OK to fetch a character */
-		prev_raw_ptr = raw_buf_ptr;
-		c = copy_raw_buf[raw_buf_ptr++];
+		prev_raw_ptr = input_buf_ptr;
+		c = copy_input_buf[input_buf_ptr++];
 
 		if (cstate->opts.csv_mode)
 		{
@@ -930,16 +1201,16 @@ CopyReadLineText(CopyFromState cstate)
 				 * If need more data, go back to loop top to load it.
 				 *
 				 * Note that if we are at EOF, c will wind up as '\0' because
-				 * of the guaranteed pad of raw_buf.
+				 * of the guaranteed pad of input_buf.
 				 */
 				IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
 
 				/* get next char */
-				c = copy_raw_buf[raw_buf_ptr];
+				c = copy_input_buf[input_buf_ptr];
 
 				if (c == '\n')
 				{
-					raw_buf_ptr++;	/* eat newline */
+					input_buf_ptr++;	/* eat newline */
 					cstate->eol_type = EOL_CRNL;	/* in case not set yet */
 				}
 				else
@@ -1009,11 +1280,11 @@ CopyReadLineText(CopyFromState cstate)
 			 * through and continue processing for file encoding.
 			 * -----
 			 */
-			c2 = copy_raw_buf[raw_buf_ptr];
+			c2 = copy_input_buf[input_buf_ptr];
 
 			if (c2 == '.')
 			{
-				raw_buf_ptr++;	/* consume the '.' */
+				input_buf_ptr++;	/* consume the '.' */
 
 				/*
 				 * Note: if we loop back for more data here, it does not
@@ -1025,7 +1296,7 @@ CopyReadLineText(CopyFromState cstate)
 					/* Get the next character */
 					IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
 					/* if hit_eof, c2 will become '\0' */
-					c2 = copy_raw_buf[raw_buf_ptr++];
+					c2 = copy_input_buf[input_buf_ptr++];
 
 					if (c2 == '\n')
 					{
@@ -1050,7 +1321,7 @@ CopyReadLineText(CopyFromState cstate)
 				/* Get the next character */
 				IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
 				/* if hit_eof, c2 will become '\0' */
-				c2 = copy_raw_buf[raw_buf_ptr++];
+				c2 = copy_input_buf[input_buf_ptr++];
 
 				if (c2 != '\r' && c2 != '\n')
 				{
@@ -1075,11 +1346,11 @@ CopyReadLineText(CopyFromState cstate)
 				 * Transfer only the data before the \. into line_buf, then
 				 * discard the data and the \. sequence.
 				 */
-				if (prev_raw_ptr > cstate->raw_buf_index)
+				if (prev_raw_ptr > cstate->input_buf_index)
 					appendBinaryStringInfo(&cstate->line_buf,
-										   cstate->raw_buf + cstate->raw_buf_index,
-										   prev_raw_ptr - cstate->raw_buf_index);
-				cstate->raw_buf_index = raw_buf_ptr;
+										   cstate->input_buf + cstate->input_buf_index,
+										   prev_raw_ptr - cstate->input_buf_index);
+				cstate->input_buf_index = input_buf_ptr;
 				result = true;	/* report EOF */
 				break;
 			}
@@ -1096,7 +1367,7 @@ CopyReadLineText(CopyFromState cstate)
 				 * character after the backslash just like a normal character,
 				 * so we don't increment in those cases.
 				 */
-				raw_buf_ptr++;
+				input_buf_ptr++;
 		}
 
 		/*
@@ -1106,30 +1377,6 @@ CopyReadLineText(CopyFromState cstate)
 		 * value, while in non-CSV mode, \. cannot be a data value.
 		 */
 not_end_of_copy:
-
-		/*
-		 * Process all bytes of a multi-byte character as a group.
-		 *
-		 * We only support multi-byte sequences where the first byte has the
-		 * high-bit set, so as an optimization we can avoid this block
-		 * entirely if it is not set.
-		 */
-		if (cstate->encoding_embeds_ascii && IS_HIGHBIT_SET(c))
-		{
-			int			mblen;
-
-			/*
-			 * It is enough to look at the first byte in all our encodings, to
-			 * get the length.  (GB18030 is a bit special, but still works for
-			 * our purposes; see comment in pg_gb18030_mblen())
-			 */
-			mblen_str[0] = c;
-			mblen = pg_encoding_mblen(cstate->file_encoding, mblen_str);
-
-			IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(mblen - 1);
-			IF_NEED_REFILL_AND_EOF_BREAK(mblen - 1);
-			raw_buf_ptr += mblen - 1;
-		}
 		first_char_in_line = false;
 	}							/* end of outer loop */
 
diff --git a/src/include/commands/copyfrom_internal.h b/src/include/commands/copyfrom_internal.h
index e37942df391..86c92394a09 100644
--- a/src/include/commands/copyfrom_internal.h
+++ b/src/include/commands/copyfrom_internal.h
@@ -53,17 +53,6 @@ typedef enum CopyInsertMethod
 /*
  * This struct contains all the state variables used throughout a COPY FROM
  * operation.
- *
- * Multi-byte encodings: all supported client-side encodings encode multi-byte
- * characters by having the first byte's high bit set. Subsequent bytes of the
- * character can have the high bit not set. When scanning data in such an
- * encoding to look for a match to a single-byte (ie ASCII) character, we must
- * use the full pg_encoding_mblen() machinery to skip over multibyte
- * characters, else we might find a false match to a trailing byte. In
- * supported server encodings, there is no possibility of a false match, and
- * it's faster to make useless comparisons to trailing bytes than it is to
- * invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is true
- * when we have to do it the hard way.
  */
 typedef struct CopyFromStateData
 {
@@ -77,7 +66,7 @@ typedef struct CopyFromStateData
 	EolType		eol_type;		/* EOL type of input */
 	int			file_encoding;	/* file or remote side's character encoding */
 	bool		need_transcoding;	/* file encoding diff from server? */
-	bool		encoding_embeds_ascii;	/* ASCII can be non-first byte? */
+	Oid			conversion_proc;
 
 	/* parameters from the COPY command */
 	Relation	rel;			/* relation to copy from */
@@ -132,31 +121,45 @@ typedef struct CopyFromStateData
 
 	/*
 	 * Similarly, line_buf holds the whole input line being processed. The
-	 * input cycle is first to read the whole line into line_buf, convert it
-	 * to server encoding there, and then extract the individual attribute
-	 * fields into attribute_buf.  line_buf is preserved unmodified so that we
-	 * can display it in error messages if appropriate.  (In binary mode,
-	 * line_buf is not used.)
+	 * input cycle is first to read the whole line into line_buf, and then
+	 * extract the individual attribute fields into attribute_buf.  line_buf
+	 * is preserved unmodified so that we can display it in error messages
+	 * if appropriate.  (In binary mode, line_buf is not used.)
 	 */
 	StringInfoData line_buf;
-	bool		line_buf_converted; /* converted to server encoding? */
 	bool		line_buf_valid; /* contains the row being processed? */
 
 	/*
-	 * Finally, raw_buf holds raw data read from the data source (file or
-	 * client connection).  In text mode, CopyReadLine parses this data
+	 * input_buf holds input data, already converted to database encoding.
+	 *
+	 * In text mode, CopyReadLine parses this data
 	 * sufficiently to locate line boundaries, then transfers the data to
-	 * line_buf and converts it.  In binary mode, CopyReadBinaryData fetches
+	 * line_buf.  In binary mode, CopyReadBinaryData fetches
 	 * appropriate amounts of data from this buffer.  In both modes, we
-	 * guarantee that there is a \0 at raw_buf[raw_buf_len].
+	 * guarantee that there is a \0 at input_buf[input_buf_len]. FIXME: do we still?
 	 */
-#define RAW_BUF_SIZE 65536		/* we palloc RAW_BUF_SIZE+1 bytes */
+#define INPUT_BUF_SIZE 65536		/* we palloc INPUT_BUF_SIZE+1 bytes */
+	char	   *input_buf;
+	int			input_buf_index;	/* next byte to process */
+	int			input_buf_len;	/* total # of bytes stored */
+	/* Shorthand for number of unconsumed bytes available in input_buf */
+#define INPUT_BUF_BYTES(cstate) ((cstate)->input_buf_len - (cstate)->input_buf_index)
+
+	/*
+	 * raw_buf holds raw input data read from the data source (file or
+	 * client connection), not yet converted to the database encoding.
+	 *
+	 * If the encoding conversion is not required, the input data is read
+	 * directly into 'input_buf', and raw_buf is not used.
+	 */
+#define RAW_BUF_SIZE 65536		/* allocated size of the buffer */
 	char	   *raw_buf;
 	int			raw_buf_index;	/* next byte to process */
-	int			raw_buf_len;	/* total # of bytes stored */
-	uint64		bytes_processed;/* number of bytes processed so far */
+	int			raw_buf_len;		/* total # of bytes stored */
 	/* Shorthand for number of unconsumed bytes available in raw_buf */
 #define RAW_BUF_BYTES(cstate) ((cstate)->raw_buf_len - (cstate)->raw_buf_index)
+
+	uint64		bytes_processed; /* number of bytes processed so far */
 } CopyFromStateData;
 
 extern void ReceiveCopyBegin(CopyFromState cstate);
diff --git a/src/test/regress/expected/copycorners.out b/src/test/regress/expected/copycorners.out
new file mode 100644
index 00000000000..ac3a6fe022b
--- /dev/null
+++ b/src/test/regress/expected/copycorners.out
@@ -0,0 +1,202 @@
+create temp table copytest (
+  a text,
+  b text,
+  c text);
+create extension plperlu;
+create function write_test_file(content text) returns void language plperlu as
+$$
+use strict;
+use warnings;
+
+open(FH, '>', '/tmp/copycorners.data') or die $!;
+print FH $_[0];
+close(FH);
+
+$$;
+create function copytest(copyoptions text, content text) returns setof copytest language plpgsql as
+$$
+begin
+   truncate copytest;
+   perform write_test_file($2);
+
+   execute 'copy copytest from ''/tmp/copycorners.data''' || copyoptions;
+   return query select * from copytest;
+end;
+$$;
+-- Basic tests. Not very interesting but see that write_test_file() works.
+select * from copytest('',
+$$a	b	c$$
+);
+ a | b | c 
+---+---+---
+ a | b | c
+(1 row)
+
+select * from copytest('',
+$$a	b	c
+$$
+);
+ a | b | c 
+---+---+---
+ a | b | c
+(1 row)
+
+--
+-- Test EOL detection
+--
+select * from copytest('', E'a	b	c\nd	e	f\n'); -- ok
+ a | b | c 
+---+---+---
+ a | b | c
+ d | e | f
+(2 rows)
+
+select * from copytest('', E'a	b	c\rd	e	f\r'); -- ok
+ a | b | c 
+---+---+---
+ a | b | c
+ d | e | f
+(2 rows)
+
+select * from copytest('', E'a	b	c\r\nd	e	f\r\n'); -- ok
+ a | b | c 
+---+---+---
+ a | b | c
+ d | e | f
+(2 rows)
+
+select * from copytest('', E'a	b	c\nd	e	f\r'); -- mismatch
+ERROR:  literal carriage return found in data
+HINT:  Use "\r" to represent carriage return.
+CONTEXT:  COPY copytest, line 2: ""
+SQL statement "copy copytest from '/tmp/copycorners.data'"
+PL/pgSQL function copytest(text,text) line 6 at EXECUTE
+select * from copytest('', E'a	b	c\rd	e	f\n'); -- mismatch
+ERROR:  literal newline found in data
+HINT:  Use "\n" to represent newline.
+CONTEXT:  COPY copytest, line 2: ""
+SQL statement "copy copytest from '/tmp/copycorners.data'"
+PL/pgSQL function copytest(text,text) line 6 at EXECUTE
+select * from copytest('', E'a	b	c\r\nd	e	f\n'); -- mismatch
+ERROR:  literal newline found in data
+HINT:  Use "\n" to represent newline.
+CONTEXT:  COPY copytest, line 2: ""
+SQL statement "copy copytest from '/tmp/copycorners.data'"
+PL/pgSQL function copytest(text,text) line 6 at EXECUTE
+select * from copytest('', E'a	b	c\r\nd	e	f\r'); -- mismatch
+ERROR:  literal carriage return found in data
+HINT:  Use "\r" to represent carriage return.
+CONTEXT:  COPY copytest, line 2: "d	e	f"
+SQL statement "copy copytest from '/tmp/copycorners.data'"
+PL/pgSQL function copytest(text,text) line 6 at EXECUTE
+--
+-- Test end-of-copy markers at different locations.
+--
+select * from copytest('', E'a	b	c\\.');
+ERROR:  end-of-copy marker corrupt
+CONTEXT:  COPY copytest, line 1: "a	b	c"
+SQL statement "copy copytest from '/tmp/copycorners.data'"
+PL/pgSQL function copytest(text,text) line 6 at EXECUTE
+select * from copytest('', E'a	b	c\\.\n');
+ a | b | c 
+---+---+---
+ a | b | c
+(1 row)
+
+select * from copytest('', E'a	b	c\n\n\\.');
+ERROR:  missing data for column "b"
+CONTEXT:  COPY copytest, line 2: ""
+SQL statement "copy copytest from '/tmp/copycorners.data'"
+PL/pgSQL function copytest(text,text) line 6 at EXECUTE
+select * from copytest('', E'a	b	c\n\n\\.\n');
+ERROR:  missing data for column "b"
+CONTEXT:  COPY copytest, line 2: ""
+SQL statement "copy copytest from '/tmp/copycorners.data'"
+PL/pgSQL function copytest(text,text) line 6 at EXECUTE
+-- \. on a line of its own, with garbage after it
+select * from copytest('', E'a	b	c\n\\.\ngarbage');
+ a | b | c 
+---+---+---
+ a | b | c
+(1 row)
+
+-- \. at beginning of line, with garbage after it
+select * from copytest('', E'a	b	c\n\\.garbage');
+ERROR:  end-of-copy marker corrupt
+CONTEXT:  COPY copytest, line 2: ""
+SQL statement "copy copytest from '/tmp/copycorners.data'"
+PL/pgSQL function copytest(text,text) line 6 at EXECUTE
+-- \. in the middle of file, and garbage after it.
+select * from copytest('', E'a	b\\.garbage');
+ERROR:  end-of-copy marker corrupt
+CONTEXT:  COPY copytest, line 1: ""
+SQL statement "copy copytest from '/tmp/copycorners.data'"
+PL/pgSQL function copytest(text,text) line 6 at EXECUTE
+--
+-- Test end-of-copy markers with different EOLs
+--
+select * from copytest('', E'a	b	c\nd	e	f\\.\n');
+ a | b | c 
+---+---+---
+ a | b | c
+ d | e | f
+(2 rows)
+
+select * from copytest('', E'a	b	c\rd	e	f\\.\r');
+ a | b | c 
+---+---+---
+ a | b | c
+ d | e | f
+(2 rows)
+
+select * from copytest('', E'a	b	c\r\nd	e	f\\.\r\n');
+ a | b | c 
+---+---+---
+ a | b | c
+ d | e | f
+(2 rows)
+
+-- mismatch between EOL style and EOL after \.
+select * from copytest('', E'a	b	c\na	b	c\\.\r');
+ERROR:  end-of-copy marker does not match previous newline style
+CONTEXT:  COPY copytest, line 2: ""
+SQL statement "copy copytest from '/tmp/copycorners.data'"
+PL/pgSQL function copytest(text,text) line 6 at EXECUTE
+select * from copytest('', E'a	b	c\ra	b	c\\.\n');
+ERROR:  end-of-copy marker does not match previous newline style
+CONTEXT:  COPY copytest, line 2: ""
+SQL statement "copy copytest from '/tmp/copycorners.data'"
+PL/pgSQL function copytest(text,text) line 6 at EXECUTE
+select * from copytest('', E'a	b	c\r\na	b	c\\.\n');
+ERROR:  end-of-copy marker does not match previous newline style
+CONTEXT:  COPY copytest, line 2: ""
+SQL statement "copy copytest from '/tmp/copycorners.data'"
+PL/pgSQL function copytest(text,text) line 6 at EXECUTE
+select * from copytest('', E'a	b	c\na	b	c\\.\r\n');
+ERROR:  end-of-copy marker does not match previous newline style
+CONTEXT:  COPY copytest, line 2: ""
+SQL statement "copy copytest from '/tmp/copycorners.data'"
+PL/pgSQL function copytest(text,text) line 6 at EXECUTE
+-- end-of-copy marker on first line, with different EOL styles
+select * from copytest('', E'a	b	c\\.');
+ERROR:  end-of-copy marker corrupt
+CONTEXT:  COPY copytest, line 1: "a	b	c"
+SQL statement "copy copytest from '/tmp/copycorners.data'"
+PL/pgSQL function copytest(text,text) line 6 at EXECUTE
+select * from copytest('', E'a	b	c\\.\n');
+ a | b | c 
+---+---+---
+ a | b | c
+(1 row)
+
+select * from copytest('', E'a	b	c\\.\r');
+ a | b | c 
+---+---+---
+ a | b | c
+(1 row)
+
+select * from copytest('', E'a	b	c\\.\r\n');
+ERROR:  missing data for column "b"
+CONTEXT:  COPY copytest, line 2: ""
+SQL statement "copy copytest from '/tmp/copycorners.data'"
+PL/pgSQL function copytest(text,text) line 6 at EXECUTE
diff --git a/src/test/regress/sql/copycorners.sql b/src/test/regress/sql/copycorners.sql
new file mode 100644
index 00000000000..c5960bdceab
--- /dev/null
+++ b/src/test/regress/sql/copycorners.sql
@@ -0,0 +1,90 @@
+create temp table copytest (
+  a text,
+  b text,
+  c text);
+
+create extension plperlu;
+
+create function write_test_file(content text) returns void language plperlu as
+$$
+use strict;
+use warnings;
+
+open(FH, '>', '/tmp/copycorners.data') or die $!;
+print FH $_[0];
+close(FH);
+
+$$;
+
+create function copytest(copyoptions text, content text) returns setof copytest language plpgsql as
+$$
+begin
+   truncate copytest;
+   perform write_test_file($2);
+
+   execute 'copy copytest from ''/tmp/copycorners.data''' || copyoptions;
+   return query select * from copytest;
+end;
+$$;
+
+-- Basic tests. Not very interesting but see that write_test_file() works.
+select * from copytest('',
+$$a	b	c$$
+);
+
+select * from copytest('',
+$$a	b	c
+$$
+);
+
+--
+-- Test EOL detection
+--
+select * from copytest('', E'a	b	c\nd	e	f\n'); -- ok
+select * from copytest('', E'a	b	c\rd	e	f\r'); -- ok
+select * from copytest('', E'a	b	c\r\nd	e	f\r\n'); -- ok
+select * from copytest('', E'a	b	c\nd	e	f\r'); -- mismatch
+select * from copytest('', E'a	b	c\rd	e	f\n'); -- mismatch
+select * from copytest('', E'a	b	c\r\nd	e	f\n'); -- mismatch
+select * from copytest('', E'a	b	c\r\nd	e	f\r'); -- mismatch
+
+--
+-- Test end-of-copy markers at different locations.
+--
+
+select * from copytest('', E'a	b	c\\.');
+
+select * from copytest('', E'a	b	c\\.\n');
+
+select * from copytest('', E'a	b	c\n\n\\.');
+
+select * from copytest('', E'a	b	c\n\n\\.\n');
+
+-- \. on a line of its own, with garbage after it
+select * from copytest('', E'a	b	c\n\\.\ngarbage');
+
+-- \. at beginning of line, with garbage after it
+select * from copytest('', E'a	b	c\n\\.garbage');
+
+-- \. in the middle of file, and garbage after it.
+select * from copytest('', E'a	b\\.garbage');
+
+
+--
+-- Test end-of-copy markers with different EOLs
+--
+select * from copytest('', E'a	b	c\nd	e	f\\.\n');
+select * from copytest('', E'a	b	c\rd	e	f\\.\r');
+select * from copytest('', E'a	b	c\r\nd	e	f\\.\r\n');
+
+-- mismatch between EOL style and EOL after \.
+select * from copytest('', E'a	b	c\na	b	c\\.\r');
+select * from copytest('', E'a	b	c\ra	b	c\\.\n');
+select * from copytest('', E'a	b	c\r\na	b	c\\.\n');
+select * from copytest('', E'a	b	c\na	b	c\\.\r\n');
+
+-- end-of-copy marker on first line, with different EOL styles
+select * from copytest('', E'a	b	c\\.');
+select * from copytest('', E'a	b	c\\.\n');
+select * from copytest('', E'a	b	c\\.\r');
+select * from copytest('', E'a	b	c\\.\r\n');
-- 
2.29.2