*** a/src/backend/commands/copy.c
--- b/src/backend/commands/copy.c
***************
*** 143,167 **** typedef struct CopyStateData
  
  	/*
  	 * Similarly, line_buf holds the whole input line being processed. The
! 	 * input cycle is first to read the whole line into line_buf, convert it
! 	 * to server encoding there, and then extract the individual attribute
! 	 * fields into attribute_buf.  line_buf is preserved unmodified so that we
! 	 * can display it in error messages if appropriate.
  	 */
  	StringInfoData line_buf;
- 	bool		line_buf_converted;		/* converted to server encoding? */
  
  	/*
! 	 * Finally, raw_buf holds raw data read from the data source (file or
! 	 * client connection).	CopyReadLine parses this data sufficiently to
! 	 * locate line boundaries, then transfers the data to line_buf and
! 	 * converts it.  Note: we guarantee that there is a \0 at
! 	 * raw_buf[raw_buf_len].
  	 */
  #define RAW_BUF_SIZE 65536		/* we palloc RAW_BUF_SIZE+1 bytes */
  	char	   *raw_buf;
  	int			raw_buf_index;	/* next byte to process */
  	int			raw_buf_len;	/* total # of bytes stored */
  } CopyStateData;
  
  typedef CopyStateData *CopyState;
--- 143,175 ----
  
  	/*
  	 * Similarly, line_buf holds the whole input line being processed. The
! 	 * input cycle is first to convert the input to server encoding in
! 	 * raw_buf, then read the whole line into line_buf, and then extract the
! 	 * individual attribute fields into attribute_buf.  line_buf is preserved
! 	 * unmodified so that we can display it in error messages if appropriate.
  	 */
  	StringInfoData line_buf;
  
  	/*
! 	 * raw_buf holds raw data read from the data source (file or client
! 	 * connection), converted to server encoding if necessary. CopyReadLine
! 	 * parses this data sufficiently to locate line boundaries, then
! 	 * transfers the data to line_buf.  Note: we guarantee that there is
! 	 * a \0 at raw_buf[raw_buf_len].
  	 */
  #define RAW_BUF_SIZE 65536		/* we palloc RAW_BUF_SIZE+1 bytes */
  	char	   *raw_buf;
  	int			raw_buf_index;	/* next byte to process */
  	int			raw_buf_len;	/* total # of bytes stored */
+ 
+ 	/*
+ 	 * Finally, unconverted_buf holds residual raw data read from the data
+ 	 * source that could not be converted to server encoding, because we
+ 	 * don't have the whole multi-byte character yet.
+ 	 */
+ #define MAX_CONVERSION_GROWTH 4 /* from mbutils.c */
+ 	char		unconverted_buf[MAX_CONVERSION_GROWTH];
+ 	int			unconverted_buf_len;	/* total # of bytes stored */
  } CopyStateData;
  
  typedef CopyStateData *CopyState;
***************
*** 250,255 **** static void CopyOneRowTo(CopyState cstate, Oid tupleOid,
--- 258,264 ----
  static void CopyFrom(CopyState cstate);
  static bool CopyReadLine(CopyState cstate);
  static bool CopyReadLineText(CopyState cstate);
+ static bool CopyReadLineCSV(CopyState cstate);
  static int CopyReadAttributesText(CopyState cstate, int maxfields,
  					   char **fieldvals);
  static int CopyReadAttributesCSV(CopyState cstate, int maxfields,
***************
*** 650,656 **** CopyGetInt16(CopyState cstate, int16 *val)
  
  
  /*
!  * CopyLoadRawBuf loads some more data into raw_buf
   *
   * Returns TRUE if able to obtain at least one more byte, else FALSE.
   *
--- 659,666 ----
  
  
  /*
!  * CopyLoadRawBuf loads some more data into raw_buf, converting to
!  * server encoding if necessary.
   *
   * Returns TRUE if able to obtain at least one more byte, else FALSE.
   *
***************
*** 664,669 **** CopyLoadRawBuf(CopyState cstate)
--- 674,683 ----
  {
  	int			nbytes;
  	int			inbytes;
+ 	bool		needmore;
+ 
+ getmore:
+ 	needmore = false;
  
  	if (cstate->raw_buf_index < cstate->raw_buf_len)
  	{
***************
*** 675,686 **** CopyLoadRawBuf(CopyState cstate)
  	else
  		nbytes = 0;				/* no data need be saved */
  
! 	inbytes = CopyGetData(cstate, cstate->raw_buf + nbytes,
! 						  1, RAW_BUF_SIZE - nbytes);
  	nbytes += inbytes;
  	cstate->raw_buf[nbytes] = '\0';
  	cstate->raw_buf_index = 0;
  	cstate->raw_buf_len = nbytes;
  	return (inbytes > 0);
  }
  
--- 689,827 ----
  	else
  		nbytes = 0;				/* no data need be saved */
  
! 	if (!cstate->need_transcoding)
! 	{
! 		/* Read data directly to raw_buf */
! 		inbytes = CopyGetData(cstate, cstate->raw_buf + nbytes,
! 							  1, RAW_BUF_SIZE - nbytes);
! 	}
! 	else
! 	{
! 		/*
! 		 * Read data and convert to server encoding.
! 		 */
! 		char	   *cvt;
! 		int			convertable_bytes;
! 		char	   *raw;
! 
! 		/*
! 		 * Read data directly to raw_buf. That way, if pg_client_to_server
! 		 * doesn't need to do any conversion after all, we avoid one memcpy.
! 		 * Prepend any unconverted bytes from the last batch first.
! 		 */
! 		raw = cstate->raw_buf + nbytes;
! 		memcpy(raw, cstate->unconverted_buf, cstate->unconverted_buf_len);
! 		inbytes = CopyGetData(cstate, raw + cstate->unconverted_buf_len, 1,
! 							  (RAW_BUF_SIZE - nbytes - cstate->unconverted_buf_len) / MAX_CONVERSION_GROWTH); /* XXX: Is dividing with that right? */
! 		if (inbytes == 0 && cstate->unconverted_buf_len > 0)
! 		{
! 			/*
! 			 * Oops, we hit EOF, but we still had a partial multi-byte
! 			 * character stashed in unconverted_buf. Let pg_client_to_server()
! 			 * throw an appropriate error on it.
! 			 */
! 			(void) pg_client_to_server(raw, inbytes);
! 			/* pg_client_to_server should've errored already */
! 			elog(ERROR, "invalid multi-byte sequence at end of input");
! 		}
! 		inbytes += cstate->unconverted_buf_len;
! 		if (inbytes > 0)
! 		{
! 			/*
! 			 * Determine the number of bytes that can be converted in this
! 			 * batch. If the batch ends with a partial multi-byte
! 			 * character, we must stop the conversion before that and stash
! 			 * away the partial character to be processed on the next round.
! 			 */
! 			if (cstate->client_encoding == PG_UTF8)
! 			{
! 				/*
! 				 * In UTF-8, you can distinguish whether a byte belongs to a
! 				 * multi-byte character, and if it's the first or non-first
! 				 * byte in such a character, just by looking at the byte.
! 				 * The first byte of a multibyte character always has 11 in
! 				 * the two high bits, and a non-first byte has 10.
! 				 *
! 				 * Knowing that, start from the last byte read, and backtrack
! 				 * to the beginning of the last character.
! 				 *
! 				 * XXX: Do other encodings have similar properties? This
! 				 * should be generalized and pushed to wchar.c.
! 				 */
! 				convertable_bytes = inbytes;
! 				while((raw[convertable_bytes - 1] & 0xC0) == 0x80 &&
! 					  (inbytes - convertable_bytes) < MAX_CONVERSION_GROWTH)
! 					convertable_bytes--;
! 			}
! 			else if (pg_encoding_max_length(cstate->client_encoding) > 1)
! 			{
! 				/*
! 				 * Otherwise we have to do it the hard way, starting from
! 				 * the beginning and traversing with pg_encoding_mblen() to
! 				 * the end.
! 				 */
! 				convertable_bytes = 0;
! 				for (;;)
! 				{
! 					int n = pg_encoding_mblen(cstate->client_encoding,
! 											  &raw[convertable_bytes]);
! 					if (n + convertable_bytes > inbytes)
! 						break;
! 					convertable_bytes += n;
! 				}
! 			}
! 			else
! 				convertable_bytes = inbytes;
! 
! 			/*
! 			 * Any multi-byte character should fit in MAX_CONVERSION_GROWTH,
! 			 * so if we have more residual bytes than that, there's something
! 			 * wrong with the input. Let pg_client_to_server() report the
! 			 * error on it.
! 			 */
! 			if (inbytes - convertable_bytes > MAX_CONVERSION_GROWTH)
! 			{
! 				cvt = pg_client_to_server(raw, inbytes);
! 				/* pg_client_to_server should've errored already */
! 				elog(ERROR, "unexpected multi-byte sequence");
! 			}
! 
! 			/*
! 			 * Stash away any unconverted bytes at the end for the next round
! 			 */
! 			memcpy(cstate->unconverted_buf,
! 				   raw + convertable_bytes,
! 				   inbytes - convertable_bytes);
! 
! 			/*
! 			 * Perform conversion. If we didn't read enough to have a whole
! 			 * character, try to read more, rather than be totally useless
! 			 * to the caller.
! 			 */
! 			if (convertable_bytes > 0)
! 			{
! 				cvt = pg_client_to_server(raw, convertable_bytes);
! 				if (cvt != raw)
! 				{
! 					inbytes = strlen(cvt);
! 					memcpy(cstate->raw_buf + nbytes, cvt, inbytes);
! 					pfree(cvt);
! 				}
! 				else
! 					inbytes = convertable_bytes;
! 			}
! 			else
! 				needmore = true;
! 		}
! 	}
  	nbytes += inbytes;
  	cstate->raw_buf[nbytes] = '\0';
  	cstate->raw_buf_index = 0;
  	cstate->raw_buf_len = nbytes;
+ 
+ 	if (needmore)
+ 		goto getmore;
+ 
  	return (inbytes > 0);
  }
  
***************
*** 1160,1168 **** DoCopy(const CopyStmt *stmt, const char *queryString)
  	/* Set up variables to avoid per-attribute overhead. */
  	initStringInfo(&cstate->attribute_buf);
  	initStringInfo(&cstate->line_buf);
- 	cstate->line_buf_converted = false;
  	cstate->raw_buf = (char *) palloc(RAW_BUF_SIZE + 1);
  	cstate->raw_buf_index = cstate->raw_buf_len = 0;
  	cstate->processed = 0;
  
  	/*
--- 1301,1309 ----
  	/* Set up variables to avoid per-attribute overhead. */
  	initStringInfo(&cstate->attribute_buf);
  	initStringInfo(&cstate->line_buf);
  	cstate->raw_buf = (char *) palloc(RAW_BUF_SIZE + 1);
  	cstate->raw_buf_index = cstate->raw_buf_len = 0;
+ 	cstate->unconverted_buf_len = 0;
  	cstate->processed = 0;
  
  	/*
***************
*** 1596,1623 **** copy_in_error_callback(void *arg)
  		else
  		{
  			/* error is relevant to a particular line */
! 			if (cstate->line_buf_converted || !cstate->need_transcoding)
! 			{
! 				char	   *lineval;
  
! 				lineval = limit_printout_length(cstate->line_buf.data);
! 				errcontext("COPY %s, line %d: \"%s\"",
! 						   cstate->cur_relname, cstate->cur_lineno, lineval);
! 				pfree(lineval);
! 			}
! 			else
! 			{
! 				/*
! 				 * Here, the line buffer is still in a foreign encoding, and
! 				 * indeed it's quite likely that the error is precisely a
! 				 * failure to do encoding conversion (ie, bad data).  We dare
! 				 * not try to convert it, and at present there's no way to
! 				 * regurgitate it without conversion.  So we have to punt and
! 				 * just report the line number.
! 				 */
! 				errcontext("COPY %s, line %d",
! 						   cstate->cur_relname, cstate->cur_lineno);
! 			}
  		}
  	}
  }
--- 1737,1748 ----
  		else
  		{
  			/* error is relevant to a particular line */
! 			char	   *lineval;
  
! 			lineval = limit_printout_length(cstate->line_buf.data);
! 			errcontext("COPY %s, line %d: \"%s\"",
! 					   cstate->cur_relname, cstate->cur_lineno, lineval);
! 			pfree(lineval);
  		}
  	}
  }
***************
*** 2239,2249 **** CopyReadLine(CopyState cstate)
  
  	resetStringInfo(&cstate->line_buf);
  
- 	/* Mark that encoding conversion hasn't occurred yet */
- 	cstate->line_buf_converted = false;
- 
  	/* Parse data and transfer into line_buf */
! 	result = CopyReadLineText(cstate);
  
  	if (result)
  	{
--- 2364,2374 ----
  
  	resetStringInfo(&cstate->line_buf);
  
  	/* Parse data and transfer into line_buf */
! 	if (cstate->csv_mode)
! 		result = CopyReadLineCSV(cstate);
! 	else
! 		result = CopyReadLineText(cstate);
  
  	if (result)
  	{
***************
*** 2294,2326 **** CopyReadLine(CopyState cstate)
  		}
  	}
  
! 	/* Done reading the line.  Convert it to server encoding. */
! 	if (cstate->need_transcoding)
  	{
! 		char	   *cvt;
  
! 		cvt = pg_client_to_server(cstate->line_buf.data,
! 								  cstate->line_buf.len);
! 		if (cvt != cstate->line_buf.data)
  		{
! 			/* transfer converted data back to line_buf */
! 			resetStringInfo(&cstate->line_buf);
! 			appendBinaryStringInfo(&cstate->line_buf, cvt, strlen(cvt));
! 			pfree(cvt);
  		}
- 	}
  
! 	/* Now it's safe to use the buffer in error messages */
! 	cstate->line_buf_converted = true;
  
  	return result;
  }
  
  /*
!  * CopyReadLineText - inner loop of CopyReadLine for text mode
   */
  static bool
! CopyReadLineText(CopyState cstate)
  {
  	char	   *copy_raw_buf;
  	int			raw_buf_ptr;
--- 2419,2676 ----
  		}
  	}
  
! 	return result;
! }
! 
! /*
!  * CopyReadLineText - inner loop of CopyReadLine for text mode
!  */
! static bool
! CopyReadLineText(CopyState cstate)
! {
! 	char	   *copy_raw_buf;
! 	int			raw_buf_ptr;
! 	int			prev_raw_ptr;
! 	int			copy_buf_len;
! 	bool		need_data = false;
! 	bool		hit_eof = false;
! 	bool		result = false;
! 
! 	/*
! 	 * The objective of this loop is to transfer the entire next input line
! 	 * into line_buf.  Hence, we only care for detecting newlines (\r and/or
! 	 * \n) and the end-of-copy marker (\.).
! 	 *
! 	 * For speed, we try to move data from raw_buf to line_buf in chunks
! 	 * rather than one character at a time.  raw_buf_ptr points to the next
! 	 * character to examine; any characters from raw_buf_index to raw_buf_ptr
! 	 * have been determined to be part of the line, but not yet transferred to
! 	 * line_buf.
! 	 *
! 	 * For a little extra speed within the loop, we copy raw_buf and
! 	 * raw_buf_len into local variables.
! 	 */
! 	copy_raw_buf = cstate->raw_buf;
! 	raw_buf_ptr = cstate->raw_buf_index;
! 	copy_buf_len = cstate->raw_buf_len;
! 
! 	for (;;)
  	{
! 		char c;
! 		char *cc;
  
! 		/*
! 		 * Load more data if needed.  Ideally we would just force four bytes
! 		 * of read-ahead and avoid the many calls to
! 		 * IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(), but the COPY_OLD_FE protocol
! 		 * does not allow us to read too far ahead or we might read into the
! 		 * next data, so we read ahead only as far as we know we can.  One
! 		 * optimization would be to read ahead four bytes here if
! 		 * cstate->copy_dest != COPY_OLD_FE, but it hardly seems worth it,
! 		 * considering the size of the buffer.
! 		 */
! 		if (raw_buf_ptr >= copy_buf_len || need_data)
  		{
! 			REFILL_LINEBUF;
! 
! 			/*
! 			 * Try to read some more data.	This will certainly reset
! 			 * raw_buf_index to zero, and raw_buf_ptr must go with it.
! 			 */
! 			if (!CopyLoadRawBuf(cstate))
! 				hit_eof = true;
! 			raw_buf_ptr = 0;
! 			copy_buf_len = cstate->raw_buf_len;
! 
! 			/*
! 			 * If we are completely out of data, break out of the loop,
! 			 * reporting EOF.
! 			 */
! 			if (copy_buf_len <= 0)
! 			{
! 				result = true;
! 				break;
! 			}
! 			need_data = false;
! 
! 			/*
! 			 * If line terminator is NL, check that there are no CRs in the
! 			 * input, and vice versa. We do this here, rather than
! 			 * line-at-a-time, for efficiency. Presumably the raw input
! 			 * buffer is larger than an average line.
! 			 *
! 			 * CRNL mode is more complicated and is handled line-at-a-time.
! 			 */
! 			if (cstate->eol_type == EOL_NL)
! 			{
! 				if (memchr(&copy_raw_buf[raw_buf_ptr], '\r',
! 						   copy_buf_len - raw_buf_ptr) != NULL)
! 				{
! 					ereport(ERROR,
! 							(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 							 errmsg("literal carriage return found in data"),
! 							 errhint("Use \"\\r\" to represent carriage return.")));
! 
! 				}
! 			}
! 			if (cstate->eol_type == EOL_CR)
! 			{
! 				if (memchr(&copy_raw_buf[raw_buf_ptr], '\n',
! 						   copy_buf_len - raw_buf_ptr) != NULL)
! 				{
! 					ereport(ERROR,
! 							(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 							 errmsg("literal newline found in data"),
! 							 errhint("Use \"\\n\" to represent newline.")));
! 
! 				}
! 			}
  		}
  
! 		/* Look for the first CR or LF */
! 		if (cstate->eol_type == EOL_UNKNOWN)
! 		{
! 			char *nl,
! 				 *cr;
! 
! 			nl = memchr(&copy_raw_buf[raw_buf_ptr], '\n',
! 						copy_buf_len - raw_buf_ptr);
! 			if (nl == NULL)
! 				nl = &copy_raw_buf[copy_buf_len];
! 
! 			cr = memchr(&copy_raw_buf[raw_buf_ptr], '\r',
! 						copy_buf_len - raw_buf_ptr);
! 			if (cr == NULL)
! 				cr = &copy_raw_buf[copy_buf_len];
! 
! 			if (nl < cr)
! 			{
! 				raw_buf_ptr = nl - copy_raw_buf + 1;
! 
! 				cstate->eol_type = EOL_NL;
! 
! 				/* If reach here, we have found the line terminator */
! 				break;
! 			}
! 			if (cr < nl)
! 			{
! 				prev_raw_ptr = cr - copy_raw_buf;
! 				raw_buf_ptr = prev_raw_ptr + 1;
! 
! 				/*
! 				 * If need more data, go back to loop top to load it.
! 				 *
! 				 * Note that if we are at EOF, c will wind up as '\0' because
! 				 * of the guaranteed pad of raw_buf.
! 				 */
! 				IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
! 
! 				/* get next char */
! 				c = copy_raw_buf[raw_buf_ptr];
! 
! 				if (c == '\n')
! 				{
! 					raw_buf_ptr++;		/* eat newline */
! 					cstate->eol_type = EOL_CRNL;		/* in case not set yet */
! 				}
! 				else
! 				{
! 					/* found \r, but no \n */
! 					/*
! 					 * if we got here, it is the first line and we didn't find
! 					 * \n, so don't consume the peeked character
! 					 */
! 					cstate->eol_type = EOL_CR;
! 				}
! 			}
! 			/* If reach here, we have found the line terminator */
! 			break;
! 		}
! 
! 		/* Search for the next line terminator */
! 		Assert(cstate->eol_type != EOL_UNKNOWN);
! 		if (cstate->eol_type == EOL_NL)
! 		{
! 			cc = memchr(&copy_raw_buf[raw_buf_ptr], '\n',
! 						copy_buf_len - raw_buf_ptr);
! 			if (cc == NULL)
! 			{
! 				/* No newline. Read more data */
! 				raw_buf_ptr = copy_buf_len;
! 				continue;
! 			}
! 			else
! 			{
! 				/* Found line terminator. */
! 				raw_buf_ptr = cc - copy_raw_buf + 1;
! 				break;
! 			}
! 		}
! 		else
! 		{
! 			cc = memchr(&copy_raw_buf[raw_buf_ptr], '\r',
! 						copy_buf_len - raw_buf_ptr);
! 			if (cc == NULL)
! 			{
! 				raw_buf_ptr = copy_buf_len;
! 				continue;
! 			}
! 			prev_raw_ptr = cc - copy_raw_buf;
! 			raw_buf_ptr = prev_raw_ptr + 1;
! 
! 			if (cstate->eol_type == EOL_CRNL)
! 			{
! 				/*
! 				 * If need more data, go back to loop top to load it.
! 				 *
! 				 * Note that if we are at EOF, c will wind up as '\0' because
! 				 * of the guaranteed pad of raw_buf.
! 				 */
! 				IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
! 
! 				/* ensure that there are no stray newlines before the CR */
! 				if (memchr(&copy_raw_buf[cstate->raw_buf_index], '\n',
! 						   copy_buf_len - cstate->raw_buf_index) != NULL)
! 				{
! 					ereport(ERROR,
! 							(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 							 errmsg("literal newline found in data"),
! 							 errhint("Use \"\\n\" to represent newline.")));
! 
! 				}
! 
! 				/* get next char */
! 				c = copy_raw_buf[raw_buf_ptr];
! 
! 				if (c != '\n')
! 				{
! 					/* found \r, but no \n */
! 					ereport(ERROR,
! 							(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 							 errmsg("literal carriage return found in data"),
! 							 errhint("Use \"\\r\" to represent carriage return.")));
! 				}
! 
! 				raw_buf_ptr++;		/* eat newline */
! 			}
! 			/* If reach here, we have found the line terminator */
! 			break;
! 		}
! 	}							/* end of outer loop */
! 
! 	/*
! 	 * Transfer any still-uncopied data to line_buf.
! 	 */
! 	REFILL_LINEBUF;
  
  	return result;
  }
  
  /*
!  * CopyReadLineCSV - inner loop of CopyReadLine for CSV mode
   */
  static bool
! CopyReadLineCSV(CopyState cstate)
  {
  	char	   *copy_raw_buf;
  	int			raw_buf_ptr;
***************
*** 2328,2334 **** CopyReadLineText(CopyState cstate)
  	bool		need_data = false;
  	bool		hit_eof = false;
  	bool		result = false;
- 	char		mblen_str[2];
  
  	/* CSV variables */
  	bool		first_char_in_line = true;
--- 2678,2683 ----
***************
*** 2337,2343 **** CopyReadLineText(CopyState cstate)
  	char		quotec = '\0';
  	char		escapec = '\0';
  
- 	if (cstate->csv_mode)
  	{
  		quotec = cstate->quote[0];
  		escapec = cstate->escape[0];
--- 2686,2691 ----
***************
*** 2346,2353 **** CopyReadLineText(CopyState cstate)
  			escapec = '\0';
  	}
  
- 	mblen_str[1] = '\0';
- 
  	/*
  	 * The objective of this loop is to transfer the entire next input line
  	 * into line_buf.  Hence, we only care for detecting newlines (\r and/or
--- 2694,2699 ----
***************
*** 2417,2423 **** CopyReadLineText(CopyState cstate)
  		prev_raw_ptr = raw_buf_ptr;
  		c = copy_raw_buf[raw_buf_ptr++];
  
- 		if (cstate->csv_mode)
  		{
  			/*
  			 * If character is '\\' or '\r', we may need to look ahead below.
--- 2763,2768 ----
***************
*** 2459,2465 **** CopyReadLineText(CopyState cstate)
  		}
  
  		/* Process \r */
! 		if (c == '\r' && (!cstate->csv_mode || !in_quote))
  		{
  			/* Check for \r\n on first line, _and_ handle \r\n. */
  			if (cstate->eol_type == EOL_UNKNOWN ||
--- 2804,2810 ----
  		}
  
  		/* Process \r */
! 		if (c == '\r' && !in_quote)
  		{
  			/* Check for \r\n on first line, _and_ handle \r\n. */
  			if (cstate->eol_type == EOL_UNKNOWN ||
***************
*** 2487,2497 **** CopyReadLineText(CopyState cstate)
  					if (cstate->eol_type == EOL_CRNL)
  						ereport(ERROR,
  								(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 								 !cstate->csv_mode ?
! 							errmsg("literal carriage return found in data") :
! 							errmsg("unquoted carriage return found in data"),
! 								 !cstate->csv_mode ?
! 						errhint("Use \"\\r\" to represent carriage return.") :
  								 errhint("Use quoted CSV field to represent carriage return.")));
  
  					/*
--- 2832,2838 ----
  					if (cstate->eol_type == EOL_CRNL)
  						ereport(ERROR,
  								(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 								 errmsg("unquoted carriage return found in data"),
  								 errhint("Use quoted CSV field to represent carriage return.")));
  
  					/*
***************
*** 2504,2530 **** CopyReadLineText(CopyState cstate)
  			else if (cstate->eol_type == EOL_NL)
  				ereport(ERROR,
  						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
- 						 !cstate->csv_mode ?
- 						 errmsg("literal carriage return found in data") :
  						 errmsg("unquoted carriage return found in data"),
- 						 !cstate->csv_mode ?
- 					   errhint("Use \"\\r\" to represent carriage return.") :
  						 errhint("Use quoted CSV field to represent carriage return.")));
  			/* If reach here, we have found the line terminator */
  			break;
  		}
  
  		/* Process \n */
! 		if (c == '\n' && (!cstate->csv_mode || !in_quote))
  		{
  			if (cstate->eol_type == EOL_CR || cstate->eol_type == EOL_CRNL)
  				ereport(ERROR,
  						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
- 						 !cstate->csv_mode ?
- 						 errmsg("literal newline found in data") :
  						 errmsg("unquoted newline found in data"),
- 						 !cstate->csv_mode ?
- 						 errhint("Use \"\\n\" to represent newline.") :
  					 errhint("Use quoted CSV field to represent newline.")));
  			cstate->eol_type = EOL_NL;	/* in case not set yet */
  			/* If reach here, we have found the line terminator */
--- 2845,2863 ----
  			else if (cstate->eol_type == EOL_NL)
  				ereport(ERROR,
  						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
  						 errmsg("unquoted carriage return found in data"),
  						 errhint("Use quoted CSV field to represent carriage return.")));
  			/* If reach here, we have found the line terminator */
  			break;
  		}
  
  		/* Process \n */
! 		if (c == '\n' && !in_quote)
  		{
  			if (cstate->eol_type == EOL_CR || cstate->eol_type == EOL_CRNL)
  				ereport(ERROR,
  						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
  						 errmsg("unquoted newline found in data"),
  					 errhint("Use quoted CSV field to represent newline.")));
  			cstate->eol_type = EOL_NL;	/* in case not set yet */
  			/* If reach here, we have found the line terminator */
***************
*** 2535,2541 **** CopyReadLineText(CopyState cstate)
  		 * In CSV mode, we only recognize \. alone on a line.  This is because
  		 * \. is a valid CSV data value.
  		 */
! 		if (c == '\\' && (!cstate->csv_mode || first_char_in_line))
  		{
  			char		c2;
  
--- 2868,2874 ----
  		 * In CSV mode, we only recognize \. alone on a line.  This is because
  		 * \. is a valid CSV data value.
  		 */
! 		if (c == '\\' && first_char_in_line)
  		{
  			char		c2;
  
***************
*** 2568,2588 **** CopyReadLineText(CopyState cstate)
  
  					if (c2 == '\n')
  					{
! 						if (!cstate->csv_mode)
! 							ereport(ERROR,
! 									(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 									 errmsg("end-of-copy marker does not match previous newline style")));
! 						else
! 							NO_END_OF_COPY_GOTO;
  					}
  					else if (c2 != '\r')
  					{
! 						if (!cstate->csv_mode)
! 							ereport(ERROR,
! 									(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 									 errmsg("end-of-copy marker corrupt")));
! 						else
! 							NO_END_OF_COPY_GOTO;
  					}
  				}
  
--- 2901,2911 ----
  
  					if (c2 == '\n')
  					{
! 						NO_END_OF_COPY_GOTO;
  					}
  					else if (c2 != '\r')
  					{
! 						NO_END_OF_COPY_GOTO;
  					}
  				}
  
***************
*** 2593,2604 **** CopyReadLineText(CopyState cstate)
  
  				if (c2 != '\r' && c2 != '\n')
  				{
! 					if (!cstate->csv_mode)
! 						ereport(ERROR,
! 								(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 								 errmsg("end-of-copy marker corrupt")));
! 					else
! 						NO_END_OF_COPY_GOTO;
  				}
  
  				if ((cstate->eol_type == EOL_NL && c2 != '\n') ||
--- 2916,2922 ----
  
  				if (c2 != '\r' && c2 != '\n')
  				{
! 					NO_END_OF_COPY_GOTO;
  				}
  
  				if ((cstate->eol_type == EOL_NL && c2 != '\n') ||
***************
*** 2622,2641 **** CopyReadLineText(CopyState cstate)
  				result = true;	/* report EOF */
  				break;
  			}
! 			else if (!cstate->csv_mode)
! 
  				/*
  				 * If we are here, it means we found a backslash followed by
! 				 * something other than a period.  In non-CSV mode, anything
! 				 * after a backslash is special, so we skip over that second
! 				 * character too.  If we didn't do that \\. would be
! 				 * considered an eof-of copy, while in non-CVS mode it is a
! 				 * literal backslash followed by a period.	In CSV mode,
  				 * backslashes are not special, so we want to process the
  				 * character after the backslash just like a normal character,
  				 * so we don't increment in those cases.
  				 */
! 				raw_buf_ptr++;
  		}
  
  		/*
--- 2940,2955 ----
  				result = true;	/* report EOF */
  				break;
  			}
! 			else
! 			{
  				/*
  				 * If we are here, it means we found a backslash followed by
! 				 * something other than a period.  In CSV mode,
  				 * backslashes are not special, so we want to process the
  				 * character after the backslash just like a normal character,
  				 * so we don't increment in those cases.
  				 */
! 			}
  		}
  
  		/*
***************
*** 2645,2669 **** CopyReadLineText(CopyState cstate)
  		 * value, while in non-CSV mode, \. cannot be a data value.
  		 */
  not_end_of_copy:
- 
- 		/*
- 		 * Process all bytes of a multi-byte character as a group.
- 		 *
- 		 * We only support multi-byte sequences where the first byte has the
- 		 * high-bit set, so as an optimization we can avoid this block
- 		 * entirely if it is not set.
- 		 */
- 		if (cstate->encoding_embeds_ascii && IS_HIGHBIT_SET(c))
- 		{
- 			int			mblen;
- 
- 			mblen_str[0] = c;
- 			/* All our encodings only read the first byte to get the length */
- 			mblen = pg_encoding_mblen(cstate->client_encoding, mblen_str);
- 			IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(mblen - 1);
- 			IF_NEED_REFILL_AND_EOF_BREAK(mblen - 1);
- 			raw_buf_ptr += mblen - 1;
- 		}
  		first_char_in_line = false;
  	}							/* end of outer loop */
  
--- 2959,2964 ----