*** src/backend/commands/copy.c
--- src/backend/commands/copy.c
***************
*** 147,153 **** typedef struct CopyStateData
  	 * can display it in error messages if appropriate.
  	 */
  	StringInfoData line_buf;
- 	bool		line_buf_converted;		/* converted to server encoding? */
  
  	/*
  	 * Finally, raw_buf holds raw data read from the data source (file or
--- 147,152 ----
***************
*** 160,165 **** typedef struct CopyStateData
--- 159,168 ----
  	char	   *raw_buf;
  	int			raw_buf_index;	/* next byte to process */
  	int			raw_buf_len;	/* total # of bytes stored */
+ 
+ #define MAX_CONVERSION_GROWTH 4 /* from mbutils.c */
+ 	char		unconverted_buf[MAX_CONVERSION_GROWTH];
+ 	int			unconverted_buf_len;	/* total # of bytes stored */
  } CopyStateData;
  
  typedef CopyStateData *CopyState;
***************
*** 248,253 **** static void CopyOneRowTo(CopyState cstate, Oid tupleOid,
--- 251,257 ----
  static void CopyFrom(CopyState cstate);
  static bool CopyReadLine(CopyState cstate);
  static bool CopyReadLineText(CopyState cstate);
+ static bool CopyReadLineCSV(CopyState cstate);
  static int CopyReadAttributesText(CopyState cstate, int maxfields,
  					   char **fieldvals);
  static int CopyReadAttributesCSV(CopyState cstate, int maxfields,
***************
*** 673,680 **** CopyLoadRawBuf(CopyState cstate)
  	else
  		nbytes = 0;				/* no data need be saved */
  
! 	inbytes = CopyGetData(cstate, cstate->raw_buf + nbytes,
! 						  1, RAW_BUF_SIZE - nbytes);
  	nbytes += inbytes;
  	cstate->raw_buf[nbytes] = '\0';
  	cstate->raw_buf_index = 0;
--- 677,773 ----
  	else
  		nbytes = 0;				/* no data need be saved */
  
! 	if (cstate->need_transcoding)
! 	{
! 		char	   *cvt;
! 		int			convertable_bytes;
! 		char	   *raw;
! 
! 		/*
! 		 * Read data directly to raw_buf. That way, if pg_client_to_server
! 		 * doesn't need to do any conversion after all, we avoid one memcpy.
! 		 * Prepend any unconverted bytes from the last batch first.
! 		 */
! 		raw = cstate->raw_buf + nbytes;
! 
! 		memcpy(raw, cstate->unconverted_buf, cstate->unconverted_buf_len);
! 
! 		inbytes = CopyGetData(cstate, raw + cstate->unconverted_buf_len, 1,
! 							  (RAW_BUF_SIZE - nbytes - cstate->unconverted_buf_len) / MAX_CONVERSION_GROWTH);
! 		inbytes += cstate->unconverted_buf_len;
! 
! 		if (inbytes > 0)
! 		{
! 			/*
! 			 * Determine the number of bytes that can be converted in this
! 			 * batch. 
! 			 */
! 			if (cstate->client_encoding == PG_UTF8)
! 			{
! 				/*
! 				 * Start from the end, and backtrack to the beginning of
! 				 * the last character. XXX: this should be generalized and
! 				 * pushed to wchar.c. I believe we could do the same for
! 				 * many other encodings too.
! 				 */
! 				convertable_bytes = inbytes;
! 				while((raw[convertable_bytes - 1] & 0xC0) == 0x80 &&
! 					  (inbytes - convertable_bytes) < MAX_CONVERSION_GROWTH)
! 					convertable_bytes--;
! 			}
! 			else if (pg_encoding_max_length(cstate->client_encoding) > 1)
! 			{
! 				convertable_bytes = 0;
! 				for (;;)
! 				{
! 					int n = pg_encoding_mblen(cstate->client_encoding,
! 											  &raw[convertable_bytes]);
! 			
! 					if (n + convertable_bytes > inbytes)
! 						break;
! 					convertable_bytes += n;
! 				}
! 			}
! 			else
! 				convertable_bytes = inbytes;
! 
! 			if (convertable_bytes == 0)
! 			{
! 				/*
! 				 * EOF, and there was some unconvertable chars at the end.
! 				 * Call pg_client_to_server on the remaining bytes, to
! 				 * let it throw an error.
! 				 */
! 				cvt = pg_client_to_server(raw, inbytes);
! 				Assert(false); /* pg_client_to_server should've errored */
! 			}
! 
! 			/*
! 			 * Stash away any unconvertable bytes at the end for the next
! 			 * round
! 			 */
! 			memcpy(cstate->unconverted_buf,
! 				   raw + convertable_bytes,
! 				   inbytes - convertable_bytes);
! 
! 			/* Perform conversion */
! 			cvt = pg_client_to_server(raw, convertable_bytes);
! 			if (cvt != raw)
! 			{
! 				inbytes = strlen(cvt);
! 				memcpy(cstate->raw_buf + nbytes, cvt, inbytes);
! 				pfree(cvt);
! 			}
! 			else
! 				inbytes = convertable_bytes;
! 		}
! 	}
! 	else
! 	{
! 		/* Read data directly to raw_buf */
! 		inbytes = CopyGetData(cstate, cstate->raw_buf + nbytes,
! 							  1, RAW_BUF_SIZE - nbytes);
! 	}
  	nbytes += inbytes;
  	cstate->raw_buf[nbytes] = '\0';
  	cstate->raw_buf_index = 0;
***************
*** 1120,1128 **** DoCopy(const CopyStmt *stmt, const char *queryString)
  	/* Set up variables to avoid per-attribute overhead. */
  	initStringInfo(&cstate->attribute_buf);
  	initStringInfo(&cstate->line_buf);
- 	cstate->line_buf_converted = false;
  	cstate->raw_buf = (char *) palloc(RAW_BUF_SIZE + 1);
  	cstate->raw_buf_index = cstate->raw_buf_len = 0;
  	cstate->processed = 0;
  
  	/*
--- 1213,1221 ----
  	/* Set up variables to avoid per-attribute overhead. */
  	initStringInfo(&cstate->attribute_buf);
  	initStringInfo(&cstate->line_buf);
  	cstate->raw_buf = (char *) palloc(RAW_BUF_SIZE + 1);
  	cstate->raw_buf_index = cstate->raw_buf_len = 0;
+ 	cstate->unconverted_buf_len = 0;
  	cstate->processed = 0;
  
  	/*
***************
*** 1556,1583 **** copy_in_error_callback(void *arg)
  		else
  		{
  			/* error is relevant to a particular line */
! 			if (cstate->line_buf_converted || !cstate->need_transcoding)
! 			{
! 				char	   *lineval;
  
! 				lineval = limit_printout_length(cstate->line_buf.data);
! 				errcontext("COPY %s, line %d: \"%s\"",
! 						   cstate->cur_relname, cstate->cur_lineno, lineval);
! 				pfree(lineval);
! 			}
! 			else
! 			{
! 				/*
! 				 * Here, the line buffer is still in a foreign encoding, and
! 				 * indeed it's quite likely that the error is precisely a
! 				 * failure to do encoding conversion (ie, bad data).  We dare
! 				 * not try to convert it, and at present there's no way to
! 				 * regurgitate it without conversion.  So we have to punt and
! 				 * just report the line number.
! 				 */
! 				errcontext("COPY %s, line %d",
! 						   cstate->cur_relname, cstate->cur_lineno);
! 			}
  		}
  	}
  }
--- 1649,1660 ----
  		else
  		{
  			/* error is relevant to a particular line */
! 			char	   *lineval;
  
! 			lineval = limit_printout_length(cstate->line_buf.data);
! 			errcontext("COPY %s, line %d: \"%s\"",
! 					   cstate->cur_relname, cstate->cur_lineno, lineval);
! 			pfree(lineval);
  		}
  	}
  }
***************
*** 2187,2197 **** CopyReadLine(CopyState cstate)
  
  	resetStringInfo(&cstate->line_buf);
  
- 	/* Mark that encoding conversion hasn't occurred yet */
- 	cstate->line_buf_converted = false;
- 
  	/* Parse data and transfer into line_buf */
! 	result = CopyReadLineText(cstate);
  
  	if (result)
  	{
--- 2264,2274 ----
  
  	resetStringInfo(&cstate->line_buf);
  
  	/* Parse data and transfer into line_buf */
! 	if (cstate->csv_mode)
! 		result = CopyReadLineCSV(cstate);
! 	else
! 		result = CopyReadLineText(cstate);
  
  	if (result)
  	{
***************
*** 2242,2274 **** CopyReadLine(CopyState cstate)
  		}
  	}
  
! 	/* Done reading the line.  Convert it to server encoding. */
! 	if (cstate->need_transcoding)
  	{
! 		char	   *cvt;
  
! 		cvt = pg_client_to_server(cstate->line_buf.data,
! 								  cstate->line_buf.len);
! 		if (cvt != cstate->line_buf.data)
  		{
! 			/* transfer converted data back to line_buf */
! 			resetStringInfo(&cstate->line_buf);
! 			appendBinaryStringInfo(&cstate->line_buf, cvt, strlen(cvt));
! 			pfree(cvt);
  		}
- 	}
  
! 	/* Now it's safe to use the buffer in error messages */
! 	cstate->line_buf_converted = true;
  
  	return result;
  }
  
  /*
!  * CopyReadLineText - inner loop of CopyReadLine for text mode
   */
  static bool
! CopyReadLineText(CopyState cstate)
  {
  	char	   *copy_raw_buf;
  	int			raw_buf_ptr;
--- 2319,2567 ----
  		}
  	}
  
! 	return result;
! }
! 
! /*
!  * CopyReadLineText - inner loop of CopyReadLine for text mode
!  */
! static bool
! CopyReadLineText(CopyState cstate)
! {
! 	char	   *copy_raw_buf;
! 	int			raw_buf_ptr;
! 	int			prev_raw_ptr;
! 	int			copy_buf_len;
! 	bool		need_data = false;
! 	bool		hit_eof = false;
! 	bool		result = false;
! 
! 	/*
! 	 * The objective of this loop is to transfer the entire next input line
! 	 * into line_buf.  Hence, we only care for detecting newlines (\r and/or
! 	 * \n) and the end-of-copy marker (\.).
! 	 *
! 	 * For speed, we try to move data from raw_buf to line_buf in chunks
! 	 * rather than one character at a time.  raw_buf_ptr points to the next
! 	 * character to examine; any characters from raw_buf_index to raw_buf_ptr
! 	 * have been determined to be part of the line, but not yet transferred to
! 	 * line_buf.
! 	 *
! 	 * For a little extra speed within the loop, we copy raw_buf and
! 	 * raw_buf_len into local variables.
! 	 */
! 	copy_raw_buf = cstate->raw_buf;
! 	raw_buf_ptr = cstate->raw_buf_index;
! 	copy_buf_len = cstate->raw_buf_len;
! 
! 	for (;;)
  	{
! 		char c;
! 		char *cc;
! 
! 		/*
! 		 * Load more data if needed.  Ideally we would just force four bytes
! 		 * of read-ahead and avoid the many calls to
! 		 * IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(), but the COPY_OLD_FE protocol
! 		 * does not allow us to read too far ahead or we might read into the
! 		 * next data, so we read-ahead only as far we know we can.	One
! 		 * optimization would be to read-ahead four byte here if
! 		 * cstate->copy_dest != COPY_OLD_FE, but it hardly seems worth it,
! 		 * considering the size of the buffer.
! 		 */
! 		if (raw_buf_ptr >= copy_buf_len || need_data)
! 		{
! 			REFILL_LINEBUF;
! 
! 			/*
! 			 * Try to read some more data.	This will certainly reset
! 			 * raw_buf_index to zero, and raw_buf_ptr must go with it.
! 			 */
! 			if (!CopyLoadRawBuf(cstate))
! 				hit_eof = true;
! 			raw_buf_ptr = 0;
! 			copy_buf_len = cstate->raw_buf_len;
! 
! 			/*
! 			 * If we are completely out of data, break out of the loop,
! 			 * reporting EOF.
! 			 */
! 			if (copy_buf_len <= 0)
! 			{
! 				result = true;
! 				break;
! 			}
! 			need_data = false;
! 
! 			/* Validate input */
! 			if (cstate->eol_type == EOL_NL)
! 			{
! 				if (memchr(&copy_raw_buf[raw_buf_ptr], '\r',
! 						   copy_buf_len - raw_buf_ptr) != NULL)
! 				{
! 					ereport(ERROR,
! 							(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 							 errmsg("literal carriage return found in data"),
! 							 errhint("Use \"\\r\" to represent carriage return.")));
! 
! 				}
! 			}
! 			if (cstate->eol_type == EOL_CR)
! 			{
! 				if (memchr(&copy_raw_buf[raw_buf_ptr], '\n',
! 						   copy_buf_len - raw_buf_ptr) != NULL)
! 				{
! 					ereport(ERROR,
! 							(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 							 errmsg("literal newline found in data"),
! 							 errhint("Use \"\\n\" to represent newline.")));
  
! 				}
! 			}
! 			/* CRLN mode is handled below */
! 		}
! 
! 		/* Look for the first CR or LF */
! 		if (cstate->eol_type == EOL_UNKNOWN)
  		{
! 			char *nl,
! 				 *cr;
! 
! 			nl = memchr(&copy_raw_buf[raw_buf_ptr], '\n',
! 						copy_buf_len - raw_buf_ptr);
! 			if (nl == NULL)
! 				nl = &copy_raw_buf[copy_buf_len];
! 
! 			cr = memchr(&copy_raw_buf[raw_buf_ptr], '\r',
! 						copy_buf_len - raw_buf_ptr);
! 			if (cr == NULL)
! 				cr = &copy_raw_buf[copy_buf_len];
! 
! 			if (nl < cr)
! 			{
! 				raw_buf_ptr = nl - copy_raw_buf + 1;
! 
! 				cstate->eol_type = EOL_NL;
! 
! 				/* If reach here, we have found the line terminator */
! 				break;
! 			}
! 			if (cr < nl)
! 			{
! 				prev_raw_ptr = cr - copy_raw_buf;
! 				raw_buf_ptr = prev_raw_ptr + 1;
! 
! 				/*
! 				 * If need more data, go back to loop top to load it.
! 				 *
! 				 * Note that if we are at EOF, c will wind up as '\0' because
! 				 * of the guaranteed pad of raw_buf.
! 				 */
! 				IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
! 
! 				/* get next char */
! 				c = copy_raw_buf[raw_buf_ptr];
! 
! 				if (c == '\n')
! 				{
! 					raw_buf_ptr++;		/* eat newline */
! 					cstate->eol_type = EOL_CRNL;		/* in case not set yet */
! 				}
! 				else
! 				{
! 					/* found \r, but no \n */
! 					/*
! 					 * if we got here, it is the first line and we didn't find
! 					 * \n, so don't consume the peeked character
! 					 */
! 					cstate->eol_type = EOL_CR;
! 				}
! 			}
! 			/* If reach here, we have found the line terminator */
! 			break;
  		}
  
! 		/* OK to fetch a character */
! 		Assert(cstate->eol_type != EOL_UNKNOWN);
! 
! 		if (cstate->eol_type == EOL_NL)
! 		{
! 			cc = memchr(&copy_raw_buf[raw_buf_ptr], '\n',
! 						copy_buf_len - raw_buf_ptr);
! 			if (cc == NULL)
! 			{
! 				raw_buf_ptr = copy_buf_len;
! 				continue;
! 			}
! 			raw_buf_ptr = cc - copy_raw_buf + 1;
! 			/* If reach here, we have found the line terminator */
! 			break;
! 		}
! 		else
! 		{
! 			cc = memchr(&copy_raw_buf[raw_buf_ptr], '\r',
! 						copy_buf_len - raw_buf_ptr);
! 			if (cc == NULL)
! 			{
! 				raw_buf_ptr = copy_buf_len;
! 				continue;
! 			}
! 			prev_raw_ptr = cc - copy_raw_buf;
! 			raw_buf_ptr = prev_raw_ptr + 1;
! 
! 			if (cstate->eol_type == EOL_CRNL)
! 			{
! 				/*
! 				 * If need more data, go back to loop top to load it.
! 				 *
! 				 * Note that if we are at EOF, c will wind up as '\0' because
! 				 * of the guaranteed pad of raw_buf.
! 				 */
! 				IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
! 
! 				/* ensure that there's no stray newlines before the CR */
! 				if (memchr(&copy_raw_buf[cstate->raw_buf_index], '\n',
! 						   copy_buf_len - cstate->raw_buf_index) != NULL)
! 				{
! 					ereport(ERROR,
! 							(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 							 errmsg("literal newline found in data"),
! 							 errhint("Use \"\\n\" to represent newline.")));
! 
! 				}
! 
! 				/* get next char */
! 				c = copy_raw_buf[raw_buf_ptr];
! 
! 				if (c != '\n')
! 				{
! 					/* found \r, but no \n */
! 					ereport(ERROR,
! 							(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 							 errmsg("literal carriage return found in data"),
! 							 errhint("Use \"\\r\" to represent carriage return.")));
! 				}
! 
! 				raw_buf_ptr++;		/* eat newline */
! 			}
! 			/* If reach here, we have found the line terminator */
! 			break;
! 		}
! 	}							/* end of outer loop */
! 
! 	/*
! 	 * Transfer any still-uncopied data to line_buf.
! 	 */
! 	REFILL_LINEBUF;
  
  	return result;
  }
  
  /*
!  * CopyReadLineCSV - inner loop of CopyReadLine for CSV mode
   */
  static bool
! CopyReadLineCSV(CopyState cstate)
  {
  	char	   *copy_raw_buf;
  	int			raw_buf_ptr;
***************
*** 2276,2282 **** CopyReadLineText(CopyState cstate)
  	bool		need_data = false;
  	bool		hit_eof = false;
  	bool		result = false;
- 	char		mblen_str[2];
  
  	/* CSV variables */
  	bool		first_char_in_line = true;
--- 2569,2574 ----
***************
*** 2285,2291 **** CopyReadLineText(CopyState cstate)
  	char		quotec = '\0';
  	char		escapec = '\0';
  
- 	if (cstate->csv_mode)
  	{
  		quotec = cstate->quote[0];
  		escapec = cstate->escape[0];
--- 2577,2582 ----
***************
*** 2294,2301 **** CopyReadLineText(CopyState cstate)
  			escapec = '\0';
  	}
  
- 	mblen_str[1] = '\0';
- 
  	/*
  	 * The objective of this loop is to transfer the entire next input line
  	 * into line_buf.  Hence, we only care for detecting newlines (\r and/or
--- 2585,2590 ----
***************
*** 2365,2371 **** CopyReadLineText(CopyState cstate)
  		prev_raw_ptr = raw_buf_ptr;
  		c = copy_raw_buf[raw_buf_ptr++];
  
- 		if (cstate->csv_mode)
  		{
  			/*
  			 * If character is '\\' or '\r', we may need to look ahead below.
--- 2654,2659 ----
***************
*** 2407,2413 **** CopyReadLineText(CopyState cstate)
  		}
  
  		/* Process \r */
! 		if (c == '\r' && (!cstate->csv_mode || !in_quote))
  		{
  			/* Check for \r\n on first line, _and_ handle \r\n. */
  			if (cstate->eol_type == EOL_UNKNOWN ||
--- 2695,2701 ----
  		}
  
  		/* Process \r */
! 		if (c == '\r' && !in_quote)
  		{
  			/* Check for \r\n on first line, _and_ handle \r\n. */
  			if (cstate->eol_type == EOL_UNKNOWN ||
***************
*** 2593,2617 **** CopyReadLineText(CopyState cstate)
  		 * value, while in non-CSV mode, \. cannot be a data value.
  		 */
  not_end_of_copy:
- 
- 		/*
- 		 * Process all bytes of a multi-byte character as a group.
- 		 *
- 		 * We only support multi-byte sequences where the first byte has the
- 		 * high-bit set, so as an optimization we can avoid this block
- 		 * entirely if it is not set.
- 		 */
- 		if (cstate->encoding_embeds_ascii && IS_HIGHBIT_SET(c))
- 		{
- 			int			mblen;
- 
- 			mblen_str[0] = c;
- 			/* All our encodings only read the first byte to get the length */
- 			mblen = pg_encoding_mblen(cstate->client_encoding, mblen_str);
- 			IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(mblen - 1);
- 			IF_NEED_REFILL_AND_EOF_BREAK(mblen - 1);
- 			raw_buf_ptr += mblen - 1;
- 		}
  		first_char_in_line = false;
  	}							/* end of outer loop */
  
--- 2881,2886 ----
***************
*** 2623,2628 **** not_end_of_copy:
--- 2892,2898 ----
  	return result;
  }
  
+ 
  /*
   *	Return decimal value for a hexadecimal digit
   */
