diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 38424ad..1f9e4cc 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -40,7 +40,8 @@ COPY { table_name [ ( quote_character'
ESCAPE 'escape_character'
FORCE_QUOTE { ( column [, ...] ) | * }
- FORCE_NOT_NULL ( column [, ...] )
+ FORCE_NOT_NULL ( column [, ...] ) |
+ ENCODING encoding_name
@@ -282,6 +283,18 @@ COPY { table_name [ (
+
+ ENCODING>
+
+
+ Specifies that the file is encoded in encoding_name. If this option is
+ omitted, the current client encoding is used. See the Notes below
+ for more details.
+
+
+
+
@@ -377,8 +390,9 @@ COPY count
- Input data is interpreted according to the current client encoding,
- and output data is encoded in the current client encoding, even
+ Input data is interpreted according to the ENCODING
+ option or, if it is omitted, the current client encoding, and output data
+ is encoded in ENCODING or the current client encoding, even
if the data does not pass through the client but is read from or
written to a file directly by the server.
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 841bf22..c571315 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -97,7 +97,9 @@ typedef struct CopyStateData
bool fe_eof; /* true if detected end of copy data */
EolType eol_type; /* EOL type of input */
int client_encoding; /* remote side's character encoding */
- bool need_transcoding; /* client encoding diff from server? */
+ int saved_encoding; /* client encoding to be restored */
+ bool encoding_option; /* has encoding option? */
+ bool need_transcoding; /* encoding diff from server? */
bool encoding_embeds_ascii; /* ASCII can be non-first byte? */
uint64 processed; /* # of tuples processed */
@@ -811,6 +813,22 @@ DoCopy(const CopyStmt *stmt, const char *queryString)
errmsg("conflicting or redundant options")));
cstate->escape = defGetString(defel);
}
+ else if (strcmp(defel->defname, "encoding") == 0)
+ {
+ if (cstate->encoding_option)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting or redundant options")));
+ if (PG_VALID_ENCODING(pg_char_to_encoding(defGetString(defel))))
+ cstate->client_encoding =
+ pg_char_to_encoding(defGetString(defel));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("argument to option \"%s\" must be a valid encoding name",
+ defel->defname)));
+ cstate->encoding_option = true;
+ }
else if (strcmp(defel->defname, "force_quote") == 0)
{
if (force_quote || force_quote_all)
@@ -1169,11 +1187,25 @@ DoCopy(const CopyStmt *stmt, const char *queryString)
cstate->processed = 0;
/*
- * Set up encoding conversion info. Even if the client and server
- * encodings are the same, we must apply pg_client_to_server() to validate
- * data in multibyte encodings.
+ * Set up encoding conversion info. If the ENCODING option is specified,
+ * use it instead of the client_encoding GUC. In either case, we use
+ * pg_client_to_server()/pg_server_to_client() for performance reasons.
+ * Be careful to restore the previous encoding setting after changing
+ * the current client_encoding.
+ */
+ if (cstate->encoding_option)
+ {
+ cstate->saved_encoding = pg_get_client_encoding();
+ SetClientEncoding(cstate->client_encoding, true);
+ }
+ else
+ cstate->client_encoding = pg_get_client_encoding();
+
+ /*
+ * Even if the client and server encodings are the same,
+ * we must apply pg_client_to_server() to validate data in
+ * multibyte encodings.
*/
- cstate->client_encoding = pg_get_client_encoding();
cstate->need_transcoding =
(cstate->client_encoding != GetDatabaseEncoding() ||
pg_database_encoding_max_length() > 1);
@@ -1188,6 +1220,10 @@ DoCopy(const CopyStmt *stmt, const char *queryString)
else
DoCopyTo(cstate); /* copy from database to file */
+ /* restore the client_encoding we overrode above; NOTE(review): apparently not reached if COPY exits via ereport(ERROR) -- confirm */
+ if (cstate->encoding_option)
+ SetClientEncoding(cstate->saved_encoding, true);
+
/*
* Close the relation or query. If reading, we can release the
* AccessShareLock we got; if writing, we should hold the lock until end
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 660947c..3318dd0 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -2216,6 +2216,10 @@ copy_opt_item:
{
$$ = makeDefElem("force_not_null", (Node *)$4);
}
+ | ENCODING ColId_or_Sconst
+ {
+ $$ = makeDefElem("encoding", (Node *)makeString($2));
+ }
;
/* The following exist for backward compatibility with very old versions */
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index 15cbe02..88d7d16 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -46,10 +46,10 @@ CONTEXT: COPY x, line 1: "2001 231 \N \N"
COPY x from stdin;
ERROR: extra data after last expected column
CONTEXT: COPY x, line 1: "2002 232 40 50 60 70 80"
--- various COPY options: delimiters, oids, NULL string
+-- various COPY options: delimiters, oids, NULL string, encoding
COPY x (b, c, d, e) from stdin with oids delimiter ',' null 'x';
COPY x from stdin WITH DELIMITER AS ';' NULL AS '';
-COPY x from stdin WITH DELIMITER AS ':' NULL AS E'\\X';
+COPY x from stdin WITH DELIMITER AS ':' NULL AS E'\\X' ENCODING sql_ascii;
-- check results of copy in
SELECT * FROM x;
a | b | c | d | e
@@ -187,7 +187,7 @@ COPY y TO stdout WITH CSV QUOTE '''' DELIMITER '|';
Jackson, Sam|\h
It is "perfect".|
''|
-COPY y TO stdout WITH CSV FORCE QUOTE col2 ESCAPE E'\\';
+COPY y TO stdout WITH CSV FORCE QUOTE col2 ESCAPE E'\\' ENCODING sql_ascii;
"Jackson, Sam","\\h"
"It is \"perfect\"."," "
"",
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index c2e8b03..d2683d1 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -72,7 +72,7 @@ COPY x from stdin;
2002 232 40 50 60 70 80
\.
--- various COPY options: delimiters, oids, NULL string
+-- various COPY options: delimiters, oids, NULL string, encoding
COPY x (b, c, d, e) from stdin with oids delimiter ',' null 'x';
500000,x,45,80,90
500001,x,\x,\\x,\\\x
@@ -83,7 +83,7 @@ COPY x from stdin WITH DELIMITER AS ';' NULL AS '';
3000;;c;;
\.
-COPY x from stdin WITH DELIMITER AS ':' NULL AS E'\\X';
+COPY x from stdin WITH DELIMITER AS ':' NULL AS E'\\X' ENCODING sql_ascii;
4000:\X:C:\X:\X
4001:1:empty::
4002:2:null:\X:\X
@@ -127,7 +127,7 @@ INSERT INTO y VALUES ('', NULL);
COPY y TO stdout WITH CSV;
COPY y TO stdout WITH CSV QUOTE '''' DELIMITER '|';
-COPY y TO stdout WITH CSV FORCE QUOTE col2 ESCAPE E'\\';
+COPY y TO stdout WITH CSV FORCE QUOTE col2 ESCAPE E'\\' ENCODING sql_ascii;
COPY y TO stdout WITH CSV FORCE QUOTE *;
-- Repeat above tests with new 9.0 option syntax