diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml index 38424ad..1f9e4cc 100644 --- a/doc/src/sgml/ref/copy.sgml +++ b/doc/src/sgml/ref/copy.sgml @@ -40,7 +40,8 @@ COPY { table_name [ ( quote_character' ESCAPE 'escape_character' FORCE_QUOTE { ( column [, ...] ) | * } - FORCE_NOT_NULL ( column [, ...] ) + FORCE_NOT_NULL ( column [, ...] ) | + ENCODING encoding_name @@ -282,6 +283,18 @@ COPY { table_name [ ( + + ENCODING + + + Specifies that the file is encoded in encoding_name. If this option is + omitted, the current client encoding is used. See the Notes below + for more details. + + + + @@ -377,8 +390,9 @@ COPY count - Input data is interpreted according to the current client encoding, - and output data is encoded in the current client encoding, even + Input data is interpreted according to the ENCODING + option or the current client encoding, and output data is encoded + in ENCODING or the current client encoding, even if the data does not pass through the client but is read from or written to a file directly by the server. diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 841bf22..c571315 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -97,7 +97,9 @@ typedef struct CopyStateData bool fe_eof; /* true if detected end of copy data */ EolType eol_type; /* EOL type of input */ int client_encoding; /* remote side's character encoding */ - bool need_transcoding; /* client encoding diff from server? */ + int saved_encoding; /* client encoding to be restored */ + bool encoding_option; /* has encoding option? */ + bool need_transcoding; /* encoding diff from server? */ bool encoding_embeds_ascii; /* ASCII can be non-first byte?
*/ uint64 processed; /* # of tuples processed */ @@ -811,6 +813,22 @@ DoCopy(const CopyStmt *stmt, const char *queryString) errmsg("conflicting or redundant options"))); cstate->escape = defGetString(defel); } + else if (strcmp(defel->defname, "encoding") == 0) + { + if (cstate->encoding_option) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + if (PG_VALID_ENCODING(pg_char_to_encoding(defGetString(defel)))) + cstate->client_encoding = + pg_char_to_encoding(defGetString(defel)); + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("argument to option \"%s\" must be a valid encoding name", + defel->defname))); + cstate->encoding_option = true; + } else if (strcmp(defel->defname, "force_quote") == 0) { if (force_quote || force_quote_all) @@ -1169,11 +1187,25 @@ DoCopy(const CopyStmt *stmt, const char *queryString) cstate->processed = 0; /* - * Set up encoding conversion info. Even if the client and server - * encodings are the same, we must apply pg_client_to_server() to validate - * data in multibyte encodings. + * Set up encoding conversion info. If the ENCODING option is specified, + * use it instead of the client_encoding GUC. In either case, we use + * pg_client_to_server/server_to_client() for performance reasons. + * Be careful to restore the previous encoding setting on changing + * current client_encoding. + */ + if (cstate->encoding_option) + { + cstate->saved_encoding = pg_get_client_encoding(); + SetClientEncoding(cstate->client_encoding, true); + } + else + cstate->client_encoding = pg_get_client_encoding(); + + /* + * Even if the client and server encodings are the same, + * we must apply pg_client_to_server() to validate data in + * multibyte encodings.
*/ - cstate->client_encoding = pg_get_client_encoding(); cstate->need_transcoding = (cstate->client_encoding != GetDatabaseEncoding() || pg_database_encoding_max_length() > 1); @@ -1188,6 +1220,10 @@ DoCopy(const CopyStmt *stmt, const char *queryString) else DoCopyTo(cstate); /* copy from database to file */ + /* restore previous client_encoding if client_encoding has changed */ + if (cstate->encoding_option) + SetClientEncoding(cstate->saved_encoding, true); + /* * Close the relation or query. If reading, we can release the * AccessShareLock we got; if writing, we should hold the lock until end diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 660947c..3318dd0 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -2216,6 +2216,10 @@ copy_opt_item: { $$ = makeDefElem("force_not_null", (Node *)$4); } + | ENCODING ColId_or_Sconst + { + $$ = makeDefElem("encoding", (Node *)makeString($2)); + } ; /* The following exist for backward compatibility with very old versions */ diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out index 15cbe02..88d7d16 100644 --- a/src/test/regress/expected/copy2.out +++ b/src/test/regress/expected/copy2.out @@ -46,10 +46,10 @@ CONTEXT: COPY x, line 1: "2001 231 \N \N" COPY x from stdin; ERROR: extra data after last expected column CONTEXT: COPY x, line 1: "2002 232 40 50 60 70 80" --- various COPY options: delimiters, oids, NULL string +-- various COPY options: delimiters, oids, NULL string, encoding COPY x (b, c, d, e) from stdin with oids delimiter ',' null 'x'; COPY x from stdin WITH DELIMITER AS ';' NULL AS ''; -COPY x from stdin WITH DELIMITER AS ':' NULL AS E'\\X'; +COPY x from stdin WITH DELIMITER AS ':' NULL AS E'\\X' ENCODING sql_ascii; -- check results of copy in SELECT * FROM x; a | b | c | d | e @@ -187,7 +187,7 @@ COPY y TO stdout WITH CSV QUOTE '''' DELIMITER '|'; Jackson, Sam|\h It is "perfect".| ''| -COPY y TO stdout WITH CSV FORCE QUOTE col2 ESCAPE 
E'\\'; +COPY y TO stdout WITH CSV FORCE QUOTE col2 ESCAPE E'\\' ENCODING sql_ascii; "Jackson, Sam","\\h" "It is \"perfect\"."," " "", diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql index c2e8b03..d2683d1 100644 --- a/src/test/regress/sql/copy2.sql +++ b/src/test/regress/sql/copy2.sql @@ -72,7 +72,7 @@ COPY x from stdin; 2002 232 40 50 60 70 80 \. --- various COPY options: delimiters, oids, NULL string +-- various COPY options: delimiters, oids, NULL string, encoding COPY x (b, c, d, e) from stdin with oids delimiter ',' null 'x'; 500000,x,45,80,90 500001,x,\x,\\x,\\\x @@ -83,7 +83,7 @@ COPY x from stdin WITH DELIMITER AS ';' NULL AS ''; 3000;;c;; \. -COPY x from stdin WITH DELIMITER AS ':' NULL AS E'\\X'; +COPY x from stdin WITH DELIMITER AS ':' NULL AS E'\\X' ENCODING sql_ascii; 4000:\X:C:\X:\X 4001:1:empty:: 4002:2:null:\X:\X @@ -127,7 +127,7 @@ INSERT INTO y VALUES ('', NULL); COPY y TO stdout WITH CSV; COPY y TO stdout WITH CSV QUOTE '''' DELIMITER '|'; -COPY y TO stdout WITH CSV FORCE QUOTE col2 ESCAPE E'\\'; +COPY y TO stdout WITH CSV FORCE QUOTE col2 ESCAPE E'\\' ENCODING sql_ascii; COPY y TO stdout WITH CSV FORCE QUOTE *; -- Repeat above tests with new 9.0 option syntax