From 35d6ed6248bf66b2349f31b05d82bc4b07d5cde0 Mon Sep 17 00:00:00 2001 From: Shinya Kato Date: Thu, 26 Jun 2025 14:11:41 +0900 Subject: [PATCH v1] Add support for multi-line header skipping in COPY The HEADER option for COPY now accepts a non-negative integer value, allowing users to skip an arbitrary number of header lines when importing data with COPY FROM. For COPY TO, only 0 or 1 is allowed. --- doc/src/sgml/ref/copy.sgml | 5 +- src/backend/commands/copy.c | 81 ++++++++++++++++++++-------- src/backend/commands/copyfromparse.c | 11 ++-- src/backend/commands/copyto.c | 2 +- src/include/commands/copy.h | 15 +++--- src/test/regress/expected/copy.out | 25 ++++++++- src/test/regress/expected/copy2.out | 6 +++ src/test/regress/sql/copy.sql | 30 +++++++++++ src/test/regress/sql/copy2.sql | 3 ++ src/tools/pgindent/typedefs.list | 2 +- 10 files changed, 141 insertions(+), 39 deletions(-) diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml index 8433344e5b6..f858e73358c 100644 --- a/doc/src/sgml/ref/copy.sgml +++ b/doc/src/sgml/ref/copy.sgml @@ -37,7 +37,7 @@ COPY { table_name [ ( delimiter_character' NULL 'null_string' DEFAULT 'default_string' - HEADER [ boolean | MATCH ] + HEADER [ boolean | integer | MATCH ] QUOTE 'quote_character' ESCAPE 'escape_character' FORCE_QUOTE { ( column_name [, ...] ) | * } @@ -307,6 +307,9 @@ COPY { table_name [ ( true (or equivalent Boolean value). + If this option is set to a non-negative integer value, specified number of + lines are discarded. An integer value greater than 1 is only valid for + COPY FROM commands. If this option is set to MATCH, the number and names of the columns in the header line must match the actual column names of the table, in order; otherwise an error is raised. diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 74ae42b19a7..7e5ef6bb836 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -322,33 +322,49 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt, } /* - * Extract a CopyHeaderChoice value from a DefElem. This is like - * defGetBoolean() but also accepts the special value "match". + * Extract a CopyHeaderOption value from a DefElem. + * + * Parses the HEADER option for COPY. Accepts boolean values, integer values + * (number of lines to skip, via skip_lines), or the special value "match". + * + * For COPY TO, boolean values and skip_lines 0 or 1 are allowed. + * For COPY FROM, boolean values, skip_lines >= 0, and "match" are allowed. */ -static CopyHeaderChoice -defGetCopyHeaderChoice(DefElem *def, bool is_from) +static CopyHeaderOption +defGetCopyHeaderOption(DefElem *def, bool is_from) { + CopyHeaderOption option; + /* * If no parameter value given, assume "true" is meant. */ if (def->arg == NULL) - return COPY_HEADER_TRUE; + { + option.match = false; + option.skip_lines = 1; + return option; + } /* - * Allow 0, 1, "true", "false", "on", "off", or "match". + * Allow integer value, "true", "false", "on", "off", or "match". */ switch (nodeTag(def->arg)) { case T_Integer: - switch (intVal(def->arg)) { - case 0: - return COPY_HEADER_FALSE; - case 1: - return COPY_HEADER_TRUE; - default: - /* otherwise, error out below */ - break; + int skip_lines = intVal(def->arg); + if (skip_lines < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("HEADER must be non-negative integer"))); + if (!is_from && skip_lines > 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot use multi-line header in COPY TO"))); + + option.match = false; + option.skip_lines = skip_lines; + return option; } break; default: @@ -360,13 +376,29 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from) * grammar's opt_boolean_or_string production. */ if (pg_strcasecmp(sval, "true") == 0) - return COPY_HEADER_TRUE; + { + option.match = false; + option.skip_lines = 1; + return option; + } if (pg_strcasecmp(sval, "false") == 0) - return COPY_HEADER_FALSE; + { + option.match = false; + option.skip_lines = 0; + return option; + } if (pg_strcasecmp(sval, "on") == 0) - return COPY_HEADER_TRUE; + { + option.match = false; + option.skip_lines = 1; + return option; + } if (pg_strcasecmp(sval, "off") == 0) - return COPY_HEADER_FALSE; + { + option.match = false; + option.skip_lines = 0; + return option; + } if (pg_strcasecmp(sval, "match") == 0) { if (!is_from) @@ -374,16 +406,19 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from) (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot use \"%s\" with HEADER in COPY TO", sval))); - return COPY_HEADER_MATCH; + + option.match = true; + option.skip_lines = 1; + return option; } } break; } ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("%s requires a Boolean value or \"match\"", + errmsg("%s requires a Boolean value, integer value, or \"match\"", def->defname))); - return COPY_HEADER_FALSE; /* keep compiler quiet */ + return option; /* keep compiler quiet */ } /* @@ -566,7 +601,7 @@ ProcessCopyOptions(ParseState *pstate, if (header_specified) errorConflictingDefElem(defel, pstate); header_specified = true; - opts_out->header_line = defGetCopyHeaderChoice(defel, is_from); + opts_out->header = defGetCopyHeaderOption(defel, is_from); } else if (strcmp(defel->defname, "quote") == 0) { @@ -769,7 +804,7 @@ ProcessCopyOptions(ParseState *pstate, errmsg("COPY delimiter cannot be \"%s\"", opts_out->delim))); /* Check header */ - if (opts_out->binary && opts_out->header_line) + if (opts_out->binary && opts_out->header.skip_lines > 0) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), /*- translator: %s is the name of a COPY option, e.g. ON_ERROR */ diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c index f5fc346e201..1edf2af4d98 100644 --- a/src/backend/commands/copyfromparse.c +++ b/src/backend/commands/copyfromparse.c @@ -777,17 +777,20 @@ NextCopyFromRawFieldsInternal(CopyFromState cstate, char ***fields, int *nfields Assert(!cstate->opts.binary); /* on input check that the header line is correct if needed */ - if (cstate->cur_lineno == 0 && cstate->opts.header_line) + if (cstate->cur_lineno == 0 && cstate->opts.header.skip_lines) { ListCell *cur; TupleDesc tupDesc; tupDesc = RelationGetDescr(cstate->rel); - cstate->cur_lineno++; - done = CopyReadLine(cstate, is_csv); + for (int i = 0; i < cstate->opts.header.skip_lines; i++) + { + cstate->cur_lineno++; + done = CopyReadLine(cstate, is_csv); + } - if (cstate->opts.header_line == COPY_HEADER_MATCH) + if (cstate->opts.header.match) { int fldnum; diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c index ea6f18f2c80..d897d003a8d 100644 --- a/src/backend/commands/copyto.c +++ b/src/backend/commands/copyto.c @@ -199,7 +199,7 @@ CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc) cstate->file_encoding); /* if a header has been requested send the line */ - if (cstate->opts.header_line) + if (cstate->opts.header.skip_lines == 1) { ListCell *cur; bool hdr_delim = false; diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h index 06dfdfef721..65b5ecc6af8 100644 --- a/src/include/commands/copy.h +++ b/src/include/commands/copy.h @@ -20,15 +20,14 @@ #include "tcop/dest.h" /* - * Represents whether a header line should be present, and whether it must - * match the actual names (which implies "true"). + * Represents how many lines to skip, and whether it must match the actual names + * (which implies "true"). */ -typedef enum CopyHeaderChoice +typedef struct CopyHeaderOption { - COPY_HEADER_FALSE = 0, - COPY_HEADER_TRUE, - COPY_HEADER_MATCH, -} CopyHeaderChoice; + bool match; /* header line must match actual names? */ + int skip_lines; /* number of lines to skip before data */ +} CopyHeaderOption; /* * Represents where to save input processing errors. More values to be added @@ -64,7 +63,7 @@ typedef struct CopyFormatOptions bool binary; /* binary format? */ bool freeze; /* freeze rows on loading? */ bool csv_mode; /* Comma Separated Value format? */ - CopyHeaderChoice header_line; /* header line? */ + CopyHeaderOption header; /* header line? */ char *null_print; /* NULL marker string (server encoding!) */ int null_print_len; /* length of same */ char *null_print_client; /* same converted to file encoding */ diff --git a/src/test/regress/expected/copy.out b/src/test/regress/expected/copy.out index 8d5a06563c4..03b363eaf34 100644 --- a/src/test/regress/expected/copy.out +++ b/src/test/regress/expected/copy.out @@ -81,6 +81,29 @@ copy copytest4 to stdout (header); c1 colname with tab: \t 1 a 2 b +-- test multi-line header line feature +create temp table copytest5 (c1 int); +copy copytest5 from stdin (format csv, header 2); +copy copytest5 to stdout (header); +c1 +1 +2 +truncate copytest5; +copy copytest5 from stdin (format csv, header 4); +select count(*) from copytest5; + count +------- + 0 +(1 row) + +truncate copytest5; +copy copytest5 from stdin (format csv, header 5); +select count(*) from copytest5; + count +------- + 0 +(1 row) + -- test copy from with a partitioned table create table parted_copytest ( a int, @@ -224,7 +247,7 @@ alter table header_copytest add column c text; copy header_copytest to stdout with (header match); ERROR: cannot use "match" with HEADER in COPY TO copy header_copytest from stdin with (header wrong_choice); -ERROR: header requires a Boolean value or "match" +ERROR: header requires a Boolean value, integer value, or "match" -- works copy header_copytest from stdin with (header match); copy header_copytest (c, a, b) from stdin with (header match); diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out index 64ea33aeae8..4c957559477 100644 --- a/src/test/regress/expected/copy2.out +++ b/src/test/regress/expected/copy2.out @@ -132,6 +132,12 @@ COPY x from stdin with (reject_limit 1); ERROR: COPY REJECT_LIMIT requires ON_ERROR to be set to IGNORE COPY x from stdin with (on_error ignore, reject_limit 0); ERROR: REJECT_LIMIT (0) must be greater than zero +COPY x from stdin with (header -1); +ERROR: HEADER must be non-negative integer +COPY x from stdin with (header 2.5); +ERROR: header requires a Boolean value, integer value, or "match" +COPY x to stdout with (header 2); +ERROR: cannot use multi-line header in COPY TO -- too many columns in column list: should fail COPY x (a, b, c, d, e, d, c) from stdin; ERROR: column "d" specified more than once diff --git a/src/test/regress/sql/copy.sql b/src/test/regress/sql/copy.sql index f0b88a23db8..a1316c73bac 100644 --- a/src/test/regress/sql/copy.sql +++ b/src/test/regress/sql/copy.sql @@ -94,6 +94,36 @@ this is just a line full of junk that would error out if parsed copy copytest4 to stdout (header); +-- test multi-line header line feature + +create temp table copytest5 (c1 int); + +copy copytest5 from stdin (format csv, header 2); +this is a first header line. +this is a second header line. +1 +2 +\. +copy copytest5 to stdout (header); + +truncate copytest5; +copy copytest5 from stdin (format csv, header 4); +this is a first header line. +this is a second header line. +1 +2 +\. +select count(*) from copytest5; + +truncate copytest5; +copy copytest5 from stdin (format csv, header 5); +this is a first header line. +this is a second header line. +1 +2 +\. +select count(*) from copytest5; + -- test copy from with a partitioned table create table parted_copytest ( a int, diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql index 45273557ce0..cef45868db5 100644 --- a/src/test/regress/sql/copy2.sql +++ b/src/test/regress/sql/copy2.sql @@ -90,6 +90,9 @@ COPY x to stdout (format BINARY, on_error unsupported); COPY x from stdin (log_verbosity unsupported); COPY x from stdin with (reject_limit 1); COPY x from stdin with (on_error ignore, reject_limit 0); +COPY x from stdin with (header -1); +COPY x from stdin with (header 2.5); +COPY x to stdout with (header 2); -- too many columns in column list: should fail COPY x (a, b, c, d, e, d, c) from stdin; diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 32d6e718adc..49a76fac9ad 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -521,7 +521,7 @@ CopyFormatOptions CopyFromRoutine CopyFromState CopyFromStateData -CopyHeaderChoice +CopyHeaderOption CopyInsertMethod CopyLogVerbosityChoice CopyMethod -- 2.39.3