START/END line number for COPY FROM
Hi,
Currently we can skip header line on COPY FROM but having the ability to
skip and stop copying at any line can use to divide long copy operation and
enable to copy a subset of the file and skipping footer. Attach is a patch
for it
Regards
Surafel
Attachments:
copy_from_start_stop_line_v1.patchtext/x-patch; charset=US-ASCII; name=copy_from_start_stop_line_v1.patchDownload
diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 411941ed31..86f9a6a905 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -43,6 +43,8 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
FORCE_NOT_NULL ( <replaceable class="parameter">column_name</replaceable> [, ...] )
FORCE_NULL ( <replaceable class="parameter">column_name</replaceable> [, ...] )
ENCODING '<replaceable class="parameter">encoding_name</replaceable>'
+ START <replaceable class="parameter">starting_line_number</replaceable>
+ END <replaceable class="parameter">ending_line_number</replaceable>
</synopsis>
</refsynopsisdiv>
@@ -353,6 +355,24 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
</listitem>
</varlistentry>
+ <varlistentry>
+ <term><literal>START</literal></term>
+ <listitem>
+ <para>
+ Specifies the line number to begin copying.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><literal>END</literal></term>
+ <listitem>
+ <para>
+ Specifies the line number to end copying.
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
</refsect1>
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 4311e16007..4d07716a9f 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -121,6 +121,8 @@ typedef struct CopyStateData
int file_encoding; /* file or remote side's character encoding */
bool need_transcoding; /* file encoding diff from server? */
bool encoding_embeds_ascii; /* ASCII can be non-first byte? */
+ int start_postion; /* copying star line */
+ int end_postion; /* copying end line */
/* parameters from the COPY command */
Relation rel; /* relation to copy to or from */
@@ -347,6 +349,7 @@ static void CopySendInt32(CopyState cstate, int32 val);
static bool CopyGetInt32(CopyState cstate, int32 *val);
static void CopySendInt16(CopyState cstate, int16 val);
static bool CopyGetInt16(CopyState cstate, int16 *val);
+static void skipLines(CopyState cstate);
/*
@@ -1223,6 +1226,34 @@ ProcessCopyOptions(ParseState *pstate,
defel->defname),
parser_errposition(pstate, defel->location)));
}
+ else if (strcmp(defel->defname, "start") == 0)
+ {
+ if (cstate->start_postion)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting or redundant options"),
+ parser_errposition(pstate, defel->location)));
+ cstate->start_postion = defGetInt64(defel);
+ if (cstate->start_postion < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid START line number"),
+ parser_errposition(pstate, defel->location)));
+ }
+ else if (strcmp(defel->defname, "end") == 0)
+ {
+ if (cstate->end_postion)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting or redundant options"),
+ parser_errposition(pstate, defel->location)));
+ cstate->end_postion = defGetInt64(defel);
+ if (cstate->end_postion < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid END line number"),
+ parser_errposition(pstate, defel->location)));
+ }
else
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
@@ -1373,6 +1404,13 @@ ProcessCopyOptions(ParseState *pstate,
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("CSV quote character must not appear in the NULL specification")));
+
+ if (cstate->end_postion != 0 &&
+ cstate->start_postion > cstate->end_postion)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("START line number can not be greater then END line number")));
+
}
/*
@@ -2317,6 +2355,7 @@ CopyFrom(CopyState cstate)
uint64 lastPartitionSampleLineNo = 0;
uint64 nPartitionChanges = 0;
double avgTuplesPerPartChange = 0;
+ uint64 end_line = 1;
Assert(cstate->rel);
@@ -2619,6 +2658,20 @@ CopyFrom(CopyState cstate)
has_instead_insert_row_trig = (resultRelInfo->ri_TrigDesc &&
resultRelInfo->ri_TrigDesc->trig_insert_instead_row);
+ if (cstate->start_postion)
+ {
+ end_line = cstate->start_postion;
+ skipLines(cstate);
+ }
+
+ /* throw the header line away means start copying at second line */
+ if (cstate->start_postion == 0 && cstate->header_line)
+ {
+ cstate->start_postion = 2;
+ end_line = cstate->start_postion;
+ skipLines(cstate);
+ }
+
/*
* Check BEFORE STATEMENT insertion triggers. It's debatable whether we
* should do this for COPY, since it's not really an "INSERT" statement as
@@ -2644,6 +2697,9 @@ CopyFrom(CopyState cstate)
TupleTableSlot *slot;
bool skip_tuple;
+ if (cstate->end_postion !=0 && cstate->end_postion < end_line++)
+ break;
+
CHECK_FOR_INTERRUPTS();
if (nBufferedTuples == 0)
@@ -3394,14 +3450,6 @@ NextCopyFromRawFields(CopyState cstate, char ***fields, int *nfields)
/* only available for text or csv input */
Assert(!cstate->binary);
- /* on input just throw the header line away */
- if (cstate->cur_lineno == 0 && cstate->header_line)
- {
- cstate->cur_lineno++;
- if (CopyReadLine(cstate))
- return false; /* done */
- }
-
cstate->cur_lineno++;
/* Actually read the line into memory here */
@@ -4086,6 +4134,22 @@ not_end_of_copy:
return result;
}
+/*
+ * go to starting postion.
+ */
+static void
+skipLines(CopyState cstate)
+{
+ for (int i=1; i < cstate->start_postion; i++)
+ {
+ if (CopyReadLineText(cstate))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("START line can not be greater than the number of record in the file")));
+
+ }
+}
+
/*
* Return decimal value for a hexadecimal digit
*/
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 2c2208ffb7..a16b91cf11 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -3093,6 +3093,14 @@ copy_opt_item:
{
$$ = makeDefElem("encoding", (Node *)makeString($2), @1);
}
+ | START Iconst
+ {
+ $$ = makeDefElem("start", (Node *)makeInteger($2), @1);
+ }
+ | END_P Iconst
+ {
+ $$ = makeDefElem("end", (Node *)makeInteger($2), @1);
+ }
;
/* The following exist for backward compatibility with very old versions */
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index b5cadce3ef..ae2d11226f 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -41,4 +41,5 @@ extern uint64 CopyFrom(CopyState cstate);
extern DestReceiver *CreateCopyDestReceiver(void);
+
#endif /* COPY_H */
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index 19bb538411..d01e3f5cf0 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -49,6 +49,9 @@ CONTEXT: COPY x, line 1: "2002 232 40 50 60 70 80"
COPY x (b, c, d, e) from stdin delimiter ',' null 'x';
COPY x from stdin WITH DELIMITER AS ';' NULL AS '';
COPY x from stdin WITH DELIMITER AS ':' NULL AS E'\\X' ENCODING 'sql_ascii';
+COPY x (a, b, c, d, e) from stdin (START 2);
+COPY x (a, b, c, d, e) from stdin (END 3);
+COPY x (a, b, c, d, e) from stdin (START 2 , END 3);
-- check results of copy in
SELECT * FROM x;
a | b | c | d | e
@@ -73,12 +76,21 @@ SELECT * FROM x;
4006 | 6 | BackslashN | \N | before trigger fired
4007 | 7 | XX | XX | before trigger fired
4008 | 8 | Delimiter | : | before trigger fired
+ 50002 | 23 | 33 | 43 | before trigger fired
+ 50003 | 24 | 34 | 44 | before trigger fired
+ 50004 | 25 | 35 | 45 | before trigger fired
+ 50005 | 26 | 36 | 46 | before trigger fired
+ 60001 | 22 | 32 | 42 | before trigger fired
+ 60002 | 23 | 33 | 43 | before trigger fired
+ 60003 | 24 | 34 | 44 | before trigger fired
+ 70002 | 23 | 33 | 43 | before trigger fired
+ 70003 | 24 | 34 | 44 | before trigger fired
1 | 1 | stuff | test_1 | after trigger fired
2 | 2 | stuff | test_2 | after trigger fired
3 | 3 | stuff | test_3 | after trigger fired
4 | 4 | stuff | test_4 | after trigger fired
5 | 5 | stuff | test_5 | after trigger fired
-(25 rows)
+(34 rows)
-- check copy out
COPY x TO stdout;
@@ -102,6 +114,15 @@ COPY x TO stdout;
4006 6 BackslashN \\N before trigger fired
4007 7 XX XX before trigger fired
4008 8 Delimiter : before trigger fired
+50002 23 33 43 before trigger fired
+50003 24 34 44 before trigger fired
+50004 25 35 45 before trigger fired
+50005 26 36 46 before trigger fired
+60001 22 32 42 before trigger fired
+60002 23 33 43 before trigger fired
+60003 24 34 44 before trigger fired
+70002 23 33 43 before trigger fired
+70003 24 34 44 before trigger fired
1 1 stuff test_1 after trigger fired
2 2 stuff test_2 after trigger fired
3 3 stuff test_3 after trigger fired
@@ -128,6 +149,15 @@ N before trigger fired
BackslashN before trigger fired
XX before trigger fired
Delimiter before trigger fired
+33 before trigger fired
+34 before trigger fired
+35 before trigger fired
+36 before trigger fired
+32 before trigger fired
+33 before trigger fired
+34 before trigger fired
+33 before trigger fired
+34 before trigger fired
stuff after trigger fired
stuff after trigger fired
stuff after trigger fired
@@ -154,6 +184,15 @@ I'm null before trigger fired
6 before trigger fired
7 before trigger fired
8 before trigger fired
+23 before trigger fired
+24 before trigger fired
+25 before trigger fired
+26 before trigger fired
+22 before trigger fired
+23 before trigger fired
+24 before trigger fired
+23 before trigger fired
+24 before trigger fired
1 after trigger fired
2 after trigger fired
3 after trigger fired
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index e36df8858e..92e576e0b2 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -95,6 +95,30 @@ COPY x from stdin WITH DELIMITER AS ':' NULL AS E'\\X' ENCODING 'sql_ascii';
4008:8:Delimiter:\::\:
\.
+COPY x (a, b, c, d, e) from stdin (START 2);
+50001 22 32 42 52
+50002 23 33 43 53
+50003 24 34 44 54
+50004 25 35 45 55
+50005 26 36 46 56
+\.
+
+COPY x (a, b, c, d, e) from stdin (END 3);
+60001 22 32 42 52
+60002 23 33 43 53
+60003 24 34 44 54
+60004 25 35 45 55
+60005 26 36 46 56
+\.
+
+COPY x (a, b, c, d, e) from stdin (START 2 , END 3);
+70001 22 32 42 52
+70002 23 33 43 53
+70003 24 34 44 54
+70004 25 35 45 55
+70005 26 36 46 56
+\.
+
-- check results of copy in
SELECT * FROM x;
Surafel Temesgen <surafel3000@gmail.com> writes:
Currently we can skip header line on COPY FROM but having the ability to
skip and stop copying at any line can use to divide long copy operation and
enable to copy a subset of the file and skipping footer. Attach is a patch
for it
I do not think this is a good idea. We have resisted attempts to add
ETL-like features to COPY on the grounds that it would add complexity
and cost performance, and that that's not what COPY is for. This
seems to fall squarely in the domain of something you should be doing
with another tool.
regards, tom lane
On 20/12/2018 14:02, Surafel Temesgen wrote:
Currently we can skip header line on COPY FROM but having the ability to
skip and stop copying at any line can use to divide long copy operation
and enable to copy a subset of the file and skipping footer.
It seems a bit fragile to me if I want to skip a footer and need to
figure out the total line count, subtract one, and then oh, was it zero-
or one-based.
--
Peter Eisentraut http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
Hi,
On Fri, Jan 4, 2019 at 5:37 PM Peter Eisentraut <
peter.eisentraut@2ndquadrant.com> wrote:
It seems a bit fragile to me if I want to skip a footer and need to
figure out the total line count, subtract one, and then oh, was it zero-
or one-based.
But normally we don't say start copying from line number 0
regards
Surafel
On Fri, 21 Dec 2018 at 02:02, Surafel Temesgen <surafel3000@gmail.com> wrote:
Currently we can skip header line on COPY FROM but having the ability to skip and stop copying at any line can use to divide long copy operation and enable to copy a subset of the file and skipping footer. Attach is a patch for it
I'm struggling a bit to see the sense in this. If you really want to
improve the performance of a long copy, then I think it makes more
sense to have performed the backup in multiple pieces in the first
place. Having the database read the input stream and ignore the first
N lines sounds like a bit of a waste of effort, and effort that
wouldn't be required if the COPY TO had been done in multiple pieces.
--
David Rowley http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
Hi,
On Sat, Jan 5, 2019 at 1:10 PM David Rowley <david.rowley@2ndquadrant.com>
wrote:
On Fri, 21 Dec 2018 at 02:02, Surafel Temesgen <surafel3000@gmail.com>
wrote:Currently we can skip header line on COPY FROM but having the ability to
skip and stop copying at any line can use to divide long copy operation and
enable to copy a subset of the file and skipping footer. Attach is a patch
for itI'm struggling a bit to see the sense in this. If you really want to
improve the performance of a long copy, then I think it makes more
sense to have performed the backup in multiple pieces in the first
place.
it is not always the case to have in control of the data importing it may
came from
external system
regards
Surafel
On 06/01/2019 12:59, Surafel Temesgen wrote:
it is not always the case to have in control of the data importing it
may came from
external system
But the problem that David described remains: If your data loading
requirement is so complicated that you need to load the file in chunks,
then doing it by line numbers will require you to skip over the leading
lines at every subsequent chunk. That's not going to be good for larger
files.
I think your problem needs a different solution.
--
Peter Eisentraut http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services