From 16877d4cdd64db5f85bed9cd559e618d8211e598 Mon Sep 17 00:00:00 2001
From: Atsushi Torikoshi <torikoshia@oss.nttdata.com>
Date: Mon, 27 Feb 2023 12:02:16 +0900
Subject: [PATCH v1] Add COPY option IGNORE_DATATYPE_ERRORS

---
 src/backend/commands/copy.c              |  8 ++++++++
 src/backend/commands/copyfrom.c          | 11 +++++++++++
 src/backend/commands/copyfromparse.c     | 12 ++++++++++--
 src/backend/parser/gram.y                |  8 +++++++-
 src/bin/psql/tab-complete.c              |  3 ++-
 src/include/commands/copy.h              |  1 +
 src/include/commands/copyfrom_internal.h |  2 ++
 src/include/parser/kwlist.h              |  1 +
 src/test/regress/expected/copy2.out      | 14 ++++++++++++++
 src/test/regress/sql/copy2.sql           | 12 ++++++++++++
 10 files changed, 68 insertions(+), 4 deletions(-)

diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index e34f583ea7..2f1cfb3f4d 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -410,6 +410,7 @@ ProcessCopyOptions(ParseState *pstate,
 	bool		format_specified = false;
 	bool		freeze_specified = false;
 	bool		header_specified = false;
+	bool		ignore_datatype_errors_specified= false;
 	ListCell   *option;
 
 	/* Support external use for option sanity checking */
@@ -449,6 +450,13 @@ ProcessCopyOptions(ParseState *pstate,
 			freeze_specified = true;
 			opts_out->freeze = defGetBoolean(defel);
 		}
+		else if (strcmp(defel->defname, "ignore_datatype_errors") == 0)
+		{
+			if (ignore_datatype_errors_specified)
+				errorConflictingDefElem(defel, pstate);
+			ignore_datatype_errors_specified= true;
+			opts_out->ignore_datatype_errors = defGetBoolean(defel);
+		}
 		else if (strcmp(defel->defname, "delimiter") == 0)
 		{
 			if (opts_out->delim)
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index af52faca6d..24eec6a27d 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -959,6 +959,7 @@ CopyFrom(CopyFromState cstate)
 	{
 		TupleTableSlot *myslot;
 		bool		skip_tuple;
+		ErrorSaveContext escontext = {T_ErrorSaveContext};
 
 		CHECK_FOR_INTERRUPTS();
 
@@ -991,10 +992,20 @@ CopyFrom(CopyFromState cstate)
 
 		ExecClearTuple(myslot);
 
+		if (cstate->opts.ignore_datatype_errors)
+		{
+			escontext.details_wanted = true;
+			cstate->escontext = escontext;
+		}
+
 		/* Directly store the values/nulls array in the slot */
 		if (!NextCopyFrom(cstate, econtext, myslot->tts_values, myslot->tts_isnull))
 			break;
 
+		/* Soft error occured, skip this tuple */
+		if(cstate->escontext.error_occurred)
+			continue;
+
 		ExecStoreVirtualTuple(myslot);
 
 		/*
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index 91b564c2bc..12b1780fd6 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -70,6 +70,7 @@
 #include "libpq/pqformat.h"
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
+#include "nodes/miscnodes.h"
 #include "pgstat.h"
 #include "port/pg_bswap.h"
 #include "utils/builtins.h"
@@ -938,10 +939,17 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
 
 			cstate->cur_attname = NameStr(att->attname);
 			cstate->cur_attval = string;
-			values[m] = InputFunctionCall(&in_functions[m],
+			if (!InputFunctionCallSafe(&in_functions[m],
 										  string,
 										  typioparams[m],
-										  att->atttypmod);
+										  att->atttypmod,
+										  (Node *) &cstate->escontext,
+										  &values[m]))
+			{
+					ereport(WARNING,
+							  errmsg("%s", cstate->escontext.error_data->message));
+					return true;
+			}
 			if (string != NULL)
 				nulls[m] = false;
 			cstate->cur_attname = NULL;
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index a0138382a1..d79d293c0d 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -701,7 +701,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 
 	HANDLER HAVING HEADER_P HOLD HOUR_P
 
-	IDENTITY_P IF_P ILIKE IMMEDIATE IMMUTABLE IMPLICIT_P IMPORT_P IN_P INCLUDE
+	IDENTITY_P IF_P IGNORE_DATATYPE_ERRORS ILIKE IMMEDIATE IMMUTABLE IMPLICIT_P IMPORT_P IN_P INCLUDE
 	INCLUDING INCREMENT INDEX INDEXES INHERIT INHERITS INITIALLY INLINE_P
 	INNER_P INOUT INPUT_P INSENSITIVE INSERT INSTEAD INT_P INTEGER
 	INTERSECT INTERVAL INTO INVOKER IS ISNULL ISOLATION
@@ -3378,6 +3378,10 @@ copy_opt_item:
 				{
 					$$ = makeDefElem("freeze", (Node *) makeBoolean(true), @1);
 				}
+			| IGNORE_DATATYPE_ERRORS
+				{
+					$$ = makeDefElem("ignore_datatype_errors", (Node *)makeBoolean(true), @1);
+				}
 			| DELIMITER opt_as Sconst
 				{
 					$$ = makeDefElem("delimiter", (Node *) makeString($3), @1);
@@ -16821,6 +16825,7 @@ unreserved_keyword:
 			| HOUR_P
 			| IDENTITY_P
 			| IF_P
+			| IGNORE_DATATYPE_ERRORS
 			| IMMEDIATE
 			| IMMUTABLE
 			| IMPLICIT_P
@@ -17375,6 +17380,7 @@ bare_label_keyword:
 			| HOLD
 			| IDENTITY_P
 			| IF_P
+			| IGNORE_DATATYPE_ERRORS
 			| ILIKE
 			| IMMEDIATE
 			| IMMUTABLE
diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c
index 5e1882eaea..a363351d3b 100644
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -2857,7 +2857,8 @@ psql_completion(const char *text, int start, int end)
 	else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "("))
 		COMPLETE_WITH("FORMAT", "FREEZE", "DELIMITER", "NULL",
 					  "HEADER", "QUOTE", "ESCAPE", "FORCE_QUOTE",
-					  "FORCE_NOT_NULL", "FORCE_NULL", "ENCODING");
+					  "FORCE_NOT_NULL", "FORCE_NULL", "ENCODING",
+					  "IGNORE_DATATYPE_ERRORS");
 
 	/* Complete COPY <sth> FROM|TO filename WITH (FORMAT */
 	else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(", "FORMAT"))
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index 8e5f6ff148..a7eb0f8883 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -42,6 +42,7 @@ typedef struct CopyFormatOptions
 								 * -1 if not specified */
 	bool		binary;			/* binary format? */
 	bool		freeze;			/* freeze rows on loading? */
+	bool		ignore_datatype_errors;  /* ignore rows with datatype errors */
 	bool		csv_mode;		/* Comma Separated Value format? */
 	CopyHeaderChoice header_line;	/* header line? */
 	char	   *null_print;		/* NULL marker string (server encoding!) */
diff --git a/src/include/commands/copyfrom_internal.h b/src/include/commands/copyfrom_internal.h
index 7b1c4327bd..d74c633481 100644
--- a/src/include/commands/copyfrom_internal.h
+++ b/src/include/commands/copyfrom_internal.h
@@ -16,6 +16,7 @@
 
 #include "commands/copy.h"
 #include "commands/trigger.h"
+#include "nodes/miscnodes.h"
 
 /*
  * Represents the different source cases we need to worry about at
@@ -94,6 +95,7 @@ typedef struct CopyFromStateData
 	AttrNumber	num_defaults;
 	FmgrInfo   *in_functions;	/* array of input functions for each attrs */
 	Oid		   *typioparams;	/* array of element types for in_functions */
+	ErrorSaveContext escontext; /* soft error trapper during in_functions execution */
 	int		   *defmap;			/* array of default att numbers */
 	ExprState **defexprs;		/* array of default att expressions */
 	bool		volatile_defexprs;	/* is any of defexprs volatile? */
diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h
index bb36213e6f..1d7f9efbc0 100644
--- a/src/include/parser/kwlist.h
+++ b/src/include/parser/kwlist.h
@@ -196,6 +196,7 @@ PG_KEYWORD("hold", HOLD, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("hour", HOUR_P, UNRESERVED_KEYWORD, AS_LABEL)
 PG_KEYWORD("identity", IDENTITY_P, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("if", IF_P, UNRESERVED_KEYWORD, BARE_LABEL)
+PG_KEYWORD("ignore_datatype_errors", IGNORE_DATATYPE_ERRORS, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("ilike", ILIKE, TYPE_FUNC_NAME_KEYWORD, BARE_LABEL)
 PG_KEYWORD("immediate", IMMEDIATE, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("immutable", IMMUTABLE, UNRESERVED_KEYWORD, BARE_LABEL)
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index 090ef6c7a8..08e8056fc1 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -666,6 +666,20 @@ SELECT * FROM instead_of_insert_tbl;
 (2 rows)
 
 COMMIT;
+-- tests for IGNORE_DATATYPE_ERRORS option
+CREATE TABLE check_ign_err (n int, m int[], k int);
+COPY check_ign_err FROM STDIN WITH IGNORE_DATATYPE_ERRORS;
+WARNING:  invalid input syntax for type integer: "a"
+WARNING:  value "3333333333" is out of range for type integer
+WARNING:  invalid input syntax for type integer: "a"
+WARNING:  invalid input syntax for type integer: ""
+SELECT * FROM check_ign_err;
+ n |  m  | k 
+---+-----+---
+ 1 | {1} | 1
+ 5 | {5} | 5
+(2 rows)
+
 -- clean up
 DROP TABLE forcetest;
 DROP TABLE vistest;
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index b0de82c3aa..380adfce96 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -464,6 +464,18 @@ test1
 SELECT * FROM instead_of_insert_tbl;
 COMMIT;
 
+-- tests for IGNORE_DATATYPE_ERRORS option
+CREATE TABLE check_ign_err (n int, m int[], k int);
+COPY check_ign_err FROM STDIN WITH IGNORE_DATATYPE_ERRORS;
+1	{1}	1
+a	{2}	2
+3	{3}	3333333333
+4	{a, 4}	4
+
+5	{5}	5
+\.
+SELECT * FROM check_ign_err;
+
 -- clean up
 DROP TABLE forcetest;
 DROP TABLE vistest;
-- 
2.25.1

