From a8b0161d6ddbdf7fc636ac4c6fc189e38b3ec08d Mon Sep 17 00:00:00 2001
From: Andrew Dunstan <andrew@dunslane.net>
Date: Wed, 30 Nov 2022 13:44:55 -0500
Subject: [PATCH 5/5] Add COPY FROM ... NULL ON ERROR ...

---
 doc/src/sgml/ref/copy.sgml               | 13 ++++++++++
 src/backend/commands/copy.c              | 20 +++++++++++++++
 src/backend/commands/copyfrom.c          | 28 +++++++++++++++++++++
 src/backend/commands/copyfromparse.c     | 32 +++++++++++++++++++-----
 src/include/commands/copy.h              |  1 +
 src/include/commands/copyfrom_internal.h |  3 +++
 src/test/regress/expected/copy.out       | 13 ++++++++++
 src/test/regress/sql/copy.sql            | 12 +++++++++
 8 files changed, 116 insertions(+), 6 deletions(-)

diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index c25b52d0cb..f660ec6599 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -42,6 +42,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
     FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
     FORCE_NOT_NULL ( <replaceable class="parameter">column_name</replaceable> [, ...] )
     FORCE_NULL ( <replaceable class="parameter">column_name</replaceable> [, ...] )
+    NULL_ON_ERROR ( <replaceable class="parameter">column_name</replaceable> [, ...] )
     ENCODING '<replaceable class="parameter">encoding_name</replaceable>'
 </synopsis>
  </refsynopsisdiv>
@@ -356,6 +357,18 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
     </listitem>
    </varlistentry>
 
+   <varlistentry>
+    <term><literal>NULL_ON_ERROR</literal></term>
+    <listitem>
+     <para>
+      If the data type's input function reports an error for one of the
+      specified columns, set that column's value to <literal>NULL</literal>.
+      This option is allowed only in <command>COPY FROM</command>. It is
+      not limited to <literal>CSV</literal> format.
+     </para>
+    </listitem>
+   </varlistentry>
+
    <varlistentry>
     <term><literal>ENCODING</literal></term>
     <listitem>
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index db4c9dbc23..b25d737c49 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -539,6 +539,19 @@ ProcessCopyOptions(ParseState *pstate,
 								defel->defname),
 						 parser_errposition(pstate, defel->location)));
 		}
+		else if (strcmp(defel->defname, "null_on_error") == 0)
+		{
+			if (opts_out->null_on_error)
+				errorConflictingDefElem(defel, pstate);
+			if (defel->arg && IsA(defel->arg, List))
+				opts_out->null_on_error = castNode(List, defel->arg);
+			else
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("argument to option \"%s\" must be a list of column names",
+								defel->defname),
+						 parser_errposition(pstate, defel->location)));
+		}
 		else if (strcmp(defel->defname, "encoding") == 0)
 		{
 			if (opts_out->file_encoding >= 0)
@@ -701,6 +714,13 @@ ProcessCopyOptions(ParseState *pstate,
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 				 errmsg("CSV quote character must not appear in the NULL specification")));
+
+	/* Check null_on_error */
+	if (opts_out->null_on_error != NIL && !is_from)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("COPY null on error only available using COPY FROM")));
+
 }
 
 /*
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index a079c70152..f2a0bd599a 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -1349,6 +1349,7 @@ BeginCopyFrom(ParseState *pstate,
 	Oid			in_func_oid;
 	int		   *defmap;
 	ExprState **defexprs;
+	bool	   *null_on_error_flags;
 	MemoryContext oldcontext;
 	bool		volatile_defexprs;
 	const int	progress_cols[] = {
@@ -1460,6 +1461,33 @@ BeginCopyFrom(ParseState *pstate,
 		}
 	}
 
+	/* Convert NULL_ON_ERROR name list to per-column flags, check validity */
+	null_on_error_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
+	cstate->null_on_error = null_on_error_flags;
+	if (cstate->opts.null_on_error)
+	{
+		List	   *attnums;
+		ListCell   *cur;
+
+
+		attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->opts.null_on_error);
+
+		foreach(cur, attnums)
+		{
+			int			attnum = lfirst_int(cur);
+			Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
+
+			if (!list_member_int(cstate->attnumlist, attnum))
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+						 errmsg("NULL_ON_ERROR column \"%s\" not referenced by COPY",
+								NameStr(attr->attname))));
+			null_on_error_flags[attnum - 1] = true;
+		}
+		cstate->io_context = makeNode(IOCallContext);
+		cstate->io_context->no_error_throw = true;
+	}
+
 	/* Use client encoding when ENCODING option is not specified. */
 	if (cstate->opts.file_encoding < 0)
 		cstate->file_encoding = pg_get_client_encoding();
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index 097414ef12..9c82ae1d09 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -897,6 +897,7 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
 			int			attnum = lfirst_int(cur);
 			int			m = attnum - 1;
 			Form_pg_attribute att = TupleDescAttr(tupDesc, m);
+			bool null_on_error = cstate->null_on_error[m];
 
 			if (fieldno >= fldct)
 				ereport(ERROR,
@@ -938,12 +939,31 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
 
 			cstate->cur_attname = NameStr(att->attname);
 			cstate->cur_attval = string;
-			values[m] = InputFunctionCall(&in_functions[m],
-										  string,
-										  typioparams[m],
-										  att->atttypmod);
-			if (string != NULL)
-				nulls[m] = false;
+
+			if (null_on_error)
+			{
+				IOCallContext *ioc = cstate->io_context;
+				ioc->error_found = false;
+				values[m] = InputFunctionCallContext(&in_functions[m],
+													 string,
+													 typioparams[m],
+													 att->atttypmod,
+													 ioc);
+				if (ioc->error_found)
+					nulls[m] = true;
+				else if (string != NULL)
+					nulls[m] = false;
+			}
+			else
+			{
+				values[m] = InputFunctionCall(&in_functions[m],
+											  string,
+											  typioparams[m],
+											  att->atttypmod);
+				if (string != NULL)
+					nulls[m] = false;
+			}
+
 			cstate->cur_attname = NULL;
 			cstate->cur_attval = NULL;
 		}
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index b77b935005..de2198dc09 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -59,6 +59,7 @@ typedef struct CopyFormatOptions
 	bool	   *force_null_flags;	/* per-column CSV FN flags */
 	bool		convert_selectively;	/* do selective binary conversion? */
 	List	   *convert_select; /* list of column names (can be NIL) */
+	List	   *null_on_error;	/* list of column names */
 } CopyFormatOptions;
 
 /* These are private in commands/copy[from|to].c */
diff --git a/src/include/commands/copyfrom_internal.h b/src/include/commands/copyfrom_internal.h
index 8d9cc5accd..8ad532997d 100644
--- a/src/include/commands/copyfrom_internal.h
+++ b/src/include/commands/copyfrom_internal.h
@@ -16,6 +16,7 @@
 
 #include "commands/copy.h"
 #include "commands/trigger.h"
+#include "nodes/primnodes.h"
 
 /*
  * Represents the different source cases we need to worry about at
@@ -74,6 +75,7 @@ typedef struct CopyFromStateData
 	char	   *filename;		/* filename, or NULL for STDIN */
 	bool		is_program;		/* is 'filename' a program to popen? */
 	copy_data_source_cb data_source_cb; /* function for reading data */
+	bool       *null_on_error;  /* which attnums to set null on input error */
 
 	CopyFormatOptions opts;
 	bool	   *convert_select_flags;	/* per-column CSV/TEXT CS flags */
@@ -93,6 +95,7 @@ typedef struct CopyFromStateData
 
 	AttrNumber	num_defaults;
 	FmgrInfo   *in_functions;	/* array of input functions for each attrs */
+	IOCallContext *io_context;  /* used for null_on_error input*/
 	Oid		   *typioparams;	/* array of element types for in_functions */
 	int		   *defmap;			/* array of default att numbers */
 	ExprState **defexprs;		/* array of default att expressions */
diff --git a/src/test/regress/expected/copy.out b/src/test/regress/expected/copy.out
index 3fad1c52d1..96e51101ce 100644
--- a/src/test/regress/expected/copy.out
+++ b/src/test/regress/expected/copy.out
@@ -240,3 +240,16 @@ SELECT * FROM header_copytest ORDER BY a;
 (5 rows)
 
 drop table header_copytest;
+create table null_on_error_copytest(i int, b bool, t tsvector);
+copy null_on_error_copytest from stdin with (null_on_error(i, b, t));
+copy null_on_error_copytest from stdin with (null_on_error(i, b, t));
+ERROR:  syntax error in tsvector: "b:c"
+CONTEXT:  COPY null_on_error_copytest, line 2, column t: "b:c"
+select * from null_on_error_copytest;
+ i | b |  t  
+---+---+-----
+ 1 |   | 'a'
+   | t | 'b'
+(2 rows)
+
+drop table null_on_error_copytest;
diff --git a/src/test/regress/sql/copy.sql b/src/test/regress/sql/copy.sql
index 285022e07c..7a895c2ba1 100644
--- a/src/test/regress/sql/copy.sql
+++ b/src/test/regress/sql/copy.sql
@@ -268,3 +268,15 @@ a	c	b
 
 SELECT * FROM header_copytest ORDER BY a;
 drop table header_copytest;
+
+create table null_on_error_copytest(i int, b bool, t tsvector);
+copy null_on_error_copytest from stdin with (null_on_error(i, b, t));
+1	a 	a
+err	1 	b
+\.
+copy null_on_error_copytest from stdin with (null_on_error(i, b, t));
+2	a 	a
+3	1 	b:c
+\.
+select * from null_on_error_copytest;
+drop table null_on_error_copytest;
-- 
2.34.1

