raw output from copy

Started by Pavel Stehule almost 11 years ago · 80 messages
#1Pavel Stehule
pavel.stehule@gmail.com

Hi

This thread ended without any real work being done. I have a real use case -
exporting an XML document in a non-UTF8 encoding.

/messages/by-id/16174.1319228878@sss.pgh.pa.us

I propose to implement new format option "RAW" like Tom proposed.

It requires a result with exactly one row and one column - and the result is
just the raw binary data, without a size field.

Objections? Ideas?

Regards

Pavel

#2Pavel Stehule
pavel.stehule@gmail.com
In reply to: Pavel Stehule (#1)
1 attachment(s)
Re: raw output from copy

Hi

I wrote a prototype of this patch, and it works well

postgres=# set client_encoding to 'latin2';
SET
Time: 1.488 ms
postgres=# \copy (select xmlelement(name xx, d) from d) to ~/d.xml (format
'raw')
COPY 1
Time: 1.108 ms
postgres=# copy (select xmlelement(name xx, d) from d) to stdout (format
'raw') ;
<?xml version="1.0" encoding="LATIN2"?><xx>příliš žluťoučký kůň</xx>Time:
1.000 ms

Regards

Pavel

2015-04-09 20:48 GMT+02:00 Pavel Stehule <pavel.stehule@gmail.com>:

Show quoted text

Hi

This thread was finished without real work. I have a real use case -
export XML doc in non utf8 encoding.

/messages/by-id/16174.1319228878@sss.pgh.pa.us

I propose to implement new format option "RAW" like Tom proposed.

It requires only one row, one column result - and result is just raw
binary data without size.

Objections? Ideas?

Regards

Pavel

Attachments:

copy-raw.patch (text/x-patch; charset=US-ASCII; name=copy-raw.patch) Download
commit 60c6701fe5a91c41e9ed0db99676c8b1a27e85e3
Author: Pavel Stehule <pavel.stehule@gooddata.com>
Date:   Fri Apr 10 23:22:39 2015 +0200

    initial

diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 92ff632..5701f8b 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -113,6 +113,7 @@ typedef struct CopyStateData
 	char	   *filename;		/* filename, or NULL for STDIN/STDOUT */
 	bool		is_program;		/* is 'filename' a program to popen? */
 	bool		binary;			/* binary format? */
+	bool		raw;			/* raw format - data only */
 	bool		oids;			/* include OIDs? */
 	bool		freeze;			/* freeze rows on loading? */
 	bool		csv_mode;		/* Comma Separated Value format? */
@@ -348,6 +349,13 @@ SendCopyBegin(CopyState cstate)
 		int16		format = (cstate->binary ? 1 : 0);
 		int			i;
 
+		if (cstate->binary)
+			format = 1;
+		else if (cstate->raw)
+			format = 2;
+		else
+			format = 0;
+
 		pq_beginmessage(&buf, 'H');
 		pq_sendbyte(&buf, format);		/* overall format */
 		pq_sendint(&buf, natts, 2);
@@ -359,7 +367,7 @@ SendCopyBegin(CopyState cstate)
 	else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
 	{
 		/* old way */
-		if (cstate->binary)
+		if (cstate->binary || cstate->raw)
 			ereport(ERROR,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 			errmsg("COPY BINARY is not supported to stdout or from stdin")));
@@ -371,7 +379,7 @@ SendCopyBegin(CopyState cstate)
 	else
 	{
 		/* very old way */
-		if (cstate->binary)
+		if (cstate->binary || cstate->raw)
 			ereport(ERROR,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 			errmsg("COPY BINARY is not supported to stdout or from stdin")));
@@ -485,7 +493,7 @@ CopySendEndOfRow(CopyState cstate)
 	switch (cstate->copy_dest)
 	{
 		case COPY_FILE:
-			if (!cstate->binary)
+			if (!(cstate->binary || cstate->raw))
 			{
 				/* Default line termination depends on platform */
 #ifndef WIN32
@@ -543,7 +551,7 @@ CopySendEndOfRow(CopyState cstate)
 			break;
 		case COPY_NEW_FE:
 			/* The FE/BE protocol uses \n as newline for all platforms */
-			if (!cstate->binary)
+			if (!(cstate->binary || cstate->raw))
 				CopySendChar(cstate, '\n');
 
 			/* Dump the accumulated row as one CopyData message */
@@ -1005,6 +1013,8 @@ ProcessCopyOptions(CopyState cstate,
 				cstate->csv_mode = true;
 			else if (strcmp(fmt, "binary") == 0)
 				cstate->binary = true;
+			else if (strcmp(fmt, "raw") == 0)
+				cstate->raw = true;
 			else
 				ereport(ERROR,
 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
@@ -1808,6 +1818,10 @@ CopyTo(CopyState cstate)
 	num_phys_attrs = tupDesc->natts;
 	cstate->null_print_client = cstate->null_print;		/* default */
 
+	/* don't allow more columns for raw format */
+	if (tupDesc->natts > 1)
+		elog(ERROR, "too much columns for RAW output");
+
 	/* We use fe_msgbuf as a per-row buffer regardless of copy_dest */
 	cstate->fe_msgbuf = makeStringInfo();
 
@@ -1819,7 +1833,7 @@ CopyTo(CopyState cstate)
 		Oid			out_func_oid;
 		bool		isvarlena;
 
-		if (cstate->binary)
+		if ((cstate->binary || cstate->raw))
 			getTypeBinaryOutputInfo(attr[attnum - 1]->atttypid,
 									&out_func_oid,
 									&isvarlena);
@@ -1858,7 +1872,7 @@ CopyTo(CopyState cstate)
 		tmp = 0;
 		CopySendInt32(cstate, tmp);
 	}
-	else
+	else if (!cstate->raw)
 	{
 		/*
 		 * For non-binary copy, we need to convert null_print to file
@@ -1970,7 +1984,7 @@ CopyOneRowTo(CopyState cstate, Oid tupleOid, Datum *values, bool *nulls)
 			CopySendInt32(cstate, tupleOid);
 		}
 	}
-	else
+	else if (!cstate->raw)
 	{
 		/* Text format has no per-tuple header, but send OID if wanted */
 		/* Assume digits don't need any quoting or encoding conversion */
@@ -1998,14 +2012,16 @@ CopyOneRowTo(CopyState cstate, Oid tupleOid, Datum *values, bool *nulls)
 
 		if (isnull)
 		{
-			if (!cstate->binary)
+			if (cstate->raw)
+				elog(ERROR, "cannot to push NULL in raw output");
+			else if (!cstate->binary)
 				CopySendString(cstate, cstate->null_print_client);
 			else
 				CopySendInt32(cstate, -1);
 		}
 		else
 		{
-			if (!cstate->binary)
+			if (!(cstate->binary || cstate->raw))
 			{
 				string = OutputFunctionCall(&out_functions[attnum - 1],
 											value);
diff --git a/src/interfaces/libpq/fe-protocol3.c b/src/interfaces/libpq/fe-protocol3.c
index a847f08..135c052 100644
--- a/src/interfaces/libpq/fe-protocol3.c
+++ b/src/interfaces/libpq/fe-protocol3.c
@@ -1352,6 +1352,8 @@ getCopyStart(PGconn *conn, ExecStatusType copytype)
 		 */
 		format = (int) ((int16) format);
 		result->attDescs[i].format = format;
+
+		conn->copy_raw_mode = format == 2;
 	}
 
 	/* Success! */
@@ -1544,13 +1546,38 @@ pqGetCopyData3(PGconn *conn, char **buffer, int async)
 								  libpq_gettext("out of memory\n"));
 				return -2;
 			}
-			memcpy(*buffer, &conn->inBuffer[conn->inCursor], msgLength);
-			(*buffer)[msgLength] = '\0';		/* Add terminating null */
 
-			/* Mark message consumed */
-			conn->inStart = conn->inCursor + msgLength;
+			if (!conn->copy_raw_mode)
+			{
+				memcpy(*buffer, &conn->inBuffer[conn->inCursor], msgLength);
+				(*buffer)[msgLength] = '\0';		/* Add terminating null */
 
-			return msgLength;
+				conn->inStart = conn->inCursor + msgLength;
+
+				return msgLength;
+			}
+			else
+			{
+				if (msgLength < 4)
+				{
+					printfPQExpBuffer(&conn->errorMessage,
+									  libpq_gettext("broken format\n"));
+					return -2;
+				}
+
+				/*
+				 * raw format is same as binary without addtional info. Every
+				 * binary data starts with length 4B
+				 */
+				memcpy(*buffer, &conn->inBuffer[conn->inCursor + 4], msgLength - 4);
+				(*buffer)[msgLength] = '\0';		/* Add terminating null */
+
+				conn->inStart = conn->inCursor + msgLength;
+
+				return msgLength - 4;
+			}
+
+			/* Mark message consumed */
 		}
 
 		/* Empty, so drop it and loop around for another */
diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h
index 2175957..23bde61 100644
--- a/src/interfaces/libpq/libpq-int.h
+++ b/src/interfaces/libpq/libpq-int.h
@@ -421,6 +421,8 @@ struct pg_conn
 	PGresult   *result;			/* result being constructed */
 	PGresult   *next_result;	/* next result (used in single-row mode) */
 
+	bool		copy_raw_mode;		/* true, when one value is passed by copy protocol */
+
 	/* Assorted state for SSL, GSS, etc */
 
 #ifdef USE_SSL
#3Peter Eisentraut
peter_e@gmx.net
In reply to: Pavel Stehule (#2)
Re: raw output from copy

On 4/10/15 5:26 PM, Pavel Stehule wrote:

Hi

I wrote a prototype of this patch, and it works well

postgres=# set client_encoding to 'latin2';
SET
Time: 1.488 ms
postgres=# \copy (select xmlelement(name xx, d) from d) to ~/d.xml
(format 'raw')
COPY 1
Time: 1.108 ms
postgres=# copy (select xmlelement(name xx, d) from d) to stdout (format
'raw') ;
<?xml version="1.0" encoding="LATIN2"?><xx>příliš žluťoučký
kůň</xx>Time: 1.000 ms

I think you can get the same thing using regular psql output and just
turning off all field and record separators and tuple headers and so on.

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#4Pavel Stehule
pavel.stehule@gmail.com
In reply to: Peter Eisentraut (#3)
Re: raw output from copy

It would be nice, but it is not true. You can get correct non utf8 xml with
encoding specification only when binary mode is used. Psql doesn't support
binary mode.

Regards

Pavel
Dne 15. 4. 2015 22:06 napsal uživatel "Peter Eisentraut" <peter_e@gmx.net>:

Show quoted text

On 4/10/15 5:26 PM, Pavel Stehule wrote:

Hi

I wrote a prototype of this patch, and it works well

postgres=# set client_encoding to 'latin2';
SET
Time: 1.488 ms
postgres=# \copy (select xmlelement(name xx, d) from d) to ~/d.xml
(format 'raw')
COPY 1
Time: 1.108 ms
postgres=# copy (select xmlelement(name xx, d) from d) to stdout (format
'raw') ;
<?xml version="1.0" encoding="LATIN2"?><xx>příliš žluťoučký
kůň</xx>Time: 1.000 ms

I think you can get the same thing using regular psql output and just
turning off all field and record separators and tuple headers and so on.

#5Pavel Golub
pavel@microolap.com
In reply to: Pavel Stehule (#2)
Re: raw output from copy

Hello Pavel.

I looked through the patch. The sources are OK. However, I didn't find any docs
or test cases. Would you please provide a short description of this
feature and why it is important? I didn't manage to find Andrew Dunstan's
old post either.

On Sat, Apr 11, 2015 at 12:26 AM, Pavel Stehule <pavel.stehule@gmail.com>
wrote:

Hi

I wrote a prototype of this patch, and it works well

postgres=# set client_encoding to 'latin2';
SET
Time: 1.488 ms
postgres=# \copy (select xmlelement(name xx, d) from d) to ~/d.xml (format
'raw')
COPY 1
Time: 1.108 ms
postgres=# copy (select xmlelement(name xx, d) from d) to stdout (format
'raw') ;
<?xml version="1.0" encoding="LATIN2"?><xx>příliš žluťoučký kůň</xx>Time:
1.000 ms

Regards

Pavel

2015-04-09 20:48 GMT+02:00 Pavel Stehule <pavel.stehule@gmail.com>:

Hi

This thread was finished without real work. I have a real use case -
export XML doc in non utf8 encoding.

/messages/by-id/16174.1319228878@sss.pgh.pa.us

I propose to implement new format option "RAW" like Tom proposed.

It requires only one row, one column result - and result is just raw
binary data without size.

Objections? Ideas?

Regards

Pavel

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

--
Nullus est in vitae sensus ipsa vera est sensus.

#6Simon Riggs
simon@2ndQuadrant.com
In reply to: Pavel Golub (#5)
Re: raw output from copy

On 1 July 2015 at 07:42, Pavel Golub <pavel@microolap.com> wrote:

I looked through the patch. Sources are OK. However I didn't find any
docs and test cases. Would you please provide me with short description on
this feature and why it is important. Because I didn't manage to find the
old Andrew Dunstan's post either.

Feature sounds OK, so lets do it.

Pavel S, please submit a polished patch. Coding guidelines, tests, docs
etc. Set back to Waiting On Author.

--
Simon Riggs http://www.2ndQuadrant.com/
<http://www.2ndquadrant.com/>
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#7Pavel Stehule
pavel.stehule@gmail.com
In reply to: Simon Riggs (#6)
Re: raw output from copy

Hi

I'll do it this evening

Pavel

2015-07-02 12:55 GMT+02:00 Simon Riggs <simon@2ndquadrant.com>:

Show quoted text

On 1 July 2015 at 07:42, Pavel Golub <pavel@microolap.com> wrote:

I looked through the patch. Sources are OK. However I didn't find any
docs and test cases. Would you please provide me with short description on
this feature and why it is important. Because I didn't manage to find the
old Andrew Dunstan's post either.

Feature sounds OK, so lets do it.

Pavel S, please submit a polished patch. Coding guidelines, tests, docs
etc. Set back to Waiting On Author.

--
Simon Riggs http://www.2ndQuadrant.com/
<http://www.2ndquadrant.com/>
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#8Andrew Dunstan
andrew@dunslane.net
In reply to: Pavel Stehule (#7)
Re: raw output from copy

On 07/02/2015 07:14 AM, Pavel Stehule wrote:

Hi

I'll do it today evening

Pavel,

Please don't top-post on the PostgreSQL lists. You've been around here
long enough to know that bottom posting is our custom.

I posted a patch for this in 2013 at
</messages/by-id/50F2FA92.9040000@dunslane.net> but
it can apply to a SELECT, and doesn't need COPY. Nobody seemed very
interested, so I dropped it. Apparently people now want something along
these lines, which is good.

cheers

andrew

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#9Andrew Dunstan
andrew@dunslane.net
In reply to: Andrew Dunstan (#8)
Re: raw output from copy

On 07/02/2015 09:02 AM, Andrew Dunstan wrote:

On 07/02/2015 07:14 AM, Pavel Stehule wrote:

Hi

I'll do it today evening

Pavel,

Please don't top-post on the PostgreSQL lists. You've been around here
long enough to know that bottom posting is our custom.

I posted a patch for this in 2013 at
</messages/by-id/50F2FA92.9040000@dunslane.net>
but it can apply to a SELECT, and doesn't need COPY. Nobody seemed
very interested, so I dropped it. Apparently people now want something
along these lines, which is good.

For reference, here's the Wayback Machine's version of the original blog
post referred to:
<http://web.archive.org/web/20110916023912/http://people.planetpostgresql.org/andrew/index.php?/archives/196-Clever-trick-challenge.html>

cheers

andrew

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#10Simon Riggs
simon@2ndQuadrant.com
In reply to: Andrew Dunstan (#8)
Re: raw output from copy

On 2 July 2015 at 14:02, Andrew Dunstan <andrew@dunslane.net> wrote:

Please don't top-post on the PostgreSQL lists. You've been around here
long enough to know that bottom posting is our custom.

I posted a patch for this in 2013 at <
/messages/by-id/50F2FA92.9040000@dunslane.net> but
it can apply to a SELECT, and doesn't need COPY. Nobody seemed very
interested, so I dropped it. Apparently people now want something along
these lines, which is good.

It's a shame that both solutions are restricted to either COPY or psql.

Both of those are working on suggestions from Tom, so there is no history
of preference there.

Can we have both please, gentlemen?

If we implemented Andrew's solution, how would we request it in a COPY
statement? Seems like we would want the RAW format keyword anyway.

--
Simon Riggs http://www.2ndQuadrant.com/
<http://www.2ndquadrant.com/>
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#11Pavel Stehule
pavel.stehule@gmail.com
In reply to: Simon Riggs (#10)
Re: raw output from copy

2015-07-02 15:43 GMT+02:00 Simon Riggs <simon@2ndquadrant.com>:

On 2 July 2015 at 14:02, Andrew Dunstan <andrew@dunslane.net> wrote:

Please don't top-post on the PostgreSQL lists. You've been around here
long enough to know that bottom posting is our custom.

I posted a patch for this in 2013 at <
/messages/by-id/50F2FA92.9040000@dunslane.net> but
it can apply to a SELECT, and doesn't need COPY. Nobody seemed very
interested, so I dropped it. Apparently people now want something along
these lines, which is good.

It's a shame that both solutions are restricted to either COPY or psql.

Both of those are working on suggestions from Tom, so there is no history
of preference there.

Can we have both please, gentlemen?

If we implemented Andrew's solution, how would we request it in a COPY
statement? Seems like we would want the RAW format keyword anyway.

I prefer a COPY-like solution - it can be used on both sides (server,
client), and it is a little simpler to use with the psql -c "XXX" pattern.

Regards

Pavel

Show quoted text

--
Simon Riggs http://www.2ndQuadrant.com/
<http://www.2ndquadrant.com/>
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#12Andrew Dunstan
andrew@dunslane.net
In reply to: Simon Riggs (#10)
Re: raw output from copy

On 07/02/2015 09:43 AM, Simon Riggs wrote:

On 2 July 2015 at 14:02, Andrew Dunstan <andrew@dunslane.net
<mailto:andrew@dunslane.net>> wrote:

Please don't top-post on the PostgreSQL lists. You've been around
here long enough to know that bottom posting is our custom.

I posted a patch for this in 2013 at
</messages/by-id/50F2FA92.9040000@dunslane.net>
but it can apply to a SELECT, and doesn't need COPY. Nobody seemed
very interested, so I dropped it. Apparently people now want
something along these lines, which is good.

It's a shame that both solutions are restricted to either COPY or psql.

Both of those are working on suggestions from Tom, so there is no
history of preference there.

Can we have both please, gentlemen?

If we implemented Andrew's solution, how would we request it in a COPY
statement? Seems like we would want the RAW format keyword anyway.

What's the use case? My original motivation was that I had a function
that returned a bytea (it was a PDF in fact) that I wanted to be able to
write to a file. Of course, this is easy enough to do with a client
library like perl's DBD::Pg, but it seems sad to have to resort to that
for something so simple.

My original suggestion
(</messages/by-id/4EA1B83B.2050605@pgexperts.com>)
was to invent a \bcopy command.

I don't have a problem in building in a RAW mode for copy, but we'll
still need to teach psql how to deal with it.

Another case where it could be useful is JSON - so we can avoid having
to play tricks like
<http://adpgtech.blogspot.com/2014/09/importing-json-data.html>. Similar
considerations probably apply to XML, and the tricks are less guaranteed
to work.

cheers

andrew

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#13Pavel Stehule
pavel.stehule@gmail.com
In reply to: Andrew Dunstan (#12)
Re: raw output from copy

2015-07-02 16:02 GMT+02:00 Andrew Dunstan <andrew@dunslane.net>:

On 07/02/2015 09:43 AM, Simon Riggs wrote:

On 2 July 2015 at 14:02, Andrew Dunstan <andrew@dunslane.net <mailto:
andrew@dunslane.net>> wrote:

Please don't top-post on the PostgreSQL lists. You've been around
here long enough to know that bottom posting is our custom.

I posted a patch for this in 2013 at
</messages/by-id/50F2FA92.9040000@dunslane.net>
but it can apply to a SELECT, and doesn't need COPY. Nobody seemed
very interested, so I dropped it. Apparently people now want
something along these lines, which is good.

It's a shame that both solutions are restricted to either COPY or psql.

Both of those are working on suggestions from Tom, so there is no history
of preference there.

Can we have both please, gentlemen?

If we implemented Andrew's solution, how would we request it in a COPY
statement? Seems like we would want the RAW format keyword anyway.

What's the use case? My original motivation was that I had a function that
returned a bytea (it was a PDF in fact) that I wanted to be able to write
to a file. Of course, this is easy enough to do with a client library like
perl's DBD::Pg, but it seems sad to have to resort to that for something so
simple.

My original suggestion (<
/messages/by-id/4EA1B83B.2050605@pgexperts.com>) was
to invent a \bcopy command.

I don't have a problem in building in a RAW mode for copy, but we'll still
need to teach psql how to deal with it.

It can be used from psql without any problems.

Show quoted text

Another case where it could be useful is JSON - so we can avoid having to
play tricks like <
http://adpgtech.blogspot.com/2014/09/importing-json-data.html>. Similar
considerations probably apply to XML, and the tricks are less guaranteed to
work.

cheers

andrew

#14Simon Riggs
simon@2ndQuadrant.com
In reply to: Pavel Stehule (#13)
Re: raw output from copy

On 2 July 2015 at 15:07, Pavel Stehule <pavel.stehule@gmail.com> wrote:

It can be used from psql without any problems.

It can, but your patch does not yet do that, while Andrew's does.

We want a solution that works from psql and other clients. Hopefully the
same-ish solution.

--
Simon Riggs http://www.2ndQuadrant.com/
<http://www.2ndquadrant.com/>
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#15Andrew Dunstan
andrew@dunslane.net
In reply to: Pavel Stehule (#13)
Re: raw output from copy

On 07/02/2015 10:07 AM, Pavel Stehule wrote:

2015-07-02 16:02 GMT+02:00 Andrew Dunstan <andrew@dunslane.net
<mailto:andrew@dunslane.net>>:

On 07/02/2015 09:43 AM, Simon Riggs wrote:

On 2 July 2015 at 14:02, Andrew Dunstan <andrew@dunslane.net
<mailto:andrew@dunslane.net> <mailto:andrew@dunslane.net
<mailto:andrew@dunslane.net>>> wrote:

Please don't top-post on the PostgreSQL lists. You've been
around
here long enough to know that bottom posting is our custom.

I posted a patch for this in 2013 at

</messages/by-id/50F2FA92.9040000@dunslane.net>
but it can apply to a SELECT, and doesn't need COPY.
Nobody seemed
very interested, so I dropped it. Apparently people now want
something along these lines, which is good.

It's a shame that both solutions are restricted to either COPY
or psql.

Both of those are working on suggestions from Tom, so there is
no history of preference there.

Can we have both please, gentlemen?

If we implemented Andrew's solution, how would we request it
in a COPY statement? Seems like we would want the RAW format
keyword anyway.

What's the use case? My original motivation was that I had a
function that returned a bytea (it was a PDF in fact) that I
wanted to be able to write to a file. Of course, this is easy
enough to do with a client library like perl's DBD::Pg, but it
seems sad to have to resort to that for something so simple.

My original suggestion
(</messages/by-id/4EA1B83B.2050605@pgexperts.com>)
was to invent a \bcopy command.

I don't have a problem in building in a RAW mode for copy, but
we'll still need to teach psql how to deal with it.

It can be used from psql without any problems.

In fact your patch will not work with psql's \copy nor to stdout at all,
unless I'm misreading it:

    -        if (cstate->binary)
    +        if (cstate->binary || cstate->raw)
                  ereport(ERROR,
                          (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                  errmsg("COPY BINARY is not supported to stdout or from
    stdin")));

So it looks like you're only supporting this where the server is writing
to a file. That's horribly narrow, and certainly doesn't meet my
original need.

Does the COPY line protocol even support binary data? If not, we're dead
in the water here from the psql POV. Because my patch doesn't use the
COPY protocol it doesn't have this problem.

Perhaps we should do both, although I'm not sure I understand the use
case for the COPY solution.

cheers

andrew

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#16Tom Lane
tgl@sss.pgh.pa.us
In reply to: Andrew Dunstan (#15)
Re: raw output from copy

Andrew Dunstan <andrew@dunslane.net> writes:

Does the COPY line protocol even support binary data?

The protocol, per se, just transmits a byte stream. There is a field
in the CopyInResponse/CopyOutResponse messages that indicates whether
a text or binary copy is being done. One thing we'd have to consider
is whether "raw" mode is sufficiently different from binary to justify
an additional value for this field, and if so whether that constitutes
a protocol break.

IIRC, psql wouldn't really care; it just transfers the byte stream to or
from the target file, regardless of text or binary mode. But there might
be other client libraries that are smarter and expect "binary" mode to
mean the binary file format specified in the COPY reference page. So
there may be value in being explicit about "raw" mode in these messages.

A key point in all this is that people who need "raw" transfer probably
need it in both directions, a point that your SELECT proposal cannot
satisfy, but hacking COPY could. So I lean towards the latter really.

regards, tom lane

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#17Andrew Dunstan
andrew@dunslane.net
In reply to: Tom Lane (#16)
Re: raw output from copy

On 07/02/2015 11:02 AM, Tom Lane wrote:

Andrew Dunstan <andrew@dunslane.net> writes:

Does the COPY line protocol even support binary data?

The protocol, per se, just transmits a byte stream. There is a field
in the CopyInResponse/CopyOutResponse messages that indicates whether
a text or binary copy is being done. One thing we'd have to consider
is whether "raw" mode is sufficiently different from binary to justify
an additional value for this field, and if so whether that constitutes
a protocol break.

IIRC, psql wouldn't really care; it just transfers the byte stream to or
from the target file, regardless of text or binary mode. But there might
be other client libraries that are smarter and expect "binary" mode to
mean the binary file format specified in the COPY reference page. So
there may be value in being explicit about "raw" mode in these messages.

A key point in all this is that people who need "raw" transfer probably
need it in both directions, a point that your SELECT proposal cannot
satisfy, but hacking COPY could. So I lean towards the latter really.

OK, let's do that. I await the result with interest.

cheers

andrew

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#18Pavel Stehule
pavel.stehule@gmail.com
In reply to: Tom Lane (#16)
Re: raw output from copy

2015-07-02 17:02 GMT+02:00 Tom Lane <tgl@sss.pgh.pa.us>:

Andrew Dunstan <andrew@dunslane.net> writes:

Does the COPY line protocol even support binary data?

The protocol, per se, just transmits a byte stream. There is a field
in the CopyInResponse/CopyOutResponse messages that indicates whether
a text or binary copy is being done. One thing we'd have to consider
is whether "raw" mode is sufficiently different from binary to justify
an additional value for this field, and if so whether that constitutes
a protocol break.

IIRC, psql wouldn't really care; it just transfers the byte stream to or
from the target file, regardless of text or binary mode. But there might
be other client libraries that are smarter and expect "binary" mode to
mean the binary file format specified in the COPY reference page. So
there may be value in being explicit about "raw" mode in these messages.

The safe way is to create a new mode and propagate it to the client. It should
not break any current applications, because no one uses COPY RAW.

A key point in all this is that people who need "raw" transfer probably
need it in both directions, a point that your SELECT proposal cannot
satisfy, but hacking COPY could. So I lean towards the latter really.

Yes, that makes sense. I am not sure whether I'll have time to implement it in
this step, but I'll look into it.

regards

Pavel

Show quoted text

regards, tom lane

#19Pavel Stehule
pavel.stehule@gmail.com
In reply to: Tom Lane (#16)
1 attachment(s)
Re: raw output from copy

Hi

Here is a version that supports both directions.

postgres=# copy foo from '/tmp/1.jpg' (format raw);
COPY 1
Time: 93.021 ms
postgres=# \dt+ foo
List of relations
┌────────┬──────┬───────┬───────┬────────┬─────────────┐
│ Schema │ Name │ Type │ Owner │ Size │ Description │
╞════════╪══════╪═══════╪═══════╪════════╪═════════════╡
│ public │ foo │ table │ pavel │ 256 kB │ │
└────────┴──────┴───────┴───────┴────────┴─────────────┘
(1 row)

postgres=# \copy foo to '~/3.jpg' (format raw)
COPY 1
Time: 2.401 ms

Regards

Pavel

2015-07-02 17:02 GMT+02:00 Tom Lane <tgl@sss.pgh.pa.us>:

Show quoted text

Andrew Dunstan <andrew@dunslane.net> writes:

Does the COPY line protocol even support binary data?

The protocol, per se, just transmits a byte stream. There is a field
in the CopyInResponse/CopyOutResponse messages that indicates whether
a text or binary copy is being done. One thing we'd have to consider
is whether "raw" mode is sufficiently different from binary to justify
an additional value for this field, and if so whether that constitutes
a protocol break.

IIRC, psql wouldn't really care; it just transfers the byte stream to or
from the target file, regardless of text or binary mode. But there might
be other client libraries that are smarter and expect "binary" mode to
mean the binary file format specified in the COPY reference page. So
there may be value in being explicit about "raw" mode in these messages.

A key point in all this is that people who need "raw" transfer probably
need it in both directions, a point that your SELECT proposal cannot
satisfy, but hacking COPY could. So I lean towards the latter really.

regards, tom lane

Attachments:

copy-raw-format-20150706-01.patch (text/x-patch; charset=US-ASCII; name=copy-raw-format-20150706-01.patch) Download
commit 5599347d6b0b29a2674d465b3ff03164fce59810
Author: Pavel Stehule <pavel.stehule@gooddata.com>
Date:   Mon Jul 6 23:18:18 2015 +0200

    COPY FROM/TO (FORMAT RAW)

diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 2850b47..4b7b64d 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -190,7 +190,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
       Selects the data format to be read or written:
       <literal>text</>,
       <literal>csv</> (Comma Separated Values),
-      or <literal>binary</>.
+      <literal>binary</> or <literal>raw</literal>.
       The default is <literal>text</>.
      </para>
     </listitem>
@@ -881,6 +881,23 @@ OIDs to be shown as null if that ever proves desirable.
     </para>
    </refsect3>
   </refsect2>
+
+  <refsect2>
+     <title>Raw Format</title>
+
+   <para>
+    The <literal>raw</literal> format option causes all data to be
+    stored/read as binary format rather than as text. It shares format
+    for data with <literal>binary</literal> format. This format doesn't
+    use any metadata - only row data in network byte order are exported
+    or imported.
+   </para>
+
+   <para>
+    Because this format doesn't support any delimiter, only one value
+    can be exported or imported. NULL values are not allowed.
+   </para>
+  </refsect2>
  </refsect1>
 
  <refsect1>
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 8904676..2ad7eb1 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -92,6 +92,11 @@ typedef enum EolType
  * it's faster to make useless comparisons to trailing bytes than it is to
  * invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is TRUE
  * when we have to do it the hard way.
+ *
+ * COPY supports three modes: text, binary and raw. The text format is plain
+ * text multiline format with specified delimiter. The binary format holds
+ * metadata (numbers, sizes) and data. The raw format holds data only and
+ * only one non NULL value can be processed.
  */
 typedef struct CopyStateData
 {
@@ -113,6 +118,7 @@ typedef struct CopyStateData
 	char	   *filename;		/* filename, or NULL for STDIN/STDOUT */
 	bool		is_program;		/* is 'filename' a program to popen? */
 	bool		binary;			/* binary format? */
+	bool		raw;			/* required raw binary? */
 	bool		oids;			/* include OIDs? */
 	bool		freeze;			/* freeze rows on loading? */
 	bool		csv_mode;		/* Comma Separated Value format? */
@@ -202,6 +208,9 @@ typedef struct CopyStateData
 	char	   *raw_buf;
 	int			raw_buf_index;	/* next byte to process */
 	int			raw_buf_len;	/* total # of bytes stored */
+
+	/* field for RAW mode */
+	bool		row_processed;		/* true, when first row was processed */
 } CopyStateData;
 
 /* DestReceiver for COPY (SELECT) TO */
@@ -345,9 +354,16 @@ SendCopyBegin(CopyState cstate)
 		/* new way */
 		StringInfoData buf;
 		int			natts = list_length(cstate->attnumlist);
-		int16		format = (cstate->binary ? 1 : 0);
+		int16		format;
 		int			i;
 
+		if (cstate->raw)
+			format = 2;
+		else if (cstate->binary)
+			format = 1;
+		else
+			format = 0;
+
 		pq_beginmessage(&buf, 'H');
 		pq_sendbyte(&buf, format);		/* overall format */
 		pq_sendint(&buf, natts, 2);
@@ -359,7 +375,7 @@ SendCopyBegin(CopyState cstate)
 	else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
 	{
 		/* old way */
-		if (cstate->binary)
+		if (cstate->binary && cstate->raw)
 			ereport(ERROR,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 			errmsg("COPY BINARY is not supported to stdout or from stdin")));
@@ -371,7 +387,7 @@ SendCopyBegin(CopyState cstate)
 	else
 	{
 		/* very old way */
-		if (cstate->binary)
+		if (cstate->binary && cstate->raw)
 			ereport(ERROR,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 			errmsg("COPY BINARY is not supported to stdout or from stdin")));
@@ -390,9 +406,16 @@ ReceiveCopyBegin(CopyState cstate)
 		/* new way */
 		StringInfoData buf;
 		int			natts = list_length(cstate->attnumlist);
-		int16		format = (cstate->binary ? 1 : 0);
+		int16		format;
 		int			i;
 
+		if (cstate->raw)
+			format = 2;
+		else if (cstate->binary)
+			format = 1;
+		else
+			format = 0;
+
 		pq_beginmessage(&buf, 'G');
 		pq_sendbyte(&buf, format);		/* overall format */
 		pq_sendint(&buf, natts, 2);
@@ -405,7 +428,7 @@ ReceiveCopyBegin(CopyState cstate)
 	else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
 	{
 		/* old way */
-		if (cstate->binary)
+		if (cstate->binary || cstate->raw)
 			ereport(ERROR,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 			errmsg("COPY BINARY is not supported to stdout or from stdin")));
@@ -417,7 +440,7 @@ ReceiveCopyBegin(CopyState cstate)
 	else
 	{
 		/* very old way */
-		if (cstate->binary)
+		if (cstate->binary || cstate->raw)
 			ereport(ERROR,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 			errmsg("COPY BINARY is not supported to stdout or from stdin")));
@@ -485,7 +508,7 @@ CopySendEndOfRow(CopyState cstate)
 	switch (cstate->copy_dest)
 	{
 		case COPY_FILE:
-			if (!cstate->binary)
+			if (!cstate->binary && !cstate->raw)
 			{
 				/* Default line termination depends on platform */
 #ifndef WIN32
@@ -530,7 +553,7 @@ CopySendEndOfRow(CopyState cstate)
 			break;
 		case COPY_OLD_FE:
 			/* The FE/BE protocol uses \n as newline for all platforms */
-			if (!cstate->binary)
+			if (!cstate->binary && !cstate->raw)
 				CopySendChar(cstate, '\n');
 
 			if (pq_putbytes(fe_msgbuf->data, fe_msgbuf->len))
@@ -543,7 +566,7 @@ CopySendEndOfRow(CopyState cstate)
 			break;
 		case COPY_NEW_FE:
 			/* The FE/BE protocol uses \n as newline for all platforms */
-			if (!cstate->binary)
+			if (!cstate->binary && !cstate->raw)
 				CopySendChar(cstate, '\n');
 
 			/* Dump the accumulated row as one CopyData message */
@@ -600,6 +623,7 @@ CopyGetData(CopyState cstate, void *databuf, int minread, int maxread)
 			bytesread = minread;
 			break;
 		case COPY_NEW_FE:
+
 			while (maxread > 0 && bytesread < minread && !cstate->fe_eof)
 			{
 				int			avail;
@@ -769,6 +793,36 @@ CopyLoadRawBuf(CopyState cstate)
 	return (inbytes > 0);
 }
 
+/*
+ * CopyLoadAllRawBuf load all file into raw_buf.
+ *
+ * It is used for reading content in raw mode. If original RAW_BUF_SIZE is not
+ * enough, the buffer is enlarged.
+ */
+static void
+CopyLoadAllRawBuf(CopyState cstate)
+{
+	int			nbytes = 0;
+	int			inbytes;
+	Size			raw_buf_size = RAW_BUF_SIZE;
+
+	inbytes = CopyGetData(cstate, cstate->raw_buf + nbytes, 1, RAW_BUF_SIZE);
+	while (inbytes == RAW_BUF_SIZE)
+	{
+		nbytes += inbytes;
+
+		/* Have to enlarge raw_buf */
+		raw_buf_size += RAW_BUF_SIZE + 1;
+		cstate->raw_buf = repalloc(cstate->raw_buf, raw_buf_size);
+
+		inbytes = CopyGetData(cstate, cstate->raw_buf + nbytes, 1, RAW_BUF_SIZE);
+	}
+
+	nbytes += inbytes;
+	cstate->raw_buf[nbytes] = '\0';
+	cstate->raw_buf_index = 0;
+	cstate->raw_buf_len = nbytes;
+}
 
 /*
  *	 DoCopy executes the SQL COPY statement
@@ -1006,6 +1060,8 @@ ProcessCopyOptions(CopyState cstate,
 				cstate->csv_mode = true;
 			else if (strcmp(fmt, "binary") == 0)
 				cstate->binary = true;
+			else if (strcmp(fmt, "raw") == 0)
+				cstate->raw = true;
 			else
 				ereport(ERROR,
 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
@@ -1155,15 +1211,20 @@ ProcessCopyOptions(CopyState cstate,
 	 * Check for incompatible options (must do these two before inserting
 	 * defaults)
 	 */
-	if (cstate->binary && cstate->delim)
+	if ((cstate->binary || cstate->raw) && cstate->delim)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("cannot specify DELIMITER in BINARY or RAW mode")));
+
+	if ((cstate->binary || cstate->raw) && cstate->null_print)
 		ereport(ERROR,
 				(errcode(ERRCODE_SYNTAX_ERROR),
-				 errmsg("cannot specify DELIMITER in BINARY mode")));
+				 errmsg("cannot specify NULL in BINARY or RAW mode")));
 
-	if (cstate->binary && cstate->null_print)
+	if (cstate->raw && cstate->oids)
 		ereport(ERROR,
 				(errcode(ERRCODE_SYNTAX_ERROR),
-				 errmsg("cannot specify NULL in BINARY mode")));
+				 errmsg("cannot specify OIDS in RAW mode")));
 
 	/* Set defaults for omitted options */
 	if (!cstate->delim)
@@ -1559,6 +1620,20 @@ BeginCopy(bool is_from,
 		}
 	}
 
+	/*
+	 * Initializaze the field "row_processed" for one row output in RAW mode,
+	 * and ensure only one output column.
+	 */
+	if (cstate->raw)
+	{
+		cstate->row_processed = false;
+
+		if (num_phys_attrs > 1)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("only single column result is allowed in RAW mode")));
+	}
+
 	/* Use client encoding when ENCODING option is not specified. */
 	if (cstate->file_encoding < 0)
 		cstate->file_encoding = pg_get_client_encoding();
@@ -1821,7 +1896,7 @@ CopyTo(CopyState cstate)
 		Oid			out_func_oid;
 		bool		isvarlena;
 
-		if (cstate->binary)
+		if (cstate->binary || cstate->raw)
 			getTypeBinaryOutputInfo(attr[attnum - 1]->atttypid,
 									&out_func_oid,
 									&isvarlena);
@@ -1860,7 +1935,7 @@ CopyTo(CopyState cstate)
 		tmp = 0;
 		CopySendInt32(cstate, tmp);
 	}
-	else
+	else if (!cstate->raw)
 	{
 		/*
 		 * For non-binary copy, we need to convert null_print to file
@@ -1928,7 +2003,7 @@ CopyTo(CopyState cstate)
 	else
 	{
 		/* run the plan --- the dest receiver will send tuples */
-		ExecutorRun(cstate->queryDesc, ForwardScanDirection, 0L);
+		ExecutorRun(cstate->queryDesc, ForwardScanDirection, cstate->raw ? 2L : 0L);
 		processed = ((DR_copy *) cstate->queryDesc->dest)->processed;
 	}
 
@@ -1972,6 +2047,14 @@ CopyOneRowTo(CopyState cstate, Oid tupleOid, Datum *values, bool *nulls)
 			CopySendInt32(cstate, tupleOid);
 		}
 	}
+	else if (cstate->raw)
+	{
+		if (cstate->row_processed)
+			ereport(ERROR,
+					(errcode(ERRCODE_TOO_MANY_ROWS),
+					 errmsg("only single row result is allowed in RAW mode")));
+		cstate->row_processed = true;
+	}
 	else
 	{
 		/* Text format has no per-tuple header, but send OID if wanted */
@@ -1991,7 +2074,7 @@ CopyOneRowTo(CopyState cstate, Oid tupleOid, Datum *values, bool *nulls)
 		Datum		value = values[attnum - 1];
 		bool		isnull = nulls[attnum - 1];
 
-		if (!cstate->binary)
+		if (!cstate->binary && !cstate->raw)
 		{
 			if (need_delim)
 				CopySendChar(cstate, cstate->delim[0]);
@@ -2000,14 +2083,32 @@ CopyOneRowTo(CopyState cstate, Oid tupleOid, Datum *values, bool *nulls)
 
 		if (isnull)
 		{
-			if (!cstate->binary)
+			if (cstate->raw)
+					ereport(ERROR,
+						(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+						  errmsg("cannot to copy NULL value in RAW mode.")));
+			else if (!cstate->binary)
 				CopySendString(cstate, cstate->null_print_client);
 			else
 				CopySendInt32(cstate, -1);
 		}
 		else
 		{
-			if (!cstate->binary)
+			if (cstate->binary || cstate->raw)
+			{
+				bytea	   *outputbytes;
+
+				outputbytes = SendFunctionCall(&out_functions[attnum - 1],
+											   value);
+
+				/* send the size only in binary mode */
+				if (cstate->binary)
+					CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
+
+				CopySendData(cstate, VARDATA(outputbytes),
+							 VARSIZE(outputbytes) - VARHDRSZ);
+			}
+			else
 			{
 				string = OutputFunctionCall(&out_functions[attnum - 1],
 											value);
@@ -2018,16 +2119,6 @@ CopyOneRowTo(CopyState cstate, Oid tupleOid, Datum *values, bool *nulls)
 				else
 					CopyAttributeOutText(cstate, string);
 			}
-			else
-			{
-				bytea	   *outputbytes;
-
-				outputbytes = SendFunctionCall(&out_functions[attnum - 1],
-											   value);
-				CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
-				CopySendData(cstate, VARDATA(outputbytes),
-							 VARSIZE(outputbytes) - VARHDRSZ);
-			}
 		}
 	}
 
@@ -2657,7 +2748,7 @@ BeginCopyFrom(Relation rel,
 			continue;
 
 		/* Fetch the input function and typioparam info */
-		if (cstate->binary)
+		if (cstate->binary || cstate->raw)
 			getTypeBinaryInputInfo(attr[attnum - 1]->atttypid,
 								   &in_func_oid, &typioparams[attnum - 1]);
 		else
@@ -2752,7 +2843,7 @@ BeginCopyFrom(Relation rel,
 		}
 	}
 
-	if (!cstate->binary)
+	if (!cstate->binary || cstate->raw)
 	{
 		/* must rely on user to tell us... */
 		cstate->file_has_oids = cstate->oids;
@@ -2804,7 +2895,7 @@ BeginCopyFrom(Relation rel,
 	}
 
 	/* create workspace for CopyReadAttributes results */
-	if (!cstate->binary)
+	if (!cstate->binary && !cstate->raw)
 	{
 		AttrNumber	attr_count = list_length(cstate->attnumlist);
 		int			nfields = cstate->file_has_oids ? (attr_count + 1) : attr_count;
@@ -2909,8 +3000,120 @@ NextCopyFrom(CopyState cstate, ExprContext *econtext,
 	MemSet(values, 0, num_phys_attrs * sizeof(Datum));
 	MemSet(nulls, true, num_phys_attrs * sizeof(bool));
 
-	if (!cstate->binary)
+	if (cstate->binary)
+	{
+		int16		fld_count;
+		ListCell   *cur;
+
+		cstate->cur_lineno++;
+
+		if (!CopyGetInt16(cstate, &fld_count))
+		{
+			/* EOF detected (end of file, or protocol-level EOF) */
+			return false;
+		}
+
+		if (fld_count == -1)
+		{
+			/*
+			 * Received EOF marker.  In a V3-protocol copy, wait for the
+			 * protocol-level EOF, and complain if it doesn't come
+			 * immediately.  This ensures that we correctly handle CopyFail,
+			 * if client chooses to send that now.
+			 *
+			 * Note that we MUST NOT try to read more data in an old-protocol
+			 * copy, since there is no protocol-level EOF marker then.  We
+			 * could go either way for copy from file, but choose to throw
+			 * error if there's data after the EOF marker, for consistency
+			 * with the new-protocol case.
+			 */
+			char		dummy;
+
+			if (cstate->copy_dest != COPY_OLD_FE &&
+				CopyGetData(cstate, &dummy, 1, 1) > 0)
+				ereport(ERROR,
+						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+						 errmsg("received copy data after EOF marker")));
+			return false;
+		}
+
+		if (fld_count != attr_count)
+			ereport(ERROR,
+					(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+					 errmsg("row field count is %d, expected %d",
+							(int) fld_count, attr_count)));
+
+		if (file_has_oids)
+		{
+			Oid			loaded_oid;
+
+			cstate->cur_attname = "oid";
+			loaded_oid =
+				DatumGetObjectId(CopyReadBinaryAttribute(cstate,
+														 0,
+													&cstate->oid_in_function,
+													  cstate->oid_typioparam,
+														 -1,
+														 &isnull));
+			if (isnull || loaded_oid == InvalidOid)
+				ereport(ERROR,
+						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+						 errmsg("invalid OID in COPY data")));
+			cstate->cur_attname = NULL;
+			if (cstate->oids && tupleOid != NULL)
+				*tupleOid = loaded_oid;
+		}
+
+		i = 0;
+		foreach(cur, cstate->attnumlist)
+		{
+			int			attnum = lfirst_int(cur);
+			int			m = attnum - 1;
+
+			cstate->cur_attname = NameStr(attr[m]->attname);
+			i++;
+			values[m] = CopyReadBinaryAttribute(cstate,
+												i,
+												&in_functions[m],
+												typioparams[m],
+												attr[m]->atttypmod,
+												&nulls[m]);
+			cstate->cur_attname = NULL;
+		}
+	}
+	else if (cstate->raw)
 	{
+		if (cstate->row_processed)
+			return false;
+
+		CopyLoadAllRawBuf(cstate);
+		cstate->cur_attname = NameStr(attr[0]->attname);
+
+		if (cstate->attribute_buf.data != NULL)
+			pfree(cstate->attribute_buf.data);
+
+		cstate->attribute_buf.data = cstate->raw_buf;
+		cstate->attribute_buf.len = cstate->raw_buf_len;
+		cstate->attribute_buf.cursor = 0;
+
+		cstate->raw_buf = NULL;
+
+		/* Call the column type's binary input converter */
+		values[0] = ReceiveFunctionCall(&in_functions[0], &cstate->attribute_buf,
+								 typioparams[0], attr[0]->atttypmod);
+		nulls[0] = false;
+
+		/* Trouble if it didn't eat the whole buffer */
+		if (cstate->attribute_buf.cursor != cstate->attribute_buf.len)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+					 errmsg("incorrect binary data format")));
+
+		cstate->row_processed = true;
+	}
+	else
+	{
+		/* text */
 		char	  **field_strings;
 		ListCell   *cur;
 		int			fldct;
@@ -3015,88 +3218,6 @@ NextCopyFrom(CopyState cstate, ExprContext *econtext,
 
 		Assert(fieldno == nfields);
 	}
-	else
-	{
-		/* binary */
-		int16		fld_count;
-		ListCell   *cur;
-
-		cstate->cur_lineno++;
-
-		if (!CopyGetInt16(cstate, &fld_count))
-		{
-			/* EOF detected (end of file, or protocol-level EOF) */
-			return false;
-		}
-
-		if (fld_count == -1)
-		{
-			/*
-			 * Received EOF marker.  In a V3-protocol copy, wait for the
-			 * protocol-level EOF, and complain if it doesn't come
-			 * immediately.  This ensures that we correctly handle CopyFail,
-			 * if client chooses to send that now.
-			 *
-			 * Note that we MUST NOT try to read more data in an old-protocol
-			 * copy, since there is no protocol-level EOF marker then.  We
-			 * could go either way for copy from file, but choose to throw
-			 * error if there's data after the EOF marker, for consistency
-			 * with the new-protocol case.
-			 */
-			char		dummy;
-
-			if (cstate->copy_dest != COPY_OLD_FE &&
-				CopyGetData(cstate, &dummy, 1, 1) > 0)
-				ereport(ERROR,
-						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
-						 errmsg("received copy data after EOF marker")));
-			return false;
-		}
-
-		if (fld_count != attr_count)
-			ereport(ERROR,
-					(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
-					 errmsg("row field count is %d, expected %d",
-							(int) fld_count, attr_count)));
-
-		if (file_has_oids)
-		{
-			Oid			loaded_oid;
-
-			cstate->cur_attname = "oid";
-			loaded_oid =
-				DatumGetObjectId(CopyReadBinaryAttribute(cstate,
-														 0,
-													&cstate->oid_in_function,
-													  cstate->oid_typioparam,
-														 -1,
-														 &isnull));
-			if (isnull || loaded_oid == InvalidOid)
-				ereport(ERROR,
-						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
-						 errmsg("invalid OID in COPY data")));
-			cstate->cur_attname = NULL;
-			if (cstate->oids && tupleOid != NULL)
-				*tupleOid = loaded_oid;
-		}
-
-		i = 0;
-		foreach(cur, cstate->attnumlist)
-		{
-			int			attnum = lfirst_int(cur);
-			int			m = attnum - 1;
-
-			cstate->cur_attname = NameStr(attr[m]->attname);
-			i++;
-			values[m] = CopyReadBinaryAttribute(cstate,
-												i,
-												&in_functions[m],
-												typioparams[m],
-												attr[m]->atttypmod,
-												&nulls[m]);
-			cstate->cur_attname = NULL;
-		}
-	}
 
 	/*
 	 * Now compute and insert any defaults available for the columns not
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index 5e31737..30e77ca 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -469,3 +469,16 @@ DROP FUNCTION truncate_in_subxact();
 DROP TABLE x, y;
 DROP FUNCTION fn_x_before();
 DROP FUNCTION fn_x_after();
+CREATE TABLE x(a bytea);
+INSERT INTO x VALUES('\x41484f4a0a');
+INSERT INTO x VALUES('\x41484f4a0a');
+-- should to fail
+COPY (SELECT a,a FROM x LIMIT 1) TO STDOUT (FORMAT raw);
+ERROR:  only single column result is allowed in RAW mode
+COPY (SELECT a FROM x) TO STDOUT (FORMAT raw);
+AHOJ
+ERROR:  only single row result is allowed in RAW mode
+-- should be ok
+COPY (SELECT a FROM x LIMIT 1) TO STDOUT (FORMAT raw);
+AHOJ
+DROP TABLE x;
diff --git a/src/test/regress/input/copy.source b/src/test/regress/input/copy.source
index cb13606..d8970b1 100644
--- a/src/test/regress/input/copy.source
+++ b/src/test/regress/input/copy.source
@@ -133,3 +133,36 @@ this is just a line full of junk that would error out if parsed
 \.
 
 copy copytest3 to stdout csv header;
+
+-- copy raw
+CREATE TABLE x(a bytea);
+INSERT INTO x VALUES('\x41484f4a0a');
+SELECT length(a) FROM x;
+
+INSERT INTO x VALUES('\x41484f4a0a');
+
+-- should to fail
+COPY (SELECT a,a FROM x LIMIT 1) TO '@abs_builddir@/results/raw.data' (FORMAT raw);
+COPY (SELECT a FROM x) TO '@abs_builddir@/results/raw.data' (FORMAT raw);
+
+-- should be ok
+COPY (SELECT a FROM x LIMIT 1) TO '@abs_builddir@/results/raw.data' (FORMAT raw);
+TRUNCATE x;
+COPY x FROM '@abs_builddir@/results/raw.data' (FORMAT raw);
+SELECT length(a) FROM x;
+COPY x TO stdout (FORMAT raw);
+
+TRUNCATE x;
+
+\COPY x FROM '@abs_builddir@/results/raw.data' (FORMAT raw)
+SELECT length(a) FROM x;
+COPY x TO stdout (FORMAT raw);
+
+\COPY x TO '@abs_builddir@/results/raw2.data' (FORMAT raw)
+TRUNCATE x;
+
+\COPY x FROM '@abs_builddir@/results/raw2.data' (FORMAT raw)
+SELECT length(a) FROM x;
+COPY x TO stdout (FORMAT raw);
+
+DROP TABLE x;
diff --git a/src/test/regress/output/copy.source b/src/test/regress/output/copy.source
index b7e372d..878797a 100644
--- a/src/test/regress/output/copy.source
+++ b/src/test/regress/output/copy.source
@@ -95,3 +95,52 @@ copy copytest3 to stdout csv header;
 c1,"col with , comma","col with "" quote"
 1,a,1
 2,b,2
+-- copy raw
+CREATE TABLE x(a bytea);
+INSERT INTO x VALUES('\x41484f4a0a');
+SELECT length(a) FROM x;
+ length 
+--------
+      5
+(1 row)
+
+INSERT INTO x VALUES('\x41484f4a0a');
+-- should to fail
+COPY (SELECT a,a FROM x LIMIT 1) TO '@abs_builddir@/results/raw.data' (FORMAT raw);
+ERROR:  only single column result is allowed in RAW mode
+COPY (SELECT a FROM x) TO '@abs_builddir@/results/raw.data' (FORMAT raw);
+ERROR:  only single row result is allowed in RAW mode
+-- should be ok
+COPY (SELECT a FROM x LIMIT 1) TO '@abs_builddir@/results/raw.data' (FORMAT raw);
+TRUNCATE x;
+COPY x FROM '@abs_builddir@/results/raw.data' (FORMAT raw);
+SELECT length(a) FROM x;
+ length 
+--------
+      5
+(1 row)
+
+COPY x TO stdout (FORMAT raw);
+AHOJ
+TRUNCATE x;
+\COPY x FROM '@abs_builddir@/results/raw.data' (FORMAT raw)
+SELECT length(a) FROM x;
+ length 
+--------
+      5
+(1 row)
+
+COPY x TO stdout (FORMAT raw);
+AHOJ
+\COPY x TO '@abs_builddir@/results/raw2.data' (FORMAT raw)
+TRUNCATE x;
+\COPY x FROM '@abs_builddir@/results/raw2.data' (FORMAT raw)
+SELECT length(a) FROM x;
+ length 
+--------
+      5
+(1 row)
+
+COPY x TO stdout (FORMAT raw);
+AHOJ
+DROP TABLE x;
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index 39a9deb..e5703a5 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -333,3 +333,16 @@ DROP FUNCTION truncate_in_subxact();
 DROP TABLE x, y;
 DROP FUNCTION fn_x_before();
 DROP FUNCTION fn_x_after();
+
+CREATE TABLE x(a bytea);
+INSERT INTO x VALUES('\x41484f4a0a');
+INSERT INTO x VALUES('\x41484f4a0a');
+
+-- should to fail
+COPY (SELECT a,a FROM x LIMIT 1) TO STDOUT (FORMAT raw);
+COPY (SELECT a FROM x) TO STDOUT (FORMAT raw);
+
+-- should be ok
+COPY (SELECT a FROM x LIMIT 1) TO STDOUT (FORMAT raw);
+
+DROP TABLE x;
#20Pavel Stehule
pavel.stehule@gmail.com
In reply to: Pavel Stehule (#19)
1 attachment(s)
Re: raw output from copy

Hi

The previous patch was broken and buggy.

Here is a new version with fixed upload and more tests.

The interesting thing is that I did not have to modify the interface or the
client, so it should work with any current driver that supports protocol
version 3 or later.

Regards

Pavel

2015-07-06 23:34 GMT+02:00 Pavel Stehule <pavel.stehule@gmail.com>:

Show quoted text

Hi

Here is a version that supports both directions.

postgres=# copy foo from '/tmp/1.jpg' (format raw);
COPY 1
Time: 93.021 ms
postgres=# \dt+ foo
List of relations
┌────────┬──────┬───────┬───────┬────────┬─────────────┐
│ Schema │ Name │ Type │ Owner │ Size │ Description │
╞════════╪══════╪═══════╪═══════╪════════╪═════════════╡
│ public │ foo │ table │ pavel │ 256 kB │ │
└────────┴──────┴───────┴───────┴────────┴─────────────┘
(1 row)

postgres=# \copy foo to '~/3.jpg' (format raw)
COPY 1
Time: 2.401 ms

Regards

Pavel

2015-07-02 17:02 GMT+02:00 Tom Lane <tgl@sss.pgh.pa.us>:

Andrew Dunstan <andrew@dunslane.net> writes:

Does the COPY line protocol even support binary data?

The protocol, per se, just transmits a byte stream. There is a field
in the CopyInResponse/CopyOutResponse messages that indicates whether
a text or binary copy is being done. One thing we'd have to consider
is whether "raw" mode is sufficiently different from binary to justify
an additional value for this field, and if so whether that constitutes
a protocol break.

IIRC, psql wouldn't really care; it just transfers the byte stream to or
from the target file, regardless of text or binary mode. But there might
be other client libraries that are smarter and expect "binary" mode to
mean the binary file format specified in the COPY reference page. So
there may be value in being explicit about "raw" mode in these messages.

A key point in all this is that people who need "raw" transfer probably
need it in both directions, a point that your SELECT proposal cannot
satisfy, but hacking COPY could. So I lean towards the latter really.

regards, tom lane

Attachments:

copy-raw-format-20150707-02.patchtext/x-patch; charset=US-ASCII; name=copy-raw-format-20150707-02.patchDownload
diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
new file mode 100644
index 2850b47..5739158
*** a/doc/src/sgml/ref/copy.sgml
--- b/doc/src/sgml/ref/copy.sgml
*************** COPY { <replaceable class="parameter">ta
*** 190,196 ****
        Selects the data format to be read or written:
        <literal>text</>,
        <literal>csv</> (Comma Separated Values),
!       or <literal>binary</>.
        The default is <literal>text</>.
       </para>
      </listitem>
--- 190,196 ----
        Selects the data format to be read or written:
        <literal>text</>,
        <literal>csv</> (Comma Separated Values),
!       <literal>binary</> or <literal>raw</literal>.
        The default is <literal>text</>.
       </para>
      </listitem>
*************** OIDs to be shown as null if that ever pr
*** 881,886 ****
--- 881,918 ----
      </para>
     </refsect3>
    </refsect2>
+ 
+   <refsect2>
+      <title>Raw Format</title>
+ 
+    <para>
+     The <literal>raw</literal> format option causes all data to be
+     stored/read as binary format rather than as text. It shares format
+     for data with <literal>binary</literal> format. This format doesn't
+     use any metadata - only row data in network byte order are exported
+     or imported.
+    </para>
+ 
+    <para>
+     Because this format doesn't support any delimiter, only one value
+     can be exported or imported. NULL values are not allowed.
+    </para>
+    <para>
+     The <literal>raw</literal> format can be used for export or import
+     bytea values.
+ <programlisting>
+ COPY images(data) FROM '/usr1/proj/img/01.jpg' (FORMAT raw);
+ </programlisting>
+     It can be used successfully for export XML in different encoding
+     or import valid XML document with any supported encoding:
+ <screen><![CDATA[
+ SET client_encoding TO latin2;
+ 
+ COPY (SELECT xmlelement(NAME data, 'Hello')) TO stdout (FORMAT raw);
+ <?xml version="1.0" encoding="LATIN2"?><data>Hello</data>
+ ]]></screen>
+    </para>
+   </refsect2>
   </refsect1>
  
   <refsect1>
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
new file mode 100644
index 8904676..c69854c
*** a/src/backend/commands/copy.c
--- b/src/backend/commands/copy.c
*************** typedef enum EolType
*** 92,97 ****
--- 92,102 ----
   * it's faster to make useless comparisons to trailing bytes than it is to
   * invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is TRUE
   * when we have to do it the hard way.
+  *
+  * COPY supports three modes: text, binary and raw. The text format is plain
+  * text multiline format with specified delimiter. The binary format holds
+  * metadata (numbers, sizes) and data. The raw format holds data only and
+  * only one non NULL value can be processed.
   */
  typedef struct CopyStateData
  {
*************** typedef struct CopyStateData
*** 113,118 ****
--- 118,124 ----
  	char	   *filename;		/* filename, or NULL for STDIN/STDOUT */
  	bool		is_program;		/* is 'filename' a program to popen? */
  	bool		binary;			/* binary format? */
+ 	bool		raw;			/* required raw binary? */
  	bool		oids;			/* include OIDs? */
  	bool		freeze;			/* freeze rows on loading? */
  	bool		csv_mode;		/* Comma Separated Value format? */
*************** typedef struct CopyStateData
*** 202,207 ****
--- 208,216 ----
  	char	   *raw_buf;
  	int			raw_buf_index;	/* next byte to process */
  	int			raw_buf_len;	/* total # of bytes stored */
+ 
+ 	/* field for RAW mode */
+ 	bool		row_processed;		/* true, when first row was processed */
  } CopyStateData;
  
  /* DestReceiver for COPY (SELECT) TO */
*************** SendCopyBegin(CopyState cstate)
*** 345,353 ****
  		/* new way */
  		StringInfoData buf;
  		int			natts = list_length(cstate->attnumlist);
! 		int16		format = (cstate->binary ? 1 : 0);
  		int			i;
  
  		pq_beginmessage(&buf, 'H');
  		pq_sendbyte(&buf, format);		/* overall format */
  		pq_sendint(&buf, natts, 2);
--- 354,369 ----
  		/* new way */
  		StringInfoData buf;
  		int			natts = list_length(cstate->attnumlist);
! 		int16		format;
  		int			i;
  
+ 		if (cstate->raw)
+ 			format = 2;
+ 		else if (cstate->binary)
+ 			format = 1;
+ 		else
+ 			format = 0;
+ 
  		pq_beginmessage(&buf, 'H');
  		pq_sendbyte(&buf, format);		/* overall format */
  		pq_sendint(&buf, natts, 2);
*************** SendCopyBegin(CopyState cstate)
*** 359,365 ****
  	else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
  	{
  		/* old way */
! 		if (cstate->binary)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
  			errmsg("COPY BINARY is not supported to stdout or from stdin")));
--- 375,381 ----
  	else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
  	{
  		/* old way */
! 		if (cstate->binary && cstate->raw)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
  			errmsg("COPY BINARY is not supported to stdout or from stdin")));
*************** SendCopyBegin(CopyState cstate)
*** 371,377 ****
  	else
  	{
  		/* very old way */
! 		if (cstate->binary)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
  			errmsg("COPY BINARY is not supported to stdout or from stdin")));
--- 387,393 ----
  	else
  	{
  		/* very old way */
! 		if (cstate->binary && cstate->raw)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
  			errmsg("COPY BINARY is not supported to stdout or from stdin")));
*************** ReceiveCopyBegin(CopyState cstate)
*** 390,398 ****
  		/* new way */
  		StringInfoData buf;
  		int			natts = list_length(cstate->attnumlist);
! 		int16		format = (cstate->binary ? 1 : 0);
  		int			i;
  
  		pq_beginmessage(&buf, 'G');
  		pq_sendbyte(&buf, format);		/* overall format */
  		pq_sendint(&buf, natts, 2);
--- 406,421 ----
  		/* new way */
  		StringInfoData buf;
  		int			natts = list_length(cstate->attnumlist);
! 		int16		format;
  		int			i;
  
+ 		if (cstate->raw)
+ 			format = 2;
+ 		else if (cstate->binary)
+ 			format = 1;
+ 		else
+ 			format = 0;
+ 
  		pq_beginmessage(&buf, 'G');
  		pq_sendbyte(&buf, format);		/* overall format */
  		pq_sendint(&buf, natts, 2);
*************** ReceiveCopyBegin(CopyState cstate)
*** 405,411 ****
  	else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
  	{
  		/* old way */
! 		if (cstate->binary)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
  			errmsg("COPY BINARY is not supported to stdout or from stdin")));
--- 428,434 ----
  	else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
  	{
  		/* old way */
! 		if (cstate->binary || cstate->raw)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
  			errmsg("COPY BINARY is not supported to stdout or from stdin")));
*************** ReceiveCopyBegin(CopyState cstate)
*** 417,423 ****
  	else
  	{
  		/* very old way */
! 		if (cstate->binary)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
  			errmsg("COPY BINARY is not supported to stdout or from stdin")));
--- 440,446 ----
  	else
  	{
  		/* very old way */
! 		if (cstate->binary || cstate->raw)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
  			errmsg("COPY BINARY is not supported to stdout or from stdin")));
*************** CopySendEndOfRow(CopyState cstate)
*** 485,491 ****
  	switch (cstate->copy_dest)
  	{
  		case COPY_FILE:
! 			if (!cstate->binary)
  			{
  				/* Default line termination depends on platform */
  #ifndef WIN32
--- 508,514 ----
  	switch (cstate->copy_dest)
  	{
  		case COPY_FILE:
! 			if (!cstate->binary && !cstate->raw)
  			{
  				/* Default line termination depends on platform */
  #ifndef WIN32
*************** CopySendEndOfRow(CopyState cstate)
*** 530,536 ****
  			break;
  		case COPY_OLD_FE:
  			/* The FE/BE protocol uses \n as newline for all platforms */
! 			if (!cstate->binary)
  				CopySendChar(cstate, '\n');
  
  			if (pq_putbytes(fe_msgbuf->data, fe_msgbuf->len))
--- 553,559 ----
  			break;
  		case COPY_OLD_FE:
  			/* The FE/BE protocol uses \n as newline for all platforms */
! 			if (!cstate->binary && !cstate->raw)
  				CopySendChar(cstate, '\n');
  
  			if (pq_putbytes(fe_msgbuf->data, fe_msgbuf->len))
*************** CopySendEndOfRow(CopyState cstate)
*** 543,549 ****
  			break;
  		case COPY_NEW_FE:
  			/* The FE/BE protocol uses \n as newline for all platforms */
! 			if (!cstate->binary)
  				CopySendChar(cstate, '\n');
  
  			/* Dump the accumulated row as one CopyData message */
--- 566,572 ----
  			break;
  		case COPY_NEW_FE:
  			/* The FE/BE protocol uses \n as newline for all platforms */
! 			if (!cstate->binary && !cstate->raw)
  				CopySendChar(cstate, '\n');
  
  			/* Dump the accumulated row as one CopyData message */
*************** CopyGetData(CopyState cstate, void *data
*** 600,605 ****
--- 623,629 ----
  			bytesread = minread;
  			break;
  		case COPY_NEW_FE:
+ 
  			while (maxread > 0 && bytesread < minread && !cstate->fe_eof)
  			{
  				int			avail;
*************** CopyGetData(CopyState cstate, void *data
*** 622,627 ****
--- 646,652 ----
  								(errcode(ERRCODE_CONNECTION_FAILURE),
  								 errmsg("unexpected EOF on client connection with an open transaction")));
  					RESUME_CANCEL_INTERRUPTS();
+ 
  					switch (mtype)
  					{
  						case 'd':		/* CopyData */
*************** CopyLoadRawBuf(CopyState cstate)
*** 769,774 ****
--- 794,830 ----
  	return (inbytes > 0);
  }
  
+ /*
+  * CopyLoadallRawBuf loads the entire input into raw_buf.
+  *
+  * It is used for reading content in raw mode. If the initial RAW_BUF_SIZE is
+  * not enough, the buffer is enlarged.
+  */
+ static void
+ CopyLoadallRawBuf(CopyState cstate)
+ {
+ 	int			nbytes = 0;
+ 	int			inbytes;
+ 	Size			raw_buf_size = RAW_BUF_SIZE;
+ 
+ 	do
+ 	{
+ 		/* hold enough space for one data packet */
+ 		if ((raw_buf_size - nbytes - 1) < 8 * 1024)
+ 		{
+ 			raw_buf_size += RAW_BUF_SIZE;
+ 			cstate->raw_buf = repalloc(cstate->raw_buf, raw_buf_size);
+ 		}
+ 
+ 		inbytes = CopyGetData(cstate, cstate->raw_buf + nbytes, 1, raw_buf_size - nbytes - 1);
+ 		nbytes += inbytes;
+ 	}
+ 	while (inbytes > 0);
+ 
+ 	cstate->raw_buf[nbytes] = '\0';
+ 	cstate->raw_buf_index = 0;
+ 	cstate->raw_buf_len = nbytes;
+ }
  
  /*
   *	 DoCopy executes the SQL COPY statement
*************** ProcessCopyOptions(CopyState cstate,
*** 1006,1011 ****
--- 1062,1069 ----
  				cstate->csv_mode = true;
  			else if (strcmp(fmt, "binary") == 0)
  				cstate->binary = true;
+ 			else if (strcmp(fmt, "raw") == 0)
+ 				cstate->raw = true;
  			else
  				ereport(ERROR,
  						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
*************** ProcessCopyOptions(CopyState cstate,
*** 1155,1169 ****
  	 * Check for incompatible options (must do these two before inserting
  	 * defaults)
  	 */
! 	if (cstate->binary && cstate->delim)
  		ereport(ERROR,
  				(errcode(ERRCODE_SYNTAX_ERROR),
! 				 errmsg("cannot specify DELIMITER in BINARY mode")));
  
! 	if (cstate->binary && cstate->null_print)
  		ereport(ERROR,
  				(errcode(ERRCODE_SYNTAX_ERROR),
! 				 errmsg("cannot specify NULL in BINARY mode")));
  
  	/* Set defaults for omitted options */
  	if (!cstate->delim)
--- 1213,1232 ----
  	 * Check for incompatible options (must do these two before inserting
  	 * defaults)
  	 */
! 	if ((cstate->binary || cstate->raw) && cstate->delim)
  		ereport(ERROR,
  				(errcode(ERRCODE_SYNTAX_ERROR),
! 				 errmsg("cannot specify DELIMITER in BINARY or RAW mode")));
  
! 	if ((cstate->binary || cstate->raw) && cstate->null_print)
  		ereport(ERROR,
  				(errcode(ERRCODE_SYNTAX_ERROR),
! 				 errmsg("cannot specify NULL in BINARY or RAW mode")));
! 
! 	if (cstate->raw && cstate->oids)
! 		ereport(ERROR,
! 				(errcode(ERRCODE_SYNTAX_ERROR),
! 				 errmsg("cannot specify OIDS in RAW mode")));
  
  	/* Set defaults for omitted options */
  	if (!cstate->delim)
*************** BeginCopy(bool is_from,
*** 1559,1564 ****
--- 1622,1641 ----
  		}
  	}
  
+ 	/*
+ 	 * Initialize the field "row_processed" for one-row output in RAW mode,
+ 	 * and ensure only one output column.
+ 	 */
+ 	if (cstate->raw)
+ 	{
+ 		cstate->row_processed = false;
+ 
+ 		if (num_phys_attrs > 1)
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ 					 errmsg("only single column result is allowed in RAW mode")));
+ 	}
+ 
  	/* Use client encoding when ENCODING option is not specified. */
  	if (cstate->file_encoding < 0)
  		cstate->file_encoding = pg_get_client_encoding();
*************** CopyTo(CopyState cstate)
*** 1821,1827 ****
  		Oid			out_func_oid;
  		bool		isvarlena;
  
! 		if (cstate->binary)
  			getTypeBinaryOutputInfo(attr[attnum - 1]->atttypid,
  									&out_func_oid,
  									&isvarlena);
--- 1898,1904 ----
  		Oid			out_func_oid;
  		bool		isvarlena;
  
! 		if (cstate->binary || cstate->raw)
  			getTypeBinaryOutputInfo(attr[attnum - 1]->atttypid,
  									&out_func_oid,
  									&isvarlena);
*************** CopyTo(CopyState cstate)
*** 1860,1866 ****
  		tmp = 0;
  		CopySendInt32(cstate, tmp);
  	}
! 	else
  	{
  		/*
  		 * For non-binary copy, we need to convert null_print to file
--- 1937,1943 ----
  		tmp = 0;
  		CopySendInt32(cstate, tmp);
  	}
! 	else if (!cstate->raw)
  	{
  		/*
  		 * For non-binary copy, we need to convert null_print to file
*************** CopyTo(CopyState cstate)
*** 1928,1934 ****
  	else
  	{
  		/* run the plan --- the dest receiver will send tuples */
! 		ExecutorRun(cstate->queryDesc, ForwardScanDirection, 0L);
  		processed = ((DR_copy *) cstate->queryDesc->dest)->processed;
  	}
  
--- 2005,2011 ----
  	else
  	{
  		/* run the plan --- the dest receiver will send tuples */
! 		ExecutorRun(cstate->queryDesc, ForwardScanDirection, cstate->raw ? 2L : 0L);
  		processed = ((DR_copy *) cstate->queryDesc->dest)->processed;
  	}
  
*************** CopyOneRowTo(CopyState cstate, Oid tuple
*** 1972,1977 ****
--- 2049,2062 ----
  			CopySendInt32(cstate, tupleOid);
  		}
  	}
+ 	else if (cstate->raw)
+ 	{
+ 		if (cstate->row_processed)
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_TOO_MANY_ROWS),
+ 					 errmsg("only single row result is allowed in RAW mode")));
+ 		cstate->row_processed = true;
+ 	}
  	else
  	{
  		/* Text format has no per-tuple header, but send OID if wanted */
*************** CopyOneRowTo(CopyState cstate, Oid tuple
*** 1991,1997 ****
  		Datum		value = values[attnum - 1];
  		bool		isnull = nulls[attnum - 1];
  
! 		if (!cstate->binary)
  		{
  			if (need_delim)
  				CopySendChar(cstate, cstate->delim[0]);
--- 2076,2082 ----
  		Datum		value = values[attnum - 1];
  		bool		isnull = nulls[attnum - 1];
  
! 		if (!cstate->binary && !cstate->raw)
  		{
  			if (need_delim)
  				CopySendChar(cstate, cstate->delim[0]);
*************** CopyOneRowTo(CopyState cstate, Oid tuple
*** 2000,2013 ****
  
  		if (isnull)
  		{
! 			if (!cstate->binary)
  				CopySendString(cstate, cstate->null_print_client);
  			else
  				CopySendInt32(cstate, -1);
  		}
  		else
  		{
! 			if (!cstate->binary)
  			{
  				string = OutputFunctionCall(&out_functions[attnum - 1],
  											value);
--- 2085,2116 ----
  
  		if (isnull)
  		{
! 			if (cstate->raw)
! 					ereport(ERROR,
! 						(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
! 						  errmsg("cannot to copy NULL value in RAW mode.")));
! 			else if (!cstate->binary)
  				CopySendString(cstate, cstate->null_print_client);
  			else
  				CopySendInt32(cstate, -1);
  		}
  		else
  		{
! 			if (cstate->binary || cstate->raw)
! 			{
! 				bytea	   *outputbytes;
! 
! 				outputbytes = SendFunctionCall(&out_functions[attnum - 1],
! 											   value);
! 
! 				/* send the size only in binary mode */
! 				if (cstate->binary)
! 					CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
! 
! 				CopySendData(cstate, VARDATA(outputbytes),
! 							 VARSIZE(outputbytes) - VARHDRSZ);
! 			}
! 			else
  			{
  				string = OutputFunctionCall(&out_functions[attnum - 1],
  											value);
*************** CopyOneRowTo(CopyState cstate, Oid tuple
*** 2018,2033 ****
  				else
  					CopyAttributeOutText(cstate, string);
  			}
- 			else
- 			{
- 				bytea	   *outputbytes;
- 
- 				outputbytes = SendFunctionCall(&out_functions[attnum - 1],
- 											   value);
- 				CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
- 				CopySendData(cstate, VARDATA(outputbytes),
- 							 VARSIZE(outputbytes) - VARHDRSZ);
- 			}
  		}
  	}
  
--- 2121,2126 ----
*************** BeginCopyFrom(Relation rel,
*** 2657,2663 ****
  			continue;
  
  		/* Fetch the input function and typioparam info */
! 		if (cstate->binary)
  			getTypeBinaryInputInfo(attr[attnum - 1]->atttypid,
  								   &in_func_oid, &typioparams[attnum - 1]);
  		else
--- 2750,2756 ----
  			continue;
  
  		/* Fetch the input function and typioparam info */
! 		if (cstate->binary || cstate->raw)
  			getTypeBinaryInputInfo(attr[attnum - 1]->atttypid,
  								   &in_func_oid, &typioparams[attnum - 1]);
  		else
*************** BeginCopyFrom(Relation rel,
*** 2752,2758 ****
  		}
  	}
  
! 	if (!cstate->binary)
  	{
  		/* must rely on user to tell us... */
  		cstate->file_has_oids = cstate->oids;
--- 2845,2851 ----
  		}
  	}
  
! 	if (!cstate->binary || cstate->raw)
  	{
  		/* must rely on user to tell us... */
  		cstate->file_has_oids = cstate->oids;
*************** BeginCopyFrom(Relation rel,
*** 2804,2810 ****
  	}
  
  	/* create workspace for CopyReadAttributes results */
! 	if (!cstate->binary)
  	{
  		AttrNumber	attr_count = list_length(cstate->attnumlist);
  		int			nfields = cstate->file_has_oids ? (attr_count + 1) : attr_count;
--- 2897,2903 ----
  	}
  
  	/* create workspace for CopyReadAttributes results */
! 	if (!cstate->binary && !cstate->raw)
  	{
  		AttrNumber	attr_count = list_length(cstate->attnumlist);
  		int			nfields = cstate->file_has_oids ? (attr_count + 1) : attr_count;
*************** NextCopyFrom(CopyState cstate, ExprConte
*** 2909,2916 ****
  	MemSet(values, 0, num_phys_attrs * sizeof(Datum));
  	MemSet(nulls, true, num_phys_attrs * sizeof(bool));
  
! 	if (!cstate->binary)
  	{
  		char	  **field_strings;
  		ListCell   *cur;
  		int			fldct;
--- 3002,3121 ----
  	MemSet(values, 0, num_phys_attrs * sizeof(Datum));
  	MemSet(nulls, true, num_phys_attrs * sizeof(bool));
  
! 	if (cstate->binary)
  	{
+ 		int16		fld_count;
+ 		ListCell   *cur;
+ 
+ 		cstate->cur_lineno++;
+ 
+ 		if (!CopyGetInt16(cstate, &fld_count))
+ 		{
+ 			/* EOF detected (end of file, or protocol-level EOF) */
+ 			return false;
+ 		}
+ 
+ 		if (fld_count == -1)
+ 		{
+ 			/*
+ 			 * Received EOF marker.  In a V3-protocol copy, wait for the
+ 			 * protocol-level EOF, and complain if it doesn't come
+ 			 * immediately.  This ensures that we correctly handle CopyFail,
+ 			 * if client chooses to send that now.
+ 			 *
+ 			 * Note that we MUST NOT try to read more data in an old-protocol
+ 			 * copy, since there is no protocol-level EOF marker then.  We
+ 			 * could go either way for copy from file, but choose to throw
+ 			 * error if there's data after the EOF marker, for consistency
+ 			 * with the new-protocol case.
+ 			 */
+ 			char		dummy;
+ 
+ 			if (cstate->copy_dest != COPY_OLD_FE &&
+ 				CopyGetData(cstate, &dummy, 1, 1) > 0)
+ 				ereport(ERROR,
+ 						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ 						 errmsg("received copy data after EOF marker")));
+ 			return false;
+ 		}
+ 
+ 		if (fld_count != attr_count)
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ 					 errmsg("row field count is %d, expected %d",
+ 							(int) fld_count, attr_count)));
+ 
+ 		if (file_has_oids)
+ 		{
+ 			Oid			loaded_oid;
+ 
+ 			cstate->cur_attname = "oid";
+ 			loaded_oid =
+ 				DatumGetObjectId(CopyReadBinaryAttribute(cstate,
+ 														 0,
+ 													&cstate->oid_in_function,
+ 													  cstate->oid_typioparam,
+ 														 -1,
+ 														 &isnull));
+ 			if (isnull || loaded_oid == InvalidOid)
+ 				ereport(ERROR,
+ 						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ 						 errmsg("invalid OID in COPY data")));
+ 			cstate->cur_attname = NULL;
+ 			if (cstate->oids && tupleOid != NULL)
+ 				*tupleOid = loaded_oid;
+ 		}
+ 
+ 		i = 0;
+ 		foreach(cur, cstate->attnumlist)
+ 		{
+ 			int			attnum = lfirst_int(cur);
+ 			int			m = attnum - 1;
+ 
+ 			cstate->cur_attname = NameStr(attr[m]->attname);
+ 			i++;
+ 			values[m] = CopyReadBinaryAttribute(cstate,
+ 												i,
+ 												&in_functions[m],
+ 												typioparams[m],
+ 												attr[m]->atttypmod,
+ 												&nulls[m]);
+ 			cstate->cur_attname = NULL;
+ 		}
+ 	}
+ 	else if (cstate->raw)
+ 	{
+ 		if (cstate->row_processed)
+ 			return false;
+ 
+ 		CopyLoadallRawBuf(cstate);
+ 		cstate->cur_attname = NameStr(attr[0]->attname);
+ 
+ 		if (cstate->attribute_buf.data != NULL)
+ 			pfree(cstate->attribute_buf.data);
+ 
+ 		cstate->attribute_buf.data = cstate->raw_buf;
+ 		cstate->attribute_buf.len = cstate->raw_buf_len;
+ 		cstate->attribute_buf.cursor = 0;
+ 
+ 		cstate->raw_buf = NULL;
+ 
+ 		/* Call the column type's binary input converter */
+ 		values[0] = ReceiveFunctionCall(&in_functions[0], &cstate->attribute_buf,
+ 								 typioparams[0], attr[0]->atttypmod);
+ 		nulls[0] = false;
+ 
+ 		/* Trouble if it didn't eat the whole buffer */
+ 		if (cstate->attribute_buf.cursor != cstate->attribute_buf.len)
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+ 					 errmsg("incorrect binary data format")));
+ 
+ 		cstate->row_processed = true;
+ 	}
+ 	else
+ 	{
+ 		/* text */
  		char	  **field_strings;
  		ListCell   *cur;
  		int			fldct;
*************** NextCopyFrom(CopyState cstate, ExprConte
*** 3015,3102 ****
  
  		Assert(fieldno == nfields);
  	}
- 	else
- 	{
- 		/* binary */
- 		int16		fld_count;
- 		ListCell   *cur;
- 
- 		cstate->cur_lineno++;
- 
- 		if (!CopyGetInt16(cstate, &fld_count))
- 		{
- 			/* EOF detected (end of file, or protocol-level EOF) */
- 			return false;
- 		}
- 
- 		if (fld_count == -1)
- 		{
- 			/*
- 			 * Received EOF marker.  In a V3-protocol copy, wait for the
- 			 * protocol-level EOF, and complain if it doesn't come
- 			 * immediately.  This ensures that we correctly handle CopyFail,
- 			 * if client chooses to send that now.
- 			 *
- 			 * Note that we MUST NOT try to read more data in an old-protocol
- 			 * copy, since there is no protocol-level EOF marker then.  We
- 			 * could go either way for copy from file, but choose to throw
- 			 * error if there's data after the EOF marker, for consistency
- 			 * with the new-protocol case.
- 			 */
- 			char		dummy;
- 
- 			if (cstate->copy_dest != COPY_OLD_FE &&
- 				CopyGetData(cstate, &dummy, 1, 1) > 0)
- 				ereport(ERROR,
- 						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
- 						 errmsg("received copy data after EOF marker")));
- 			return false;
- 		}
- 
- 		if (fld_count != attr_count)
- 			ereport(ERROR,
- 					(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
- 					 errmsg("row field count is %d, expected %d",
- 							(int) fld_count, attr_count)));
- 
- 		if (file_has_oids)
- 		{
- 			Oid			loaded_oid;
- 
- 			cstate->cur_attname = "oid";
- 			loaded_oid =
- 				DatumGetObjectId(CopyReadBinaryAttribute(cstate,
- 														 0,
- 													&cstate->oid_in_function,
- 													  cstate->oid_typioparam,
- 														 -1,
- 														 &isnull));
- 			if (isnull || loaded_oid == InvalidOid)
- 				ereport(ERROR,
- 						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
- 						 errmsg("invalid OID in COPY data")));
- 			cstate->cur_attname = NULL;
- 			if (cstate->oids && tupleOid != NULL)
- 				*tupleOid = loaded_oid;
- 		}
- 
- 		i = 0;
- 		foreach(cur, cstate->attnumlist)
- 		{
- 			int			attnum = lfirst_int(cur);
- 			int			m = attnum - 1;
- 
- 			cstate->cur_attname = NameStr(attr[m]->attname);
- 			i++;
- 			values[m] = CopyReadBinaryAttribute(cstate,
- 												i,
- 												&in_functions[m],
- 												typioparams[m],
- 												attr[m]->atttypmod,
- 												&nulls[m]);
- 			cstate->cur_attname = NULL;
- 		}
- 	}
  
  	/*
  	 * Now compute and insert any defaults available for the columns not
--- 3220,3225 ----
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
new file mode 100644
index 5e31737..30e77ca
*** a/src/test/regress/expected/copy2.out
--- b/src/test/regress/expected/copy2.out
*************** DROP FUNCTION truncate_in_subxact();
*** 469,471 ****
--- 469,484 ----
  DROP TABLE x, y;
  DROP FUNCTION fn_x_before();
  DROP FUNCTION fn_x_after();
+ CREATE TABLE x(a bytea);
+ INSERT INTO x VALUES('\x41484f4a0a');
+ INSERT INTO x VALUES('\x41484f4a0a');
+ -- should to fail
+ COPY (SELECT a,a FROM x LIMIT 1) TO STDOUT (FORMAT raw);
+ ERROR:  only single column result is allowed in RAW mode
+ COPY (SELECT a FROM x) TO STDOUT (FORMAT raw);
+ AHOJ
+ ERROR:  only single row result is allowed in RAW mode
+ -- should be ok
+ COPY (SELECT a FROM x LIMIT 1) TO STDOUT (FORMAT raw);
+ AHOJ
+ DROP TABLE x;
diff --git a/src/test/regress/input/copy.source b/src/test/regress/input/copy.source
new file mode 100644
index cb13606..d351b43
*** a/src/test/regress/input/copy.source
--- b/src/test/regress/input/copy.source
*************** this is just a line full of junk that wo
*** 133,135 ****
--- 133,195 ----
  \.
  
  copy copytest3 to stdout csv header;
+ 
+ -- copy raw
+ CREATE TABLE x(a bytea);
+ INSERT INTO x VALUES('\x41484f4a0a');
+ SELECT length(a) FROM x;
+ 
+ INSERT INTO x VALUES('\x41484f4a0a');
+ 
+ -- should to fail
+ COPY (SELECT a,a FROM x LIMIT 1) TO '@abs_builddir@/results/raw.data' (FORMAT raw);
+ COPY (SELECT a FROM x) TO '@abs_builddir@/results/raw.data' (FORMAT raw);
+ 
+ -- should be ok
+ COPY (SELECT a FROM x LIMIT 1) TO '@abs_builddir@/results/raw.data' (FORMAT raw);
+ TRUNCATE x;
+ COPY x FROM '@abs_builddir@/results/raw.data' (FORMAT raw);
+ SELECT length(a) FROM x;
+ COPY x TO stdout (FORMAT raw);
+ 
+ TRUNCATE x;
+ 
+ \COPY x FROM '@abs_builddir@/results/raw.data' (FORMAT raw)
+ SELECT length(a) FROM x;
+ COPY x TO stdout (FORMAT raw);
+ 
+ \COPY x TO '@abs_builddir@/results/raw2.data' (FORMAT raw)
+ TRUNCATE x;
+ 
+ \COPY x FROM '@abs_builddir@/results/raw2.data' (FORMAT raw)
+ SELECT length(a) FROM x;
+ COPY x TO stdout (FORMAT raw);
+ 
+ -- test big file
+ TRUNCATE x;
+ -- use different mechanism for load to bytea
+ \lo_import @abs_builddir@/data/hash.data
+ \set lo_oid :LASTOID
+ INSERT INTO x VALUES(lo_get(:lo_oid));
+ \lo_unlink :lo_oid
+ 
+ COPY x FROM '@abs_builddir@/data/hash.data' (FORMAT raw);
+ \COPY x FROM '@abs_builddir@/data/hash.data' (FORMAT raw)
+ 
+ SELECT md5(a), length(a) FROM x;
+ 
+ TRUNCATE x;
+ COPY x FROM '@abs_builddir@/data/hash.data' (FORMAT raw);
+ COPY x TO '@abs_builddir@/results/hash2.data' (FORMAT raw);
+ \COPY x TO '@abs_builddir@/results/hash3.data' (FORMAT raw)
+ 
+ -- read again
+ COPY x FROM '@abs_builddir@/results/hash2.data' (FORMAT raw);
+ \COPY x FROM '@abs_builddir@/results/hash3.data' (FORMAT raw)
+ -- cross
+ COPY x FROM '@abs_builddir@/results/hash3.data' (FORMAT raw);
+ \COPY x FROM '@abs_builddir@/results/hash2.data' (FORMAT raw)
+ 
+ SELECT md5(a), length(a) FROM x;
+ 
+ DROP TABLE x;
diff --git a/src/test/regress/output/copy.source b/src/test/regress/output/copy.source
new file mode 100644
index b7e372d..f555f58
*** a/src/test/regress/output/copy.source
--- b/src/test/regress/output/copy.source
*************** copy copytest3 to stdout csv header;
*** 95,97 ****
--- 95,183 ----
  c1,"col with , comma","col with "" quote"
  1,a,1
  2,b,2
+ -- copy raw
+ CREATE TABLE x(a bytea);
+ INSERT INTO x VALUES('\x41484f4a0a');
+ SELECT length(a) FROM x;
+  length 
+ --------
+       5
+ (1 row)
+ 
+ INSERT INTO x VALUES('\x41484f4a0a');
+ -- should to fail
+ COPY (SELECT a,a FROM x LIMIT 1) TO '@abs_builddir@/results/raw.data' (FORMAT raw);
+ ERROR:  only single column result is allowed in RAW mode
+ COPY (SELECT a FROM x) TO '@abs_builddir@/results/raw.data' (FORMAT raw);
+ ERROR:  only single row result is allowed in RAW mode
+ -- should be ok
+ COPY (SELECT a FROM x LIMIT 1) TO '@abs_builddir@/results/raw.data' (FORMAT raw);
+ TRUNCATE x;
+ COPY x FROM '@abs_builddir@/results/raw.data' (FORMAT raw);
+ SELECT length(a) FROM x;
+  length 
+ --------
+       5
+ (1 row)
+ 
+ COPY x TO stdout (FORMAT raw);
+ AHOJ
+ TRUNCATE x;
+ \COPY x FROM '@abs_builddir@/results/raw.data' (FORMAT raw)
+ SELECT length(a) FROM x;
+  length 
+ --------
+       5
+ (1 row)
+ 
+ COPY x TO stdout (FORMAT raw);
+ AHOJ
+ \COPY x TO '@abs_builddir@/results/raw2.data' (FORMAT raw)
+ TRUNCATE x;
+ \COPY x FROM '@abs_builddir@/results/raw2.data' (FORMAT raw)
+ SELECT length(a) FROM x;
+  length 
+ --------
+       5
+ (1 row)
+ 
+ COPY x TO stdout (FORMAT raw);
+ AHOJ
+ -- test big file
+ TRUNCATE x;
+ -- use different mechanism for load to bytea
+ \lo_import @abs_builddir@/data/hash.data
+ \set lo_oid :LASTOID
+ INSERT INTO x VALUES(lo_get(:lo_oid));
+ \lo_unlink :lo_oid
+ COPY x FROM '@abs_builddir@/data/hash.data' (FORMAT raw);
+ \COPY x FROM '@abs_builddir@/data/hash.data' (FORMAT raw)
+ SELECT md5(a), length(a) FROM x;
+                md5                | length 
+ ----------------------------------+--------
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+ (3 rows)
+ 
+ TRUNCATE x;
+ COPY x FROM '@abs_builddir@/data/hash.data' (FORMAT raw);
+ COPY x TO '@abs_builddir@/results/hash2.data' (FORMAT raw);
+ \COPY x TO '@abs_builddir@/results/hash3.data' (FORMAT raw)
+ -- read again
+ COPY x FROM '@abs_builddir@/results/hash2.data' (FORMAT raw);
+ \COPY x FROM '@abs_builddir@/results/hash3.data' (FORMAT raw)
+ -- cross
+ COPY x FROM '@abs_builddir@/results/hash3.data' (FORMAT raw);
+ \COPY x FROM '@abs_builddir@/results/hash2.data' (FORMAT raw)
+ SELECT md5(a), length(a) FROM x;
+                md5                | length 
+ ----------------------------------+--------
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+ (5 rows)
+ 
+ DROP TABLE x;
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
new file mode 100644
index 39a9deb..e5703a5
*** a/src/test/regress/sql/copy2.sql
--- b/src/test/regress/sql/copy2.sql
*************** DROP FUNCTION truncate_in_subxact();
*** 333,335 ****
--- 333,348 ----
  DROP TABLE x, y;
  DROP FUNCTION fn_x_before();
  DROP FUNCTION fn_x_after();
+ 
+ CREATE TABLE x(a bytea);
+ INSERT INTO x VALUES('\x41484f4a0a');
+ INSERT INTO x VALUES('\x41484f4a0a');
+ 
+ -- should to fail
+ COPY (SELECT a,a FROM x LIMIT 1) TO STDOUT (FORMAT raw);
+ COPY (SELECT a FROM x) TO STDOUT (FORMAT raw);
+ 
+ -- should be ok
+ COPY (SELECT a FROM x LIMIT 1) TO STDOUT (FORMAT raw);
+ 
+ DROP TABLE x;
#21Dickson S. Guedes
listas@guedesoft.net
In reply to: Pavel Stehule (#20)
Re: raw output from copy

2015-07-07 3:32 GMT-03:00 Pavel Stehule <pavel.stehule@gmail.com>:

Hi

previous patch was broken, and buggy

Here is new version with fixed upload and more tests

The interesting thing is that I did not have to modify the interface or client - so it should work with any current driver with protocol support >= 3.

Hi Pavel,

Here are some thoughts:

1) from docs: "only row data in network byte order are exported or imported."

Should it be "only raw data"?

2) from docs "Because this format doesn't support any delimiter, only
one value can be exported or imported. NULL values are not allowed."

That "only one value can be exported or imported" is a little sad for
someone with a table with more than one column that accepts bytea. The
implemented feature doesn't cover the use-case where a table 'image'
has columns: id integer, image bytea, thumbnail bytea, and I want to
import binary data into that. We could put here the cases where we have
NOT NULL columns. Since these are expected and the error messages
complain about that, couldn't they be covered in the docs more explicitly?

3) from code: "bool row_processed; /* true, when first row was processed */"

Maybe rename the variable to something like `first_row_processed` and
rip off the comment?

4) from code:

if (cstate->raw)
format = 2;
else if (cstate->binary)
format = 1;
else
format = 0;

Maybe create a constant for code readability?

If on one side this feature does not cover a more generalized case,
on the other it is a nice start, IMHO.

--
Dickson S. Guedes
mail/xmpp: guedes@guedesoft.net - skype: guediz
http://github.com/guedes - http://guedesoft.net
http://www.postgresql.org.br

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#22Pavel Stehule
pavel.stehule@gmail.com
In reply to: Dickson S. Guedes (#21)
Re: raw output from copy

2015-07-23 22:05 GMT+02:00 Dickson S. Guedes <listas@guedesoft.net>:

2015-07-07 3:32 GMT-03:00 Pavel Stehule <pavel.stehule@gmail.com>:

Hi

previous patch was broken, and buggy

Here is new version with fixed upload and more tests

The interesting thing is that I did not have to modify the interface or client - so it

should work with any current driver with protocol support >= 3.

Hi

Hi Pavel,

Here are some thoughts:

1) from docs: "only row data in network byte order are exported or
imported."

Should it be "only raw data"?

I don't quite understand - it uses PostgreSQL's built-in "send" functions -
and the result of these functions is defined as "in network byte order"

2) from docs "Because this format doesn't support any delimiter, only
one value can be exported or imported. NULL values are not allowed."

That "only one value can be exported or imported" is a little sad for
someone with a table with more than one column that accepts bytea. The
implemented feature doesn't cover the use-case where a table 'image'
has columns: id integer, image bytea, thumbnail bytea, and I want to
import binary data into that. We could put here the cases where we have
NOT NULL columns. Since these are expected and the error messages
complain about that, couldn't they be covered in the docs more explicitly?

This mode is not meant to replace the current COPY binary mode. RAW binary
output for multiple fields is a terribly complex task - you can use a fixed
length, you can use some special separator, etc. I remember a terribly complex
bulk load on Oracle or MSSQL - and I would like to design it differently. I
prefer to keep the COPY statement as simple as possible. If you need to
import/export all fields in a record, then you can:

1. you can use a new LO api (for import) - load binary files as LO, INSERT
and drop used LO
2. call more COPY statements, and join exported files with operation system
tools (for export),
3. you can write specialized application that will support a COPY API and
export, import data in your preferred format.

The same complexity applies to input, and I would rather not write a generic
binary file parser.

3) from code: "bool row_processed; /* true, when first row was processed
*/"

In this mode there is only one row - so first_row_processed sounds a little bit
strange.

Maybe rename the variable to something like `first_row_processed` and
rip off the comment?

4) from code:

if (cstate->raw)
format = 2;
else if (cstate->binary)
format = 1;
else
format = 0;

Maybe create a constant for code readability?

good idea

If by one side this feature does not covers a more generalized case,
by other is a nice start, IMHO.

That is exactly what I don't want - the complexity of usage can go sky-high
with generic binary format file processing.

Regards

Pavel

Show quoted text

--
Dickson S. Guedes
mail/xmpp: guedes@guedesoft.net - skype: guediz
http://github.com/guedes - http://guedesoft.net
http://www.postgresql.org.br

#23Craig Ringer
craig@2ndquadrant.com
In reply to: Pavel Stehule (#20)
Re: raw output from copy

On 7 July 2015 at 14:32, Pavel Stehule <pavel.stehule@gmail.com> wrote:

Hi

previous patch was broken, and buggy

Here is new version with fixed upload and more tests

I routinely see people trying to use COPY ... FORMAT binary to export
a single binary field (like an image, for example) and getting
confused by the header PostgreSQL adds. Or using text-format COPY and
struggling with the hex escaping. It's clearly something people have
trouble with.

It doesn't help that while lo_import and lo_export can read paths
outside the datadir (and refuse to read from within it),
pg_read_binary_file is superuser only and disallows absolute paths.
There's no corresponding pg_write_binary_file. So users who want to
import and export a single binary field tend to try to use COPY. We
have functionality for large objects that has no equivalent for
'bytea'.

I don't love the use of COPY for this, but it gets us support for
arbitrary clients pretty easily. Otherwise it'd be server-side only
via local filesystem access, or require special psql-specific
functionality like we have for lo_import etc.

The main point is that this is a real world thing. People want to do
it, try to do it, and have problems doing it. So it's a solution to a
real issue.

--
Craig Ringer http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#24Heikki Linnakangas
hlinnaka@iki.fi
In reply to: Craig Ringer (#23)
Re: raw output from copy

On 07/27/2015 06:55 AM, Craig Ringer wrote:

On 7 July 2015 at 14:32, Pavel Stehule <pavel.stehule@gmail.com> wrote:

Hi

previous patch was broken, and buggy

Here is new version with fixed upload and more tests

I routinely see people trying to use COPY ... FORMAT binary to export
a single binary field (like an image, for example) and getting
confused by the header PostgreSQL adds. Or using text-format COPY and
struggling with the hex escaping. It's clearly something people have
trouble with.

It doesn't help that while lo_import and lo_export can read paths
outside the datadir (and refuse to read from within it),
pg_read_binary_file is superuser only and disallows absolute paths.
There's no corresponding pg_write_binary_file. So users who want to
import and export a single binary field tend to try to use COPY. We
have functionality for large objects that has no equivalent for
'bytea'.

I don't love the use of COPY for this, but it gets us support for
arbitrary clients pretty easily. Otherwise it'd be server-side only
via local filesystem access, or require special psql-specific
functionality like we have for lo_import etc.

COPY seems like a strange interface for this. I can see the point that
the syntax is almost there already, for both input and output. But even
that's not quite there yet, we'd need the new RAW format. And as an
input method, COPY is a bit awkward, because you cannot easily pass the
file to a function, for example. I think this should be implemented in
psql, along the lines of Andrew's original \bcopy patch.

There are a couple of related psql-features here actually, that would be
useful on their own. The first is being able to send the query result to
a file, for a single query only. You can currently do:

\o /tmp/foo
SELECT ...;
\o

But more often than not, when I try to do that, I forget to do the last
\o, and run another query, and the output still goes to the file. So
it'd be nice to have a \o option that only affects the next query.
Something like:

\O /tmp/foo
SELECT ...;

The second feature needed is to write the output without any headers,
row delimiters and such. Just the datum. And the third feature is to
write it in binary. Perhaps something like:

\O /tmp/foo binary
SELECT blob FROM foo WHERE id = 10;

What about input? This is a whole new feature, but it would be nice to
be able to pass the file contents as a query parameter. Something like:

\P /tmp/foo binary
INSERT INTO foo VALUES (?);

- Heikki

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#25Pavel Stehule
pavel.stehule@gmail.com
In reply to: Heikki Linnakangas (#24)
Re: raw output from copy

2015-07-27 10:41 GMT+02:00 Heikki Linnakangas <hlinnaka@iki.fi>:

On 07/27/2015 06:55 AM, Craig Ringer wrote:

On 7 July 2015 at 14:32, Pavel Stehule <pavel.stehule@gmail.com> wrote:

Hi

previous patch was broken, and buggy

Here is new version with fixed upload and more tests

I routinely see people trying to use COPY ... FORMAT binary to export
a single binary field (like an image, for example) and getting
confused by the header PostgreSQL adds. Or using text-format COPY and
struggling with the hex escaping. It's clearly something people have
trouble with.

It doesn't help that while lo_import and lo_export can read paths
outside the datadir (and refuse to read from within it),
pg_read_binary_file is superuser only and disallows absolute paths.
There's no corresponding pg_write_binary_file. So users who want to
import and export a single binary field tend to try to use COPY. We
have functionality for large objects that has no equivalent for
'bytea'.

I don't love the use of COPY for this, but it gets us support for
arbitrary clients pretty easily. Otherwise it'd be server-side only
via local filesystem access, or require special psql-specific
functionality like we have for lo_import etc.

COPY seems like a strange interface for this. I can see the point that the
syntax is almost there already, for both input and output. But even that's
not quite there yet, we'd need the new RAW format. And as an input method,
COPY is a bit awkward, because you cannot easily pass the file to a
function, for example. I think this should be implemented in psql, along
the lines of Andrew's original \bcopy patch.

There are a couple of related psql-features here actually, that would be
useful on their own. The first is being able to send the query result to a
file, for a single query only. You can currently do:

\o /tmp/foo
SELECT ...;
\o

But more often than not, when I try to do that, I forget to do the last
\o, and run another query, and the output still goes to the file. So it'd
be nice to have a \o option that only affects the next query. Something
like:

\O /tmp/foo
SELECT ...;

The second feature needed is to write the output without any headers, row
delimiters and such. Just the datum. And the third feature is to write it
in binary. Perhaps something like:

\O /tmp/foo binary
SELECT blob FROM foo WHERE id = 10;

What about input? This is a whole new feature, but it would be nice to be
able to pass the file contents as a query parameter. Something like:

\P /tmp/foo binary
INSERT INTO foo VALUES (?);

The input example is a strong reason not to do this via INSERT: just
parsing a special "?" symbol would need a lot of new code.

In this case, I don't see any advantage in a psql-based solution. COPY is
the standard interface for input/output from/to files, and it should be used
there.

Regards

Pavel

Show quoted text

- Heikki

#26Heikki Linnakangas
hlinnaka@iki.fi
In reply to: Pavel Stehule (#25)
Re: raw output from copy

On 07/27/2015 02:28 PM, Pavel Stehule wrote:

2015-07-27 10:41 GMT+02:00 Heikki Linnakangas <hlinnaka@iki.fi>:

What about input? This is a whole new feature, but it would be nice to be
able to pass the file contents as a query parameter. Something like:

\P /tmp/foo binary
INSERT INTO foo VALUES (?);

The example of input is strong reason, why don't do it via inserts. Only
parsing some special "?" symbol needs lot of new code.

Sorry, I meant $1 in place of the ?. No special parsing needed, psql can
send the query to the server as is, with the parameters that are given
by this new mechanism.

In this case, I don't see any advantage of psql based solution. COPY is
standard interface for input/output from/to files, and it should be used
there.

I'm not too happy with the COPY approach, although I won't object if one
of the other committers feels more comfortable with it. However, we don't
seem to be making progress here, so I'm going to mark this as Returned
with Feedback. I don't feel good about that either, because I don't
actually have any great suggestions on how to move this forward. Which
is a pity because this is a genuine problem for users.

- Heikki

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#27Andrew Dunstan
andrew@dunslane.net
In reply to: Heikki Linnakangas (#26)
Re: raw output from copy

On 08/05/2015 04:59 PM, Heikki Linnakangas wrote:

On 07/27/2015 02:28 PM, Pavel Stehule wrote:

2015-07-27 10:41 GMT+02:00 Heikki Linnakangas <hlinnaka@iki.fi>:

What about input? This is a whole new feature, but it would be nice
to be
able to pass the file contents as a query parameter. Something like:

\P /tmp/foo binary
INSERT INTO foo VALUES (?);

The example of input is strong reason, why don't do it via inserts. Only
parsing some special "?" symbol needs lot of new code.

Sorry, I meant $1 in place of the ?. No special parsing needed, psql
can send the query to the server as is, with the parameters that are
given by this new mechanism.

In this case, I don't see any advantage of psql based solution. COPY is
standard interface for input/output from/to files, and it should be used
there.

I'm not too happy with the COPY approach, although I won't object is
one of the other committers feel more comfortable with it. However, we
don't seem to be making progress here, so I'm going to mark this as
Returned with Feedback. I don't feel good about that either, because I
don't actually have any great suggestions on how to move this forward.
Which is a pity because this is a genuine problem for users.

This is really only a psql problem, IMNSHO. Inserting and extracting
binary data is pretty trivial for most users of client libraries (e.g.
it's a couple of lines of code in a DBD::Pg program), but it's hard in psql.

I do agree that the COPY approach feels more than a little klunky.

cheers

andrew

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#28Pavel Stehule
pavel.stehule@gmail.com
In reply to: Andrew Dunstan (#27)
Re: raw output from copy

Hi,

A psql-based implementation needs new infrastructure (more than a few lines).

Missing:

* binary mode support
* parametrized query support,

I am not against it, but I proposed both of these points, and both were rejected.

So why not use the current infrastructure? Raw copy is a trivial patch.
Dne 6.8.2015 0:09 napsal uživatel "Andrew Dunstan" <andrew@dunslane.net>:

Show quoted text

On 08/05/2015 04:59 PM, Heikki Linnakangas wrote:

On 07/27/2015 02:28 PM, Pavel Stehule wrote:

2015-07-27 10:41 GMT+02:00 Heikki Linnakangas <hlinnaka@iki.fi>:

What about input? This is a whole new feature, but it would be nice to be

able to pass the file contents as a query parameter. Something like:

\P /tmp/foo binary
INSERT INTO foo VALUES (?);

The example of input is strong reason, why don't do it via inserts. Only
parsing some special "?" symbol needs lot of new code.

Sorry, I meant $1 in place of the ?. No special parsing needed, psql can
send the query to the server as is, with the parameters that are given by
this new mechanism.

In this case, I don't see any advantage of psql based solution. COPY is

standard interface for input/output from/to files, and it should be used
there.

I'm not too happy with the COPY approach, although I won't object is one
of the other committers feel more comfortable with it. However, we don't
seem to be making progress here, so I'm going to mark this as Returned with
Feedback. I don't feel good about that either, because I don't actually
have any great suggestions on how to move this forward. Which is a pity
because this is a genuine problem for users.

This is really only a psql problem, IMNSHO. Inserting and extracting
binary data is pretty trivial for most users of client libraries (e.g. it's
a couple of lines of code in a DBD::Pg program), but it's hard in psql.

I do agree that the COPY approach feels more than a little klunky.

cheers

andrew

#29Pavel Stehule
pavel.stehule@gmail.com
In reply to: Pavel Stehule (#28)
1 attachment(s)
Re: raw output from copy

Hi

2015-08-06 10:37 GMT+02:00 Pavel Stehule <pavel.stehule@gmail.com>:

Hi,

Psql based implementation needs new infrastructure (more than few lines)

Missing:

* binary mode support
* parametrized query support,

I am not against, but both points I proposed, and both was rejected.

So why dont use current infrastructure? Raw copy is trivial patch.

Daniel Verite asked me to reopen this patch in the open
commitfest.

I am sending a rebased patch.

Regards

Pavel

Attachments:

copy-raw-format-20160227-03.patchtext/x-patch; charset=US-ASCII; name=copy-raw-format-20160227-03.patchDownload
diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
new file mode 100644
index 07e2f45..68fbfd8
*** a/doc/src/sgml/ref/copy.sgml
--- b/doc/src/sgml/ref/copy.sgml
*************** COPY { <replaceable class="parameter">ta
*** 197,203 ****
        Selects the data format to be read or written:
        <literal>text</>,
        <literal>csv</> (Comma Separated Values),
!       or <literal>binary</>.
        The default is <literal>text</>.
       </para>
      </listitem>
--- 197,203 ----
        Selects the data format to be read or written:
        <literal>text</>,
        <literal>csv</> (Comma Separated Values),
!       <literal>binary</> or <literal>raw</literal>.
        The default is <literal>text</>.
       </para>
      </listitem>
*************** OIDs to be shown as null if that ever pr
*** 888,893 ****
--- 888,925 ----
      </para>
     </refsect3>
    </refsect2>
+ 
+   <refsect2>
+      <title>Raw Format</title>
+ 
+    <para>
+     The <literal>raw</literal> format option causes all data to be
+     stored/read as binary format rather than as text. It shares format
+     for data with <literal>binary</literal> format. This format doesn't
+     use any metadata - only row data in network byte order are exported
+     or imported.
+    </para>
+ 
+    <para>
+     Because this format doesn't support any delimiter, only one value
+     can be exported or imported. NULL values are not allowed.
+    </para>
+    <para>
+     The <literal>raw</literal> format can be used for export or import
+     bytea values.
+ <programlisting>
+ COPY images(data) FROM '/usr1/proj/img/01.jpg' (FORMAT raw);
+ </programlisting>
+     It can be used successfully for export XML in different encoding
+     or import valid XML document with any supported encoding:
+ <screen><![CDATA[
+ SET client_encoding TO latin2;
+ 
+ COPY (SELECT xmlelement(NAME data, 'Hello')) TO stdout (FORMAT raw);
+ <?xml version="1.0" encoding="LATIN2"?><data>Hello</data>
+ ]]></screen>
+    </para>
+   </refsect2>
   </refsect1>
  
   <refsect1>
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
new file mode 100644
index 3201476..beb9152
*** a/src/backend/commands/copy.c
--- b/src/backend/commands/copy.c
*************** typedef enum EolType
*** 89,94 ****
--- 89,99 ----
   * it's faster to make useless comparisons to trailing bytes than it is to
   * invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is TRUE
   * when we have to do it the hard way.
+  *
+  * COPY supports three modes: text, binary and raw. The text format is plain
+  * text multiline format with specified delimiter. The binary format holds
+  * metadata (numbers, sizes) and data. The raw format holds data only and
+  * only one non NULL value can be processed.
   */
  typedef struct CopyStateData
  {
*************** typedef struct CopyStateData
*** 110,115 ****
--- 115,121 ----
  	char	   *filename;		/* filename, or NULL for STDIN/STDOUT */
  	bool		is_program;		/* is 'filename' a program to popen? */
  	bool		binary;			/* binary format? */
+ 	bool		raw;			/* required raw binary? */
  	bool		oids;			/* include OIDs? */
  	bool		freeze;			/* freeze rows on loading? */
  	bool		csv_mode;		/* Comma Separated Value format? */
*************** typedef struct CopyStateData
*** 199,204 ****
--- 205,213 ----
  	char	   *raw_buf;
  	int			raw_buf_index;	/* next byte to process */
  	int			raw_buf_len;	/* total # of bytes stored */
+ 
+ 	/* field for RAW mode */
+ 	bool		row_processed;		/* true, when first row was processed */
  } CopyStateData;
  
  /* DestReceiver for COPY (query) TO */
*************** SendCopyBegin(CopyState cstate)
*** 342,350 ****
  		/* new way */
  		StringInfoData buf;
  		int			natts = list_length(cstate->attnumlist);
! 		int16		format = (cstate->binary ? 1 : 0);
  		int			i;
  
  		pq_beginmessage(&buf, 'H');
  		pq_sendbyte(&buf, format);		/* overall format */
  		pq_sendint(&buf, natts, 2);
--- 351,366 ----
  		/* new way */
  		StringInfoData buf;
  		int			natts = list_length(cstate->attnumlist);
! 		int16		format;
  		int			i;
  
+ 		if (cstate->raw)
+ 			format = 2;
+ 		else if (cstate->binary)
+ 			format = 1;
+ 		else
+ 			format = 0;
+ 
  		pq_beginmessage(&buf, 'H');
  		pq_sendbyte(&buf, format);		/* overall format */
  		pq_sendint(&buf, natts, 2);
*************** SendCopyBegin(CopyState cstate)
*** 356,362 ****
  	else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
  	{
  		/* old way */
! 		if (cstate->binary)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
  			errmsg("COPY BINARY is not supported to stdout or from stdin")));
--- 372,378 ----
  	else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
  	{
  		/* old way */
! 		if (cstate->binary && cstate->raw)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
  			errmsg("COPY BINARY is not supported to stdout or from stdin")));
*************** SendCopyBegin(CopyState cstate)
*** 368,374 ****
  	else
  	{
  		/* very old way */
! 		if (cstate->binary)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
  			errmsg("COPY BINARY is not supported to stdout or from stdin")));
--- 384,390 ----
  	else
  	{
  		/* very old way */
! 		if (cstate->binary && cstate->raw)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
  			errmsg("COPY BINARY is not supported to stdout or from stdin")));
*************** ReceiveCopyBegin(CopyState cstate)
*** 387,395 ****
  		/* new way */
  		StringInfoData buf;
  		int			natts = list_length(cstate->attnumlist);
! 		int16		format = (cstate->binary ? 1 : 0);
  		int			i;
  
  		pq_beginmessage(&buf, 'G');
  		pq_sendbyte(&buf, format);		/* overall format */
  		pq_sendint(&buf, natts, 2);
--- 403,418 ----
  		/* new way */
  		StringInfoData buf;
  		int			natts = list_length(cstate->attnumlist);
! 		int16		format;
  		int			i;
  
+ 		if (cstate->raw)
+ 			format = 2;
+ 		else if (cstate->binary)
+ 			format = 1;
+ 		else
+ 			format = 0;
+ 
  		pq_beginmessage(&buf, 'G');
  		pq_sendbyte(&buf, format);		/* overall format */
  		pq_sendint(&buf, natts, 2);
*************** ReceiveCopyBegin(CopyState cstate)
*** 402,408 ****
  	else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
  	{
  		/* old way */
! 		if (cstate->binary)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
  			errmsg("COPY BINARY is not supported to stdout or from stdin")));
--- 425,431 ----
  	else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
  	{
  		/* old way */
! 		if (cstate->binary || cstate->raw)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
  			errmsg("COPY BINARY is not supported to stdout or from stdin")));
*************** ReceiveCopyBegin(CopyState cstate)
*** 414,420 ****
  	else
  	{
  		/* very old way */
! 		if (cstate->binary)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
  			errmsg("COPY BINARY is not supported to stdout or from stdin")));
--- 437,443 ----
  	else
  	{
  		/* very old way */
! 		if (cstate->binary || cstate->raw)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
  			errmsg("COPY BINARY is not supported to stdout or from stdin")));
*************** CopySendEndOfRow(CopyState cstate)
*** 482,488 ****
  	switch (cstate->copy_dest)
  	{
  		case COPY_FILE:
! 			if (!cstate->binary)
  			{
  				/* Default line termination depends on platform */
  #ifndef WIN32
--- 505,511 ----
  	switch (cstate->copy_dest)
  	{
  		case COPY_FILE:
! 			if (!cstate->binary && !cstate->raw)
  			{
  				/* Default line termination depends on platform */
  #ifndef WIN32
*************** CopySendEndOfRow(CopyState cstate)
*** 527,533 ****
  			break;
  		case COPY_OLD_FE:
  			/* The FE/BE protocol uses \n as newline for all platforms */
! 			if (!cstate->binary)
  				CopySendChar(cstate, '\n');
  
  			if (pq_putbytes(fe_msgbuf->data, fe_msgbuf->len))
--- 550,556 ----
  			break;
  		case COPY_OLD_FE:
  			/* The FE/BE protocol uses \n as newline for all platforms */
! 			if (!cstate->binary && !cstate->raw)
  				CopySendChar(cstate, '\n');
  
  			if (pq_putbytes(fe_msgbuf->data, fe_msgbuf->len))
*************** CopySendEndOfRow(CopyState cstate)
*** 540,546 ****
  			break;
  		case COPY_NEW_FE:
  			/* The FE/BE protocol uses \n as newline for all platforms */
! 			if (!cstate->binary)
  				CopySendChar(cstate, '\n');
  
  			/* Dump the accumulated row as one CopyData message */
--- 563,569 ----
  			break;
  		case COPY_NEW_FE:
  			/* The FE/BE protocol uses \n as newline for all platforms */
! 			if (!cstate->binary && !cstate->raw)
  				CopySendChar(cstate, '\n');
  
  			/* Dump the accumulated row as one CopyData message */
*************** CopyGetData(CopyState cstate, void *data
*** 597,602 ****
--- 620,626 ----
  			bytesread = minread;
  			break;
  		case COPY_NEW_FE:
+ 
  			while (maxread > 0 && bytesread < minread && !cstate->fe_eof)
  			{
  				int			avail;
*************** CopyGetData(CopyState cstate, void *data
*** 619,624 ****
--- 643,649 ----
  								(errcode(ERRCODE_CONNECTION_FAILURE),
  								 errmsg("unexpected EOF on client connection with an open transaction")));
  					RESUME_CANCEL_INTERRUPTS();
+ 
  					switch (mtype)
  					{
  						case 'd':		/* CopyData */
*************** CopyLoadRawBuf(CopyState cstate)
*** 766,771 ****
--- 791,827 ----
  	return (inbytes > 0);
  }
  
+ /*
+  * CopyLoadallRawBuf load all file into raw_buf.
+  *
+  * It is used for reading content in raw mode. If original RAW_BUF_SIZE is not
+  * enough, the buffer is enlarged.
+  */
+ static void
+ CopyLoadallRawBuf(CopyState cstate)
+ {
+ 	int			nbytes = 0;
+ 	int			inbytes;
+ 	Size			raw_buf_size = RAW_BUF_SIZE;
+ 
+ 	do
+ 	{
+ 		/* hold enough space for one data packet */
+ 		if ((raw_buf_size - nbytes - 1) < 8 * 1024)
+ 		{
+ 			raw_buf_size += RAW_BUF_SIZE;
+ 			cstate->raw_buf = repalloc(cstate->raw_buf, raw_buf_size);
+ 		}
+ 
+ 		inbytes = CopyGetData(cstate, cstate->raw_buf + nbytes, 1, raw_buf_size - nbytes - 1);
+ 		nbytes += inbytes;
+ 	}
+ 	while (inbytes > 0);
+ 
+ 	cstate->raw_buf[nbytes] = '\0';
+ 	cstate->raw_buf_index = 0;
+ 	cstate->raw_buf_len = nbytes;
+ }
  
  /*
   *	 DoCopy executes the SQL COPY statement
*************** ProcessCopyOptions(CopyState cstate,
*** 1013,1018 ****
--- 1069,1076 ----
  				cstate->csv_mode = true;
  			else if (strcmp(fmt, "binary") == 0)
  				cstate->binary = true;
+ 			else if (strcmp(fmt, "raw") == 0)
+ 				cstate->raw = true;
  			else
  				ereport(ERROR,
  						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
*************** ProcessCopyOptions(CopyState cstate,
*** 1162,1176 ****
  	 * Check for incompatible options (must do these two before inserting
  	 * defaults)
  	 */
! 	if (cstate->binary && cstate->delim)
  		ereport(ERROR,
  				(errcode(ERRCODE_SYNTAX_ERROR),
! 				 errmsg("cannot specify DELIMITER in BINARY mode")));
  
! 	if (cstate->binary && cstate->null_print)
  		ereport(ERROR,
  				(errcode(ERRCODE_SYNTAX_ERROR),
! 				 errmsg("cannot specify NULL in BINARY mode")));
  
  	/* Set defaults for omitted options */
  	if (!cstate->delim)
--- 1220,1239 ----
  	 * Check for incompatible options (must do these two before inserting
  	 * defaults)
  	 */
! 	if ((cstate->binary || cstate->raw) && cstate->delim)
  		ereport(ERROR,
  				(errcode(ERRCODE_SYNTAX_ERROR),
! 				 errmsg("cannot specify DELIMITER in BINARY or RAW mode")));
  
! 	if ((cstate->binary || cstate->raw) && cstate->null_print)
  		ereport(ERROR,
  				(errcode(ERRCODE_SYNTAX_ERROR),
! 				 errmsg("cannot specify NULL in BINARY or RAW mode")));
! 
! 	if (cstate->raw && cstate->oids)
! 		ereport(ERROR,
! 				(errcode(ERRCODE_SYNTAX_ERROR),
! 				 errmsg("cannot specify OIDS in RAW mode")));
  
  	/* Set defaults for omitted options */
  	if (!cstate->delim)
*************** BeginCopy(bool is_from,
*** 1608,1613 ****
--- 1671,1690 ----
  		}
  	}
  
+ 	/*
+ 	 * Initializaze the field "row_processed" for one row output in RAW mode,
+ 	 * and ensure only one output column.
+ 	 */
+ 	if (cstate->raw)
+ 	{
+ 		cstate->row_processed = false;
+ 
+ 		if (num_phys_attrs > 1)
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ 					 errmsg("only single column result is allowed in RAW mode")));
+ 	}
+ 
  	/* Use client encoding when ENCODING option is not specified. */
  	if (cstate->file_encoding < 0)
  		cstate->file_encoding = pg_get_client_encoding();
*************** CopyTo(CopyState cstate)
*** 1876,1882 ****
  		Oid			out_func_oid;
  		bool		isvarlena;
  
! 		if (cstate->binary)
  			getTypeBinaryOutputInfo(attr[attnum - 1]->atttypid,
  									&out_func_oid,
  									&isvarlena);
--- 1953,1959 ----
  		Oid			out_func_oid;
  		bool		isvarlena;
  
! 		if (cstate->binary || cstate->raw)
  			getTypeBinaryOutputInfo(attr[attnum - 1]->atttypid,
  									&out_func_oid,
  									&isvarlena);
*************** CopyTo(CopyState cstate)
*** 1915,1921 ****
  		tmp = 0;
  		CopySendInt32(cstate, tmp);
  	}
! 	else
  	{
  		/*
  		 * For non-binary copy, we need to convert null_print to file
--- 1992,1998 ----
  		tmp = 0;
  		CopySendInt32(cstate, tmp);
  	}
! 	else if (!cstate->raw)
  	{
  		/*
  		 * For non-binary copy, we need to convert null_print to file
*************** CopyTo(CopyState cstate)
*** 1983,1989 ****
  	else
  	{
  		/* run the plan --- the dest receiver will send tuples */
! 		ExecutorRun(cstate->queryDesc, ForwardScanDirection, 0L);
  		processed = ((DR_copy *) cstate->queryDesc->dest)->processed;
  	}
  
--- 2060,2066 ----
  	else
  	{
  		/* run the plan --- the dest receiver will send tuples */
! 		ExecutorRun(cstate->queryDesc, ForwardScanDirection, cstate->raw ? 2L : 0L);
  		processed = ((DR_copy *) cstate->queryDesc->dest)->processed;
  	}
  
*************** CopyOneRowTo(CopyState cstate, Oid tuple
*** 2027,2032 ****
--- 2104,2117 ----
  			CopySendInt32(cstate, tupleOid);
  		}
  	}
+ 	else if (cstate->raw)
+ 	{
+ 		if (cstate->row_processed)
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_TOO_MANY_ROWS),
+ 					 errmsg("only single row result is allowed in RAW mode")));
+ 		cstate->row_processed = true;
+ 	}
  	else
  	{
  		/* Text format has no per-tuple header, but send OID if wanted */
*************** CopyOneRowTo(CopyState cstate, Oid tuple
*** 2046,2052 ****
  		Datum		value = values[attnum - 1];
  		bool		isnull = nulls[attnum - 1];
  
! 		if (!cstate->binary)
  		{
  			if (need_delim)
  				CopySendChar(cstate, cstate->delim[0]);
--- 2131,2137 ----
  		Datum		value = values[attnum - 1];
  		bool		isnull = nulls[attnum - 1];
  
! 		if (!cstate->binary && !cstate->raw)
  		{
  			if (need_delim)
  				CopySendChar(cstate, cstate->delim[0]);
*************** CopyOneRowTo(CopyState cstate, Oid tuple
*** 2055,2068 ****
  
  		if (isnull)
  		{
! 			if (!cstate->binary)
  				CopySendString(cstate, cstate->null_print_client);
  			else
  				CopySendInt32(cstate, -1);
  		}
  		else
  		{
! 			if (!cstate->binary)
  			{
  				string = OutputFunctionCall(&out_functions[attnum - 1],
  											value);
--- 2140,2171 ----
  
  		if (isnull)
  		{
! 			if (cstate->raw)
! 					ereport(ERROR,
! 						(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
! 						  errmsg("cannot to copy NULL value in RAW mode.")));
! 			else if (!cstate->binary)
  				CopySendString(cstate, cstate->null_print_client);
  			else
  				CopySendInt32(cstate, -1);
  		}
  		else
  		{
! 			if (cstate->binary || cstate->raw)
! 			{
! 				bytea	   *outputbytes;
! 
! 				outputbytes = SendFunctionCall(&out_functions[attnum - 1],
! 											   value);
! 
! 				/* send the size only in binary mode */
! 				if (cstate->binary)
! 					CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
! 
! 				CopySendData(cstate, VARDATA(outputbytes),
! 							 VARSIZE(outputbytes) - VARHDRSZ);
! 			}
! 			else
  			{
  				string = OutputFunctionCall(&out_functions[attnum - 1],
  											value);
*************** CopyOneRowTo(CopyState cstate, Oid tuple
*** 2073,2088 ****
  				else
  					CopyAttributeOutText(cstate, string);
  			}
- 			else
- 			{
- 				bytea	   *outputbytes;
- 
- 				outputbytes = SendFunctionCall(&out_functions[attnum - 1],
- 											   value);
- 				CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
- 				CopySendData(cstate, VARDATA(outputbytes),
- 							 VARSIZE(outputbytes) - VARHDRSZ);
- 			}
  		}
  	}
  
--- 2176,2181 ----
*************** BeginCopyFrom(Relation rel,
*** 2712,2718 ****
  			continue;
  
  		/* Fetch the input function and typioparam info */
! 		if (cstate->binary)
  			getTypeBinaryInputInfo(attr[attnum - 1]->atttypid,
  								   &in_func_oid, &typioparams[attnum - 1]);
  		else
--- 2805,2811 ----
  			continue;
  
  		/* Fetch the input function and typioparam info */
! 		if (cstate->binary || cstate->raw)
  			getTypeBinaryInputInfo(attr[attnum - 1]->atttypid,
  								   &in_func_oid, &typioparams[attnum - 1]);
  		else
*************** BeginCopyFrom(Relation rel,
*** 2811,2817 ****
  		}
  	}
  
! 	if (!cstate->binary)
  	{
  		/* must rely on user to tell us... */
  		cstate->file_has_oids = cstate->oids;
--- 2904,2910 ----
  		}
  	}
  
! 	if (!cstate->binary || cstate->raw)
  	{
  		/* must rely on user to tell us... */
  		cstate->file_has_oids = cstate->oids;
*************** BeginCopyFrom(Relation rel,
*** 2863,2869 ****
  	}
  
  	/* create workspace for CopyReadAttributes results */
! 	if (!cstate->binary)
  	{
  		AttrNumber	attr_count = list_length(cstate->attnumlist);
  		int			nfields = cstate->file_has_oids ? (attr_count + 1) : attr_count;
--- 2956,2962 ----
  	}
  
  	/* create workspace for CopyReadAttributes results */
! 	if (!cstate->binary && !cstate->raw)
  	{
  		AttrNumber	attr_count = list_length(cstate->attnumlist);
  		int			nfields = cstate->file_has_oids ? (attr_count + 1) : attr_count;
*************** NextCopyFrom(CopyState cstate, ExprConte
*** 2968,2975 ****
  	MemSet(values, 0, num_phys_attrs * sizeof(Datum));
  	MemSet(nulls, true, num_phys_attrs * sizeof(bool));
  
! 	if (!cstate->binary)
  	{
  		char	  **field_strings;
  		ListCell   *cur;
  		int			fldct;
--- 3061,3180 ----
  	MemSet(values, 0, num_phys_attrs * sizeof(Datum));
  	MemSet(nulls, true, num_phys_attrs * sizeof(bool));
  
! 	if (cstate->binary)
  	{
+ 		int16		fld_count;
+ 		ListCell   *cur;
+ 
+ 		cstate->cur_lineno++;
+ 
+ 		if (!CopyGetInt16(cstate, &fld_count))
+ 		{
+ 			/* EOF detected (end of file, or protocol-level EOF) */
+ 			return false;
+ 		}
+ 
+ 		if (fld_count == -1)
+ 		{
+ 			/*
+ 			 * Received EOF marker.  In a V3-protocol copy, wait for the
+ 			 * protocol-level EOF, and complain if it doesn't come
+ 			 * immediately.  This ensures that we correctly handle CopyFail,
+ 			 * if client chooses to send that now.
+ 			 *
+ 			 * Note that we MUST NOT try to read more data in an old-protocol
+ 			 * copy, since there is no protocol-level EOF marker then.  We
+ 			 * could go either way for copy from file, but choose to throw
+ 			 * error if there's data after the EOF marker, for consistency
+ 			 * with the new-protocol case.
+ 			 */
+ 			char		dummy;
+ 
+ 			if (cstate->copy_dest != COPY_OLD_FE &&
+ 				CopyGetData(cstate, &dummy, 1, 1) > 0)
+ 				ereport(ERROR,
+ 						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ 						 errmsg("received copy data after EOF marker")));
+ 			return false;
+ 		}
+ 
+ 		if (fld_count != attr_count)
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ 					 errmsg("row field count is %d, expected %d",
+ 							(int) fld_count, attr_count)));
+ 
+ 		if (file_has_oids)
+ 		{
+ 			Oid			loaded_oid;
+ 
+ 			cstate->cur_attname = "oid";
+ 			loaded_oid =
+ 				DatumGetObjectId(CopyReadBinaryAttribute(cstate,
+ 														 0,
+ 													&cstate->oid_in_function,
+ 													  cstate->oid_typioparam,
+ 														 -1,
+ 														 &isnull));
+ 			if (isnull || loaded_oid == InvalidOid)
+ 				ereport(ERROR,
+ 						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ 						 errmsg("invalid OID in COPY data")));
+ 			cstate->cur_attname = NULL;
+ 			if (cstate->oids && tupleOid != NULL)
+ 				*tupleOid = loaded_oid;
+ 		}
+ 
+ 		i = 0;
+ 		foreach(cur, cstate->attnumlist)
+ 		{
+ 			int			attnum = lfirst_int(cur);
+ 			int			m = attnum - 1;
+ 
+ 			cstate->cur_attname = NameStr(attr[m]->attname);
+ 			i++;
+ 			values[m] = CopyReadBinaryAttribute(cstate,
+ 												i,
+ 												&in_functions[m],
+ 												typioparams[m],
+ 												attr[m]->atttypmod,
+ 												&nulls[m]);
+ 			cstate->cur_attname = NULL;
+ 		}
+ 	}
+ 	else if (cstate->raw)
+ 	{
+ 		if (cstate->row_processed)
+ 			return false;
+ 
+ 		CopyLoadallRawBuf(cstate);
+ 		cstate->cur_attname = NameStr(attr[0]->attname);
+ 
+ 		if (cstate->attribute_buf.data != NULL)
+ 			pfree(cstate->attribute_buf.data);
+ 
+ 		cstate->attribute_buf.data = cstate->raw_buf;
+ 		cstate->attribute_buf.len = cstate->raw_buf_len;
+ 		cstate->attribute_buf.cursor = 0;
+ 
+ 		cstate->raw_buf = NULL;
+ 
+ 		/* Call the column type's binary input converter */
+ 		values[0] = ReceiveFunctionCall(&in_functions[0], &cstate->attribute_buf,
+ 								 typioparams[0], attr[0]->atttypmod);
+ 		nulls[0] = false;
+ 
+ 		/* Trouble if it didn't eat the whole buffer */
+ 		if (cstate->attribute_buf.cursor != cstate->attribute_buf.len)
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+ 					 errmsg("incorrect binary data format")));
+ 
+ 		cstate->row_processed = true;
+ 	}
+ 	else
+ 	{
+ 		/* text */
  		char	  **field_strings;
  		ListCell   *cur;
  		int			fldct;
*************** NextCopyFrom(CopyState cstate, ExprConte
*** 3074,3161 ****
  
  		Assert(fieldno == nfields);
  	}
- 	else
- 	{
- 		/* binary */
- 		int16		fld_count;
- 		ListCell   *cur;
- 
- 		cstate->cur_lineno++;
- 
- 		if (!CopyGetInt16(cstate, &fld_count))
- 		{
- 			/* EOF detected (end of file, or protocol-level EOF) */
- 			return false;
- 		}
- 
- 		if (fld_count == -1)
- 		{
- 			/*
- 			 * Received EOF marker.  In a V3-protocol copy, wait for the
- 			 * protocol-level EOF, and complain if it doesn't come
- 			 * immediately.  This ensures that we correctly handle CopyFail,
- 			 * if client chooses to send that now.
- 			 *
- 			 * Note that we MUST NOT try to read more data in an old-protocol
- 			 * copy, since there is no protocol-level EOF marker then.  We
- 			 * could go either way for copy from file, but choose to throw
- 			 * error if there's data after the EOF marker, for consistency
- 			 * with the new-protocol case.
- 			 */
- 			char		dummy;
- 
- 			if (cstate->copy_dest != COPY_OLD_FE &&
- 				CopyGetData(cstate, &dummy, 1, 1) > 0)
- 				ereport(ERROR,
- 						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
- 						 errmsg("received copy data after EOF marker")));
- 			return false;
- 		}
- 
- 		if (fld_count != attr_count)
- 			ereport(ERROR,
- 					(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
- 					 errmsg("row field count is %d, expected %d",
- 							(int) fld_count, attr_count)));
- 
- 		if (file_has_oids)
- 		{
- 			Oid			loaded_oid;
- 
- 			cstate->cur_attname = "oid";
- 			loaded_oid =
- 				DatumGetObjectId(CopyReadBinaryAttribute(cstate,
- 														 0,
- 													&cstate->oid_in_function,
- 													  cstate->oid_typioparam,
- 														 -1,
- 														 &isnull));
- 			if (isnull || loaded_oid == InvalidOid)
- 				ereport(ERROR,
- 						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
- 						 errmsg("invalid OID in COPY data")));
- 			cstate->cur_attname = NULL;
- 			if (cstate->oids && tupleOid != NULL)
- 				*tupleOid = loaded_oid;
- 		}
- 
- 		i = 0;
- 		foreach(cur, cstate->attnumlist)
- 		{
- 			int			attnum = lfirst_int(cur);
- 			int			m = attnum - 1;
- 
- 			cstate->cur_attname = NameStr(attr[m]->attname);
- 			i++;
- 			values[m] = CopyReadBinaryAttribute(cstate,
- 												i,
- 												&in_functions[m],
- 												typioparams[m],
- 												attr[m]->atttypmod,
- 												&nulls[m]);
- 			cstate->cur_attname = NULL;
- 		}
- 	}
  
  	/*
  	 * Now compute and insert any defaults available for the columns not
--- 3279,3284 ----
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
new file mode 100644
index 5f6260a..90d2aa5
*** a/src/test/regress/expected/copy2.out
--- b/src/test/regress/expected/copy2.out
*************** DROP FUNCTION truncate_in_subxact();
*** 466,468 ****
--- 466,481 ----
  DROP TABLE x, y;
  DROP FUNCTION fn_x_before();
  DROP FUNCTION fn_x_after();
+ CREATE TABLE x(a bytea);
+ INSERT INTO x VALUES('\x41484f4a0a');
+ INSERT INTO x VALUES('\x41484f4a0a');
+ -- should to fail
+ COPY (SELECT a,a FROM x LIMIT 1) TO STDOUT (FORMAT raw);
+ ERROR:  only single column result is allowed in RAW mode
+ COPY (SELECT a FROM x) TO STDOUT (FORMAT raw);
+ AHOJ
+ ERROR:  only single row result is allowed in RAW mode
+ -- should be ok
+ COPY (SELECT a FROM x LIMIT 1) TO STDOUT (FORMAT raw);
+ AHOJ
+ DROP TABLE x;
diff --git a/src/test/regress/input/copy.source b/src/test/regress/input/copy.source
new file mode 100644
index cb13606..d351b43
*** a/src/test/regress/input/copy.source
--- b/src/test/regress/input/copy.source
*************** this is just a line full of junk that wo
*** 133,135 ****
--- 133,195 ----
  \.
  
  copy copytest3 to stdout csv header;
+ 
+ -- copy raw
+ CREATE TABLE x(a bytea);
+ INSERT INTO x VALUES('\x41484f4a0a');
+ SELECT length(a) FROM x;
+ 
+ INSERT INTO x VALUES('\x41484f4a0a');
+ 
+ -- should to fail
+ COPY (SELECT a,a FROM x LIMIT 1) TO '@abs_builddir@/results/raw.data' (FORMAT raw);
+ COPY (SELECT a FROM x) TO '@abs_builddir@/results/raw.data' (FORMAT raw);
+ 
+ -- should be ok
+ COPY (SELECT a FROM x LIMIT 1) TO '@abs_builddir@/results/raw.data' (FORMAT raw);
+ TRUNCATE x;
+ COPY x FROM '@abs_builddir@/results/raw.data' (FORMAT raw);
+ SELECT length(a) FROM x;
+ COPY x TO stdout (FORMAT raw);
+ 
+ TRUNCATE x;
+ 
+ \COPY x FROM '@abs_builddir@/results/raw.data' (FORMAT raw)
+ SELECT length(a) FROM x;
+ COPY x TO stdout (FORMAT raw);
+ 
+ \COPY x TO '@abs_builddir@/results/raw2.data' (FORMAT raw)
+ TRUNCATE x;
+ 
+ \COPY x FROM '@abs_builddir@/results/raw2.data' (FORMAT raw)
+ SELECT length(a) FROM x;
+ COPY x TO stdout (FORMAT raw);
+ 
+ -- test big file
+ TRUNCATE x;
+ -- use different mechanism for load to bytea
+ \lo_import @abs_builddir@/data/hash.data
+ \set lo_oid :LASTOID
+ INSERT INTO x VALUES(lo_get(:lo_oid));
+ \lo_unlink :lo_oid
+ 
+ COPY x FROM '@abs_builddir@/data/hash.data' (FORMAT raw);
+ \COPY x FROM '@abs_builddir@/data/hash.data' (FORMAT raw)
+ 
+ SELECT md5(a), length(a) FROM x;
+ 
+ TRUNCATE x;
+ COPY x FROM '@abs_builddir@/data/hash.data' (FORMAT raw);
+ COPY x TO '@abs_builddir@/results/hash2.data' (FORMAT raw);
+ \COPY x TO '@abs_builddir@/results/hash3.data' (FORMAT raw)
+ 
+ -- read again
+ COPY x FROM '@abs_builddir@/results/hash2.data' (FORMAT raw);
+ \COPY x FROM '@abs_builddir@/results/hash3.data' (FORMAT raw)
+ -- cross
+ COPY x FROM '@abs_builddir@/results/hash3.data' (FORMAT raw);
+ \COPY x FROM '@abs_builddir@/results/hash2.data' (FORMAT raw)
+ 
+ SELECT md5(a), length(a) FROM x;
+ 
+ DROP TABLE x;
diff --git a/src/test/regress/output/copy.source b/src/test/regress/output/copy.source
new file mode 100644
index b7e372d..f555f58
*** a/src/test/regress/output/copy.source
--- b/src/test/regress/output/copy.source
*************** copy copytest3 to stdout csv header;
*** 95,97 ****
--- 95,183 ----
  c1,"col with , comma","col with "" quote"
  1,a,1
  2,b,2
+ -- copy raw
+ CREATE TABLE x(a bytea);
+ INSERT INTO x VALUES('\x41484f4a0a');
+ SELECT length(a) FROM x;
+  length 
+ --------
+       5
+ (1 row)
+ 
+ INSERT INTO x VALUES('\x41484f4a0a');
+ -- should to fail
+ COPY (SELECT a,a FROM x LIMIT 1) TO '@abs_builddir@/results/raw.data' (FORMAT raw);
+ ERROR:  only single column result is allowed in RAW mode
+ COPY (SELECT a FROM x) TO '@abs_builddir@/results/raw.data' (FORMAT raw);
+ ERROR:  only single row result is allowed in RAW mode
+ -- should be ok
+ COPY (SELECT a FROM x LIMIT 1) TO '@abs_builddir@/results/raw.data' (FORMAT raw);
+ TRUNCATE x;
+ COPY x FROM '@abs_builddir@/results/raw.data' (FORMAT raw);
+ SELECT length(a) FROM x;
+  length 
+ --------
+       5
+ (1 row)
+ 
+ COPY x TO stdout (FORMAT raw);
+ AHOJ
+ TRUNCATE x;
+ \COPY x FROM '@abs_builddir@/results/raw.data' (FORMAT raw)
+ SELECT length(a) FROM x;
+  length 
+ --------
+       5
+ (1 row)
+ 
+ COPY x TO stdout (FORMAT raw);
+ AHOJ
+ \COPY x TO '@abs_builddir@/results/raw2.data' (FORMAT raw)
+ TRUNCATE x;
+ \COPY x FROM '@abs_builddir@/results/raw2.data' (FORMAT raw)
+ SELECT length(a) FROM x;
+  length 
+ --------
+       5
+ (1 row)
+ 
+ COPY x TO stdout (FORMAT raw);
+ AHOJ
+ -- test big file
+ TRUNCATE x;
+ -- use different mechanism for load to bytea
+ \lo_import @abs_builddir@/data/hash.data
+ \set lo_oid :LASTOID
+ INSERT INTO x VALUES(lo_get(:lo_oid));
+ \lo_unlink :lo_oid
+ COPY x FROM '@abs_builddir@/data/hash.data' (FORMAT raw);
+ \COPY x FROM '@abs_builddir@/data/hash.data' (FORMAT raw)
+ SELECT md5(a), length(a) FROM x;
+                md5                | length 
+ ----------------------------------+--------
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+ (3 rows)
+ 
+ TRUNCATE x;
+ COPY x FROM '@abs_builddir@/data/hash.data' (FORMAT raw);
+ COPY x TO '@abs_builddir@/results/hash2.data' (FORMAT raw);
+ \COPY x TO '@abs_builddir@/results/hash3.data' (FORMAT raw)
+ -- read again
+ COPY x FROM '@abs_builddir@/results/hash2.data' (FORMAT raw);
+ \COPY x FROM '@abs_builddir@/results/hash3.data' (FORMAT raw)
+ -- cross
+ COPY x FROM '@abs_builddir@/results/hash3.data' (FORMAT raw);
+ \COPY x FROM '@abs_builddir@/results/hash2.data' (FORMAT raw)
+ SELECT md5(a), length(a) FROM x;
+                md5                | length 
+ ----------------------------------+--------
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+ (5 rows)
+ 
+ DROP TABLE x;
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
new file mode 100644
index 39a9deb..e5703a5
*** a/src/test/regress/sql/copy2.sql
--- b/src/test/regress/sql/copy2.sql
*************** DROP FUNCTION truncate_in_subxact();
*** 333,335 ****
--- 333,348 ----
  DROP TABLE x, y;
  DROP FUNCTION fn_x_before();
  DROP FUNCTION fn_x_after();
+ 
+ CREATE TABLE x(a bytea);
+ INSERT INTO x VALUES('\x41484f4a0a');
+ INSERT INTO x VALUES('\x41484f4a0a');
+ 
+ -- should to fail
+ COPY (SELECT a,a FROM x LIMIT 1) TO STDOUT (FORMAT raw);
+ COPY (SELECT a FROM x) TO STDOUT (FORMAT raw);
+ 
+ -- should be ok
+ COPY (SELECT a FROM x LIMIT 1) TO STDOUT (FORMAT raw);
+ 
+ DROP TABLE x;
#30Corey Huinker
corey.huinker@gmail.com
In reply to: Pavel Stehule (#29)
Re: raw output from copy

On Sat, Feb 27, 2016 at 2:26 AM, Pavel Stehule <pavel.stehule@gmail.com>
wrote:

Hi

2015-08-06 10:37 GMT+02:00 Pavel Stehule <pavel.stehule@gmail.com>:

Hi,

Psql based implementation needs new infrastructure (more than few lines)

Missing:

* binary mode support
* parametrized query support,

I am not against, but both points I proposed, and both was rejected.

So why dont use current infrastructure? Raw copy is trivial patch.

I was asked by Daniel Verite about reopening this patch in opened
commitfest.

I am sending rebased patch

Regards

Pavel

Since this patch does something I need for my own work, I've signed up as a
reviewer.

From a design standpoint, I feel that COPY is the preferred means of
dealing with data from sources too transient to justify setting up a
foreign data wrapper, and too simple to justify writing application code.
So, for me, RAW is the right solution, or at least *a* right solution.

My first pass of reading the code changes and the regression tests is
complete, and I found the changes to be clear and fairly straightforward.
This shouldn't surprise anyone, as the previous reviewers had only minor
quibbles with the code. So far, so good.

The regression tests seem to adequately cover all new functionality, though
I wonder if we should add some cases that highlight situations where BINARY
mode is insufficient.

Before I give my approval, I want to read it again more closely to make
sure that no cases were skipped with regard to the (binary || raw) and
(binary || !raw) tests. Also, I want to use it on some of my problematic
files. Maybe I'll find a good edge case. Probably not.

I hope to find time for those things in the next few days.

#31Pavel Stehule
pavel.stehule@gmail.com
In reply to: Corey Huinker (#30)
Re: raw output from copy

2016-03-04 3:13 GMT+01:00 Corey Huinker <corey.huinker@gmail.com>:

On Sat, Feb 27, 2016 at 2:26 AM, Pavel Stehule <pavel.stehule@gmail.com>
wrote:

Hi

2015-08-06 10:37 GMT+02:00 Pavel Stehule <pavel.stehule@gmail.com>:

Hi,

Psql based implementation needs new infrastructure (more than few lines)

Missing:

* binary mode support
* parametrized query support,

I am not against, but both points I proposed, and both was rejected.

So why dont use current infrastructure? Raw copy is trivial patch.

I was asked by Daniel Verite about reopening this patch in opened
commitfest.

I am sending rebased patch

Regards

Pavel

Since this patch does something I need for my own work, I've signed up as
a reviewer.

From a design standpoint, I feel that COPY is the preferred means of
dealing with data from sources too transient to justify setting up a
foreign data wrapper, and too simple to justify writing application code.
So, for me, RAW is the right solution, or at least *a* right solution.

My opinion is the same - all the necessary infrastructure is already there, and
when we work with I/O we use COPY natively. I hope the main use case (exporting
bytea) is solved, but there is also the possibility of enhancing this command
through COPY options - which, I think, is an advantage of this approach.

My first pass of reading the code changes and the regression tests is
complete, and I found the changes to be clear and fairly straightforward.
This shouldn't surprise anyone, as the previous reviewers had only minor
quibbles with the code. So far, so good.

The regression tests seem to adequately cover all new functionality,
though I wonder if we should add some cases that highlight situations where
BINARY mode is insufficient.

Before I give my approval, I want to read it again more closely to make
sure that no cases were skipped with regard to the (binary || raw) and
(binary || !raw) tests. Also, I want to use it on some of my problematic
files. Maybe I'll find a good edge case. Probably not.

I hope to find time for those things in the next few days.

Thank you very much

Regards

Pavel

#32Ildar Musin
i.musin@postgrespro.ru
In reply to: Pavel Stehule (#29)
Re: raw output from copy

Hi Pavel

27/02/16 10:26, Pavel Stehule пишет:

Hi

2015-08-06 10:37 GMT+02:00 Pavel Stehule <pavel.stehule@gmail.com
<mailto:pavel.stehule@gmail.com>>:

Hi,

Psql based implementation needs new infrastructure (more than few
lines)

Missing:

* binary mode support
* parametrized query support,

I am not against, but both points I proposed, and both was rejected.

So why dont use current infrastructure? Raw copy is trivial patch.

I was asked by Daniel Verite about reopening this patch in opened
commitfest.

I am sending rebased patch

Regards

Pavel

I am new to reviewing; here is what I found. The patch applied cleanly
to HEAD. I tried to upload and export files in psql, and it works
as expected. All regression tests pass without problems as well.
The code looks good to me. One line of the documentation confused me
a little:

"use any metadata - only row data in network byte order are exported"

Did you mean "only raw data in network byte order is exported"?

And there are two entries for this patch on commitfest page: in
"miscellaneous" and "sql" sections. Probably it's better to remove one
of them to avoid confusion.

--
Ildar Musin
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company

#33Daniel Verite
daniel@manitou-mail.org
In reply to: Corey Huinker (#30)
Re: raw output from copy

Corey Huinker wrote:

So, for me, RAW is the right solution, or at least *a* right solution.

Questions on how to extract from a bytea column come up on a regular
basis, as in [1] [2] [3], or [4] a few days ago, and so far the answers
are to encode the contents in text and decode them in an additional
step, or use COPY BINARY and filter out the headers.

But none of this is as straightforward and efficient as the proposed
COPY RAW.
Also the conversion to text can't be used at all on very large
contents (>512MB), as mentioned in another recent thread [5]
(this is the same reason why pg_dump can't dump such rows),
but COPY RAW doesn't have this limitation.

Technically COPY BINARY should be sufficient, but it seems that
people dislike having to deal with its headers.
Also it's not supported by any of the drivers of popular
script languages that otherwise provide COPY in text format
(DBD::Pg, php, psycopg2...)
Maybe the RAW format would have a better chance to get support
there, because of its simplicity.

[1]: /messages/by-id/038517CEB6DE43BD8422D7947B6BE8D8@fanlijing

[2]: /messages/by-id/4C8272C4.1000008@arcor.de

[3]: http://stackoverflow.com/questions/6730729

[4]: /messages/by-id/56C66565.50107@consistentstate.com

[5]: /messages/by-id/14620.1456851036@sss.pgh.pa.us

Best regards,
--
Daniel Vérité
PostgreSQL-powered mailer: http://www.manitou-mail.org
Twitter: @DanielVerite

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#34Pavel Stehule
pavel.stehule@gmail.com
In reply to: Daniel Verite (#33)
Re: raw output from copy

2016-03-04 15:54 GMT+01:00 Daniel Verite <daniel@manitou-mail.org>:

Corey Huinker wrote:

So, for me, RAW is the right solution, or at least *a* right solution.

Questions on how to extract from a bytea column come up on a regular
basis, as in [1] [2] [3], or [4] a few days ago, and so far the answers
are to encode the contents in text and decode them in an additional
step, or use COPY BINARY and filter out the headers.

But none of this is as straightforward and efficient as the proposed
COPY RAW.
Also the conversion to text can't be used at all on very large
contents (>512MB), as mentioned in another recent thread [5]
(this is the same reason why pg_dump can't dump such rows),
but COPY RAW doesn't have this limitation.

Technically COPY BINARY should be sufficient, but it seems that
people dislike having to deal with its headers.

Also it's not supported by any of the drivers of popular

script languages that otherwise provide COPY in text format
(DBD::Pg, php, psycopg2...)
Maybe the RAW format would have a better chance to get support
there, because of its simplicity.

Exactly - I would like to decrease the dependency on PostgreSQL internals.
Working with clean content is simple and possible in any environment, without
unclean operations.

Regards

Pavel

Show quoted text

[1]

/messages/by-id/038517CEB6DE43BD8422D7947B6BE8D8@fanlijing

[2] /messages/by-id/4C8272C4.1000008@arcor.de

[3] http://stackoverflow.com/questions/6730729

[4]
/messages/by-id/56C66565.50107@consistentstate.com

[5] /messages/by-id/14620.1456851036@sss.pgh.pa.us

Best regards,
--
Daniel Vérité
PostgreSQL-powered mailer: http://www.manitou-mail.org
Twitter: @DanielVerite

#35Pavel Stehule
pavel.stehule@gmail.com
In reply to: Pavel Stehule (#34)
Re: raw output from copy

2016-03-04 18:06 GMT+01:00 Pavel Stehule <pavel.stehule@gmail.com>:

2016-03-04 15:54 GMT+01:00 Daniel Verite <daniel@manitou-mail.org>:

Corey Huinker wrote:

So, for me, RAW is the right solution, or at least *a* right solution.

Questions on how to extract from a bytea column come up on a regular
basis, as in [1] [2] [3], or [4] a few days ago, and so far the answers
are to encode the contents in text and decode them in an additional
step, or use COPY BINARY and filter out the headers.

But none of this is as straightforward and efficient as the proposed
COPY RAW.
Also the conversion to text can't be used at all on very large
contents (>512MB), as mentioned in another recent thread [5]
(this is the same reason why pg_dump can't dump such rows),
but COPY RAW doesn't have this limitation.

Technically COPY BINARY should be sufficient, but it seems that
people dislike having to deal with its headers.

Also it's not supported by any of the drivers of popular

script languages that otherwise provide COPY in text format
(DBD::Pg, php, psycopg2...)
Maybe the RAW format would have a better chance to get support
there, because of its simplicity.

exactly - I would to decrease dependency on PostgreSQL internals. Working
with clean content is simple and possible with any environment without
unclean operations.

COPY RAW can be used for import. I am not sure if this use case was tested.

cat image.jpg | psql -c "CREATE TEMP TABLE auxbuf(image bytea); COPY
auxbuf(image) FROM stdin RAW; ..." postgres

Regards

Pavel

Show quoted text

Regards

Pavel

[1]

/messages/by-id/038517CEB6DE43BD8422D7947B6BE8D8@fanlijing

[2] /messages/by-id/4C8272C4.1000008@arcor.de

[3] http://stackoverflow.com/questions/6730729

[4]
/messages/by-id/56C66565.50107@consistentstate.com

[5] /messages/by-id/14620.1456851036@sss.pgh.pa.us

Best regards,
--
Daniel Vérité
PostgreSQL-powered mailer: http://www.manitou-mail.org
Twitter: @DanielVerite

#36Corey Huinker
corey.huinker@gmail.com
In reply to: Pavel Stehule (#31)
1 attachment(s)
Re: raw output from copy

The regression tests seem to adequately cover all new functionality,
though I wonder if we should add some cases that highlight situations where
BINARY mode is insufficient.

One thing I tried to test RAW was to load an existing json file.

My own personal test was to load an existing .json file into a 1x1 bytea
table, which worked. From there I was able to
select encode(col_name,'escape')::text::jsonb from test_table
and the json was correctly converted.

A similar test copying binary failed.

A write up of the test looks like this:

\copy (select '{"foo": "bar"}') to '/tmp/raw_test.jsonb' (format raw);
COPY 1
create temporary table raw_byte (b bytea);
CREATE TABLE
create temporary table raw_text (t text);
CREATE TABLE
\copy raw_jsonb from '/tmp/raw_test.blob' (format raw);
psql:/home/ubuntu/raw_test.sql:9: ERROR: relation "raw_jsonb" does not
exist
\copy raw_byte from '/tmp/raw_test.blob' (format raw);
COPY 1
select encode(b,'escape')::text::json from raw_byte;
encode
----------------
{"foo": "bar"}
(1 row)

\copy raw_text from '/tmp/raw_test.blob' (format raw);
COPY 1
select t::jsonb from raw_text;
t
----------------
{"foo": "bar"}
(1 row)

create temporary table binary_byte (b bytea);
CREATE TABLE
create temporary table binary_text (t text);
CREATE TABLE
\copy binary_byte from '/tmp/raw_test.blob' (format binary);
psql:/home/ubuntu/raw_test.sql:22: ERROR: COPY file signature not
recognized
select encode(b,'escape')::jsonb from binary_byte;
encode
--------
(0 rows)

\copy binary_text from '/tmp/raw_test.blob' (format binary);
psql:/home/ubuntu/raw_test.sql:26: ERROR: COPY file signature not
recognized
select t::jsonb from binary_text;
t
---
(0 rows)

So, *if* we want to add a regression test to demonstrate to posterity why
we need RAW for cases that BINARY can't handle, I offer the attached file.

Does anyone else see value in adding that to the regression tests?

Before I give my approval, I want to read it again more closely to make

sure that no cases were skipped with regard to the (binary || raw) and
(binary || !raw) tests. Also, I want to use it on some of my problematic
files. Maybe I'll find a good edge case. Probably not.

I don't know why I thought this, but when I looked at the patch, I assumed
that the ( binary || raw ) tests were part of a large if/elseif/else
waterfall. They are not. They stand alone. There are no edge cases to find.

Review complete and passed. I can re-review if we want to add the
additional test.

Attachments:

raw_test.sqlapplication/sql; name=raw_test.sqlDownload
#37Pavel Stehule
pavel.stehule@gmail.com
In reply to: Corey Huinker (#36)
Re: raw output from copy

Hi

2016-03-09 18:41 GMT+01:00 Corey Huinker <corey.huinker@gmail.com>:

The regression tests seem to adequately cover all new functionality,
though I wonder if we should add some cases that highlight situations where
BINARY mode is insufficient.

One thing I tried to test RAW was to load an existing json file.

My own personal test was to load an existing .json file into a 1x1 bytea
table, which worked. From there I was able to
select encode(col_name,'escape')::text::jsonb from test_table
and the json was correctly converted.

A similar test copying binary failed.

A write up of the test looks like this:

\copy (select '{"foo": "bar"}') to '/tmp/raw_test.jsonb' (format raw);
COPY 1
create temporary table raw_byte (b bytea);
CREATE TABLE
create temporary table raw_text (t text);
CREATE TABLE
\copy raw_jsonb from '/tmp/raw_test.blob' (format raw);
psql:/home/ubuntu/raw_test.sql:9: ERROR: relation "raw_jsonb" does not
exist
\copy raw_byte from '/tmp/raw_test.blob' (format raw);
COPY 1
select encode(b,'escape')::text::json from raw_byte;
encode
----------------
{"foo": "bar"}
(1 row)

\copy raw_text from '/tmp/raw_test.blob' (format raw);
COPY 1
select t::jsonb from raw_text;
t
----------------
{"foo": "bar"}
(1 row)

create temporary table binary_byte (b bytea);
CREATE TABLE
create temporary table binary_text (t text);
CREATE TABLE
\copy binary_byte from '/tmp/raw_test.blob' (format binary);
psql:/home/ubuntu/raw_test.sql:22: ERROR: COPY file signature not
recognized
select encode(b,'escape')::jsonb from binary_byte;
encode
--------
(0 rows)

\copy binary_text from '/tmp/raw_test.blob' (format binary);
psql:/home/ubuntu/raw_test.sql:26: ERROR: COPY file signature not
recognized
select t::jsonb from binary_text;
t
---
(0 rows)

So, *if* we want to add a regression test to demonstrate to posterity why
we need RAW for cases that BINARY can't handle, I offer the attached file.

I don't think the regression tests should do this demonstration. It is
clear that COPY BINARY should fail every time, so there is no benefit
from it in the regression tests. There is a lot of discussion in this
thread, and we don't need to inject more "garbage" into the regression tests.

Does anyone else see value in adding that to the regression tests?

Before I give my approval, I want to read it again more closely to make

sure that no cases were skipped with regard to the (binary || raw) and
(binary || !raw) tests. Also, I want to use it on some of my problematic
files. Maybe I'll find a good edge case. Probably not.

I don't know why I thought this, but when I looked at the patch, I assumed
that the ( binary || raw ) tests were part of a large if/elseif/else
waterfall. They are not. They stand alone. There are no edge cases to find.

The tests are organized into files by the need to work with external files.
The regression tests for COPY RAW are about 100 lines - so why would they need
special files and infrastructure? The COPY RAW and COPY BINARY tests share
infrastructure well.

Review complete and passed. I can re-review if we want to add the
additional test.

Great, thank you very much. I hope this feature is really useful. It allows
simple export/import of XML documents in different encodings and of JSON, and
it can be enhanced in the future via options. A nice feature (but not for this
release) could be additional cast info for import -- like "COPY
table(jsonb_column) FROM stdin (FORMAT RAW, CAST json_2_jsonb)". Because there
are options, there is a lot of room for other enhancements.

Regards

Pavel

#38David Steele
david@pgmasters.net
In reply to: Pavel Stehule (#37)
Re: raw output from copy

On 3/12/16 1:24 AM, Pavel Stehule wrote:

Great, thank you very much. I hope so this feature really useful. It
allow to simple export/import XML doc in different encodings, JSONs and
can be enhanced future via options. The nice feature (but not for this
release) can be additional cast info for import -- like "COPY
table(jsonb_column) FROM stdin (FORMAT RAW, CAST json_2_jsonb). Because
there are the options, there are big space for other enhancing.

Andres Karlsson pointed out that this patch has two CF entries:

https://commitfest.postgresql.org/9/223/
https://commitfest.postgresql.org/9/547/

I closed the one that was in the "needs review" (547) state and kept the
one that is "ready for committer" (223).

--
-David
david@pgmasters.net

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#39Pavel Stehule
pavel.stehule@gmail.com
In reply to: David Steele (#38)
Re: raw output from copy

2016-03-18 16:32 GMT+01:00 David Steele <david@pgmasters.net>:

On 3/12/16 1:24 AM, Pavel Stehule wrote:

Great, thank you very much. I hope so this feature really useful. It

allow to simple export/import XML doc in different encodings, JSONs and
can be enhanced future via options. The nice feature (but not for this
release) can be additional cast info for import -- like "COPY
table(jsonb_column) FROM stdin (FORMAT RAW, CAST json_2_jsonb). Because
there are the options, there are big space for other enhancing.

Andres Karlsson pointed out that this patch has two CF entries:

https://commitfest.postgresql.org/9/223/
https://commitfest.postgresql.org/9/547/

I closed the one that was in the "needs review" (547) state and kept the
one that is "ready for committer" (223).

sure, thank you

Pavel

Show quoted text

--
-David
david@pgmasters.net

#40Tom Lane
tgl@sss.pgh.pa.us
In reply to: Pavel Stehule (#29)
Re: raw output from copy

Pavel Stehule <pavel.stehule@gmail.com> writes:

[ copy-raw-format-20160227-03.patch ]

I looked at this patch. I'm having a hard time accepting that it has
a use-case large enough to justify it, and here's the reason: it's
a protocol break. Conveniently omitting to update protocol.sgml
doesn't make it not a protocol break. (libpq.sgml also contains
assorted statements that are falsified by this patch.)

You could argue that it's the user's own fault if he tries to use
COPY RAW with client-side code that hasn't been updated to support it.
Maybe that's okay, but I wonder if we're opening ourselves up to
problems. Maybe even security-grade problems.

In terms of specific code that hasn't been updated, ecpg is broken
by this patch, and I'm not very sure what libpq's PQbinaryTuples()
ought to do but probably something other than what it does today.

There's also a definitional question of what we think PQfformat() ought
to do; should it return "2" for the per-field format? Or maybe the
per-field format is still "1", since it's after all the same binary data
format as for COPY BINARY, and only the overall copy format reported by
PQbinaryTuples() should change to "2".

BTW, I'm not really sure why the patch is trying to enforce single
row and column for the COPY OUT case. I thought the idea for that
was that we'd just shove out the data without any delimiters, and
if it's more than one datum it's the user's problem whether he can
identify the boundaries. On the input side we would have to insist
on one column since we're not going to attempt to identify boundaries
(and one row would fall out of the fact that we slurp the entire input
and treat it as one datum).

Anyway this is certainly not committable as-is, so I'm setting it back
to Waiting on Author. But the fact that both libpq and ecpg would need
updates makes me question whether we can safely pretend that this isn't
a protocol break.

regards, tom lane

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#41Andrew Dunstan
andrew@dunslane.net
In reply to: Tom Lane (#40)
Re: raw output from copy

On 03/28/2016 06:26 PM, Tom Lane wrote:

Pavel Stehule <pavel.stehule@gmail.com> writes:

[ copy-raw-format-20160227-03.patch ]

I looked at this patch. I'm having a hard time accepting that it has
a use-case large enough to justify it, and here's the reason: it's
a protocol break. Conveniently omitting to update protocol.sgml
doesn't make it not a protocol break. (libpq.sgml also contains
assorted statements that are falsified by this patch.)

You could argue that it's the user's own fault if he tries to use
COPY RAW with client-side code that hasn't been updated to support it.
Maybe that's okay, but I wonder if we're opening ourselves up to
problems. Maybe even security-grade problems.

In terms of specific code that hasn't been updated, ecpg is broken
by this patch, and I'm not very sure what libpq's PQbinaryTuples()
ought to do but probably something other than what it does today.

There's also a definitional question of what we think PQfformat() ought
to do; should it return "2" for the per-field format? Or maybe the
per-field format is still "1", since it's after all the same binary data
format as for COPY BINARY, and only the overall copy format reported by
PQbinaryTuples() should change to "2".

BTW, I'm not really sure why the patch is trying to enforce single
row and column for the COPY OUT case. I thought the idea for that
was that we'd just shove out the data without any delimiters, and
if it's more than one datum it's the user's problem whether he can
identify the boundaries. On the input side we would have to insist
on one column since we're not going to attempt to identify boundaries
(and one row would fall out of the fact that we slurp the entire input
and treat it as one datum).

Anyway this is certainly not committable as-is, so I'm setting it back
to Waiting on Author. But the fact that both libpq and ecpg would need
updates makes me question whether we can safely pretend that this isn't
a protocol break.

In that case I humbly submit that there is a case for reviving the psql
patch I posted that kicked off this whole thing and lets you export a
piece of binary data from psql quite easily. It should certainly not
involve any protocol break.

cheers

andrew

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#42Pavel Stehule
pavel.stehule@gmail.com
In reply to: Andrew Dunstan (#41)
Re: raw output from copy

2016-03-29 5:12 GMT+02:00 Andrew Dunstan <andrew@dunslane.net>:

On 03/28/2016 06:26 PM, Tom Lane wrote:

Pavel Stehule <pavel.stehule@gmail.com> writes:

[ copy-raw-format-20160227-03.patch ]

I looked at this patch. I'm having a hard time accepting that it has
a use-case large enough to justify it, and here's the reason: it's
a protocol break. Conveniently omitting to update protocol.sgml
doesn't make it not a protocol break. (libpq.sgml also contains
assorted statements that are falsified by this patch.)

You could argue that it's the user's own fault if he tries to use
COPY RAW with client-side code that hasn't been updated to support it.
Maybe that's okay, but I wonder if we're opening ourselves up to
problems. Maybe even security-grade problems.

In terms of specific code that hasn't been updated, ecpg is broken
by this patch, and I'm not very sure what libpq's PQbinaryTuples()
ought to do but probably something other than what it does today.

There's also a definitional question of what we think PQfformat() ought
to do; should it return "2" for the per-field format? Or maybe the
per-field format is still "1", since it's after all the same binary data
format as for COPY BINARY, and only the overall copy format reported by
PQbinaryTuples() should change to "2".

BTW, I'm not really sure why the patch is trying to enforce single
row and column for the COPY OUT case. I thought the idea for that
was that we'd just shove out the data without any delimiters, and
if it's more than one datum it's the user's problem whether he can
identify the boundaries. On the input side we would have to insist
on one column since we're not going to attempt to identify boundaries
(and one row would fall out of the fact that we slurp the entire input
and treat it as one datum).

Anyway this is certainly not committable as-is, so I'm setting it back
to Waiting on Author. But the fact that both libpq and ecpg would need
updates makes me question whether we can safely pretend that this isn't
a protocol break.

In that case I humbly submit that there is a case for reviving the psql
patch I posted that kicked off this whole thing and lets you export a piece
of binary data from psql quite easily. It should certainly not involve any
protocol break.

The psql only solution can work only for output. Doesn't help with input.

Regards

Pavel

Show quoted text

cheers

andrew

#43Pavel Stehule
pavel.stehule@gmail.com
In reply to: Tom Lane (#40)
Re: raw output from copy

Hi

2016-03-29 0:26 GMT+02:00 Tom Lane <tgl@sss.pgh.pa.us>:

Pavel Stehule <pavel.stehule@gmail.com> writes:

[ copy-raw-format-20160227-03.patch ]

I looked at this patch. I'm having a hard time accepting that it has
a use-case large enough to justify it, and here's the reason: it's
a protocol break. Conveniently omitting to update protocol.sgml
doesn't make it not a protocol break. (libpq.sgml also contains
assorted statements that are falsified by this patch.)

The answer to this question depends on how strict we want to be. This doesn't
change the format of the data stream, but it does create a new enum value. A
correctly written client should raise an exception when it processes an
unknown enum value.

I'll do tests against old libpq.

You could argue that it's the user's own fault if he tries to use
COPY RAW with client-side code that hasn't been updated to support it.
Maybe that's okay, but I wonder if we're opening ourselves up to
problems. Maybe even security-grade problems.

In terms of specific code that hasn't been updated, ecpg is broken
by this patch, and I'm not very sure what libpq's PQbinaryTuples()
ought to do but probably something other than what it does today.

There's also a definitional question of what we think PQfformat() ought
to do; should it return "2" for the per-field format? Or maybe the
per-field format is still "1", since it's after all the same binary data
format as for COPY BINARY, and only the overall copy format reported by
PQbinaryTuples() should change to "2".

I'll recheck it

BTW, I'm not really sure why the patch is trying to enforce single
row and column for the COPY OUT case. I thought the idea for that
was that we'd just shove out the data without any delimiters, and
if it's more than one datum it's the user's problem whether he can
identify the boundaries. On the input side we would have to insist
on one column since we're not going to attempt to identify boundaries
(and one row would fall out of the fact that we slurp the entire input
and treat it as one datum).

It should not be a problem. I thought about it. The COPY statement is
extensible with options. We can support more fields and more rows, if that is
required, with additional options. But for now, it looks like premature
optimization.

Anyway this is certainly not committable as-is, so I'm setting it back
to Waiting on Author. But the fact that both libpq and ecpg would need
updates makes me question whether we can safely pretend that this isn't
a protocol break.

I'll do test against some clients.

Regards

Pavel

Show quoted text

regards, tom lane

#44Pavel Stehule
pavel.stehule@gmail.com
In reply to: Pavel Stehule (#42)
Re: raw output from copy

Hi

Anyway this is certainly not committable as-is, so I'm setting it back
to Waiting on Author. But the fact that both libpq and ecpg would need
updates makes me question whether we can safely pretend that this isn't
a protocol break.

In that case I humbly submit that there is a case for reviving the psql
patch I posted that kicked off this whole thing and lets you export a piece
of binary data from psql quite easily. It should certainly not involve any
protocol break.

The psql only solution can work only for output. Doesn't help with input.

In this case, I think the COPY statement is a perfect fit for this feature.
The path from the content to the file is direct. In psql you have to turn
off the tuple separator and the record separator, and you have to set the
output file. You can get the same effect, but with more work. In previous
versions it was relatively hard to use from the command line - now, with
multiple -c commands, it is much simpler, but COPY is still the ideal.

I agree that psql's output formats are a nice feature. And it would be pretty
nice if we supported more common formats - like csv, simple xml, simple
json. I believe that sometimes redundancy is acceptable, if the cost is
not too high.

Sorry for the off-topic remark - I would like to see some output formats on
the client side, but the formatting capabilities are on the server side. So
there is a natural idea - define server-side output formats; a psql output
format can then just wrap them.

Regards

Pavel

Show quoted text

Regards

Pavel

cheers

andrew

#45Pavel Stehule
pavel.stehule@gmail.com
In reply to: Tom Lane (#40)
Re: raw output from copy

Hi

2016-03-29 0:26 GMT+02:00 Tom Lane <tgl@sss.pgh.pa.us>:

Pavel Stehule <pavel.stehule@gmail.com> writes:

[ copy-raw-format-20160227-03.patch ]

I looked at this patch. I'm having a hard time accepting that it has
a use-case large enough to justify it, and here's the reason: it's
a protocol break. Conveniently omitting to update protocol.sgml
doesn't make it not a protocol break. (libpq.sgml also contains
assorted statements that are falsified by this patch.)

You could argue that it's the user's own fault if he tries to use
COPY RAW with client-side code that hasn't been updated to support it.
Maybe that's okay, but I wonder if we're opening ourselves up to
problems. Maybe even security-grade problems.

I tested COPY RAW on old psql clients - and it works without any
problem - so when a client uses the same logic as psql, it should
work. Sure, there can be differently implemented clients, but the client
side of COPY is usually simple - store the stream to the output.

Maybe I am blind, but I don't see any new security risks. The risk can be
only on the client side - if a client is not able to work with the new value,
then it can fail. But any attacker can use a fake data stream and trigger
this error too. So if there are security risks for specially designed
clients, then these risks already exist now.

In terms of specific code that hasn't been updated, ecpg is broken
by this patch, and I'm not very sure what libpq's PQbinaryTuples()
ought to do but probably something other than what it does today.

There's also a definitional question of what we think PQfformat() ought
to do; should it return "2" for the per-field format? Or maybe the
per-field format is still "1", since it's after all the same binary data
format as for COPY BINARY, and only the overall copy format reported by
PQbinaryTuples() should change to "2".

Theoretically the change there is allowed - the PQfformat documentation says:
"Format code zero indicates textual data representation, while format code
one indicates binary representation. (Other codes are reserved for future
definition.)" But the format of COPY RAW is binary - it is a cleaner binary
format than the one used by COPY BINARY (where there is a header + binary
data). I think PQbinaryTuples should return 1 (no change), and PQfformat
should return 2. If some older client uses the deprecated function
PQbinaryTuples(), then 1 is a safe value. PQfformat() is documented
differently, and if it returns a value other than the expected one, the
client should raise an error. So using 2 is safe there. The value 2 matches
the actual content:

Packet: t=1459265078.596466, session=213070643360702
PGSQL: type=Query, F -> B
QUERY query=copy foo(x) to stdout (format raw);

Packet: t=1459265078.597755, session=213070643360702
PGSQL: type=CopyOutResponse, B -> F
COPY OUT RESPONSE copy format=1, num_fields=1, fields_formats=2

Packet: t=1459265078.597755, session=213070643360702
PGSQL: type=CopyData, B -> F
COPY DATA len=20

Packet: t=1459265078.597755, session=213070643360702
PGSQL: type=CopyDone, B -> F
COPY DONE

Packet: t=1459265078.597755, session=213070643360702
PGSQL: type=CommandComplete, B -> F
COMMAND COMPLETE command='COPY 1'

Packet: t=1459265078.597755, session=213070643360702
PGSQL: type=ReadyForQuery, B -> F
READY FOR QUERY type=<IDLE>

What do you think ?

p.s. These values are returned now

PQfformat(*results, 0) returns 2 already, PQbinaryTuples() returns 1.

BTW, I'm not really sure why the patch is trying to enforce single
row and column for the COPY OUT case. I thought the idea for that
was that we'd just shove out the data without any delimiters, and
if it's more than one datum it's the user's problem whether he can
identify the boundaries. On the input side we would have to insist
on one column since we're not going to attempt to identify boundaries
(and one row would fall out of the fact that we slurp the entire input
and treat it as one datum).

Anyway this is certainly not committable as-is, so I'm setting it back
to Waiting on Author. But the fact that both libpq and ecpg would need
updates makes me question whether we can safely pretend that this isn't
a protocol break.

I executed all tests in libpq and ecpg without any problems. Can you,
please, help me with repeating a ecpg issues?

Regards

Pavel

Show quoted text

regards, tom lane

#46Tom Lane
tgl@sss.pgh.pa.us
In reply to: Pavel Stehule (#45)
Re: raw output from copy

Pavel Stehule <pavel.stehule@gmail.com> writes:

I tested COPY RAW on old psql clients - and it is working without any
problem - so when the client uses same logic as psql, then it should to
work. Sure, there can be differently implemented clients, but the COPY
client side is usually simple - store stream to output.

My point is precisely that I doubt all clients are that stupid about COPY.

Maybe I am blind, but I don't see any new security risks. The risk can be
only on client side - and if client is not able work with new value, then
it can fails.

Well, the point is that low-level code might get used to process the data
stream for commands it doesn't have any control over. Maybe there's no
realistic security risk there, or maybe there is; I'm not sure.

I am thinking so PQbinaryTuples should to return 1 (without change), and
PQfformat should to return 2.

Well, that seems pretty backwards to me. The format of the individual
fields is still what it is under COPY BINARY; you would not use a
different per-field transformation. You do need to know about the
overall format of the copy data stream being different, and defining
PQbinaryTuples as still returning 1 means there's no clean way to
understand overall copy format vs. per-field format.

There's a case to be made that we should invent a new function named
along the lines of PQcopyFormat() rather than overloading PQbinaryTuples()
some more. That function is currently deprecated and I'm not very happy
with un-deprecating it only to use it in a confusing way.

To be more concrete about this: I think it's actually rather broken
that this patch ties RAW to binary format of the field contents.
Why would it not be exactly as useful to have delimiter-less COPY
of textual data, for use when there's just one datum and/or you're
confident in picking the data apart for yourself? But as things stand
it'd be too confusing for an application to try to figure out what's
happening in such a case.

So I think we should either invent RAW_TEXT and RAW_BINARY formats
(not just RAW) or make RAW be an orthogonal copy option. And we need
to improve libpq's behavior enough so that applications can sanely
figure out what's happening.

I executed all tests in libpq and ecpg without any problems. Can you,
please, help me with repeating a ecpg issues?

Of course the ecpg tests pass; you didn't extend them to see what would
happen if someone tries COPY RAW with ecpg. Likewise, we have no tests
exercising a client's use of libpq with more intelligence than psql has
got. But that doesn't mean it's acceptable to write this patch with no
thought for such clients.

I am fairly sure that there actually are third-party client libraries
that have more intelligence about COPY than psql, but I do not remember
any specifics unfortunately.

regards, tom lane

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#47Pavel Stehule
pavel.stehule@gmail.com
In reply to: Tom Lane (#46)
Re: raw output from copy

Hi

2016-03-29 18:19 GMT+02:00 Tom Lane <tgl@sss.pgh.pa.us>:

Pavel Stehule <pavel.stehule@gmail.com> writes:

I tested COPY RAW on old psql clients - and it is working without any
problem - so when the client uses same logic as psql, then it should to
work. Sure, there can be differently implemented clients, but the COPY
client side is usually simple - store stream to output.

My point is precisely that I doubt all clients are that stupid about COPY.

Maybe I am blind, but I don't see any new security risks. The risk can be
only on client side - and if client is not able work with new value, then
it can fails.

Well, the point is that low-level code might get used to process the data
stream for commands it doesn't have any control over. Maybe there's no
realistic security risk there, or maybe there is; I'm not sure.

I am thinking so PQbinaryTuples should to return 1 (without change), and
PQfformat should to return 2.

Well, that seems pretty backwards to me. The format of the individual
fields is still what it is under COPY BINARY; you would not use a
different per-field transformation. You do need to know about the
overall format of the copy data stream being different, and defining
PQbinaryTuples as still returning 1 means there's no clean way to
understand overall copy format vs. per-field format.

There's a case to be made that we should invent a new function named
along the lines of PQcopyFormat() rather than overloading PQbinaryTuples()
some more. That function is currently deprecated and I'm not very happy
with un-deprecating it only to use it in a confusing way.

I see the introduction of PQcopyFormat() as the best idea. Then these new
changes are transparent for PQbinaryTuples() and PQfformat() - and
PQcopyFormat() can return info about the method used.

To be more concrete about this: I think it's actually rather broken
that this patch ties RAW to binary format of the field contents.
Why would it not be exactly as useful to have delimiter-less COPY
of textual data, for use when there's just one datum and/or you're
confident in picking the data apart for yourself? But as things stand
it'd be too confusing for an application to try to figure out what's
happening in such a case.

So I think we should either invent RAW_TEXT and RAW_BINARY formats
(not just RAW) or make RAW be an orthogonal copy option. And we need
to improve libpq's behavior enough so that applications can sanely
figure out what's happening.

I had a use case that required binary mode. Finer granularity makes sense.

This opens a new question - RAW_TEXT will use the text output function. But
if I pass this value as a text value, then the behavior of current clients
will be the same as for the usual COPY. So I need to use the binary protocol.
And then the question is how PQbinaryTuples() and PQfformat() should behave.
Although a text value can be passed in binary mode too (with the format
[length, data...]).

I executed all tests in libpq and ecpg without any problems. Can you,
please, help me with repeating a ecpg issues?

Of course the ecpg tests pass; you didn't extend them to see what would
happen if someone tries COPY RAW with ecpg. Likewise, we have no tests
exercising a client's use of libpq with more intelligence than psql has
got. But that doesn't mean it's acceptable to write this patch with no
thought for such clients.

If we don't change PQbinaryTuples() and PQfformat(), then COPY RAW should
be transparent to any client. The server sends the data in binary format,
which is generic.

I am fairly sure that there actually are third-party client libraries
that have more intelligence about COPY than psql, but I do not remember
any specifics unfortunately.

COPY RAW should not break any existing application. This is a new
feature - old applications and old clients never use COPY RAW. What I see as
important is that the mode used (text/binary) is consistent with
PQbinaryTuples() and PQfformat().

I am writing few lines as summary:

1. invent RAW_TEXT and RAW_BINARY formats
2. for RAW_BINARY: PQbinaryTuples() returns 1 and PQfformat() returns 1
3.a for RAW_TEXT: PQbinaryTuples() returns 0 and PQfformat() returns 0, but
the client should check PQcopyFormat() so that it does not print "\n" at
the end
3.b for RAW_TEXT: PQbinaryTuples() returns 1 and PQfformat() returns 1, but
the text output function is used; no client modification is necessary
4. PQcopyFormat() returns 0 for text, 1 for binary, 2 for RAW_TEXT, 3 for
RAW_BINARY
5. create tests for ecpg

Is it ok?

What do you prefer 3.a, or 3.b?

Regards

Pavel

Show quoted text

regards, tom lane

#48Tom Lane
tgl@sss.pgh.pa.us
In reply to: Pavel Stehule (#47)
Re: raw output from copy

Pavel Stehule <pavel.stehule@gmail.com> writes:

I am writing few lines as summary:

1. invention RAW_TEXT and RAW_BINARY
2. for RAW_BINARY: PQbinaryTuples() returns 1 and PQfformat() returns 1
3.a for RAW_TEXT: PQbinaryTuples() returns 0 and PQfformat() returns 0, but
the client should to check PQcopyFormat() to not print "\n" on the end
3.b for RAW_TEXT: PQbinaryTuples() returns 1 and PQfformat() returns 1, but
used output function, not necessary client modification
4. PQcopyFormat() returns 0 for text, 1 for binary, 2 for RAW_TEXT, 3 for
RAW_BINARY
5. create tests for ecpg

3.b certainly seems completely wrong. PQfformat==1 would imply binary
data.

I suggest that PQcopyFormat should be understood as defining the format
of the copy data encapsulation, not the individual fields. So it would go
like 0 = traditional text format, 1 = traditional binary format, 2 = raw
(no encapsulation). You'd need to also look at PQfformat to distinguish
raw text from raw binary. But if we do it as you suggest above, we've
locked ourselves into only ever having two field format codes, which
is something the existing design is specifically intended to allow
expansion in.

regards, tom lane

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#49Pavel Stehule
pavel.stehule@gmail.com
In reply to: Tom Lane (#48)
Re: raw output from copy

2016-03-29 20:59 GMT+02:00 Tom Lane <tgl@sss.pgh.pa.us>:

Pavel Stehule <pavel.stehule@gmail.com> writes:

I am writing few lines as summary:

1. invention RAW_TEXT and RAW_BINARY
2. for RAW_BINARY: PQbinaryTuples() returns 1 and PQfformat() returns 1
3.a for RAW_TEXT: PQbinaryTuples() returns 0 and PQfformat() returns 0, but
the client should to check PQcopyFormat() to not print "\n" on the end
3.b for RAW_TEXT: PQbinaryTuples() returns 1 and PQfformat() returns 1, but
used output function, not necessary client modification
4. PQcopyFormat() returns 0 for text, 1 for binary, 2 for RAW_TEXT, 3 for
RAW_BINARY
5. create tests for ecpg

3.b certainly seems completely wrong. PQfformat==1 would imply binary
data.

I suggest that PQcopyFormat should be understood as defining the format
of the copy data encapsulation, not the individual fields. So it would go
like 0 = traditional text format, 1 = traditional binary format, 2 = raw
(no encapsulation). You'd need to also look at PQfformat to distinguish
raw text from raw binary. But if we do it as you suggest above, we've
locked ourselves into only ever having two field format codes, which
is something the existing design is specifically intended to allow
expansion in.

I have less courage than you :). The original design worked with almost all
clients without changes on the client side. The new design has a lot of
combinations that are unknown to old clients. It may be better, because the
client authors will update faster.

If PQfformat returns 0 = text, 1 = traditional binary, 2 = raw text, 3
= raw binary - like you propose, then PQcopyFormat is useless. I can see all
the information just from PQfformat.

Regards

Pavel

Show quoted text

regards, tom lane

#50Andrew Dunstan
andrew@dunslane.net
In reply to: Pavel Stehule (#42)
Re: raw output from copy

On 03/28/2016 11:18 PM, Pavel Stehule wrote:

Anyway this is certainly not committable as-is, so I'm setting it back
to Waiting on Author. But the fact that both libpq and ecpg would need
updates makes me question whether we can safely pretend that this isn't
a protocol break.

In that case I humbly submit that there is a case for reviving the
psql patch I posted that kicked off this whole thing and lets you
export a piece of binary data from psql quite easily. It should
certainly not involve any protocol break.

The psql only solution can work only for output. Doesn't help with input.

Then I would suggest we try to invent something for psql which does help
with it. I just don't see this as an SQL problem. Pretty much any driver
library will have no difficulty in handling binary input and output.
It's only psql that has an issue, ISTM, and therefore I believe that's
where the fix should go. What else is going to use this? As an SQL
change this seems like a solution in search of a problem. If someone can
make a good case that this is going to be of general use I'll happily go
along, but I haven't seen one so far.

cheers

andrew

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#51Tom Lane
tgl@sss.pgh.pa.us
In reply to: Andrew Dunstan (#50)
Re: raw output from copy

Andrew Dunstan <andrew@dunslane.net> writes:

Then I would suggest we try to invent something for psql which does help
with it. I just don't see this as an SQL problem.

There's certainly a lot to be said for that approach. I'm still not
convinced that we can make COPY do this without creating compatibility
issues, regardless of the details; and it doesn't seem like a big
enough problem to be worth taking any risks of that sort.

regards, tom lane

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#52Pavel Stehule
pavel.stehule@gmail.com
In reply to: Tom Lane (#48)
1 attachment(s)
Re: raw output from copy

Hi

2016-03-29 20:59 GMT+02:00 Tom Lane <tgl@sss.pgh.pa.us>:

Pavel Stehule <pavel.stehule@gmail.com> writes:

I am writing few lines as summary:

1. invention RAW_TEXT and RAW_BINARY
2. for RAW_BINARY: PQbinaryTuples() returns 1 and PQfformat() returns 1
3.a for RAW_TEXT: PQbinaryTuples() returns 0 and PQfformat() returns 0, but
the client should to check PQcopyFormat() to not print "\n" on the end
3.b for RAW_TEXT: PQbinaryTuples() returns 1 and PQfformat() returns 1, but
used output function, not necessary client modification
4. PQcopyFormat() returns 0 for text, 1 for binary, 2 for RAW_TEXT, 3 for
RAW_BINARY
5. create tests for ecpg

3.b certainly seems completely wrong. PQfformat==1 would imply binary
data.

I suggest that PQcopyFormat should be understood as defining the format
of the copy data encapsulation, not the individual fields. So it would go
like 0 = traditional text format, 1 = traditional binary format, 2 = raw
(no encapsulation). You'd need to also look at PQfformat to distinguish
raw text from raw binary. But if we do it as you suggest above, we've
locked ourselves into only ever having two field format codes, which
is something the existing design is specifically intended to allow
expansion in.

I wrote concept of raw_text, raw_binary modes.

I have been trying to pass text data the same way the text format does - but
for RAW_TEXT it is not practical. Text passing is designed for single-line
data; for multiline data it enforces escaping, which we don't want in RAW
mode. I would have to skip the escaping, and the code is not nice.

So I propose a different scheme - RAW_TEXT uses text values (uses the
input/output functions) and enforces encoding conversion from/to the client
encoding, and binary mode is used for passing the data to the client - then
I don't need to read the content line by line. PQbinaryTuples() returns 1
for RAW_TEXT and RAW_BINARY - in these cases the data are passed as one
binary value. PQfformat returns 2 for RAW_TEXT and 3 for RAW_BINARY.

Any objections to this design?

Regards

Pavel

Show quoted text

regards, tom lane

Attachments:

copy-raw-format-20160331-04.patchtext/x-patch; charset=US-ASCII; name=copy-raw-format-20160331-04.patchDownload
diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
new file mode 100644
index 07e2f45..68fbfd8
*** a/doc/src/sgml/ref/copy.sgml
--- b/doc/src/sgml/ref/copy.sgml
*************** COPY { <replaceable class="parameter">ta
*** 197,203 ****
        Selects the data format to be read or written:
        <literal>text</>,
        <literal>csv</> (Comma Separated Values),
!       or <literal>binary</>.
        The default is <literal>text</>.
       </para>
      </listitem>
--- 197,203 ----
        Selects the data format to be read or written:
        <literal>text</>,
        <literal>csv</> (Comma Separated Values),
!       <literal>binary</> or <literal>raw</literal>.
        The default is <literal>text</>.
       </para>
      </listitem>
*************** OIDs to be shown as null if that ever pr
*** 888,893 ****
--- 888,925 ----
      </para>
     </refsect3>
    </refsect2>
+ 
+   <refsect2>
+      <title>Raw Format</title>
+ 
+    <para>
+     The <literal>raw</literal> format option causes all data to be
+     stored/read as binary format rather than as text. It shares format
+     for data with <literal>binary</literal> format. This format doesn't
+     use any metadata - only row data in network byte order are exported
+     or imported.
+    </para>
+ 
+    <para>
+     Because this format doesn't support any delimiter, only one value
+     can be exported or imported. NULL values are not allowed.
+    </para>
+    <para>
+     The <literal>raw</literal> format can be used to export or import
+     bytea values.
+ <programlisting>
+ COPY images(data) FROM '/usr1/proj/img/01.jpg' (FORMAT raw);
+ </programlisting>
+     It can also be used to export XML in a different encoding
+     or to import a valid XML document in any supported encoding:
+ <screen><![CDATA[
+ SET client_encoding TO latin2;
+ 
+ COPY (SELECT xmlelement(NAME data, 'Hello')) TO stdout (FORMAT raw);
+ <?xml version="1.0" encoding="LATIN2"?><data>Hello</data>
+ ]]></screen>
+    </para>
+   </refsect2>
   </refsect1>
  
   <refsect1>
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
new file mode 100644
index 3201476..1de36b6
*** a/src/backend/commands/copy.c
--- b/src/backend/commands/copy.c
*************** typedef enum EolType
*** 89,94 ****
--- 89,99 ----
   * it's faster to make useless comparisons to trailing bytes than it is to
   * invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is TRUE
   * when we have to do it the hard way.
+  *
+  * COPY supports four modes: text, binary, raw_text and raw_binary. The text
+  * format is plain text multiline format with specified delimiter. The binary
+  * format holds metadata (numbers, sizes) and data. The raw format holds data
+  * only and only one non NULL value can be processed.
   */
  typedef struct CopyStateData
  {
*************** typedef struct CopyStateData
*** 110,115 ****
--- 115,121 ----
  	char	   *filename;		/* filename, or NULL for STDIN/STDOUT */
  	bool		is_program;		/* is 'filename' a program to popen? */
  	bool		binary;			/* binary format? */
+ 	bool		raw;			/* required raw binary? */
  	bool		oids;			/* include OIDs? */
  	bool		freeze;			/* freeze rows on loading? */
  	bool		csv_mode;		/* Comma Separated Value format? */
*************** typedef struct CopyStateData
*** 199,204 ****
--- 205,213 ----
  	char	   *raw_buf;
  	int			raw_buf_index;	/* next byte to process */
  	int			raw_buf_len;	/* total # of bytes stored */
+ 
+ 	/* field for RAW mode */
+ 	bool		row_processed;		/* true, when first row was processed */
  } CopyStateData;
  
  /* DestReceiver for COPY (query) TO */
*************** SendCopyBegin(CopyState cstate)
*** 342,353 ****
  		/* new way */
  		StringInfoData buf;
  		int			natts = list_length(cstate->attnumlist);
! 		int16		format = (cstate->binary ? 1 : 0);
  		int			i;
  
  		pq_beginmessage(&buf, 'H');
! 		pq_sendbyte(&buf, format);		/* overall format */
  		pq_sendint(&buf, natts, 2);
  		for (i = 0; i < natts; i++)
  			pq_sendint(&buf, format, 2);		/* per-column formats */
  		pq_endmessage(&buf);
--- 351,368 ----
  		/* new way */
  		StringInfoData buf;
  		int			natts = list_length(cstate->attnumlist);
! 		int16		format;
  		int			i;
  
  		pq_beginmessage(&buf, 'H');
! 		pq_sendbyte(&buf, cstate->binary ? 1 : 0);		/* overall format */
  		pq_sendint(&buf, natts, 2);
+ 
+ 		if (!cstate->raw)
+ 			format = cstate->binary ? 1 : 0;
+ 		else
+ 			format = cstate->binary ? 3 : 2;
+ 
  		for (i = 0; i < natts; i++)
  			pq_sendint(&buf, format, 2);		/* per-column formats */
  		pq_endmessage(&buf);
*************** SendCopyBegin(CopyState cstate)
*** 356,365 ****
  	else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
  	{
  		/* old way */
! 		if (cstate->binary)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
! 			errmsg("COPY BINARY is not supported to stdout or from stdin")));
  		pq_putemptymessage('H');
  		/* grottiness needed for old COPY OUT protocol */
  		pq_startcopyout();
--- 371,380 ----
  	else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
  	{
  		/* old way */
! 		if (cstate->binary || cstate->raw)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
! 			errmsg("COPY BINARY or COPY RAW is not supported to stdout or from stdin")));
  		pq_putemptymessage('H');
  		/* grottiness needed for old COPY OUT protocol */
  		pq_startcopyout();
*************** SendCopyBegin(CopyState cstate)
*** 368,377 ****
  	else
  	{
  		/* very old way */
! 		if (cstate->binary)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
! 			errmsg("COPY BINARY is not supported to stdout or from stdin")));
  		pq_putemptymessage('B');
  		/* grottiness needed for old COPY OUT protocol */
  		pq_startcopyout();
--- 383,392 ----
  	else
  	{
  		/* very old way */
! 		if (cstate->binary || cstate->raw)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
! 			errmsg("COPY BINARY or COPY RAW is not supported to stdout or from stdin")));
  		pq_putemptymessage('B');
  		/* grottiness needed for old COPY OUT protocol */
  		pq_startcopyout();
*************** ReceiveCopyBegin(CopyState cstate)
*** 387,398 ****
  		/* new way */
  		StringInfoData buf;
  		int			natts = list_length(cstate->attnumlist);
! 		int16		format = (cstate->binary ? 1 : 0);
  		int			i;
  
  		pq_beginmessage(&buf, 'G');
! 		pq_sendbyte(&buf, format);		/* overall format */
  		pq_sendint(&buf, natts, 2);
  		for (i = 0; i < natts; i++)
  			pq_sendint(&buf, format, 2);		/* per-column formats */
  		pq_endmessage(&buf);
--- 402,419 ----
  		/* new way */
  		StringInfoData buf;
  		int			natts = list_length(cstate->attnumlist);
! 		int16		format;
  		int			i;
  
  		pq_beginmessage(&buf, 'G');
! 		pq_sendbyte(&buf, cstate->binary ? 1 : 0);		/* overall format */
  		pq_sendint(&buf, natts, 2);
+ 
+ 		if (!cstate->raw)
+ 			format = cstate->binary ? 1 : 0;
+ 		else
+ 			format = cstate->binary ? 3 : 2;
+ 
  		for (i = 0; i < natts; i++)
  			pq_sendint(&buf, format, 2);		/* per-column formats */
  		pq_endmessage(&buf);
*************** ReceiveCopyBegin(CopyState cstate)
*** 402,408 ****
  	else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
  	{
  		/* old way */
! 		if (cstate->binary)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
  			errmsg("COPY BINARY is not supported to stdout or from stdin")));
--- 423,429 ----
  	else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
  	{
  		/* old way */
! 		if (cstate->binary || cstate->raw)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
  			errmsg("COPY BINARY is not supported to stdout or from stdin")));
*************** ReceiveCopyBegin(CopyState cstate)
*** 414,420 ****
  	else
  	{
  		/* very old way */
! 		if (cstate->binary)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
  			errmsg("COPY BINARY is not supported to stdout or from stdin")));
--- 435,441 ----
  	else
  	{
  		/* very old way */
! 		if (cstate->binary || cstate->raw)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
  			errmsg("COPY BINARY is not supported to stdout or from stdin")));
*************** CopySendEndOfRow(CopyState cstate)
*** 482,488 ****
  	switch (cstate->copy_dest)
  	{
  		case COPY_FILE:
! 			if (!cstate->binary)
  			{
  				/* Default line termination depends on platform */
  #ifndef WIN32
--- 503,509 ----
  	switch (cstate->copy_dest)
  	{
  		case COPY_FILE:
! 			if (!cstate->binary && !cstate->raw)
  			{
  				/* Default line termination depends on platform */
  #ifndef WIN32
*************** CopySendEndOfRow(CopyState cstate)
*** 527,533 ****
  			break;
  		case COPY_OLD_FE:
  			/* The FE/BE protocol uses \n as newline for all platforms */
! 			if (!cstate->binary)
  				CopySendChar(cstate, '\n');
  
  			if (pq_putbytes(fe_msgbuf->data, fe_msgbuf->len))
--- 548,554 ----
  			break;
  		case COPY_OLD_FE:
  			/* The FE/BE protocol uses \n as newline for all platforms */
! 			if (!cstate->binary && !cstate->raw)
  				CopySendChar(cstate, '\n');
  
  			if (pq_putbytes(fe_msgbuf->data, fe_msgbuf->len))
*************** CopySendEndOfRow(CopyState cstate)
*** 540,546 ****
  			break;
  		case COPY_NEW_FE:
  			/* The FE/BE protocol uses \n as newline for all platforms */
! 			if (!cstate->binary)
  				CopySendChar(cstate, '\n');
  
  			/* Dump the accumulated row as one CopyData message */
--- 561,567 ----
  			break;
  		case COPY_NEW_FE:
  			/* The FE/BE protocol uses \n as newline for all platforms */
! 			if (!cstate->binary && !cstate->raw)
  				CopySendChar(cstate, '\n');
  
  			/* Dump the accumulated row as one CopyData message */
*************** CopyGetData(CopyState cstate, void *data
*** 597,602 ****
--- 618,624 ----
  			bytesread = minread;
  			break;
  		case COPY_NEW_FE:
+ 
  			while (maxread > 0 && bytesread < minread && !cstate->fe_eof)
  			{
  				int			avail;
*************** CopyGetData(CopyState cstate, void *data
*** 619,624 ****
--- 641,647 ----
  								(errcode(ERRCODE_CONNECTION_FAILURE),
  								 errmsg("unexpected EOF on client connection with an open transaction")));
  					RESUME_CANCEL_INTERRUPTS();
+ 
  					switch (mtype)
  					{
  						case 'd':		/* CopyData */
*************** CopyLoadRawBuf(CopyState cstate)
*** 766,771 ****
--- 789,825 ----
  	return (inbytes > 0);
  }
  
+ /*
+  * CopyLoadallRawBuf
+  *		Read the entire remaining input into raw_buf (used in RAW mode).
+  *
+  * If RAW_BUF_SIZE bytes are not enough, raw_buf grows in RAW_BUF_SIZE steps.
+  */
+ static void
+ CopyLoadallRawBuf(CopyState cstate)
+ {
+ 	Size		capacity = RAW_BUF_SIZE;
+ 	int			total = 0;
+ 	int			got;
+ 
+ 	for (;;)
+ 	{
+ 		/* enlarge first, so that at least one 8kB data packet still fits */
+ 		if ((capacity - total - 1) < 8 * 1024)
+ 		{
+ 			capacity += RAW_BUF_SIZE;
+ 			cstate->raw_buf = repalloc(cstate->raw_buf, capacity);
+ 		}
+ 		got = CopyGetData(cstate, cstate->raw_buf + total, 1, capacity - total - 1);
+ 		total += got;
+ 		if (got <= 0)
+ 			break;				/* nothing more to read */
+ 	}
+ 
+ 	cstate->raw_buf[total] = '\0';
+ 	cstate->raw_buf_index = 0;
+ 	cstate->raw_buf_len = total;
+ }
  
  /*
   *	 DoCopy executes the SQL COPY statement
*************** ProcessCopyOptions(CopyState cstate,
*** 1013,1018 ****
--- 1067,1079 ----
  				cstate->csv_mode = true;
  			else if (strcmp(fmt, "binary") == 0)
  				cstate->binary = true;
+ 			else if (strcmp(fmt, "raw_text") == 0)
+ 				cstate->raw = true;
+ 			else if (strcmp(fmt, "raw_binary") == 0)
+ 			{
+ 				cstate->binary = true;
+ 				cstate->raw = true;
+ 			}
  			else
  				ereport(ERROR,
  						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
*************** ProcessCopyOptions(CopyState cstate,
*** 1162,1176 ****
  	 * Check for incompatible options (must do these two before inserting
  	 * defaults)
  	 */
! 	if (cstate->binary && cstate->delim)
  		ereport(ERROR,
  				(errcode(ERRCODE_SYNTAX_ERROR),
! 				 errmsg("cannot specify DELIMITER in BINARY mode")));
  
! 	if (cstate->binary && cstate->null_print)
  		ereport(ERROR,
  				(errcode(ERRCODE_SYNTAX_ERROR),
! 				 errmsg("cannot specify NULL in BINARY mode")));
  
  	/* Set defaults for omitted options */
  	if (!cstate->delim)
--- 1223,1242 ----
  	 * Check for incompatible options (must do these two before inserting
  	 * defaults)
  	 */
! 	if ((cstate->binary || cstate->raw) && cstate->delim)
  		ereport(ERROR,
  				(errcode(ERRCODE_SYNTAX_ERROR),
! 				 errmsg("cannot specify DELIMITER in BINARY or RAW mode")));
  
! 	if ((cstate->binary || cstate->raw) && cstate->null_print)
  		ereport(ERROR,
  				(errcode(ERRCODE_SYNTAX_ERROR),
! 				 errmsg("cannot specify NULL in BINARY or RAW mode")));
! 
! 	if (cstate->raw && cstate->oids)
! 		ereport(ERROR,
! 				(errcode(ERRCODE_SYNTAX_ERROR),
! 				 errmsg("cannot specify OIDS in RAW mode")));
  
  	/* Set defaults for omitted options */
  	if (!cstate->delim)
*************** BeginCopy(bool is_from,
*** 1608,1613 ****
--- 1674,1693 ----
  		}
  	}
  
+ 	/*
+ 	 * Initialize the "row_processed" field, which enforces single-row output
+ 	 * in RAW mode, and ensure there is only one output column.
+ 	 */
+ 	if (cstate->raw)
+ 	{
+ 		cstate->row_processed = false;
+ 
+ 		if (num_phys_attrs > 1)
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ 					 errmsg("only single column result is allowed in RAW mode")));
+ 	}
+ 
  	/* Use client encoding when ENCODING option is not specified. */
  	if (cstate->file_encoding < 0)
  		cstate->file_encoding = pg_get_client_encoding();
*************** CopyTo(CopyState cstate)
*** 1899,1905 ****
  											   ALLOCSET_DEFAULT_INITSIZE,
  											   ALLOCSET_DEFAULT_MAXSIZE);
  
! 	if (cstate->binary)
  	{
  		/* Generate header for a binary copy */
  		int32		tmp;
--- 1979,1985 ----
  											   ALLOCSET_DEFAULT_INITSIZE,
  											   ALLOCSET_DEFAULT_MAXSIZE);
  
! 	if (!cstate->raw && cstate->binary)
  	{
  		/* Generate header for a binary copy */
  		int32		tmp;
*************** CopyTo(CopyState cstate)
*** 1927,1933 ****
  													  cstate->file_encoding);
  
  		/* if a header has been requested send the line */
! 		if (cstate->header_line)
  		{
  			bool		hdr_delim = false;
  
--- 2007,2013 ----
  													  cstate->file_encoding);
  
  		/* if a header has been requested send the line */
! 		if (!cstate->raw && cstate->header_line)
  		{
  			bool		hdr_delim = false;
  
*************** CopyTo(CopyState cstate)
*** 1983,1993 ****
  	else
  	{
  		/* run the plan --- the dest receiver will send tuples */
! 		ExecutorRun(cstate->queryDesc, ForwardScanDirection, 0L);
  		processed = ((DR_copy *) cstate->queryDesc->dest)->processed;
  	}
  
! 	if (cstate->binary)
  	{
  		/* Generate trailer for a binary copy */
  		CopySendInt16(cstate, -1);
--- 2063,2073 ----
  	else
  	{
  		/* run the plan --- the dest receiver will send tuples */
! 		ExecutorRun(cstate->queryDesc, ForwardScanDirection, cstate->raw ? 2L : 0L);
  		processed = ((DR_copy *) cstate->queryDesc->dest)->processed;
  	}
  
! 	if (!cstate->raw && cstate->binary)
  	{
  		/* Generate trailer for a binary copy */
  		CopySendInt16(cstate, -1);
*************** CopyOneRowTo(CopyState cstate, Oid tuple
*** 2015,2021 ****
  	MemoryContextReset(cstate->rowcontext);
  	oldcontext = MemoryContextSwitchTo(cstate->rowcontext);
  
! 	if (cstate->binary)
  	{
  		/* Binary per-tuple header */
  		CopySendInt16(cstate, list_length(cstate->attnumlist));
--- 2095,2109 ----
  	MemoryContextReset(cstate->rowcontext);
  	oldcontext = MemoryContextSwitchTo(cstate->rowcontext);
  
! 	if (cstate->raw)
! 	{
! 		if (cstate->row_processed)
! 			ereport(ERROR,
! 					(errcode(ERRCODE_TOO_MANY_ROWS),
! 					 errmsg("only single row result is allowed in RAW mode")));
! 		cstate->row_processed = true;
! 	}
! 	else if (cstate->binary)
  	{
  		/* Binary per-tuple header */
  		CopySendInt16(cstate, list_length(cstate->attnumlist));
*************** CopyOneRowTo(CopyState cstate, Oid tuple
*** 2046,2052 ****
  		Datum		value = values[attnum - 1];
  		bool		isnull = nulls[attnum - 1];
  
! 		if (!cstate->binary)
  		{
  			if (need_delim)
  				CopySendChar(cstate, cstate->delim[0]);
--- 2134,2140 ----
  		Datum		value = values[attnum - 1];
  		bool		isnull = nulls[attnum - 1];
  
! 		if (!(cstate->binary || cstate->raw))
  		{
  			if (need_delim)
  				CopySendChar(cstate, cstate->delim[0]);
*************** CopyOneRowTo(CopyState cstate, Oid tuple
*** 2055,2068 ****
  
  		if (isnull)
  		{
! 			if (!cstate->binary)
  				CopySendString(cstate, cstate->null_print_client);
  			else
  				CopySendInt32(cstate, -1);
  		}
  		else
  		{
! 			if (!cstate->binary)
  			{
  				string = OutputFunctionCall(&out_functions[attnum - 1],
  											value);
--- 2143,2174 ----
  
  		if (isnull)
  		{
! 			if (cstate->raw)
! 					ereport(ERROR,
! 						(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
! 						  errmsg("cannot to copy NULL value in RAW mode.")));
! 			else if (!cstate->binary)
  				CopySendString(cstate, cstate->null_print_client);
  			else
  				CopySendInt32(cstate, -1);
  		}
  		else
  		{
! 			if (cstate->binary)
! 			{
! 				bytea	   *outputbytes;
! 
! 				outputbytes = SendFunctionCall(&out_functions[attnum - 1],
! 											   value);
! 
! 				/* send the size only in binary mode */
! 				if (!cstate->raw)
! 					CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
! 
! 				CopySendData(cstate, VARDATA(outputbytes),
! 							 VARSIZE(outputbytes) - VARHDRSZ);
! 			}
! 			else
  			{
  				string = OutputFunctionCall(&out_functions[attnum - 1],
  											value);
*************** CopyOneRowTo(CopyState cstate, Oid tuple
*** 2073,2088 ****
  				else
  					CopyAttributeOutText(cstate, string);
  			}
- 			else
- 			{
- 				bytea	   *outputbytes;
- 
- 				outputbytes = SendFunctionCall(&out_functions[attnum - 1],
- 											   value);
- 				CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
- 				CopySendData(cstate, VARDATA(outputbytes),
- 							 VARSIZE(outputbytes) - VARHDRSZ);
- 			}
  		}
  	}
  
--- 2179,2184 ----
*************** BeginCopyFrom(Relation rel,
*** 2811,2859 ****
  		}
  	}
  
! 	if (!cstate->binary)
! 	{
! 		/* must rely on user to tell us... */
! 		cstate->file_has_oids = cstate->oids;
! 	}
! 	else
  	{
! 		/* Read and verify binary header */
! 		char		readSig[11];
! 		int32		tmp;
! 
! 		/* Signature */
! 		if (CopyGetData(cstate, readSig, 11, 11) != 11 ||
! 			memcmp(readSig, BinarySignature, 11) != 0)
! 			ereport(ERROR,
! 					(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 					 errmsg("COPY file signature not recognized")));
! 		/* Flags field */
! 		if (!CopyGetInt32(cstate, &tmp))
! 			ereport(ERROR,
! 					(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 					 errmsg("invalid COPY file header (missing flags)")));
! 		cstate->file_has_oids = (tmp & (1 << 16)) != 0;
! 		tmp &= ~(1 << 16);
! 		if ((tmp >> 16) != 0)
! 			ereport(ERROR,
! 					(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 				 errmsg("unrecognized critical flags in COPY file header")));
! 		/* Header extension length */
! 		if (!CopyGetInt32(cstate, &tmp) ||
! 			tmp < 0)
! 			ereport(ERROR,
! 					(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 					 errmsg("invalid COPY file header (missing length)")));
! 		/* Skip extension header, if present */
! 		while (tmp-- > 0)
  		{
! 			if (CopyGetData(cstate, readSig, 1, 1) != 1)
  				ereport(ERROR,
  						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 						 errmsg("invalid COPY file header (wrong length)")));
  		}
  	}
  
  	if (cstate->file_has_oids && cstate->binary)
  	{
--- 2907,2956 ----
  		}
  	}
  
! 	if (cstate->binary)
  	{
! 		if (!cstate->raw)
  		{
! 			/* Read and verify binary header */
! 			char		readSig[11];
! 			int32		tmp;
! 
! 			/* Signature */
! 			if (CopyGetData(cstate, readSig, 11, 11) != 11 ||
! 				memcmp(readSig, BinarySignature, 11) != 0)
  				ereport(ERROR,
  						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 						 errmsg("COPY file signature not recognized")));
! 			/* Flags field */
! 			if (!CopyGetInt32(cstate, &tmp))
! 				ereport(ERROR,
! 						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 						 errmsg("invalid COPY file header (missing flags)")));
! 			cstate->file_has_oids = (tmp & (1 << 16)) != 0;
! 			tmp &= ~(1 << 16);
! 			if ((tmp >> 16) != 0)
! 				ereport(ERROR,
! 						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 					 errmsg("unrecognized critical flags in COPY file header")));
! 			/* Header extension length */
! 			if (!CopyGetInt32(cstate, &tmp) ||
! 				tmp < 0)
! 				ereport(ERROR,
! 						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 						 errmsg("invalid COPY file header (missing length)")));
! 			/* Skip extension header, if present */
! 			while (tmp-- > 0)
! 			{
! 				if (CopyGetData(cstate, readSig, 1, 1) != 1)
! 					ereport(ERROR,
! 							(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 							 errmsg("invalid COPY file header (wrong length)")));
! 			}
  		}
  	}
+ 	else
+ 		cstate->file_has_oids = cstate->oids;
+ 
  
  	if (cstate->file_has_oids && cstate->binary)
  	{
*************** NextCopyFromRawFields(CopyState cstate,
*** 2918,2928 ****
  	if (done && cstate->line_buf.len == 0)
  		return false;
  
! 	/* Parse the line into de-escaped field values */
! 	if (cstate->csv_mode)
! 		fldct = CopyReadAttributesCSV(cstate);
  	else
! 		fldct = CopyReadAttributesText(cstate);
  
  	*fields = cstate->raw_fields;
  	*nfields = fldct;
--- 3015,3051 ----
  	if (done && cstate->line_buf.len == 0)
  		return false;
  
! 	/* try to read all content in raw mode */
! 	if (cstate->raw)
! 	{
! 		StringInfoData	lines;
! 
! 		initStringInfo(&lines);
! 
! 		do
! 		{
! 			if (lines.len > 0)
! 				appendStringInfoChar(&lines, '\n');
! 
! 			appendBinaryStringInfo(&lines, cstate->line_buf.data, cstate->line_buf.len);
! 
! 			cstate->cur_lineno++;
! 			done = CopyReadLine(cstate);
! 		} while (!(done && cstate->line_buf.len == 0));
! 
! 		appendStringInfoChar(&lines, '\0');
! 
! 		cstate->raw_fields[0] = &lines.data;
! 		fldct = 1;
! 	}
  	else
! 	{
! 		/* Parse the line into de-escaped field values */
! 		if (cstate->csv_mode)
! 			fldct = CopyReadAttributesCSV(cstate);
! 		else
! 			fldct = CopyReadAttributesText(cstate);
! 	}
  
  	*fields = cstate->raw_fields;
  	*nfields = fldct;
*************** NextCopyFrom(CopyState cstate, ExprConte
*** 2968,2975 ****
  	MemSet(values, 0, num_phys_attrs * sizeof(Datum));
  	MemSet(nulls, true, num_phys_attrs * sizeof(bool));
  
! 	if (!cstate->binary)
  	{
  		char	  **field_strings;
  		ListCell   *cur;
  		int			fldct;
--- 3091,3210 ----
  	MemSet(values, 0, num_phys_attrs * sizeof(Datum));
  	MemSet(nulls, true, num_phys_attrs * sizeof(bool));
  
! 	if (cstate->binary && !cstate->raw)
! 	{
! 		int16		fld_count;
! 		ListCell   *cur;
! 
! 		cstate->cur_lineno++;
! 
! 		if (!CopyGetInt16(cstate, &fld_count))
! 		{
! 			/* EOF detected (end of file, or protocol-level EOF) */
! 			return false;
! 		}
! 
! 		if (fld_count == -1)
! 		{
! 			/*
! 			 * Received EOF marker.  In a V3-protocol copy, wait for the
! 			 * protocol-level EOF, and complain if it doesn't come
! 			 * immediately.  This ensures that we correctly handle CopyFail,
! 			 * if client chooses to send that now.
! 			 *
! 			 * Note that we MUST NOT try to read more data in an old-protocol
! 			 * copy, since there is no protocol-level EOF marker then.  We
! 			 * could go either way for copy from file, but choose to throw
! 			 * error if there's data after the EOF marker, for consistency
! 			 * with the new-protocol case.
! 			 */
! 			char		dummy;
! 
! 			if (cstate->copy_dest != COPY_OLD_FE &&
! 				CopyGetData(cstate, &dummy, 1, 1) > 0)
! 				ereport(ERROR,
! 						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 						 errmsg("received copy data after EOF marker")));
! 			return false;
! 		}
! 
! 		if (fld_count != attr_count)
! 			ereport(ERROR,
! 					(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 					 errmsg("row field count is %d, expected %d",
! 							(int) fld_count, attr_count)));
! 
! 		if (file_has_oids)
! 		{
! 			Oid			loaded_oid;
! 
! 			cstate->cur_attname = "oid";
! 			loaded_oid =
! 				DatumGetObjectId(CopyReadBinaryAttribute(cstate,
! 														 0,
! 													&cstate->oid_in_function,
! 													  cstate->oid_typioparam,
! 														 -1,
! 														 &isnull));
! 			if (isnull || loaded_oid == InvalidOid)
! 				ereport(ERROR,
! 						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 						 errmsg("invalid OID in COPY data")));
! 			cstate->cur_attname = NULL;
! 			if (cstate->oids && tupleOid != NULL)
! 				*tupleOid = loaded_oid;
! 		}
! 
! 		i = 0;
! 		foreach(cur, cstate->attnumlist)
! 		{
! 			int			attnum = lfirst_int(cur);
! 			int			m = attnum - 1;
! 
! 			cstate->cur_attname = NameStr(attr[m]->attname);
! 			i++;
! 			values[m] = CopyReadBinaryAttribute(cstate,
! 												i,
! 												&in_functions[m],
! 												typioparams[m],
! 												attr[m]->atttypmod,
! 												&nulls[m]);
! 			cstate->cur_attname = NULL;
! 		}
! 	}
! 	else if (cstate->binary && cstate->raw)
! 	{
! 		if (cstate->row_processed)
! 			return false;
! 
! 		CopyLoadallRawBuf(cstate);
! 		cstate->cur_attname = NameStr(attr[0]->attname);
! 
! 		if (cstate->attribute_buf.data != NULL)
! 			pfree(cstate->attribute_buf.data);
! 
! 		cstate->attribute_buf.data = cstate->raw_buf;
! 		cstate->attribute_buf.len = cstate->raw_buf_len;
! 		cstate->attribute_buf.cursor = 0;
! 
! 		cstate->raw_buf = NULL;
! 
! 		/* Call the column type's binary input converter */
! 		values[0] = ReceiveFunctionCall(&in_functions[0], &cstate->attribute_buf,
! 								 typioparams[0], attr[0]->atttypmod);
! 		nulls[0] = false;
! 
! 		/* Trouble if it didn't eat the whole buffer */
! 		if (cstate->attribute_buf.cursor != cstate->attribute_buf.len)
! 			ereport(ERROR,
! 					(errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
! 					 errmsg("incorrect binary data format")));
! 
! 		cstate->row_processed = true;
! 	}
! 	else
  	{
+ 		/* text */
  		char	  **field_strings;
  		ListCell   *cur;
  		int			fldct;
*************** NextCopyFrom(CopyState cstate, ExprConte
*** 3074,3161 ****
  
  		Assert(fieldno == nfields);
  	}
- 	else
- 	{
- 		/* binary */
- 		int16		fld_count;
- 		ListCell   *cur;
- 
- 		cstate->cur_lineno++;
- 
- 		if (!CopyGetInt16(cstate, &fld_count))
- 		{
- 			/* EOF detected (end of file, or protocol-level EOF) */
- 			return false;
- 		}
- 
- 		if (fld_count == -1)
- 		{
- 			/*
- 			 * Received EOF marker.  In a V3-protocol copy, wait for the
- 			 * protocol-level EOF, and complain if it doesn't come
- 			 * immediately.  This ensures that we correctly handle CopyFail,
- 			 * if client chooses to send that now.
- 			 *
- 			 * Note that we MUST NOT try to read more data in an old-protocol
- 			 * copy, since there is no protocol-level EOF marker then.  We
- 			 * could go either way for copy from file, but choose to throw
- 			 * error if there's data after the EOF marker, for consistency
- 			 * with the new-protocol case.
- 			 */
- 			char		dummy;
- 
- 			if (cstate->copy_dest != COPY_OLD_FE &&
- 				CopyGetData(cstate, &dummy, 1, 1) > 0)
- 				ereport(ERROR,
- 						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
- 						 errmsg("received copy data after EOF marker")));
- 			return false;
- 		}
- 
- 		if (fld_count != attr_count)
- 			ereport(ERROR,
- 					(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
- 					 errmsg("row field count is %d, expected %d",
- 							(int) fld_count, attr_count)));
- 
- 		if (file_has_oids)
- 		{
- 			Oid			loaded_oid;
- 
- 			cstate->cur_attname = "oid";
- 			loaded_oid =
- 				DatumGetObjectId(CopyReadBinaryAttribute(cstate,
- 														 0,
- 													&cstate->oid_in_function,
- 													  cstate->oid_typioparam,
- 														 -1,
- 														 &isnull));
- 			if (isnull || loaded_oid == InvalidOid)
- 				ereport(ERROR,
- 						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
- 						 errmsg("invalid OID in COPY data")));
- 			cstate->cur_attname = NULL;
- 			if (cstate->oids && tupleOid != NULL)
- 				*tupleOid = loaded_oid;
- 		}
- 
- 		i = 0;
- 		foreach(cur, cstate->attnumlist)
- 		{
- 			int			attnum = lfirst_int(cur);
- 			int			m = attnum - 1;
- 
- 			cstate->cur_attname = NameStr(attr[m]->attname);
- 			i++;
- 			values[m] = CopyReadBinaryAttribute(cstate,
- 												i,
- 												&in_functions[m],
- 												typioparams[m],
- 												attr[m]->atttypmod,
- 												&nulls[m]);
- 			cstate->cur_attname = NULL;
- 		}
- 	}
  
  	/*
  	 * Now compute and insert any defaults available for the columns not
--- 3309,3314 ----
*************** CopyAttributeOutText(CopyState cstate, c
*** 4143,4148 ****
--- 4296,4312 ----
  		ptr = string;
  
  	/*
+ 	 * Do not do any escaping when raw mode is used. In this mode only one
+ 	 * field is passed, so escaping is pointless. We want to emit the raw data,
+ 	 * i.e. without escaping.
+ 	 */
+ 	if (cstate->raw)
+ 	{
+ 		CopySendString(cstate, ptr);
+ 		return;
+ 	}
+ 
+ 	/*
  	 * We have to grovel through the string searching for control characters
  	 * and instances of the delimiter character.  In most cases, though, these
  	 * are infrequent.  To avoid overhead from calling CopySendData once per
*************** CopyAttributeOutText(CopyState cstate, c
*** 4156,4162 ****
  	 * it's worth making two copies of it to get the IS_HIGHBIT_SET() test out
  	 * of the normal safe-encoding path.
  	 */
! 	if (cstate->encoding_embeds_ascii)
  	{
  		start = ptr;
  		while ((c = *ptr) != '\0')
--- 4320,4326 ----
  	 * it's worth making two copies of it to get the IS_HIGHBIT_SET() test out
  	 * of the normal safe-encoding path.
  	 */
! 	else if (cstate->encoding_embeds_ascii)
  	{
  		start = ptr;
  		while ((c = *ptr) != '\0')
diff --git a/src/bin/psql/common.c b/src/bin/psql/common.c
new file mode 100644
index 892058e..777a375
*** a/src/bin/psql/common.c
--- b/src/bin/psql/common.c
*************** ProcessResult(PGresult **results)
*** 871,876 ****
--- 871,877 ----
  			{
  				if (!copystream)
  					copystream = pset.cur_cmd_source;
+ 
  				success = handleCopyIn(pset.db,
  									   copystream,
  									   PQbinaryTuples(*results),
diff --git a/src/interfaces/libpq/Makefile b/src/interfaces/libpq/Makefile
new file mode 100644
index 1b292d2..83b30b0
*** a/src/interfaces/libpq/Makefile
--- b/src/interfaces/libpq/Makefile
*************** include $(top_builddir)/src/Makefile.glo
*** 17,23 ****
  # shared library parameters
  NAME= pq
  SO_MAJOR_VERSION= 5
! SO_MINOR_VERSION= 9
  
  override CPPFLAGS :=  -DFRONTEND -DUNSAFE_STAT_OK -I$(srcdir) $(CPPFLAGS) -I$(top_builddir)/src/port -I$(top_srcdir)/src/port
  ifneq ($(PORTNAME), win32)
--- 17,23 ----
  # shared library parameters
  NAME= pq
  SO_MAJOR_VERSION= 5
! SO_MINOR_VERSION= 10
  
  override CPPFLAGS :=  -DFRONTEND -DUNSAFE_STAT_OK -I$(srcdir) $(CPPFLAGS) -I$(top_builddir)/src/port -I$(top_srcdir)/src/port
  ifneq ($(PORTNAME), win32)
diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c
new file mode 100644
index 41937c0..096ed6b
*** a/src/interfaces/libpq/fe-exec.c
--- b/src/interfaces/libpq/fe-exec.c
*************** PQmakeEmptyPGresult(PGconn *conn, ExecSt
*** 155,160 ****
--- 155,161 ----
  	result->resultStatus = status;
  	result->cmdStatus[0] = '\0';
  	result->binary = 0;
+ 	result->raw = 0;
  	result->events = NULL;
  	result->nEvents = 0;
  	result->errMsg = NULL;
*************** PQsetResultAttrs(PGresult *res, int numA
*** 245,250 ****
--- 246,252 ----
  
  	/* deep-copy the attribute names, and determine format */
  	res->binary = 1;
+ 	res->raw = 0;
  	for (i = 0; i < res->numAttributes; i++)
  	{
  		if (res->attDescs[i].name)
*************** PQsetResultAttrs(PGresult *res, int numA
*** 255,262 ****
  		if (!res->attDescs[i].name)
  			return FALSE;
  
! 		if (res->attDescs[i].format == 0)
  			res->binary = 0;
  	}
  
  	return TRUE;
--- 257,266 ----
  		if (!res->attDescs[i].name)
  			return FALSE;
  
! 		if (res->attDescs[i].format == 0 || res->attDescs[i].format == 2)
  			res->binary = 0;
+ 		if (res->attDescs[i].format == 2 || res->attDescs[i].format == 3)
+ 			res->raw = 1;
  	}
  
  	return TRUE;
*************** PQcopyResult(const PGresult *src, int fl
*** 372,377 ****
--- 376,382 ----
  	return dest;
  }
  
+ 
  /*
   * Copy an array of PGEvents (with no extra space for more).
   * Does not duplicate the event instance data, sets this to NULL.
*************** PQbinaryTuples(const PGresult *res)
*** 2634,2639 ****
--- 2639,2645 ----
  {
  	if (!res)
  		return 0;
+ 
  	return res->binary;
  }
  
*************** PQfmod(const PGresult *res, int field_nu
*** 2884,2889 ****
--- 2890,2910 ----
  		return 0;
  }
  
+ /*
+  * PQcopyFormat
+  *
+  * Returns the COPY format of a result: 0 = text, 1 = binary, 2 = raw.
+  * Returns -1 to signal an error, i.e. when res is NULL.
+  */
+ int
+ PQcopyFormat(const PGresult *res)
+ {
+ 	if (!res)
+ 		return -1;
+ 
+ 	return res->raw ? 2 : res->binary;
+ }
+ 
  char *
  PQcmdStatus(PGresult *res)
  {
diff --git a/src/interfaces/libpq/fe-protocol3.c b/src/interfaces/libpq/fe-protocol3.c
new file mode 100644
index 43898a4..3934b1d
*** a/src/interfaces/libpq/fe-protocol3.c
--- b/src/interfaces/libpq/fe-protocol3.c
*************** getCopyStart(PGconn *conn, ExecStatusTyp
*** 1397,1402 ****
--- 1397,1403 ----
  
  	if (pqGetc(&conn->copy_is_binary, conn))
  		goto failure;
+ 
  	result->binary = conn->copy_is_binary;
  	/* the next two bytes are the number of fields	*/
  	if (pqGetInt(&(result->numAttributes), 2, conn))
*************** getCopyStart(PGconn *conn, ExecStatusTyp
*** 1426,1431 ****
--- 1427,1436 ----
  		 */
  		format = (int) ((int16) format);
  		result->attDescs[i].format = format;
+ 
+ 		/* if any field uses a raw format, then COPY RAW was used */
+ 		if (format == 2 || format == 3)
+ 			result->raw = true;
  	}
  
  	/* Success! */
diff --git a/src/interfaces/libpq/libpq-fe.h b/src/interfaces/libpq/libpq-fe.h
new file mode 100644
index 6bf34b3..9f7903a
*** a/src/interfaces/libpq/libpq-fe.h
--- b/src/interfaces/libpq/libpq-fe.h
*************** extern int	PQfformat(const PGresult *res
*** 475,480 ****
--- 475,481 ----
  extern Oid	PQftype(const PGresult *res, int field_num);
  extern int	PQfsize(const PGresult *res, int field_num);
  extern int	PQfmod(const PGresult *res, int field_num);
+ extern int	PQcopyFormat(const PGresult *res);
  extern char *PQcmdStatus(PGresult *res);
  extern char *PQoidStatus(const PGresult *res);	/* old and ugly */
  extern Oid	PQoidValue(const PGresult *res);	/* new and improved */
diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h
new file mode 100644
index 6c9bbf7..6b4d923
*** a/src/interfaces/libpq/libpq-int.h
--- b/src/interfaces/libpq/libpq-int.h
*************** struct pg_result
*** 180,185 ****
--- 180,186 ----
  	char		cmdStatus[CMDSTATUS_LEN];		/* cmd status from the query */
  	int			binary;			/* binary tuple values if binary == 1,
  								 * otherwise text */
+ 	int			raw;			/* only values */
  
  	/*
  	 * These fields are copied from the originating PGconn, so that operations
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
new file mode 100644
index 5f6260a..e31c4f2
*** a/src/test/regress/expected/copy2.out
--- b/src/test/regress/expected/copy2.out
*************** DROP FUNCTION truncate_in_subxact();
*** 466,468 ****
--- 466,481 ----
  DROP TABLE x, y;
  DROP FUNCTION fn_x_before();
  DROP FUNCTION fn_x_after();
+ CREATE TABLE x(a bytea);
+ INSERT INTO x VALUES('\x41484f4a0a');
+ INSERT INTO x VALUES('\x41484f4a0a');
+ -- should to fail
+ COPY (SELECT a,a FROM x LIMIT 1) TO STDOUT (FORMAT raw_binary);
+ ERROR:  only single column result is allowed in RAW mode
+ COPY (SELECT a FROM x) TO STDOUT (FORMAT raw_binary);
+ AHOJ
+ ERROR:  only single row result is allowed in RAW mode
+ -- should be ok
+ COPY (SELECT a FROM x LIMIT 1) TO STDOUT (FORMAT raw_binary);
+ AHOJ
+ DROP TABLE x;
diff --git a/src/test/regress/input/copy.source b/src/test/regress/input/copy.source
new file mode 100644
index cb13606..e25f996
*** a/src/test/regress/input/copy.source
--- b/src/test/regress/input/copy.source
*************** this is just a line full of junk that wo
*** 133,135 ****
--- 133,195 ----
  \.
  
  copy copytest3 to stdout csv header;
+ 
+ -- copy raw
+ CREATE TABLE x(a bytea);
+ INSERT INTO x VALUES('\x41484f4a0a');
+ SELECT length(a) FROM x;
+ 
+ INSERT INTO x VALUES('\x41484f4a0a');
+ 
+ -- should to fail
+ COPY (SELECT a,a FROM x LIMIT 1) TO '@abs_builddir@/results/raw.data' (FORMAT raw_binary);
+ COPY (SELECT a FROM x) TO '@abs_builddir@/results/raw.data' (FORMAT raw_binary);
+ 
+ -- should be ok
+ COPY (SELECT a FROM x LIMIT 1) TO '@abs_builddir@/results/raw.data' (FORMAT raw_binary);
+ TRUNCATE x;
+ COPY x FROM '@abs_builddir@/results/raw.data' (FORMAT raw_binary);
+ SELECT length(a) FROM x;
+ COPY x TO stdout (FORMAT raw_binary);
+ 
+ TRUNCATE x;
+ 
+ \COPY x FROM '@abs_builddir@/results/raw.data' (FORMAT raw_binary)
+ SELECT length(a) FROM x;
+ COPY x TO stdout (FORMAT raw_binary);
+ 
+ \COPY x TO '@abs_builddir@/results/raw2.data' (FORMAT raw_binary)
+ TRUNCATE x;
+ 
+ \COPY x FROM '@abs_builddir@/results/raw2.data' (FORMAT raw_binary)
+ SELECT length(a) FROM x;
+ COPY x TO stdout (FORMAT raw_binary);
+ 
+ -- test big file
+ TRUNCATE x;
+ -- use different mechanism for load to bytea
+ \lo_import @abs_builddir@/data/hash.data
+ \set lo_oid :LASTOID
+ INSERT INTO x VALUES(lo_get(:lo_oid));
+ \lo_unlink :lo_oid
+ 
+ COPY x FROM '@abs_builddir@/data/hash.data' (FORMAT raw_binary);
+ \COPY x FROM '@abs_builddir@/data/hash.data' (FORMAT raw_binary)
+ 
+ SELECT md5(a), length(a) FROM x;
+ 
+ TRUNCATE x;
+ COPY x FROM '@abs_builddir@/data/hash.data' (FORMAT raw_binary);
+ COPY x TO '@abs_builddir@/results/hash2.data' (FORMAT raw_binary);
+ \COPY x TO '@abs_builddir@/results/hash3.data' (FORMAT raw_binary)
+ 
+ -- read again
+ COPY x FROM '@abs_builddir@/results/hash2.data' (FORMAT raw_binary);
+ \COPY x FROM '@abs_builddir@/results/hash3.data' (FORMAT raw_binary)
+ -- cross
+ COPY x FROM '@abs_builddir@/results/hash3.data' (FORMAT raw_binary);
+ \COPY x FROM '@abs_builddir@/results/hash2.data' (FORMAT raw_binary)
+ 
+ SELECT md5(a), length(a) FROM x;
+ 
+ DROP TABLE x;
diff --git a/src/test/regress/output/copy.source b/src/test/regress/output/copy.source
new file mode 100644
index b7e372d..6c82993
*** a/src/test/regress/output/copy.source
--- b/src/test/regress/output/copy.source
*************** copy copytest3 to stdout csv header;
*** 95,97 ****
--- 95,183 ----
  c1,"col with , comma","col with "" quote"
  1,a,1
  2,b,2
+ -- copy raw
+ CREATE TABLE x(a bytea);
+ INSERT INTO x VALUES('\x41484f4a0a');
+ SELECT length(a) FROM x;
+  length 
+ --------
+       5
+ (1 row)
+ 
+ INSERT INTO x VALUES('\x41484f4a0a');
+ -- should to fail
+ COPY (SELECT a,a FROM x LIMIT 1) TO '@abs_builddir@/results/raw.data' (FORMAT raw_binary);
+ ERROR:  only single column result is allowed in RAW mode
+ COPY (SELECT a FROM x) TO '@abs_builddir@/results/raw.data' (FORMAT raw_binary);
+ ERROR:  only single row result is allowed in RAW mode
+ -- should be ok
+ COPY (SELECT a FROM x LIMIT 1) TO '@abs_builddir@/results/raw.data' (FORMAT raw_binary);
+ TRUNCATE x;
+ COPY x FROM '@abs_builddir@/results/raw.data' (FORMAT raw_binary);
+ SELECT length(a) FROM x;
+  length 
+ --------
+       5
+ (1 row)
+ 
+ COPY x TO stdout (FORMAT raw_binary);
+ AHOJ
+ TRUNCATE x;
+ \COPY x FROM '@abs_builddir@/results/raw.data' (FORMAT raw_binary)
+ SELECT length(a) FROM x;
+  length 
+ --------
+       5
+ (1 row)
+ 
+ COPY x TO stdout (FORMAT raw_binary);
+ AHOJ
+ \COPY x TO '@abs_builddir@/results/raw2.data' (FORMAT raw_binary)
+ TRUNCATE x;
+ \COPY x FROM '@abs_builddir@/results/raw2.data' (FORMAT raw_binary)
+ SELECT length(a) FROM x;
+  length 
+ --------
+       5
+ (1 row)
+ 
+ COPY x TO stdout (FORMAT raw_binary);
+ AHOJ
+ -- test big file
+ TRUNCATE x;
+ -- use different mechanism for load to bytea
+ \lo_import @abs_builddir@/data/hash.data
+ \set lo_oid :LASTOID
+ INSERT INTO x VALUES(lo_get(:lo_oid));
+ \lo_unlink :lo_oid
+ COPY x FROM '@abs_builddir@/data/hash.data' (FORMAT raw_binary);
+ \COPY x FROM '@abs_builddir@/data/hash.data' (FORMAT raw_binary)
+ SELECT md5(a), length(a) FROM x;
+                md5                | length 
+ ----------------------------------+--------
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+ (3 rows)
+ 
+ TRUNCATE x;
+ COPY x FROM '@abs_builddir@/data/hash.data' (FORMAT raw_binary);
+ COPY x TO '@abs_builddir@/results/hash2.data' (FORMAT raw_binary);
+ \COPY x TO '@abs_builddir@/results/hash3.data' (FORMAT raw_binary)
+ -- read again
+ COPY x FROM '@abs_builddir@/results/hash2.data' (FORMAT raw_binary);
+ \COPY x FROM '@abs_builddir@/results/hash3.data' (FORMAT raw_binary)
+ -- cross
+ COPY x FROM '@abs_builddir@/results/hash3.data' (FORMAT raw_binary);
+ \COPY x FROM '@abs_builddir@/results/hash2.data' (FORMAT raw_binary)
+ SELECT md5(a), length(a) FROM x;
+                md5                | length 
+ ----------------------------------+--------
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+ (5 rows)
+ 
+ DROP TABLE x;
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
new file mode 100644
index 39a9deb..7e22ee4
*** a/src/test/regress/sql/copy2.sql
--- b/src/test/regress/sql/copy2.sql
*************** DROP FUNCTION truncate_in_subxact();
*** 333,335 ****
--- 333,348 ----
  DROP TABLE x, y;
  DROP FUNCTION fn_x_before();
  DROP FUNCTION fn_x_after();
+ 
+ CREATE TABLE x(a bytea);
+ INSERT INTO x VALUES('\x41484f4a0a');
+ INSERT INTO x VALUES('\x41484f4a0a');
+ 
+ -- should to fail
+ COPY (SELECT a,a FROM x LIMIT 1) TO STDOUT (FORMAT raw_binary);
+ COPY (SELECT a FROM x) TO STDOUT (FORMAT raw_binary);
+ 
+ -- should be ok
+ COPY (SELECT a FROM x LIMIT 1) TO STDOUT (FORMAT raw_binary);
+ 
+ DROP TABLE x;
#53Craig Ringer
craig@2ndquadrant.com
In reply to: Tom Lane (#46)
Re: raw output from copy

On 30 March 2016 at 00:19, Tom Lane <tgl@sss.pgh.pa.us> wrote:

Pavel Stehule <pavel.stehule@gmail.com> writes:

I tested COPY RAW on old psql clients - and it is working without any
problem - so when the client uses same logic as psql, then it should to
work. Sure, there can be differently implemented clients, but the COPY
client side is usually simple - store stream to output.

My point is precisely that I doubt all clients are that stupid about COPY.

PgJDBC definitely isn't.

Any changes really need to be tested against PgJDBC's CopyManager.

--
Craig Ringer http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services

#54Pavel Stehule
pavel.stehule@gmail.com
In reply to: Craig Ringer (#53)
Re: raw output from copy

2016-03-31 8:34 GMT+02:00 Craig Ringer <craig@2ndquadrant.com>:

On 30 March 2016 at 00:19, Tom Lane <tgl@sss.pgh.pa.us> wrote:

Pavel Stehule <pavel.stehule@gmail.com> writes:

I tested COPY RAW on old psql clients - and it is working without any
problem - so when the client uses same logic as psql, then it should to
work. Sure, there can be differently implemented clients, but the COPY
client side is usually simple - store stream to output.

My point is precisely that I doubt all clients are that stupid about COPY.

PgJDBC definitely isn't.

Any changes really need to be tested against PgJDBC's CopyManager.

This patch doesn't break any old application. Accepting the new feature depends
on the client's method of binary-format detection. PQbinaryTuples-based clients
should support COPY RAW* without problems; PQfformat() should report an unknown format.

Regards

Pavel

Show quoted text

--
Craig Ringer http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services

#55Craig Ringer
craig@2ndquadrant.com
In reply to: Pavel Stehule (#54)
Re: raw output from copy

On 31 March 2016 at 14:40, Pavel Stehule <pavel.stehule@gmail.com> wrote:

this patch doesn't break any old application. Accepting new feature
depends on binary method detection. PQbinaryTuples based clients should
to support COPY RAW* without problems, PQfformat() should to report
unknown format.

PgJDBC does not use libpq.

--
Craig Ringer http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services

#56Pavel Stehule
pavel.stehule@gmail.com
In reply to: Craig Ringer (#55)
Re: raw output from copy

2016-03-31 9:48 GMT+02:00 Craig Ringer <craig@2ndquadrant.com>:

On 31 March 2016 at 14:40, Pavel Stehule <pavel.stehule@gmail.com> wrote:

this patch doesn't break any old application. Accepting new feature
depends on binary method detection. PQbinaryTuples based clients should
to support COPY RAW* without problems, PQfformat() should to report
unknown format.

PgJDBC does not use libpq.

So it could be an interesting test.

Pavel

Show quoted text

--
Craig Ringer http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services

#57Daniel Verite
daniel@manitou-mail.org
In reply to: Andrew Dunstan (#50)
Re: raw output from copy

Andrew Dunstan wrote:

If someone can make a good case that this is going to be of
general use I'll happily go along, but I haven't seen one so far.

About COPY FROM with a raw format, for instance just yesterday
there was this user question on stackoverflow:
http://stackoverflow.com/questions/36317237

which essentially is: how to import contents from a file without any
particular interpretation of any character?

With the patch discussed in this thread, a user can do
\copy table(textcol) from /path/to/file (format raw)
or the equivalent COPY.
If it's a binary column, that works just the same.

Without this, it's not obvious at all how this result can be
achieved without resorting to external preprocessing,
and assuming the availability of such preprocessing tools
in the environment. Notwithstanding the fact that the
solution proposed on SO (doubling backslashes with sed)
doesn't even work if the file contains tabs, as they would be
interpreted as field separators, even if the copy target has only
one column. You can change the delimiter with COPY but AFAIK
you can't tell that there is none.

Best regards,
--
Daniel Vérité
PostgreSQL-powered mailer: http://www.manitou-mail.org
Twitter: @DanielVerite

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#58Andrew Dunstan
andrew@dunslane.net
In reply to: Daniel Verite (#57)
Re: raw output from copy

On 04/01/2016 11:42 AM, Daniel Verite wrote:

Andrew Dunstan wrote:

If someone can make a good case that this is going to be of
general use I'll happily go along, but I haven't seen one so far.

About COPY FROM with a raw format, for instance just yesterday
there was this user question on stackoverflow:
http://stackoverflow.com/questions/36317237

which essentially is: how to import contents from a file without any
particular interpretation of any character?

There is so much wrong with this it's hard to know where to start.

Inserting the whole contents of a text file unchanged is insanely easy
in psql.

\set file `cat /path/to/file`
insert into mytable(contents) values(:'file');

What is more everyone on SO missed the fact that CSV mode gives you very
considerable control over the quote, delimiter and null settings.

See for example
<http://adpgtech.blogspot.com/2014/09/importing-json-data.html> which
has this example for handling files consisting of 1 json document per line:

copy the_table(jsonfield)
from '/path/to/jsondata'
csv quote e'\x01' delimiter e'\x02';

psql's \copy will work just the same way

(I noticed with amusement this week that CitusData is using pretty much
exactly this in one of their examples.)

With the patch discussed in this thread, a user can do
\copy table(textcol) from /path/to/file (format raw)
or the equivalent COPY.
If it's a binary column, that works just the same.

It would be fairly simple to invent a binary mechanism that did the
equivalent of the above insert. All without any change to SQL or the
backend at all.

Without this, it's not obvious at all how this result can be
achieved without resorting to external preprocessing,
and assuming the availability of such preprocessing tools
in the environment. Notwithstanding the fact that the
solution proposed on SO (doubling backslashes with sed)
doesn't even work if the file contains tabs, as they would be
interpreted as field separators, even if the copy target has only
one column. You can change the delimiter with COPY but AFAIK
you can't tell that there is none.

There is arguably a good case for allowing a null delimiter. But that SO
page is just a terrible piece of misinformation, as far too often
happens in my experience.

And I am still waiting for a non-psql use case. But I don't expect to
see one, precisely because most clients have no difficulty at all in
handling binary data.

cheers

andrew

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#59Daniel Verite
daniel@manitou-mail.org
In reply to: Andrew Dunstan (#58)
Re: raw output from copy

Andrew Dunstan wrote:

Inserting the whole contents of a text file unchanged is insanely easy
in psql.

\set file `cat /path/to/file`
insert into mytable(contents) values(:'file');

That's assuming psql but the asker of that question never mentioned
using psql. The COPY invocation could be inside a function. Even if
that particular user would be fine with a psql-only option, the next
one might not. Or they might want to import a binary file, and
as you mention, currently there's no equivalent of the :'var'
feature for binary.

But there's another aspect to this that's worthy of consideration,
and that this forum question illustrates.
One reason for adding the format to COPY is that it's where users
are looking for it. It's the canonical way of importing contents
from files so that's where it makes more sense.
From the POV of being user friendly and consistent, restricting what
COPY can do because psql could do it completely differently
if the user was psql-savvy enough to know it, what sense does it
make?

And I am still waiting for a non-psql use case. But I don't expect to
see one, precisely because most clients have no difficulty at all in
handling binary data.

You mean small or medium-size binary data. The 512MB-1GB range is
impossible to handle if requested in text format, which is what drivers
tend to use. Even pg_dump fails on these contents.
Maybe it was unimportant when bytea was added ~15 years ago,
but the size of data that people actually put into bytea columns is
growing, following Moore's law like the rest.

Even in the lower size range, considering the amount of memory allocated
and the time spent to convert to hex, sending twice the number
of bytes on the wire, just to do the reverse conversion in the client
as soon as all data is obtained, it works but it's pointless
and inefficient.

Code that uses PQexecParams() binary "resultFormat", or the
binary format of copy doesn't have that problem, but most
client-side drivers don't do that.

And maybe they just can't realistically, because getting result
format in binary is exposed as an all-or-nothing choice in libpq.

I mean if client code does SELECT * FROM table or even COPY of the
same, and what comes back is bytea and e.g. timestamps and floats and
custom types, the client-side driver may wish to have the bytea field in
binary format for efficiency and the rest in text format for
usability, but that's not possible with PQexecParams(), or other
libpq functions.

The point of mixing binary and text is outside the scope of a RAW
format for COPY, as obviously it wouldn't help with that in any way,
but on the argument that the status quo is fine because clients
have no difficulty, that's just not true. Clients cope with what they have,
but what they have is far from being complete or optimal.

Best regards,
--
Daniel Vérité
PostgreSQL-powered mailer: http://www.manitou-mail.org
Twitter: @DanielVerite

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#60Tom Lane
tgl@sss.pgh.pa.us
In reply to: Daniel Verite (#59)
Re: raw output from copy

"Daniel Verite" <daniel@manitou-mail.org> writes:

One reason of adding the format to COPY is that it's where users
are looking for it. It's the canonical way of importing contents
from files so that's where it makes more sense.

I'm not sure I buy that argument, because it could be used to justify
adding absolutely any ETL functionality to COPY. And we don't want
to go down that path; the design intention for COPY is that it be as
simple and fast as possible.

And I am still waiting for a non-psql use case. But I don't expect to
see one, precisely because most clients have no difficulty at all in
handling binary data.

You mean small or medium-size binary data. The 512MB-1GB range is
impossible to handle if requested in text format, which is what drivers
tend to use. Even pg_dump fails on these contents.

... which is COPY. I do not see that RAW mode is going to help much
here: it's not going to be noticeably better than COPY BINARY in terms
of maximum field width.

Code that uses PQexecParams() binary "resultFormat", or the
binary format of copy doesn't have that problem, but most
client-side drivers don't do that.

And maybe they just can't realistically, because getting result
format in binary is exposed as an all-or-nothing choice in libpq.

That's simply wrong. Read the documentation for PQexecParams and
friends: you can specify text or binary per-column. It's COPY that
has the only-one-column-format restriction, and RAW certainly isn't
going to make that better.

I'm not quite as convinced as Andrew that RAW mode is unnecessary,
but I don't find these arguments for it to be very compelling.

The real issue to my mind is that it doesn't seem like we can shoehorn
a sanely-defined version of RAW into the existing protocol spec without
creating compatibility hazards. So we can either wait for the mythical
protocol v4 (but even a protocol update wouldn't fix the application-level
hazards) or we can treat it as a problem to be solved client-side.

regards, tom lane

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#61David G. Johnston
david.g.johnston@gmail.com
In reply to: Daniel Verite (#57)
Re: raw output from copy

On Fri, Apr 1, 2016 at 8:42 AM, Daniel Verite <daniel@manitou-mail.org>
wrote:

Andrew Dunstan wrote:

If someone can make a good case that this is going to be of
general use I'll happily go along, but I haven't seen one so far.

About COPY FROM with a raw format, for instance just yesterday
there was this user question on stackoverflow:
http://stackoverflow.com/questions/36317237

which essentially is: how to import contents from a file without any
particular interpretation of any character?

With the patch discussed in this thread, a user can do
\copy table(textcol) from /path/to/file (format raw)

What is needed to solve this specific use-case is a way to specify "QUOTE
NONE" instead of the default for whatever format is being hijacked:

COPY file_content FROM '/tmp/textfile.txt' WITH (FORMAT csv, QUOTE
E'<unprintable character that should never appear in the data>');

becomes

COPY file_content FROM '/tmp/textfile.txt' WITH (FORMAT csv, QUOTE NONE);

Or maybe: "WITH (FORMAT single_column)"

Though maybe that doesn't extend well to unencoded binary data...which
seems like it can be considered a separate problem from reliably importing
an entire file into a single row+column in a table.

David J.

#62Daniel Verite
daniel@manitou-mail.org
In reply to: Tom Lane (#60)
Re: raw output from copy

Tom Lane wrote:

Code that uses PQexecParams() binary "resultFormat", or the
binary format of copy doesn't have that problem, but most
client-side drivers don't do that.

And maybe they just can't realistically, because getting result
format in binary is exposed as an all-or-nothing choice in libpq.

That's simply wrong. Read the documentation for PQexecParams and
friends: you can specify text or binary per-column. It's COPY that
has the only-one-column-format restriction, and RAW certainly isn't
going to make that better.

About PQexecParams, I disagree: the parameter formats can be
specified independently, but not the results, which are either all
binary or all text.

Quoting the doc at
http://www.postgresql.org/docs/9.5/static/libpq-exec.html
<quote>
PGresult *PQexecParams(PGconn *conn,
const char *command,
int nParams,
const Oid *paramTypes,
const char * const *paramValues,
const int *paramLengths,
const int *paramFormats,
int resultFormat);
[...]

resultFormat:
Specify zero to obtain results in text format, or one to obtain results
in binary format. (There is not currently a provision to obtain different
result columns in different formats, although that is possible in the
underlying protocol.)
</quote>

For the client-side drivers that I've looked at, like those used in php
or perl, they just never use resultFormat=1.
I assume that they consider that having all values
in binary is unworkable for them, which is reasonable.
Maybe if they had a per-column choice, they wouldn't
use it anyway, but at least it would be theirs to decide.
All this is only tangentially related to COPY RAW.
It's just that COPY RAW can be seen as an efficient alternative to
the single-column returning [SELECT bytea_column FROM...]
The drivers currently request this in text mode even though
it makes no sense in this particular case, and it gets measurably
annoying if the contents are big.

Best regards,
--
Daniel Vérité
PostgreSQL-powered mailer: http://www.manitou-mail.org
Twitter: @DanielVerite

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#63Pavel Stehule
pavel.stehule@gmail.com
In reply to: Pavel Stehule (#52)
1 attachment(s)
Re: raw output from copy

Hi

Here is a cleaned-up, finished version of the previous implementation of the
RAW_TEXT/RAW_BINARY formats for the COPY statement.

RAW with the text format means unescaped data, but with the correct encoding -
input/output is done with the input/output functions. RAW binary means
content produced/received by the send/receive functions.

Now both directions (input/output) work well.

Some examples of expected usage:

copy (select xmlelement(name foo, 'hello')) to stdout (format raw_binary,
encoding 'latin2');

create table avatars(id serial, picture bytea);
\copy avatars(picture) from ~/images/foo.jpg (format raw_binary);
select lastval();

create table doc(id serial, txt text);
\copy doc(txt) from ~/files/aaa.txt (format raw_text, encoding 'latin2');
select lastval();

Regards

Pavel

Attachments:

copy-raw_text_binary-01.patchtext/x-patch; charset=US-ASCII; name=copy-raw_text_binary-01.patchDownload
diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml
new file mode 100644
index 3829a14..7b9ed73
*** a/doc/src/sgml/libpq.sgml
--- b/doc/src/sgml/libpq.sgml
*************** int PQfformat(const PGresult *res,
*** 3226,3233 ****
  
        <para>
         Format code zero indicates textual data representation, while format
!        code one indicates binary representation.  (Other codes are reserved
!        for future definition.)
        </para>
       </listitem>
      </varlistentry>
--- 3226,3234 ----
  
        <para>
         Format code zero indicates textual data representation, while format
!        code one indicates binary representation. Format code two indicates
!        raw_text representation and format code three indicates raw_binary
!        representation.  (Other codes are reserved for future definition.)
        </para>
       </listitem>
      </varlistentry>
*************** typedef struct
*** 3557,3562 ****
--- 3558,3583 ----
     </para>
  
     <variablelist>
+     <varlistentry id="libpq-pqcopyformat">
+      <term>
+       <function>PQcopyFormat</function>
+       <indexterm>
+        <primary>PQcopyFormat</primary>
+       </indexterm>
+      </term>
+ 
+      <listitem>
+       <para>
+        Format code zero indicates textual data representation, format one
+        indicates binary representation, format two indicates raw
+        representation.
+ <synopsis>
+ int PQcopyFormat(PGresult *res);
+ </synopsis>
+       </para>
+      </listitem>
+     </varlistentry>
+ 
      <varlistentry id="libpq-pqcmdstatus">
       <term>
        <function>PQcmdStatus</function>
diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml
new file mode 100644
index 522128e..e783b30
*** a/doc/src/sgml/protocol.sgml
--- b/doc/src/sgml/protocol.sgml
*************** CopyInResponse (B)
*** 3239,3244 ****
--- 3239,3245 ----
                  characters, etc).
                  1 indicates the overall copy format is binary (similar
                  to DataRow format).
+                 2 indicates the overall copy format is raw.
                  See <xref linkend="sql-copy">
                  for more information.
  </para>
*************** CopyInResponse (B)
*** 3262,3269 ****
  <listitem>
  <para>
                  The format codes to be used for each column.
!                 Each must presently be zero (text) or one (binary).
!                 All must be zero if the overall copy format is textual.
  </para>
  </listitem>
  </varlistentry>
--- 3263,3271 ----
  <listitem>
  <para>
                  The format codes to be used for each column.
!                 Each must be zero (text), one (binary), two (raw_text)
!                 or three (raw_binary). All must be zero if the overall
!                 copy format is textual.
  </para>
  </listitem>
  </varlistentry>
*************** CopyOutResponse (B)
*** 3313,3319 ****
                  is textual (rows separated by newlines, columns
                  separated by separator characters, etc). 1 indicates
                  the overall copy format is binary (similar to DataRow
!                 format). See <xref linkend="sql-copy"> for more information.
  </para>
  </listitem>
  </varlistentry>
--- 3315,3322 ----
                  is textual (rows separated by newlines, columns
                  separated by separator characters, etc). 1 indicates
                  the overall copy format is binary (similar to DataRow
!                 format). 2 indicates raw_text or raw_binary format.
!                 See <xref linkend="sql-copy"> for more information.
  </para>
  </listitem>
  </varlistentry>
*************** CopyOutResponse (B)
*** 3335,3342 ****
  <listitem>
  <para>
                  The format codes to be used for each column.
!                 Each must presently be zero (text) or one (binary).
!                 All must be zero if the overall copy format is textual.
  </para>
  </listitem>
  </varlistentry>
--- 3338,3346 ----
  <listitem>
  <para>
                  The format codes to be used for each column.
!                 Each must be zero (text), one (binary), two (raw_text)
!                 or three (raw_binary). All must be zero if the overall
!                 copy format is textual.
  </para>
  </listitem>
  </varlistentry>
diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
new file mode 100644
index 07e2f45..4e339e4
*** a/doc/src/sgml/ref/copy.sgml
--- b/doc/src/sgml/ref/copy.sgml
*************** COPY { <replaceable class="parameter">ta
*** 197,203 ****
        Selects the data format to be read or written:
        <literal>text</>,
        <literal>csv</> (Comma Separated Values),
!       or <literal>binary</>.
        The default is <literal>text</>.
       </para>
      </listitem>
--- 197,205 ----
        Selects the data format to be read or written:
        <literal>text</>,
        <literal>csv</> (Comma Separated Values),
!       <literal>binary</>,
!       <literal>raw_text</>
!       or <literal>raw_binary</>.
        The default is <literal>text</>.
       </para>
      </listitem>
*************** OIDs to be shown as null if that ever pr
*** 888,893 ****
--- 890,933 ----
      </para>
     </refsect3>
    </refsect2>
+ 
+   <refsect2>
+      <title>Raw_text/raw_binary Format</title>
+ 
+    <para>
+     The <literal>raw_text</literal> format option causes all data to be
+     stored/read as one text value. This format doesn't use any metadata
+     - only raw data are exported or imported.
+    </para>
+ 
+    <para>
+     The <literal>raw_binary</literal> format option causes all data to be
+     stored/read as binary format rather than as text. It shares format
+     for data with <literal>binary</literal> format. This format doesn't
+     use any metadata - only row data in network byte order are exported
+     or imported.
+    </para>
+ 
+    <para>
+     Because this format doesn't support any delimiter, only one value
+     can be exported or imported. NULL values are not allowed.
+    </para>
+    <para>
+     The <literal>raw_binary</literal> format can be used to export or import
+     bytea values.
+ <programlisting>
+ COPY images(data) FROM '/usr1/proj/img/01.jpg' (FORMAT raw_binary);
+ </programlisting>
+     It can also be used to export XML in a different encoding
+     or to import a valid XML document in any supported encoding:
+ <screen><![CDATA[
+ SET client_encoding TO latin2;
+ 
+ COPY (SELECT xmlelement(NAME data, 'Hello')) TO stdout (FORMAT raw_binary);
+ <?xml version="1.0" encoding="LATIN2"?><data>Hello</data>
+ ]]></screen>
+    </para>
+   </refsect2>
   </refsect1>
  
   <refsect1>
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
new file mode 100644
index 3201476..7600829
*** a/src/backend/commands/copy.c
--- b/src/backend/commands/copy.c
*************** typedef struct CopyStateData
*** 110,115 ****
--- 110,116 ----
  	char	   *filename;		/* filename, or NULL for STDIN/STDOUT */
  	bool		is_program;		/* is 'filename' a program to popen? */
  	bool		binary;			/* binary format? */
+ 	bool		raw;			/* raw mode? */
  	bool		oids;			/* include OIDs? */
  	bool		freeze;			/* freeze rows on loading? */
  	bool		csv_mode;		/* Comma Separated Value format? */
*************** SendCopyBegin(CopyState cstate)
*** 342,353 ****
  		/* new way */
  		StringInfoData buf;
  		int			natts = list_length(cstate->attnumlist);
! 		int16		format = (cstate->binary ? 1 : 0);
  		int			i;
  
  		pq_beginmessage(&buf, 'H');
! 		pq_sendbyte(&buf, format);		/* overall format */
  		pq_sendint(&buf, natts, 2);
  		for (i = 0; i < natts; i++)
  			pq_sendint(&buf, format, 2);		/* per-column formats */
  		pq_endmessage(&buf);
--- 343,369 ----
  		/* new way */
  		StringInfoData buf;
  		int			natts = list_length(cstate->attnumlist);
! 		int16		format;
! 		int			mode;
  		int			i;
  
  		pq_beginmessage(&buf, 'H');
! 
! 		if (cstate->raw)
! 			mode = 2;
! 		else if (cstate->binary)
! 			mode = 1;
! 		else
! 			mode = 0;
! 
! 		pq_sendbyte(&buf, mode);		/* overall mode */
  		pq_sendint(&buf, natts, 2);
+ 
+ 		if (!cstate->raw)
+ 			format = cstate->binary ? 1 : 0;
+ 		else
+ 			format = cstate->binary ? 3 : 2;
+ 
  		for (i = 0; i < natts; i++)
  			pq_sendint(&buf, format, 2);		/* per-column formats */
  		pq_endmessage(&buf);
*************** SendCopyBegin(CopyState cstate)
*** 356,365 ****
  	else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
  	{
  		/* old way */
! 		if (cstate->binary)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
! 			errmsg("COPY BINARY is not supported to stdout or from stdin")));
  		pq_putemptymessage('H');
  		/* grottiness needed for old COPY OUT protocol */
  		pq_startcopyout();
--- 372,381 ----
  	else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
  	{
  		/* old way */
! 		if (cstate->binary || cstate->raw)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
! 			errmsg("COPY BINARY or COPY RAW_TEXT/RAW_BINARY is not supported to stdout or from stdin")));
  		pq_putemptymessage('H');
  		/* grottiness needed for old COPY OUT protocol */
  		pq_startcopyout();
*************** SendCopyBegin(CopyState cstate)
*** 368,377 ****
  	else
  	{
  		/* very old way */
! 		if (cstate->binary)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
! 			errmsg("COPY BINARY is not supported to stdout or from stdin")));
  		pq_putemptymessage('B');
  		/* grottiness needed for old COPY OUT protocol */
  		pq_startcopyout();
--- 384,393 ----
  	else
  	{
  		/* very old way */
! 		if (cstate->binary || cstate->raw)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
! 			errmsg("COPY BINARY or COPY RAW_TEXT/RAW_BINARY is not supported to stdout or from stdin")));
  		pq_putemptymessage('B');
  		/* grottiness needed for old COPY OUT protocol */
  		pq_startcopyout();
*************** ReceiveCopyBegin(CopyState cstate)
*** 387,398 ****
  		/* new way */
  		StringInfoData buf;
  		int			natts = list_length(cstate->attnumlist);
! 		int16		format = (cstate->binary ? 1 : 0);
  		int			i;
  
  		pq_beginmessage(&buf, 'G');
! 		pq_sendbyte(&buf, format);		/* overall format */
  		pq_sendint(&buf, natts, 2);
  		for (i = 0; i < natts; i++)
  			pq_sendint(&buf, format, 2);		/* per-column formats */
  		pq_endmessage(&buf);
--- 403,429 ----
  		/* new way */
  		StringInfoData buf;
  		int			natts = list_length(cstate->attnumlist);
! 		int16		format;
! 		int			mode;
  		int			i;
  
  		pq_beginmessage(&buf, 'G');
! 
! 		if (cstate->raw)
! 			mode = 2;
! 		else if (cstate->binary)
! 			mode = 1;
! 		else
! 			mode = 0;
! 
! 		pq_sendbyte(&buf, mode);		/* overall format */
  		pq_sendint(&buf, natts, 2);
+ 
+ 		if (!cstate->raw)
+ 			format = cstate->binary ? 1 : 0;
+ 		else
+ 			format = cstate->binary ? 3 : 2;
+ 
  		for (i = 0; i < natts; i++)
  			pq_sendint(&buf, format, 2);		/* per-column formats */
  		pq_endmessage(&buf);
*************** ReceiveCopyBegin(CopyState cstate)
*** 402,411 ****
  	else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
  	{
  		/* old way */
! 		if (cstate->binary)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
! 			errmsg("COPY BINARY is not supported to stdout or from stdin")));
  		pq_putemptymessage('G');
  		/* any error in old protocol will make us lose sync */
  		pq_startmsgread();
--- 433,442 ----
  	else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
  	{
  		/* old way */
! 		if (cstate->binary || cstate->raw)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
! 			errmsg("COPY BINARY or COPY RAW_TEXT/RAW_BINARY is not supported to stdout or from stdin")));
  		pq_putemptymessage('G');
  		/* any error in old protocol will make us lose sync */
  		pq_startmsgread();
*************** ReceiveCopyBegin(CopyState cstate)
*** 414,423 ****
  	else
  	{
  		/* very old way */
! 		if (cstate->binary)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
! 			errmsg("COPY BINARY is not supported to stdout or from stdin")));
  		pq_putemptymessage('D');
  		/* any error in old protocol will make us lose sync */
  		pq_startmsgread();
--- 445,454 ----
  	else
  	{
  		/* very old way */
! 		if (cstate->binary || cstate->raw)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
! 			errmsg("COPY BINARY or COPY RAW_TEXT/RAW_BINARY is not supported to stdout or from stdin")));
  		pq_putemptymessage('D');
  		/* any error in old protocol will make us lose sync */
  		pq_startmsgread();
*************** CopySendEndOfRow(CopyState cstate)
*** 482,488 ****
  	switch (cstate->copy_dest)
  	{
  		case COPY_FILE:
! 			if (!cstate->binary)
  			{
  				/* Default line termination depends on platform */
  #ifndef WIN32
--- 513,519 ----
  	switch (cstate->copy_dest)
  	{
  		case COPY_FILE:
! 			if (!cstate->binary && !cstate->raw)
  			{
  				/* Default line termination depends on platform */
  #ifndef WIN32
*************** CopySendEndOfRow(CopyState cstate)
*** 526,531 ****
--- 557,565 ----
  			}
  			break;
  		case COPY_OLD_FE:
+ 			/* This old protocol doesn't allow RAW_TEXT/RAW_BINARY */
+ 			Assert(!cstate->raw);
+ 
  			/* The FE/BE protocol uses \n as newline for all platforms */
  			if (!cstate->binary)
  				CopySendChar(cstate, '\n');
*************** CopySendEndOfRow(CopyState cstate)
*** 540,546 ****
  			break;
  		case COPY_NEW_FE:
  			/* The FE/BE protocol uses \n as newline for all platforms */
! 			if (!cstate->binary)
  				CopySendChar(cstate, '\n');
  
  			/* Dump the accumulated row as one CopyData message */
--- 574,580 ----
  			break;
  		case COPY_NEW_FE:
  			/* The FE/BE protocol uses \n as newline for all platforms */
! 			if (!cstate->binary && !cstate->raw)
  				CopySendChar(cstate, '\n');
  
  			/* Dump the accumulated row as one CopyData message */
*************** CopyLoadRawBuf(CopyState cstate)
*** 766,771 ****
--- 800,837 ----
  	return (inbytes > 0);
  }
  
+ /*
+  * CopyLoadallRawBuf reads the whole input into raw_buf (raw_text/raw_binary).
+  *
+  * It calls CopyGetData until no more bytes arrive, growing raw_buf in
+  * RAW_BUF_SIZE steps, then NUL-terminates it and sets raw_buf_index/raw_buf_len.
+  */
+ static void
+ CopyLoadallRawBuf(CopyState cstate)
+ {
+ 	int			nbytes = 0;
+ 	int			inbytes;
+ 	Size		raw_buf_size = RAW_BUF_SIZE;
+ 
+ 	do
+ 	{
+ 		/* enlarge when less than 8kB (after reserving the NUL byte) is still free */
+ 		if ((raw_buf_size - nbytes - 1) < 8 * 1024)
+ 		{
+ 			raw_buf_size += RAW_BUF_SIZE;
+ 			cstate->raw_buf = repalloc(cstate->raw_buf, raw_buf_size);
+ 		}
+ 
+ 		inbytes = CopyGetData(cstate, cstate->raw_buf + nbytes, 1, raw_buf_size - nbytes - 1);
+ 		nbytes += inbytes;
+ 	}
+ 	while (inbytes > 0);
+ 
+ 	cstate->raw_buf[nbytes] = '\0';
+ 	cstate->raw_buf_index = 0;
+ 	cstate->raw_buf_len = nbytes;
+ }
+ 
  
  /*
   *	 DoCopy executes the SQL COPY statement
*************** ProcessCopyOptions(CopyState cstate,
*** 1013,1018 ****
--- 1079,1091 ----
  				cstate->csv_mode = true;
  			else if (strcmp(fmt, "binary") == 0)
  				cstate->binary = true;
+ 			else if (strcmp(fmt, "raw_text") == 0)
+ 				cstate->raw = true;
+ 			else if (strcmp(fmt, "raw_binary") == 0)
+ 			{
+ 				cstate->binary = true;
+ 				cstate->raw = true;
+ 			}
  			else
  				ereport(ERROR,
  						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
*************** ProcessCopyOptions(CopyState cstate,
*** 1162,1177 ****
  	 * Check for incompatible options (must do these two before inserting
  	 * defaults)
  	 */
! 	if (cstate->binary && cstate->delim)
  		ereport(ERROR,
  				(errcode(ERRCODE_SYNTAX_ERROR),
  				 errmsg("cannot specify DELIMITER in BINARY mode")));
  
! 	if (cstate->binary && cstate->null_print)
  		ereport(ERROR,
  				(errcode(ERRCODE_SYNTAX_ERROR),
  				 errmsg("cannot specify NULL in BINARY mode")));
  
  	/* Set defaults for omitted options */
  	if (!cstate->delim)
  		cstate->delim = cstate->csv_mode ? "," : "\t";
--- 1235,1255 ----
  	 * Check for incompatible options (must do these two before inserting
  	 * defaults)
  	 */
! 	if ((cstate->binary || cstate->raw)  && cstate->delim)
  		ereport(ERROR,
  				(errcode(ERRCODE_SYNTAX_ERROR),
  				 errmsg("cannot specify DELIMITER in BINARY mode")));
  
! 	if ((cstate->binary || cstate->raw) && cstate->null_print)
  		ereport(ERROR,
  				(errcode(ERRCODE_SYNTAX_ERROR),
  				 errmsg("cannot specify NULL in BINARY mode")));
  
+ 	if (cstate->raw && cstate->oids)
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_SYNTAX_ERROR),
+ 				 errmsg("cannot specify OIDS in RAW_TEXT/RAW_BINARY mode")));
+ 
  	/* Set defaults for omitted options */
  	if (!cstate->delim)
  		cstate->delim = cstate->csv_mode ? "," : "\t";
*************** BeginCopy(bool is_from,
*** 1608,1613 ****
--- 1686,1697 ----
  		}
  	}
  
+ 	/* No more columns are allowed in RAW mode */
+ 	if (cstate->raw && list_length(cstate->attnumlist) > 1)
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ 		 errmsg("Single column result/target is required in RAW_TEXT/RAW_BINARY mode")));
+ 
  	/* Use client encoding when ENCODING option is not specified. */
  	if (cstate->file_encoding < 0)
  		cstate->file_encoding = pg_get_client_encoding();
*************** CopyTo(CopyState cstate)
*** 1899,1905 ****
  											   ALLOCSET_DEFAULT_INITSIZE,
  											   ALLOCSET_DEFAULT_MAXSIZE);
  
! 	if (cstate->binary)
  	{
  		/* Generate header for a binary copy */
  		int32		tmp;
--- 1983,1989 ----
  											   ALLOCSET_DEFAULT_INITSIZE,
  											   ALLOCSET_DEFAULT_MAXSIZE);
  
! 	if (cstate->binary && !cstate->raw)
  	{
  		/* Generate header for a binary copy */
  		int32		tmp;
*************** CopyTo(CopyState cstate)
*** 1931,1936 ****
--- 2015,2023 ----
  		{
  			bool		hdr_delim = false;
  
+ 			/* raw_text/raw_binary mode is not allowed here */
+ 			Assert(!cstate->raw);
+ 
  			foreach(cur, cstate->attnumlist)
  			{
  				int			attnum = lfirst_int(cur);
*************** CopyTo(CopyState cstate)
*** 1967,1972 ****
--- 2054,2063 ----
  		{
  			CHECK_FOR_INTERRUPTS();
  
+ 			/* stop quickly in raw_text/raw_binary when more rows is detected */
+ 			if (cstate->raw && processed > 0)
+ 				break;
+ 
  			/* Deconstruct the tuple ... faster than repeated heap_getattr */
  			heap_deform_tuple(tuple, tupDesc, values, nulls);
  
*************** CopyTo(CopyState cstate)
*** 1983,1993 ****
  	else
  	{
  		/* run the plan --- the dest receiver will send tuples */
! 		ExecutorRun(cstate->queryDesc, ForwardScanDirection, 0L);
  		processed = ((DR_copy *) cstate->queryDesc->dest)->processed;
  	}
  
! 	if (cstate->binary)
  	{
  		/* Generate trailer for a binary copy */
  		CopySendInt16(cstate, -1);
--- 2074,2098 ----
  	else
  	{
  		/* run the plan --- the dest receiver will send tuples */
! 		ExecutorRun(cstate->queryDesc, ForwardScanDirection, cstate->raw ? 2L : 0L);
  		processed = ((DR_copy *) cstate->queryDesc->dest)->processed;
  	}
  
! 	/* raw_text/raw_binary requires exactly one row */
! 	if (cstate->raw)
! 	{
! 		if (processed > 1)
! 			ereport(ERROR,
! 					(errcode(ERRCODE_TOO_MANY_ROWS),
! 					 errmsg("single row result is required by RAW_TEXT/RAW_BINARY mode")));
! 
! 		if (processed == 0)
! 			ereport(ERROR,
! 					(errcode(ERRCODE_NO_DATA_FOUND),
! 					 errmsg("single row result is required by RAW_TEXT/RAW_BINARY mode")));
! 	}
! 
! 	if (cstate->binary && !cstate->raw)
  	{
  		/* Generate trailer for a binary copy */
  		CopySendInt16(cstate, -1);
*************** CopyOneRowTo(CopyState cstate, Oid tuple
*** 2015,2042 ****
  	MemoryContextReset(cstate->rowcontext);
  	oldcontext = MemoryContextSwitchTo(cstate->rowcontext);
  
! 	if (cstate->binary)
  	{
! 		/* Binary per-tuple header */
! 		CopySendInt16(cstate, list_length(cstate->attnumlist));
! 		/* Send OID if wanted --- note attnumlist doesn't include it */
! 		if (cstate->oids)
  		{
! 			/* Hack --- assume Oid is same size as int32 */
! 			CopySendInt32(cstate, sizeof(int32));
! 			CopySendInt32(cstate, tupleOid);
  		}
! 	}
! 	else
! 	{
! 		/* Text format has no per-tuple header, but send OID if wanted */
! 		/* Assume digits don't need any quoting or encoding conversion */
! 		if (cstate->oids)
  		{
! 			string = DatumGetCString(DirectFunctionCall1(oidout,
! 												ObjectIdGetDatum(tupleOid)));
! 			CopySendString(cstate, string);
! 			need_delim = true;
  		}
  	}
  
--- 2120,2150 ----
  	MemoryContextReset(cstate->rowcontext);
  	oldcontext = MemoryContextSwitchTo(cstate->rowcontext);
  
! 	if (!cstate->raw)
  	{
! 		if (cstate->binary)
  		{
! 			/* Binary per-tuple header */
! 			CopySendInt16(cstate, list_length(cstate->attnumlist));
! 			/* Send OID if wanted --- note attnumlist doesn't include it */
! 			if (cstate->oids)
! 			{
! 				/* Hack --- assume Oid is same size as int32 */
! 				CopySendInt32(cstate, sizeof(int32));
! 				CopySendInt32(cstate, tupleOid);
! 			}
  		}
! 		else
  		{
! 			/* Text format has no per-tuple header, but send OID if wanted */
! 			/* Assume digits don't need any quoting or encoding conversion */
! 			if (cstate->oids)
! 			{
! 				string = DatumGetCString(DirectFunctionCall1(oidout,
! 													ObjectIdGetDatum(tupleOid)));
! 				CopySendString(cstate, string);
! 				need_delim = true;
! 			}
  		}
  	}
  
*************** CopyOneRowTo(CopyState cstate, Oid tuple
*** 2055,2060 ****
--- 2163,2173 ----
  
  		if (isnull)
  		{
+ 			if (cstate->raw)
+ 				ereport(ERROR,
+ 						(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ 						 errmsg("NULL value is not allowed in RAW_TEXT/RAW_BINARY mode")));
+ 
  			if (!cstate->binary)
  				CopySendString(cstate, cstate->null_print_client);
  			else
*************** CopyOneRowTo(CopyState cstate, Oid tuple
*** 2062,2068 ****
  		}
  		else
  		{
! 			if (!cstate->binary)
  			{
  				string = OutputFunctionCall(&out_functions[attnum - 1],
  											value);
--- 2175,2246 ----
  		}
  		else
  		{
! 			if (cstate->raw)
! 			{
! 				const void *content;
! 				int size;
! 			
! 				if (!cstate->binary)
! 				{
! 					string = OutputFunctionCall(&out_functions[attnum - 1],
! 												value);
! 
! 					/* We would to transcode, but without escaping */
! 					if (cstate->need_transcoding)
! 						content = pg_server_to_any(string, strlen(string), cstate->file_encoding);
! 					else
! 						content = string;
! 
! 					size = strlen((const char *) content);
! 				}
! 				else
! 				{
! 					bytea *outputbytes;
! 
! 					/*
! 					 * Some binary output functions depends can depends on client encoding.
! 					 * The binary output of xml is good example. Set client_encoding
! 					 * temporaly before out function execution.
! 					 */
! 					if (cstate->need_transcoding)
! 					{
! 						int		old_server_encoding = pg_get_client_encoding();
! 						volatile bool reset_encoding = false;
! 
! 						PG_TRY();
! 						{
! 							/* We don't expect an error, because encoding was checked before */
! 							if (PrepareClientEncoding(cstate->file_encoding) < 0)
! 								elog(ERROR, "PrepareClientEncoding(%d) failed", cstate->file_encoding);
! 
! 							SetClientEncoding(cstate->file_encoding);
! 							reset_encoding = true;
! 
! 							outputbytes = SendFunctionCall(&out_functions[attnum - 1],
! 													   value);
! 							SetClientEncoding(old_server_encoding);
! 						}
! 						PG_CATCH();
! 						{
! 							if (reset_encoding)
! 								SetClientEncoding(old_server_encoding);
! 							PG_RE_THROW();
! 						}
! 						PG_END_TRY();
! 					}
! 					else
! 					{
! 						outputbytes = SendFunctionCall(&out_functions[attnum - 1],
! 												   value);
! 					}
! 					content = VARDATA(outputbytes);
! 					size = VARSIZE(outputbytes) - VARHDRSZ;
! 				}
! 
! 				/* Send only content in RAW_TEXT/RAW_BINARY mode */
! 				CopySendData(cstate, content, size);
! 			}
! 			else if (!cstate->binary)
  			{
  				string = OutputFunctionCall(&out_functions[attnum - 1],
  											value);
*************** BeginCopyFrom(Relation rel,
*** 2811,2875 ****
  		}
  	}
  
! 	if (!cstate->binary)
! 	{
! 		/* must rely on user to tell us... */
! 		cstate->file_has_oids = cstate->oids;
! 	}
! 	else
  	{
! 		/* Read and verify binary header */
! 		char		readSig[11];
! 		int32		tmp;
! 
! 		/* Signature */
! 		if (CopyGetData(cstate, readSig, 11, 11) != 11 ||
! 			memcmp(readSig, BinarySignature, 11) != 0)
! 			ereport(ERROR,
! 					(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 					 errmsg("COPY file signature not recognized")));
! 		/* Flags field */
! 		if (!CopyGetInt32(cstate, &tmp))
! 			ereport(ERROR,
! 					(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 					 errmsg("invalid COPY file header (missing flags)")));
! 		cstate->file_has_oids = (tmp & (1 << 16)) != 0;
! 		tmp &= ~(1 << 16);
! 		if ((tmp >> 16) != 0)
! 			ereport(ERROR,
! 					(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 				 errmsg("unrecognized critical flags in COPY file header")));
! 		/* Header extension length */
! 		if (!CopyGetInt32(cstate, &tmp) ||
! 			tmp < 0)
! 			ereport(ERROR,
! 					(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 					 errmsg("invalid COPY file header (missing length)")));
! 		/* Skip extension header, if present */
! 		while (tmp-- > 0)
  		{
! 			if (CopyGetData(cstate, readSig, 1, 1) != 1)
  				ereport(ERROR,
  						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 						 errmsg("invalid COPY file header (wrong length)")));
  		}
- 	}
  
! 	if (cstate->file_has_oids && cstate->binary)
! 	{
! 		getTypeBinaryInputInfo(OIDOID,
! 							   &in_func_oid, &cstate->oid_typioparam);
! 		fmgr_info(in_func_oid, &cstate->oid_in_function);
! 	}
  
! 	/* create workspace for CopyReadAttributes results */
! 	if (!cstate->binary)
! 	{
! 		AttrNumber	attr_count = list_length(cstate->attnumlist);
! 		int			nfields = cstate->file_has_oids ? (attr_count + 1) : attr_count;
  
! 		cstate->max_fields = nfields;
! 		cstate->raw_fields = (char **) palloc(nfields * sizeof(char *));
  	}
  
  	MemoryContextSwitchTo(oldcontext);
--- 2989,3057 ----
  		}
  	}
  
! 	/* The raw mode hasn't any header information */
! 	if (!cstate->raw)
  	{
! 		if (!cstate->binary)
  		{
! 			/* must rely on user to tell us... */
! 			cstate->file_has_oids = cstate->oids;
! 		}
! 		else
! 		{
! 			/* Read and verify binary header */
! 			char		readSig[11];
! 			int32		tmp;
! 
! 			/* Signature */
! 			if (CopyGetData(cstate, readSig, 11, 11) != 11 ||
! 				memcmp(readSig, BinarySignature, 11) != 0)
  				ereport(ERROR,
  						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 						 errmsg("COPY file signature not recognized")));
! 			/* Flags field */
! 			if (!CopyGetInt32(cstate, &tmp))
! 				ereport(ERROR,
! 						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 						 errmsg("invalid COPY file header (missing flags)")));
! 			cstate->file_has_oids = (tmp & (1 << 16)) != 0;
! 			tmp &= ~(1 << 16);
! 			if ((tmp >> 16) != 0)
! 				ereport(ERROR,
! 						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 					 errmsg("unrecognized critical flags in COPY file header")));
! 			/* Header extension length */
! 			if (!CopyGetInt32(cstate, &tmp) ||
! 				tmp < 0)
! 				ereport(ERROR,
! 						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 						 errmsg("invalid COPY file header (missing length)")));
! 			/* Skip extension header, if present */
! 			while (tmp-- > 0)
! 			{
! 				if (CopyGetData(cstate, readSig, 1, 1) != 1)
! 					ereport(ERROR,
! 							(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! 							 errmsg("invalid COPY file header (wrong length)")));
! 			}
  		}
  
! 		if (cstate->file_has_oids && cstate->binary)
! 		{
! 			getTypeBinaryInputInfo(OIDOID,
! 								   &in_func_oid, &cstate->oid_typioparam);
! 			fmgr_info(in_func_oid, &cstate->oid_in_function);
! 		}
  
! 		/* create workspace for CopyReadAttributes results */
! 		if (!cstate->binary)
! 		{
! 			AttrNumber	attr_count = list_length(cstate->attnumlist);
! 			int			nfields = cstate->file_has_oids ? (attr_count + 1) : attr_count;
  
! 			cstate->max_fields = nfields;
! 			cstate->raw_fields = (char **) palloc(nfields * sizeof(char *));
! 		}
  	}
  
  	MemoryContextSwitchTo(oldcontext);
*************** NextCopyFrom(CopyState cstate, ExprConte
*** 2968,2974 ****
  	MemSet(values, 0, num_phys_attrs * sizeof(Datum));
  	MemSet(nulls, true, num_phys_attrs * sizeof(bool));
  
! 	if (!cstate->binary)
  	{
  		char	  **field_strings;
  		ListCell   *cur;
--- 3150,3203 ----
  	MemSet(values, 0, num_phys_attrs * sizeof(Datum));
  	MemSet(nulls, true, num_phys_attrs * sizeof(bool));
  
! 	if (cstate->raw)
! 	{
! 		int		m = linitial_int(cstate->attnumlist) - 1;
! 
! 		/* All content was read in first cycle */
! 		if (++cstate->cur_lineno > 1)
! 			return false;
! 
! 		CopyLoadallRawBuf(cstate);
! 
! 		cstate->cur_attname = NameStr(attr[m]->attname);
! 
! 		if (!cstate->binary)
! 		{
! 			char	   *cvt;
! 
! 			cvt = pg_any_to_server(cstate->raw_buf,
! 								   cstate->raw_buf_len,
! 								   cstate->file_encoding);
! 
! 			values[m] = InputFunctionCall(&in_functions[m],
! 										  cvt,
! 										  typioparams[m],
! 										  attr[m]->atttypmod);
! 		}
! 		else
! 		{
! 			cstate->attribute_buf.data = cstate->raw_buf;
! 			cstate->attribute_buf.len = cstate->raw_buf_len;
! 			cstate->attribute_buf.cursor = 0;
! 			cstate->raw_buf = NULL;
! 
! 			/* Call the column type's binary input converter */
! 			values[m] = ReceiveFunctionCall(&in_functions[m], &cstate->attribute_buf,
! 									 typioparams[m], attr[m]->atttypmod);
! 
! 			/* Trouble if it didn't eat the whole buffer */
! 			if (cstate->attribute_buf.cursor != cstate->attribute_buf.len)
! 				ereport(ERROR,
! 						(errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
! 						 errmsg("incorrect binary data format")));
! 		}
! 
! 		nulls[m] = false;
! 
! 		cstate->cur_attname = NULL;
! 	}
! 	else if (!cstate->binary)
  	{
  		char	  **field_strings;
  		ListCell   *cur;
diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c
new file mode 100644
index cb8a06d..4dab119
*** a/src/bin/psql/tab-complete.c
--- b/src/bin/psql/tab-complete.c
*************** psql_completion(const char *text, int st
*** 1969,1976 ****
  	/* Handle COPY [BINARY] <sth> FROM|TO filename */
  	else if (Matches4("COPY|\\copy", MatchAny, "FROM|TO", MatchAny) ||
  			 Matches5("COPY", "BINARY", MatchAny, "FROM|TO", MatchAny))
! 		COMPLETE_WITH_LIST6("BINARY", "OIDS", "DELIMITER", "NULL", "CSV",
! 							"ENCODING");
  
  	/* Handle COPY [BINARY] <sth> FROM|TO filename CSV */
  	else if (Matches5("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "CSV") ||
--- 1969,1976 ----
  	/* Handle COPY [BINARY] <sth> FROM|TO filename */
  	else if (Matches4("COPY|\\copy", MatchAny, "FROM|TO", MatchAny) ||
  			 Matches5("COPY", "BINARY", MatchAny, "FROM|TO", MatchAny))
! 		COMPLETE_WITH_LIST8("BINARY", "RAW_TEXT", "RAW_BINARY", "OIDS",
! 							"DELIMITER", "NULL", "CSV", "ENCODING");
  
  	/* Handle COPY [BINARY] <sth> FROM|TO filename CSV */
  	else if (Matches5("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "CSV") ||
diff --git a/src/interfaces/libpq/exports.txt b/src/interfaces/libpq/exports.txt
new file mode 100644
index 21dd772..a2754f1
*** a/src/interfaces/libpq/exports.txt
--- b/src/interfaces/libpq/exports.txt
*************** PQsslAttributeNames       168
*** 171,173 ****
--- 171,174 ----
  PQsslAttribute            169
  PQsetErrorContextVisibility 170
  PQresultVerboseErrorMessage 171
+ PQcopyFormat              172
diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c
new file mode 100644
index 2621767..09967e9
*** a/src/interfaces/libpq/fe-exec.c
--- b/src/interfaces/libpq/fe-exec.c
*************** PQmakeEmptyPGresult(PGconn *conn, ExecSt
*** 155,160 ****
--- 155,161 ----
  	result->resultStatus = status;
  	result->cmdStatus[0] = '\0';
  	result->binary = 0;
+ 	result->raw = 0;
  	result->events = NULL;
  	result->nEvents = 0;
  	result->errMsg = NULL;
*************** PQsetResultAttrs(PGresult *res, int numA
*** 256,263 ****
  		if (!res->attDescs[i].name)
  			return FALSE;
  
! 		if (res->attDescs[i].format == 0)
  			res->binary = 0;
  	}
  
  	return TRUE;
--- 257,266 ----
  		if (!res->attDescs[i].name)
  			return FALSE;
  
! 		if (res->attDescs[i].format == 0 || res->attDescs[i].format == 2)
  			res->binary = 0;
+ 		if (res->attDescs[i].format == 2 || res->attDescs[i].format == 3)
+ 			res->raw = 1;
  	}
  
  	return TRUE;
*************** PQcmdStatus(PGresult *res)
*** 2932,2937 ****
--- 2935,2955 ----
  }
  
  /*
+  * PQcopyFormat
+  *
+  * Returns the COPY format of a result: -1 if res is NULL (error),
+  * 0 = text mode, 1 = binary mode, 2 = raw mode (RAW_TEXT/RAW_BINARY).
+  */
+ int
+ PQcopyFormat(const PGresult *res)
+ {
+ 	if (!res)
+ 		return -1;			/* the documented error result */
+ 
+ 	return res->raw ? 2 : res->binary;
+ }
+ 
+ /*
   * PQoidStatus -
   *	if the last command was an INSERT, return the oid string
   *	if not, return ""
diff --git a/src/interfaces/libpq/fe-protocol3.c b/src/interfaces/libpq/fe-protocol3.c
new file mode 100644
index 0b8c62f..1783844
*** a/src/interfaces/libpq/fe-protocol3.c
--- b/src/interfaces/libpq/fe-protocol3.c
*************** getCopyStart(PGconn *conn, ExecStatusTyp
*** 1486,1491 ****
--- 1486,1495 ----
  		 */
  		format = (int) ((int16) format);
  		result->attDescs[i].format = format;
+ 
+ 		/* when any field uses raw format, then COPY RAW_* was used */
+ 		if (format == 2 || format == 3)
+ 			result->raw = true;
  	}
  
  	/* Success! */
diff --git a/src/interfaces/libpq/libpq-fe.h b/src/interfaces/libpq/libpq-fe.h
new file mode 100644
index 9ca0756..7984666
*** a/src/interfaces/libpq/libpq-fe.h
--- b/src/interfaces/libpq/libpq-fe.h
*************** extern Oid	PQftype(const PGresult *res,
*** 479,484 ****
--- 479,485 ----
  extern int	PQfsize(const PGresult *res, int field_num);
  extern int	PQfmod(const PGresult *res, int field_num);
  extern char *PQcmdStatus(PGresult *res);
+ extern int	PQcopyFormat(const PGresult *res);
  extern char *PQoidStatus(const PGresult *res);	/* old and ugly */
  extern Oid	PQoidValue(const PGresult *res);	/* new and improved */
  extern char *PQcmdTuples(PGresult *res);
diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h
new file mode 100644
index 1183323..8fc4b04
*** a/src/interfaces/libpq/libpq-int.h
--- b/src/interfaces/libpq/libpq-int.h
*************** struct pg_result
*** 180,185 ****
--- 180,186 ----
  	char		cmdStatus[CMDSTATUS_LEN];		/* cmd status from the query */
  	int			binary;			/* binary tuple values if binary == 1,
  								 * otherwise text */
+ 	int			raw;			/* raw mode for COPY */
  
  	/*
  	 * These fields are copied from the originating PGconn, so that operations
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
new file mode 100644
index 5f6260a..3d36dd3
*** a/src/test/regress/expected/copy2.out
--- b/src/test/regress/expected/copy2.out
*************** DROP FUNCTION truncate_in_subxact();
*** 466,468 ****
--- 466,482 ----
  DROP TABLE x, y;
  DROP FUNCTION fn_x_before();
  DROP FUNCTION fn_x_after();
+ CREATE TABLE x(a bytea);
+ INSERT INTO x VALUES('\x41484f4a0a');
+ INSERT INTO x VALUES('\x41484f4a0a');
+ -- should to fail
+ COPY (SELECT a,a FROM x LIMIT 1) TO STDOUT (FORMAT raw_binary);
+ ERROR:  Single column result/target is required in RAW_TEXT/RAW_BINARY mode
+ COPY (SELECT a FROM x) TO STDOUT (FORMAT raw_binary);
+ AHOJ
+ AHOJ
+ ERROR:  single row result is required by RAW_TEXT/RAW_BINARY mode
+ -- should be ok
+ COPY (SELECT a FROM x LIMIT 1) TO STDOUT (FORMAT raw_binary);
+ AHOJ
+ DROP TABLE x;
diff --git a/src/test/regress/input/copy.source b/src/test/regress/input/copy.source
new file mode 100644
index cb13606..085ae36
*** a/src/test/regress/input/copy.source
--- b/src/test/regress/input/copy.source
*************** this is just a line full of junk that wo
*** 133,135 ****
--- 133,214 ----
  \.
  
  copy copytest3 to stdout csv header;
+ 
+ -- copy raw
+ CREATE TABLE x(a bytea);
+ INSERT INTO x VALUES('\x41484f4a0a');
+ SELECT length(a) FROM x;
+ 
+ INSERT INTO x VALUES('\x41484f4a0a');
+ 
+ -- should to fail
+ COPY (SELECT a,a FROM x LIMIT 1) TO '@abs_builddir@/results/raw.data' (FORMAT raw_binary);
+ COPY (SELECT a FROM x) TO '@abs_builddir@/results/raw.data' (FORMAT raw_binary);
+ 
+ -- should be ok
+ COPY (SELECT a FROM x LIMIT 1) TO '@abs_builddir@/results/raw.data' (FORMAT raw_binary);
+ TRUNCATE x;
+ COPY x FROM '@abs_builddir@/results/raw.data' (FORMAT raw_binary);
+ SELECT length(a) FROM x;
+ COPY x TO stdout (FORMAT raw_binary);
+ 
+ TRUNCATE x;
+ 
+ \COPY x FROM '@abs_builddir@/results/raw.data' (FORMAT raw_binary)
+ SELECT length(a) FROM x;
+ COPY x TO stdout (FORMAT raw_binary);
+ 
+ \COPY x TO '@abs_builddir@/results/raw2.data' (FORMAT raw_binary)
+ TRUNCATE x;
+ 
+ \COPY x FROM '@abs_builddir@/results/raw2.data' (FORMAT raw_binary)
+ SELECT length(a) FROM x;
+ COPY x TO stdout (FORMAT raw_binary);
+ 
+ -- test big file
+ TRUNCATE x;
+ -- use different mechanism for load to bytea
+ \lo_import @abs_builddir@/data/hash.data
+ \set lo_oid :LASTOID
+ INSERT INTO x VALUES(lo_get(:lo_oid));
+ \lo_unlink :lo_oid
+ 
+ COPY x FROM '@abs_builddir@/data/hash.data' (FORMAT raw_binary);
+ \COPY x FROM '@abs_builddir@/data/hash.data' (FORMAT raw_binary)
+ 
+ SELECT md5(a), length(a) FROM x;
+ 
+ TRUNCATE x;
+ COPY x FROM '@abs_builddir@/data/hash.data' (FORMAT raw_binary);
+ COPY x TO '@abs_builddir@/results/hash2.data' (FORMAT raw_binary);
+ \COPY x TO '@abs_builddir@/results/hash3.data' (FORMAT raw_binary)
+ 
+ -- read again
+ COPY x FROM '@abs_builddir@/results/hash2.data' (FORMAT raw_binary);
+ \COPY x FROM '@abs_builddir@/results/hash3.data' (FORMAT raw_binary)
+ -- cross
+ COPY x FROM '@abs_builddir@/results/hash3.data' (FORMAT raw_binary);
+ \COPY x FROM '@abs_builddir@/results/hash2.data' (FORMAT raw_binary)
+ 
+ SELECT md5(a), length(a) FROM x;
+ 
+ DROP TABLE x;
+ 
+ -- insert into multicolumn table
+ CREATE TABLE x(id serial, a bytea, b bytea);
+ 
+ -- should fail, too much columns
+ COPY x FROM '@abs_builddir@/results/hash2.data' (FORMAT raw_binary);
+ 
+ -- should work
+ COPY x(a) FROM '@abs_builddir@/results/hash2.data' (FORMAT raw_binary);
+ COPY x(b) FROM '@abs_builddir@/results/hash2.data' (FORMAT raw_binary);
+ SELECT id, md5(a), md5(b) FROM x;
+ 
+ -- test raw_text
+ COPY (SELECT a FROM x WHERE id = 1) TO '@abs_builddir@/results/hash4.data' (FORMAT raw_text);
+ COPY x(a) FROM '@abs_builddir@/results/hash4.data' (FORMAT raw_text);
+ SELECT id, md5(a) FROM x WHERE id = lastval();
+ 
+ DROP TABLE x;
+ 
diff --git a/src/test/regress/output/copy.source b/src/test/regress/output/copy.source
new file mode 100644
index b7e372d..e34bbab
*** a/src/test/regress/output/copy.source
--- b/src/test/regress/output/copy.source
*************** copy copytest3 to stdout csv header;
*** 95,97 ****
--- 95,208 ----
  c1,"col with , comma","col with "" quote"
  1,a,1
  2,b,2
+ -- copy raw
+ CREATE TABLE x(a bytea);
+ INSERT INTO x VALUES('\x41484f4a0a');
+ SELECT length(a) FROM x;
+  length 
+ --------
+       5
+ (1 row)
+ 
+ INSERT INTO x VALUES('\x41484f4a0a');
+ -- should to fail
+ COPY (SELECT a,a FROM x LIMIT 1) TO '@abs_builddir@/results/raw.data' (FORMAT raw_binary);
+ ERROR:  Single column result/target is required in RAW_TEXT/RAW_BINARY mode
+ COPY (SELECT a FROM x) TO '@abs_builddir@/results/raw.data' (FORMAT raw_binary);
+ ERROR:  single row result is required by RAW_TEXT/RAW_BINARY mode
+ -- should be ok
+ COPY (SELECT a FROM x LIMIT 1) TO '@abs_builddir@/results/raw.data' (FORMAT raw_binary);
+ TRUNCATE x;
+ COPY x FROM '@abs_builddir@/results/raw.data' (FORMAT raw_binary);
+ SELECT length(a) FROM x;
+  length 
+ --------
+       5
+ (1 row)
+ 
+ COPY x TO stdout (FORMAT raw_binary);
+ AHOJ
+ TRUNCATE x;
+ \COPY x FROM '@abs_builddir@/results/raw.data' (FORMAT raw_binary)
+ SELECT length(a) FROM x;
+  length 
+ --------
+       5
+ (1 row)
+ 
+ COPY x TO stdout (FORMAT raw_binary);
+ AHOJ
+ \COPY x TO '@abs_builddir@/results/raw2.data' (FORMAT raw_binary)
+ TRUNCATE x;
+ \COPY x FROM '@abs_builddir@/results/raw2.data' (FORMAT raw_binary)
+ SELECT length(a) FROM x;
+  length 
+ --------
+       5
+ (1 row)
+ 
+ COPY x TO stdout (FORMAT raw_binary);
+ AHOJ
+ -- test big file
+ TRUNCATE x;
+ -- use different mechanism for load to bytea
+ \lo_import @abs_builddir@/data/hash.data
+ \set lo_oid :LASTOID
+ INSERT INTO x VALUES(lo_get(:lo_oid));
+ \lo_unlink :lo_oid
+ COPY x FROM '@abs_builddir@/data/hash.data' (FORMAT raw_binary);
+ \COPY x FROM '@abs_builddir@/data/hash.data' (FORMAT raw_binary)
+ SELECT md5(a), length(a) FROM x;
+                md5                | length 
+ ----------------------------------+--------
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+ (3 rows)
+ 
+ TRUNCATE x;
+ COPY x FROM '@abs_builddir@/data/hash.data' (FORMAT raw_binary);
+ COPY x TO '@abs_builddir@/results/hash2.data' (FORMAT raw_binary);
+ \COPY x TO '@abs_builddir@/results/hash3.data' (FORMAT raw_binary)
+ -- read again
+ COPY x FROM '@abs_builddir@/results/hash2.data' (FORMAT raw_binary);
+ \COPY x FROM '@abs_builddir@/results/hash3.data' (FORMAT raw_binary)
+ -- cross
+ COPY x FROM '@abs_builddir@/results/hash3.data' (FORMAT raw_binary);
+ \COPY x FROM '@abs_builddir@/results/hash2.data' (FORMAT raw_binary)
+ SELECT md5(a), length(a) FROM x;
+                md5                | length 
+ ----------------------------------+--------
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+ (5 rows)
+ 
+ DROP TABLE x;
+ -- insert into multicolumn table
+ CREATE TABLE x(id serial, a bytea, b bytea);
+ -- should fail, too much columns
+ COPY x FROM '@abs_builddir@/results/hash2.data' (FORMAT raw_binary);
+ ERROR:  Single column result/target is required in RAW_TEXT/RAW_BINARY mode
+ -- should work
+ COPY x(a) FROM '@abs_builddir@/results/hash2.data' (FORMAT raw_binary);
+ COPY x(b) FROM '@abs_builddir@/results/hash2.data' (FORMAT raw_binary);
+ SELECT id, md5(a), md5(b) FROM x;
+  id |               md5                |               md5                
+ ----+----------------------------------+----------------------------------
+   1 | e446fe6ea5a347e69670633412c7f8cb | 
+   2 |                                  | e446fe6ea5a347e69670633412c7f8cb
+ (2 rows)
+ 
+ -- test raw_text
+ COPY (SELECT a FROM x WHERE id = 1) TO '@abs_builddir@/results/hash4.data' (FORMAT raw_text);
+ COPY x(a) FROM '@abs_builddir@/results/hash4.data' (FORMAT raw_text);
+ SELECT id, md5(a) FROM x WHERE id = lastval();
+  id |               md5                
+ ----+----------------------------------
+   3 | e446fe6ea5a347e69670633412c7f8cb
+ (1 row)
+ 
+ DROP TABLE x;
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
new file mode 100644
index 39a9deb..7e22ee4
*** a/src/test/regress/sql/copy2.sql
--- b/src/test/regress/sql/copy2.sql
*************** DROP FUNCTION truncate_in_subxact();
*** 333,335 ****
--- 333,348 ----
  DROP TABLE x, y;
  DROP FUNCTION fn_x_before();
  DROP FUNCTION fn_x_after();
+ 
+ CREATE TABLE x(a bytea);
+ INSERT INTO x VALUES('\x41484f4a0a');
+ INSERT INTO x VALUES('\x41484f4a0a');
+ 
+ -- should to fail
+ COPY (SELECT a,a FROM x LIMIT 1) TO STDOUT (FORMAT raw_binary);
+ COPY (SELECT a FROM x) TO STDOUT (FORMAT raw_binary);
+ 
+ -- should be ok
+ COPY (SELECT a FROM x LIMIT 1) TO STDOUT (FORMAT raw_binary);
+ 
+ DROP TABLE x;
#64Robert Haas
robertmhaas@gmail.com
In reply to: Pavel Stehule (#63)
Re: raw output from copy

On Tue, Apr 5, 2016 at 4:45 AM, Pavel Stehule <pavel.stehule@gmail.com> wrote:

here is cleaned/finished previous implementation of RAW_TEXT/RAW_BINARY
formats for COPY statements.

The RAW with text formats means unescaped data, but with correct encoding -
input/output is realised with input/output function. RAW binary means
content produced/received by sending/received functions.

Now both directions (input/output) working well

Some examples of expected usage:

copy (select xmlelement(name foo, 'hello')) to stdout (format raw_binary,
encoding 'latin2');

create table avatars(id serial, picture bytea);
\copy avatars(picture) from ~/images/foo.jpg (format raw_binary);
select lastval();

create table doc(id serial, txt text);
\copy doc(txt) from ~/files/aaa.txt (format raw_text, encoding 'latin2');
select lastval();

As much as I know you and some other people would like it to be
otherwise, this patch clearly does not have a sufficient degree of
consensus to justify committing it to PostgreSQL 9.6. I'm marking it
Returned with Feedback.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#65Pavel Stehule
pavel.stehule@gmail.com
In reply to: Robert Haas (#64)
Re: raw output from copy

2016-04-08 20:13 GMT+02:00 Robert Haas <robertmhaas@gmail.com>:

On Tue, Apr 5, 2016 at 4:45 AM, Pavel Stehule <pavel.stehule@gmail.com>
wrote:

here is cleaned/finished previous implementation of RAW_TEXT/RAW_BINARY
formats for COPY statements.

The RAW with text formats means unescaped data, but with correct

encoding -

input/output is realised with input/output function. RAW binary means
content produced/received by sending/received functions.

Now both directions (input/output) working well

Some examples of expected usage:

copy (select xmlelement(name foo, 'hello')) to stdout (format raw_binary,
encoding 'latin2');

create table avatars(id serial, picture bytea);
\copy avatars(picture) from ~/images/foo.jpg (format raw_binary);
select lastval();

create table doc(id serial, txt text);
\copy doc(txt) from ~/files/aaa.txt (format raw_text, encoding 'latin2');
select lastval();

As much as I know you and some other people would like it to be
otherwise, this patch clearly does not have a sufficient degree of
consensus to justify committing it to PostgreSQL 9.6. I'm marking it
Returned with Feedback.

ok, I'll try to complete this patch

Regards

Pavel

Show quoted text

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#66Andrew Dunstan
andrew@dunslane.net
In reply to: Robert Haas (#64)
Re: raw output from copy

On 04/08/2016 02:13 PM, Robert Haas wrote:

On Tue, Apr 5, 2016 at 4:45 AM, Pavel Stehule <pavel.stehule@gmail.com> wrote:

here is cleaned/finished previous implementation of RAW_TEXT/RAW_BINARY
formats for COPY statements.

The RAW with text formats means unescaped data, but with correct encoding -
input/output is realised with input/output function. RAW binary means
content produced/received by sending/received functions.

Now both directions (input/output) working well

Some examples of expected usage:

copy (select xmlelement(name foo, 'hello')) to stdout (format raw_binary,
encoding 'latin2');

create table avatars(id serial, picture bytea);
\copy avatars(picture) from ~/images/foo.jpg (format raw_binary);
select lastval();

create table doc(id serial, txt text);
\copy doc(txt) from ~/files/aaa.txt (format raw_text, encoding 'latin2');
select lastval();

As much as I know you and some other people would like it to be
otherwise, this patch clearly does not have a sufficient degree of
consensus to justify committing it to PostgreSQL 9.6. I'm marking it
Returned with Feedback.

I should add that I've been thinking about this some more, and that I
now agree that something should be done to support this at the SQL
level, mainly so that clients can manage very large pieces of data in a
stream-oriented fashion rather than having to marshall the data in
memory to load/unload via INSERT/SELECT. Anything that is client-side
only is likely to have this memory issue.

At the same time I'm still not entirely convinced that COPY is a good
vehicle for this. It's designed for bulk records, and already quite
complex. Maybe we need something new that uses the COPY protocol but is
more specifically tailored for loading or sending large singleton pieces
of data.

cheers

andrew

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#67Pavel Stehule
pavel.stehule@gmail.com
In reply to: Andrew Dunstan (#66)
Re: raw output from copy

2016-04-08 20:54 GMT+02:00 Andrew Dunstan <andrew@dunslane.net>:

On 04/08/2016 02:13 PM, Robert Haas wrote:

On Tue, Apr 5, 2016 at 4:45 AM, Pavel Stehule <pavel.stehule@gmail.com>
wrote:

here is cleaned/finished previous implementation of RAW_TEXT/RAW_BINARY
formats for COPY statements.

The RAW with text formats means unescaped data, but with correct
encoding -
input/output is realised with input/output function. RAW binary means
content produced/received by sending/received functions.

Now both directions (input/output) working well

Some examples of expected usage:

copy (select xmlelement(name foo, 'hello')) to stdout (format raw_binary,
encoding 'latin2');

create table avatars(id serial, picture bytea);
\copy avatars(picture) from ~/images/foo.jpg (format raw_binary);
select lastval();

create table doc(id serial, txt text);
\copy doc(txt) from ~/files/aaa.txt (format raw_text, encoding 'latin2');
select lastval();

As much as I know you and some other people would like it to be
otherwise, this patch clearly does not have a sufficient degree of
consensus to justify committing it to PostgreSQL 9.6. I'm marking it
Returned with Feedback.

I should add that I've been thinking about this some more, and that I now
agree that something should be done to support this at the SQL level,
mainly so that clients can manage very large pieces of data in a
stream-oriented fashion rather than having to marshall the data in memory
to load/unload via INSERT/SELECT. Anything that is client-side only is
likely to have this memory issue.

At the same time I'm still not entirely convinced that COPY is a good
vehicle for this. It's designed for bulk records, and already quite
complex. Maybe we need something new that uses the COPY protocol but is
more specifically tailored for loading or sending large singleton pieces of
data.

Now there is a little more time to think about it. But it is hard to
design something simpler than the COPY syntax that still supports both
directions.

My implementation has the same limit as COPY BINARY - it isn't worse. It
should be good enough for VARLENA types, which should not be larger than 1GB.
It is not designed as a LOB replacement.

Regards

Pavel

Show quoted text

cheers

andrew

#68Ants Aasma
ants.aasma@eesti.ee
In reply to: Pavel Stehule (#1)
Re: raw output from copy

On 8 Apr 2016 9:14 pm, "Pavel Stehule" <pavel.stehule@gmail.com> wrote:

2016-04-08 20:54 GMT+02:00 Andrew Dunstan <andrew@dunslane.net>:

I should add that I've been thinking about this some more, and that I now agree that something should be done to support this at the SQL level, mainly so that clients can manage very large pieces of data in a stream-oriented fashion rather than having to marshall the data in memory to load/unload via INSERT/SELECT. Anything that is client-side only is likely to have this memory issue.

At the same time I'm still not entirely convinced that COPY is a good vehicle for this. It's designed for bulk records, and already quite complex. Maybe we need something new that uses the COPY protocol but is more specifically tailored for loading or sending large singleton pieces of data.

Now it is little bit more time to think more about. But It is hard to design some more simpler than is COPY syntax. What will support both directions.

Sorry for arriving late and adding to the bikeshedding. Maybe the
answer is to make COPY pluggable. It seems to me that it would be
relatively straightforward to add an extension mechanism for copy
output and input plugins that could support any format expressible as
a binary stream. Raw output would then be an almost trivial plugin.
Others could implement JSON, protocol buffers, Redis bulk load, BSON,
ASN.1 or whatever else serialisation format du jour. It will still
have the same backwards compatibility issues as adding the raw output,
but the payoff is greater.

Regards,
Ants Aasma

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#69Pavel Stehule
pavel.stehule@gmail.com
In reply to: Ants Aasma (#68)
Re: raw output from copy

2016-04-12 12:22 GMT+02:00 Ants Aasma <ants.aasma@eesti.ee>:

On 8 Apr 2016 9:14 pm, "Pavel Stehule" <pavel.stehule@gmail.com> wrote:

2016-04-08 20:54 GMT+02:00 Andrew Dunstan <andrew@dunslane.net>:

I should add that I've been thinking about this some more, and that I

now agree that something should be done to support this at the SQL level,
mainly so that clients can manage very large pieces of data in a
stream-oriented fashion rather than having to marshall the data in memory
to load/unload via INSERT/SELECT. Anything that is client-side only is
likely to have this memory issue.

At the same time I'm still not entirely convinced that COPY is a good

vehicle for this. It's designed for bulk records, and already quite
complex. Maybe we need something new that uses the COPY protocol but is
more specifically tailored for loading or sending large singleton pieces of
data.

Now it is little bit more time to think more about. But It is hard to

design some more simpler than is COPY syntax. What will support both
directions.

Sorry for arriving late and adding to the bikeshedding. Maybe the
answer is to make COPY pluggable. It seems to me that it would be
relatively straightforward to add an extension mechanism for copy
output and input plugins that could support any format expressible as
a binary stream. Raw output would then be an almost trivial plugin.
Others could implement JSON, protocol buffers, Redis bulk load, BSON,
ASN.1 or whatever else serialisation format du jour. It will still
have the same backwards compatibility issues as adding the raw output,
but the payoff is greater.

I had an idea about additional options for COPY RAW statements. One could be
a CAST function. These CAST functions could be used for any format.

COPY has two parts - the client side and the server side. Currently we cannot
extend libpq, and we cannot extend psql. So we have to send data to the client
in the target format, and all transformations should be done on the server side.
Personally, I strongly prefer writing Linux server-side extensions over
MSWin client-side extensions. The client (psql) is able to use a pipe - so
any client-side transformation can be done outside psql.

Regards

Pavel

Show quoted text

Regards,
Ants Aasma

#70Tom Lane
tgl@sss.pgh.pa.us
In reply to: Pavel Stehule (#69)
Re: raw output from copy

Pavel Stehule <pavel.stehule@gmail.com> writes:

I had a idea about additional options of COPY RAW statements. One can be
CAST function. These CAST functions can be used to any for any format.

Uh, what? CAST() is not about external representations of values, and
overloading it for that purpose doesn't seem like a particularly good
idea: you'd have to figure out what the conversions meant inside SQL as
well as externally. Also, maybe I missed something, but a different
representation of individual data values within a COPY wasn't what we
were after here.

regards, tom lane

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#71Pavel Stehule
pavel.stehule@gmail.com
In reply to: Tom Lane (#70)
Re: raw output from copy

2016-04-12 22:48 GMT+02:00 Tom Lane <tgl@sss.pgh.pa.us>:

Pavel Stehule <pavel.stehule@gmail.com> writes:

I had a idea about additional options of COPY RAW statements. One can be
CAST function. These CAST functions can be used to any for any format.

Uh, what? CAST() is not about external representations of values, and
overloading it for that purpose doesn't seem like a particularly good
idea: you'd have to figure out what the conversions meant inside SQL as
well as externally. Also, maybe I missed something, but a different
representation of individual data values within a COPY wasn't what we
were after here.

I didn't think about this idea too deeply - so there can be more than one
problem. Moreover, I thought about it before you designed RAW_TEXT mode, which
can cover this use case too.

Originally I had only RAW mode, which can be difficult for JSONB, so my
solution was

COPY target(jsonbcol) FROM jsondata OPTIONS(RAW, CAST(json_to_jsonb)).

Now this idea is obsolete, because anybody can do

COPY target(jsonbcol) FROM jsondata OPTIONS(RAW_TEXT)

Which is much simpler.

Using explicit casts in the COPY statement was motivated by a possible
requirement to do some manipulation of the data before storing it in a table.
It is just an idea, and probably a wrong one.

I don't want to increase the complexity of the COPY statement too much. My goal
is to enhance COPY so that single objects can be imported simply. If you need
something more complex, you can write a simple application that uses
classic COPY or COPY RAW again (because it doesn't require escaping).

Regards

Pavel

Show quoted text

regards, tom lane

#72Pavel Stehule
pavel.stehule@gmail.com
In reply to: Pavel Stehule (#63)
1 attachment(s)
Re: raw output from copy

Hi

2016-04-05 10:45 GMT+02:00 Pavel Stehule <pavel.stehule@gmail.com>:

Hi

here is cleaned/finished previous implementation of RAW_TEXT/RAW_BINARY
formats for COPY statements.

The RAW with text formats means unescaped data, but with correct encoding
- input/output is realised with input/output function. RAW binary means
content produced/received by sending/received functions.

Now both directions (input/output) working well

Some examples of expected usage:

copy (select xmlelement(name foo, 'hello')) to stdout (format raw_binary,
encoding 'latin2');

create table avatars(id serial, picture bytea);
\copy avatars(picture) from ~/images/foo.jpg (format raw_binary);
select lastval();

create table doc(id serial, txt text);
\copy doc(txt) from ~/files/aaa.txt (format raw_text, encoding 'latin2');
select lastval();

Regards

Pavel

I am sending a fresh version of the COPY RAW patch.

There is a new client regression test, as requested by Tom.

Note: I thought about another solution based on binary parameters and binary
result support in psql. Something like:

INSERT INTO foo(a) VALUES($1)
\gpush filename

SELECT a FROM foo
\gpop filename

but it is less intuitive and doesn't work with stdin/stdout - so it is
significantly weaker than the COPY-based solution for scripting from the shell.
Moreover, a \g***** solution is still possible if it is requested in the future.

Regards

Pavel

[pavel@nemesis ~]$ cat avatar.gif | psql -Xq -At -c "copy xx(b) from stdin
(format raw_text)" -c "select lastval()" postgres
313

Attachments:

copy-raw-2016-07-16.patchtext/x-patch; charset=US-ASCII; name=copy-raw-2016-07-16.patchDownload
commit d62c1ff8dee2324ce1fe7765c2d015e68f5f923a
Author: Pavel Stehule <pavel.stehule@gooddata.com>
Date:   Sat Jul 16 10:35:25 2016 +0200

    with regress tests

diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml
index 6285dd0..4c6cacb 100644
--- a/doc/src/sgml/libpq.sgml
+++ b/doc/src/sgml/libpq.sgml
@@ -3226,8 +3226,9 @@ int PQfformat(const PGresult *res,
 
       <para>
        Format code zero indicates textual data representation, while format
-       code one indicates binary representation.  (Other codes are reserved
-       for future definition.)
+       code one indicates binary representation. Format code two indicates
+       raw_text representation and format code three indicates raw_binary
+       representation (Other codes are reserved for future definition.)
       </para>
      </listitem>
     </varlistentry>
@@ -3557,6 +3558,26 @@ typedef struct
    </para>
 
    <variablelist>
+    <varlistentry id="libpq-pqcopyformat">
+     <term>
+      <function>PQcopyFormat</function>
+      <indexterm>
+       <primary>PQcopyFormat</primary>
+      </indexterm>
+     </term>
+
+     <listitem>
+      <para>
+       Format code zero indicates textual data representation, format one
+       indicates binary representation, format two indicates raw
+       representation.
+<synopsis>
+int PQcopyFormat(PGresult *res);
+</synopsis>
+      </para>
+     </listitem>
+    </varlistentry>
+
     <varlistentry id="libpq-pqcmdstatus">
      <term>
       <function>PQcmdStatus</function>
diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml
index 9c96d8f..adcff46 100644
--- a/doc/src/sgml/protocol.sgml
+++ b/doc/src/sgml/protocol.sgml
@@ -3239,6 +3239,7 @@ CopyInResponse (B)
                 characters, etc).
                 1 indicates the overall copy format is binary (similar
                 to DataRow format).
+                2 indicates the overall copy format is raw.
                 See <xref linkend="sql-copy">
                 for more information.
 </para>
@@ -3262,8 +3263,9 @@ CopyInResponse (B)
 <listitem>
 <para>
                 The format codes to be used for each column.
-                Each must presently be zero (text) or one (binary).
-                All must be zero if the overall copy format is textual.
+                Each must be zero (text), one (binary), two (raw_text)
+                or three (raw_binary). All must be zero if the overall
+                copy format is textual.
 </para>
 </listitem>
 </varlistentry>
@@ -3313,7 +3315,8 @@ CopyOutResponse (B)
                 is textual (rows separated by newlines, columns
                 separated by separator characters, etc). 1 indicates
                 the overall copy format is binary (similar to DataRow
-                format). See <xref linkend="sql-copy"> for more information.
+                format). 2 indicates raw_text or raw_binary format.
+                See <xref linkend="sql-copy"> for more information.
 </para>
 </listitem>
 </varlistentry>
@@ -3335,8 +3338,9 @@ CopyOutResponse (B)
 <listitem>
 <para>
                 The format codes to be used for each column.
-                Each must presently be zero (text) or one (binary).
-                All must be zero if the overall copy format is textual.
+                Each must be zero (text), one (binary), two (raw_text)
+                or three (raw_binary). All must be zero if the overall
+                copy format is textual.
 </para>
 </listitem>
 </varlistentry>
diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 07e2f45..4e339e4 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -197,7 +197,9 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
       Selects the data format to be read or written:
       <literal>text</>,
       <literal>csv</> (Comma Separated Values),
-      or <literal>binary</>.
+      <literal>binary</>,
+      <literal>raw_text</>
+      or <literal>raw_binary</>.
       The default is <literal>text</>.
      </para>
     </listitem>
@@ -888,6 +890,44 @@ OIDs to be shown as null if that ever proves desirable.
     </para>
    </refsect3>
   </refsect2>
+
+  <refsect2>
+     <title>Raw_text/raw_binary Format</title>
+
+   <para>
+    The <literal>raw_text</literal> format option causes all data to be
+    stored/read as one text value. This format doesn't use any metadata
+    - only raw data are exported or imported.
+   </para>
+
+   <para>
+    The <literal>raw_binary</literal> format option causes all data to be
+    stored/read as binary format rather than as text. It shares format
+    for data with <literal>binary</literal> format. This format doesn't
+    use any metadata - only row data in network byte order are exported
+    or imported.
+   </para>
+
+   <para>
+    Because this format doesn't support any delimiter, only one value
+    can be exported or imported. NULL values are not allowed.
+   </para>
+   <para>
+    The <literal>raw_binary</literal> format can be used for export or import
+    bytea values.
+<programlisting>
+COPY images(data) FROM '/usr1/proj/img/01.jpg' (FORMAT raw_binary);
+</programlisting>
+    It can be used successfully for export XML in different encoding
+    or import valid XML document with any supported encoding:
+<screen><![CDATA[
+SET client_encoding TO latin2;
+
+COPY (SELECT xmlelement(NAME data, 'Hello')) TO stdout (FORMAT raw_binary);
+<?xml version="1.0" encoding="LATIN2"?><data>Hello</data>
+]]></screen>
+   </para>
+  </refsect2>
  </refsect1>
 
  <refsect1>
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index f45b330..c63d052 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -110,6 +110,7 @@ typedef struct CopyStateData
 	char	   *filename;		/* filename, or NULL for STDIN/STDOUT */
 	bool		is_program;		/* is 'filename' a program to popen? */
 	bool		binary;			/* binary format? */
+	bool		raw;			/* raw mode? */
 	bool		oids;			/* include OIDs? */
 	bool		freeze;			/* freeze rows on loading? */
 	bool		csv_mode;		/* Comma Separated Value format? */
@@ -342,12 +343,27 @@ SendCopyBegin(CopyState cstate)
 		/* new way */
 		StringInfoData buf;
 		int			natts = list_length(cstate->attnumlist);
-		int16		format = (cstate->binary ? 1 : 0);
+		int16		format;
+		int			mode;
 		int			i;
 
 		pq_beginmessage(&buf, 'H');
-		pq_sendbyte(&buf, format);		/* overall format */
+
+		if (cstate->raw)
+			mode = 2;
+		else if (cstate->binary)
+			mode = 1;
+		else
+			mode = 0;
+
+		pq_sendbyte(&buf, mode);		/* overall mode */
 		pq_sendint(&buf, natts, 2);
+
+		if (!cstate->raw)
+			format = cstate->binary ? 1 : 0;
+		else
+			format = cstate->binary ? 3 : 2;
+
 		for (i = 0; i < natts; i++)
 			pq_sendint(&buf, format, 2);		/* per-column formats */
 		pq_endmessage(&buf);
@@ -356,10 +372,10 @@ SendCopyBegin(CopyState cstate)
 	else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
 	{
 		/* old way */
-		if (cstate->binary)
+		if (cstate->binary || cstate->raw)
 			ereport(ERROR,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-			errmsg("COPY BINARY is not supported to stdout or from stdin")));
+			errmsg("COPY BINARY or COPY RAW_TEXT/RAW_BINARY is not supported to stdout or from stdin")));
 		pq_putemptymessage('H');
 		/* grottiness needed for old COPY OUT protocol */
 		pq_startcopyout();
@@ -368,10 +384,10 @@ SendCopyBegin(CopyState cstate)
 	else
 	{
 		/* very old way */
-		if (cstate->binary)
+		if (cstate->binary || cstate->raw)
 			ereport(ERROR,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-			errmsg("COPY BINARY is not supported to stdout or from stdin")));
+			errmsg("COPY BINARY or COPY RAW_TEXT/RAW_BINARY is not supported to stdout or from stdin")));
 		pq_putemptymessage('B');
 		/* grottiness needed for old COPY OUT protocol */
 		pq_startcopyout();
@@ -387,12 +403,27 @@ ReceiveCopyBegin(CopyState cstate)
 		/* new way */
 		StringInfoData buf;
 		int			natts = list_length(cstate->attnumlist);
-		int16		format = (cstate->binary ? 1 : 0);
+		int16		format;
+		int			mode;
 		int			i;
 
 		pq_beginmessage(&buf, 'G');
-		pq_sendbyte(&buf, format);		/* overall format */
+
+		if (cstate->raw)
+			mode = 2;
+		else if (cstate->binary)
+			mode = 1;
+		else
+			mode = 0;
+
+		pq_sendbyte(&buf, mode);		/* overall format */
 		pq_sendint(&buf, natts, 2);
+
+		if (!cstate->raw)
+			format = cstate->binary ? 1 : 0;
+		else
+			format = cstate->binary ? 3 : 2;
+
 		for (i = 0; i < natts; i++)
 			pq_sendint(&buf, format, 2);		/* per-column formats */
 		pq_endmessage(&buf);
@@ -402,10 +433,10 @@ ReceiveCopyBegin(CopyState cstate)
 	else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
 	{
 		/* old way */
-		if (cstate->binary)
+		if (cstate->binary || cstate->raw)
 			ereport(ERROR,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-			errmsg("COPY BINARY is not supported to stdout or from stdin")));
+			errmsg("COPY BINARY or COPY RAW_TEXT/RAW_BINARY is not supported to stdout or from stdin")));
 		pq_putemptymessage('G');
 		/* any error in old protocol will make us lose sync */
 		pq_startmsgread();
@@ -414,10 +445,10 @@ ReceiveCopyBegin(CopyState cstate)
 	else
 	{
 		/* very old way */
-		if (cstate->binary)
+		if (cstate->binary || cstate->raw)
 			ereport(ERROR,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-			errmsg("COPY BINARY is not supported to stdout or from stdin")));
+			errmsg("COPY BINARY or COPY RAW_TEXT/RAW_BINARY is not supported to stdout or from stdin")));
 		pq_putemptymessage('D');
 		/* any error in old protocol will make us lose sync */
 		pq_startmsgread();
@@ -482,7 +513,7 @@ CopySendEndOfRow(CopyState cstate)
 	switch (cstate->copy_dest)
 	{
 		case COPY_FILE:
-			if (!cstate->binary)
+			if (!cstate->binary && !cstate->raw)
 			{
 				/* Default line termination depends on platform */
 #ifndef WIN32
@@ -526,6 +557,9 @@ CopySendEndOfRow(CopyState cstate)
 			}
 			break;
 		case COPY_OLD_FE:
+			/* This old protocol doesn't allow RAW_TEXT/RAW_BINARY */
+			Assert(!cstate->raw);
+
 			/* The FE/BE protocol uses \n as newline for all platforms */
 			if (!cstate->binary)
 				CopySendChar(cstate, '\n');
@@ -540,7 +574,7 @@ CopySendEndOfRow(CopyState cstate)
 			break;
 		case COPY_NEW_FE:
 			/* The FE/BE protocol uses \n as newline for all platforms */
-			if (!cstate->binary)
+			if (!cstate->binary && !cstate->raw)
 				CopySendChar(cstate, '\n');
 
 			/* Dump the accumulated row as one CopyData message */
@@ -766,6 +800,38 @@ CopyLoadRawBuf(CopyState cstate)
 	return (inbytes > 0);
 }
 
+/*
+ * CopyLoadallRawBuf loads all content into raw_buf.
+ *
+ * This routine is used in raw_text/raw_binary mode. If original RAW_BUF_SIZE is not
+ * enough, then the buffer is enlarged.
+ */
+static void
+CopyLoadallRawBuf(CopyState cstate)
+{
+	int			nbytes = 0;
+	int			inbytes;
+	Size		raw_buf_size = RAW_BUF_SIZE;
+
+	do
+	{
+		/* hold enough space for one data packet */
+		if ((raw_buf_size - nbytes - 1) < 8 * 1024)
+		{
+			raw_buf_size += RAW_BUF_SIZE;
+			cstate->raw_buf = repalloc(cstate->raw_buf, raw_buf_size);
+		}
+
+		inbytes = CopyGetData(cstate, cstate->raw_buf + nbytes, 1, raw_buf_size - nbytes - 1);
+		nbytes += inbytes;
+	}
+	while (inbytes > 0);
+
+	cstate->raw_buf[nbytes] = '\0';
+	cstate->raw_buf_index = 0;
+	cstate->raw_buf_len = nbytes;
+}
+
 
 /*
  *	 DoCopy executes the SQL COPY statement
@@ -1013,6 +1079,13 @@ ProcessCopyOptions(CopyState cstate,
 				cstate->csv_mode = true;
 			else if (strcmp(fmt, "binary") == 0)
 				cstate->binary = true;
+			else if (strcmp(fmt, "raw_text") == 0)
+				cstate->raw = true;
+			else if (strcmp(fmt, "raw_binary") == 0)
+			{
+				cstate->binary = true;
+				cstate->raw = true;
+			}
 			else
 				ereport(ERROR,
 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
@@ -1162,16 +1235,21 @@ ProcessCopyOptions(CopyState cstate,
 	 * Check for incompatible options (must do these two before inserting
 	 * defaults)
 	 */
-	if (cstate->binary && cstate->delim)
+	if ((cstate->binary || cstate->raw)  && cstate->delim)
 		ereport(ERROR,
 				(errcode(ERRCODE_SYNTAX_ERROR),
 				 errmsg("cannot specify DELIMITER in BINARY mode")));
 
-	if (cstate->binary && cstate->null_print)
+	if ((cstate->binary || cstate->raw) && cstate->null_print)
 		ereport(ERROR,
 				(errcode(ERRCODE_SYNTAX_ERROR),
 				 errmsg("cannot specify NULL in BINARY mode")));
 
+	if (cstate->raw && cstate->oids)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("cannot specify OIDS in RAW_TEXT/RAW_BINARY mode")));
+
 	/* Set defaults for omitted options */
 	if (!cstate->delim)
 		cstate->delim = cstate->csv_mode ? "," : "\t";
@@ -1608,6 +1686,12 @@ BeginCopy(bool is_from,
 		}
 	}
 
+	/* No more columns are allowed in RAW mode */
+	if (cstate->raw && list_length(cstate->attnumlist) > 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+		 errmsg("Single column result/target is required in RAW_TEXT/RAW_BINARY mode")));
+
 	/* Use client encoding when ENCODING option is not specified. */
 	if (cstate->file_encoding < 0)
 		cstate->file_encoding = pg_get_client_encoding();
@@ -1899,7 +1983,7 @@ CopyTo(CopyState cstate)
 											   ALLOCSET_DEFAULT_INITSIZE,
 											   ALLOCSET_DEFAULT_MAXSIZE);
 
-	if (cstate->binary)
+	if (cstate->binary && !cstate->raw)
 	{
 		/* Generate header for a binary copy */
 		int32		tmp;
@@ -1931,6 +2015,9 @@ CopyTo(CopyState cstate)
 		{
 			bool		hdr_delim = false;
 
+			/* raw_text/raw_binary mode is not allowed here */
+			Assert(!cstate->raw);
+
 			foreach(cur, cstate->attnumlist)
 			{
 				int			attnum = lfirst_int(cur);
@@ -1967,6 +2054,10 @@ CopyTo(CopyState cstate)
 		{
 			CHECK_FOR_INTERRUPTS();
 
+			/* stop quickly in raw_text/raw_binary when more rows is detected */
+			if (cstate->raw && processed > 0)
+				break;
+
 			/* Deconstruct the tuple ... faster than repeated heap_getattr */
 			heap_deform_tuple(tuple, tupDesc, values, nulls);
 
@@ -1983,11 +2074,25 @@ CopyTo(CopyState cstate)
 	else
 	{
 		/* run the plan --- the dest receiver will send tuples */
-		ExecutorRun(cstate->queryDesc, ForwardScanDirection, 0L);
+		ExecutorRun(cstate->queryDesc, ForwardScanDirection, cstate->raw ? 2L : 0L);
 		processed = ((DR_copy *) cstate->queryDesc->dest)->processed;
 	}
 
-	if (cstate->binary)
+	/* raw_text/raw_binary requires exactly one row */
+	if (cstate->raw)
+	{
+		if (processed > 1)
+			ereport(ERROR,
+					(errcode(ERRCODE_TOO_MANY_ROWS),
+					 errmsg("single row result is required by RAW_TEXT/RAW_BINARY mode")));
+
+		if (processed == 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_NO_DATA_FOUND),
+					 errmsg("single row result is required by RAW_TEXT/RAW_BINARY mode")));
+	}
+
+	if (cstate->binary && !cstate->raw)
 	{
 		/* Generate trailer for a binary copy */
 		CopySendInt16(cstate, -1);
@@ -2015,28 +2120,31 @@ CopyOneRowTo(CopyState cstate, Oid tupleOid, Datum *values, bool *nulls)
 	MemoryContextReset(cstate->rowcontext);
 	oldcontext = MemoryContextSwitchTo(cstate->rowcontext);
 
-	if (cstate->binary)
+	if (!cstate->raw)
 	{
-		/* Binary per-tuple header */
-		CopySendInt16(cstate, list_length(cstate->attnumlist));
-		/* Send OID if wanted --- note attnumlist doesn't include it */
-		if (cstate->oids)
+		if (cstate->binary)
 		{
-			/* Hack --- assume Oid is same size as int32 */
-			CopySendInt32(cstate, sizeof(int32));
-			CopySendInt32(cstate, tupleOid);
+			/* Binary per-tuple header */
+			CopySendInt16(cstate, list_length(cstate->attnumlist));
+			/* Send OID if wanted --- note attnumlist doesn't include it */
+			if (cstate->oids)
+			{
+				/* Hack --- assume Oid is same size as int32 */
+				CopySendInt32(cstate, sizeof(int32));
+				CopySendInt32(cstate, tupleOid);
+			}
 		}
-	}
-	else
-	{
-		/* Text format has no per-tuple header, but send OID if wanted */
-		/* Assume digits don't need any quoting or encoding conversion */
-		if (cstate->oids)
+		else
 		{
-			string = DatumGetCString(DirectFunctionCall1(oidout,
-												ObjectIdGetDatum(tupleOid)));
-			CopySendString(cstate, string);
-			need_delim = true;
+			/* Text format has no per-tuple header, but send OID if wanted */
+			/* Assume digits don't need any quoting or encoding conversion */
+			if (cstate->oids)
+			{
+				string = DatumGetCString(DirectFunctionCall1(oidout,
+													ObjectIdGetDatum(tupleOid)));
+				CopySendString(cstate, string);
+				need_delim = true;
+			}
 		}
 	}
 
@@ -2055,6 +2163,11 @@ CopyOneRowTo(CopyState cstate, Oid tupleOid, Datum *values, bool *nulls)
 
 		if (isnull)
 		{
+			if (cstate->raw)
+				ereport(ERROR,
+						(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+						 errmsg("NULL value is not allowed in RAW_TEXT/RAW_BINARY mode")));
+
 			if (!cstate->binary)
 				CopySendString(cstate, cstate->null_print_client);
 			else
@@ -2062,7 +2175,72 @@ CopyOneRowTo(CopyState cstate, Oid tupleOid, Datum *values, bool *nulls)
 		}
 		else
 		{
-			if (!cstate->binary)
+			if (cstate->raw)
+			{
+				const void *content;
+				int size;
+			
+				if (!cstate->binary)
+				{
+					string = OutputFunctionCall(&out_functions[attnum - 1],
+												value);
+
+					/* Transcode to the file encoding, but without any escaping */
+					if (cstate->need_transcoding)
+						content = pg_server_to_any(string, strlen(string), cstate->file_encoding);
+					else
+						content = string;
+
+					size = strlen((const char *) content);
+				}
+				else
+				{
+					bytea *outputbytes;
+
+					/*
+					 * Some binary output functions can depend on the client encoding;
+					 * the binary output of xml is a good example. Set client_encoding
+					 * temporarily while the output function executes.
+					 */
+					if (cstate->need_transcoding)
+					{
+						int		old_server_encoding = pg_get_client_encoding();
+						volatile bool reset_encoding = false;
+
+						PG_TRY();
+						{
+							/* We don't expect an error, because encoding was checked before */
+							if (PrepareClientEncoding(cstate->file_encoding) < 0)
+								elog(ERROR, "PrepareClientEncoding(%d) failed", cstate->file_encoding);
+
+							SetClientEncoding(cstate->file_encoding);
+							reset_encoding = true;
+
+							outputbytes = SendFunctionCall(&out_functions[attnum - 1],
+													   value);
+							SetClientEncoding(old_server_encoding);
+						}
+						PG_CATCH();
+						{
+							if (reset_encoding)
+								SetClientEncoding(old_server_encoding);
+							PG_RE_THROW();
+						}
+						PG_END_TRY();
+					}
+					else
+					{
+						outputbytes = SendFunctionCall(&out_functions[attnum - 1],
+												   value);
+					}
+					content = VARDATA(outputbytes);
+					size = VARSIZE(outputbytes) - VARHDRSZ;
+				}
+
+				/* Send only content in RAW_TEXT/RAW_BINARY mode */
+				CopySendData(cstate, content, size);
+			}
+			else if (!cstate->binary)
 			{
 				string = OutputFunctionCall(&out_functions[attnum - 1],
 											value);
@@ -2811,65 +2989,69 @@ BeginCopyFrom(Relation rel,
 		}
 	}
 
-	if (!cstate->binary)
+	/* The raw mode hasn't any header information */
+	if (!cstate->raw)
 	{
-		/* must rely on user to tell us... */
-		cstate->file_has_oids = cstate->oids;
-	}
-	else
-	{
-		/* Read and verify binary header */
-		char		readSig[11];
-		int32		tmp;
-
-		/* Signature */
-		if (CopyGetData(cstate, readSig, 11, 11) != 11 ||
-			memcmp(readSig, BinarySignature, 11) != 0)
-			ereport(ERROR,
-					(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
-					 errmsg("COPY file signature not recognized")));
-		/* Flags field */
-		if (!CopyGetInt32(cstate, &tmp))
-			ereport(ERROR,
-					(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
-					 errmsg("invalid COPY file header (missing flags)")));
-		cstate->file_has_oids = (tmp & (1 << 16)) != 0;
-		tmp &= ~(1 << 16);
-		if ((tmp >> 16) != 0)
-			ereport(ERROR,
-					(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
-				 errmsg("unrecognized critical flags in COPY file header")));
-		/* Header extension length */
-		if (!CopyGetInt32(cstate, &tmp) ||
-			tmp < 0)
-			ereport(ERROR,
-					(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
-					 errmsg("invalid COPY file header (missing length)")));
-		/* Skip extension header, if present */
-		while (tmp-- > 0)
+		if (!cstate->binary)
+		{
+			/* must rely on user to tell us... */
+			cstate->file_has_oids = cstate->oids;
+		}
+		else
 		{
-			if (CopyGetData(cstate, readSig, 1, 1) != 1)
+			/* Read and verify binary header */
+			char		readSig[11];
+			int32		tmp;
+
+			/* Signature */
+			if (CopyGetData(cstate, readSig, 11, 11) != 11 ||
+				memcmp(readSig, BinarySignature, 11) != 0)
+				ereport(ERROR,
+						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+						 errmsg("COPY file signature not recognized")));
+			/* Flags field */
+			if (!CopyGetInt32(cstate, &tmp))
 				ereport(ERROR,
 						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
-						 errmsg("invalid COPY file header (wrong length)")));
+						 errmsg("invalid COPY file header (missing flags)")));
+			cstate->file_has_oids = (tmp & (1 << 16)) != 0;
+			tmp &= ~(1 << 16);
+			if ((tmp >> 16) != 0)
+				ereport(ERROR,
+						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+					 errmsg("unrecognized critical flags in COPY file header")));
+			/* Header extension length */
+			if (!CopyGetInt32(cstate, &tmp) ||
+				tmp < 0)
+				ereport(ERROR,
+						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+						 errmsg("invalid COPY file header (missing length)")));
+			/* Skip extension header, if present */
+			while (tmp-- > 0)
+			{
+				if (CopyGetData(cstate, readSig, 1, 1) != 1)
+					ereport(ERROR,
+							(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+							 errmsg("invalid COPY file header (wrong length)")));
+			}
 		}
-	}
 
-	if (cstate->file_has_oids && cstate->binary)
-	{
-		getTypeBinaryInputInfo(OIDOID,
-							   &in_func_oid, &cstate->oid_typioparam);
-		fmgr_info(in_func_oid, &cstate->oid_in_function);
-	}
+		if (cstate->file_has_oids && cstate->binary)
+		{
+			getTypeBinaryInputInfo(OIDOID,
+								   &in_func_oid, &cstate->oid_typioparam);
+			fmgr_info(in_func_oid, &cstate->oid_in_function);
+		}
 
-	/* create workspace for CopyReadAttributes results */
-	if (!cstate->binary)
-	{
-		AttrNumber	attr_count = list_length(cstate->attnumlist);
-		int			nfields = cstate->file_has_oids ? (attr_count + 1) : attr_count;
+		/* create workspace for CopyReadAttributes results */
+		if (!cstate->binary)
+		{
+			AttrNumber	attr_count = list_length(cstate->attnumlist);
+			int			nfields = cstate->file_has_oids ? (attr_count + 1) : attr_count;
 
-		cstate->max_fields = nfields;
-		cstate->raw_fields = (char **) palloc(nfields * sizeof(char *));
+			cstate->max_fields = nfields;
+			cstate->raw_fields = (char **) palloc(nfields * sizeof(char *));
+		}
 	}
 
 	MemoryContextSwitchTo(oldcontext);
@@ -2968,7 +3150,54 @@ NextCopyFrom(CopyState cstate, ExprContext *econtext,
 	MemSet(values, 0, num_phys_attrs * sizeof(Datum));
 	MemSet(nulls, true, num_phys_attrs * sizeof(bool));
 
-	if (!cstate->binary)
+	if (cstate->raw)
+	{
+		int		m = linitial_int(cstate->attnumlist) - 1;
+
+		/* All content was read in first cycle */
+		if (++cstate->cur_lineno > 1)
+			return false;
+
+		CopyLoadallRawBuf(cstate);
+
+		cstate->cur_attname = NameStr(attr[m]->attname);
+
+		if (!cstate->binary)
+		{
+			char	   *cvt;
+
+			cvt = pg_any_to_server(cstate->raw_buf,
+								   cstate->raw_buf_len,
+								   cstate->file_encoding);
+
+			values[m] = InputFunctionCall(&in_functions[m],
+										  cvt,
+										  typioparams[m],
+										  attr[m]->atttypmod);
+		}
+		else
+		{
+			cstate->attribute_buf.data = cstate->raw_buf;
+			cstate->attribute_buf.len = cstate->raw_buf_len;
+			cstate->attribute_buf.cursor = 0;
+			cstate->raw_buf = NULL;
+
+			/* Call the column type's binary input converter */
+			values[m] = ReceiveFunctionCall(&in_functions[m], &cstate->attribute_buf,
+									 typioparams[m], attr[m]->atttypmod);
+
+			/* Trouble if it didn't eat the whole buffer */
+			if (cstate->attribute_buf.cursor != cstate->attribute_buf.len)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+						 errmsg("incorrect binary data format")));
+		}
+
+		nulls[m] = false;
+
+		cstate->cur_attname = NULL;
+	}
+	else if (!cstate->binary)
 	{
 		char	  **field_strings;
 		ListCell   *cur;
diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c
index 8469d9f..2c8d6f5 100644
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -1974,8 +1974,8 @@ psql_completion(const char *text, int start, int end)
 	/* Handle COPY [BINARY] <sth> FROM|TO filename */
 	else if (Matches4("COPY|\\copy", MatchAny, "FROM|TO", MatchAny) ||
 			 Matches5("COPY", "BINARY", MatchAny, "FROM|TO", MatchAny))
-		COMPLETE_WITH_LIST6("BINARY", "OIDS", "DELIMITER", "NULL", "CSV",
-							"ENCODING");
+		COMPLETE_WITH_LIST8("BINARY", "RAW_TEXT", "RAW_BINARY", "OIDS",
+							"DELIMITER", "NULL", "CSV", "ENCODING");
 
 	/* Handle COPY [BINARY] <sth> FROM|TO filename CSV */
 	else if (Matches5("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "CSV") ||
diff --git a/src/interfaces/libpq/exports.txt b/src/interfaces/libpq/exports.txt
index 21dd772..a2754f1 100644
--- a/src/interfaces/libpq/exports.txt
+++ b/src/interfaces/libpq/exports.txt
@@ -171,3 +171,4 @@ PQsslAttributeNames       168
 PQsslAttribute            169
 PQsetErrorContextVisibility 170
 PQresultVerboseErrorMessage 171
+PQcopyFormat              172
diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c
index 2621767..09967e9 100644
--- a/src/interfaces/libpq/fe-exec.c
+++ b/src/interfaces/libpq/fe-exec.c
@@ -155,6 +155,7 @@ PQmakeEmptyPGresult(PGconn *conn, ExecStatusType status)
 	result->resultStatus = status;
 	result->cmdStatus[0] = '\0';
 	result->binary = 0;
+	result->raw = 0;
 	result->events = NULL;
 	result->nEvents = 0;
 	result->errMsg = NULL;
@@ -256,8 +257,10 @@ PQsetResultAttrs(PGresult *res, int numAttributes, PGresAttDesc *attDescs)
 		if (!res->attDescs[i].name)
 			return FALSE;
 
-		if (res->attDescs[i].format == 0)
+		if (res->attDescs[i].format == 0 || res->attDescs[i].format == 2)
 			res->binary = 0;
+		if (res->attDescs[i].format == 2 || res->attDescs[i].format == 3)
+			res->raw = 1;
 	}
 
 	return TRUE;
@@ -2932,6 +2935,21 @@ PQcmdStatus(PGresult *res)
 }
 
 /*
+ * PQcopyFormat
+ *
+ * Returns the COPY mode recorded in the result:
+ * -1 signals an error (NULL result), 0 = text mode, 1 = binary mode, 2 = raw mode
+ */
+int
+PQcopyFormat(const PGresult *res)
+{
+	if (!res)
+		return -1;
+	/* raw wins over the text/binary flag: both RAW_* formats report 2 */
+	return res->raw ? 2 : res->binary;
+}
+
+/*
  * PQoidStatus -
  *	if the last command was an INSERT, return the oid string
  *	if not, return ""
diff --git a/src/interfaces/libpq/fe-protocol3.c b/src/interfaces/libpq/fe-protocol3.c
index 0b8c62f..1783844 100644
--- a/src/interfaces/libpq/fe-protocol3.c
+++ b/src/interfaces/libpq/fe-protocol3.c
@@ -1486,6 +1486,10 @@ getCopyStart(PGconn *conn, ExecStatusType copytype)
 		 */
 		format = (int) ((int16) format);
 		result->attDescs[i].format = format;
+
+		/* when any field uses raw format, then COPY RAW_* was used */
+		if (format == 2 || format == 3)
+			result->raw = true;
 	}
 
 	/* Success! */
diff --git a/src/interfaces/libpq/libpq-fe.h b/src/interfaces/libpq/libpq-fe.h
index 9ca0756..7984666 100644
--- a/src/interfaces/libpq/libpq-fe.h
+++ b/src/interfaces/libpq/libpq-fe.h
@@ -479,6 +479,7 @@ extern Oid	PQftype(const PGresult *res, int field_num);
 extern int	PQfsize(const PGresult *res, int field_num);
 extern int	PQfmod(const PGresult *res, int field_num);
 extern char *PQcmdStatus(PGresult *res);
+extern int	PQcopyFormat(const PGresult *res);
 extern char *PQoidStatus(const PGresult *res);	/* old and ugly */
 extern Oid	PQoidValue(const PGresult *res);	/* new and improved */
 extern char *PQcmdTuples(PGresult *res);
diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h
index 1183323..8fc4b04 100644
--- a/src/interfaces/libpq/libpq-int.h
+++ b/src/interfaces/libpq/libpq-int.h
@@ -180,6 +180,7 @@ struct pg_result
 	char		cmdStatus[CMDSTATUS_LEN];		/* cmd status from the query */
 	int			binary;			/* binary tuple values if binary == 1,
 								 * otherwise text */
+	int			raw;			/* raw mode for COPY */
 
 	/*
 	 * These fields are copied from the originating PGconn, so that operations
diff --git a/src/interfaces/libpq/test/Makefile b/src/interfaces/libpq/test/Makefile
index ab41dc3..f5f1511 100644
--- a/src/interfaces/libpq/test/Makefile
+++ b/src/interfaces/libpq/test/Makefile
@@ -9,14 +9,18 @@ endif
 override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS)
 override LDLIBS := $(libpq_pgport) $(LDLIBS)
 
-PROGS = uri-regress
+PROGS = uri-regress copy-raw-regress
 
 all: $(PROGS)
 
 installcheck: all
 	SRCDIR='$(top_srcdir)' SUBDIR='$(subdir)' \
 		   $(PERL) $(top_srcdir)/$(subdir)/regress.pl
+	SRCDIR='$(top_srcdir)' SUBDIR='$(subdir)' \
+		   $(PERL) $(top_srcdir)/$(subdir)/copy-raw-regress.pl
 
 clean distclean maintainer-clean:
 	rm -f $(PROGS)
 	rm -f regress.out regress.diff
+	rm -f copy-raw-regress.out
+	rm -f uri-regress.o copy-raw-regress.o
\ No newline at end of file
diff --git a/src/interfaces/libpq/test/copy-raw-expected.out b/src/interfaces/libpq/test/copy-raw-expected.out
new file mode 100644
index 0000000..6bbef51
--- /dev/null
+++ b/src/interfaces/libpq/test/copy-raw-expected.out
@@ -0,0 +1,33 @@
+trying
+copy-raw-regress: buffer is empty
+copy-raw-regress: the test COPY TO was successful (a, raw_text, 0)
+copy-raw-regress: buffer is empty
+copy-raw-regress: the test COPY TO was successful (b, raw_binary, 0)
+copy-raw-regress: the test COPY TO was successful (a, raw_text, 1)
+copy-raw-regress: the test COPY TO was successful (b, raw_binary, 1)
+copy-raw-regress: the test COPY TO was successful (a, raw_text, 10)
+copy-raw-regress: the test COPY TO was successful (b, raw_binary, 10)
+copy-raw-regress: the test COPY TO was successful (a, raw_text, 1000)
+copy-raw-regress: the test COPY TO was successful (b, raw_binary, 1000)
+copy-raw-regress: the test COPY TO was successful (a, raw_text, 10000)
+copy-raw-regress: the test COPY TO was successful (b, raw_binary, 10000)
+copy-raw-regress: the test COPY TO was successful (a, raw_text, 100000)
+copy-raw-regress: the test COPY TO was successful (b, raw_binary, 100000)
+copy-raw-regress: the test COPY TO was successful (a, raw_text, 1000000)
+copy-raw-regress: the test COPY TO was successful (b, raw_binary, 1000000)
+copy-raw-regress: the test COPY FROM was successful (a, raw_text, 0)
+copy-raw-regress: the test COPY FROM was successful (b, raw_binary, 0)
+copy-raw-regress: the test COPY FROM was successful (a, raw_text, 1)
+copy-raw-regress: the test COPY FROM was successful (b, raw_binary, 1)
+copy-raw-regress: the test COPY FROM was successful (a, raw_text, 10)
+copy-raw-regress: the test COPY FROM was successful (b, raw_binary, 10)
+copy-raw-regress: the test COPY FROM was successful (a, raw_text, 1000)
+copy-raw-regress: the test COPY FROM was successful (b, raw_binary, 1000)
+copy-raw-regress: the test COPY FROM was successful (a, raw_text, 10000)
+copy-raw-regress: the test COPY FROM was successful (b, raw_binary, 10000)
+copy-raw-regress: the test COPY FROM was successful (a, raw_text, 100000)
+copy-raw-regress: the test COPY FROM was successful (b, raw_binary, 100000)
+copy-raw-regress: the test COPY FROM was successful (a, raw_text, 1000000)
+copy-raw-regress: the test COPY FROM was successful (b, raw_binary, 1000000)
+COPY RAW tests done
+
diff --git a/src/interfaces/libpq/test/copy-raw-regress.c b/src/interfaces/libpq/test/copy-raw-regress.c
new file mode 100644
index 0000000..2cdb331
--- /dev/null
+++ b/src/interfaces/libpq/test/copy-raw-regress.c
@@ -0,0 +1,597 @@
+/*
+ * copy-raw-regress.c
+ *		A test program for COPY API
+ *
+ * Portions Copyright (c) 2012-2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *		src/interfaces/libpq/test/copy-raw-regress.c
+ */
+
+#include "postgres_fe.h"
+
+#include "libpq-fe.h"
+#include "pqexpbuffer.h"
+#include "catalog/pg_type.h"
+
+char *text_message = "The PostgreSQL Database, ";
+char *binary_message = "\360\237\230\203\040\360\237\230\202\040";
+
+const char *prgname = "copy-raw-regress";
+
+/*
+ * Notice processor that discards every message.  CreateTable() installs it
+ * around DROP TABLE IF EXISTS to hide the notice that statement may raise.
+ */
+static void
+terseNoticeProcessor(void *arg, const char *message)
+{
+	/* do nothing */
+}
+
+/*
+ * Fill values[0] with `size` bytes of repeated text_message and values[1] with
+ * `size` bytes built from binary_message.  Both buffers come from pg_malloc.
+ */
+static void
+PrepareTestValues(char *values[], int size)
+{
+	char	*tbuffer;
+	char	*bbuffer;
+	char		*p;
+	int			 nchars;
+	int			 l;
+
+	tbuffer = pg_malloc(size + 1);		/* +1 for the terminator written below */
+	bbuffer = pg_malloc(size + 1);
+
+	p = tbuffer;
+	nchars = size;
+	l = strlen(text_message);
+	while (nchars > 0)
+	{
+		if (l < nchars)
+		{
+			strcpy(p, text_message);
+			p += l;
+			nchars -= l;
+		}
+		else
+		{
+			strncpy(p, text_message, nchars);	/* last piece, possibly partial */
+			nchars = 0;
+		}
+	}
+
+	tbuffer[size] = '\0';
+	values[0] = tbuffer;
+
+	p = bbuffer;
+	nchars = size;
+	l = 10;								/* byte length of binary_message */
+	while (nchars > 0)
+	{
+		if (l < nchars)
+		{
+			memcpy(p, binary_message, l);
+			p += l;
+			nchars -= l;
+		}
+		else
+		{
+			memset(p, 0, nchars);		/* the tail is zero bytes, not message data */
+			nchars = 0;
+		}
+	}
+
+	bbuffer[size] = '\0';
+	values[1] = bbuffer;
+}
+
+/*
+ * Truncate table copy_raw, then insert one row (id = 1) with values[0] as
+ * column a and values[1] as column b.  Returns false if either command fails.
+ */
+static bool
+InsertData(PGconn *conn, char *values[], int size)
+{
+	PGresult	*res;
+	Oid		 types[] = {TEXTOID, BYTEAOID};
+	int		 formats[] = {0, 1};	/* $1 is sent as text, $2 as binary */
+	int		 lens[2];
+
+	lens[0] = 0;						/* text-format parameter: length is ignored */
+	lens[1] = size;						/* binary-format parameter: length must be exact */
+
+	res = PQexec(conn, "TRUNCATE TABLE copy_raw");
+	if (PQresultStatus(res) != PGRES_COMMAND_OK)
+	{
+		fprintf(stderr, "%s: could not to truncate table \"copy_raw\": %s",
+				  prgname, PQerrorMessage(conn));
+		PQclear(res);
+		return false;
+	}
+	PQclear(res);
+
+	res = PQexecParams(conn, "INSERT INTO copy_raw(id, a,b) VALUES(1, $1, $2)",
+							 2,
+							 types,
+							 (const char * const*) values,
+							 lens,
+							 formats,
+							 0);	/* result format: text */
+	if (PQresultStatus(res) != PGRES_COMMAND_OK)
+	{
+		fprintf(stderr, "%s: could not to insert to table \"copy_raw\": %s\n",
+				  prgname, PQerrorMessage(conn));
+		PQclear(res);
+		return false;
+	}
+	PQclear(res);
+	return true;
+}
+
+/*
+ * COPY TO test - insert `size` bytes, fetch column colnames[colnum] with COPY
+ * TO stdout (FORMAT method), compare.  NOTE(review): error exits skip cleanup.
+ */
+static bool
+CopyToTest(PGconn *conn, int colnum, const char *method, int size)
+{
+	char 	*colnames[] = {"a", "b"};
+	char	*values[2];
+	PGresult	*res;
+	PQExpBufferData		 query;
+	int					 len;
+	char				*buffer;
+
+	PrepareTestValues(values, size);
+	if (!InsertData(conn, values, size))
+		return false;
+
+	initPQExpBuffer(&query);
+	appendPQExpBuffer(&query,
+					  "COPY (SELECT %s FROM copy_raw WHERE id = 1) TO stdout (FORMAT %s)",
+					  colnames[colnum],
+					  method);
+
+	res = PQexec(conn, query.data);
+	if (PQresultStatus(res) != PGRES_COPY_OUT)
+	{
+		fprintf(stderr, "%s: could not to execute \"%s\": %s\n",
+				  prgname,
+				  query.data,
+				  PQerrorMessage(conn));
+		PQclear(res);
+		return false;
+	}
+
+	PQclear(res);
+
+	len = PQgetCopyData(conn, &buffer, false);	/* false: blocking read */
+	if (len == -2)								/* error reported by libpq */
+	{
+		fprintf(stderr, "%s: cannot to get data: %s\n",
+				  prgname, PQerrorMessage(conn));
+		return false;
+	}
+
+	if (len == -1)								/* COPY is done: no data was delivered */
+	{
+		fprintf(stderr, "%s: buffer is empty\n",
+				  prgname);
+
+		res = PQgetResult(conn);
+		if (PQresultStatus(res) != PGRES_COMMAND_OK)
+		{
+			fprintf(stderr, "%s: could not to execute \"%s\": %s\n",
+					  prgname,
+					  query.data,
+					  PQerrorMessage(conn));
+			PQclear(res);
+			return false;
+		}
+		PQclear(res);
+
+		len = 0;								/* treat "no data" as a zero-length value */
+	}
+
+	if (len != size)
+	{
+		fprintf(stderr, "%s: the length of data (%d) is different than expected (%d)\n",
+				prgname, len, size);
+		return false;
+	}
+
+	if (len > 0 && memcmp(values[colnum], buffer, len) != 0)
+	{
+		fprintf(stderr, "%s: the content of loaded data is different than expected\n",
+				  prgname);
+		return false;
+	}
+
+	fprintf(stderr, "%s: the test COPY TO was successful (%s, %s, %d)\n",
+				prgname,
+				colnames[colnum],
+				method,
+				size);
+
+	PQfreemem(buffer);
+	termPQExpBuffer(&query);
+
+	pg_free(values[0]);
+	pg_free(values[1]);
+
+	return true;
+}
+
+static PGconn *
+PrepareConnection(char *connectionstr)	/* returns NULL on any failure */
+{
+	PQconninfoOption *opts;
+	PQconninfoOption *opt;
+	char	   *errmsg = NULL;
+	const char **keywords = NULL;
+	const char **values = NULL;
+	int		nopts = 0;
+	int		i = 0;
+	PGconn		*conn;
+
+	opts = PQconninfoParse(connectionstr, &errmsg);
+	if (opts == NULL)
+	{
+		fprintf(stderr, "%s: %s\n", prgname, errmsg);
+		return NULL;
+	}
+
+	for (opt = opts; opt->keyword != NULL; opt++)	/* count options that have a value */
+	{
+		if (opt->val != NULL && opt->val[0] != '\0')
+			nopts++;
+	}
+
+	keywords = pg_malloc0((nopts + 1) * sizeof(*keywords));	/* one extra slot past the last entry */
+	values = pg_malloc0((nopts + 1) * sizeof(*values));
+
+	for (opt = opts; opt->keyword != NULL; opt++)	/* copy those options into the two arrays */
+	{
+		if (opt->val != NULL && opt->val[0] != '\0')
+		{
+			keywords[i] = opt->keyword;
+			values[i] = opt->val;
+			i++;
+		}
+	}
+
+	conn = PQconnectdbParams(keywords, values, false);
+	if (!conn)
+	{
+		fprintf(stderr, "%s: could not connect to server\n",
+				  prgname);
+		return NULL;
+	}
+
+	if (PQstatus(conn) != CONNECTION_OK)
+	{
+		fprintf(stderr, "%s: could not connect to server: %s",
+					prgname, PQerrorMessage(conn));
+		PQfinish(conn);
+		return NULL;
+	}
+
+	pg_free(values);
+	pg_free(keywords);
+	PQconninfoFree(opts);	/* keywords[] and values[] pointed into opts until here */
+
+	return conn;
+}
+
+static bool
+CreateTable(PGconn *conn)	/* (re)creates table copy_raw; false on error */
+{
+	PGresult *res;
+	PQnoticeProcessor prev_notproc;
+
+	prev_notproc = PQsetNoticeProcessor(conn, terseNoticeProcessor, NULL);	/* mute notices for the DROP */
+
+	res = PQexec(conn, "DROP TABLE IF EXISTS copy_raw");
+	if (PQresultStatus(res) != PGRES_COMMAND_OK)
+	{
+		fprintf(stderr, "%s: could not drop table \"copy_raw\": %s",
+				  prgname, PQerrorMessage(conn));
+		PQclear(res);
+		return false;
+	}
+	PQclear(res);
+
+	PQsetNoticeProcessor(conn, prev_notproc, NULL);	/* restore the previous notice processor */
+
+	res = PQexec(conn, "CREATE TABLE copy_raw(id serial, a text, b bytea)");
+	if (PQresultStatus(res) != PGRES_COMMAND_OK)
+	{
+		fprintf(stderr, "%s: could not create table table \"copy_raw\": %s",
+				  prgname, PQerrorMessage(conn));
+		PQclear(res);
+		return false;
+	}
+	PQclear(res);
+
+	return true;
+}
+
+static bool
+DropTable(PGconn *conn)	/* drops table copy_raw; false on error */
+{
+	PGresult *res;
+
+	res = PQexec(conn, "DROP TABLE copy_raw");	/* no IF EXISTS: a failed drop is reported */
+	if (PQresultStatus(res) != PGRES_COMMAND_OK)
+	{
+		fprintf(stderr, "%s: could not drop table \"copy_raw\": %s",
+				  prgname, PQerrorMessage(conn));
+		PQclear(res);
+		return false;
+	}
+	PQclear(res);
+
+	return true;
+}
+
+/*
+ * Stores the result of SELECT lastval(), converted with atoi(), in *lastval.
+ *
+ * Returns true on success, false if the query fails.
+ */
+static bool
+get_lastval(PGconn *conn, int *lastval)
+{
+	PGresult *res;
+	char		*val;
+
+	res = PQexec(conn, "SELECT lastval()");
+	if (PQresultStatus(res) != PGRES_TUPLES_OK)
+	{
+		fprintf(stderr, "%s: could not to call lastval function: %s",
+				  prgname, PQerrorMessage(conn));
+		PQclear(res);
+		return false;
+	}
+
+	val = PQgetvalue(res, 0, 0);	/* first column of the first row */
+	Assert(val != NULL);
+
+	*lastval = atoi(val);			/* convert before res, which owns val, is cleared */
+
+	PQclear(res);
+
+	return true;
+}
+
+
+/*
+ * Truncate copy_raw, then load `size` bytes of buffer into column
+ * colnames[colnum] with COPY ... FROM stdin (FORMAT method); *id gets lastval().
+ */
+static bool
+DoCopyFromBuffer(PGconn *conn, int colnum, const char *method, const char *buffer, int size, int *id)
+{
+	PQExpBufferData		 query;
+	PGresult			*res;
+	char 	*colnames[] = {"a", "b"};
+	int		copy_data_res;
+
+	res = PQexec(conn, "TRUNCATE TABLE copy_raw");
+	if (PQresultStatus(res) != PGRES_COMMAND_OK)
+	{
+		fprintf(stderr, "%s: could not to truncate table \"copy_raw\": %s",
+				  prgname, PQerrorMessage(conn));
+		PQclear(res);
+		return false;
+	}
+	PQclear(res);
+
+	initPQExpBuffer(&query);
+	appendPQExpBuffer(&query,
+					  "COPY copy_raw(%s) FROM stdin (FORMAT %s)",
+					  colnames[colnum],
+					  method);
+
+	res = PQexec(conn, query.data);
+	if (PQresultStatus(res) != PGRES_COPY_IN)
+	{
+		fprintf(stderr, "%s: could not to execute \"%s\": %s\n",
+				  prgname,
+				  query.data,
+				  PQerrorMessage(conn));
+		PQclear(res);
+		termPQExpBuffer(&query);
+		return false;
+	}
+
+	PQclear(res);
+
+	copy_data_res = PQputCopyData(conn, buffer, size);
+	if (copy_data_res == -1)
+	{
+		fprintf(stderr, "%s: could not to send data: %s\n",
+				  prgname, PQerrorMessage(conn));
+		termPQExpBuffer(&query);
+		return false;
+	}
+
+	/*
+	 * Passing NULL ends the COPY normally; a non-NULL string would instead
+	 * force the COPY to fail.  The argument is input-only, so a failure to
+	 * send is detected from the return value, not from the argument.
+	 */
+	copy_data_res = PQputCopyEnd(conn, NULL);
+	if (copy_data_res == -1)
+	{
+		fprintf(stderr, "%s: could not to send data: %s\n",
+				  prgname, PQerrorMessage(conn));
+		termPQExpBuffer(&query);
+		return false;
+	}
+
+	res = PQgetResult(conn);
+	if (PQresultStatus(res) != PGRES_COMMAND_OK)
+	{
+		fprintf(stderr, "%s: could not to execute \"%s\": %s\n",
+				  prgname,
+				  query.data,
+				  PQerrorMessage(conn));
+		PQclear(res);
+		termPQExpBuffer(&query);
+		return false;
+	}
+
+	PQclear(res);
+	termPQExpBuffer(&query);
+
+	if (!get_lastval(conn, id))
+		return false;
+
+	return true;
+}
+
+/*
+ * Send data to the server with COPY FROM (raw format), then read it back with
+ * a binary-format SELECT and compare.  NOTE(review): error exits skip pg_free.
+ */
+static bool
+CopyFromTest(PGconn *conn, int colnum, const char *method, int size)
+{
+	char 	*colnames[] = {"a", "b"};
+	char	*values[2];
+	PGresult	*res;
+	int			 id;
+	PQExpBufferData		 query;
+	char				*buffer;
+	int					len;
+
+	PrepareTestValues(values, size);
+
+	if (!DoCopyFromBuffer(conn, colnum, method, (const char *) values[colnum], size, &id))
+		return false;
+
+	initPQExpBuffer(&query);
+	appendPQExpBuffer(&query,
+					  "SELECT %s FROM copy_raw WHERE id = %d",
+					  colnames[colnum],
+					  id);
+
+	res = PQexecParams(conn, query.data,
+							 0,
+							 NULL,
+							 NULL,
+							 NULL,
+							 NULL,
+							 1);	/* request the result in binary format */
+
+	if (PQresultStatus(res) != PGRES_TUPLES_OK)
+	{
+		fprintf(stderr, "%s: could not to execute \"%s\": %s\n",
+				  prgname,
+				  query.data,
+				  PQerrorMessage(conn));
+		PQclear(res);
+		return false;
+	}
+
+	termPQExpBuffer(&query);
+
+	buffer = PQgetvalue(res, 0, 0);	/* owned by res: valid until PQclear(res) */
+	len = PQgetlength(res, 0, 0);
+
+	if (len != size)
+	{
+		fprintf(stderr, "%s: the length of data (%d) is different than expected (%d)\n",
+				prgname, len, size);
+		PQclear(res);
+		return false;
+	}
+
+	if (len > 0 && memcmp(values[colnum], buffer, len) != 0)
+	{
+		fprintf(stderr, "%s: the content of stored data is different than expected\n",
+				  prgname);
+		PQclear(res);
+		return false;
+	}
+
+	PQclear(res);
+
+	pg_free(values[0]);
+	pg_free(values[1]);
+
+	fprintf(stderr, "%s: the test COPY FROM was successful (%s, %s, %d)\n",
+				prgname,
+				colnames[colnum],
+				method,
+				size);
+
+	return true;
+}
+
+int
+main(int argc, char *argv[])	/* argv[1] is the connection string */
+{
+	PGconn *conn;
+	int		sizes[] = {0, 1, 10, 1000, 10000, 100000, 1000000};	/* payload sizes in bytes */
+	const char *method[] = {"raw_text", "raw_binary"};	/* method[j] is used with column j */
+	int		i;
+	int			j;
+
+	if (argc != 2)
+	{
+		fprintf(stderr, "%s: usage %s connection string\n",
+			  prgname, prgname);
+		exit(1);
+	}
+
+	conn = PrepareConnection(argv[1]);
+	if (conn == NULL)
+		exit(1);
+
+	if (!CreateTable(conn))
+	{
+		PQfinish(conn);
+		exit(1);
+	}
+
+	for (i = 0; i < 7; i++)			/* 7 = number of entries in sizes[] */
+	{
+		for (j = 0; j < 2; j++)		/* 0: text column a, 1: bytea column b */
+		{
+			if (!CopyToTest(conn, j, method[j], sizes[i]))
+			{
+				PQfinish(conn);
+				exit(1);
+			}
+		}
+	}
+
+	for (i = 0; i < 7; i++)			/* same size/format matrix for COPY FROM */
+	{
+		for (j = 0; j < 2; j++)
+		{
+			if (!CopyFromTest(conn, j, method[j], sizes[i]))
+			{
+				PQfinish(conn);
+				exit(1);
+			}
+		}
+	}
+
+	if (!DropTable(conn))
+	{
+		PQfinish(conn);
+		exit(1);
+	}
+
+	PQfinish(conn);
+
+	fprintf(stderr, "COPY RAW tests done\n");
+
+	exit(0);
+}
diff --git a/src/interfaces/libpq/test/copy-raw-regress.pl b/src/interfaces/libpq/test/copy-raw-regress.pl
new file mode 100644
index 0000000..7b1714a
--- /dev/null
+++ b/src/interfaces/libpq/test/copy-raw-regress.pl
@@ -0,0 +1,48 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+# use of SRCDIR/SUBDIR is required for supporting VPath builds
+my $srcdir = $ENV{'SRCDIR'} or die 'SRCDIR environment variable is not set';
+my $subdir = $ENV{'SUBDIR'} or die 'SUBDIR environment variable is not set';
+
+my $expected_out = "$srcdir/$subdir/copy-raw-expected.out";
+
+# the output file should land in the build_dir of VPath, or just in
+# the current dir, if VPath isn't used
+my $regress_out = "copy-raw-regress.out";
+
+# save STDOUT/ERR and redirect both to $regress_out
+open(OLDOUT, ">&", \*STDOUT) or die "can't dup STDOUT: $!";
+open(OLDERR, ">&", \*STDERR) or die "can't dup STDERR: $!";
+
+open(STDOUT, ">", $regress_out)
+  or die "can't open $regress_out for writing: $!";
+open(STDERR, ">&", \*STDOUT) or die "can't dup STDOUT: $!";
+
+print "trying\n";
+system("./copy-raw-regress \"postgresql://\"");
+print "\n";
+
+# restore STDOUT/ERR so we can print the outcome to the user
+open(STDERR, ">&", \*OLDERR) or die; # can't complain as STDERR is still duped
+open(STDOUT, ">&", \*OLDOUT) or die "can't restore STDOUT: $!";
+
+# the output was written to the current directory, so read it from there
+my $diff_status = system("diff -c \"$regress_out\" \"$expected_out\" >regress.diff");
+
+print "=" x 70, "\n";
+if ($diff_status == 0)
+{
+	print "All tests passed\n";
+	exit 0;
+}
+else
+{
+	print <<EOF;
+FAILED: the test result differs from the expected output
+
+Review the difference in "$subdir/regress.diff"
+EOF
+	exit 1;
+}
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index 5f6260a..3d36dd3 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -466,3 +466,17 @@ DROP FUNCTION truncate_in_subxact();
 DROP TABLE x, y;
 DROP FUNCTION fn_x_before();
 DROP FUNCTION fn_x_after();
+CREATE TABLE x(a bytea);
+INSERT INTO x VALUES('\x41484f4a0a');
+INSERT INTO x VALUES('\x41484f4a0a');
+-- should to fail
+COPY (SELECT a,a FROM x LIMIT 1) TO STDOUT (FORMAT raw_binary);
+ERROR:  Single column result/target is required in RAW_TEXT/RAW_BINARY mode
+COPY (SELECT a FROM x) TO STDOUT (FORMAT raw_binary);
+AHOJ
+AHOJ
+ERROR:  single row result is required by RAW_TEXT/RAW_BINARY mode
+-- should be ok
+COPY (SELECT a FROM x LIMIT 1) TO STDOUT (FORMAT raw_binary);
+AHOJ
+DROP TABLE x;
diff --git a/src/test/regress/input/copy.source b/src/test/regress/input/copy.source
index cb13606..085ae36 100644
--- a/src/test/regress/input/copy.source
+++ b/src/test/regress/input/copy.source
@@ -133,3 +133,82 @@ this is just a line full of junk that would error out if parsed
 \.
 
 copy copytest3 to stdout csv header;
+
+-- copy raw
+CREATE TABLE x(a bytea);
+INSERT INTO x VALUES('\x41484f4a0a');
+SELECT length(a) FROM x;
+
+INSERT INTO x VALUES('\x41484f4a0a');
+
+-- should to fail
+COPY (SELECT a,a FROM x LIMIT 1) TO '@abs_builddir@/results/raw.data' (FORMAT raw_binary);
+COPY (SELECT a FROM x) TO '@abs_builddir@/results/raw.data' (FORMAT raw_binary);
+
+-- should be ok
+COPY (SELECT a FROM x LIMIT 1) TO '@abs_builddir@/results/raw.data' (FORMAT raw_binary);
+TRUNCATE x;
+COPY x FROM '@abs_builddir@/results/raw.data' (FORMAT raw_binary);
+SELECT length(a) FROM x;
+COPY x TO stdout (FORMAT raw_binary);
+
+TRUNCATE x;
+
+\COPY x FROM '@abs_builddir@/results/raw.data' (FORMAT raw_binary)
+SELECT length(a) FROM x;
+COPY x TO stdout (FORMAT raw_binary);
+
+\COPY x TO '@abs_builddir@/results/raw2.data' (FORMAT raw_binary)
+TRUNCATE x;
+
+\COPY x FROM '@abs_builddir@/results/raw2.data' (FORMAT raw_binary)
+SELECT length(a) FROM x;
+COPY x TO stdout (FORMAT raw_binary);
+
+-- test big file
+TRUNCATE x;
+-- use different mechanism for load to bytea
+\lo_import @abs_builddir@/data/hash.data
+\set lo_oid :LASTOID
+INSERT INTO x VALUES(lo_get(:lo_oid));
+\lo_unlink :lo_oid
+
+COPY x FROM '@abs_builddir@/data/hash.data' (FORMAT raw_binary);
+\COPY x FROM '@abs_builddir@/data/hash.data' (FORMAT raw_binary)
+
+SELECT md5(a), length(a) FROM x;
+
+TRUNCATE x;
+COPY x FROM '@abs_builddir@/data/hash.data' (FORMAT raw_binary);
+COPY x TO '@abs_builddir@/results/hash2.data' (FORMAT raw_binary);
+\COPY x TO '@abs_builddir@/results/hash3.data' (FORMAT raw_binary)
+
+-- read again
+COPY x FROM '@abs_builddir@/results/hash2.data' (FORMAT raw_binary);
+\COPY x FROM '@abs_builddir@/results/hash3.data' (FORMAT raw_binary)
+-- cross
+COPY x FROM '@abs_builddir@/results/hash3.data' (FORMAT raw_binary);
+\COPY x FROM '@abs_builddir@/results/hash2.data' (FORMAT raw_binary)
+
+SELECT md5(a), length(a) FROM x;
+
+DROP TABLE x;
+
+-- insert into multicolumn table
+CREATE TABLE x(id serial, a bytea, b bytea);
+
+-- should fail, too much columns
+COPY x FROM '@abs_builddir@/results/hash2.data' (FORMAT raw_binary);
+
+-- should work
+COPY x(a) FROM '@abs_builddir@/results/hash2.data' (FORMAT raw_binary);
+COPY x(b) FROM '@abs_builddir@/results/hash2.data' (FORMAT raw_binary);
+SELECT id, md5(a), md5(b) FROM x;
+
+-- test raw_text
+COPY (SELECT a FROM x WHERE id = 1) TO '@abs_builddir@/results/hash4.data' (FORMAT raw_text);
+COPY x(a) FROM '@abs_builddir@/results/hash4.data' (FORMAT raw_text);
+SELECT id, md5(a) FROM x WHERE id = lastval();
+
+DROP TABLE x;
+
diff --git a/src/test/regress/output/copy.source b/src/test/regress/output/copy.source
index b7e372d..e34bbab 100644
--- a/src/test/regress/output/copy.source
+++ b/src/test/regress/output/copy.source
@@ -95,3 +95,114 @@ copy copytest3 to stdout csv header;
 c1,"col with , comma","col with "" quote"
 1,a,1
 2,b,2
+-- copy raw
+CREATE TABLE x(a bytea);
+INSERT INTO x VALUES('\x41484f4a0a');
+SELECT length(a) FROM x;
+ length 
+--------
+      5
+(1 row)
+
+INSERT INTO x VALUES('\x41484f4a0a');
+-- should to fail
+COPY (SELECT a,a FROM x LIMIT 1) TO '@abs_builddir@/results/raw.data' (FORMAT raw_binary);
+ERROR:  Single column result/target is required in RAW_TEXT/RAW_BINARY mode
+COPY (SELECT a FROM x) TO '@abs_builddir@/results/raw.data' (FORMAT raw_binary);
+ERROR:  single row result is required by RAW_TEXT/RAW_BINARY mode
+-- should be ok
+COPY (SELECT a FROM x LIMIT 1) TO '@abs_builddir@/results/raw.data' (FORMAT raw_binary);
+TRUNCATE x;
+COPY x FROM '@abs_builddir@/results/raw.data' (FORMAT raw_binary);
+SELECT length(a) FROM x;
+ length 
+--------
+      5
+(1 row)
+
+COPY x TO stdout (FORMAT raw_binary);
+AHOJ
+TRUNCATE x;
+\COPY x FROM '@abs_builddir@/results/raw.data' (FORMAT raw_binary)
+SELECT length(a) FROM x;
+ length 
+--------
+      5
+(1 row)
+
+COPY x TO stdout (FORMAT raw_binary);
+AHOJ
+\COPY x TO '@abs_builddir@/results/raw2.data' (FORMAT raw_binary)
+TRUNCATE x;
+\COPY x FROM '@abs_builddir@/results/raw2.data' (FORMAT raw_binary)
+SELECT length(a) FROM x;
+ length 
+--------
+      5
+(1 row)
+
+COPY x TO stdout (FORMAT raw_binary);
+AHOJ
+-- test big file
+TRUNCATE x;
+-- use different mechanism for load to bytea
+\lo_import @abs_builddir@/data/hash.data
+\set lo_oid :LASTOID
+INSERT INTO x VALUES(lo_get(:lo_oid));
+\lo_unlink :lo_oid
+COPY x FROM '@abs_builddir@/data/hash.data' (FORMAT raw_binary);
+\COPY x FROM '@abs_builddir@/data/hash.data' (FORMAT raw_binary)
+SELECT md5(a), length(a) FROM x;
+               md5                | length 
+----------------------------------+--------
+ e446fe6ea5a347e69670633412c7f8cb | 153749
+ e446fe6ea5a347e69670633412c7f8cb | 153749
+ e446fe6ea5a347e69670633412c7f8cb | 153749
+(3 rows)
+
+TRUNCATE x;
+COPY x FROM '@abs_builddir@/data/hash.data' (FORMAT raw_binary);
+COPY x TO '@abs_builddir@/results/hash2.data' (FORMAT raw_binary);
+\COPY x TO '@abs_builddir@/results/hash3.data' (FORMAT raw_binary)
+-- read again
+COPY x FROM '@abs_builddir@/results/hash2.data' (FORMAT raw_binary);
+\COPY x FROM '@abs_builddir@/results/hash3.data' (FORMAT raw_binary)
+-- cross
+COPY x FROM '@abs_builddir@/results/hash3.data' (FORMAT raw_binary);
+\COPY x FROM '@abs_builddir@/results/hash2.data' (FORMAT raw_binary)
+SELECT md5(a), length(a) FROM x;
+               md5                | length 
+----------------------------------+--------
+ e446fe6ea5a347e69670633412c7f8cb | 153749
+ e446fe6ea5a347e69670633412c7f8cb | 153749
+ e446fe6ea5a347e69670633412c7f8cb | 153749
+ e446fe6ea5a347e69670633412c7f8cb | 153749
+ e446fe6ea5a347e69670633412c7f8cb | 153749
+(5 rows)
+
+DROP TABLE x;
+-- insert into multicolumn table
+CREATE TABLE x(id serial, a bytea, b bytea);
+-- should fail, too much columns
+COPY x FROM '@abs_builddir@/results/hash2.data' (FORMAT raw_binary);
+ERROR:  Single column result/target is required in RAW_TEXT/RAW_BINARY mode
+-- should work
+COPY x(a) FROM '@abs_builddir@/results/hash2.data' (FORMAT raw_binary);
+COPY x(b) FROM '@abs_builddir@/results/hash2.data' (FORMAT raw_binary);
+SELECT id, md5(a), md5(b) FROM x;
+ id |               md5                |               md5                
+----+----------------------------------+----------------------------------
+  1 | e446fe6ea5a347e69670633412c7f8cb | 
+  2 |                                  | e446fe6ea5a347e69670633412c7f8cb
+(2 rows)
+
+-- test raw_text
+COPY (SELECT a FROM x WHERE id = 1) TO '@abs_builddir@/results/hash4.data' (FORMAT raw_text);
+COPY x(a) FROM '@abs_builddir@/results/hash4.data' (FORMAT raw_text);
+SELECT id, md5(a) FROM x WHERE id = lastval();
+ id |               md5                
+----+----------------------------------
+  3 | e446fe6ea5a347e69670633412c7f8cb
+(1 row)
+
+DROP TABLE x;
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index 39a9deb..7e22ee4 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -333,3 +333,16 @@ DROP FUNCTION truncate_in_subxact();
 DROP TABLE x, y;
 DROP FUNCTION fn_x_before();
 DROP FUNCTION fn_x_after();
+
+CREATE TABLE x(a bytea);
+INSERT INTO x VALUES('\x41484f4a0a');
+INSERT INTO x VALUES('\x41484f4a0a');
+
+-- should to fail
+COPY (SELECT a,a FROM x LIMIT 1) TO STDOUT (FORMAT raw_binary);
+COPY (SELECT a FROM x) TO STDOUT (FORMAT raw_binary);
+
+-- should be ok
+COPY (SELECT a FROM x LIMIT 1) TO STDOUT (FORMAT raw_binary);
+
+DROP TABLE x;
#73Michael Paquier
michael.paquier@gmail.com
In reply to: Pavel Stehule (#72)
Re: raw output from copy

On Sat, Jul 16, 2016 at 5:55 PM, Pavel Stehule <pavel.stehule@gmail.com> wrote:

I am sending fresh version of COPY RAW patch.

Moved to next CF per this status.

+++ b/src/interfaces/libpq/test/copy-raw-regress.pl
@@ -0,0 +1,48 @@
+#!/usr/bin/perl -w
+
+use strict;
I don't understand why this is shaped this way -- I mean the perl part --
when we have the TAP infrastructure in place. MSVC does not test it either.
-- 
Michael

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#74Haribabu Kommi
kommi.haribabu@gmail.com
In reply to: Michael Paquier (#73)
Re: raw output from copy

Hi,

This is a gentle reminder.

You are assigned as a reviewer of the current patch in the 11-2016 commitfest,
but you haven't shared your review yet. Please share your review of
the patch. This will help the commitfest run more smoothly.

Please ignore this if you have already shared your review.

Regards,
Hari Babu
Fujitsu Australia

#75Haribabu Kommi
kommi.haribabu@gmail.com
In reply to: Haribabu Kommi (#74)
Re: raw output from copy

On Tue, Nov 22, 2016 at 10:48 PM, Haribabu Kommi <kommi.haribabu@gmail.com>
wrote:

Hi,

This is a gentle reminder.

you assigned as reviewer to the current patch in the 11-2016 commitfest.
But you haven't shared your review yet. Please share your review about
the patch. This will help us in smoother operation of commitfest.

Please Ignore if you already shared your review.

Patch is not applying properly to HEAD.
Moved to next CF with "waiting on author" status.

Regards,
Hari Babu
Fujitsu Australia

#76Kohei KaiGai
kaigai@kaigai.gr.jp
In reply to: Haribabu Kommi (#75)
Re: raw output from copy

Sorry for my late response.

I've briefly gone through the past discussion.
As I understand it, the purpose of this patch is to provide a fast interface
to import/export a particular cell of a relation, by skipping the text<->binary
transformation. Its typical use cases are the XML and JSON data types. Right?

If so, how about using the fast-path invocation protocol to call functions
that import/export these document types?
It can accept the binary form of the data stream, with minimal overhead.

It seems to me that extending the COPY statement for this optimization is a bit
of an overkill solution. Can we find an alternative solution that we can build
on the existing infrastructure?

Best regards,

2016-12-05 14:16 GMT+09:00 Haribabu Kommi <kommi.haribabu@gmail.com>:

On Tue, Nov 22, 2016 at 10:48 PM, Haribabu Kommi <kommi.haribabu@gmail.com>
wrote:

Hi,

This is a gentle reminder.

you assigned as reviewer to the current patch in the 11-2016 commitfest.
But you haven't shared your review yet. Please share your review about
the patch. This will help us in smoother operation of commitfest.

Please Ignore if you already shared your review.

Patch is not applying properly to HEAD.
Moved to next CF with "waiting on author" status.

Regards,
Hari Babu
Fujitsu Australia

--
KaiGai Kohei <kaigai@kaigai.gr.jp>

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#77Pavel Stehule
pavel.stehule@gmail.com
In reply to: Kohei KaiGai (#76)
Re: raw output from copy

Hi

2016-12-05 14:19 GMT+01:00 Kohei KaiGai <kaigai@kaigai.gr.jp>:

Sorry for my late response.

I've briefly checked a series of discussion in the past.
I understood the target/purpose of this patch is provision of a fast
interface
to import/export a particular cell of a relation, by skip of text<->binary
transformation. Its typical use case are XML and JSON data types. Right?

There are more goals:

1. User-friendly import of text or binary data - importing text data (with
psql) from a file is possible, but you have to load the content into a psql
variable. For binary data you have to use a workaround based on LO and
a transformation from LO to bytea.

2. User-friendly export of text or binary data - currently, binary data can be
exported only via a transformation to LO. XML has very interesting
properties when it is passed to/from the client in binary form. This is
impossible in psql now.

If so, how about the idea to use fast-path invocation protocol to call
functions
to import/export these document types?
It allows to accept binary form of the data stream, with minimum overheads.

Sorry, I don't see the point - to use the fast-path API I have to write an
application, and then I can write an application that supports binary passing.

It seems to me extend of COPY statement for this optimization is a bit
overkill
solution. Do we find out an alternative solution that we can build on
the existing
infrastructure?

The advantage and the point of COPY RAW was reusing an existing interface. The
question was: how can I export/import binary data simply from the psql console?

Regards

Pavel

Show quoted text

Best regards,

2016-12-05 14:16 GMT+09:00 Haribabu Kommi <kommi.haribabu@gmail.com>:

On Tue, Nov 22, 2016 at 10:48 PM, Haribabu Kommi <

kommi.haribabu@gmail.com>

wrote:

Hi,

This is a gentle reminder.

you assigned as reviewer to the current patch in the 11-2016 commitfest.
But you haven't shared your review yet. Please share your review about
the patch. This will help us in smoother operation of commitfest.

Please Ignore if you already shared your review.

Patch is not applying properly to HEAD.
Moved to next CF with "waiting on author" status.

Regards,
Hari Babu
Fujitsu Australia

--
KaiGai Kohei <kaigai@kaigai.gr.jp>

#78Kohei KaiGai
kaigai@kaigai.gr.jp
In reply to: Pavel Stehule (#77)
Re: raw output from copy

2016-12-05 22:45 GMT+09:00 Pavel Stehule <pavel.stehule@gmail.com>:

There are more goals:

1. user friendly import of text or binary data - import text data (with
psql) from file is possible - but you have to load a content to psql
variable. For binary data you should to use workaround based on LO and
transformation from LO to bytea.

2. user friendly export text or binary data - now, the binary data can be
exported only via transformation to LO. The XML has very interesting
features when is passing from/to client binary. This feature is impossible
in psql now.

:
<snip>
:

It seems to me extend of COPY statement for this optimization is a bit
overkill
solution. Do we find out an alternative solution that we can build on
the existing
infrastructure?

The advantage and sense of COPY RAW was reusing existing interface. The
question was: How I can export/import binary data simply from psql console?

OK, I could get your point.

Likely, we can implement the feature without enhancing the COPY statement,
by adding a special-purpose function and a \xxx command to psql.

Let's assume the two commands below on psql:

\blob_import <table_name> <column_name> (STDIN|<filename>)
\blob_export <query> (STDOUT|<filename>)

On \blob_import, the psql command reads the binary contents from either
stdin or a file, then calls a special-purpose function that takes three
arguments: table name, column name and a binary data chunk.
PQexecParams() of libpq can deliver the data chunk while keeping it in
binary format, so the special-purpose function will be able to
look up the destination table/column and construct a tuple that contains
the supplied data chunk. (I think the xxxx_recv handler should be used for
data validation, but that is not an element of this feature.)

On \blob_export, the psql command also sets up a simple query as follows:
SELECT blob_export((<user's supplied query>))
For example,
\blob_export SELECT binary_data FROM my_table WHERE id = 10 /tmp/aaa
shall be transformed to
SELECT blob_export((SELECT binary_data FROM my_table WHERE id = 10))

This function is declared as:
blob_export(anyelement) RETURNS bytea
So, as long as the user-supplied query returns exactly one column and
one row, it can transform the argument into a binary stream; the psql
command then receives it and dumps it somewhere: stdout or a file.

What do you think?

Thanks,
--
KaiGai Kohei <kaigai@kaigai.gr.jp>

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#79Pavel Stehule
pavel.stehule@gmail.com
In reply to: Kohei KaiGai (#78)
Re: raw output from copy

2016-12-06 1:50 GMT+01:00 Kohei KaiGai <kaigai@kaigai.gr.jp>:

2016-12-05 22:45 GMT+09:00 Pavel Stehule <pavel.stehule@gmail.com>:

There are more goals:

1. user friendly import of text or binary data - import text data (with
psql) from file is possible - but you have to load a content to psql
variable. For binary data you should to use workaround based on LO and
transformation from LO to bytea.

2. user friendly export text or binary data - now, the binary data can be
exported only via transformation to LO. The XML has very interesting
features when is passing from/to client binary. This feature is

impossible

in psql now.

:
<snip>
:

It seems to me extend of COPY statement for this optimization is a bit
overkill
solution. Do we find out an alternative solution that we can build on
the existing
infrastructure?

The advantage and sense of COPY RAW was reusing existing interface. The
question was: How I can export/import binary data simply from psql

console?

OK, I could get your point.

Likely, we can implement the feature without COPY statement enhancement
by adding a special purpose function and \xxx command on psql.

Let's assume the two commands below on psql:

\blob_import <table_name> <column_name> (STDIN|<filename>)
\blob_export <query> (STDOUT|<filename>)

On \blob_import, the psql command reads the binary contents from either
stdin or file, then call a special purpose function that takes three
arguments; table name, column name and a binary data chunk.
PQexecParams() of libpq allows to deliver the data chunk with keeping
binary data format, then the special purpose function will be able to
lookup the destination table/column and construct a tuple that contains
the supplied data chunk. (I think xxxx_recv handler shall be used for
data validation, but not an element of this feature.)

On \blob_export, the psql command also set up a simple query as follows:
SELECT blob_export((<user's supplied query>))
For example,
\blob_export SELECT binary_data FROM my_table WHERE id = 10 /tmp/aaa
shall be transformed to
SELECT blob_export((SELECT binary_data FROM my_table WHERE id = 10))

This is the reason why I prefer a COPY statement - it does all the
necessary things naturally. But if there is disagreement about COPY RAW,
it can be implemented as psql commands.

Export should work similarly to the \g and \gset features,

so

SELECT xmldoc FROM xxxx
\gbinary_store xxxx.xxx

Import is maybe better solved by the proposed file references in queries.

Regards

Pavel

Show quoted text

This function is declared as:
blob_export(anyelement) RETURNS bytea
So, as long as the user supplied query returns exactly one column and
one row, it can transform the argument to the binary stream, then psql
command receive it and dump somewhere; stdout or file.

How about your thought?

Thanks,
--
KaiGai Kohei <kaigai@kaigai.gr.jp>

#80Kohei KaiGai
kaigai@kaigai.gr.jp
In reply to: Pavel Stehule (#79)
Re: raw output from copy

2016-12-06 16:59 GMT+09:00 Pavel Stehule <pavel.stehule@gmail.com>:

2016-12-06 1:50 GMT+01:00 Kohei KaiGai <kaigai@kaigai.gr.jp>:

2016-12-05 22:45 GMT+09:00 Pavel Stehule <pavel.stehule@gmail.com>:

There are more goals:

1. user friendly import of text or binary data - import text data (with
psql) from file is possible - but you have to load a content to psql
variable. For binary data you should to use workaround based on LO and
transformation from LO to bytea.

2. user friendly export text or binary data - now, the binary data can
be
exported only via transformation to LO. The XML has very interesting
features when is passing from/to client binary. This feature is
impossible
in psql now.

:
<snip>
:

It seems to me extend of COPY statement for this optimization is a bit
overkill
solution. Do we find out an alternative solution that we can build on
the existing
infrastructure?

The advantage and sense of COPY RAW was reusing existing interface. The
question was: How I can export/import binary data simply from psql
console?

OK, I could get your point.

Likely, we can implement the feature without COPY statement enhancement
by adding a special purpose function and \xxx command on psql.

Let's assume the two commands below on psql:

\blob_import <table_name> <column_name> (STDIN|<filename>)
\blob_export <query> (STDOUT|<filename>)

On \blob_import, the psql command reads the binary contents from either
stdin or file, then call a special purpose function that takes three
arguments; table name, column name and a binary data chunk.
PQexecParams() of libpq allows to deliver the data chunk with keeping
binary data format, then the special purpose function will be able to
lookup the destination table/column and construct a tuple that contains
the supplied data chunk. (I think xxxx_recv handler shall be used for
data validation, but not an element of this feature.)

On \blob_export, the psql command also set up a simple query as follows:
SELECT blob_export((<user's supplied query>))
For example,
\blob_export SELECT binary_data FROM my_table WHERE id = 10 /tmp/aaa
shall be transformed to
SELECT blob_export((SELECT binary_data FROM my_table WHERE id = 10))

This is reason why I prefer a COPY statement - because it does all necessary
things natural. But if there is a disagreement against COPY RAW it can be
implemented as psql commands.

Yes, both approaches will be able to implement what you want to do.
I agree it is valuable if psql can import/export a particular item with
a simple shell-script description; however, there is no consensus on how
to implement it.

If psql supports a special \xxx command, it is equally convenient
from the users' standpoint, with no enhancement of the statement.

I hope committers will comment on which approach we should take.

Thanks,

export should be similar like \g, \gset feature

so

SELECT xmldoc FROM xxxx
\gbinary_store xxxx.xxx

import is maybe better solved by proposed file references in queries

Regards

Pavel

This function is declared as:
blob_export(anyelement) RETURNS bytea
So, as long as the user supplied query returns exactly one column and
one row, it can transform the argument to the binary stream, then psql
command receive it and dump somewhere; stdout or file.

How about your thought?

Thanks,
--
KaiGai Kohei <kaigai@kaigai.gr.jp>

--
KaiGai Kohei <kaigai@kaigai.gr.jp>

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers