[HACKERS] [bug-fix] Cannot select big bytea values (~600MB)
Hello!
If I create a big bytea value and try to select it from a table, I get
an error, something like: "ERROR: invalid memory alloc request size
...".
So basically we can insert data into a table but then we can't even work
with it. Sounds like a bug. Attaching a patch that fixes it (applies to
2a41507dab0f293ff241fe8ae326065998668af8).
And as it seems like quite a serious issue, would it be possible to
backport a fix for it to earlier versions?
HOW TO RECREATE:
1) generate some random data (in this case, 600 MB):
dd if=/dev/urandom of=rand.dat bs=1M count=600
2) postgres=# select lo_import('/PATH/TO/rand.dat');
lo_import
-----------
16397 [USE THIS ID FOR THE NEXT STEP]
(1 row)
3) postgres=# create table big_data as select (string_agg(data,'')) as
data from pg_largeobject where loid =16397;
SELECT 1
4) postgres=# select * from big_data;
ERROR: invalid memory alloc request size 1468006403
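For context, a rough back-of-the-envelope sketch of why the SELECT step fails (standalone C, not part of the patch; the constant is copied from MaxAllocSize in memutils.h and the size formula mirrors the hex branch of byteaout(), visible in the diff below):

/*
 * Standalone sketch, not PostgreSQL code: under the default hex output
 * mode, byteaout() needs roughly twice the raw size for the text form,
 * and palloc() rejects requests of MaxAllocSize (1 GB - 1) or more.
 */
#include <stdio.h>

#define MaxAllocSize ((size_t) 0x3fffffff)      /* 1 GB - 1, as in memutils.h */

int
main(void)
{
    size_t raw = (size_t) 600 * 1024 * 1024;    /* ~600 MB of bytea data */
    size_t hex = raw * 2 + 2 + 1;               /* "\x" prefix, 2 chars/byte, NUL */

    printf("needed %zu bytes, palloc() limit %zu bytes\n", hex, MaxAllocSize);
    return 0;
}

(The exact figure reported in the error depends on the size of the stored value; the point is that the hex text form needs about twice the raw bytes, which is over the 1GB cap for anything much above ~500MB.)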
--
Anna Akenteva
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
Attachments:
string_info_master.patch (text/x-diff)

diff --git a/src/backend/access/transam/parallel.c b/src/backend/access/transam/parallel.c
index 9d4efc0..85296e9 100644
--- a/src/backend/access/transam/parallel.c
+++ b/src/backend/access/transam/parallel.c
@@ -1143,7 +1143,7 @@ HandleParallelMessage(ParallelContext *pcxt, int i, StringInfo msg)
default:
{
- elog(ERROR, "unrecognized message type received from parallel worker: %c (message length %d bytes)",
+ elog(ERROR, "unrecognized message type received from parallel worker: %c (message length %zu bytes)",
msgtype, msg->len);
}
}
diff --git a/src/backend/lib/stringinfo.c b/src/backend/lib/stringinfo.c
index 798a823..d99b8bb 100644
--- a/src/backend/lib/stringinfo.c
+++ b/src/backend/lib/stringinfo.c
@@ -45,7 +45,7 @@ makeStringInfo(void)
void
initStringInfo(StringInfo str)
{
- int size = 1024; /* initial default buffer size */
+ size_t size = 1024; /* initial default buffer size */
str->data = (char *) palloc(size);
str->maxlen = size;
@@ -80,7 +80,7 @@ appendStringInfo(StringInfo str, const char *fmt,...)
for (;;)
{
va_list args;
- int needed;
+ size_t needed;
/* Try to format the data. */
va_start(args, fmt);
@@ -110,10 +110,10 @@ appendStringInfo(StringInfo str, const char *fmt,...)
* to redo va_start before you can rescan the argument list, and we can't do
* that from here.
*/
-int
+size_t
appendStringInfoVA(StringInfo str, const char *fmt, va_list args)
{
- int avail;
+ size_t avail;
size_t nprinted;
Assert(str != NULL);
@@ -127,24 +127,20 @@ appendStringInfoVA(StringInfo str, const char *fmt, va_list args)
if (avail < 16)
return 32;
- nprinted = pvsnprintf(str->data + str->len, (size_t) avail, fmt, args);
+ nprinted = pvsnprintf(str->data + str->len, avail, fmt, args);
- if (nprinted < (size_t) avail)
+ if (nprinted < avail)
{
/* Success. Note nprinted does not include trailing null. */
- str->len += (int) nprinted;
+ str->len += nprinted;
return 0;
}
/* Restore the trailing null so that str is unmodified. */
str->data[str->len] = '\0';
- /*
- * Return pvsnprintf's estimate of the space needed. (Although this is
- * given as a size_t, we know it will fit in int because it's not more
- * than MaxAllocSize.)
- */
- return (int) nprinted;
+ /* Return pvsnprintf's estimate of the space needed. */
+ return nprinted;
}
/*
@@ -189,7 +185,7 @@ appendStringInfoSpaces(StringInfo str, int count)
if (count > 0)
{
/* Make more room if needed */
- enlargeStringInfo(str, count);
+ enlargeStringInfo(str, (size_t) count);
/* OK, append the spaces */
while (--count >= 0)
@@ -205,7 +201,7 @@ appendStringInfoSpaces(StringInfo str, int count)
* if necessary. Ensures that a trailing null byte is present.
*/
void
-appendBinaryStringInfo(StringInfo str, const char *data, int datalen)
+appendBinaryStringInfo(StringInfo str, const char *data, size_t datalen)
{
Assert(str != NULL);
@@ -231,7 +227,7 @@ appendBinaryStringInfo(StringInfo str, const char *data, int datalen)
* if necessary. Does not ensure a trailing null-byte exists.
*/
void
-appendBinaryStringInfoNT(StringInfo str, const char *data, int datalen)
+appendBinaryStringInfoNT(StringInfo str, const char *data, size_t datalen)
{
Assert(str != NULL);
@@ -261,21 +257,19 @@ appendBinaryStringInfoNT(StringInfo str, const char *data, int datalen)
* This is the desired and indeed critical behavior!
*/
void
-enlargeStringInfo(StringInfo str, int needed)
+enlargeStringInfo(StringInfo str, size_t needed)
{
- int newlen;
+ size_t newlen;
/*
* Guard against out-of-range "needed" values. Without this, we can get
* an overflow or infinite loop in the following.
*/
- if (needed < 0) /* should not happen */
- elog(ERROR, "invalid string enlargement request size: %d", needed);
- if (((Size) needed) >= (MaxAllocSize - (Size) str->len))
+ if (needed >= (MaxAllocHugeSize - str->len))
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("out of memory"),
- errdetail("Cannot enlarge string buffer containing %d bytes by %d more bytes.",
+ errdetail("Cannot enlarge string buffer containing %zu bytes by %zu more bytes.",
str->len, needed)));
needed += str->len + 1; /* total space required now */
@@ -295,14 +289,14 @@ enlargeStringInfo(StringInfo str, int needed)
newlen = 2 * newlen;
/*
- * Clamp to MaxAllocSize in case we went past it. Note we are assuming
- * here that MaxAllocSize <= INT_MAX/2, else the above loop could
+ * Clamp to MaxAllocHugeSize in case we went past it. Note we know
+ * here that MaxAllocHugeSize <= SIZE_MAX/2, else the above loop could
* overflow. We will still have newlen >= needed.
*/
- if (newlen > (int) MaxAllocSize)
- newlen = (int) MaxAllocSize;
+ if (newlen > MaxAllocHugeSize)
+ newlen = MaxAllocHugeSize;
- str->data = (char *) repalloc(str->data, newlen);
+ str->data = (char *) repalloc_huge(str->data, newlen);
str->maxlen = newlen;
}
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 304cb26..fb55c94 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -358,7 +358,7 @@ byteaout(PG_FUNCTION_ARGS)
if (bytea_output == BYTEA_OUTPUT_HEX)
{
/* Print hex format */
- rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
+ rp = result = palloc_extended(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1, MCXT_ALLOC_HUGE);
*rp++ = '\\';
*rp++ = 'x';
rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
@@ -367,7 +367,7 @@ byteaout(PG_FUNCTION_ARGS)
{
/* Print traditional escaped format */
char *vp;
- int len;
+ size_t len;
int i;
len = 1; /* empty string has 1 char */
@@ -381,7 +381,7 @@ byteaout(PG_FUNCTION_ARGS)
else
len++;
}
- rp = result = (char *) palloc(len);
+ rp = result = (char *) palloc_extended(len, MCXT_ALLOC_HUGE);
vp = VARDATA_ANY(vlena);
for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
{
diff --git a/src/include/lib/stringinfo.h b/src/include/lib/stringinfo.h
index 8551237..7acea28 100644
--- a/src/include/lib/stringinfo.h
+++ b/src/include/lib/stringinfo.h
@@ -35,9 +35,9 @@
typedef struct StringInfoData
{
char *data;
- int len;
- int maxlen;
- int cursor;
+ size_t len;
+ size_t maxlen;
+ size_t cursor;
} StringInfoData;
typedef StringInfoData *StringInfo;
@@ -103,7 +103,7 @@ extern void appendStringInfo(StringInfo str, const char *fmt,...) pg_attribute_p
* pass the return value to enlargeStringInfo() before trying again; see
* appendStringInfo for standard usage pattern.
*/
-extern int appendStringInfoVA(StringInfo str, const char *fmt, va_list args) pg_attribute_printf(2, 0);
+extern size_t appendStringInfoVA(StringInfo str, const char *fmt, va_list args) pg_attribute_printf(2, 0);
/*------------------------
* appendStringInfoString
@@ -141,7 +141,7 @@ extern void appendStringInfoSpaces(StringInfo str, int count);
* if necessary.
*/
extern void appendBinaryStringInfo(StringInfo str,
- const char *data, int datalen);
+ const char *data, size_t datalen);
/*------------------------
* appendBinaryStringInfoNT
@@ -149,12 +149,12 @@ extern void appendBinaryStringInfo(StringInfo str,
* if necessary. Does not ensure a trailing null-byte exists.
*/
extern void appendBinaryStringInfoNT(StringInfo str,
- const char *data, int datalen);
+ const char *data, size_t datalen);
/*------------------------
* enlargeStringInfo
* Make sure a StringInfo's buffer can hold at least 'needed' more bytes.
*/
-extern void enlargeStringInfo(StringInfo str, int needed);
+extern void enlargeStringInfo(StringInfo str, size_t needed);
#endif /* STRINGINFO_H */
Anna Akenteva <a.akenteva@postgrespro.ru> writes:
[ widen StringInfoData max length to size_t ]
I find this scary as heck. Have you spent any time looking at the
side effects? There are probably hundreds of places that expect that
stringinfos won't get larger than 1GB.
Also, I don't entirely see how this fixes your stated goal of being
able to select a bytea value whose textual representation exceeds
1GB. The wire protocol can't support that either, and even if it did,
I wonder how many client programs could cope. Extremely wide tuple
values create pain points in many places.
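To make the wire-protocol point concrete: in the v3 protocol, each column value in a DataRow message is preceded by a signed 32-bit length (-1 meaning NULL), so a single value larger than ~2GB cannot even be framed. A minimal reader sketch, illustrative only and not libpq code:

/*
 * Illustrative only, not libpq: reading one column out of a DataRow ('D')
 * message.  The length field is a signed int32 (-1 = SQL NULL), so no
 * single value can exceed ~2GB on the wire, whatever the server allows.
 */
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

static const uint8_t *
read_column(const uint8_t *p, const uint8_t **value, int32_t *len)
{
    uint32_t netlen;

    memcpy(&netlen, p, sizeof(netlen));
    *len = (int32_t) ntohl(netlen);             /* -1 means SQL NULL */
    p += sizeof(netlen);

    *value = (*len >= 0) ? p : NULL;
    return (*len > 0) ? p + *len : p;
}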
And as it seems like quite a serious issue, would it be possible to
backport a fix for it to earlier versions?
Since this is an ABI break with very widely visible effects, there is
no chance whatsoever that it would be back-patched.
regards, tom lane
Hi,
On 2018-02-16 09:58:29 -0500, Tom Lane wrote:
Anna Akenteva <a.akenteva@postgrespro.ru> writes:
[ widen StringInfoData max length to size_t ]
I find this scary as heck. Have you spent any time looking at the
side effects? There are probably hundreds of places that expect that
stringinfos won't get larger than 1GB.
FWIW, I think we're going to have to bite that bullet sooner rather than
later. I do agree it's not going to fix this issue for real, and that
we're not going to backpatch it.
Greetings,
Andres Freund
Andres Freund <andres@anarazel.de> writes:
On 2018-02-16 09:58:29 -0500, Tom Lane wrote:
Anna Akenteva <a.akenteva@postgrespro.ru> writes:
[ widen StringInfoData max length to size_t ]
I find this scary as heck. Have you spent any time looking at the
side effects? There are probably hundreds of places that expect that
stringinfos won't get larger than 1GB.
FWIW, I think we're going to have to bite that bullet sooner rather than
later. I do agree it's not going to fix this issue for real, and that
we're not going to backpatch it.
I'm not necessarily saying we shouldn't consider widening this.
I'm just saying it's going to take a good deal of cross-checking for
consequences, in particular that nothing is at risk of integer overflow
if it's presented with a very long StringInfo.
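A made-up example of the kind of hazard meant here (not a real caller): code that stashes a StringInfo length in a plain int keeps working today only because len can never exceed 1GB.

/*
 * Hypothetical caller, for illustration only: once a buffer may exceed
 * INT_MAX bytes, this assignment silently wraps to a negative value on
 * typical platforms.
 */
#include <stdio.h>

int
main(void)
{
    size_t len = (size_t) 3 * 1024 * 1024 * 1024;   /* a 3GB buffer */
    int    msglen = (int) len;                      /* common existing pattern */

    printf("real length %zu, what an int-typed caller sees: %d\n", len, msglen);
    return 0;
}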
One way to limit the side effects would be to have StringInfos default to
only allowing 1GB of content as before, and you have to do something extra
at creation time to let one get bigger.
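A minimal standalone sketch of that idea (made-up names, plain realloc instead of palloc, just to show the shape of the opt-in; not an actual patch):

/*
 * Sketch only: the buffer keeps the old 1GB ceiling unless the caller
 * opted in to a "huge" buffer at creation time.  Mirrors the doubling
 * logic of enlargeStringInfo() but is not PostgreSQL code.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

#define MAX_ALLOC       ((size_t) 0x3fffffff)   /* ~1 GB, the default cap */
#define MAX_ALLOC_HUGE  (SIZE_MAX / 2)          /* stand-in for the huge cap */

typedef struct
{
    char   *data;
    size_t  len;
    size_t  maxlen;
    bool    allow_huge;                         /* set only at creation time */
} buf_t;

static bool
buf_enlarge(buf_t *buf, size_t needed)
{
    size_t limit = buf->allow_huge ? MAX_ALLOC_HUGE : MAX_ALLOC;
    size_t newlen = buf->maxlen ? buf->maxlen : 1024;
    char   *p;

    if (needed >= limit - buf->len)
        return false;                           /* same "too big" error as today */

    while (newlen < buf->len + needed + 1)
        newlen *= 2;
    if (newlen > limit)
        newlen = limit;

    p = realloc(buf->data, newlen);
    if (p == NULL)
        return false;
    buf->data = p;
    buf->maxlen = newlen;
    return true;
}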
There's still the problem that the wire protocol will limit us to 2GB
(or maybe 4GB if you want to be brave^Wfoolhardy and assume clients think
the width fields are unsigned). I can't get hugely excited about moving
the goalposts only from 1GB to 2GB ...
regards, tom lane
Tom Lane wrote:
Anna Akenteva <a.akenteva@postgrespro.ru> writes:
[ widen StringInfoData max length to size_t ]
I find this scary as heck. Have you spent any time looking at the
side effects? There are probably hundreds of places that expect that
stringinfos won't get larger than 1GB.
See these commits:
fa2fa9955280 42f50cb8fa98 b66adb7b0c83
and the discussion threads linked in the commit messages.
--
Álvaro Herrera https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
Tom Lane wrote on 2018-02-16 17:58:
Also, I don't entirely see how this fixes your stated goal of being
able to select a bytea value whose textual representation exceeds 1GB.
It's not necessarily my goal. My goal is to avoid the confusing
situation where you insert something into a table and suddenly
everything seems to break for no reason and you don't get any
information on what to do next. As I see it, it could be solved with:
a) allowing big bytea values but making sure that they don't cause
problems (which is what I tried to do with my patch)
b) prohibiting insertion of the kind of data that will cause problems
c) informing the user about the issue (maybe by documenting this
behaviour or giving a more informative error message)
So far the weird behaviour of big bytea values that I see boils down to:
1) We can't SELECT it
after INSERTing it, and there's no clear explanation as to why. Technically
it does make sense: we can insert a 900MB value into a table and then can't
select it because its textual representation takes up more than 1GB. It's
confusing for whoever uses Postgres though. It doesn't seem to
be documented anywhere (correct me if I'm wrong) and you don't get to
see any hints like "don't worry, you can retrieve the data, but use COPY
in binary format for that".
2) We can't use pg_dump
on a database that has a big bytea value; it just shows the same
error as when we try to select the value. And again, the error message
doesn't explain anything and I couldn't find this documented
anywhere. It's weird that it would just allow me to insert a value that
will make pg_dump unusable (although maybe there is a good enough way to
work around it that I'm not aware of).
The wire protocol can't support that either, and even if it did,
I wonder how many client programs could cope. Extremely wide tuple
values create pain points in many places.
I see how it can create a lot of problems. I do agree that making the
max length bigger doesn't really seem to be a good solution and I see
now how it's hard to implement properly. I don't see other ways to make
it work so far though. If it can't be fixed anytime soon, do you think
that documenting this behavior could be worth it?
--
Anna Akenteva
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
On Fri, Feb 16, 2018 at 2:00 PM, Anna Akenteva
<a.akenteva@postgrespro.ru> wrote:
It's not necessarily my goal. My goal is to avoid the confusing situation
where you insert something into a table and suddenly everything seems to
break for no reason and you don't get any information on what to do next. As
I see it, it could be solved with:
a) allowing big bytea values but making sure that they don't cause
problems (which is what I tried to do with my patch)
b) prohibiting insertion of the kind of data that will cause problems
c) informing the user about the issue (maybe by documenting this behaviour or
giving a more informative error message)
+1. We don't have to support everything, but things that don't work
should fail on insertion, not retrieval. Otherwise what we have is
less a database and more a data black hole.
--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
Robert Haas <robertmhaas@gmail.com> writes:
+1. We don't have to support everything, but things that don't work
should fail on insertion, not retrieval. Otherwise what we have is
less a database and more a data black hole.
That sounds nice as a principle but I'm not sure how workable it really
is. Do you want to reject text strings that fit fine in, say, LATIN1
encoding, but might be overlength if some client tries to read them in
UTF8 encoding? (bytea would have a comparable problem with escape vs hex
representation, for instance.) Should the limit vary depending on how
many columns are in the table? Should we account for client-side tuple
length restrictions?
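For what it's worth, the encoding point is easy to quantify (illustrative numbers only, not a proposal): every non-ASCII LATIN1 character becomes two bytes in UTF-8, so a text value stored comfortably under the limit can double on its way to a UTF8 client.

/*
 * Worst-case expansion when a LATIN1 value is converted for a UTF8
 * client: each non-ASCII character (1 byte in LATIN1) becomes 2 bytes.
 */
#include <stdio.h>

int
main(void)
{
    size_t stored = (size_t) 900 * 1024 * 1024; /* ~900 MB as LATIN1 text */
    size_t sent   = stored * 2;                 /* all characters non-ASCII */

    printf("stored %zu bytes, up to %zu bytes after conversion\n", stored, sent);
    return 0;
}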
Anyway, as Alvaro pointed out upthread, we've been down this particular
path before and it didn't work out. We need to learn something from that
failure and decide how to move forward.
regards, tom lane
On Tue, Feb 27, 2018 at 2:17 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:
Robert Haas <robertmhaas@gmail.com> writes:
+1. We don't have to support everything, but things that don't work
should fail on insertion, not retrieval. Otherwise what we have is
less a database and more a data black hole.
That sounds nice as a principle but I'm not sure how workable it really
is. Do you want to reject text strings that fit fine in, say, LATIN1
encoding, but might be overlength if some client tries to read them in
UTF8 encoding? (bytea would have a comparable problem with escape vs hex
representation, for instance.) Should the limit vary depending on how
many columns are in the table? Should we account for client-side tuple
length restrictions?
I suppose what I really want is a limit on how big the retrieved data
can be that's large enough that people stop hitting it.
Anyway, as Alvaro pointed out upthread, we've been down this particular
path before and it didn't work out. We need to learn something from that
failure and decide how to move forward.
Yep.
--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company