Support of partial decompression for datums

Started by Ildus Kurbangaliev · about 10 years ago · 6 messages
#1 Ildus Kurbangaliev
i.kurbangaliev@postgrespro.ru
1 attachment(s)

The attached patch adds support for partial decompression of datums.
It will be useful in many cases where extracting part of the data is
enough for big varlena structures.

It is especially useful for expanded datums, because it provides
storage for partial results.

I have another patch, which removes the 1 MB limit on tsvector using
this feature.

Usage:

Assert(VARATT_IS_COMPRESSED(attr));

/* Allocate room for the whole decompressed datum up front */
evh->data = (struct varlena *)
	palloc(TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ);
SET_VARSIZE(evh->data, TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ);

/* Extract the size of the tsvector */
res = toast_decompress_datum_partial(attr, evh->data,
									 evh->dcState, sizeof(int32));
if (res == -1)
	elog(ERROR, "compressed tsvector is corrupted");

evh->count = TS_COUNT((TSVector) evh->data);

/* Extract the entries of the tsvector */
res = toast_decompress_datum_partial(attr, evh->data,
									 evh->dcState,
									 sizeof(int32) + sizeof(WordEntry) * evh->count);
if (res == -1)
	elog(ERROR, "compressed tsvector is corrupted");
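The decompression state only needs to be zero-initialized before the
first call; a minimal sketch (evh and dcState are the same hypothetical
expanded-header fields as in the snippet above):

/*
 * sp and dp start at zero: nothing consumed from the source, nothing
 * produced into the destination yet.  Each call to
 * toast_decompress_datum_partial() advances them, so later calls
 * resume where the previous one stopped.
 */
evh->dcState = (PGLZ_DecompressState *) palloc0(sizeof(PGLZ_DecompressState));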

--
Ildus Kurbangaliev
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company

Attachments:

partial_decompression_v1.patch (text/x-patch)
diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c
index b9691a5..0fc5d5a 100644
--- a/src/backend/access/heap/tuptoaster.c
+++ b/src/backend/access/heap/tuptoaster.c
@@ -46,32 +46,12 @@
 
 #undef TOAST_DEBUG
 
-/*
- *	The information at the start of the compressed toast data.
- */
-typedef struct toast_compress_header
-{
-	int32		vl_len_;		/* varlena header (do not touch directly!) */
-	int32		rawsize;
-} toast_compress_header;
-
-/*
- * Utilities for manipulation of header information for compressed
- * toast entries.
- */
-#define TOAST_COMPRESS_HDRSZ		((int32) sizeof(toast_compress_header))
-#define TOAST_COMPRESS_RAWSIZE(ptr) (((toast_compress_header *) (ptr))->rawsize)
-#define TOAST_COMPRESS_RAWDATA(ptr) \
-	(((char *) (ptr)) + TOAST_COMPRESS_HDRSZ)
-#define TOAST_COMPRESS_SET_RAWSIZE(ptr, len) \
-	(((toast_compress_header *) (ptr))->rawsize = (len))
 
 static void toast_delete_datum(Relation rel, Datum value);
 static Datum toast_save_datum(Relation rel, Datum value,
 				 struct varlena * oldexternal, int options);
 static bool toastrel_valueid_exists(Relation toastrel, Oid valueid);
 static bool toastid_valueid_exists(Oid toastrelid, Oid valueid);
-static struct varlena *toast_fetch_datum(struct varlena * attr);
 static struct varlena *toast_fetch_datum_slice(struct varlena * attr,
 						int32 sliceoffset, int32 length);
 static struct varlena *toast_decompress_datum(struct varlena * attr);
@@ -1792,7 +1772,7 @@ toastid_valueid_exists(Oid toastrelid, Oid valueid)
  *	in the toast relation
  * ----------
  */
-static struct varlena *
+struct varlena *
 toast_fetch_datum(struct varlena * attr)
 {
 	Relation	toastrel;
@@ -2205,12 +2185,33 @@ toast_decompress_datum(struct varlena * attr)
 	if (pglz_decompress(TOAST_COMPRESS_RAWDATA(attr),
 						VARSIZE(attr) - TOAST_COMPRESS_HDRSZ,
 						VARDATA(result),
-						TOAST_COMPRESS_RAWSIZE(attr)) < 0)
+						TOAST_COMPRESS_RAWSIZE(attr),
+						NULL) < 0)
 		elog(ERROR, "compressed data is corrupted");
 
 	return result;
 }
 
+/* ----------
+ * toast_decompress_datum_partial -
+ *
+ * Decompress a compressed version of a varlena datum partially
+ */
+int
+toast_decompress_datum_partial(struct varlena *source,
+	struct varlena *dest,
+	PGLZ_DecompressState *state,
+	int32 until)
+{
+	Assert(VARATT_IS_COMPRESSED(source));
+
+	state->until = until;
+	return pglz_decompress(TOAST_COMPRESS_RAWDATA(source),
+						VARSIZE(source) - TOAST_COMPRESS_HDRSZ,
+						VARDATA(dest),
+						TOAST_COMPRESS_RAWSIZE(source),
+						state);
+}
 
 /* ----------
  * toast_open_indexes
diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c
index 37cf9de..a1642ed 100644
--- a/src/backend/access/transam/xlogreader.c
+++ b/src/backend/access/transam/xlogreader.c
@@ -1309,7 +1309,7 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
 	{
 		/* If a backup block image is compressed, decompress it */
 		if (pglz_decompress(ptr, bkpb->bimg_len, tmp,
-							BLCKSZ - bkpb->hole_length) < 0)
+							BLCKSZ - bkpb->hole_length, NULL) < 0)
 		{
 			report_invalid_record(record, "invalid compressed image at %X/%X, block %d",
 								  (uint32) (record->ReadRecPtr >> 32),
diff --git a/src/common/pg_lzcompress.c b/src/common/pg_lzcompress.c
index 447a043..df5e169 100644
--- a/src/common/pg_lzcompress.c
+++ b/src/common/pg_lzcompress.c
@@ -680,18 +680,30 @@ pglz_compress(const char *source, int32 slen, char *dest,
  */
 int32
 pglz_decompress(const char *source, int32 slen, char *dest,
-				int32 rawsize)
+				int32 rawsize, PGLZ_DecompressState *state)
 {
-	const unsigned char *sp;
+	unsigned char *sp;
 	const unsigned char *srcend;
 	unsigned char *dp;
 	unsigned char *destend;
+	bool last_block = true;
 
-	sp = (const unsigned char *) source;
+	sp = (unsigned char *) source;
 	srcend = ((const unsigned char *) source) + slen;
 	dp = (unsigned char *) dest;
 	destend = dp + rawsize;
 
+	if (state != NULL)
+	{
+		last_block = (state->until >= rawsize);
+		sp += state->sp;
+		dp += state->dp;
+
+		Assert(sp <= srcend);
+		Assert(dp <= destend);
+		destend = last_block ? destend : ((unsigned char *) dest) + state->until;
+	}
+
 	while (sp < srcend && dp < destend)
 	{
 		/*
@@ -728,7 +740,7 @@ pglz_decompress(const char *source, int32 slen, char *dest,
 				 * don't simply put the elog inside the loop since that will
 				 * probably interfere with optimization.
 				 */
-				if (dp + len > destend)
+				if (last_block && (dp + len > destend))
 				{
 					dp += len;
 					break;
@@ -752,7 +764,7 @@ pglz_decompress(const char *source, int32 slen, char *dest,
 				 * An unset control bit means LITERAL BYTE. So we just copy
 				 * one from INPUT to OUTPUT.
 				 */
-				if (dp >= destend)		/* check for buffer overrun */
+				if (last_block && dp >= destend)		/* check for buffer overrun */
 					break;		/* do not clobber memory */
 
 				*dp++ = *sp++;
@@ -765,14 +777,22 @@ pglz_decompress(const char *source, int32 slen, char *dest,
 		}
 	}
 
-	/*
-	 * Check we decompressed the right amount.
-	 */
-	if (dp != destend || sp != srcend)
-		return -1;
+	if (!last_block && state != NULL)
+	{
+		state->sp = sp - (unsigned char *) source;
+		state->dp = dp - (unsigned char *) dest;
+	}
+	else
+	{
+		/*
+		 * Check we decompressed the right amount.
+		 */
+		if (dp != destend || sp != srcend)
+			return -1;
+	}
 
 	/*
 	 * That's it.
 	 */
-	return rawsize;
+	return dp - (unsigned char *) dest;
 }
diff --git a/src/include/access/tuptoaster.h b/src/include/access/tuptoaster.h
index 77f637e..5248d73 100644
--- a/src/include/access/tuptoaster.h
+++ b/src/include/access/tuptoaster.h
@@ -14,6 +14,7 @@
 #define TUPTOASTER_H
 
 #include "access/htup_details.h"
+#include "common/pg_lzcompress.h"
 #include "storage/lockdefs.h"
 #include "utils/relcache.h"
 
@@ -102,6 +103,26 @@
 #define INDIRECT_POINTER_SIZE (VARHDRSZ_EXTERNAL + sizeof(varatt_indirect))
 
 /*
+ *	The information at the start of the compressed toast data.
+ */
+typedef struct toast_compress_header
+{
+	int32		vl_len_;		/* varlena header (do not touch directly!) */
+	int32		rawsize;
+} toast_compress_header;
+
+/*
+ * Utilities for manipulation of header information for compressed
+ * toast entries.
+ */
+#define TOAST_COMPRESS_HDRSZ		((int32) sizeof(toast_compress_header))
+#define TOAST_COMPRESS_RAWSIZE(ptr) (((toast_compress_header *) (ptr))->rawsize)
+#define TOAST_COMPRESS_RAWDATA(ptr) \
+	(((char *) (ptr)) + TOAST_COMPRESS_HDRSZ)
+#define TOAST_COMPRESS_SET_RAWSIZE(ptr, len) \
+	(((toast_compress_header *) (ptr))->rawsize = (len))
+
+/*
  * Testing whether an externally-stored value is compressed now requires
  * comparing extsize (the actual length of the external data) to rawsize
  * (the original uncompressed datum's size).  The latter includes VARHDRSZ
@@ -202,6 +223,24 @@ extern Datum toast_flatten_tuple_to_datum(HeapTupleHeader tup,
 extern Datum toast_compress_datum(Datum value);
 
 /* ----------
+ * toast_fetch_datum -
+ *
+ *	Reconstruct an in memory Datum from the chunks saved
+ *	in the toast relation
+ * ----------
+ */
+extern struct varlena *toast_fetch_datum(struct varlena *attr);
+
+/* ----------
+ * toast_decompress_datum_partial -
+ *
+ * Decompress a datum partially by saving its state
+ * ----------
+ */
+extern int toast_decompress_datum_partial(struct varlena *source,
+	struct varlena *dest, PGLZ_DecompressState *state, int32 until);
+
+/* ----------
  * toast_raw_datum_size -
  *
  *	Return the raw (detoasted) size of a varlena datum
diff --git a/src/include/common/pg_lzcompress.h b/src/include/common/pg_lzcompress.h
index dbd51d5..3378d57 100644
--- a/src/include/common/pg_lzcompress.h
+++ b/src/include/common/pg_lzcompress.h
@@ -65,6 +65,16 @@ typedef struct PGLZ_Strategy
 } PGLZ_Strategy;
 
 
+typedef struct PGLZ_DecompressState
+{
+	int32		until;	/* decompress until this value */
+
+	/* internal values */
+	int32		sp;
+	int32		dp;
+} PGLZ_DecompressState;
+
+
 /* ----------
  * The standard strategies
  *
@@ -86,6 +96,6 @@ extern const PGLZ_Strategy *const PGLZ_strategy_always;
 extern int32 pglz_compress(const char *source, int32 slen, char *dest,
 			  const PGLZ_Strategy *strategy);
 extern int32 pglz_decompress(const char *source, int32 slen, char *dest,
-				int32 rawsize);
+				int32 rawsize, PGLZ_DecompressState *state);
 
 #endif   /* _PG_LZCOMPRESS_H_ */
#2 Michael Paquier
michael.paquier@gmail.com
In reply to: Ildus Kurbangaliev (#1)
Re: Support of partial decompression for datums

On Fri, Dec 4, 2015 at 9:47 PM, Ildus Kurbangaliev
<i.kurbangaliev@postgrespro.ru> wrote:

> The attached patch adds support for partial decompression of datums.
> It will be useful in many cases where extracting part of the data is
> enough for big varlena structures.
>
> It is especially useful for expanded datums, because it provides
> storage for partial results.
>
> I have another patch, which removes the 1 MB limit on tsvector using
> this feature.

-1 for changing the shape of pglz_decompress directly, and particularly
for using metadata in it. The current form of those routines is close to
what lz4 offers for compression and decompression of a string; let's not
break that, since we had a hard enough time in the 9.5 cycle getting
something clean.

By the way, why don't you compress the data in multiple chunks and store
the related metadata at a higher level? There is no need to put that in
pglz itself.
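To be clear, here is a rough sketch of what I mean, with assumed names
(CHUNK_RAW_SIZE and ChunkedHeader are only illustrations, not an
existing format): split the raw datum into fixed-size chunks, compress
each one independently, and keep a directory of compressed lengths so a
reader can locate and decompress only the chunks it needs.

#include "postgres.h"
#include "common/pg_lzcompress.h"

#define CHUNK_RAW_SIZE	8192	/* uncompressed bytes per chunk */

typedef struct ChunkedHeader
{
	int32		nchunks;
	int32		chunk_len[FLEXIBLE_ARRAY_MEMBER];	/* compressed sizes */
} ChunkedHeader;

static void
compress_chunked(const char *raw, int32 rawsize,
				 ChunkedHeader *hdr, char *out)
{
	int32		off = 0;
	int			i = 0;

	while (off < rawsize)
	{
		int32		thislen = Min(CHUNK_RAW_SIZE, rawsize - off);
		int32		clen = pglz_compress(raw + off, thislen, out,
										 PGLZ_strategy_default);

		if (clen < 0)
		{
			/* Incompressible chunk: store it raw.  A real format would
			 * need a flag bit per chunk to record this. */
			memcpy(out, raw + off, thislen);
			clen = thislen;
		}
		hdr->chunk_len[i++] = clen;
		out += clen;
		off += thislen;
	}
	hdr->nchunks = i;
}

To read around a given offset, sum chunk_len[] to find the containing
chunk and decompress just that one; the price is a slightly worse
compression ratio from the smaller windows.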
--
Michael


#3 Ildus Kurbangaliev
i.kurbangaliev@postgrespro.ru
In reply to: Michael Paquier (#2)
Re: Support of partial decompression for datums

On Fri, 4 Dec 2015 22:13:58 +0900
Michael Paquier <michael.paquier@gmail.com> wrote:

> On Fri, Dec 4, 2015 at 9:47 PM, Ildus Kurbangaliev
> <i.kurbangaliev@postgrespro.ru> wrote:
>
>> The attached patch adds support for partial decompression of datums.
>> It will be useful in many cases where extracting part of the data is
>> enough for big varlena structures.
>>
>> It is especially useful for expanded datums, because it provides
>> storage for partial results.
>>
>> I have another patch, which removes the 1 MB limit on tsvector using
>> this feature.
>
> -1 for changing the shape of pglz_decompress directly, and particularly
> for using metadata in it. The current form of those routines is close to
> what lz4 offers for compression and decompression of a string; let's not
> break that, since we had a hard enough time in the 9.5 cycle getting
> something clean.

The metadata is not used by the current code, only in the partial
decompression case.

> By the way, why don't you compress the data in multiple chunks and store
> the related metadata at a higher level? There is no need to put that in
> pglz itself.

Yes, but the chunk idea means creating a whole new structure, and that
can't be used if you want to optimize existing structures. For example,
you can't just change arrays, but I think there are places where partial
decompression can be used.

--
Ildus Kurbangaliev
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company


#4 Simon Riggs
simon@2ndQuadrant.com
In reply to: Ildus Kurbangaliev (#1)
Re: Support of partial decompression for datums

On 4 December 2015 at 13:47, Ildus Kurbangaliev
<i.kurbangaliev@postgrespro.ru> wrote:

> The attached patch adds support for partial decompression of datums.
> It will be useful in many cases where extracting part of the data is
> enough for big varlena structures.
>
> It is especially useful for expanded datums, because it provides
> storage for partial results.

This isn't enough for anyone else to follow your thoughts and agree enough
to commit.

Please explain the whole idea, starting from what problem you are trying
to solve and how well this does it, why you did it this way, and which
other ways you tried or decided not to pursue. Thanks.

--
Simon Riggs http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#5 Michael Paquier
michael.paquier@gmail.com
In reply to: Simon Riggs (#4)
Re: Support of partial decompression for datums

On Sat, Dec 5, 2015 at 12:10 AM, Simon Riggs <simon@2ndquadrant.com> wrote:

> On 4 December 2015 at 13:47, Ildus Kurbangaliev
> <i.kurbangaliev@postgrespro.ru> wrote:
>
>> The attached patch adds support for partial decompression of datums.
>> It will be useful in many cases where extracting part of the data is
>> enough for big varlena structures.
>>
>> It is especially useful for expanded datums, because it provides
>> storage for partial results.
>
> This isn't enough for anyone else to follow your thoughts and agree enough
> to commit.
>
> Please explain the whole idea, starting from what problem you are trying
> to solve and how well this does it, why you did it this way, and which
> other ways you tried or decided not to pursue. Thanks.

Yeah, I would imagine that what Ildus is trying to achieve is
something close to LZ4_decompress_safe_partial: being able to stop
decompression after a certain amount of data has been produced, and to
continue working once again afterwards.
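For reference, the lz4 call looks like this (a standalone sketch, not
PostgreSQL code; note it can only stop early, resuming needs lz4's
streaming API, and depending on the lz4 version it may write a few
bytes past the requested size, though never past dstCapacity):

#include <lz4.h>

/*
 * Decompress only the first "wanted" bytes of a compressed buffer.
 * Returns the number of bytes written into dst, or a negative value
 * on error.
 */
int
decompress_prefix(const char *src, int srcSize,
				  char *dst, int dstCapacity, int wanted)
{
	return LZ4_decompress_safe_partial(src, dst, srcSize,
									   wanted, dstCapacity);
}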

And actually I think I get the idea. With his test case, what we get
first is a size, and then we reuse this size to extract only the number
of items we need from the tsvector. But that's actually linked to the
length of the compressed chunk, and at the end we would perhaps still
need to decompress the whole string; it is not possible to be sure using
the information provided.

Ildus, using your patch for tsvector, are you aiming at being able to
complete an operation by only using a portion of the compressed data?
Or are you planning to use that to improve the speed of detection of
corrupted data in the chunk? If it's the latter, we would still need to
decompress the whole string anyway, so a routine able to decompress only
up to a given position is not necessary, and based on the example given
upthread it is not possible to know which one you are trying to achieve.
Hence could you share your thoughts regarding your stuff with tsvector?

Changing the shape of pglz_decompress is still a bad idea anyway; I
guess we had better add something new like pglz_decompress_partial
instead.
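Something like this, say (a sketch only; pglz_decompress_internal would
be the existing decompression loop renamed, taking the state from
Ildus' patch):

/* Existing callers keep the historical contract, no state involved. */
int32
pglz_decompress(const char *source, int32 slen, char *dest, int32 rawsize)
{
	return pglz_decompress_internal(source, slen, dest, rawsize, NULL);
}

/* Resumable callers pass a state explicitly. */
int32
pglz_decompress_partial(const char *source, int32 slen, char *dest,
						int32 rawsize, PGLZ_DecompressState *state)
{
	return pglz_decompress_internal(source, slen, dest, rawsize, state);
}

That way tuptoaster.c and xlogreader.c would not need to change at all.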
--
Michael


#6 Ildus Kurbangaliev
i.kurbangaliev@postgrespro.ru
In reply to: Michael Paquier (#5)
Re: Support of partial decompression for datums

On Sat, 5 Dec 2015 06:14:07 +0900
Michael Paquier <michael.paquier@gmail.com> wrote:

> On Sat, Dec 5, 2015 at 12:10 AM, Simon Riggs <simon@2ndquadrant.com>
> wrote:
>
>> On 4 December 2015 at 13:47, Ildus Kurbangaliev
>> <i.kurbangaliev@postgrespro.ru> wrote:
>>
>>> The attached patch adds support for partial decompression of datums.
>>> It will be useful in many cases where extracting part of the data is
>>> enough for big varlena structures.
>>>
>>> It is especially useful for expanded datums, because it provides
>>> storage for partial results.
>>
>> This isn't enough for anyone else to follow your thoughts and agree
>> enough to commit.
>>
>> Please explain the whole idea, starting from what problem you are
>> trying to solve and how well this does it, why you did it this way,
>> and which other ways you tried or decided not to pursue. Thanks.
>
> Yeah, I would imagine that what Ildus is trying to achieve is
> something close to LZ4_decompress_safe_partial: being able to stop
> decompression after a certain amount of data has been produced, and
> to continue working once again afterwards.
>
> And actually I think I get the idea. With his test case, what we get
> first is a size, and then we reuse this size to extract only the
> number of items we need from the tsvector. But that's actually linked
> to the length of the compressed chunk, and at the end we would
> perhaps still need to decompress the whole string; it is not possible
> to be sure using the information provided.
>
> Ildus, using your patch for tsvector, are you aiming at being able to
> complete an operation by only using a portion of the compressed data?
> Or are you planning to use that to improve the speed of detection of
> corrupted data in the chunk? If it's the latter, we would still need
> to decompress the whole string anyway, so a routine able to decompress
> only up to a given position is not necessary, and based on the example
> given upthread it is not possible to know which one you are trying to
> achieve. Hence could you share your thoughts regarding your stuff with
> tsvector?
>
> Changing the shape of pglz_decompress is still a bad idea anyway; I
> guess we had better add something new like pglz_decompress_partial
> instead.

Yes, you've got the idea. First we get the size of the entries in the
tsvector, then with that size we can get the WordEntry values. A
WordEntry contains the offset of its lexeme within the data and the
lexeme's length. The information in these entries is enough to
calculate the offset up to which we need to decompress the tsvector
varlena.

So, for example, in a binary search we will decompress until half of
the lexeme data (lexemes in a tsvector are sorted), and then if the
search goes left, we just reuse that decompressed block and we don't
need the other part of the tsvector. If the search goes right, we just
decompress half of the remaining part using the saved state, and so on.

So in half of the cases we will decompress only half of the lexemes in
the tsvector, and even if we need to decompress more, in most cases we
will not decompress the whole tsvector.
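To make that concrete, here is a hypothetical sketch of such a search
(it assumes the count and the WordEntry array were already extracted as
in my first message; ARRPTR is the usual tsvector accessor, TS_COUNT is
from my tsvector patch, and the offsets assume no position data stored
after the lexemes):

static bool
tsvector_contains_lexeme(struct varlena *attr,	/* compressed source */
						 TSVector tv,			/* destination, filled lazily */
						 PGLZ_DecompressState *state,
						 const char *key, int keylen)
{
	int32		count = TS_COUNT(tv);
	WordEntry  *entries = ARRPTR(tv);
	char	   *lexemes = (char *) (entries + count);

	/* offset of the lexeme strings inside the uncompressed data */
	int32		strstart = sizeof(int32) + count * sizeof(WordEntry);
	int			lo = 0,
				hi = count - 1;

	while (lo <= hi)
	{
		int			mid = lo + (hi - lo) / 2;
		WordEntry  *we = &entries[mid];
		int			cmp;

		/*
		 * Decompress just far enough to read the middle lexeme.  When
		 * the search goes left, this region is already decompressed and
		 * the call is a no-op thanks to the saved state.
		 */
		if (toast_decompress_datum_partial(attr, (struct varlena *) tv,
						state, strstart + we->pos + we->len) == -1)
			elog(ERROR, "compressed tsvector is corrupted");

		cmp = memcmp(lexemes + we->pos, key, Min((int) we->len, keylen));
		if (cmp == 0)
			cmp = (int) we->len - keylen;

		if (cmp == 0)
			return true;
		else if (cmp > 0)
			hi = mid - 1;		/* go left, reuse the decompressed block */
		else
			lo = mid + 1;		/* go right, resume from the saved state */
	}
	return false;
}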

--
Ildus Kurbangaliev
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company
