From 523786a03ae770f1edbd360a47336ce4ae619fbb Mon Sep 17 00:00:00 2001
From: BBG <djydewang@gmail.com>
Date: Sun, 2 Jun 2019 19:18:46 +0800
Subject: [PATCH] Optimize partial TOAST decompression

---
 src/backend/access/heap/tuptoaster.c | 54 ++++++++++++++++++++++++++++++------
 src/common/pg_lzcompress.c           | 27 ++++++++++++++++++
 src/include/common/pg_lzcompress.h   |  1 +
 3 files changed, 74 insertions(+), 8 deletions(-)

diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c
index 55d6e91..809a521 100644
--- a/src/backend/access/heap/tuptoaster.c
+++ b/src/backend/access/heap/tuptoaster.c
@@ -61,6 +61,7 @@ typedef struct toast_compress_header
  */
 #define TOAST_COMPRESS_HDRSZ		((int32) sizeof(toast_compress_header))
 #define TOAST_COMPRESS_RAWSIZE(ptr) (((toast_compress_header *) (ptr))->rawsize)
+#define TOAST_COMPRESS_SIZE(ptr)	((int32) VARSIZE(ptr) - TOAST_COMPRESS_HDRSZ)
 #define TOAST_COMPRESS_RAWDATA(ptr) \
 	(((char *) (ptr)) + TOAST_COMPRESS_HDRSZ)
 #define TOAST_COMPRESS_SET_RAWSIZE(ptr, len) \
@@ -252,6 +253,8 @@ heap_tuple_untoast_attr(struct varlena *attr)
  *
  *		Public entry point to get back part of a toasted value
  *		from compression or external storage.
+ *
+ * Note: when slicelength is negative, return suffix of the value.
  * ----------
  */
 struct varlena *
@@ -273,8 +276,29 @@ heap_tuple_untoast_attr_slice(struct varlena *attr,
 		if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
 			return toast_fetch_datum_slice(attr, sliceoffset, slicelength);
 
-		/* fetch it back (compressed marker will get set automatically) */
-		preslice = toast_fetch_datum(attr);
+		/*
+		 * When a prefix of the value is requested, fetch enough slices to
+		 * decompress. Otherwise, fetch all slices.
+		 */
+		if (slicelength > 0 && sliceoffset >= 0)
+		{
+			int32 max_size;
+
+			/*
+			 * Determine maximum amount of compressed data needed for a prefix
+			 * of a given length (after decompression).
+			 */
+			max_size = pglz_maximum_compressed_size(sliceoffset + slicelength,
+													TOAST_COMPRESS_SIZE(attr));
+
+			/*
+			 * Fetch enough compressed slices, compressed marker will get set
+			 * automatically.
+			 */
+			preslice = toast_fetch_datum_slice(attr, 0, max_size);
+		}
+		else
+			preslice = toast_fetch_datum(attr);
 	}
 	else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
 	{
@@ -2031,7 +2055,9 @@ toast_fetch_datum(struct varlena *attr)
  *	Reconstruct a segment of a Datum from the chunks saved
  *	in the toast relation
  *
- *	Note that this function only supports non-compressed external datums.
+ *	Note that this function supports non-compressed external datums
+ *	and compressed external datums (in which case the requrested slice
+ *  has to be a prefix, i.e. sliceoffset has to be 0).
  * ----------
  */
 static struct varlena *
@@ -2072,10 +2098,11 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length)
 	VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
 
 	/*
-	 * It's nonsense to fetch slices of a compressed datum -- this isn't lo_*
-	 * we can't return a compressed datum which is meaningful to toast later
+	 * It's meaningful to fetch slices at the start of a compressed datum,
+	 * since we can decompress the prefix raw data without decompressing the
+	 * whole value.
 	 */
-	Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer));
+	Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) || 0 == sliceoffset);
 
 	attrsize = toast_pointer.va_extsize;
 	totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
@@ -2086,12 +2113,23 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length)
 		length = 0;
 	}
 
+	/*
+	 * When fetching a prefix of a compressed external datum, account for
+	 * the rawsize tracking amount of raw data (it's stored at the beginning
+	 * as an int32 value).
+	 */
+	if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) && length > 0)
+		length = length + sizeof(int32);
+
 	if (((sliceoffset + length) > attrsize) || length < 0)
 		length = attrsize - sliceoffset;
 
 	result = (struct varlena *) palloc(length + VARHDRSZ);
 
-	SET_VARSIZE(result, length + VARHDRSZ);
+	if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
+		SET_VARSIZE_COMPRESSED(result, length + VARHDRSZ);
+	else
+		SET_VARSIZE(result, length + VARHDRSZ);
 
 	if (length == 0)
 		return result;			/* Can save a lot of work at this point! */
@@ -2275,7 +2313,7 @@ toast_decompress_datum(struct varlena *attr)
 	SET_VARSIZE(result, TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ);
 
 	if (pglz_decompress(TOAST_COMPRESS_RAWDATA(attr),
-						VARSIZE(attr) - TOAST_COMPRESS_HDRSZ,
+						TOAST_COMPRESS_SIZE(attr),
 						VARDATA(result),
 						TOAST_COMPRESS_RAWSIZE(attr), true) < 0)
 		elog(ERROR, "compressed data is corrupted");
diff --git a/src/common/pg_lzcompress.c b/src/common/pg_lzcompress.c
index 988b398..18766ad 100644
--- a/src/common/pg_lzcompress.c
+++ b/src/common/pg_lzcompress.c
@@ -771,3 +771,30 @@ pglz_decompress(const char *source, int32 slen, char *dest,
 	 */
 	return (char *) dp - dest;
 }
+
+
+/* ----------
+ * pglz_max_compressed_size -
+ *
+ *		Calculate the maximum compressed size for a given amount of raw data.
+ *		Return the maximum size, or total compressed size if maximum size is
+ *		larger than total compressed size.
+ * ----------
+ */
+int32
+pglz_maximum_compressed_size(int32 rawsize, int32 total_compressed_size)
+{
+	int32 compressed_size;
+
+	/*
+	 * Use int64 to prevent overflow during calculation.
+	 */
+	compressed_size = (int32) ((int64) rawsize * 9 + 8) / 8;
+
+	/*
+	 * Maximum compressed size can't be larger than total compressed size.
+	 */
+	compressed_size = Min(compressed_size, total_compressed_size);
+
+	return compressed_size;
+}
diff --git a/src/include/common/pg_lzcompress.h b/src/include/common/pg_lzcompress.h
index 5555764..e59fe2e 100644
--- a/src/include/common/pg_lzcompress.h
+++ b/src/include/common/pg_lzcompress.h
@@ -87,5 +87,6 @@ extern int32 pglz_compress(const char *source, int32 slen, char *dest,
 						   const PGLZ_Strategy *strategy);
 extern int32 pglz_decompress(const char *source, int32 slen, char *dest,
 							 int32 rawsize, bool check_complete);
+extern int32 pglz_maximum_compressed_size(int32 rawsize, int32 total_compressed_size);
 
 #endif							/* _PG_LZCOMPRESS_H_ */
-- 
2.7.4

