[PATCH] ALTER TABLE SET (compress_max_size... = )
This patch lets you control 3 pg_lzcompress knobs on a per table basis
(note requires reloptions.patch)
compress_max_size: Controls the maximum size to be considered for
TOAST compression.
compress_min_rate: Minimum compression rate (0-100%) required for
TOAST compression to be used.
compress_success_by: If no compressible data has been found by this
byte offset, abort compression.
Note this adds some documentation, but I was having a hard time coming
up with a good way to describe these. I'm also not very happy with
the names. I originally tried something like toast.max_input_size.
But decided later if we allow you to set toast attributes that might
be confusing. So help with verbiage and names is appreciated.
Also I only did those 3 because they seemed the 3 most useful things
someone would want to tune. Later if we need to we can export them
all and make them per column settings (and maybe you can pick a
compression algo or what not...) But I figured lets start small.
I thought about doing another cleanup patch to get rid of
PGLZ_strategy_default and PGLZ_strategy_always. Nothing uses the
latter, and if we expose all the knobs nothing will use the former.
Comments?
Attachments:
compress_opts.patchapplication/octet-stream; name=compress_opts.patchDownload
*** a/doc/src/sgml/ref/create_table.sgml
--- b/doc/src/sgml/ref/create_table.sgml
***************
*** 714,719 **** and <replaceable class="PARAMETER">table_constraint</replaceable> is:
--- 714,749 ----
</listitem>
</varlistentry>
+ <varlistentry>
+ <term><literal>COMPRESS_MAX_SIZE</></term>
+ <listitem>
+ <para>
+ Controls the maximum size to be considered for TOAST compression.
+ The default is 1048576 bytes (1MB). Note that, as with the other COMPRESS_* storage options, setting this higher can adversely affect the performance of substring(...) or even simple SELECT queries.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><literal>COMPRESS_MIN_RATE</></term>
+ <listitem>
+ <para>
+ Minimum compression rate (0-100%) required for TOAST compression to be used.
+ Data that does not achieve this compression rate is stored uncompressed.
+ The default is 25%.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><literal>COMPRESS_SUCCESS_BY</></term>
+ <listitem>
+ <para>
+ If no compressible data has been found by this byte offset (default 1024), compression is abandoned. This knob mainly keeps us from wasting cycles trying to compress data that is already compressed.
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
</refsect2>
*** a/src/backend/access/common/reloptions.c
--- b/src/backend/access/common/reloptions.c
***************
*** 85,90 **** static relopt_int intRelOpts[] =
--- 85,116 ----
},
GIST_DEFAULT_FILLFACTOR, GIST_MIN_FILLFACTOR, 100
},
+ {
+ {
+ "compress_max_size",
+ "Maximum input data size to consider compression",
+ RELOPT_KIND_HEAP
+ },
+ HEAP_DEFAULT_COMPRESS_MAX_SIZE, 0, INT_MAX
+ },
+ {
+ {
+ "compress_min_rate",
+ "Minimum compression rate (0-99%) to require",
+ RELOPT_KIND_HEAP
+ },
+ HEAP_DEFAULT_COMPRESS_MIN_RATE, 0, 100
+ },
+ {
+ {
+ "compress_success_by",
+ "Abandon compression if we find no compressible data "
+ "within the first this-many bytes",
+ RELOPT_KIND_HEAP
+ },
+ HEAP_DEFAULT_COMPRESS_SUCCESS_BY, 0, INT_MAX /* default, min, max */
+ },
+
/* list terminator */
{ { NULL } }
};
***************
*** 750,755 **** default_reloptions(Datum reloptions, bool validate, relopt_kind kind)
--- 776,785 ----
for (i = 0; i < numoptions; i++)
{
HANDLE_INT_RELOPTION("fillfactor", lopts.fillfactor, options[i]);
+
+ HANDLE_INT_RELOPTION("compress_max_size", lopts.compress_max_size, options[i]);
+ HANDLE_INT_RELOPTION("compress_min_rate", lopts.compress_min_rate, options[i]);
+ HANDLE_INT_RELOPTION("compress_success_by", lopts.compress_success_by, options[i]); /* name must match the intRelOpts entry */
}
pfree(options);
*** a/src/backend/access/heap/tuptoaster.c
--- b/src/backend/access/heap/tuptoaster.c
***************
*** 36,42 ****
#include "access/xact.h"
#include "catalog/catalog.h"
#include "utils/fmgroids.h"
- #include "utils/pg_lzcompress.h"
#include "utils/rel.h"
#include "utils/typcache.h"
#include "utils/tqual.h"
--- 36,41 ----
***************
*** 435,440 **** toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
--- 434,446 ----
bool toast_free[MaxHeapAttributeNumber];
bool toast_delold[MaxHeapAttributeNumber];
+ PGLZ_Strategy pglz_strategy = *PGLZ_strategy_default;
+
+ pglz_strategy.max_input_size = RelationGetCompressMaxSize(rel, HEAP_DEFAULT_COMPRESS_MAX_SIZE);
+ pglz_strategy.min_comp_rate = RelationGetCompressMinRate(rel, HEAP_DEFAULT_COMPRESS_MIN_RATE);
+ pglz_strategy.first_success_by = RelationGetCompressSuccessBy(rel, HEAP_DEFAULT_COMPRESS_SUCCESS_BY);
+
+
/*
* We should only ever be called for tuples of plain relations ---
* recursing on a toast rel is bad news.
***************
*** 638,644 **** toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
if (att[i]->attstorage == 'x')
{
old_value = toast_values[i];
! new_value = toast_compress_datum(old_value);
if (DatumGetPointer(new_value) != NULL)
{
--- 644,650 ----
if (att[i]->attstorage == 'x')
{
old_value = toast_values[i];
! new_value = toast_compress_datum(old_value, &pglz_strategy);
if (DatumGetPointer(new_value) != NULL)
{
***************
*** 775,781 **** toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
*/
i = biggest_attno;
old_value = toast_values[i];
! new_value = toast_compress_datum(old_value);
if (DatumGetPointer(new_value) != NULL)
{
--- 781,787 ----
*/
i = biggest_attno;
old_value = toast_values[i];
! new_value = toast_compress_datum(old_value, &pglz_strategy);
if (DatumGetPointer(new_value) != NULL)
{
***************
*** 1064,1070 **** toast_flatten_tuple_attribute(Datum value,
* ----------
*/
Datum
! toast_compress_datum(Datum value)
{
struct varlena *tmp;
int32 valsize = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
--- 1070,1076 ----
* ----------
*/
Datum
! toast_compress_datum(Datum value, const PGLZ_Strategy *strategy)
{
struct varlena *tmp;
int32 valsize = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
***************
*** 1073,1083 **** toast_compress_datum(Datum value)
Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value)));
/*
* No point in wasting a palloc cycle if value size is out of the
* allowed range for compression
*/
! if (valsize < PGLZ_strategy_default->min_input_size ||
! valsize > PGLZ_strategy_default->max_input_size)
return PointerGetDatum(NULL);
tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize));
--- 1079,1096 ----
Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value)));
/*
+ * We consider it a bug if any of these are less than zero
+ */
+ Assert(strategy != NULL);
+ Assert(strategy->min_input_size >= 0);
+ Assert(strategy->max_input_size >= 0);
+
+ /*
* No point in wasting a palloc cycle if value size is out of the
* allowed range for compression
*/
! if (valsize < strategy->min_input_size ||
! valsize > strategy->max_input_size)
return PointerGetDatum(NULL);
tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize));
***************
*** 1093,1099 **** toast_compress_datum(Datum value)
* on a savings of more than 2 bytes to ensure we have a gain.
*/
if (pglz_compress(VARDATA_ANY(DatumGetPointer(value)), valsize,
! (PGLZ_Header *) tmp, PGLZ_strategy_default) &&
VARSIZE(tmp) < valsize - 2)
{
/* successful compression */
--- 1106,1112 ----
* on a savings of more than 2 bytes to ensure we have a gain.
*/
if (pglz_compress(VARDATA_ANY(DatumGetPointer(value)), valsize,
! (PGLZ_Header *) tmp, strategy) &&
VARSIZE(tmp) < valsize - 2)
{
/* successful compression */
*** a/src/include/access/tuptoaster.h
--- b/src/include/access/tuptoaster.h
***************
*** 16,21 ****
--- 16,22 ----
#include "access/htup.h"
#include "storage/bufpage.h"
#include "utils/relcache.h"
+ #include "utils/pg_lzcompress.h"
/*
* This enables de-toasting of index entries. Needed until VACUUM is
***************
*** 151,157 **** extern Datum toast_flatten_tuple_attribute(Datum value,
* Create a compressed version of a varlena datum, if possible
* ----------
*/
! extern Datum toast_compress_datum(Datum value);
/* ----------
* toast_raw_datum_size -
--- 152,158 ----
* Create a compressed version of a varlena datum, if possible
* ----------
*/
! extern Datum toast_compress_datum(Datum value, const PGLZ_Strategy *strategy);
/* ----------
* toast_raw_datum_size -
*** a/src/include/utils/rel.h
--- b/src/include/utils/rel.h
***************
*** 217,227 **** typedef struct RelationData
typedef struct StdRdOptions
{
int32 vl_len_; /* varlena header (do not touch directly!) */
! int fillfactor; /* page fill factor in percent (0..100) */
} StdRdOptions;
! #define HEAP_MIN_FILLFACTOR 10
#define HEAP_DEFAULT_FILLFACTOR 100
/*
* RelationGetFillFactor
--- 217,233 ----
typedef struct StdRdOptions
{
int32 vl_len_; /* varlena header (do not touch directly!) */
! int fillfactor; /* page fill factor in percent (0..100) */
! int compress_max_size; /* dont compress toasted data over this size */
! int compress_min_rate; /* require at least this % to compress */
! int compress_success_by; /* give up if by this byte no compressible data */
} StdRdOptions;
! #define HEAP_MIN_FILLFACTOR 10
#define HEAP_DEFAULT_FILLFACTOR 100
+ #define HEAP_DEFAULT_COMPRESS_MAX_SIZE (1024*1024) /* default to 1MB; parenthesized so it is safe inside larger expressions */
+ #define HEAP_DEFAULT_COMPRESS_MIN_RATE 25 /* percent */
+ #define HEAP_DEFAULT_COMPRESS_SUCCESS_BY 1024 /* bytes */
/*
* RelationGetFillFactor
***************
*** 231,236 **** typedef struct StdRdOptions
--- 237,254 ----
((relation)->rd_options ? \
((StdRdOptions *) (relation)->rd_options)->fillfactor : (defaultff))
+ #define RelationGetCompressMaxSize(relation, defaultff) \
+ ((relation)->rd_options ? \
+ ((StdRdOptions *) (relation)->rd_options)->compress_max_size : (defaultff))
+
+ #define RelationGetCompressMinRate(relation, defaultff) \
+ ((relation)->rd_options ? \
+ ((StdRdOptions *) (relation)->rd_options)->compress_min_rate : (defaultff))
+
+ #define RelationGetCompressSuccessBy(relation, defaultff) \
+ ((relation)->rd_options ? \
+ ((StdRdOptions *) (relation)->rd_options)->compress_success_by : (defaultff))
+
/*
* RelationGetTargetPageUsage
* Returns the relation's desired space usage per page in bytes.
Alex Hunsaker wrote:
This patch lets you control 3 pg_lzcompress knobs on a per table basis
(note requires reloptions.patch)
compress_max_size: Controls the maximum size to be considered for
TOAST compression.
compress_min_rate: Minimum compression rate (0-100%) required for
TOAST compression to be used.
compress_success_by: if by this byte no compressible data found abort
compression.
Note this adds some documentation, but I was having a hard time coming
up with a good way to describe these. I'm also not very happy with
the names. I originally tried something like toast.max_input_size.
But decided later if we allow you to set toast attributes that might
be confusing. So help with verbiage and names is appreciated.
Also I only did those 3 because they seemed the 3 most useful things
someone would want to tune. Later if we need to we can export them
all and make them per column settings (and maybe you can pick a
compression algo or what not...) But I figured lets start small.
I thought about doing another cleanup patch to get rid of
PGLZ_Strategy_default and PGLZ_Strategy_always. Nothing uses the
later, and if we expose all the nobs nothing will use the first.
Comments?
I think we need to live with the TOAST changes for at least one release
before we know what knobs we will need.
--
Bruce Momjian <bruce@momjian.us> http://momjian.us
EnterpriseDB http://enterprisedb.com
+ If your life is a hard drive, Christ can be your backup. +
On Tue, Jan 6, 2009 at 06:43, Bruce Momjian <bruce@momjian.us> wrote:
Alex Hunsaker wrote:
This patch lets you control 3 pg_lzcompress knobs on a per table basis
(note requires reloptions.patch)
I think we need to live with the TOAST changes for at least one release
before we know what knobs we will need.
Fine with me. The "add an early failure path" and "increase the required
compression rate to 25%" changes still worry me a bit. But I have no data to
show the first one is actually a problem. And the second one only
caused a 15% size increase for me. Whether that's a typical size increase
or a problem, I don't know.