*** a/doc/src/sgml/ref/create_table.sgml --- b/doc/src/sgml/ref/create_table.sgml *************** *** 714,719 **** and table_constraint is: --- 714,749 ---- + + COMPRESS_MAX_SIZE + + + Controls the maximum size to be considered for TOAST compression. + The default is 1048576 bytes (1MB). Note that, as with the other COMPRESS_* storage options, setting this higher can adversely affect substring(...) or even simple select performance. + + + + + + COMPRESS_MIN_RATE + + + Minimum compression rate (0-100%) required for TOAST compression to be used. + Values that do not achieve this compression rate are left uncompressed. + Default: 25% + + + + + + COMPRESS_SUCCESS_BY + + + If by this byte (default 1024) we have found no compressible data, abort compression. This knob is mainly to keep us from wasting cycles compressing data that is already compressed. + + + + *** a/src/backend/access/common/reloptions.c --- b/src/backend/access/common/reloptions.c *************** *** 85,90 **** static relopt_int intRelOpts[] = --- 85,116 ---- }, GIST_DEFAULT_FILLFACTOR, GIST_MIN_FILLFACTOR, 100 }, + { + { + "compress_max_size", + "Maximum input data size to consider compression", + RELOPT_KIND_HEAP + }, + HEAP_DEFAULT_COMPRESS_MAX_SIZE, 0, INT_MAX + }, + { + { + "compress_min_rate", + "Minimum compression rate (0-99%) to require", + RELOPT_KIND_HEAP + }, + HEAP_DEFAULT_COMPRESS_MIN_RATE, 0, 100 + }, + { + { + "compress_success_by", + "Abandon compression if we find no compressible " + "data within the first this-many bytes", + RELOPT_KIND_HEAP + }, + HEAP_DEFAULT_COMPRESS_SUCCESS_BY, 0, INT_MAX + }, + /* list terminator */ { { NULL } } }; *************** *** 750,755 **** default_reloptions(Datum reloptions, bool validate, relopt_kind kind) --- 776,785 ---- for (i = 0; i < numoptions; i++) { HANDLE_INT_RELOPTION("fillfactor", lopts.fillfactor, options[i]); + + HANDLE_INT_RELOPTION("compress_max_size", lopts.compress_max_size, options[i]); + 
HANDLE_INT_RELOPTION("compress_min_rate", lopts.compress_min_rate, options[i]); + HANDLE_INT_RELOPTION("compress_success_by", lopts.compress_success_by, options[i]); } pfree(options); *** a/src/backend/access/heap/tuptoaster.c --- b/src/backend/access/heap/tuptoaster.c *************** *** 36,42 **** #include "access/xact.h" #include "catalog/catalog.h" #include "utils/fmgroids.h" - #include "utils/pg_lzcompress.h" #include "utils/rel.h" #include "utils/typcache.h" #include "utils/tqual.h" --- 36,41 ---- *************** *** 435,440 **** toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, --- 434,446 ---- bool toast_free[MaxHeapAttributeNumber]; bool toast_delold[MaxHeapAttributeNumber]; + PGLZ_Strategy pglz_strategy = *PGLZ_strategy_default; + + pglz_strategy.max_input_size = RelationGetCompressMaxSize(rel, HEAP_DEFAULT_COMPRESS_MAX_SIZE); + pglz_strategy.min_comp_rate = RelationGetCompressMinRate(rel, HEAP_DEFAULT_COMPRESS_MIN_RATE); + pglz_strategy.first_success_by = RelationGetCompressSuccessBy(rel, HEAP_DEFAULT_COMPRESS_SUCCESS_BY); + + /* * We should only ever be called for tuples of plain relations --- * recursing on a toast rel is bad news. *************** *** 638,644 **** toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, if (att[i]->attstorage == 'x') { old_value = toast_values[i]; ! new_value = toast_compress_datum(old_value); if (DatumGetPointer(new_value) != NULL) { --- 644,650 ---- if (att[i]->attstorage == 'x') { old_value = toast_values[i]; ! new_value = toast_compress_datum(old_value, &pglz_strategy); if (DatumGetPointer(new_value) != NULL) { *************** *** 775,781 **** toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, */ i = biggest_attno; old_value = toast_values[i]; ! new_value = toast_compress_datum(old_value); if (DatumGetPointer(new_value) != NULL) { --- 781,787 ---- */ i = biggest_attno; old_value = toast_values[i]; ! 
new_value = toast_compress_datum(old_value, &pglz_strategy); if (DatumGetPointer(new_value) != NULL) { *************** *** 1064,1070 **** toast_flatten_tuple_attribute(Datum value, * ---------- */ Datum ! toast_compress_datum(Datum value) { struct varlena *tmp; int32 valsize = VARSIZE_ANY_EXHDR(DatumGetPointer(value)); --- 1070,1076 ---- * ---------- */ Datum ! toast_compress_datum(Datum value, const PGLZ_Strategy *strategy) { struct varlena *tmp; int32 valsize = VARSIZE_ANY_EXHDR(DatumGetPointer(value)); *************** *** 1073,1083 **** toast_compress_datum(Datum value) Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value))); /* * No point in wasting a palloc cycle if value size is out of the * allowed range for compression */ ! if (valsize < PGLZ_strategy_default->min_input_size || ! valsize > PGLZ_strategy_default->max_input_size) return PointerGetDatum(NULL); tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize)); --- 1079,1096 ---- Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value))); /* + * We consider it a bug if the strategy is missing or if either of its input size limits is less than zero + */ + Assert(strategy != NULL); + Assert(strategy->min_input_size >= 0); + Assert(strategy->max_input_size >= 0); + + /* * No point in wasting a palloc cycle if value size is out of the * allowed range for compression */ ! if (valsize < strategy->min_input_size || ! valsize > strategy->max_input_size) return PointerGetDatum(NULL); tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize)); *************** *** 1093,1099 **** toast_compress_datum(Datum value) * on a savings of more than 2 bytes to ensure we have a gain. */ if (pglz_compress(VARDATA_ANY(DatumGetPointer(value)), valsize, ! (PGLZ_Header *) tmp, PGLZ_strategy_default) && VARSIZE(tmp) < valsize - 2) { /* successful compression */ --- 1106,1112 ---- * on a savings of more than 2 bytes to ensure we have a gain. */ if (pglz_compress(VARDATA_ANY(DatumGetPointer(value)), valsize, ! 
(PGLZ_Header *) tmp, strategy) && VARSIZE(tmp) < valsize - 2) { /* successful compression */ *** a/src/include/access/tuptoaster.h --- b/src/include/access/tuptoaster.h *************** *** 16,21 **** --- 16,22 ---- #include "access/htup.h" #include "storage/bufpage.h" #include "utils/relcache.h" + #include "utils/pg_lzcompress.h" /* * This enables de-toasting of index entries. Needed until VACUUM is *************** *** 151,157 **** extern Datum toast_flatten_tuple_attribute(Datum value, * Create a compressed version of a varlena datum, if possible * ---------- */ ! extern Datum toast_compress_datum(Datum value); /* ---------- * toast_raw_datum_size - --- 152,158 ---- * Create a compressed version of a varlena datum, if possible * ---------- */ ! extern Datum toast_compress_datum(Datum value, const PGLZ_Strategy *strategy); /* ---------- * toast_raw_datum_size - *** a/src/include/utils/rel.h --- b/src/include/utils/rel.h *************** *** 217,227 **** typedef struct RelationData typedef struct StdRdOptions { int32 vl_len_; /* varlena header (do not touch directly!) */ ! int fillfactor; /* page fill factor in percent (0..100) */ } StdRdOptions; ! #define HEAP_MIN_FILLFACTOR 10 #define HEAP_DEFAULT_FILLFACTOR 100 /* * RelationGetFillFactor --- 217,233 ---- typedef struct StdRdOptions { int32 vl_len_; /* varlena header (do not touch directly!) */ ! int fillfactor; /* page fill factor in percent (0..100) */ ! int compress_max_size; /* don't compress toasted data over this size */ ! int compress_min_rate; /* require at least this % compression rate */ ! int compress_success_by; /* give up if no compressible data is found by this byte */ } StdRdOptions; ! 
#define HEAP_MIN_FILLFACTOR 10 #define HEAP_DEFAULT_FILLFACTOR 100 + #define HEAP_DEFAULT_COMPRESS_MAX_SIZE (1024 * 1024) /* default to 1MB; parenthesized so the macro expands safely inside larger expressions */ + #define HEAP_DEFAULT_COMPRESS_MIN_RATE 25 + #define HEAP_DEFAULT_COMPRESS_SUCCESS_BY 1024 /* * RelationGetFillFactor *************** *** 231,236 **** typedef struct StdRdOptions --- 237,254 ---- ((relation)->rd_options ? \ ((StdRdOptions *) (relation)->rd_options)->fillfactor : (defaultff)) + #define RelationGetCompressMaxSize(relation, defaultff) \ + ((relation)->rd_options ? \ + ((StdRdOptions *) (relation)->rd_options)->compress_max_size : (defaultff)) + + #define RelationGetCompressMinRate(relation, defaultff) \ + ((relation)->rd_options ? \ + ((StdRdOptions *) (relation)->rd_options)->compress_min_rate : (defaultff)) + + #define RelationGetCompressSuccessBy(relation, defaultff) \ + ((relation)->rd_options ? \ + ((StdRdOptions *) (relation)->rd_options)->compress_success_by : (defaultff)) + /* * RelationGetTargetPageUsage * Returns the relation's desired space usage per page in bytes.