From ad27ede8a798c264de961b8bc37205a2635e43c5 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Tue, 14 Apr 2026 02:01:26 +0900 Subject: [PATCH v2] Add tests for low-level PGLZ compression The goal of this module is to provide an entry point for the coverage of the low-level APIs of compression and decompression of PGLZ. This includes tests for the cases detected by fuzzing related to corrupted data, as fixed in 2b5ba2a0a141: - Control byte with match tag bit set, where no data follows. - Control byte with match tag bit set, where 1 byte follows. - Extension byte needed (len=18), where no data follows. As bonus points, tests are added for compress/decompress roundtrips, and for check_complete=false/true. Backpatch-through: 14 --- .../regress/expected/compression_pglz.out | 62 +++++++++++++++++ src/test/regress/parallel_schedule | 2 +- src/test/regress/regress.c | 66 +++++++++++++++++++ src/test/regress/sql/compression_pglz.sql | 49 ++++++++++++++ 4 files changed, 178 insertions(+), 1 deletion(-) create mode 100644 src/test/regress/expected/compression_pglz.out create mode 100644 src/test/regress/sql/compression_pglz.sql diff --git a/src/test/regress/expected/compression_pglz.out b/src/test/regress/expected/compression_pglz.out new file mode 100644 index 000000000000..daf457f4ac0a --- /dev/null +++ b/src/test/regress/expected/compression_pglz.out @@ -0,0 +1,62 @@ +-- +-- Tests for PGLZ compression +-- +-- directory paths and dlsuffix are passed to us in environment variables +\getenv libdir PG_LIBDIR +\getenv dlsuffix PG_DLSUFFIX +\set regresslib :libdir '/regress' :dlsuffix +CREATE FUNCTION test_pglz_compress(bytea) + RETURNS bytea + AS :'regresslib' LANGUAGE C STRICT; +CREATE FUNCTION test_pglz_decompress(bytea, int4, bool) + RETURNS bytea + AS :'regresslib' LANGUAGE C STRICT; +-- Round-trip with pglz: compress then decompress.
+SELECT test_pglz_decompress(test_pglz_compress( + decode(repeat('abcd', 100), 'escape')), 400, false) = + decode(repeat('abcd', 100), 'escape') AS roundtrip_ok; + roundtrip_ok +-------------- + t +(1 row) + +SELECT test_pglz_decompress(test_pglz_compress( + decode(repeat('abcd', 100), 'escape')), 400, true) = + decode(repeat('abcd', 100), 'escape') AS roundtrip_ok; + roundtrip_ok +-------------- + t +(1 row) + +-- Decompression with rawsize too large, fails to fill the destination +-- buffer. +SELECT test_pglz_decompress(test_pglz_compress( + decode(repeat('abcd', 100), 'escape')), 500, true); +ERROR: pglz_decompress failed +-- Decompression with rawsize too small, fails with source not fully +-- consumed. +SELECT test_pglz_decompress(test_pglz_compress( + decode(repeat('abcd', 100), 'escape')), 100, true); +ERROR: pglz_decompress failed +-- Corrupted compressed data. The control byte is set with match tag bit, +-- but only 1 byte follows. +SELECT test_pglz_decompress('\x01ff'::bytea, 1024, false); +ERROR: pglz_decompress failed +SELECT test_pglz_decompress('\x01ff'::bytea, 1024, true); +ERROR: pglz_decompress failed +-- Corrupted compressed data. Control byte with match tag bit set, where +-- no data follows. +SELECT length(test_pglz_decompress('\x01'::bytea, 1024, false)) AS ctrl_only_len; + ctrl_only_len +--------------- + 0 +(1 row) + +SELECT test_pglz_decompress('\x01'::bytea, 1024, true); +ERROR: pglz_decompress failed +-- Corrupted compressed data. The match tag encodes len=18 (aka the +-- extension byte is needed) but there is no data. 
+SELECT test_pglz_decompress('\x010f01'::bytea, 1024, false); +ERROR: pglz_decompress failed +SELECT test_pglz_decompress('\x010f01'::bytea, 1024, true); +ERROR: pglz_decompress failed diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index cc365393bb7d..3b059906161f 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -128,7 +128,7 @@ test: partition_merge partition_split partition_join partition_prune reloptions # event_trigger depends on create_am and cannot run concurrently with # any test that runs DDL # oidjoins is read-only, though, and should run late for best coverage test: oidjoins event_trigger -test: role_ddl tablespace_ddl database_ddl +test: role_ddl tablespace_ddl database_ddl compression_pglz diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c index 5c19f70b6e8e..2bcb5559a452 100644 --- a/src/test/regress/regress.c +++ b/src/test/regress/regress.c @@ -27,6 +27,7 @@ #include "catalog/pg_type.h" #include "commands/sequence.h" #include "commands/trigger.h" +#include "common/pg_lzcompress.h" #include "executor/executor.h" #include "executor/functions.h" #include "executor/spi.h" @@ -1422,3 +1423,68 @@ test_instr_time(PG_FUNCTION_ARGS) PG_RETURN_BOOL(true); } + +/* + * test_pglz_compress + * + * Compress the input using pglz_compress(). Only the "always" strategy is + * currently supported. + * + * Returns the compressed data, or NULL if compression fails.
+ */ +PG_FUNCTION_INFO_V1(test_pglz_compress); +Datum +test_pglz_compress(PG_FUNCTION_ARGS) +{ + bytea *input = PG_GETARG_BYTEA_PP(0); + char *source = VARDATA_ANY(input); + int32 slen = VARSIZE_ANY_EXHDR(input); + int32 maxout = PGLZ_MAX_OUTPUT(slen); + bytea *result; + int32 clen; + + result = (bytea *) palloc(maxout + VARHDRSZ); + clen = pglz_compress(source, slen, VARDATA(result), + PGLZ_strategy_always); + if (clen < 0) + PG_RETURN_NULL(); + + SET_VARSIZE(result, clen + VARHDRSZ); + PG_RETURN_BYTEA_P(result); +} + +/* + * test_pglz_decompress + * + * Decompress the input using pglz_decompress(). + * + * The second argument is the expected uncompressed size (it must not be + * negative) and the third argument is passed down as check_complete. + * + * Returns the decompressed data, or raises an error if decompression fails. + */ +PG_FUNCTION_INFO_V1(test_pglz_decompress); +Datum +test_pglz_decompress(PG_FUNCTION_ARGS) +{ + bytea *input = PG_GETARG_BYTEA_PP(0); + int32 rawsize = PG_GETARG_INT32(1); + bool check_complete = PG_GETARG_BOOL(2); + char *source = VARDATA_ANY(input); + int32 slen = VARSIZE_ANY_EXHDR(input); + bytea *result; + int32 dlen; + + if (rawsize < 0) + elog(ERROR, "rawsize must not be negative"); + + result = (bytea *) palloc(rawsize + VARHDRSZ); + + dlen = pglz_decompress(source, slen, VARDATA(result), + rawsize, check_complete); + if (dlen < 0) + elog(ERROR, "pglz_decompress failed"); + + SET_VARSIZE(result, dlen + VARHDRSZ); + PG_RETURN_BYTEA_P(result); +} diff --git a/src/test/regress/sql/compression_pglz.sql b/src/test/regress/sql/compression_pglz.sql new file mode 100644 index 000000000000..115f1300d748 --- /dev/null +++ b/src/test/regress/sql/compression_pglz.sql @@ -0,0 +1,49 @@ +-- +-- Tests for PGLZ compression +-- + +-- directory paths and dlsuffix are passed to us in environment variables +\getenv libdir PG_LIBDIR +\getenv dlsuffix PG_DLSUFFIX + +\set regresslib :libdir '/regress' :dlsuffix + +CREATE FUNCTION test_pglz_compress(bytea) + RETURNS bytea
AS :'regresslib' LANGUAGE C STRICT; +CREATE FUNCTION test_pglz_decompress(bytea, int4, bool) + RETURNS bytea + AS :'regresslib' LANGUAGE C STRICT; + +-- Round-trip with pglz: compress then decompress. +SELECT test_pglz_decompress(test_pglz_compress( + decode(repeat('abcd', 100), 'escape')), 400, false) = + decode(repeat('abcd', 100), 'escape') AS roundtrip_ok; +SELECT test_pglz_decompress(test_pglz_compress( + decode(repeat('abcd', 100), 'escape')), 400, true) = + decode(repeat('abcd', 100), 'escape') AS roundtrip_ok; + +-- Decompression with rawsize too large, fails to fill the destination +-- buffer. +SELECT test_pglz_decompress(test_pglz_compress( + decode(repeat('abcd', 100), 'escape')), 500, true); + +-- Decompression with rawsize too small, fails with source not fully +-- consumed. +SELECT test_pglz_decompress(test_pglz_compress( + decode(repeat('abcd', 100), 'escape')), 100, true); + +-- Corrupted compressed data. The control byte is set with match tag bit, +-- but only 1 byte follows. +SELECT test_pglz_decompress('\x01ff'::bytea, 1024, false); +SELECT test_pglz_decompress('\x01ff'::bytea, 1024, true); + +-- Corrupted compressed data. Control byte with match tag bit set, where +-- no data follows. +SELECT length(test_pglz_decompress('\x01'::bytea, 1024, false)) AS ctrl_only_len; +SELECT test_pglz_decompress('\x01'::bytea, 1024, true); + +-- Corrupted compressed data. The match tag encodes len=18 (aka the +-- extension byte is needed) but there is no data. +SELECT test_pglz_decompress('\x010f01'::bytea, 1024, false); +SELECT test_pglz_decompress('\x010f01'::bytea, 1024, true); -- 2.53.0