From fee005e20d121fb79ba3b53d631f4772ce5896fe Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Mon, 14 Jan 2019 16:09:48 -0800 Subject: [PATCH] Avoid amcheck TOAST compression inconsistencies. Normalize input varlena datums by decompressing compressed values. This avoids false positive reports of corruption in cases where the executor input a slot containing a heap tuple that is compressed in the heap but not an index. For example, the executor can apply TOAST compression to an entire tuple because it has a single external TOAST pointer. I discovered this bug initially, but it was independently reported shortly afterwards by Andreas Kunert. A new test case is cribbed from his example. Discussion: https://postgr.es/m/CAH2-WznrVd9ie+TTJ45nDT+v2nUt6YJwQrT9SebCdQKtAvfPZw@mail.gmail.com Discussion: https://postgr.es/m/15597-294e5d3e7f01c407@postgresql.org --- contrib/amcheck/expected/check_btree.out | 47 +++++++ contrib/amcheck/sql/check_btree.sql | 45 +++++++ contrib/amcheck/verify_nbtree.c | 163 +++++++++++++++++++---- 3 files changed, 232 insertions(+), 23 deletions(-) diff --git a/contrib/amcheck/expected/check_btree.out b/contrib/amcheck/expected/check_btree.out index e864579774..3b4044d61e 100644 --- a/contrib/amcheck/expected/check_btree.out +++ b/contrib/amcheck/expected/check_btree.out @@ -140,10 +140,57 @@ SELECT bt_index_parent_check('delete_test_table_pkey', true); (1 row) +-- +-- BUG #15597: must not assume consistent input toasting state when forming +-- tuple. Bloom filter must fingerprint normalized index tuple representation. +-- +-- Example assumes TOAST_TUPLE_TARGET of TOAST_TUPLE_THRESHOLD (default). +-- +CREATE TABLE toast_bug +( + a integer, + b character(255), + c character(255), + d character(255), + e character(25), + g date, + h date, + i date, + j character(255), + k character(255), + m character(10), + n character(255), + o character(1), + p character(4), + q character(1), + r integer, + t character(255), + u character(50), + v character(100) +); +CREATE INDEX i_toast_bug ON toast_bug USING btree (b, c); +-- Heap tuple length is high enough to get us over heap tuple TOAST threshold +-- primarily due to table having many columns. Tuple toasting machinery will +-- compress the biggest datum inline (or joint biggest) to get under maximum. +-- Index tuple formation will not determine that it's worth compressing, which +-- normalization must cope with. +INSERT INTO toast_bug +VALUES ( + 1, 'b', 'c', 'd', 'e', + '2000-01-01', '2000-01-01', NULL, + 'j', 'k', 'm', 'n', 'o', 'p', 'q', 2, 't', 'u', 'v'); +-- Should not get false positive report of corruption: +SELECT bt_index_check('i_toast_bug', true); + bt_index_check +---------------- + +(1 row) + -- cleanup DROP TABLE bttest_a; DROP TABLE bttest_b; DROP TABLE bttest_multi; DROP TABLE delete_test_table; +DROP TABLE toast_bug; DROP OWNED BY bttest_role; -- permissions DROP ROLE bttest_role; diff --git a/contrib/amcheck/sql/check_btree.sql b/contrib/amcheck/sql/check_btree.sql index 7b1ab4f148..983588859a 100644 --- a/contrib/amcheck/sql/check_btree.sql +++ b/contrib/amcheck/sql/check_btree.sql @@ -88,10 +88,55 @@ DELETE FROM delete_test_table WHERE a > 10; VACUUM delete_test_table; SELECT bt_index_parent_check('delete_test_table_pkey', true); +-- +-- BUG #15597: must not assume consistent input toasting state when forming +-- tuple. Bloom filter must fingerprint normalized index tuple representation. +-- +-- Example assumes TOAST_TUPLE_TARGET of TOAST_TUPLE_THRESHOLD (default). +-- +CREATE TABLE toast_bug +( + a integer, + b character(255), + c character(255), + d character(255), + e character(25), + g date, + h date, + i date, + j character(255), + k character(255), + m character(10), + n character(255), + o character(1), + p character(4), + q character(1), + r integer, + t character(255), + u character(50), + v character(100) +); +CREATE INDEX i_toast_bug ON toast_bug USING btree (b, c); + +-- Heap tuple length is high enough to get us over heap tuple TOAST threshold +-- primarily due to table having many columns. Tuple toasting machinery will +-- compress the biggest datum inline (or joint biggest) to get under maximum. +-- Index tuple formation will not determine that it's worth compressing, which +-- normalization must cope with. +INSERT INTO toast_bug +VALUES ( + 1, 'b', 'c', 'd', 'e', + '2000-01-01', '2000-01-01', NULL, + 'j', 'k', 'm', 'n', 'o', 'p', 'q', 2, 't', 'u', 'v'); + +-- Should not get false positive report of corruption: +SELECT bt_index_check('i_toast_bug', true); + -- cleanup DROP TABLE bttest_a; DROP TABLE bttest_b; DROP TABLE bttest_multi; DROP TABLE delete_test_table; +DROP TABLE toast_bug; DROP OWNED BY bttest_role; -- permissions DROP ROLE bttest_role; diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c index 1c7466c815..30fd66e94c 100644 --- a/contrib/amcheck/verify_nbtree.c +++ b/contrib/amcheck/verify_nbtree.c @@ -27,6 +27,7 @@ #include "access/htup_details.h" #include "access/nbtree.h" #include "access/transam.h" +#include "access/tuptoaster.h" #include "access/xact.h" #include "catalog/index.h" #include "catalog/pg_am.h" @@ -133,6 +134,8 @@ static void bt_downlink_missing_check(BtreeCheckState *state); static void bt_tuple_present_callback(Relation index, HeapTuple htup, Datum *values, bool *isnull, bool tupleIsAlive, void *checkstate); +static IndexTuple bt_toast_normalize_tuple(BtreeCheckState *state, + IndexTuple itup); static inline bool offset_is_negative_infinity(BTPageOpaque opaque, OffsetNumber offset); static inline bool invariant_leq_offset(BtreeCheckState *state, @@ -908,7 +911,16 @@ bt_target_page_check(BtreeCheckState *state) /* Fingerprint leaf page tuples (those that point to the heap) */ if (state->heapallindexed && P_ISLEAF(topaque) && !ItemIdIsDead(itemid)) - bloom_add_element(state->filter, (unsigned char *) itup, tupsize); + { + IndexTuple norm; + + norm = bt_toast_normalize_tuple(state, itup); + + bloom_add_element(state->filter, (unsigned char *) norm, + IndexTupleSize(norm)); + if (norm != itup) + pfree(norm); + } /* * * High key check * @@ -1672,35 +1684,32 @@ bt_tuple_present_callback(Relation index, HeapTuple htup, Datum *values, bool *isnull, bool tupleIsAlive, void *checkstate) { BtreeCheckState *state = (BtreeCheckState *) checkstate; - IndexTuple itup; + IndexTuple itup, norm; Assert(state->heapallindexed); - /* - * Generate an index tuple for fingerprinting. - * - * Index tuple formation is assumed to be deterministic, and IndexTuples - * are assumed immutable. While the LP_DEAD bit is mutable in leaf pages, - * that's ItemId metadata, which was not fingerprinted. (There will often - * be some dead-to-everyone IndexTuples fingerprinted by the Bloom filter, - * but we only try to detect the absence of needed tuples, so that's - * okay.) - * - * Note that we rely on deterministic index_form_tuple() TOAST - * compression. If index_form_tuple() was ever enhanced to compress datums - * out-of-line, or otherwise varied when or how compression was applied, - * our assumption would break, leading to false positive reports of - * corruption. It's also possible that non-pivot tuples could in the - * future have alternative equivalent representations (e.g. by using the - * INDEX_ALT_TID_MASK bit). For now, we don't decompress/normalize toasted - * values as part of fingerprinting. - */ + /* Generate an index tuple for fingerprinting */ itup = index_form_tuple(RelationGetDescr(index), values, isnull); itup->t_tid = htup->t_self; + /* + * Normalize. It's fairly inefficient for normalization to deform and + * then reform the tuple that was just formed, but in practice compressed + * datums in indexes are fairly rare, so a second index_form_tuple() is + * usually avoided. Besides, normalization could need to be expanded to + * do more in the future, which can be accommodated by the same simple + * interface. If index_form_tuple() was ever enhanced to compress datums + * out-of-line, we'd only need to specify "read only" index_form_tuple() + * access; verification itself would continue to work. It's also possible + * that non-pivot tuples could in the future have alternative logically + * equivalent representations due to other factors (e.g. they could use + * the INDEX_ALT_TID_MASK bit). + */ + norm = bt_toast_normalize_tuple(state, itup); + /* Probe Bloom filter -- tuple should be present */ - if (bloom_lacks_element(state->filter, (unsigned char *) itup, - IndexTupleSize(itup))) + if (bloom_lacks_element(state->filter, (unsigned char *) norm, + IndexTupleSize(norm))) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), errmsg("heap tuple (%u,%u) from table \"%s\" lacks matching index tuple within index \"%s\"", @@ -1714,6 +1723,114 @@ bt_tuple_present_callback(Relation index, HeapTuple htup, Datum *values, state->heaptuplespresent++; pfree(itup); + if (norm != itup) + pfree(norm); +} + +/* + * Normalize an index tuple for fingerprinting. + * + * In general, index tuple formation is assumed to be deterministic by + * heapallindexed verification, and IndexTuples are assumed immutable. While + * the LP_DEAD bit is mutable in leaf pages, that's ItemId metadata, which is + * not fingerprinted. Normalization is required to compensate for corner + * cases where the determinism assumption doesn't quite work. + * + * There is currently one such case: index_form_tuple() does not try to hide + * the source TOAST state of input datums. The executor applies TOAST + * compression for heap tuples based on different criteria to the compression + * applied within btinsert()'s call to index_form_tuple(): it sometimes + * compresses more aggressively, leaving TOASTed heap tuple datums but + * uncompressed corresponding index tuple datums. A subsequent heapallindexed + * verification will get a logically equivalent though bitwise unequal tuple + * from index_form_tuple(), since its input would now be coming from the + * TOASTed heap tuple directly (not some tuple slot). False positive + * heapallindexed corruption reports could occur without normalizing away + * possible inconsistencies. + * + * Returned tuple is often caller's own original tuple. Otherwise, it is a + * new representation of caller's original index tuple, palloc()'d in caller's + * memory context. + * + * Note: This routine is not concerned with distinctions about the + * representation of tuples beyond those that might break heapallindexed + * verification. In particular, it won't try to normalize opclass-equal + * datums with potentially distinct representations where the details are + * known only to opclasses (e.g., btree/numeric_ops index datums will not get + * their display scale normalized-away here). + */ +static IndexTuple +bt_toast_normalize_tuple(BtreeCheckState *state, IndexTuple itup) +{ + TupleDesc tupleDescriptor = RelationGetDescr(state->rel); + Datum normalized[INDEX_MAX_KEYS]; + bool isnull[INDEX_MAX_KEYS]; + bool toast_free[INDEX_MAX_KEYS]; + IndexTuple reformed; + bool normalizetup = false; + int i; + + /* Easy case: Tuple has no varlena datums at all */ + if (!IndexTupleHasVarwidths(itup)) + return itup; + + for (i = 0; i < tupleDescriptor->natts; i++) + { + Form_pg_attribute att; + struct varlena *attval; + + att = TupleDescAttr(tupleDescriptor, i); + + /* Assume untoasted/already normalized datum initially */ + toast_free[i] = false; + normalized[i] = index_getattr(itup, i + 1, + tupleDescriptor, + &isnull[i]); + if (att->attbyval || att->attlen != -1 || isnull[i]) + continue; + + /* + * Callers always pass a tuple that could safely be inserted into the + * index without further processing, so an external varlena header + * should never be encountered here + */ + attval = (struct varlena *) DatumGetPointer(normalized[i]); + if (VARATT_IS_EXTERNAL(attval)) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("external varlena datum in tuple that references heap row (%u,%u) in index \"%s\"", + ItemPointerGetBlockNumber(&(itup->t_tid)), + ItemPointerGetOffsetNumber(&(itup->t_tid)), + RelationGetRelationName(state->rel)))); + else if (VARATT_IS_COMPRESSED(attval)) + { + normalizetup = true; + normalized[i] = PointerGetDatum(heap_tuple_untoast_attr(attval)); + toast_free[i] = true; + } + } + + /* Easier case: Tuple has varlena datums, none of which are compressed */ + if (!normalizetup) + return itup; + + /* + * Hard case: Tuple had compressed varlena datums that necessitate + * creating normalized version of the tuple from uncompressed input datums + * (normalized input datums). Forming a new representation of the tuple + * usually though not always results in recompressing the datums that were + * just decompressed. This is rather naive, but shouldn't be necessary + * too often. + */ + reformed = index_form_tuple(tupleDescriptor, normalized, isnull); + reformed->t_tid = itup->t_tid; + + /* Cannot leak memory here */ + for (i = 0; i < tupleDescriptor->natts; i++) + if (toast_free[i]) + pfree(DatumGetPointer(normalized[i])); + + return reformed; } /* -- 2.17.1