From 4309a5f139289c3078f12517c31b31a8d52e5cd0 Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Sat, 21 Oct 2017 17:19:55 -0700 Subject: [PATCH] Detect duplicate heap TIDs using a tuplesort. Make bt_index_parent_check() check for duplicate TIDs. This condition might otherwise be missed even when using bt_index_parent_check() + healallindexed verification. "trace_sort=on" can be used as a rudimentary progress indicator. --- contrib/amcheck/verify_nbtree.c | 88 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c index a1438a2855..cdd2b622f1 100644 --- a/contrib/amcheck/verify_nbtree.c +++ b/contrib/amcheck/verify_nbtree.c @@ -29,12 +29,15 @@ #include "access/xact.h" #include "catalog/index.h" #include "catalog/pg_am.h" +#include "catalog/pg_operator.h" +#include "catalog/pg_type.h" #include "commands/tablecmds.h" #include "lib/bloomfilter.h" #include "miscadmin.h" #include "storage/lmgr.h" #include "utils/memutils.h" #include "utils/snapmgr.h" +#include "utils/tuplesort.h" PG_MODULE_MAGIC; @@ -93,6 +96,8 @@ typedef struct BtreeCheckState bloom_filter *filter; /* Bloom filter fingerprints downlink blocks within tree */ bloom_filter *downlinkfilter; + /* readonly TIDs-are-unique tuplesort */ + Tuplesortstate *tidsort; /* Right half of incomplete split marker */ bool rightsplit; /* Debug counter */ @@ -144,6 +149,7 @@ static inline bool invariant_leq_nontarget_offset(BtreeCheckState *state, Page other, ScanKey key, OffsetNumber upperbound); +static inline int64 itemptr_encode(ItemPointer itemptr); static Page palloc_btree_page(BtreeCheckState *state, BlockNumber blocknum); /* @@ -345,6 +351,7 @@ bt_check_every_level(Relation rel, Relation heaprel, bool readonly, state->heaprel = heaprel; state->readonly = readonly; state->heapallindexed = heapallindexed; + state->tidsort = NULL; if (state->heapallindexed) { @@ -413,6 +420,21 @@ bt_check_every_level(Relation rel, Relation heaprel, bool readonly, } } + if (state->readonly) + { + /* + * Encode TIDs as int8 values for the sort, rather than directly sorting + * item pointers. This can be significantly faster, primarily because TID + * is a pass-by-reference type on all platforms, whereas int8 is + * pass-by-value on most platforms. + */ + state->tidsort = tuplesort_begin_datum(INT8OID, Int8LessOperator, + InvalidOid, false, + maintenance_work_mem, NULL, + false); + } + + /* Create context for page */ state->targetcontext = AllocSetContextCreate(CurrentMemoryContext, "amcheck context", @@ -472,6 +494,46 @@ bt_check_every_level(Relation rel, Relation heaprel, bool readonly, previouslevel = current.level; } + if (state->readonly) + { + /* state variables for the merge */ + bool tuplesort_empty; + int64 last_tid = -1; + + /* Sort TIDs to detect duplicate pointers to the heap */ + tuplesort_performsort(state->tidsort); + + while (!tuplesort_empty) + { + Datum ts_val; + bool ts_isnull; + int64 ts_tid; + + tuplesort_empty = !tuplesort_getdatum(state->tidsort, true, + &ts_val, &ts_isnull, NULL); + Assert(tuplesort_empty || !ts_isnull); + if (!tuplesort_empty) + { + ts_tid = DatumGetInt64(ts_val); + + if (last_tid >= ts_tid) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("duplicate heap TID"))); + + last_tid = ts_tid; + + /* If int8 is pass-by-ref, free (encoded) TID Datum memory */ +#ifndef USE_FLOAT8_BYVAL + pfree(DatumGetPointer(ts_val)); +#endif + } + } + + /* Done with tuplesort object */ + tuplesort_end(state->tidsort); + } + /* * * Check whether heap contains unindexed/malformed tuples * */ @@ -909,6 +971,14 @@ bt_target_page_check(BtreeCheckState *state) if (state->heapallindexed && P_ISLEAF(topaque) && !ItemIdIsDead(itemid)) bloom_add_element(state->filter, (unsigned char *) itup, tupsize); + /* When checking for duplicate TIDs, add heap TIDs here */ + if (state->tidsort && P_ISLEAF(topaque)) + { + int64 encoded = itemptr_encode(&itup->t_tid); + + tuplesort_putdatum(state->tidsort, Int64GetDatum(encoded), false); + } + /* * * High key check * * @@ -1812,6 +1882,24 @@ invariant_leq_nontarget_offset(BtreeCheckState *state, return cmp <= 0; } +/* + * itemptr_encode - Encode ItemPointer as int64/int8 + * + * This is lifted from index.c. This is a performance optimization for sorting + * TIDs. + */ +static inline int64 +itemptr_encode(ItemPointer itemptr) +{ + BlockNumber block = ItemPointerGetBlockNumber(itemptr); + OffsetNumber offset = ItemPointerGetOffsetNumber(itemptr); + int64 encoded; + + encoded = ((uint64) block << 16) | (uint16) offset; + + return encoded; +} + /* * Given a block number of a B-Tree page, return page in palloc()'d memory. * While at it, perform some basic checks of the page. -- 2.14.1