diff --git a/src/backend/utils/time/tqual.c b/src/backend/utils/time/tqual.c old mode 100644 new mode 100755 index 2fd32e0..62a6af4 --- a/src/backend/utils/time/tqual.c +++ b/src/backend/utils/time/tqual.c @@ -69,26 +69,274 @@ /* Static variables representing various special snapshot semantics */ SnapshotData SnapshotNowData = {HeapTupleSatisfiesNow}; SnapshotData SnapshotSelfData = {HeapTupleSatisfiesSelf}; SnapshotData SnapshotAnyData = {HeapTupleSatisfiesAny}; SnapshotData SnapshotToastData = {HeapTupleSatisfiesToast}; +/* hint bit cache definitions and structures */ + +#define HINTBIT_CACHE_BUCKET_BLOCKSZ 8192 +/* 8192 bytes of stororage in each hint bit cache bucket */ +#define HINTBIT_CACHE_BUCKET_COUNT 4 /* how many buckets to keep */ +#define HINTBIT_CACHE_ROLLUPSZ 100 +/* 100 cache misses are stored in array before re-filling cache */ +#define HINTBIT_CACHE_HIT_THRESHOLD 5 +/* cache buckets with less than 5 interesting transactions per cache re-fill + * are not considered interesting + */ + +#define GET_HINTBIT_CACHE_BUCKET(xid) \ + ((xid) / (HINTBIT_CACHE_BUCKET_BLOCKSZ * BITS_PER_BYTE)) + +typedef struct +{ + int XidBucket; + int HitCount; /* number of cache hits on this bucket */ + bool Clear; + /* during rollup, signals the bucket is changing and cache data needs to + * be cleared + */ +} HintBitCacheBucketHeader; + /* local functions */ static bool XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot); +static HintBitCacheBucketHeader HintbitCacheBucketList[HINTBIT_CACHE_BUCKET_COUNT] + = {{-1, 0, false}, {-1, 0, false}, {-1, 0, false}, {-1, 0, false}}; +static char HintBitCacheBucketList[HINTBIT_CACHE_BUCKET_COUNT][HINTBIT_CACHE_BUCKET_BLOCKSZ]; +static int HintBitCacheBucketRollupList[HINTBIT_CACHE_ROLLUPSZ]; +static int HintBitCacheMissCount = 0; + +/* qsort comparison function */ +static int +int_cmp(const void *p1, const void *p2) +{ + int v1 = *((const int *) p1); + int v2 = *((const int *) p2); + + if (v1 < v2) + return -1; + if (v1 > v2) + return 1; + return 0; +} + +/* + * RollUpHintBitCache() + * + * Whenever a transaction is checked against the hint bit cache and the status + * is not there (a cache miss), the transaction's 'bucket' is added to the + * rollup array. When that array fills, the rollup array is processed into + * a new set of cache buckets. + * + * The algorithm is to sort the rollup array and iterate it counting the + * number of identical buckets as we go. When we hit a new bucket (or the + * end of the array) and the number of counted buckets is over a minimum + * threshold, the bucket is moved into the cache if it has more cache hits + * than any bucket that is already in the cache. + */ +static void RollUpHintBitCache() +{ + TransactionId LastBucket = HintBitCacheBucketRollupList[0]; + /* The bucket we are duplicate tracking. */ + + int BucketIdx; + int HitCount = 0; + /* effectively initializes to 1: the first iteration of the loop will + * always match LastBucket and increment this value + */ + + int Bucket; + int BucketMinHits = HINTBIT_CACHE_ROLLUPSZ + 1; + /* The smallest number of hits for a bucket currently in the cache. It's + * scanned comparing as we go, so initialize it to an impossibly high + * value here */ + + int BucketMinIdx; + /* The cache bucket index of the page with the smallest number of hits. + * if a new cache bucket higher number of hits with this one, it is + * swapped in its place. + */ + int XidBucket; + + bool AlreadyInCache; + + /* sort the list, to make counting identical buckets easier */ + qsort(HintBitCacheBucketRollupList, HINTBIT_CACHE_ROLLUPSZ, sizeof(int), int_cmp); + + for (BucketIdx = 0; BucketIdx < HINTBIT_CACHE_ROLLUPSZ; BucketIdx++) + { + /* If bucket is the same as the last one, increment the hitcount + * or reset it and do cache management based on the number of hits + */ + XidBucket = HintBitCacheBucketRollupList[BucketIdx]; + + if(XidBucket == LastBucket) + HitCount++; + else + { + /* Now check the bucket represented by LastBucket to see if its + * interesting for cache purposes. To avoid thrashing cache pages + * in and out, a minimum threshold of hits has to be met before + * putting a bucket in the cache. + */ + if (HitCount >= HINTBIT_CACHE_HIT_THRESHOLD) + { +CheckLastXidBlock: + /* Look through the buckets, find the one with the smallest + * hit count, and save off the index. + */ + AlreadyInCache = false; + + for (Bucket = 0; Bucket < HINTBIT_CACHE_BUCKET_COUNT; Bucket++) + { + if (HintbitCacheBucketList[Bucket].HitCount < BucketMinHits) + { + BucketMinIdx = Bucket; + BucketMinHits = HintbitCacheBucketList[Bucket].HitCount; + } + + /* If this bucket is alredy in the cache, we can leave it + * an without clearing it. + */ + if (HintbitCacheBucketList[Bucket].XidBucket == LastBucket) + AlreadyInCache = true; + } + + /* does the bucket we are examining have a higher number of + * cache hits than any bucket currently in the cache? + */ + if (!AlreadyInCache && HitCount > BucketMinHits) + { + /* mark the cache buffer with the new bucket, hint count + * and clear flag. + */ + HintbitCacheBucketList[BucketMinIdx].XidBucket = LastBucket; + HintbitCacheBucketList[BucketMinIdx].HitCount = HitCount; + HintbitCacheBucketList[BucketMinIdx].Clear = true; + } + } + + /* reset the hit count, since we've already technically already + * seen this bucket in this iteration, HitCount is initialized + * to 1, and this bucket is now the one tracked via LastBucket. + */ + HitCount = 1; + LastBucket = HintBitCacheBucketRollupList[BucketIdx]; + } + } + + /* When the loop is exited, the last block of identical buckets never + * got checked with the XidBucket == LastBucket condition. Sneakily + * jump back into the loop to check it, decrementing BucketIdx so that + * the final LastBucket assignment does't look outside the array. + */ + if(HitCount >= HINTBIT_CACHE_HIT_THRESHOLD) + { + BucketIdx--; + goto CheckLastXidBlock; + } + + /* Cache buckets that are new have to be cleared. Look for ones market as + * such and reset the hit count for all buckets. + */ + for (Bucket = 0; Bucket < HINTBIT_CACHE_BUCKET_COUNT; Bucket++) + { + if(HintbitCacheBucketList[BucketMinIdx].Clear) + memset(HintBitCacheBucketList[Bucket], 0, HINTBIT_CACHE_BUCKET_BLOCKSZ); + + HintbitCacheBucketList[BucketMinIdx].Clear = 0; + HintbitCacheBucketList[BucketMinIdx].HitCount = 0; + } + + HintBitCacheMissCount = 0; + return; +} + +/* + * IsXidInHintBitCache() + * + * Compute the bucket for a TransactionId, and compare it against the buckets + * currently in the cache. If the bucket is found, resolve the bit position + * and and return true if the bit is set. If it's not set, put the + * TransactionId in the array that holds cache misses. If that array fills, + * do processing which rebuilds the cache from it. + */ +static inline bool +IsXidInHintBitCache(TransactionId xid) +{ + int XidBucket = GET_HINTBIT_CACHE_BUCKET(xid); + int Bucket; + + for(Bucket = 0; Bucket < HINTBIT_CACHE_BUCKET_COUNT; Bucket++) + { + if(HintbitCacheBucketList[Bucket].XidBucket == XidBucket) + { + int ByteOffset = (xid % HINTBIT_CACHE_BUCKET_BLOCKSZ) / BITS_PER_BYTE; + int BitOffset = xid % BITS_PER_BYTE; + if (HintBitCacheBucketList[Bucket][ByteOffset] & (1 << BitOffset)) + { + HintbitCacheBucketList[Bucket].HitCount++; + return true; + } + + break; + + } + } + + HintBitCacheBucketRollupList[HintBitCacheMissCount++] = XidBucket; + + if (HintBitCacheMissCount == HINTBIT_CACHE_ROLLUPSZ) + { + RollUpHintBitCache(); + } + + return false; +} + +/* + * SetXidInHintBitCache() + * + * Compute the bucket for a TransactionId, and compare it against the buckets + * currently in the cache. If the bucket is found, compute the bit position + * and set it to 1 (true). + */ +static inline void +SetXidInHintBitCache(TransactionId xid) +{ + int XidBucket = xid / (HINTBIT_CACHE_BUCKET_BLOCKSZ * BITS_PER_BYTE); + int Bucket; + + for(Bucket = 0; Bucket < HINTBIT_CACHE_BUCKET_COUNT; Bucket++) + { + if(XidBucket == HintbitCacheBucketList[Bucket].XidBucket) + { + int ByteOffset = (xid % HINTBIT_CACHE_BUCKET_BLOCKSZ) / BITS_PER_BYTE; + int BitOffset = xid % BITS_PER_BYTE; + HintBitCacheBucketList[Bucket][ByteOffset] |= (1 << BitOffset); + break; + } + } + + return; +} + /* * SetHintBits() * * Set commit/abort hint bits on a tuple, if appropriate at this time. * * It is only safe to set a transaction-committed hint bit if we know the * transaction's commit record has been flushed to disk. We cannot change * the LSN of the page here because we may hold only a share lock on the * buffer, so we can't use the LSN to interlock this; we have to just refrain * from setting the hint bit until some future re-examination of the tuple. + * The ability of the hint bit to be set is returned to the caller (the hint + * bit cache only saves safe bits in the cache and can use this information). * * We can always set hint bits when marking a transaction aborted. (Some * code in heapam.c relies on that!) * * Also, if we are cleaning up HEAP_MOVED_IN or HEAP_MOVED_OFF entries, then * we can always set the hint bits, since pre-9.0 VACUUM FULL always used @@ -101,27 +349,35 @@ static bool XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot); * Normal commits may be asynchronous, so for those we need to get the LSN * of the transaction and then check whether this is flushed. * * The caller should pass xid as the XID of the transaction to check, or * InvalidTransactionId if no check is needed. */ -static inline void +static inline bool SetHintBits(HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid) { if (TransactionIdIsValid(xid)) { /* NB: xid must be known committed here! */ XLogRecPtr commitLSN = TransactionIdGetCommitLSN(xid); if (XLogNeedsFlush(commitLSN)) - return; /* not flushed yet, so don't set hint */ + return false; /* not flushed yet, so don't set hint */ } tuple->t_infomask |= infomask; SetBufferCommitInfoNeedsSave(buffer); + return true; +} + +static inline void +SetHintBitsCache(HeapTupleHeader tuple, Buffer buffer, + uint16 infomask, TransactionId xid) +{ + tuple->t_infomask |= infomask; } /* * HeapTupleSetHintBits --- exported version of SetHintBits() * * This must be separate because of C99's brain-dead notions about how to @@ -911,91 +1167,109 @@ HeapTupleSatisfiesDirty(HeapTupleHeader tuple, Snapshot snapshot, bool HeapTupleSatisfiesMVCC(HeapTupleHeader tuple, Snapshot snapshot, Buffer buffer) { if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED)) { - if (tuple->t_infomask & HEAP_XMIN_INVALID) - return false; - - /* Used by pre-9.0 binary upgrades */ - if (tuple->t_infomask & HEAP_MOVED_OFF) + if (IsXidInHintBitCache(HeapTupleHeaderGetXmin(tuple))) { - TransactionId xvac = HeapTupleHeaderGetXvac(tuple); - - if (TransactionIdIsCurrentTransactionId(xvac)) - return false; - if (!TransactionIdIsInProgress(xvac)) - { - if (TransactionIdDidCommit(xvac)) - { - SetHintBits(tuple, buffer, HEAP_XMIN_INVALID, - InvalidTransactionId); - return false; - } - SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, - InvalidTransactionId); - } + /* The hint bit was not set on the tuple, but it was set in the + * cache. Since cache bits are only set if the transaction's + * commit record has been flushed to disk, we can call a quicker + * version of SetHintBits that does not check the commit record. + */ + SetHintBitsCache(tuple, buffer, HEAP_XMIN_COMMITTED, + HeapTupleHeaderGetXmin(tuple)); } - /* Used by pre-9.0 binary upgrades */ - else if (tuple->t_infomask & HEAP_MOVED_IN) + else { - TransactionId xvac = HeapTupleHeaderGetXvac(tuple); + if (tuple->t_infomask & HEAP_XMIN_INVALID) + return false; - if (!TransactionIdIsCurrentTransactionId(xvac)) + /* Used by pre-9.0 binary upgrades */ + if (tuple->t_infomask & HEAP_MOVED_OFF) { - if (TransactionIdIsInProgress(xvac)) + TransactionId xvac = HeapTupleHeaderGetXvac(tuple); + + if (TransactionIdIsCurrentTransactionId(xvac)) return false; - if (TransactionIdDidCommit(xvac)) + if (!TransactionIdIsInProgress(xvac)) + { + if (TransactionIdDidCommit(xvac)) + { + SetHintBits(tuple, buffer, HEAP_XMIN_INVALID, + InvalidTransactionId); + return false; + } SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, InvalidTransactionId); - else + } + } + /* Used by pre-9.0 binary upgrades */ + else if (tuple->t_infomask & HEAP_MOVED_IN) + { + TransactionId xvac = HeapTupleHeaderGetXvac(tuple); + + if (!TransactionIdIsCurrentTransactionId(xvac)) { - SetHintBits(tuple, buffer, HEAP_XMIN_INVALID, - InvalidTransactionId); - return false; + if (TransactionIdIsInProgress(xvac)) + return false; + if (TransactionIdDidCommit(xvac)) + SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, + InvalidTransactionId); + else + { + SetHintBits(tuple, buffer, HEAP_XMIN_INVALID, + InvalidTransactionId); + return false; + } } } - } - else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple))) - { - if (HeapTupleHeaderGetCmin(tuple) >= snapshot->curcid) - return false; /* inserted after scan started */ + else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple))) + { + if (HeapTupleHeaderGetCmin(tuple) >= snapshot->curcid) + return false; /* inserted after scan started */ - if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ - return true; + if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ + return true; - if (tuple->t_infomask & HEAP_IS_LOCKED) /* not deleter */ - return true; + if (tuple->t_infomask & HEAP_IS_LOCKED) /* not deleter */ + return true; - Assert(!(tuple->t_infomask & HEAP_XMAX_IS_MULTI)); + Assert(!(tuple->t_infomask & HEAP_XMAX_IS_MULTI)); - if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple))) - { - /* deleting subtransaction must have aborted */ - SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, - InvalidTransactionId); - return true; + if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple))) + { + /* deleting subtransaction must have aborted */ + SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, + InvalidTransactionId); + return true; + } + + if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid) + return true; /* deleted after scan started */ + else + return false; /* deleted before scan started */ } + else if (TransactionIdIsInProgress(HeapTupleHeaderGetXmin(tuple))) + return false; + else if (TransactionIdDidCommit(HeapTupleHeaderGetXmin(tuple))) + { + /* Set the bit, and put it in the cache if we can */ + if(SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, + HeapTupleHeaderGetXmin(tuple))) + SetXidInHintBitCache(HeapTupleHeaderGetXmin(tuple)); - if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid) - return true; /* deleted after scan started */ + } else - return false; /* deleted before scan started */ - } - else if (TransactionIdIsInProgress(HeapTupleHeaderGetXmin(tuple))) - return false; - else if (TransactionIdDidCommit(HeapTupleHeaderGetXmin(tuple))) - SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, - HeapTupleHeaderGetXmin(tuple)); - else - { - /* it must have aborted or crashed */ - SetHintBits(tuple, buffer, HEAP_XMIN_INVALID, - InvalidTransactionId); - return false; + { + /* it must have aborted or crashed */ + SetHintBits(tuple, buffer, HEAP_XMIN_INVALID, + InvalidTransactionId); + return false; + } } } /* * By here, the inserting transaction has committed - have to check * when...