diff -x '*.sql' -x '*.o' -x '*.txt' -rupN postgresql-9.1alpha4/src/backend/utils/time/tqual.c postgresql-9.1alpha4_hbcache/src/backend/utils/time/tqual.c
--- postgresql-9.1alpha4/src/backend/utils/time/tqual.c	2011-03-09 08:19:24.000000000 -0600
+++ postgresql-9.1alpha4_hbcache/src/backend/utils/time/tqual.c	2011-04-01 11:16:03.095195001 -0500
@@ -122,6 +122,25 @@ SetHintBits(HeapTupleHeader tuple, Buffe
 }
 
+/*
+ * SetHintBitsCached()
+ *
+ * Set hint bits that were learned from the process-local commit cache.
+ * The page is deliberately not marked dirty, which gives the cache
+ * an opportunity to reduce I/O caused by writing out pages for which only
+ * the hint bits have been modified.
+ *
+ * The XLogNeedsFlush check is made before an xid is entered into the cache,
+ * so it does not have to be repeated here.  'buffer' and 'xid' are unused;
+ * they are accepted only so the signature mirrors SetHintBits().
+ */
+static inline void
+SetHintBitsCached(HeapTupleHeader tuple, Buffer buffer,
+				  uint16 infomask, TransactionId xid)
+{
+	tuple->t_infomask |= infomask;
+}
+
 /*
  * HeapTupleSetHintBits --- exported version of SetHintBits()
  *
  * This must be separate because of C99's brain-dead notions about how to
@@ -887,6 +906,205 @@ HeapTupleSatisfiesDirty(HeapTupleHeader
 	return false;				/* updated by other */
 }
 
+/*
+ * Process-local cache of transaction IDs known to have committed.
+ *
+ * The cache consists of COMMIT_CACHE_BUCKET_COUNT bitmaps, each of
+ * COMMIT_CACHE_BUCKET_BLOCKSZ bytes and covering a contiguous, aligned range
+ * of COMMIT_CACHE_XIDS_PER_BUCKET transaction IDs (one bit per xid).  Lookups
+ * that miss are logged in CommitCacheXidRollupList; once the list is full,
+ * RollUpCommitCache() decides which xid ranges deserve a bucket.
+ */
+#define COMMIT_CACHE_BUCKET_COUNT	4
+#define COMMIT_CACHE_BUCKET_BLOCKSZ	BLCKSZ
+#define COMMIT_CACHE_ROLLUPSZ		100
+#define COMMIT_CACHE_HIT_THRESHOLD	5
+#define COMMIT_CACHE_XIDS_PER_BUCKET \
+	(COMMIT_CACHE_BUCKET_BLOCKSZ * BITS_PER_BYTE)
+
+/* Locate the bucket number, byte and bit that represent a given xid */
+#define CommitCacheXidToBucket(xid) \
+	((int) ((xid) / COMMIT_CACHE_XIDS_PER_BUCKET))
+#define CommitCacheXidToByte(xid) \
+	(((xid) % COMMIT_CACHE_XIDS_PER_BUCKET) / BITS_PER_BYTE)
+#define CommitCacheXidToBit(xid) \
+	((xid) % BITS_PER_BYTE)
+
+typedef struct
+{
+	int			XidBucket;		/* xid range held by this bucket, or -1 */
+	int			HitCount;		/* lookup hits since the last rollup */
+	bool		Clear;			/* bitmap must be zeroed at end of rollup */
+} CommitCacheBucketHeader;
+
+static CommitCacheBucketHeader CommitCacheBucketList[COMMIT_CACHE_BUCKET_COUNT]
+	= {{-1, 0, false}, {-1, 0, false}, {-1, 0, false}, {-1, 0, false}};
+static unsigned char CommitCacheData[COMMIT_CACHE_BUCKET_COUNT][COMMIT_CACHE_BUCKET_BLOCKSZ];
+static TransactionId CommitCacheXidRollupList[COMMIT_CACHE_ROLLUPSZ];
+static int	CommitCacheMissCount = 0;
+
+/* qsort comparison function: plain numeric ordering of TransactionIds */
+static int
+xid_cmp(const void *p1, const void *p2)
+{
+	TransactionId v1 = *((const TransactionId *) p1);
+	TransactionId v2 = *((const TransactionId *) p2);
+
+	if (v1 < v2)
+		return -1;
+	if (v1 > v2)
+		return 1;
+	return 0;
+}
+
+/*
+ * CommitCacheConsiderXid
+ *		Give the xid range containing 'xid' a bucket if it has earned one.
+ *
+ * 'MissCount' is how many times 'xid' appears in the miss list.  The range
+ * takes over the bucket with the fewest hits, provided it is not cached
+ * already, 'MissCount' reaches COMMIT_CACHE_HIT_THRESHOLD and 'MissCount'
+ * exceeds that bucket's hit count.  The bitmap itself is only flagged for
+ * clearing; RollUpCommitCache() zeroes it.
+ */
+static void
+CommitCacheConsiderXid(TransactionId xid, int MissCount)
+{
+	int			XidBucket = CommitCacheXidToBucket(xid);
+	int			BucketMinIdx = 0;
+	int			Bucket;
+
+	if (MissCount < COMMIT_CACHE_HIT_THRESHOLD)
+		return;
+
+	for (Bucket = 0; Bucket < COMMIT_CACHE_BUCKET_COUNT; Bucket++)
+	{
+		/* Already cached: claiming a second bucket would only duplicate it */
+		if (CommitCacheBucketList[Bucket].XidBucket == XidBucket)
+			return;
+
+		if (CommitCacheBucketList[Bucket].HitCount <
+			CommitCacheBucketList[BucketMinIdx].HitCount)
+			BucketMinIdx = Bucket;
+	}
+
+	if (MissCount > CommitCacheBucketList[BucketMinIdx].HitCount)
+	{
+		CommitCacheBucketList[BucketMinIdx].XidBucket = XidBucket;
+		CommitCacheBucketList[BucketMinIdx].HitCount = MissCount;
+		CommitCacheBucketList[BucketMinIdx].Clear = true;
+	}
+}
+
+/*
+ * RollUpCommitCache
+ *		Digest the full miss list and re-assign cache buckets.
+ *
+ * The list is sorted so that equal xids are adjacent; each run of equal xids
+ * is then offered to CommitCacheConsiderXid().  Afterwards re-assigned
+ * buckets are zeroed, all hit counts start over, and the miss list is
+ * emptied.
+ */
+static void
+RollUpCommitCache(void)
+{
+	TransactionId LastId;
+	int			RunLength = 0;
+	int			XidIdx;
+	int			Bucket;
+
+	qsort(CommitCacheXidRollupList, COMMIT_CACHE_ROLLUPSZ,
+		  sizeof(TransactionId), xid_cmp);
+
+	/* Read the first xid only after sorting, or the first run is miscounted */
+	LastId = CommitCacheXidRollupList[0];
+
+	for (XidIdx = 0; XidIdx < COMMIT_CACHE_ROLLUPSZ; XidIdx++)
+	{
+		if (CommitCacheXidRollupList[XidIdx] == LastId)
+			RunLength++;
+		else
+		{
+			CommitCacheConsiderXid(LastId, RunLength);
+			LastId = CommitCacheXidRollupList[XidIdx];
+			RunLength = 1;
+		}
+	}
+
+	/* The loop only reports a run when the next one starts; flush the last */
+	CommitCacheConsiderXid(LastId, RunLength);
+
+	for (Bucket = 0; Bucket < COMMIT_CACHE_BUCKET_COUNT; Bucket++)
+	{
+		if (CommitCacheBucketList[Bucket].Clear)
+			memset(CommitCacheData[Bucket], 0, COMMIT_CACHE_BUCKET_BLOCKSZ);
+
+		CommitCacheBucketList[Bucket].Clear = false;
+		CommitCacheBucketList[Bucket].HitCount = 0;
+	}
+
+	CommitCacheMissCount = 0;
+}
+
+/*
+ * IsXidInCommitCache
+ *		Return true if 'xid' is recorded as committed in the cache.
+ *
+ * A hit bumps the owning bucket's hit count.  A miss is appended to the miss
+ * list, and triggers RollUpCommitCache() once the list is full.
+ */
+static inline bool
+IsXidInCommitCache(TransactionId xid)
+{
+	int			XidBucket = CommitCacheXidToBucket(xid);
+	int			Bucket;
+
+	for (Bucket = 0; Bucket < COMMIT_CACHE_BUCKET_COUNT; Bucket++)
+	{
+		if (CommitCacheBucketList[Bucket].XidBucket == XidBucket)
+		{
+			if (CommitCacheData[Bucket][CommitCacheXidToByte(xid)] &
+				(1 << CommitCacheXidToBit(xid)))
+			{
+				CommitCacheBucketList[Bucket].HitCount++;
+				return true;
+			}
+			break;
+		}
+	}
+
+	CommitCacheXidRollupList[CommitCacheMissCount++] = xid;
+
+	if (CommitCacheMissCount == COMMIT_CACHE_ROLLUPSZ)
+		RollUpCommitCache();
+
+	return false;
+}
+
+/*
+ * SetXidInCommitCache
+ *		Record 'xid' as committed, if a bucket covers its xid range.
+ *
+ * Xids outside every cached range are ignored; they can only get in after a
+ * rollup assigns their range a bucket.
+ */
+static inline void
+SetXidInCommitCache(TransactionId xid)
+{
+	int			XidBucket = CommitCacheXidToBucket(xid);
+	int			Bucket;
+
+	for (Bucket = 0; Bucket < COMMIT_CACHE_BUCKET_COUNT; Bucket++)
+	{
+		if (CommitCacheBucketList[Bucket].XidBucket == XidBucket)
+		{
+			CommitCacheData[Bucket][CommitCacheXidToByte(xid)] |=
+				(1 << CommitCacheXidToBit(xid));
+			break;
+		}
+	}
+}
+
 /*
  * HeapTupleSatisfiesMVCC
  *		True iff heap tuple is valid for the given MVCC snapshot.
@@ -916,9 +1134,20 @@ HeapTupleSatisfiesMVCC(HeapTupleHeader t
 	{
 		if (tuple->t_infomask & HEAP_XMIN_INVALID)
 			return false;
 
+		/*
+		 * Check the process-local commit cache first.  If the inserting
+		 * transaction is recorded there, set the hint bit without dirtying the
+		 * page.  Tuples with HEAP_MOVED_OFF / HEAP_MOVED_IN set are excluded,
+		 * since their visibility also depends on xvac and must go through the
+		 * branches below.
+		 */
+		if (!(tuple->t_infomask & HEAP_MOVED) &&
+			IsXidInCommitCache(HeapTupleHeaderGetXmin(tuple)))
+			SetHintBitsCached(tuple, buffer, HEAP_XMIN_COMMITTED,
+							  HeapTupleHeaderGetXmin(tuple));
 		/* Used by pre-9.0 binary upgrades */
-		if (tuple->t_infomask & HEAP_MOVED_OFF)
+		else if (tuple->t_infomask & HEAP_MOVED_OFF)
 		{
 			TransactionId xvac = HeapTupleHeaderGetXvac(tuple);
 
@@ -985,8 +1214,22 @@ HeapTupleSatisfiesMVCC(HeapTupleHeader t
 		else if (TransactionIdIsInProgress(HeapTupleHeaderGetXmin(tuple)))
 			return false;
 		else if (TransactionIdDidCommit(HeapTupleHeaderGetXmin(tuple)))
+		{
+			/*
+			 * Only cache this transaction as committed if its commit record
+			 * is already safe in the xlog; that way cache hits never have to
+			 * repeat the XLogNeedsFlush() check.
+			 */
+			if (TransactionIdIsValid(HeapTupleHeaderGetXmin(tuple)))
+			{
+				XLogRecPtr	commitLSN = TransactionIdGetCommitLSN(HeapTupleHeaderGetXmin(tuple));
+
+				if (!XLogNeedsFlush(commitLSN))
+					SetXidInCommitCache(HeapTupleHeaderGetXmin(tuple));
+			}
 			SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
 						HeapTupleHeaderGetXmin(tuple));
+		}
 		else
 		{
 			/* it must have aborted or crashed */