diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index cc973b5..226b605 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -65,6 +65,7 @@
 #define BUF_REUSABLE			0x02
 
 #define DROP_RELS_BSEARCH_THRESHOLD		20
+#define DROP_RELS_BUF_MAPPING_THRESHOLD		4096
 
 typedef struct PrivateRefCountEntry
 {
@@ -136,6 +137,8 @@ static PrivateRefCountEntry *NewPrivateRefCountEntry(Buffer buffer);
 static PrivateRefCountEntry *GetPrivateRefCountEntry(Buffer buffer, bool do_move);
 static inline int32 GetPrivateRefCount(Buffer buffer);
 static void ForgetPrivateRefCountEntry(PrivateRefCountEntry *ref);
+static void DropForkSpecificBuffers(RelFileNode rnode, ForkNumber forkNum,
+						BlockNumber blocksToDel);
 
 /*
  * Ensure that the PrivateRefCountArray has sufficient space to store one more
@@ -2511,6 +2514,64 @@ BufferGetLSNAtomic(Buffer buffer)
 }
 
 /* ---------------------------------------------------------------------
+ *		DropForkSpecificBuffers
+ *
+ *		This function removes from the buffer pool all the pages of the
+ *		specified fork of the specified relation.  Rather than scanning the
+ *		whole buffer pool, it looks up blocks 0 .. blocksToDel - 1 of the
+ *		fork in the buffer mapping table.
+ * --------------------------------------------------------------------
+ */
+static void
+DropForkSpecificBuffers(RelFileNode rnode, ForkNumber forkNum,
+						BlockNumber blocksToDel)
+{
+	BlockNumber blk_count;
+	BufferTag	newTag;			/* identity of requested block */
+	uint32		newHash;		/* hash value for newTag */
+	LWLock	   *newPartitionLock;	/* buffer partition lock for it */
+	int			buf_id;
+
+	for (blk_count = 0; blk_count < blocksToDel; blk_count++)
+	{
+		volatile BufferDesc *bufHdr;
+
+		/* create a tag so we can lookup the buffer */
+		INIT_BUFFERTAG(newTag, rnode, forkNum, blk_count);
+
+		/* determine its hash code and partition lock ID */
+		newHash = BufTableHashCode(&newTag);
+		newPartitionLock = BufMappingPartitionLock(newHash);
+
+		/* see if the block is in the buffer pool already */
+		LWLockAcquire(newPartitionLock, LW_SHARED);
+		buf_id = BufTableLookup(&newTag, newHash);
+		LWLockRelease(newPartitionLock);
+
+		/* block is not in the buffer pool; nothing to invalidate */
+		if (buf_id < 0)
+			continue;
+
+		bufHdr = GetBufferDescriptor(buf_id);
+
+		/*
+		 * We hold neither the partition lock nor a pin at this point, so the
+		 * buffer may have been recycled for a different page since the
+		 * lookup.  Recheck the tag under the buffer header lock, as the
+		 * full-scan path in DropRelFileNodesAllBuffers does, and invalidate
+		 * only if it still holds the block we looked up.
+		 */
+		LockBufHdr(bufHdr);
+		if (RelFileNodeEquals(bufHdr->tag.rnode, rnode) &&
+			bufHdr->tag.forkNum == forkNum &&
+			bufHdr->tag.blockNum == blk_count)
+			InvalidateBuffer(bufHdr);	/* releases spinlock */
+		else
+			UnlockBufHdr(bufHdr);
+	}
+}
+
+/* ---------------------------------------------------------------------
  *		DropRelFileNodeBuffers
  *
  *		This function removes from the buffer pool all the pages of the
@@ -2596,9 +2657,16 @@ void
 DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes)
 {
 	int			i,
-				n = 0;
+				n = 0,
+				rel_count = 0;
 	RelFileNode *nodes;
 	bool		use_bsearch;
+	bool		traverse_buf_freelist = false;
+	BlockNumber total_blocks = 0;
+	BlockNumber *num_blocks = NULL;
+	BlockNumber *num_fsm_blocks = NULL;
+	BlockNumber *num_vm_blocks = NULL;
+	SMgrRelation reln;
 
 	if (nnodes == 0)
 		return;
@@ -2628,59 +2696,149 @@ DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes)
 	}
 
 	/*
-	 * For low number of relations to drop just use a simple walk through, to
-	 * save the bsearch overhead.  The threshold to use is rather a guess than
-	 * an exactly determined value, as it depends on many factors (CPU and RAM
-	 * speeds, amount of shared buffers etc.).
+	 * If shared_buffers is reasonably large and the relations to drop hold
+	 * no more than 25% of it in blocks, use the buf mapping table to find
+	 * the buffers that need to be invalidated.  Traversing all of
+	 * shared_buffers for such cases incurs a large overhead.
 	 */
-	use_bsearch = n > DROP_RELS_BSEARCH_THRESHOLD;
+	if (NBuffers >= DROP_RELS_BUF_MAPPING_THRESHOLD)
+	{
+		num_blocks = palloc(sizeof(BlockNumber) * n);	/* non-local relations */
+		num_fsm_blocks = palloc0(sizeof(BlockNumber) * n);	/* non-local relations */
+		num_vm_blocks = palloc0(sizeof(BlockNumber) * n);	/* non-local relations */
 
-	/* sort the list of rnodes if necessary */
-	if (use_bsearch)
-		pg_qsort(nodes, n, sizeof(RelFileNode), rnode_comparator);
 
-	for (i = 0; i < NBuffers; i++)
+		/*
+		 * NOTE(review): only the MAIN, FSM and VM forks are sized here, so
+		 * the mapping-table path never looks for buffers of any other fork
+		 * (e.g. INIT_FORKNUM) -- confirm none can be cached for these rels.
+		 */
+		for (rel_count = 0; rel_count < n; rel_count++)
+		{
+			RelFileNode *rnode = NULL;
+
+			rnode = &nodes[rel_count];
+
+			reln = smgropen(*rnode, InvalidBackendId);
+
+			num_blocks[rel_count] = smgrnblocks(reln, MAIN_FORKNUM);
+			total_blocks += num_blocks[rel_count];
+
+			if (smgrexists(reln, FSM_FORKNUM))
+			{
+				num_fsm_blocks[rel_count] = smgrnblocks(reln, FSM_FORKNUM);
+				total_blocks += num_fsm_blocks[rel_count];
+			}
+
+			if (smgrexists(reln, VISIBILITYMAP_FORKNUM))
+			{
+				num_vm_blocks[rel_count] = smgrnblocks(reln, VISIBILITYMAP_FORKNUM);
+				total_blocks += num_vm_blocks[rel_count];
+			}
+
+			if (total_blocks > NBuffers / 4)
+			{
+				traverse_buf_freelist = true;
+				pfree(num_blocks);
+				pfree(num_fsm_blocks);
+				pfree(num_vm_blocks);
+				break;
+			}
+		}
+	}
+	else
+		traverse_buf_freelist = true;
+
+	if (!traverse_buf_freelist)
 	{
-		RelFileNode *rnode = NULL;
-		volatile BufferDesc *bufHdr = GetBufferDescriptor(i);
+		for (rel_count = 0; rel_count < n; rel_count++)
+		{
+			RelFileNode *rnode = NULL;
+			BlockNumber blks_relation, blks_fsm_relation, blks_vm_relation;
+
+			rnode = &nodes[rel_count];
+
+			/* Invalidate buffers associated with MAIN_FORKNUM of relation. */
+			blks_relation = num_blocks[rel_count];
+			DropForkSpecificBuffers(*rnode, MAIN_FORKNUM, blks_relation);
 
+			/* Invalidate buffers associated with FSM_FORKNUM of relation. */
+			blks_fsm_relation = num_fsm_blocks[rel_count];
+			DropForkSpecificBuffers(*rnode, FSM_FORKNUM, blks_fsm_relation);
+
+			/* Invalidate buffers associated with VISIBILITYMAP_FORKNUM of relation. */
+			blks_vm_relation = num_vm_blocks[rel_count];
+			DropForkSpecificBuffers(*rnode, VISIBILITYMAP_FORKNUM, blks_vm_relation);
+		}
+	}
+	else
+	{
 		/*
-		 * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
-		 * and saves some cycles.
+		 * For low number of relations to drop just use a simple walk through, to
+		 * save the bsearch overhead.  The threshold to use is rather a guess than
+		 * an exactly determined value, as it depends on many factors (CPU and RAM
+		 * speeds, amount of shared buffers etc.).
 		 */
+		use_bsearch = n > DROP_RELS_BSEARCH_THRESHOLD;
+
+		/* sort the list of rnodes if necessary */
+		if (use_bsearch)
+			pg_qsort(nodes, n, sizeof(RelFileNode), rnode_comparator);
 
-		if (!use_bsearch)
+		for (i = 0; i < NBuffers; i++)
 		{
-			int			j;
+			RelFileNode *rnode = NULL;
+			volatile BufferDesc *bufHdr = GetBufferDescriptor(i);
 
-			for (j = 0; j < n; j++)
+			/*
+			 * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
+			 * and saves some cycles.
+			 */
+
+			if (!use_bsearch)
 			{
-				if (RelFileNodeEquals(bufHdr->tag.rnode, nodes[j]))
+				int			j;
+
+				for (j = 0; j < n; j++)
 				{
-					rnode = &nodes[j];
-					break;
+					if (RelFileNodeEquals(bufHdr->tag.rnode, nodes[j]))
+					{
+						rnode = &nodes[j];
+						break;
+					}
 				}
 			}
-		}
-		else
-		{
-			rnode = bsearch((const void *) &(bufHdr->tag.rnode),
-							nodes, n, sizeof(RelFileNode),
-							rnode_comparator);
-		}
+			else
+			{
+				rnode = bsearch((const void *) &(bufHdr->tag.rnode),
+								nodes, n, sizeof(RelFileNode),
+								rnode_comparator);
+			}
 
-		/* buffer doesn't belong to any of the given relfilenodes; skip it */
-		if (rnode == NULL)
-			continue;
+			/* buffer doesn't belong to any of the given relfilenodes; skip it */
+			if (rnode == NULL)
+				continue;
 
-		LockBufHdr(bufHdr);
-		if (RelFileNodeEquals(bufHdr->tag.rnode, (*rnode)))
-			InvalidateBuffer(bufHdr);	/* releases spinlock */
-		else
-			UnlockBufHdr(bufHdr);
+			LockBufHdr(bufHdr);
+			if (RelFileNodeEquals(bufHdr->tag.rnode, (*rnode)))
+				InvalidateBuffer(bufHdr);	/* releases spinlock */
+			else
+				UnlockBufHdr(bufHdr);
+		}
 	}
 
 	pfree(nodes);
+
+	/*
+	 * Free the per-relation block counts used to find the buffers in the
+	 * buf mapping table.
+	 */
+	if (!traverse_buf_freelist)
+	{
+		pfree(num_blocks);
+		pfree(num_fsm_blocks);
+		pfree(num_vm_blocks);
+	}
 }
 
 /* ---------------------------------------------------------------------