diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c index 780ee3b..f2804f1 100644 --- a/src/backend/postmaster/bgwriter.c +++ b/src/backend/postmaster/bgwriter.c @@ -252,12 +252,19 @@ BackgroundWriterMain(void) prev_hibernate = false; /* + * Register our latch as the freelist latch, so that backends can wake us + * when the freelist runs low. + */ + + StrategyInitFreeListLatch(&MyProc->procLatch); + + /* * Loop forever */ for (;;) { - bool can_hibernate; - int rc; + bool can_hibernate = false; + int rc = 0; /* Clear any already-pending wakeups */ ResetLatch(&MyProc->procLatch); @@ -281,7 +288,7 @@ BackgroundWriterMain(void) /* * Do one cycle of dirty-buffer writing. */ - can_hibernate = BgBufferSync(); + /*can_hibernate = BgBufferSync(); */ /* * Send off activity statistics to the stats collector @@ -339,6 +346,14 @@ BackgroundWriterMain(void) } /* + * Sleep until signalled by a backend (rc is checked for postmaster death below). + */ + rc = WaitLatch(&MyProc->procLatch, WL_LATCH_SET | WL_POSTMASTER_DEATH, -1); + + BgBufferSyncAndMoveBuffersToFreelist(); + + + /* * Sleep until we are signaled or BgWriterDelay has elapsed. * * Note: the feedback control loop in BgBufferSync() expects that we @@ -348,9 +363,9 @@ BackgroundWriterMain(void) * down with latch events that are likely to happen frequently during * normal operation. */ - rc = WaitLatch(&MyProc->procLatch, + /*rc = WaitLatch(&MyProc->procLatch, WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, - BgWriterDelay /* ms */ ); + BgWriterDelay ms );*/ /* * If no latch event and BgBufferSync says nothing's happening, extend @@ -370,17 +385,17 @@ BackgroundWriterMain(void) * for two consecutive cycles. Also, we mitigate any possible * consequences of a missed wakeup by not hibernating forever. 
*/ - if (rc == WL_TIMEOUT && can_hibernate && prev_hibernate) - { + /*if (rc == WL_TIMEOUT && can_hibernate && prev_hibernate) + {*/ /* Ask for notification at next buffer allocation */ - StrategyNotifyBgWriter(&MyProc->procLatch); + /*StrategyNotifyBgWriter(&MyProc->procLatch);*/ /* Sleep ... */ - rc = WaitLatch(&MyProc->procLatch, + /*rc = WaitLatch(&MyProc->procLatch, WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, - BgWriterDelay * HIBERNATE_FACTOR); + BgWriterDelay * HIBERNATE_FACTOR);*/ /* Reset the notification request in case we timed out */ - StrategyNotifyBgWriter(NULL); - } + /*StrategyNotifyBgWriter(NULL); + }*/ /* * Emergency bailout if postmaster has died. This is to avoid the diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index c070278..7d4efed 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -1635,6 +1635,41 @@ BgBufferSync(void) return (bufs_to_lap == 0 && recent_alloc == 0); } +/* + * BgBufferSyncAndMoveBuffersToFreelist -- sync buffers and refill the freelist + */ +void +BgBufferSyncAndMoveBuffersToFreelist(void) +{ + uint32 next_to_clean; + uint32 num_to_free; + int num_to_scan = NBuffers; + int num_written = 0; + volatile BufferDesc *bufHdr; + + StrategySyncStartAndEnd(&next_to_clean, &num_to_free); + + /* Make sure we can handle the pin inside SyncOneBuffer */ + ResourceOwnerEnlargeBuffers(CurrentResourceOwner); + + /* LRU scan; stop after one full pass so we cannot spin forever */ + while (num_to_free > 0 && num_to_scan-- > 0) + { + int buffer_state = SyncOneBuffer(next_to_clean, true); + + bufHdr = &BufferDescriptors[next_to_clean]; + if (++next_to_clean >= NBuffers) + next_to_clean = 0; + if (buffer_state & BUF_WRITTEN) + ++num_written; + if ((buffer_state & BUF_REUSABLE) && + StrategyMoveBufferToFreeListEnd(bufHdr)) + num_to_free--; + } + + BgWriterStats.m_buf_written_clean += num_written; +} + /* * SyncOneBuffer -- process a single buffer during syncing. 
* @@ -1673,6 +1708,8 @@ SyncOneBuffer(int buf_id, bool skip_recently_used) else if (skip_recently_used) { /* Caller told us not to write recently-used buffers */ + if (bufHdr->refcount == 0 && bufHdr->usage_count > 0) + bufHdr->usage_count--; UnlockBufHdr(bufHdr); return result; } diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c index 4befab0..90e3f40 100644 --- a/src/backend/storage/buffer/freelist.c +++ b/src/backend/storage/buffer/freelist.c @@ -29,6 +29,7 @@ typedef struct int firstFreeBuffer; /* Head of list of unused buffers */ int lastFreeBuffer; /* Tail of list of unused buffers */ + int numFreeListBuffers; /* Number of buffers currently on the freelist */ /* * NOTE: lastFreeBuffer is undefined when firstFreeBuffer is -1 (that is, @@ -42,6 +43,10 @@ typedef struct uint32 completePasses; /* Complete cycles of the clock sweep */ uint32 numBufferAllocs; /* Buffers allocated since last reset */ + Latch *freelistLatch; /* Latch set to wake the bgwriter when the freelist runs low */ + /* Spinlock protecting the freelist fields: firstFreeBuffer, lastFreeBuffer, numFreeListBuffers and BufferDesc->freeNext */ + slock_t freelist_lck; + /* * Notification latch, or NULL if none. See StrategyNotifyBgWriter. */ @@ -112,7 +117,6 @@ volatile BufferDesc * StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held) { volatile BufferDesc *buf; - Latch *bgwriterLatch; int trycounter; /* @@ -129,31 +133,16 @@ StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held) } } - /* Nope, so lock the freelist */ - *lock_held = true; - LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE); - - /* - * We count buffer allocation requests so that the bgwriter can estimate - * the rate of buffer consumption. Note that buffers recycled by a - * strategy object are intentionally not counted here. - */ - StrategyControl->numBufferAllocs++; + *lock_held = false; /* - * If bgwriterLatch is set, we need to waken the bgwriter, but we should - * not do so while holding BufFreelistLock; so release and re-grab. 
This - * is annoyingly tedious, but it happens at most once per bgwriter cycle, - * so the performance hit is minimal. + * Wake the bgwriter when the freelist runs low. numFreeListBuffers is + * read without the freelist spinlock because an approximate value is + * good enough here; the latch is NULL until the bgwriter registers it. */ - bgwriterLatch = StrategyControl->bgwriterLatch; - if (bgwriterLatch) - { - StrategyControl->bgwriterLatch = NULL; - LWLockRelease(BufFreelistLock); - SetLatch(bgwriterLatch); - LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE); - } + if (StrategyControl->numFreeListBuffers < 200 && + StrategyControl->freelistLatch != NULL) + SetLatch(StrategyControl->freelistLatch); /* * Try to get a buffer from the freelist. Note that the freeNext fields @@ -161,34 +150,51 @@ StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held) * individual buffer spinlocks, so it's OK to manipulate them without * holding the spinlock. */ - while (StrategyControl->firstFreeBuffer >= 0) + for(;;) { - buf = &BufferDescriptors[StrategyControl->firstFreeBuffer]; - Assert(buf->freeNext != FREENEXT_NOT_IN_LIST); + SpinLockAcquire(&StrategyControl->freelist_lck); - /* Unconditionally remove buffer from freelist */ - StrategyControl->firstFreeBuffer = buf->freeNext; - buf->freeNext = FREENEXT_NOT_IN_LIST; + if (StrategyControl->firstFreeBuffer >= 0) + { + buf = &BufferDescriptors[StrategyControl->firstFreeBuffer]; + Assert(buf->freeNext != FREENEXT_NOT_IN_LIST); - /* - * If the buffer is pinned or has a nonzero usage_count, we cannot use - * it; discard it and retry. (This can only happen if VACUUM put a - * valid buffer in the freelist and then someone else used it before - * we got to it. It's probably impossible altogether as of 8.3, but - * we'd better check anyway.) 
- */ - LockBufHdr(buf); - if (buf->refcount == 0 && buf->usage_count == 0) + /* Pop the head buffer off the freelist while holding freelist_lck */ + StrategyControl->firstFreeBuffer = buf->freeNext; + buf->freeNext = FREENEXT_NOT_IN_LIST; + --StrategyControl->numFreeListBuffers; + + SpinLockRelease(&StrategyControl->freelist_lck); + + /* + * If the buffer is pinned or has a nonzero usage_count, we cannot use + * it; discard it and retry. (The buffer may have been pinned or + * touched again after it was put on the freelist, so we recheck its + * state under the buffer header lock. A discarded buffer is simply + * left off the freelist.) + */ + LockBufHdr(buf); + if (buf->refcount == 0 && buf->usage_count == 0) + { + if (strategy != NULL) + AddBufferToRing(strategy, buf); + return buf; + } + UnlockBufHdr(buf); + } + else { - if (strategy != NULL) - AddBufferToRing(strategy, buf); - return buf; + SpinLockRelease(&StrategyControl->freelist_lck); + break; } - UnlockBufHdr(buf); } /* Nothing on the freelist, so run the "clock sweep" algorithm */ trycounter = NBuffers; + + *lock_held = true; + LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE); + for (;;) { buf = &BufferDescriptors[StrategyControl->nextVictimBuffer]; @@ -196,7 +202,7 @@ StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held) if (++StrategyControl->nextVictimBuffer >= NBuffers) { StrategyControl->nextVictimBuffer = 0; - StrategyControl->completePasses++; + /*StrategyControl->completePasses++;*/ } /* @@ -241,7 +247,7 @@ StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held) void StrategyFreeBuffer(volatile BufferDesc *buf) { - LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE); + SpinLockAcquire(&StrategyControl->freelist_lck); /* * It is possible that we are told to put something in the freelist that @@ -253,11 +259,50 @@ StrategyFreeBuffer(volatile BufferDesc *buf) if (buf->freeNext < 0) StrategyControl->lastFreeBuffer = buf->buf_id; StrategyControl->firstFreeBuffer = buf->buf_id; + 
++StrategyControl->numFreeListBuffers; } - LWLockRelease(BufFreelistLock); + SpinLockRelease(&StrategyControl->freelist_lck); +} + +/* + * StrategyMoveBufferToFreeListEnd: put a buffer on the end of freelist + * + * Returns true if the buffer was linked in, false if it was already on + * the freelist. The list is only touched while holding freelist_lck. + */ +bool +StrategyMoveBufferToFreeListEnd(volatile BufferDesc *buf) +{ + bool moved = false; + + SpinLockAcquire(&StrategyControl->freelist_lck); + + /* + * The caller may hand us a buffer that is already on the freelist; + * leave the list alone in that case. + */ + if (buf->freeNext == FREENEXT_NOT_IN_LIST) + { + /* + * Append the buffer as the new tail. If the list was empty it + * becomes the head as well. + */ + buf->freeNext = FREENEXT_END_OF_LIST; + if (StrategyControl->firstFreeBuffer < 0) + StrategyControl->firstFreeBuffer = buf->buf_id; + else + BufferDescriptors[StrategyControl->lastFreeBuffer].freeNext = buf->buf_id; + StrategyControl->lastFreeBuffer = buf->buf_id; + ++StrategyControl->numFreeListBuffers; + moved = true; + } + SpinLockRelease(&StrategyControl->freelist_lck); + + return moved; } + /* * StrategySyncStart -- tell BufferSync where to start syncing * @@ -287,6 +332,31 @@ StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc) return result; } +void +StrategySyncStartAndEnd(uint32 *start, uint32 *end) +{ + int curfreebuffers; + int reqfreebuffers; + + /* + * numFreeListBuffers is read here without the freelist spinlock. + * That is acceptable because the value is only used to estimate + * how many more buffers should be moved to the freelist, so it + * does not have to be exact. 
+ */ + + LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE); + *start = StrategyControl->nextVictimBuffer; + curfreebuffers = StrategyControl->numFreeListBuffers; + reqfreebuffers = 2000; + if (reqfreebuffers > curfreebuffers) + *end = reqfreebuffers - curfreebuffers; + else + *end = 0; + LWLockRelease(BufFreelistLock); + return; +} + /* * StrategyNotifyBgWriter -- set or clear allocation notification latch * @@ -309,6 +379,19 @@ StrategyNotifyBgWriter(Latch *bgwriterLatch) } +void +StrategyInitFreeListLatch(Latch *bgwriterLatch) +{ + /* + * Store the bgwriter's latch while holding the freelist spinlock. + * NOTE(review): StrategyGetBuffer reads freelistLatch without taking + * freelist_lck, so the lock does not order the store against that read. + */ + SpinLockAcquire(&StrategyControl->freelist_lck); + StrategyControl->freelistLatch= bgwriterLatch; + SpinLockRelease(&StrategyControl->freelist_lck); +} + /* * StrategyShmemSize * @@ -376,6 +459,7 @@ StrategyInitialize(bool init) */ StrategyControl->firstFreeBuffer = 0; StrategyControl->lastFreeBuffer = NBuffers - 1; + StrategyControl->numFreeListBuffers = NBuffers; /* Initialize the clock sweep pointer */ StrategyControl->nextVictimBuffer = 0; @@ -386,6 +470,8 @@ StrategyInitialize(bool init) /* No pending notification */ StrategyControl->bgwriterLatch = NULL; + StrategyControl->freelistLatch = NULL; + SpinLockInit(&StrategyControl->freelist_lck); } else Assert(!init); diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index c019013..05ff723 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -188,11 +188,14 @@ extern BufferDesc *LocalBufferDescriptors; extern volatile BufferDesc *StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held); extern void StrategyFreeBuffer(volatile BufferDesc *buf); +extern bool StrategyMoveBufferToFreeListEnd(volatile BufferDesc *buf); extern bool StrategyRejectBuffer(BufferAccessStrategy strategy, volatile 
BufferDesc *buf); extern int StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc); +extern void StrategySyncStartAndEnd(uint32 *start, uint32 *end); extern void StrategyNotifyBgWriter(Latch *bgwriterLatch); +extern void StrategyInitFreeListLatch(Latch *bgwriterLatch); extern Size StrategyShmemSize(void); extern void StrategyInitialize(bool init); diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index 89447d0..b0e5598 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -219,6 +219,7 @@ extern void AbortBufferIO(void); extern void BufmgrCommit(void); extern bool BgBufferSync(void); +extern void BgBufferSyncAndMoveBuffersToFreelist(void); extern void AtProcExit_LocalBuffers(void); diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index 175fae3..fe86e07 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -136,10 +136,10 @@ extern PGDLLIMPORT LWLockPadded *MainLWLockArray; */ /* Number of partitions of the shared buffer mapping hashtable */ -#define NUM_BUFFER_PARTITIONS 16 +#define NUM_BUFFER_PARTITIONS 128 /* Number of partitions the shared lock tables are divided into */ -#define LOG2_NUM_LOCK_PARTITIONS 4 +#define LOG2_NUM_LOCK_PARTITIONS 7 #define NUM_LOCK_PARTITIONS (1 << LOG2_NUM_LOCK_PARTITIONS) /* Number of partitions the shared predicate lock tables are divided into */