Move PinBuffer and UnpinBuffer to atomics
Hello hackers!
Continuing the theme: /messages/by-id/3368228.mTSz6V0Jsq@dinodell
This time, we have completely rewritten 'refcount' and 'usage_count' as atomics in
PinBuffer and UnpinBuffer (but kept the lock for the buffer flags in Unpin).
At the same time this does not affect the correctness of the buffer manager,
because those variables are already protected by an LWLock above them (for the
partition of the hashtable). If someone pins the buffer after the call to
StrategyGetBuffer, we just try again (in BufferAlloc). There is also one more check
in the code before the old buffer is deleted, where the changes can be rolled back.
The other functions that check 'refcount' and 'usage_count' take exclusive locks.
Also, a stress test with 256 KB of shared memory completed successfully.
Without the patch we get 417523 TPS and with the patch 965821 TPS on a big x86 server.
All details here: https://gist.github.com/stalkerg/773a81b79a27b4d5d63f
Thank you.
--
YUriy Zhuravlev
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
Attachments:
atomic_bufmgr_v5.patchtext/x-patch; charset=utf-8; name=atomic_bufmgr_v5.patchDownload
diff --git a/contrib/pg_buffercache/pg_buffercache_pages.c b/contrib/pg_buffercache/pg_buffercache_pages.c
index 6622d22..50ca2a5 100644
--- a/contrib/pg_buffercache/pg_buffercache_pages.c
+++ b/contrib/pg_buffercache/pg_buffercache_pages.c
@@ -33,14 +33,14 @@ typedef struct
BlockNumber blocknum;
bool isvalid;
bool isdirty;
- uint16 usagecount;
+ uint32 usagecount;
/*
* An int32 is sufficiently large, as MAX_BACKENDS prevents a buffer from
* being pinned by too many backends and each backend will only pin once
* because of bufmgr.c's PrivateRefCount infrastructure.
*/
- int32 pinning_backends;
+ uint32 pinning_backends;
} BufferCachePagesRec;
@@ -160,8 +160,8 @@ pg_buffercache_pages(PG_FUNCTION_ARGS)
fctx->record[i].reldatabase = bufHdr->tag.rnode.dbNode;
fctx->record[i].forknum = bufHdr->tag.forkNum;
fctx->record[i].blocknum = bufHdr->tag.blockNum;
- fctx->record[i].usagecount = bufHdr->usage_count;
- fctx->record[i].pinning_backends = bufHdr->refcount;
+ fctx->record[i].usagecount = pg_atomic_read_u32(&bufHdr->usage_count);
+ fctx->record[i].pinning_backends = pg_atomic_read_u32(&bufHdr->refcount);
if (bufHdr->flags & BM_DIRTY)
fctx->record[i].isdirty = true;
@@ -236,7 +236,7 @@ pg_buffercache_pages(PG_FUNCTION_ARGS)
values[7] = Int16GetDatum(fctx->record[i].usagecount);
nulls[7] = false;
/* unused for v1.0 callers, but the array is always long enough */
- values[8] = Int32GetDatum(fctx->record[i].pinning_backends);
+ values[8] = UInt32GetDatum(fctx->record[i].pinning_backends);
nulls[8] = false;
}
diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c
index 3ae2848..e139a7c 100644
--- a/src/backend/storage/buffer/buf_init.c
+++ b/src/backend/storage/buffer/buf_init.c
@@ -96,8 +96,8 @@ InitBufferPool(void)
CLEAR_BUFFERTAG(buf->tag);
buf->flags = 0;
- buf->usage_count = 0;
- buf->refcount = 0;
+ pg_atomic_init_u32(&buf->usage_count, 0);
+ pg_atomic_init_u32(&buf->refcount, 0);
buf->wait_backend_pid = 0;
SpinLockInit(&buf->buf_hdr_lock);
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 8c0358e..afba360 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -962,7 +962,6 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
* into the buffer.
*/
buf = GetBufferDescriptor(buf_id);
-
valid = PinBuffer(buf, strategy);
/* Can release the mapping lock as soon as we've pinned it */
@@ -1013,7 +1012,15 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
*/
buf = StrategyGetBuffer(strategy);
- Assert(buf->refcount == 0);
+ /*
+ * Ok, we can skip this but then we have to remove new buffer from
+ * hash table. Better to just try again.
+ */
+ if (pg_atomic_read_u32(&buf->refcount) != 0)
+ {
+ UnlockBufHdr(buf);
+ continue;
+ }
/* Must copy buffer flags while we still hold the spinlock */
oldFlags = buf->flags;
@@ -1211,7 +1218,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
* over with a new victim buffer.
*/
oldFlags = buf->flags;
- if (buf->refcount == 1 && !(oldFlags & BM_DIRTY))
+ if (pg_atomic_read_u32(&buf->refcount) == 1 && !(oldFlags & BM_DIRTY))
break;
UnlockBufHdr(buf);
@@ -1234,10 +1241,10 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
buf->tag = newTag;
buf->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_CHECKPOINT_NEEDED | BM_IO_ERROR | BM_PERMANENT);
if (relpersistence == RELPERSISTENCE_PERMANENT)
- buf->flags |= BM_TAG_VALID | BM_PERMANENT;
+ buf->flags|= BM_TAG_VALID | BM_PERMANENT;
else
buf->flags |= BM_TAG_VALID;
- buf->usage_count = 1;
+ pg_atomic_write_u32(&buf->usage_count, 1);
UnlockBufHdr(buf);
@@ -1329,7 +1336,7 @@ retry:
* yet done StartBufferIO, WaitIO will fall through and we'll effectively
* be busy-looping here.)
*/
- if (buf->refcount != 0)
+ if (pg_atomic_read_u32(&buf->refcount) != 0)
{
UnlockBufHdr(buf);
LWLockRelease(oldPartitionLock);
@@ -1347,7 +1354,7 @@ retry:
oldFlags = buf->flags;
CLEAR_BUFFERTAG(buf->tag);
buf->flags = 0;
- buf->usage_count = 0;
+ pg_atomic_write_u32(&buf->usage_count, 0);
UnlockBufHdr(buf);
@@ -1399,7 +1406,7 @@ MarkBufferDirty(Buffer buffer)
LockBufHdr(bufHdr);
- Assert(bufHdr->refcount > 0);
+ Assert(pg_atomic_read_u32(&bufHdr->refcount) > 0);
/*
* If the buffer was not dirty already, do vacuum accounting.
@@ -1498,20 +1505,23 @@ PinBuffer(volatile BufferDesc *buf, BufferAccessStrategy strategy)
ReservePrivateRefCountEntry();
ref = NewPrivateRefCountEntry(b);
- LockBufHdr(buf);
- buf->refcount++;
+ pg_atomic_add_fetch_u32(&buf->refcount, 1);
+
if (strategy == NULL)
{
- if (buf->usage_count < BM_MAX_USAGE_COUNT)
- buf->usage_count++;
+ uint32 expect = pg_atomic_read_u32(&buf->usage_count);
+ while (expect < BM_MAX_USAGE_COUNT)
+ {
+ if (pg_atomic_compare_exchange_u32(&buf->usage_count, &expect, expect+1))
+ break;
+ }
}
else
{
- if (buf->usage_count == 0)
- buf->usage_count = 1;
+ uint32 expect = 0;
+ pg_atomic_compare_exchange_u32(&buf->usage_count, &expect, 1);
}
result = (buf->flags & BM_VALID) != 0;
- UnlockBufHdr(buf);
}
else
{
@@ -1558,7 +1568,7 @@ PinBuffer_Locked(volatile BufferDesc *buf)
*/
Assert(GetPrivateRefCountEntry(BufferDescriptorGetBuffer(buf), false) == NULL);
- buf->refcount++;
+ pg_atomic_add_fetch_u32(&buf->refcount, 1);
UnlockBufHdr(buf);
b = BufferDescriptorGetBuffer(buf);
@@ -1598,15 +1608,14 @@ UnpinBuffer(volatile BufferDesc *buf, bool fixOwner)
Assert(!LWLockHeldByMe(buf->content_lock));
Assert(!LWLockHeldByMe(buf->io_in_progress_lock));
- LockBufHdr(buf);
-
/* Decrement the shared reference count */
- Assert(buf->refcount > 0);
- buf->refcount--;
+ Assert(pg_atomic_read_u32(&buf->refcount) > 0);
+ pg_atomic_sub_fetch_u32(&buf->refcount, 1);
+ LockBufHdr(buf);
/* Support LockBufferForCleanup() */
if ((buf->flags & BM_PIN_COUNT_WAITER) &&
- buf->refcount == 1)
+ pg_atomic_read_u32(&buf->refcount) == 1)
{
/* we just released the last pin other than the waiter's */
int wait_backend_pid = buf->wait_backend_pid;
@@ -2095,7 +2104,7 @@ SyncOneBuffer(int buf_id, bool skip_recently_used)
*/
LockBufHdr(bufHdr);
- if (bufHdr->refcount == 0 && bufHdr->usage_count == 0)
+ if (pg_atomic_read_u32(&bufHdr->refcount) == 0 && pg_atomic_read_u32(&bufHdr->usage_count) == 0)
result |= BUF_REUSABLE;
else if (skip_recently_used)
{
@@ -2278,7 +2287,7 @@ PrintBufferLeakWarning(Buffer buffer)
"(rel=%s, blockNum=%u, flags=0x%x, refcount=%u %d)",
buffer, path,
buf->tag.blockNum, buf->flags,
- buf->refcount, loccount);
+ pg_atomic_read_u32(&buf->refcount), loccount);
pfree(path);
}
@@ -2809,7 +2818,7 @@ PrintBufferDescs(void)
i, buf->freeNext,
relpathbackend(buf->tag.rnode, InvalidBackendId, buf->tag.forkNum),
buf->tag.blockNum, buf->flags,
- buf->refcount, GetPrivateRefCount(b));
+ pg_atomic_read_u32(&buf->refcount), GetPrivateRefCount(b));
}
}
#endif
@@ -2834,7 +2843,7 @@ PrintPinnedBufs(void)
i, buf->freeNext,
relpathperm(buf->tag.rnode, buf->tag.forkNum),
buf->tag.blockNum, buf->flags,
- buf->refcount, GetPrivateRefCount(b));
+ pg_atomic_read_u32(&buf->refcount), GetPrivateRefCount(b));
}
}
}
@@ -3149,7 +3158,7 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
}
LockBufHdr(bufHdr);
- Assert(bufHdr->refcount > 0);
+ Assert(pg_atomic_read_u32(&bufHdr->refcount) > 0);
if (!(bufHdr->flags & BM_DIRTY))
{
dirtied = true; /* Means "will be dirtied by this action" */
@@ -3307,8 +3316,8 @@ LockBufferForCleanup(Buffer buffer)
/* Try to acquire lock */
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
LockBufHdr(bufHdr);
- Assert(bufHdr->refcount > 0);
- if (bufHdr->refcount == 1)
+ Assert(pg_atomic_read_u32(&bufHdr->refcount) > 0);
+ if (pg_atomic_read_u32(&bufHdr->refcount) == 1)
{
/* Successfully acquired exclusive lock with pincount 1 */
UnlockBufHdr(bufHdr);
@@ -3417,8 +3426,8 @@ ConditionalLockBufferForCleanup(Buffer buffer)
bufHdr = GetBufferDescriptor(buffer - 1);
LockBufHdr(bufHdr);
- Assert(bufHdr->refcount > 0);
- if (bufHdr->refcount == 1)
+ Assert(pg_atomic_read_u32(&bufHdr->refcount) > 0);
+ if (pg_atomic_read_u32(&bufHdr->refcount) == 1)
{
/* Successfully acquired exclusive lock with pincount 1 */
UnlockBufHdr(bufHdr);
diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c
index bc2c773..4461271 100644
--- a/src/backend/storage/buffer/freelist.c
+++ b/src/backend/storage/buffer/freelist.c
@@ -280,14 +280,13 @@ StrategyGetBuffer(BufferAccessStrategy strategy)
* of 8.3, but we'd better check anyway.)
*/
LockBufHdr(buf);
- if (buf->refcount == 0 && buf->usage_count == 0)
+ if (pg_atomic_read_u32(&buf->refcount) == 0 && pg_atomic_read_u32(&buf->usage_count) == 0)
{
if (strategy != NULL)
AddBufferToRing(strategy, buf);
return buf;
}
UnlockBufHdr(buf);
-
}
}
@@ -303,11 +302,11 @@ StrategyGetBuffer(BufferAccessStrategy strategy)
* it; decrement the usage_count (unless pinned) and keep scanning.
*/
LockBufHdr(buf);
- if (buf->refcount == 0)
+ if (pg_atomic_read_u32(&buf->refcount) == 0)
{
- if (buf->usage_count > 0)
+ if (buf->usage_count.value > 0)
{
- buf->usage_count--;
+ buf->usage_count.value--;
trycounter = NBuffers;
}
else
@@ -617,7 +616,7 @@ GetBufferFromRing(BufferAccessStrategy strategy)
*/
buf = GetBufferDescriptor(bufnum - 1);
LockBufHdr(buf);
- if (buf->refcount == 0 && buf->usage_count <= 1)
+ if (pg_atomic_read_u32(&buf->refcount) == 0 && pg_atomic_read_u32(&buf->usage_count) <= 1)
{
strategy->current_was_in_ring = true;
return buf;
diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c
index 3144afe..e1932f5 100644
--- a/src/backend/storage/buffer/localbuf.c
+++ b/src/backend/storage/buffer/localbuf.c
@@ -131,8 +131,8 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
/* this part is equivalent to PinBuffer for a shared buffer */
if (LocalRefCount[b] == 0)
{
- if (bufHdr->usage_count < BM_MAX_USAGE_COUNT)
- bufHdr->usage_count++;
+ if (pg_atomic_read_u32(&bufHdr->usage_count) < BM_MAX_USAGE_COUNT)
+ pg_atomic_add_fetch_u32(&bufHdr->usage_count, 1);
}
LocalRefCount[b]++;
ResourceOwnerRememberBuffer(CurrentResourceOwner,
@@ -169,9 +169,9 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
if (LocalRefCount[b] == 0)
{
- if (bufHdr->usage_count > 0)
+ if (pg_atomic_read_u32(&bufHdr->usage_count) > 0)
{
- bufHdr->usage_count--;
+ pg_atomic_fetch_sub_u32(&bufHdr->usage_count, 1);
trycounter = NLocBuffer;
}
else
@@ -252,7 +252,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
bufHdr->tag = newTag;
bufHdr->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_IO_ERROR);
bufHdr->flags |= BM_TAG_VALID;
- bufHdr->usage_count = 1;
+ pg_atomic_write_u32(&bufHdr->usage_count, 1);
*foundPtr = FALSE;
return bufHdr;
@@ -328,7 +328,7 @@ DropRelFileNodeLocalBuffers(RelFileNode rnode, ForkNumber forkNum,
/* Mark buffer invalid */
CLEAR_BUFFERTAG(bufHdr->tag);
bufHdr->flags = 0;
- bufHdr->usage_count = 0;
+ pg_atomic_write_u32(&bufHdr->usage_count, 0);
}
}
}
@@ -368,7 +368,7 @@ DropRelFileNodeAllLocalBuffers(RelFileNode rnode)
/* Mark buffer invalid */
CLEAR_BUFFERTAG(bufHdr->tag);
bufHdr->flags = 0;
- bufHdr->usage_count = 0;
+ pg_atomic_write_u32(&bufHdr->usage_count, 0);
}
}
}
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index 521ee1c..68cbbf4 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -137,9 +137,9 @@ typedef struct buftag
typedef struct BufferDesc
{
BufferTag tag; /* ID of page contained in buffer */
- BufFlags flags; /* see bit definitions above */
- uint16 usage_count; /* usage counter for clock sweep code */
- unsigned refcount; /* # of backends holding pins on buffer */
+ BufFlags flags; /* see bit definitions above */
+ pg_atomic_uint32 usage_count; /* usage counter for clock sweep code */
+ pg_atomic_uint32 refcount; /* # of backends holding pins on buffer */
int wait_backend_pid; /* backend PID of pin-count waiter */
slock_t buf_hdr_lock; /* protects the above fields */
Hi,
On 2015-09-11 13:23:24 +0300, YUriy Zhuravlev wrote:
Continuing the theme: /messages/by-id/3368228.mTSz6V0Jsq@dinodell
Please don't just start new threads for a new version of the patch.
This time, we fairly rewrote 'refcount' and 'usage_count' to atomic in
PinBuffer and UnpinBuffer (but save lock for buffer flags in Unpin).
Hm.
In the same time it doesn't affect to correctness of buffer manager
because that variables already have LWLock on top of them (for partition of
hashtable).
Note that there's a pending patch that removes the buffer mapping locks
entirely.
If someone pinned buffer after the call StrategyGetBuffer we just try
again (in BufferAlloc). Also in the code there is one more check
before deleting the old buffer, where changes can be rolled back. The
other functions where it is checked 'refcount' and 'usage_count' put
exclusive locks.
I don't think this is correct. This way we can leave the for (;;) loop
in BufferAlloc() thinking that the buffer is unused (and can't be further
pinned because of the held spinlock!) while it actually has been pinned
since by PinBuffer(). Additionally oldFlags can get out of sync there.
I don't think the approach of making some of the fields atomics but not
really caring about the rest is going to work. My suggestion is to add a
single 'state' 32bit atomic. This 32bit state is subdivided into:
10bit for flags,
3bit for usage_count,
16bit for refcount
then turn each operation that currently uses one of these fields into
corresponding accesses (just different values for flags, bit-shiftery &
mask for reading usage count, bit mask for reading refcount). The trick
then is to add a *new* flag value BM_LOCKED. This can then act as a sort
of a 'one bit' spinlock.
That should roughly look like (more or less pseudocode):
/*
 * Acquire the buffer-header lock by setting the BM_LOCKED bit in the
 * combined atomic 'state' word, spinning while another backend holds it.
 * BM_LOCKED acts as a one-bit spinlock embedded in the state field, so no
 * separate slock_t is needed.
 */
void
LockBufHdr(BufferDesc *desc)
{
	int state = pg_atomic_read_u32(&desc->state);

	for (;;)
	{
		/* wait till lock is free: spin while the BM_LOCKED bit is observed set */
		while (unlikely(state & BM_LOCKED))
		{
			pg_spin_delay();
			state = pg_atomic_read_u32(&desc->state);
			/* add exponential backoff? Should seldom be contended, though. */
		}
		/*
		 * and try to get lock: CAS in the BM_LOCKED bit; on failure the CAS
		 * refreshes 'state' with the current value and we loop again.
		 */
		if (pg_atomic_compare_exchange_u32(&desc->state, &state, state | BM_LOCKED))
			break;
	}
}
/*
 * Pseudocode: pin a buffer without taking the header spinlock.
 *
 * refcount, usage_count and flags share one atomic 'state' word, so the pin
 * (refcount + 1, bounded usage_count bump) and the BM_VALID read are applied
 * in a single compare-and-swap, retried until no concurrent BM_LOCKED holder
 * or competing CAS intervenes.
 */
static bool
PinBuffer(volatile BufferDesc *buf, BufferAccessStrategy strategy)
{
	...
	if (ref == NULL)
	{
		ReservePrivateRefCountEntry();
		ref = NewPrivateRefCountEntry(b);
		...
		int state = pg_atomic_read_u32(&desc->state);
		int oldstate = state;

		while (true)
		{
			/* spin-wait till lock is free */
			while (unlikely(state & BM_LOCKED))
			{
				pg_spin_delay();
				state = pg_atomic_read_u32(&desc->state);
			}

			/* increase refcount (occupies the low-order bits of state) */
			state += 1;

			/*
			 * increase usagecount unless already max
			 *
			 * NOTE(review): the masked value is compared unshifted against
			 * BM_MAX_USAGE_COUNT here; the concrete patch later in this
			 * thread shifts by BUF_USAGECOUNT_SHIFT before comparing —
			 * confirm which is intended.
			 */
			if ((state & USAGE_COUNT_MASK) != BM_MAX_USAGE_COUNT)
				state += BM_USAGE_COUNT_ONE;

			result = (state & BM_VALID) != 0;

			if (pg_atomic_compare_exchange_u32(&desc->state, &oldstate, state))
				break;

			/* get ready for next loop, oldstate has been updated by cas */
			state = oldstate;
		}
		...
}
other callsites can either just plainly continue to use
LockBufHdr/UnlockBufHdr or converted similarly to PinBuffer().
Greetings,
Andres Freund
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 11 September 2015 at 22:23, YUriy Zhuravlev <u.zhuravlev@postgrespro.ru>
wrote:
Without patch we have 417523 TPS and with patch 965821 TPS for big x86
server.
All details here: https://gist.github.com/stalkerg/773a81b79a27b4d5d63f
Impressive!
I've run this on a single CPU server and don't see any speedup, so I assume
I'm not getting enough contention.
As soon as our 4 socket machine is free I'll try a pgbench run with that.
Just for fun, what's the results if you use -M prepared ?
Regards
David Rowley
--
David Rowley http://www.2ndQuadrant.com/
<http://www.2ndquadrant.com/>
PostgreSQL Development, 24x7 Support, Training & Services
On Friday 11 September 2015 18:14:21 Andres Freund wrote:
This way we can leave the for (;;) loop
in BufferAlloc() thinking that the buffer is unused (and can't be further
pinned because of the held spinlock!)
We release the lock after PinBuffer_Locked in BufferAlloc, so, in essence,
nothing has changed.
--
YUriy Zhuravlev
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 2015-09-11 19:33:26 +0300, YUriy Zhuravlev wrote:
On Friday 11 September 2015 18:14:21 Andres Freund wrote:
This way we can leave the for (;;) loop
in BufferAlloc() thinking that the buffer is unused (and can't be further
pinned because of the held spinlock!)We lost lock after PinBuffer_Locked in BufferAlloc. Therefore, in essence,
nothing has changed.
The relevant piece of code is:
/*
* Need to lock the buffer header too in order to change its tag.
*/
LockBufHdr(buf);
/*
* Somebody could have pinned or re-dirtied the buffer while we were
* doing the I/O and making the new hashtable entry. If so, we can't
* recycle this buffer; we must undo everything we've done and start
* over with a new victim buffer.
*/
oldFlags = buf->flags;
if (buf->refcount == 1 && !(oldFlags & BM_DIRTY))
break;
UnlockBufHdr(buf);
BufTableDelete(&newTag, newHash);
if ((oldFlags & BM_TAG_VALID) &&
oldPartitionLock != newPartitionLock)
LWLockRelease(oldPartitionLock);
LWLockRelease(newPartitionLock);
UnpinBuffer(buf, true);
}
/*
* Okay, it's finally safe to rename the buffer.
*
* Clearing BM_VALID here is necessary, clearing the dirtybits is just
* paranoia. We also reset the usage_count since any recency of use of
* the old content is no longer relevant. (The usage_count starts out at
* 1 so that the buffer can survive one clock-sweep pass.)
*/
buf->tag = newTag;
buf->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_CHECKPOINT_NEEDED | BM_IO_ERROR | BM_PERMANENT);
if (relpersistence == RELPERSISTENCE_PERMANENT)
buf->flags |= BM_TAG_VALID | BM_PERMANENT;
else
buf->flags |= BM_TAG_VALID;
buf->usage_count = 1;
UnlockBufHdr(buf);
so unless I'm missing something, no, we haven't lost the lock.
Greetings,
Andres Freund
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Friday 11 September 2015 18:37:00 you wrote:
so unless I'm missing something, no, we haven't lost the lock.
This section is protected by LWLockAcquire(newPartitionLock,
LW_EXCLUSIVE) before it (and we cannot get this buffer from the hash table).
--
YUriy Zhuravlev
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 2015-09-11 19:46:02 +0300, YUriy Zhuravlev wrote:
On Friday 11 September 2015 18:37:00 you wrote:
so unless I'm missing something, no, we haven't lost the lock.
This section is protected by like LWLockAcquire(newPartitionLock,
LW_EXCLUSIVE); before it (and we can't get this buffer from hash table).
a) As I said upthread there's a patch to remove these locks entirely
b) It doesn't matter anyway. Not every pin goes through the buffer
mapping table. StrategyGetBuffer(), SyncOneBuffer(), ...
Greetings,
Andres Freund
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Friday 11 September 2015 18:50:35 you wrote:
a) As I said upthread there's a patch to remove these locks entirely
It is very interesting. Could you provide a link? And that is not ideal either,
since the bottleneck then becomes PinBuffer/UnpinBuffer instead of the LWLocks.
b) It doesn't matter anyway. Not every pin goes through the buffer
mapping table. StrategyGetBuffer(), SyncOneBuffer(), ...
StrategyGetBuffer is called only from BufferAlloc.
SyncOneBuffer is not a problem either, because:
PinBuffer_Locked(bufHdr);
LWLockAcquire(bufHdr->content_lock, LW_SHARED);
And please read the comment before LockBufHdr(bufHdr) in SyncOneBuffer.
We have checked all the functions that use refcount and usage_count.
Thanks! ^_^
--
YUriy Zhuravlev
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 2015-09-14 13:16:46 +0300, YUriy Zhuravlev wrote:
On Friday 11 September 2015 18:50:35 you wrote:
a) As I said upthread there's a patch to remove these locks entirely
It is very interesting. Could you provide a link?
And it's not very good,
since there is a bottleneck PinBuffer / UnpinBuffer instead of
LWLocks.
Where the bottleneck is entirely depends on your workload. If you have a
high cache replacement ratio the mapping partition locks are frequently
going to be held exclusively.
b) It doesn't matter anyway. Not every pin goes through the buffer
mapping table. StrategyGetBuffer(), SyncOneBuffer(), ...
StrategyGetBuffer call only from BufferAlloc .
It gets called without buffer mapping locks held. And it can (and
frequently will!) access all the buffers in the buffer pool.
SyncOneBuffer not problem too because:
PinBuffer_Locked(bufHdr);
Which you made ineffective because PinBuffer() doesn't take a lock
anymore. Mutual exclusion through locks only works if all participants
take the locks.
We checked all functions with refcount and usage_count.
Adding lockless behaviour by just taking out locks without analyzing the
whole isn't going to fly. You either need to provide backward
compatibility (a LockBuffer that provides actual exclusion) or you
actually need to go carefully through all relevant code and make it
lock-free.
I pointed out how you can actually make this safely lock-free giving you
the interesting code.
Greetings,
Andres Freund
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 2015-09-14 17:41:42 +0200, Andres Freund wrote:
I pointed out how you can actually make this safely lock-free giving you
the interesting code.
And here's an actual implementation of that approach. It's definitely
work-in-progress and could easily be optimized further. Don't have any
big machines to play around with right now tho.
Andres
Attachments:
bufferpin.difftext/x-diff; charset=us-asciiDownload
diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c
index 3ae2848..3e70792 100644
--- a/src/backend/storage/buffer/buf_init.c
+++ b/src/backend/storage/buffer/buf_init.c
@@ -95,12 +95,9 @@ InitBufferPool(void)
BufferDesc *buf = GetBufferDescriptor(i);
CLEAR_BUFFERTAG(buf->tag);
- buf->flags = 0;
- buf->usage_count = 0;
- buf->refcount = 0;
- buf->wait_backend_pid = 0;
- SpinLockInit(&buf->buf_hdr_lock);
+ pg_atomic_init_u32(&buf->state, 0);
+ buf->wait_backend_pid = 0;
buf->buf_id = i;
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 8c0358e..345322a 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -51,6 +51,8 @@
#include "utils/resowner_private.h"
#include "utils/timestamp.h"
+#define likely(x) __builtin_expect((x),1)
+#define unlikely(x) __builtin_expect((x),0)
/* Note: these two macros only work on shared buffers, not local ones! */
#define BufHdrGetBlock(bufHdr) ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))
@@ -774,9 +776,13 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
*/
if (isLocalBuf)
{
+ uint32 state;
+
+ state = pg_atomic_read_u32(&bufHdr->state);
/* Only need to adjust flags */
- Assert(bufHdr->flags & BM_VALID);
- bufHdr->flags &= ~BM_VALID;
+ Assert(state & BM_VALID);
+ state &= ~BM_VALID;
+ pg_atomic_write_u32(&bufHdr->state, state);
}
else
{
@@ -788,8 +794,8 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
do
{
LockBufHdr(bufHdr);
- Assert(bufHdr->flags & BM_VALID);
- bufHdr->flags &= ~BM_VALID;
+ Assert(pg_atomic_read_u32(&bufHdr->state) & BM_VALID);
+ pg_atomic_fetch_and_u32(&bufHdr->state, ~BM_VALID);
UnlockBufHdr(bufHdr);
} while (!StartBufferIO(bufHdr, true));
}
@@ -807,7 +813,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
* it's not been recycled) but come right back here to try smgrextend
* again.
*/
- Assert(!(bufHdr->flags & BM_VALID)); /* spinlock not needed */
+ Assert(!(pg_atomic_read_u32(&bufHdr->state) & BM_VALID)); /* spinlock not needed */
bufBlock = isLocalBuf ? LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr);
@@ -885,7 +891,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
if (isLocalBuf)
{
/* Only need to adjust flags */
- bufHdr->flags |= BM_VALID;
+ pg_atomic_fetch_or_u32(&bufHdr->state, BM_VALID);
}
else
{
@@ -939,7 +945,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
BufferTag oldTag; /* previous identity of selected buffer */
uint32 oldHash; /* hash value for oldTag */
LWLock *oldPartitionLock; /* buffer partition lock for it */
- BufFlags oldFlags;
+ uint32 oldFlags;
int buf_id;
volatile BufferDesc *buf;
bool valid;
@@ -1013,10 +1019,10 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
*/
buf = StrategyGetBuffer(strategy);
- Assert(buf->refcount == 0);
+ Assert((pg_atomic_read_u32(&buf->state) & BUF_REFCOUNT_MASK) == 0);
/* Must copy buffer flags while we still hold the spinlock */
- oldFlags = buf->flags;
+ oldFlags = pg_atomic_read_u32(&buf->state) & BUF_FLAG_MASK;
/* Pin the buffer and then release the buffer spinlock */
PinBuffer_Locked(buf);
@@ -1210,8 +1216,9 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
* recycle this buffer; we must undo everything we've done and start
* over with a new victim buffer.
*/
- oldFlags = buf->flags;
- if (buf->refcount == 1 && !(oldFlags & BM_DIRTY))
+ oldFlags = pg_atomic_read_u32(&buf->state) & BUF_FLAG_MASK;
+ if ((pg_atomic_read_u32(&buf->state) & BUF_REFCOUNT_MASK) == 1 &&
+ !(oldFlags & BM_DIRTY))
break;
UnlockBufHdr(buf);
@@ -1232,12 +1239,19 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
* 1 so that the buffer can survive one clock-sweep pass.)
*/
buf->tag = newTag;
- buf->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_CHECKPOINT_NEEDED | BM_IO_ERROR | BM_PERMANENT);
+ pg_atomic_fetch_and_u32(&buf->state,
+ ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED |
+ BM_CHECKPOINT_NEEDED | BM_IO_ERROR |
+ BM_PERMANENT |
+ BUF_USAGECOUNT_MASK));
if (relpersistence == RELPERSISTENCE_PERMANENT)
- buf->flags |= BM_TAG_VALID | BM_PERMANENT;
+ pg_atomic_fetch_or_u32(&buf->state,
+ BM_TAG_VALID | BM_PERMANENT |
+ BUF_USAGECOUNT_ONE);
else
- buf->flags |= BM_TAG_VALID;
- buf->usage_count = 1;
+ pg_atomic_fetch_or_u32(&buf->state,
+ BM_TAG_VALID |
+ BUF_USAGECOUNT_ONE);
UnlockBufHdr(buf);
@@ -1286,7 +1300,7 @@ InvalidateBuffer(volatile BufferDesc *buf)
BufferTag oldTag;
uint32 oldHash; /* hash value for oldTag */
LWLock *oldPartitionLock; /* buffer partition lock for it */
- BufFlags oldFlags;
+ uint32 oldFlags;
/* Save the original buffer tag before dropping the spinlock */
oldTag = buf->tag;
@@ -1329,7 +1343,7 @@ retry:
* yet done StartBufferIO, WaitIO will fall through and we'll effectively
* be busy-looping here.)
*/
- if (buf->refcount != 0)
+ if ((pg_atomic_read_u32(&buf->state) & BUF_REFCOUNT_MASK) != 0)
{
UnlockBufHdr(buf);
LWLockRelease(oldPartitionLock);
@@ -1344,10 +1358,9 @@ retry:
* Clear out the buffer's tag and flags. We must do this to ensure that
* linear scans of the buffer array don't think the buffer is valid.
*/
- oldFlags = buf->flags;
+ oldFlags = pg_atomic_read_u32(&buf->state) & BUF_FLAG_MASK;
CLEAR_BUFFERTAG(buf->tag);
- buf->flags = 0;
- buf->usage_count = 0;
+ pg_atomic_fetch_and_u32(&buf->state, BM_LOCKED | ~(BUF_FLAG_MASK | BUF_USAGECOUNT_MASK));
UnlockBufHdr(buf);
@@ -1399,12 +1412,12 @@ MarkBufferDirty(Buffer buffer)
LockBufHdr(bufHdr);
- Assert(bufHdr->refcount > 0);
+ Assert((pg_atomic_read_u32(&bufHdr->state) & BUF_REFCOUNT_MASK) > 0);
/*
* If the buffer was not dirty already, do vacuum accounting.
*/
- if (!(bufHdr->flags & BM_DIRTY))
+ if (!(pg_atomic_read_u32(&bufHdr->state) & BM_DIRTY))
{
VacuumPageDirty++;
pgBufferUsage.shared_blks_dirtied++;
@@ -1412,7 +1425,8 @@ MarkBufferDirty(Buffer buffer)
VacuumCostBalance += VacuumCostPageDirty;
}
- bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
+ pg_atomic_fetch_or_u32(&bufHdr->state,
+ BM_DIRTY | BM_JUST_DIRTIED);
UnlockBufHdr(bufHdr);
}
@@ -1495,23 +1509,39 @@ PinBuffer(volatile BufferDesc *buf, BufferAccessStrategy strategy)
if (ref == NULL)
{
+ uint32 state;
+ uint32 oldstate;
+
ReservePrivateRefCountEntry();
ref = NewPrivateRefCountEntry(b);
- LockBufHdr(buf);
- buf->refcount++;
- if (strategy == NULL)
- {
- if (buf->usage_count < BM_MAX_USAGE_COUNT)
- buf->usage_count++;
- }
- else
+ state = pg_atomic_read_u32(&buf->state);
+ oldstate = state;
+
+ while (true)
{
- if (buf->usage_count == 0)
- buf->usage_count = 1;
+ /* spin-wait till lock is free */
+ while (unlikely(state & BM_LOCKED))
+ {
+ pg_spin_delay();
+ state = pg_atomic_read_u32(&buf->state);
+ }
+
+ /* increase refcount */
+ state += 1;
+
+ /* increase usagecount unless already max */
+ if (((state & BUF_USAGECOUNT_MASK) >> BUF_USAGECOUNT_SHIFT) != BM_MAX_USAGE_COUNT)
+ state += BUF_USAGECOUNT_ONE;
+
+ result = (state & BM_VALID) != 0;
+
+ if (likely(pg_atomic_compare_exchange_u32(&buf->state, &oldstate, state)))
+ break;
+
+ /* get ready for next loop, oldstate has been updated by cas */
+ state = oldstate;
}
- result = (buf->flags & BM_VALID) != 0;
- UnlockBufHdr(buf);
}
else
{
@@ -1558,7 +1588,7 @@ PinBuffer_Locked(volatile BufferDesc *buf)
*/
Assert(GetPrivateRefCountEntry(BufferDescriptorGetBuffer(buf), false) == NULL);
- buf->refcount++;
+ pg_atomic_fetch_add_u32(&buf->state, 1);
UnlockBufHdr(buf);
b = BufferDescriptorGetBuffer(buf);
@@ -1594,30 +1624,41 @@ UnpinBuffer(volatile BufferDesc *buf, bool fixOwner)
ref->refcount--;
if (ref->refcount == 0)
{
+ uint32 state;
+
/* I'd better not still hold any locks on the buffer */
Assert(!LWLockHeldByMe(buf->content_lock));
Assert(!LWLockHeldByMe(buf->io_in_progress_lock));
- LockBufHdr(buf);
-
- /* Decrement the shared reference count */
- Assert(buf->refcount > 0);
- buf->refcount--;
+ /*
+ * Decrement the shared reference count.
+ *
+ * Arguably it'd be more robust if we checked for BM_LOCKED here, but
+ * currently all manipulation of ->state for shared buffers is through
+ * atomics.
+ */
+ state = pg_atomic_fetch_sub_u32(&buf->state, 1);
+ Assert((state & BUF_REFCOUNT_MASK) > 0);
/* Support LockBufferForCleanup() */
- if ((buf->flags & BM_PIN_COUNT_WAITER) &&
- buf->refcount == 1)
+ if (state & BM_PIN_COUNT_WAITER)
{
- /* we just released the last pin other than the waiter's */
- int wait_backend_pid = buf->wait_backend_pid;
+ LockBufHdr(buf);
- buf->flags &= ~BM_PIN_COUNT_WAITER;
- UnlockBufHdr(buf);
- ProcSendSignal(wait_backend_pid);
- }
- else
- UnlockBufHdr(buf);
+ if (pg_atomic_read_u32(&buf->state) & BM_PIN_COUNT_WAITER &&
+ (pg_atomic_read_u32(&buf->state) & BUF_REFCOUNT_MASK) == 1)
+ {
+ /* we just released the last pin other than the waiter's */
+ int wait_backend_pid = buf->wait_backend_pid;
+ pg_atomic_fetch_and_u32(&buf->state,
+ ~BM_PIN_COUNT_WAITER);
+ UnlockBufHdr(buf);
+ ProcSendSignal(wait_backend_pid);
+ }
+ else
+ UnlockBufHdr(buf);
+ }
ForgetPrivateRefCountEntry(ref);
}
}
@@ -1680,9 +1721,10 @@ BufferSync(int flags)
*/
LockBufHdr(bufHdr);
- if ((bufHdr->flags & mask) == mask)
+ if ((pg_atomic_read_u32(&bufHdr->state) & mask) == mask)
{
- bufHdr->flags |= BM_CHECKPOINT_NEEDED;
+ pg_atomic_fetch_or_u32(&bufHdr->state,
+ BM_CHECKPOINT_NEEDED);
num_to_write++;
}
@@ -1721,7 +1763,7 @@ BufferSync(int flags)
* write the buffer though we didn't need to. It doesn't seem worth
* guarding against this, though.
*/
- if (bufHdr->flags & BM_CHECKPOINT_NEEDED)
+ if (pg_atomic_read_u32(&bufHdr->state) & BM_CHECKPOINT_NEEDED)
{
if (SyncOneBuffer(buf_id, false) & BUF_WRITTEN)
{
@@ -2081,6 +2123,7 @@ SyncOneBuffer(int buf_id, bool skip_recently_used)
{
volatile BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
int result = 0;
+ uint32 state;
ReservePrivateRefCountEntry();
@@ -2095,7 +2138,10 @@ SyncOneBuffer(int buf_id, bool skip_recently_used)
*/
LockBufHdr(bufHdr);
- if (bufHdr->refcount == 0 && bufHdr->usage_count == 0)
+ state = pg_atomic_read_u32(&bufHdr->state);
+
+ if ((state & BUF_REFCOUNT_MASK) == 0 &&
+ (state & BUF_USAGECOUNT_MASK) == 0)
result |= BUF_REUSABLE;
else if (skip_recently_used)
{
@@ -2104,7 +2150,7 @@ SyncOneBuffer(int buf_id, bool skip_recently_used)
return result;
}
- if (!(bufHdr->flags & BM_VALID) || !(bufHdr->flags & BM_DIRTY))
+ if (!(state & BM_VALID) || !(state & BM_DIRTY))
{
/* It's clean, so nothing to do */
UnlockBufHdr(bufHdr);
@@ -2256,6 +2302,7 @@ PrintBufferLeakWarning(Buffer buffer)
int32 loccount;
char *path;
BackendId backend;
+ uint32 state;
Assert(BufferIsValid(buffer));
if (BufferIsLocal(buffer))
@@ -2273,12 +2320,13 @@ PrintBufferLeakWarning(Buffer buffer)
/* theoretically we should lock the bufhdr here */
path = relpathbackend(buf->tag.rnode, backend, buf->tag.forkNum);
+ state = pg_atomic_read_u32(&buf->state);
elog(WARNING,
"buffer refcount leak: [%03d] "
"(rel=%s, blockNum=%u, flags=0x%x, refcount=%u %d)",
buffer, path,
- buf->tag.blockNum, buf->flags,
- buf->refcount, loccount);
+ buf->tag.blockNum, state & BUF_FLAG_MASK,
+ state & BUF_REFCOUNT_MASK, loccount);
pfree(path);
}
@@ -2424,7 +2472,7 @@ FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln)
recptr = BufferGetLSN(buf);
/* To check if block content changes while flushing. - vadim 01/17/97 */
- buf->flags &= ~BM_JUST_DIRTIED;
+ pg_atomic_fetch_and_u32(&buf->state, ~BM_JUST_DIRTIED);
UnlockBufHdr(buf);
/*
@@ -2444,7 +2492,7 @@ FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln)
* disastrous system-wide consequences. To make sure that can't happen,
* skip the flush if the buffer isn't permanent.
*/
- if (buf->flags & BM_PERMANENT)
+ if (pg_atomic_read_u32(&buf->state) & BM_PERMANENT)
XLogFlush(recptr);
/*
@@ -2538,7 +2586,7 @@ BufferIsPermanent(Buffer buffer)
* old value or the new value, but not random garbage.
*/
bufHdr = GetBufferDescriptor(buffer - 1);
- return (bufHdr->flags & BM_PERMANENT) != 0;
+ return (pg_atomic_read_u32(&bufHdr->state) & BM_PERMANENT) != 0;
}
/*
@@ -2874,7 +2922,8 @@ FlushRelationBuffers(Relation rel)
{
bufHdr = GetLocalBufferDescriptor(i);
if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) &&
- (bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
+ (pg_atomic_read_u32(&bufHdr->state) & (BM_VALID | BM_DIRTY))
+ == (BM_VALID | BM_DIRTY))
{
ErrorContextCallback errcallback;
Page localpage;
@@ -2895,7 +2944,7 @@ FlushRelationBuffers(Relation rel)
localpage,
false);
- bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
+ pg_atomic_fetch_and_u32(&bufHdr->state, ~(BM_DIRTY | BM_JUST_DIRTIED));
/* Pop the error context stack */
error_context_stack = errcallback.previous;
@@ -2923,7 +2972,8 @@ FlushRelationBuffers(Relation rel)
LockBufHdr(bufHdr);
if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) &&
- (bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
+ (pg_atomic_read_u32(&bufHdr->state) & (BM_VALID | BM_DIRTY))
+ == (BM_VALID | BM_DIRTY))
{
PinBuffer_Locked(bufHdr);
LWLockAcquire(bufHdr->content_lock, LW_SHARED);
@@ -2975,7 +3025,8 @@ FlushDatabaseBuffers(Oid dbid)
LockBufHdr(bufHdr);
if (bufHdr->tag.rnode.dbNode == dbid &&
- (bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
+ (pg_atomic_read_u32(&bufHdr->state) & (BM_VALID | BM_DIRTY))
+ == (BM_VALID | BM_DIRTY))
{
PinBuffer_Locked(bufHdr);
LWLockAcquire(bufHdr->content_lock, LW_SHARED);
@@ -3093,12 +3144,13 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
* is only intended to be used in cases where failing to write out the
* data would be harmless anyway, it doesn't really matter.
*/
- if ((bufHdr->flags & (BM_DIRTY | BM_JUST_DIRTIED)) !=
+ if ((pg_atomic_read_u32(&bufHdr->state) & (BM_DIRTY | BM_JUST_DIRTIED)) !=
(BM_DIRTY | BM_JUST_DIRTIED))
{
XLogRecPtr lsn = InvalidXLogRecPtr;
bool dirtied = false;
bool delayChkpt = false;
+ uint32 state;
/*
* If we need to protect hint bit updates from torn writes, WAL-log a
@@ -3109,7 +3161,7 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
* We don't check full_page_writes here because that logic is included
* when we call XLogInsert() since the value changes dynamically.
*/
- if (XLogHintBitIsNeeded() && (bufHdr->flags & BM_PERMANENT))
+ if (XLogHintBitIsNeeded() && (pg_atomic_read_u32(&bufHdr->state) & BM_PERMANENT))
{
/*
* If we're in recovery we cannot dirty a page because of a hint.
@@ -3149,8 +3201,12 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
}
LockBufHdr(bufHdr);
- Assert(bufHdr->refcount > 0);
- if (!(bufHdr->flags & BM_DIRTY))
+
+ state = pg_atomic_read_u32(&bufHdr->state);
+
+ Assert((state & BUF_REFCOUNT_MASK) > 0);
+
+ if (!(state & BM_DIRTY))
{
dirtied = true; /* Means "will be dirtied by this action" */
@@ -3170,7 +3226,9 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
if (!XLogRecPtrIsInvalid(lsn))
PageSetLSN(page, lsn);
}
- bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
+
+ pg_atomic_fetch_or_u32(&bufHdr->state, BM_DIRTY | BM_JUST_DIRTIED);
+
UnlockBufHdr(bufHdr);
if (delayChkpt)
@@ -3208,9 +3266,9 @@ UnlockBuffers(void)
* Don't complain if flag bit not set; it could have been reset but we
* got a cancel/die interrupt before getting the signal.
*/
- if ((buf->flags & BM_PIN_COUNT_WAITER) != 0 &&
+ if ((pg_atomic_read_u32(&buf->state) & BM_PIN_COUNT_WAITER) != 0 &&
buf->wait_backend_pid == MyProcPid)
- buf->flags &= ~BM_PIN_COUNT_WAITER;
+ pg_atomic_fetch_and_u32(&buf->state, ~BM_PIN_COUNT_WAITER);
UnlockBufHdr(buf);
@@ -3304,25 +3362,30 @@ LockBufferForCleanup(Buffer buffer)
for (;;)
{
+ int state;
+
/* Try to acquire lock */
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
LockBufHdr(bufHdr);
- Assert(bufHdr->refcount > 0);
- if (bufHdr->refcount == 1)
+
+ state = pg_atomic_read_u32(&bufHdr->state);
+
+ Assert((state & BUF_REFCOUNT_MASK) > 0);
+ if ((state & BUF_REFCOUNT_MASK) == 1)
{
/* Successfully acquired exclusive lock with pincount 1 */
UnlockBufHdr(bufHdr);
return;
}
/* Failed, so mark myself as waiting for pincount 1 */
- if (bufHdr->flags & BM_PIN_COUNT_WAITER)
+ if (state & BM_PIN_COUNT_WAITER)
{
UnlockBufHdr(bufHdr);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
elog(ERROR, "multiple backends attempting to wait for pincount 1");
}
bufHdr->wait_backend_pid = MyProcPid;
- bufHdr->flags |= BM_PIN_COUNT_WAITER;
+ pg_atomic_fetch_or_u32(&bufHdr->state, BM_PIN_COUNT_WAITER);
PinCountWaitBuf = bufHdr;
UnlockBufHdr(bufHdr);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
@@ -3349,9 +3412,9 @@ LockBufferForCleanup(Buffer buffer)
* better be safe.
*/
LockBufHdr(bufHdr);
- if ((bufHdr->flags & BM_PIN_COUNT_WAITER) != 0 &&
+ if ((pg_atomic_read_u32(&bufHdr->state) & BM_PIN_COUNT_WAITER) != 0 &&
bufHdr->wait_backend_pid == MyProcPid)
- bufHdr->flags &= ~BM_PIN_COUNT_WAITER;
+ pg_atomic_fetch_and_u32(&bufHdr->state, ~BM_PIN_COUNT_WAITER);
UnlockBufHdr(bufHdr);
PinCountWaitBuf = NULL;
@@ -3393,22 +3456,25 @@ bool
ConditionalLockBufferForCleanup(Buffer buffer)
{
volatile BufferDesc *bufHdr;
+ uint32 refcount;
Assert(BufferIsValid(buffer));
if (BufferIsLocal(buffer))
{
+ refcount = LocalRefCount[-buffer - 1];
/* There should be exactly one pin */
- Assert(LocalRefCount[-buffer - 1] > 0);
- if (LocalRefCount[-buffer - 1] != 1)
+ Assert(refcount > 0);
+ if (refcount != 1)
return false;
/* Nobody else to wait for */
return true;
}
/* There should be exactly one local pin */
- Assert(GetPrivateRefCount(buffer) > 0);
- if (GetPrivateRefCount(buffer) != 1)
+ refcount = GetPrivateRefCount(buffer);
+ Assert(refcount);
+ if (refcount != 1)
return false;
/* Try to acquire lock */
@@ -3417,8 +3483,10 @@ ConditionalLockBufferForCleanup(Buffer buffer)
bufHdr = GetBufferDescriptor(buffer - 1);
LockBufHdr(bufHdr);
- Assert(bufHdr->refcount > 0);
- if (bufHdr->refcount == 1)
+
+ refcount = pg_atomic_read_u32(&bufHdr->state) & BUF_REFCOUNT_MASK;
+ Assert(refcount > 0);
+ if (refcount == 1)
{
/* Successfully acquired exclusive lock with pincount 1 */
UnlockBufHdr(bufHdr);
@@ -3456,7 +3524,7 @@ WaitIO(volatile BufferDesc *buf)
*/
for (;;)
{
- BufFlags sv_flags;
+ uint32 state;
/*
* It may not be necessary to acquire the spinlock to check the flag
@@ -3464,9 +3532,10 @@ WaitIO(volatile BufferDesc *buf)
* play it safe.
*/
LockBufHdr(buf);
- sv_flags = buf->flags;
+ state = pg_atomic_read_u32(&buf->state);
UnlockBufHdr(buf);
- if (!(sv_flags & BM_IO_IN_PROGRESS))
+
+ if (!(state & BM_IO_IN_PROGRESS))
break;
LWLockAcquire(buf->io_in_progress_lock, LW_SHARED);
LWLockRelease(buf->io_in_progress_lock);
@@ -3494,6 +3563,8 @@ WaitIO(volatile BufferDesc *buf)
static bool
StartBufferIO(volatile BufferDesc *buf, bool forInput)
{
+ uint32 state;
+
Assert(!InProgressBuf);
for (;;)
@@ -3506,7 +3577,9 @@ StartBufferIO(volatile BufferDesc *buf, bool forInput)
LockBufHdr(buf);
- if (!(buf->flags & BM_IO_IN_PROGRESS))
+ state = pg_atomic_read_u32(&buf->state);
+
+ if (!(state & BM_IO_IN_PROGRESS))
break;
/*
@@ -3522,7 +3595,7 @@ StartBufferIO(volatile BufferDesc *buf, bool forInput)
/* Once we get here, there is definitely no I/O active on this buffer */
- if (forInput ? (buf->flags & BM_VALID) : !(buf->flags & BM_DIRTY))
+ if (forInput ? (state & BM_VALID) : !(state & BM_DIRTY))
{
/* someone else already did the I/O */
UnlockBufHdr(buf);
@@ -3530,7 +3603,7 @@ StartBufferIO(volatile BufferDesc *buf, bool forInput)
return false;
}
- buf->flags |= BM_IO_IN_PROGRESS;
+ pg_atomic_fetch_or_u32(&buf->state, BM_IO_IN_PROGRESS);
UnlockBufHdr(buf);
@@ -3565,11 +3638,13 @@ TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty,
LockBufHdr(buf);
- Assert(buf->flags & BM_IO_IN_PROGRESS);
- buf->flags &= ~(BM_IO_IN_PROGRESS | BM_IO_ERROR);
- if (clear_dirty && !(buf->flags & BM_JUST_DIRTIED))
- buf->flags &= ~(BM_DIRTY | BM_CHECKPOINT_NEEDED);
- buf->flags |= set_flag_bits;
+ Assert(pg_atomic_read_u32(&buf->state) & BM_IO_IN_PROGRESS);
+
+ pg_atomic_fetch_and_u32(&buf->state, ~(BM_IO_IN_PROGRESS | BM_IO_ERROR));
+ if (clear_dirty && !(pg_atomic_read_u32(&buf->state) & BM_JUST_DIRTIED))
+ pg_atomic_fetch_and_u32(&buf->state, ~(BM_DIRTY | BM_CHECKPOINT_NEEDED));
+
+ pg_atomic_fetch_or_u32(&buf->state, set_flag_bits);
UnlockBufHdr(buf);
@@ -3603,23 +3678,24 @@ AbortBufferIO(void)
LWLockAcquire(buf->io_in_progress_lock, LW_EXCLUSIVE);
LockBufHdr(buf);
- Assert(buf->flags & BM_IO_IN_PROGRESS);
+ Assert(pg_atomic_read_u32(&buf->state) & BM_IO_IN_PROGRESS);
if (IsForInput)
{
- Assert(!(buf->flags & BM_DIRTY));
+ Assert(!(pg_atomic_read_u32(&buf->state) & BM_DIRTY));
+
/* We'd better not think buffer is valid yet */
- Assert(!(buf->flags & BM_VALID));
+ Assert(!(pg_atomic_read_u32(&buf->state) & BM_VALID));
UnlockBufHdr(buf);
}
else
{
- BufFlags sv_flags;
+ uint32 state;
- sv_flags = buf->flags;
- Assert(sv_flags & BM_DIRTY);
+ state = pg_atomic_read_u32(&buf->state);
+ Assert(state & BM_DIRTY);
UnlockBufHdr(buf);
/* Issue notice if this is not the first failure... */
- if (sv_flags & BM_IO_ERROR)
+ if (state & BM_IO_ERROR)
{
/* Buffer is pinned, so we can read tag without spinlock */
char *path;
@@ -3701,3 +3777,33 @@ rnode_comparator(const void *p1, const void *p2)
else
return 0;
}
+
+void
+LockBufHdr(volatile BufferDesc *desc)
+{
+ uint32 state = pg_atomic_read_u32(&desc->state);
+
+ for (;;)
+ {
+ /* wait till lock is free */
+ while (unlikely(state & BM_LOCKED))
+ {
+ pg_spin_delay();
+ state = pg_atomic_read_u32(&desc->state);
+
+ /* Add exponential backoff? Should seldomly be contended tho. */
+ }
+
+ /* and try to get lock */
+ if (pg_atomic_compare_exchange_u32(&desc->state, &state, state | BM_LOCKED))
+ break;
+ }
+}
+
+void
+UnlockBufHdr(volatile BufferDesc *desc)
+{
+ Assert(pg_atomic_read_u32(&desc->state) & BM_LOCKED);
+
+ pg_atomic_fetch_sub_u32(&desc->state, BM_LOCKED);
+}
diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c
index bc2c773..3f2227b 100644
--- a/src/backend/storage/buffer/freelist.c
+++ b/src/backend/storage/buffer/freelist.c
@@ -250,6 +250,8 @@ StrategyGetBuffer(BufferAccessStrategy strategy)
{
while (true)
{
+ uint32 state;
+
/* Acquire the spinlock to remove element from the freelist */
SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
@@ -280,7 +282,9 @@ StrategyGetBuffer(BufferAccessStrategy strategy)
* of 8.3, but we'd better check anyway.)
*/
LockBufHdr(buf);
- if (buf->refcount == 0 && buf->usage_count == 0)
+ state = pg_atomic_read_u32(&buf->state);
+ if ((state & BUF_REFCOUNT_MASK) == 0
+ && (state & BUF_USAGECOUNT_MASK) == 0)
{
if (strategy != NULL)
AddBufferToRing(strategy, buf);
@@ -295,6 +299,7 @@ StrategyGetBuffer(BufferAccessStrategy strategy)
trycounter = NBuffers;
for (;;)
{
+ uint32 state;
buf = GetBufferDescriptor(ClockSweepTick());
@@ -303,11 +308,15 @@ StrategyGetBuffer(BufferAccessStrategy strategy)
* it; decrement the usage_count (unless pinned) and keep scanning.
*/
LockBufHdr(buf);
- if (buf->refcount == 0)
+
+ state = pg_atomic_read_u32(&buf->state);
+
+ if ((state & BUF_REFCOUNT_MASK) == 0)
{
- if (buf->usage_count > 0)
+ if ((state & BUF_USAGECOUNT_MASK) != 0)
{
- buf->usage_count--;
+ pg_atomic_fetch_sub_u32(&buf->state, BUF_USAGECOUNT_ONE);
+
trycounter = NBuffers;
}
else
@@ -589,6 +598,8 @@ GetBufferFromRing(BufferAccessStrategy strategy)
{
volatile BufferDesc *buf;
Buffer bufnum;
+ uint32 state;
+ uint32 usagecount;
/* Advance to next ring slot */
if (++strategy->current >= strategy->ring_size)
@@ -617,7 +628,10 @@ GetBufferFromRing(BufferAccessStrategy strategy)
*/
buf = GetBufferDescriptor(bufnum - 1);
LockBufHdr(buf);
- if (buf->refcount == 0 && buf->usage_count <= 1)
+ state = pg_atomic_read_u32(&buf->state);
+ usagecount = (state & BUF_USAGECOUNT_MASK) >> BUF_USAGECOUNT_SHIFT;
+ if ((state & BUF_REFCOUNT_MASK) == 0
+ && usagecount <= 1)
{
strategy->current_was_in_ring = true;
return buf;
diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c
index 3144afe..1e11d71 100644
--- a/src/backend/storage/buffer/localbuf.c
+++ b/src/backend/storage/buffer/localbuf.c
@@ -108,6 +108,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
int b;
int trycounter;
bool found;
+ uint32 state;
INIT_BUFFERTAG(newTag, smgr->smgr_rnode.node, forkNum, blockNum);
@@ -128,16 +129,25 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
fprintf(stderr, "LB ALLOC (%u,%d,%d) %d\n",
smgr->smgr_rnode.node.relNode, forkNum, blockNum, -b - 1);
#endif
+ state = pg_atomic_read_u32(&bufHdr->state);
+
/* this part is equivalent to PinBuffer for a shared buffer */
if (LocalRefCount[b] == 0)
{
- if (bufHdr->usage_count < BM_MAX_USAGE_COUNT)
- bufHdr->usage_count++;
+ int usagecount;
+
+ usagecount = (state & BUF_USAGECOUNT_MASK) >> BUF_USAGECOUNT_SHIFT;
+
+ if (usagecount < BM_MAX_USAGE_COUNT)
+ {
+ state += BUF_USAGECOUNT_ONE;
+ pg_atomic_write_u32(&bufHdr->state, state);
+ }
}
LocalRefCount[b]++;
ResourceOwnerRememberBuffer(CurrentResourceOwner,
BufferDescriptorGetBuffer(bufHdr));
- if (bufHdr->flags & BM_VALID)
+ if (state & BM_VALID)
*foundPtr = TRUE;
else
{
@@ -169,9 +179,15 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
if (LocalRefCount[b] == 0)
{
- if (bufHdr->usage_count > 0)
+ int usagecount;
+
+ state = pg_atomic_read_u32(&bufHdr->state);
+ usagecount = (state & BUF_USAGECOUNT_MASK) >> BUF_USAGECOUNT_SHIFT;
+
+ if (usagecount > 0)
{
- bufHdr->usage_count--;
+ state -= BUF_USAGECOUNT_ONE;
+ pg_atomic_write_u32(&bufHdr->state, state);
trycounter = NLocBuffer;
}
else
@@ -193,7 +209,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
* this buffer is not referenced but it might still be dirty. if that's
* the case, write it out before reusing it!
*/
- if (bufHdr->flags & BM_DIRTY)
+ if (state & BM_DIRTY)
{
SMgrRelation oreln;
Page localpage = (char *) LocalBufHdrGetBlock(bufHdr);
@@ -211,7 +227,8 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
false);
/* Mark not-dirty now in case we error out below */
- bufHdr->flags &= ~BM_DIRTY;
+ state &= ~BM_DIRTY;
+ pg_atomic_write_u32(&bufHdr->state, state);
pgBufferUsage.local_blks_written++;
}
@@ -228,7 +245,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
/*
* Update the hash table: remove old entry, if any, and make new one.
*/
- if (bufHdr->flags & BM_TAG_VALID)
+ if (state & BM_TAG_VALID)
{
hresult = (LocalBufferLookupEnt *)
hash_search(LocalBufHash, (void *) &bufHdr->tag,
@@ -237,7 +254,8 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
elog(ERROR, "local buffer hash table corrupted");
/* mark buffer invalid just in case hash insert fails */
CLEAR_BUFFERTAG(bufHdr->tag);
- bufHdr->flags &= ~(BM_VALID | BM_TAG_VALID);
+ state &= ~(BM_VALID | BM_TAG_VALID);
+ pg_atomic_write_u32(&bufHdr->state, state);
}
hresult = (LocalBufferLookupEnt *)
@@ -250,9 +268,11 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
* it's all ours now.
*/
bufHdr->tag = newTag;
- bufHdr->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_IO_ERROR);
- bufHdr->flags |= BM_TAG_VALID;
- bufHdr->usage_count = 1;
+ state &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_IO_ERROR);
+ state |= BM_TAG_VALID;
+ state &= ~BUF_USAGECOUNT_MASK;
+ state += BUF_USAGECOUNT_ONE;
+ pg_atomic_write_u32(&bufHdr->state, state);
*foundPtr = FALSE;
return bufHdr;
@@ -267,6 +287,7 @@ MarkLocalBufferDirty(Buffer buffer)
{
int bufid;
BufferDesc *bufHdr;
+ uint32 state;
Assert(BufferIsLocal(buffer));
@@ -280,10 +301,13 @@ MarkLocalBufferDirty(Buffer buffer)
bufHdr = GetLocalBufferDescriptor(bufid);
- if (!(bufHdr->flags & BM_DIRTY))
+ state = pg_atomic_read_u32(&bufHdr->state);
+
+ if (!(state & BM_DIRTY))
pgBufferUsage.local_blks_dirtied++;
- bufHdr->flags |= BM_DIRTY;
+ state |= BM_DIRTY;
+ pg_atomic_write_u32(&bufHdr->state, state);
}
/*
@@ -307,8 +331,11 @@ DropRelFileNodeLocalBuffers(RelFileNode rnode, ForkNumber forkNum,
{
BufferDesc *bufHdr = GetLocalBufferDescriptor(i);
LocalBufferLookupEnt *hresult;
+ uint32 state;
- if ((bufHdr->flags & BM_TAG_VALID) &&
+ state = pg_atomic_read_u32(&bufHdr->state);
+
+ if ((state & BM_TAG_VALID) &&
RelFileNodeEquals(bufHdr->tag.rnode, rnode) &&
bufHdr->tag.forkNum == forkNum &&
bufHdr->tag.blockNum >= firstDelBlock)
@@ -327,8 +354,9 @@ DropRelFileNodeLocalBuffers(RelFileNode rnode, ForkNumber forkNum,
elog(ERROR, "local buffer hash table corrupted");
/* Mark buffer invalid */
CLEAR_BUFFERTAG(bufHdr->tag);
- bufHdr->flags = 0;
- bufHdr->usage_count = 0;
+ state &= ~BUF_FLAG_MASK;
+ state &= ~BUF_USAGECOUNT_MASK;
+ pg_atomic_write_u32(&bufHdr->state, state);
}
}
}
@@ -349,8 +377,11 @@ DropRelFileNodeAllLocalBuffers(RelFileNode rnode)
{
BufferDesc *bufHdr = GetLocalBufferDescriptor(i);
LocalBufferLookupEnt *hresult;
+ uint32 state;
+
+ state = pg_atomic_read_u32(&bufHdr->state);
- if ((bufHdr->flags & BM_TAG_VALID) &&
+ if ((state & BM_TAG_VALID) &&
RelFileNodeEquals(bufHdr->tag.rnode, rnode))
{
if (LocalRefCount[i] != 0)
@@ -367,8 +398,9 @@ DropRelFileNodeAllLocalBuffers(RelFileNode rnode)
elog(ERROR, "local buffer hash table corrupted");
/* Mark buffer invalid */
CLEAR_BUFFERTAG(bufHdr->tag);
- bufHdr->flags = 0;
- bufHdr->usage_count = 0;
+ state &= ~BUF_FLAG_MASK;
+ state &= ~BUF_USAGECOUNT_MASK;
+ pg_atomic_write_u32(&bufHdr->state, state);
}
}
}
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index 521ee1c..92889e6 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -20,29 +20,40 @@
#include "storage/lwlock.h"
#include "storage/shmem.h"
#include "storage/smgr.h"
+#include "port/atomics.h"
#include "storage/spin.h"
#include "utils/relcache.h"
/*
+ * State is:
+ * 10 bit flags
+ * 4 bit usage count
+ * 18 bit refcount
+ */
+#define BUF_REFCOUNT_MASK ((1U << 18) - 1)
+#define BUF_FLAG_MASK 0xFFC00000U
+#define BUF_USAGECOUNT_MASK 0x003C0000U
+#define BUF_USAGECOUNT_ONE (1U << 18)
+#define BUF_USAGECOUNT_SHIFT 18
+
+/*
* Flags for buffer descriptors
*
* Note: TAG_VALID essentially means that there is a buffer hashtable
* entry associated with the buffer's tag.
*/
-#define BM_DIRTY (1 << 0) /* data needs writing */
-#define BM_VALID (1 << 1) /* data is valid */
-#define BM_TAG_VALID (1 << 2) /* tag is assigned */
-#define BM_IO_IN_PROGRESS (1 << 3) /* read or write in progress */
-#define BM_IO_ERROR (1 << 4) /* previous I/O failed */
-#define BM_JUST_DIRTIED (1 << 5) /* dirtied since write started */
-#define BM_PIN_COUNT_WAITER (1 << 6) /* have waiter for sole pin */
-#define BM_CHECKPOINT_NEEDED (1 << 7) /* must write for checkpoint */
-#define BM_PERMANENT (1 << 8) /* permanent relation (not
+#define BM_LOCKED (1U << 22) /* buffer header is locked */
+#define BM_DIRTY (1U << 23) /* data needs writing */
+#define BM_VALID (1U << 24) /* data is valid */
+#define BM_TAG_VALID (1U << 25) /* tag is assigned */
+#define BM_IO_IN_PROGRESS (1U << 26) /* read or write in progress */
+#define BM_IO_ERROR (1U << 27) /* previous I/O failed */
+#define BM_JUST_DIRTIED (1U << 28) /* dirtied since write started */
+#define BM_PIN_COUNT_WAITER (1U << 29) /* have waiter for sole pin */
+#define BM_CHECKPOINT_NEEDED (1U << 30) /* must write for checkpoint */
+#define BM_PERMANENT (1U << 31) /* permanent relation (not
* unlogged) */
-
-typedef bits16 BufFlags;
-
/*
* The maximum allowed value of usage_count represents a tradeoff between
* accuracy and speed of the clock-sweep buffer management algorithm. A
@@ -137,12 +148,11 @@ typedef struct buftag
typedef struct BufferDesc
{
BufferTag tag; /* ID of page contained in buffer */
- BufFlags flags; /* see bit definitions above */
- uint16 usage_count; /* usage counter for clock sweep code */
- unsigned refcount; /* # of backends holding pins on buffer */
- int wait_backend_pid; /* backend PID of pin-count waiter */
- slock_t buf_hdr_lock; /* protects the above fields */
+ /* state of the tag, containing flags, refcount and usagecount */
+ pg_atomic_uint32 state;
+
+ int wait_backend_pid; /* backend PID of pin-count waiter */
int buf_id; /* buffer's index number (from 0) */
int freeNext; /* link in freelist chain */
@@ -192,16 +202,11 @@ typedef union BufferDescPadded
#define FREENEXT_NOT_IN_LIST (-2)
/*
- * Macros for acquiring/releasing a shared buffer header's spinlock.
- * Do not apply these to local buffers!
- *
- * Note: as a general coding rule, if you are using these then you probably
- * need to be using a volatile-qualified pointer to the buffer header, to
- * ensure that the compiler doesn't rearrange accesses to the header to
- * occur before or after the spinlock is acquired/released.
+ * Functions for acquiring/releasing a shared buffer header's spinlock. Do
+ * not apply these to local buffers! FIXUP!
*/
-#define LockBufHdr(bufHdr) SpinLockAcquire(&(bufHdr)->buf_hdr_lock)
-#define UnlockBufHdr(bufHdr) SpinLockRelease(&(bufHdr)->buf_hdr_lock)
+extern void LockBufHdr(volatile BufferDesc *desc);
+extern void UnlockBufHdr(volatile BufferDesc *desc);
/* in buf_init.c */
On Tuesday 15 September 2015 04:06:25 Andres Freund wrote:
And here's an actual implementation of that approach. It's definitely
work-in-progress and could easily be optimized further. Don't have any
big machines to play around with right now tho.
Thanks. Interesting.
We had a version like your patch. But this is only half the work. Example:
state = pg_atomic_read_u32(&buf->state);
if ((state & BUF_REFCOUNT_MASK) == 0
&& (state & BUF_USAGECOUNT_MASK) == 0)
After the first command, somebody else can change buf->state, so the local copy of the state is no longer up to date.
In that embodiment, there is no significant difference between the two
patches. To do this honestly, a CAS would be needed around the whole IF statement.
Thanks! Hope for understanding. ^_^
--
YUriy Zhuravlev
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Saturday 12 September 2015 04:15:43 David Rowley wrote:
I've run this on a single CPU server and don't see any speedup, so I assume
I'm not getting enough contention.
As soon as our 4 socket machine is free I'll try a pgbench run with that.
Excellent! Will wait.
Just for fun, what's the results if you use -M prepared ?
Unfortunately now we can not check. :(
--
YUriy Zhuravlev
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Mon, Sep 14, 2015 at 9:06 PM, Andres Freund <andres@anarazel.de> wrote:
On 2015-09-14 17:41:42 +0200, Andres Freund wrote:
I pointed out how you can actually make this safely lock-free giving you
the interesting code.And here's an actual implementation of that approach. It's definitely
work-in-progress and could easily be optimized further. Don't have any
big machines to play around with right now tho.
Are you confident this is faster across all workloads? Pin/Unpin are
probably faster but this comes at a cost of extra atomic ops during
the clock sweep loop. I wonder if this will degrade results under
heavy contention.
Also, I'm curious about your introduction of __builtin_expect()
macros. Did you measure any gain from them? I bet there are other
places they could be used -- for example the mvcc hint bit checks on
xmin.
merlin
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 2015-09-15 12:51:24 +0300, YUriy Zhuravlev wrote:
We had a version like your patch. But this is only half the work. Example:
state = pg_atomic_read_u32(&buf->state);
if ((state & BUF_REFCOUNT_MASK) == 0
&& (state & BUF_USAGECOUNT_MASK) == 0)
After the first command, somebody else can change buf->state, so the local copy of the state is no longer up to date.
No, they can't in a relevant manner. We hold the buffer header lock.
In that embodiment, there is no significant difference between the two
patches. To do this honestly, a CAS would be needed around the whole IF statement.
What?
Thanks! Hope for understanding. ^_^
There's pretty little understanding left at this point. You're posting
things for review and you seem completely unwilling to actually respond
to points raised.
Greetings,
Andres Freund
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 2015-09-15 08:07:57 -0500, Merlin Moncure wrote:
Are you confident this is faster across all workloads?
No. This is a proof of concept I just wrote & posted because I didn't
see the patch moving in the right direction. But I do think it can be
made faster in all relevant workloads.
Pin/Unpin are probably faster but this comes at a cost of extra atomic
ops during the clock sweep loop. I wonder if this will degrade
results under heavy contention.
I think it's actually going to be faster under contention, and the
situation where it's slower is uncontended workloads where you a very
very low cache hit ratio.
Also, I'm curious about your introduction of __builtin_expect()
macros. Did you measure any gain from them?
I introduced them because I was bothered by the generated assembler ;)
But a bit more seriously, I do think there's some benefit in influencing
the code like that. I personally also find they *increase* readability
in cases like this where the likely() branch should be taken just about
all the time.
I bet there are other places they could be used -- for example the
mvcc hint bit checks on xmin.
I don't think those are good candidates, there's too many cases where
it's common to have the majority of cases go the other way.
Greetings,
Andres Freund
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Tue, Sep 15, 2015 at 9:56 AM, Andres Freund <andres@anarazel.de> wrote:
On 2015-09-15 08:07:57 -0500, Merlin Moncure wrote:
Also, I'm curious about your introduction of __builtin_expect()
macros. Did you measure any gain from them?I introduced them because I was bothered by the generated assembler ;)
But a bit more seriously, I do think there's some benefit in influencing
the code like that. I personally also find they *increase* readability
in cases like this where the likely() branch should be taken just about
all the time.
right. For posterity, I agree with this.
I bet there are other places they could be used -- for example the
mvcc hint bit checks on xmin.I don't think those are good candidates, there's too many cases where
it's common to have the majority of cases go the other way.
Maybe, but, consider that penalty vs win is asymmetric. If the hint
bit isn't set, you're doing a lot of other work anyways such that the
branch penalty falls away to noise while if you win the benefits are
significant against the tight tuple scan loop.
Anyways, as it pertains to *this* patch, +1 for adding that feature.
merlin
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Tuesday 15 September 2015 16:50:44 Andres Freund wrote:
No, they can't in a a relevant manner. We hold the buffer header lock.
I'm sorry, I did not notice the LockBufHdr.
In this embodiment, your approach seems very similar to s_lock. The loop in
PinBuffer behaves like s_lock.
In LockBufHdr:
if (pg_atomic_compare_exchange_u32(&desc->state, &state, state | BM_LOCKED))
conflict with:
while (unlikely(state & BM_LOCKED))
from PinBuffer.
Thus your patch does not remove the problem of competition for PinBuffer.
We will try check your patch this week.
You're posting
things for review and you seem completely unwilling to actually respond
to points raised.
I think we're just talking about different things.
--
YUriy Zhuravlev
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 2015-09-15 19:43:28 +0300, YUriy Zhuravlev wrote:
On Tuesday 15 September 2015 16:50:44 Andres Freund wrote:
No, they can't in a a relevant manner. We hold the buffer header lock.
I'm sorry, I did not notice of a LockBufHdr.
In this embodiment, your approach seems to be very similar to s_lock. Cycle in
PinBuffer behaves like s_lock.
In LockBufHdr:
if (pg_atomic_compare_exchange_u32(&desc->state, &state, state | BM_LOCKED))

conflict with:
while (unlikely(state & BM_LOCKED))
from PinBuffer.
Thus your patch does not remove the problem of competition for PinBuffer.
We will try check your patch this week.
That path is only taken if somebody else has already locked the buffer
(e.g. BufferAlloc()). If you have contention in PinBuffer() your
workload will be mostly cache resident and neither PinBuffer() nor
UnpinBuffer() set BM_LOCKED.
Greetings,
Andres Freund
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
That path is only taken if somebody else has already locked the buffer
(e.g. BufferAlloc()). If you have contention in PinBuffer() your
workload will be mostly cache resident and neither PinBuffer() nor
UnpinBuffer() set BM_LOCKED.
Thanks. Now I understand everything. It might work.
We will test it.
your workload
Simple pgbench -S for NUMA.
--
YUriy Zhuravlev
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 2015-09-15 20:16:10 +0300, YUriy Zhuravlev wrote:
We will be tested.
Did you have a chance to run some benchmarks?
Andres
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Thu, Sep 24, 2015 at 6:32 PM, Andres Freund <andres@anarazel.de> wrote:
On 2015-09-15 20:16:10 +0300, YUriy Zhuravlev wrote:
We will be tested.
Did you have a chance to run some benchmarks?
Yes, we now have 60 physical cores intel server and we're running
benchmarks on it.
------
Alexander Korotkov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
On Thu, Sep 24, 2015 at 6:36 PM, Alexander Korotkov <
a.korotkov@postgrespro.ru> wrote:
On Thu, Sep 24, 2015 at 6:32 PM, Andres Freund <andres@anarazel.de> wrote:
On 2015-09-15 20:16:10 +0300, YUriy Zhuravlev wrote:
We will be tested.
Did you have a chance to run some benchmarks?
Yes, we now have 60 physical cores intel server and we're running
benchmarks on it.
We got a consensus with Andres that we should commit the CAS version first
and look to other optimizations.
Refactored version of atomic state patch is attached. The changes are
following:
1) Macros are used for access refcount and usagecount.
2) likely/unlikely were removed. I think introducing likely/unlikely
should be a separate patch, since it touches portability. Also, I didn't see
any performance effect of this.
3) LockBufHdr returns the state after taking the lock. Even without using
atomic increments, it can still save some loops by skipping the atomic value read.
------
Alexander Korotkov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
Attachments:
pinunpin-cas.patchapplication/octet-stream; name=pinunpin-cas.patchDownload
diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c
new file mode 100644
index 3ae2848..3e70792
*** a/src/backend/storage/buffer/buf_init.c
--- b/src/backend/storage/buffer/buf_init.c
*************** InitBufferPool(void)
*** 95,106 ****
BufferDesc *buf = GetBufferDescriptor(i);
CLEAR_BUFFERTAG(buf->tag);
- buf->flags = 0;
- buf->usage_count = 0;
- buf->refcount = 0;
- buf->wait_backend_pid = 0;
! SpinLockInit(&buf->buf_hdr_lock);
buf->buf_id = i;
--- 95,103 ----
BufferDesc *buf = GetBufferDescriptor(i);
CLEAR_BUFFERTAG(buf->tag);
! pg_atomic_init_u32(&buf->state, 0);
! buf->wait_backend_pid = 0;
buf->buf_id = i;
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
new file mode 100644
index 8c0358e..8992438
*** a/src/backend/storage/buffer/bufmgr.c
--- b/src/backend/storage/buffer/bufmgr.c
***************
*** 51,57 ****
#include "utils/resowner_private.h"
#include "utils/timestamp.h"
-
/* Note: these two macros only work on shared buffers, not local ones! */
#define BufHdrGetBlock(bufHdr) ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))
#define BufferGetLSN(bufHdr) (PageGetLSN(BufHdrGetBlock(bufHdr)))
--- 51,56 ----
*************** static volatile BufferDesc *PinCountWait
*** 126,132 ****
* entry using ReservePrivateRefCountEntry() and then later, if necessary,
* fill it with NewPrivateRefCountEntry(). That split lets us avoid doing
* memory allocations in NewPrivateRefCountEntry() which can be important
! * because in some scenarios it's called with a spinlock held...
*/
static struct PrivateRefCountEntry PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES];
static HTAB *PrivateRefCountHash = NULL;
--- 125,131 ----
* entry using ReservePrivateRefCountEntry() and then later, if necessary,
* fill it with NewPrivateRefCountEntry(). That split lets us avoid doing
* memory allocations in NewPrivateRefCountEntry() which can be important
! * because in some scenarios it's called with a header lock held...
*/
static struct PrivateRefCountEntry PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES];
static HTAB *PrivateRefCountHash = NULL;
*************** ReadBuffer_common(SMgrRelation smgr, cha
*** 774,782 ****
*/
if (isLocalBuf)
{
! /* Only need to adjust flags */
! Assert(bufHdr->flags & BM_VALID);
! bufHdr->flags &= ~BM_VALID;
}
else
{
--- 773,780 ----
*/
if (isLocalBuf)
{
! Assert(pg_atomic_read_u32(&bufHdr->state) & BM_VALID);
! pg_atomic_fetch_and_u32(&bufHdr->state, ~BM_VALID);
}
else
{
*************** ReadBuffer_common(SMgrRelation smgr, cha
*** 788,795 ****
do
{
LockBufHdr(bufHdr);
! Assert(bufHdr->flags & BM_VALID);
! bufHdr->flags &= ~BM_VALID;
UnlockBufHdr(bufHdr);
} while (!StartBufferIO(bufHdr, true));
}
--- 786,793 ----
do
{
LockBufHdr(bufHdr);
! Assert(pg_atomic_read_u32(&bufHdr->state) & BM_VALID);
! pg_atomic_fetch_and_u32(&bufHdr->state, ~BM_VALID);
UnlockBufHdr(bufHdr);
} while (!StartBufferIO(bufHdr, true));
}
*************** ReadBuffer_common(SMgrRelation smgr, cha
*** 807,813 ****
* it's not been recycled) but come right back here to try smgrextend
* again.
*/
! Assert(!(bufHdr->flags & BM_VALID)); /* spinlock not needed */
bufBlock = isLocalBuf ? LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr);
--- 805,811 ----
* it's not been recycled) but come right back here to try smgrextend
* again.
*/
! Assert(!(pg_atomic_read_u32(&bufHdr->state) & BM_VALID)); /* header lock not needed */
bufBlock = isLocalBuf ? LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr);
*************** ReadBuffer_common(SMgrRelation smgr, cha
*** 885,891 ****
if (isLocalBuf)
{
/* Only need to adjust flags */
! bufHdr->flags |= BM_VALID;
}
else
{
--- 883,889 ----
if (isLocalBuf)
{
/* Only need to adjust flags */
! pg_atomic_fetch_or_u32(&bufHdr->state, BM_VALID);
}
else
{
*************** BufferAlloc(SMgrRelation smgr, char relp
*** 939,945 ****
BufferTag oldTag; /* previous identity of selected buffer */
uint32 oldHash; /* hash value for oldTag */
LWLock *oldPartitionLock; /* buffer partition lock for it */
! BufFlags oldFlags;
int buf_id;
volatile BufferDesc *buf;
bool valid;
--- 937,943 ----
BufferTag oldTag; /* previous identity of selected buffer */
uint32 oldHash; /* hash value for oldTag */
LWLock *oldPartitionLock; /* buffer partition lock for it */
! uint32 oldFlags;
int buf_id;
volatile BufferDesc *buf;
bool valid;
*************** BufferAlloc(SMgrRelation smgr, char relp
*** 1001,1024 ****
/* Loop here in case we have to try another victim buffer */
for (;;)
{
/*
! * Ensure, while the spinlock's not yet held, that there's a free
* refcount entry.
*/
ReservePrivateRefCountEntry();
/*
* Select a victim buffer. The buffer is returned with its header
! * spinlock still held!
*/
! buf = StrategyGetBuffer(strategy);
! Assert(buf->refcount == 0);
! /* Must copy buffer flags while we still hold the spinlock */
! oldFlags = buf->flags;
! /* Pin the buffer and then release the buffer spinlock */
PinBuffer_Locked(buf);
/*
--- 999,1024 ----
/* Loop here in case we have to try another victim buffer */
for (;;)
{
+ uint32 state;
+
/*
! * Ensure, while the header lock isn't yet held, that there's a free
* refcount entry.
*/
ReservePrivateRefCountEntry();
/*
* Select a victim buffer. The buffer is returned with its header
! * lock still held!
*/
! buf = StrategyGetBuffer(strategy, &state);
! Assert(BUF_STATE_GET_REFCOUNT(state) == 0);
! /* Must copy buffer flags while we still hold the header lock */
! oldFlags = state & BUF_FLAG_MASK;
! /* Pin the buffer and then release the buffer header lock */
PinBuffer_Locked(buf);
/*
*************** BufferAlloc(SMgrRelation smgr, char relp
*** 1202,1208 ****
/*
* Need to lock the buffer header too in order to change its tag.
*/
! LockBufHdr(buf);
/*
* Somebody could have pinned or re-dirtied the buffer while we were
--- 1202,1208 ----
/*
* Need to lock the buffer header too in order to change its tag.
*/
! state = LockBufHdr(buf);
/*
* Somebody could have pinned or re-dirtied the buffer while we were
*************** BufferAlloc(SMgrRelation smgr, char relp
*** 1210,1217 ****
* recycle this buffer; we must undo everything we've done and start
* over with a new victim buffer.
*/
! oldFlags = buf->flags;
! if (buf->refcount == 1 && !(oldFlags & BM_DIRTY))
break;
UnlockBufHdr(buf);
--- 1210,1217 ----
* recycle this buffer; we must undo everything we've done and start
* over with a new victim buffer.
*/
! oldFlags = state & BUF_FLAG_MASK;
! if (BUF_STATE_GET_REFCOUNT(state) == 1 && !(oldFlags & BM_DIRTY))
break;
UnlockBufHdr(buf);
*************** BufferAlloc(SMgrRelation smgr, char relp
*** 1232,1243 ****
* 1 so that the buffer can survive one clock-sweep pass.)
*/
buf->tag = newTag;
! buf->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_CHECKPOINT_NEEDED | BM_IO_ERROR | BM_PERMANENT);
if (relpersistence == RELPERSISTENCE_PERMANENT)
! buf->flags |= BM_TAG_VALID | BM_PERMANENT;
else
! buf->flags |= BM_TAG_VALID;
! buf->usage_count = 1;
UnlockBufHdr(buf);
--- 1232,1250 ----
* 1 so that the buffer can survive one clock-sweep pass.)
*/
buf->tag = newTag;
! pg_atomic_fetch_and_u32(&buf->state,
! ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED |
! BM_CHECKPOINT_NEEDED | BM_IO_ERROR |
! BM_PERMANENT |
! BUF_USAGECOUNT_MASK));
if (relpersistence == RELPERSISTENCE_PERMANENT)
! pg_atomic_fetch_or_u32(&buf->state,
! BM_TAG_VALID | BM_PERMANENT |
! BUF_USAGECOUNT_ONE);
else
! pg_atomic_fetch_or_u32(&buf->state,
! BM_TAG_VALID |
! BUF_USAGECOUNT_ONE);
UnlockBufHdr(buf);
*************** BufferAlloc(SMgrRelation smgr, char relp
*** 1267,1273 ****
* InvalidateBuffer -- mark a shared buffer invalid and return it to the
* freelist.
*
! * The buffer header spinlock must be held at entry. We drop it before
* returning. (This is sane because the caller must have locked the
* buffer in order to be sure it should be dropped.)
*
--- 1274,1280 ----
* InvalidateBuffer -- mark a shared buffer invalid and return it to the
* freelist.
*
! * The buffer header lock must be held at entry. We drop it before
* returning. (This is sane because the caller must have locked the
* buffer in order to be sure it should be dropped.)
*
*************** InvalidateBuffer(volatile BufferDesc *bu
*** 1286,1294 ****
BufferTag oldTag;
uint32 oldHash; /* hash value for oldTag */
LWLock *oldPartitionLock; /* buffer partition lock for it */
! BufFlags oldFlags;
! /* Save the original buffer tag before dropping the spinlock */
oldTag = buf->tag;
UnlockBufHdr(buf);
--- 1293,1302 ----
BufferTag oldTag;
uint32 oldHash; /* hash value for oldTag */
LWLock *oldPartitionLock; /* buffer partition lock for it */
! uint32 oldFlags;
! uint32 state;
! /* Save the original buffer tag before dropping the header lock */
oldTag = buf->tag;
UnlockBufHdr(buf);
*************** retry:
*** 1310,1316 ****
LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
/* Re-lock the buffer header */
! LockBufHdr(buf);
/* If it's changed while we were waiting for lock, do nothing */
if (!BUFFERTAGS_EQUAL(buf->tag, oldTag))
--- 1318,1324 ----
LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
/* Re-lock the buffer header */
! state = LockBufHdr(buf);
/* If it's changed while we were waiting for lock, do nothing */
if (!BUFFERTAGS_EQUAL(buf->tag, oldTag))
*************** retry:
*** 1329,1335 ****
* yet done StartBufferIO, WaitIO will fall through and we'll effectively
* be busy-looping here.)
*/
! if (buf->refcount != 0)
{
UnlockBufHdr(buf);
LWLockRelease(oldPartitionLock);
--- 1337,1343 ----
* yet done StartBufferIO, WaitIO will fall through and we'll effectively
* be busy-looping here.)
*/
! if (BUF_STATE_GET_REFCOUNT(state) != 0)
{
UnlockBufHdr(buf);
LWLockRelease(oldPartitionLock);
*************** retry:
*** 1344,1353 ****
* Clear out the buffer's tag and flags. We must do this to ensure that
* linear scans of the buffer array don't think the buffer is valid.
*/
! oldFlags = buf->flags;
CLEAR_BUFFERTAG(buf->tag);
! buf->flags = 0;
! buf->usage_count = 0;
UnlockBufHdr(buf);
--- 1352,1360 ----
* Clear out the buffer's tag and flags. We must do this to ensure that
* linear scans of the buffer array don't think the buffer is valid.
*/
! oldFlags = state & BUF_FLAG_MASK;
CLEAR_BUFFERTAG(buf->tag);
! pg_atomic_fetch_and_u32(&buf->state, BM_LOCKED | ~(BUF_FLAG_MASK | BUF_USAGECOUNT_MASK));
UnlockBufHdr(buf);
*************** void
*** 1381,1386 ****
--- 1388,1394 ----
MarkBufferDirty(Buffer buffer)
{
volatile BufferDesc *bufHdr;
+ uint32 state;
if (!BufferIsValid(buffer))
elog(ERROR, "bad buffer ID: %d", buffer);
*************** MarkBufferDirty(Buffer buffer)
*** 1397,1410 ****
/* unfortunately we can't check if the lock is held exclusively */
Assert(LWLockHeldByMe(bufHdr->content_lock));
! LockBufHdr(bufHdr);
! Assert(bufHdr->refcount > 0);
/*
* If the buffer was not dirty already, do vacuum accounting.
*/
! if (!(bufHdr->flags & BM_DIRTY))
{
VacuumPageDirty++;
pgBufferUsage.shared_blks_dirtied++;
--- 1405,1418 ----
/* unfortunately we can't check if the lock is held exclusively */
Assert(LWLockHeldByMe(bufHdr->content_lock));
! state = LockBufHdr(bufHdr);
! Assert(BUF_STATE_GET_REFCOUNT(state) > 0);
/*
* If the buffer was not dirty already, do vacuum accounting.
*/
! if (!(state & BM_DIRTY))
{
VacuumPageDirty++;
pgBufferUsage.shared_blks_dirtied++;
*************** MarkBufferDirty(Buffer buffer)
*** 1412,1418 ****
VacuumCostBalance += VacuumCostPageDirty;
}
! bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
UnlockBufHdr(bufHdr);
}
--- 1420,1426 ----
VacuumCostBalance += VacuumCostPageDirty;
}
! pg_atomic_fetch_or_u32(&bufHdr->state, BM_DIRTY | BM_JUST_DIRTIED);
UnlockBufHdr(bufHdr);
}
*************** ReleaseAndReadBuffer(Buffer buffer,
*** 1454,1460 ****
else
{
bufHdr = GetBufferDescriptor(buffer - 1);
! /* we have pin, so it's ok to examine tag without spinlock */
if (bufHdr->tag.blockNum == blockNum &&
RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node) &&
bufHdr->tag.forkNum == forkNum)
--- 1462,1468 ----
else
{
bufHdr = GetBufferDescriptor(buffer - 1);
! /* we have pin, so it's ok to examine tag without header lock */
if (bufHdr->tag.blockNum == blockNum &&
RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node) &&
bufHdr->tag.forkNum == forkNum)
*************** ReleaseAndReadBuffer(Buffer buffer,
*** 1482,1488 ****
* Note that ResourceOwnerEnlargeBuffers must have been done already.
*
* Returns TRUE if buffer is BM_VALID, else FALSE. This provision allows
! * some callers to avoid an extra spinlock cycle.
*/
static bool
PinBuffer(volatile BufferDesc *buf, BufferAccessStrategy strategy)
--- 1490,1496 ----
* Note that ResourceOwnerEnlargeBuffers must have been done already.
*
* Returns TRUE if buffer is BM_VALID, else FALSE. This provision allows
! * some callers to avoid an extra header lock cycle.
*/
static bool
PinBuffer(volatile BufferDesc *buf, BufferAccessStrategy strategy)
*************** PinBuffer(volatile BufferDesc *buf, Buff
*** 1495,1517 ****
if (ref == NULL)
{
ReservePrivateRefCountEntry();
ref = NewPrivateRefCountEntry(b);
! LockBufHdr(buf);
! buf->refcount++;
! if (strategy == NULL)
! {
! if (buf->usage_count < BM_MAX_USAGE_COUNT)
! buf->usage_count++;
! }
! else
{
! if (buf->usage_count == 0)
! buf->usage_count = 1;
}
! result = (buf->flags & BM_VALID) != 0;
! UnlockBufHdr(buf);
}
else
{
--- 1503,1542 ----
if (ref == NULL)
{
+ uint32 state;
+ uint32 oldstate;
+
ReservePrivateRefCountEntry();
ref = NewPrivateRefCountEntry(b);
!
! state = pg_atomic_read_u32(&buf->state);
! oldstate = state;
!
! while (true)
{
! /* spin-wait till lock is free */
! while (state & BM_LOCKED)
! {
! pg_spin_delay();
! state = pg_atomic_read_u32(&buf->state);
! oldstate = state;
! }
!
! /* increase refcount */
! state += BUF_REFCOUNT_ONE;
!
! /* increase usagecount unless already max */
! if (BUF_STATE_GET_USAGECOUNT(state) != BM_MAX_USAGE_COUNT)
! state += BUF_USAGECOUNT_ONE;
!
! if (pg_atomic_compare_exchange_u32(&buf->state, &oldstate, state))
! break;
!
! /* get ready for next loop, oldstate has been updated by cas */
! state = oldstate;
}
! result = (state & BM_VALID) != 0;
}
else
{
*************** PinBuffer(volatile BufferDesc *buf, Buff
*** 1527,1535 ****
/*
* PinBuffer_Locked -- as above, but caller already locked the buffer header.
! * The spinlock is released before return.
*
! * As this function is called with the spinlock held, the caller has to
* previously call ReservePrivateRefCountEntry().
*
* Currently, no callers of this function want to modify the buffer's
--- 1552,1560 ----
/*
* PinBuffer_Locked -- as above, but caller already locked the buffer header.
! * The header lock is released before return.
*
! * As this function is called with the header lock held, the caller has to
* previously call ReservePrivateRefCountEntry().
*
* Currently, no callers of this function want to modify the buffer's
*************** PinBuffer(volatile BufferDesc *buf, Buff
*** 1540,1546 ****
* Also all callers only ever use this function when it's known that the
* buffer can't have a preexisting pin by this backend. That allows us to skip
* searching the private refcount array & hash, which is a boon, because the
! * spinlock is still held.
*
* Note: use of this routine is frequently mandatory, not just an optimization
* to save a spin lock/unlock cycle, because we need to pin a buffer before
--- 1565,1571 ----
* Also all callers only ever use this function when it's known that the
* buffer can't have a preexisting pin by this backend. That allows us to skip
* searching the private refcount array & hash, which is a boon, because the
! * header lock is still held.
*
* Note: use of this routine is frequently mandatory, not just an optimization
* to save a spin lock/unlock cycle, because we need to pin a buffer before
*************** PinBuffer_Locked(volatile BufferDesc *bu
*** 1554,1564 ****
/*
* As explained, We don't expect any preexisting pins. That allows us to
! * manipulate the PrivateRefCount after releasing the spinlock
*/
Assert(GetPrivateRefCountEntry(BufferDescriptorGetBuffer(buf), false) == NULL);
! buf->refcount++;
UnlockBufHdr(buf);
b = BufferDescriptorGetBuffer(buf);
--- 1579,1589 ----
/*
* As explained, We don't expect any preexisting pins. That allows us to
! * manipulate the PrivateRefCount after releasing the header lock
*/
Assert(GetPrivateRefCountEntry(BufferDescriptorGetBuffer(buf), false) == NULL);
! pg_atomic_fetch_add_u32(&buf->state, 1);
UnlockBufHdr(buf);
b = BufferDescriptorGetBuffer(buf);
*************** UnpinBuffer(volatile BufferDesc *buf, bo
*** 1594,1623 ****
ref->refcount--;
if (ref->refcount == 0)
{
/* I'd better not still hold any locks on the buffer */
Assert(!LWLockHeldByMe(buf->content_lock));
Assert(!LWLockHeldByMe(buf->io_in_progress_lock));
! LockBufHdr(buf);
!
! /* Decrement the shared reference count */
! Assert(buf->refcount > 0);
! buf->refcount--;
/* Support LockBufferForCleanup() */
! if ((buf->flags & BM_PIN_COUNT_WAITER) &&
! buf->refcount == 1)
{
! /* we just released the last pin other than the waiter's */
! int wait_backend_pid = buf->wait_backend_pid;
! buf->flags &= ~BM_PIN_COUNT_WAITER;
! UnlockBufHdr(buf);
! ProcSendSignal(wait_backend_pid);
! }
! else
! UnlockBufHdr(buf);
ForgetPrivateRefCountEntry(ref);
}
}
--- 1619,1658 ----
ref->refcount--;
if (ref->refcount == 0)
{
+ uint32 state;
+
/* I'd better not still hold any locks on the buffer */
Assert(!LWLockHeldByMe(buf->content_lock));
Assert(!LWLockHeldByMe(buf->io_in_progress_lock));
! /*
! * Decrement the shared reference count.
! *
! * Arguably it'd be more robust if we checked for BM_LOCKED here, but
! * currently all manipulation of ->state for shared buffers is through
! * atomics.
! */
! state = pg_atomic_fetch_sub_u32(&buf->state, BUF_REFCOUNT_ONE);
! Assert(BUF_STATE_GET_REFCOUNT(state) > 0);
/* Support LockBufferForCleanup() */
! if (state & BM_PIN_COUNT_WAITER)
{
! state = LockBufHdr(buf);
! if (state & BM_PIN_COUNT_WAITER && BUF_STATE_GET_REFCOUNT(state) == 1)
! {
! /* we just released the last pin other than the waiter's */
! int wait_backend_pid = buf->wait_backend_pid;
+ pg_atomic_fetch_and_u32(&buf->state,
+ ~BM_PIN_COUNT_WAITER);
+ UnlockBufHdr(buf);
+ ProcSendSignal(wait_backend_pid);
+ }
+ else
+ UnlockBufHdr(buf);
+ }
ForgetPrivateRefCountEntry(ref);
}
}
*************** UnpinBuffer(volatile BufferDesc *buf, bo
*** 1635,1640 ****
--- 1670,1676 ----
static void
BufferSync(int flags)
{
+ uint32 state;
int buf_id;
int num_to_scan;
int num_to_write;
*************** BufferSync(int flags)
*** 1675,1688 ****
volatile BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
/*
! * Header spinlock is enough to examine BM_DIRTY, see comment in
* SyncOneBuffer.
*/
! LockBufHdr(bufHdr);
! if ((bufHdr->flags & mask) == mask)
{
! bufHdr->flags |= BM_CHECKPOINT_NEEDED;
num_to_write++;
}
--- 1711,1725 ----
volatile BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
/*
! * Header lock is enough to examine BM_DIRTY, see comment in
* SyncOneBuffer.
*/
! state = LockBufHdr(bufHdr);
! if ((state & mask) == mask)
{
! pg_atomic_fetch_or_u32(&bufHdr->state,
! BM_CHECKPOINT_NEEDED);
num_to_write++;
}
*************** BufferSync(int flags)
*** 1721,1727 ****
* write the buffer though we didn't need to. It doesn't seem worth
* guarding against this, though.
*/
! if (bufHdr->flags & BM_CHECKPOINT_NEEDED)
{
if (SyncOneBuffer(buf_id, false) & BUF_WRITTEN)
{
--- 1758,1764 ----
* write the buffer though we didn't need to. It doesn't seem worth
* guarding against this, though.
*/
! if (pg_atomic_read_u32(&bufHdr->state) & BM_CHECKPOINT_NEEDED)
{
if (SyncOneBuffer(buf_id, false) & BUF_WRITTEN)
{
*************** SyncOneBuffer(int buf_id, bool skip_rece
*** 2081,2086 ****
--- 2118,2124 ----
{
volatile BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
int result = 0;
+ uint32 state;
ReservePrivateRefCountEntry();
*************** SyncOneBuffer(int buf_id, bool skip_rece
*** 2093,2102 ****
* don't worry because our checkpoint.redo points before log record for
* upcoming changes and so we are not required to write such dirty buffer.
*/
! LockBufHdr(bufHdr);
! if (bufHdr->refcount == 0 && bufHdr->usage_count == 0)
result |= BUF_REUSABLE;
else if (skip_recently_used)
{
/* Caller told us not to write recently-used buffers */
--- 2131,2143 ----
* don't worry because our checkpoint.redo points before log record for
* upcoming changes and so we are not required to write such dirty buffer.
*/
! state = LockBufHdr(bufHdr);
! if (BUF_STATE_GET_REFCOUNT(state) == 0 &&
! BUF_STATE_GET_USAGECOUNT(state) == 0)
! {
result |= BUF_REUSABLE;
+ }
else if (skip_recently_used)
{
/* Caller told us not to write recently-used buffers */
*************** SyncOneBuffer(int buf_id, bool skip_rece
*** 2104,2110 ****
return result;
}
! if (!(bufHdr->flags & BM_VALID) || !(bufHdr->flags & BM_DIRTY))
{
/* It's clean, so nothing to do */
UnlockBufHdr(bufHdr);
--- 2145,2151 ----
return result;
}
! if (!(state & BM_VALID) || !(state & BM_DIRTY))
{
/* It's clean, so nothing to do */
UnlockBufHdr(bufHdr);
*************** PrintBufferLeakWarning(Buffer buffer)
*** 2256,2261 ****
--- 2297,2303 ----
int32 loccount;
char *path;
BackendId backend;
+ uint32 state;
Assert(BufferIsValid(buffer));
if (BufferIsLocal(buffer))
*************** PrintBufferLeakWarning(Buffer buffer)
*** 2273,2284 ****
/* theoretically we should lock the bufhdr here */
path = relpathbackend(buf->tag.rnode, backend, buf->tag.forkNum);
elog(WARNING,
"buffer refcount leak: [%03d] "
"(rel=%s, blockNum=%u, flags=0x%x, refcount=%u %d)",
buffer, path,
! buf->tag.blockNum, buf->flags,
! buf->refcount, loccount);
pfree(path);
}
--- 2315,2327 ----
/* theoretically we should lock the bufhdr here */
path = relpathbackend(buf->tag.rnode, backend, buf->tag.forkNum);
+ state = pg_atomic_read_u32(&buf->state);
elog(WARNING,
"buffer refcount leak: [%03d] "
"(rel=%s, blockNum=%u, flags=0x%x, refcount=%u %d)",
buffer, path,
! buf->tag.blockNum, state & BUF_FLAG_MASK,
! BUF_STATE_GET_REFCOUNT(state), loccount);
pfree(path);
}
*************** BufferGetBlockNumber(Buffer buffer)
*** 2333,2339 ****
else
bufHdr = GetBufferDescriptor(buffer - 1);
! /* pinned, so OK to read tag without spinlock */
return bufHdr->tag.blockNum;
}
--- 2376,2382 ----
else
bufHdr = GetBufferDescriptor(buffer - 1);
! /* pinned, so OK to read tag without lock */
return bufHdr->tag.blockNum;
}
*************** BufferGetTag(Buffer buffer, RelFileNode
*** 2356,2362 ****
else
bufHdr = GetBufferDescriptor(buffer - 1);
! /* pinned, so OK to read tag without spinlock */
*rnode = bufHdr->tag.rnode;
*forknum = bufHdr->tag.forkNum;
*blknum = bufHdr->tag.blockNum;
--- 2399,2405 ----
else
bufHdr = GetBufferDescriptor(buffer - 1);
! /* pinned, so OK to read tag without lock */
*rnode = bufHdr->tag.rnode;
*forknum = bufHdr->tag.forkNum;
*blknum = bufHdr->tag.blockNum;
*************** FlushBuffer(volatile BufferDesc *buf, SM
*** 2424,2430 ****
recptr = BufferGetLSN(buf);
/* To check if block content changes while flushing. - vadim 01/17/97 */
! buf->flags &= ~BM_JUST_DIRTIED;
UnlockBufHdr(buf);
/*
--- 2467,2473 ----
recptr = BufferGetLSN(buf);
/* To check if block content changes while flushing. - vadim 01/17/97 */
! pg_atomic_fetch_and_u32(&buf->state, ~BM_JUST_DIRTIED);
UnlockBufHdr(buf);
/*
*************** FlushBuffer(volatile BufferDesc *buf, SM
*** 2444,2450 ****
* disastrous system-wide consequences. To make sure that can't happen,
* skip the flush if the buffer isn't permanent.
*/
! if (buf->flags & BM_PERMANENT)
XLogFlush(recptr);
/*
--- 2487,2493 ----
* disastrous system-wide consequences. To make sure that can't happen,
* skip the flush if the buffer isn't permanent.
*/
! if (pg_atomic_read_u32(&buf->state) & BM_PERMANENT)
XLogFlush(recptr);
/*
*************** BufferIsPermanent(Buffer buffer)
*** 2532,2544 ****
/*
* BM_PERMANENT can't be changed while we hold a pin on the buffer, so we
! * need not bother with the buffer header spinlock. Even if someone else
* changes the buffer header flags while we're doing this, we assume that
* changing an aligned 2-byte BufFlags value is atomic, so we'll read the
* old value or the new value, but not random garbage.
*/
bufHdr = GetBufferDescriptor(buffer - 1);
! return (bufHdr->flags & BM_PERMANENT) != 0;
}
/*
--- 2575,2587 ----
/*
* BM_PERMANENT can't be changed while we hold a pin on the buffer, so we
! * need not bother with the buffer header lock. Even if someone else
* changes the buffer header flags while we're doing this, we assume that
* changing an aligned 2-byte BufFlags value is atomic, so we'll read the
* old value or the new value, but not random garbage.
*/
bufHdr = GetBufferDescriptor(buffer - 1);
! return (pg_atomic_read_u32(&bufHdr->state) & BM_PERMANENT) != 0;
}
/*
*************** DropRelFileNodeBuffers(RelFileNodeBacken
*** 2638,2644 ****
if (RelFileNodeEquals(bufHdr->tag.rnode, rnode.node) &&
bufHdr->tag.forkNum == forkNum &&
bufHdr->tag.blockNum >= firstDelBlock)
! InvalidateBuffer(bufHdr); /* releases spinlock */
else
UnlockBufHdr(bufHdr);
}
--- 2681,2687 ----
if (RelFileNodeEquals(bufHdr->tag.rnode, rnode.node) &&
bufHdr->tag.forkNum == forkNum &&
bufHdr->tag.blockNum >= firstDelBlock)
! InvalidateBuffer(bufHdr); /* releases lock */
else
UnlockBufHdr(bufHdr);
}
*************** DropRelFileNodesAllBuffers(RelFileNodeBa
*** 2736,2742 ****
LockBufHdr(bufHdr);
if (RelFileNodeEquals(bufHdr->tag.rnode, (*rnode)))
! InvalidateBuffer(bufHdr); /* releases spinlock */
else
UnlockBufHdr(bufHdr);
}
--- 2779,2785 ----
LockBufHdr(bufHdr);
if (RelFileNodeEquals(bufHdr->tag.rnode, (*rnode)))
! InvalidateBuffer(bufHdr); /* releases lock */
else
UnlockBufHdr(bufHdr);
}
*************** DropDatabaseBuffers(Oid dbid)
*** 2778,2784 ****
LockBufHdr(bufHdr);
if (bufHdr->tag.rnode.dbNode == dbid)
! InvalidateBuffer(bufHdr); /* releases spinlock */
else
UnlockBufHdr(bufHdr);
}
--- 2821,2827 ----
LockBufHdr(bufHdr);
if (bufHdr->tag.rnode.dbNode == dbid)
! InvalidateBuffer(bufHdr); /* releases lock */
else
UnlockBufHdr(bufHdr);
}
*************** FlushRelationBuffers(Relation rel)
*** 2874,2880 ****
{
bufHdr = GetLocalBufferDescriptor(i);
if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) &&
! (bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
{
ErrorContextCallback errcallback;
Page localpage;
--- 2917,2924 ----
{
bufHdr = GetLocalBufferDescriptor(i);
if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) &&
! (pg_atomic_read_u32(&bufHdr->state) & (BM_VALID | BM_DIRTY))
! == (BM_VALID | BM_DIRTY))
{
ErrorContextCallback errcallback;
Page localpage;
*************** FlushRelationBuffers(Relation rel)
*** 2895,2901 ****
localpage,
false);
! bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
/* Pop the error context stack */
error_context_stack = errcallback.previous;
--- 2939,2945 ----
localpage,
false);
! pg_atomic_fetch_and_u32(&bufHdr->state, ~(BM_DIRTY | BM_JUST_DIRTIED));
/* Pop the error context stack */
error_context_stack = errcallback.previous;
*************** FlushRelationBuffers(Relation rel)
*** 2923,2929 ****
LockBufHdr(bufHdr);
if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) &&
! (bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
{
PinBuffer_Locked(bufHdr);
LWLockAcquire(bufHdr->content_lock, LW_SHARED);
--- 2967,2974 ----
LockBufHdr(bufHdr);
if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) &&
! (pg_atomic_read_u32(&bufHdr->state) & (BM_VALID | BM_DIRTY))
! == (BM_VALID | BM_DIRTY))
{
PinBuffer_Locked(bufHdr);
LWLockAcquire(bufHdr->content_lock, LW_SHARED);
*************** FlushDatabaseBuffers(Oid dbid)
*** 2975,2981 ****
LockBufHdr(bufHdr);
if (bufHdr->tag.rnode.dbNode == dbid &&
! (bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
{
PinBuffer_Locked(bufHdr);
LWLockAcquire(bufHdr->content_lock, LW_SHARED);
--- 3020,3027 ----
LockBufHdr(bufHdr);
if (bufHdr->tag.rnode.dbNode == dbid &&
! (pg_atomic_read_u32(&bufHdr->state) & (BM_VALID | BM_DIRTY))
! == (BM_VALID | BM_DIRTY))
{
PinBuffer_Locked(bufHdr);
LWLockAcquire(bufHdr->content_lock, LW_SHARED);
*************** MarkBufferDirtyHint(Buffer buffer, bool
*** 3086,3104 ****
* This routine might get called many times on the same page, if we are
* making the first scan after commit of an xact that added/deleted many
* tuples. So, be as quick as we can if the buffer is already dirty. We
! * do this by not acquiring spinlock if it looks like the status bits are
* already set. Since we make this test unlocked, there's a chance we
* might fail to notice that the flags have just been cleared, and failed
* to reset them, due to memory-ordering issues. But since this function
* is only intended to be used in cases where failing to write out the
* data would be harmless anyway, it doesn't really matter.
*/
! if ((bufHdr->flags & (BM_DIRTY | BM_JUST_DIRTIED)) !=
(BM_DIRTY | BM_JUST_DIRTIED))
{
XLogRecPtr lsn = InvalidXLogRecPtr;
bool dirtied = false;
bool delayChkpt = false;
/*
* If we need to protect hint bit updates from torn writes, WAL-log a
--- 3132,3151 ----
* This routine might get called many times on the same page, if we are
* making the first scan after commit of an xact that added/deleted many
* tuples. So, be as quick as we can if the buffer is already dirty. We
! * do this by not acquiring the header lock if it looks like the status bits are
* already set. Since we make this test unlocked, there's a chance we
* might fail to notice that the flags have just been cleared, and failed
* to reset them, due to memory-ordering issues. But since this function
* is only intended to be used in cases where failing to write out the
* data would be harmless anyway, it doesn't really matter.
*/
! if ((pg_atomic_read_u32(&bufHdr->state) & (BM_DIRTY | BM_JUST_DIRTIED)) !=
(BM_DIRTY | BM_JUST_DIRTIED))
{
XLogRecPtr lsn = InvalidXLogRecPtr;
bool dirtied = false;
bool delayChkpt = false;
+ uint32 state;
/*
* If we need to protect hint bit updates from torn writes, WAL-log a
*************** MarkBufferDirtyHint(Buffer buffer, bool
*** 3109,3115 ****
* We don't check full_page_writes here because that logic is included
* when we call XLogInsert() since the value changes dynamically.
*/
! if (XLogHintBitIsNeeded() && (bufHdr->flags & BM_PERMANENT))
{
/*
* If we're in recovery we cannot dirty a page because of a hint.
--- 3156,3162 ----
* We don't check full_page_writes here because that logic is included
* when we call XLogInsert() since the value changes dynamically.
*/
! if (XLogHintBitIsNeeded() && (pg_atomic_read_u32(&bufHdr->state) & BM_PERMANENT))
{
/*
* If we're in recovery we cannot dirty a page because of a hint.
*************** MarkBufferDirtyHint(Buffer buffer, bool
*** 3149,3156 ****
}
LockBufHdr(bufHdr);
! Assert(bufHdr->refcount > 0);
! if (!(bufHdr->flags & BM_DIRTY))
{
dirtied = true; /* Means "will be dirtied by this action" */
--- 3196,3207 ----
}
LockBufHdr(bufHdr);
!
! state = pg_atomic_read_u32(&bufHdr->state);
!
! Assert(BUF_STATE_GET_REFCOUNT(state) > 0);
!
! if (!(state & BM_DIRTY))
{
dirtied = true; /* Means "will be dirtied by this action" */
*************** MarkBufferDirtyHint(Buffer buffer, bool
*** 3170,3176 ****
if (!XLogRecPtrIsInvalid(lsn))
PageSetLSN(page, lsn);
}
! bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
UnlockBufHdr(bufHdr);
if (delayChkpt)
--- 3221,3229 ----
if (!XLogRecPtrIsInvalid(lsn))
PageSetLSN(page, lsn);
}
!
! pg_atomic_fetch_or_u32(&bufHdr->state, BM_DIRTY | BM_JUST_DIRTIED);
!
UnlockBufHdr(bufHdr);
if (delayChkpt)
*************** UnlockBuffers(void)
*** 3208,3216 ****
* Don't complain if flag bit not set; it could have been reset but we
* got a cancel/die interrupt before getting the signal.
*/
! if ((buf->flags & BM_PIN_COUNT_WAITER) != 0 &&
buf->wait_backend_pid == MyProcPid)
! buf->flags &= ~BM_PIN_COUNT_WAITER;
UnlockBufHdr(buf);
--- 3261,3269 ----
* Don't complain if flag bit not set; it could have been reset but we
* got a cancel/die interrupt before getting the signal.
*/
! if ((pg_atomic_read_u32(&buf->state) & BM_PIN_COUNT_WAITER) != 0 &&
buf->wait_backend_pid == MyProcPid)
! pg_atomic_fetch_and_u32(&buf->state, ~BM_PIN_COUNT_WAITER);
UnlockBufHdr(buf);
*************** LockBufferForCleanup(Buffer buffer)
*** 3304,3328 ****
for (;;)
{
/* Try to acquire lock */
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
! LockBufHdr(bufHdr);
! Assert(bufHdr->refcount > 0);
! if (bufHdr->refcount == 1)
{
/* Successfully acquired exclusive lock with pincount 1 */
UnlockBufHdr(bufHdr);
return;
}
/* Failed, so mark myself as waiting for pincount 1 */
! if (bufHdr->flags & BM_PIN_COUNT_WAITER)
{
UnlockBufHdr(bufHdr);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
elog(ERROR, "multiple backends attempting to wait for pincount 1");
}
bufHdr->wait_backend_pid = MyProcPid;
! bufHdr->flags |= BM_PIN_COUNT_WAITER;
PinCountWaitBuf = bufHdr;
UnlockBufHdr(bufHdr);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
--- 3357,3384 ----
for (;;)
{
+ int state;
+
/* Try to acquire lock */
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
! state = LockBufHdr(bufHdr);
!
! Assert(BUF_STATE_GET_REFCOUNT(state) > 0);
! if (BUF_STATE_GET_REFCOUNT(state) == 1)
{
/* Successfully acquired exclusive lock with pincount 1 */
UnlockBufHdr(bufHdr);
return;
}
/* Failed, so mark myself as waiting for pincount 1 */
! if (state & BM_PIN_COUNT_WAITER)
{
UnlockBufHdr(bufHdr);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
elog(ERROR, "multiple backends attempting to wait for pincount 1");
}
bufHdr->wait_backend_pid = MyProcPid;
! pg_atomic_fetch_or_u32(&bufHdr->state, BM_PIN_COUNT_WAITER);
PinCountWaitBuf = bufHdr;
UnlockBufHdr(bufHdr);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
*************** LockBufferForCleanup(Buffer buffer)
*** 3349,3357 ****
* better be safe.
*/
LockBufHdr(bufHdr);
! if ((bufHdr->flags & BM_PIN_COUNT_WAITER) != 0 &&
bufHdr->wait_backend_pid == MyProcPid)
! bufHdr->flags &= ~BM_PIN_COUNT_WAITER;
UnlockBufHdr(bufHdr);
PinCountWaitBuf = NULL;
--- 3405,3413 ----
* better be safe.
*/
LockBufHdr(bufHdr);
! if ((pg_atomic_read_u32(&bufHdr->state) & BM_PIN_COUNT_WAITER) != 0 &&
bufHdr->wait_backend_pid == MyProcPid)
! pg_atomic_fetch_and_u32(&bufHdr->state, ~BM_PIN_COUNT_WAITER);
UnlockBufHdr(bufHdr);
PinCountWaitBuf = NULL;
*************** bool
*** 3393,3414 ****
ConditionalLockBufferForCleanup(Buffer buffer)
{
volatile BufferDesc *bufHdr;
Assert(BufferIsValid(buffer));
if (BufferIsLocal(buffer))
{
/* There should be exactly one pin */
! Assert(LocalRefCount[-buffer - 1] > 0);
! if (LocalRefCount[-buffer - 1] != 1)
return false;
/* Nobody else to wait for */
return true;
}
/* There should be exactly one local pin */
! Assert(GetPrivateRefCount(buffer) > 0);
! if (GetPrivateRefCount(buffer) != 1)
return false;
/* Try to acquire lock */
--- 3449,3474 ----
ConditionalLockBufferForCleanup(Buffer buffer)
{
volatile BufferDesc *bufHdr;
+ uint32 state,
+ refcount;
Assert(BufferIsValid(buffer));
if (BufferIsLocal(buffer))
{
+ refcount = LocalRefCount[-buffer - 1];
/* There should be exactly one pin */
! Assert(refcount > 0);
! if (refcount != 1)
return false;
/* Nobody else to wait for */
return true;
}
/* There should be exactly one local pin */
! refcount = GetPrivateRefCount(buffer);
! Assert(refcount);
! if (refcount != 1)
return false;
/* Try to acquire lock */
*************** ConditionalLockBufferForCleanup(Buffer b
*** 3416,3424 ****
return false;
bufHdr = GetBufferDescriptor(buffer - 1);
! LockBufHdr(bufHdr);
! Assert(bufHdr->refcount > 0);
! if (bufHdr->refcount == 1)
{
/* Successfully acquired exclusive lock with pincount 1 */
UnlockBufHdr(bufHdr);
--- 3476,3486 ----
return false;
bufHdr = GetBufferDescriptor(buffer - 1);
! state = LockBufHdr(bufHdr);
! refcount = BUF_STATE_GET_REFCOUNT(state);
!
! Assert(refcount > 0);
! if (refcount == 1)
{
/* Successfully acquired exclusive lock with pincount 1 */
UnlockBufHdr(bufHdr);
*************** WaitIO(volatile BufferDesc *buf)
*** 3456,3472 ****
*/
for (;;)
{
! BufFlags sv_flags;
/*
! * It may not be necessary to acquire the spinlock to check the flag
* here, but since this test is essential for correctness, we'd better
* play it safe.
*/
! LockBufHdr(buf);
! sv_flags = buf->flags;
UnlockBufHdr(buf);
! if (!(sv_flags & BM_IO_IN_PROGRESS))
break;
LWLockAcquire(buf->io_in_progress_lock, LW_SHARED);
LWLockRelease(buf->io_in_progress_lock);
--- 3518,3534 ----
*/
for (;;)
{
! uint32 state;
/*
! * It may not be necessary to acquire the header lock to check the flag
* here, but since this test is essential for correctness, we'd better
* play it safe.
*/
! state = LockBufHdr(buf);
UnlockBufHdr(buf);
!
! if (!(state & BM_IO_IN_PROGRESS))
break;
LWLockAcquire(buf->io_in_progress_lock, LW_SHARED);
LWLockRelease(buf->io_in_progress_lock);
*************** WaitIO(volatile BufferDesc *buf)
*** 3494,3499 ****
--- 3556,3563 ----
static bool
StartBufferIO(volatile BufferDesc *buf, bool forInput)
{
+ uint32 state;
+
Assert(!InProgressBuf);
for (;;)
*************** StartBufferIO(volatile BufferDesc *buf,
*** 3504,3512 ****
*/
LWLockAcquire(buf->io_in_progress_lock, LW_EXCLUSIVE);
! LockBufHdr(buf);
! if (!(buf->flags & BM_IO_IN_PROGRESS))
break;
/*
--- 3568,3576 ----
*/
LWLockAcquire(buf->io_in_progress_lock, LW_EXCLUSIVE);
! state = LockBufHdr(buf);
! if (!(state & BM_IO_IN_PROGRESS))
break;
/*
*************** StartBufferIO(volatile BufferDesc *buf,
*** 3522,3528 ****
/* Once we get here, there is definitely no I/O active on this buffer */
! if (forInput ? (buf->flags & BM_VALID) : !(buf->flags & BM_DIRTY))
{
/* someone else already did the I/O */
UnlockBufHdr(buf);
--- 3586,3592 ----
/* Once we get here, there is definitely no I/O active on this buffer */
! if (forInput ? (state & BM_VALID) : !(state & BM_DIRTY))
{
/* someone else already did the I/O */
UnlockBufHdr(buf);
*************** StartBufferIO(volatile BufferDesc *buf,
*** 3530,3536 ****
return false;
}
! buf->flags |= BM_IO_IN_PROGRESS;
UnlockBufHdr(buf);
--- 3594,3600 ----
return false;
}
! pg_atomic_fetch_or_u32(&buf->state, BM_IO_IN_PROGRESS);
UnlockBufHdr(buf);
*************** static void
*** 3561,3575 ****
TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty,
int set_flag_bits)
{
Assert(buf == InProgressBuf);
! LockBufHdr(buf);
! Assert(buf->flags & BM_IO_IN_PROGRESS);
! buf->flags &= ~(BM_IO_IN_PROGRESS | BM_IO_ERROR);
! if (clear_dirty && !(buf->flags & BM_JUST_DIRTIED))
! buf->flags &= ~(BM_DIRTY | BM_CHECKPOINT_NEEDED);
! buf->flags |= set_flag_bits;
UnlockBufHdr(buf);
--- 3625,3643 ----
TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty,
int set_flag_bits)
{
+ uint32 state;
+
Assert(buf == InProgressBuf);
! state = LockBufHdr(buf);
! Assert(state & BM_IO_IN_PROGRESS);
!
! pg_atomic_fetch_and_u32(&buf->state, ~(BM_IO_IN_PROGRESS | BM_IO_ERROR));
! if (clear_dirty && !(pg_atomic_read_u32(&buf->state) & BM_JUST_DIRTIED))
! pg_atomic_fetch_and_u32(&buf->state, ~(BM_DIRTY | BM_CHECKPOINT_NEEDED));
!
! pg_atomic_fetch_or_u32(&buf->state, set_flag_bits);
UnlockBufHdr(buf);
*************** AbortBufferIO(void)
*** 3594,3599 ****
--- 3662,3668 ----
if (buf)
{
+ uint32 state;
/*
* Since LWLockReleaseAll has already been called, we're not holding
* the buffer's io_in_progress_lock. We have to re-acquire it so that
*************** AbortBufferIO(void)
*** 3602,3627 ****
*/
LWLockAcquire(buf->io_in_progress_lock, LW_EXCLUSIVE);
! LockBufHdr(buf);
! Assert(buf->flags & BM_IO_IN_PROGRESS);
if (IsForInput)
{
! Assert(!(buf->flags & BM_DIRTY));
/* We'd better not think buffer is valid yet */
! Assert(!(buf->flags & BM_VALID));
UnlockBufHdr(buf);
}
else
{
! BufFlags sv_flags;
!
! sv_flags = buf->flags;
! Assert(sv_flags & BM_DIRTY);
UnlockBufHdr(buf);
/* Issue notice if this is not the first failure... */
! if (sv_flags & BM_IO_ERROR)
{
! /* Buffer is pinned, so we can read tag without spinlock */
char *path;
path = relpathperm(buf->tag.rnode, buf->tag.forkNum);
--- 3671,3694 ----
*/
LWLockAcquire(buf->io_in_progress_lock, LW_EXCLUSIVE);
! state = LockBufHdr(buf);
! Assert(state & BM_IO_IN_PROGRESS);
if (IsForInput)
{
! Assert(!(state & BM_DIRTY));
!
/* We'd better not think buffer is valid yet */
! Assert(!(state & BM_VALID));
UnlockBufHdr(buf);
}
else
{
! Assert(state & BM_DIRTY);
UnlockBufHdr(buf);
/* Issue notice if this is not the first failure... */
! if (state & BM_IO_ERROR)
{
! /* Buffer is pinned, so we can read tag without header lock */
char *path;
path = relpathperm(buf->tag.rnode, buf->tag.forkNum);
*************** shared_buffer_write_error_callback(void
*** 3645,3651 ****
{
volatile BufferDesc *bufHdr = (volatile BufferDesc *) arg;
! /* Buffer is pinned, so we can read the tag without locking the spinlock */
if (bufHdr != NULL)
{
char *path = relpathperm(bufHdr->tag.rnode, bufHdr->tag.forkNum);
--- 3712,3718 ----
{
volatile BufferDesc *bufHdr = (volatile BufferDesc *) arg;
! /* Buffer is pinned, so we can read the tag without locking the header */
if (bufHdr != NULL)
{
char *path = relpathperm(bufHdr->tag.rnode, bufHdr->tag.forkNum);
*************** rnode_comparator(const void *p1, const v
*** 3701,3703 ****
--- 3768,3801 ----
else
return 0;
}
+
+ uint32
+ LockBufHdr(volatile BufferDesc *desc)
+ {
+ uint32 state = pg_atomic_read_u32(&desc->state);
+
+ for (;;)
+ {
+ /* wait till lock is free */
+ while (state & BM_LOCKED)
+ {
+ pg_spin_delay();
+ state = pg_atomic_read_u32(&desc->state);
+
+ /* TODO: add exponential backoff?  Should seldom be contended, though. */
+ }
+
+ /* and try to get lock */
+ if (pg_atomic_compare_exchange_u32(&desc->state, &state, state | BM_LOCKED))
+ break;
+ }
+ return state | BM_LOCKED;
+ }
+
+ void
+ UnlockBufHdr(volatile BufferDesc *desc)
+ {
+ Assert(pg_atomic_read_u32(&desc->state) & BM_LOCKED);
+
+ pg_atomic_sub_fetch_u32(&desc->state, BM_LOCKED);
+ }
diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c
new file mode 100644
index bc2c773..ba5f493
*** a/src/backend/storage/buffer/freelist.c
--- b/src/backend/storage/buffer/freelist.c
*************** typedef struct BufferAccessStrategyData
*** 98,104 ****
/* Prototypes for internal functions */
! static volatile BufferDesc *GetBufferFromRing(BufferAccessStrategy strategy);
static void AddBufferToRing(BufferAccessStrategy strategy,
volatile BufferDesc *buf);
--- 98,105 ----
/* Prototypes for internal functions */
! static volatile BufferDesc *GetBufferFromRing(BufferAccessStrategy strategy,
! uint32 *lockstate);
static void AddBufferToRing(BufferAccessStrategy strategy,
volatile BufferDesc *buf);
*************** ClockSweepTick(void)
*** 180,186 ****
* return the buffer with the buffer header spinlock still held.
*/
volatile BufferDesc *
! StrategyGetBuffer(BufferAccessStrategy strategy)
{
volatile BufferDesc *buf;
int bgwprocno;
--- 181,187 ----
* return the buffer with the buffer header spinlock still held.
*/
volatile BufferDesc *
! StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *lockstate)
{
volatile BufferDesc *buf;
int bgwprocno;
*************** StrategyGetBuffer(BufferAccessStrategy s
*** 192,198 ****
*/
if (strategy != NULL)
{
! buf = GetBufferFromRing(strategy);
if (buf != NULL)
return buf;
}
--- 193,199 ----
*/
if (strategy != NULL)
{
! buf = GetBufferFromRing(strategy, lockstate);
if (buf != NULL)
return buf;
}
*************** StrategyGetBuffer(BufferAccessStrategy s
*** 250,255 ****
--- 251,258 ----
{
while (true)
{
+ uint32 state;
+
/* Acquire the spinlock to remove element from the freelist */
SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
*************** StrategyGetBuffer(BufferAccessStrategy s
*** 279,289 ****
* it before we got to it. It's probably impossible altogether as
* of 8.3, but we'd better check anyway.)
*/
! LockBufHdr(buf);
! if (buf->refcount == 0 && buf->usage_count == 0)
{
if (strategy != NULL)
AddBufferToRing(strategy, buf);
return buf;
}
UnlockBufHdr(buf);
--- 282,294 ----
* it before we got to it. It's probably impossible altogether as
* of 8.3, but we'd better check anyway.)
*/
! state = LockBufHdr(buf);
! if (BUF_STATE_GET_REFCOUNT(state) == 0
! && BUF_STATE_GET_USAGECOUNT(state) == 0)
{
if (strategy != NULL)
AddBufferToRing(strategy, buf);
+ *lockstate = state;
return buf;
}
UnlockBufHdr(buf);
*************** StrategyGetBuffer(BufferAccessStrategy s
*** 295,300 ****
--- 300,306 ----
trycounter = NBuffers;
for (;;)
{
+ uint32 state;
buf = GetBufferDescriptor(ClockSweepTick());
*************** StrategyGetBuffer(BufferAccessStrategy s
*** 302,313 ****
* If the buffer is pinned or has a nonzero usage_count, we cannot use
* it; decrement the usage_count (unless pinned) and keep scanning.
*/
! LockBufHdr(buf);
! if (buf->refcount == 0)
{
! if (buf->usage_count > 0)
{
! buf->usage_count--;
trycounter = NBuffers;
}
else
--- 308,321 ----
* If the buffer is pinned or has a nonzero usage_count, we cannot use
* it; decrement the usage_count (unless pinned) and keep scanning.
*/
! state = LockBufHdr(buf);
!
! if (BUF_STATE_GET_REFCOUNT(state) == 0)
{
! if (BUF_STATE_GET_USAGECOUNT(state) != 0)
{
! pg_atomic_fetch_sub_u32(&buf->state, BUF_USAGECOUNT_ONE);
!
trycounter = NBuffers;
}
else
*************** StrategyGetBuffer(BufferAccessStrategy s
*** 315,320 ****
--- 323,329 ----
/* Found a usable buffer */
if (strategy != NULL)
AddBufferToRing(strategy, buf);
+ *lockstate = state;
return buf;
}
}
*************** FreeAccessStrategy(BufferAccessStrategy
*** 585,594 ****
* The bufhdr spin lock is held on the returned buffer.
*/
static volatile BufferDesc *
! GetBufferFromRing(BufferAccessStrategy strategy)
{
volatile BufferDesc *buf;
Buffer bufnum;
/* Advance to next ring slot */
if (++strategy->current >= strategy->ring_size)
--- 594,604 ----
* The bufhdr spin lock is held on the returned buffer.
*/
static volatile BufferDesc *
! GetBufferFromRing(BufferAccessStrategy strategy, uint32 *lockstate)
{
volatile BufferDesc *buf;
Buffer bufnum;
+ uint32 state;
/* Advance to next ring slot */
if (++strategy->current >= strategy->ring_size)
*************** GetBufferFromRing(BufferAccessStrategy s
*** 616,625 ****
* shouldn't re-use it.
*/
buf = GetBufferDescriptor(bufnum - 1);
! LockBufHdr(buf);
! if (buf->refcount == 0 && buf->usage_count <= 1)
{
strategy->current_was_in_ring = true;
return buf;
}
UnlockBufHdr(buf);
--- 626,637 ----
* shouldn't re-use it.
*/
buf = GetBufferDescriptor(bufnum - 1);
! state = LockBufHdr(buf);
! if (BUF_STATE_GET_REFCOUNT(state) == 0
! && BUF_STATE_GET_USAGECOUNT(state) <= 1)
{
strategy->current_was_in_ring = true;
+ *lockstate = state;
return buf;
}
UnlockBufHdr(buf);
diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c
new file mode 100644
index 3144afe..c62a6f2
*** a/src/backend/storage/buffer/localbuf.c
--- b/src/backend/storage/buffer/localbuf.c
*************** LocalBufferAlloc(SMgrRelation smgr, Fork
*** 108,113 ****
--- 108,114 ----
int b;
int trycounter;
bool found;
+ uint32 state;
INIT_BUFFERTAG(newTag, smgr->smgr_rnode.node, forkNum, blockNum);
*************** LocalBufferAlloc(SMgrRelation smgr, Fork
*** 128,143 ****
fprintf(stderr, "LB ALLOC (%u,%d,%d) %d\n",
smgr->smgr_rnode.node.relNode, forkNum, blockNum, -b - 1);
#endif
/* this part is equivalent to PinBuffer for a shared buffer */
if (LocalRefCount[b] == 0)
{
! if (bufHdr->usage_count < BM_MAX_USAGE_COUNT)
! bufHdr->usage_count++;
}
LocalRefCount[b]++;
ResourceOwnerRememberBuffer(CurrentResourceOwner,
BufferDescriptorGetBuffer(bufHdr));
! if (bufHdr->flags & BM_VALID)
*foundPtr = TRUE;
else
{
--- 129,149 ----
fprintf(stderr, "LB ALLOC (%u,%d,%d) %d\n",
smgr->smgr_rnode.node.relNode, forkNum, blockNum, -b - 1);
#endif
+ state = pg_atomic_read_u32(&bufHdr->state);
+
/* this part is equivalent to PinBuffer for a shared buffer */
if (LocalRefCount[b] == 0)
{
! if (BUF_STATE_GET_USAGECOUNT(state) < BM_MAX_USAGE_COUNT)
! {
! state += BUF_USAGECOUNT_ONE;
! pg_atomic_write_u32(&bufHdr->state, state);
! }
}
LocalRefCount[b]++;
ResourceOwnerRememberBuffer(CurrentResourceOwner,
BufferDescriptorGetBuffer(bufHdr));
! if (state & BM_VALID)
*foundPtr = TRUE;
else
{
*************** LocalBufferAlloc(SMgrRelation smgr, Fork
*** 169,177 ****
if (LocalRefCount[b] == 0)
{
! if (bufHdr->usage_count > 0)
{
! bufHdr->usage_count--;
trycounter = NLocBuffer;
}
else
--- 175,186 ----
if (LocalRefCount[b] == 0)
{
! state = pg_atomic_read_u32(&bufHdr->state);
!
! if (BUF_STATE_GET_USAGECOUNT(state) > 0)
{
! state -= BUF_USAGECOUNT_ONE;
! pg_atomic_write_u32(&bufHdr->state, state);
trycounter = NLocBuffer;
}
else
*************** LocalBufferAlloc(SMgrRelation smgr, Fork
*** 193,199 ****
* this buffer is not referenced but it might still be dirty. if that's
* the case, write it out before reusing it!
*/
! if (bufHdr->flags & BM_DIRTY)
{
SMgrRelation oreln;
Page localpage = (char *) LocalBufHdrGetBlock(bufHdr);
--- 202,208 ----
* this buffer is not referenced but it might still be dirty. if that's
* the case, write it out before reusing it!
*/
! if (state & BM_DIRTY)
{
SMgrRelation oreln;
Page localpage = (char *) LocalBufHdrGetBlock(bufHdr);
*************** LocalBufferAlloc(SMgrRelation smgr, Fork
*** 211,217 ****
false);
/* Mark not-dirty now in case we error out below */
! bufHdr->flags &= ~BM_DIRTY;
pgBufferUsage.local_blks_written++;
}
--- 220,227 ----
false);
/* Mark not-dirty now in case we error out below */
! state &= ~BM_DIRTY;
! pg_atomic_write_u32(&bufHdr->state, state);
pgBufferUsage.local_blks_written++;
}
*************** LocalBufferAlloc(SMgrRelation smgr, Fork
*** 228,234 ****
/*
* Update the hash table: remove old entry, if any, and make new one.
*/
! if (bufHdr->flags & BM_TAG_VALID)
{
hresult = (LocalBufferLookupEnt *)
hash_search(LocalBufHash, (void *) &bufHdr->tag,
--- 238,244 ----
/*
* Update the hash table: remove old entry, if any, and make new one.
*/
! if (state & BM_TAG_VALID)
{
hresult = (LocalBufferLookupEnt *)
hash_search(LocalBufHash, (void *) &bufHdr->tag,
*************** LocalBufferAlloc(SMgrRelation smgr, Fork
*** 237,243 ****
elog(ERROR, "local buffer hash table corrupted");
/* mark buffer invalid just in case hash insert fails */
CLEAR_BUFFERTAG(bufHdr->tag);
! bufHdr->flags &= ~(BM_VALID | BM_TAG_VALID);
}
hresult = (LocalBufferLookupEnt *)
--- 247,254 ----
elog(ERROR, "local buffer hash table corrupted");
/* mark buffer invalid just in case hash insert fails */
CLEAR_BUFFERTAG(bufHdr->tag);
! state &= ~(BM_VALID | BM_TAG_VALID);
! pg_atomic_write_u32(&bufHdr->state, state);
}
hresult = (LocalBufferLookupEnt *)
*************** LocalBufferAlloc(SMgrRelation smgr, Fork
*** 250,258 ****
* it's all ours now.
*/
bufHdr->tag = newTag;
! bufHdr->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_IO_ERROR);
! bufHdr->flags |= BM_TAG_VALID;
! bufHdr->usage_count = 1;
*foundPtr = FALSE;
return bufHdr;
--- 261,271 ----
* it's all ours now.
*/
bufHdr->tag = newTag;
! state &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_IO_ERROR);
! state |= BM_TAG_VALID;
! state &= ~BUF_USAGECOUNT_MASK;
! state += BUF_USAGECOUNT_ONE;
! pg_atomic_write_u32(&bufHdr->state, state);
*foundPtr = FALSE;
return bufHdr;
*************** MarkLocalBufferDirty(Buffer buffer)
*** 267,272 ****
--- 280,286 ----
{
int bufid;
BufferDesc *bufHdr;
+ uint32 state;
Assert(BufferIsLocal(buffer));
*************** MarkLocalBufferDirty(Buffer buffer)
*** 280,289 ****
bufHdr = GetLocalBufferDescriptor(bufid);
! if (!(bufHdr->flags & BM_DIRTY))
! pgBufferUsage.local_blks_dirtied++;
! bufHdr->flags |= BM_DIRTY;
}
/*
--- 294,303 ----
bufHdr = GetLocalBufferDescriptor(bufid);
! state = pg_atomic_fetch_or_u32(&bufHdr->state, BM_DIRTY);
! if (!(state & BM_DIRTY))
! pgBufferUsage.local_blks_dirtied++;
}
/*
*************** DropRelFileNodeLocalBuffers(RelFileNode
*** 307,314 ****
{
BufferDesc *bufHdr = GetLocalBufferDescriptor(i);
LocalBufferLookupEnt *hresult;
! if ((bufHdr->flags & BM_TAG_VALID) &&
RelFileNodeEquals(bufHdr->tag.rnode, rnode) &&
bufHdr->tag.forkNum == forkNum &&
bufHdr->tag.blockNum >= firstDelBlock)
--- 321,331 ----
{
BufferDesc *bufHdr = GetLocalBufferDescriptor(i);
LocalBufferLookupEnt *hresult;
+ uint32 state;
! state = pg_atomic_read_u32(&bufHdr->state);
!
! if ((state & BM_TAG_VALID) &&
RelFileNodeEquals(bufHdr->tag.rnode, rnode) &&
bufHdr->tag.forkNum == forkNum &&
bufHdr->tag.blockNum >= firstDelBlock)
*************** DropRelFileNodeLocalBuffers(RelFileNode
*** 327,334 ****
elog(ERROR, "local buffer hash table corrupted");
/* Mark buffer invalid */
CLEAR_BUFFERTAG(bufHdr->tag);
! bufHdr->flags = 0;
! bufHdr->usage_count = 0;
}
}
}
--- 344,352 ----
elog(ERROR, "local buffer hash table corrupted");
/* Mark buffer invalid */
CLEAR_BUFFERTAG(bufHdr->tag);
! state &= ~BUF_FLAG_MASK;
! state &= ~BUF_USAGECOUNT_MASK;
! pg_atomic_write_u32(&bufHdr->state, state);
}
}
}
*************** DropRelFileNodeAllLocalBuffers(RelFileNo
*** 349,356 ****
{
BufferDesc *bufHdr = GetLocalBufferDescriptor(i);
LocalBufferLookupEnt *hresult;
! if ((bufHdr->flags & BM_TAG_VALID) &&
RelFileNodeEquals(bufHdr->tag.rnode, rnode))
{
if (LocalRefCount[i] != 0)
--- 367,377 ----
{
BufferDesc *bufHdr = GetLocalBufferDescriptor(i);
LocalBufferLookupEnt *hresult;
+ uint32 state;
! state = pg_atomic_read_u32(&bufHdr->state);
!
! if ((state & BM_TAG_VALID) &&
RelFileNodeEquals(bufHdr->tag.rnode, rnode))
{
if (LocalRefCount[i] != 0)
*************** DropRelFileNodeAllLocalBuffers(RelFileNo
*** 367,374 ****
elog(ERROR, "local buffer hash table corrupted");
/* Mark buffer invalid */
CLEAR_BUFFERTAG(bufHdr->tag);
! bufHdr->flags = 0;
! bufHdr->usage_count = 0;
}
}
}
--- 388,396 ----
elog(ERROR, "local buffer hash table corrupted");
/* Mark buffer invalid */
CLEAR_BUFFERTAG(bufHdr->tag);
! state &= ~BUF_FLAG_MASK;
! state &= ~BUF_USAGECOUNT_MASK;
! pg_atomic_write_u32(&bufHdr->state, state);
}
}
}
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
new file mode 100644
index 521ee1c..5745bfc
*** a/src/include/storage/buf_internals.h
--- b/src/include/storage/buf_internals.h
***************
*** 20,48 ****
#include "storage/lwlock.h"
#include "storage/shmem.h"
#include "storage/smgr.h"
#include "storage/spin.h"
#include "utils/relcache.h"
/*
* Flags for buffer descriptors
*
* Note: TAG_VALID essentially means that there is a buffer hashtable
* entry associated with the buffer's tag.
*/
! #define BM_DIRTY (1 << 0) /* data needs writing */
! #define BM_VALID (1 << 1) /* data is valid */
! #define BM_TAG_VALID (1 << 2) /* tag is assigned */
! #define BM_IO_IN_PROGRESS (1 << 3) /* read or write in progress */
! #define BM_IO_ERROR (1 << 4) /* previous I/O failed */
! #define BM_JUST_DIRTIED (1 << 5) /* dirtied since write started */
! #define BM_PIN_COUNT_WAITER (1 << 6) /* have waiter for sole pin */
! #define BM_CHECKPOINT_NEEDED (1 << 7) /* must write for checkpoint */
! #define BM_PERMANENT (1 << 8) /* permanent relation (not
* unlogged) */
-
- typedef bits16 BufFlags;
-
/*
* The maximum allowed value of usage_count represents a tradeoff between
* accuracy and speed of the clock-sweep buffer management algorithm. A
--- 20,62 ----
#include "storage/lwlock.h"
#include "storage/shmem.h"
#include "storage/smgr.h"
+ #include "port/atomics.h"
#include "storage/spin.h"
#include "utils/relcache.h"
/*
+ * State is:
+ * 10 bit flags
+ * 4 bit usage count
+ * 18 bit refcount
+ */
+ #define BUF_REFCOUNT_ONE 1
+ #define BUF_REFCOUNT_MASK ((1U << 18) - 1)
+ #define BUF_STATE_GET_REFCOUNT(state) ((state) & BUF_REFCOUNT_MASK)
+ #define BUF_USAGECOUNT_MASK 0x003C0000U
+ #define BUF_USAGECOUNT_ONE (1U << 18)
+ #define BUF_USAGECOUNT_SHIFT 18
+ #define BUF_STATE_GET_USAGECOUNT(state) (((state) & BUF_USAGECOUNT_MASK) >> BUF_USAGECOUNT_SHIFT)
+ #define BUF_FLAG_MASK 0xFFC00000U
+
+ /*
* Flags for buffer descriptors
*
* Note: TAG_VALID essentially means that there is a buffer hashtable
* entry associated with the buffer's tag.
*/
! #define BM_LOCKED (1U << 22) /* buffer header is locked */
! #define BM_DIRTY (1U << 23) /* data needs writing */
! #define BM_VALID (1U << 24) /* data is valid */
! #define BM_TAG_VALID (1U << 25) /* tag is assigned */
! #define BM_IO_IN_PROGRESS (1U << 26) /* read or write in progress */
! #define BM_IO_ERROR (1U << 27) /* previous I/O failed */
! #define BM_JUST_DIRTIED (1U << 28) /* dirtied since write started */
! #define BM_PIN_COUNT_WAITER (1U << 29) /* have waiter for sole pin */
! #define BM_CHECKPOINT_NEEDED (1U << 30) /* must write for checkpoint */
! #define BM_PERMANENT (1U << 31) /* permanent relation (not
* unlogged) */
/*
* The maximum allowed value of usage_count represents a tradeoff between
* accuracy and speed of the clock-sweep buffer management algorithm. A
*************** typedef struct buftag
*** 137,148 ****
typedef struct BufferDesc
{
BufferTag tag; /* ID of page contained in buffer */
- BufFlags flags; /* see bit definitions above */
- uint16 usage_count; /* usage counter for clock sweep code */
- unsigned refcount; /* # of backends holding pins on buffer */
- int wait_backend_pid; /* backend PID of pin-count waiter */
! slock_t buf_hdr_lock; /* protects the above fields */
int buf_id; /* buffer's index number (from 0) */
int freeNext; /* link in freelist chain */
--- 151,161 ----
typedef struct BufferDesc
{
BufferTag tag; /* ID of page contained in buffer */
! /* state of the tag, containing flags, refcount and usagecount */
! pg_atomic_uint32 state;
!
! int wait_backend_pid; /* backend PID of pin-count waiter */
int buf_id; /* buffer's index number (from 0) */
int freeNext; /* link in freelist chain */
*************** typedef union BufferDescPadded
*** 192,207 ****
#define FREENEXT_NOT_IN_LIST (-2)
/*
! * Macros for acquiring/releasing a shared buffer header's spinlock.
! * Do not apply these to local buffers!
! *
! * Note: as a general coding rule, if you are using these then you probably
! * need to be using a volatile-qualified pointer to the buffer header, to
! * ensure that the compiler doesn't rearrange accesses to the header to
! * occur before or after the spinlock is acquired/released.
*/
! #define LockBufHdr(bufHdr) SpinLockAcquire(&(bufHdr)->buf_hdr_lock)
! #define UnlockBufHdr(bufHdr) SpinLockRelease(&(bufHdr)->buf_hdr_lock)
/* in buf_init.c */
--- 205,215 ----
#define FREENEXT_NOT_IN_LIST (-2)
/*
! * Functions for acquiring/releasing a shared buffer header's spinlock. Do
! * not apply these to local buffers!
*/
! extern uint32 LockBufHdr(volatile BufferDesc *desc);
! extern void UnlockBufHdr(volatile BufferDesc *desc);
/* in buf_init.c */
*************** extern BufferDesc *LocalBufferDescriptor
*** 216,222 ****
*/
/* freelist.c */
! extern volatile BufferDesc *StrategyGetBuffer(BufferAccessStrategy strategy);
extern void StrategyFreeBuffer(volatile BufferDesc *buf);
extern bool StrategyRejectBuffer(BufferAccessStrategy strategy,
volatile BufferDesc *buf);
--- 224,231 ----
*/
/* freelist.c */
! extern volatile BufferDesc *StrategyGetBuffer(BufferAccessStrategy strategy,
! uint32 *state);
extern void StrategyFreeBuffer(volatile BufferDesc *buf);
extern bool StrategyRejectBuffer(BufferAccessStrategy strategy,
volatile BufferDesc *buf);
On Thu, Oct 29, 2015 at 8:18 PM, Alexander Korotkov <
a.korotkov@postgrespro.ru> wrote:
On Thu, Sep 24, 2015 at 6:36 PM, Alexander Korotkov <
a.korotkov@postgrespro.ru> wrote:On Thu, Sep 24, 2015 at 6:32 PM, Andres Freund <andres@anarazel.de>
wrote:On 2015-09-15 20:16:10 +0300, YUriy Zhuravlev wrote:
We will be tested.
Did you have a chance to run some benchmarks?
Yes, we now have 60 physical cores intel server and we're running
benchmarks on it.We got a consensus with Andres that we should commit the CAS version first
and look into other optimizations.
Refactored version of atomic state patch is attached. The changes are
following:
1) Macros are used for access refcount and usagecount.
2) likely/unlikely were removed. I think introducing likely/unlikely
should be a separate patch since it touches portability. Also, I didn't see
any performance effect of this.
3) LockBufHdr returns the state after taking the lock. Even without using atomic
increments, it can still save some loops by skipping an extra atomic read of the state value.
pinunpin-cas-original-fix.patch is just the original patch by Andres Freund
with a fix for a bug that caused a hang.
Performance comparison on 72-cores Intel server in attached. On this
machine we see no regression in version of patch in previous letter.
------
Alexander Korotkov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
Attachments:
pinunpin-comparison.pngimage/png; name=pinunpin-comparison.pngDownload
�PNG
IHDR � � r)W IDATx^�� �Vs����R��0�����F!Sf�P�dT4� �([&�5d������2�X�uThdM
**��_���\�}�s�}�s�s�y����+�=�������~�����
6� @ � ��$3 @ � @ e�df @ � @`#$�� @ � @ @��� @ � ���$3# @ � @ d��� @ � 2��@ � @ �@F,�fb@ � @ � 6@��
� @ � $�9 @ � @ �H�L23� @ � @&�9 @ � @ ��� @ � d$�rk& @ � @`#$�� @ � @ @��� @ � ���$3# @ � @ d��� @ � 2��@ � @ �@F,�fb@ � @ � 6@��
� @ � $�9 @ � @ �H�L23� @ � @&�9 @ � @ ��� @ � d$�rk& @ � @`#$�� @ � @ @��� @ � ���$3# @ � @ d��� @ � 2��@ � @ �@F,�fb@ � @ � 6@��
� @ � $�9 @ � @ �H�L23� @ � @&�9 @ � @ ��� @ � d$�rk& ��8������?v��]��5j����ic}���_��W������l�m��?��O�i\�]�3�8�v�i';���b�8w��w���#G��7�l[o�u��B�! @ $HrH�i���$��o��s�1��?������'�0I���_n����kr�����Y3;�����B��E��o���M�<�.���� �9�����>�����n;��s�a����I%� �lHr���� ��$���&��������SN�����v��W&x�����_o��/G��Z� B I+�B �@&IV����]w��:�����-Zd����=��#��'��[�N��`o�������~����:{���*F��A���1cl�]v)o�{���j�*{��g���������z��a�Z�j��f��m�o��n��SFV����SO=�e���o�3�W]�r������������_�f�m��#���?��+�<�����~jk�������N8��s�=�sZ���GUx���/����7�k3f���{�q�[���l��.��R��W��y���y��.���W_���������?�sR=K�.u1|��Wm��5��_����1�3����=���n���
�<����W�T������k��w�Y��M��Cu+T�-[f���+��>pQ����������������<Q\��[�V6<�����~m��f���{����S����;m��i�������o;����x����C������6a�{��7��j�������vsu�M7�0��1;v�����n��������/��U��Y��^�z.���~z��^psEq�S�����n�o��W��Y�2|� "I I�dX� �� d�d �$E��%�*�d���w{c�= ������ �J���p�B�(8���x��N�<��h���S�%��(J�%@�3 I!��DN�\�zuy�
40I�D��3���]����[����n�l������v���;AS=���\3�/��R��������m�}�uR����QG�X�X�d����7i���~�m������n�z�r�I�G ������>}�c$q���{���;;q���c��������'�DT����=�X'��P�Wb���]��4���}�q����z=I�>��N;�I���+��_�q:���\|���b��K7���5�8��=I�C=dO>���q���U+��D[�hn���G��O�>���+}�[�'=������m�$?��S���?���N�W�X��(hN�?*S�Lq1�8k���������n��=����R�?�<@ Q"�$G)��@R%�����+C�,��W_�$O%�$+�v�=�X�-�[�HN��J���������+�& ��Q��eFb)�R��+dI[�����LU�������s7�t�����N��*� A�(�^�SB���'YK�%���u��>�^�C{��e�����%�\��Z�T����;1U��V� ���9��l��N�4)�����"+N��R�����s������5�}��,�2��$t�A6l��������1H@S��K�g���_�{_j����(��q���.�2&��BsF�����P.���������7�Ip�\����+�Z�R����$9���*�7 @ ���H��NA ���$Y�D��/������sB��}��
2����S���O��1S.WI��VO�z%U���z��dFr�vjQ�S��(����L�T���^��{�_������q�-��]�+�K]R�I�?���U���k�^���!�L?�K��y������kj��o�uY�������J���]��}��.{��UW�����\sM���������"y�ruI���$W\S��++.�U������\S&���������=����2�U��>�� *��n��M������������: ^��e�zG���$��M^]�0E���$��ig��TmP�& D� ����!@ �H=�Z{_�dV�d�K&��zY=�yO�%2����$g��H�'q����+���?4��Z��e��X����L�T���C������{�}M��h��2������2�(e�k�d���^��zK�3�"S?�9��-+�r��"a�Rf�S��h �D��o�q�O����TT��X{�������Zz�,�2����}�+�$+S���^�$+;���*U���IB���'������CK��RA�4�tI�4��%Ymh����������m �b]]�� 1���$9����~�y
� �B I�J$� � TupW&1K�l��J���r����r�d�M���+t���Nr��2��
�$Y����9�h���n{�d���L��w�uW'P��j�lM����wL����JUW@�K��S")��E��-����L��Nf�����LE/���[���H@��$w�qn/�_��=��{��������r�d��l����X�VPW�f�y�W��5mO�6��9�C�����L��>��M{��|�d�?�<@ !@�C�MS� �$�$��c-��>U0���'��k!�<d�w��W�l\��j�����d�d,��S�?;w���=�������36-�M�d�����k�tW������{YSc��$��N���d���l���@�e ~z����9����`���F��IVf]����m��7I��]���A�r�M��o�,PYB,)M��k����B%��>�P�Z��k����Z��y�Z���Y���g��S6��@ �$9:��'� r"�$K�9��F�/,��5<7�p��:�I�rn�����N�����e�;�:����T�Lw��� ���I�Q���R*:�J{��5-Y�>c �NlV���I�z%�<���!\�Y}]���ow�}�qc�|�@)�y�V���ZYVe�u5���R�W�RI^u��2�|�?������nU�d����I��>m�WFU��uh��U�TYUe��@�*q��f�2���>�P��p�Ce���������B'V��Z��%��a>��ZV��T�>���J4�YEsC�t]_&��>n-��c��zWI���,� HrlBEG! T$�$�5]}$q�$�(�(���b-GM�'9�����J���U"��=��H��*�� ��L�$��p ��S��%�Z�;z�h�%�$YYc-����;e=%S*�WP ��������$��J��Q��WTI�%��3]�%^���UK��$\���up��%WU��b:g���[R���}�:[Yi�A"������� �/I�)�Z6�_�T���I��a���k��W�_K�%��H�����]}��J�.����=��S��z���I�od@ �%�$'7���@�x�(Uw�t���Jn���A�A���$[����t��6�� �K I��'�A �@52�4��2�a�M� Tw�5� @ �&�$;�@�� �%�j��$3 @ ���(G��A H$9a�s8Hr��x
� B!�$���F @ � @ ��8D�>B � @ �@(��P0� @ � �� ��(�G@ � @ � �
f� @ � �8@��%�@ � @ �@�C�L#� @ � Hr�D! @ � @ Hr(�i� @ � �@ I�C��# @ � �B I3�@ � @ �@ �q�}� @ � �P ��`�@ � @ �$9Q��� @ �
$9�4@ � @ q �$�!J�� @ � B!�$���F @ � @ ��8D�>B � @ �@(��P0� @ � �� ��(�G@ � @ � �
f� @ � �8@��%�@ � @ �@�C�L#� @ � Hr�D! @ � @ Hr(�i� @ � �@ I�C��# @ � �B I3�@ � @ �@ �q�}� @ � �P ��`�@ � @ �$9Q��� @ �
$9�4@ � @ q �$�!J�� @ � B!�$���F @ � @ ��8D�>B � @ �@(��P0� @ � �� ��(�G@ � @ � �
f� @ � �8@��%�@ � @ �@�C�L#� @ � Hr�D! @ � @ Hr(�i� @ � �@ I�C��# @ � �B I3�@ � @ �@ �q�}� @ � �P ��`�@ � @ �$9Q��� @ �
$9�4�D+V��
X�:u�8<��%�5k���
�\��.�����j�*k��q�B`���w�}g�m��m��&� ��@�c8�]|Hr�c� �Q�B���$?Q���H�@ �@��g��%N I.� �q�H2�@�d��G If.@ �?$9�1dE"�$ |��E�#�"uI.�6�$G0(t �@�����8<H2sA�d��d�@*$�� @ ������� �\$�kI�X@��2�E�f���.A �� ��#0� �d�@*$��@&�9@&�9 @ Y��d����H�Lr��#������52�!��xSd�# �@ Hr�x� ��@��d��d��� �E INV<M���aG�)$9�� �kd�C����$G<@t�@��, ��$3�"�$37�$3�$3 ��u����o�i{���?R�$�$� �� @&�9 H2� If ���C���^�W_}��8�*��I I��A If ��� ����[3�N`��v�������o��<��~��wl���v�A��G�0L�6�}�Q����I�&v�EY�f�������������+[�v�z���������_n����n����n���v���.WG����@��[�y����~fs�����z�n��fk��U��3�� �!����@���|FD&9j�{IN^L���/9��D`�������OQ����;Z���Tj��C�SO=�����KM�|�1����k����������� �G}���a��NtW�^�$���ow���wo[�~�-_����j+{���m����dI�Yg�e7�p�m���v��W�v�mgG}�����v�9��^{�UF4�\Hrrc��&�$8&�#�1 T��D�����+&]m}�SE���W��]I��q�������G�gqO?�t'��[����2��?���?��S6c�'����tI����nK�,������~�m���{����s}X�p�]x��E�C��&�$';��.@Hr�pcT5��`�U$9@�1�I�Y�b�����(�z��UJ�$�V�Z���1c�X��M�+S<t�P�q�m���6s�L��a����-���F���{��g���sb�u���+e������'���V�~�M79I����;�(|h4���d���H In��F�c� ��$7fU#�1����2��_�T%��w����Ho��f��+���O>Y.��������rK%I�4i�-[��N:��J��$k?��wS �7$�o��W2��� u�E��"�$3<H2s!���d�-~���Kf���Jw�����O>q�c��}������f��'�x�-�V�R�a���w�|[�h��3#�I�i��\\��cHr���c��da��*$9�����H��@�.r��dI�e�]f�}��5j������Nx%�&L�:u��,�i��f����������_|�;�����7�p-9�T��0���$Gnj$�CHr���`�$�$�I;�m!���M�=C�������h���A �� ��
%��@H2�B�d�� ��� ��\� Hr�c��D I.��5�$G, E��\$�lI�`P� � �9�qx�d��d��G I.��0�|[�n���j�*�����n�v�
��C �1$9�����%�$�TZ'��[$Reb����^#k�Ysk��ynE�i$9b��;���/{��M�U}�+��.�w}]���t����?h|��@ �$9
Q��$�$�2l�wI6'^��T��b��h���K�[4l���m����EIG������������������i���m����t��Z��n�v�u��{!��. � �\$�4Hr�c���*��hT%���o:c��2��e����rm�R���^��r��Jt��AHrjf�����<��T
�6t\�PR?x�xR��5�o��oX���m�?o�y�ysO��}����G�}�F�1�����gZ���=@ ("$���i:���x����'I�%e��
�vi&n�Y4O$R%B��l�S+�� ���}��������V�����{Ad�s�dOS?�H�B��'x�qH� ��W�\�����b��\9�]21J[��V'���N}Ww��� ���lp��6�����@ � �h�I$9y1�gDI��tAN�B�7�����d�P���N�?SV0U(���K_�8�J�����������Mjf��������������f��=���qf���l�����5�����~��g���f�KM��}I�������Nr���&f�!Q�}�n@ �� ��/��@9$�� I�d��Y3�r�%9�s���>���l�7�\��A��[v�*��"\H����fl���Cf>�>�/�O� ������(��]u�?������;���]GF�k��@9�y���]w�e�^{mF*����]���n�:�����u����w��Z����}����I+��$��\��.�'-��L�]�k�V�'2�T�����EoA IDAT�J?�I_O=$�{��.{Y�L"�o_�~/cFz�����������~�3N�y��d��3���o?���
��8���B ��#�<�v�u�\^���?�����^92�Lm����;�87n���S�J.~��&I>������N��v����}��
��>��~��G�n����H�o?6T ����km�x��+~sE����?�n�g
w�������:*�1,��L�x���f�7��p��uOr�K��&��L[��l��sw�����}k������@��z=9$�����m�m�����gE�?�`}�����.Ei��F�|�M���~UmU~��&I����c�����es��wZ�6m�7��
��-4��@� �Q�F�}�XJ0%�*Z^�+_�r����]�U|_�Ip�??)�T9N�DY���l�2q�����4���`Q[\�����|���Js{�v]�� S�P}��u[Fvi�w�]�n�8� �L��lZ��l�2'3Z�������w���^|�E�Ab�
�/��5j��f��m�'O������`��v�m���6`� ���o/����^����kg��~zy�����C���h���j�n��������������>��c�����O�>��G����3f�}���V�n]=z����[n��^{���_om��ucT���'����-G�l���������R�^�_~���SN9�:u*��PuIF���7NI�!�bS�Nu���b"~|��c(����T��qN�6�}�Q'�M�4��.���5kV�Uj����`�����v��:v�h]�t)o���n��
� '��^;�������w���U'��_���?���3f��7��x�
����G���~�1����
2���}���;�V������}M��QXn�E�(IHrI��
:��������u7��|U�?�������4�TAV_���J��5R�q�{e�;fk���[lk�TZ�W�g��u��.��D�}�j4�=!vR�I�WUZw�
_�m�~Xav�X���y��{] ���QM��g�j�Y7g�]���mP�mI��'��$VBu��g;����C%I�L^v�eV�vm������8�FIV����>�-�=��3�O���"��d���/�Z�j9��X��tI��a}_R�|�r4h�M�0�IpjQ�����w<��������*��
s��%�z��{��z����%Kl�m���J�j������?���;�<����Pk,F�k��e�#�J��������N�%���vZN�Tf��?����Z�y�gT)�� �8�{����T�z���.�Z���/d\�]]���\\��[�(.b�8^y����_��<�<}�t7v-���/=����]��^�~}���k�����_~$���Q����{EA����-����r����H{������vwY�(�2���~iW�2�U�o��l��~W'�)�]��Y��O���Jj�����M�L2�R�w�>�[��F�7���Z =n��O2��������@��:�?����z��n�5��lNk.����l��wY�i�����3�8Nx���+�
�!+�WGi;��o3R��)���qc�����,����S��(��-��L���+v�d%�4����r��wv��^�a���>$�*+) O�deD%�^��I�<!���`�����]����%������={V�X)yK�W�i����X4��+����QEc�~kId���3g��~+�]�r��$Y}y���]_$�_�zuqJg�� ��_����$��K.q���N-�_le�U�Q~��W���� �~P���$�$�^�up���.v��i���_��-�io��ng�E����z$�Z~�zuM���(��{�>S�A�+�~� �>����JR�1K��� ���deG��~`�}�=��bs����9���F�-~Z���Se:�� ���TZ�����Go��N���d]�46���j}2�l��������oJ?W������*�I ��(� ��0c���)S��&�l���L��E+���Z����]���K��L��%��I���x���\&WEK|�����sm������sO��o���e6��8]������+������3g���*4eY%��F��0fe�|�A��Y}Y�h����W�X��xj��Wb�Z�2�b�Z�2��~���.k,���x�$+{���2��`BK�%����U�Vn������~w�b�-��uhY�����}��.�\�$��O<���>��s2�����U�����r\��\t����L��Y�l��X:�������E�(IHri���)����F_��6l����x���:�Y�����U��O=�a���������c���,��G�)�bK�_�R���L����+�����[�(���C��N#��SWWk�������)����c�g�� T|��;�_M�'���t�����~)�� ���,�y('In��[V��^e�pe������n��3�����k
�J@�,��=�j�l�Lr�=���o��I���Q�z��N2=I����S�$k�����;�e�3e���8�$Y��Z$��:�g���������M�:I����Km���n���������Izu���k�R%YK�/��R�i���e��n����KrU���+V��K�����'+Ir��X�$k�����/�p���Fz��V'��>T��1k����L�,�ZA�)�\����gI��"u�$$�t��*���g������ f���S_�
�6������!eP:���ws�����be��������z
����4*�,)�2��������fg%)���Ve~�u^��%�]Y��g���Lr[�����x(���O8U�3�w/�
o[��=�,��w�&SN7{�!���q����cR����#�E��$N����TFT�����?g��\��,^����?~���������l���YI��_���+��b-��e����$��(eF����)c���\W)������L���,��U��>t����v��-D�S�Wr�}���|���:�W�6�5F��<v�X��[Z���l%Y �� e��WYZ4��I�E����b����|��[�.N�c�=���U'�&�VvXl������$k���m���\���:�����v����#���3�Y�@�K#���;z�h��;��.?���3m��t�������Dyv���gm��N�=k�Ye����q{�d���F���,�e�]{�����dM'}{���=����x��W���� �>c)���%y�f�u);�����2����oW���-���k
�H@vIr���r�^�zUy�uu�>-U��H��)�>W-��&���{����X����<�:[IV��Y�������v ���8��XvI�����+������e�E�m)��t����{���{�&IVf]Z�w���e����V8���qJ&u�����%����W�$+���+���=����w���}x��uk7����E��[��_R�}I��:b�w`���:a[������A��SO=I��_��4 ���s�k�v:�����o<�VbT.�B"Q~���_�;����3�����J��{����� �e�.J~b�'9�a�V������K���e��%�f�sROw%�x1}�J@r����*�/]%Kr�!T� BYV} ��7�$\{�%�Q/q����>��_d ��
�/�tM�����{�����'�P%I����C�EYW��}kl�����ux�����$9'\�~��$��e���}�k���A����*�S!��K]\�Af���A��=������d*����3}@��v�K���$G}����@�#��;���Z�R~M�2�����P���I���f/����k�����,���wY��g.4{el���������~v���B�K,����$�9o���y}���PnG��QpB ���p��}���p,w(����H���P��8D�>B Hr P#Pe�k�t �~�������p�U�L�7����������]�.�y���l�{��v��y{����+-����7��1��K�%^ �\� e��$Y���x�_��J(-���k��z=V�� pB �@Q �I.*~�3$9����wO�uM��.�����Y��e�>KvSJ����2�������#:LK�_��Z'ikI�J���Z������n����a�b�F�$ |��(�d�����[l[��+�"D��<$��� �%�$�K.z�UyM�7������(�������ie��?1�����L���_�5 r��^T��#$9^�
��%Y
>>��J(}8���
WB�o��c�
@ Y@��F���H IN���R��y���<���+38+I���O�y-�wY6Z'������\������{]`�w�]������������
OF$�;
$�����z���k������Hh��^�r�z���@ Q!�$G%�#v�����R�S��=�U��^I��\�(�[���/�ry����������� z���2�:<��;$�w����JI���+��A��3�Ox�wEWB�6�tH $9�AeH�@���T+�^�T�A]Ir�(KX�Q���9}���{�_93l����BF�k�����+5����X/�*�H7V�$Ws%��+�
9� �`� �@�c,�-Hr���Ko���*X�ke ���F���LvMi��dY{�UREZ��1��T��Zc�!]���Z*N9�)���Hr���B���~��4{d@����+����~�uX���@ ��@��Y�f��zI.y��=$9�!��x�;,K2�;J���'u[�pP�/�\�(����x��+m���.K���*� ��de�=y.��'��B�c�N��N Iql�Q�5�WB��������9�t�5q$p�����Gi���kQ��������k#G�,J�~5��g���?�h�m�]�U"�~�\�[jN8$9~���,��M����o��^����`����_.��n������*�W��yT�xk���#���[��J�S?�K�J���d
�H`���������~���t��~��>��v�e����W�w�y��i��~��� �~A��$9G`<� ���P� k(Y���$��jD����{���>!��Q��#��u� ���Hr��c��������WB�K='�tW����yvl�c}�UA�?�w�|��������e�������5�����w���^|�E���
4����/��Q#�={�M�<�B�����v�m����������������z�jk����~���m���>��.��E:t�m���6o�<������k�u���C�V[me��}�����O���GF���3�>��S�[���=��}�-��k��f�����m��1��S��x� {���m��u��f����[o�u���lU}����~������ovu6l��1S?y�{����n����!C\_v�}w�1c��Z����~{�5�G)� �\?�.aHr|�����>_���|�pPW ���s�+�WfY��+���Y����''�:��6��$��+������ ��]m����vm��!^��SDv�t,(k���q�T���;h�AV�N�J�r�!v��';�U������SN9�:t�PI�%��]v���]�n��v'~�Q���d����v���<�L������P(������V�ZNl%�z>]��:�,�}�����m��A6a�'��EmhL�C����$������s��6�-'����{��{��z����%Kl�m��������Lu��+���~��_�g��O���\���P�h�����������k���~��_8��� �����&�$�#��=����.#+���A]�Ir�\�x��d]�����xD'9�D���BG��$��j��������A�������(}~��w3�+�UV�q�������?��/�,�l*���}{'p�8{K�_x�{��W��.�J�~�a'�*7�x������>��>���~�{Z^���$<]��QVf�+�d e���+�m��v��W�:�+���s��(��N�{���2��Ju}M���o���$��K.q�t����k�_�qe�U�Q~��WcJa�����v @������e����D9�{��A�-���Or64C{I
u��I�5e��V�w��[k��
WBE>��tp�[cCi'��!�dlW�:e��d�M��������.��R&Y����]k)�����Zr-I�����z�erUn��&�����sm������sO��o���l�>�h%I�������|��J8g����b�h�������5����)����>p}Y�h����W�X��3g�[�-qN/������3���v��q��m��_��L�5�$'(�%\Hr��sm�d�#�k�*�����}�������)�Q|�d����eW8�d�w���1fC���2�\��G��{I��e�k�Y�3\ ��!�&���t�5Q" �� ���[e|�7��#��I��]�O��7�x�&M�T.�6l�C=������:uj���%�w���p���s��u��L�do�W�'��Z���T��sz&��;�p{��Z
��M�6u����K������Rm�?N-����Z]������E��I��q��L��$�1j�9��H�!c'RY�$���
%5��k�~�<K ����^��*�de�59������?�����$k���P?���}=&�pWB��d��L�@TH`?�p�G��o�u�����?g��\��-^����?~����e����l�������$Y�����+��b��,s�=��J��:����j���R�Xr�k���U�Z�T&Y������e��&�W�w����;�{�5i��e����UW]��n ���c�0S��M����Rq����_�����J� "*�%��@��9�]tHr�C�� ���Jo40IN��C�Zw)�����ew �A��DD�#�Ht"/I�p%��s���f_e�6w��)�
����%�Z2��W�*O��.�����:��N;�d�����[�-��N���c�g�.���U�n��$���6��/-��!X�v�\�?5N���$���he��a����iI����g |�����V���f�[����G��-Sy�vp�������������$�C�$���M I�^|k� ���Rf��
T��E�C���2A�D� ��P�#yI���n��[�S���-}��u��n?���Z����#�e��/\�"��uQ����8�5�,���6q�K$9Z��J�S������'-�.���\�������@����%9��P���A����&������@�~������8��si/N}�e\��,�\
Qf��@���W���X���J�t��������l��c���O/�"�jN����@.0^A��$E6~��-��w%���
-�~���g���fX�{���#��'� ��}H���YE�I$9j�?�!�$G#T�L6I�J�{�����������~��$���RG`��������$Y����:�
��CLWB�����U�����D�! D�@�J�_|a_|�;�.�^3
�M���sO�������:���w_w�Hr��@V��nzup4�\�y� �Q�B4�P�$g�J������\ ��@���$���[v��7�S���kWI�uJ��������3��K�[�h�$��F6CD����3Y��u��I.�q�I�S���k����e�J�x)�\���`�E�� J�@IJ��l�m��;��Q�F�$�[�n�����:�|2�^4]�`��
��v����0�'�#��W�vm��L`�GSl��#\�#�a�v����?���y���������;�{O��Dw$j.PJ������
4(]��X�z�m���������Y�[�t��;�
��y+����!^SzMq�yQ�M��/D;>��(II���s�����Z���=�\��mu��1]x>j�(��Z���.�^�j3�D H��� Iw<��Sv����F/�t������/�
�=�j�����2���zX UQEs�R��A�����n������I��u�w��ZH�7��;�.�_�}mM�I���/�7�~�Njw���mJ�P �x@�3H�.2�rle���o?{��\�Y�����K,���\���,��i�N�t��5�l���]��{���� �Jo���5��$`�uI�9�A���Z-e�j����oJ?��i}�v%���+��e�W~\�� IDATo�m:e5~� �Hr�<�|�>�����v��'���_o���$'a��3$�����_u_�~y��*�TA��.$9����sHri�����&�j�;S!�J��3����L������^�]�=��CU����+���j|m���6M����?�3�H*$�
In���
0����s���#�Rk$9�?
��I��Y�oDU�52��F5Y�!���g!��U����+�$�=&�pWB��X�7y����]�b����BJ�z
�m���U�X���_Z1��}q.�4�B Q%�$W!�M�6��n��f��a��
�.]� �Q��E���(2���C+Hr�N}���g��v]s����?�+��Qj��E��K�%��T���00���N�0�_V�g��*�j?��l��3� �M I�F���Z�tM�0��A&���5Z�#���#���$?����H�O�%Y]����p����AH�3 n�?ZsY�l�y�f{9Y�r�6M��lS D�@IKrT�B��A I6NY��w�l�?C���>j�[;�R;��H��@$y��f7�^��Sg�5����'��g-_���%�y
�����u�?>��� ��N@ Hr�i"������_<zN���Q�a|�����+����� �"�$7�T3��h��@$Y]~��Wn5k������D �{��?urv������U�{����I��@8��p8�J ��Ub�S��I�� #����8��$�1j��90IN����fmf A��L��qi~�V���q��_��f���_���_k�-��[���mD�
�3}_wzeC:)�~^� �G I.{Z�9$�� �M��d��@\kD��9���$��������Qp���j}M���'?����*��o������Rs�O���@ i�d��$�$� ����(�H��s ��!�q���}T��U�J��/0����;�*��2���To������$�+�^�Hz�o�"�/m�O��c����[u�nn�z�2q��Y_�@ (� �\ <^-mH���K��Zv�k�bO�� �5!�q���}\������z������eL�|�\��Y�=�w��:I�B��mO�ycR�Zc����Vu����Z}u�L�F�.�/��$�dB�@�&�$�C4U��7ln{M�����kP��E�� ���K��Hr��Y�X�du������fs�R�Y�x��B�
�]e�%������kZ���ko�Q��LtJf;��R/ +Hr��Eg�D I�'g�8��:��k�N��xwf�)�d@���q�I�{��(��z%�N���+{�
��q)s)eR=a�rm��'��e���k�����(@����&@ $�i�< �y�Kym���6y���
2�\�HJ
HrR"Y�8B����Y�5�/�N���������|a�%K I.��3�B ��L� #����$��$')���I.�_��._���aE�
C B������ ����"�Hr�s io"�I�h��A��g�798���"9$:�L I��O@ #$9���� k9�#���j�=���fOr~� io!�I�h��A��g�7k:8l��;�c70: ���I I����F��p/��9��j�W�J�z���!e_�� ��Hr�� �o �I�j~cB����������'�'j��'�$'?��0 Hrn`u@���*#����;��� #����$?�$'9���
I��OC �"$9�Q�O� �$g��
2���H��Hr�#������Y�$ ��@���yHrv!J� #����Rx
I.�(g7F$9;N<@ ���(G��E� �\sx�.�Hr�s�T�@�K%�5�I��O@ �:$9���%�$W�����fD6�Uu���b�@:�$�5��"���� P� ����@������^<�M���@;��+?�z�uLY�B���J�kHr�Z�p����* ��@�#�?Hr���_:� ���+����md���d$9~?�A�I�l��E��3z@ � ����@�����JM���<x������9$$9Op�@ B������ �\1^�(�Hr�~f��-�$�x��$�+^��@&H2�y@�W���$��������5�!!�y��5@ "�$G(t%^���x�
r�V�lL�1����f��-�z���4C9�+^?�A�I�l��E��3z@ � ����@��ds�s�����\dm����=�[�z�*}|��N�N� kHHr�?@ {
INX@�\ <^� HrDA7�G��%Y��S��I.UAF���sT�������^$9~1��� �$3 ��R�d��ID&����W�$�<�>vI�&UA (2�EO��'P��� W��Hr������d��$'#��(mHri���@�%9'A������f��@9��"���Q=D����6��x��^B �� ����@�JM�S�a���l�g�>�K�|�T�f��I7>�!���U�=E�������x���B �D If^@ O�&��g
��&�Y�X�m��"9��d@���J�kHr�Z�p����* ��@�#�?�$�r��I���n=F����:��x��^C H%�$3 �'�R�d�� �$����������� ���U@ !�$G$t#~JA����%�\3�RxI.�(g7F$9;N<@ ���(G��E�@�%�ZA^���� ����� �OJ$9�?��uI
u�B�#":@�FHr��x � $Y�G�6�������n���v� ���C��[��,(H2C� ��<� ��@ �'�$�?���H�*�:�ZYd��]FZ��z#���1$�H?�kI�X@��$���i��O�d�@RM�H�$#���c$9wfI|INbT���7�� %Hr��A_bE i�� �7�����%�-$9i�<Hr��x�@T �Q����$Ir���l7�#n3k�{��T�������^$9^�
��Hr�t��@8��p8�J $E�s������<���HHr����&���h6$�0~�
@
��(D�>��@$y���6h� �����md��?�"�ke����I������F�}G�
������C ('�$3 �'��K����� �X�A�s�5$� x zINP0
�\ @^� Hr�@�I ��� �7��d�X��&$9�����H��<�
�@1 ���N�� WIF���~H��<�Z�����o$���@ lHr��i/1�(����I��ikD���`��$��Z! �I I�6m%�@�$9U����fc���)��5��P3��!]9�S$9'\�}INlhs��32^� 9Hr�BB��B N�����N�k�V.�6M������Q�Fe�����\����$'&�I.!@ (:$��!�q%I� �ke�d�g��?�8��$�1j��I�+�B � �&m�J�8H2���C��g���8D)�>"��p�@ A@���K��&uI�Z�[w5���Y�����$E6^�"���W��E���K�� �!�$���VH ���� w8���[����$��:�-!�Q�N�}C���Mk� � �$A�:K�@�%���~nr�z
��>�f>�A�e�"��`�}%Hr�C�� �d�PR ��@������N ��<|�p��`�d�b��i�2���X'\���������
��~��e��M7$90���I�U��,�(^*�
$9�4�DQ��R��V�3����`�/[]>�7�kC��
��} �Ik�*E�c��:�$���! �F I
5
%�@�$�TYr<n��6n�B����h���>`g���b'�*�w�2��2��������$��-�o!�I�*c� J� �\jg����$�� +[|����9���t���~[Z��-����{�m�co��5�-��2����T�+B�c>_;�$���� � �\�4�Q��*������l��f ����G������+���N����g*�0K���*#�I�i.|Hr��R���H2@�� ��}�^�(H������o�]�tc�������$A~|H��'D�'��������.��
�Q�:�c�fnYu�s�UF��1J��HrbBY�@���R ��@��:W��d�`�,���.#��N�'�� O|�s7sa�a\�ce�u��O�Z��*#��F ��#���k>�B����;� �E I�V<�M�S��.��o�����J�o\�t�+��$����$''���I.� �C (>$��1�1%P,IN� Wu��]Z���~?��H��Q�g}Hr<�D��� �R' �p �����(�$g%�]a�y�~����a\7<���O������T]�a\~���2��g$�[�����s$�o��@ |Hr��i1!�������f�������*�7����
��}��q�5�|��H�_�w=Hr���g��d?iR ��@����V@ LI��t�����Q��co�ud��S�c"��Q]�4q�g�r��I�5��0.?�S�Ye$�O���I�o���9��7Q�� >$9|���aI�y��A�b��X��� .�%QV��
'���kj��UF����z��x����#�~��.@ �!�$�;�&�@��A����{a�g�-��~�~�B�Z6Ye$���yINN, �\(A�� P|Hr�c@bJ hI�� ?7w��=sa���N����k�86Q�)��$�&��vIo�*G�c.:@ #$���< )�U
�+��=sAY�#�9�a\'v���vl�a\y�6�kUe��d?)��.$9�����H��D��@������bB%�����U7�}L�����AA���������k�q�u�LY�Q�����oj
4H�D~����%�-$9�QeL�@�H�$���v�UW����m��7�a������Z)����]�����_�&�lb����=z��N>�d[�n��s�=����������}���.���R�'�7� $Y�s��.�r��ml|����^��
��������K��x�_4���q������r��u�����?��sb���j&�$���T�@�K%��H2�DI��W^i��5���;���}9r��}������V��\���{���t�R<x��z����iS'��3�<���kW��3�<cO<���h�IN�ODc�[��$��|����I����m����[�@1���g�g��=�����h�I�F��$9
Q��
#�I��a�y����#��K��#������;W�t�������g�a���O:�$��_�j��7w���[7��������*O����>�`��
���RJ���?�h�j�r�ZV�[i��t�}������;�-��b
�6���Y��u���y�����B�*��'�Yb����^����z�c��vn�q)�3{�^�p��w�6p����1o$��j��
�'�<(� �2����sH%Q� *H�$k����C��(�]w�e�J��S��Y����O<��������!C�#���/���=�\�L�:ul���6j�(W��/�XA��=��&�U�VY�����)�(�|�s'��������lwx�[b]��������ol?~�mhsh!����2������8}�iy��������w���7����?�3�����F���]����bN@�/X�z�5l�0�#���X�r�;����7��/>�@�c@ �/��H�_|a�\r�[^����������_���e�]��$���.����;�K�������ov������w�F��I�&��K/��:������Xn]�������j�l���Uu���l=�mS�r�D�[w��[����%��O��4�r�����a�u�C@ L 1���7���Q?����Pn��6'�G}tPg�}������I��ee��O�V�\&Y�����Y����z�������\r*�C��M���jX��M�5��7l��!]Ed�0��s>/�>;li�:�L�a\��LI��������u��.���Y���{�#�$�/fA�I�,�B �@b$Y{����c��w_�r7�B}�!�X�.?-��A]:�+uY��q�l�-�p{�=I�2��g�{�9��f-�&����zK�J��Y�m���N�u�u��m�*�:����?4���>[:9.�����s��W.���}���l2�� ����F�$3 ��@b$Y�����\�����wb-��UN�t�M7�����(������o��N;�=d:�ZK���s�rI�I�zg���*)�6�� ��
��( ��9������Ee��i�q�]���UK�(��$�C���l_,[mS����5���R�7Hr�E���"��@ �'�(I�a�'����s�de�;v��Dx��Av�WX�����i����Ut�w���I�$k����&Lpx!����~� _I�� k���7>�q3V8�KW8i�p�u�D���<I����6n�B�\':�UI.��g6��\� ��������@>�<���v���m���k��dN����I�5���e��G���$/\��������K{D�t$Hr8���
��(�G@ �@��!��@������"���2�z�����C����y
�k-����ub��ni5%�$k[G������;�������M$9�a��Hr X��@���Pq�X��"�����:������i2�J���������������&��8D)�>"��p�@ A@���K��&��$C�u���Pa�q���lP��9���Y�*�Z��u�t�\��8�!�q�R8}D���L+� �$�$I��M I����
�kW��]��{��k�17�c�8��r9�~c]��a\�M�TIV+������;��_{�y�=r5#��I�:�$
=
C �� ��J**55Ir� ������tn ����
�`��]R~�.�hl:����������-�>��6v�;�Z�$9a�D'��H��N@ (� �\>^.e�Ir����:�+�0����u�a�RM�cO�de����Y��r�uZ���h�!�EC�������A �� ��32^�@��$YK��N�k�V.�N�t�����k���K���M���k�Z6i@xB&�.�j���^7��%�!���!�E��������@ �� ��4^�@U�,A�de��4mc�{�5z�h_�y�tW���V�q�d&IVv����v�GN�\���tX���HG�$9�1��� j"�$�D��C�
��� �;�Kw��Ut��p�~c��zf���%�3�����S�=@�G�������B �� ����@��%Y���g[���m���
� {�q��7�I���g�z-�$��a��m��_rX;w�%���d�7��!����Y@ �$�$G3.�*R%y�������kh���dm��k�3�5N������NI��j�����J��S��c��7��g����+� ����}EHr�C� ��$�@�<I����Y���|�a\y����U%����#���5��%���P8m#��p�C+Hr�D! TO If�@ O������z���2���������j�F������.���8����Zu��-�V</��.���^���aG�)$9��{� � �$g�G ����O��sF�o���"�����^b���}rn���t�S�v� �1�n�I������wp�����@���\G�$�J��! D� �������c�C�G�ox�C����������������YFM3�N��|�����e���d�O�$� �����3*$9j�@ Z��h�����@� ������n�:���s��������c���X�$�u�\�^}:��k����:�SHrL@��� �R% �� �!��x��t���'��|��G����Y���2������f�v�8@�?>g�;��j�;�,�=��\�z_�$�]��2�%����K{$c���$�I�@��� �O I�AHR������/�X���J�����I�$�N��e�!�,�fj�d��[��V IDATr}���p$���
$��#�$����-@ 9@�s����K ]�G~����kd��9�v���/�2�R�5����q3���Xr�����INnls��+1�� =Hr�bB�"F@K��N�k�V.�N�t��k8A��i#���O�&-:d���Xd-�� O8�3'WG,�~t'I�_]GOw��y^7���#V����;Hr��4 � �>���d� k�2�m������Z��'���~����u�:u�T|�O���e�%H���F�5��o|��}��q`[���F�$'3���
I���@ �$9Z��7"P� ������
T�d �N�Vaym�PW��do��>4�����
�� �\,��kI�^L� �\ �����!������y��6��v�d��+VT�d�;��=��6���K�W�4[I���=.{�a�)��,H��A���BF�$B�w! D� ��8���>k��>�����z����N�tHW�$sr��Rw��du���^���.a�uH� �$9L��nI�v|� �l ��P���"�� �'����`��?�^���@.��R6�\$�[������_ �J������6��!���y@ �#�$G/&���F�6���@�d�{��uV���;���b7��$�.��y~wk��A��O�>@�}��j���!@ %O I.�) ���W+��2����{�����<��/m������5��\��(I�a��m��|n���u����%k�Hr��Y�h��B��. �h@��zQd�
��@>���m�����\����\%������,�.v��mI��g�kC��=�@�� ��L(y�
r����s����T\��(WI��GLs�h_�d�Hr�c���d�HR ��@�����#@`������t'��z-���_�v�u�������:u�D`$t�X��do���.��%������#$�G�1�
I�y �> 2���R&�*���nc#?�]� ���|�>��tOr)�,���#�: ������t�%������� �~��@ �#@&�x�i��r�Lw ���\���t �#��n�����e�Yr]����4��'�x��$�;~��� ����#���}���E+Y�M6����2��+{R�@������[:fHr�M���W��:e������tli��k��@�c@��$��� � �\$�4[d�AV&��&������F��?����:%A��+U���$'�Qk5_I� ���y�����=�6,��#$9G` ~INpp P2��� 5�E�%0����}�l���Id�M-H2sJ��d�{��/��/��;��h=vm�@�c<���$��� � �\�4Y� �^<����o.�2�,AVY�d ��S:g�� I.N��j!�<n�B��p,��ZTs���;����$'5��(%Hr)E���:|�p�}�
kmb�?����n�q�u���KkW��Hr O���"�Z��u�t7�f�����c<������#�>�:@ E �$:M�K A�)�*�d���p���
�d��[r}M����c���~�@ If�x�d� ��@��CFP
������=Pc9��Kkg��n_#W$�FD%�@�����p��v�����x@��� z�$A�:! �K I�7��H@���EV��R���v�%���@���Hr6���L�����{\���S����?Hr�bT�����R/ �� ����� d#�����MW��l(%��B%Y�N��u{n�c�u|������s$�o��@ |Hr��i1`�
ruw g�E$9J��I��\�k�����_��%p�Hr������<�� �@�#�R8��K���z����]a���JK���9�� ��PJ�3~H�V5t=�V�Yo3��^�N��S�������� �~��@ �#�$�=-�L U�{�Xe#W��(�����M���l(%�?$Y��=��M���e{p\���k�Hr��do�� �R7 �p ��p��� d#�����Mw��l(%��$����)��1�\�s� ���[�F���J�� �%�$���� ���}���E+Y��kl��5�2���O5����tI��R���K�Ej����k�K�,S�C I�O���)�4a�� <$9x�� �� +��f�:��{k������^�j>w g�e$9J��OI��\���]��]��%h�Hr��Y�P���: �@�#���l9�;�����
��?��$�P�C�����^��@����{�$M��! O I�1-D@���g[���������A�i��W<���N�y�-}� ��+��V��$�N��b�j�\�lF �1X��E��K�� B"�$��f�%0|�p�}�
���/]emL)_b�.�N�7���H��1�km~K�_���q���w>�\��o$9n��Hrpl��@X���H��o�d-WM��Id-]
� �AP�_�~K����q�����=��D{�$�h�3If.@ �?$9�1,��~m�=0����tA�pJg�hU������^�%Y�?���l�����v��6���-�\��G� < �D@��dJ���EV�q�*��r���+�.=�]���> ���g��*I7s��Tv��+�uL<�;$9wfI}�LrR#�� �R"�$�R�c<�TA��{�}��U
�����2R$9��o$I�|�j�:z���g�y���'��c�A$9A
��HrH�i�@��� �R�~}��l����.��W<]rX;�u{���&$94��n(I���%���ko};��4:g�$3<H2s�@� ���a�G0y�#6��Q���i�N(� yr6P��l(%���$��Bp`�mL�Q�M I�v|���&m�� $9���#�A�����/��uZZ�O[w g�}$9J�&(I�<���g@�r�!t��R�#D��g����D�~B �� ����4�E+Y�I=�i���}�m�����I'Y7�_���9(Hr6���LP�,r'���=7w���:��I�~���!�i�� $98������N����?�s����@���Hr6���L���-�n���M=s����������#�>�:@ E �$:MfG`������l���n��p<��n��.�hlA���M��l(%�� %Y������b�z�y~wk��A���t�HrL@��� �R% �� �!��� L~g�
�z��z�}��:���v���E���e��R��m�co��9�[���'9fA�
I�j<�D��7z
@ � ��|�,�~v�����s6��K��j-�4�Y�w�L������ Z��}o��{�fUq6�oa��F%�
6�1"`�Y{����b��
��BG�(�MT�]��H���?��w���{O������Q�sf�����}����z���,��&s�I I6�.QD�$GA�>! �K I��'��D ��<�y)���������%���IC�}*���-����;_��\�{��~2�y�d�jUDHrT�����d�X����M�D���Z.�^I���o'k�E6�$���[�T��Xr�����?�����
Iv���f�$'K��! �G I6�&��H7�j5��������'�.r�@Hr��� I�#��~t��!Q��g�{�
4��%YU3w��u���WCI�%] ���0! �B IfxG`�G�dP�X9~�N���>Y����;�"��
�H
C�51��z��-�����=���YU+IV�L�Yk��@�����;�d�+L~�@ �q��e9f�=^��I���r��+��A{{G>�v!��U$�x��d�M��u���
���
�I�?���-]�9%�����-'�w4`-�I��`��$��! �D I 4����������K���H���2{]-y�����Y��5�]Hr��
�*,I�AI����5^7��Y)��*��wO������+�$�9B�nI��~D@@ 8%�����A�I^^���][���#��5+���}���9R��Y#�����z���k���c�5j�n�m��Y:v�(-[���n����~/���
y�y{My��~��T4m$9�A`Y�6Jry?��<��"oWm]����{�9�<���$9�e/6i$�� @�~NI�<� ��W^)s��������J�*�UJ%��������������$W�XQz��!M���.�����L�6M4h�$4��7�K��l��sd������$T|��uU��$K�n��^�#3�Y�U�%�%T$����>��$��� D@�I��k��k�N&L�P(�w�y��y��r������~��g� '���*���v��Z�Jz��Y����{K�-d���I��:s�C��Y���K^�����g
��}z���F�T4�-[�H����U��K@����^-�In��%�V/�v�����jb���(�PPP ��m�j��E�=}D@6���*T0(*B��@�J����>! 8#��W��^�z��q�
�<��3����-�.z�o�^:w�,��������s����K%���o��}����>+����b�
���{�v�{���$y��M>��&�_���"�e��m�����/��]O�=���pv��� rFF��1X�tQ�8|X2���2����i�/��X�N9P.?�a��-�A?(�� �RlA�Q�����0h��Y��Q" Pg$y��e��oyu�;v������:qm�����.�H:u�$�^���~���$8P�qoF���O���V�ZR�N�5k�����������
�6-���c����;��ty�uq��s ��{�}l�[[��7��YnX�� "g$Y7������?���a�<�����L?�?������_.��W%X?�UqNH�����!�}����K2d�|��Hr@����g��-K��/
��y��=� Ih X�l�$9Q�7�����Sx�T��
=Y�M��x!�q�z�9#��@ �pF�u��.�3f�$���.���q���w��%�\����[��s��Z�e�� I�����K�
�f��!��.�f&��A_t���Vt�W�n��OH�������J��M����"y���^U�ul� ;n�����/��X� `?��%y��9�qM
��{m������G{�g��[�������8���.����u�����<��cr���{_�?�����o���n����K5jT(�*�����3���T����*�����Y���D���)�`��OHrj5v��8Kr������/��_�Y���:��K��r!�q�t�y"�e3�@ ��\�U@��i#g�uV!+��5j�G���3��e�DK�IV���d�n����N����r���{"�;�>����g%W�Z��1n����b&Y%Y�\�l������Hr0�����e��e�����O3��'$9�1`s�H��Wo�%��'5�^��%�H�����I��'�A ��@����6����^_~������������O�����#0�����!�d�/2o�]�aWu����V�R��]���T2B�w�V��;�m*����6����7�A���+YA �"�$���H��~�y����>9���
g���'���+d�����LL��}�$��s�\���^���E������I��L��$����
��^a-��%���n�
[w���
�a�������EI���5 ����@! �H rI��������������Nn��&��%}�X���|���ke�����1y+>���;K���Z�G�������dk�cD��)��$�n��K�������;�{�9Lr[7Jz�HB���K�n��k7{���wy����Hry*�{��x��,! � D.�~���s�=R�^=Y�r��q��`E�_1b�����]"S�~-��W�Q�������e@��)��$�
M�`����^
�T��h��^���Jp�����~m���)��gIV�J�n�o����B��q�I�K��p�@���p�.]*����C9D����x�������y���E�Cn�o���Z����W��g��Y�-z��-�lK���I.?�=�`���@� �u�&���E;������*��#���5�B �0B�������KAA�nGA�����~8X��-�m�%�~7��c��VIf+$9�q0��E2��y�K���u�I�:��Y����w������'�<�J[g�u)v:��=��E���'�@ �$�$��~�������
z��2`� �#���%�3�����I:l>\F~�Iz�9�{W��I��Z���$��Vgv�Y��P�Sk%���<�JW��,gU��R�HrJ��|Iv��$��@��<p�@Y�~��a����c�I����w��1+E|����e���o�������]�Xs�S�*!����e�$�;�\��$�;��I��(�A �@��|����w��� ��j�*����L�6-|�
�~��"S^(���+�����'$9�!bU'H�U�*6X]��;p��f���T�`#����2@��"I;� �#�$ggg��)S�f���������K�Z��#C���
�Z�o��}��e��c�:� IdXX�(�lu�v���k���/e�������[��.�B��"��#���5�B ��\��8�O��V�ZHYg�u���~���?�\���U�Qfc��E}�����@^^4��c���
�r��$��E�=��<��"oc1}G9�uco���.$��*�0`�� #$�<y��1�<�q���G#����]s�L��b��}B�
`��$G =�.�\����zL]I�`#�!��.�dK
E�� J!�$�������o����=x������R��Y>�N�������68��Cc�.kK�k�H��8�k��K��d�JY@Hrd����o"����zKN9��T)�c7|#AC��1�����Rr~�OF�p���>��$>d�� I��LiY�l������
��B��F�LH�3�$@ �"�d}'y���R�v������������D��"y;���'$����l�Hr����_���k�72���^�[7��
���d]��l�f�/��%�n(���AuK\�m/a7"G���#Y@ �&�$����e?��k������[��������'$9�!cMGH�5��-P]�}�K����7xm�{��!u*HVVV���$�K�n�$�2���]V���n(V�{�e=���!�$��V! �I�I4h���Q���;��0�����G%����-������'$����@sH�EL1��K����Nh ������&#�{���N5iX����:k���GQm��S&}���};����}��=$9� �t !��y��W^I7W�����K��������yv��$G4��I6�8!������,IzU�g1�����~'�T����\gm�C������! ��= � @ �0B�y'���TZ���$�w������+����'$����JvHr*��{��V���+�J�*��o�����!o�+����b�7,�#��y69Hrr������d��PL�
�jH�=D����'$����S*H�O -o&����E��Jys�|�pM��*����,'3����4��( C ����0�����8WN�VS�/�g��OH��#5��������� �����<�����p������M��uHr����@�"���+W����+A�J�!��}�d?�J6f���<�bKk��}B�CDt�$[P�B4I��������9+
7�RI�i��M�Hr�pi�@H"������L�u��[8I��K>�?��c�����]"��(��L��D���������#v�I2]@ � �0�87�3����EZ�n*�����c���8���sG�J�dI.*�o|���M����q�$���� DE I������}����s���]?��S��'��<���7n�(��U���L��"�� ���r�n$�({�������7x�;b�&_�������d'���u����_����o~�o���#���-'/@ N��8U;�\�M<K�n]*�l�K^=�� IDAT^r�3�>E�$�8��
I6�8!�f�$'���#��r��Y��w�����}M�{��-���f��������s����V�� �K I6�6�F�m��lK�X!C�/�L�3������6��G��+iJ !�)as�![%9Q��&_S>]ZX�����~#���V�LiZ���XV�LiVD�u#1�{�Ym�t[��#����� `$���X���dP�[r���`��N�T�8H��C���d_0Z����\T�G��H&}��p��=���N5iX����:���j�
p���g���f�K*������FYF����L� If�N {�Q���K��?N�[�Q��������{�)4�$� ��G�d��BJ�Hr"u}�X�UE�2\=���%���3�Rn��$�^!�� P6$�lF�����L��������y��;��(
$9�����H���M"5�$9�������{�9�hYF�4@ 4Hrh���Q�����+��u����{���O}���Iv�����$'���'�����;`�`�}_Zg�M�e$9��@�� �"�$�E:����+��;x�f}�U��>��c��I��@N2E$9I`���$_��63Q�����=@ � �4��?tzgyb�'r���2k��N��$�h@�3U$����
I����r�f����MX��$�7� �@P���������GJ~�fKN�9;�s��'$9�����d��@�� Y~c�����U�{�94RYF���@�oH��Dc�����Sz�&�~�%��
t��'$9�����d������G|��;ky���^0Q�r��Urh��R�b�h��; �L IN%�clk�Y�AZ��_�\}���>!���= �� $��1P�,w9����t����M�6n��=�3�K�n��{���_K\�,� �����nhs�w� �G I��f�E����d�����1�&9���������������o�v��$�Y7��f���DSo�8YNHk���9���o�y���� '��G��'���������gN��&�C �@����;���u�q��q�j��Kns�����D����JI.&�oB��+������!o��e�Jp�Q���Y�r����j�M�$�i���������)���G��o�N����~@ q!�$�����j���B���<���YN��$8�,mI��p>��$����&�T&}����f��fz���NuI�0����Cv���>\��{Wz�7+�6i'n�1� g ���2�D��}��[��4��!_-���c���h����"�&W'�����X��Sq�[wl��xN����lzA��%$9�e�/������U�\q�|��
��}B��;���$�R���@�������U������-�4��� �ls�"�=�L�~����U ?��ErOk!��qT�u�;���6�'$�����k�{*�9���/���K�].����|�����d�RN��k#Sw���������c��IipY�
�lQ�I�eM�G������2���l�eY� p� ��v}�n��e�=%���+���r������'$9�!em�H����5p$�W�V7��$'��M�ts�)E6���{�94����J�� " �$G ��.���S��x[�l�(�,�76�>!�.�^s@���ikkH����?�T%9Ib�/�a���g97��%]Z7f�/��E�� �%�$30R"�3�H��(r��?����cs����pq�!$����;9$�����1]IN *i����K�����W�����9�7��-��q����,*��ql�!ztn����sA �$9U�9��_���/�H�]�dE��X��$�<�hIv��>��$� ��&����8���!o��e���n�����,��&�odY��J� '�$^ �Kl�u��������:� I6qDF�-SzG�M�D�q!������!o��m��d3nq���=���Y�U-������������R&}��p����}C��X��,|�� �!�$[S*3���[i�JG/��+��m��yM�.�(8*d��v�$Z���B�CnpwAJr"m���"�]�R���U��oK���us�����r���Kn�F)/�6����� �������iW����I�M���%�cw�SQfHr*#��g�d�j�JFHr*��|&I��\q����e�x��2��E I��dL��y��W9`�iR������$����$����%�$3� ��8H��$'r-n���r�6��f�6#��M�d��q����_r�{N��e�,�70��>1�� 4�{$���D�tC���$�e�x6t@ �$9%l�|���df�9x�!�j������'�_�f&9��/LIf0��(J ������3�uG���
����&��U�d��]����H���^ �n���N��=���$�����;��8@�H��c��3�uG�3���&_��� �'�$_"3:�yb�r������;�Yd=�1����#�Tw,�fD$�y&��QP�&_����@�dH���1%���%��V�_�������}L���T�$H�)4�8�I���)�#��T"�8���k����-�pM�
l��x%@�xH2#�LS��E�-yY��YA��/��>!�e����$����&�$3�I.������Hf|�r7YVa���� �@ I6�
���;�X�]a���(���b}��l�`� <$9�v�$X��Bb&�|��|M�ti�
�T��u�{�G�����]M��g�k�� �O IN���-��M���n��
d���e|�?�I��g���C���!��F���H���M*9$9)\R�&_%��D'���! �� �������g����r������������EG�c�W��$��2��$9e.W�Hr�0{��./]�Y�^�����l��-e6�D��� �r@�� *��m]/�����*H�E�K��.���OHr,�&��4���PH2� A In, ����e@`wH2#�DS�_-���G�o���ep����I�/�@�H2c�($9���DG���!�$�����O��#%��H��'I�f�s�����r�)$��Z�� 3���s�Y$��z"����� `$���O�gOI���I��]�r�@�}*�.�$2X#I�� �t�$R�@�
(B�!�-����b$<DM I�����oTK���I]�P��s�>!������B���� �&T���d3�D�J��� "�� B �$� ��.6��V��w�6�����<�����$�6�C�I
��!�F�'����Pq������aF�G0�@� ��g�;���B��9pKUY�~ �>�Py�[���D��"��%�$3�d� � ����������ZI~�
��eg�u�=9� I�w�9���XASLIN���!��� ��@�cW���������s���
�f� �}*3���QH2���d�@QH2�����d�k�k��7������5��q����OH�������d��|N�$'���'�dW+K^�@�8%�����A�I^^���][���#��5+���v����{K�F��g���}]�v�;v��Q�v{n�����cGi����v�mN���K?�Vo]�����������a�jN��GR�$�A��6�d�k�GH��hIv��d���S���H�z���+���s���d���R�J�b�<y�d�����A��Ir���G���i���^{�5�6m�w���<t�9�����M�d�Z�r�S���x�Ld�$3� ��8H@�� �'��$��p�v�d�� �R|��w��g�)'�x�o*�t�R8p�t��A�����$���N�U�V��>�3�-Z���&�����+c��0�����?t�[��T���w.�X�Kq=&+##�>h�|^�:��K@�X�Y��M��
��������XJ�I^�z����K��WX�g�yF��.�.z�/3��[�n���/���n�|���K��}��g����LY�b��{��^;����n��?]��y�f���u�����\���w��.�h�_W)�Kq���o|�����SJZ�b|��-[�H�5|i�F�%���?K�j������9����� :g$y��e��oyu�;v����.�.zM�4��e��+���wOI���\tF���O���V�ZR�N�5k����{<}���,������-�����>m�����U,�xYnW�Ze��m.^�[���! �E�I^�f�t��]��_�n��a��^x����d�<x�<����,qI����{�w�}�t��E��-�vQ���'�_�������#Y�*�5��I��t��$������dkK�{�H��Hi�@���d]���}{3f���Y��l�}����u�B��=����r����;wz�������SOy�[�L�.����K�
�f��!�~�
���<p�i2����hC=9��`y0�����"�6V����d����"�lc���I�+�B ��3���z�!o��S�N�����Z�r�w�{�1�������J�I�[������3���U����u?H������W�+��s�S9�"����mH��.gzHr9A��6$9E&E@�yNI��M��s������&�,p������sss������D.�$��k���8qb��l�f���p�������^Y����L���;�;�� ����9���XASLIN���!��� ��pJ�cW=�}�2�r��������n������j<�@��Q���D��"��#���sy�D��C�{ �M I6�>�F���P��� �^ Rq����� �?�G�]�hj� ��qs�)$�������:;�� `
$��JDG�Q-ej�&i���t<n(�>%Y$9I`���$;Z�$�B�����H���%5@ 6����z�D7.�H�g�zv�\r����r�}Jr, �Is�v$���&���$0�oG�..�A �!�$����':����o�<i���4��)�}Ja �)@s�$�������4GA�-,iA �"�$����&�u���k)y�*J��g��k�������4A�,j
)!�)@s�$���� +Hr����dg��Or�L�j"�w<��O)�$9Ep�=�$;V��A�S��cH��E%%@ v����\��SG��*"�_{��x�P�}Jq �)�s�1$�������"8C�,*)A �#�$�����V���������bF��t�d�X���ls���I����-!��W��! � �1c�m+�v.�6�%����Oi�IN�C�"�3�T��4�9�(��XAI�%$9Ne_��dO����L���by�����F���4�9�(��P1�HIN�c�"���t �X@�cT�������o�^;3���Q��f���4:�8��H!�LIN�C�#��T ��@�cT�O.3�V�}V-cr����4k�$� ����dG
�fHr� zIv����� ����<T��}����m����O�I����$�����d��\�HrplmkI��b�@���������N�'2�K��
���#9����#�>@t� $��"����DG�@�)$i@ �&�$���E6��{�U�N��q�:����[��lE�I�5 ����@! �H I���x{RG���[o���M���O>�I� ��� ������d�@:���@I�=$��!�u�t{�|R9S�����u�C��S��d�@Z��ly}
I� �� �� ��@��
�������<�������k�{B�M�Nx�!���6�'$��
�� �e3����<R��*��]u�(M�gY��I�#�&U#�X������3�lR5��I��?�C �� ��ES�X����y�l�PA��_�v�15R+�B��,��A#��#��A$���4�V� *$9T��v6����o�<����\����a�8���x����\NP���$;^�$�C������ %�$Z���Z���Lj+y�+����'o��/�&i`wH2#B ��%�$3�d� � ��������Ir��*U
2���'K�6�9�iti!���7�g$��jD�{�zF�M��@ H� ��<3+����#eZ���6�i����� ��$ ��&�d�@�Hr P-mI��p�
@�$�����ki��E��]�T{X���t��<'$9� �lD"I����$S
� �2$9et�>�����s��������9oq�S@�B�kY�H�e(\$9 �6�$[X4B� �$��!q��&�N��r�����n�:��)!�f�!�(���+`F�H�u0!
$��* �� ���3���s&K�O����r�3��O-������dW*�^Hrz�\yIv�����$��� DM I��>�_�l�g;�#
�T���K@�J If($�q� �$3 �O I����e�}Xy�Zei���R�{���t�d��U4HrT���I6�QF�$GI��! �C I����]j���x���#���� �dW+�\^Hrr�\�Iv�����$'��' �F I6�"i��� ���m�����e�%���:
��yI.%��A���qy2D��C)� ���3YB n@�]�����}� �R��rd\���dflH��� 50$9T��v�$[��C�CGN�� |'�$��4�7~:BZ�w�����,���$�@b�+��b��*��8PH2� A If,@ �� �l
�F�=C���ZO��!Y�*9���i ���&����0i���lnm��I�8�A �� ��?��[��^r������Xj"}$9D�w�$\�CC�C�mxWH��"<@ � �$�����<T��}�����UI���=!�&W'�����X���lz���@������1md������}����XjR����@�
�lx�B
I �� ��! �A I�}�Yj}����\��gdM�H�5�
4P$9P��4�$[S��E�GL� '�$�8�K��d��'�!m���CZ/$�$3� ��8PH2� A If,@ �� �ly
G�8I��^���������+|$��z-�Y��E���W��"�A��m@ �@���L/�~��Im�]�O�t���[0��j��d3���$���L2c ��;�d�k�?k�d/',����Hr4�M���d�*M<Hr4�M���d�BL� �#�$'�����{�8_y��~c#y��KF��`��8T�������$9U._�Hr�8q � �&W����� �S�%?3S��r���(��L��I��t��$������dkK�{�H��Hi�@�������a��wK��IR� C�cWk�&�
��$0GoG�-l�i!�Is�v$���� H���N,�n���L�n��Y�6�lw���I����� �v�����d?i� �h ��pO��"K���(�������� �)as�!$�������6'B��,+IA 1#�$[X����$g�,�kg�<y����~��Y�2�l
�� I����m ������d�H� �� ���O����!�U��Z�����d_0Z��l} }I I��� �N��$ ��@�m +����;z�Z��:��!���7�w$��JD�-�zG�M��@ H� ������=�����P�vV���z����UBI��A]#�#�P����5�lXA�@
���E��������2d�M��[���2����$�~x �d��@� H2c����d�j��ki��E��B�`��r�Y�6E�\�H�s%M)!$9%l�=�$;W��B�SF��� �!�$S��y{jg���o���vH�:��~�;#�$�����d��X�Hr`h�kI��d@�7�d��mC�������O����(r7CE���k�Y!��s�~$�������
5�� `$��z�M�ti������]|Fz���-�;'��li�JIN
��7#���6������� � ������fL���[�d��UeL��Xjm@��d�`@H�E0 $��"hE IDAT�lH!�@��4������6��jT������k��5}�@ Ifh($�q��d�A� ��X� `?$������*!�F�%�������!�ldY"
I�;�B �� ��+�`���vr�������2���,�s��"�I#s�$���&���42g@��--�A 1"�$[P�������*K��seb��-�8!"���sYY"�e�����x��<Y"����=� �&�$�]���i��!^��-]Zgx�� I�O�K�If($�q� �$3 �O I6��/�=S�,�}6��;���j��8>�!���5�L��"�$�E(>_G��Sk2� �%�$�\�������df��rtA������hc����03��f�E ��@ �@�
�a���7�a�\vB���_hHr�j^\�H2� If ��@ n@�
���1g���������r_c��a�B�
+HD� ��7�[�[V��a&9B�t
@�'H�O }o��R��2������{4�$9=~�<�$�R���@������H�K�$@ ���������A�$//Oj��-}���f�����3g����{N��^{�%�]w�u�Q�}]�v�;v��Q�v{n�����cGi����v�m������J��Ox�8�E��Q�'$G IN���w#��V6�����x�|7��ru�
��$���z����W^)s��������J�*��S���O����O���+���'JFF�'�+V�=zH��M�{���d��i��A�P$y��62d�J���~2���q�V��$[U���E�CkU�H�U�
4X$9P�4@ �H��]��]�v2a��B)���;��3��O<�T�\p��7Nj���I�i��&�V���={>��woi���,X�`7I�Yg����K����������W��{����@`��-R�re�C����}����=?��/�xf^PP ��I���� �� ����U�J�
�s�*qlg�� �[L�I^�z����������3�HVV��L��k���r�]w���#�[T�o��v����<������)+V��{���k�����M�7m��{�W4D�[������ g7=��>h0};w��YW p�����^�B�1���%:��?3��M��
����u�����3��l�2������:q�;V�X]~]��?�n��f���)�mVI8p�<�������'���w�Z��N�:2k����[�|��<Tu��qy��+v��D�r��)����bp��cP�r��r�r��6@ pF���Y#��w�����6l�'�^x�oJ�������,_|q������{�w�}�t��E�"_|�E��������K��� c/�5x��;4$9��Od�$3� ��8H@�� �'��$���o�^����[���B}��gK���w���;x�`O���������d]�}���zx��1C��f]j�Lr�����p�T)���O�)'�������dG�dZHr���Iv��)��$� �G F�IV�=��'��:u�v����z�S�j������/������n��B������u�z��qQz���v��Xj������k�6\�($����d��@� H2c�����d�DK�I�3g�7������7��877W���~oS���;��;�����E%Y�\�l���.�E�Z��y�k_����;72@���c�Y ��t�y$��:����E�� -�$9Z������M���Uo������i������ �����a$��r,�Z�F��+C �
$��Aq��#d|�a��AE)%$��:%�4a;�G���SQ"�aP�@ �@���[��W|-�/w���La�u��Ey'�%}s�F���E�� �Q�7�o$��z
�T ��P�����$g�,�\PA�f�b�u ��nI�����!�v�����d�����lo���@� �l�X���#dB��w����uc��0J#�$3>� ��8PH2� A If,@ �� �lB
�,�����r����1�A If� ���$���$3 �C I6�����Y;+��vH�:����"�$�E(_g&9u.+K$�,B��:3���5�B �@�
�mb�u�-�����2 "B($�<���Iv����I.�x��$���d �M I���,���)��$�����d���r2Hr���{Iv��$�� �q�Yjq��IN�C�"�3�T��4�9�(��XAI�%$9��'�Z7�r����g#����!�$'C��{�dwk�LfHr2���Iv��d�� �e��� �S�%?3S��:�B��7��7��B�]�hj� ��qs�)$���� 7Hr��{�n�Y<I���)����]�#�E*]#��Ps�$�������
57�A���+YA �"�$GX�{�<N���Cm=X^���#��T ��Ps�$�������
57�A���+YA �"�$GU�u?H����W���_��ro��QEB�)@�S��cH�cM1$9Ep>�$;XTR� bG I�����J��qR� CFg�/M�gE ��J IN��[�!�n�3�l��T�����^M��$9��?1��24s�4�|�L�������t ���s�Y$��Z�� ��=��E���'�@ �$�$GQ���%glo����w��r��"
�L� ��&@GG�)d�i �it�q$��b�
[Hr���x�d�K�#`�g�H��4�mI��v~F�$�I����d��G�� � ��8xfTy$c��s}����"�K? �~P��
$������E7�@���#Y@ �&�$GP��O7�y�+��U.��/�5���H��oI���~d�$�A��6�d7�H�@� �!���������^G���@��#�;� �~���$����=��I��A���!@ @�C��] �����e����C����$�$�I����d{k�g�H��4�nI��~D@@ �!���O4�w�Wf�u����I��}m"���,���� ���&�lg���@QHr��a������^��}Q�r��!�NW~@��&jg{H��u�;j$�o����$�[;"� � �$�8��t����k9��Z�B���3]A I��}m"���,���� ���&�lg���@QHr������dV�
rr�2��{B���� �$A��6�d�jD�HrT�lI��nD
@ I�`l\���z�*��~G���;:�(��OH��4�mI��v~F�$�I����d��G�� � 3�!���R�z�+������+�I I��=m#���*�H�� ���6�lW���@q����E�'������?���:?R�t$$9H����$�S� #E���kW�H�]�"Z@ HrTc`�����dc�
,�����$ ��&�d�@�Hr P-mI��p�
@�f�C/��[��z���:�!v�$�����d��bhHr��
�
I6�@�@���r@J���R���[�������B I6���$G\ C�G�
)�a �� �I IN`��o]/�����Z�x�?���O)�n�� �lG���I���#�v�)�(��0(� �` �������]�.}Q�o��I�},Y�*�#��E I���� �f�'�����H���l~���@Y��������[���
r���dt�)i���&@�M�Ft� ���7�g$��jD�-z� �$��%��u�d?{��gf�u���N=+��h;dHr��
�I6�0!��$�����d��Ch� �I I.'�Tn����r������
2��OXj�
D��A�
.N��!�!�6�+$�����2p�� $9 ��&��<]�V�QN��X�w�`O4$9
����$�W�("B���nf�H��u!*@ �@������m�7����W�;�K�I>���@�M�P8�!��p6�$��
��kz� $9 �_|4L.�����SdL���a�j�D�Q@��"oV�H�Y��*$9*����$�W"� �,$9Yb��?�������k���)n�� �lS���I��M-#�6U+�X��`��: �0 �QN,���v'���>�B�Q@���oN�H�9��2$9J�f��$�U�� �
$9je<��h�d��C���)�f�u �MhI6�
���$G_"@�M��1 �f��( �C IN�^ �>:���t�9vs]s���@�&@�M�B�1 ������d�`FH�u
@ �@���W����9B�W��Z ��&�d��a(Hr��
�I6���$G\ �� �$��E�H,��QP �/����>�5�9$��jD�{�zF�M�F�� ����w@ ~@���X��'v�Q[?�6g��k?��u�3� �lR5��I���I=#�&U#�X��h��; ? �~P,���%�U�%�U:Gn���[�9� �&U#�X������3�lR5��I��?�C �� ���_����D�gt��o�i������u�2� �lZE��I���i�"��U$�x�����3 � �~��S�&C6} M�T������i�DH��U ?&$9|�&��$�X�hbB���N�� �$�$�H�����GU�K�]���N�|l��L$�$�X��cB��gnb�H��U�&&$9��
@�OH�O47nX&�^8�kmt��r�!M|j�fL%�$�Z�p�B���mjoH��� ?.$9|��@�oH�ODGO�#���!Gm�"�^��O�����d��^lHrx�M� I6�:���$���� A I��j��-dv��,����
� �6T)�������lC���I�3�@ � ��]�Z� ��&�d�@�Hr P-lI��h��$�f! �H I����n��VN�F;D^��kZ� �6T)�������lC���I�3�@ � ���nO�"�*���w!��C�4a$��*#�<cz@�m�R81"��p�@ A@�}��r���B�8d���u�-��
�d�|�Hr��m�I��J���$���^ I IN������~K�K��"/ue�u�8�zI��\��$�����d��h�Hr�xi�@(��41�q�����t�z��q��4[�q� �6U+�X������2�lS���I�/�C � ��&��R���K��S�4[�q� �6U+�X������2�lS���I�/�C � ��&�#G)m/��]�����F I��b���$���V�d�*\�Hrpli�@X��4I�$����<p�ki����@�m�X0�"��p��U$���/�[Z� $9M�*�w����k{M�-��m�d�*L�Hr0\mkI��b���$���! �E IN��������L��� �lc���I����-"�6V-����`��* �0 �a��/� �N�3�d����9� ��T9�JINC 0� �lD�FH��U�?f$��6��$�X�`bF���J�� �$�$�I���"�$;U���A�SF���H�S�L+$9-|<@�H�e �6V����d����"�lc���I�+�B � �\��s��������~��:Hn��V�[�n�5�/C ��&��������lha"I� :]B �� �\
����������C�?�xy�����>�{����2����d���H��LmlI��j���$��V! �I I.�v^^�<����#�xw���K:v�(�F��5jx��o�0�E_�_�+T� EE(a������_��A�'dff��{v��)+V�g�A���@�� �\J��z�-o���_����{wi�����_|ai� ;]�7o�*U�x�q��������t,p���~X��
�W�_d���
U�V��3��s�1P� ,%�$�R�W^yE,X�-�N\7�x�\v�e|��t��6����io[,���v~F�rk?i�������C PHr)�������?�Xn��������Z����~�����@�c>