Decouple last important WAL record LSN from WAL insert locks
Hi,
While working on something else, I noticed that each WAL insert lock
tracks its own last important WAL record's LSN (lastImportantAt) and
both the bgwriter and checkpointer later computes the max
value/server-wide last important WAL record's LSN via
GetLastImportantRecPtr(). While doing so, each WAL insertion lock is
acquired in exclusive mode in a for loop. This seems like too much
overhead to me. I quickly coded a patch (attached herewith) that
tracks the server-wide last important WAL record's LSN in
XLogCtlInsert (lastImportantPos) protected with a spinlock and gets
rid of lastImportantAt from each WAL insert lock. I ran pgbench with a
simple insert [1]./configure --prefix=$PWD/inst/ CFLAGS="-O3" > install.log && make -j 8 install > install.log 2>&1 & cd inst/bin ./pg_ctl -D data -l logfile stop rm -rf data logfile insert.sql free -m sudo su -c 'sync; echo 3 > /proc/sys/vm/drop_caches' free -m ./initdb -D data ./pg_ctl -D data -l logfile start ./psql -d postgres -c 'ALTER SYSTEM SET shared_buffers = "8GB";' ./psql -d postgres -c 'ALTER SYSTEM SET max_wal_size = "32GB";' ./psql -d postgres -c 'ALTER SYSTEM SET max_connections = "4096";' ./psql -d postgres -c 'ALTER SYSTEM SET bgwriter_delay = "10ms";' ./pg_ctl -D data -l logfile restart ./pgbench -i -s 1 -d postgres ./psql -d postgres -c "ALTER TABLE pgbench_accounts DROP CONSTRAINT pgbench_accounts_pkey;" cat << EOF >> insert.sql \set aid random(1, 10 * :scale) \set delta random(1, 100000 * :scale) INSERT INTO pgbench_accounts (aid, bid, abalance) VALUES (:aid, :aid, :delta); EOF for c in 1 2 4 8 16 32 64 128 256 512 768 1024 2048 4096; do echo -n "$c ";./pgbench -n -M prepared -U ubuntu postgres -b simple-update -c$c -j$c -T5 2>&1|grep '^tps'|awk '{print $3}';done and the results are below. While the test was run,
the GetLastImportantRecPtr() was called 4-5 times.
# of clients HEAD PATCHED
1 83 82
2 159 157
4 303 302
8 576 570
16 1104 1095
32 2055 2041
64 2286 2295
128 2270 2285
256 2302 2253
512 2205 2290
768 2224 2180
1024 2109 2150
2048 1941 1936
4096 1856 1848
It doesn't seem to hurt (for this use-case) anyone, however there
might be some benefit if bgwriter and checkpointer come in the way of
WAL inserters. With the patch, the extra exclusive lock burden on WAL
insert locks is gone. Since the amount of work the WAL inserters do
under the new spinlock is very minimal (updating
XLogCtlInsert->lastImportantPos), it may not be an issue. Also, it's
worthwhile to look at the existing comment [2]* records. Tracking the WAL activity directly in WALInsertLock has the * advantage of not needing any additional locks to update the value., which doesn't talk
about the performance impact of having a lock.
Thoughts?
[1]: ./configure --prefix=$PWD/inst/ CFLAGS="-O3" > install.log && make -j 8 install > install.log 2>&1 & cd inst/bin ./pg_ctl -D data -l logfile stop rm -rf data logfile insert.sql free -m sudo su -c 'sync; echo 3 > /proc/sys/vm/drop_caches' free -m ./initdb -D data ./pg_ctl -D data -l logfile start ./psql -d postgres -c 'ALTER SYSTEM SET shared_buffers = "8GB";' ./psql -d postgres -c 'ALTER SYSTEM SET max_wal_size = "32GB";' ./psql -d postgres -c 'ALTER SYSTEM SET max_connections = "4096";' ./psql -d postgres -c 'ALTER SYSTEM SET bgwriter_delay = "10ms";' ./pg_ctl -D data -l logfile restart ./pgbench -i -s 1 -d postgres ./psql -d postgres -c "ALTER TABLE pgbench_accounts DROP CONSTRAINT pgbench_accounts_pkey;" cat << EOF >> insert.sql \set aid random(1, 10 * :scale) \set delta random(1, 100000 * :scale) INSERT INTO pgbench_accounts (aid, bid, abalance) VALUES (:aid, :aid, :delta); EOF for c in 1 2 4 8 16 32 64 128 256 512 768 1024 2048 4096; do echo -n "$c ";./pgbench -n -M prepared -U ubuntu postgres -b simple-update -c$c -j$c -T5 2>&1|grep '^tps'|awk '{print $3}';done
./configure --prefix=$PWD/inst/ CFLAGS="-O3" > install.log && make -j
8 install > install.log 2>&1 &
cd inst/bin
./pg_ctl -D data -l logfile stop
rm -rf data logfile insert.sql
free -m
sudo su -c 'sync; echo 3 > /proc/sys/vm/drop_caches'
free -m
./initdb -D data
./pg_ctl -D data -l logfile start
./psql -d postgres -c 'ALTER SYSTEM SET shared_buffers = "8GB";'
./psql -d postgres -c 'ALTER SYSTEM SET max_wal_size = "32GB";'
./psql -d postgres -c 'ALTER SYSTEM SET max_connections = "4096";'
./psql -d postgres -c 'ALTER SYSTEM SET bgwriter_delay = "10ms";'
./pg_ctl -D data -l logfile restart
./pgbench -i -s 1 -d postgres
./psql -d postgres -c "ALTER TABLE pgbench_accounts DROP CONSTRAINT
pgbench_accounts_pkey;"
cat << EOF >> insert.sql
\set aid random(1, 10 * :scale)
\set delta random(1, 100000 * :scale)
INSERT INTO pgbench_accounts (aid, bid, abalance) VALUES (:aid, :aid, :delta);
EOF
for c in 1 2 4 8 16 32 64 128 256 512 768 1024 2048 4096; do echo -n
"$c ";./pgbench -n -M prepared -U ubuntu postgres -b simple-update
-c$c -j$c -T5 2>&1|grep '^tps'|awk '{print $3}';done
[2]: * records. Tracking the WAL activity directly in WALInsertLock has the * advantage of not needing any additional locks to update the value.
* records. Tracking the WAL activity directly in WALInsertLock has the
* advantage of not needing any additional locks to update the value.
--
Bharath Rupireddy
PostgreSQL Contributors Team
RDS Open Source Databases
Amazon Web Services: https://aws.amazon.com
Attachments:
v1-0001-Decouple-last-important-WAL-record-LSN-from-WAL-i.patchapplication/octet-stream; name=v1-0001-Decouple-last-important-WAL-record-LSN-from-WAL-i.patchDownload
From d91d4aafcc4cac4344a0674e3119c6c504884810 Mon Sep 17 00:00:00 2001
From: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>
Date: Wed, 23 Nov 2022 11:22:56 +0000
Subject: [PATCH v1] Decouple last important WAL record LSN from WAL insert
locks
---
src/backend/access/transam/xlog.c | 57 +++++++++++++------------------
1 file changed, 23 insertions(+), 34 deletions(-)
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index a31fbbff78..882e968c8f 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -363,21 +363,11 @@ typedef struct XLogwrtResult
* the WAL record is just copied to the page and the lock is released. But
* to avoid the deadlock-scenario explained above, the indicator is always
* updated before sleeping while holding an insertion lock.
- *
- * lastImportantAt contains the LSN of the last important WAL record inserted
- * using a given lock. This value is used to detect if there has been
- * important WAL activity since the last time some action, like a checkpoint,
- * was performed - allowing to not repeat the action if not. The LSN is
- * updated for all insertions, unless the XLOG_MARK_UNIMPORTANT flag was
- * set. lastImportantAt is never cleared, only overwritten by the LSN of newer
- * records. Tracking the WAL activity directly in WALInsertLock has the
- * advantage of not needing any additional locks to update the value.
*/
typedef struct
{
LWLock lock;
XLogRecPtr insertingAt;
- XLogRecPtr lastImportantAt;
} WALInsertLock;
/*
@@ -447,6 +437,19 @@ typedef struct XLogCtlInsert
int runningBackups;
XLogRecPtr lastBackupStart;
+ slock_t lastimportantpos_lck; /* protects lastImportantPos */
+
+ /*
+ * lastImportantPos contains the LSN of the last important WAL record
+ * inserted in the server. This value is used to detect if there has been
+ * important WAL activity since the last time some action, like a
+ * checkpoint, was performed - allowing to not repeat the action if not.
+ * The LSN is updated for all insertions, unless the XLOG_MARK_UNIMPORTANT
+ * flag was set. lastImportantPos is never cleared, only overwritten by the
+ * LSN of newer records.
+ */
+ XLogRecPtr lastImportantPos;
+
/*
* WAL insertion locks.
*/
@@ -871,9 +874,9 @@ XLogInsertRecord(XLogRecData *rdata,
*/
if ((flags & XLOG_MARK_UNIMPORTANT) == 0)
{
- int lockno = holdingAllLocks ? 0 : MyLockNo;
-
- WALInsertLocks[lockno].l.lastImportantAt = StartPos;
+ SpinLockAcquire(&Insert->lastimportantpos_lck);
+ Insert->lastImportantPos = StartPos;
+ SpinLockRelease(&Insert->lastimportantpos_lck);
}
}
else
@@ -4603,9 +4606,10 @@ XLOGShmemInit(void)
{
LWLockInitialize(&WALInsertLocks[i].l.lock, LWTRANCHE_WAL_INSERT);
WALInsertLocks[i].l.insertingAt = InvalidXLogRecPtr;
- WALInsertLocks[i].l.lastImportantAt = InvalidXLogRecPtr;
}
+ XLogCtl->Insert.lastImportantPos = InvalidXLogRecPtr;
+
/*
* Align the start of the page buffers to a full xlog block size boundary.
* This simplifies some calculations in XLOG insertion. It is also
@@ -4625,6 +4629,7 @@ XLOGShmemInit(void)
XLogCtl->WalWriterSleeping = false;
SpinLockInit(&XLogCtl->Insert.insertpos_lck);
+ SpinLockInit(&XLogCtl->Insert.lastimportantpos_lck);
SpinLockInit(&XLogCtl->info_lck);
SpinLockInit(&XLogCtl->ulsn_lck);
}
@@ -6109,32 +6114,16 @@ GetWALInsertionTimeLine(void)
* GetLastImportantRecPtr -- Returns the LSN of the last important record
* inserted. All records not explicitly marked as unimportant are considered
* important.
- *
- * The LSN is determined by computing the maximum of
- * WALInsertLocks[i].lastImportantAt.
*/
XLogRecPtr
GetLastImportantRecPtr(void)
{
+ XLogCtlInsert *Insert = &XLogCtl->Insert;
XLogRecPtr res = InvalidXLogRecPtr;
- int i;
- for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
- {
- XLogRecPtr last_important;
-
- /*
- * Need to take a lock to prevent torn reads of the LSN, which are
- * possible on some of the supported platforms. WAL insert locks only
- * support exclusive mode, so we have to use that.
- */
- LWLockAcquire(&WALInsertLocks[i].l.lock, LW_EXCLUSIVE);
- last_important = WALInsertLocks[i].l.lastImportantAt;
- LWLockRelease(&WALInsertLocks[i].l.lock);
-
- if (res < last_important)
- res = last_important;
- }
+ SpinLockAcquire(&Insert->lastimportantpos_lck);
+ res = Insert->lastImportantPos;
+ SpinLockRelease(&Insert->lastimportantpos_lck);
return res;
}
--
2.34.1
Hi,
On 2022-11-23 19:12:07 +0530, Bharath Rupireddy wrote:
While working on something else, I noticed that each WAL insert lock
tracks its own last important WAL record's LSN (lastImportantAt) and
both the bgwriter and checkpointer later computes the max
value/server-wide last important WAL record's LSN via
GetLastImportantRecPtr(). While doing so, each WAL insertion lock is
acquired in exclusive mode in a for loop. This seems like too much
overhead to me.
GetLastImportantRecPtr() should be a very rare operation, so it's fine for it
to be expensive. The important thing is for the maintenance of the underlying
data to be very cheap.
I quickly coded a patch (attached herewith) that
tracks the server-wide last important WAL record's LSN in
XLogCtlInsert (lastImportantPos) protected with a spinlock and gets
rid of lastImportantAt from each WAL insert lock.
That strikes me as a very bad idea. It adds another point of contention to a
very very hot code path, to make a very rare code path cheaper.
Greetings,
Andres Freund
On Sun, Nov 27, 2022 at 2:43 AM Andres Freund <andres@anarazel.de> wrote:
Hi,
On 2022-11-23 19:12:07 +0530, Bharath Rupireddy wrote:
While working on something else, I noticed that each WAL insert lock
tracks its own last important WAL record's LSN (lastImportantAt) and
both the bgwriter and checkpointer later computes the max
value/server-wide last important WAL record's LSN via
GetLastImportantRecPtr(). While doing so, each WAL insertion lock is
acquired in exclusive mode in a for loop. This seems like too much
overhead to me.GetLastImportantRecPtr() should be a very rare operation, so it's fine for it
to be expensive. The important thing is for the maintenance of the underlying
data to be very cheap.I quickly coded a patch (attached herewith) that
tracks the server-wide last important WAL record's LSN in
XLogCtlInsert (lastImportantPos) protected with a spinlock and gets
rid of lastImportantAt from each WAL insert lock.That strikes me as a very bad idea. It adds another point of contention to a
very very hot code path, to make a very rare code path cheaper.
Thanks for the response. I agree that GetLastImportantRecPtr() gets
called rarely, however, what concerns me is that it's taking all the
WAL insertion locks when it gets called.
Is tracking lastImportantPos as pg_atomic_uint64 in XLogCtlInsert any
better than an explicit spinlock? I think it's better on platforms
where atomics are supported, however, it boils down to using a spin
lock on the platforms where atomics aren't supported.
--
Bharath Rupireddy
PostgreSQL Contributors Team
RDS Open Source Databases
Amazon Web Services: https://aws.amazon.com
Hi,
On 2022-11-28 11:42:19 +0530, Bharath Rupireddy wrote:
On Sun, Nov 27, 2022 at 2:43 AM Andres Freund <andres@anarazel.de> wrote:
On 2022-11-23 19:12:07 +0530, Bharath Rupireddy wrote:
While working on something else, I noticed that each WAL insert lock
tracks its own last important WAL record's LSN (lastImportantAt) and
both the bgwriter and checkpointer later computes the max
value/server-wide last important WAL record's LSN via
GetLastImportantRecPtr(). While doing so, each WAL insertion lock is
acquired in exclusive mode in a for loop. This seems like too much
overhead to me.GetLastImportantRecPtr() should be a very rare operation, so it's fine for it
to be expensive. The important thing is for the maintenance of the underlying
data to be very cheap.I quickly coded a patch (attached herewith) that
tracks the server-wide last important WAL record's LSN in
XLogCtlInsert (lastImportantPos) protected with a spinlock and gets
rid of lastImportantAt from each WAL insert lock.That strikes me as a very bad idea. It adds another point of contention to a
very very hot code path, to make a very rare code path cheaper.Thanks for the response. I agree that GetLastImportantRecPtr() gets
called rarely, however, what concerns me is that it's taking all the
WAL insertion locks when it gets called.
So what? It's far from the only operation doing so. And in contrast to most of
the other places (c.f. WALInsertLockAcquireExclusive()) it only takes one of
them at a time.
Is tracking lastImportantPos as pg_atomic_uint64 in XLogCtlInsert any
better than an explicit spinlock? I think it's better on platforms
where atomics are supported, however, it boils down to using a spin
lock on the platforms where atomics aren't supported.
A central atomic in XLogCtlInsert would be better than a spinlock protected
variable, but still bad. We do *not* want to have more central state that
needs to be manipulated, we want *less*.
If we wanted to optimize this - and I haven't seen any evidence it's worth
doing so - we should just optimize the lock acquisitions in
GetLastImportantRecPtr() away. *Without* centralizing the state.
Greetings,
Andres Freund
On Mon, Nov 28, 2022 at 11:55 PM Andres Freund <andres@anarazel.de> wrote:
Is tracking lastImportantPos as pg_atomic_uint64 in XLogCtlInsert any
better than an explicit spinlock? I think it's better on platforms
where atomics are supported, however, it boils down to using a spin
lock on the platforms where atomics aren't supported.A central atomic in XLogCtlInsert would be better than a spinlock protected
variable, but still bad. We do *not* want to have more central state that
needs to be manipulated, we want *less*.
Agreed.
If we wanted to optimize this - and I haven't seen any evidence it's worth
doing so - we should just optimize the lock acquisitions in
GetLastImportantRecPtr() away. *Without* centralizing the state.
Hm. I can think of converting lastImportantAt from XLogRecPtr to
pg_atomic_uint64 and letting it stay within the WALInsertLock
structure. This prevents torn-reads and also avoids WAL insertion lock
acquire-release cycles in GetLastImportantRecPtr(). Please see the
attached patch herewith.
If this idea is worth it, I would like to bring this and the other
thread [1]/messages/by-id/CALj2ACWkWbheFhkPwMw83CUpzHFGXSV_HXTBxG9+-PZ3ufHE=Q@mail.gmail.com that converts insertingAt to atomic and modifies other WAL
insert locks related code under one roof and start a new thread. BTW,
the patch at [1]/messages/by-id/CALj2ACWkWbheFhkPwMw83CUpzHFGXSV_HXTBxG9+-PZ3ufHE=Q@mail.gmail.com seems to be showing a good benefit for
high-concurrent inserts with small records.
Thoughts?
[1]: /messages/by-id/CALj2ACWkWbheFhkPwMw83CUpzHFGXSV_HXTBxG9+-PZ3ufHE=Q@mail.gmail.com
--
Bharath Rupireddy
PostgreSQL Contributors Team
RDS Open Source Databases
Amazon Web Services: https://aws.amazon.com
Attachments:
v2-0001-Convert-lastImportantAt-to-64-bit-atomic-for-torn.patchapplication/x-patch; name=v2-0001-Convert-lastImportantAt-to-64-bit-atomic-for-torn.patchDownload
From 1f37fd240f2d381917595acbbc18d3b3ce714d2c Mon Sep 17 00:00:00 2001
From: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>
Date: Tue, 29 Nov 2022 06:55:27 +0000
Subject: [PATCH v2] Convert lastImportantAt to 64-bit atomic for torn-free
reads
GetLastImportantRecPtr() currently reads lastImportantAt under WAL
insertion lock acquire-release cycles to get torn-free reads.
Converting lastImportantAt to 64-bit atomic from XLogRecPtr
provides inherent benefit of torn-free reads and simplifies
GetLastImportantRecPtr() a bit as the WAL insertion lock
acquire-release cycles are no longer needed.
---
src/backend/access/transam/xlog.c | 17 ++++++++---------
1 file changed, 8 insertions(+), 9 deletions(-)
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index a31fbbff78..831a18d3bb 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -377,7 +377,7 @@ typedef struct
{
LWLock lock;
XLogRecPtr insertingAt;
- XLogRecPtr lastImportantAt;
+ pg_atomic_uint64 lastImportantAt;
} WALInsertLock;
/*
@@ -873,7 +873,8 @@ XLogInsertRecord(XLogRecData *rdata,
{
int lockno = holdingAllLocks ? 0 : MyLockNo;
- WALInsertLocks[lockno].l.lastImportantAt = StartPos;
+ pg_atomic_write_u64(&WALInsertLocks[lockno].l.lastImportantAt,
+ StartPos);
}
}
else
@@ -4603,7 +4604,8 @@ XLOGShmemInit(void)
{
LWLockInitialize(&WALInsertLocks[i].l.lock, LWTRANCHE_WAL_INSERT);
WALInsertLocks[i].l.insertingAt = InvalidXLogRecPtr;
- WALInsertLocks[i].l.lastImportantAt = InvalidXLogRecPtr;
+ pg_atomic_write_u64(&WALInsertLocks[i].l.lastImportantAt,
+ InvalidXLogRecPtr);
}
/*
@@ -6124,13 +6126,10 @@ GetLastImportantRecPtr(void)
XLogRecPtr last_important;
/*
- * Need to take a lock to prevent torn reads of the LSN, which are
- * possible on some of the supported platforms. WAL insert locks only
- * support exclusive mode, so we have to use that.
+ * We atomically read lastImportantAt which prevents torn reads. Hence
+ * no need to take WAL insert lock here.
*/
- LWLockAcquire(&WALInsertLocks[i].l.lock, LW_EXCLUSIVE);
- last_important = WALInsertLocks[i].l.lastImportantAt;
- LWLockRelease(&WALInsertLocks[i].l.lock);
+ last_important = pg_atomic_read_u64(&WALInsertLocks[i].l.lastImportantAt);
if (res < last_important)
res = last_important;
--
2.34.1