diff doc/src/sgml/config.sgml
index 074afee..8ff21a9
*** a/doc/src/sgml/config.sgml
--- b/doc/src/sgml/config.sgml
*************** SET ENABLE_SEQSCAN TO OFF;
*** 1869,1888 ****
When the commit data for a transaction is flushed to disk, any
additional commits ready at that time are also flushed out.
commit_delay adds a time delay, set in
! microseconds, before a transaction attempts to
! flush the WAL buffer out to disk. A nonzero delay can allow more
! transactions to be committed with only one flush operation, if
! system load is high enough that additional transactions become
! ready to commit within the given interval. But the delay is
! just wasted if no other transactions become ready to
! commit. Therefore, the delay is only performed if at least
! commit_siblings other transactions are
! active at the instant that a server process has written its
! commit record.
The default commit_delay is zero (no delay).
- Since all pending commit data will be written at every flush
- regardless of this setting, it is rare that adding delay
- by increasing this parameter will actually improve performance.
--- 1869,1888 ----
When the commit data for a transaction is flushed to disk, any
additional commits ready at that time are also flushed out.
commit_delay adds a time delay, set in
! microseconds, before a leading transaction participating in
! group commit attempts to flush the WAL buffer out to disk.
! This can add an additional latency of up to
! commit_delay microseconds for each transaction.
! A nonzero delay can allow more transactions to be committed with
! only one flush operation, if system load is high enough that
! additional transactions become ready to commit within the
! given interval. However, the delay is just wasted if no other
! transactions become ready to commit. Therefore, the delay
! is only performed if at least commit_siblings
! other transactions are active immediately before the leader
! backend participating in group commit proceeds with flushing
! WAL.
The default commit_delay is zero (no delay).
diff doc/src/sgml/wal.sgml
index 0afb9d6..a98132d
*** a/doc/src/sgml/wal.sgml
--- b/doc/src/sgml/wal.sgml
***************
*** 376,384 ****
WAL to disk, in the hope that a single flush
executed by one such transaction can also serve other transactions
committing at about the same time. Setting commit_delay
! can only help when there are many concurrently committing transactions,
! and it is difficult to tune it to a value that actually helps rather
! than hurt throughput.
--- 376,382 ----
WAL to disk, in the hope that a single flush
executed by one such transaction can also serve other transactions
committing at about the same time. Setting commit_delay
! can only help when there are many concurrently committing transactions.
diff src/backend/access/transam/xact.c
index c71a10e..513172b
*** a/src/backend/access/transam/xact.c
--- b/src/backend/access/transam/xact.c
*************** bool XactDeferrable;
*** 67,75 ****
int synchronous_commit = SYNCHRONOUS_COMMIT_ON;
- int CommitDelay = 0; /* precommit delay in microseconds */
- int CommitSiblings = 5; /* # concurrent xacts needed to sleep */
-
/*
* MyXactAccessedTempRel is set when a temporary relation is accessed.
* We don't allow PREPARE TRANSACTION in that case. (This is global
--- 67,72 ----
*************** RecordTransactionCommit(void)
*** 1118,1139 ****
if ((wrote_xlog && synchronous_commit > SYNCHRONOUS_COMMIT_OFF) ||
forceSyncCommit || nrels > 0)
{
- /*
- * Synchronous commit case:
- *
- * Sleep before flush! So we can flush more than one commit records
- * per single fsync. (The idea is some other backend may do the
- * XLogFlush while we're sleeping. This needs work still, because on
- * most Unixen, the minimum select() delay is 10msec or more, which is
- * way too long.)
- *
- * We do not sleep if enableFsync is not turned on, nor if there are
- * fewer than CommitSiblings other backends with active transactions.
- */
- if (CommitDelay > 0 && enableFsync &&
- MinimumActiveBackends(CommitSiblings))
- pg_usleep(CommitDelay);
-
XLogFlush(XactLastRecEnd);
/*
--- 1115,1120 ----
diff src/backend/access/transam/xlog.c
index d3650bd..3ef8d88
*** a/src/backend/access/transam/xlog.c
--- b/src/backend/access/transam/xlog.c
*************** bool fullPageWrites = true;
*** 80,85 ****
--- 80,87 ----
bool log_checkpoints = false;
int sync_method = DEFAULT_SYNC_METHOD;
int wal_level = WAL_LEVEL_MINIMAL;
+ int CommitDelay = 0; /* precommit delay in microseconds */
+ int CommitSiblings = 5; /* # concurrent xacts needed to sleep */
#ifdef WAL_DEBUG
bool XLOG_DEBUG = false;
*************** XLogFlush(XLogRecPtr record)
*** 2111,2116 ****
--- 2113,2132 ----
*/
continue;
}
+
+ /*
+ * Sleep before flush! By adding a delay here, we may give further
+ * backends the opportunity to join the backlog of group commit
+ * followers; this can significantly improve transaction throughput, at
+ * the risk of increasing transaction latency.
+ *
+ * We do not sleep if enableFsync is not turned on, nor if there are
+ * fewer than CommitSiblings other backends with active transactions.
+ */
+ if (CommitDelay > 0 && enableFsync &&
+ MinimumActiveBackends(CommitSiblings))
+ pg_usleep(CommitDelay);
+
/* Got the lock */
LogwrtResult = XLogCtl->LogwrtResult;
if (!XLByteLE(record, LogwrtResult.Flush))
diff src/backend/utils/misc/guc.c
index d75ab43..9b86ac1
*** a/src/backend/utils/misc/guc.c
--- b/src/backend/utils/misc/guc.c
*************** static struct config_int ConfigureNamesI
*** 2031,2037 ****
{
{"commit_delay", PGC_USERSET, WAL_SETTINGS,
gettext_noop("Sets the delay in microseconds between transaction commit and "
! "flushing WAL to disk."),
NULL
},
&CommitDelay,
--- 2031,2037 ----
{
{"commit_delay", PGC_USERSET, WAL_SETTINGS,
gettext_noop("Sets the delay in microseconds between transaction commit and "
! "flushing WAL to disk for the group commit leader."),
NULL
},
&CommitDelay,