diff doc/src/sgml/config.sgml index 074afee..8ff21a9 *** a/doc/src/sgml/config.sgml --- b/doc/src/sgml/config.sgml *************** SET ENABLE_SEQSCAN TO OFF; *** 1869,1888 **** When the commit data for a transaction is flushed to disk, any additional commits ready at that time are also flushed out. commit_delay adds a time delay, set in ! microseconds, before a transaction attempts to ! flush the WAL buffer out to disk. A nonzero delay can allow more ! transactions to be committed with only one flush operation, if ! system load is high enough that additional transactions become ! ready to commit within the given interval. But the delay is ! just wasted if no other transactions become ready to ! commit. Therefore, the delay is only performed if at least ! commit_siblings other transactions are ! active at the instant that a server process has written its ! commit record. The default commit_delay is zero (no delay). - Since all pending commit data will be written at every flush - regardless of this setting, it is rare that adding delay - by increasing this parameter will actually improve performance. --- 1869,1888 ---- When the commit data for a transaction is flushed to disk, any additional commits ready at that time are also flushed out. commit_delay adds a time delay, set in ! microseconds, before a leading transaction participating in ! group commit attempts to flush the WAL buffer out to disk. ! This can add an additional latency of up to ! commit_delay microseconds for each transaction. ! A nonzero delay can allow more transactions to be committed with ! only one flush operation, if system load is high enough that ! additional transactions become ready to commit within the ! given interval. However, the delay is just wasted if no other ! transactions become ready to commit. Therefore, the delay ! is only performed if at least commit_siblings ! other transactions are active immediately before the leader ! 
backend participating in group commit proceeds with flushing ! WAL. The default commit_delay is zero (no delay). diff doc/src/sgml/wal.sgml index 0afb9d6..a98132d *** a/doc/src/sgml/wal.sgml --- b/doc/src/sgml/wal.sgml *************** *** 376,384 **** WAL to disk, in the hope that a single flush executed by one such transaction can also serve other transactions committing at about the same time. Setting commit_delay ! can only help when there are many concurrently committing transactions, ! and it is difficult to tune it to a value that actually helps rather ! than hurt throughput. --- 376,382 ---- WAL to disk, in the hope that a single flush executed by one such transaction can also serve other transactions committing at about the same time. Setting commit_delay ! can only help when there are many concurrently committing transactions. diff src/backend/access/transam/xact.c index c71a10e..513172b *** a/src/backend/access/transam/xact.c --- b/src/backend/access/transam/xact.c *************** bool XactDeferrable; *** 67,75 **** int synchronous_commit = SYNCHRONOUS_COMMIT_ON; - int CommitDelay = 0; /* precommit delay in microseconds */ - int CommitSiblings = 5; /* # concurrent xacts needed to sleep */ - /* * MyXactAccessedTempRel is set when a temporary relation is accessed. * We don't allow PREPARE TRANSACTION in that case. (This is global --- 67,72 ---- *************** RecordTransactionCommit(void) *** 1118,1139 **** if ((wrote_xlog && synchronous_commit > SYNCHRONOUS_COMMIT_OFF) || forceSyncCommit || nrels > 0) { - /* - * Synchronous commit case: - * - * Sleep before flush! So we can flush more than one commit records - * per single fsync. (The idea is some other backend may do the - * XLogFlush while we're sleeping. This needs work still, because on - * most Unixen, the minimum select() delay is 10msec or more, which is - * way too long.) 
- * - * We do not sleep if enableFsync is not turned on, nor if there are - * fewer than CommitSiblings other backends with active transactions. - */ - if (CommitDelay > 0 && enableFsync && - MinimumActiveBackends(CommitSiblings)) - pg_usleep(CommitDelay); - XLogFlush(XactLastRecEnd); /* --- 1115,1120 ---- diff src/backend/access/transam/xlog.c index d3650bd..3ef8d88 *** a/src/backend/access/transam/xlog.c --- b/src/backend/access/transam/xlog.c *************** bool fullPageWrites = true; *** 80,85 **** --- 80,87 ---- bool log_checkpoints = false; int sync_method = DEFAULT_SYNC_METHOD; int wal_level = WAL_LEVEL_MINIMAL; + int CommitDelay = 0; /* precommit delay in microseconds */ + int CommitSiblings = 5; /* # concurrent xacts needed to sleep */ #ifdef WAL_DEBUG bool XLOG_DEBUG = false; *************** XLogFlush(XLogRecPtr record) *** 2111,2116 **** --- 2113,2132 ---- */ continue; } + + /* + * Sleep before flush! By adding a delay here, we may give further + * backends the opportunity to join the backlog of group commit + * followers; this can significantly improve transaction throughput, at + * the risk of increasing transaction latency. + * + * We do not sleep if enableFsync is not turned on, nor if there are + * fewer than CommitSiblings other backends with active transactions. + */ + if (CommitDelay > 0 && enableFsync && + MinimumActiveBackends(CommitSiblings)) + pg_usleep(CommitDelay); + /* Got the lock */ LogwrtResult = XLogCtl->LogwrtResult; if (!XLByteLE(record, LogwrtResult.Flush)) diff src/backend/utils/misc/guc.c index d75ab43..9b86ac1 *** a/src/backend/utils/misc/guc.c --- b/src/backend/utils/misc/guc.c *************** static struct config_int ConfigureNamesI *** 2031,2037 **** { {"commit_delay", PGC_USERSET, WAL_SETTINGS, gettext_noop("Sets the delay in microseconds between transaction commit and " ! 
"flushing WAL to disk."), NULL }, &CommitDelay, --- 2031,2037 ---- { {"commit_delay", PGC_USERSET, WAL_SETTINGS, gettext_noop("Sets the delay in microseconds between transaction commit and " ! "flushing WAL to disk for the group commit leader."), NULL }, &CommitDelay,