diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 1e2aa9f..2dc99a6 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -4685,7 +4685,6 @@ MaxLivePostmasterChildren(void) extern slock_t *ShmemLock; extern LWLock *LWLockArray; extern slock_t *ProcStructLock; -extern PROC_HDR *ProcGlobal; extern PGPROC *AuxiliaryProcs; extern PMSignalData *PMSignalState; extern pgsocket pgStatSock; diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index e3ad319..dee1291 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -112,6 +112,94 @@ static const char *const lock_mode_names[] = "AccessExclusiveLock" }; +/* + * ZZZ: Rewrite this comment! + * + * Fast-path structures, to speed up AccessShareLock, RowShareLock, and + * RowExclusiveLock operations on database relations. These locks are very + * commonly taken and released, but very rarely conflict, so it's worth going + * to some extra trouble to optimize this case. + * + * The idea here is that backends are permitted to record these relatively + * weak locks in a per-backend queue rather than entering them in the global + * lock table. This is faster, and avoids contention on the lock manager + * partition locks, which is especially problematic in the case where many + * backends are attempting to access the same table. 
+ */ + +static int FastPathLocalUseCount = 0; + +#define FAST_PATH_BITS_PER_SLOT 3 +#define FAST_PATH_LOCKNUMBER_OFFSET 1 +#define FAST_PATH_MASK ((1 << FAST_PATH_BITS_PER_SLOT) - 1) +#define FAST_PATH_GET_BITS(proc, n) \ + (((proc)->fpLockBits >> (FAST_PATH_BITS_PER_SLOT * n)) & FAST_PATH_MASK) +#define FAST_PATH_BIT_POSITION(n, l) \ + (AssertMacro((l) >= FAST_PATH_LOCKNUMBER_OFFSET), \ + AssertMacro((l) < FAST_PATH_BITS_PER_SLOT+FAST_PATH_LOCKNUMBER_OFFSET), \ + AssertMacro((n) < FP_LOCK_SLOTS_PER_BACKEND), \ + ((l) - FAST_PATH_LOCKNUMBER_OFFSET + FAST_PATH_BITS_PER_SLOT * (n))) +#define FAST_PATH_SET_LOCKMODE(proc, n, l) \ + (proc)->fpLockBits |= UINT64CONST(1) << FAST_PATH_BIT_POSITION(n, l) +#define FAST_PATH_CLEAR_LOCKMODE(proc, n, l) \ + (proc)->fpLockBits &= ~(UINT64CONST(1) << FAST_PATH_BIT_POSITION(n, l)) +#define FAST_PATH_CHECK_LOCKMODE(proc, n, l) \ + ((proc)->fpLockBits & (UINT64CONST(1) << FAST_PATH_BIT_POSITION(n, l))) + +/* + * The fast-path lock mechanism is concerned only with relation locks on + * unshared relations by backends bound to a database. The fast-path + * mechanism exists mostly to accelerate acquisition and release of locks + * that rarely conflict. Because ShareUpdateExclusiveLock is + * self-conflicting, it can't use the fast-path mechanism; but it also does + * not conflict with any of the locks that do, so we can ignore it completely. 
+ */ +#define FastPathTag(locktag) \ + ((locktag)->locktag_lockmethodid == DEFAULT_LOCKMETHOD && \ + (locktag)->locktag_type == LOCKTAG_RELATION && \ + (locktag)->locktag_field1 == MyDatabaseId && \ + MyDatabaseId != InvalidOid) +#define FastPathWeakMode(mode) ((mode) < ShareUpdateExclusiveLock) +#define FastPathStrongMode(mode) ((mode) > ShareUpdateExclusiveLock) +#define FastPathRelevantMode(mode) ((mode) != ShareUpdateExclusiveLock) + +static bool FastPathGrantLock(Oid relid, LOCKMODE lockmode); +static bool FastPathUnGrantLock(Oid relid, LOCKMODE lockmode); +static bool FastPathTransferLocks(LockMethod lockMethodTable, + const LOCKTAG *locktag, uint32 hashcode); + +/* ZZZ: Remove this. */ +#define DEBUG_FAST_LOCK 0 + +/* + * To make the fast-path lock mechanism work, we must have some way of + * preventing the use of the fast-path when a conflicting lock might be + * present. We partition the locktag space into + * FAST_PATH_STRONG_LOCK_HASH_PARTITIONS partitions, and maintain an integer + * count of the number of "strong" lockers in each partition. When any + * "strong" lockers are present (which is hopefully not very often), the + * fast-path mechanism can't be used, and we must fall back to the slower + * method of pushing matching locks directly into the main lock tables. + * + * The deadlock detector does not know anything about the fast path mechanism, + * so any locks that might be involved in a deadlock must be transferred from + * the fast-path queues to the main lock table. 
+ */ + +#define FAST_PATH_STRONG_LOCK_HASH_BITS 10 +#define FAST_PATH_STRONG_LOCK_HASH_PARTITIONS \ + (1 << FAST_PATH_STRONG_LOCK_HASH_BITS) +#define FastPathStrongLockHashPartition(hashcode) \ + ((hashcode) % FAST_PATH_STRONG_LOCK_HASH_PARTITIONS) + +typedef struct +{ + slock_t mutex; + uint32 count[FAST_PATH_STRONG_LOCK_HASH_PARTITIONS]; +} FastPathStrongLockData; + +FastPathStrongLockData *FastPathStrongLocks; + #ifndef LOCK_DEBUG static bool Dummy_trace = false; #endif @@ -254,6 +342,8 @@ PROCLOCK_PRINT(const char *where, const PROCLOCK *proclockP) static uint32 proclock_hash(const void *key, Size keysize); static void RemoveLocalLock(LOCALLOCK *locallock); +static PROCLOCK *SetupLockInTable(LockMethod lockMethodTable, PGPROC *proc, + const LOCKTAG *locktag, uint32 hashcode, LOCKMODE lockmode); static void GrantLockLocal(LOCALLOCK *locallock, ResourceOwner owner); static void WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner); static void ReleaseLockForOwner(LOCALLOCK *locallock, ResourceOwner owner); @@ -262,6 +352,8 @@ static bool UnGrantLock(LOCK *lock, LOCKMODE lockmode, static void CleanUpLock(LOCK *lock, PROCLOCK *proclock, LockMethod lockMethodTable, uint32 hashcode, bool wakeupNeeded); +static void LockRefindAndRelease(LockMethod lockMethodTable, PGPROC *proc, + LOCKTAG *locktag, LOCKMODE lockmode); /* @@ -283,6 +375,7 @@ InitLocks(void) int hash_flags; long init_table_size, max_table_size; + bool found; /* * Compute init/max size to request for lock hashtables. Note these @@ -329,6 +422,14 @@ InitLocks(void) hash_flags); /* + * Allocate fast-path structures. + */ + FastPathStrongLocks = ShmemInitStruct("Fast Path Strong Lock Data", + sizeof(FastPathStrongLockData), &found); + if (!found) + SpinLockInit(&FastPathStrongLocks->mutex); + + /* * Allocate non-shared hash table for LOCALLOCK structs. This stores lock * counts and resource owner information. 
* @@ -492,12 +593,9 @@ LockAcquireExtended(const LOCKTAG *locktag, LOCALLOCK *locallock; LOCK *lock; PROCLOCK *proclock; - PROCLOCKTAG proclocktag; bool found; ResourceOwner owner; uint32 hashcode; - uint32 proclock_hashcode; - int partition; LWLockId partitionLock; int status; bool log_lock = false; @@ -571,6 +669,7 @@ LockAcquireExtended(const LOCKTAG *locktag, locallock->maxLockOwners = newsize; } } + hashcode = locallock->hashcode; /* * If we already hold the lock, we can just increase the count locally. @@ -600,16 +699,255 @@ LockAcquireExtended(const LOCKTAG *locktag, log_lock = true; } + /* Locks that participate in the fast path require special handling. */ + if (FastPathTag(locktag) && FastPathRelevantMode(lockmode)) + { + uint32 fasthashcode; + + fasthashcode = FastPathStrongLockHashPartition(hashcode); + + /* + * If we remember having filled up the fast path array, we don't + * attempt to make any further use of it until we release some locks. + * It's possible that some other backend has transferred some of those + * locks to the shared hash table, leaving space free, but it's not + * worth acquiring the LWLock just to check. It's also possible that + * we're acquiring a second or third lock type on a relation we have + * already locked using the fast-path, but for now we don't worry about + * that case either. + */ + if (FastPathWeakMode(lockmode) + && FastPathLocalUseCount < FP_LOCK_SLOTS_PER_BACKEND) + { + bool acquired; + + /* + * LWLockAcquire acts as a memory sequencing point, so it's safe + * to assume that any strong locker whose increment to + * FastPathStrongLocks->count becomes visible after we test it has + * yet to begin to transfer fast-path locks. 
+ */ + LWLockAcquire(MyProc->fpLWLock, LW_EXCLUSIVE); + if (FastPathStrongLocks->count[fasthashcode] != 0) + acquired = false; + else + acquired = FastPathGrantLock(locktag->locktag_field2, lockmode); + LWLockRelease(MyProc->fpLWLock); + if (acquired) + { + GrantLockLocal(locallock, owner); +#if DEBUG_FAST_LOCK > 1 + elog(WARNING, "pid %d fast acquire rel %u/%u mode %s used %d", + MyProcPid, locktag->locktag_field1, locktag->locktag_field2, + lock_mode_names[lockmode], FastPathLocalUseCount); +#endif + return LOCKACQUIRE_OK; + } +#if DEBUG_FAST_LOCK > 0 + elog(WARNING, "pid %d FALLTHROUGH acquire rel %u/%u mode %s partition %d", + MyProcPid, locktag->locktag_field1, locktag->locktag_field2, + lock_mode_names[lockmode], fasthashcode); +#endif + } + else if (FastPathStrongMode(lockmode)) + { + /* + * Adding to a memory location is not atomic, so we take a + * spinlock to ensure we don't collide with someone else trying + * to bump the count at the same time. + * + * XXX: It might be worth considering using an atomic fetch-and-add + * instruction here, on architectures where that is supported. + */ +#if DEBUG_FAST_LOCK > 0 + elog(WARNING, "pid %d EMBARGO acquire rel %u/%u mode %s partition %d", + MyProcPid, locktag->locktag_field1, locktag->locktag_field2, + lock_mode_names[lockmode], fasthashcode); +#endif + SpinLockAcquire(&FastPathStrongLocks->mutex); + FastPathStrongLocks->count[fasthashcode]++; + SpinLockRelease(&FastPathStrongLocks->mutex); + /* + * ZZZ: If a failure occurs after this point and before we + * actually get the lock, we'll leak a strong lock count! + * + * ZZZ: Need to check return value of FastPathTransferLocks! + */ + FastPathTransferLocks(lockMethodTable, locktag, hashcode); + } + } + /* * Otherwise we've got to mess with the shared lock table. 
*/ - hashcode = locallock->hashcode; - partition = LockHashPartition(hashcode); partitionLock = LockHashPartitionLock(hashcode); LWLockAcquire(partitionLock, LW_EXCLUSIVE); /* + * Find or create a proclock entry with this tag + */ + proclock = SetupLockInTable(lockMethodTable, MyProc, locktag, + hashcode, lockmode); + if (!proclock) + { + LWLockRelease(partitionLock); + if (reportMemoryError) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of shared memory"), + errhint("You might need to increase max_locks_per_transaction."))); + else + return LOCKACQUIRE_NOT_AVAIL; + } + locallock->proclock = proclock; + lock = proclock->tag.myLock; + locallock->lock = lock; + + /* + * If lock requested conflicts with locks requested by waiters, must join + * wait queue. Otherwise, check for conflict with already-held locks. + * (That's last because most complex check.) + */ + if (lockMethodTable->conflictTab[lockmode] & lock->waitMask) + status = STATUS_FOUND; + else + status = LockCheckConflicts(lockMethodTable, lockmode, + lock, proclock, MyProc); + + if (status == STATUS_OK) + { + /* No conflict with held or previously requested locks */ + GrantLock(lock, proclock, lockmode); + GrantLockLocal(locallock, owner); + } + else + { + Assert(status == STATUS_FOUND); + + /* + * We can't acquire the lock immediately. If caller specified no + * blocking, remove useless table entries and return NOT_AVAIL without + * waiting. 
+ */ + if (dontWait) + { + if (proclock->holdMask == 0) + { + uint32 proclock_hashcode; + + proclock_hashcode = ProcLockHashCode(&proclock->tag, hashcode); + SHMQueueDelete(&proclock->lockLink); + SHMQueueDelete(&proclock->procLink); + if (!hash_search_with_hash_value(LockMethodProcLockHash, + (void *) &(proclock->tag), + proclock_hashcode, + HASH_REMOVE, + NULL)) + elog(PANIC, "proclock table corrupted"); + } + else + PROCLOCK_PRINT("LockAcquire: NOWAIT", proclock); + lock->nRequested--; + lock->requested[lockmode]--; + LOCK_PRINT("LockAcquire: conditional lock failed", lock, lockmode); + Assert((lock->nRequested > 0) && (lock->requested[lockmode] >= 0)); + Assert(lock->nGranted <= lock->nRequested); + LWLockRelease(partitionLock); + if (locallock->nLocks == 0) + RemoveLocalLock(locallock); + return LOCKACQUIRE_NOT_AVAIL; + } + + /* + * In Hot Standby perform early deadlock detection in normal backends. + * If deadlock found we release partition lock but do not return. + */ + if (RecoveryInProgress() && !InRecovery) + CheckRecoveryConflictDeadlock(partitionLock); + + /* + * Set bitmask of locks this process already holds on this object. + */ + MyProc->heldLocks = proclock->holdMask; + + /* + * Sleep till someone wakes me up. + */ + + TRACE_POSTGRESQL_LOCK_WAIT_START(locktag->locktag_field1, + locktag->locktag_field2, + locktag->locktag_field3, + locktag->locktag_field4, + locktag->locktag_type, + lockmode); + + WaitOnLock(locallock, owner); + + TRACE_POSTGRESQL_LOCK_WAIT_DONE(locktag->locktag_field1, + locktag->locktag_field2, + locktag->locktag_field3, + locktag->locktag_field4, + locktag->locktag_type, + lockmode); + + /* + * NOTE: do not do any material change of state between here and + * return. All required changes in locktable state must have been + * done when the lock was granted to us --- see notes in WaitOnLock. + */ + + /* + * Check the proclock entry status, in case something in the ipc + * communication doesn't work correctly. 
+ */ + if (!(proclock->holdMask & LOCKBIT_ON(lockmode))) + { + PROCLOCK_PRINT("LockAcquire: INCONSISTENT", proclock); + LOCK_PRINT("LockAcquire: INCONSISTENT", lock, lockmode); + /* Should we retry ? */ + LWLockRelease(partitionLock); + elog(ERROR, "LockAcquire failed"); + } + PROCLOCK_PRINT("LockAcquire: granted", proclock); + LOCK_PRINT("LockAcquire: granted", lock, lockmode); + } + + LWLockRelease(partitionLock); + + /* + * Emit a WAL record if acquisition of this lock need to be replayed in a + * standby server. + */ + if (log_lock) + { + /* + * Decode the locktag back to the original values, to avoid sending + * lots of empty bytes with every message. See lock.h to check how a + * locktag is defined for LOCKTAG_RELATION + */ + LogAccessExclusiveLock(locktag->locktag_field1, + locktag->locktag_field2); + } + + return LOCKACQUIRE_OK; +} + +/* + * Find or create LOCK and PROCLOCK objects as needed for a new lock + * request. + */ +static PROCLOCK * +SetupLockInTable(LockMethod lockMethodTable, PGPROC *proc, + const LOCKTAG *locktag, uint32 hashcode, LOCKMODE lockmode) +{ + LOCK *lock; + PROCLOCK *proclock; + PROCLOCKTAG proclocktag; + uint32 proclock_hashcode; + bool found; + + /* * Find or create a lock with this tag. * * Note: if the locallock object already existed, it might have a pointer @@ -623,17 +961,7 @@ LockAcquireExtended(const LOCKTAG *locktag, HASH_ENTER_NULL, &found); if (!lock) - { - LWLockRelease(partitionLock); - if (reportMemoryError) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of shared memory"), - errhint("You might need to increase max_locks_per_transaction."))); - else - return LOCKACQUIRE_NOT_AVAIL; - } - locallock->lock = lock; + return NULL; /* * if it's a new lock object, initialize it @@ -662,7 +990,7 @@ LockAcquireExtended(const LOCKTAG *locktag, * Create the hash key for the proclock table. 
*/ proclocktag.myLock = lock; - proclocktag.myProc = MyProc; + proclocktag.myProc = proc; proclock_hashcode = ProcLockHashCode(&proclocktag, hashcode); @@ -693,27 +1021,21 @@ LockAcquireExtended(const LOCKTAG *locktag, NULL)) elog(PANIC, "lock table corrupted"); } - LWLockRelease(partitionLock); - if (reportMemoryError) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of shared memory"), - errhint("You might need to increase max_locks_per_transaction."))); - else - return LOCKACQUIRE_NOT_AVAIL; + return NULL; } - locallock->proclock = proclock; /* * If new, initialize the new entry */ if (!found) { + uint32 partition = LockHashPartition(hashcode); + proclock->holdMask = 0; proclock->releaseMask = 0; /* Add proclock to appropriate lists */ SHMQueueInsertBefore(&lock->procLocks, &proclock->lockLink); - SHMQueueInsertBefore(&(MyProc->myProcLocks[partition]), + SHMQueueInsertBefore(&(proc->myProcLocks[partition]), &proclock->procLink); PROCLOCK_PRINT("LockAcquire: new", proclock); } @@ -779,130 +1101,7 @@ LockAcquireExtended(const LOCKTAG *locktag, lock->tag.locktag_field1, lock->tag.locktag_field2, lock->tag.locktag_field3); - /* - * If lock requested conflicts with locks requested by waiters, must join - * wait queue. Otherwise, check for conflict with already-held locks. - * (That's last because most complex check.) - */ - if (lockMethodTable->conflictTab[lockmode] & lock->waitMask) - status = STATUS_FOUND; - else - status = LockCheckConflicts(lockMethodTable, lockmode, - lock, proclock, MyProc); - - if (status == STATUS_OK) - { - /* No conflict with held or previously requested locks */ - GrantLock(lock, proclock, lockmode); - GrantLockLocal(locallock, owner); - } - else - { - Assert(status == STATUS_FOUND); - - /* - * We can't acquire the lock immediately. If caller specified no - * blocking, remove useless table entries and return NOT_AVAIL without - * waiting. 
- */ - if (dontWait) - { - if (proclock->holdMask == 0) - { - SHMQueueDelete(&proclock->lockLink); - SHMQueueDelete(&proclock->procLink); - if (!hash_search_with_hash_value(LockMethodProcLockHash, - (void *) &(proclock->tag), - proclock_hashcode, - HASH_REMOVE, - NULL)) - elog(PANIC, "proclock table corrupted"); - } - else - PROCLOCK_PRINT("LockAcquire: NOWAIT", proclock); - lock->nRequested--; - lock->requested[lockmode]--; - LOCK_PRINT("LockAcquire: conditional lock failed", lock, lockmode); - Assert((lock->nRequested > 0) && (lock->requested[lockmode] >= 0)); - Assert(lock->nGranted <= lock->nRequested); - LWLockRelease(partitionLock); - if (locallock->nLocks == 0) - RemoveLocalLock(locallock); - return LOCKACQUIRE_NOT_AVAIL; - } - - /* - * In Hot Standby perform early deadlock detection in normal backends. - * If deadlock found we release partition lock but do not return. - */ - if (RecoveryInProgress() && !InRecovery) - CheckRecoveryConflictDeadlock(partitionLock); - - /* - * Set bitmask of locks this process already holds on this object. - */ - MyProc->heldLocks = proclock->holdMask; - - /* - * Sleep till someone wakes me up. - */ - - TRACE_POSTGRESQL_LOCK_WAIT_START(locktag->locktag_field1, - locktag->locktag_field2, - locktag->locktag_field3, - locktag->locktag_field4, - locktag->locktag_type, - lockmode); - - WaitOnLock(locallock, owner); - - TRACE_POSTGRESQL_LOCK_WAIT_DONE(locktag->locktag_field1, - locktag->locktag_field2, - locktag->locktag_field3, - locktag->locktag_field4, - locktag->locktag_type, - lockmode); - - /* - * NOTE: do not do any material change of state between here and - * return. All required changes in locktable state must have been - * done when the lock was granted to us --- see notes in WaitOnLock. - */ - - /* - * Check the proclock entry status, in case something in the ipc - * communication doesn't work correctly. 
- */ - if (!(proclock->holdMask & LOCKBIT_ON(lockmode))) - { - PROCLOCK_PRINT("LockAcquire: INCONSISTENT", proclock); - LOCK_PRINT("LockAcquire: INCONSISTENT", lock, lockmode); - /* Should we retry ? */ - LWLockRelease(partitionLock); - elog(ERROR, "LockAcquire failed"); - } - PROCLOCK_PRINT("LockAcquire: granted", proclock); - LOCK_PRINT("LockAcquire: granted", lock, lockmode); - } - - LWLockRelease(partitionLock); - - /* - * Emit a WAL record if acquisition of this lock need to be replayed in a - * standby server. - */ - if (log_lock) - { - /* - * Decode the locktag back to the original values, to avoid sending - * lots of empty bytes with every message. See lock.h to check how a - * locktag is defined for LOCKTAG_RELATION - */ - LogAccessExclusiveLock(locktag->locktag_field1, - locktag->locktag_field2); - } - - return LOCKACQUIRE_OK; + return proclock; } /* @@ -1439,6 +1638,53 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock) if (locallock->nLocks > 0) return TRUE; + /* Locks that participate in the fast path require special handling. */ + if (FastPathTag(locktag) && FastPathRelevantMode(lockmode)) + { + if (FastPathWeakMode(lockmode) && FastPathLocalUseCount > 0) + { + bool released; + + /* + * We might not find the lock here, even if we originally entered + * it here. Another backend may have moved it to the main table. 
+ */ + LWLockAcquire(MyProc->fpLWLock, LW_EXCLUSIVE); + released = FastPathUnGrantLock(locktag->locktag_field2, lockmode); + LWLockRelease(MyProc->fpLWLock); + if (released) + { +#if DEBUG_FAST_LOCK > 1 + elog(WARNING, "pid %d fast release rel %u/%u mode %s used %d", + MyProcPid, locktag->locktag_field1, locktag->locktag_field2, + lock_mode_names[lockmode], FastPathLocalUseCount); +#endif + RemoveLocalLock(locallock); + return TRUE; + } +#if DEBUG_FAST_LOCK > 0 + elog(WARNING, "pid %d FALLTHROUGH release rel %u/%u mode %s", + MyProcPid, locktag->locktag_field1, locktag->locktag_field2, + lock_mode_names[lockmode]); +#endif + } + else if (FastPathStrongMode(lockmode)) + { + uint32 fasthashcode; + + fasthashcode = FastPathStrongLockHashPartition(locallock->hashcode); + SpinLockAcquire(&FastPathStrongLocks->mutex); + Assert(FastPathStrongLocks->count[fasthashcode] > 0); + FastPathStrongLocks->count[fasthashcode]--; + SpinLockRelease(&FastPathStrongLocks->mutex); +#if DEBUG_FAST_LOCK > 0 + elog(WARNING, "pid %d DE-EMBARGO release rel %u/%u mode %s partition %d", + MyProcPid, locktag->locktag_field1, locktag->locktag_field2, + lock_mode_names[lockmode], fasthashcode); +#endif + } + } + /* * Otherwise we've got to mess with the shared lock table. */ @@ -1447,11 +1693,34 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock) LWLockAcquire(partitionLock, LW_EXCLUSIVE); /* - * We don't need to re-find the lock or proclock, since we kept their - * addresses in the locallock table, and they couldn't have been removed - * while we were holding a lock on them. + * Normally, we don't need to re-find the lock or proclock, since we kept + * their addresses in the locallock table, and they couldn't have been + * removed while we were holding a lock on them. But it's possible that + * the locks have been moved to the main hash table by another backend, in + * which case we might need to go look them up after all. 
*/ lock = locallock->lock; + if (!lock) + { + PROCLOCKTAG proclocktag; + bool found; + + Assert(FastPathTag(locktag) && FastPathWeakMode(lockmode)); + lock = (LOCK *) hash_search_with_hash_value(LockMethodLockHash, + (void *) locktag, + locallock->hashcode, + HASH_FIND, + &found); + Assert(found && lock != NULL); + locallock->lock = lock; + + proclocktag.myLock = lock; + proclocktag.myProc = MyProc; + locallock->proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash, + (void *) &proclocktag, + HASH_FIND, &found); + Assert(found); + } LOCK_PRINT("LockRelease: found", lock, lockmode); proclock = locallock->proclock; PROCLOCK_PRINT("LockRelease: found", proclock); @@ -1529,6 +1798,7 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks) LOCK *lock; PROCLOCK *proclock; int partition; + bool have_fast_path_lwlock = false; if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods)) elog(ERROR, "unrecognized lock method: %d", lockmethodid); @@ -1554,11 +1824,69 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks) { if (locallock->proclock == NULL || locallock->lock == NULL) { + LOCKMODE lockmode = locallock->tag.mode; + Oid relid; + /* - * We must've run out of shared memory while trying to set up this - * lock. Just forget the local entry. + * If the LOCALLOCK entry is unused, we must've run out of shared + * memory while trying to set up this lock. Just forget the local + * entry. */ - Assert(locallock->nLocks == 0); + if (locallock->nLocks == 0) + { + RemoveLocalLock(locallock); + continue; + } + + /* + * Otherwise, we should be dealing with a lock acquired via the + * fast-path. If not, we've got trouble. + */ + if (!FastPathTag(&locallock->tag.lock) + || !FastPathWeakMode(lockmode)) + elog(PANIC, "locallock table corrupted"); + + /* + * If we don't currently hold the LWLock that protects our + * fast-path data structures, we must acquire it before + * attempting to release the lock via the fast-path. 
+ */ + if (!have_fast_path_lwlock) + { + LWLockAcquire(MyProc->fpLWLock, LW_EXCLUSIVE); + have_fast_path_lwlock = true; + } + + /* Attempt fast-path release. */ + relid = locallock->tag.lock.locktag_field2; + if (FastPathUnGrantLock(relid, lockmode)) + { +#if DEBUG_FAST_LOCK > 1 + elog(WARNING, "pid %d fast ReleaseAll rel %u/%u mode %s used %d", + MyProcPid, locallock->tag.lock.locktag_field1, relid, + lock_mode_names[lockmode], FastPathLocalUseCount); +#endif + RemoveLocalLock(locallock); + continue; + } + + /* + * Our lock, originally taken via the fast path, has been + * transferred to the main lock table. That's going to require + * some extra work, so release our fast-path lock before starting. + */ + LWLockRelease(MyProc->fpLWLock); + have_fast_path_lwlock = false; + + /* + * Now dump the lock. We haven't got a pointer to the LOCK or + * PROCLOCK in this case, so we have to handle this a bit + * differently than a normal lock release. Unfortunately, this + * requires an extra LWLock acquire-and-release cycle on the + * partitionLock, but hopefully it shouldn't happen often. + */ + LockRefindAndRelease(lockMethodTable, MyProc, + &locallock->tag.lock, lockmode); RemoveLocalLock(locallock); continue; } @@ -1606,6 +1934,9 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks) RemoveLocalLock(locallock); } + if (have_fast_path_lwlock) + LWLockRelease(MyProc->fpLWLock); + /* * Now, scan each lock partition separately. 
*/ @@ -1626,6 +1957,7 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks) { bool wakeupNeeded = false; PROCLOCK *nextplock; + uint32 strong_lock_count = 0; /* Get link first, since we may unlink/delete this proclock */ nextplock = (PROCLOCK *) @@ -1669,11 +2001,36 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks) for (i = 1; i <= numLockModes; i++) { if (proclock->releaseMask & LOCKBIT_ON(i)) + { wakeupNeeded |= UnGrantLock(lock, i, proclock, lockMethodTable); + if (FastPathStrongMode(i)) + ++strong_lock_count; + } } Assert((lock->nRequested >= 0) && (lock->nGranted >= 0)); Assert(lock->nGranted <= lock->nRequested); + + if (FastPathTag(&lock->tag) && strong_lock_count > 0) + { + uint32 hashcode, + fasthashcode; + + hashcode = LockTagHashCode(&lock->tag); + fasthashcode = FastPathStrongLockHashPartition(hashcode); + /* + * ZZZ: What happens if a failure occurs before we get to + * this point? We'd leak a strong lock count. + */ + SpinLockAcquire(&FastPathStrongLocks->mutex); + FastPathStrongLocks->count[fasthashcode] -= strong_lock_count; + SpinLockRelease(&FastPathStrongLocks->mutex); +#if DEBUG_FAST_LOCK > 0 + elog(WARNING, "pid %d DE-EMBARGO release rel %u/%u partition %d count %d", + MyProcPid, lock->tag.locktag_field1, lock->tag.locktag_field2, + fasthashcode, strong_lock_count); +#endif + } LOCK_PRINT("LockReleaseAll: updated", lock, 0); proclock->releaseMask = 0; @@ -1824,6 +2181,128 @@ LockReassignCurrentOwner(void) } } +/* + * FastPathGrantLock + * Grant lock using per-backend fast-path array, if there is space. + */ +static bool +FastPathGrantLock(Oid relid, LOCKMODE lockmode) +{ + uint32 f; + uint32 unused_slot = FP_LOCK_SLOTS_PER_BACKEND; + + /* Scan for existing entry for this relid, remembering empty slot. 
*/ + for (f = 0; f < FP_LOCK_SLOTS_PER_BACKEND; ++f) + { + if (FAST_PATH_GET_BITS(MyProc, f) == 0) + unused_slot = f; + else if (MyProc->fpRelId[f] == relid) + { + Assert(!FAST_PATH_CHECK_LOCKMODE(MyProc, f, lockmode)); + FAST_PATH_SET_LOCKMODE(MyProc, f, lockmode); + return true; + } + } + + /* If no existing entry, use any empty slot. */ + if (unused_slot < FP_LOCK_SLOTS_PER_BACKEND) + { + MyProc->fpRelId[unused_slot] = relid; + FAST_PATH_SET_LOCKMODE(MyProc, unused_slot, lockmode); + ++FastPathLocalUseCount; + return true; + } + + /* No existing entry, and no empty slot. */ + return false; +} + +/* + * FastPathUnGrantLock + * Release fast-path lock, if present. Update backend-private local + * use count, while we're at it. + */ +static bool +FastPathUnGrantLock(Oid relid, LOCKMODE lockmode) +{ + uint32 f; + bool result = false; + + FastPathLocalUseCount = 0; + for (f = 0; f < FP_LOCK_SLOTS_PER_BACKEND; ++f) + { + if (MyProc->fpRelId[f] == relid + && FAST_PATH_CHECK_LOCKMODE(MyProc, f, lockmode)) + { + Assert(!result); + FAST_PATH_CLEAR_LOCKMODE(MyProc, f, lockmode); + result = true; + } + if (FAST_PATH_GET_BITS(MyProc, f) != 0) + ++FastPathLocalUseCount; + } + return result; +} + +/* + * FastPathTransferLocks + * Transfer locks matching the given lock tag from per-backend fast-path + * arrays to the shared hash table. + */ +static bool +FastPathTransferLocks(LockMethod lockMethodTable, const LOCKTAG *locktag, + uint32 hashcode) +{ + LWLockId partitionLock = LockHashPartitionLock(hashcode); + Oid relid = locktag->locktag_field2; + uint32 i; + + /* + * Every PGPROC that can potentially hold a fast-path lock is present + * in ProcGlobal->allProcs. Prepared transactions are not, but + * any outstanding fast-path locks held by prepared transactions are + * transferred to the main lock table. 
+ */ + for (i = 0; i < ProcGlobal->allProcCount; ++i) + { + PGPROC *proc = &ProcGlobal->allProcs[i]; + uint32 f; + + LWLockAcquire(proc->fpLWLock, LW_EXCLUSIVE); + for (f = 0; f < FP_LOCK_SLOTS_PER_BACKEND; ++f) + { + uint32 lockmode; + + /* Look for an allocated slot matching the given relid. */ + if (relid != proc->fpRelId[f] || FAST_PATH_GET_BITS(proc, f) == 0) + continue; + + /* Find or create lock object. */ + LWLockAcquire(partitionLock, LW_EXCLUSIVE); + for (lockmode = FAST_PATH_LOCKNUMBER_OFFSET; + lockmode < FAST_PATH_LOCKNUMBER_OFFSET+FAST_PATH_BITS_PER_SLOT; + ++lockmode) + { + PROCLOCK *proclock; + + if (!FAST_PATH_CHECK_LOCKMODE(proc, f, lockmode)) + continue; + proclock = SetupLockInTable(lockMethodTable, proc, locktag, + hashcode, lockmode); + if (!proclock) + { + LWLockRelease(partitionLock); + return false; + } + GrantLock(proclock->tag.myLock, proclock, lockmode); + FAST_PATH_CLEAR_LOCKMODE(proc, f, lockmode); + } + LWLockRelease(partitionLock); + } + LWLockRelease(proc->fpLWLock); + } + return true; +} /* * GetLockConflicts @@ -1945,6 +2424,85 @@ GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode) return vxids; } +/* + * Find a lock in the shared lock table and release it. It is the caller's + * responsibility to verify that this is a sane thing to do. (For example, it + * would be bad to release a lock here if there might still be a LOCALLOCK + * object with pointers to it.) + * + * We currently use this in two situations: first, to release locks held by + * prepared transactions on commit (see lock_twophase_postcommit); and second, + * to release locks taken via the fast-path, transferred to the main hash + * table, and then released (see LockReleaseAll). 
+ */ +static void +LockRefindAndRelease(LockMethod lockMethodTable, PGPROC *proc, + LOCKTAG *locktag, LOCKMODE lockmode) +{ + LOCK *lock; + PROCLOCK *proclock; + PROCLOCKTAG proclocktag; + uint32 hashcode; + uint32 proclock_hashcode; + LWLockId partitionLock; + bool wakeupNeeded; + + hashcode = LockTagHashCode(locktag); + partitionLock = LockHashPartitionLock(hashcode); + + LWLockAcquire(partitionLock, LW_EXCLUSIVE); + + /* + * Re-find the lock object (it had better be there). + */ + lock = (LOCK *) hash_search_with_hash_value(LockMethodLockHash, + (void *) locktag, + hashcode, + HASH_FIND, + NULL); + if (!lock) + elog(PANIC, "failed to re-find shared lock object"); + + /* + * Re-find the proclock object (ditto). + */ + proclocktag.myLock = lock; + proclocktag.myProc = proc; + + proclock_hashcode = ProcLockHashCode(&proclocktag, hashcode); + + proclock = (PROCLOCK *) hash_search_with_hash_value(LockMethodProcLockHash, + (void *) &proclocktag, + proclock_hashcode, + HASH_FIND, + NULL); + if (!proclock) + elog(PANIC, "failed to re-find shared proclock object"); + + /* + * Double-check that we are actually holding a lock of the type we want to + * release. + */ + if (!(proclock->holdMask & LOCKBIT_ON(lockmode))) + { + PROCLOCK_PRINT("lock_twophase_postcommit: WRONGTYPE", proclock); + LWLockRelease(partitionLock); + elog(WARNING, "you don't own a lock of type %s", + lockMethodTable->lockModeNames[lockmode]); + return; + } + + /* + * Do the releasing. CleanUpLock will waken any now-wakable waiters. + */ + wakeupNeeded = UnGrantLock(lock, lockmode, proclock, lockMethodTable); + + CleanUpLock(lock, proclock, + lockMethodTable, hashcode, + wakeupNeeded); + + LWLockRelease(partitionLock); +} /* * AtPrepare_Locks @@ -1968,6 +2526,8 @@ AtPrepare_Locks(void) /* * We don't need to touch shared memory for this --- all the necessary * state information is in the locallock table. + * + * ZZZ: Must move fast-path locks to main table!! 
*/ hash_seq_init(&status, LockMethodLocalHash); @@ -2704,81 +3264,18 @@ lock_twophase_postcommit(TransactionId xid, uint16 info, TwoPhaseLockRecord *rec = (TwoPhaseLockRecord *) recdata; PGPROC *proc = TwoPhaseGetDummyProc(xid); LOCKTAG *locktag; - LOCKMODE lockmode; LOCKMETHODID lockmethodid; - LOCK *lock; - PROCLOCK *proclock; - PROCLOCKTAG proclocktag; - uint32 hashcode; - uint32 proclock_hashcode; - LWLockId partitionLock; LockMethod lockMethodTable; - bool wakeupNeeded; Assert(len == sizeof(TwoPhaseLockRecord)); locktag = &rec->locktag; - lockmode = rec->lockmode; lockmethodid = locktag->locktag_lockmethodid; if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods)) elog(ERROR, "unrecognized lock method: %d", lockmethodid); lockMethodTable = LockMethods[lockmethodid]; - hashcode = LockTagHashCode(locktag); - partitionLock = LockHashPartitionLock(hashcode); - - LWLockAcquire(partitionLock, LW_EXCLUSIVE); - - /* - * Re-find the lock object (it had better be there). - */ - lock = (LOCK *) hash_search_with_hash_value(LockMethodLockHash, - (void *) locktag, - hashcode, - HASH_FIND, - NULL); - if (!lock) - elog(PANIC, "failed to re-find shared lock object"); - - /* - * Re-find the proclock object (ditto). - */ - proclocktag.myLock = lock; - proclocktag.myProc = proc; - - proclock_hashcode = ProcLockHashCode(&proclocktag, hashcode); - - proclock = (PROCLOCK *) hash_search_with_hash_value(LockMethodProcLockHash, - (void *) &proclocktag, - proclock_hashcode, - HASH_FIND, - NULL); - if (!proclock) - elog(PANIC, "failed to re-find shared proclock object"); - - /* - * Double-check that we are actually holding a lock of the type we want to - * release. - */ - if (!(proclock->holdMask & LOCKBIT_ON(lockmode))) - { - PROCLOCK_PRINT("lock_twophase_postcommit: WRONGTYPE", proclock); - LWLockRelease(partitionLock); - elog(WARNING, "you don't own a lock of type %s", - lockMethodTable->lockModeNames[lockmode]); - return; - } - - /* - * Do the releasing. 
CleanUpLock will waken any now-wakable waiters. - */ - wakeupNeeded = UnGrantLock(lock, lockmode, proclock, lockMethodTable); - - CleanUpLock(lock, proclock, - lockMethodTable, hashcode, - wakeupNeeded); - - LWLockRelease(partitionLock); + LockRefindAndRelease(lockMethodTable, proc, locktag, rec->lockmode); } /* diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index 0fe7ce4..8fae67e 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -167,6 +167,9 @@ NumLWLocks(void) /* bufmgr.c needs two for each shared buffer */ numLocks += 2 * NBuffers; + /* lock.c needs one per backend */ + numLocks += MaxBackends; + /* clog.c needs one per CLOG buffer */ numLocks += NUM_CLOG_BUFFERS; diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index ee03316..9cc376e 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -67,7 +67,7 @@ PGPROC *MyProc = NULL; NON_EXEC_STATIC slock_t *ProcStructLock = NULL; /* Pointers to shared-memory structures */ -NON_EXEC_STATIC PROC_HDR *ProcGlobal = NULL; +PROC_HDR *ProcGlobal = NULL; NON_EXEC_STATIC PGPROC *AuxiliaryProcs = NULL; /* If we are waiting for a lock, this points to the associated LOCALLOCK */ @@ -160,6 +160,7 @@ InitProcGlobal(void) PGPROC *procs; int i; bool found; + uint32 TotalProcs = MaxBackends + NUM_AUXILIARY_PROCS; /* Create the ProcGlobal shared structure */ ProcGlobal = (PROC_HDR *) @@ -167,68 +168,61 @@ InitProcGlobal(void) Assert(!found); /* - * Create the PGPROC structures for auxiliary (bgwriter) processes, too. - * These do not get linked into the freeProcs list. - */ - AuxiliaryProcs = (PGPROC *) - ShmemInitStruct("AuxiliaryProcs", NUM_AUXILIARY_PROCS * sizeof(PGPROC), - &found); - Assert(!found); - - /* * Initialize the data structures. 
*/ + ProcGlobal->spins_per_delay = DEFAULT_SPINS_PER_DELAY; ProcGlobal->freeProcs = NULL; ProcGlobal->autovacFreeProcs = NULL; - ProcGlobal->spins_per_delay = DEFAULT_SPINS_PER_DELAY; - - /* - * Pre-create the PGPROC structures and create a semaphore for each. - */ - procs = (PGPROC *) ShmemAlloc((MaxConnections) * sizeof(PGPROC)); - if (!procs) - ereport(FATAL, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of shared memory"))); - MemSet(procs, 0, MaxConnections * sizeof(PGPROC)); - for (i = 0; i < MaxConnections; i++) - { - PGSemaphoreCreate(&(procs[i].sem)); - procs[i].links.next = (SHM_QUEUE *) ProcGlobal->freeProcs; - ProcGlobal->freeProcs = &procs[i]; - InitSharedLatch(&procs[i].waitLatch); - } - /* - * Likewise for the PGPROCs reserved for autovacuum. + * Create and initialize all the PGPROC structures we'll need. * - * Note: the "+1" here accounts for the autovac launcher + * There are three separate consumers of PGPROC structures: (1) normal + * backends, (2) autovacuum workers and the autovacuum launcher, and (3) + * auxiliary processes. Each PGPROC structure is dedicated to exactly + * one of these purposes, and they do not move between groups. */ - procs = (PGPROC *) ShmemAlloc((autovacuum_max_workers + 1) * sizeof(PGPROC)); + procs = (PGPROC *) ShmemAlloc(TotalProcs * sizeof(PGPROC)); + ProcGlobal->allProcs = procs; + ProcGlobal->allProcCount = TotalProcs; if (!procs) ereport(FATAL, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of shared memory"))); - MemSet(procs, 0, (autovacuum_max_workers + 1) * sizeof(PGPROC)); - for (i = 0; i < autovacuum_max_workers + 1; i++) + MemSet(procs, 0, TotalProcs * sizeof(PGPROC)); + for (i = 0; i < TotalProcs; i++) { + /* Common initialization for all PGPROCs, regardless of type. 
*/ PGSemaphoreCreate(&(procs[i].sem)); - procs[i].links.next = (SHM_QUEUE *) ProcGlobal->autovacFreeProcs; - ProcGlobal->autovacFreeProcs = &procs[i]; + procs[i].fpLWLock = LWLockAssign(); InitSharedLatch(&procs[i].waitLatch); + + /* + * Newly created PGPROCs for normal backends or for autovacuum must + * be queued up on the appropriate free list. Because there can only + * ever be a small, fixed number of auxiliary processes, no free + * list is used in that case; InitAuxiliaryProcess() instead uses a + * linear search. + */ + if (i < MaxConnections) + { + /* PGPROC for normal backend, add to freeProcs list */ + procs[i].links.next = (SHM_QUEUE *) ProcGlobal->freeProcs; + ProcGlobal->freeProcs = &procs[i]; + } + else if (i < MaxBackends) + { + /* PGPROC for AV launcher/worker, add to autovacFreeProcs list */ + procs[i].links.next = (SHM_QUEUE *) ProcGlobal->autovacFreeProcs; + ProcGlobal->autovacFreeProcs = &procs[i]; + } } /* - * And auxiliary procs. + * Save a pointer to the block of PGPROC structures reserved for + * auxiliary processes. */ - MemSet(AuxiliaryProcs, 0, NUM_AUXILIARY_PROCS * sizeof(PGPROC)); - for (i = 0; i < NUM_AUXILIARY_PROCS; i++) - { - AuxiliaryProcs[i].pid = 0; /* marks auxiliary proc as not in use */ - PGSemaphoreCreate(&(AuxiliaryProcs[i].sem)); - InitSharedLatch(&procs[i].waitLatch); - } + AuxiliaryProcs = &procs[MaxBackends]; /* Create ProcStructLock spinlock, too */ ProcStructLock = (slock_t *) ShmemAlloc(sizeof(slock_t)); diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 4819cb8..938f47d 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -51,6 +51,15 @@ struct XidCache #define PROC_VACUUM_STATE_MASK (0x0E) /* + * We allow a small number of "weak" relation locks (AccessShareLock, + * RowShareLock, RowExclusiveLock) to be recorded in the PGPROC structure + * rather than the main lock table. 
This eases contention on the lock + * manager LWLocks in workloads that have a small number of "hot" tables. + * See storage/lmgr/lock.c for full details. + */ +#define FP_LOCK_SLOTS_PER_BACKEND 16 + +/* * Each backend has a PGPROC struct in shared memory. There is also a list of * currently-unused PGPROC structs that will be reallocated to new backends. * @@ -136,6 +145,11 @@ struct PGPROC */ SHM_QUEUE myProcLocks[NUM_LOCK_PARTITIONS]; + /* Info about fast-path locks taken by this backend */ + LWLockId fpLWLock; /* protects the fields below */ + uint64 fpLockBits; /* lock modes held for each fast-path slot */ + Oid fpRelId[FP_LOCK_SLOTS_PER_BACKEND]; /* slots for rel oids */ + struct XidCache subxids; /* cache for subtransaction XIDs */ }; @@ -150,6 +164,10 @@ extern PGDLLIMPORT PGPROC *MyProc; */ typedef struct PROC_HDR { + /* Array of PGPROC structures (not including dummies for prepared txns) */ + PGPROC *allProcs; + /* Length of allProcs array */ + uint32 allProcCount; /* Head of list of free PGPROC structures */ PGPROC *freeProcs; /* Head of list of autovacuum's free PGPROC structures */ @@ -163,6 +181,8 @@ typedef struct PROC_HDR int startupBufferPinWaitBufId; } PROC_HDR; +extern PROC_HDR *ProcGlobal; + /* * We set aside some extra PGPROC structures for auxiliary processes, * ie things that aren't full-fledged backends but need shmem access.