diff --git a/src/backend/access/brin/brin_pageops.c b/src/backend/access/brin/brin_pageops.c index 1725591..69c5c9f 100644 --- a/src/backend/access/brin/brin_pageops.c +++ b/src/backend/access/brin/brin_pageops.c @@ -613,8 +613,8 @@ brin_page_cleanup(Relation idxrel, Buffer buf) */ if (PageIsNew(page)) { - LockRelationForExtension(idxrel, ShareLock); - UnlockRelationForExtension(idxrel, ShareLock); + LockRelationForExtension(idxrel, LW_SHARED); + UnlockRelationForExtension(idxrel, LW_SHARED); LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); if (PageIsNew(page)) @@ -706,7 +706,7 @@ brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz, */ if (!RELATION_IS_LOCAL(irel)) { - LockRelationForExtension(irel, ExclusiveLock); + LockRelationForExtension(irel, LW_EXCLUSIVE); extensionLockHeld = true; } buf = ReadBuffer(irel, P_NEW); @@ -758,7 +758,7 @@ brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz, } if (extensionLockHeld) - UnlockRelationForExtension(irel, ExclusiveLock); + UnlockRelationForExtension(irel, LW_EXCLUSIVE); ReleaseBuffer(buf); return InvalidBuffer; @@ -768,7 +768,7 @@ brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz, LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); if (extensionLockHeld) - UnlockRelationForExtension(irel, ExclusiveLock); + UnlockRelationForExtension(irel, LW_EXCLUSIVE); page = BufferGetPage(buf); diff --git a/src/backend/access/brin/brin_revmap.c b/src/backend/access/brin/brin_revmap.c index 9ed279b..1d07e10 100644 --- a/src/backend/access/brin/brin_revmap.c +++ b/src/backend/access/brin/brin_revmap.c @@ -567,7 +567,7 @@ revmap_physical_extend(BrinRevmap *revmap) else { if (needLock) - LockRelationForExtension(irel, ExclusiveLock); + LockRelationForExtension(irel, LW_EXCLUSIVE); buf = ReadBuffer(irel, P_NEW); if (BufferGetBlockNumber(buf) != mapBlk) @@ -579,7 +579,7 @@ revmap_physical_extend(BrinRevmap *revmap) * page from under whoever is using it. 
*/ if (needLock) - UnlockRelationForExtension(irel, ExclusiveLock); + UnlockRelationForExtension(irel, LW_EXCLUSIVE); LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_UNLOCK); ReleaseBuffer(buf); return; @@ -588,7 +588,7 @@ revmap_physical_extend(BrinRevmap *revmap) page = BufferGetPage(buf); if (needLock) - UnlockRelationForExtension(irel, ExclusiveLock); + UnlockRelationForExtension(irel, LW_EXCLUSIVE); } /* Check that it's a regular block (or an empty page) */ diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c index d03d59d..c98c194 100644 --- a/src/backend/access/gin/ginutil.c +++ b/src/backend/access/gin/ginutil.c @@ -323,13 +323,13 @@ GinNewBuffer(Relation index) /* Must extend the file */ needLock = !RELATION_IS_LOCAL(index); if (needLock) - LockRelationForExtension(index, ExclusiveLock); + LockRelationForExtension(index, LW_EXCLUSIVE); buffer = ReadBuffer(index, P_NEW); LockBuffer(buffer, GIN_EXCLUSIVE); if (needLock) - UnlockRelationForExtension(index, ExclusiveLock); + UnlockRelationForExtension(index, LW_EXCLUSIVE); return buffer; } diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c index 26c077a..d139b76 100644 --- a/src/backend/access/gin/ginvacuum.c +++ b/src/backend/access/gin/ginvacuum.c @@ -716,10 +716,10 @@ ginvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) needLock = !RELATION_IS_LOCAL(index); if (needLock) - LockRelationForExtension(index, ExclusiveLock); + LockRelationForExtension(index, LW_EXCLUSIVE); npages = RelationGetNumberOfBlocks(index); if (needLock) - UnlockRelationForExtension(index, ExclusiveLock); + UnlockRelationForExtension(index, LW_EXCLUSIVE); totFreePages = 0; @@ -766,10 +766,10 @@ ginvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) stats->pages_free = totFreePages; if (needLock) - LockRelationForExtension(index, ExclusiveLock); + LockRelationForExtension(index, LW_EXCLUSIVE); stats->num_pages = RelationGetNumberOfBlocks(index); if 
(needLock) - UnlockRelationForExtension(index, ExclusiveLock); + UnlockRelationForExtension(index, LW_EXCLUSIVE); return stats; } diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c index cbdaec9..a3e8186 100644 --- a/src/backend/access/gist/gistutil.c +++ b/src/backend/access/gist/gistutil.c @@ -801,13 +801,13 @@ gistNewBuffer(Relation r) needLock = !RELATION_IS_LOCAL(r); if (needLock) - LockRelationForExtension(r, ExclusiveLock); + LockRelationForExtension(r, LW_EXCLUSIVE); buffer = ReadBuffer(r, P_NEW); LockBuffer(buffer, GIST_EXCLUSIVE); if (needLock) - UnlockRelationForExtension(r, ExclusiveLock); + UnlockRelationForExtension(r, LW_EXCLUSIVE); return buffer; } diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c index 77d9d12..e85eb7d 100644 --- a/src/backend/access/gist/gistvacuum.c +++ b/src/backend/access/gist/gistvacuum.c @@ -59,10 +59,10 @@ gistvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) /* try to find deleted pages */ if (needLock) - LockRelationForExtension(rel, ExclusiveLock); + LockRelationForExtension(rel, LW_EXCLUSIVE); npages = RelationGetNumberOfBlocks(rel); if (needLock) - UnlockRelationForExtension(rel, ExclusiveLock); + UnlockRelationForExtension(rel, LW_EXCLUSIVE); totFreePages = 0; for (blkno = GIST_ROOT_BLKNO + 1; blkno < npages; blkno++) @@ -91,10 +91,10 @@ gistvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) /* return statistics */ stats->pages_free = totFreePages; if (needLock) - LockRelationForExtension(rel, ExclusiveLock); + LockRelationForExtension(rel, LW_EXCLUSIVE); stats->num_pages = RelationGetNumberOfBlocks(rel); if (needLock) - UnlockRelationForExtension(rel, ExclusiveLock); + UnlockRelationForExtension(rel, LW_EXCLUSIVE); return stats; } diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c index 6529fe3..812f7e0 100644 --- a/src/backend/access/heap/hio.c +++ b/src/backend/access/heap/hio.c @@ 
-519,11 +519,11 @@ loop: if (needLock) { if (!use_fsm) - LockRelationForExtension(relation, ExclusiveLock); - else if (!ConditionalLockRelationForExtension(relation, ExclusiveLock)) + LockRelationForExtension(relation, LW_EXCLUSIVE); + else if (!ConditionalLockRelationForExtension(relation, LW_EXCLUSIVE)) { /* Couldn't get the lock immediately; wait for it. */ - LockRelationForExtension(relation, ExclusiveLock); + LockRelationForExtension(relation, LW_EXCLUSIVE); /* * Check if some other backend has extended a block for us while @@ -537,7 +537,7 @@ loop: */ if (targetBlock != InvalidBlockNumber) { - UnlockRelationForExtension(relation, ExclusiveLock); + UnlockRelationForExtension(relation, LW_EXCLUSIVE); goto loop; } @@ -576,7 +576,7 @@ loop: * against vacuumlazy.c --- see comments therein. */ if (needLock) - UnlockRelationForExtension(relation, ExclusiveLock); + UnlockRelationForExtension(relation, LW_EXCLUSIVE); /* * We need to initialize the empty new page. Double-check that it really diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c index e5616ce..aaba35b 100644 --- a/src/backend/access/heap/visibilitymap.c +++ b/src/backend/access/heap/visibilitymap.c @@ -641,7 +641,7 @@ vm_extend(Relation rel, BlockNumber vm_nblocks) * Note that another backend might have extended or created the relation * by the time we get the lock. 
*/ - LockRelationForExtension(rel, ExclusiveLock); + LockRelationForExtension(rel, LW_EXCLUSIVE); /* Might have to re-open if a cache flush happened */ RelationOpenSmgr(rel); @@ -679,7 +679,7 @@ vm_extend(Relation rel, BlockNumber vm_nblocks) /* Update local cache with the up-to-date size */ rel->rd_smgr->smgr_vm_nblocks = vm_nblocks_now; - UnlockRelationForExtension(rel, ExclusiveLock); + UnlockRelationForExtension(rel, LW_EXCLUSIVE); pfree(pg); } diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index f815fd4..7ac9a2e 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -658,7 +658,7 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access) needLock = !RELATION_IS_LOCAL(rel); if (needLock) - LockRelationForExtension(rel, ExclusiveLock); + LockRelationForExtension(rel, LW_EXCLUSIVE); buf = ReadBuffer(rel, P_NEW); @@ -672,7 +672,7 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access) * condition against btvacuumscan --- see comments therein. 
*/ if (needLock) - UnlockRelationForExtension(rel, ExclusiveLock); + UnlockRelationForExtension(rel, LW_EXCLUSIVE); /* Initialize the new page before returning it */ page = BufferGetPage(buf); diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 775f2ff..ae076ae 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -1059,10 +1059,10 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, { /* Get the current relation length */ if (needLock) - LockRelationForExtension(rel, ExclusiveLock); + LockRelationForExtension(rel, LW_EXCLUSIVE); num_pages = RelationGetNumberOfBlocks(rel); if (needLock) - UnlockRelationForExtension(rel, ExclusiveLock); + UnlockRelationForExtension(rel, LW_EXCLUSIVE); /* Quit if we've scanned the whole relation */ if (blkno >= num_pages) diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c index e57ac49..ab88a07 100644 --- a/src/backend/access/spgist/spgutils.c +++ b/src/backend/access/spgist/spgutils.c @@ -230,13 +230,13 @@ SpGistNewBuffer(Relation index) /* Must extend the file */ needLock = !RELATION_IS_LOCAL(index); if (needLock) - LockRelationForExtension(index, ExclusiveLock); + LockRelationForExtension(index, LW_EXCLUSIVE); buffer = ReadBuffer(index, P_NEW); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); if (needLock) - UnlockRelationForExtension(index, ExclusiveLock); + UnlockRelationForExtension(index, LW_EXCLUSIVE); return buffer; } diff --git a/src/backend/access/spgist/spgvacuum.c b/src/backend/access/spgist/spgvacuum.c index cce9b3f..84c3502 100644 --- a/src/backend/access/spgist/spgvacuum.c +++ b/src/backend/access/spgist/spgvacuum.c @@ -824,10 +824,10 @@ spgvacuumscan(spgBulkDeleteState *bds) { /* Get the current relation length */ if (needLock) - LockRelationForExtension(index, ExclusiveLock); + LockRelationForExtension(index, LW_EXCLUSIVE); num_pages = RelationGetNumberOfBlocks(index); if (needLock) - 
UnlockRelationForExtension(index, ExclusiveLock); + UnlockRelationForExtension(index, LW_EXCLUSIVE); /* Quit if we've scanned the whole relation */ if (blkno >= num_pages) diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c index 5b43a66..658b98c 100644 --- a/src/backend/commands/vacuumlazy.c +++ b/src/backend/commands/vacuumlazy.c @@ -851,8 +851,8 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, * it's got exclusive lock on the whole relation. */ LockBuffer(buf, BUFFER_LOCK_UNLOCK); - LockRelationForExtension(onerel, ExclusiveLock); - UnlockRelationForExtension(onerel, ExclusiveLock); + LockRelationForExtension(onerel, LW_EXCLUSIVE); + UnlockRelationForExtension(onerel, LW_EXCLUSIVE); LockBufferForCleanup(buf); if (PageIsNew(page)) { diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c index 4648473..a099fd8 100644 --- a/src/backend/storage/freespace/freespace.c +++ b/src/backend/storage/freespace/freespace.c @@ -624,7 +624,7 @@ fsm_extend(Relation rel, BlockNumber fsm_nblocks) * Note that another backend might have extended or created the relation * by the time we get the lock. */ - LockRelationForExtension(rel, ExclusiveLock); + LockRelationForExtension(rel, LW_EXCLUSIVE); /* Might have to re-open if a cache flush happened */ RelationOpenSmgr(rel); @@ -652,7 +652,7 @@ fsm_extend(Relation rel, BlockNumber fsm_nblocks) /* Update local cache with the up-to-date size */ rel->rd_smgr->smgr_fsm_nblocks = fsm_nblocks_now; - UnlockRelationForExtension(rel, ExclusiveLock); + UnlockRelationForExtension(rel, LW_EXCLUSIVE); pfree(pg); } diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c index fe98898..0901bec 100644 --- a/src/backend/storage/lmgr/lmgr.c +++ b/src/backend/storage/lmgr/lmgr.c @@ -24,6 +24,31 @@ #include "storage/procarray.h" #include "utils/inval.h" +/* + * Compute the hash code associated with a RELEXTLOCK. 
+ * + * To avoid unnecessary recomputations of the hash code, we try to do this + * just once per function, and then pass it around as needed. Aside from + * passing the hashcode to hash_search_with_hash_value(), we can extract + * the lock partition number from the hashcode. + */ +#define RelExtLockTargetTagHashCode(relextlocktargettag) \ + get_hash_value(RelExtLockHash, (const void *) (relextlocktargettag)) + +/* + * The shared RelExtLockHash table is partitioned to reduce contention. + * To determine which partition a given relid belongs to, compute the tag's + * hash code with RelExtLockTargetTagHashCode(), then apply one of these macros. + * NB: NUM_RELEXTLOCK_PARTITIONS must be a power of 2! + */ +#define RelExtLockHashPartition(hashcode) \ + ((hashcode) % NUM_RELEXTLOCK_PARTITIONS) +#define RelExtLockHashPartitionLock(hashcode) \ + (&MainLWLockArray[RELEXTLOCK_MANAGER_LWLOCK_OFFSET + \ + RelExtLockHashPartition(hashcode)].lock) +#define RelExtLockHashPartitionLockByIndex(i) \ + (&MainLWLockArray[RELEXTLOCK_MANAGER_LWLOCK_OFFSET + (i)].lock) + /* * Per-backend counter for generating speculative insertion tokens. @@ -57,6 +82,67 @@ typedef struct XactLockTableWaitInfo } XactLockTableWaitInfo; static void XactLockTableWaitErrorCb(void *arg); +static bool CreateRelExtLock(const RELEXTLOCKTAG *targettag, uint32 hashcode, + LWLockMode lockmode, bool conditional); +static void DeleteRelExtLock(const RELEXTLOCKTAG *targettag, uint32 hashcode); +static bool RelExtLockExists(const RELEXTLOCKTAG *targettag); + +/* + * Pointers to hash tables containing lock state + * + * The RelExtLockHash hash table is in shared memory; LocalRelExtLockHash + * hashtable is local to each backend. + */ +static HTAB *RelExtLockHash; +static HTAB *LocalRelExtLockHash; + + +/* + * InitRelExtLock + * Initialize the relation extension lock manager's data structures.
+ */ +void +InitRelExtLock(long max_table_size) +{ + HASHCTL info; + long init_table_size; + + /* + * Compute init/max size to request for lock hashtables. Note these + * calculations must agree with LockShmemSize! + */ + init_table_size = max_table_size / 2; + + /* + * Allocate the shared hash table of RELEXTLOCK structs. Entries are keyed + * by relation OID alone, since keysize is sizeof(Oid). + */ + MemSet(&info, 0, sizeof(info)); + info.keysize = sizeof(Oid); + info.entrysize = sizeof(RELEXTLOCK); + info.num_partitions = NUM_RELEXTLOCK_PARTITIONS; + + RelExtLockHash = ShmemInitHash("EXTRELLOCK Hash", + init_table_size, + max_table_size, + &info, + HASH_ELEM | HASH_BLOBS | HASH_PARTITION); + + if (LocalRelExtLockHash) + hash_destroy(LocalRelExtLockHash); + + /* + * Allocate the non-shared hash table of LOCALRELEXTLOCK structs. It records, + * per relation OID, whether this backend holds the extension lock. + */ + info.keysize = sizeof(Oid); + info.entrysize = sizeof(LOCALRELEXTLOCK); + + LocalRelExtLockHash = hash_create("LOCALRELEXTLOCK hash", + 16, + &info, + HASH_ELEM | HASH_BLOBS); +} /* * RelationInitLockInfo @@ -321,7 +407,7 @@ UnlockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode) /* * LockRelationForExtension * - * This lock tag is used to interlock addition of pages to relations. + * This lock is used to interlock addition of pages to relations. * We need such locking because bufmgr/smgr definition of P_NEW is not * race-condition-proof. * @@ -329,15 +415,31 @@ UnlockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode) * the relation, so no AcceptInvalidationMessages call is needed here.
*/ void -LockRelationForExtension(Relation relation, LOCKMODE lockmode) +LockRelationForExtension(Relation relation, LWLockMode lockmode) { - LOCKTAG tag; + RELEXTLOCKTAG locktag; + LOCALRELEXTLOCK *local_lock; + bool found; + uint32 hashcode; - SET_LOCKTAG_RELATION_EXTEND(tag, - relation->rd_lockInfo.lockRelId.dbId, - relation->rd_lockInfo.lockRelId.relId); + locktag.relid = relation->rd_id; + locktag.mode = lockmode; - (void) LockAcquire(&tag, lockmode, false, false); + /* Do we have the lock already? */ + if (RelExtLockExists(&locktag)) + return; + + hashcode = RelExtLockTargetTagHashCode(&locktag); + + /* Find or make our local entry; it stays not-held until the lock is acquired */ + local_lock = (LOCALRELEXTLOCK *) hash_search_with_hash_value(LocalRelExtLockHash, + (void *) &locktag, + hashcode, + HASH_ENTER, &found); + local_lock->held = false; + /* Acquire the lock in the shared hash table, then record it as held */ + CreateRelExtLock(&locktag, hashcode, lockmode, false); + local_lock->held = true; } /* @@ -347,47 +449,95 @@ LockRelationForExtension(Relation relation, LOCKMODE lockmode) * Returns TRUE iff the lock was acquired. */ bool -ConditionalLockRelationForExtension(Relation relation, LOCKMODE lockmode) +ConditionalLockRelationForExtension(Relation relation, LWLockMode lockmode) { - LOCKTAG tag; - - SET_LOCKTAG_RELATION_EXTEND(tag, - relation->rd_lockInfo.lockRelId.dbId, - relation->rd_lockInfo.lockRelId.relId); - - return (LockAcquire(&tag, lockmode, false, true) != LOCKACQUIRE_NOT_AVAIL); + RELEXTLOCKTAG locktag; + LOCALRELEXTLOCK *local_lock; + bool found; + uint32 hashcode; + bool ret; + + locktag.relid = relation->rd_id; + locktag.mode = lockmode; + + /* Do we have the lock already?
*/ + if (RelExtLockExists(&locktag)) + return true; + + hashcode = RelExtLockTargetTagHashCode(&locktag); + + /* Find or make our local entry; it stays not-held unless the acquire succeeds */ + local_lock = (LOCALRELEXTLOCK *) hash_search_with_hash_value(LocalRelExtLockHash, + (void *) &locktag, + hashcode, + HASH_ENTER, &found); + local_lock->held = false; + ret = CreateRelExtLock(&locktag, hashcode, lockmode, true); + local_lock->held = ret; + return ret; } /* * RelationExtensionLockWaiterCount * * Count the number of processes waiting for the given relation extension lock. + * Note that this routine doesn't acquire the partition lock. The caller must + * either hold the partition lock in exclusive mode or already hold the + * relation extension lock of this relation. */ int RelationExtensionLockWaiterCount(Relation relation) { - LOCKTAG tag; - - SET_LOCKTAG_RELATION_EXTEND(tag, - relation->rd_lockInfo.lockRelId.dbId, - relation->rd_lockInfo.lockRelId.relId); - - return LockWaiterCount(&tag); + RELEXTLOCKTAG locktag; + RELEXTLOCK *ext_lock; + bool found; + int nwaiters; + uint32 hashcode; + + locktag.relid = relation->rd_id; + locktag.mode = LW_EXCLUSIVE; + hashcode = RelExtLockTargetTagHashCode(&locktag); + + ext_lock = (RELEXTLOCK *) hash_search_with_hash_value(RelExtLockHash, + (void *) &locktag, + hashcode, + HASH_FIND, &found); + /* The caller should hold this lock already; report no waiters if it is missing */ + Assert(found); + if (!found) + return 0; + nwaiters = LWLockWaiterCount(&(ext_lock->lock)); + return nwaiters; } /* * UnlockRelationForExtension */ void -UnlockRelationForExtension(Relation relation, LOCKMODE lockmode) +UnlockRelationForExtension(Relation relation, LWLockMode lockmode) { - LOCKTAG tag; + RELEXTLOCKTAG locktag; + uint32 hashcode; + bool found; - SET_LOCKTAG_RELATION_EXTEND(tag, - relation->rd_lockInfo.lockRelId.dbId, - relation->rd_lockInfo.lockRelId.relId); + locktag.relid = relation->rd_id; + locktag.mode = lockmode; - LockRelease(&tag, lockmode, false); + /* Quick
exit, if we don't acquire lock */ + if (!RelExtLockExists(&locktag)) + return; + + /* Remove hash entry from local hash table */ + hashcode = RelExtLockTargetTagHashCode(&locktag); + hash_search_with_hash_value(LocalRelExtLockHash, + (void *) &locktag, + hashcode, HASH_REMOVE, + &found); + + Assert(found); + + /* Actually remove the lock in shared hash table */ + DeleteRelExtLock(&locktag, hashcode); } /* @@ -961,12 +1111,6 @@ DescribeLockTag(StringInfo buf, const LOCKTAG *tag) tag->locktag_field2, tag->locktag_field1); break; - case LOCKTAG_RELATION_EXTEND: - appendStringInfo(buf, - _("extension of relation %u of database %u"), - tag->locktag_field2, - tag->locktag_field1); - break; case LOCKTAG_PAGE: appendStringInfo(buf, _("page %u of relation %u of database %u"), @@ -1042,3 +1186,132 @@ GetLockNameFromTagType(uint16 locktag_type) return "???"; return LockTagTypeNames[locktag_type]; } + + +/* + * Check whether a particular relation extension lock is recorded as held in + * this backend's local hash table. + * + * Note that this function may return false even when an entry for the lock + * exists in the local hash table, because a failed conditional acquisition + * leaves its local hash entry behind with held set to false. + */ +static bool +RelExtLockExists(const RELEXTLOCKTAG *targettag) +{ + LOCALRELEXTLOCK *lock; + uint32 hashcode; + + hashcode = RelExtLockTargetTagHashCode(targettag); + + lock = (LOCALRELEXTLOCK *) hash_search_with_hash_value(LocalRelExtLockHash, + (void *) targettag, + hashcode, HASH_FIND, NULL); + + if (!lock) + return false; + + /* + * Found entry in the table, but still need to check whether it's actually + * held -- it could be just created when acquiring conditional lock. + */ + return lock->held; +} + +/* + * Find or create the RELEXTLOCK entry for targettag in the shared hash table + * and acquire its LWLock in lockmode. + * + * On success, both the hash partition lock and the entry's LWLock are left + * held; the caller must call DeleteRelExtLock later to release them. If a + * conditional request cannot be granted without waiting, nothing is left + * held and false is returned.
+ * + * NOTE(review): because the partition lock stays held until DeleteRelExtLock, + * all relations that hash to the same partition are extended one at a time. + */ +static bool +CreateRelExtLock(const RELEXTLOCKTAG *targettag, uint32 hashcode, LWLockMode lockmode, + bool conditional) +{ + RELEXTLOCK *ext_lock; + LWLock *partitionLock; + bool found; + + partitionLock = RelExtLockHashPartitionLock(hashcode); + + /* A conditional request must not sleep on the partition lock either */ + if (!conditional) + LWLockAcquire(partitionLock, LW_EXCLUSIVE); + else if (!LWLockConditionalAcquire(partitionLock, LW_EXCLUSIVE)) + return false; + + /* HASH_ENTER_NULL reports a full table by returning NULL */ + ext_lock = (RELEXTLOCK *) hash_search_with_hash_value(RelExtLockHash, + (void * ) targettag, + hashcode, HASH_ENTER_NULL, &found); + + if (!ext_lock) + { + LWLockRelease(partitionLock); + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of shared memory"), + errhint("You might need to increase max_locks_per_transaction."))); + } + + /* This is a new hash entry, initialize it */ + if (!found) + LWLockInitialize(&(ext_lock->lock), LWTRANCHE_RELEXT_LOCK_MANAGER); + + if (!conditional) + { + /* LWLockAcquire's result only tells whether we had to wait */ + (void) LWLockAcquire(&(ext_lock->lock), lockmode); + return true; + } + + if (LWLockConditionalAcquire(&(ext_lock->lock), lockmode)) + return true; + + /* Not acquired, so DeleteRelExtLock won't run: release the partition lock */ + LWLockRelease(partitionLock); + return false; +} + +/* + * Release the extension LWLock taken by CreateRelExtLock, remove the + * RELEXTLOCK entry from the shared hash table if LWLockCheckForCleanup + * reports it unused, and release the hash partition lock that + * CreateRelExtLock left held. + */ +static void +DeleteRelExtLock(const RELEXTLOCKTAG *targettag, uint32 hashcode) +{ + RELEXTLOCK *ext_lock; + LWLock *partitionLock; + bool found; + + partitionLock = RelExtLockHashPartitionLock(hashcode); + + ext_lock = (RELEXTLOCK *) hash_search_with_hash_value(RelExtLockHash, + (void * ) targettag, + hashcode, + HASH_FIND, &found); + + if (found) + { + /* + * Release our own hold first: while we hold it the lock can never look + * unused, and the entry must not be touched once it has been removed.
+ */ + LWLockRelease(&(ext_lock->lock)); + + if (LWLockCheckForCleanup(&(ext_lock->lock))) + hash_search_with_hash_value(RelExtLockHash, (void *) targettag, + hashcode, HASH_REMOVE, &found); + } + + /* Always drop the partition lock, even if the entry was not found */ + LWLockRelease(partitionLock); +} diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index 4315be4..90311da 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -45,6 +45,7 @@ #include "storage/sinvaladt.h" #include "storage/spin.h" #include "storage/standby.h" +#include "storage/lmgr.h" #include "utils/memutils.h" #include "utils/ps_status.h" #include "utils/resowner_private.h" @@ -388,6 +389,10 @@ InitLocks(void) max_table_size = NLOCKENTS(); init_table_size = max_table_size / 2; + + /* Initialize lock structure for relation extension lock */ + InitRelExtLock(max_table_size); + /* * Allocate hash table for LOCK structs. This stores per-locked-object * information. @@ -3366,6 +3371,7 @@ LockShmemSize(void) /* lock hash table */ max_table_size = NLOCKENTS(); size = add_size(size, hash_estimate_size(max_table_size, sizeof(LOCK))); + size = add_size(size, hash_estimate_size(max_table_size, sizeof(RELEXTLOCK))); /* proclock hash table */ max_table_size *= 2; diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index 3e13394..c004213 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -451,6 +451,13 @@ InitializeLWLocks(void) for (id = 0; id < NUM_PREDICATELOCK_PARTITIONS; id++, lock++) LWLockInitialize(&lock->lock, LWTRANCHE_PREDICATE_LOCK_MANAGER); + /* Initialize relation extension lmgr's LWLocks in main array */ + lock = MainLWLockArray + NUM_INDIVIDUAL_LWLOCKS + + NUM_BUFFER_PARTITIONS + NUM_LOCK_PARTITIONS + + NUM_PREDICATELOCK_PARTITIONS; + for (id = 0; id < NUM_RELEXTLOCK_PARTITIONS; id++, lock++) + LWLockInitialize(&lock->lock, LWTRANCHE_RELEXT_LOCK_MANAGER); + /* Initialize named tranches.
*/ if (NamedLWLockTrancheRequests > 0) { @@ -494,7 +501,7 @@ RegisterLWLockTranches(void) if (LWLockTrancheArray == NULL) { - LWLockTranchesAllocated = 64; + LWLockTranchesAllocated = 128; LWLockTrancheArray = (char **) MemoryContextAllocZero(TopMemoryContext, LWLockTranchesAllocated * sizeof(char *)); @@ -508,6 +515,7 @@ RegisterLWLockTranches(void) LWLockRegisterTranche(LWTRANCHE_LOCK_MANAGER, "lock_manager"); LWLockRegisterTranche(LWTRANCHE_PREDICATE_LOCK_MANAGER, "predicate_lock_manager"); + LWLockRegisterTranche(LWTRANCHE_RELEXT_LOCK_MANAGER, "relext_lock_manager"); LWLockRegisterTranche(LWTRANCHE_PARALLEL_QUERY_DSA, "parallel_query_dsa"); LWLockRegisterTranche(LWTRANCHE_TBM, "tbm"); @@ -1857,3 +1865,51 @@ LWLockHeldByMeInMode(LWLock *l, LWLockMode mode) } return false; } + +/* + * LWLockCheckForCleanup + * + * Return true only if no backend holds this lock and no backend is queued + * waiting for it, judged from a single atomic read of the lock state. + */ +bool +LWLockCheckForCleanup(LWLock *lock) +{ + uint32 state; + bool ret; + + state = pg_atomic_read_u32(&(lock->state)); + + ret = (state & LW_LOCK_MASK) == 0; + ret = ret && (state & LW_FLAG_HAS_WAITERS) == 0; + + return ret; +} + +/* + * LWLockWaiterCount + * + * Return the number of processes currently queued on the lock's wait list. + */ +int +LWLockWaiterCount(LWLock *lock) +{ + int nwaiters = 0; + proclist_mutable_iter iter; + uint32 state; + + state = pg_atomic_read_u32(&(lock->state)); + + /* Quick check using state of lock */ + if ((state & LW_FLAG_HAS_WAITERS) == 0) + return 0; + + LWLockWaitListLock(lock); + + proclist_foreach_modify(iter, &lock->waiters, lwWaitLink) + nwaiters++; + + LWLockWaitListUnlock(lock); + + return nwaiters; +} diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c index ef4824f..5205542 100644 --- a/src/backend/utils/adt/lockfuncs.c +++ b/src/backend/utils/adt/lockfuncs.c @@ -234,7 +234,6 @@ pg_lock_status(PG_FUNCTION_ARGS) switch ((LockTagType) instance->locktag.locktag_type) { case LOCKTAG_RELATION: - case LOCKTAG_RELATION_EXTEND: values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1); values[2] =
ObjectIdGetDatum(instance->locktag.locktag_field2); nulls[3] = true; diff --git a/src/include/storage/lmgr.h b/src/include/storage/lmgr.h index 2a1244c..e7f4828 100644 --- a/src/include/storage/lmgr.h +++ b/src/include/storage/lmgr.h @@ -34,6 +34,36 @@ typedef enum XLTW_Oper XLTW_RecheckExclusionConstr } XLTW_Oper; +typedef struct RELEXTLOCKTAG +{ + Oid relid; /* identifies the lockable object */ + LWLockMode mode; /* requested mode; not hashed, as InitRelExtLock sets keysize to sizeof(Oid) */ +} RELEXTLOCKTAG; + +/* + * Shared hash table entry holding the LWLock that serializes extension of + * one relation. + */ +typedef struct RELEXTLOCK +{ + RELEXTLOCKTAG tag; /* hash key -- must be first */ + LWLock lock; /* LWLock for relation extension */ +} RELEXTLOCK; + +/* + * The LOCALRELEXTLOCK struct represents a local copy of data which is + * also present in the RELEXTLOCK table, organized for fast access without + * needing to acquire a LWLock. It is strictly for optimization. + */ +typedef struct LOCALRELEXTLOCK +{ + /* hash key */ + RELEXTLOCKTAG relid; /* whole lock tag, despite the field name */ + + /* data */ + bool held; /* is lock held?
*/ +} LOCALRELEXTLOCK; + extern void RelationInitLockInfo(Relation relation); /* Lock a relation */ @@ -51,10 +81,10 @@ extern void LockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode); extern void UnlockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode); /* Lock a relation for extension */ -extern void LockRelationForExtension(Relation relation, LOCKMODE lockmode); -extern void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode); -extern bool ConditionalLockRelationForExtension(Relation relation, - LOCKMODE lockmode); +extern void InitRelExtLock(long max_table_size); +extern void LockRelationForExtension(Relation relation, LWLockMode lockmode); +extern void UnlockRelationForExtension(Relation relation, LWLockMode lockmode); +extern bool ConditionalLockRelationForExtension(Relation relation, LWLockMode lockmode); extern int RelationExtensionLockWaiterCount(Relation relation); /* Lock a page (currently only used within indexes) */ diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h index 7a9c105..9d6e90f 100644 --- a/src/include/storage/lock.h +++ b/src/include/storage/lock.h @@ -139,8 +139,6 @@ typedef uint16 LOCKMETHODID; typedef enum LockTagType { LOCKTAG_RELATION, /* whole relation */ /* ID info for a relation is DB OID + REL OID; DB OID = 0 if shared */ - LOCKTAG_RELATION_EXTEND, /* the right to extend a relation */ - /* same ID info as RELATION */ LOCKTAG_PAGE, /* one page of a relation */ /* ID info for a page is RELATION info + BlockNumber */ @@ -199,14 +197,6 @@ typedef struct LOCKTAG (locktag).locktag_type = LOCKTAG_RELATION, \ (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD) -#define SET_LOCKTAG_RELATION_EXTEND(locktag,dboid,reloid) \ - ((locktag).locktag_field1 = (dboid), \ - (locktag).locktag_field2 = (reloid), \ - (locktag).locktag_field3 = 0, \ - (locktag).locktag_field4 = 0, \ - (locktag).locktag_type = LOCKTAG_RELATION_EXTEND, \ - (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD) - #define
SET_LOCKTAG_PAGE(locktag,dboid,reloid,blocknum) \ ((locktag).locktag_field1 = (dboid), \ (locktag).locktag_field2 = (reloid), \ diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index 0cd45bb..acab6fb 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -120,14 +120,21 @@ extern PGDLLIMPORT int NamedLWLockTrancheRequests; #define LOG2_NUM_PREDICATELOCK_PARTITIONS 4 #define NUM_PREDICATELOCK_PARTITIONS (1 << LOG2_NUM_PREDICATELOCK_PARTITIONS) +/* Number of partitions the shared relation extension lock tables are divided into */ +#define LOG2_NUM_RELEXTLOCK_PARTITIONS 4 +#define NUM_RELEXTLOCK_PARTITIONS (1 << LOG2_NUM_RELEXTLOCK_PARTITIONS) + /* Offsets for various chunks of preallocated lwlocks. */ #define BUFFER_MAPPING_LWLOCK_OFFSET NUM_INDIVIDUAL_LWLOCKS #define LOCK_MANAGER_LWLOCK_OFFSET \ (BUFFER_MAPPING_LWLOCK_OFFSET + NUM_BUFFER_PARTITIONS) #define PREDICATELOCK_MANAGER_LWLOCK_OFFSET \ (LOCK_MANAGER_LWLOCK_OFFSET + NUM_LOCK_PARTITIONS) -#define NUM_FIXED_LWLOCKS \ +#define RELEXTLOCK_MANAGER_LWLOCK_OFFSET \ (PREDICATELOCK_MANAGER_LWLOCK_OFFSET + NUM_PREDICATELOCK_PARTITIONS) +#define NUM_FIXED_LWLOCKS \ + (PREDICATELOCK_MANAGER_LWLOCK_OFFSET + NUM_PREDICATELOCK_PARTITIONS + \ + NUM_RELEXTLOCK_PARTITIONS) typedef enum LWLockMode { @@ -151,6 +158,8 @@ extern void LWLockReleaseClearVar(LWLock *lock, uint64 *valptr, uint64 val); extern void LWLockReleaseAll(void); extern bool LWLockHeldByMe(LWLock *lock); extern bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode); +extern bool LWLockCheckForCleanup(LWLock *lock); +extern int LWLockWaiterCount(LWLock *lock); extern bool LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval); extern void LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 value); @@ -211,6 +220,7 @@ typedef enum BuiltinTrancheIds LWTRANCHE_BUFFER_MAPPING, LWTRANCHE_LOCK_MANAGER, LWTRANCHE_PREDICATE_LOCK_MANAGER, + LWTRANCHE_RELEXT_LOCK_MANAGER, 
LWTRANCHE_PARALLEL_QUERY_DSA, LWTRANCHE_TBM, LWTRANCHE_FIRST_USER_DEFINED