From 2cef9eb9236a19845f967de3f392124016bbbba9 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Thu, 27 Oct 2022 10:56:39 +1300
Subject: [PATCH 4/8] Don't re-acquire LockManager partition lock after wakeup.

Change the contract of WaitOnLock() and ProcSleep() to return with the
partition lock released.  ProcSleep() re-acquired the lock to prevent
interrupts, which was a problem at the time the ancestor of that code
was committed in fe548629c50, because then we had signal handlers that
longjmp'd out of there.  Now, that can happen only at points where we
explicitly run CHECK_FOR_INTERRUPTS(), so we can remove the lock, which
leads to the idea of making the function always release.

While an earlier commit fixed the problem that the backend woke us up
before it had even released the lock itself, this lock reacquisition
point was still contended when multiple backends woke at the same time.

XXX Right?  Or what am I missing?
---
 src/backend/storage/lmgr/lock.c | 18 ++++++++++++------
 src/backend/storage/lmgr/proc.c | 20 ++++++++++++--------
 2 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index f59ae20069..42c244f5fb 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -787,6 +787,7 @@ LockAcquireExtended(const LOCKTAG *locktag,
 	LWLock	   *partitionLock;
 	bool		found_conflict;
 	bool		log_lock = false;
+	bool		partitionLocked = false;
 	LatchBatch	wakeups = {0};
 
 	if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods))
@@ -995,6 +996,7 @@ LockAcquireExtended(const LOCKTAG *locktag,
 	partitionLock = LockHashPartitionLock(hashcode);
 
 	LWLockAcquire(partitionLock, LW_EXCLUSIVE);
+	partitionLocked = true;
 
 	/*
 	 * Find or create lock and proclock entries with this tag
@@ -1099,7 +1101,10 @@ LockAcquireExtended(const LOCKTAG *locktag,
 										 locktag->locktag_type,
 										 lockmode);
 
+		Assert(LWLockHeldByMeInMode(partitionLock, LW_EXCLUSIVE));
 		WaitOnLock(locallock, owner, &wakeups);
+		Assert(!LWLockHeldByMeInMode(partitionLock, LW_EXCLUSIVE));
+		partitionLocked = false;
 
 		TRACE_POSTGRESQL_LOCK_WAIT_DONE(locktag->locktag_field1,
 										locktag->locktag_field2,
@@ -1124,7 +1129,6 @@ LockAcquireExtended(const LOCKTAG *locktag,
 			PROCLOCK_PRINT("LockAcquire: INCONSISTENT", proclock);
 			LOCK_PRINT("LockAcquire: INCONSISTENT", lock, lockmode);
 			/* Should we retry ? */
-			LWLockRelease(partitionLock);
 			elog(ERROR, "LockAcquire failed");
 		}
 		PROCLOCK_PRINT("LockAcquire: granted", proclock);
@@ -1137,9 +1141,11 @@ LockAcquireExtended(const LOCKTAG *locktag,
 	 */
 	FinishStrongLockAcquire();
 
-	LWLockRelease(partitionLock);
-
-	SetLatches(&wakeups);
+	if (partitionLocked)
+	{
+		LWLockRelease(partitionLock);
+		SetLatches(&wakeups);
+	}
 
 	/*
 	 * Emit a WAL record if acquisition of this lock needs to be replayed in a
@@ -1811,7 +1817,8 @@ MarkLockClear(LOCALLOCK *locallock)
  * Caller must have set MyProc->heldLocks to reflect locks already held
  * on the lockable object by this process.
  *
- * The appropriate partition lock must be held at entry.
+ * The appropriate partition lock must be held at entry.  It is not held on
+ * exit.
  */
 static void
 WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner, LatchBatch *wakeups)
@@ -1868,7 +1875,6 @@ WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner, LatchBatch *wakeups)
 			awaitedLock = NULL;
 			LOCK_PRINT("WaitOnLock: aborting on lock",
 					   locallock->lock, locallock->tag.mode);
-			LWLockRelease(LockHashPartitionLock(locallock->hashcode));
 
 			/*
 			 * Now that we aren't holding the partition lock, we can give an
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index f0e19b74a7..2a03cd4b1f 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -1033,7 +1033,7 @@ ProcQueueInit(PROC_QUEUE *queue)
  * Caller must have set MyProc->heldLocks to reflect locks already held
  * on the lockable object by this process (under all XIDs).
  *
- * The lock table's partition lock must be held at entry, and will be held
+ * The lock table's partition lock must be held at entry, and will be released
  * at exit.
  *
  * Result: PROC_WAIT_STATUS_OK if we acquired the lock, PROC_WAIT_STATUS_ERROR if not (deadlock).
@@ -1062,6 +1062,12 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable, LatchBatch *wakeups)
 	PGPROC	   *leader = MyProc->lockGroupLeader;
 	int			i;
 
+	/*
+	 * Every way out of this function will release the partition lock and send
+	 * buffered latch wakeups.
+	 */
+	Assert(LWLockHeldByMeInMode(partitionLock, LW_EXCLUSIVE));
+
 	/*
 	 * If group locking is in use, locks held by members of my locking group
 	 * need to be included in myHeldLocks.  This is not required for relation
@@ -1148,6 +1154,8 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable, LatchBatch *wakeups)
 					/* Skip the wait and just grant myself the lock. */
 					GrantLock(lock, proclock, lockmode);
 					GrantAwaitedLock();
+					LWLockRelease(partitionLock);
+					SetLatches(wakeups);
 					return PROC_WAIT_STATUS_OK;
 				}
 				/* Break out of loop to put myself before him */
@@ -1191,6 +1199,8 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable, LatchBatch *wakeups)
 	if (early_deadlock)
 	{
 		RemoveFromWaitQueue(MyProc, hashcode, wakeups);
+		LWLockRelease(partitionLock);
+		SetLatches(wakeups);
 		return PROC_WAIT_STATUS_ERROR;
 	}
 
@@ -1525,6 +1535,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable, LatchBatch *wakeups)
 			}
 
 			LWLockRelease(partitionLock);
+			SetLatches(wakeups);
 
 			if (deadlock_state == DS_SOFT_DEADLOCK)
 				ereport(LOG,
@@ -1626,13 +1637,6 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable, LatchBatch *wakeups)
 							standbyWaitStart, GetCurrentTimestamp(),
 							NULL, false);
 
-	/*
-	 * Re-acquire the lock table's partition lock.  We have to do this to hold
-	 * off cancel/die interrupts before we can mess with lockAwaited (else we
-	 * might have a missed or duplicated locallock update).
-	 */
-	LWLockAcquire(partitionLock, LW_EXCLUSIVE);
-
 	/*
 	 * We no longer want LockErrorCleanup to do anything.
 	 */
-- 
2.35.1

