From 0b03fc09570635cd00248a5f4e3ab60e39b917fb Mon Sep 17 00:00:00 2001
From: David Geier <david.geier@servicenow.com>
Date: Sun, 14 Aug 2022 20:17:54 +0000
Subject: [PATCH] Improve heavyweight lock fast path

---
 src/backend/optimizer/util/plancat.c |  9 ----
 src/backend/storage/lmgr/lock.c      | 71 +++++++++++++++-------------
 src/backend/utils/adt/selfuncs.c     |  8 ++++
 src/include/storage/proc.h           |  4 +-
 4 files changed, 48 insertions(+), 44 deletions(-)

diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 5012bfe142..dfe99ba93b 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -414,16 +414,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
 					info->tuples = rel->tuples;
 			}
 
-			if (info->relam == BTREE_AM_OID)
-			{
-				/* For btrees, get tree height while we have the index open */
-				info->tree_height = _bt_getrootheight(indexRelation);
-			}
-			else
-			{
-				/* For other index types, just set it to "unknown" for now */
-				info->tree_height = -1;
+			info->tree_height = -1;
-			}
 
 			index_close(indexRelation, NoLock);
 
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index 5f5803f681..b7139f51b5 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -163,14 +163,6 @@ typedef struct TwoPhaseLockRecord
 } TwoPhaseLockRecord;
 
 
-/*
- * Count of the number of fast path lock slots we believe to be used.  This
- * might be higher than the real number if another backend has transferred
- * our locks to the primary lock table, but it can never be lower than the
- * real value, since only we can acquire locks on our own behalf.
- */
-static int	FastPathLocalUseCount = 0;
-
 /*
  * Flag to indicate if the relation extension lock is held by this backend.
  * This flag is used to ensure that while holding the relation extension lock
@@ -203,18 +195,18 @@ static bool IsPageLockHeld PG_USED_FOR_ASSERTS_ONLY = false;
 #define FAST_PATH_LOCKNUMBER_OFFSET		1
 #define FAST_PATH_MASK					((1 << FAST_PATH_BITS_PER_SLOT) - 1)
 #define FAST_PATH_GET_BITS(proc, n) \
-	(((proc)->fpLockBits >> (FAST_PATH_BITS_PER_SLOT * n)) & FAST_PATH_MASK)
+	((proc)->fpLockBits[(n)])
 #define FAST_PATH_BIT_POSITION(n, l) \
 	(AssertMacro((l) >= FAST_PATH_LOCKNUMBER_OFFSET), \
 	 AssertMacro((l) < FAST_PATH_BITS_PER_SLOT+FAST_PATH_LOCKNUMBER_OFFSET), \
 	 AssertMacro((n) < FP_LOCK_SLOTS_PER_BACKEND), \
 	 ((l) - FAST_PATH_LOCKNUMBER_OFFSET + FAST_PATH_BITS_PER_SLOT * (n)))
 #define FAST_PATH_SET_LOCKMODE(proc, n, l) \
-	 (proc)->fpLockBits |= UINT64CONST(1) << FAST_PATH_BIT_POSITION(n, l)
+	 ((proc)->fpLockBits[(n)] |= (1 << ((l) - FAST_PATH_LOCKNUMBER_OFFSET)))
 #define FAST_PATH_CLEAR_LOCKMODE(proc, n, l) \
-	 (proc)->fpLockBits &= ~(UINT64CONST(1) << FAST_PATH_BIT_POSITION(n, l))
+	 ((proc)->fpLockBits[(n)] &= ~(1 << ((l) - FAST_PATH_LOCKNUMBER_OFFSET)))
 #define FAST_PATH_CHECK_LOCKMODE(proc, n, l) \
-	 ((proc)->fpLockBits & (UINT64CONST(1) << FAST_PATH_BIT_POSITION(n, l)))
+	 ((proc)->fpLockBits[(n)] & (1 << ((l) - FAST_PATH_LOCKNUMBER_OFFSET)))
 
 /*
  * The fast-path lock mechanism is concerned only with relation locks on
@@ -924,8 +916,7 @@ LockAcquireExtended(const LOCKTAG *locktag,
 	 * lock type on a relation we have already locked using the fast-path, but
 	 * for now we don't worry about that case either.
 	 */
-	if (EligibleForRelationFastPath(locktag, lockmode) &&
-		FastPathLocalUseCount < FP_LOCK_SLOTS_PER_BACKEND)
+	if (EligibleForRelationFastPath(locktag, lockmode))
 	{
 		uint32		fasthashcode = FastPathStrongLockHashPartition(hashcode);
 		bool		acquired;
@@ -940,8 +931,7 @@ LockAcquireExtended(const LOCKTAG *locktag,
 		if (FastPathStrongRelationLocks->count[fasthashcode] != 0)
 			acquired = false;
 		else
-			acquired = FastPathGrantRelationLock(locktag->locktag_field2,
-												 lockmode);
+			acquired = FastPathGrantRelationLock(locktag->locktag_field2, lockmode);
 		LWLockRelease(&MyProc->fpInfoLock);
 		if (acquired)
 		{
@@ -2076,8 +2066,7 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
 	locallock->lockCleared = false;
 
 	/* Attempt fast release of any lock eligible for the fast path. */
-	if (EligibleForRelationFastPath(locktag, lockmode) &&
-		FastPathLocalUseCount > 0)
+	if (EligibleForRelationFastPath(locktag, lockmode))
 	{
 		bool		released;
 
@@ -2654,20 +2648,25 @@ LockReassignOwner(LOCALLOCK *locallock, ResourceOwner parent)
 static bool
 FastPathGrantRelationLock(Oid relid, LOCKMODE lockmode)
 {
-	uint32		f;
+	uint32		f, i;
 	uint32		unused_slot = FP_LOCK_SLOTS_PER_BACKEND;
 
 	/* Scan for existing entry for this relid, remembering empty slot. */
-	for (f = 0; f < FP_LOCK_SLOTS_PER_BACKEND; f++)
+	for (i = 0; i < FP_LOCK_SLOTS_PER_BACKEND; i++)
 	{
-		if (FAST_PATH_GET_BITS(MyProc, f) == 0)
-			unused_slot = f;
-		else if (MyProc->fpRelId[f] == relid)
+		/* Start probing at a slot derived from relid to shorten the scan. */
+		f = ((relid % FP_LOCK_SLOTS_PER_BACKEND) + i) % FP_LOCK_SLOTS_PER_BACKEND;
+		Assert(f < FP_LOCK_SLOTS_PER_BACKEND);
+
+		if (MyProc->fpRelId[f] == relid)
 		{
 			Assert(!FAST_PATH_CHECK_LOCKMODE(MyProc, f, lockmode));
 			FAST_PATH_SET_LOCKMODE(MyProc, f, lockmode);
 			return true;
 		}
+		else if (FAST_PATH_GET_BITS(MyProc, f) == 0 &&
+				 unused_slot == FP_LOCK_SLOTS_PER_BACKEND)
+			unused_slot = f;
 	}
 
 	/* If no existing entry, use any empty slot. */
@@ -2675,7 +2680,6 @@ FastPathGrantRelationLock(Oid relid, LOCKMODE lockmode)
 	{
 		MyProc->fpRelId[unused_slot] = relid;
 		FAST_PATH_SET_LOCKMODE(MyProc, unused_slot, lockmode);
-		++FastPathLocalUseCount;
 		return true;
 	}
 
@@ -2691,24 +2695,22 @@ FastPathGrantRelationLock(Oid relid, LOCKMODE lockmode)
 static bool
 FastPathUnGrantRelationLock(Oid relid, LOCKMODE lockmode)
 {
-	uint32		f;
-	bool		result = false;
+	uint32		f, i;
 
-	FastPathLocalUseCount = 0;
-	for (f = 0; f < FP_LOCK_SLOTS_PER_BACKEND; f++)
+	for (i = 0; i < FP_LOCK_SLOTS_PER_BACKEND; i++)
 	{
+		/* Probe slots in the same order FastPathGrantRelationLock fills them. */
+		f = ((relid % FP_LOCK_SLOTS_PER_BACKEND) + i) % FP_LOCK_SLOTS_PER_BACKEND;
+
 		if (MyProc->fpRelId[f] == relid
 			&& FAST_PATH_CHECK_LOCKMODE(MyProc, f, lockmode))
 		{
-			Assert(!result);
 			FAST_PATH_CLEAR_LOCKMODE(MyProc, f, lockmode);
-			result = true;
-			/* we continue iterating so as to update FastPathLocalUseCount */
+			return true;
 		}
-		if (FAST_PATH_GET_BITS(MyProc, f) != 0)
-			++FastPathLocalUseCount;
 	}
-	return result;
+
+	return false;
 }
 
 /*
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index d35e5605de..dab1aaf195 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -97,6 +97,7 @@
 #include <ctype.h>
 #include <math.h>
 
+#include "access/nbtree.h"
 #include "access/brin.h"
 #include "access/brin_page.h"
 #include "access/gin.h"
@@ -6831,6 +6832,13 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
 	 * touched.  The number of such pages is btree tree height plus one (ie,
 	 * we charge for the leaf page too).  As above, charge once per SA scan.
 	 */
+	if (index->tree_height < 0)
+	{
+		Relation	indexRel = index_open(index->indexoid, AccessShareLock);
+		index->tree_height = _bt_getrootheight(indexRel);
+		index_close(indexRel, NoLock);
+	}
+
 	descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
 	costs.indexStartupCost += descentCost;
 	costs.indexTotalCost += costs.num_sa_scans * descentCost;
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index 2579e619eb..736b03d4f8 100644
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -80,7 +80,7 @@ struct XidCache
  * rather than the main lock table.  This eases contention on the lock
  * manager LWLocks.  See storage/lmgr/README for additional details.
  */
-#define		FP_LOCK_SLOTS_PER_BACKEND 16
+#define		FP_LOCK_SLOTS_PER_BACKEND 64
 
 /*
  * An invalid pgprocno.  Must be larger than the maximum number of PGPROC
@@ -282,7 +282,7 @@ struct PGPROC
 
 	/* Lock manager data, recording fast-path locks taken by this backend. */
 	LWLock		fpInfoLock;		/* protects per-backend fast-path state */
-	uint64		fpLockBits;		/* lock modes held for each fast-path slot */
+	uint8		fpLockBits[FP_LOCK_SLOTS_PER_BACKEND];		/* lock modes held for each fast-path slot */
 	Oid			fpRelId[FP_LOCK_SLOTS_PER_BACKEND]; /* slots for rel oids */
 	bool		fpVXIDLock;		/* are we holding a fast-path VXID lock? */
 	LocalTransactionId fpLocalTransactionId;	/* lxid for fast-path VXID
-- 
2.25.1

