From da650f5dc0dda2155d088f56e2c062f7d10dec44 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Mon, 4 Jul 2022 11:56:02 +1200
Subject: [PATCH v2] Fix lock assertions in dshash.c.

dshash.c maintained flags to track locking, in order to make assertions
that the dshash API was being used correctly.  Unfortunately the
interaction with ereport's non-local exits was not thought through
carefully enough and the flag could get out of sync with reality.  Since
these flags were primarily used for assertions that no lock was held, we
can get most of the intended effect with a loop over all LWLocks instead
(in assertion builds only).

This problem was noted by Tom and Andres while reviewing changes to
support the new shared memory stats system in release 15, the second
user of dshash.c.  On closer inspection, even the earlier typcache.c
code was not guaranteed to meet the asserted conditions, now that it's
been highlighted that even dsa_get_address() can throw (albeit in
unlikely circumstances).

Back-patch to 11, where dshash.c arrived.

Reported-by: Tom Lane <tgl@sss.pgh.pa.us>
Reported-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Kyotaro HORIGUCHI <horiguchi.kyotaro@lab.ntt.co.jp>
Reviewed-by: Zhihong Yu <zyu@yugabyte.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/20220311012712.botrpsikaufzteyt@alap3.anarazel.de
---
 src/backend/lib/dshash.c | 54 +++++++++++++++-------------------------
 1 file changed, 20 insertions(+), 34 deletions(-)

diff --git a/src/backend/lib/dshash.c b/src/backend/lib/dshash.c
index ec454b4d65..90255a5eb9 100644
--- a/src/backend/lib/dshash.c
+++ b/src/backend/lib/dshash.c
@@ -110,8 +110,6 @@ struct dshash_table
 	dshash_table_control *control;	/* Control object in DSM. */
 	dsa_pointer *buckets;		/* Current bucket pointers in DSM. */
 	size_t		size_log2;		/* log2(number of buckets) */
-	bool		find_locked;	/* Is any partition lock held by 'find'? */
-	bool		find_exclusively_locked;	/* ... exclusively? */
 };
 
 /* Given a pointer to an item, find the entry (user data) it holds. */
@@ -167,6 +165,7 @@ struct dshash_table
 		BUCKET_INDEX_FOR_HASH_AND_SIZE(hash,							\
 									   hash_table->size_log2)])
 
+static inline void assert_no_lock_held_by_me(dshash_table *hash_table);
 static void delete_item(dshash_table *hash_table,
 						dshash_table_item *item);
 static void resize(dshash_table *hash_table, size_t new_size);
@@ -234,9 +233,6 @@ dshash_create(dsa_area *area, const dshash_parameters *params, void *arg)
 		}
 	}
 
-	hash_table->find_locked = false;
-	hash_table->find_exclusively_locked = false;
-
 	/*
 	 * Set up the initial array of buckets.  Our initial size is the same as
 	 * the number of partitions.
@@ -285,8 +281,6 @@ dshash_attach(dsa_area *area, const dshash_parameters *params,
 	hash_table->params = *params;
 	hash_table->arg = arg;
 	hash_table->control = dsa_get_address(area, control);
-	hash_table->find_locked = false;
-	hash_table->find_exclusively_locked = false;
 	Assert(hash_table->control->magic == DSHASH_MAGIC);
 
 	/*
@@ -309,7 +303,7 @@ dshash_attach(dsa_area *area, const dshash_parameters *params,
 void
 dshash_detach(dshash_table *hash_table)
 {
-	Assert(!hash_table->find_locked);
+	assert_no_lock_held_by_me(hash_table);
 
 	/* The hash table may have been destroyed.  Just free local memory. */
 	pfree(hash_table);
@@ -400,7 +394,7 @@ dshash_find(dshash_table *hash_table, const void *key, bool exclusive)
 	partition = PARTITION_FOR_HASH(hash);
 
 	Assert(hash_table->control->magic == DSHASH_MAGIC);
-	Assert(!hash_table->find_locked);
+	assert_no_lock_held_by_me(hash_table);
 
 	LWLockAcquire(PARTITION_LOCK(hash_table, partition),
 				  exclusive ? LW_EXCLUSIVE : LW_SHARED);
@@ -418,8 +412,6 @@ dshash_find(dshash_table *hash_table, const void *key, bool exclusive)
 	else
 	{
 		/* The caller will free the lock by calling dshash_release_lock. */
-		hash_table->find_locked = true;
-		hash_table->find_exclusively_locked = exclusive;
 		return ENTRY_FROM_ITEM(item);
 	}
 }
@@ -449,7 +441,7 @@ dshash_find_or_insert(dshash_table *hash_table,
 	partition = &hash_table->control->partitions[partition_index];
 
 	Assert(hash_table->control->magic == DSHASH_MAGIC);
-	Assert(!hash_table->find_locked);
+	assert_no_lock_held_by_me(hash_table);
 
 restart:
 	LWLockAcquire(PARTITION_LOCK(hash_table, partition_index),
@@ -494,8 +486,6 @@ restart:
 	}
 
 	/* The caller must release the lock with dshash_release_lock. */
-	hash_table->find_locked = true;
-	hash_table->find_exclusively_locked = true;
 	return ENTRY_FROM_ITEM(item);
 }
 
@@ -514,7 +504,7 @@ dshash_delete_key(dshash_table *hash_table, const void *key)
 	bool		found;
 
 	Assert(hash_table->control->magic == DSHASH_MAGIC);
-	Assert(!hash_table->find_locked);
+	assert_no_lock_held_by_me(hash_table);
 
 	hash = hash_key(hash_table, key);
 	partition = PARTITION_FOR_HASH(hash);
@@ -551,14 +541,10 @@ dshash_delete_entry(dshash_table *hash_table, void *entry)
 	size_t		partition = PARTITION_FOR_HASH(item->hash);
 
 	Assert(hash_table->control->magic == DSHASH_MAGIC);
-	Assert(hash_table->find_locked);
-	Assert(hash_table->find_exclusively_locked);
 	Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition),
 								LW_EXCLUSIVE));
 
 	delete_item(hash_table, item);
-	hash_table->find_locked = false;
-	hash_table->find_exclusively_locked = false;
 	LWLockRelease(PARTITION_LOCK(hash_table, partition));
 }
 
@@ -572,13 +558,7 @@ dshash_release_lock(dshash_table *hash_table, void *entry)
 	size_t		partition_index = PARTITION_FOR_HASH(item->hash);
 
 	Assert(hash_table->control->magic == DSHASH_MAGIC);
-	Assert(hash_table->find_locked);
-	Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition_index),
-								hash_table->find_exclusively_locked
-								? LW_EXCLUSIVE : LW_SHARED));
 
-	hash_table->find_locked = false;
-	hash_table->find_exclusively_locked = false;
 	LWLockRelease(PARTITION_LOCK(hash_table, partition_index));
 }
 
@@ -644,7 +624,7 @@ dshash_seq_next(dshash_seq_status *status)
 	if (status->curpartition == -1)
 	{
 		Assert(status->curbucket == 0);
-		Assert(!status->hash_table->find_locked);
+		assert_no_lock_held_by_me(status->hash_table);
 
 		status->curpartition = 0;
 
@@ -702,8 +682,6 @@ dshash_seq_next(dshash_seq_status *status)
 
 	status->curitem =
 		dsa_get_address(status->hash_table->area, next_item_pointer);
-	status->hash_table->find_locked = true;
-	status->hash_table->find_exclusively_locked = status->exclusive;
 
 	/*
 	 * The caller may delete the item. Store the next item in case of
@@ -722,9 +700,6 @@ dshash_seq_next(dshash_seq_status *status)
 void
 dshash_seq_term(dshash_seq_status *status)
 {
-	status->hash_table->find_locked = false;
-	status->hash_table->find_exclusively_locked = false;
-
 	if (status->curpartition >= 0)
 		LWLockRelease(PARTITION_LOCK(status->hash_table, status->curpartition));
 }
@@ -743,8 +718,6 @@ dshash_delete_current(dshash_seq_status *status)
 
 	Assert(status->exclusive);
 	Assert(hash_table->control->magic == DSHASH_MAGIC);
-	Assert(hash_table->find_locked);
-	Assert(hash_table->find_exclusively_locked);
 	Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition),
 								LW_EXCLUSIVE));
 
@@ -762,7 +735,7 @@ dshash_dump(dshash_table *hash_table)
 	size_t		j;
 
 	Assert(hash_table->control->magic == DSHASH_MAGIC);
-	Assert(!hash_table->find_locked);
+	assert_no_lock_held_by_me(hash_table);
 
 	for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
 	{
@@ -806,6 +779,19 @@ dshash_dump(dshash_table *hash_table)
 		LWLockRelease(PARTITION_LOCK(hash_table, i));
 }
 
+/*
+ * Assertion used to enforce rule that you can't lock more than one partition
+ * at a time, to avoid risk of deadlock.
+ */
+static inline void
+assert_no_lock_held_by_me(dshash_table *hash_table)
+{
+	int			i;
+
+	for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
+		Assert(!LWLockHeldByMe(PARTITION_LOCK(hash_table, i)));
+}
+
 /*
  * Delete a locked item to which we have a pointer.
  */
-- 
2.36.1

