From 2afbc5a59c3ccd3fec14105aff40252eeaacf40c Mon Sep 17 00:00:00 2001
From: Yura Sokolov <y.sokolov@postgrespro.ru>
Date: Wed, 22 Sep 2021 13:10:37 +0300
Subject: [PATCH] More gentle dynahash usage in buffer allocation.

When BufferAlloc reuses some valid buffer, there is no need
to go through dynahash's free list.

---
 src/backend/storage/buffer/buf_table.c |  38 ++---
 src/backend/storage/buffer/bufmgr.c    |   9 +-
 src/backend/utils/hash/dynahash.c      | 190 +++++++++++++++++++++++++
 src/include/storage/buf_internals.h    |   5 +-
 src/include/utils/hsearch.h            |   8 ++
 5 files changed, 230 insertions(+), 20 deletions(-)

diff --git a/src/backend/storage/buffer/buf_table.c b/src/backend/storage/buffer/buf_table.c
index caa03ae1233..382c84f5149 100644
--- a/src/backend/storage/buffer/buf_table.c
+++ b/src/backend/storage/buffer/buf_table.c
@@ -107,36 +107,42 @@ BufTableLookup(BufferTag *tagPtr, uint32 hashcode)
 
 /*
  * BufTableInsert
- *		Insert a hashtable entry for given tag and buffer ID,
- *		unless an entry already exists for that tag
- *
- * Returns -1 on successful insertion.  If a conflicting entry exists
- * already, returns the buffer ID in that entry.
+ *		Insert a hashtable entry for given tag and buffer ID.
+ *		Caller should be sure there is no conflicting entry.
  *
  * Caller must hold exclusive lock on BufMappingLock for tag's partition
+ * and call BufTableLookup to check for conflicting entry.
  */
-int
+void
 BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id)
 {
 	BufferLookupEnt *result;
-	bool		found;
 
 	Assert(buf_id >= 0);		/* -1 is reserved for not-in-table */
 	Assert(tagPtr->blockNum != P_NEW);	/* invalid tag */
 
 	result = (BufferLookupEnt *)
-		hash_search_with_hash_value(SharedBufHash,
-									(void *) tagPtr,
-									hashcode,
-									HASH_ENTER,
-									&found);
-
-	if (found)					/* found something already in the table */
-		return result->id;
+		hash_insert_with_hash_nocheck(SharedBufHash,
+									  (void *) tagPtr,
+									  hashcode);
 
 	result->id = buf_id;
+}
+
+void
+BufTableMove(BufferTag *oldTagPtr, uint32 oldHash,
+			 BufferTag *newTagPtr, uint32 newHash,
+			 int buf_id)
+{
+	BufferLookupEnt *result PG_USED_FOR_ASSERTS_ONLY;
 
-	return -1;
+	result = (BufferLookupEnt *)
+		hash_update_hash_key_with_hash_nocheck(SharedBufHash,
+											   (void *) oldTagPtr,
+											   oldHash,
+											   (void *) newTagPtr,
+											   newHash);
+	Assert(result->id == buf_id);
 }
 
 /*
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index e88e4e918b0..8fbc676811c 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -1325,7 +1325,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 		 * Note that we have not yet removed the hashtable entry for the old
 		 * tag.
 		 */
-		buf_id = BufTableInsert(&newTag, newHash, buf->buf_id);
+		buf_id = BufTableLookup(&newTag, newHash);
 
 		if (buf_id >= 0)
 		{
@@ -1391,7 +1391,6 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 			break;
 
 		UnlockBufHdr(buf, buf_state);
-		BufTableDelete(&newTag, newHash);
 		if (oldPartitionLock != NULL &&
 			oldPartitionLock != newPartitionLock)
 			LWLockRelease(oldPartitionLock);
@@ -1425,10 +1424,14 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 
 	if (oldPartitionLock != NULL)
 	{
-		BufTableDelete(&oldTag, oldHash);
+		BufTableMove(&oldTag, oldHash, &newTag, newHash, buf->buf_id);
 		if (oldPartitionLock != newPartitionLock)
 			LWLockRelease(oldPartitionLock);
 	}
+	else
+	{
+		BufTableInsert(&newTag, newHash, buf->buf_id);
+	}
 
 	LWLockRelease(newPartitionLock);
 
diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c
index 6546e3c7c79..c0625927131 100644
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -1288,6 +1288,196 @@ hash_update_hash_key(HTAB *hashp,
 	return true;
 }
 
+/*
+ * hash_update_hash_key_nocheck -- change the hash key of an existing table
+ * entry without check for duplicate.
+ *
+ * Caller should be sure there is no conflicting entry.
+ */
+void *
+hash_update_hash_key_with_hash_nocheck(HTAB *hashp,
+									   const void *oldKeyPtr,
+									   uint32 oldhashvalue,
+									   const void *newKeyPtr,
+									   uint32 newhashvalue)
+{
+	HASHHDR    *hctl = hashp->hctl;
+	Size		keysize;
+	uint32		bucket;
+	uint32		newbucket;
+	long		segment_num;
+	long		segment_ndx;
+	HASHSEGMENT segp;
+	HASHBUCKET	currBucket;
+	HASHBUCKET *prevBucketPtr;
+	HASHBUCKET *oldPrevPtr;
+	HashCompareFunc match;
+
+#if HASH_STATISTICS
+	hash_accesses++;
+	hctl->accesses++;
+#endif
+
+	/* disallow updates if frozen */
+	if (hashp->frozen)
+		elog(ERROR, "cannot update in frozen hashtable \"%s\"",
+			 hashp->tabname);
+
+	/*
+	 * Lookup the existing element using its saved hash value.  We need to do
+	 * this to be able to unlink it from its hash chain, but as a side benefit
+	 * we can verify the validity of the passed existingEntry pointer.
+	 */
+	bucket = calc_bucket(hctl, oldhashvalue);
+
+	segment_num = bucket >> hashp->sshift;
+	segment_ndx = MOD(bucket, hashp->ssize);
+
+	segp = hashp->dir[segment_num];
+
+	if (segp == NULL)
+		hash_corrupted(hashp);
+
+	prevBucketPtr = &segp[segment_ndx];
+	currBucket = *prevBucketPtr;
+
+	match = hashp->match;		/* save one fetch in inner loop */
+	keysize = hashp->keysize;	/* ditto */
+
+	while (currBucket != NULL)
+	{
+		if (currBucket->hashvalue == oldhashvalue &&
+			match(ELEMENTKEY(currBucket), oldKeyPtr, keysize) == 0)
+			break;
+		prevBucketPtr = &(currBucket->link);
+		currBucket = *prevBucketPtr;
+	}
+
+	if (currBucket == NULL)
+		elog(ERROR, "hash_update_hash_key argument is not in hashtable \"%s\"",
+			 hashp->tabname);
+
+	oldPrevPtr = prevBucketPtr;
+
+	/*
+	 * Now perform the simplified insertion operation: - to locate the hash
+	 * chain we want to put the entry into, - put into beginning without
+	 * search for duplicate.
+	 */
+	newbucket = calc_bucket(hctl, newhashvalue);
+
+	segment_num = newbucket >> hashp->sshift;
+	segment_ndx = MOD(newbucket, hashp->ssize);
+
+	segp = hashp->dir[segment_num];
+
+	if (segp == NULL)
+		hash_corrupted(hashp);
+
+	prevBucketPtr = &segp[segment_ndx];
+
+	/* copy new key into record */
+	currBucket->hashvalue = newhashvalue;
+	hashp->keycopy(ELEMENTKEY(currBucket), newKeyPtr, keysize);
+
+	/*
+	 * If old and new hash values belong to the same bucket, we need not
+	 * change any chain links, and indeed should not since this simplistic
+	 * update will corrupt the list if currBucket is the last element.  (We
+	 * cannot fall out earlier, however, since we need to scan the bucket to
+	 * check for duplicate keys.)
+	 */
+	if (bucket != newbucket)
+	{
+		/* OK to remove record from old hash bucket's chain. */
+		*oldPrevPtr = currBucket->link;
+
+		/* link into new hashbucket chain */
+		currBucket->link = *prevBucketPtr;
+		*prevBucketPtr = currBucket;
+	}
+	/* no memory barrier in `else` part because bucket is locked */
+
+	return (void *) ELEMENTKEY(currBucket);
+}
+
+/*
+ * hash_insert_with_hash_nocheck - inserts new entry into bucket without
+ * checking for duplicates.
+ *
+ * Caller should be sure there is no conflicting entry.
+ */
+void *
+hash_insert_with_hash_nocheck(HTAB *hashp,
+							  const void *keyPtr,
+							  uint32 hashvalue)
+{
+	HASHHDR    *hctl = hashp->hctl;
+	int			freelist_idx = FREELIST_IDX(hctl, hashvalue);
+	uint32		bucket;
+	long		segment_num;
+	long		segment_ndx;
+	HASHSEGMENT segp;
+	HASHBUCKET	currBucket;
+	HASHBUCKET *prevBucketPtr;
+
+#if HASH_STATISTICS
+	hash_accesses++;
+	hctl->accesses++;
+#endif
+
+	/* disallow updates if frozen */
+	if (hashp->frozen)
+		elog(ERROR, "cannot update in frozen hashtable \"%s\"",
+			 hashp->tabname);
+
+	if (!IS_PARTITIONED(hctl) &&
+		hctl->freeList[0].nentries > (long) hctl->max_bucket &&
+		!has_seq_scans(hashp))
+		(void) expand_table(hashp);
+
+	/*
+	 * Lookup the existing element using its saved hash value.  We need to do
+	 * this to be able to unlink it from its hash chain, but as a side benefit
+	 * we can verify the validity of the passed existingEntry pointer.
+	 */
+	bucket = calc_bucket(hctl, hashvalue);
+
+	segment_num = bucket >> hashp->sshift;
+	segment_ndx = MOD(bucket, hashp->ssize);
+
+	segp = hashp->dir[segment_num];
+
+	if (segp == NULL)
+		hash_corrupted(hashp);
+
+	prevBucketPtr = &segp[segment_ndx];
+
+	currBucket = get_hash_entry(hashp, freelist_idx);
+
+	if (currBucket == NULL)
+	{
+		/* report a generic message */
+		if (hashp->isshared)
+			ereport(ERROR,
+					(errcode(ERRCODE_OUT_OF_MEMORY),
+					 errmsg("out of shared memory")));
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_OUT_OF_MEMORY),
+					 errmsg("out of memory")));
+	}
+
+	/* copy key into record */
+	currBucket->hashvalue = hashvalue;
+	currBucket->link = *prevBucketPtr;
+	hashp->keycopy(ELEMENTKEY(currBucket), keyPtr, hashp->keysize);
+
+	*prevBucketPtr = currBucket;
+
+	return (void *) ELEMENTKEY(currBucket);
+}
+
 /*
  * Allocate a new hashtable entry if possible; return NULL if out of memory.
  * (Or, if the underlying space allocator throws error for out-of-memory,
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index 33fcaf5c9a8..7dd8d0cb775 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -327,7 +327,10 @@ extern Size BufTableShmemSize(int size);
 extern void InitBufTable(int size);
 extern uint32 BufTableHashCode(BufferTag *tagPtr);
 extern int	BufTableLookup(BufferTag *tagPtr, uint32 hashcode);
-extern int	BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id);
+extern void BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id);
+extern void BufTableMove(BufferTag *oldTagPtr, uint32 oldHash,
+						 BufferTag *newTagPtr, uint32 newHash,
+						 int buf_id);
 extern void BufTableDelete(BufferTag *tagPtr, uint32 hashcode);
 
 /* localbuf.c */
diff --git a/src/include/utils/hsearch.h b/src/include/utils/hsearch.h
index d7af0239c8c..9b086cf1112 100644
--- a/src/include/utils/hsearch.h
+++ b/src/include/utils/hsearch.h
@@ -139,6 +139,14 @@ extern void *hash_search_with_hash_value(HTAB *hashp, const void *keyPtr,
 										 bool *foundPtr);
 extern bool hash_update_hash_key(HTAB *hashp, void *existingEntry,
 								 const void *newKeyPtr);
+extern void *hash_update_hash_key_with_hash_nocheck(HTAB *hashp,
+													const void *oldKeyPtr,
+													uint32 oldhashvalue,
+													const void *newKeyPtr,
+													uint32 newhashvalue);
+extern void *hash_insert_with_hash_nocheck(HTAB *hashp,
+										   const void *keyPtr,
+										   uint32 hashvalue);
 extern long hash_get_num_entries(HTAB *hashp);
 extern void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp);
 extern void *hash_seq_search(HASH_SEQ_STATUS *status);
-- 
2.33.0

