From c1c4f563f3f3b954f27d8576f0f34fef78c095e9 Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Mon, 11 Nov 2024 13:56:59 -0800
Subject: [PATCH v2 1/2] simplehash: add "grow_ok" to API.

Allows memory-sensitive callers to add an element iff it will not
cause the bucket array to grow.

Without this API, callers such as HashAgg cannot control memory usage
incrementally because hash table growth means that the size doubles.
---
 src/include/lib/simplehash.h | 40 ++++++++++++++++++++++++++++++------
 1 file changed, 34 insertions(+), 6 deletions(-)

diff --git a/src/include/lib/simplehash.h b/src/include/lib/simplehash.h
index 3e1b1f9461..9c49410cef 100644
--- a/src/include/lib/simplehash.h
+++ b/src/include/lib/simplehash.h
@@ -115,6 +115,7 @@
 #define SH_RESET SH_MAKE_NAME(reset)
 #define SH_INSERT SH_MAKE_NAME(insert)
 #define SH_INSERT_HASH SH_MAKE_NAME(insert_hash)
+#define SH_INSERT_EXT SH_MAKE_NAME(insert_ext)
 #define SH_DELETE_ITEM SH_MAKE_NAME(delete_item)
 #define SH_DELETE SH_MAKE_NAME(delete)
 #define SH_LOOKUP SH_MAKE_NAME(lookup)
@@ -135,7 +136,7 @@
 #define SH_DISTANCE_FROM_OPTIMAL SH_MAKE_NAME(distance)
 #define SH_INITIAL_BUCKET SH_MAKE_NAME(initial_bucket)
 #define SH_ENTRY_HASH SH_MAKE_NAME(entry_hash)
-#define SH_INSERT_HASH_INTERNAL SH_MAKE_NAME(insert_hash_internal)
+#define SH_INSERT_EXT_INTERNAL SH_MAKE_NAME(insert_ext_internal)
 #define SH_LOOKUP_HASH_INTERNAL SH_MAKE_NAME(lookup_hash_internal)
 
 /* generate forward declarations necessary to use the hash table */
@@ -217,6 +218,13 @@ SH_SCOPE	SH_ELEMENT_TYPE *SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found);
 SH_SCOPE	SH_ELEMENT_TYPE *SH_INSERT_HASH(SH_TYPE * tb, SH_KEY_TYPE key,
 											uint32 hash, bool *found);
 
+/*
+ * <element> *<prefix>_insert_ext(<prefix>_hash *tb, <key> key, uint32 hash,
+ * 								  bool grow_ok, bool *found)
+ */
+SH_SCOPE	SH_ELEMENT_TYPE *SH_INSERT_EXT(SH_TYPE * tb, SH_KEY_TYPE key,
+										   uint32 hash, bool grow_ok, bool *found);
+
 /* <element> *<prefix>_lookup(<prefix>_hash *tb, <key> key) */
 SH_SCOPE	SH_ELEMENT_TYPE *SH_LOOKUP(SH_TYPE * tb, SH_KEY_TYPE key);
 
@@ -606,7 +614,8 @@ SH_GROW(SH_TYPE * tb, uint64 newsize)
  * into its wrapper functions even if SH_SCOPE is extern.
  */
 static inline SH_ELEMENT_TYPE *
-SH_INSERT_HASH_INTERNAL(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash, bool *found)
+SH_INSERT_EXT_INTERNAL(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash, bool grow_ok,
+					   bool *found)
 {
 	uint32		startelem;
 	uint32		curelem;
@@ -624,7 +633,7 @@ restart:
 	 * Note that this also reached when resizing the table due to
 	 * SH_GROW_MAX_DIB / SH_GROW_MAX_MOVE.
 	 */
-	if (unlikely(tb->members >= tb->grow_threshold))
+	if (unlikely(grow_ok && tb->members >= tb->grow_threshold))
 	{
 		if (unlikely(tb->size == SH_MAX_SIZE))
 			sh_error("hash table size exceeded");
@@ -651,6 +660,9 @@ restart:
 		/* any empty bucket can directly be used */
 		if (entry->status == SH_STATUS_EMPTY)
 		{
+			if (unlikely(!grow_ok && tb->members >= tb->grow_threshold))
+				return NULL;
+
 			tb->members++;
 			entry->SH_KEY = key;
 #ifdef SH_STORE_HASH
@@ -687,6 +699,9 @@ restart:
 			uint32		moveelem;
 			int32		emptydist = 0;
 
+			if (unlikely(!grow_ok && tb->members >= tb->grow_threshold))
+				return NULL;
+
 			/* find next empty bucket */
 			while (true)
 			{
@@ -778,7 +793,7 @@ SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found)
 {
 	uint32		hash = SH_HASH_KEY(tb, key);
 
-	return SH_INSERT_HASH_INTERNAL(tb, key, hash, found);
+	return SH_INSERT_EXT_INTERNAL(tb, key, hash, true, found);
 }
 
 /*
@@ -789,7 +804,20 @@ SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found)
 SH_SCOPE	SH_ELEMENT_TYPE *
 SH_INSERT_HASH(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash, bool *found)
 {
-	return SH_INSERT_HASH_INTERNAL(tb, key, hash, found);
+	return SH_INSERT_EXT_INTERNAL(tb, key, hash, true, found);
+}
+
+/*
+ * Insert the key into the hash-table using an already-calculated hash. Set
+ * *found to true if the key already exists, false otherwise. If growing is
+ * necessary, and and !grow_ok, return NULL; otherwise returns the hash-table
+ * entry.
+ */
+SH_SCOPE	SH_ELEMENT_TYPE *
+SH_INSERT_EXT(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash, bool grow_ok,
+			  bool *found)
+{
+	return SH_INSERT_EXT_INTERNAL(tb, key, hash, grow_ok, found);
 }
 
 /*
@@ -1202,5 +1230,5 @@ SH_STAT(SH_TYPE * tb)
 #undef SH_PREV
 #undef SH_DISTANCE_FROM_OPTIMAL
 #undef SH_ENTRY_HASH
-#undef SH_INSERT_HASH_INTERNAL
+#undef SH_INSERT_EXT_INTERNAL
 #undef SH_LOOKUP_HASH_INTERNAL
-- 
2.34.1

