From dc2d997c1a1abfbeb09ba5b63e0cec7df105e8c5 Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Mon, 11 Nov 2024 13:56:59 -0800
Subject: [PATCH v1 1/2] simplehash: add "grow_ok" to API.

Allows memory-sensitive callers to add an element iff it will not
cause the bucket array to grow.

Without this API, callers such as HashAgg cannot control memory usage
incrementally because hash table growth means that the size doubles.
---
 src/include/lib/simplehash.h | 35 ++++++++++++++++++++++++++++++-----
 1 file changed, 30 insertions(+), 5 deletions(-)

diff --git a/src/include/lib/simplehash.h b/src/include/lib/simplehash.h
index 3e1b1f9461..0bb3a1c1e2 100644
--- a/src/include/lib/simplehash.h
+++ b/src/include/lib/simplehash.h
@@ -115,6 +115,7 @@
 #define SH_RESET SH_MAKE_NAME(reset)
 #define SH_INSERT SH_MAKE_NAME(insert)
 #define SH_INSERT_HASH SH_MAKE_NAME(insert_hash)
+#define SH_INSERT_EXT SH_MAKE_NAME(insert_ext)
 #define SH_DELETE_ITEM SH_MAKE_NAME(delete_item)
 #define SH_DELETE SH_MAKE_NAME(delete)
 #define SH_LOOKUP SH_MAKE_NAME(lookup)
@@ -135,7 +136,7 @@
 #define SH_DISTANCE_FROM_OPTIMAL SH_MAKE_NAME(distance)
 #define SH_INITIAL_BUCKET SH_MAKE_NAME(initial_bucket)
 #define SH_ENTRY_HASH SH_MAKE_NAME(entry_hash)
-#define SH_INSERT_HASH_INTERNAL SH_MAKE_NAME(insert_hash_internal)
+#define SH_INSERT_EXT_INTERNAL SH_MAKE_NAME(insert_ext_internal)
 #define SH_LOOKUP_HASH_INTERNAL SH_MAKE_NAME(lookup_hash_internal)
 
 /* generate forward declarations necessary to use the hash table */
@@ -217,6 +218,13 @@ SH_SCOPE	SH_ELEMENT_TYPE *SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found);
 SH_SCOPE	SH_ELEMENT_TYPE *SH_INSERT_HASH(SH_TYPE * tb, SH_KEY_TYPE key,
 											uint32 hash, bool *found);
 
+/*
+ * <element> *<prefix>_insert_ext(<prefix>_hash *tb, <key> key, uint32 hash,
+ * 								  bool grow_ok, bool *found)
+ */
+SH_SCOPE	SH_ELEMENT_TYPE *SH_INSERT_EXT(SH_TYPE * tb, SH_KEY_TYPE key,
+										   uint32 hash, bool grow_ok, bool *found);
+
 /* <element> *<prefix>_lookup(<prefix>_hash *tb, <key> key) */
 SH_SCOPE	SH_ELEMENT_TYPE *SH_LOOKUP(SH_TYPE * tb, SH_KEY_TYPE key);
 
@@ -606,7 +614,8 @@ SH_GROW(SH_TYPE * tb, uint64 newsize)
  * into its wrapper functions even if SH_SCOPE is extern.
  */
 static inline SH_ELEMENT_TYPE *
-SH_INSERT_HASH_INTERNAL(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash, bool *found)
+SH_INSERT_EXT_INTERNAL(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash, bool grow_ok,
+					   bool *found)
 {
 	uint32		startelem;
 	uint32		curelem;
@@ -629,6 +638,9 @@ restart:
 		if (unlikely(tb->size == SH_MAX_SIZE))
 			sh_error("hash table size exceeded");
 
+		if (!grow_ok)
+			return NULL;
+
 		/*
 		 * When optimizing, it can be very useful to print these out.
 		 */
@@ -778,7 +790,7 @@ SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found)
 {
 	uint32		hash = SH_HASH_KEY(tb, key);
 
-	return SH_INSERT_HASH_INTERNAL(tb, key, hash, found);
+	return SH_INSERT_EXT_INTERNAL(tb, key, hash, true, found);
 }
 
 /*
@@ -789,7 +801,20 @@ SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found)
 SH_SCOPE	SH_ELEMENT_TYPE *
 SH_INSERT_HASH(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash, bool *found)
 {
-	return SH_INSERT_HASH_INTERNAL(tb, key, hash, found);
+	return SH_INSERT_EXT_INTERNAL(tb, key, hash, true, found);
+}
+
+/*
+ * Insert the key into the hash-table using an already-calculated hash. Set
+ * *found to true if the key already exists, false otherwise. If growing is
+ * necessary, and and !grow_ok, return NULL; otherwise returns the hash-table
+ * entry.
+ */
+SH_SCOPE	SH_ELEMENT_TYPE *
+SH_INSERT_EXT(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash, bool grow_ok,
+			  bool *found)
+{
+	return SH_INSERT_EXT_INTERNAL(tb, key, hash, grow_ok, found);
 }
 
 /*
@@ -1202,5 +1227,5 @@ SH_STAT(SH_TYPE * tb)
 #undef SH_PREV
 #undef SH_DISTANCE_FROM_OPTIMAL
 #undef SH_ENTRY_HASH
-#undef SH_INSERT_HASH_INTERNAL
+#undef SH_INSERT_EXT_INTERNAL
 #undef SH_LOOKUP_HASH_INTERNAL
-- 
2.34.1

