From 41b4173de858ce7776f8d5f166ed2e75d0de19fb Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Tue, 19 Jul 2016 14:10:28 -0700
Subject: [PATCH 2/4] WIP: Add an inline / macro customizable hashtable.

---
 src/include/lib/simplehash.h | 407 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 407 insertions(+)
 create mode 100644 src/include/lib/simplehash.h

diff --git a/src/include/lib/simplehash.h b/src/include/lib/simplehash.h
new file mode 100644
index 0000000..fefc2fc
--- /dev/null
+++ b/src/include/lib/simplehash.h
@@ -0,0 +1,407 @@
+/*
+ * simplehash.h
+ *	  Hash table implementation which will be specialized to user-defined types,
+ *	  by including this file to generate the required code. */
+
+/* helpers */
+#define SH_MAKE_PREFIX(a) CppConcat(a,_)
+#define SH_MAKE_NAME(name) SH_MAKE_NAME_(SH_MAKE_PREFIX(SH_PREFIX),name)
+#define SH_MAKE_NAME_(a,b) CppConcat(a,b)
+
+#ifdef SH_STORE_HASH
+#define SH_COMPARE_KEYS(tb, ahash, akey, b) (ahash == SH_GET_HASH(tb, b) && SH_EQUAL(tb, b->SH_KEY, akey))
+#else
+#define SH_COMPARE_KEYS(tb, ahash, akey, b) (SH_EQUAL(tb, akey, b->SH_KEY))
+#endif
+
+/* type declarations */
+#define SH_TYPE  SH_MAKE_NAME(hash)
+#define SH_STATUS  SH_MAKE_NAME(status)
+#define SH_STATUS_EMPTY  SH_MAKE_NAME(EMPTY)
+#define SH_STATUS_IN_USE  SH_MAKE_NAME(IN_USE)
+#define SH_STATUS_DELETED  SH_MAKE_NAME(DELETED)
+#define SH_ITERATOR SH_MAKE_NAME(iterator)
+
+/* function declarations */
+#define SH_CREATE SH_MAKE_NAME(create)
+#define SH_DESTROY SH_MAKE_NAME(destroy)
+#define SH_INSERT SH_MAKE_NAME(insert)
+#define SH_DELETE SH_MAKE_NAME(delete)
+#define SH_LOOKUP SH_MAKE_NAME(lookup)
+#define SH_RESIZE SH_MAKE_NAME(resize)
+#define SH_START_ITERATE SH_MAKE_NAME(start_iterate)
+#define SH_ITERATE SH_MAKE_NAME(iterate)
+
+/* generate forward declarations necessary to use the hash table */
+#ifdef SH_DECLARE
+
+/* type definitions */
+typedef struct SH_TYPE
+{
+	uint32 size;
+	uint32 members;
+	uint32 deleted;
+	bool resizing;
+	SH_CONTAINS *data;
+	MemoryContext ctx;
+	void *private;
+} SH_TYPE;
+
+typedef enum SH_STATUS
+{
+	SH_STATUS_EMPTY = 0x00,
+	SH_STATUS_IN_USE = 0x01,
+	SH_STATUS_DELETED = 0x02,
+} SH_STATUS;
+
+typedef uint32 SH_ITERATOR;
+
+/* function prototypes */
+SH_SCOPE SH_TYPE * SH_CREATE(MemoryContext ctx, uint32 size);
+SH_SCOPE SH_CONTAINS * SH_INSERT(SH_TYPE *tb, SH_KEYTYPE key, bool *found);
+SH_SCOPE void SH_DESTROY(SH_TYPE *tb);
+SH_SCOPE void SH_RESIZE(SH_TYPE *tb, uint32 newsize);
+SH_SCOPE SH_CONTAINS * SH_INSERT(SH_TYPE *tb, SH_KEYTYPE key, bool *found);
+SH_SCOPE SH_CONTAINS * SH_LOOKUP(SH_TYPE *tb, SH_KEYTYPE key);
+SH_SCOPE bool SH_DELETE(SH_TYPE *tb, SH_KEYTYPE key);
+SH_SCOPE void SH_START_ITERATE(SH_TYPE *tb, SH_ITERATOR *iter);
+SH_SCOPE SH_CONTAINS * SH_ITERATE(SH_TYPE *tb, SH_ITERATOR *iter);
+
+#endif /* SH_DECLARE */
+
+
+/* generate implementation of the hash table */
+#ifdef SH_DEFINE
+
+/* FIXME: can we move these to a central location? */
+/* calculate ceil(log base 2) of num */
+static inline int
+sh_log2(long num)
+{
+	int			i;
+	long		limit;
+
+	/* guard against too-large input, which would put us into infinite loop */
+	if (num > LONG_MAX / 2)
+		num = LONG_MAX / 2;
+
+	for (i = 0, limit = 1; limit < num; i++, limit <<= 1)
+		;
+	return i;
+}
+
+/* calculate first power of 2 >= num, bounded to what will fit in an int */
+static inline int
+sh_pow2_int(long num)
+{
+	if (num > INT_MAX / 2)
+		num = INT_MAX / 2;
+	return 1 << sh_log2(num);
+}
+
+/*
+ * create a hash table of size `size`, allocating required memory in the
+ * passed-in context.
+ */
+SH_SCOPE SH_TYPE *
+SH_CREATE(MemoryContext ctx, uint32 size)
+{
+	SH_TYPE *tb;
+
+	/* round up size to the next power of 2, eases lookups */
+	size = sh_pow2_int(size);
+
+	tb = MemoryContextAllocZero(ctx, sizeof(SH_TYPE));
+	tb->ctx = ctx;
+	tb->size = size;
+	tb->data = MemoryContextAllocExtended(tb->ctx, sizeof(SH_CONTAINS) * tb->size,
+										  MCXT_ALLOC_HUGE | MCXT_ALLOC_ZERO);
+
+	return tb;
+}
+
+/* destroy a previously created hash table */
+SH_SCOPE void
+SH_DESTROY(SH_TYPE *tb)
+{
+	pfree(tb->data);
+	pfree(tb);
+}
+
+/*
+ * Resize a hash table to at least `newsize`.
+ *
+ * Usually this will automatically be called by insertions/deletions, when
+ * necessary. But resizing to the exact input size can be advantageous
+ * performance-wise, when known at some point.
+ */
+SH_SCOPE void
+SH_RESIZE(SH_TYPE *tb, uint32 newsize)
+{
+	uint32 oldsize = tb->size;
+	SH_CONTAINS *olddata = tb->data;
+	int i;
+
+	Assert(!tb->resizing);
+	Assert(oldsize == sh_pow2_int(oldsize));
+
+	/* round up size to the next power of 2, eases lookups */
+	newsize = sh_pow2_int(newsize);
+
+	tb->resizing = true;
+	tb->size = newsize;
+	tb->data = MemoryContextAllocExtended(tb->ctx, sizeof(SH_CONTAINS) * tb->size,
+										  MCXT_ALLOC_HUGE | MCXT_ALLOC_ZERO);
+
+	ereport(DEBUG2,
+			(errmsg("resizing: %u to %u", oldsize, tb->size),
+			 errhidestmt(true)));
+
+	for (i = 0; i < oldsize; i++)
+	{
+		SH_CONTAINS *oldentry = &olddata[i];
+		if (oldentry->status == SH_STATUS_IN_USE)
+		{
+			SH_CONTAINS *newentry;
+			bool found;
+			newentry = SH_INSERT(tb, oldentry->SH_KEY, &found);
+			if (found)
+				elog(ERROR, "duplicate during resizing");
+
+			memcpy(newentry, oldentry, sizeof(SH_CONTAINS));
+		}
+	}
+
+	pfree(olddata);
+	tb->resizing = false;
+	tb->deleted = 0;
+}
+
+/*
+ * Insert the key key into the hash-table, set *found to true if the key
+ * already exists, false otherwise. Returns the hash-table entry in either
+ * case.
+ */
+SH_SCOPE SH_CONTAINS *
+SH_INSERT(SH_TYPE *tb, SH_KEYTYPE key, bool *found)
+{
+	uint32 hash = SH_HASH_KEY(tb, key);
+	uint32 startelem;
+	uint32 curelem;
+	SH_CONTAINS *data;
+
+	/*
+	 * Double size at 75% fill-factor. We do this check even if the key is
+	 * actually present, to avoid doing the check inside the loop. This also
+	 * lets us avoid having to re-find our position in the hashtable after
+	 * resizing.
+	 */
+	/* FIXME: compute next increase boundary when resizing? */
+	if (unlikely(!tb->resizing &&
+				 ((tb->members + 1) * 4 > tb->size * 3)))
+	{
+		SH_RESIZE(tb, tb->size * 2);
+	}
+
+
+	/* Similarly compact when there's 10% toombstones. */
+	if (unlikely(!tb->resizing && tb->deleted * 10 >= tb->size))
+	{
+		elog(LOG, "compacting");
+		SH_RESIZE(tb, tb->size);
+	}
+
+	startelem = hash & (tb->size - 1);
+	curelem = startelem;
+	data = tb->data;
+	while(true)
+	{
+		SH_CONTAINS *entry = &data[curelem];
+
+		if (entry->status == SH_STATUS_EMPTY)
+		{
+			tb->members++;
+			entry->SH_KEY = key;
+#ifdef SH_STORE_HASH
+			SH_GET_HASH(tb, entry) = hash;
+#endif
+			entry->status = SH_STATUS_IN_USE;
+			*found = false;
+			return entry;
+		}
+		else if (entry->status == SH_STATUS_IN_USE &&
+				 SH_COMPARE_KEYS(tb, hash, key, entry))
+		{
+			Assert(entry->status == SH_STATUS_IN_USE);
+			*found = true;
+			return entry;
+		}
+
+		/* deleted entry, unusable without checking later keys */
+
+		curelem++;
+
+		if (curelem == startelem)
+		{
+			/* XXX: shouldn't ever happen? */
+			elog(ERROR, "found no free hash-table entry");
+		}
+		else if (curelem >= tb->size)
+		{
+			curelem = 0;
+		}
+	}
+}
+
+
+/*
+ * Lookup up entry in hash table.  Returns NULL if key not present.
+ */
+SH_SCOPE SH_CONTAINS *
+SH_LOOKUP(SH_TYPE *tb, SH_KEYTYPE key)
+{
+	uint32 hash = SH_HASH_KEY(tb, key);
+	const uint32 startelem = hash & (tb->size - 1);
+	uint32 curelem = startelem;
+
+	while(true)
+	{
+		SH_CONTAINS *entry = &tb->data[curelem];
+
+		if (entry->status == SH_STATUS_EMPTY)
+		{
+			return NULL;
+		}
+
+		if (entry->status == SH_STATUS_IN_USE &&
+			SH_COMPARE_KEYS(tb, hash, key, entry))
+		{
+			return entry;
+		}
+
+		curelem++;
+
+		if (curelem == startelem)
+		{
+			/* XXX: shouldn't ever happen? */
+			elog(ERROR, "lookup wrapped");
+			return NULL;
+		}
+		else if (curelem >= tb->size)
+		{
+			curelem = 0;
+		}
+	}
+}
+
+/*
+ * Delete entry from hash table.  Returns whether key was present before
+ * deletion.
+ */
+SH_SCOPE bool
+SH_DELETE(SH_TYPE *tb, SH_KEYTYPE key)
+{
+	uint32 hash = SH_HASH_KEY(tb, key);
+	uint32 startelem = hash & (tb->size - 1);
+	uint32 curelem = startelem;
+
+	while(true)
+	{
+		SH_CONTAINS *entry = &tb->data[curelem];
+
+		if (entry->status == SH_STATUS_EMPTY)
+			return false;
+
+		if (entry->status == SH_STATUS_IN_USE &&
+			SH_COMPARE_KEYS(tb, hash, key, entry))
+		{
+			/*
+			 * XXX: Moving neighbouring entries would likely be the better
+			 * implementation.
+			 */
+			entry->status = SH_STATUS_DELETED;
+			tb->members--;
+			tb->deleted++;
+
+			return true;
+		}
+
+		curelem++;
+
+		if (curelem == startelem)
+		{
+			/* XXX: shouldn't ever happen? */
+			elog(ERROR, "delete wrapped");
+			return false;
+		}
+		else if (curelem >= tb->size)
+		{
+			curelem = 0;
+		}
+	}
+}
+
+/* Initialize iterator. */
+SH_SCOPE void
+SH_START_ITERATE(SH_TYPE *tb, SH_ITERATOR *iter)
+{
+	*iter = 0;
+}
+
+/*
+ * Iterate over all entries in the hash-table. Return the next occupied entry,
+ * or NULL if done.
+ */
+SH_SCOPE SH_CONTAINS *
+SH_ITERATE(SH_TYPE *tb, SH_ITERATOR *iter)
+{
+	while (true)
+	{
+		SH_CONTAINS *elem;
+		if (*iter >= tb->size)
+			return NULL;
+		elem = &tb->data[(*iter)++];
+		if (elem->status == SH_STATUS_IN_USE)
+		{
+			return elem;
+		}
+	}
+}
+
+#endif /* SH_DEFINE */
+
+
+/* undefine external paramters, so next hash table can be defined */
+#undef SH_PREFIX
+#undef SH_KEYTYPE
+#undef SH_KEY
+#undef SH_CONTAINS
+#undef SH_HASH_KEY
+#undef SH_SCOPE
+#undef SH_DECLARE
+#undef SH_DEFINE
+#undef SH_GET_HASH
+#undef SH_STORE_HASH
+
+/* undefine locally declared macros */
+#undef SH_MAKE_PREFIX
+#undef SH_MAKE_NAME
+#undef SH_MAKE_NAME_
+
+#undef SH_COMPARE_KEYS
+
+#undef SH_TYPE
+#undef SH_STATUS
+#undef SH_STATUS_EMPTY
+#undef SH_STATUS_IN_USE
+#undef SH_STATUS_DELETED
+#undef SH_ITERTOR
+
+#undef SH_CREATE
+#undef SH_DESTROY
+#undef SH_INSERT
+#undef SH_DELETE
+#undef SH_LOOKUP
+#undef SH_RESIZE
+#undef SH_START_ITERATE
+#undef SH_ITERATE
-- 
2.8.1

