From efc603dd67a1c95f2d24844b86436ed6b7a28c80 Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Wed, 23 Nov 2016 00:23:42 -0800
Subject: [PATCH 1/3] Resize simplehash.h table in case of long runs.

This also allows to increase the default hashtable.
---
 src/include/lib/simplehash.h | 36 +++++++++++++++++++++++++++++++++---
 1 file changed, 33 insertions(+), 3 deletions(-)

diff --git a/src/include/lib/simplehash.h b/src/include/lib/simplehash.h
index 12aedbc..9522fa5 100644
--- a/src/include/lib/simplehash.h
+++ b/src/include/lib/simplehash.h
@@ -152,10 +152,12 @@ SH_SCOPE void SH_STAT(SH_TYPE *tb);
 
 #include "utils/memutils.h"
 
-/* conservative fillfactor for a robin hood table, might want to adjust */
-#define SH_FILLFACTOR (0.8)
+/* normal fillfactor, unless already close to maximum */
+#define SH_FILLFACTOR (0.9)
 /* increase fillfactor if we otherwise would error out */
-#define SH_MAX_FILLFACTOR (0.95)
+#define SH_MAX_FILLFACTOR (0.98)
+/* collision chain length at which we resize */
+#define SH_MAX_FILLFACTOR (0.98)
 /* max data array size,we allow up to PG_UINT32_MAX buckets, including 0 */
 #define SH_MAX_SIZE (((uint64) PG_UINT32_MAX) + 1)
 
@@ -550,6 +552,34 @@ SH_INSERT(SH_TYPE *tb, SH_KEY_TYPE key, bool *found)
 #endif
 			entry->status = SH_STATUS_IN_USE;
 			*found = false;
+
+			/*
+			 * To avoid negative consequences from overly imbalanced
+			 * hashtables, grow the hashtable if collisions lead to large
+			 * runs. The most likely cause of such imbalance is filling a
+			 * (currently) small table, from a currently big one, in
+			 * hash-table order.
+			 *
+			 * FIXME: compute boundaries in a more principled manner.
+			 */
+			if (unlikely(insertdist > 20 ||
+						 SH_DISTANCE_FROM_OPTIMAL(tb, curoptimal, emptyelem) > 1000))
+			{
+				/* don't grow if the table would grow overly much */
+				if (tb->members / ((double) tb->size) > 0.1)
+				{
+					/*
+					 * elog(WARNING, "clumping b, growing this thing");
+					 * SH_STAT(tb);
+					 */
+					/*
+					 * Trigger a growth cycle next round, easier than resizing
+					 * now.
+					 */
+					tb->grow_threshold = 0;
+				}
+			}
+
 			return entry;
 		}
 
-- 
2.10.2

