diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index 53642d1..a4623dc 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -58,7 +58,7 @@ static void ExecHashRemoveNextSkewBucket(HashJoinTable hashtable);
  */
 
 /* Target bucket loading (tuples per bucket) */
-#define NTUP_PER_BUCKET			10
+#define NTUP_PER_BUCKET			4
 
 /* Multiple of NTUP_PER_BUCKET triggering the increase of nbuckets.
  * 
@@ -77,6 +77,8 @@ static void ExecHashRemoveNextSkewBucket(HashJoinTable hashtable);
 #define NTUP_GROW_COEFFICIENT	1.333
 #define NTUP_GROW_THRESHOLD		(NTUP_PER_BUCKET * NTUP_GROW_COEFFICIENT)
 
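+/*
+ * Space the hash table would use with 'nbuckets' buckets: the tuples
+ * accounted for in spaceUsed plus one pointer per bucket.
+ */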
+#define SPACE_USED(hashtable, nbuckets) ((hashtable)->spaceUsed + (nbuckets) * sizeof(void*))
+
 /* ----------------------------------------------------------------
  *		ExecHash
  *
@@ -156,21 +158,33 @@ MultiExecHash(HashState *node)
 		}
 	}
 
-	/* If average number of tuples per bucket is over the defined threshold,
-	 * increase the number of buckets to get below it. */
+	/*
+	 * Consider resizing the hash table (number of buckets) for better
+	 * lookup performance. The code in ExecHashTableInsert guarantees
+	 * enough memory to reach NTUP_PER_BUCKET tuples per bucket, but we
+	 * may be able to do better and reduce the average load further
+	 * (down to one tuple per bucket).
+	 */
 	if (enable_hashjoin_bucket) {
 
-		/* consider only tuples in the non-skew buckets */
-		double nonSkewTuples = (hashtable->totalTuples - hashtable->skewTuples);
-
-		if ((nonSkewTuples / hashtable->nbatch) > (hashtable->nbuckets * NTUP_GROW_THRESHOLD)) {
+		instr_time	start_time, end_time;
 
 #ifdef HJDEBUG
-			printf("Increasing nbucket to %d (average per bucket = %.1f)\n",
-				   nbuckets,  (batchTuples / hashtable->nbuckets));
+		printf("Resizing hash table (nbuckets=%d, average per bucket = %.1f)\n",
+			   hashtable->nbuckets,
+			   (hashtable->totalTuples - hashtable->skewTuples) / hashtable->nbatch / hashtable->nbuckets);
 #endif
-			ExecHashIncreaseNumBuckets(hashtable);
-		}
+
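+		/* Report the resize and measure how long it takes. */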
+		elog(WARNING, "hash resize (start) : nbuckets=%d", hashtable->nbuckets);
+
+		INSTR_TIME_SET_CURRENT(start_time);
+
+		ExecHashIncreaseNumBuckets(hashtable);
+
+		INSTR_TIME_SET_CURRENT(end_time);
+		INSTR_TIME_SUBTRACT(end_time, start_time);
+
+		elog(WARNING, "hash resize (end) : nbuckets=%d duration=%.3f ms",
+			 hashtable->nbuckets, INSTR_TIME_GET_MILLISEC(end_time));
+
 	}
 
 	/* must provide our own instrumentation support */
@@ -738,35 +752,34 @@ ExecHashIncreaseNumBuckets(HashJoinTable hashtable)
 	int			oldnbuckets = hashtable->nbuckets;
 	HashJoinTuple  *oldbuckets = hashtable->buckets;
 	MemoryContext   oldcxt;
-	double		batchTuples = (hashtable->totalTuples / hashtable->nbatch);
+
+	/* average number of (non-skew) tuples per batch */
+	double		batchTuples = (hashtable->totalTuples - hashtable->skewTuples) / hashtable->nbatch;
+
+	/* memory still available for the bucket array */
+	Size		freeMemory = (hashtable->spaceAllowed - hashtable->spaceUsed);
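+
+	/*
+	 * Note that spaceUsed tracks only the tuples, not the current bucket
+	 * array (see the XXX note below), so freeMemory is a slightly
+	 * optimistic estimate.
+	 */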
 
 	/*
-	 * Determine the proper number of buckets, i.e. stop once the average
-	 * per bucket gets below the threshold (1.33 * NTUP_PER_BUCKET).
-	 * 
-	 * Also, check for overflow - this can only happen with extremely large
-	 * work_mem values, because (INT_MAX/2) means ~8GB only for the buckets.
-	 * With tuples, the hash table would require tens of GBs of work_mem.
-	 * 
-	 * XXX Technically there's also a limit for buckets fitting into work_mem
-	 * (with NTUP_PER_BUCKET tuples), but this can't be really exceeded
-	 * because when filling work_mem, another batch will be added (thus the
-	 * number of tuples will drop and more buckets won't be needed anymore).
-	 * 
-	 * That is, something like this will be enforced implicitly:
-	 * 
-	 *    work_mem * 1024L >= (nbuckets * tupsize * NTUP_GROW_THRESHOLD)
-	 * 
-	 * So it's enough to check only the overflow here.
+	 * Start with the current number of buckets and keep doubling it until
+	 * we reach at least one of these conditions:
+	 *
+	 * (a) nbuckets reaches the number of tuples in the batch (i.e. at
+	 *     most 1 tuple per bucket on average)
+	 *
+	 * (b) the doubled bucket array would not fit into work_mem
+	 *
+	 * We're guaranteed to have enough memory for NTUP_PER_BUCKET tuples
+	 * per bucket, but going beyond that may not be possible.
 	 */
 
-	/* double the number of buckets until we get below the growth threshold, or
-	 * until we hit the overflow protection */
-	while ((batchTuples > (hashtable->nbuckets * NTUP_GROW_THRESHOLD))
-			&& (hashtable->nbuckets <= (INT_MAX/2))) {
-		hashtable->nbuckets *= 2;
-		hashtable->log2_nbuckets += 1;
-	}
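+	/*
+	 * For instance (illustrative numbers only): with batchTuples = 10000
+	 * and nbuckets = 1024, this doubles 1024 -> 2048 -> 4096 -> 8192 ->
+	 * 16384 and stops, i.e. roughly 0.6 tuples per bucket on average,
+	 * provided the doubled bucket array still fits into freeMemory.
+	 */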
+	while ((hashtable->nbuckets < batchTuples) &&
+		   (hashtable->nbuckets <= (INT_MAX / 2)) &&
+		   ((Size) (2 * hashtable->nbuckets) * sizeof(void *) <= freeMemory)) {
+		hashtable->nbuckets *= 2;
+		hashtable->log2_nbuckets += 1;
+	}
+
+	/* no change, the hashtable is already sized properly */
+	if (oldnbuckets == hashtable->nbuckets)
+		return;
 
 	/* XXX Not sure if we should update the info about used space here.
 	 * The code seems to ignore the space used for 'buckets' and we're not
@@ -870,6 +883,13 @@ ExecHashTableInsert(HashJoinTable hashtable,
 		HashJoinTuple hashTuple;
 		int			hashTupleSize;
 
+		/*
+		 * How many buckets would we need for the current number of tuples
+		 * per batch (at NTUP_PER_BUCKET tuples per bucket)? Round up to
+		 * the next power of 2, as my_log2 only gives us the exponent.
+		 */
+		double		batchTuples = (hashtable->totalTuples - hashtable->skewTuples) / hashtable->nbatch;
+		int			nbuckets = 1 << my_log2((long) ceil(batchTuples / NTUP_PER_BUCKET));
+
 		/* Create the HashJoinTuple */
 		hashTupleSize = HJTUPLE_OVERHEAD + tuple->t_len;
 		hashTuple = (HashJoinTuple) MemoryContextAlloc(hashtable->batchCxt,
@@ -893,7 +913,7 @@ ExecHashTableInsert(HashJoinTable hashtable,
 		hashtable->spaceUsed += hashTupleSize;
 		if (hashtable->spaceUsed > hashtable->spacePeak)
 			hashtable->spacePeak = hashtable->spaceUsed;
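+
+		/*
+		 * When deciding whether to increase the number of batches, count
+		 * not just the tuples but also the bucket array we'd need for
+		 * them, so that the later resize is guaranteed to fit.
+		 */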
-		if (hashtable->spaceUsed > hashtable->spaceAllowed)
+		if (SPACE_USED(hashtable, nbuckets) > hashtable->spaceAllowed)
 			ExecHashIncreaseNumBatches(hashtable);
 
 	}