diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index 589b2f1..18fd4a9 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -47,6 +47,7 @@ static void ExecHashSkewTableInsert(HashJoinTable hashtable,
 						int bucketNumber);
 static void ExecHashRemoveNextSkewBucket(HashJoinTable hashtable);
 
+static char * chunk_alloc(HashJoinTable hashtable, int tupleSize);
 
 /* ----------------------------------------------------------------
  *		ExecHash
@@ -130,6 +131,9 @@ MultiExecHash(HashState *node)
 	if (node->ps.instrument)
 		InstrStopNode(node->ps.instrument, hashtable->totalTuples);
 
+	/* XXX debugging aid: dump batchCxt memory statistics (presumably not meant to stay) */
+	MemoryContextStats(hashtable->batchCxt);
+	
 	/*
 	 * We do not return the hash table directly because it's not a subtype of
 	 * Node, and so would violate the MultiExecProcNode API.  Instead, our
@@ -223,6 +227,8 @@ ExecEndHash(HashState *node)
 	ExecEndNode(outerPlan);
 }
 
+/* Size (32kB) of each chunk that chunk_alloc() requests from batchCxt; larger requests bypass chunks */
+#define CHUNK_SIZE	(32*1024L)
 
 /* ----------------------------------------------------------------
  *		ExecHashTableCreate
@@ -294,6 +300,10 @@ ExecHashTableCreate(Hash *node, List *hashOperators, bool keepNulls)
 	hashtable->spaceAllowedSkew =
 		hashtable->spaceAllowed * SKEW_WORK_MEM_PERCENT / 100;
 
+	hashtable->chunk_data = NULL;
+	hashtable->chunk_used = 0;
+	hashtable->chunk_length = 0;
+
 	/*
 	 * Get info about the hash functions to be used for each hash key. Also
 	 * remember whether the join operators are strict.
@@ -717,8 +727,8 @@ ExecHashTableInsert(HashJoinTable hashtable,
 
 		/* Create the HashJoinTuple */
 		hashTupleSize = HJTUPLE_OVERHEAD + tuple->t_len;
-		hashTuple = (HashJoinTuple) MemoryContextAlloc(hashtable->batchCxt,
-													   hashTupleSize);
+		hashTuple = (HashJoinTuple) chunk_alloc(hashtable, hashTupleSize);
+
 		hashTuple->hashvalue = hashvalue;
 		memcpy(HJTUPLE_MINTUPLE(hashTuple), tuple, tuple->t_len);
 
@@ -1068,6 +1078,13 @@ ExecHashTableReset(HashJoinTable hashtable)
 	hashtable->spaceUsed = 0;
 
 	MemoryContextSwitchTo(oldcxt);
+
+	/* reset the chunks too (the memory was allocated within batchCxt, so it's
+	 * already freed) */
+	hashtable->chunk_data = NULL;
+	hashtable->chunk_length = 0;
+	hashtable->chunk_used = 0;
+
 }
 
 /*
@@ -1318,6 +1335,31 @@ ExecHashGetSkewBucket(HashJoinTable hashtable, uint32 hashvalue)
 	return INVALID_SKEW_BUCKET_NO;
 }
 
+/*
+ * chunk_alloc - carve MAXALIGN'ed space for one hash tuple out of the current
+ * chunk, so that every densely packed tuple starts on an aligned address.
+ */
+static char *
+chunk_alloc(HashJoinTable hashtable, int tupleSize)
+{
+	Size		size = MAXALIGN(tupleSize);
+
+	/* oversized requests bypass the chunks (TODO maybe use ~20% of chunk size) */
+	if (size > CHUNK_SIZE)
+		return MemoryContextAlloc(hashtable->batchCxt, size);
+
+	/* no room left (or no chunk yet: NULL chunk has length=used=0) => new chunk */
+	if (hashtable->chunk_length - hashtable->chunk_used < size)
+	{
+		hashtable->chunk_data = MemoryContextAlloc(hashtable->batchCxt, CHUNK_SIZE);
+		hashtable->chunk_used = 0;
+		hashtable->chunk_length = CHUNK_SIZE;
+	}
+
+	hashtable->chunk_used += size;
+	return hashtable->chunk_data + (hashtable->chunk_used - size);
+}
+
 /*
  * ExecHashSkewTableInsert
  *
@@ -1338,8 +1380,8 @@ ExecHashSkewTableInsert(HashJoinTable hashtable,
 
 	/* Create the HashJoinTuple */
 	hashTupleSize = HJTUPLE_OVERHEAD + tuple->t_len;
-	hashTuple = (HashJoinTuple) MemoryContextAlloc(hashtable->batchCxt,
-												   hashTupleSize);
+	hashTuple = (HashJoinTuple) chunk_alloc(hashtable, hashTupleSize);
+
 	hashTuple->hashvalue = hashvalue;
 	memcpy(HJTUPLE_MINTUPLE(hashTuple), tuple, tuple->t_len);
 	HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(hashTuple));
diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h
index 3beae40..3e63175 100644
--- a/src/include/executor/hashjoin.h
+++ b/src/include/executor/hashjoin.h
@@ -157,6 +157,11 @@ typedef struct HashJoinTableData
 
 	MemoryContext hashCxt;		/* context for whole-hash-join storage */
 	MemoryContext batchCxt;		/* context for this-batch-only storage */
+
+	char	   *chunk_data;		/* memory for dense-packing tuples */
+	Size		chunk_length;	/* size of the chunk */
+	Size		chunk_used;		/* currently-allocated memory in the chunk */
+
 }	HashJoinTableData;
 
 #endif   /* HASHJOIN_H */
