From 93d157dd5936301c38152a0d8c31939a6d7102db Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Thu, 7 Jul 2016 14:47:19 -0700
Subject: [PATCH 3/4] WIP: Use more efficient hashtable for tidbitmap.c

---
 src/backend/nodes/tidbitmap.c | 109 +++++++++++++++++++++++-------------------
 1 file changed, 59 insertions(+), 50 deletions(-)

diff --git a/src/backend/nodes/tidbitmap.c b/src/backend/nodes/tidbitmap.c
index dfeb7d5..572674b 100644
--- a/src/backend/nodes/tidbitmap.c
+++ b/src/backend/nodes/tidbitmap.c
@@ -97,6 +97,7 @@
 typedef struct PagetableEntry
 {
 	BlockNumber blockno;		/* page number (hashtable key) */
+	char		status;
 	bool		ischunk;		/* T = lossy storage, F = exact */
 	bool		recheck;		/* should the tuples be rechecked? */
 	bitmapword	words[Max(WORDS_PER_PAGE, WORDS_PER_CHUNK)];
@@ -128,7 +129,7 @@ struct TIDBitmap
 	NodeTag		type;			/* to make it a valid Node */
 	MemoryContext mcxt;			/* memory context containing me */
 	TBMStatus	status;			/* see codes above */
-	HTAB	   *pagetable;		/* hash table of PagetableEntry's */
+	struct pagetable_hash *pagetable; /* hash table of PagetableEntry's */
 	int			nentries;		/* number of entries in pagetable */
 	int			maxentries;		/* limit on same to meet maxbytes */
 	int			npages;			/* number of exact entries in pagetable */
@@ -168,6 +169,29 @@ static void tbm_mark_page_lossy(TIDBitmap *tbm, BlockNumber pageno);
 static void tbm_lossify(TIDBitmap *tbm);
 static int	tbm_comparator(const void *left, const void *right);
 
+static inline uint32
+hash_blockno(BlockNumber b)
+{
+	uint32 h = b;
+	h ^= h >> 16;
+	h *= 0x85ebca6b;
+	h ^= h >> 13;
+	h *= 0xc2b2ae35;
+	h ^= h >> 16;
+	return h;
+}
+
+#define SH_PREFIX pagetable
+#define SH_KEYTYPE BlockNumber
+#define SH_KEY blockno
+#define SH_CONTAINS PagetableEntry
+#define SH_HASH_KEY(tb, key) hash_blockno(key)
+#define SH_EQUAL(tb, a, b) a == b
+#define SH_SCOPE static
+#define SH_DEFINE
+#define SH_DECLARE
+#include "lib/simplehash.h"
+
 
 /*
  * tbm_create - create an initially-empty bitmap
@@ -212,32 +236,24 @@ tbm_create(long maxbytes)
 static void
 tbm_create_pagetable(TIDBitmap *tbm)
 {
-	HASHCTL		hash_ctl;
-
 	Assert(tbm->status != TBM_HASH);
 	Assert(tbm->pagetable == NULL);
 
-	/* Create the hashtable proper */
-	MemSet(&hash_ctl, 0, sizeof(hash_ctl));
-	hash_ctl.keysize = sizeof(BlockNumber);
-	hash_ctl.entrysize = sizeof(PagetableEntry);
-	hash_ctl.hcxt = tbm->mcxt;
-	tbm->pagetable = hash_create("TIDBitmap",
-								 128,	/* start small and extend */
-								 &hash_ctl,
-								 HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+	tbm->pagetable = pagetable_create(tbm->mcxt, 128);
 
-	/* If entry1 is valid, push it into the hashtable */
 	if (tbm->status == TBM_ONE_PAGE)
 	{
 		PagetableEntry *page;
 		bool		found;
+		char		oldstatus;
 
-		page = (PagetableEntry *) hash_search(tbm->pagetable,
-											  (void *) &tbm->entry1.blockno,
-											  HASH_ENTER, &found);
+		page = pagetable_insert(tbm->pagetable,
+								tbm->entry1.blockno,
+								&found);
 		Assert(!found);
+		oldstatus = page->status;
 		memcpy(page, &tbm->entry1, sizeof(PagetableEntry));
+		page->status = oldstatus;
 	}
 
 	tbm->status = TBM_HASH;
@@ -250,7 +266,7 @@ void
 tbm_free(TIDBitmap *tbm)
 {
 	if (tbm->pagetable)
-		hash_destroy(tbm->pagetable);
+		pagetable_destroy(tbm->pagetable);
 	if (tbm->spages)
 		pfree(tbm->spages);
 	if (tbm->schunks)
@@ -357,12 +373,12 @@ tbm_union(TIDBitmap *a, const TIDBitmap *b)
 		tbm_union_page(a, &b->entry1);
 	else
 	{
-		HASH_SEQ_STATUS status;
+		pagetable_iterator i;
 		PagetableEntry *bpage;
 
 		Assert(b->status == TBM_HASH);
-		hash_seq_init(&status, b->pagetable);
-		while ((bpage = (PagetableEntry *) hash_seq_search(&status)) != NULL)
+		pagetable_start_iterate(b->pagetable, &i);
+		while ((bpage = pagetable_iterate(b->pagetable, &i)) != NULL)
 			tbm_union_page(a, bpage);
 	}
 }
@@ -449,12 +465,12 @@ tbm_intersect(TIDBitmap *a, const TIDBitmap *b)
 	}
 	else
 	{
-		HASH_SEQ_STATUS status;
+		pagetable_iterator i;
 		PagetableEntry *apage;
 
 		Assert(a->status == TBM_HASH);
-		hash_seq_init(&status, a->pagetable);
-		while ((apage = (PagetableEntry *) hash_seq_search(&status)) != NULL)
+		pagetable_start_iterate(a->pagetable, &i);
+		while ((apage = pagetable_iterate(a->pagetable, &i)) != NULL)
 		{
 			if (tbm_intersect_page(a, apage, b))
 			{
@@ -464,9 +480,7 @@ tbm_intersect(TIDBitmap *a, const TIDBitmap *b)
 				else
 					a->npages--;
 				a->nentries--;
-				if (hash_search(a->pagetable,
-								(void *) &apage->blockno,
-								HASH_REMOVE, NULL) == NULL)
+				if (!pagetable_delete(a->pagetable, apage->blockno))
 					elog(ERROR, "hash table corrupted");
 			}
 		}
@@ -606,7 +620,7 @@ tbm_begin_iterate(TIDBitmap *tbm)
 	 */
 	if (tbm->status == TBM_HASH && !tbm->iterating)
 	{
-		HASH_SEQ_STATUS status;
+		pagetable_iterator	i;
 		PagetableEntry *page;
 		int			npages;
 		int			nchunks;
@@ -620,9 +634,9 @@ tbm_begin_iterate(TIDBitmap *tbm)
 				MemoryContextAlloc(tbm->mcxt,
 								   tbm->nchunks * sizeof(PagetableEntry *));
 
-		hash_seq_init(&status, tbm->pagetable);
 		npages = nchunks = 0;
-		while ((page = (PagetableEntry *) hash_seq_search(&status)) != NULL)
+		pagetable_start_iterate(tbm->pagetable, &i);
+		while ((page = pagetable_iterate(tbm->pagetable, &i)) != NULL)
 		{
 			if (page->ischunk)
 				tbm->schunks[nchunks++] = page;
@@ -791,9 +805,7 @@ tbm_find_pageentry(const TIDBitmap *tbm, BlockNumber pageno)
 		return page;
 	}
 
-	page = (PagetableEntry *) hash_search(tbm->pagetable,
-										  (void *) &pageno,
-										  HASH_FIND, NULL);
+	page = pagetable_lookup(tbm->pagetable, pageno);
 	if (page == NULL)
 		return NULL;
 	if (page->ischunk)
@@ -833,16 +845,15 @@ tbm_get_pageentry(TIDBitmap *tbm, BlockNumber pageno)
 			tbm_create_pagetable(tbm);
 		}
 
-		/* Look up or create an entry */
-		page = (PagetableEntry *) hash_search(tbm->pagetable,
-											  (void *) &pageno,
-											  HASH_ENTER, &found);
+		page = pagetable_insert(tbm->pagetable, pageno, &found);
 	}
 
 	/* Initialize it if not present before */
 	if (!found)
 	{
+		char oldstatus = page->status;
 		MemSet(page, 0, sizeof(PagetableEntry));
+		page->status = oldstatus;
 		page->blockno = pageno;
 		/* must count it too */
 		tbm->nentries++;
@@ -869,9 +880,9 @@ tbm_page_is_lossy(const TIDBitmap *tbm, BlockNumber pageno)
 
 	bitno = pageno % PAGES_PER_CHUNK;
 	chunk_pageno = pageno - bitno;
-	page = (PagetableEntry *) hash_search(tbm->pagetable,
-										  (void *) &chunk_pageno,
-										  HASH_FIND, NULL);
+
+	page = pagetable_lookup(tbm->pagetable, chunk_pageno);
+
 	if (page != NULL && page->ischunk)
 	{
 		int			wordnum = WORDNUM(bitno);
@@ -912,9 +923,7 @@ tbm_mark_page_lossy(TIDBitmap *tbm, BlockNumber pageno)
 	 */
 	if (bitno != 0)
 	{
-		if (hash_search(tbm->pagetable,
-						(void *) &pageno,
-						HASH_REMOVE, NULL) != NULL)
+		if (pagetable_delete(tbm->pagetable, pageno))
 		{
 			/* It was present, so adjust counts */
 			tbm->nentries--;
@@ -922,15 +931,14 @@ tbm_mark_page_lossy(TIDBitmap *tbm, BlockNumber pageno)
 		}
 	}
 
-	/* Look up or create entry for chunk-header page */
-	page = (PagetableEntry *) hash_search(tbm->pagetable,
-										  (void *) &chunk_pageno,
-										  HASH_ENTER, &found);
+	page = pagetable_insert(tbm->pagetable, chunk_pageno, &found);
 
 	/* Initialize it if not present before */
 	if (!found)
 	{
+		char oldstatus = page->status;
 		MemSet(page, 0, sizeof(PagetableEntry));
+		page->status = oldstatus;
 		page->blockno = chunk_pageno;
 		page->ischunk = true;
 		/* must count it too */
@@ -939,8 +947,10 @@ tbm_mark_page_lossy(TIDBitmap *tbm, BlockNumber pageno)
 	}
 	else if (!page->ischunk)
 	{
+		char oldstatus = page->status;
 		/* chunk header page was formerly non-lossy, make it lossy */
 		MemSet(page, 0, sizeof(PagetableEntry));
+		page->status = oldstatus;
 		page->blockno = chunk_pageno;
 		page->ischunk = true;
 		/* we assume it had some tuple bit(s) set, so mark it lossy */
@@ -962,7 +972,7 @@ tbm_mark_page_lossy(TIDBitmap *tbm, BlockNumber pageno)
 static void
 tbm_lossify(TIDBitmap *tbm)
 {
-	HASH_SEQ_STATUS status;
+	pagetable_iterator i;
 	PagetableEntry *page;
 
 	/*
@@ -977,8 +987,8 @@ tbm_lossify(TIDBitmap *tbm)
 	Assert(!tbm->iterating);
 	Assert(tbm->status == TBM_HASH);
 
-	hash_seq_init(&status, tbm->pagetable);
-	while ((page = (PagetableEntry *) hash_seq_search(&status)) != NULL)
+	pagetable_start_iterate(tbm->pagetable, &i);
+	while ((page = pagetable_iterate(tbm->pagetable, &i)) != NULL)
 	{
 		if (page->ischunk)
 			continue;			/* already a chunk header */
@@ -996,7 +1006,6 @@ tbm_lossify(TIDBitmap *tbm)
 		if (tbm->nentries <= tbm->maxentries / 2)
 		{
 			/* we have done enough */
-			hash_seq_term(&status);
 			break;
 		}
 
-- 
2.8.1

