diff -cr --new-file pgsql/src/backend/executor/nodeHash.c pgsql-bfhj/src/backend/executor/nodeHash.c *** pgsql/src/backend/executor/nodeHash.c Tue Jan 1 14:45:49 2008 --- pgsql-bfhj/src/backend/executor/nodeHash.c Sun Nov 2 15:47:37 2008 *************** *** 32,37 **** --- 32,38 ---- #include "executor/nodeHashjoin.h" #include "miscadmin.h" #include "parser/parse_expr.h" + #include "utils/bloomfn.h" #include "utils/dynahash.h" #include "utils/memutils.h" #include "utils/lsyscache.h" *************** *** 86,91 **** --- 87,95 ---- hashkeys = node->hashkeys; econtext = node->ps.ps_ExprContext; + /* initialize a Bloom filter for the relation */ + bloom_filter_init(node, outerNode->plan->plan_rows); + /* * get all inner tuples and insert into the hash table (or temp files) */ *************** *** 100,105 **** --- 104,110 ---- &hashvalue)) { ExecHashTableInsert(hashtable, slot, hashvalue); + bloom_filter_add(node, hashvalue); hashtable->totalTuples += 1; } } *************** *** 140,145 **** --- 145,152 ---- hashstate->ps.state = estate; hashstate->hashtable = NULL; hashstate->hashkeys = NIL; /* will be set by parent HashJoin */ + hashstate->bf_bitvector = NULL; + hashstate->bf_len = 0; /* * Miscellaneous initialization *************** *** 205,210 **** --- 212,222 ---- ExecFreeExprContext(&node->ps); /* + * free the Bloom filter + */ + bloom_filter_free(node); + + /* * shut down the subplan */ outerPlan = outerPlanState(node); diff -cr --new-file pgsql/src/backend/executor/nodeHashjoin.c pgsql-bfhj/src/backend/executor/nodeHashjoin.c *** pgsql/src/backend/executor/nodeHashjoin.c Thu Oct 23 10:34:34 2008 --- pgsql-bfhj/src/backend/executor/nodeHashjoin.c Sun Nov 2 15:09:11 2008 *************** *** 19,24 **** --- 19,25 ---- #include "executor/hashjoin.h" #include "executor/nodeHash.h" #include "executor/nodeHashjoin.h" + #include "utils/bloomfn.h" #include "utils/memutils.h" *************** *** 193,198 **** --- 194,213 ---- return NULL; } + /* + * before spending a significant amount of time scanning hash + * buckets for non-outer hash joins, we'll perform a little + * join-filter pruning by first querying the Bloom filter to + * see whether there is no possibility of a match, and if so, + * we'll skip immediately to retrieving the next outer tuple. + */ + if (node->js.jointype == JOIN_INNER + && bloom_filter_query(hashNode, hashvalue) == false) + { + node->hj_NeedNewOuter = true; + continue; + } + econtext->ecxt_outertuple = outerTupleSlot; node->hj_NeedNewOuter = false; node->hj_MatchedOuter = false; diff -cr --new-file pgsql/src/backend/utils/hash/Makefile pgsql-bfhj/src/backend/utils/hash/Makefile *** pgsql/src/backend/utils/hash/Makefile Tue Feb 19 05:30:08 2008 --- pgsql-bfhj/src/backend/utils/hash/Makefile Thu Oct 30 21:38:25 2008 *************** *** 12,17 **** top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global ! OBJS = dynahash.o hashfn.o pg_crc.o include $(top_srcdir)/src/backend/common.mk --- 12,17 ---- top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global ! OBJS = bloomfn.o dynahash.o hashfn.o pg_crc.o include $(top_srcdir)/src/backend/common.mk diff -cr --new-file pgsql/src/backend/utils/hash/bloomfn.c pgsql-bfhj/src/backend/utils/hash/bloomfn.c *** pgsql/src/backend/utils/hash/bloomfn.c Wed Dec 31 19:00:00 1969 --- pgsql-bfhj/src/backend/utils/hash/bloomfn.c Sun Nov 2 15:42:33 2008 *************** *** 0 **** --- 1,109 ---- + /*------------------------------------------------------------------------- + * + * bloomfn.c + * Bloom filter functions + * + * + * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header$ + * + * NOTES + * N/A + * + *------------------------------------------------------------------------- + */ + #include "postgres.h" + + #include "utils/bloomfn.h" + #include "access/hash.h" + #include "nodes/nodes.h" + #include "nodes/execnodes.h" + + bool bloom_pruning = false; + + /* Initializes Bloom Filter */ + void + bloom_filter_init (HashState *node, double cardinality) + { + // uint32 num_bytes = (cardinality * 1.5); /* Start with a reasonable value */ + uint32 num_bytes = 2048; /* Start with a reasonable value */ + + if (bloom_pruning == false) + return; + + /* Find an optimal bloom filter size based on false probability */ + while (pow((1 - pow(M_E, -((double) cardinality / (num_bytes * BITS_PER_BYTE)))), 1.0) > 0.2) + { + /* No greater than 1MB */ + if (num_bytes <= 524288) + num_bytes *= 2; + else + break; + } + + // Temporary Debug + elog(LOG, "Bloom filter size is %d bytes (cardinality %f)", num_bytes, cardinality); + + /* Perform the allocation */ + node->bf_bitvector = (char *) palloc0(num_bytes); + node->bf_len = num_bytes; + } + + void + bloom_filter_free (HashState *node) + { + if (PointerIsValid(node->bf_bitvector)) + { + pfree(node->bf_bitvector); + node->bf_len = 0; + } + } + + void + bloom_filter_add (HashState *node, uint32 hashvalue /*, uint32 hashvalue2 */) + { + uint32 bit; + uint32 map_offset; + uint32 bit_mask; + uint32 hkey; + + if (node->bf_len == 0) + return; + + /* Set the initial bits based on the Jenkins hash */ + hkey = hashvalue; + bit = hkey % (node->bf_len * BITS_PER_BYTE); + map_offset = floor(bit / 8); + bit_mask = 1 << (bit & 7); + node->bf_bitvector[map_offset] |= bit_mask; + + return; + } + + bool + bloom_filter_query (HashState *node, uint32 hashvalue /*, uint32 hashvalue2 */) + { + uint32 bit; + uint32 map_offset; + uint32 bit_mask; + uint32 hkey; + + if (node->bf_len == 0) + return true; + + /* Set the initial bits based on the Jenkins hash result */ + hkey = hashvalue; + bit = hkey % (node->bf_len * BITS_PER_BYTE); + map_offset = floor(bit / 8); + bit_mask = 1 << (bit & 7); + + if (!(node->bf_bitvector[map_offset] & bit_mask)) + return false; /* bit not set */ + + return true; /* Match Found! */ + } + diff -cr --new-file pgsql/src/backend/utils/misc/guc.c pgsql-bfhj/src/backend/utils/misc/guc.c *** pgsql/src/backend/utils/misc/guc.c Mon Oct 6 09:05:36 2008 --- pgsql-bfhj/src/backend/utils/misc/guc.c Sun Nov 2 15:10:21 2008 *************** *** 59,64 **** --- 59,65 ---- #include "storage/fd.h" #include "tcop/tcopprot.h" #include "tsearch/ts_cache.h" + #include "utils/bloomfn.h" #include "utils/builtins.h" #include "utils/guc_tables.h" #include "utils/memutils.h" *************** *** 625,630 **** --- 626,640 ---- true, NULL, NULL }, { + {"bloom_pruning", PGC_USERSET, QUERY_TUNING_OTHER, + gettext_noop("Enables the executor's use of Bloom filters."), + gettext_noop("Bloom pruning helps the executor avoid processing " + "unnecessary tuples in hash joins.") + }, + &bloom_pruning, + false, NULL, NULL + }, + { {"constraint_exclusion", PGC_USERSET, QUERY_TUNING_OTHER, gettext_noop("Enables the planner to use constraints to optimize queries."), gettext_noop("Child table scans will be skipped if their " diff -cr --new-file pgsql/src/backend/utils/misc/postgresql.conf.sample pgsql-bfhj/src/backend/utils/misc/postgresql.conf.sample *** pgsql/src/backend/utils/misc/postgresql.conf.sample Tue Sep 30 06:52:13 2008 --- pgsql-bfhj/src/backend/utils/misc/postgresql.conf.sample Sun Nov 2 15:11:27 2008 *************** *** 475,480 **** --- 475,481 ---- #sql_inheritance = on #standard_conforming_strings = off #synchronize_seqscans = on + #bloom_pruning = off # - Other Platforms and Clients - diff -cr --new-file pgsql/src/include/nodes/execnodes.h pgsql-bfhj/src/include/nodes/execnodes.h *** pgsql/src/include/nodes/execnodes.h Thu Oct 23 10:34:34 2008 --- pgsql-bfhj/src/include/nodes/execnodes.h Thu Oct 30 23:44:16 2008 *************** *** 1482,1487 **** --- 1482,1489 ---- HashJoinTable hashtable; /* hash table for the hashjoin */ List *hashkeys; /* list of ExprState nodes */ /* hashkeys is same as parent's hj_InnerHashKeys */ + char *bf_bitvector; /* Bloom filter bit vector */ + int bf_len; /* Bloom filter length */ } HashState; /* ---------------- diff -cr --new-file pgsql/src/include/utils/bloomfn.h pgsql-bfhj/src/include/utils/bloomfn.h *** pgsql/src/include/utils/bloomfn.h Wed Dec 31 19:00:00 1969 --- pgsql-bfhj/src/include/utils/bloomfn.h Sun Nov 2 15:09:55 2008 *************** *** 0 **** --- 1,27 ---- + /*------------------------------------------------------------------------- + * + * bloomfn.h + * POSTGRES bloomfn.h file definitions + * + * + * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * $Header$ + * + *------------------------------------------------------------------------- + */ + #ifndef BLOOMFN_H + #define BLOOMFN_H + + #include "nodes/execnodes.h" + + extern bool bloom_pruning; + + extern void bloom_filter_init (HashState *node, double cardinality); + extern void bloom_filter_free (HashState *node); + extern void bloom_filter_add (HashState *node, uint32 hashvalue); + extern bool bloom_filter_query (HashState *node, uint32 hashvalue); + + #endif /* BLOOMFN_H */ +