diff --git a/src/backend/access/hash/hashsort.c b/src/backend/access/hash/hashsort.c index 0e0f393..04d9c46 100644 --- a/src/backend/access/hash/hashsort.c +++ b/src/backend/access/hash/hashsort.c @@ -37,7 +37,15 @@ struct HSpool { Tuplesortstate *sortstate; /* state data for tuplesort.c */ Relation index; - uint32 hash_mask; /* bitmask for hash codes */ + + /* + * We sort the hash keys based on the buckets they belong to. Below masks + * are used in _hash_hashkey2bucket to determine the bucket of given hash + * key. + */ + uint32 high_mask; + uint32 low_mask; + uint32 max_buckets; }; @@ -56,11 +64,12 @@ _h_spoolinit(Relation heap, Relation index, uint32 num_buckets) * num_buckets buckets in the index, the appropriate mask can be computed * as follows. * - * Note: at present, the passed-in num_buckets is always a power of 2, so - * we could just compute num_buckets - 1. We prefer not to assume that - * here, though. + * NOTE : This hash mask calculation should be in sync with similar + * calculation in _hash_init_metabuffer. */ - hspool->hash_mask = (((uint32) 1) << _hash_log2(num_buckets)) - 1; + hspool->high_mask = (((uint32) 1) << _hash_log2(num_buckets)) - 1; + hspool->low_mask = (hspool->high_mask >> 1); + hspool->max_buckets = num_buckets - 1; /* * We size the sort area as maintenance_work_mem rather than work_mem to @@ -69,7 +78,9 @@ _h_spoolinit(Relation heap, Relation index, uint32 num_buckets) */ hspool->sortstate = tuplesort_begin_index_hash(heap, index, - hspool->hash_mask, + hspool->high_mask, + hspool->low_mask, + hspool->max_buckets, maintenance_work_mem, false); @@ -122,7 +133,9 @@ _h_indexbuild(HSpool *hspool, Relation heapRel) #ifdef USE_ASSERT_CHECKING uint32 lasthashkey = hashkey; - hashkey = _hash_get_indextuple_hashkey(itup) & hspool->hash_mask; + hashkey = _hash_hashkey2bucket(_hash_get_indextuple_hashkey(itup), + hspool->max_buckets, hspool->high_mask, + hspool->low_mask); Assert(hashkey >= lasthashkey); #endif diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c index e1e692d..5b8aad1 100644 --- a/src/backend/utils/sort/tuplesort.c +++ b/src/backend/utils/sort/tuplesort.c @@ -127,6 +127,7 @@ #include "access/htup_details.h" #include "access/nbtree.h" +#include "access/hash.h" #include "catalog/index.h" #include "catalog/pg_am.h" #include "commands/tablespace.h" @@ -473,7 +474,9 @@ struct Tuplesortstate bool enforceUnique; /* complain if we find duplicate tuples */ /* These are specific to the index_hash subcase: */ - uint32 hash_mask; /* mask for sortable part of hash code */ + uint32 high_mask; /* masks for sortable part of hash code */ + uint32 low_mask; + uint32 max_buckets; /* * These variables are specific to the Datum case; they are set by @@ -991,7 +994,9 @@ tuplesort_begin_index_btree(Relation heapRel, Tuplesortstate * tuplesort_begin_index_hash(Relation heapRel, Relation indexRel, - uint32 hash_mask, + uint32 high_mask, + uint32 low_mask, + uint32 max_buckets, int workMem, bool randomAccess) { Tuplesortstate *state = tuplesort_begin_common(workMem, randomAccess); @@ -1002,8 +1007,11 @@ tuplesort_begin_index_hash(Relation heapRel, #ifdef TRACE_SORT if (trace_sort) elog(LOG, - "begin index sort: hash_mask = 0x%x, workMem = %d, randomAccess = %c", - hash_mask, + "begin index sort: high_mask = 0x%x, low_mask = 0x%x, " + "max_buckets = 0x%x, workMem = %d, randomAccess = %c", + high_mask, + low_mask, + max_buckets, workMem, randomAccess ? 't' : 'f'); #endif @@ -1017,7 +1025,9 @@ tuplesort_begin_index_hash(Relation heapRel, state->heapRel = heapRel; state->indexRel = indexRel; - state->hash_mask = hash_mask; + state->high_mask = high_mask; + state->low_mask = low_mask; + state->max_buckets = max_buckets; MemoryContextSwitchTo(oldcontext); @@ -4157,8 +4167,8 @@ static int comparetup_index_hash(const SortTuple *a, const SortTuple *b, Tuplesortstate *state) { - uint32 hash1; - uint32 hash2; + Bucket bucket1; + Bucket bucket2; IndexTuple tuple1; IndexTuple tuple2; @@ -4167,13 +4177,14 @@ comparetup_index_hash(const SortTuple *a, const SortTuple *b, * that the first column of the index tuple is the hash key. */ Assert(!a->isnull1); - hash1 = DatumGetUInt32(a->datum1) & state->hash_mask; + bucket1 = _hash_hashkey2bucket(DatumGetUInt32(a->datum1), state->max_buckets, + state->high_mask, state->low_mask); Assert(!b->isnull1); - hash2 = DatumGetUInt32(b->datum1) & state->hash_mask; - - if (hash1 > hash2) + bucket2 = _hash_hashkey2bucket(DatumGetUInt32(b->datum1), state->max_buckets, + state->high_mask, state->low_mask); + if (bucket1 > bucket2) return 1; - else if (hash1 < hash2) + else if (bucket1 < bucket2) return -1; /* diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h index 5b3f475..9719db4 100644 --- a/src/include/utils/tuplesort.h +++ b/src/include/utils/tuplesort.h @@ -72,7 +72,9 @@ extern Tuplesortstate *tuplesort_begin_index_btree(Relation heapRel, int workMem, bool randomAccess); extern Tuplesortstate *tuplesort_begin_index_hash(Relation heapRel, Relation indexRel, - uint32 hash_mask, + uint32 high_mask, + uint32 low_mask, + uint32 max_buckets, int workMem, bool randomAccess); extern Tuplesortstate *tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation,