diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index c361509d68..67102997f1 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -39,6 +39,7 @@ typedef struct HSpool *spool; /* NULL if not using spooling */ double indtuples; /* # tuples accepted into index */ Relation heapRel; /* heap relation descriptor */ + HashInsertState istate; /* insert state */ } HashBuildState; static void hashbuildCallback(Relation index, @@ -118,6 +119,7 @@ hashbuild(Relation heap, Relation index, IndexInfo *indexInfo) uint32 num_buckets; long sort_threshold; HashBuildState buildstate; + HashInsertStateData insertstate; /* * We expect to be called exactly once for any index relation. If that's @@ -158,9 +160,16 @@ hashbuild(Relation heap, Relation index, IndexInfo *indexInfo) sort_threshold = Min(sort_threshold, NLocBuffer); if (num_buckets >= (uint32) sort_threshold) - buildstate.spool = _h_spoolinit(heap, index, num_buckets); + { + insertstate.sorted = true; + buildstate.spool = _h_spoolinit(heap, index, num_buckets, &insertstate); + } else + { + insertstate.sorted = false; buildstate.spool = NULL; + } + buildstate.istate = &insertstate; /* prepare to build the index */ buildstate.indtuples = 0; @@ -212,6 +221,7 @@ hashbuildCallback(Relation index, void *state) { HashBuildState *buildstate = (HashBuildState *) state; + HashInsertState istate = buildstate->istate; Datum index_values[1]; bool index_isnull[1]; IndexTuple itup; @@ -231,7 +241,7 @@ hashbuildCallback(Relation index, itup = index_form_tuple(RelationGetDescr(index), index_values, index_isnull); itup->t_tid = *tid; - _hash_doinsert(index, itup, buildstate->heapRel); + _hash_doinsert(index, itup, buildstate->heapRel, istate); pfree(itup); } @@ -254,6 +264,7 @@ hashinsert(Relation rel, Datum *values, bool *isnull, Datum index_values[1]; bool index_isnull[1]; IndexTuple itup; + HashInsertStateData istate; /* convert data to a hash key; on failure, do not insert anything */ if (!_hash_convert_tuple(rel, @@ -265,7 +276,9 @@ hashinsert(Relation rel, Datum *values, bool *isnull, itup = index_form_tuple(RelationGetDescr(rel), index_values, index_isnull); itup->t_tid = *ht_ctid; - _hash_doinsert(rel, itup, heapRel); + istate.sorted = false; + + _hash_doinsert(rel, itup, heapRel, &istate); pfree(itup); diff --git a/src/backend/access/hash/hashinsert.c b/src/backend/access/hash/hashinsert.c index 4f2fecb908..bf251f346d 100644 --- a/src/backend/access/hash/hashinsert.c +++ b/src/backend/access/hash/hashinsert.c @@ -34,7 +34,8 @@ static void _hash_vacuum_one_page(Relation rel, Relation hrel, * and hashinsert. By here, itup is completely filled in. */ void -_hash_doinsert(Relation rel, IndexTuple itup, Relation heapRel) +_hash_doinsert(Relation rel, IndexTuple itup, Relation heapRel, + HashInsertState istate) { Buffer buf = InvalidBuffer; Buffer bucket_buf; @@ -198,7 +199,7 @@ restart_insert: START_CRIT_SECTION(); /* found page with enough space, so add the item here */ - itup_off = _hash_pgaddtup(rel, buf, itemsz, itup); + itup_off = _hash_pgaddtup(rel, buf, itemsz, itup, istate->sorted); MarkBufferDirty(buf); /* metapage operations */ @@ -266,7 +267,7 @@ restart_insert: * page are sorted by hashkey value. */ OffsetNumber -_hash_pgaddtup(Relation rel, Buffer buf, Size itemsize, IndexTuple itup) +_hash_pgaddtup(Relation rel, Buffer buf, Size itemsize, IndexTuple itup, bool sorted) { OffsetNumber itup_off; Page page; @@ -275,9 +276,36 @@ _hash_pgaddtup(Relation rel, Buffer buf, Size itemsize, IndexTuple itup) _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE); page = BufferGetPage(buf); - /* Find where to insert the tuple (preserving page's hashkey ordering) */ - hashkey = _hash_get_indextuple_hashkey(itup); - itup_off = _hash_binsearch(page, hashkey); + /* + * Find where to insert the tuple (preserving page's hashkey ordering). + * If the input is already sorted by hashkey, then we can avoid the + * binary search and just add it to the end of the page (but check!). + * + * If we need to binary search, find the last matching value, not the + * first, since this means less data movement when we have duplicates. + */ + if (sorted) + { + OffsetNumber maxoff = PageGetMaxOffsetNumber(page); + itup_off = maxoff + 1; + +#ifdef USE_ASSERT_CHECKING + if (OffsetNumberIsValid(maxoff)) + { + IndexTuple max_itup = (IndexTuple) PageGetItem(page, + PageGetItemId(page, maxoff)); + uint32 max_hashkey = _hash_get_indextuple_hashkey(max_itup); + + hashkey = _hash_get_indextuple_hashkey(itup); + Assert(max_hashkey <= hashkey); + } +#endif + } + else + { + hashkey = _hash_get_indextuple_hashkey(itup); + itup_off = _hash_binsearch(page, hashkey); + } if (PageAddItem(page, (Item) itup, itemsize, itup_off, false, false) == InvalidOffsetNumber) diff --git a/src/backend/access/hash/hashsort.c b/src/backend/access/hash/hashsort.c index 19563148d0..6213855a4c 100644 --- a/src/backend/access/hash/hashsort.c +++ b/src/backend/access/hash/hashsort.c @@ -50,6 +50,8 @@ struct HSpool uint32 high_mask; uint32 low_mask; uint32 max_buckets; + + HashInsertState istate; }; @@ -57,7 +59,7 @@ struct HSpool * create and initialize a spool structure */ HSpool * -_h_spoolinit(Relation heap, Relation index, uint32 num_buckets) +_h_spoolinit(Relation heap, Relation index, uint32 num_buckets, HashInsertState istate) { HSpool *hspool = (HSpool *) palloc0(sizeof(HSpool)); @@ -89,6 +91,8 @@ _h_spoolinit(Relation heap, Relation index, uint32 num_buckets) NULL, TUPLESORT_NONE); + hspool->istate = istate; + return hspool; } @@ -145,7 +149,7 @@ _h_indexbuild(HSpool *hspool, Relation heapRel) Assert(hashkey >= lasthashkey); #endif - _hash_doinsert(hspool->index, itup, heapRel); + _hash_doinsert(hspool->index, itup, heapRel, hspool->istate); pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE, ++tups_done); diff --git a/src/include/access/hash.h b/src/include/access/hash.h index da372841c4..ff032d3fd3 100644 --- a/src/include/access/hash.h +++ b/src/include/access/hash.h @@ -357,6 +357,12 @@ typedef struct HashOptions #define HASHOPTIONS_PROC 3 #define HASHNProcs 3 +typedef struct HashInsertStateData +{ + bool sorted; +} HashInsertStateData; + +typedef HashInsertStateData *HashInsertState; /* public routines */ @@ -390,9 +396,10 @@ extern void hashadjustmembers(Oid opfamilyoid, /* private routines */ /* hashinsert.c */ -extern void _hash_doinsert(Relation rel, IndexTuple itup, Relation heapRel); +extern void _hash_doinsert(Relation rel, IndexTuple itup, Relation heapRel, + HashInsertState istate); extern OffsetNumber _hash_pgaddtup(Relation rel, Buffer buf, - Size itemsize, IndexTuple itup); + Size itemsize, IndexTuple itup, bool sorted); extern void _hash_pgaddmultitup(Relation rel, Buffer buf, IndexTuple *itups, OffsetNumber *itup_offsets, uint16 nitups); @@ -446,7 +453,8 @@ extern bool _hash_first(IndexScanDesc scan, ScanDirection dir); /* hashsort.c */ typedef struct HSpool HSpool; /* opaque struct in hashsort.c */ -extern HSpool *_h_spoolinit(Relation heap, Relation index, uint32 num_buckets); +extern HSpool *_h_spoolinit(Relation heap, Relation index, uint32 num_buckets, + HashInsertState istate); extern void _h_spooldestroy(HSpool *hspool); extern void _h_spool(HSpool *hspool, ItemPointer self, Datum *values, bool *isnull);