From c39f3ef547d03e02f2dfca6d011ca65aa7e6c8c3 Mon Sep 17 00:00:00 2001 From: Andrey Date: Sun, 25 Aug 2019 14:18:31 +0500 Subject: [PATCH 3/3] Implement GiST build using sort support --- src/backend/access/gist/gistbuild.c | 204 +++++++++++++++++++++++++++- src/backend/access/gist/gistutil.c | 30 +++- src/backend/utils/sort/tuplesort.c | 82 +++++++++++ src/include/access/gist_private.h | 3 + src/include/utils/tuplesort.h | 6 + 5 files changed, 312 insertions(+), 13 deletions(-) diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c index e7112590b8..b49875e731 100644 --- a/src/backend/access/gist/gistbuild.c +++ b/src/backend/access/gist/gistbuild.c @@ -31,6 +31,7 @@ #include "utils/memutils.h" #include "utils/rel.h" #include "utils/regproc.h" +#include "utils/tuplesort.h" /* Step of index tuples for check whether to switch to buffering build mode */ #define BUFFERING_MODE_SWITCH_CHECK_STEP 256 @@ -56,6 +57,15 @@ typedef enum GIST_BUFFERING_ACTIVE /* in buffering build mode */ } GistBufferingMode; +/* + * Status record for spooling/sorting phase. + */ +typedef struct +{ + Tuplesortstate *sortstate; /* state data for tuplesort.c */ + Relation index; +} GSpool; + /* Working state for gistbuild and its callback */ typedef struct { @@ -77,7 +87,7 @@ typedef struct HTAB *parentMap; GistBufferingMode bufferingMode; - FmgrInfo sortFn; + GSpool *spool; } GISTBuildState; /* prototypes for private functions */ @@ -111,6 +121,173 @@ static void gistMemorizeParent(GISTBuildState *buildstate, BlockNumber child, static void gistMemorizeAllDownlinks(GISTBuildState *buildstate, Buffer parent); static BlockNumber gistGetParent(GISTBuildState *buildstate, BlockNumber child); +static GSpool *gist_spoolinit(Relation heap, Relation index, SortSupport ssup); +static void gist_spool(GSpool *gspool, ItemPointer self, Datum *values, bool *isnull); +static void gist_indexsortbuild(GISTBuildState *state); +static void gist_indexsortbuild_flush(Relation rel, Page page, BlockNumber blockno, bool isNew); + +/* + * create and initialize a spool structure to sort tuples + */ +static GSpool * +gist_spoolinit(Relation heap, Relation index, SortSupport ssup) +{ + GSpool *gspool = (GSpool *) palloc0(sizeof(GSpool)); + + gspool->index = index; + + /* + * We size the sort area as maintenance_work_mem rather than work_mem to + * speed index creation. This should be OK since a single backend can't + * run multiple index creations in parallel. + */ + gspool->sortstate = tuplesort_begin_index_gist(heap, + index, + ssup, + maintenance_work_mem, + NULL, + false); + + return gspool; +} + +/* + * Flush page contents to actual page at blockno + * We have expected block number, because GiST build relies on that pages + * will be allocated in continous segments. This simplifies allocation + * logic. + */ +static void +gist_indexsortbuild_flush(Relation rel, Page page, BlockNumber blockno, bool isNew) +{ + OffsetNumber i, + maxoff; + Page newpage; + + Buffer buffer = ReadBuffer(rel, isNew ? P_NEW : blockno); + GISTInitBuffer(buffer, GistPageIsLeaf(page) ? F_LEAF : 0); + /* If the page is new - check that it was allocated correctly */ + Assert(BufferGetBlockNumber(buffer) == blockno); + + LockBuffer(buffer, GIST_EXCLUSIVE); + newpage = BufferGetPage(buffer); + + maxoff = PageGetMaxOffsetNumber(page); + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) + { + ItemId itemid = PageGetItemId(page, i); + IndexTuple tuple = (IndexTuple) PageGetItem(page, itemid); + gistfillbuffer(newpage, &tuple, + 1, InvalidOffsetNumber); + } + UnlockReleaseBuffer(buffer); +} + +/* + * given a spool loaded by successive calls to _h_spool, + * create an entire index. + */ +static void +gist_indexsortbuild(GISTBuildState *state) +{ + GSpool *gspool = state->spool; + /* + * Build constructs GiST by levels. Level is always allocated + * sequentially from level_start until level_end. + */ + BlockNumber level_start = GIST_ROOT_BLKNO + 1; + BlockNumber level_end = level_start; + BlockNumber prev_level_start; + IndexTuple itup; + /* + * We keep one page in memory for the special case + * When layer will have only on page - we will place it + * to ROOT_BLOCK_NO + */ + Page page = palloc(BLCKSZ); + gistinitpage(page, F_LEAF); + + tuplesort_performsort(gspool->sortstate); + + /* Create a first layer of leaf pages */ + while ((itup = tuplesort_getindextuple(gspool->sortstate, true)) != NULL) + { + if (PageGetFreeSpace(page) >= IndexTupleSize(itup) + sizeof(ItemIdData)) + { + gistfillbuffer(page, &itup, 1, InvalidOffsetNumber); + } + else + { + gist_indexsortbuild_flush(state->indexrel, page, level_end, true); + level_end++; + gistinitpage(page, F_LEAF); + gistfillbuffer(page, &itup, 1, InvalidOffsetNumber); + } + } + + /* Construct internal levels */ + do + { + /* If previous level had only one page - that page is a root */ + if (level_start == level_end) + { + gist_indexsortbuild_flush(state->indexrel, page, GIST_ROOT_BLKNO, + false); + return; + } + + gist_indexsortbuild_flush(state->indexrel, page, level_end, true); + level_end++; + gistinitpage(page, 0); + + prev_level_start = level_start; + level_start = level_end; + + for (BlockNumber i = prev_level_start; i < level_start; i++) + { + /* For each page on previous level we form one tuple */ + Buffer lower_buffer = ReadBuffer(state->indexrel, i); + Page lower_page; + IndexTuple union_tuple; + MemoryContext oldCtx = MemoryContextSwitchTo(state->giststate->tempCxt); + int vect_len; + LockBuffer(lower_buffer, GIST_SHARE); + lower_page = BufferGetPage(lower_buffer); + + IndexTuple *itvec = gistextractpage(lower_page, &vect_len); + union_tuple = gistunion(state->indexrel, itvec, vect_len, + state->giststate); + ItemPointerSetBlockNumber(&(union_tuple->t_tid), i); + + if (PageGetFreeSpace(page) >= IndexTupleSize(union_tuple) + sizeof(ItemIdData)) + { + gistfillbuffer(page, &union_tuple, 1, InvalidOffsetNumber); + } + else + { + gist_indexsortbuild_flush(state->indexrel, page, level_end, true); + level_end++; + gistinitpage(page, 0); + gistfillbuffer(page, &union_tuple, 1, InvalidOffsetNumber); + } + + UnlockReleaseBuffer(lower_buffer); + MemoryContextSwitchTo(oldCtx); + MemoryContextReset(state->giststate->tempCxt); + } + } while (true); +} + +/* + * spool an index entry into the sort file. + */ +static void +gist_spool(GSpool *gspool, ItemPointer self, Datum *values, bool *isnull) +{ + tuplesort_putindextuplevalues(gspool->sortstate, gspool->index, + self, values, isnull); +} + /* * Main entry point to GiST index build. Initially calls insert over and over, * but switches to more efficient buffering build algorithm after a certain @@ -129,7 +306,7 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo) buildstate.indexrel = index; buildstate.heaprel = heap; - buildstate.sortFn.fn_oid = InvalidOid; + buildstate.spool = NULL; if (index->rd_options) { /* Get buffering mode from the options string */ @@ -144,13 +321,16 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo) buildstate.bufferingMode = GIST_BUFFERING_AUTO; if (options->vl_len_ > offsetof(GiSTOptions, buildSortFunctionOffset) && - options->bufferingModeOffset > 0) + options->buildSortFunctionOffset > 0) { char *sortFuncName = (char *) options + options->buildSortFunctionOffset; Oid argList[1] = {INTERNALOID}; List *namelist = stringToQualifiedNameList(sortFuncName); Oid sortFuncOid = LookupFuncName(namelist, 1, argList, false); - fmgr_info(sortFuncOid, &buildstate.sortFn); + SortSupport sort = palloc0(sizeof(SortSupportData)); + OidFunctionCall1(sortFuncOid, PointerGetDatum(sort)); + buildstate.bufferingMode = GIST_BUFFERING_DISABLED; + buildstate.spool = gist_spoolinit(heap, index, sort); } fillfactor = options->fillfactor; @@ -223,6 +403,12 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo) gistFreeBuildBuffers(buildstate.gfbb); } + if (buildstate.spool) + { + gist_indexsortbuild(&buildstate); + tuplesort_end(buildstate.spool->sortstate); + } + /* okay, all heap tuples are indexed */ MemoryContextSwitchTo(oldcxt); MemoryContextDelete(buildstate.giststate->tempCxt); @@ -498,14 +684,20 @@ gistBuildCallback(Relation index, GISTBuildState *buildstate = (GISTBuildState *) state; IndexTuple itup; MemoryContext oldCtx; + Datum compressed_values[INDEX_MAX_KEYS]; oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt); /* form an index tuple and point it at the heap tuple */ - itup = gistFormTuple(buildstate->giststate, index, values, isnull, true); + itup = gistCompressValuesAndFormTuple(buildstate->giststate, index, values, isnull, true, compressed_values); itup->t_tid = htup->t_self; - if (buildstate->bufferingMode == GIST_BUFFERING_ACTIVE) + if (buildstate->spool) + { + if (tupleIsAlive) + gist_spool(buildstate->spool, (ItemPointer)itup, compressed_values, isnull); + } + else if (buildstate->bufferingMode == GIST_BUFFERING_ACTIVE) { /* We have buffers, so use them. */ gistBufferingBuildInsert(buildstate, itup); diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c index 6e2ff95b5c..330ef3a2af 100644 --- a/src/backend/access/gist/gistutil.c +++ b/src/backend/access/gist/gistutil.c @@ -174,6 +174,7 @@ gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, datum = index_getattr(itvec[j], i + 1, giststate->leafTupdesc, &IsNull); + if (IsNull) continue; @@ -576,6 +577,13 @@ gistFormTuple(GISTSTATE *giststate, Relation r, Datum attdata[], bool isnull[], bool isleaf) { Datum compatt[INDEX_MAX_KEYS]; + return gistCompressValuesAndFormTuple(giststate, r, attdata, isnull, isleaf, compatt); +} + +IndexTuple +gistCompressValuesAndFormTuple(GISTSTATE *giststate, Relation r, + Datum attdata[], bool isnull[], bool isleaf, Datum compatt[]) +{ int i; IndexTuple res; @@ -746,14 +754,10 @@ gistpenalty(GISTSTATE *giststate, int attno, * Initialize a new index page */ void -GISTInitBuffer(Buffer b, uint32 f) +gistinitpage(Page page, uint32 f) { - GISTPageOpaque opaque; - Page page; - Size pageSize; - - pageSize = BufferGetPageSize(b); - page = BufferGetPage(b); + GISTPageOpaque opaque; + Size pageSize = BLCKSZ; PageInit(page, pageSize, sizeof(GISTPageOpaqueData)); opaque = GistPageGetOpaque(page); @@ -764,6 +768,18 @@ GISTInitBuffer(Buffer b, uint32 f) opaque->gist_page_id = GIST_PAGE_ID; } +/* + * Initialize a new index buffer + */ +void +GISTInitBuffer(Buffer b, uint32 f) +{ + Page page; + + page = BufferGetPage(b); + gistinitpage(page, f); +} + /* * Verify that a freshly-read page looks sane. */ diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c index 7b8e67899e..3249f3eb69 100644 --- a/src/backend/utils/sort/tuplesort.c +++ b/src/backend/utils/sort/tuplesort.c @@ -630,6 +630,8 @@ static int comparetup_index_btree(const SortTuple *a, const SortTuple *b, Tuplesortstate *state); static int comparetup_index_hash(const SortTuple *a, const SortTuple *b, Tuplesortstate *state); +static int comparetup_index_gist(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state); static void copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup); static void writetup_index(Tuplesortstate *state, int tapenum, SortTuple *stup); @@ -1095,6 +1097,43 @@ tuplesort_begin_index_hash(Relation heapRel, return state; } +Tuplesortstate * +tuplesort_begin_index_gist(Relation heapRel, + Relation indexRel, + SortSupport ssup, + int workMem, + SortCoordinate coordinate, + bool randomAccess) +{ + Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, + randomAccess); + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(state->sortcontext); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "begin index sort: workMem = %d, randomAccess = %c", + workMem, randomAccess ? 't' : 'f'); +#endif + + state->nKeys = 1; /* Only one sort column, the gist attribute */ + + state->comparetup = comparetup_index_gist; + state->copytup = copytup_index; + state->writetup = writetup_index; + state->readtup = readtup_index; + state->sortKeys = ssup; + + state->heapRel = heapRel; + state->indexRel = indexRel; + + MemoryContextSwitchTo(oldcontext); + + return state; +} + Tuplesortstate * tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation, bool nullsFirstFlag, int workMem, @@ -4138,6 +4177,49 @@ comparetup_index_hash(const SortTuple *a, const SortTuple *b, return 0; } +static int +comparetup_index_gist(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state) +{ + SortSupport sortKey = state->sortKeys; + IndexTuple tuple1; + IndexTuple tuple2; + int32 compare; + + + /* Compare the leading sort key */ + compare = ApplySortComparator(a->datum1, a->isnull1, + b->datum1, b->isnull1, + sortKey); + if (compare != 0) + return compare; + + tuple1 = (IndexTuple) a->tuple; + tuple2 = (IndexTuple) b->tuple; + /* + * If key values are equal, we sort on ItemPointer. + */ + { + BlockNumber blk1 = ItemPointerGetBlockNumber(&tuple1->t_tid); + BlockNumber blk2 = ItemPointerGetBlockNumber(&tuple2->t_tid); + + if (blk1 != blk2) + return (blk1 < blk2) ? -1 : 1; + } + { + OffsetNumber pos1 = ItemPointerGetOffsetNumber(&tuple1->t_tid); + OffsetNumber pos2 = ItemPointerGetOffsetNumber(&tuple2->t_tid); + + if (pos1 != pos2) + return (pos1 < pos2) ? -1 : 1; + } + + /* ItemPointer values should never be equal */ + Assert(false); + + return 0; +} + static void copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup) { diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h index b4b6883f6d..ef9afdf1ca 100644 --- a/src/include/access/gist_private.h +++ b/src/include/access/gist_private.h @@ -487,12 +487,15 @@ extern IndexTuple gistgetadjusted(Relation r, GISTSTATE *giststate); extern IndexTuple gistFormTuple(GISTSTATE *giststate, Relation r, Datum *attdata, bool *isnull, bool isleaf); +extern IndexTuple gistCompressValuesAndFormTuple(GISTSTATE *giststate, Relation r, + Datum attdata[], bool isnull[], bool isleaf, Datum compatt[]); extern OffsetNumber gistchoose(Relation r, Page p, IndexTuple it, GISTSTATE *giststate); extern void GISTInitBuffer(Buffer b, uint32 f); +extern void gistinitpage(Page page, uint32 f); extern void gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e, Datum k, Relation r, Page pg, OffsetNumber o, bool l, bool isNull); diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h index 4521de18e1..3f1ceac3d2 100644 --- a/src/include/utils/tuplesort.h +++ b/src/include/utils/tuplesort.h @@ -26,6 +26,7 @@ #include "fmgr.h" #include "storage/dsm.h" #include "utils/relcache.h" +#include "utils/sortsupport.h" /* @@ -209,6 +210,11 @@ extern Tuplesortstate *tuplesort_begin_index_hash(Relation heapRel, uint32 max_buckets, int workMem, SortCoordinate coordinate, bool randomAccess); +extern Tuplesortstate *tuplesort_begin_index_gist(Relation heapRel, + Relation indexRel, + SortSupport ssup, + int workMem, SortCoordinate coordinate, + bool randomAccess); extern Tuplesortstate *tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation, bool nullsFirstFlag, -- 2.20.1