From ca02781294be9b943ab1492fc2cf4b8b5b852244 Mon Sep 17 00:00:00 2001 From: Andrey Date: Sun, 25 Aug 2019 14:18:31 +0500 Subject: [PATCH 2/2] Implement GiST build using sort support --- src/backend/access/gist/gistbuild.c | 219 ++++++++++++++++++++++++- src/backend/access/gist/gistutil.c | 30 +++- src/backend/access/gist/gistvalidate.c | 7 +- src/backend/utils/sort/tuplesort.c | 37 +++++ src/include/access/gist.h | 3 +- src/include/access/gist_private.h | 3 + src/include/catalog/pg_amproc.dat | 2 + src/include/utils/tuplesort.h | 6 + 8 files changed, 295 insertions(+), 12 deletions(-) diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c index ecef0ff072..8a9af71816 100644 --- a/src/backend/access/gist/gistbuild.c +++ b/src/backend/access/gist/gistbuild.c @@ -28,6 +28,8 @@ #include "storage/smgr.h" #include "utils/memutils.h" #include "utils/rel.h" +#include "utils/regproc.h" +#include "utils/tuplesort.h" /* Step of index tuples for check whether to switch to buffering build mode */ #define BUFFERING_MODE_SWITCH_CHECK_STEP 256 @@ -53,6 +55,15 @@ typedef enum GIST_BUFFERING_ACTIVE /* in buffering build mode */ } GistBufferingMode; +/* + * Status record for spooling/sorting phase. + */ +typedef struct +{ + Tuplesortstate *sortstate; /* state data for tuplesort.c */ + Relation index; +} GSpool; + /* Working state for gistbuild and its callback */ typedef struct { @@ -74,6 +85,7 @@ typedef struct HTAB *parentMap; GistBufferingMode bufferingMode; + GSpool *spool; } GISTBuildState; /* prototypes for private functions */ @@ -107,6 +119,173 @@ static void gistMemorizeParent(GISTBuildState *buildstate, BlockNumber child, static void gistMemorizeAllDownlinks(GISTBuildState *buildstate, Buffer parent); static BlockNumber gistGetParent(GISTBuildState *buildstate, BlockNumber child); +static GSpool *gist_spoolinit(Relation heap, Relation index, SortSupport ssup); +static void gist_spool(GSpool *gspool, ItemPointer self, Datum *values, bool *isnull); +static void gist_indexsortbuild(GISTBuildState *state); +static void gist_indexsortbuild_flush(Relation rel, Page page, BlockNumber blockno, bool isNew); + +/* + * create and initialize a spool structure to sort tuples + */ +static GSpool * +gist_spoolinit(Relation heap, Relation index, SortSupport ssup) +{ + GSpool *gspool = (GSpool *) palloc0(sizeof(GSpool)); + + gspool->index = index; + + /* + * We size the sort area as maintenance_work_mem rather than work_mem to + * speed index creation. This should be OK since a single backend can't + * run multiple index creations in parallel. + */ + gspool->sortstate = tuplesort_begin_index_gist(heap, + index, + ssup, + maintenance_work_mem, + NULL, + false); + + return gspool; +} + +/* + * Flush page contents to actual page at blockno + * We have expected block number, because GiST build relies on that pages + * will be allocated in continous segments. This simplifies allocation + * logic. + */ +static void +gist_indexsortbuild_flush(Relation rel, Page page, BlockNumber blockno, bool isNew) +{ + OffsetNumber i, + maxoff; + Page newpage; + + Buffer buffer = ReadBuffer(rel, isNew ? P_NEW : blockno); + GISTInitBuffer(buffer, GistPageIsLeaf(page) ? F_LEAF : 0); + /* If the page is new - check that it was allocated correctly */ + Assert(BufferGetBlockNumber(buffer) == blockno); + + LockBuffer(buffer, GIST_EXCLUSIVE); + newpage = BufferGetPage(buffer); + + maxoff = PageGetMaxOffsetNumber(page); + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) + { + ItemId itemid = PageGetItemId(page, i); + IndexTuple tuple = (IndexTuple) PageGetItem(page, itemid); + gistfillbuffer(newpage, &tuple, + 1, InvalidOffsetNumber); + } + UnlockReleaseBuffer(buffer); +} + +/* + * given a spool loaded by successive calls to _h_spool, + * create an entire index. + */ +static void +gist_indexsortbuild(GISTBuildState *state) +{ + GSpool *gspool = state->spool; + /* + * Build constructs GiST by levels. Level is always allocated + * sequentially from level_start until level_end. + */ + BlockNumber level_start = GIST_ROOT_BLKNO + 1; + BlockNumber level_end = level_start; + BlockNumber prev_level_start; + IndexTuple itup; + /* + * We keep one page in memory for the special case + * When layer will have only on page - we will place it + * to ROOT_BLOCK_NO + */ + Page page = palloc(BLCKSZ); + gistinitpage(page, F_LEAF); + + tuplesort_performsort(gspool->sortstate); + + /* Create a first layer of leaf pages */ + while ((itup = tuplesort_getindextuple(gspool->sortstate, true)) != NULL) + { + if (PageGetFreeSpace(page) >= IndexTupleSize(itup) + sizeof(ItemIdData) + state->freespace) + { + gistfillbuffer(page, &itup, 1, InvalidOffsetNumber); + } + else + { + gist_indexsortbuild_flush(state->indexrel, page, level_end, true); + level_end++; + gistinitpage(page, F_LEAF); + gistfillbuffer(page, &itup, 1, InvalidOffsetNumber); + } + } + + /* Construct internal levels */ + do + { + /* If previous level had only one page - that page is a root */ + if (level_start == level_end) + { + gist_indexsortbuild_flush(state->indexrel, page, GIST_ROOT_BLKNO, + false); + return; + } + + gist_indexsortbuild_flush(state->indexrel, page, level_end, true); + level_end++; + gistinitpage(page, 0); + + prev_level_start = level_start; + level_start = level_end; + + for (BlockNumber i = prev_level_start; i < level_start; i++) + { + /* For each page on previous level we form one tuple */ + Buffer lower_buffer = ReadBuffer(state->indexrel, i); + Page lower_page; + IndexTuple union_tuple; + MemoryContext oldCtx = MemoryContextSwitchTo(state->giststate->tempCxt); + int vect_len; + LockBuffer(lower_buffer, GIST_SHARE); + lower_page = BufferGetPage(lower_buffer); + + IndexTuple *itvec = gistextractpage(lower_page, &vect_len); + union_tuple = gistunion(state->indexrel, itvec, vect_len, + state->giststate); + ItemPointerSetBlockNumber(&(union_tuple->t_tid), i); + + if (PageGetFreeSpace(page) >= IndexTupleSize(union_tuple) + sizeof(ItemIdData) + state->freespace) + { + gistfillbuffer(page, &union_tuple, 1, InvalidOffsetNumber); + } + else + { + gist_indexsortbuild_flush(state->indexrel, page, level_end, true); + level_end++; + gistinitpage(page, 0); + gistfillbuffer(page, &union_tuple, 1, InvalidOffsetNumber); + } + + UnlockReleaseBuffer(lower_buffer); + MemoryContextSwitchTo(oldCtx); + MemoryContextReset(state->giststate->tempCxt); + } + } while (true); +} + +/* + * spool an index entry into the sort file. + */ +static void +gist_spool(GSpool *gspool, ItemPointer self, Datum *values, bool *isnull) +{ + tuplesort_putindextuplevalues(gspool->sortstate, gspool->index, + self, values, isnull); +} + /* * Main entry point to GiST index build. Initially calls insert over and over, * but switches to more efficient buffering build algorithm after a certain @@ -121,10 +300,14 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo) Buffer buffer; Page page; MemoryContext oldcxt = CurrentMemoryContext; - int fillfactor; + int fillfactor, i; + Oid SortSupportFnOids[INDEX_MAX_KEYS]; + bool hasallsortsupports = true; + int keyscount = IndexRelationGetNumberOfKeyAttributes(index); buildstate.indexrel = index; buildstate.heaprel = heap; + buildstate.spool = NULL; if (index->rd_options) { /* Get buffering mode from the options string */ @@ -152,6 +335,24 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo) /* Calculate target amount of free space to leave on pages */ buildstate.freespace = BLCKSZ * (100 - fillfactor) / 100; + for (i = 0; i < keyscount; i++) + { + SortSupportFnOids[i] = index_getprocid(index, i + 1, GIST_SORTSUPPORT_PROC); + if (!OidIsValid(SortSupportFnOids[i])) + { + hasallsortsupports = false; + break; + } + } + if (hasallsortsupports) + { + SortSupport sort = palloc0(sizeof(SortSupportData) * keyscount); + for (i = 0; i < keyscount; i++) + OidFunctionCall1(SortSupportFnOids[i], PointerGetDatum(sort + i)); + buildstate.bufferingMode = GIST_BUFFERING_DISABLED; + buildstate.spool = gist_spoolinit(heap, index, sort); + } + /* * We expect to be called exactly once for any index relation. If that's * not the case, big trouble's what we have. @@ -208,6 +409,12 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo) gistFreeBuildBuffers(buildstate.gfbb); } + if (buildstate.spool) + { + gist_indexsortbuild(&buildstate); + tuplesort_end(buildstate.spool->sortstate); + } + /* okay, all heap tuples are indexed */ MemoryContextSwitchTo(oldcxt); MemoryContextDelete(buildstate.giststate->tempCxt); @@ -468,14 +675,20 @@ gistBuildCallback(Relation index, GISTBuildState *buildstate = (GISTBuildState *) state; IndexTuple itup; MemoryContext oldCtx; + Datum compressed_values[INDEX_MAX_KEYS]; oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt); /* form an index tuple and point it at the heap tuple */ - itup = gistFormTuple(buildstate->giststate, index, values, isnull, true); + itup = gistCompressValuesAndFormTuple(buildstate->giststate, index, values, isnull, true, compressed_values); itup->t_tid = htup->t_self; - if (buildstate->bufferingMode == GIST_BUFFERING_ACTIVE) + if (buildstate->spool) + { + if (tupleIsAlive) + gist_spool(buildstate->spool, (ItemPointer)itup, compressed_values, isnull); + } + else if (buildstate->bufferingMode == GIST_BUFFERING_ACTIVE) { /* We have buffers, so use them. */ gistBufferingBuildInsert(buildstate, itup); diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c index 49df05653b..57fc479086 100644 --- a/src/backend/access/gist/gistutil.c +++ b/src/backend/access/gist/gistutil.c @@ -174,6 +174,7 @@ gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, datum = index_getattr(itvec[j], i + 1, giststate->leafTupdesc, &IsNull); + if (IsNull) continue; @@ -576,6 +577,13 @@ gistFormTuple(GISTSTATE *giststate, Relation r, Datum attdata[], bool isnull[], bool isleaf) { Datum compatt[INDEX_MAX_KEYS]; + return gistCompressValuesAndFormTuple(giststate, r, attdata, isnull, isleaf, compatt); +} + +IndexTuple +gistCompressValuesAndFormTuple(GISTSTATE *giststate, Relation r, + Datum attdata[], bool isnull[], bool isleaf, Datum compatt[]) +{ int i; IndexTuple res; @@ -746,14 +754,10 @@ gistpenalty(GISTSTATE *giststate, int attno, * Initialize a new index page */ void -GISTInitBuffer(Buffer b, uint32 f) +gistinitpage(Page page, uint32 f) { - GISTPageOpaque opaque; - Page page; - Size pageSize; - - pageSize = BufferGetPageSize(b); - page = BufferGetPage(b); + GISTPageOpaque opaque; + Size pageSize = BLCKSZ; PageInit(page, pageSize, sizeof(GISTPageOpaqueData)); opaque = GistPageGetOpaque(page); @@ -764,6 +768,18 @@ GISTInitBuffer(Buffer b, uint32 f) opaque->gist_page_id = GIST_PAGE_ID; } +/* + * Initialize a new index buffer + */ +void +GISTInitBuffer(Buffer b, uint32 f) +{ + Page page; + + page = BufferGetPage(b); + gistinitpage(page, f); +} + /* * Verify that a freshly-read page looks sane. */ diff --git a/src/backend/access/gist/gistvalidate.c b/src/backend/access/gist/gistvalidate.c index dfc1a87a75..bc65b9c6bf 100644 --- a/src/backend/access/gist/gistvalidate.c +++ b/src/backend/access/gist/gistvalidate.c @@ -140,6 +140,10 @@ gistvalidate(Oid opclassoid) 5, 5, INTERNALOID, opcintype, INT2OID, OIDOID, INTERNALOID); break; + case GIST_SORTSUPPORT_PROC: + ok = check_amproc_signature(procform->amproc, VOIDOID, true, + 1, 1, INTERNALOID); + break; default: ereport(INFO, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), @@ -259,7 +263,8 @@ gistvalidate(Oid opclassoid) (opclassgroup->functionset & (((uint64) 1) << i)) != 0) continue; /* got it */ if (i == GIST_DISTANCE_PROC || i == GIST_FETCH_PROC || - i == GIST_COMPRESS_PROC || i == GIST_DECOMPRESS_PROC) + i == GIST_COMPRESS_PROC || i == GIST_DECOMPRESS_PROC || + i == GIST_SORTSUPPORT_PROC) continue; /* optional methods */ ereport(INFO, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c index 7b8e67899e..018901b654 100644 --- a/src/backend/utils/sort/tuplesort.c +++ b/src/backend/utils/sort/tuplesort.c @@ -1095,6 +1095,43 @@ tuplesort_begin_index_hash(Relation heapRel, return state; } +Tuplesortstate * +tuplesort_begin_index_gist(Relation heapRel, + Relation indexRel, + SortSupport ssup, + int workMem, + SortCoordinate coordinate, + bool randomAccess) +{ + Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, + randomAccess); + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(state->sortcontext); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "begin index sort: workMem = %d, randomAccess = %c", + workMem, randomAccess ? 't' : 'f'); +#endif + + state->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel); + + state->comparetup = comparetup_index_btree; + state->copytup = copytup_index; + state->writetup = writetup_index; + state->readtup = readtup_index; + state->sortKeys = ssup; + + state->heapRel = heapRel; + state->indexRel = indexRel; + + MemoryContextSwitchTo(oldcontext); + + return state; +} + Tuplesortstate * tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation, bool nullsFirstFlag, int workMem, diff --git a/src/include/access/gist.h b/src/include/access/gist.h index 6902f4115b..eeea53a8f1 100644 --- a/src/include/access/gist.h +++ b/src/include/access/gist.h @@ -34,7 +34,8 @@ #define GIST_EQUAL_PROC 7 #define GIST_DISTANCE_PROC 8 #define GIST_FETCH_PROC 9 -#define GISTNProcs 9 +#define GIST_SORTSUPPORT_PROC 10 +#define GISTNProcs 10 /* * Page opaque data in a GiST index page. diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h index f80694bf9a..b7b6ca13ac 100644 --- a/src/include/access/gist_private.h +++ b/src/include/access/gist_private.h @@ -486,12 +486,15 @@ extern IndexTuple gistgetadjusted(Relation r, GISTSTATE *giststate); extern IndexTuple gistFormTuple(GISTSTATE *giststate, Relation r, Datum *attdata, bool *isnull, bool isleaf); +extern IndexTuple gistCompressValuesAndFormTuple(GISTSTATE *giststate, Relation r, + Datum attdata[], bool isnull[], bool isleaf, Datum compatt[]); extern OffsetNumber gistchoose(Relation r, Page p, IndexTuple it, GISTSTATE *giststate); extern void GISTInitBuffer(Buffer b, uint32 f); +extern void gistinitpage(Page page, uint32 f); extern void gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e, Datum k, Relation r, Page pg, OffsetNumber o, bool l, bool isNull); diff --git a/src/include/catalog/pg_amproc.dat b/src/include/catalog/pg_amproc.dat index 020b7413cc..1a52302f7e 100644 --- a/src/include/catalog/pg_amproc.dat +++ b/src/include/catalog/pg_amproc.dat @@ -409,6 +409,8 @@ amproc => 'gist_point_distance' }, { amprocfamily => 'gist/point_ops', amproclefttype => 'point', amprocrighttype => 'point', amprocnum => '9', amproc => 'gist_point_fetch' }, +{ amprocfamily => 'gist/point_ops', amproclefttype => 'point', + amprocrighttype => 'point', amprocnum => '10', amproc => 'gist_point_sortsupport' }, { amprocfamily => 'gist/box_ops', amproclefttype => 'box', amprocrighttype => 'box', amprocnum => '1', amproc => 'gist_box_consistent' }, { amprocfamily => 'gist/box_ops', amproclefttype => 'box', diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h index 4521de18e1..3f1ceac3d2 100644 --- a/src/include/utils/tuplesort.h +++ b/src/include/utils/tuplesort.h @@ -26,6 +26,7 @@ #include "fmgr.h" #include "storage/dsm.h" #include "utils/relcache.h" +#include "utils/sortsupport.h" /* @@ -209,6 +210,11 @@ extern Tuplesortstate *tuplesort_begin_index_hash(Relation heapRel, uint32 max_buckets, int workMem, SortCoordinate coordinate, bool randomAccess); +extern Tuplesortstate *tuplesort_begin_index_gist(Relation heapRel, + Relation indexRel, + SortSupport ssup, + int workMem, SortCoordinate coordinate, + bool randomAccess); extern Tuplesortstate *tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation, bool nullsFirstFlag, -- 2.20.1