From 1de0069c34abb3c323daf0da7e5346f15652ae6d Mon Sep 17 00:00:00 2001 From: Andrey Date: Sun, 25 Aug 2019 14:18:31 +0500 Subject: [PATCH 2/2] Implement GiST build using sort support --- doc/src/sgml/gist.sgml | 51 +++++- src/backend/access/gist/gistbuild.c | 220 ++++++++++++++++++++++++- src/backend/access/gist/gistproc.c | 21 ++- src/backend/access/gist/gistutil.c | 30 +++- src/backend/access/gist/gistvalidate.c | 7 +- src/backend/utils/sort/tuplesort.c | 37 +++++ src/include/access/gist.h | 3 +- src/include/access/gist_private.h | 3 + src/include/catalog/pg_amproc.dat | 2 + src/include/utils/tuplesort.h | 6 + 10 files changed, 362 insertions(+), 18 deletions(-) diff --git a/doc/src/sgml/gist.sgml b/doc/src/sgml/gist.sgml index a7eec1e949..0e67863a55 100644 --- a/doc/src/sgml/gist.sgml +++ b/doc/src/sgml/gist.sgml @@ -269,7 +269,7 @@ CREATE INDEX ON my_table USING GIST (my_inet_column inet_ops); There are five methods that an index operator class for - GiST must provide, and four that are optional. + GiST must provide, and five that are optional. Correctness of the index is ensured by proper implementation of the same, consistent and union methods, while efficiency (size and speed) of the @@ -287,7 +287,8 @@ CREATE INDEX ON my_table USING GIST (my_inet_column inet_ops); if the operator class wishes to support ordered scans (nearest-neighbor searches). The optional ninth method fetch is needed if the operator class wishes to support index-only scans, except when the - compress method is omitted. + compress method is omitted. The sortsupport + method is also optional and is used diring fast GiST build. @@ -939,6 +940,52 @@ my_fetch(PG_FUNCTION_ARGS) + + sortsupport + + + Returns a comparator, suitable for fast GiST build. + + + + The SQL declaration of the function must look like this: + + +CREATE OR REPLACE FUNCTION my_sortsupport(internal) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + + + The argument is a pointer to a SortSupport struct. + + + + The matching code in the C module could then follow this skeleton: + + +PG_FUNCTION_INFO_V1(my_sortsupport); + +static int +my_fastcmp(Datum x, Datum y, SortSupport ssup) +{ + /* esteblish order between x and y */ + + return z1 == z2 ? 0 : z1 > z2 ? 1 : -1; +} + +Datum +my_sortsupport(PG_FUNCTION_ARGS) +{ + SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); + + ssup->comparator = my_fastcmp; + PG_RETURN_VOID(); +} + + + + diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c index 739846a257..25ede54ae8 100644 --- a/src/backend/access/gist/gistbuild.c +++ b/src/backend/access/gist/gistbuild.c @@ -28,6 +28,8 @@ #include "storage/smgr.h" #include "utils/memutils.h" #include "utils/rel.h" +#include "utils/regproc.h" +#include "utils/tuplesort.h" /* Step of index tuples for check whether to switch to buffering build mode */ #define BUFFERING_MODE_SWITCH_CHECK_STEP 256 @@ -53,6 +55,15 @@ typedef enum GIST_BUFFERING_ACTIVE /* in buffering build mode */ } GistBufferingMode; +/* + * Status record for spooling/sorting phase. + */ +typedef struct +{ + Tuplesortstate *sortstate; /* state data for tuplesort.c */ + Relation index; +} GSpool; + /* Working state for gistbuild and its callback */ typedef struct { @@ -74,6 +85,7 @@ typedef struct HTAB *parentMap; GistBufferingMode bufferingMode; + GSpool *spool; } GISTBuildState; /* prototypes for private functions */ @@ -107,6 +119,174 @@ static void gistMemorizeParent(GISTBuildState *buildstate, BlockNumber child, static void gistMemorizeAllDownlinks(GISTBuildState *buildstate, Buffer parent); static BlockNumber gistGetParent(GISTBuildState *buildstate, BlockNumber child); +static GSpool *gist_spoolinit(Relation heap, Relation index, SortSupport ssup); +static void gist_spool(GSpool *gspool, ItemPointer self, Datum *values, bool *isnull); +static void gist_indexsortbuild(GISTBuildState *state); +static void gist_indexsortbuild_flush(Relation rel, Page page, BlockNumber blockno, bool isNew); + +/* + * create and initialize a spool structure to sort tuples + */ +static GSpool * +gist_spoolinit(Relation heap, Relation index, SortSupport ssup) +{ + GSpool *gspool = (GSpool *) palloc0(sizeof(GSpool)); + + gspool->index = index; + + /* + * We size the sort area as maintenance_work_mem rather than work_mem to + * speed index creation. This should be OK since a single backend can't + * run multiple index creations in parallel. + */ + gspool->sortstate = tuplesort_begin_index_gist(heap, + index, + ssup, + maintenance_work_mem, + NULL, + false); + + return gspool; +} + +/* + * Flush page contents to actual page at blockno + * We have expected block number, because GiST build relies on that pages + * will be allocated in continous segments. This simplifies allocation + * logic. + */ +static void +gist_indexsortbuild_flush(Relation rel, Page page, BlockNumber blockno, bool isNew) +{ + OffsetNumber i, + maxoff; + Page newpage; + + Buffer buffer = ReadBuffer(rel, isNew ? P_NEW : blockno); + GISTInitBuffer(buffer, GistPageIsLeaf(page) ? F_LEAF : 0); + /* If the page is new - check that it was allocated correctly */ + Assert(BufferGetBlockNumber(buffer) == blockno); + + LockBuffer(buffer, GIST_EXCLUSIVE); + newpage = BufferGetPage(buffer); + + maxoff = PageGetMaxOffsetNumber(page); + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) + { + ItemId itemid = PageGetItemId(page, i); + IndexTuple tuple = (IndexTuple) PageGetItem(page, itemid); + gistfillbuffer(newpage, &tuple, + 1, InvalidOffsetNumber); + } + MarkBufferDirty(buffer); + UnlockReleaseBuffer(buffer); +} + +/* + * given a spool loaded by successive calls to _h_spool, + * create an entire index. + */ +static void +gist_indexsortbuild(GISTBuildState *state) +{ + GSpool *gspool = state->spool; + /* + * Build constructs GiST by levels. Level is always allocated + * sequentially from level_start until level_end. + */ + BlockNumber level_start = GIST_ROOT_BLKNO + 1; + BlockNumber level_end = level_start; + BlockNumber prev_level_start; + IndexTuple itup; + /* + * We keep one page in memory for the special case + * When layer will have only on page - we will place it + * to ROOT_BLOCK_NO + */ + Page page = palloc(BLCKSZ); + gistinitpage(page, F_LEAF); + + tuplesort_performsort(gspool->sortstate); + + /* Create a first layer of leaf pages */ + while ((itup = tuplesort_getindextuple(gspool->sortstate, true)) != NULL) + { + if (PageGetFreeSpace(page) >= IndexTupleSize(itup) + sizeof(ItemIdData) + state->freespace) + { + gistfillbuffer(page, &itup, 1, InvalidOffsetNumber); + } + else + { + gist_indexsortbuild_flush(state->indexrel, page, level_end, true); + level_end++; + gistinitpage(page, F_LEAF); + gistfillbuffer(page, &itup, 1, InvalidOffsetNumber); + } + } + + /* Construct internal levels */ + do + { + /* If previous level had only one page - that page is a root */ + if (level_start == level_end) + { + gist_indexsortbuild_flush(state->indexrel, page, GIST_ROOT_BLKNO, + false); + return; + } + + gist_indexsortbuild_flush(state->indexrel, page, level_end, true); + level_end++; + gistinitpage(page, 0); + + prev_level_start = level_start; + level_start = level_end; + + for (BlockNumber i = prev_level_start; i < level_start; i++) + { + /* For each page on previous level we form one tuple */ + Buffer lower_buffer = ReadBuffer(state->indexrel, i); + Page lower_page; + IndexTuple union_tuple; + MemoryContext oldCtx = MemoryContextSwitchTo(state->giststate->tempCxt); + int vect_len; + LockBuffer(lower_buffer, GIST_SHARE); + lower_page = BufferGetPage(lower_buffer); + + IndexTuple *itvec = gistextractpage(lower_page, &vect_len); + union_tuple = gistunion(state->indexrel, itvec, vect_len, + state->giststate); + ItemPointerSetBlockNumber(&(union_tuple->t_tid), i); + + if (PageGetFreeSpace(page) >= IndexTupleSize(union_tuple) + sizeof(ItemIdData) + state->freespace) + { + gistfillbuffer(page, &union_tuple, 1, InvalidOffsetNumber); + } + else + { + gist_indexsortbuild_flush(state->indexrel, page, level_end, true); + level_end++; + gistinitpage(page, 0); + gistfillbuffer(page, &union_tuple, 1, InvalidOffsetNumber); + } + + UnlockReleaseBuffer(lower_buffer); + MemoryContextSwitchTo(oldCtx); + MemoryContextReset(state->giststate->tempCxt); + } + } while (true); +} + +/* + * spool an index entry into the sort file. + */ +static void +gist_spool(GSpool *gspool, ItemPointer self, Datum *values, bool *isnull) +{ + tuplesort_putindextuplevalues(gspool->sortstate, gspool->index, + self, values, isnull); +} + /* * Main entry point to GiST index build. Initially calls insert over and over, * but switches to more efficient buffering build algorithm after a certain @@ -121,11 +301,15 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo) Buffer buffer; Page page; MemoryContext oldcxt = CurrentMemoryContext; - int fillfactor; + int fillfactor, i; + Oid SortSupportFnOids[INDEX_MAX_KEYS]; + bool hasallsortsupports = true; + int keyscount = IndexRelationGetNumberOfKeyAttributes(index); buildstate.indexrel = index; buildstate.heaprel = heap; + buildstate.spool = NULL; if (index->rd_options) { /* Get buffering mode from the options string */ @@ -152,6 +336,24 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo) /* Calculate target amount of free space to leave on pages */ buildstate.freespace = BLCKSZ * (100 - fillfactor) / 100; + for (i = 0; i < keyscount; i++) + { + SortSupportFnOids[i] = index_getprocid(index, i + 1, GIST_SORTSUPPORT_PROC); + if (!OidIsValid(SortSupportFnOids[i])) + { + hasallsortsupports = false; + break; + } + } + if (hasallsortsupports) + { + SortSupport sort = palloc0(sizeof(SortSupportData) * keyscount); + for (i = 0; i < keyscount; i++) + OidFunctionCall1(SortSupportFnOids[i], PointerGetDatum(sort + i)); + buildstate.bufferingMode = GIST_BUFFERING_DISABLED; + buildstate.spool = gist_spoolinit(heap, index, sort); + } + /* * We expect to be called exactly once for any index relation. If that's * not the case, big trouble's what we have. @@ -208,6 +410,12 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo) gistFreeBuildBuffers(buildstate.gfbb); } + if (buildstate.spool) + { + gist_indexsortbuild(&buildstate); + tuplesort_end(buildstate.spool->sortstate); + } + /* okay, all heap tuples are indexed */ MemoryContextSwitchTo(oldcxt); MemoryContextDelete(buildstate.giststate->tempCxt); @@ -449,14 +657,20 @@ gistBuildCallback(Relation index, GISTBuildState *buildstate = (GISTBuildState *) state; IndexTuple itup; MemoryContext oldCtx; + Datum compressed_values[INDEX_MAX_KEYS]; oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt); /* form an index tuple and point it at the heap tuple */ - itup = gistFormTuple(buildstate->giststate, index, values, isnull, true); + itup = gistCompressValuesAndFormTuple(buildstate->giststate, index, values, isnull, true, compressed_values); itup->t_tid = *tid; - if (buildstate->bufferingMode == GIST_BUFFERING_ACTIVE) + if (buildstate->spool) + { + if (tupleIsAlive) + gist_spool(buildstate->spool, (ItemPointer)itup, compressed_values, isnull); + } + else if (buildstate->bufferingMode == GIST_BUFFERING_ACTIVE) { /* We have buffers, so use them. */ gistBufferingBuildInsert(buildstate, itup); diff --git a/src/backend/access/gist/gistproc.c b/src/backend/access/gist/gistproc.c index 7d50550525..056fa10080 100644 --- a/src/backend/access/gist/gistproc.c +++ b/src/backend/access/gist/gistproc.c @@ -31,6 +31,11 @@ static bool gist_box_leaf_consistent(BOX *key, BOX *query, StrategyNumber strategy); static bool rtree_internal_consistent(BOX *key, BOX *query, StrategyNumber strategy); +static int64 part_bits32_by2(uint32 x); +static int64 interleave_bits32(uint32 x, uint32 y); +static inline uint64 point_zorder_internal(Point *p); +static int gist_bbox_fastcmp(Datum x, Datum y, SortSupport ssup); + /* Minimum accepted ratio of split */ #define LIMIT_RATIO 0.3 @@ -1546,6 +1551,8 @@ gist_poly_distance(PG_FUNCTION_ARGS) PG_RETURN_FLOAT8(distance); } +/* Z-order routines */ +/* Interleave 32 bits with zeroes */ static int64 part_bits32_by2(uint32 x) { @@ -1560,12 +1567,14 @@ part_bits32_by2(uint32 x) return n; } +/* Compute Z-order for integers */ static int64 interleave_bits32(uint32 x, uint32 y) { return part_bits32_by2(x) | (part_bits32_by2(y) << 1); } +/* Compute Z-oder for point */ static inline uint64 point_zorder_internal(Point *p) { @@ -1573,16 +1582,20 @@ point_zorder_internal(Point *p) float f; uint32 i; } a,b; + if (isnan(a.f)) + a.i = INT32_MAX; + if (isnan(b.f)) + b.i = INT32_MAX; a.f = p->x; b.f = p->y; return interleave_bits32(a.i, b.i); } static int -gist_point_fastcmp(Datum x, Datum y, SortSupport ssup) +gist_bbox_fastcmp(Datum x, Datum y, SortSupport ssup) { - Point *p1 = DatumGetPointP(x); - Point *p2 = DatumGetPointP(y); + Point *p1 = &(DatumGetBoxP(x)->low); + Point *p2 = &(DatumGetBoxP(y)->low); uint64 z1 = point_zorder_internal(p1); uint64 z2 = point_zorder_internal(p2); @@ -1594,6 +1607,6 @@ gist_point_sortsupport(PG_FUNCTION_ARGS) { SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); - ssup->comparator = gist_point_fastcmp; + ssup->comparator = gist_bbox_fastcmp; PG_RETURN_VOID(); } diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c index 553a6d67b1..b3a675df11 100644 --- a/src/backend/access/gist/gistutil.c +++ b/src/backend/access/gist/gistutil.c @@ -173,6 +173,7 @@ gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, datum = index_getattr(itvec[j], i + 1, giststate->leafTupdesc, &IsNull); + if (IsNull) continue; @@ -575,6 +576,13 @@ gistFormTuple(GISTSTATE *giststate, Relation r, Datum attdata[], bool isnull[], bool isleaf) { Datum compatt[INDEX_MAX_KEYS]; + return gistCompressValuesAndFormTuple(giststate, r, attdata, isnull, isleaf, compatt); +} + +IndexTuple +gistCompressValuesAndFormTuple(GISTSTATE *giststate, Relation r, + Datum attdata[], bool isnull[], bool isleaf, Datum compatt[]) +{ int i; IndexTuple res; @@ -745,14 +753,10 @@ gistpenalty(GISTSTATE *giststate, int attno, * Initialize a new index page */ void -GISTInitBuffer(Buffer b, uint32 f) +gistinitpage(Page page, uint32 f) { - GISTPageOpaque opaque; - Page page; - Size pageSize; - - pageSize = BufferGetPageSize(b); - page = BufferGetPage(b); + GISTPageOpaque opaque; + Size pageSize = BLCKSZ; PageInit(page, pageSize, sizeof(GISTPageOpaqueData)); opaque = GistPageGetOpaque(page); @@ -763,6 +767,18 @@ GISTInitBuffer(Buffer b, uint32 f) opaque->gist_page_id = GIST_PAGE_ID; } +/* + * Initialize a new index buffer + */ +void +GISTInitBuffer(Buffer b, uint32 f) +{ + Page page; + + page = BufferGetPage(b); + gistinitpage(page, f); +} + /* * Verify that a freshly-read page looks sane. */ diff --git a/src/backend/access/gist/gistvalidate.c b/src/backend/access/gist/gistvalidate.c index dfc1a87a75..bc65b9c6bf 100644 --- a/src/backend/access/gist/gistvalidate.c +++ b/src/backend/access/gist/gistvalidate.c @@ -140,6 +140,10 @@ gistvalidate(Oid opclassoid) 5, 5, INTERNALOID, opcintype, INT2OID, OIDOID, INTERNALOID); break; + case GIST_SORTSUPPORT_PROC: + ok = check_amproc_signature(procform->amproc, VOIDOID, true, + 1, 1, INTERNALOID); + break; default: ereport(INFO, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), @@ -259,7 +263,8 @@ gistvalidate(Oid opclassoid) (opclassgroup->functionset & (((uint64) 1) << i)) != 0) continue; /* got it */ if (i == GIST_DISTANCE_PROC || i == GIST_FETCH_PROC || - i == GIST_COMPRESS_PROC || i == GIST_DECOMPRESS_PROC) + i == GIST_COMPRESS_PROC || i == GIST_DECOMPRESS_PROC || + i == GIST_SORTSUPPORT_PROC) continue; /* optional methods */ ereport(INFO, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c index 7947d2bca0..d1231a80e5 100644 --- a/src/backend/utils/sort/tuplesort.c +++ b/src/backend/utils/sort/tuplesort.c @@ -1095,6 +1095,43 @@ tuplesort_begin_index_hash(Relation heapRel, return state; } +Tuplesortstate * +tuplesort_begin_index_gist(Relation heapRel, + Relation indexRel, + SortSupport ssup, + int workMem, + SortCoordinate coordinate, + bool randomAccess) +{ + Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, + randomAccess); + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(state->sortcontext); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "begin index sort: workMem = %d, randomAccess = %c", + workMem, randomAccess ? 't' : 'f'); +#endif + + state->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel); + + state->comparetup = comparetup_index_btree; + state->copytup = copytup_index; + state->writetup = writetup_index; + state->readtup = readtup_index; + state->sortKeys = ssup; + + state->heapRel = heapRel; + state->indexRel = indexRel; + + MemoryContextSwitchTo(oldcontext); + + return state; +} + Tuplesortstate * tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation, bool nullsFirstFlag, int workMem, diff --git a/src/include/access/gist.h b/src/include/access/gist.h index 8292956cc0..282c24a19a 100644 --- a/src/include/access/gist.h +++ b/src/include/access/gist.h @@ -35,7 +35,8 @@ #define GIST_EQUAL_PROC 7 #define GIST_DISTANCE_PROC 8 #define GIST_FETCH_PROC 9 -#define GISTNProcs 9 +#define GIST_SORTSUPPORT_PROC 10 +#define GISTNProcs 10 /* * Page opaque data in a GiST index page. diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h index a409975db1..d31278daf0 100644 --- a/src/include/access/gist_private.h +++ b/src/include/access/gist_private.h @@ -495,12 +495,15 @@ extern IndexTuple gistgetadjusted(Relation r, GISTSTATE *giststate); extern IndexTuple gistFormTuple(GISTSTATE *giststate, Relation r, Datum *attdata, bool *isnull, bool isleaf); +extern IndexTuple gistCompressValuesAndFormTuple(GISTSTATE *giststate, Relation r, + Datum attdata[], bool isnull[], bool isleaf, Datum compatt[]); extern OffsetNumber gistchoose(Relation r, Page p, IndexTuple it, GISTSTATE *giststate); extern void GISTInitBuffer(Buffer b, uint32 f); +extern void gistinitpage(Page page, uint32 f); extern void gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e, Datum k, Relation r, Page pg, OffsetNumber o, bool l, bool isNull); diff --git a/src/include/catalog/pg_amproc.dat b/src/include/catalog/pg_amproc.dat index 5e705019b4..42eced0cd7 100644 --- a/src/include/catalog/pg_amproc.dat +++ b/src/include/catalog/pg_amproc.dat @@ -412,6 +412,8 @@ amproc => 'gist_point_distance' }, { amprocfamily => 'gist/point_ops', amproclefttype => 'point', amprocrighttype => 'point', amprocnum => '9', amproc => 'gist_point_fetch' }, +{ amprocfamily => 'gist/point_ops', amproclefttype => 'point', + amprocrighttype => 'point', amprocnum => '10', amproc => 'gist_point_sortsupport' }, { amprocfamily => 'gist/box_ops', amproclefttype => 'box', amprocrighttype => 'box', amprocnum => '1', amproc => 'gist_box_consistent' }, { amprocfamily => 'gist/box_ops', amproclefttype => 'box', diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h index d774bc1152..d404fe4c46 100644 --- a/src/include/utils/tuplesort.h +++ b/src/include/utils/tuplesort.h @@ -25,6 +25,7 @@ #include "executor/tuptable.h" #include "storage/dsm.h" #include "utils/relcache.h" +#include "utils/sortsupport.h" /* @@ -208,6 +209,11 @@ extern Tuplesortstate *tuplesort_begin_index_hash(Relation heapRel, uint32 max_buckets, int workMem, SortCoordinate coordinate, bool randomAccess); +extern Tuplesortstate *tuplesort_begin_index_gist(Relation heapRel, + Relation indexRel, + SortSupport ssup, + int workMem, SortCoordinate coordinate, + bool randomAccess); extern Tuplesortstate *tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation, bool nullsFirstFlag, -- 2.20.1