Reduce the amount of WAL generated by CREATE INDEX for GiST, GIN and SP-GiST
Hi,
I want to propose a set of patches that reduce the WAL traffic
generated by CREATE INDEX for GiST, GIN and SP-GiST. As with B-tree
and RUM, we can now log the index pages of these access methods only once,
at the end of the index build process. The implementation is based on generic_xlog.
Not only does this decrease the amount of WAL generated, it also completely
eliminates the WAL overhead if an error occurs during the index build.
I have also attached the SQL scripts I used to measure the xlog size.
They show that pg_wal_lsn_diff() for the patched version is 3 to 5 times
smaller than for master.
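To give an idea of the measurement, a script of roughly this shape can be used
(the table, opclass and row count below are only an example, not the attached
scripts themselves):

    CREATE TABLE wal_test (ts tsvector);
    INSERT INTO wal_test
        SELECT to_tsvector('english', md5(i::text))
        FROM generate_series(1, 500000) AS i;

    -- remember the current WAL position in a psql variable
    SELECT pg_current_wal_lsn() AS lsn_before \gset

    CREATE INDEX wal_test_gin ON wal_test USING gin (ts);

    -- bytes of WAL written by the index build
    SELECT pg_wal_lsn_diff(pg_current_wal_lsn(), :'lsn_before') AS wal_bytes;

Running the same script on master and on a patched server gives the two numbers
being compared.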
I am not sure whether regression tests are needed, since this is just an
optimization, but I do not mind adding them if someone feels it is necessary.
--
Anastasia Lubennikova
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
Attachments:
0001_generate_xlog_for_rel_v0.patch (text/x-patch)
commit 179285fb5175d715c20fc95eca3087b6a1899ed9
Author: Anastasia <a.lubennikova@postgrespro.ru>
Date: Wed Feb 28 17:45:54 2018 +0300
add function generate_xlog_for_rel()
diff --git a/src/backend/access/transam/generic_xlog.c b/src/backend/access/transam/generic_xlog.c
index ce02354..dd2c041 100644
--- a/src/backend/access/transam/generic_xlog.c
+++ b/src/backend/access/transam/generic_xlog.c
@@ -545,3 +545,34 @@ generic_mask(char *page, BlockNumber blkno)
mask_unused_space(page);
}
+
+/*
+ * Function to write generic xlog for every existing block of a relation.
+ * Caller is responsible for locking the relation exclusively.
+ */
+void
+generate_xlog_for_rel(Relation rel)
+{
+ BlockNumber blkno;
+ BlockNumber nblocks;
+
+ nblocks = RelationGetNumberOfBlocks(rel);
+
+ elog(DEBUG2, "generate_xlog_for_rel '%s', nblocks %u BEGIN.",
+ RelationGetRelationName(rel), nblocks);
+
+ for (blkno = 0; blkno < nblocks; blkno++)
+ {
+ Buffer buffer;
+ GenericXLogState *state;
+
+ CHECK_FOR_INTERRUPTS();
+
+ buffer = ReadBuffer(rel, blkno);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+
+ state = GenericXLogStart(rel);
+ GenericXLogRegisterBuffer(state, buffer, GENERIC_XLOG_FULL_IMAGE);
+ GenericXLogFinish(state);
+
+ UnlockReleaseBuffer(buffer);
+ }
+ elog(DEBUG2, "generate_xlog_for_rel '%s' END.", RelationGetRelationName(rel));
+}
diff --git a/src/include/access/generic_xlog.h b/src/include/access/generic_xlog.h
index b23e1f6..33be157 100644
--- a/src/include/access/generic_xlog.h
+++ b/src/include/access/generic_xlog.h
@@ -42,4 +42,7 @@ extern const char *generic_identify(uint8 info);
extern void generic_desc(StringInfo buf, XLogReaderState *record);
extern void generic_mask(char *pagedata, BlockNumber blkno);
+/* other utils */
+void generate_xlog_for_rel(Relation rel);
+
#endif /* GENERIC_XLOG_H */
0002_gin_optimal_wal_v0.patch (text/x-patch)
commit e176bd8f650a4b112fa2e61960a27cb57329138c
Author: Anastasia <a.lubennikova@postgrespro.ru>
Date: Wed Feb 28 17:53:15 2018 +0300
optimal WAL for gin
diff --git a/src/backend/access/gin/ginbtree.c b/src/backend/access/gin/ginbtree.c
index 37070b3..c615d3c 100644
--- a/src/backend/access/gin/ginbtree.c
+++ b/src/backend/access/gin/ginbtree.c
@@ -388,7 +388,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
/* It will fit, perform the insertion */
START_CRIT_SECTION();
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
XLogBeginInsert();
XLogRegisterBuffer(0, stack->buffer, REGBUF_STANDARD);
@@ -409,7 +409,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
MarkBufferDirty(childbuf);
}
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
XLogRecPtr recptr;
ginxlogInsert xlrec;
@@ -566,7 +566,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
}
/* write WAL record */
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
XLogRecPtr recptr;
diff --git a/src/backend/access/gin/gindatapage.c b/src/backend/access/gin/gindatapage.c
index f9daaba..349baa7 100644
--- a/src/backend/access/gin/gindatapage.c
+++ b/src/backend/access/gin/gindatapage.c
@@ -592,7 +592,7 @@ dataBeginPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
* Great, all the items fit on a single page. If needed, prepare data
* for a WAL record describing the changes we'll make.
*/
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
computeLeafRecompressWALData(leaf);
/*
@@ -629,6 +629,7 @@ dataBeginPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
* subsequent insertions will probably also go to the end. This packs
* the index somewhat tighter when appending to a table, which is very
* common.
+ *
*/
if (!btree->isBuild)
{
@@ -718,7 +719,7 @@ dataExecPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
dataPlaceToPageLeafRecompress(buf, leaf);
/* If needed, register WAL data built by computeLeafRecompressWALData */
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
XLogRegisterBufData(0, leaf->walinfo, leaf->walinfolen);
}
@@ -1151,7 +1152,7 @@ dataExecPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
pitem = (PostingItem *) insertdata;
GinDataPageAddPostingItem(page, pitem, off);
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
/*
* This must be static, because it has to survive until XLogInsert,
@@ -1768,6 +1769,7 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
Pointer ptr;
int nrootitems;
int rootsize;
+ bool is_build = (buildStats != NULL);
/* Construct the new root page in memory first. */
tmppage = (Page) palloc(BLCKSZ);
@@ -1815,7 +1817,7 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
PageRestoreTempPage(tmppage, page);
MarkBufferDirty(buffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !is_build)
{
XLogRecPtr recptr;
ginxlogCreatePostingTree data;
diff --git a/src/backend/access/gin/ginentrypage.c b/src/backend/access/gin/ginentrypage.c
index 8107697..b0fdb23 100644
--- a/src/backend/access/gin/ginentrypage.c
+++ b/src/backend/access/gin/ginentrypage.c
@@ -571,7 +571,7 @@ entryExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
elog(ERROR, "failed to add item to index page in \"%s\"",
RelationGetRelationName(btree->index));
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
/*
* This must be static, because it has to survive until XLogInsert,
diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c
index 23f7285..1855894 100644
--- a/src/backend/access/gin/gininsert.c
+++ b/src/backend/access/gin/gininsert.c
@@ -17,6 +17,7 @@
#include "access/gin_private.h"
#include "access/ginxlog.h"
#include "access/xloginsert.h"
+#include "access/generic_xlog.h"
#include "catalog/index.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
@@ -192,6 +193,7 @@ ginEntryInsert(GinState *ginstate,
buildStats->nEntries++;
ginPrepareEntryScan(&btree, attnum, key, category, ginstate);
+ btree.isBuild = (buildStats != NULL);
stack = ginFindLeafPage(&btree, false, NULL);
page = BufferGetPage(stack->buffer);
@@ -342,23 +344,6 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
GinInitBuffer(RootBuffer, GIN_LEAF);
MarkBufferDirty(RootBuffer);
- if (RelationNeedsWAL(index))
- {
- XLogRecPtr recptr;
- Page page;
-
- XLogBeginInsert();
- XLogRegisterBuffer(0, MetaBuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
- XLogRegisterBuffer(1, RootBuffer, REGBUF_WILL_INIT);
-
- recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX);
-
- page = BufferGetPage(RootBuffer);
- PageSetLSN(page, recptr);
-
- page = BufferGetPage(MetaBuffer);
- PageSetLSN(page, recptr);
- }
UnlockReleaseBuffer(MetaBuffer);
UnlockReleaseBuffer(RootBuffer);
@@ -413,7 +398,10 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
* Update metapage stats
*/
buildstate.buildStats.nTotalPages = RelationGetNumberOfBlocks(index);
- ginUpdateStats(index, &buildstate.buildStats);
+ ginUpdateStats(index, &buildstate.buildStats, true);
+
+ /*
+ * Create generic WAL records for all pages of the relation, if necessary.
+ * It seems reasonable not to generate WAL if we received an interrupt
+ * signal.
+ */
+ CHECK_FOR_INTERRUPTS();
+ if (RelationNeedsWAL(index))
+ generate_xlog_for_rel(index);
/*
* Return statistics
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index 7bac7a1..a159a47 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -664,7 +664,7 @@ ginGetStats(Relation index, GinStatsData *stats)
* Note: nPendingPages and ginVersion are *not* copied over
*/
void
-ginUpdateStats(Relation index, const GinStatsData *stats)
+ginUpdateStats(Relation index, const GinStatsData *stats, bool is_build)
{
Buffer metabuffer;
Page metapage;
@@ -694,7 +694,7 @@ ginUpdateStats(Relation index, const GinStatsData *stats)
MarkBufferDirty(metabuffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !is_build)
{
XLogRecPtr recptr;
ginxlogUpdateMeta data;
diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c
index 398532d..4cd46c4 100644
--- a/src/backend/access/gin/ginvacuum.c
+++ b/src/backend/access/gin/ginvacuum.c
@@ -758,7 +758,7 @@ ginvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
/* Update the metapage with accurate page and entry counts */
idxStat.nTotalPages = npages;
- ginUpdateStats(info->index, &idxStat);
+ ginUpdateStats(info->index, &idxStat, false);
/* Finally, vacuum the FSM */
IndexFreeSpaceMapVacuum(info->index);
diff --git a/src/include/access/gin.h b/src/include/access/gin.h
index 0acdb88..7a9e9e0 100644
--- a/src/include/access/gin.h
+++ b/src/include/access/gin.h
@@ -71,6 +71,7 @@ extern int gin_pending_list_limit;
/* ginutil.c */
extern void ginGetStats(Relation index, GinStatsData *stats);
-extern void ginUpdateStats(Relation index, const GinStatsData *stats);
+extern void ginUpdateStats(Relation index,
+ const GinStatsData *stats, bool is_build);
#endif /* GIN_H */
0003_gist_optimal_wal_v0.patch (text/x-patch)
commit 5b42c832b92b08d6eff394c5608bf62791b704d6
Author: Anastasia <a.lubennikova@postgrespro.ru>
Date: Wed Feb 28 17:53:39 2018 +0300
optimal WAL for GIST
diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c
index 51c32e4..b32a323 100644
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -169,7 +169,7 @@ gistinsert(Relation r, Datum *values, bool *isnull,
values, isnull, true /* size is currently bogus */ );
itup->t_tid = *ht_ctid;
- gistdoinsert(r, itup, 0, giststate);
+ gistdoinsert(r, itup, 0, giststate, false);
/* cleanup */
MemoryContextSwitchTo(oldCxt);
@@ -215,7 +215,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
BlockNumber *newblkno,
Buffer leftchildbuf,
List **splitinfo,
- bool markfollowright)
+ bool markfollowright,
+ bool is_build)
{
BlockNumber blkno = BufferGetBlockNumber(buffer);
Page page = BufferGetPage(buffer);
@@ -451,7 +452,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
* insertion for that. NB: The number of pages and data segments
* specified here must match the calculations in gistXLogSplit()!
*/
- if (RelationNeedsWAL(rel))
+ if (RelationNeedsWAL(rel) && !is_build)
XLogEnsureRecordSpace(npage, 1 + npage * 2);
START_CRIT_SECTION();
@@ -472,18 +473,20 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer));
dist->page = BufferGetPage(dist->buffer);
- /* Write the WAL record */
- if (RelationNeedsWAL(rel))
+ /*
+ * Write the WAL record.
+ * Do not write XLog entry if the insertion is caused by
+ * index build process.
+ */
+ if (RelationNeedsWAL(rel) && !is_build)
recptr = gistXLogSplit(is_leaf,
- dist, oldrlink, oldnsn, leftchildbuf,
- markfollowright);
+ dist, oldrlink, oldnsn, leftchildbuf,
+ markfollowright);
else
recptr = gistGetFakeLSN(rel);
for (ptr = dist; ptr; ptr = ptr->next)
- {
PageSetLSN(ptr->page, recptr);
- }
/*
* Return the new child buffers to the caller.
@@ -537,7 +540,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
if (BufferIsValid(leftchildbuf))
MarkBufferDirty(leftchildbuf);
- if (RelationNeedsWAL(rel))
+
+ if (RelationNeedsWAL(rel) && !is_build)
{
OffsetNumber ndeloffs = 0,
deloffs[1];
@@ -560,6 +564,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
PageSetLSN(page, recptr);
}
+
if (newblkno)
*newblkno = blkno;
}
@@ -576,17 +581,28 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
* the full page image. There's a chicken-and-egg problem: if we updated
* the child pages first, we wouldn't know the recptr of the WAL record
* we're about to write.
+ *
+ * We use fake LSNs for insertions caused by the index build. When it is
+ * finished, we write a generic_xlog entry for each index page and update
+ * all LSNs. In order to keep NSNs less than LSNs after this update, we
+ * set the NSN to InvalidXLogRecPtr, which is the smallest possible NSN.
*/
+
if (BufferIsValid(leftchildbuf))
{
Page leftpg = BufferGetPage(leftchildbuf);
+ XLogRecPtr fakerecptr = InvalidXLogRecPtr;
- GistPageSetNSN(leftpg, recptr);
- GistClearFollowRight(leftpg);
+ if (!is_build)
+ GistPageSetNSN(leftpg, recptr);
+ else
+ GistPageSetNSN(leftpg, fakerecptr);
+ GistClearFollowRight(leftpg);
PageSetLSN(leftpg, recptr);
}
+
END_CRIT_SECTION();
return is_split;
@@ -598,7 +614,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
* so it does not bother releasing palloc'd allocations.
*/
void
-gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate)
+gistdoinsert(Relation r, IndexTuple itup, Size freespace,
+ GISTSTATE *giststate, bool is_build)
{
ItemId iid;
IndexTuple idxtuple;
@@ -610,6 +627,7 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate)
memset(&state, 0, sizeof(GISTInsertState));
state.freespace = freespace;
state.r = r;
+ state.is_build = is_build;
/* Start from the root */
firststack.blkno = GIST_ROOT_BLKNO;
@@ -1220,7 +1238,7 @@ gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
oldoffnum, NULL,
leftchild,
&splitinfo,
- true);
+ true, state->is_build);
/*
* Before recursing up in case the page was split, release locks on the
diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c
index 434f15f..bfc10cd 100644
--- a/src/backend/access/gist/gistbuild.c
+++ b/src/backend/access/gist/gistbuild.c
@@ -20,6 +20,7 @@
#include "access/gist_private.h"
#include "access/gistxlog.h"
#include "access/xloginsert.h"
+#include "access/generic_xlog.h"
#include "catalog/index.h"
#include "miscadmin.h"
#include "optimizer/cost.h"
@@ -178,18 +179,12 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
MarkBufferDirty(buffer);
- if (RelationNeedsWAL(index))
- {
- XLogRecPtr recptr;
-
- XLogBeginInsert();
- XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
-
- recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX);
- PageSetLSN(page, recptr);
- }
- else
- PageSetLSN(page, gistGetFakeLSN(heap));
+ /*
+ * Do not write index pages to WAL until the index build is finished.
+ * But we still need increasing LSNs on each page, so use FakeLSN,
+ * even for relations which eventually need WAL.
+ */
+ PageSetLSN(page, gistGetFakeLSN(heap));
UnlockReleaseBuffer(buffer);
@@ -223,6 +218,15 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
freeGISTstate(buildstate.giststate);
/*
+ * Create generic WAL records for all pages of the relation, if necessary.
+ * It seems reasonable not to generate WAL if we received an interrupt
+ * signal.
+ */
+ CHECK_FOR_INTERRUPTS();
+ if (RelationNeedsWAL(index))
+ generate_xlog_for_rel(index);
+
+ /*
* Return statistics
*/
result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
@@ -484,7 +488,7 @@ gistBuildCallback(Relation index,
* locked, we call gistdoinsert directly.
*/
gistdoinsert(index, itup, buildstate->freespace,
- buildstate->giststate);
+ buildstate->giststate, true);
}
/* Update tuple count and total size. */
@@ -690,7 +694,7 @@ gistbufferinginserttuples(GISTBuildState *buildstate, Buffer buffer, int level,
itup, ntup, oldoffnum, &placed_to_blk,
InvalidBuffer,
&splitinfo,
- false);
+ false, true);
/*
* If this is a root split, update the root path item kept in memory. This
diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c
index 55cccd2..cb86299 100644
--- a/src/backend/access/gist/gistutil.c
+++ b/src/backend/access/gist/gistutil.c
@@ -975,6 +975,7 @@ gistproperty(Oid index_oid, int attno,
* Temporary and unlogged GiST indexes are not WAL-logged, but we need LSNs
* to detect concurrent page splits anyway. This function provides a fake
* sequence of LSNs for that purpose.
+ * Persistent relations are also not WAL-logged while we build index.
*/
XLogRecPtr
gistGetFakeLSN(Relation rel)
@@ -995,7 +996,6 @@ gistGetFakeLSN(Relation rel)
* Unlogged relations are accessible from other backends, and survive
* (clean) restarts. GetFakeLSNForUnloggedRel() handles that for us.
*/
- Assert(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED);
return GetFakeLSNForUnloggedRel();
}
}
diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h
index 36ed724..0588fc7 100644
--- a/src/include/access/gist_private.h
+++ b/src/include/access/gist_private.h
@@ -241,6 +241,7 @@ typedef struct
{
Relation r;
Size freespace; /* free space to be left */
+ bool is_build;
GISTInsertStack *stack;
} GISTInsertState;
@@ -387,9 +388,9 @@ extern MemoryContext createTempGistContext(void);
extern GISTSTATE *initGISTstate(Relation index);
extern void freeGISTstate(GISTSTATE *giststate);
extern void gistdoinsert(Relation r,
- IndexTuple itup,
- Size freespace,
- GISTSTATE *GISTstate);
+ IndexTuple itup,
+ Size freespace,
+ GISTSTATE* giststate, bool is_build);
/* A List of these is returned from gistplacetopage() in *splitinfo */
typedef struct
@@ -404,7 +405,8 @@ extern bool gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
OffsetNumber oldoffnum, BlockNumber *newblkno,
Buffer leftchildbuf,
List **splitinfo,
- bool markleftchild);
+ bool markleftchild,
+ bool is_build);
extern SplitedPageLayout *gistSplit(Relation r, Page page, IndexTuple *itup,
int len, GISTSTATE *giststate);
0004_spgist_optimal_wal_v0.patch (text/x-patch)
commit fe4c80efb0f40a6b113ac8274a7acb3c576918b2
Author: Anastasia <a.lubennikova@postgrespro.ru>
Date: Wed Feb 28 17:54:00 2018 +0300
optimal WAL for spgist
diff --git a/src/backend/access/spgist/spgdoinsert.c b/src/backend/access/spgist/spgdoinsert.c
index 7bf26f8..2e08b00 100644
--- a/src/backend/access/spgist/spgdoinsert.c
+++ b/src/backend/access/spgist/spgdoinsert.c
@@ -289,7 +289,7 @@ addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple,
MarkBufferDirty(current->buffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
int flags;
@@ -516,7 +516,7 @@ moveLeafs(Relation index, SpGistState *state,
MarkBufferDirty(current->buffer);
MarkBufferDirty(nbuf);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
@@ -1334,7 +1334,7 @@ doPickSplit(Relation index, SpGistState *state,
saveCurrent.buffer = InvalidBuffer;
}
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
int flags;
@@ -1531,7 +1531,7 @@ spgAddNodeAction(Relation index, SpGistState *state,
MarkBufferDirty(current->buffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
@@ -1644,7 +1644,7 @@ spgAddNodeAction(Relation index, SpGistState *state,
MarkBufferDirty(saveCurrent.buffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
int flags;
@@ -1840,7 +1840,7 @@ spgSplitNodeAction(Relation index, SpGistState *state,
MarkBufferDirty(current->buffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
diff --git a/src/backend/access/spgist/spginsert.c b/src/backend/access/spgist/spginsert.c
index 34d9b48..0623bd8 100644
--- a/src/backend/access/spgist/spginsert.c
+++ b/src/backend/access/spgist/spginsert.c
@@ -21,6 +21,7 @@
#include "access/spgxlog.h"
#include "access/xlog.h"
#include "access/xloginsert.h"
+#include "access/generic_xlog.h"
#include "catalog/index.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
@@ -100,26 +101,6 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo)
SpGistInitBuffer(nullbuffer, SPGIST_LEAF | SPGIST_NULLS);
MarkBufferDirty(nullbuffer);
- if (RelationNeedsWAL(index))
- {
- XLogRecPtr recptr;
-
- XLogBeginInsert();
-
- /*
- * Replay will re-initialize the pages, so don't take full pages
- * images. No other data to log.
- */
- XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
- XLogRegisterBuffer(1, rootbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
- XLogRegisterBuffer(2, nullbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
-
- recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_CREATE_INDEX);
-
- PageSetLSN(BufferGetPage(metabuffer), recptr);
- PageSetLSN(BufferGetPage(rootbuffer), recptr);
- PageSetLSN(BufferGetPage(nullbuffer), recptr);
- }
END_CRIT_SECTION();
@@ -145,6 +126,9 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo)
SpGistUpdateMetaPage(index);
+ if (RelationNeedsWAL(index))
+ generate_xlog_for_rel(index);
+
result = (IndexBuildResult *) palloc0(sizeof(IndexBuildResult));
result->heap_tuples = result->index_tuples = reltuples;
Hi Anastasia,
On 2/28/18 11:03 AM, Anastasia Lubennikova wrote:
I want to propose a set of patches that reduce the WAL traffic
generated by CREATE INDEX for GiST, GIN and SP-GiST. As with B-tree
and RUM, we can now log the index pages of these access methods only once,
at the end of the index build process. The implementation is based on generic_xlog.
Not only does this decrease the amount of WAL generated, it also completely
eliminates the WAL overhead if an error occurs during the index build.
This seems to be a worthwhile patch, but it was submitted to the 2018-03
CF at the last moment with no prior discussion or review as far as I can
tell. It appears to be non-trivial and therefore not a good fit for the
last CF for PG11.
I have moved it to the 2018-09 CF.
Regards,
--
-David
david@pgmasters.net
On 28/02/18 18:03, Anastasia Lubennikova wrote:
Hi,
I want to propose a set of patches that reduce the WAL traffic
generated by CREATE INDEX for GiST, GIN and SP-GiST. As with B-tree
and RUM, we can now log the index pages of these access methods only once,
at the end of the index build process.
Makes sense. Is there any scenario where the current method is better?
In theory, doing another pass through the whole index, to create the WAL
records, requires more I/O. But in practice, it seems that the reduction
in WAL is almost certainly a win.
The implementation is based on generic_xlog.
Why? I think we should just add a log_relation() function in
xloginsert.c directly, alongside log_newpage_buffer().
This makes the assumption that all the pages in these indexes use the
standard page layout. I think that's a valid assumption, but it needs at
least a comment on that. And perhaps an Assert, to check that
pd_lower/upper look sane.
As a further optimization, would it be a win to WAL-log multiple pages
in each record?
This leaves the XLOG_*_CREATE_INDEX WAL record types unused, BTW.
- Heikki
On Tue, Jul 10, 2018 at 10:03 PM Heikki Linnakangas <hlinnaka@iki.fi> wrote:
On 28/02/18 18:03, Anastasia Lubennikova wrote:
I want to propose a set of patches that reduce the WAL traffic
generated by CREATE INDEX for GiST, GIN and SP-GiST. As with B-tree
and RUM, we can now log the index pages of these access methods only once,
at the end of the index build process.
Makes sense. Is there any scenario where the current method is better?
In theory, doing another pass through the whole index, to create the WAL
records, requires more I/O. But in practice, it seems that the reduction
in WAL is almost certainly a win.
It's frequently advised to move WAL to a separate storage device.
In that case it's not hard to imagine the current method being faster, when
the storage device containing the index is much more heavily loaded than the
device containing WAL. But despite being faster, it would still
consume far more overall I/O resources. I'm not sure whether we should
provide some option (or heuristics) to switch between the WAL methods...
------
Alexander Korotkov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
With Anastasia's consent I will be improving this patch further.
The attachment contains the next version of the patch set.
On 11.07.2018 00:03, Heikki Linnakangas wrote:
On 28/02/18 18:03, Anastasia Lubennikova wrote:
The implementation is based on generic_xlog.
Why? I think we should just add a log_relation() function in
xloginsert.c directly, alongside log_newpage_buffer().
I have some arguments for keeping this functionality in the generic_xlog module:
1. The xloginsert.c functions work at a low level of abstraction, directly on
buffers and pages.
2. The code built on the generic_xlog service functions looks more compact and
safer.
This makes the assumption that all the pages in these indexes use the
standard page layout. I think that's a valid assumption, but it needs at
least a comment on that. And perhaps an Assert, to check that
pd_lower/upper look sane.
Done
As a further optimization, would it be a win to WAL-log multiple pages
in each record?
In this version of the patch we use a simple optimization: we pack up to
XLR_NORMAL_MAX_BLOCK_ID blocks into each WAL record.
This leaves the XLOG_*_CREATE_INDEX WAL record types unused, BTW.
Done
- Heikki
Benchmarks:
-----------
Test: pgbench -f gin-WAL-test.sql -t 5:
---------------------------------------
master:
Latency average: 27696.299 ms
WAL size: 2.66 GB
patched
Latency average: 22812.103 ms
WAL size: 1.23 GB
Test: pgbench -f gist-WAL-test.sql -t 5:
----------------------------------------
master:
Latency average: 19928.284 ms
WAL size: 1.25 GB
patched
Latency average: 18175.064 ms
WAL size: 0.63 GB
Test: pgbench -f spgist-WAL-test.sql -t 5:
------------------------------------------
master:
Latency average: 11529.384 ms
WAL size: 1.07 GB
patched
Latency average: 9330.828 ms
WAL size: 0.6 GB
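The test scripts are not attached here; each one prepares some data and then
builds the corresponding index. A script along these lines (the table
definition, data volume and object names are only an illustration, not the
actual gist-WAL-test.sql) shows the kind of measurement involved:

    DROP TABLE IF EXISTS gist_wal_test;
    CREATE TABLE gist_wal_test AS
        SELECT point(random() * 1000, random() * 1000) AS p
        FROM generate_series(1, 3000000);

    -- remember the WAL position right before the index build
    DROP TABLE IF EXISTS lsn_start;
    CREATE TEMP TABLE lsn_start AS SELECT pg_current_wal_lsn() AS lsn;

    CREATE INDEX gist_wal_test_idx ON gist_wal_test USING gist (p);

    -- WAL generated by the CREATE INDEX above
    SELECT pg_size_pretty(pg_wal_lsn_diff(pg_current_wal_lsn(), lsn)) AS wal_size
    FROM lsn_start;

Using pg_wal_lsn_diff() for the size keeps the comparison independent of WAL
segment recycling.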
--
Andrey Lepikhov
Postgres Professional
https://postgrespro.com
The Russian Postgres Company
Attachments:
0001-Relation-into-WAL-function.patch (text/x-patch)
From db400ce9532536da36812dbf0456e756a0ea4724 Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepikhov@postgrespro.ru>
Date: Tue, 31 Jul 2018 07:22:17 +0500
Subject: [PATCH 1/4] Relation-into-WAL-function
---
src/backend/access/transam/generic_xlog.c | 62 +++++++++++++++++++++++
src/include/access/generic_xlog.h | 3 ++
2 files changed, 65 insertions(+)
diff --git a/src/backend/access/transam/generic_xlog.c b/src/backend/access/transam/generic_xlog.c
index ce023548ae..8397b58ee7 100644
--- a/src/backend/access/transam/generic_xlog.c
+++ b/src/backend/access/transam/generic_xlog.c
@@ -80,6 +80,7 @@ static void computeRegionDelta(PageData *pageData,
static void computeDelta(PageData *pageData, Page curpage, Page targetpage);
static void applyPageRedo(Page page, const char *delta, Size deltaSize);
+static void standard_page_layout_check(Buffer buf);
/*
* Write next fragment into pageData's delta.
@@ -545,3 +546,64 @@ generic_mask(char *page, BlockNumber blkno)
mask_unused_space(page);
}
+
+/*
+ * Check page layout.
+ * Caller must lock the buffer
+ */
+static void
+standard_page_layout_check(Buffer buf)
+{
+ PageHeader ph = (PageHeader) BufferGetPage(buf);
+
+ Assert((ph->pd_lower >= SizeOfPageHeaderData) &&
+ (ph->pd_lower <= ph->pd_upper) &&
+ (ph->pd_upper <= ph->pd_special) &&
+ (ph->pd_special <= BLCKSZ) &&
+ (ph->pd_special == MAXALIGN(ph->pd_special)));
+}
+
+/*
+ * Function to write generic xlog for every existing block of a relation.
+ * Caller is responsible for locking the relation exclusively.
+ */
+void
+generic_log_relation(Relation rel)
+{
+ BlockNumber blkno;
+ BlockNumber nblocks;
+
+ nblocks = RelationGetNumberOfBlocks(rel);
+
+ elog(DEBUG2, "generic_log_relation '%s', nblocks %u BEGIN.",
+ RelationGetRelationName(rel), nblocks);
+
+ for (blkno = 0; blkno < nblocks; )
+ {
+ GenericXLogState *state;
+ Buffer buffer[MAX_GENERIC_XLOG_PAGES];
+ int counter,
+ blocks_pack;
+
+ CHECK_FOR_INTERRUPTS();
+
+ blocks_pack = ((nblocks-blkno) < MAX_GENERIC_XLOG_PAGES) ?
+ (nblocks-blkno) : MAX_GENERIC_XLOG_PAGES;
+
+ state = GenericXLogStart(rel);
+
+ for (counter = 0 ; counter < blocks_pack; counter++)
+ {
+ buffer[counter] = ReadBuffer(rel, blkno++);
+ standard_page_layout_check(buffer[counter]);
+ LockBuffer(buffer[counter], BUFFER_LOCK_EXCLUSIVE);
+ GenericXLogRegisterBuffer(state, buffer[counter], GENERIC_XLOG_FULL_IMAGE);
+ }
+
+ GenericXLogFinish(state);
+
+ for (counter = 0 ; counter < blocks_pack; counter++)
+ UnlockReleaseBuffer(buffer[counter]);
+ }
+ elog(DEBUG2, "generic_log_relation '%s' END.", RelationGetRelationName(rel));
+}
diff --git a/src/include/access/generic_xlog.h b/src/include/access/generic_xlog.h
index b23e1f684b..1f4b3b7030 100644
--- a/src/include/access/generic_xlog.h
+++ b/src/include/access/generic_xlog.h
@@ -42,4 +42,7 @@ extern const char *generic_identify(uint8 info);
extern void generic_desc(StringInfo buf, XLogReaderState *record);
extern void generic_mask(char *pagedata, BlockNumber blkno);
+/* other utils */
+extern void generic_log_relation(Relation rel);
+
#endif /* GENERIC_XLOG_H */
--
2.17.1
0002-GIN-Optimal-WAL-Usage.patch (text/x-patch)
From 8db76f41386e9b205fd16f856a823c9a3fabe6fc Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepikhov@postgrespro.ru>
Date: Tue, 31 Jul 2018 07:22:46 +0500
Subject: [PATCH 2/4] GIN-Optimal-WAL-Usage
---
src/backend/access/gin/ginbtree.c | 6 ++---
src/backend/access/gin/gindatapage.c | 10 ++++----
src/backend/access/gin/ginentrypage.c | 2 +-
src/backend/access/gin/gininsert.c | 30 ++++++++++--------------
src/backend/access/gin/ginutil.c | 4 ++--
src/backend/access/gin/ginvacuum.c | 2 +-
src/backend/access/gin/ginxlog.c | 33 ---------------------------
src/backend/access/rmgrdesc/gindesc.c | 6 -----
src/include/access/gin.h | 3 ++-
src/include/access/ginxlog.h | 2 --
10 files changed, 27 insertions(+), 71 deletions(-)
diff --git a/src/backend/access/gin/ginbtree.c b/src/backend/access/gin/ginbtree.c
index 030d0f4418..b6d9f1bace 100644
--- a/src/backend/access/gin/ginbtree.c
+++ b/src/backend/access/gin/ginbtree.c
@@ -392,7 +392,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
/* It will fit, perform the insertion */
START_CRIT_SECTION();
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
XLogBeginInsert();
XLogRegisterBuffer(0, stack->buffer, REGBUF_STANDARD);
@@ -413,7 +413,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
MarkBufferDirty(childbuf);
}
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
XLogRecPtr recptr;
ginxlogInsert xlrec;
@@ -591,7 +591,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
}
/* write WAL record */
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
XLogRecPtr recptr;
diff --git a/src/backend/access/gin/gindatapage.c b/src/backend/access/gin/gindatapage.c
index 9f20513811..7dd4284e26 100644
--- a/src/backend/access/gin/gindatapage.c
+++ b/src/backend/access/gin/gindatapage.c
@@ -593,7 +593,7 @@ dataBeginPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
* Great, all the items fit on a single page. If needed, prepare data
* for a WAL record describing the changes we'll make.
*/
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
computeLeafRecompressWALData(leaf);
/*
@@ -630,6 +630,7 @@ dataBeginPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
* subsequent insertions will probably also go to the end. This packs
* the index somewhat tighter when appending to a table, which is very
* common.
+ *
*/
if (!btree->isBuild)
{
@@ -719,7 +720,7 @@ dataExecPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
dataPlaceToPageLeafRecompress(buf, leaf);
/* If needed, register WAL data built by computeLeafRecompressWALData */
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
XLogRegisterBufData(0, leaf->walinfo, leaf->walinfolen);
}
@@ -1152,7 +1153,7 @@ dataExecPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
pitem = (PostingItem *) insertdata;
GinDataPageAddPostingItem(page, pitem, off);
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
/*
* This must be static, because it has to survive until XLogInsert,
@@ -1773,6 +1774,7 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
Pointer ptr;
int nrootitems;
int rootsize;
+ bool is_build = (buildStats != NULL);
/* Construct the new root page in memory first. */
tmppage = (Page) palloc(BLCKSZ);
@@ -1826,7 +1828,7 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
PageRestoreTempPage(tmppage, page);
MarkBufferDirty(buffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !is_build)
{
XLogRecPtr recptr;
ginxlogCreatePostingTree data;
diff --git a/src/backend/access/gin/ginentrypage.c b/src/backend/access/gin/ginentrypage.c
index 810769718f..b0fdb23e2b 100644
--- a/src/backend/access/gin/ginentrypage.c
+++ b/src/backend/access/gin/ginentrypage.c
@@ -571,7 +571,7 @@ entryExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
elog(ERROR, "failed to add item to index page in \"%s\"",
RelationGetRelationName(btree->index));
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
/*
* This must be static, because it has to survive until XLogInsert,
diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c
index 5281eb6823..980ab2232c 100644
--- a/src/backend/access/gin/gininsert.c
+++ b/src/backend/access/gin/gininsert.c
@@ -17,6 +17,7 @@
#include "access/gin_private.h"
#include "access/ginxlog.h"
#include "access/xloginsert.h"
+#include "access/generic_xlog.h"
#include "catalog/index.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
@@ -194,6 +195,7 @@ ginEntryInsert(GinState *ginstate,
buildStats->nEntries++;
ginPrepareEntryScan(&btree, attnum, key, category, ginstate);
+ btree.isBuild = (buildStats != NULL);
stack = ginFindLeafPage(&btree, false, NULL);
page = BufferGetPage(stack->buffer);
@@ -346,23 +348,6 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
GinInitBuffer(RootBuffer, GIN_LEAF);
MarkBufferDirty(RootBuffer);
- if (RelationNeedsWAL(index))
- {
- XLogRecPtr recptr;
- Page page;
-
- XLogBeginInsert();
- XLogRegisterBuffer(0, MetaBuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
- XLogRegisterBuffer(1, RootBuffer, REGBUF_WILL_INIT);
-
- recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX);
-
- page = BufferGetPage(RootBuffer);
- PageSetLSN(page, recptr);
-
- page = BufferGetPage(MetaBuffer);
- PageSetLSN(page, recptr);
- }
UnlockReleaseBuffer(MetaBuffer);
UnlockReleaseBuffer(RootBuffer);
@@ -417,7 +402,16 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
* Update metapage stats
*/
buildstate.buildStats.nTotalPages = RelationGetNumberOfBlocks(index);
- ginUpdateStats(index, &buildstate.buildStats);
+ ginUpdateStats(index, &buildstate.buildStats, true);
+
+ /*
+ * Create generic WAL records for all pages of the relation, if necessary.
+ * It seems reasonable not to generate WAL if we received an interrupt
+ * signal.
+ */
+ CHECK_FOR_INTERRUPTS();
+ if (RelationNeedsWAL(index))
+ generic_log_relation(index);
/*
* Return statistics
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index 0a32182dd7..37d3e89acb 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -666,7 +666,7 @@ ginGetStats(Relation index, GinStatsData *stats)
* Note: nPendingPages and ginVersion are *not* copied over
*/
void
-ginUpdateStats(Relation index, const GinStatsData *stats)
+ginUpdateStats(Relation index, const GinStatsData *stats, bool is_build)
{
Buffer metabuffer;
Page metapage;
@@ -696,7 +696,7 @@ ginUpdateStats(Relation index, const GinStatsData *stats)
MarkBufferDirty(metabuffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !is_build)
{
XLogRecPtr recptr;
ginxlogUpdateMeta data;
diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c
index 3104bc12b6..1d8626fd5e 100644
--- a/src/backend/access/gin/ginvacuum.c
+++ b/src/backend/access/gin/ginvacuum.c
@@ -765,7 +765,7 @@ ginvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
/* Update the metapage with accurate page and entry counts */
idxStat.nTotalPages = npages;
- ginUpdateStats(info->index, &idxStat);
+ ginUpdateStats(info->index, &idxStat, false);
/* Finally, vacuum the FSM */
IndexFreeSpaceMapVacuum(info->index);
diff --git a/src/backend/access/gin/ginxlog.c b/src/backend/access/gin/ginxlog.c
index 7a1e94a1d5..13a78a2a3b 100644
--- a/src/backend/access/gin/ginxlog.c
+++ b/src/backend/access/gin/ginxlog.c
@@ -40,36 +40,6 @@ ginRedoClearIncompleteSplit(XLogReaderState *record, uint8 block_id)
UnlockReleaseBuffer(buffer);
}
-static void
-ginRedoCreateIndex(XLogReaderState *record)
-{
- XLogRecPtr lsn = record->EndRecPtr;
- Buffer RootBuffer,
- MetaBuffer;
- Page page;
-
- MetaBuffer = XLogInitBufferForRedo(record, 0);
- Assert(BufferGetBlockNumber(MetaBuffer) == GIN_METAPAGE_BLKNO);
- page = (Page) BufferGetPage(MetaBuffer);
-
- GinInitMetabuffer(MetaBuffer);
-
- PageSetLSN(page, lsn);
- MarkBufferDirty(MetaBuffer);
-
- RootBuffer = XLogInitBufferForRedo(record, 1);
- Assert(BufferGetBlockNumber(RootBuffer) == GIN_ROOT_BLKNO);
- page = (Page) BufferGetPage(RootBuffer);
-
- GinInitBuffer(RootBuffer, GIN_LEAF);
-
- PageSetLSN(page, lsn);
- MarkBufferDirty(RootBuffer);
-
- UnlockReleaseBuffer(RootBuffer);
- UnlockReleaseBuffer(MetaBuffer);
-}
-
static void
ginRedoCreatePTree(XLogReaderState *record)
{
@@ -724,9 +694,6 @@ gin_redo(XLogReaderState *record)
oldCtx = MemoryContextSwitchTo(opCtx);
switch (info)
{
- case XLOG_GIN_CREATE_INDEX:
- ginRedoCreateIndex(record);
- break;
case XLOG_GIN_CREATE_PTREE:
ginRedoCreatePTree(record);
break;
diff --git a/src/backend/access/rmgrdesc/gindesc.c b/src/backend/access/rmgrdesc/gindesc.c
index 3456187e3d..1bb1733666 100644
--- a/src/backend/access/rmgrdesc/gindesc.c
+++ b/src/backend/access/rmgrdesc/gindesc.c
@@ -78,9 +78,6 @@ gin_desc(StringInfo buf, XLogReaderState *record)
switch (info)
{
- case XLOG_GIN_CREATE_INDEX:
- /* no further information */
- break;
case XLOG_GIN_CREATE_PTREE:
/* no further information */
break;
@@ -188,9 +185,6 @@ gin_identify(uint8 info)
switch (info & ~XLR_INFO_MASK)
{
- case XLOG_GIN_CREATE_INDEX:
- id = "CREATE_INDEX";
- break;
case XLOG_GIN_CREATE_PTREE:
id = "CREATE_PTREE";
break;
diff --git a/src/include/access/gin.h b/src/include/access/gin.h
index 3d8a130b69..0fa33f5a19 100644
--- a/src/include/access/gin.h
+++ b/src/include/access/gin.h
@@ -71,6 +71,7 @@ extern int gin_pending_list_limit;
/* ginutil.c */
extern void ginGetStats(Relation index, GinStatsData *stats);
-extern void ginUpdateStats(Relation index, const GinStatsData *stats);
+extern void ginUpdateStats(Relation index,
+ const GinStatsData *stats, bool is_build);
#endif /* GIN_H */
diff --git a/src/include/access/ginxlog.h b/src/include/access/ginxlog.h
index 64a3c9e18b..5c74973444 100644
--- a/src/include/access/ginxlog.h
+++ b/src/include/access/ginxlog.h
@@ -16,8 +16,6 @@
#include "lib/stringinfo.h"
#include "storage/off.h"
-#define XLOG_GIN_CREATE_INDEX 0x00
-
#define XLOG_GIN_CREATE_PTREE 0x10
typedef struct ginxlogCreatePostingTree
--
2.17.1
0003-GIST-Optimal-WAL-Usage.patch (text/x-patch)
From 49a5fc72492f4917990bfeb7bd6c4fd06e52a7d5 Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepikhov@postgrespro.ru>
Date: Tue, 31 Jul 2018 07:23:11 +0500
Subject: [PATCH 3/4] GIST-Optimal-WAL-Usage
---
src/backend/access/gist/gist.c | 46 ++++++++++++++++++--------
src/backend/access/gist/gistbuild.c | 32 ++++++++++--------
src/backend/access/gist/gistutil.c | 2 +-
src/backend/access/gist/gistxlog.c | 22 ------------
src/backend/access/rmgrdesc/gistdesc.c | 5 ---
src/include/access/gist_private.h | 10 +++---
src/include/access/gistxlog.h | 1 -
7 files changed, 57 insertions(+), 61 deletions(-)
diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c
index 8a42effdf7..227998b1f8 100644
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -172,7 +172,7 @@ gistinsert(Relation r, Datum *values, bool *isnull,
values, isnull, true /* size is currently bogus */ );
itup->t_tid = *ht_ctid;
- gistdoinsert(r, itup, 0, giststate);
+ gistdoinsert(r, itup, 0, giststate, false);
/* cleanup */
MemoryContextSwitchTo(oldCxt);
@@ -218,7 +218,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
BlockNumber *newblkno,
Buffer leftchildbuf,
List **splitinfo,
- bool markfollowright)
+ bool markfollowright,
+ bool is_build)
{
BlockNumber blkno = BufferGetBlockNumber(buffer);
Page page = BufferGetPage(buffer);
@@ -457,7 +458,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
* insertion for that. NB: The number of pages and data segments
* specified here must match the calculations in gistXLogSplit()!
*/
- if (RelationNeedsWAL(rel))
+ if (RelationNeedsWAL(rel) && !is_build)
XLogEnsureRecordSpace(npage, 1 + npage * 2);
START_CRIT_SECTION();
@@ -478,18 +479,20 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer));
dist->page = BufferGetPage(dist->buffer);
- /* Write the WAL record */
- if (RelationNeedsWAL(rel))
+ /*
+ * Write the WAL record.
+ * Do not write XLog entry if the insertion is caused by
+ * index build process.
+ */
+ if (RelationNeedsWAL(rel) && !is_build)
recptr = gistXLogSplit(is_leaf,
- dist, oldrlink, oldnsn, leftchildbuf,
- markfollowright);
+ dist, oldrlink, oldnsn, leftchildbuf,
+ markfollowright);
else
recptr = gistGetFakeLSN(rel);
for (ptr = dist; ptr; ptr = ptr->next)
- {
PageSetLSN(ptr->page, recptr);
- }
/*
* Return the new child buffers to the caller.
@@ -543,7 +546,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
if (BufferIsValid(leftchildbuf))
MarkBufferDirty(leftchildbuf);
- if (RelationNeedsWAL(rel))
+
+ if (RelationNeedsWAL(rel) && !is_build)
{
OffsetNumber ndeloffs = 0,
deloffs[1];
@@ -566,6 +570,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
PageSetLSN(page, recptr);
}
+
if (newblkno)
*newblkno = blkno;
}
@@ -582,17 +587,28 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
* the full page image. There's a chicken-and-egg problem: if we updated
* the child pages first, we wouldn't know the recptr of the WAL record
* we're about to write.
+ *
+ * We use fake LSNs for insertions caused by the index build. When it is
+ * finished, we write a generic_xlog entry for each index page and update
+ * all LSNs. In order to keep NSNs less than LSNs after this update, we
+ * set the NSN to InvalidXLogRecPtr, which is the smallest possible NSN.
*/
+
if (BufferIsValid(leftchildbuf))
{
Page leftpg = BufferGetPage(leftchildbuf);
+ XLogRecPtr fakerecptr = InvalidXLogRecPtr;
- GistPageSetNSN(leftpg, recptr);
- GistClearFollowRight(leftpg);
+ if (!is_build)
+ GistPageSetNSN(leftpg, recptr);
+ else
+ GistPageSetNSN(leftpg, fakerecptr);
+ GistClearFollowRight(leftpg);
PageSetLSN(leftpg, recptr);
}
+
END_CRIT_SECTION();
return is_split;
@@ -604,7 +620,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
* so it does not bother releasing palloc'd allocations.
*/
void
-gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate)
+gistdoinsert(Relation r, IndexTuple itup, Size freespace,
+ GISTSTATE *giststate, bool is_build)
{
ItemId iid;
IndexTuple idxtuple;
@@ -616,6 +633,7 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate)
memset(&state, 0, sizeof(GISTInsertState));
state.freespace = freespace;
state.r = r;
+ state.is_build = is_build;
/* Start from the root */
firststack.blkno = GIST_ROOT_BLKNO;
@@ -1232,7 +1250,7 @@ gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
oldoffnum, NULL,
leftchild,
&splitinfo,
- true);
+ true, state->is_build);
/*
* Before recursing up in case the page was split, release locks on the
diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c
index 434f15f014..b61dbf8ac3 100644
--- a/src/backend/access/gist/gistbuild.c
+++ b/src/backend/access/gist/gistbuild.c
@@ -20,6 +20,7 @@
#include "access/gist_private.h"
#include "access/gistxlog.h"
#include "access/xloginsert.h"
+#include "access/generic_xlog.h"
#include "catalog/index.h"
#include "miscadmin.h"
#include "optimizer/cost.h"
@@ -178,18 +179,12 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
MarkBufferDirty(buffer);
- if (RelationNeedsWAL(index))
- {
- XLogRecPtr recptr;
-
- XLogBeginInsert();
- XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
-
- recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX);
- PageSetLSN(page, recptr);
- }
- else
- PageSetLSN(page, gistGetFakeLSN(heap));
+ /*
+ * Do not write index pages to WAL until the index build is finished.
+ * But we still need increasing LSNs on each page, so use FakeLSN,
+ * even for relations which eventually need WAL.
+ */
+ PageSetLSN(page, gistGetFakeLSN(heap));
UnlockReleaseBuffer(buffer);
@@ -222,6 +217,15 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
freeGISTstate(buildstate.giststate);
+ /*
+ * Create generic WAL records for all pages of the relation, if necessary.
+ * It seems reasonable not to generate WAL if we received an interrupt
+ * signal.
+ */
+ CHECK_FOR_INTERRUPTS();
+ if (RelationNeedsWAL(index))
+ generic_log_relation(index);
+
/*
* Return statistics
*/
@@ -484,7 +488,7 @@ gistBuildCallback(Relation index,
* locked, we call gistdoinsert directly.
*/
gistdoinsert(index, itup, buildstate->freespace,
- buildstate->giststate);
+ buildstate->giststate, true);
}
/* Update tuple count and total size. */
@@ -690,7 +694,7 @@ gistbufferinginserttuples(GISTBuildState *buildstate, Buffer buffer, int level,
itup, ntup, oldoffnum, &placed_to_blk,
InvalidBuffer,
&splitinfo,
- false);
+ false, true);
/*
* If this is a root split, update the root path item kept in memory. This
diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c
index 12804c321c..0b3f8db679 100644
--- a/src/backend/access/gist/gistutil.c
+++ b/src/backend/access/gist/gistutil.c
@@ -974,6 +974,7 @@ gistproperty(Oid index_oid, int attno,
* Temporary and unlogged GiST indexes are not WAL-logged, but we need LSNs
* to detect concurrent page splits anyway. This function provides a fake
* sequence of LSNs for that purpose.
+ * Persistent relations are also not WAL-logged while we build index.
*/
XLogRecPtr
gistGetFakeLSN(Relation rel)
@@ -994,7 +995,6 @@ gistGetFakeLSN(Relation rel)
* Unlogged relations are accessible from other backends, and survive
* (clean) restarts. GetFakeLSNForUnloggedRel() handles that for us.
*/
- Assert(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED);
return GetFakeLSNForUnloggedRel();
}
}
diff --git a/src/backend/access/gist/gistxlog.c b/src/backend/access/gist/gistxlog.c
index 1e09126978..9b0abccdaf 100644
--- a/src/backend/access/gist/gistxlog.c
+++ b/src/backend/access/gist/gistxlog.c
@@ -281,25 +281,6 @@ gistRedoPageSplitRecord(XLogReaderState *record)
UnlockReleaseBuffer(firstbuffer);
}
-static void
-gistRedoCreateIndex(XLogReaderState *record)
-{
- XLogRecPtr lsn = record->EndRecPtr;
- Buffer buffer;
- Page page;
-
- buffer = XLogInitBufferForRedo(record, 0);
- Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
- page = (Page) BufferGetPage(buffer);
-
- GISTInitBuffer(buffer, F_LEAF);
-
- PageSetLSN(page, lsn);
-
- MarkBufferDirty(buffer);
- UnlockReleaseBuffer(buffer);
-}
-
void
gist_redo(XLogReaderState *record)
{
@@ -321,9 +302,6 @@ gist_redo(XLogReaderState *record)
case XLOG_GIST_PAGE_SPLIT:
gistRedoPageSplitRecord(record);
break;
- case XLOG_GIST_CREATE_INDEX:
- gistRedoCreateIndex(record);
- break;
default:
elog(PANIC, "gist_redo: unknown op code %u", info);
}
diff --git a/src/backend/access/rmgrdesc/gistdesc.c b/src/backend/access/rmgrdesc/gistdesc.c
index e5e925e0c5..1685a2fb8a 100644
--- a/src/backend/access/rmgrdesc/gistdesc.c
+++ b/src/backend/access/rmgrdesc/gistdesc.c
@@ -44,8 +44,6 @@ gist_desc(StringInfo buf, XLogReaderState *record)
case XLOG_GIST_PAGE_SPLIT:
out_gistxlogPageSplit(buf, (gistxlogPageSplit *) rec);
break;
- case XLOG_GIST_CREATE_INDEX:
- break;
}
}
@@ -62,9 +60,6 @@ gist_identify(uint8 info)
case XLOG_GIST_PAGE_SPLIT:
id = "PAGE_SPLIT";
break;
- case XLOG_GIST_CREATE_INDEX:
- id = "CREATE_INDEX";
- break;
}
return id;
diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h
index 36ed7244ba..0588fc716a 100644
--- a/src/include/access/gist_private.h
+++ b/src/include/access/gist_private.h
@@ -241,6 +241,7 @@ typedef struct
{
Relation r;
Size freespace; /* free space to be left */
+ bool is_build;
GISTInsertStack *stack;
} GISTInsertState;
@@ -387,9 +388,9 @@ extern MemoryContext createTempGistContext(void);
extern GISTSTATE *initGISTstate(Relation index);
extern void freeGISTstate(GISTSTATE *giststate);
extern void gistdoinsert(Relation r,
- IndexTuple itup,
- Size freespace,
- GISTSTATE *GISTstate);
+ IndexTuple itup,
+ Size freespace,
+ GISTSTATE* giststate, bool is_build);
/* A List of these is returned from gistplacetopage() in *splitinfo */
typedef struct
@@ -404,7 +405,8 @@ extern bool gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
OffsetNumber oldoffnum, BlockNumber *newblkno,
Buffer leftchildbuf,
List **splitinfo,
- bool markleftchild);
+ bool markleftchild,
+ bool is_build);
extern SplitedPageLayout *gistSplit(Relation r, Page page, IndexTuple *itup,
int len, GISTSTATE *giststate);
diff --git a/src/include/access/gistxlog.h b/src/include/access/gistxlog.h
index 1a2b9496d0..5932fc395b 100644
--- a/src/include/access/gistxlog.h
+++ b/src/include/access/gistxlog.h
@@ -21,7 +21,6 @@
/* #define XLOG_GIST_NEW_ROOT 0x20 */ /* not used anymore */
#define XLOG_GIST_PAGE_SPLIT 0x30
/* #define XLOG_GIST_INSERT_COMPLETE 0x40 */ /* not used anymore */
-#define XLOG_GIST_CREATE_INDEX 0x50
/* #define XLOG_GIST_PAGE_DELETE 0x60 */ /* not used anymore */
/*
--
2.17.1
0004-SPGIST-Optimal-WAL-Usage.patch (text/x-patch)
From 41ec17abe5a60b5dedee28aa940c8556bc0e402f Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepikhov@postgrespro.ru>
Date: Tue, 31 Jul 2018 07:23:36 +0500
Subject: [PATCH 4/4] SPGIST-Optimal-WAL-Usage
---
src/backend/access/rmgrdesc/spgdesc.c | 5 ----
src/backend/access/spgist/spgdoinsert.c | 12 ++++-----
src/backend/access/spgist/spginsert.c | 24 +++--------------
src/backend/access/spgist/spgxlog.c | 35 -------------------------
src/include/access/spgxlog.h | 1 -
5 files changed, 10 insertions(+), 67 deletions(-)
diff --git a/src/backend/access/rmgrdesc/spgdesc.c b/src/backend/access/rmgrdesc/spgdesc.c
index 92b1392974..14beb5702f 100644
--- a/src/backend/access/rmgrdesc/spgdesc.c
+++ b/src/backend/access/rmgrdesc/spgdesc.c
@@ -24,8 +24,6 @@ spg_desc(StringInfo buf, XLogReaderState *record)
switch (info)
{
- case XLOG_SPGIST_CREATE_INDEX:
- break;
case XLOG_SPGIST_ADD_LEAF:
{
spgxlogAddLeaf *xlrec = (spgxlogAddLeaf *) rec;
@@ -88,9 +86,6 @@ spg_identify(uint8 info)
switch (info & ~XLR_INFO_MASK)
{
- case XLOG_SPGIST_CREATE_INDEX:
- id = "CREATE_INDEX";
- break;
case XLOG_SPGIST_ADD_LEAF:
id = "ADD_LEAF";
break;
diff --git a/src/backend/access/spgist/spgdoinsert.c b/src/backend/access/spgist/spgdoinsert.c
index 098e09c574..e84ffdfb16 100644
--- a/src/backend/access/spgist/spgdoinsert.c
+++ b/src/backend/access/spgist/spgdoinsert.c
@@ -289,7 +289,7 @@ addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple,
MarkBufferDirty(current->buffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
int flags;
@@ -516,7 +516,7 @@ moveLeafs(Relation index, SpGistState *state,
MarkBufferDirty(current->buffer);
MarkBufferDirty(nbuf);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
@@ -1334,7 +1334,7 @@ doPickSplit(Relation index, SpGistState *state,
saveCurrent.buffer = InvalidBuffer;
}
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
int flags;
@@ -1531,7 +1531,7 @@ spgAddNodeAction(Relation index, SpGistState *state,
MarkBufferDirty(current->buffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
@@ -1644,7 +1644,7 @@ spgAddNodeAction(Relation index, SpGistState *state,
MarkBufferDirty(saveCurrent.buffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
int flags;
@@ -1840,7 +1840,7 @@ spgSplitNodeAction(Relation index, SpGistState *state,
MarkBufferDirty(current->buffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
diff --git a/src/backend/access/spgist/spginsert.c b/src/backend/access/spgist/spginsert.c
index 7dd0d61fbb..18ee9f8cbe 100644
--- a/src/backend/access/spgist/spginsert.c
+++ b/src/backend/access/spgist/spginsert.c
@@ -21,6 +21,7 @@
#include "access/spgxlog.h"
#include "access/xlog.h"
#include "access/xloginsert.h"
+#include "access/generic_xlog.h"
#include "catalog/index.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
@@ -104,26 +105,6 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo)
SpGistInitBuffer(nullbuffer, SPGIST_LEAF | SPGIST_NULLS);
MarkBufferDirty(nullbuffer);
- if (RelationNeedsWAL(index))
- {
- XLogRecPtr recptr;
-
- XLogBeginInsert();
-
- /*
- * Replay will re-initialize the pages, so don't take full pages
- * images. No other data to log.
- */
- XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
- XLogRegisterBuffer(1, rootbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
- XLogRegisterBuffer(2, nullbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
-
- recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_CREATE_INDEX);
-
- PageSetLSN(BufferGetPage(metabuffer), recptr);
- PageSetLSN(BufferGetPage(rootbuffer), recptr);
- PageSetLSN(BufferGetPage(nullbuffer), recptr);
- }
END_CRIT_SECTION();
@@ -150,6 +131,9 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo)
SpGistUpdateMetaPage(index);
+ if (RelationNeedsWAL(index))
+ generic_log_relation(index);
+
result = (IndexBuildResult *) palloc0(sizeof(IndexBuildResult));
result->heap_tuples = reltuples;
result->index_tuples = buildstate.indtuples;
diff --git a/src/backend/access/spgist/spgxlog.c b/src/backend/access/spgist/spgxlog.c
index 9e2bd3f811..2c42f1be42 100644
--- a/src/backend/access/spgist/spgxlog.c
+++ b/src/backend/access/spgist/spgxlog.c
@@ -72,38 +72,6 @@ addOrReplaceTuple(Page page, Item tuple, int size, OffsetNumber offset)
size);
}
-static void
-spgRedoCreateIndex(XLogReaderState *record)
-{
- XLogRecPtr lsn = record->EndRecPtr;
- Buffer buffer;
- Page page;
-
- buffer = XLogInitBufferForRedo(record, 0);
- Assert(BufferGetBlockNumber(buffer) == SPGIST_METAPAGE_BLKNO);
- page = (Page) BufferGetPage(buffer);
- SpGistInitMetapage(page);
- PageSetLSN(page, lsn);
- MarkBufferDirty(buffer);
- UnlockReleaseBuffer(buffer);
-
- buffer = XLogInitBufferForRedo(record, 1);
- Assert(BufferGetBlockNumber(buffer) == SPGIST_ROOT_BLKNO);
- SpGistInitBuffer(buffer, SPGIST_LEAF);
- page = (Page) BufferGetPage(buffer);
- PageSetLSN(page, lsn);
- MarkBufferDirty(buffer);
- UnlockReleaseBuffer(buffer);
-
- buffer = XLogInitBufferForRedo(record, 2);
- Assert(BufferGetBlockNumber(buffer) == SPGIST_NULL_BLKNO);
- SpGistInitBuffer(buffer, SPGIST_LEAF | SPGIST_NULLS);
- page = (Page) BufferGetPage(buffer);
- PageSetLSN(page, lsn);
- MarkBufferDirty(buffer);
- UnlockReleaseBuffer(buffer);
-}
-
static void
spgRedoAddLeaf(XLogReaderState *record)
{
@@ -976,9 +944,6 @@ spg_redo(XLogReaderState *record)
oldCxt = MemoryContextSwitchTo(opCtx);
switch (info)
{
- case XLOG_SPGIST_CREATE_INDEX:
- spgRedoCreateIndex(record);
- break;
case XLOG_SPGIST_ADD_LEAF:
spgRedoAddLeaf(record);
break;
diff --git a/src/include/access/spgxlog.h b/src/include/access/spgxlog.h
index b72ccb5cc4..44ad891de3 100644
--- a/src/include/access/spgxlog.h
+++ b/src/include/access/spgxlog.h
@@ -18,7 +18,6 @@
#include "storage/off.h"
/* XLOG record types for SPGiST */
-#define XLOG_SPGIST_CREATE_INDEX 0x00
#define XLOG_SPGIST_ADD_LEAF 0x10
#define XLOG_SPGIST_MOVE_LEAFS 0x20
#define XLOG_SPGIST_ADD_NODE 0x30
--
2.17.1
On Tue, Jul 31, 2018 at 6:36 AM Andrey Lepikhov <a.lepikhov@postgrespro.ru> wrote:
With Anastasia's consent I will be improving this patch further.
The attachment contains the next version of the patch set.
Thank you. I played a bit with this patch and can confirm a visible WAL size
reduction (it's rather obvious, but still). Although, probably due to my lack
of knowledge here, I wonder how this works when an index is built
concurrently?
Benchmarks:
---------------------------------------
Test: pgbench -f gin-WAL-test.sql -t 5
---------------------------------------
master:
Latency average: 27696.299 ms
WAL size: 2.66 GB
Does it make sense to measure the latency of the entire script, since it also
contains some preparation work? Of course it still shows the difference between
the patched version and master, but probably in a noisier way.
I'm moving this patch to the next CF.
On 30.11.2018 15:10, Dmitry Dolgov wrote:
Thank you. I played a bit with this patch and can confirm a visible WAL size
reduction (it's rather obvious, but still). Although, probably due to my lack
of knowledge here, I wonder how this works when an index is built
concurrently?
The generate_xlog_for_rel() routine is used only at phase 2 of a concurrent
index build.
At phase 3, during validate_index() execution, we use the aminsert() ->
PlaceToPage() mechanism just as before the patch.
In concurrent mode I do not expect any problems caused by the patch; see the
sketch below.
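To make the phase split explicit, here is a minimal C sketch of the gating
pattern. It is my own illustration, not code from the patch: place_to_page()
and finish_build() are invented names standing in for the AM-specific
insertion and build-finish code. While the build flag is set, no per-change
WAL is emitted; the finished index is then logged once through
generic_log_relation().

#include "postgres.h"
#include "access/generic_xlog.h"
#include "utils/rel.h"

/* Per-change path, shared by ambuild (isBuild = true) and aminsert (false). */
void
place_to_page(Relation index, bool isBuild)
{
    /* ... modify the page and MarkBufferDirty() as usual ... */

    if (RelationNeedsWAL(index) && !isBuild)
    {
        /* Normal path: emit the AM-specific WAL record for this change. */
    }

    /* During the build, no per-change WAL is written at all. */
}

/* Called once at the very end of ambuild (phase 2 of a concurrent build). */
void
finish_build(Relation index)
{
    if (RelationNeedsWAL(index))
        generic_log_relation(index);    /* full-page generic records */
}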
Benchmarks:
---------------------------------------
Test: pgbench -f gin-WAL-test.sql -t 5
---------------------------------------
master:
Latency average: 27696.299 ms
WAL size: 2.66 GB
Does it make sense to measure the latency of the entire script, since it also
contains some preparation work? Of course it still shows the difference between
the patched version and master, but probably in a noisier way.
Ok. It is used only for demonstration.
--
Andrey Lepikhov
Postgres Professional
https://postgrespro.com
The Russian Postgres Company
On Tue, Dec 18, 2018 at 10:41:48AM +0500, Andrey Lepikhov wrote:
Ok. It is used only for demonstration.
The latest patch set needs a rebase, so moved to next CF, waiting on
author as this got no reviews.
--
Michael
On 04.02.2019 10:04, Michael Paquier wrote:
On Tue, Dec 18, 2018 at 10:41:48AM +0500, Andrey Lepikhov wrote:
Ok. It is used only for demonstration.
The latest patch set needs a rebase, so moved to next CF, waiting on
author as this got no reviews.
The new version is attached.
--
Michael
--
Andrey Lepikhov
Postgres Professional
https://postgrespro.com
The Russian Postgres Company
Attachments:
0001-Relation-into-WAL-function.patch (text/x-patch)
From afc270f37cd082fb6e9f4ad694ea4cb123d98062 Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepikhov@postgrespro.ru>
Date: Tue, 5 Feb 2019 08:11:47 +0500
Subject: [PATCH 1/4] Relation-into-WAL-function
---
src/backend/access/spgist/spgutils.c | 1 +
src/backend/access/transam/generic_xlog.c | 39 +++++++++++++++++++++++
src/include/access/generic_xlog.h | 3 ++
3 files changed, 43 insertions(+)
diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c
index 8e63c1fad2..b782bc2338 100644
--- a/src/backend/access/spgist/spgutils.c
+++ b/src/backend/access/spgist/spgutils.c
@@ -550,6 +550,7 @@ SpGistInitBuffer(Buffer b, uint16 f)
{
Assert(BufferGetPageSize(b) == BLCKSZ);
SpGistInitPage(BufferGetPage(b), f);
+ MarkBufferDirty(b);
}
/*
diff --git a/src/backend/access/transam/generic_xlog.c b/src/backend/access/transam/generic_xlog.c
index 5b00b7275b..643da48345 100644
--- a/src/backend/access/transam/generic_xlog.c
+++ b/src/backend/access/transam/generic_xlog.c
@@ -542,3 +542,42 @@ generic_mask(char *page, BlockNumber blkno)
mask_unused_space(page);
}
+
+/*
+ * Function to write generic xlog for every existing block of a relation.
+ * Caller is responsible for locking the relation exclusively.
+ */
+void
+generic_log_relation(Relation rel)
+{
+ BlockNumber blkno;
+ BlockNumber nblocks = RelationGetNumberOfBlocks(rel);
+
+ for (blkno = 0; blkno < nblocks; )
+ {
+ GenericXLogState *state;
+ Buffer buffer[MAX_GENERIC_XLOG_PAGES];
+ int i,
+ blocks_pack;
+
+ CHECK_FOR_INTERRUPTS();
+
+ blocks_pack = ((nblocks-blkno) < MAX_GENERIC_XLOG_PAGES) ?
+ (nblocks-blkno) : MAX_GENERIC_XLOG_PAGES;
+
+ state = GenericXLogStart(rel);
+
+ for (i = 0 ; i < blocks_pack; i++)
+ {
+ buffer[i] = ReadBuffer(rel, blkno);
+ LockBuffer(buffer[i], BUFFER_LOCK_EXCLUSIVE);
+ GenericXLogRegisterBuffer(state, buffer[i], GENERIC_XLOG_FULL_IMAGE);
+ blkno++;
+ }
+
+ GenericXLogFinish(state);
+
+ for (i = 0 ; i < blocks_pack; i++)
+ UnlockReleaseBuffer(buffer[i]);
+ }
+}
diff --git a/src/include/access/generic_xlog.h b/src/include/access/generic_xlog.h
index cb5b5b713a..e3bbf014cc 100644
--- a/src/include/access/generic_xlog.h
+++ b/src/include/access/generic_xlog.h
@@ -42,4 +42,7 @@ extern const char *generic_identify(uint8 info);
extern void generic_desc(StringInfo buf, XLogReaderState *record);
extern void generic_mask(char *pagedata, BlockNumber blkno);
+/* other utils */
+extern void generic_log_relation(Relation rel);
+
#endif /* GENERIC_XLOG_H */
--
2.17.1
0002-GIN-Optimal-WAL-Usage.patch (text/x-patch)
From 25dc935da9d5502a06fb9dc1eea4912fa0f48be1 Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepikhov@postgrespro.ru>
Date: Tue, 5 Feb 2019 08:13:29 +0500
Subject: [PATCH 2/4] GIN-Optimal-WAL-Usage
---
src/backend/access/gin/ginbtree.c | 6 ++---
src/backend/access/gin/gindatapage.c | 10 ++++----
src/backend/access/gin/ginentrypage.c | 2 +-
src/backend/access/gin/gininsert.c | 30 ++++++++++--------------
src/backend/access/gin/ginutil.c | 4 ++--
src/backend/access/gin/ginvacuum.c | 2 +-
src/backend/access/gin/ginxlog.c | 33 ---------------------------
src/backend/access/rmgrdesc/gindesc.c | 6 -----
src/include/access/gin.h | 3 ++-
src/include/access/ginxlog.h | 2 --
10 files changed, 27 insertions(+), 71 deletions(-)
diff --git a/src/backend/access/gin/ginbtree.c b/src/backend/access/gin/ginbtree.c
index 533949e46a..9f82eef8c3 100644
--- a/src/backend/access/gin/ginbtree.c
+++ b/src/backend/access/gin/ginbtree.c
@@ -396,7 +396,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
/* It will fit, perform the insertion */
START_CRIT_SECTION();
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
XLogBeginInsert();
XLogRegisterBuffer(0, stack->buffer, REGBUF_STANDARD);
@@ -417,7 +417,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
MarkBufferDirty(childbuf);
}
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
XLogRecPtr recptr;
ginxlogInsert xlrec;
@@ -595,7 +595,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
}
/* write WAL record */
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
XLogRecPtr recptr;
diff --git a/src/backend/access/gin/gindatapage.c b/src/backend/access/gin/gindatapage.c
index 3ad8b76710..a73cf944b3 100644
--- a/src/backend/access/gin/gindatapage.c
+++ b/src/backend/access/gin/gindatapage.c
@@ -593,7 +593,7 @@ dataBeginPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
* Great, all the items fit on a single page. If needed, prepare data
* for a WAL record describing the changes we'll make.
*/
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
computeLeafRecompressWALData(leaf);
/*
@@ -630,6 +630,7 @@ dataBeginPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
* subsequent insertions will probably also go to the end. This packs
* the index somewhat tighter when appending to a table, which is very
* common.
+ *
*/
if (!btree->isBuild)
{
@@ -719,7 +720,7 @@ dataExecPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
dataPlaceToPageLeafRecompress(buf, leaf);
/* If needed, register WAL data built by computeLeafRecompressWALData */
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
XLogRegisterBufData(0, leaf->walinfo, leaf->walinfolen);
}
@@ -1152,7 +1153,7 @@ dataExecPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
pitem = (PostingItem *) insertdata;
GinDataPageAddPostingItem(page, pitem, off);
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
/*
* This must be static, because it has to survive until XLogInsert,
@@ -1773,6 +1774,7 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
Pointer ptr;
int nrootitems;
int rootsize;
+ bool is_build = (buildStats != NULL);
/* Construct the new root page in memory first. */
tmppage = (Page) palloc(BLCKSZ);
@@ -1826,7 +1828,7 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
PageRestoreTempPage(tmppage, page);
MarkBufferDirty(buffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !is_build)
{
XLogRecPtr recptr;
ginxlogCreatePostingTree data;
diff --git a/src/backend/access/gin/ginentrypage.c b/src/backend/access/gin/ginentrypage.c
index 4889de2a4f..1f5ba33d51 100644
--- a/src/backend/access/gin/ginentrypage.c
+++ b/src/backend/access/gin/ginentrypage.c
@@ -571,7 +571,7 @@ entryExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
elog(ERROR, "failed to add item to index page in \"%s\"",
RelationGetRelationName(btree->index));
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
/*
* This must be static, because it has to survive until XLogInsert,
diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c
index 524ac5be8b..6307743afd 100644
--- a/src/backend/access/gin/gininsert.c
+++ b/src/backend/access/gin/gininsert.c
@@ -17,6 +17,7 @@
#include "access/gin_private.h"
#include "access/ginxlog.h"
#include "access/xloginsert.h"
+#include "access/generic_xlog.h"
#include "catalog/index.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
@@ -194,6 +195,7 @@ ginEntryInsert(GinState *ginstate,
buildStats->nEntries++;
ginPrepareEntryScan(&btree, attnum, key, category, ginstate);
+ btree.isBuild = (buildStats != NULL);
stack = ginFindLeafPage(&btree, false, false, NULL);
page = BufferGetPage(stack->buffer);
@@ -346,23 +348,6 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
GinInitBuffer(RootBuffer, GIN_LEAF);
MarkBufferDirty(RootBuffer);
- if (RelationNeedsWAL(index))
- {
- XLogRecPtr recptr;
- Page page;
-
- XLogBeginInsert();
- XLogRegisterBuffer(0, MetaBuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
- XLogRegisterBuffer(1, RootBuffer, REGBUF_WILL_INIT);
-
- recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX);
-
- page = BufferGetPage(RootBuffer);
- PageSetLSN(page, recptr);
-
- page = BufferGetPage(MetaBuffer);
- PageSetLSN(page, recptr);
- }
UnlockReleaseBuffer(MetaBuffer);
UnlockReleaseBuffer(RootBuffer);
@@ -417,7 +402,16 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
* Update metapage stats
*/
buildstate.buildStats.nTotalPages = RelationGetNumberOfBlocks(index);
- ginUpdateStats(index, &buildstate.buildStats);
+ ginUpdateStats(index, &buildstate.buildStats, true);
+
+ /*
+ * Create generic wal records for all pages of relation, if necessary.
+ * It seems reasonable not to generate WAL, if we recieved interrupt
+ * signal.
+ */
+ CHECK_FOR_INTERRUPTS();
+ if (RelationNeedsWAL(index))
+ generic_log_relation(index);
/*
* Return statistics
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index afc20232ac..51b20bca6e 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -661,7 +661,7 @@ ginGetStats(Relation index, GinStatsData *stats)
* Note: nPendingPages and ginVersion are *not* copied over
*/
void
-ginUpdateStats(Relation index, const GinStatsData *stats)
+ginUpdateStats(Relation index, const GinStatsData *stats, bool is_build)
{
Buffer metabuffer;
Page metapage;
@@ -691,7 +691,7 @@ ginUpdateStats(Relation index, const GinStatsData *stats)
MarkBufferDirty(metabuffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !is_build)
{
XLogRecPtr recptr;
ginxlogUpdateMeta data;
diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c
index dfe885b101..b9a28d1863 100644
--- a/src/backend/access/gin/ginvacuum.c
+++ b/src/backend/access/gin/ginvacuum.c
@@ -759,7 +759,7 @@ ginvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
/* Update the metapage with accurate page and entry counts */
idxStat.nTotalPages = npages;
- ginUpdateStats(info->index, &idxStat);
+ ginUpdateStats(info->index, &idxStat, false);
/* Finally, vacuum the FSM */
IndexFreeSpaceMapVacuum(info->index);
diff --git a/src/backend/access/gin/ginxlog.c b/src/backend/access/gin/ginxlog.c
index c467ffa346..b648af1ff6 100644
--- a/src/backend/access/gin/ginxlog.c
+++ b/src/backend/access/gin/ginxlog.c
@@ -40,36 +40,6 @@ ginRedoClearIncompleteSplit(XLogReaderState *record, uint8 block_id)
UnlockReleaseBuffer(buffer);
}
-static void
-ginRedoCreateIndex(XLogReaderState *record)
-{
- XLogRecPtr lsn = record->EndRecPtr;
- Buffer RootBuffer,
- MetaBuffer;
- Page page;
-
- MetaBuffer = XLogInitBufferForRedo(record, 0);
- Assert(BufferGetBlockNumber(MetaBuffer) == GIN_METAPAGE_BLKNO);
- page = (Page) BufferGetPage(MetaBuffer);
-
- GinInitMetabuffer(MetaBuffer);
-
- PageSetLSN(page, lsn);
- MarkBufferDirty(MetaBuffer);
-
- RootBuffer = XLogInitBufferForRedo(record, 1);
- Assert(BufferGetBlockNumber(RootBuffer) == GIN_ROOT_BLKNO);
- page = (Page) BufferGetPage(RootBuffer);
-
- GinInitBuffer(RootBuffer, GIN_LEAF);
-
- PageSetLSN(page, lsn);
- MarkBufferDirty(RootBuffer);
-
- UnlockReleaseBuffer(RootBuffer);
- UnlockReleaseBuffer(MetaBuffer);
-}
-
static void
ginRedoCreatePTree(XLogReaderState *record)
{
@@ -767,9 +737,6 @@ gin_redo(XLogReaderState *record)
oldCtx = MemoryContextSwitchTo(opCtx);
switch (info)
{
- case XLOG_GIN_CREATE_INDEX:
- ginRedoCreateIndex(record);
- break;
case XLOG_GIN_CREATE_PTREE:
ginRedoCreatePTree(record);
break;
diff --git a/src/backend/access/rmgrdesc/gindesc.c b/src/backend/access/rmgrdesc/gindesc.c
index ef30ce16b0..f3f4e1b214 100644
--- a/src/backend/access/rmgrdesc/gindesc.c
+++ b/src/backend/access/rmgrdesc/gindesc.c
@@ -78,9 +78,6 @@ gin_desc(StringInfo buf, XLogReaderState *record)
switch (info)
{
- case XLOG_GIN_CREATE_INDEX:
- /* no further information */
- break;
case XLOG_GIN_CREATE_PTREE:
/* no further information */
break;
@@ -188,9 +185,6 @@ gin_identify(uint8 info)
switch (info & ~XLR_INFO_MASK)
{
- case XLOG_GIN_CREATE_INDEX:
- id = "CREATE_INDEX";
- break;
case XLOG_GIN_CREATE_PTREE:
id = "CREATE_PTREE";
break;
diff --git a/src/include/access/gin.h b/src/include/access/gin.h
index 61fa697039..d559ffc703 100644
--- a/src/include/access/gin.h
+++ b/src/include/access/gin.h
@@ -71,6 +71,7 @@ extern int gin_pending_list_limit;
/* ginutil.c */
extern void ginGetStats(Relation index, GinStatsData *stats);
-extern void ginUpdateStats(Relation index, const GinStatsData *stats);
+extern void ginUpdateStats(Relation index,
+ const GinStatsData *stats, bool is_build);
#endif /* GIN_H */
diff --git a/src/include/access/ginxlog.h b/src/include/access/ginxlog.h
index 9bd4e0b9ba..2c5d743cac 100644
--- a/src/include/access/ginxlog.h
+++ b/src/include/access/ginxlog.h
@@ -16,8 +16,6 @@
#include "lib/stringinfo.h"
#include "storage/off.h"
-#define XLOG_GIN_CREATE_INDEX 0x00
-
#define XLOG_GIN_CREATE_PTREE 0x10
typedef struct ginxlogCreatePostingTree
--
2.17.1
0003-GIST-Optimal-WAL-Usage.patch (text/x-patch)
From f032a0645d86c57e3b89a294dfb46cfad1119825 Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepikhov@postgrespro.ru>
Date: Tue, 5 Feb 2019 08:14:41 +0500
Subject: [PATCH 3/4] GIST-Optimal-WAL-Usage
---
src/backend/access/gist/gist.c | 47 ++++++++++++++++++--------
src/backend/access/gist/gistbuild.c | 32 ++++++++++--------
src/backend/access/gist/gistutil.c | 2 +-
src/backend/access/gist/gistxlog.c | 22 ------------
src/backend/access/rmgrdesc/gistdesc.c | 5 ---
src/include/access/gist_private.h | 7 ++--
src/include/access/gistxlog.h | 1 -
7 files changed, 56 insertions(+), 60 deletions(-)
diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c
index b75b3a8dac..710c1bfd3a 100644
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -173,7 +173,7 @@ gistinsert(Relation r, Datum *values, bool *isnull,
values, isnull, true /* size is currently bogus */ );
itup->t_tid = *ht_ctid;
- gistdoinsert(r, itup, 0, giststate, heapRel);
+ gistdoinsert(r, itup, 0, giststate, heapRel, false);
/* cleanup */
MemoryContextSwitchTo(oldCxt);
@@ -220,7 +220,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
Buffer leftchildbuf,
List **splitinfo,
bool markfollowright,
- Relation heapRel)
+ Relation heapRel,
+ bool is_build)
{
BlockNumber blkno = BufferGetBlockNumber(buffer);
Page page = BufferGetPage(buffer);
@@ -459,7 +460,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
* insertion for that. NB: The number of pages and data segments
* specified here must match the calculations in gistXLogSplit()!
*/
- if (RelationNeedsWAL(rel))
+ if (RelationNeedsWAL(rel) && !is_build)
XLogEnsureRecordSpace(npage, 1 + npage * 2);
START_CRIT_SECTION();
@@ -480,18 +481,20 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer));
dist->page = BufferGetPage(dist->buffer);
- /* Write the WAL record */
- if (RelationNeedsWAL(rel))
+ /*
+ * Write the WAL record.
+ * Do not write XLog entry if the insertion is caused by
+ * index build process.
+ */
+ if (RelationNeedsWAL(rel) && !is_build)
recptr = gistXLogSplit(is_leaf,
- dist, oldrlink, oldnsn, leftchildbuf,
- markfollowright);
+ dist, oldrlink, oldnsn, leftchildbuf,
+ markfollowright);
else
recptr = gistGetFakeLSN(rel);
for (ptr = dist; ptr; ptr = ptr->next)
- {
PageSetLSN(ptr->page, recptr);
- }
/*
* Return the new child buffers to the caller.
@@ -545,7 +548,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
if (BufferIsValid(leftchildbuf))
MarkBufferDirty(leftchildbuf);
- if (RelationNeedsWAL(rel))
+
+ if (RelationNeedsWAL(rel) && !is_build)
{
OffsetNumber ndeloffs = 0,
deloffs[1];
@@ -568,6 +572,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
PageSetLSN(page, recptr);
}
+
if (newblkno)
*newblkno = blkno;
}
@@ -584,17 +589,28 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
* the full page image. There's a chicken-and-egg problem: if we updated
* the child pages first, we wouldn't know the recptr of the WAL record
* we're about to write.
+ *
+ * We use fakeLSNs for inserions caused by index build. And when it is
+ * finished, we write generic_xlog entry for each index page and update
+ * all LSNs. In order to keep NSNs less then LSNs after this update, we
+ * set NSN to InvalidXLogRecPtr, which is the smallest possible NSN.
*/
+
if (BufferIsValid(leftchildbuf))
{
Page leftpg = BufferGetPage(leftchildbuf);
+ XLogRecPtr fakerecptr = InvalidXLogRecPtr;
- GistPageSetNSN(leftpg, recptr);
- GistClearFollowRight(leftpg);
+ if (!is_build)
+ GistPageSetNSN(leftpg, recptr);
+ else
+ GistPageSetNSN(leftpg, fakerecptr);
+ GistClearFollowRight(leftpg);
PageSetLSN(leftpg, recptr);
}
+
END_CRIT_SECTION();
return is_split;
@@ -605,9 +621,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
* this routine assumes it is invoked in a short-lived memory context,
* so it does not bother releasing palloc'd allocations.
*/
-void
gistdoinsert(Relation r, IndexTuple itup, Size freespace,
- GISTSTATE *giststate, Relation heapRel)
+ GISTSTATE *giststate, Relation heapRel, bool is_build)
{
ItemId iid;
IndexTuple idxtuple;
@@ -620,6 +635,7 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace,
state.freespace = freespace;
state.r = r;
state.heapRel = heapRel;
+ state.is_build = is_build;
/* Start from the root */
firststack.blkno = GIST_ROOT_BLKNO;
@@ -1237,7 +1253,8 @@ gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
leftchild,
&splitinfo,
true,
- state->heapRel);
+ state->heapRel,
+ state->is_build);
/*
* Before recursing up in case the page was split, release locks on the
diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c
index bd142a3560..2e98e55187 100644
--- a/src/backend/access/gist/gistbuild.c
+++ b/src/backend/access/gist/gistbuild.c
@@ -17,6 +17,7 @@
#include <math.h>
#include "access/genam.h"
+#include "access/generic_xlog.h"
#include "access/gist_private.h"
#include "access/gistxlog.h"
#include "access/xloginsert.h"
@@ -180,18 +181,12 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
MarkBufferDirty(buffer);
- if (RelationNeedsWAL(index))
- {
- XLogRecPtr recptr;
-
- XLogBeginInsert();
- XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
-
- recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX);
- PageSetLSN(page, recptr);
- }
- else
- PageSetLSN(page, gistGetFakeLSN(heap));
+ /*
+ * Do not write index pages to WAL unitl index build is finished.
+ * But we still need increasing LSNs on each page, so use FakeLSN,
+ * even for relations which eventually need WAL.
+ */
+ PageSetLSN(page, gistGetFakeLSN(heap));
UnlockReleaseBuffer(buffer);
@@ -224,6 +219,15 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
freeGISTstate(buildstate.giststate);
+ /*
+ * Create generic wal records for all pages of relation, if necessary.
+ * It seems reasonable not to generate WAL, if we recieved interrupt
+ * signal.
+ */
+ CHECK_FOR_INTERRUPTS();
+ if (RelationNeedsWAL(index))
+ generic_log_relation(index);
+
/*
* Return statistics
*/
@@ -486,7 +490,7 @@ gistBuildCallback(Relation index,
* locked, we call gistdoinsert directly.
*/
gistdoinsert(index, itup, buildstate->freespace,
- buildstate->giststate, buildstate->heaprel);
+ buildstate->giststate, buildstate->heaprel, true);
}
/* Update tuple count and total size. */
@@ -693,7 +697,7 @@ gistbufferinginserttuples(GISTBuildState *buildstate, Buffer buffer, int level,
InvalidBuffer,
&splitinfo,
false,
- buildstate->heaprel);
+ buildstate->heaprel, true);
/*
* If this is a root split, update the root path item kept in memory. This
diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c
index 8d3dfad27b..f7088e2d94 100644
--- a/src/backend/access/gist/gistutil.c
+++ b/src/backend/access/gist/gistutil.c
@@ -949,6 +949,7 @@ gistproperty(Oid index_oid, int attno,
* Temporary and unlogged GiST indexes are not WAL-logged, but we need LSNs
* to detect concurrent page splits anyway. This function provides a fake
* sequence of LSNs for that purpose.
+ * Persistent relations are also not WAL-logged while we build index.
*/
XLogRecPtr
gistGetFakeLSN(Relation rel)
@@ -969,7 +970,6 @@ gistGetFakeLSN(Relation rel)
* Unlogged relations are accessible from other backends, and survive
* (clean) restarts. GetFakeLSNForUnloggedRel() handles that for us.
*/
- Assert(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED);
return GetFakeLSNForUnloggedRel();
}
}
diff --git a/src/backend/access/gist/gistxlog.c b/src/backend/access/gist/gistxlog.c
index 408bd5390a..66d5bb831f 100644
--- a/src/backend/access/gist/gistxlog.c
+++ b/src/backend/access/gist/gistxlog.c
@@ -489,25 +489,6 @@ gistRedoPageSplitRecord(XLogReaderState *record)
UnlockReleaseBuffer(firstbuffer);
}
-static void
-gistRedoCreateIndex(XLogReaderState *record)
-{
- XLogRecPtr lsn = record->EndRecPtr;
- Buffer buffer;
- Page page;
-
- buffer = XLogInitBufferForRedo(record, 0);
- Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
- page = (Page) BufferGetPage(buffer);
-
- GISTInitBuffer(buffer, F_LEAF);
-
- PageSetLSN(page, lsn);
-
- MarkBufferDirty(buffer);
- UnlockReleaseBuffer(buffer);
-}
-
void
gist_redo(XLogReaderState *record)
{
@@ -532,9 +513,6 @@ gist_redo(XLogReaderState *record)
case XLOG_GIST_PAGE_SPLIT:
gistRedoPageSplitRecord(record);
break;
- case XLOG_GIST_CREATE_INDEX:
- gistRedoCreateIndex(record);
- break;
default:
elog(PANIC, "gist_redo: unknown op code %u", info);
}
diff --git a/src/backend/access/rmgrdesc/gistdesc.c b/src/backend/access/rmgrdesc/gistdesc.c
index e468c9e15a..75f1ed747e 100644
--- a/src/backend/access/rmgrdesc/gistdesc.c
+++ b/src/backend/access/rmgrdesc/gistdesc.c
@@ -52,8 +52,6 @@ gist_desc(StringInfo buf, XLogReaderState *record)
case XLOG_GIST_PAGE_SPLIT:
out_gistxlogPageSplit(buf, (gistxlogPageSplit *) rec);
break;
- case XLOG_GIST_CREATE_INDEX:
- break;
}
}
@@ -73,9 +71,6 @@ gist_identify(uint8 info)
case XLOG_GIST_PAGE_SPLIT:
id = "PAGE_SPLIT";
break;
- case XLOG_GIST_CREATE_INDEX:
- id = "CREATE_INDEX";
- break;
}
return id;
diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h
index 3698942f9d..b1680bfd6d 100644
--- a/src/include/access/gist_private.h
+++ b/src/include/access/gist_private.h
@@ -242,6 +242,7 @@ typedef struct
Relation r;
Relation heapRel;
Size freespace; /* free space to be left */
+ bool is_build;
GISTInsertStack *stack;
} GISTInsertState;
@@ -391,7 +392,8 @@ extern void gistdoinsert(Relation r,
IndexTuple itup,
Size freespace,
GISTSTATE *GISTstate,
- Relation heapRel);
+ Relation heapRel,
+ bool is_build);
/* A List of these is returned from gistplacetopage() in *splitinfo */
typedef struct
@@ -407,7 +409,8 @@ extern bool gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
Buffer leftchildbuf,
List **splitinfo,
bool markleftchild,
- Relation heapRel);
+ Relation heapRel,
+ bool is_build);
extern SplitedPageLayout *gistSplit(Relation r, Page page, IndexTuple *itup,
int len, GISTSTATE *giststate);
diff --git a/src/include/access/gistxlog.h b/src/include/access/gistxlog.h
index 5117aabf1a..2ee80e244e 100644
--- a/src/include/access/gistxlog.h
+++ b/src/include/access/gistxlog.h
@@ -22,7 +22,6 @@
/* #define XLOG_GIST_NEW_ROOT 0x20 */ /* not used anymore */
#define XLOG_GIST_PAGE_SPLIT 0x30
/* #define XLOG_GIST_INSERT_COMPLETE 0x40 */ /* not used anymore */
-#define XLOG_GIST_CREATE_INDEX 0x50
/* #define XLOG_GIST_PAGE_DELETE 0x60 */ /* not used anymore */
/*
--
2.17.1
0004-SPGIST-Optimal-WAL-Usage.patch (text/x-patch)
From 3dbe3d827173244e9bda928147f35e0929266215 Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepikhov@postgrespro.ru>
Date: Tue, 5 Feb 2019 08:15:06 +0500
Subject: [PATCH 4/4] SPGIST-Optimal-WAL-Usage
---
src/backend/access/rmgrdesc/spgdesc.c | 5 ----
src/backend/access/spgist/spgdoinsert.c | 12 ++++-----
src/backend/access/spgist/spginsert.c | 24 +++--------------
src/backend/access/spgist/spgxlog.c | 35 -------------------------
src/include/access/spgxlog.h | 1 -
5 files changed, 10 insertions(+), 67 deletions(-)
diff --git a/src/backend/access/rmgrdesc/spgdesc.c b/src/backend/access/rmgrdesc/spgdesc.c
index 37af31a764..40c1c8b3f9 100644
--- a/src/backend/access/rmgrdesc/spgdesc.c
+++ b/src/backend/access/rmgrdesc/spgdesc.c
@@ -24,8 +24,6 @@ spg_desc(StringInfo buf, XLogReaderState *record)
switch (info)
{
- case XLOG_SPGIST_CREATE_INDEX:
- break;
case XLOG_SPGIST_ADD_LEAF:
{
spgxlogAddLeaf *xlrec = (spgxlogAddLeaf *) rec;
@@ -88,9 +86,6 @@ spg_identify(uint8 info)
switch (info & ~XLR_INFO_MASK)
{
- case XLOG_SPGIST_CREATE_INDEX:
- id = "CREATE_INDEX";
- break;
case XLOG_SPGIST_ADD_LEAF:
id = "ADD_LEAF";
break;
diff --git a/src/backend/access/spgist/spgdoinsert.c b/src/backend/access/spgist/spgdoinsert.c
index 0d07b8b291..c34c44cd8b 100644
--- a/src/backend/access/spgist/spgdoinsert.c
+++ b/src/backend/access/spgist/spgdoinsert.c
@@ -289,7 +289,7 @@ addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple,
MarkBufferDirty(current->buffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
int flags;
@@ -516,7 +516,7 @@ moveLeafs(Relation index, SpGistState *state,
MarkBufferDirty(current->buffer);
MarkBufferDirty(nbuf);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
@@ -1334,7 +1334,7 @@ doPickSplit(Relation index, SpGistState *state,
saveCurrent.buffer = InvalidBuffer;
}
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
int flags;
@@ -1531,7 +1531,7 @@ spgAddNodeAction(Relation index, SpGistState *state,
MarkBufferDirty(current->buffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
@@ -1644,7 +1644,7 @@ spgAddNodeAction(Relation index, SpGistState *state,
MarkBufferDirty(saveCurrent.buffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
int flags;
@@ -1840,7 +1840,7 @@ spgSplitNodeAction(Relation index, SpGistState *state,
MarkBufferDirty(current->buffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
diff --git a/src/backend/access/spgist/spginsert.c b/src/backend/access/spgist/spginsert.c
index f428a15138..5da0183a9b 100644
--- a/src/backend/access/spgist/spginsert.c
+++ b/src/backend/access/spgist/spginsert.c
@@ -21,6 +21,7 @@
#include "access/spgxlog.h"
#include "access/xlog.h"
#include "access/xloginsert.h"
+#include "access/generic_xlog.h"
#include "catalog/index.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
@@ -104,26 +105,6 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo)
SpGistInitBuffer(nullbuffer, SPGIST_LEAF | SPGIST_NULLS);
MarkBufferDirty(nullbuffer);
- if (RelationNeedsWAL(index))
- {
- XLogRecPtr recptr;
-
- XLogBeginInsert();
-
- /*
- * Replay will re-initialize the pages, so don't take full pages
- * images. No other data to log.
- */
- XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
- XLogRegisterBuffer(1, rootbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
- XLogRegisterBuffer(2, nullbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
-
- recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_CREATE_INDEX);
-
- PageSetLSN(BufferGetPage(metabuffer), recptr);
- PageSetLSN(BufferGetPage(rootbuffer), recptr);
- PageSetLSN(BufferGetPage(nullbuffer), recptr);
- }
END_CRIT_SECTION();
@@ -150,6 +131,9 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo)
SpGistUpdateMetaPage(index);
+ if (RelationNeedsWAL(index))
+ generic_log_relation(index);
+
result = (IndexBuildResult *) palloc0(sizeof(IndexBuildResult));
result->heap_tuples = reltuples;
result->index_tuples = buildstate.indtuples;
diff --git a/src/backend/access/spgist/spgxlog.c b/src/backend/access/spgist/spgxlog.c
index 71836ee8a5..ebe6ae8715 100644
--- a/src/backend/access/spgist/spgxlog.c
+++ b/src/backend/access/spgist/spgxlog.c
@@ -72,38 +72,6 @@ addOrReplaceTuple(Page page, Item tuple, int size, OffsetNumber offset)
size);
}
-static void
-spgRedoCreateIndex(XLogReaderState *record)
-{
- XLogRecPtr lsn = record->EndRecPtr;
- Buffer buffer;
- Page page;
-
- buffer = XLogInitBufferForRedo(record, 0);
- Assert(BufferGetBlockNumber(buffer) == SPGIST_METAPAGE_BLKNO);
- page = (Page) BufferGetPage(buffer);
- SpGistInitMetapage(page);
- PageSetLSN(page, lsn);
- MarkBufferDirty(buffer);
- UnlockReleaseBuffer(buffer);
-
- buffer = XLogInitBufferForRedo(record, 1);
- Assert(BufferGetBlockNumber(buffer) == SPGIST_ROOT_BLKNO);
- SpGistInitBuffer(buffer, SPGIST_LEAF);
- page = (Page) BufferGetPage(buffer);
- PageSetLSN(page, lsn);
- MarkBufferDirty(buffer);
- UnlockReleaseBuffer(buffer);
-
- buffer = XLogInitBufferForRedo(record, 2);
- Assert(BufferGetBlockNumber(buffer) == SPGIST_NULL_BLKNO);
- SpGistInitBuffer(buffer, SPGIST_LEAF | SPGIST_NULLS);
- page = (Page) BufferGetPage(buffer);
- PageSetLSN(page, lsn);
- MarkBufferDirty(buffer);
- UnlockReleaseBuffer(buffer);
-}
-
static void
spgRedoAddLeaf(XLogReaderState *record)
{
@@ -976,9 +944,6 @@ spg_redo(XLogReaderState *record)
oldCxt = MemoryContextSwitchTo(opCtx);
switch (info)
{
- case XLOG_SPGIST_CREATE_INDEX:
- spgRedoCreateIndex(record);
- break;
case XLOG_SPGIST_ADD_LEAF:
spgRedoAddLeaf(record);
break;
diff --git a/src/include/access/spgxlog.h b/src/include/access/spgxlog.h
index 6527fc9eb1..8199b3f250 100644
--- a/src/include/access/spgxlog.h
+++ b/src/include/access/spgxlog.h
@@ -18,7 +18,6 @@
#include "storage/off.h"
/* XLOG record types for SPGiST */
-#define XLOG_SPGIST_CREATE_INDEX 0x00
#define XLOG_SPGIST_ADD_LEAF 0x10
#define XLOG_SPGIST_MOVE_LEAFS 0x20
#define XLOG_SPGIST_ADD_NODE 0x30
--
2.17.1
The patch set had a problem with all-zero pages that can appear during the
index build stage: the generic_log_relation() routine sent every page into
the WAL, so the lsn field of an all-zero page got initialized and the
PageIsVerified() routine then detected it as a bad page.
Possible solutions:
1. Improve the index build algorithms and eliminate the possibility of
unused pages appearing.
2. Mark each page as 'dirty' right after its initialization. In this case
we get an 'empty' page instead of an all-zero one.
3. Do not write all-zero pages into the WAL.
In the patch set (see attachment) I used approach No. 3, sketched below.
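Here is a minimal C sketch of approach No. 3. It is my own simplification,
not the patch itself: the function name is made up, and it emits one generic
record per page instead of batching up to MAX_GENERIC_XLOG_PAGES pages per
record as the attached 0001 patch does. The key point is that pages for which
PageIsNew() is true are simply not registered, so their lsn field stays zero
and PageIsVerified() keeps treating them as valid all-zero pages.

#include "postgres.h"
#include "access/generic_xlog.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "utils/rel.h"

void
log_relation_skipping_new_pages(Relation rel)
{
    BlockNumber nblocks = RelationGetNumberOfBlocks(rel);
    BlockNumber blkno;

    for (blkno = 0; blkno < nblocks; blkno++)
    {
        Buffer            buf;
        GenericXLogState *state;

        CHECK_FOR_INTERRUPTS();

        buf = ReadBuffer(rel, blkno);

        /* Never-initialized (all-zero) pages are not logged at all. */
        if (PageIsNew(BufferGetPage(buf)))
        {
            ReleaseBuffer(buf);
            continue;
        }

        LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
        state = GenericXLogStart(rel);
        GenericXLogRegisterBuffer(state, buf, GENERIC_XLOG_FULL_IMAGE);
        GenericXLogFinish(state);
        UnlockReleaseBuffer(buf);
    }
}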
On 04.02.2019 10:04, Michael Paquier wrote:
On Tue, Dec 18, 2018 at 10:41:48AM +0500, Andrey Lepikhov wrote:
Ok. It is used only for demonstration.
The latest patch set needs a rebase, so moved to next CF, waiting on
author as this got no reviews.
--
Michael
--
Andrey Lepikhov
Postgres Professional
https://postgrespro.com
The Russian Postgres Company
Attachments:
0001-Relation-into-WAL-function.patch (text/x-patch)
From ec20e8896181cf8b26755acaa6028a62a0c709e7 Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepikhov@postgrespro.ru>
Date: Wed, 6 Feb 2019 14:39:59 +0500
Subject: [PATCH 1/4] Relation-into-WAL-function
---
src/backend/access/transam/generic_xlog.c | 48 +++++++++++++++++++++++
src/include/access/generic_xlog.h | 3 ++
2 files changed, 51 insertions(+)
diff --git a/src/backend/access/transam/generic_xlog.c b/src/backend/access/transam/generic_xlog.c
index 5b00b7275b..c22e361747 100644
--- a/src/backend/access/transam/generic_xlog.c
+++ b/src/backend/access/transam/generic_xlog.c
@@ -542,3 +542,51 @@ generic_mask(char *page, BlockNumber blkno)
mask_unused_space(page);
}
+
+/*
+ * Function to write generic xlog for every existing block of a relation.
+ * Caller is responsible for locking the relation exclusively.
+ */
+void
+generic_log_relation(Relation rel)
+{
+ BlockNumber blkno;
+ BlockNumber nblocks;
+ int npbuf = 0;
+ GenericXLogState *state = NULL;
+ Buffer bufpack[MAX_GENERIC_XLOG_PAGES];
+
+ CHECK_FOR_INTERRUPTS();
+ nblocks = RelationGetNumberOfBlocks(rel);
+
+ /*
+ * Iterate over all index pages and WAL-logging it. Pages are grouping into
+ * the packages before adding to a WAL-record. Zero-pages are
+ * not logged.
+ */
+ for (blkno = 0; blkno < nblocks; blkno++)
+ {
+ Buffer buf;
+
+ buf = ReadBuffer(rel, blkno);
+ if (!PageIsNew(BufferGetPage(buf)))
+ {
+ if (npbuf == 0)
+ state = GenericXLogStart(rel);
+
+ LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+ GenericXLogRegisterBuffer(state, buf, GENERIC_XLOG_FULL_IMAGE);
+ bufpack[npbuf++] = buf;
+ }
+ else
+ ReleaseBuffer(buf);
+
+ if ((npbuf == MAX_GENERIC_XLOG_PAGES) || (blkno == nblocks-1))
+ {
+ GenericXLogFinish(state);
+
+ for (; npbuf > 0; npbuf--)
+ UnlockReleaseBuffer(bufpack[npbuf-1]);
+ }
+ }
+}
diff --git a/src/include/access/generic_xlog.h b/src/include/access/generic_xlog.h
index cb5b5b713a..e3bbf014cc 100644
--- a/src/include/access/generic_xlog.h
+++ b/src/include/access/generic_xlog.h
@@ -42,4 +42,7 @@ extern const char *generic_identify(uint8 info);
extern void generic_desc(StringInfo buf, XLogReaderState *record);
extern void generic_mask(char *pagedata, BlockNumber blkno);
+/* other utils */
+extern void generic_log_relation(Relation rel);
+
#endif /* GENERIC_XLOG_H */
--
2.17.1
0002-GIN-Optimal-WAL-Usage.patch (text/x-patch)
From 6fa828f6737d9c2dbcd2c2ce61a150e79e2bc1d2 Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepikhov@postgrespro.ru>
Date: Wed, 6 Feb 2019 14:40:29 +0500
Subject: [PATCH 2/4] GIN-Optimal-WAL-Usage
---
src/backend/access/gin/ginbtree.c | 6 ++---
src/backend/access/gin/gindatapage.c | 9 ++++----
src/backend/access/gin/ginentrypage.c | 2 +-
src/backend/access/gin/gininsert.c | 30 ++++++++++--------------
src/backend/access/gin/ginutil.c | 4 ++--
src/backend/access/gin/ginvacuum.c | 2 +-
src/backend/access/gin/ginxlog.c | 33 ---------------------------
src/backend/access/rmgrdesc/gindesc.c | 6 -----
src/include/access/gin.h | 3 ++-
src/include/access/ginxlog.h | 2 --
10 files changed, 26 insertions(+), 71 deletions(-)
diff --git a/src/backend/access/gin/ginbtree.c b/src/backend/access/gin/ginbtree.c
index 533949e46a..9f82eef8c3 100644
--- a/src/backend/access/gin/ginbtree.c
+++ b/src/backend/access/gin/ginbtree.c
@@ -396,7 +396,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
/* It will fit, perform the insertion */
START_CRIT_SECTION();
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
XLogBeginInsert();
XLogRegisterBuffer(0, stack->buffer, REGBUF_STANDARD);
@@ -417,7 +417,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
MarkBufferDirty(childbuf);
}
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
XLogRecPtr recptr;
ginxlogInsert xlrec;
@@ -595,7 +595,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
}
/* write WAL record */
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
XLogRecPtr recptr;
diff --git a/src/backend/access/gin/gindatapage.c b/src/backend/access/gin/gindatapage.c
index 3ad8b76710..f3aff62c8e 100644
--- a/src/backend/access/gin/gindatapage.c
+++ b/src/backend/access/gin/gindatapage.c
@@ -593,7 +593,7 @@ dataBeginPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
* Great, all the items fit on a single page. If needed, prepare data
* for a WAL record describing the changes we'll make.
*/
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
computeLeafRecompressWALData(leaf);
/*
@@ -719,7 +719,7 @@ dataExecPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
dataPlaceToPageLeafRecompress(buf, leaf);
/* If needed, register WAL data built by computeLeafRecompressWALData */
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
XLogRegisterBufData(0, leaf->walinfo, leaf->walinfolen);
}
@@ -1152,7 +1152,7 @@ dataExecPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
pitem = (PostingItem *) insertdata;
GinDataPageAddPostingItem(page, pitem, off);
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
/*
* This must be static, because it has to survive until XLogInsert,
@@ -1773,6 +1773,7 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
Pointer ptr;
int nrootitems;
int rootsize;
+ bool is_build = (buildStats != NULL);
/* Construct the new root page in memory first. */
tmppage = (Page) palloc(BLCKSZ);
@@ -1826,7 +1827,7 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
PageRestoreTempPage(tmppage, page);
MarkBufferDirty(buffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !is_build)
{
XLogRecPtr recptr;
ginxlogCreatePostingTree data;
diff --git a/src/backend/access/gin/ginentrypage.c b/src/backend/access/gin/ginentrypage.c
index 4889de2a4f..1f5ba33d51 100644
--- a/src/backend/access/gin/ginentrypage.c
+++ b/src/backend/access/gin/ginentrypage.c
@@ -571,7 +571,7 @@ entryExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
elog(ERROR, "failed to add item to index page in \"%s\"",
RelationGetRelationName(btree->index));
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
/*
* This must be static, because it has to survive until XLogInsert,
diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c
index 524ac5be8b..6307743afd 100644
--- a/src/backend/access/gin/gininsert.c
+++ b/src/backend/access/gin/gininsert.c
@@ -17,6 +17,7 @@
#include "access/gin_private.h"
#include "access/ginxlog.h"
#include "access/xloginsert.h"
+#include "access/generic_xlog.h"
#include "catalog/index.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
@@ -194,6 +195,7 @@ ginEntryInsert(GinState *ginstate,
buildStats->nEntries++;
ginPrepareEntryScan(&btree, attnum, key, category, ginstate);
+ btree.isBuild = (buildStats != NULL);
stack = ginFindLeafPage(&btree, false, false, NULL);
page = BufferGetPage(stack->buffer);
@@ -346,23 +348,6 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
GinInitBuffer(RootBuffer, GIN_LEAF);
MarkBufferDirty(RootBuffer);
- if (RelationNeedsWAL(index))
- {
- XLogRecPtr recptr;
- Page page;
-
- XLogBeginInsert();
- XLogRegisterBuffer(0, MetaBuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
- XLogRegisterBuffer(1, RootBuffer, REGBUF_WILL_INIT);
-
- recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX);
-
- page = BufferGetPage(RootBuffer);
- PageSetLSN(page, recptr);
-
- page = BufferGetPage(MetaBuffer);
- PageSetLSN(page, recptr);
- }
UnlockReleaseBuffer(MetaBuffer);
UnlockReleaseBuffer(RootBuffer);
@@ -417,7 +402,16 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
* Update metapage stats
*/
buildstate.buildStats.nTotalPages = RelationGetNumberOfBlocks(index);
- ginUpdateStats(index, &buildstate.buildStats);
+ ginUpdateStats(index, &buildstate.buildStats, true);
+
+ /*
+ * Create generic wal records for all pages of relation, if necessary.
+ * It seems reasonable not to generate WAL, if we recieved interrupt
+ * signal.
+ */
+ CHECK_FOR_INTERRUPTS();
+ if (RelationNeedsWAL(index))
+ generic_log_relation(index);
/*
* Return statistics
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index afc20232ac..51b20bca6e 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -661,7 +661,7 @@ ginGetStats(Relation index, GinStatsData *stats)
* Note: nPendingPages and ginVersion are *not* copied over
*/
void
-ginUpdateStats(Relation index, const GinStatsData *stats)
+ginUpdateStats(Relation index, const GinStatsData *stats, bool is_build)
{
Buffer metabuffer;
Page metapage;
@@ -691,7 +691,7 @@ ginUpdateStats(Relation index, const GinStatsData *stats)
MarkBufferDirty(metabuffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !is_build)
{
XLogRecPtr recptr;
ginxlogUpdateMeta data;
diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c
index dfe885b101..b9a28d1863 100644
--- a/src/backend/access/gin/ginvacuum.c
+++ b/src/backend/access/gin/ginvacuum.c
@@ -759,7 +759,7 @@ ginvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
/* Update the metapage with accurate page and entry counts */
idxStat.nTotalPages = npages;
- ginUpdateStats(info->index, &idxStat);
+ ginUpdateStats(info->index, &idxStat, false);
/* Finally, vacuum the FSM */
IndexFreeSpaceMapVacuum(info->index);
diff --git a/src/backend/access/gin/ginxlog.c b/src/backend/access/gin/ginxlog.c
index c467ffa346..b648af1ff6 100644
--- a/src/backend/access/gin/ginxlog.c
+++ b/src/backend/access/gin/ginxlog.c
@@ -40,36 +40,6 @@ ginRedoClearIncompleteSplit(XLogReaderState *record, uint8 block_id)
UnlockReleaseBuffer(buffer);
}
-static void
-ginRedoCreateIndex(XLogReaderState *record)
-{
- XLogRecPtr lsn = record->EndRecPtr;
- Buffer RootBuffer,
- MetaBuffer;
- Page page;
-
- MetaBuffer = XLogInitBufferForRedo(record, 0);
- Assert(BufferGetBlockNumber(MetaBuffer) == GIN_METAPAGE_BLKNO);
- page = (Page) BufferGetPage(MetaBuffer);
-
- GinInitMetabuffer(MetaBuffer);
-
- PageSetLSN(page, lsn);
- MarkBufferDirty(MetaBuffer);
-
- RootBuffer = XLogInitBufferForRedo(record, 1);
- Assert(BufferGetBlockNumber(RootBuffer) == GIN_ROOT_BLKNO);
- page = (Page) BufferGetPage(RootBuffer);
-
- GinInitBuffer(RootBuffer, GIN_LEAF);
-
- PageSetLSN(page, lsn);
- MarkBufferDirty(RootBuffer);
-
- UnlockReleaseBuffer(RootBuffer);
- UnlockReleaseBuffer(MetaBuffer);
-}
-
static void
ginRedoCreatePTree(XLogReaderState *record)
{
@@ -767,9 +737,6 @@ gin_redo(XLogReaderState *record)
oldCtx = MemoryContextSwitchTo(opCtx);
switch (info)
{
- case XLOG_GIN_CREATE_INDEX:
- ginRedoCreateIndex(record);
- break;
case XLOG_GIN_CREATE_PTREE:
ginRedoCreatePTree(record);
break;
diff --git a/src/backend/access/rmgrdesc/gindesc.c b/src/backend/access/rmgrdesc/gindesc.c
index ef30ce16b0..f3f4e1b214 100644
--- a/src/backend/access/rmgrdesc/gindesc.c
+++ b/src/backend/access/rmgrdesc/gindesc.c
@@ -78,9 +78,6 @@ gin_desc(StringInfo buf, XLogReaderState *record)
switch (info)
{
- case XLOG_GIN_CREATE_INDEX:
- /* no further information */
- break;
case XLOG_GIN_CREATE_PTREE:
/* no further information */
break;
@@ -188,9 +185,6 @@ gin_identify(uint8 info)
switch (info & ~XLR_INFO_MASK)
{
- case XLOG_GIN_CREATE_INDEX:
- id = "CREATE_INDEX";
- break;
case XLOG_GIN_CREATE_PTREE:
id = "CREATE_PTREE";
break;
diff --git a/src/include/access/gin.h b/src/include/access/gin.h
index 61fa697039..d559ffc703 100644
--- a/src/include/access/gin.h
+++ b/src/include/access/gin.h
@@ -71,6 +71,7 @@ extern int gin_pending_list_limit;
/* ginutil.c */
extern void ginGetStats(Relation index, GinStatsData *stats);
-extern void ginUpdateStats(Relation index, const GinStatsData *stats);
+extern void ginUpdateStats(Relation index,
+ const GinStatsData *stats, bool is_build);
#endif /* GIN_H */
diff --git a/src/include/access/ginxlog.h b/src/include/access/ginxlog.h
index 9bd4e0b9ba..2c5d743cac 100644
--- a/src/include/access/ginxlog.h
+++ b/src/include/access/ginxlog.h
@@ -16,8 +16,6 @@
#include "lib/stringinfo.h"
#include "storage/off.h"
-#define XLOG_GIN_CREATE_INDEX 0x00
-
#define XLOG_GIN_CREATE_PTREE 0x10
typedef struct ginxlogCreatePostingTree
--
2.17.1
0003-GiST-Optimal-WAL-Usage.patch (text/x-patch)
From 679aa38c664eecba4a0d049feffe2537be3ebe21 Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepikhov@postgrespro.ru>
Date: Wed, 6 Feb 2019 14:41:08 +0500
Subject: [PATCH 3/4] GiST-Optimal-WAL-Usage
---
src/backend/access/gist/gist.c | 46 ++++++++++++++++++--------
src/backend/access/gist/gistbuild.c | 32 ++++++++++--------
src/backend/access/gist/gistutil.c | 2 +-
src/backend/access/gist/gistxlog.c | 22 ------------
src/backend/access/rmgrdesc/gistdesc.c | 5 ---
src/include/access/gist_private.h | 7 ++--
src/include/access/gistxlog.h | 1 -
7 files changed, 56 insertions(+), 59 deletions(-)
diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c
index b75b3a8dac..c8f4e41db9 100644
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -173,7 +173,7 @@ gistinsert(Relation r, Datum *values, bool *isnull,
values, isnull, true /* size is currently bogus */ );
itup->t_tid = *ht_ctid;
- gistdoinsert(r, itup, 0, giststate, heapRel);
+ gistdoinsert(r, itup, 0, giststate, heapRel, false);
/* cleanup */
MemoryContextSwitchTo(oldCxt);
@@ -220,7 +220,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
Buffer leftchildbuf,
List **splitinfo,
bool markfollowright,
- Relation heapRel)
+ Relation heapRel,
+ bool is_build)
{
BlockNumber blkno = BufferGetBlockNumber(buffer);
Page page = BufferGetPage(buffer);
@@ -459,7 +460,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
* insertion for that. NB: The number of pages and data segments
* specified here must match the calculations in gistXLogSplit()!
*/
- if (RelationNeedsWAL(rel))
+ if (RelationNeedsWAL(rel) && !is_build)
XLogEnsureRecordSpace(npage, 1 + npage * 2);
START_CRIT_SECTION();
@@ -480,18 +481,20 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer));
dist->page = BufferGetPage(dist->buffer);
- /* Write the WAL record */
- if (RelationNeedsWAL(rel))
+ /*
+ * Write the WAL record.
+ * Do not write XLog entry if the insertion is caused by
+ * index build process.
+ */
+ if (RelationNeedsWAL(rel) && !is_build)
recptr = gistXLogSplit(is_leaf,
- dist, oldrlink, oldnsn, leftchildbuf,
- markfollowright);
+ dist, oldrlink, oldnsn, leftchildbuf,
+ markfollowright);
else
recptr = gistGetFakeLSN(rel);
for (ptr = dist; ptr; ptr = ptr->next)
- {
PageSetLSN(ptr->page, recptr);
- }
/*
* Return the new child buffers to the caller.
@@ -545,7 +548,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
if (BufferIsValid(leftchildbuf))
MarkBufferDirty(leftchildbuf);
- if (RelationNeedsWAL(rel))
+
+ if (RelationNeedsWAL(rel) && !is_build)
{
OffsetNumber ndeloffs = 0,
deloffs[1];
@@ -568,6 +572,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
PageSetLSN(page, recptr);
}
+
if (newblkno)
*newblkno = blkno;
}
@@ -584,17 +589,28 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
* the full page image. There's a chicken-and-egg problem: if we updated
* the child pages first, we wouldn't know the recptr of the WAL record
* we're about to write.
+ *
+ * We use fakeLSNs for inserions caused by index build. And when it is
+ * finished, we write generic_xlog entry for each index page and update
+ * all LSNs. In order to keep NSNs less then LSNs after this update, we
+ * set NSN to InvalidXLogRecPtr, which is the smallest possible NSN.
*/
+
if (BufferIsValid(leftchildbuf))
{
Page leftpg = BufferGetPage(leftchildbuf);
+ XLogRecPtr fakerecptr = InvalidXLogRecPtr;
- GistPageSetNSN(leftpg, recptr);
- GistClearFollowRight(leftpg);
+ if (!is_build)
+ GistPageSetNSN(leftpg, recptr);
+ else
+ GistPageSetNSN(leftpg, fakerecptr);
+ GistClearFollowRight(leftpg);
PageSetLSN(leftpg, recptr);
}
+
END_CRIT_SECTION();
return is_split;
@@ -607,7 +623,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
*/
void
gistdoinsert(Relation r, IndexTuple itup, Size freespace,
- GISTSTATE *giststate, Relation heapRel)
+ GISTSTATE *giststate, Relation heapRel, bool is_build)
{
ItemId iid;
IndexTuple idxtuple;
@@ -620,6 +636,7 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace,
state.freespace = freespace;
state.r = r;
state.heapRel = heapRel;
+ state.is_build = is_build;
/* Start from the root */
firststack.blkno = GIST_ROOT_BLKNO;
@@ -1237,7 +1254,8 @@ gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
leftchild,
&splitinfo,
true,
- state->heapRel);
+ state->heapRel,
+ state->is_build);
/*
* Before recursing up in case the page was split, release locks on the
diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c
index bd142a3560..2e98e55187 100644
--- a/src/backend/access/gist/gistbuild.c
+++ b/src/backend/access/gist/gistbuild.c
@@ -17,6 +17,7 @@
#include <math.h>
#include "access/genam.h"
+#include "access/generic_xlog.h"
#include "access/gist_private.h"
#include "access/gistxlog.h"
#include "access/xloginsert.h"
@@ -180,18 +181,12 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
MarkBufferDirty(buffer);
- if (RelationNeedsWAL(index))
- {
- XLogRecPtr recptr;
-
- XLogBeginInsert();
- XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
-
- recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX);
- PageSetLSN(page, recptr);
- }
- else
- PageSetLSN(page, gistGetFakeLSN(heap));
+ /*
+ * Do not write index pages to WAL unitl index build is finished.
+ * But we still need increasing LSNs on each page, so use FakeLSN,
+ * even for relations which eventually need WAL.
+ */
+ PageSetLSN(page, gistGetFakeLSN(heap));
UnlockReleaseBuffer(buffer);
@@ -224,6 +219,15 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
freeGISTstate(buildstate.giststate);
+ /*
+ * Create generic wal records for all pages of relation, if necessary.
+ * It seems reasonable not to generate WAL, if we recieved interrupt
+ * signal.
+ */
+ CHECK_FOR_INTERRUPTS();
+ if (RelationNeedsWAL(index))
+ generic_log_relation(index);
+
/*
* Return statistics
*/
@@ -486,7 +490,7 @@ gistBuildCallback(Relation index,
* locked, we call gistdoinsert directly.
*/
gistdoinsert(index, itup, buildstate->freespace,
- buildstate->giststate, buildstate->heaprel);
+ buildstate->giststate, buildstate->heaprel, true);
}
/* Update tuple count and total size. */
@@ -693,7 +697,7 @@ gistbufferinginserttuples(GISTBuildState *buildstate, Buffer buffer, int level,
InvalidBuffer,
&splitinfo,
false,
- buildstate->heaprel);
+ buildstate->heaprel, true);
/*
* If this is a root split, update the root path item kept in memory. This
diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c
index 8d3dfad27b..f7088e2d94 100644
--- a/src/backend/access/gist/gistutil.c
+++ b/src/backend/access/gist/gistutil.c
@@ -949,6 +949,7 @@ gistproperty(Oid index_oid, int attno,
* Temporary and unlogged GiST indexes are not WAL-logged, but we need LSNs
* to detect concurrent page splits anyway. This function provides a fake
* sequence of LSNs for that purpose.
+ * Persistent relations are also not WAL-logged while we build index.
*/
XLogRecPtr
gistGetFakeLSN(Relation rel)
@@ -969,7 +970,6 @@ gistGetFakeLSN(Relation rel)
* Unlogged relations are accessible from other backends, and survive
* (clean) restarts. GetFakeLSNForUnloggedRel() handles that for us.
*/
- Assert(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED);
return GetFakeLSNForUnloggedRel();
}
}
diff --git a/src/backend/access/gist/gistxlog.c b/src/backend/access/gist/gistxlog.c
index 408bd5390a..66d5bb831f 100644
--- a/src/backend/access/gist/gistxlog.c
+++ b/src/backend/access/gist/gistxlog.c
@@ -489,25 +489,6 @@ gistRedoPageSplitRecord(XLogReaderState *record)
UnlockReleaseBuffer(firstbuffer);
}
-static void
-gistRedoCreateIndex(XLogReaderState *record)
-{
- XLogRecPtr lsn = record->EndRecPtr;
- Buffer buffer;
- Page page;
-
- buffer = XLogInitBufferForRedo(record, 0);
- Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
- page = (Page) BufferGetPage(buffer);
-
- GISTInitBuffer(buffer, F_LEAF);
-
- PageSetLSN(page, lsn);
-
- MarkBufferDirty(buffer);
- UnlockReleaseBuffer(buffer);
-}
-
void
gist_redo(XLogReaderState *record)
{
@@ -532,9 +513,6 @@ gist_redo(XLogReaderState *record)
case XLOG_GIST_PAGE_SPLIT:
gistRedoPageSplitRecord(record);
break;
- case XLOG_GIST_CREATE_INDEX:
- gistRedoCreateIndex(record);
- break;
default:
elog(PANIC, "gist_redo: unknown op code %u", info);
}
diff --git a/src/backend/access/rmgrdesc/gistdesc.c b/src/backend/access/rmgrdesc/gistdesc.c
index e468c9e15a..75f1ed747e 100644
--- a/src/backend/access/rmgrdesc/gistdesc.c
+++ b/src/backend/access/rmgrdesc/gistdesc.c
@@ -52,8 +52,6 @@ gist_desc(StringInfo buf, XLogReaderState *record)
case XLOG_GIST_PAGE_SPLIT:
out_gistxlogPageSplit(buf, (gistxlogPageSplit *) rec);
break;
- case XLOG_GIST_CREATE_INDEX:
- break;
}
}
@@ -73,9 +71,6 @@ gist_identify(uint8 info)
case XLOG_GIST_PAGE_SPLIT:
id = "PAGE_SPLIT";
break;
- case XLOG_GIST_CREATE_INDEX:
- id = "CREATE_INDEX";
- break;
}
return id;
diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h
index 3698942f9d..b1680bfd6d 100644
--- a/src/include/access/gist_private.h
+++ b/src/include/access/gist_private.h
@@ -242,6 +242,7 @@ typedef struct
Relation r;
Relation heapRel;
Size freespace; /* free space to be left */
+ bool is_build;
GISTInsertStack *stack;
} GISTInsertState;
@@ -391,7 +392,8 @@ extern void gistdoinsert(Relation r,
IndexTuple itup,
Size freespace,
GISTSTATE *GISTstate,
- Relation heapRel);
+ Relation heapRel,
+ bool is_build);
/* A List of these is returned from gistplacetopage() in *splitinfo */
typedef struct
@@ -407,7 +409,8 @@ extern bool gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
Buffer leftchildbuf,
List **splitinfo,
bool markleftchild,
- Relation heapRel);
+ Relation heapRel,
+ bool is_build);
extern SplitedPageLayout *gistSplit(Relation r, Page page, IndexTuple *itup,
int len, GISTSTATE *giststate);
diff --git a/src/include/access/gistxlog.h b/src/include/access/gistxlog.h
index 5117aabf1a..2ee80e244e 100644
--- a/src/include/access/gistxlog.h
+++ b/src/include/access/gistxlog.h
@@ -22,7 +22,6 @@
/* #define XLOG_GIST_NEW_ROOT 0x20 */ /* not used anymore */
#define XLOG_GIST_PAGE_SPLIT 0x30
/* #define XLOG_GIST_INSERT_COMPLETE 0x40 */ /* not used anymore */
-#define XLOG_GIST_CREATE_INDEX 0x50
/* #define XLOG_GIST_PAGE_DELETE 0x60 */ /* not used anymore */
/*
--
2.17.1
0004-SP-GiST-Optimal-WAL-Usage.patchtext/x-patch; name=0004-SP-GiST-Optimal-WAL-Usage.patchDownload
From c89f89c963078dda856394cb1189d9d0580018f6 Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepikhov@postgrespro.ru>
Date: Wed, 6 Feb 2019 14:41:25 +0500
Subject: [PATCH 4/4] SP-GiST-Optimal-WAL-Usage
---
src/backend/access/rmgrdesc/spgdesc.c | 5 ----
src/backend/access/spgist/spgdoinsert.c | 12 ++++-----
src/backend/access/spgist/spginsert.c | 24 +++--------------
src/backend/access/spgist/spgxlog.c | 35 -------------------------
src/include/access/spgxlog.h | 1 -
5 files changed, 10 insertions(+), 67 deletions(-)
diff --git a/src/backend/access/rmgrdesc/spgdesc.c b/src/backend/access/rmgrdesc/spgdesc.c
index 37af31a764..40c1c8b3f9 100644
--- a/src/backend/access/rmgrdesc/spgdesc.c
+++ b/src/backend/access/rmgrdesc/spgdesc.c
@@ -24,8 +24,6 @@ spg_desc(StringInfo buf, XLogReaderState *record)
switch (info)
{
- case XLOG_SPGIST_CREATE_INDEX:
- break;
case XLOG_SPGIST_ADD_LEAF:
{
spgxlogAddLeaf *xlrec = (spgxlogAddLeaf *) rec;
@@ -88,9 +86,6 @@ spg_identify(uint8 info)
switch (info & ~XLR_INFO_MASK)
{
- case XLOG_SPGIST_CREATE_INDEX:
- id = "CREATE_INDEX";
- break;
case XLOG_SPGIST_ADD_LEAF:
id = "ADD_LEAF";
break;
diff --git a/src/backend/access/spgist/spgdoinsert.c b/src/backend/access/spgist/spgdoinsert.c
index 0d07b8b291..c34c44cd8b 100644
--- a/src/backend/access/spgist/spgdoinsert.c
+++ b/src/backend/access/spgist/spgdoinsert.c
@@ -289,7 +289,7 @@ addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple,
MarkBufferDirty(current->buffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
int flags;
@@ -516,7 +516,7 @@ moveLeafs(Relation index, SpGistState *state,
MarkBufferDirty(current->buffer);
MarkBufferDirty(nbuf);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
@@ -1334,7 +1334,7 @@ doPickSplit(Relation index, SpGistState *state,
saveCurrent.buffer = InvalidBuffer;
}
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
int flags;
@@ -1531,7 +1531,7 @@ spgAddNodeAction(Relation index, SpGistState *state,
MarkBufferDirty(current->buffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
@@ -1644,7 +1644,7 @@ spgAddNodeAction(Relation index, SpGistState *state,
MarkBufferDirty(saveCurrent.buffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
int flags;
@@ -1840,7 +1840,7 @@ spgSplitNodeAction(Relation index, SpGistState *state,
MarkBufferDirty(current->buffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
diff --git a/src/backend/access/spgist/spginsert.c b/src/backend/access/spgist/spginsert.c
index f428a15138..5da0183a9b 100644
--- a/src/backend/access/spgist/spginsert.c
+++ b/src/backend/access/spgist/spginsert.c
@@ -21,6 +21,7 @@
#include "access/spgxlog.h"
#include "access/xlog.h"
#include "access/xloginsert.h"
+#include "access/generic_xlog.h"
#include "catalog/index.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
@@ -104,26 +105,6 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo)
SpGistInitBuffer(nullbuffer, SPGIST_LEAF | SPGIST_NULLS);
MarkBufferDirty(nullbuffer);
- if (RelationNeedsWAL(index))
- {
- XLogRecPtr recptr;
-
- XLogBeginInsert();
-
- /*
- * Replay will re-initialize the pages, so don't take full pages
- * images. No other data to log.
- */
- XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
- XLogRegisterBuffer(1, rootbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
- XLogRegisterBuffer(2, nullbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
-
- recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_CREATE_INDEX);
-
- PageSetLSN(BufferGetPage(metabuffer), recptr);
- PageSetLSN(BufferGetPage(rootbuffer), recptr);
- PageSetLSN(BufferGetPage(nullbuffer), recptr);
- }
END_CRIT_SECTION();
@@ -150,6 +131,9 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo)
SpGistUpdateMetaPage(index);
+ if (RelationNeedsWAL(index))
+ generic_log_relation(index);
+
result = (IndexBuildResult *) palloc0(sizeof(IndexBuildResult));
result->heap_tuples = reltuples;
result->index_tuples = buildstate.indtuples;
diff --git a/src/backend/access/spgist/spgxlog.c b/src/backend/access/spgist/spgxlog.c
index 71836ee8a5..ebe6ae8715 100644
--- a/src/backend/access/spgist/spgxlog.c
+++ b/src/backend/access/spgist/spgxlog.c
@@ -72,38 +72,6 @@ addOrReplaceTuple(Page page, Item tuple, int size, OffsetNumber offset)
size);
}
-static void
-spgRedoCreateIndex(XLogReaderState *record)
-{
- XLogRecPtr lsn = record->EndRecPtr;
- Buffer buffer;
- Page page;
-
- buffer = XLogInitBufferForRedo(record, 0);
- Assert(BufferGetBlockNumber(buffer) == SPGIST_METAPAGE_BLKNO);
- page = (Page) BufferGetPage(buffer);
- SpGistInitMetapage(page);
- PageSetLSN(page, lsn);
- MarkBufferDirty(buffer);
- UnlockReleaseBuffer(buffer);
-
- buffer = XLogInitBufferForRedo(record, 1);
- Assert(BufferGetBlockNumber(buffer) == SPGIST_ROOT_BLKNO);
- SpGistInitBuffer(buffer, SPGIST_LEAF);
- page = (Page) BufferGetPage(buffer);
- PageSetLSN(page, lsn);
- MarkBufferDirty(buffer);
- UnlockReleaseBuffer(buffer);
-
- buffer = XLogInitBufferForRedo(record, 2);
- Assert(BufferGetBlockNumber(buffer) == SPGIST_NULL_BLKNO);
- SpGistInitBuffer(buffer, SPGIST_LEAF | SPGIST_NULLS);
- page = (Page) BufferGetPage(buffer);
- PageSetLSN(page, lsn);
- MarkBufferDirty(buffer);
- UnlockReleaseBuffer(buffer);
-}
-
static void
spgRedoAddLeaf(XLogReaderState *record)
{
@@ -976,9 +944,6 @@ spg_redo(XLogReaderState *record)
oldCxt = MemoryContextSwitchTo(opCtx);
switch (info)
{
- case XLOG_SPGIST_CREATE_INDEX:
- spgRedoCreateIndex(record);
- break;
case XLOG_SPGIST_ADD_LEAF:
spgRedoAddLeaf(record);
break;
diff --git a/src/include/access/spgxlog.h b/src/include/access/spgxlog.h
index 6527fc9eb1..8199b3f250 100644
--- a/src/include/access/spgxlog.h
+++ b/src/include/access/spgxlog.h
@@ -18,7 +18,6 @@
#include "storage/off.h"
/* XLOG record types for SPGiST */
-#define XLOG_SPGIST_CREATE_INDEX 0x00
#define XLOG_SPGIST_ADD_LEAF 0x10
#define XLOG_SPGIST_MOVE_LEAFS 0x20
#define XLOG_SPGIST_ADD_NODE 0x30
--
2.17.1
On 2/6/19 2:08 PM, Andrey Lepikhov wrote:
The patchset had a problem with all-zero pages that appeared at the index
build stage: the generic_log_relation() routine sends all pages into the
WAL, so the lsn field of an all-zero page gets initialized and the
PageIsVerified() routine then detects it as a bad page.
The possible solutions are:
1. Improve the index build algorithms and eliminate the possibility of
unused pages appearing.
2. Mark each page as 'dirty' right after initialization. In this case
we get an 'empty' page instead of an all-zero one.
3. Do not write all-zero pages into the WAL.
In the patchset (see attachment) I used approach No. 3.
On 04.02.2019 10:04, Michael Paquier wrote:
On Tue, Dec 18, 2018 at 10:41:48AM +0500, Andrey Lepikhov wrote:
Ok. It is used only for demonstration.
The latest patch set needs a rebase, so moved to next CF, waiting on
author as this got no reviews.
The patch no longer applies, so I have marked it Waiting on Author.
Alexander, Heikki, are either of you planning to review the patch in
this CF?
Regards,
--
-David
david@pgmasters.net
On 25/03/2019 09:57, David Steele wrote:
On 2/6/19 2:08 PM, Andrey Lepikhov wrote:
The patchset had a problem with all-zero pages that appeared at the index
build stage: the generic_log_relation() routine sends all pages into the
WAL, so the lsn field of an all-zero page gets initialized and the
PageIsVerified() routine then detects it as a bad page.
The possible solutions are:
1. Improve the index build algorithms and eliminate the possibility of
unused pages appearing.
2. Mark each page as 'dirty' right after initialization. In this case
we get an 'empty' page instead of an all-zero one.
3. Do not write all-zero pages into the WAL.
Hmm. When do we create all-zero pages during index build? That seems
pretty surprising.
On 04.02.2019 10:04, Michael Paquier wrote:
On Tue, Dec 18, 2018 at 10:41:48AM +0500, Andrey Lepikhov wrote:
Ok. It is used only for demonstration.
The latest patch set needs a rebase, so moved to next CF, waiting on
author as this got no reviews.
The patch no longer applies, so I have marked it Waiting on Author.
Alexander, Heikki, are either of you planning to review the patch in
this CF?
I had another quick look.
I still think using the "generic xlog AM" for this is a wrong level of
abstraction, and we should use the XLOG_FPI records for this directly.
We can extend XLOG_FPI so that it can store multiple pages in a single
record, if it doesn't already handle it.
Another counter-point to using the generic xlog record is that you're
currently doing two unnecessary memcpy's of all pages in the index, in
GenericXLogRegisterBuffer() and GenericXLogFinish(). That's not free.
I guess the generic_log_relation() function can stay where it is, but it
should use XLogRegisterBuffer() and XLogInsert() directly.
- Heikki
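For illustration, a minimal sketch of the direct-FPI approach described above
could look like the following. It logs one non-empty page per XLOG_FPI record
via log_newpage_buffer(); the v3 patch later in this thread instead batches
several buffers into each record. The function name is invented for the
sketch, and this code is not part of the posted patches.

#include "postgres.h"

#include "access/xloginsert.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"

/*
 * Sketch only: WAL-log every non-empty page of an index as an XLOG_FPI
 * record, one page per record. Caller must hold an exclusive lock on rel.
 */
static void
log_relation_fpi_sketch(Relation rel)
{
	BlockNumber blkno;
	BlockNumber nblocks = RelationGetNumberOfBlocks(rel);

	for (blkno = 0; blkno < nblocks; blkno++)
	{
		Buffer		buf;

		CHECK_FOR_INTERRUPTS();

		buf = ReadBuffer(rel, blkno);
		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);

		/* Skip pages that never received any data (the all-zero case above). */
		if (!PageIsNew(BufferGetPage(buf)))
		{
			START_CRIT_SECTION();
			MarkBufferDirty(buf);
			/* Emits an XLOG_FPI record and sets the page LSN. */
			log_newpage_buffer(buf, true);
			END_CRIT_SECTION();
		}

		UnlockReleaseBuffer(buf);
	}
}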
On 25/03/2019 15:21, Heikki Linnakangas wrote:
On 25/03/2019 09:57, David Steele wrote:
On 2/6/19 2:08 PM, Andrey Lepikhov wrote:
The patchset had a problem with all-zero pages that appeared at the index
build stage: the generic_log_relation() routine sends all pages into the
WAL, so the lsn field of an all-zero page gets initialized and the
PageIsVerified() routine then detects it as a bad page.
The possible solutions are:
1. Improve the index build algorithms and eliminate the possibility of
unused pages appearing.
2. Mark each page as 'dirty' right after initialization. In this case
we get an 'empty' page instead of an all-zero one.
3. Do not write all-zero pages into the WAL.
Hmm. When do we create all-zero pages during index build? That seems
pretty surprising.
GiST uses buffered pages. During a GiST build it is possible (very rarely)
that no index tuple is written to a page before the next block is
allocated, so that page remains all-zero.
The current GiST code does not have this problem, because it writes only
changed pages into the WAL.
But the idea of the patch is to traverse the blocks of the index relation
one by one after the index build finishes and write them to the WAL. In
that case we will encounter all-zero pages and need to check for them.
On 04.02.2019 10:04, Michael Paquier wrote:
On Tue, Dec 18, 2018 at 10:41:48AM +0500, Andrey Lepikhov wrote:
Ok. It is used only for demonstration.
The latest patch set needs a rebase, so moved to next CF, waiting on
author as this got no reviews.
The patch no longer applies, so I have marked it Waiting on Author.
Alexander, Heikki, are either of you planning to review the patch in
this CF?
I had another quick look.
I still think using the "generic xlog AM" for this is a wrong level of
abstraction, and we should use the XLOG_FPI records for this directly.
We can extend XLOG_FPI so that it can store multiple pages in a single
record, if it doesn't already handle it.
Another counter-point to using the generic xlog record is that you're
currently doing two unnecessary memcpy's of all pages in the index, in
GenericXLogRegisterBuffer() and GenericXLogFinish(). That's not free.
I guess the generic_log_relation() function can stay where it is, but it
should use XLogRegisterBuffer() and XLogInsert() directly.
OK. This patch has been waiting for feedback for a long time. I will check
the GiST code changes from the previous review and try to follow your advice.
- Heikki
--
Andrey Lepikhov
Postgres Professional
https://postgrespro.com
The Russian Postgres Company
On 26/03/2019 11:29, Andrey Lepikhov wrote:
On 25/03/2019 15:21, Heikki Linnakangas wrote:
Hmm. When do we create all-zero pages during index build? That seems
pretty surprising.
GiST uses buffered pages. During a GiST build it is possible (very rarely)
that no index tuple is written to a page before the next block is
allocated, so that page remains all-zero.
The current GiST code does not have this problem, because it writes only
changed pages into the WAL.
Looking at the code, I don't see how that could happen. The only place
where the GiST index file is extended is in gistNewBuffer(), and all
callers of that initialize the page immediately after the call. What am
I missing?
- Heikki
On 26/03/2019 15:59, Heikki Linnakangas wrote:
On 26/03/2019 11:29, Andrey Lepikhov wrote:
On 25/03/2019 15:21, Heikki Linnakangas wrote:
Hmm. When do we create all-zero pages during index build? That seems
pretty surprising.
GiST uses buffered pages. During a GiST build it is possible (very rarely)
that no index tuple is written to a page before the next block is
allocated, so that page remains all-zero.
The current GiST code does not have this problem, because it writes only
changed pages into the WAL.
Looking at the code, I don't see how that could happen. The only place
where the GiST index file is extended is in gistNewBuffer(), and all
callers of that initialize the page immediately after the call. What am
I missing?
Sorry, this issue was actually found in the SP-GiST AM. You can reproduce it
as follows:
1. Apply the v2 version of the patch set (see attachment).
2. In the generic_log_relation() routine, add logging on PageIsNew(buf)
(see the sketch after this list).
3. Run the script t1.sql (in attachment).
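For illustration, the logging in step 2 could be a small helper along these
lines, called on each buffer from inside generic_log_relation(); the helper
name and message text are invented for the sketch and are not part of the
posted patches.

#include "postgres.h"

#include "storage/bufmgr.h"
#include "utils/rel.h"

/* Report index pages that are still all-zero when the relation is logged. */
static void
report_if_all_zero(Relation rel, Buffer buf, BlockNumber blkno)
{
	if (PageIsNew(BufferGetPage(buf)))
		elog(LOG, "block %u of index \"%s\" is still all-zero",
			 blkno, RelationGetRelationName(rel));
}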
This problem could be resolved by calling MarkBufferDirty() after
SpGistInitBuffer() in the allocNewBuffer() routine, but in that case we
would write more pages to the WAL than necessary.
To avoid that, in the patch '0001-Relation-into-WAL-function' I do not
write new (all-zero) pages to the WAL.
The attached patch set is not the final version; it is only meant to
demonstrate the 'all-zero pages' issue. The suggestion to use
XLOG_FPI records directly will be addressed in v3.
--
Andrey Lepikhov
Postgres Professional
https://postgrespro.com
The Russian Postgres Company
Attachments:
v2_0001-Relation-into-WAL-function.patchtext/x-patch; name=v2_0001-Relation-into-WAL-function.patchDownload
From d3093aa9a7628979b892d31449eda6228ef169ce Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepikhov@postgrespro.ru>
Date: Mon, 1 Apr 2019 08:33:46 +0500
Subject: [PATCH 1/4] Relation-into-WAL-function
---
src/backend/access/transam/generic_xlog.c | 48 +++++++++++++++++++++++
src/include/access/generic_xlog.h | 3 ++
2 files changed, 51 insertions(+)
diff --git a/src/backend/access/transam/generic_xlog.c b/src/backend/access/transam/generic_xlog.c
index 5b00b7275b..c22e361747 100644
--- a/src/backend/access/transam/generic_xlog.c
+++ b/src/backend/access/transam/generic_xlog.c
@@ -542,3 +542,51 @@ generic_mask(char *page, BlockNumber blkno)
mask_unused_space(page);
}
+
+/*
+ * Function to write generic xlog for every existing block of a relation.
+ * Caller is responsible for locking the relation exclusively.
+ */
+void
+generic_log_relation(Relation rel)
+{
+ BlockNumber blkno;
+ BlockNumber nblocks;
+ int npbuf = 0;
+ GenericXLogState *state = NULL;
+ Buffer bufpack[MAX_GENERIC_XLOG_PAGES];
+
+ CHECK_FOR_INTERRUPTS();
+ nblocks = RelationGetNumberOfBlocks(rel);
+
+ /*
+ * Iterate over all index pages and WAL-log them. Pages are grouped into
+ * packages before being added to a WAL record. Zero pages are
+ * not logged.
+ */
+ for (blkno = 0; blkno < nblocks; blkno++)
+ {
+ Buffer buf;
+
+ buf = ReadBuffer(rel, blkno);
+ if (!PageIsNew(BufferGetPage(buf)))
+ {
+ if (npbuf == 0)
+ state = GenericXLogStart(rel);
+
+ LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+ GenericXLogRegisterBuffer(state, buf, GENERIC_XLOG_FULL_IMAGE);
+ bufpack[npbuf++] = buf;
+ }
+ else
+ ReleaseBuffer(buf);
+
+ if ((npbuf == MAX_GENERIC_XLOG_PAGES) || (blkno == nblocks-1))
+ {
+ GenericXLogFinish(state);
+
+ for (; npbuf > 0; npbuf--)
+ UnlockReleaseBuffer(bufpack[npbuf-1]);
+ }
+ }
+}
diff --git a/src/include/access/generic_xlog.h b/src/include/access/generic_xlog.h
index cb5b5b713a..e3bbf014cc 100644
--- a/src/include/access/generic_xlog.h
+++ b/src/include/access/generic_xlog.h
@@ -42,4 +42,7 @@ extern const char *generic_identify(uint8 info);
extern void generic_desc(StringInfo buf, XLogReaderState *record);
extern void generic_mask(char *pagedata, BlockNumber blkno);
+/* other utils */
+extern void generic_log_relation(Relation rel);
+
#endif /* GENERIC_XLOG_H */
--
2.17.1
v2_0002-GIN-Optimal-WAL-Usage.patchtext/x-patch; name=v2_0002-GIN-Optimal-WAL-Usage.patchDownload
From 9a0172346c8a942b6a493aca8c47452256a2932f Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepikhov@postgrespro.ru>
Date: Mon, 1 Apr 2019 08:37:32 +0500
Subject: [PATCH 2/4] GIN-Optimal-WAL-Usage
---
src/backend/access/gin/ginbtree.c | 6 ++---
src/backend/access/gin/gindatapage.c | 9 ++++----
src/backend/access/gin/ginentrypage.c | 2 +-
src/backend/access/gin/gininsert.c | 30 ++++++++++--------------
src/backend/access/gin/ginutil.c | 4 ++--
src/backend/access/gin/ginvacuum.c | 2 +-
src/backend/access/gin/ginxlog.c | 33 ---------------------------
src/backend/access/rmgrdesc/gindesc.c | 6 -----
src/include/access/gin.h | 3 ++-
src/include/access/ginxlog.h | 2 --
10 files changed, 26 insertions(+), 71 deletions(-)
diff --git a/src/backend/access/gin/ginbtree.c b/src/backend/access/gin/ginbtree.c
index 533949e46a..9f82eef8c3 100644
--- a/src/backend/access/gin/ginbtree.c
+++ b/src/backend/access/gin/ginbtree.c
@@ -396,7 +396,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
/* It will fit, perform the insertion */
START_CRIT_SECTION();
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
XLogBeginInsert();
XLogRegisterBuffer(0, stack->buffer, REGBUF_STANDARD);
@@ -417,7 +417,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
MarkBufferDirty(childbuf);
}
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
XLogRecPtr recptr;
ginxlogInsert xlrec;
@@ -595,7 +595,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
}
/* write WAL record */
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
XLogRecPtr recptr;
diff --git a/src/backend/access/gin/gindatapage.c b/src/backend/access/gin/gindatapage.c
index 3ad8b76710..f3aff62c8e 100644
--- a/src/backend/access/gin/gindatapage.c
+++ b/src/backend/access/gin/gindatapage.c
@@ -593,7 +593,7 @@ dataBeginPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
* Great, all the items fit on a single page. If needed, prepare data
* for a WAL record describing the changes we'll make.
*/
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
computeLeafRecompressWALData(leaf);
/*
@@ -719,7 +719,7 @@ dataExecPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
dataPlaceToPageLeafRecompress(buf, leaf);
/* If needed, register WAL data built by computeLeafRecompressWALData */
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
XLogRegisterBufData(0, leaf->walinfo, leaf->walinfolen);
}
@@ -1152,7 +1152,7 @@ dataExecPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
pitem = (PostingItem *) insertdata;
GinDataPageAddPostingItem(page, pitem, off);
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
/*
* This must be static, because it has to survive until XLogInsert,
@@ -1773,6 +1773,7 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
Pointer ptr;
int nrootitems;
int rootsize;
+ bool is_build = (buildStats != NULL);
/* Construct the new root page in memory first. */
tmppage = (Page) palloc(BLCKSZ);
@@ -1826,7 +1827,7 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
PageRestoreTempPage(tmppage, page);
MarkBufferDirty(buffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !is_build)
{
XLogRecPtr recptr;
ginxlogCreatePostingTree data;
diff --git a/src/backend/access/gin/ginentrypage.c b/src/backend/access/gin/ginentrypage.c
index 4889de2a4f..1f5ba33d51 100644
--- a/src/backend/access/gin/ginentrypage.c
+++ b/src/backend/access/gin/ginentrypage.c
@@ -571,7 +571,7 @@ entryExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
elog(ERROR, "failed to add item to index page in \"%s\"",
RelationGetRelationName(btree->index));
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
/*
* This must be static, because it has to survive until XLogInsert,
diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c
index b02f69b0dc..a15ca7f942 100644
--- a/src/backend/access/gin/gininsert.c
+++ b/src/backend/access/gin/gininsert.c
@@ -17,6 +17,7 @@
#include "access/gin_private.h"
#include "access/ginxlog.h"
#include "access/xloginsert.h"
+#include "access/generic_xlog.h"
#include "access/tableam.h"
#include "catalog/index.h"
#include "miscadmin.h"
@@ -195,6 +196,7 @@ ginEntryInsert(GinState *ginstate,
buildStats->nEntries++;
ginPrepareEntryScan(&btree, attnum, key, category, ginstate);
+ btree.isBuild = (buildStats != NULL);
stack = ginFindLeafPage(&btree, false, false, NULL);
page = BufferGetPage(stack->buffer);
@@ -347,23 +349,6 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
GinInitBuffer(RootBuffer, GIN_LEAF);
MarkBufferDirty(RootBuffer);
- if (RelationNeedsWAL(index))
- {
- XLogRecPtr recptr;
- Page page;
-
- XLogBeginInsert();
- XLogRegisterBuffer(0, MetaBuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
- XLogRegisterBuffer(1, RootBuffer, REGBUF_WILL_INIT);
-
- recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX);
-
- page = BufferGetPage(RootBuffer);
- PageSetLSN(page, recptr);
-
- page = BufferGetPage(MetaBuffer);
- PageSetLSN(page, recptr);
- }
UnlockReleaseBuffer(MetaBuffer);
UnlockReleaseBuffer(RootBuffer);
@@ -419,7 +404,16 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
* Update metapage stats
*/
buildstate.buildStats.nTotalPages = RelationGetNumberOfBlocks(index);
- ginUpdateStats(index, &buildstate.buildStats);
+ ginUpdateStats(index, &buildstate.buildStats, true);
+
+ /*
+ * Create generic wal records for all pages of relation, if necessary.
+ * It seems reasonable not to generate WAL if we received an interrupt
+ * signal.
+ */
+ CHECK_FOR_INTERRUPTS();
+ if (RelationNeedsWAL(index))
+ generic_log_relation(index);
/*
* Return statistics
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index afc20232ac..51b20bca6e 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -661,7 +661,7 @@ ginGetStats(Relation index, GinStatsData *stats)
* Note: nPendingPages and ginVersion are *not* copied over
*/
void
-ginUpdateStats(Relation index, const GinStatsData *stats)
+ginUpdateStats(Relation index, const GinStatsData *stats, bool is_build)
{
Buffer metabuffer;
Page metapage;
@@ -691,7 +691,7 @@ ginUpdateStats(Relation index, const GinStatsData *stats)
MarkBufferDirty(metabuffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !is_build)
{
XLogRecPtr recptr;
ginxlogUpdateMeta data;
diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c
index dfe885b101..b9a28d1863 100644
--- a/src/backend/access/gin/ginvacuum.c
+++ b/src/backend/access/gin/ginvacuum.c
@@ -759,7 +759,7 @@ ginvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
/* Update the metapage with accurate page and entry counts */
idxStat.nTotalPages = npages;
- ginUpdateStats(info->index, &idxStat);
+ ginUpdateStats(info->index, &idxStat, false);
/* Finally, vacuum the FSM */
IndexFreeSpaceMapVacuum(info->index);
diff --git a/src/backend/access/gin/ginxlog.c b/src/backend/access/gin/ginxlog.c
index c467ffa346..b648af1ff6 100644
--- a/src/backend/access/gin/ginxlog.c
+++ b/src/backend/access/gin/ginxlog.c
@@ -40,36 +40,6 @@ ginRedoClearIncompleteSplit(XLogReaderState *record, uint8 block_id)
UnlockReleaseBuffer(buffer);
}
-static void
-ginRedoCreateIndex(XLogReaderState *record)
-{
- XLogRecPtr lsn = record->EndRecPtr;
- Buffer RootBuffer,
- MetaBuffer;
- Page page;
-
- MetaBuffer = XLogInitBufferForRedo(record, 0);
- Assert(BufferGetBlockNumber(MetaBuffer) == GIN_METAPAGE_BLKNO);
- page = (Page) BufferGetPage(MetaBuffer);
-
- GinInitMetabuffer(MetaBuffer);
-
- PageSetLSN(page, lsn);
- MarkBufferDirty(MetaBuffer);
-
- RootBuffer = XLogInitBufferForRedo(record, 1);
- Assert(BufferGetBlockNumber(RootBuffer) == GIN_ROOT_BLKNO);
- page = (Page) BufferGetPage(RootBuffer);
-
- GinInitBuffer(RootBuffer, GIN_LEAF);
-
- PageSetLSN(page, lsn);
- MarkBufferDirty(RootBuffer);
-
- UnlockReleaseBuffer(RootBuffer);
- UnlockReleaseBuffer(MetaBuffer);
-}
-
static void
ginRedoCreatePTree(XLogReaderState *record)
{
@@ -767,9 +737,6 @@ gin_redo(XLogReaderState *record)
oldCtx = MemoryContextSwitchTo(opCtx);
switch (info)
{
- case XLOG_GIN_CREATE_INDEX:
- ginRedoCreateIndex(record);
- break;
case XLOG_GIN_CREATE_PTREE:
ginRedoCreatePTree(record);
break;
diff --git a/src/backend/access/rmgrdesc/gindesc.c b/src/backend/access/rmgrdesc/gindesc.c
index ef30ce16b0..f3f4e1b214 100644
--- a/src/backend/access/rmgrdesc/gindesc.c
+++ b/src/backend/access/rmgrdesc/gindesc.c
@@ -78,9 +78,6 @@ gin_desc(StringInfo buf, XLogReaderState *record)
switch (info)
{
- case XLOG_GIN_CREATE_INDEX:
- /* no further information */
- break;
case XLOG_GIN_CREATE_PTREE:
/* no further information */
break;
@@ -188,9 +185,6 @@ gin_identify(uint8 info)
switch (info & ~XLR_INFO_MASK)
{
- case XLOG_GIN_CREATE_INDEX:
- id = "CREATE_INDEX";
- break;
case XLOG_GIN_CREATE_PTREE:
id = "CREATE_PTREE";
break;
diff --git a/src/include/access/gin.h b/src/include/access/gin.h
index 61fa697039..d559ffc703 100644
--- a/src/include/access/gin.h
+++ b/src/include/access/gin.h
@@ -71,6 +71,7 @@ extern int gin_pending_list_limit;
/* ginutil.c */
extern void ginGetStats(Relation index, GinStatsData *stats);
-extern void ginUpdateStats(Relation index, const GinStatsData *stats);
+extern void ginUpdateStats(Relation index,
+ const GinStatsData *stats, bool is_build);
#endif /* GIN_H */
diff --git a/src/include/access/ginxlog.h b/src/include/access/ginxlog.h
index 9bd4e0b9ba..2c5d743cac 100644
--- a/src/include/access/ginxlog.h
+++ b/src/include/access/ginxlog.h
@@ -16,8 +16,6 @@
#include "lib/stringinfo.h"
#include "storage/off.h"
-#define XLOG_GIN_CREATE_INDEX 0x00
-
#define XLOG_GIN_CREATE_PTREE 0x10
typedef struct ginxlogCreatePostingTree
--
2.17.1
v2_0003-GiST-Optimal-WAL-Usage.patchtext/x-patch; name=v2_0003-GiST-Optimal-WAL-Usage.patchDownload
From 3e73e862c1aa20bf4eeca20ba4381a1d3c6f19d9 Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepikhov@postgrespro.ru>
Date: Mon, 1 Apr 2019 09:07:32 +0500
Subject: [PATCH 3/4] GiST-Optimal-WAL-Usage
---
src/backend/access/gist/gist.c | 46 ++++++++++++++++++--------
src/backend/access/gist/gistbuild.c | 32 ++++++++++--------
src/backend/access/gist/gistutil.c | 2 +-
src/backend/access/gist/gistxlog.c | 22 ------------
src/backend/access/rmgrdesc/gistdesc.c | 5 ---
src/include/access/gist_private.h | 7 ++--
src/include/access/gistxlog.h | 1 -
7 files changed, 56 insertions(+), 59 deletions(-)
diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c
index 2fddb23496..0e2b6c3014 100644
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -172,7 +172,7 @@ gistinsert(Relation r, Datum *values, bool *isnull,
values, isnull, true /* size is currently bogus */ );
itup->t_tid = *ht_ctid;
- gistdoinsert(r, itup, 0, giststate, heapRel);
+ gistdoinsert(r, itup, 0, giststate, heapRel, false);
/* cleanup */
MemoryContextSwitchTo(oldCxt);
@@ -219,7 +219,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
Buffer leftchildbuf,
List **splitinfo,
bool markfollowright,
- Relation heapRel)
+ Relation heapRel,
+ bool is_build)
{
BlockNumber blkno = BufferGetBlockNumber(buffer);
Page page = BufferGetPage(buffer);
@@ -458,7 +459,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
* insertion for that. NB: The number of pages and data segments
* specified here must match the calculations in gistXLogSplit()!
*/
- if (RelationNeedsWAL(rel))
+ if (RelationNeedsWAL(rel) && !is_build)
XLogEnsureRecordSpace(npage, 1 + npage * 2);
START_CRIT_SECTION();
@@ -479,18 +480,20 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer));
dist->page = BufferGetPage(dist->buffer);
- /* Write the WAL record */
- if (RelationNeedsWAL(rel))
+ /*
+ * Write the WAL record.
+ * Do not write XLog entry if the insertion is caused by
+ * index build process.
+ */
+ if (RelationNeedsWAL(rel) && !is_build)
recptr = gistXLogSplit(is_leaf,
- dist, oldrlink, oldnsn, leftchildbuf,
- markfollowright);
+ dist, oldrlink, oldnsn, leftchildbuf,
+ markfollowright);
else
recptr = gistGetFakeLSN(rel);
for (ptr = dist; ptr; ptr = ptr->next)
- {
PageSetLSN(ptr->page, recptr);
- }
/*
* Return the new child buffers to the caller.
@@ -544,7 +547,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
if (BufferIsValid(leftchildbuf))
MarkBufferDirty(leftchildbuf);
- if (RelationNeedsWAL(rel))
+
+ if (RelationNeedsWAL(rel) && !is_build)
{
OffsetNumber ndeloffs = 0,
deloffs[1];
@@ -567,6 +571,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
PageSetLSN(page, recptr);
}
+
if (newblkno)
*newblkno = blkno;
}
@@ -583,17 +588,28 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
* the full page image. There's a chicken-and-egg problem: if we updated
* the child pages first, we wouldn't know the recptr of the WAL record
* we're about to write.
+ *
+ * We use fake LSNs for insertions caused by index build. And when it is
+ * finished, we write generic_xlog entry for each index page and update
+ * all LSNs. In order to keep NSNs less than LSNs after this update, we
+ * set NSN to InvalidXLogRecPtr, which is the smallest possible NSN.
*/
+
if (BufferIsValid(leftchildbuf))
{
Page leftpg = BufferGetPage(leftchildbuf);
+ XLogRecPtr fakerecptr = InvalidXLogRecPtr;
- GistPageSetNSN(leftpg, recptr);
- GistClearFollowRight(leftpg);
+ if (!is_build)
+ GistPageSetNSN(leftpg, recptr);
+ else
+ GistPageSetNSN(leftpg, fakerecptr);
+ GistClearFollowRight(leftpg);
PageSetLSN(leftpg, recptr);
}
+
END_CRIT_SECTION();
return is_split;
@@ -606,7 +622,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
*/
void
gistdoinsert(Relation r, IndexTuple itup, Size freespace,
- GISTSTATE *giststate, Relation heapRel)
+ GISTSTATE *giststate, Relation heapRel, bool is_build)
{
ItemId iid;
IndexTuple idxtuple;
@@ -619,6 +635,7 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace,
state.freespace = freespace;
state.r = r;
state.heapRel = heapRel;
+ state.is_build = is_build;
/* Start from the root */
firststack.blkno = GIST_ROOT_BLKNO;
@@ -1251,7 +1268,8 @@ gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
leftchild,
&splitinfo,
true,
- state->heapRel);
+ state->heapRel,
+ state->is_build);
/*
* Before recursing up in case the page was split, release locks on the
diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c
index 3652fde5bb..8d0d285cab 100644
--- a/src/backend/access/gist/gistbuild.c
+++ b/src/backend/access/gist/gistbuild.c
@@ -17,6 +17,7 @@
#include <math.h>
#include "access/genam.h"
+#include "access/generic_xlog.h"
#include "access/gist_private.h"
#include "access/gistxlog.h"
#include "access/tableam.h"
@@ -181,18 +182,12 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
MarkBufferDirty(buffer);
- if (RelationNeedsWAL(index))
- {
- XLogRecPtr recptr;
-
- XLogBeginInsert();
- XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
-
- recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX);
- PageSetLSN(page, recptr);
- }
- else
- PageSetLSN(page, gistGetFakeLSN(heap));
+ /*
+ * Do not write index pages to WAL until the index build is finished.
+ * But we still need increasing LSNs on each page, so use FakeLSN,
+ * even for relations which eventually need WAL.
+ */
+ PageSetLSN(page, gistGetFakeLSN(heap));
UnlockReleaseBuffer(buffer);
@@ -226,6 +221,15 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
freeGISTstate(buildstate.giststate);
+ /*
+ * Create generic wal records for all pages of relation, if necessary.
+ * It seems reasonable not to generate WAL if we received an interrupt
+ * signal.
+ */
+ CHECK_FOR_INTERRUPTS();
+ if (RelationNeedsWAL(index))
+ generic_log_relation(index);
+
/*
* Return statistics
*/
@@ -488,7 +492,7 @@ gistBuildCallback(Relation index,
* locked, we call gistdoinsert directly.
*/
gistdoinsert(index, itup, buildstate->freespace,
- buildstate->giststate, buildstate->heaprel);
+ buildstate->giststate, buildstate->heaprel, true);
}
/* Update tuple count and total size. */
@@ -695,7 +699,7 @@ gistbufferinginserttuples(GISTBuildState *buildstate, Buffer buffer, int level,
InvalidBuffer,
&splitinfo,
false,
- buildstate->heaprel);
+ buildstate->heaprel, true);
/*
* If this is a root split, update the root path item kept in memory. This
diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c
index 2163cc482d..af278e5ded 100644
--- a/src/backend/access/gist/gistutil.c
+++ b/src/backend/access/gist/gistutil.c
@@ -1004,6 +1004,7 @@ gistproperty(Oid index_oid, int attno,
* Temporary and unlogged GiST indexes are not WAL-logged, but we need LSNs
* to detect concurrent page splits anyway. This function provides a fake
* sequence of LSNs for that purpose.
+ * Persistent relations are also not WAL-logged while we build index.
*/
XLogRecPtr
gistGetFakeLSN(Relation rel)
@@ -1024,7 +1025,6 @@ gistGetFakeLSN(Relation rel)
* Unlogged relations are accessible from other backends, and survive
* (clean) restarts. GetFakeLSNForUnloggedRel() handles that for us.
*/
- Assert(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED);
return GetFakeLSNForUnloggedRel();
}
}
diff --git a/src/backend/access/gist/gistxlog.c b/src/backend/access/gist/gistxlog.c
index cb80ab00cd..4fb1855e89 100644
--- a/src/backend/access/gist/gistxlog.c
+++ b/src/backend/access/gist/gistxlog.c
@@ -490,25 +490,6 @@ gistRedoPageSplitRecord(XLogReaderState *record)
UnlockReleaseBuffer(firstbuffer);
}
-static void
-gistRedoCreateIndex(XLogReaderState *record)
-{
- XLogRecPtr lsn = record->EndRecPtr;
- Buffer buffer;
- Page page;
-
- buffer = XLogInitBufferForRedo(record, 0);
- Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
- page = (Page) BufferGetPage(buffer);
-
- GISTInitBuffer(buffer, F_LEAF);
-
- PageSetLSN(page, lsn);
-
- MarkBufferDirty(buffer);
- UnlockReleaseBuffer(buffer);
-}
-
/* redo page deletion */
static void
gistRedoPageDelete(XLogReaderState *record)
@@ -594,9 +575,6 @@ gist_redo(XLogReaderState *record)
case XLOG_GIST_PAGE_SPLIT:
gistRedoPageSplitRecord(record);
break;
- case XLOG_GIST_CREATE_INDEX:
- gistRedoCreateIndex(record);
- break;
case XLOG_GIST_PAGE_DELETE:
gistRedoPageDelete(record);
break;
diff --git a/src/backend/access/rmgrdesc/gistdesc.c b/src/backend/access/rmgrdesc/gistdesc.c
index 3ff4f83d38..eb308c72d6 100644
--- a/src/backend/access/rmgrdesc/gistdesc.c
+++ b/src/backend/access/rmgrdesc/gistdesc.c
@@ -71,8 +71,6 @@ gist_desc(StringInfo buf, XLogReaderState *record)
case XLOG_GIST_PAGE_SPLIT:
out_gistxlogPageSplit(buf, (gistxlogPageSplit *) rec);
break;
- case XLOG_GIST_CREATE_INDEX:
- break;
case XLOG_GIST_PAGE_DELETE:
out_gistxlogPageDelete(buf, (gistxlogPageDelete *) rec);
break;
@@ -98,9 +96,6 @@ gist_identify(uint8 info)
case XLOG_GIST_PAGE_SPLIT:
id = "PAGE_SPLIT";
break;
- case XLOG_GIST_CREATE_INDEX:
- id = "CREATE_INDEX";
- break;
case XLOG_GIST_PAGE_DELETE:
id = "PAGE_DELETE";
break;
diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h
index 02dc285a78..78e2e3fb31 100644
--- a/src/include/access/gist_private.h
+++ b/src/include/access/gist_private.h
@@ -244,6 +244,7 @@ typedef struct
Relation r;
Relation heapRel;
Size freespace; /* free space to be left */
+ bool is_build;
GISTInsertStack *stack;
} GISTInsertState;
@@ -393,7 +394,8 @@ extern void gistdoinsert(Relation r,
IndexTuple itup,
Size freespace,
GISTSTATE *GISTstate,
- Relation heapRel);
+ Relation heapRel,
+ bool is_build);
/* A List of these is returned from gistplacetopage() in *splitinfo */
typedef struct
@@ -409,7 +411,8 @@ extern bool gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
Buffer leftchildbuf,
List **splitinfo,
bool markleftchild,
- Relation heapRel);
+ Relation heapRel,
+ bool is_build);
extern SplitedPageLayout *gistSplit(Relation r, Page page, IndexTuple *itup,
int len, GISTSTATE *giststate);
diff --git a/src/include/access/gistxlog.h b/src/include/access/gistxlog.h
index 2f87b67a53..80931497ca 100644
--- a/src/include/access/gistxlog.h
+++ b/src/include/access/gistxlog.h
@@ -23,7 +23,6 @@
* FSM */
#define XLOG_GIST_PAGE_SPLIT 0x30
/* #define XLOG_GIST_INSERT_COMPLETE 0x40 */ /* not used anymore */
-#define XLOG_GIST_CREATE_INDEX 0x50
#define XLOG_GIST_PAGE_DELETE 0x60
/*
--
2.17.1
v2_0004-SP-GiST-Optimal-WAL-Usage.patchtext/x-patch; name=v2_0004-SP-GiST-Optimal-WAL-Usage.patchDownload
From 19968269bc08f71ed1eaf4f33d1e06da6fda9708 Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepikhov@postgrespro.ru>
Date: Mon, 1 Apr 2019 09:07:51 +0500
Subject: [PATCH 4/4] SP-GiST-Optimal-WAL-Usage
---
src/backend/access/rmgrdesc/spgdesc.c | 5 ----
src/backend/access/spgist/spgdoinsert.c | 12 ++++-----
src/backend/access/spgist/spginsert.c | 24 +++--------------
src/backend/access/spgist/spgxlog.c | 35 -------------------------
src/include/access/spgxlog.h | 1 -
5 files changed, 10 insertions(+), 67 deletions(-)
diff --git a/src/backend/access/rmgrdesc/spgdesc.c b/src/backend/access/rmgrdesc/spgdesc.c
index 37af31a764..40c1c8b3f9 100644
--- a/src/backend/access/rmgrdesc/spgdesc.c
+++ b/src/backend/access/rmgrdesc/spgdesc.c
@@ -24,8 +24,6 @@ spg_desc(StringInfo buf, XLogReaderState *record)
switch (info)
{
- case XLOG_SPGIST_CREATE_INDEX:
- break;
case XLOG_SPGIST_ADD_LEAF:
{
spgxlogAddLeaf *xlrec = (spgxlogAddLeaf *) rec;
@@ -88,9 +86,6 @@ spg_identify(uint8 info)
switch (info & ~XLR_INFO_MASK)
{
- case XLOG_SPGIST_CREATE_INDEX:
- id = "CREATE_INDEX";
- break;
case XLOG_SPGIST_ADD_LEAF:
id = "ADD_LEAF";
break;
diff --git a/src/backend/access/spgist/spgdoinsert.c b/src/backend/access/spgist/spgdoinsert.c
index 0d07b8b291..c34c44cd8b 100644
--- a/src/backend/access/spgist/spgdoinsert.c
+++ b/src/backend/access/spgist/spgdoinsert.c
@@ -289,7 +289,7 @@ addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple,
MarkBufferDirty(current->buffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
int flags;
@@ -516,7 +516,7 @@ moveLeafs(Relation index, SpGistState *state,
MarkBufferDirty(current->buffer);
MarkBufferDirty(nbuf);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
@@ -1334,7 +1334,7 @@ doPickSplit(Relation index, SpGistState *state,
saveCurrent.buffer = InvalidBuffer;
}
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
int flags;
@@ -1531,7 +1531,7 @@ spgAddNodeAction(Relation index, SpGistState *state,
MarkBufferDirty(current->buffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
@@ -1644,7 +1644,7 @@ spgAddNodeAction(Relation index, SpGistState *state,
MarkBufferDirty(saveCurrent.buffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
int flags;
@@ -1840,7 +1840,7 @@ spgSplitNodeAction(Relation index, SpGistState *state,
MarkBufferDirty(current->buffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
diff --git a/src/backend/access/spgist/spginsert.c b/src/backend/access/spgist/spginsert.c
index 390ad9ac51..6ec48b9f93 100644
--- a/src/backend/access/spgist/spginsert.c
+++ b/src/backend/access/spgist/spginsert.c
@@ -22,6 +22,7 @@
#include "access/tableam.h"
#include "access/xlog.h"
#include "access/xloginsert.h"
+#include "access/generic_xlog.h"
#include "catalog/index.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
@@ -105,26 +106,6 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo)
SpGistInitBuffer(nullbuffer, SPGIST_LEAF | SPGIST_NULLS);
MarkBufferDirty(nullbuffer);
- if (RelationNeedsWAL(index))
- {
- XLogRecPtr recptr;
-
- XLogBeginInsert();
-
- /*
- * Replay will re-initialize the pages, so don't take full pages
- * images. No other data to log.
- */
- XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
- XLogRegisterBuffer(1, rootbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
- XLogRegisterBuffer(2, nullbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
-
- recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_CREATE_INDEX);
-
- PageSetLSN(BufferGetPage(metabuffer), recptr);
- PageSetLSN(BufferGetPage(rootbuffer), recptr);
- PageSetLSN(BufferGetPage(nullbuffer), recptr);
- }
END_CRIT_SECTION();
@@ -151,6 +132,9 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo)
SpGistUpdateMetaPage(index);
+ if (RelationNeedsWAL(index))
+ generic_log_relation(index);
+
result = (IndexBuildResult *) palloc0(sizeof(IndexBuildResult));
result->heap_tuples = reltuples;
result->index_tuples = buildstate.indtuples;
diff --git a/src/backend/access/spgist/spgxlog.c b/src/backend/access/spgist/spgxlog.c
index 71836ee8a5..ebe6ae8715 100644
--- a/src/backend/access/spgist/spgxlog.c
+++ b/src/backend/access/spgist/spgxlog.c
@@ -72,38 +72,6 @@ addOrReplaceTuple(Page page, Item tuple, int size, OffsetNumber offset)
size);
}
-static void
-spgRedoCreateIndex(XLogReaderState *record)
-{
- XLogRecPtr lsn = record->EndRecPtr;
- Buffer buffer;
- Page page;
-
- buffer = XLogInitBufferForRedo(record, 0);
- Assert(BufferGetBlockNumber(buffer) == SPGIST_METAPAGE_BLKNO);
- page = (Page) BufferGetPage(buffer);
- SpGistInitMetapage(page);
- PageSetLSN(page, lsn);
- MarkBufferDirty(buffer);
- UnlockReleaseBuffer(buffer);
-
- buffer = XLogInitBufferForRedo(record, 1);
- Assert(BufferGetBlockNumber(buffer) == SPGIST_ROOT_BLKNO);
- SpGistInitBuffer(buffer, SPGIST_LEAF);
- page = (Page) BufferGetPage(buffer);
- PageSetLSN(page, lsn);
- MarkBufferDirty(buffer);
- UnlockReleaseBuffer(buffer);
-
- buffer = XLogInitBufferForRedo(record, 2);
- Assert(BufferGetBlockNumber(buffer) == SPGIST_NULL_BLKNO);
- SpGistInitBuffer(buffer, SPGIST_LEAF | SPGIST_NULLS);
- page = (Page) BufferGetPage(buffer);
- PageSetLSN(page, lsn);
- MarkBufferDirty(buffer);
- UnlockReleaseBuffer(buffer);
-}
-
static void
spgRedoAddLeaf(XLogReaderState *record)
{
@@ -976,9 +944,6 @@ spg_redo(XLogReaderState *record)
oldCxt = MemoryContextSwitchTo(opCtx);
switch (info)
{
- case XLOG_SPGIST_CREATE_INDEX:
- spgRedoCreateIndex(record);
- break;
case XLOG_SPGIST_ADD_LEAF:
spgRedoAddLeaf(record);
break;
diff --git a/src/include/access/spgxlog.h b/src/include/access/spgxlog.h
index 6527fc9eb1..8199b3f250 100644
--- a/src/include/access/spgxlog.h
+++ b/src/include/access/spgxlog.h
@@ -18,7 +18,6 @@
#include "storage/off.h"
/* XLOG record types for SPGiST */
-#define XLOG_SPGIST_CREATE_INDEX 0x00
#define XLOG_SPGIST_ADD_LEAF 0x10
#define XLOG_SPGIST_MOVE_LEAFS 0x20
#define XLOG_SPGIST_ADD_NODE 0x30
--
2.17.1
On 25/03/2019 15:21, Heikki Linnakangas wrote:
I had another quick look.
I still think using the "generic xlog AM" for this is a wrong level of
abstraction, and we should use the XLOG_FPI records for this directly.
We can extend XLOG_FPI so that it can store multiple pages in a single
record, if it doesn't already handle it.
Another counter-point to using the generic xlog record is that you're
currently doing two unnecessary memcpy's of all pages in the index, in
GenericXLogRegisterBuffer() and GenericXLogFinish(). That's not free.
I guess the generic_log_relation() function can stay where it is, but it
should use XLogRegisterBuffer() and XLogInsert() directly.
Patch set v3 uses XLOG_FPI records directly.
As a benchmark I used the script test.sql (attached), which shows the WAL
size increment during index build. The table below shows the influence of
the patch on WAL growth.
Results
=======
AM      | master | patch
--------+--------+------
GIN     | 347 MB | 66 MB
GiST    | 157 MB | 43 MB
SP-GiST | 119 MB | 38 MB
--
Andrey Lepikhov
Postgres Professional
https://postgrespro.com
The Russian Postgres Company
Attachments:
v3_0001-Relation-into-WAL-function.patchtext/x-patch; name=v3_0001-Relation-into-WAL-function.patchDownload
From 2edd0ada13b4749487d0f046191ef2bcf8b11ca3 Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepikhov@postgrespro.ru>
Date: Tue, 2 Apr 2019 09:42:59 +0500
Subject: [PATCH 1/4] Relation-into-WAL-function
---
src/backend/access/transam/generic_xlog.c | 63 +++++++++++++++++++++++
src/include/access/generic_xlog.h | 3 ++
2 files changed, 66 insertions(+)
diff --git a/src/backend/access/transam/generic_xlog.c b/src/backend/access/transam/generic_xlog.c
index 5b00b7275b..0d5025f78a 100644
--- a/src/backend/access/transam/generic_xlog.c
+++ b/src/backend/access/transam/generic_xlog.c
@@ -16,6 +16,7 @@
#include "access/bufmask.h"
#include "access/generic_xlog.h"
#include "access/xlogutils.h"
+#include "catalog/pg_control.h"
#include "miscadmin.h"
#include "utils/memutils.h"
@@ -542,3 +543,65 @@ generic_mask(char *page, BlockNumber blkno)
mask_unused_space(page);
}
+
+/*
+ * Function for WAL-logging all pages of a relation.
+ * Caller is responsible for locking the relation exclusively.
+ */
+void
+log_relation(Relation rel)
+{
+ BlockNumber blkno = 0;
+ BlockNumber nblocks;
+ Buffer bufpack[XLR_MAX_BLOCK_ID];
+
+ CHECK_FOR_INTERRUPTS();
+
+ /*
+ * Iterate over all index pages and WAL-log them. Pages are grouped into
+ * packages before being added to a WAL record. Zero pages are not logged.
+ */
+ nblocks = RelationGetNumberOfBlocks(rel);
+ while (blkno < nblocks)
+ {
+ XLogRecPtr recptr;
+ int8 nbufs = 0;
+ int8 i;
+
+ /*
+ * Assemble package of relation blocks. Try to combine the maximum
+ * possible number of blocks in one record.
+ */
+ while (nbufs < XLR_MAX_BLOCK_ID && blkno < nblocks)
+ {
+ Buffer buf = ReadBuffer(rel, blkno);
+
+ if (!PageIsNew(BufferGetPage(buf)))
+ bufpack[nbufs++] = buf;
+ else
+ ReleaseBuffer(buf);
+ blkno++;
+ }
+
+ XLogBeginInsert();
+ XLogEnsureRecordSpace(nbufs, 0);
+
+ START_CRIT_SECTION();
+ for (i = 0; i < nbufs; i++)
+ {
+ LockBuffer(bufpack[i], BUFFER_LOCK_EXCLUSIVE);
+ XLogRegisterBuffer(i, bufpack[i], REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
+ }
+
+ recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI);
+
+ for (i = 0; i < nbufs; i++)
+ {
+ Page page = BufferGetPage(bufpack[i]);
+ PageSetLSN(page, recptr);
+ MarkBufferDirty(bufpack[i]);
+ UnlockReleaseBuffer(bufpack[i]);
+ }
+ END_CRIT_SECTION();
+ }
+}
diff --git a/src/include/access/generic_xlog.h b/src/include/access/generic_xlog.h
index cb5b5b713a..8abfa486c7 100644
--- a/src/include/access/generic_xlog.h
+++ b/src/include/access/generic_xlog.h
@@ -42,4 +42,7 @@ extern const char *generic_identify(uint8 info);
extern void generic_desc(StringInfo buf, XLogReaderState *record);
extern void generic_mask(char *pagedata, BlockNumber blkno);
+/* other utils */
+extern void log_relation(Relation rel);
+
#endif /* GENERIC_XLOG_H */
--
2.17.1
v3_0002-GIN-Optimal-WAL-Usage.patchtext/x-patch; name=v3_0002-GIN-Optimal-WAL-Usage.patchDownload
From 8857657efa8f3347010d3b251315f800dd03bf8d Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepikhov@postgrespro.ru>
Date: Tue, 2 Apr 2019 09:43:20 +0500
Subject: [PATCH 2/4] GIN-Optimal-WAL-Usage
---
src/backend/access/gin/ginbtree.c | 6 ++---
src/backend/access/gin/gindatapage.c | 9 ++++----
src/backend/access/gin/ginentrypage.c | 2 +-
src/backend/access/gin/gininsert.c | 30 ++++++++++--------------
src/backend/access/gin/ginutil.c | 4 ++--
src/backend/access/gin/ginvacuum.c | 2 +-
src/backend/access/gin/ginxlog.c | 33 ---------------------------
src/backend/access/rmgrdesc/gindesc.c | 6 -----
src/include/access/gin.h | 3 ++-
src/include/access/ginxlog.h | 2 --
10 files changed, 26 insertions(+), 71 deletions(-)
diff --git a/src/backend/access/gin/ginbtree.c b/src/backend/access/gin/ginbtree.c
index 533949e46a..9f82eef8c3 100644
--- a/src/backend/access/gin/ginbtree.c
+++ b/src/backend/access/gin/ginbtree.c
@@ -396,7 +396,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
/* It will fit, perform the insertion */
START_CRIT_SECTION();
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
XLogBeginInsert();
XLogRegisterBuffer(0, stack->buffer, REGBUF_STANDARD);
@@ -417,7 +417,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
MarkBufferDirty(childbuf);
}
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
XLogRecPtr recptr;
ginxlogInsert xlrec;
@@ -595,7 +595,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
}
/* write WAL record */
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
XLogRecPtr recptr;
diff --git a/src/backend/access/gin/gindatapage.c b/src/backend/access/gin/gindatapage.c
index 3ad8b76710..f3aff62c8e 100644
--- a/src/backend/access/gin/gindatapage.c
+++ b/src/backend/access/gin/gindatapage.c
@@ -593,7 +593,7 @@ dataBeginPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
* Great, all the items fit on a single page. If needed, prepare data
* for a WAL record describing the changes we'll make.
*/
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
computeLeafRecompressWALData(leaf);
/*
@@ -719,7 +719,7 @@ dataExecPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
dataPlaceToPageLeafRecompress(buf, leaf);
/* If needed, register WAL data built by computeLeafRecompressWALData */
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
XLogRegisterBufData(0, leaf->walinfo, leaf->walinfolen);
}
@@ -1152,7 +1152,7 @@ dataExecPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
pitem = (PostingItem *) insertdata;
GinDataPageAddPostingItem(page, pitem, off);
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
/*
* This must be static, because it has to survive until XLogInsert,
@@ -1773,6 +1773,7 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
Pointer ptr;
int nrootitems;
int rootsize;
+ bool is_build = (buildStats != NULL);
/* Construct the new root page in memory first. */
tmppage = (Page) palloc(BLCKSZ);
@@ -1826,7 +1827,7 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
PageRestoreTempPage(tmppage, page);
MarkBufferDirty(buffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !is_build)
{
XLogRecPtr recptr;
ginxlogCreatePostingTree data;
diff --git a/src/backend/access/gin/ginentrypage.c b/src/backend/access/gin/ginentrypage.c
index 4889de2a4f..1f5ba33d51 100644
--- a/src/backend/access/gin/ginentrypage.c
+++ b/src/backend/access/gin/ginentrypage.c
@@ -571,7 +571,7 @@ entryExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
elog(ERROR, "failed to add item to index page in \"%s\"",
RelationGetRelationName(btree->index));
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
/*
* This must be static, because it has to survive until XLogInsert,
diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c
index b02f69b0dc..44f2bdce9c 100644
--- a/src/backend/access/gin/gininsert.c
+++ b/src/backend/access/gin/gininsert.c
@@ -17,6 +17,7 @@
#include "access/gin_private.h"
#include "access/ginxlog.h"
#include "access/xloginsert.h"
+#include "access/generic_xlog.h"
#include "access/tableam.h"
#include "catalog/index.h"
#include "miscadmin.h"
@@ -195,6 +196,7 @@ ginEntryInsert(GinState *ginstate,
buildStats->nEntries++;
ginPrepareEntryScan(&btree, attnum, key, category, ginstate);
+ btree.isBuild = (buildStats != NULL);
stack = ginFindLeafPage(&btree, false, false, NULL);
page = BufferGetPage(stack->buffer);
@@ -347,23 +349,6 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
GinInitBuffer(RootBuffer, GIN_LEAF);
MarkBufferDirty(RootBuffer);
- if (RelationNeedsWAL(index))
- {
- XLogRecPtr recptr;
- Page page;
-
- XLogBeginInsert();
- XLogRegisterBuffer(0, MetaBuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
- XLogRegisterBuffer(1, RootBuffer, REGBUF_WILL_INIT);
-
- recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX);
-
- page = BufferGetPage(RootBuffer);
- PageSetLSN(page, recptr);
-
- page = BufferGetPage(MetaBuffer);
- PageSetLSN(page, recptr);
- }
UnlockReleaseBuffer(MetaBuffer);
UnlockReleaseBuffer(RootBuffer);
@@ -419,7 +404,16 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
* Update metapage stats
*/
buildstate.buildStats.nTotalPages = RelationGetNumberOfBlocks(index);
- ginUpdateStats(index, &buildstate.buildStats);
+ ginUpdateStats(index, &buildstate.buildStats, true);
+
+ /*
+ * Create generic wal records for all pages of relation, if necessary.
+ * It seems reasonable not to generate WAL if we received an interrupt
+ * signal.
+ */
+ CHECK_FOR_INTERRUPTS();
+ if (RelationNeedsWAL(index))
+ log_relation(index);
/*
* Return statistics
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index afc20232ac..51b20bca6e 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -661,7 +661,7 @@ ginGetStats(Relation index, GinStatsData *stats)
* Note: nPendingPages and ginVersion are *not* copied over
*/
void
-ginUpdateStats(Relation index, const GinStatsData *stats)
+ginUpdateStats(Relation index, const GinStatsData *stats, bool is_build)
{
Buffer metabuffer;
Page metapage;
@@ -691,7 +691,7 @@ ginUpdateStats(Relation index, const GinStatsData *stats)
MarkBufferDirty(metabuffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !is_build)
{
XLogRecPtr recptr;
ginxlogUpdateMeta data;
diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c
index dfe885b101..b9a28d1863 100644
--- a/src/backend/access/gin/ginvacuum.c
+++ b/src/backend/access/gin/ginvacuum.c
@@ -759,7 +759,7 @@ ginvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
/* Update the metapage with accurate page and entry counts */
idxStat.nTotalPages = npages;
- ginUpdateStats(info->index, &idxStat);
+ ginUpdateStats(info->index, &idxStat, false);
/* Finally, vacuum the FSM */
IndexFreeSpaceMapVacuum(info->index);
diff --git a/src/backend/access/gin/ginxlog.c b/src/backend/access/gin/ginxlog.c
index c467ffa346..b648af1ff6 100644
--- a/src/backend/access/gin/ginxlog.c
+++ b/src/backend/access/gin/ginxlog.c
@@ -40,36 +40,6 @@ ginRedoClearIncompleteSplit(XLogReaderState *record, uint8 block_id)
UnlockReleaseBuffer(buffer);
}
-static void
-ginRedoCreateIndex(XLogReaderState *record)
-{
- XLogRecPtr lsn = record->EndRecPtr;
- Buffer RootBuffer,
- MetaBuffer;
- Page page;
-
- MetaBuffer = XLogInitBufferForRedo(record, 0);
- Assert(BufferGetBlockNumber(MetaBuffer) == GIN_METAPAGE_BLKNO);
- page = (Page) BufferGetPage(MetaBuffer);
-
- GinInitMetabuffer(MetaBuffer);
-
- PageSetLSN(page, lsn);
- MarkBufferDirty(MetaBuffer);
-
- RootBuffer = XLogInitBufferForRedo(record, 1);
- Assert(BufferGetBlockNumber(RootBuffer) == GIN_ROOT_BLKNO);
- page = (Page) BufferGetPage(RootBuffer);
-
- GinInitBuffer(RootBuffer, GIN_LEAF);
-
- PageSetLSN(page, lsn);
- MarkBufferDirty(RootBuffer);
-
- UnlockReleaseBuffer(RootBuffer);
- UnlockReleaseBuffer(MetaBuffer);
-}
-
static void
ginRedoCreatePTree(XLogReaderState *record)
{
@@ -767,9 +737,6 @@ gin_redo(XLogReaderState *record)
oldCtx = MemoryContextSwitchTo(opCtx);
switch (info)
{
- case XLOG_GIN_CREATE_INDEX:
- ginRedoCreateIndex(record);
- break;
case XLOG_GIN_CREATE_PTREE:
ginRedoCreatePTree(record);
break;
diff --git a/src/backend/access/rmgrdesc/gindesc.c b/src/backend/access/rmgrdesc/gindesc.c
index ef30ce16b0..f3f4e1b214 100644
--- a/src/backend/access/rmgrdesc/gindesc.c
+++ b/src/backend/access/rmgrdesc/gindesc.c
@@ -78,9 +78,6 @@ gin_desc(StringInfo buf, XLogReaderState *record)
switch (info)
{
- case XLOG_GIN_CREATE_INDEX:
- /* no further information */
- break;
case XLOG_GIN_CREATE_PTREE:
/* no further information */
break;
@@ -188,9 +185,6 @@ gin_identify(uint8 info)
switch (info & ~XLR_INFO_MASK)
{
- case XLOG_GIN_CREATE_INDEX:
- id = "CREATE_INDEX";
- break;
case XLOG_GIN_CREATE_PTREE:
id = "CREATE_PTREE";
break;
diff --git a/src/include/access/gin.h b/src/include/access/gin.h
index 61fa697039..d559ffc703 100644
--- a/src/include/access/gin.h
+++ b/src/include/access/gin.h
@@ -71,6 +71,7 @@ extern int gin_pending_list_limit;
/* ginutil.c */
extern void ginGetStats(Relation index, GinStatsData *stats);
-extern void ginUpdateStats(Relation index, const GinStatsData *stats);
+extern void ginUpdateStats(Relation index,
+ const GinStatsData *stats, bool is_build);
#endif /* GIN_H */
diff --git a/src/include/access/ginxlog.h b/src/include/access/ginxlog.h
index 9bd4e0b9ba..2c5d743cac 100644
--- a/src/include/access/ginxlog.h
+++ b/src/include/access/ginxlog.h
@@ -16,8 +16,6 @@
#include "lib/stringinfo.h"
#include "storage/off.h"
-#define XLOG_GIN_CREATE_INDEX 0x00
-
#define XLOG_GIN_CREATE_PTREE 0x10
typedef struct ginxlogCreatePostingTree
--
2.17.1
v3_0003-GiST-Optimal-WAL-Usage.patchtext/x-patch; name=v3_0003-GiST-Optimal-WAL-Usage.patchDownload
From 4d1b410dd2817a735e51ccb179ac6df6847eda41 Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepikhov@postgrespro.ru>
Date: Tue, 2 Apr 2019 09:43:42 +0500
Subject: [PATCH 3/4] GiST-Optimal-WAL-Usage
---
src/backend/access/gist/gist.c | 46 ++++++++++++++++++--------
src/backend/access/gist/gistbuild.c | 32 ++++++++++--------
src/backend/access/gist/gistutil.c | 2 +-
src/backend/access/gist/gistxlog.c | 22 ------------
src/backend/access/rmgrdesc/gistdesc.c | 5 ---
src/include/access/gist_private.h | 7 ++--
src/include/access/gistxlog.h | 1 -
7 files changed, 56 insertions(+), 59 deletions(-)
diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c
index 2fddb23496..0e2b6c3014 100644
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -172,7 +172,7 @@ gistinsert(Relation r, Datum *values, bool *isnull,
values, isnull, true /* size is currently bogus */ );
itup->t_tid = *ht_ctid;
- gistdoinsert(r, itup, 0, giststate, heapRel);
+ gistdoinsert(r, itup, 0, giststate, heapRel, false);
/* cleanup */
MemoryContextSwitchTo(oldCxt);
@@ -219,7 +219,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
Buffer leftchildbuf,
List **splitinfo,
bool markfollowright,
- Relation heapRel)
+ Relation heapRel,
+ bool is_build)
{
BlockNumber blkno = BufferGetBlockNumber(buffer);
Page page = BufferGetPage(buffer);
@@ -458,7 +459,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
* insertion for that. NB: The number of pages and data segments
* specified here must match the calculations in gistXLogSplit()!
*/
- if (RelationNeedsWAL(rel))
+ if (RelationNeedsWAL(rel) && !is_build)
XLogEnsureRecordSpace(npage, 1 + npage * 2);
START_CRIT_SECTION();
@@ -479,18 +480,20 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer));
dist->page = BufferGetPage(dist->buffer);
- /* Write the WAL record */
- if (RelationNeedsWAL(rel))
+ /*
+ * Write the WAL record.
+ * Do not write an XLOG entry if the insertion is caused by the
+ * index build process.
+ */
+ if (RelationNeedsWAL(rel) && !is_build)
recptr = gistXLogSplit(is_leaf,
- dist, oldrlink, oldnsn, leftchildbuf,
- markfollowright);
+ dist, oldrlink, oldnsn, leftchildbuf,
+ markfollowright);
else
recptr = gistGetFakeLSN(rel);
for (ptr = dist; ptr; ptr = ptr->next)
- {
PageSetLSN(ptr->page, recptr);
- }
/*
* Return the new child buffers to the caller.
@@ -544,7 +547,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
if (BufferIsValid(leftchildbuf))
MarkBufferDirty(leftchildbuf);
- if (RelationNeedsWAL(rel))
+
+ if (RelationNeedsWAL(rel) && !is_build)
{
OffsetNumber ndeloffs = 0,
deloffs[1];
@@ -567,6 +571,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
PageSetLSN(page, recptr);
}
+
if (newblkno)
*newblkno = blkno;
}
@@ -583,17 +588,28 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
* the full page image. There's a chicken-and-egg problem: if we updated
* the child pages first, we wouldn't know the recptr of the WAL record
* we're about to write.
+ *
+ * We use fake LSNs for insertions caused by the index build. When it is
+ * finished, we write a generic_xlog entry for each index page and update
+ * all LSNs. In order to keep NSNs less than LSNs after this update, we
+ * set NSN to InvalidXLogRecPtr, which is the smallest possible NSN.
*/
+
if (BufferIsValid(leftchildbuf))
{
Page leftpg = BufferGetPage(leftchildbuf);
+ XLogRecPtr fakerecptr = InvalidXLogRecPtr;
- GistPageSetNSN(leftpg, recptr);
- GistClearFollowRight(leftpg);
+ if (!is_build)
+ GistPageSetNSN(leftpg, recptr);
+ else
+ GistPageSetNSN(leftpg, fakerecptr);
+ GistClearFollowRight(leftpg);
PageSetLSN(leftpg, recptr);
}
+
END_CRIT_SECTION();
return is_split;
@@ -606,7 +622,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
*/
void
gistdoinsert(Relation r, IndexTuple itup, Size freespace,
- GISTSTATE *giststate, Relation heapRel)
+ GISTSTATE *giststate, Relation heapRel, bool is_build)
{
ItemId iid;
IndexTuple idxtuple;
@@ -619,6 +635,7 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace,
state.freespace = freespace;
state.r = r;
state.heapRel = heapRel;
+ state.is_build = is_build;
/* Start from the root */
firststack.blkno = GIST_ROOT_BLKNO;
@@ -1251,7 +1268,8 @@ gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
leftchild,
&splitinfo,
true,
- state->heapRel);
+ state->heapRel,
+ state->is_build);
/*
* Before recursing up in case the page was split, release locks on the
diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c
index 3652fde5bb..aa05c0a8ee 100644
--- a/src/backend/access/gist/gistbuild.c
+++ b/src/backend/access/gist/gistbuild.c
@@ -17,6 +17,7 @@
#include <math.h>
#include "access/genam.h"
+#include "access/generic_xlog.h"
#include "access/gist_private.h"
#include "access/gistxlog.h"
#include "access/tableam.h"
@@ -181,18 +182,12 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
MarkBufferDirty(buffer);
- if (RelationNeedsWAL(index))
- {
- XLogRecPtr recptr;
-
- XLogBeginInsert();
- XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
-
- recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX);
- PageSetLSN(page, recptr);
- }
- else
- PageSetLSN(page, gistGetFakeLSN(heap));
+ /*
+ * Do not write index pages to WAL until the index build is finished.
+ * But we still need increasing LSNs on each page, so use a fake LSN,
+ * even for relations which eventually need WAL.
+ */
+ PageSetLSN(page, gistGetFakeLSN(heap));
UnlockReleaseBuffer(buffer);
@@ -226,6 +221,15 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
freeGISTstate(buildstate.giststate);
+ /*
+ * Create generic WAL records for all pages of the relation, if necessary.
+ * It seems reasonable not to generate WAL if we received an interrupt
+ * signal.
+ */
+ CHECK_FOR_INTERRUPTS();
+ if (RelationNeedsWAL(index))
+ log_relation(index);
+
/*
* Return statistics
*/
@@ -488,7 +492,7 @@ gistBuildCallback(Relation index,
* locked, we call gistdoinsert directly.
*/
gistdoinsert(index, itup, buildstate->freespace,
- buildstate->giststate, buildstate->heaprel);
+ buildstate->giststate, buildstate->heaprel, true);
}
/* Update tuple count and total size. */
@@ -695,7 +699,7 @@ gistbufferinginserttuples(GISTBuildState *buildstate, Buffer buffer, int level,
InvalidBuffer,
&splitinfo,
false,
- buildstate->heaprel);
+ buildstate->heaprel, true);
/*
* If this is a root split, update the root path item kept in memory. This
diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c
index 2163cc482d..af278e5ded 100644
--- a/src/backend/access/gist/gistutil.c
+++ b/src/backend/access/gist/gistutil.c
@@ -1004,6 +1004,7 @@ gistproperty(Oid index_oid, int attno,
* Temporary and unlogged GiST indexes are not WAL-logged, but we need LSNs
* to detect concurrent page splits anyway. This function provides a fake
* sequence of LSNs for that purpose.
+ * Persistent relations are also not WAL-logged while we build the index.
*/
XLogRecPtr
gistGetFakeLSN(Relation rel)
@@ -1024,7 +1025,6 @@ gistGetFakeLSN(Relation rel)
* Unlogged relations are accessible from other backends, and survive
* (clean) restarts. GetFakeLSNForUnloggedRel() handles that for us.
*/
- Assert(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED);
return GetFakeLSNForUnloggedRel();
}
}
diff --git a/src/backend/access/gist/gistxlog.c b/src/backend/access/gist/gistxlog.c
index cb80ab00cd..4fb1855e89 100644
--- a/src/backend/access/gist/gistxlog.c
+++ b/src/backend/access/gist/gistxlog.c
@@ -490,25 +490,6 @@ gistRedoPageSplitRecord(XLogReaderState *record)
UnlockReleaseBuffer(firstbuffer);
}
-static void
-gistRedoCreateIndex(XLogReaderState *record)
-{
- XLogRecPtr lsn = record->EndRecPtr;
- Buffer buffer;
- Page page;
-
- buffer = XLogInitBufferForRedo(record, 0);
- Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
- page = (Page) BufferGetPage(buffer);
-
- GISTInitBuffer(buffer, F_LEAF);
-
- PageSetLSN(page, lsn);
-
- MarkBufferDirty(buffer);
- UnlockReleaseBuffer(buffer);
-}
-
/* redo page deletion */
static void
gistRedoPageDelete(XLogReaderState *record)
@@ -594,9 +575,6 @@ gist_redo(XLogReaderState *record)
case XLOG_GIST_PAGE_SPLIT:
gistRedoPageSplitRecord(record);
break;
- case XLOG_GIST_CREATE_INDEX:
- gistRedoCreateIndex(record);
- break;
case XLOG_GIST_PAGE_DELETE:
gistRedoPageDelete(record);
break;
diff --git a/src/backend/access/rmgrdesc/gistdesc.c b/src/backend/access/rmgrdesc/gistdesc.c
index 3ff4f83d38..eb308c72d6 100644
--- a/src/backend/access/rmgrdesc/gistdesc.c
+++ b/src/backend/access/rmgrdesc/gistdesc.c
@@ -71,8 +71,6 @@ gist_desc(StringInfo buf, XLogReaderState *record)
case XLOG_GIST_PAGE_SPLIT:
out_gistxlogPageSplit(buf, (gistxlogPageSplit *) rec);
break;
- case XLOG_GIST_CREATE_INDEX:
- break;
case XLOG_GIST_PAGE_DELETE:
out_gistxlogPageDelete(buf, (gistxlogPageDelete *) rec);
break;
@@ -98,9 +96,6 @@ gist_identify(uint8 info)
case XLOG_GIST_PAGE_SPLIT:
id = "PAGE_SPLIT";
break;
- case XLOG_GIST_CREATE_INDEX:
- id = "CREATE_INDEX";
- break;
case XLOG_GIST_PAGE_DELETE:
id = "PAGE_DELETE";
break;
diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h
index 02dc285a78..78e2e3fb31 100644
--- a/src/include/access/gist_private.h
+++ b/src/include/access/gist_private.h
@@ -244,6 +244,7 @@ typedef struct
Relation r;
Relation heapRel;
Size freespace; /* free space to be left */
+ bool is_build;
GISTInsertStack *stack;
} GISTInsertState;
@@ -393,7 +394,8 @@ extern void gistdoinsert(Relation r,
IndexTuple itup,
Size freespace,
GISTSTATE *GISTstate,
- Relation heapRel);
+ Relation heapRel,
+ bool is_build);
/* A List of these is returned from gistplacetopage() in *splitinfo */
typedef struct
@@ -409,7 +411,8 @@ extern bool gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
Buffer leftchildbuf,
List **splitinfo,
bool markleftchild,
- Relation heapRel);
+ Relation heapRel,
+ bool is_build);
extern SplitedPageLayout *gistSplit(Relation r, Page page, IndexTuple *itup,
int len, GISTSTATE *giststate);
diff --git a/src/include/access/gistxlog.h b/src/include/access/gistxlog.h
index 2f87b67a53..80931497ca 100644
--- a/src/include/access/gistxlog.h
+++ b/src/include/access/gistxlog.h
@@ -23,7 +23,6 @@
* FSM */
#define XLOG_GIST_PAGE_SPLIT 0x30
/* #define XLOG_GIST_INSERT_COMPLETE 0x40 */ /* not used anymore */
-#define XLOG_GIST_CREATE_INDEX 0x50
#define XLOG_GIST_PAGE_DELETE 0x60
/*
--
2.17.1
v3_0004-SP-GiST-Optimal-WAL-Usage.patchtext/x-patch; name=v3_0004-SP-GiST-Optimal-WAL-Usage.patchDownload
From 98b3de012d116e1056ae3de9e17d14b59b646808 Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepikhov@postgrespro.ru>
Date: Tue, 2 Apr 2019 09:43:54 +0500
Subject: [PATCH 4/4] SP-GiST-Optimal-WAL-Usage
---
src/backend/access/rmgrdesc/spgdesc.c | 5 ----
src/backend/access/spgist/spgdoinsert.c | 12 ++++-----
src/backend/access/spgist/spginsert.c | 24 +++--------------
src/backend/access/spgist/spgxlog.c | 35 -------------------------
src/include/access/spgxlog.h | 1 -
5 files changed, 10 insertions(+), 67 deletions(-)
diff --git a/src/backend/access/rmgrdesc/spgdesc.c b/src/backend/access/rmgrdesc/spgdesc.c
index 37af31a764..40c1c8b3f9 100644
--- a/src/backend/access/rmgrdesc/spgdesc.c
+++ b/src/backend/access/rmgrdesc/spgdesc.c
@@ -24,8 +24,6 @@ spg_desc(StringInfo buf, XLogReaderState *record)
switch (info)
{
- case XLOG_SPGIST_CREATE_INDEX:
- break;
case XLOG_SPGIST_ADD_LEAF:
{
spgxlogAddLeaf *xlrec = (spgxlogAddLeaf *) rec;
@@ -88,9 +86,6 @@ spg_identify(uint8 info)
switch (info & ~XLR_INFO_MASK)
{
- case XLOG_SPGIST_CREATE_INDEX:
- id = "CREATE_INDEX";
- break;
case XLOG_SPGIST_ADD_LEAF:
id = "ADD_LEAF";
break;
diff --git a/src/backend/access/spgist/spgdoinsert.c b/src/backend/access/spgist/spgdoinsert.c
index 0d07b8b291..c34c44cd8b 100644
--- a/src/backend/access/spgist/spgdoinsert.c
+++ b/src/backend/access/spgist/spgdoinsert.c
@@ -289,7 +289,7 @@ addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple,
MarkBufferDirty(current->buffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
int flags;
@@ -516,7 +516,7 @@ moveLeafs(Relation index, SpGistState *state,
MarkBufferDirty(current->buffer);
MarkBufferDirty(nbuf);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
@@ -1334,7 +1334,7 @@ doPickSplit(Relation index, SpGistState *state,
saveCurrent.buffer = InvalidBuffer;
}
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
int flags;
@@ -1531,7 +1531,7 @@ spgAddNodeAction(Relation index, SpGistState *state,
MarkBufferDirty(current->buffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
@@ -1644,7 +1644,7 @@ spgAddNodeAction(Relation index, SpGistState *state,
MarkBufferDirty(saveCurrent.buffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
int flags;
@@ -1840,7 +1840,7 @@ spgSplitNodeAction(Relation index, SpGistState *state,
MarkBufferDirty(current->buffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
diff --git a/src/backend/access/spgist/spginsert.c b/src/backend/access/spgist/spginsert.c
index 390ad9ac51..0c3608ad8a 100644
--- a/src/backend/access/spgist/spginsert.c
+++ b/src/backend/access/spgist/spginsert.c
@@ -22,6 +22,7 @@
#include "access/tableam.h"
#include "access/xlog.h"
#include "access/xloginsert.h"
+#include "access/generic_xlog.h"
#include "catalog/index.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
@@ -105,26 +106,6 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo)
SpGistInitBuffer(nullbuffer, SPGIST_LEAF | SPGIST_NULLS);
MarkBufferDirty(nullbuffer);
- if (RelationNeedsWAL(index))
- {
- XLogRecPtr recptr;
-
- XLogBeginInsert();
-
- /*
- * Replay will re-initialize the pages, so don't take full pages
- * images. No other data to log.
- */
- XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
- XLogRegisterBuffer(1, rootbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
- XLogRegisterBuffer(2, nullbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
-
- recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_CREATE_INDEX);
-
- PageSetLSN(BufferGetPage(metabuffer), recptr);
- PageSetLSN(BufferGetPage(rootbuffer), recptr);
- PageSetLSN(BufferGetPage(nullbuffer), recptr);
- }
END_CRIT_SECTION();
@@ -151,6 +132,9 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo)
SpGistUpdateMetaPage(index);
+ if (RelationNeedsWAL(index))
+ log_relation(index);
+
result = (IndexBuildResult *) palloc0(sizeof(IndexBuildResult));
result->heap_tuples = reltuples;
result->index_tuples = buildstate.indtuples;
diff --git a/src/backend/access/spgist/spgxlog.c b/src/backend/access/spgist/spgxlog.c
index 71836ee8a5..ebe6ae8715 100644
--- a/src/backend/access/spgist/spgxlog.c
+++ b/src/backend/access/spgist/spgxlog.c
@@ -72,38 +72,6 @@ addOrReplaceTuple(Page page, Item tuple, int size, OffsetNumber offset)
size);
}
-static void
-spgRedoCreateIndex(XLogReaderState *record)
-{
- XLogRecPtr lsn = record->EndRecPtr;
- Buffer buffer;
- Page page;
-
- buffer = XLogInitBufferForRedo(record, 0);
- Assert(BufferGetBlockNumber(buffer) == SPGIST_METAPAGE_BLKNO);
- page = (Page) BufferGetPage(buffer);
- SpGistInitMetapage(page);
- PageSetLSN(page, lsn);
- MarkBufferDirty(buffer);
- UnlockReleaseBuffer(buffer);
-
- buffer = XLogInitBufferForRedo(record, 1);
- Assert(BufferGetBlockNumber(buffer) == SPGIST_ROOT_BLKNO);
- SpGistInitBuffer(buffer, SPGIST_LEAF);
- page = (Page) BufferGetPage(buffer);
- PageSetLSN(page, lsn);
- MarkBufferDirty(buffer);
- UnlockReleaseBuffer(buffer);
-
- buffer = XLogInitBufferForRedo(record, 2);
- Assert(BufferGetBlockNumber(buffer) == SPGIST_NULL_BLKNO);
- SpGistInitBuffer(buffer, SPGIST_LEAF | SPGIST_NULLS);
- page = (Page) BufferGetPage(buffer);
- PageSetLSN(page, lsn);
- MarkBufferDirty(buffer);
- UnlockReleaseBuffer(buffer);
-}
-
static void
spgRedoAddLeaf(XLogReaderState *record)
{
@@ -976,9 +944,6 @@ spg_redo(XLogReaderState *record)
oldCxt = MemoryContextSwitchTo(opCtx);
switch (info)
{
- case XLOG_SPGIST_CREATE_INDEX:
- spgRedoCreateIndex(record);
- break;
case XLOG_SPGIST_ADD_LEAF:
spgRedoAddLeaf(record);
break;
diff --git a/src/include/access/spgxlog.h b/src/include/access/spgxlog.h
index 6527fc9eb1..8199b3f250 100644
--- a/src/include/access/spgxlog.h
+++ b/src/include/access/spgxlog.h
@@ -18,7 +18,6 @@
#include "storage/off.h"
/* XLOG record types for SPGiST */
-#define XLOG_SPGIST_CREATE_INDEX 0x00
#define XLOG_SPGIST_ADD_LEAF 0x10
#define XLOG_SPGIST_MOVE_LEAFS 0x20
#define XLOG_SPGIST_ADD_NODE 0x30
--
2.17.1
On 02/04/2019 08:58, Andrey Lepikhov wrote:
On 25/03/2019 15:21, Heikki Linnakangas wrote:
I had another quick look.
I still think using the "generic xlog AM" for this is a wrong level of
abstraction, and we should use the XLOG_FPI records for this directly.
We can extend XLOG_FPI so that it can store multiple pages in a single
record, if it doesn't already handle it.

Another counter-point to using the generic xlog record is that you're
currently doing two unnecessary memcpy's of all pages in the index, in
GenericXLogRegisterBuffer() and GenericXLogFinish(). That's not free.

I guess the generic_log_relation() function can stay where it is, but it
should use XLogRegisterBuffer() and XLogInsert() directly.

Patch set v.3 uses XLOG_FPI records directly.
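
For reference, logging a single buffer as a full-page image with the
xloginsert API looks roughly like the sketch below. This is only an
illustration of the approach being discussed; the committed code batches a
range of blocks into each record and may differ in detail.

    /*
     * Sketch: WAL-log one buffer as an FPI, bypassing the generic xlog
     * machinery and its extra page copies. REGBUF_FORCE_IMAGE makes the
     * record carry a full-page image unconditionally.
     */
    XLogRecPtr  recptr;

    XLogBeginInsert();
    XLogRegisterBuffer(0, buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
    recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI);
    PageSetLSN(BufferGetPage(buffer), recptr);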
Thanks! Committed, with some changes:
* I moved the log_relation() function to xlog.c, so that it sits beside the
log_newpage_*() functions. I renamed it to log_newpage_range(), and
changed the arguments so that the caller provides the beginning and end of
the block range. I added a 'page_std' flag, instead of just assuming that
all pages use the standard page layout. All of the callers pass
page_std=true at the moment, but it seems better to be explicit about it.
I made those changes because I felt that the function was too narrowly
tailored for the current callers: it assumed the standard page layout, and
it only logged the main fork. It's more flexible now, for any future AMs
that might not be exactly like that. It feels like it's at the same level
of abstraction now as the other log_newpage_*() functions. Even if we
never need the flexibility, I think making the 'page_std' and 'forknum'
arguments explicit is good, to draw attention to those details for anyone
calling the function.
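
As a rough sketch, an index build would now WAL-log its pages once, at the
end, with something like the following (argument order assumed from the
description above rather than checked against the committed header):

    /*
     * Sketch: after the build is complete, log every block of the main
     * fork as a full-page image in one go.
     */
    if (RelationNeedsWAL(index))
        log_newpage_range(index, MAIN_FORKNUM,
                          0, RelationGetNumberOfBlocks(index),
                          true);    /* page_std */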
* I fixed the REDO code. It was trivially broken: it only restored the
first page in each FPI WAL record.
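
In other words, the redo loop has to walk every block registered with the
record, not just block 0. A sketch of the shape of the fix (not the
committed code verbatim):

    /*
     * Restore every full-page image carried by the record. BLK_RESTORED
     * means the backup image was written into the buffer.
     */
    uint8       block_id;

    for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
    {
        Buffer      buffer;

        if (XLogReadBufferForRedo(record, block_id, &buffer) != BLK_RESTORED)
            elog(ERROR, "unexpected result when restoring full-page image");
        UnlockReleaseBuffer(buffer);
    }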
* Using "fake" unlogged LSNs for GiST index build seemed fishy. I could
not convince myself that it was safe in all corner cases. In a recently
initdb'd cluster, it's theoretically possible that the fake LSN counter
overtakes the real LSN value, and that could lead to strange behavior.
For example, how would the buffer manager behave, if there was a dirty
page in the buffer cache with an LSN value that's greater than the
current WAL flush pointer? I think you'd get "ERROR: xlog flush request
%X/%X is not satisfied --- flushed only to %X/%X".
I changed that so that we use LSN 1 for all pages during index build.
That's smaller than any real or fake LSN. Or actually, the fake LSN
counter used to start at 1 - I bumped that up to 1000, so now it's
safely smaller. Could've used 0 instead, but there's an assertion in
gist scan code that didn't like that.
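
In code terms the change amounts to stamping a constant LSN on pages during
the build; the constant name below is made up for illustration and need not
match what was committed:

    /*
     * Hypothetical constant: an LSN used for all pages while the index is
     * being built. It is below any real LSN and, with the fake-LSN counter
     * now starting at 1000, below any fake LSN as well.
     */
    #define GistBuildLSN    ((XLogRecPtr) 1)

    /* during the build, instead of a real or fake LSN: */
    PageSetLSN(page, GistBuildLSN);
    GistPageSetNSN(page, GistBuildLSN);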
As a benchmark I used the script (test.sql, attached), which shows the WAL
size increment during index build. In the table below you can see the
influence of the patch on WAL growth.

Results
=======
AM      | master | patch
--------+--------+-------
GIN     | 347 MB | 66 MB
GiST    | 157 MB | 43 MB
SP-GiST | 119 MB | 38 MB
Nice!
- Heikki