From 49a5fc72492f4917990bfeb7bd6c4fd06e52a7d5 Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepikhov@postgrespro.ru>
Date: Tue, 31 Jul 2018 07:23:11 +0500
Subject: [PATCH 3/4] GIST-Optimal-WAL-Usage

---
 src/backend/access/gist/gist.c         | 46 ++++++++++++++++++--------
 src/backend/access/gist/gistbuild.c    | 32 ++++++++++--------
 src/backend/access/gist/gistutil.c     |  2 +-
 src/backend/access/gist/gistxlog.c     | 22 ------------
 src/backend/access/rmgrdesc/gistdesc.c |  5 ---
 src/include/access/gist_private.h      | 10 +++---
 src/include/access/gistxlog.h          |  1 -
 7 files changed, 57 insertions(+), 61 deletions(-)

diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c
index 8a42effdf7..227998b1f8 100644
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -172,7 +172,7 @@ gistinsert(Relation r, Datum *values, bool *isnull,
 						 values, isnull, true /* size is currently bogus */ );
 	itup->t_tid = *ht_ctid;
 
-	gistdoinsert(r, itup, 0, giststate);
+	gistdoinsert(r, itup, 0, giststate, false);
 
 	/* cleanup */
 	MemoryContextSwitchTo(oldCxt);
@@ -218,7 +218,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
 				BlockNumber *newblkno,
 				Buffer leftchildbuf,
 				List **splitinfo,
-				bool markfollowright)
+				bool markfollowright,
+				bool is_build)
 {
 	BlockNumber blkno = BufferGetBlockNumber(buffer);
 	Page		page = BufferGetPage(buffer);
@@ -457,7 +458,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
 		 * insertion for that. NB: The number of pages and data segments
 		 * specified here must match the calculations in gistXLogSplit()!
 		 */
-		if (RelationNeedsWAL(rel))
+		if (RelationNeedsWAL(rel) && !is_build)
 			XLogEnsureRecordSpace(npage, 1 + npage * 2);
 
 		START_CRIT_SECTION();
@@ -478,18 +479,20 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
 		PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer));
 		dist->page = BufferGetPage(dist->buffer);
 
-		/* Write the WAL record */
-		if (RelationNeedsWAL(rel))
+		/*
+		 * Write the WAL record.
+		 * Do not write XLog entry if the insertion is caused by
+		 * index build process.
+		 */
+		if (RelationNeedsWAL(rel) && !is_build)
 			recptr = gistXLogSplit(is_leaf,
-								   dist, oldrlink, oldnsn, leftchildbuf,
-								   markfollowright);
+								dist, oldrlink, oldnsn, leftchildbuf,
+								markfollowright);
 		else
 			recptr = gistGetFakeLSN(rel);
 
 		for (ptr = dist; ptr; ptr = ptr->next)
-		{
 			PageSetLSN(ptr->page, recptr);
-		}
 
 		/*
 		 * Return the new child buffers to the caller.
@@ -543,7 +546,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
 		if (BufferIsValid(leftchildbuf))
 			MarkBufferDirty(leftchildbuf);
 
-		if (RelationNeedsWAL(rel))
+
+		if (RelationNeedsWAL(rel) && !is_build)
 		{
 			OffsetNumber ndeloffs = 0,
 						deloffs[1];
@@ -566,6 +570,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
 			PageSetLSN(page, recptr);
 		}
 
+
 		if (newblkno)
 			*newblkno = blkno;
 	}
@@ -582,17 +587,28 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
 	 * the full page image. There's a chicken-and-egg problem: if we updated
 	 * the child pages first, we wouldn't know the recptr of the WAL record
 	 * we're about to write.
+	 *
+	 * We use fakeLSNs for inserions caused by index build. And when it is
+	 * finished, we write generic_xlog entry for each index page and update
+	 * all LSNs. In order to keep NSNs less then LSNs after this update, we
+	 * set NSN to InvalidXLogRecPtr, which is the smallest possible NSN.
 	 */
+
 	if (BufferIsValid(leftchildbuf))
 	{
 		Page		leftpg = BufferGetPage(leftchildbuf);
+		XLogRecPtr	fakerecptr = InvalidXLogRecPtr;
 
-		GistPageSetNSN(leftpg, recptr);
-		GistClearFollowRight(leftpg);
+		if (!is_build)
+			GistPageSetNSN(leftpg, recptr);
+		else
+			GistPageSetNSN(leftpg, fakerecptr);
 
+		GistClearFollowRight(leftpg);
 		PageSetLSN(leftpg, recptr);
 	}
 
+
 	END_CRIT_SECTION();
 
 	return is_split;
@@ -604,7 +620,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
  * so it does not bother releasing palloc'd allocations.
  */
 void
-gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate)
+gistdoinsert(Relation r, IndexTuple itup, Size freespace,
+			 GISTSTATE *giststate, bool is_build)
 {
 	ItemId		iid;
 	IndexTuple	idxtuple;
@@ -616,6 +633,7 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate)
 	memset(&state, 0, sizeof(GISTInsertState));
 	state.freespace = freespace;
 	state.r = r;
+	state.is_build = is_build;
 
 	/* Start from the root */
 	firststack.blkno = GIST_ROOT_BLKNO;
@@ -1232,7 +1250,7 @@ gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
 							   oldoffnum, NULL,
 							   leftchild,
 							   &splitinfo,
-							   true);
+							   true, state->is_build);
 
 	/*
 	 * Before recursing up in case the page was split, release locks on the
diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c
index 434f15f014..b61dbf8ac3 100644
--- a/src/backend/access/gist/gistbuild.c
+++ b/src/backend/access/gist/gistbuild.c
@@ -20,6 +20,7 @@
 #include "access/gist_private.h"
 #include "access/gistxlog.h"
 #include "access/xloginsert.h"
+#include "access/generic_xlog.h"
 #include "catalog/index.h"
 #include "miscadmin.h"
 #include "optimizer/cost.h"
@@ -178,18 +179,12 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 
 	MarkBufferDirty(buffer);
 
-	if (RelationNeedsWAL(index))
-	{
-		XLogRecPtr	recptr;
-
-		XLogBeginInsert();
-		XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
-
-		recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX);
-		PageSetLSN(page, recptr);
-	}
-	else
-		PageSetLSN(page, gistGetFakeLSN(heap));
+	/*
+	 * Do not write index pages to WAL unitl index build is finished.
+	 * But we still need increasing LSNs on each page, so use FakeLSN,
+	 * even for relations which eventually need WAL.
+	 */
+	PageSetLSN(page, gistGetFakeLSN(heap));
 
 	UnlockReleaseBuffer(buffer);
 
@@ -222,6 +217,15 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 
 	freeGISTstate(buildstate.giststate);
 
+	/*
+	 * Create generic wal records for all pages of relation, if necessary.
+	 * It seems reasonable not to generate WAL, if we recieved interrupt
+	 * signal.
+	 */
+	CHECK_FOR_INTERRUPTS();
+	if (RelationNeedsWAL(index))
+		generic_log_relation(index);
+
 	/*
 	 * Return statistics
 	 */
@@ -484,7 +488,7 @@ gistBuildCallback(Relation index,
 		 * locked, we call gistdoinsert directly.
 		 */
 		gistdoinsert(index, itup, buildstate->freespace,
-					 buildstate->giststate);
+					 buildstate->giststate, true);
 	}
 
 	/* Update tuple count and total size. */
@@ -690,7 +694,7 @@ gistbufferinginserttuples(GISTBuildState *buildstate, Buffer buffer, int level,
 							   itup, ntup, oldoffnum, &placed_to_blk,
 							   InvalidBuffer,
 							   &splitinfo,
-							   false);
+							   false, true);
 
 	/*
 	 * If this is a root split, update the root path item kept in memory. This
diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c
index 12804c321c..0b3f8db679 100644
--- a/src/backend/access/gist/gistutil.c
+++ b/src/backend/access/gist/gistutil.c
@@ -974,6 +974,7 @@ gistproperty(Oid index_oid, int attno,
  * Temporary and unlogged GiST indexes are not WAL-logged, but we need LSNs
  * to detect concurrent page splits anyway. This function provides a fake
  * sequence of LSNs for that purpose.
+ * Persistent relations are also not WAL-logged while we build index.
  */
 XLogRecPtr
 gistGetFakeLSN(Relation rel)
@@ -994,7 +995,6 @@ gistGetFakeLSN(Relation rel)
 		 * Unlogged relations are accessible from other backends, and survive
 		 * (clean) restarts. GetFakeLSNForUnloggedRel() handles that for us.
 		 */
-		Assert(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED);
 		return GetFakeLSNForUnloggedRel();
 	}
 }
diff --git a/src/backend/access/gist/gistxlog.c b/src/backend/access/gist/gistxlog.c
index 1e09126978..9b0abccdaf 100644
--- a/src/backend/access/gist/gistxlog.c
+++ b/src/backend/access/gist/gistxlog.c
@@ -281,25 +281,6 @@ gistRedoPageSplitRecord(XLogReaderState *record)
 	UnlockReleaseBuffer(firstbuffer);
 }
 
-static void
-gistRedoCreateIndex(XLogReaderState *record)
-{
-	XLogRecPtr	lsn = record->EndRecPtr;
-	Buffer		buffer;
-	Page		page;
-
-	buffer = XLogInitBufferForRedo(record, 0);
-	Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
-	page = (Page) BufferGetPage(buffer);
-
-	GISTInitBuffer(buffer, F_LEAF);
-
-	PageSetLSN(page, lsn);
-
-	MarkBufferDirty(buffer);
-	UnlockReleaseBuffer(buffer);
-}
-
 void
 gist_redo(XLogReaderState *record)
 {
@@ -321,9 +302,6 @@ gist_redo(XLogReaderState *record)
 		case XLOG_GIST_PAGE_SPLIT:
 			gistRedoPageSplitRecord(record);
 			break;
-		case XLOG_GIST_CREATE_INDEX:
-			gistRedoCreateIndex(record);
-			break;
 		default:
 			elog(PANIC, "gist_redo: unknown op code %u", info);
 	}
diff --git a/src/backend/access/rmgrdesc/gistdesc.c b/src/backend/access/rmgrdesc/gistdesc.c
index e5e925e0c5..1685a2fb8a 100644
--- a/src/backend/access/rmgrdesc/gistdesc.c
+++ b/src/backend/access/rmgrdesc/gistdesc.c
@@ -44,8 +44,6 @@ gist_desc(StringInfo buf, XLogReaderState *record)
 		case XLOG_GIST_PAGE_SPLIT:
 			out_gistxlogPageSplit(buf, (gistxlogPageSplit *) rec);
 			break;
-		case XLOG_GIST_CREATE_INDEX:
-			break;
 	}
 }
 
@@ -62,9 +60,6 @@ gist_identify(uint8 info)
 		case XLOG_GIST_PAGE_SPLIT:
 			id = "PAGE_SPLIT";
 			break;
-		case XLOG_GIST_CREATE_INDEX:
-			id = "CREATE_INDEX";
-			break;
 	}
 
 	return id;
diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h
index 36ed7244ba..0588fc716a 100644
--- a/src/include/access/gist_private.h
+++ b/src/include/access/gist_private.h
@@ -241,6 +241,7 @@ typedef struct
 {
 	Relation	r;
 	Size		freespace;		/* free space to be left */
+	bool		is_build;
 
 	GISTInsertStack *stack;
 } GISTInsertState;
@@ -387,9 +388,9 @@ extern MemoryContext createTempGistContext(void);
 extern GISTSTATE *initGISTstate(Relation index);
 extern void freeGISTstate(GISTSTATE *giststate);
 extern void gistdoinsert(Relation r,
-			 IndexTuple itup,
-			 Size freespace,
-			 GISTSTATE *GISTstate);
+						 IndexTuple itup,
+						 Size freespace,
+						 GISTSTATE* giststate, bool is_build);
 
 /* A List of these is returned from gistplacetopage() in *splitinfo */
 typedef struct
@@ -404,7 +405,8 @@ extern bool gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
 				OffsetNumber oldoffnum, BlockNumber *newblkno,
 				Buffer leftchildbuf,
 				List **splitinfo,
-				bool markleftchild);
+				bool markleftchild,
+				bool is_build);
 
 extern SplitedPageLayout *gistSplit(Relation r, Page page, IndexTuple *itup,
 		  int len, GISTSTATE *giststate);
diff --git a/src/include/access/gistxlog.h b/src/include/access/gistxlog.h
index 1a2b9496d0..5932fc395b 100644
--- a/src/include/access/gistxlog.h
+++ b/src/include/access/gistxlog.h
@@ -21,7 +21,6 @@
  /* #define XLOG_GIST_NEW_ROOT			 0x20 */	/* not used anymore */
 #define XLOG_GIST_PAGE_SPLIT		0x30
  /* #define XLOG_GIST_INSERT_COMPLETE	 0x40 */	/* not used anymore */
-#define XLOG_GIST_CREATE_INDEX		0x50
  /* #define XLOG_GIST_PAGE_DELETE		 0x60 */	/* not used anymore */
 
 /*
-- 
2.17.1

