From 3e73e862c1aa20bf4eeca20ba4381a1d3c6f19d9 Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepikhov@postgrespro.ru>
Date: Mon, 1 Apr 2019 09:07:32 +0500
Subject: [PATCH 3/4] GiST-Optimal-WAL-Usage

---
 src/backend/access/gist/gist.c         | 46 ++++++++++++++++++--------
 src/backend/access/gist/gistbuild.c    | 32 ++++++++++--------
 src/backend/access/gist/gistutil.c     |  2 +-
 src/backend/access/gist/gistxlog.c     | 22 ------------
 src/backend/access/rmgrdesc/gistdesc.c |  5 ---
 src/include/access/gist_private.h      |  7 ++--
 src/include/access/gistxlog.h          |  1 -
 7 files changed, 56 insertions(+), 59 deletions(-)

diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c
index 2fddb23496..0e2b6c3014 100644
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -172,7 +172,7 @@ gistinsert(Relation r, Datum *values, bool *isnull,
 						 values, isnull, true /* size is currently bogus */ );
 	itup->t_tid = *ht_ctid;
 
-	gistdoinsert(r, itup, 0, giststate, heapRel);
+	gistdoinsert(r, itup, 0, giststate, heapRel, false);
 
 	/* cleanup */
 	MemoryContextSwitchTo(oldCxt);
@@ -219,7 +219,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
 				Buffer leftchildbuf,
 				List **splitinfo,
 				bool markfollowright,
-				Relation heapRel)
+				Relation heapRel,
+				bool is_build)
 {
 	BlockNumber blkno = BufferGetBlockNumber(buffer);
 	Page		page = BufferGetPage(buffer);
@@ -458,7 +459,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
 		 * insertion for that. NB: The number of pages and data segments
 		 * specified here must match the calculations in gistXLogSplit()!
 		 */
-		if (RelationNeedsWAL(rel))
+		if (RelationNeedsWAL(rel) && !is_build)
 			XLogEnsureRecordSpace(npage, 1 + npage * 2);
 
 		START_CRIT_SECTION();
@@ -479,18 +480,20 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
 		PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer));
 		dist->page = BufferGetPage(dist->buffer);
 
-		/* Write the WAL record */
-		if (RelationNeedsWAL(rel))
+		/*
+		 * Write the WAL record.
+		 * Do not write XLog entry if the insertion is caused by
+		 * index build process.
+		 */
+		if (RelationNeedsWAL(rel) && !is_build)
 			recptr = gistXLogSplit(is_leaf,
-								   dist, oldrlink, oldnsn, leftchildbuf,
-								   markfollowright);
+								dist, oldrlink, oldnsn, leftchildbuf,
+								markfollowright);
 		else
 			recptr = gistGetFakeLSN(rel);
 
 		for (ptr = dist; ptr; ptr = ptr->next)
-		{
 			PageSetLSN(ptr->page, recptr);
-		}
 
 		/*
 		 * Return the new child buffers to the caller.
@@ -544,7 +547,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
 		if (BufferIsValid(leftchildbuf))
 			MarkBufferDirty(leftchildbuf);
 
-		if (RelationNeedsWAL(rel))
+
+		if (RelationNeedsWAL(rel) && !is_build)
 		{
 			OffsetNumber ndeloffs = 0,
 						deloffs[1];
@@ -567,6 +571,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
 			PageSetLSN(page, recptr);
 		}
 
+
 		if (newblkno)
 			*newblkno = blkno;
 	}
@@ -583,17 +588,28 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
 	 * the full page image. There's a chicken-and-egg problem: if we updated
 	 * the child pages first, we wouldn't know the recptr of the WAL record
 	 * we're about to write.
+	 *
+	 * We use fakeLSNs for inserions caused by index build. And when it is
+	 * finished, we write generic_xlog entry for each index page and update
+	 * all LSNs. In order to keep NSNs less then LSNs after this update, we
+	 * set NSN to InvalidXLogRecPtr, which is the smallest possible NSN.
 	 */
+
 	if (BufferIsValid(leftchildbuf))
 	{
 		Page		leftpg = BufferGetPage(leftchildbuf);
+		XLogRecPtr	fakerecptr = InvalidXLogRecPtr;
 
-		GistPageSetNSN(leftpg, recptr);
-		GistClearFollowRight(leftpg);
+		if (!is_build)
+			GistPageSetNSN(leftpg, recptr);
+		else
+			GistPageSetNSN(leftpg, fakerecptr);
 
+		GistClearFollowRight(leftpg);
 		PageSetLSN(leftpg, recptr);
 	}
 
+
 	END_CRIT_SECTION();
 
 	return is_split;
@@ -606,7 +622,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
  */
 void
 gistdoinsert(Relation r, IndexTuple itup, Size freespace,
-			 GISTSTATE *giststate, Relation heapRel)
+			 GISTSTATE *giststate, Relation heapRel, bool is_build)
 {
 	ItemId		iid;
 	IndexTuple	idxtuple;
@@ -619,6 +635,7 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace,
 	state.freespace = freespace;
 	state.r = r;
 	state.heapRel = heapRel;
+	state.is_build = is_build;
 
 	/* Start from the root */
 	firststack.blkno = GIST_ROOT_BLKNO;
@@ -1251,7 +1268,8 @@ gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
 							   leftchild,
 							   &splitinfo,
 							   true,
-							   state->heapRel);
+							   state->heapRel,
+							   state->is_build);
 
 	/*
 	 * Before recursing up in case the page was split, release locks on the
diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c
index 3652fde5bb..8d0d285cab 100644
--- a/src/backend/access/gist/gistbuild.c
+++ b/src/backend/access/gist/gistbuild.c
@@ -17,6 +17,7 @@
 #include <math.h>
 
 #include "access/genam.h"
+#include "access/generic_xlog.h"
 #include "access/gist_private.h"
 #include "access/gistxlog.h"
 #include "access/tableam.h"
@@ -181,18 +182,12 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 
 	MarkBufferDirty(buffer);
 
-	if (RelationNeedsWAL(index))
-	{
-		XLogRecPtr	recptr;
-
-		XLogBeginInsert();
-		XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
-
-		recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX);
-		PageSetLSN(page, recptr);
-	}
-	else
-		PageSetLSN(page, gistGetFakeLSN(heap));
+	/*
+	 * Do not write index pages to WAL unitl index build is finished.
+	 * But we still need increasing LSNs on each page, so use FakeLSN,
+	 * even for relations which eventually need WAL.
+	 */
+	PageSetLSN(page, gistGetFakeLSN(heap));
 
 	UnlockReleaseBuffer(buffer);
 
@@ -226,6 +221,15 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 
 	freeGISTstate(buildstate.giststate);
 
+	/*
+	 * Create generic wal records for all pages of relation, if necessary.
+	 * It seems reasonable not to generate WAL, if we recieved interrupt
+	 * signal.
+	 */
+	CHECK_FOR_INTERRUPTS();
+	if (RelationNeedsWAL(index))
+		generic_log_relation(index);
+
 	/*
 	 * Return statistics
 	 */
@@ -488,7 +492,7 @@ gistBuildCallback(Relation index,
 		 * locked, we call gistdoinsert directly.
 		 */
 		gistdoinsert(index, itup, buildstate->freespace,
-					 buildstate->giststate, buildstate->heaprel);
+					 buildstate->giststate, buildstate->heaprel, true);
 	}
 
 	/* Update tuple count and total size. */
@@ -695,7 +699,7 @@ gistbufferinginserttuples(GISTBuildState *buildstate, Buffer buffer, int level,
 							   InvalidBuffer,
 							   &splitinfo,
 							   false,
-							   buildstate->heaprel);
+							   buildstate->heaprel, true);
 
 	/*
 	 * If this is a root split, update the root path item kept in memory. This
diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c
index 2163cc482d..af278e5ded 100644
--- a/src/backend/access/gist/gistutil.c
+++ b/src/backend/access/gist/gistutil.c
@@ -1004,6 +1004,7 @@ gistproperty(Oid index_oid, int attno,
  * Temporary and unlogged GiST indexes are not WAL-logged, but we need LSNs
  * to detect concurrent page splits anyway. This function provides a fake
  * sequence of LSNs for that purpose.
+ * Persistent relations are also not WAL-logged while we build index.
  */
 XLogRecPtr
 gistGetFakeLSN(Relation rel)
@@ -1024,7 +1025,6 @@ gistGetFakeLSN(Relation rel)
 		 * Unlogged relations are accessible from other backends, and survive
 		 * (clean) restarts. GetFakeLSNForUnloggedRel() handles that for us.
 		 */
-		Assert(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED);
 		return GetFakeLSNForUnloggedRel();
 	}
 }
diff --git a/src/backend/access/gist/gistxlog.c b/src/backend/access/gist/gistxlog.c
index cb80ab00cd..4fb1855e89 100644
--- a/src/backend/access/gist/gistxlog.c
+++ b/src/backend/access/gist/gistxlog.c
@@ -490,25 +490,6 @@ gistRedoPageSplitRecord(XLogReaderState *record)
 	UnlockReleaseBuffer(firstbuffer);
 }
 
-static void
-gistRedoCreateIndex(XLogReaderState *record)
-{
-	XLogRecPtr	lsn = record->EndRecPtr;
-	Buffer		buffer;
-	Page		page;
-
-	buffer = XLogInitBufferForRedo(record, 0);
-	Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
-	page = (Page) BufferGetPage(buffer);
-
-	GISTInitBuffer(buffer, F_LEAF);
-
-	PageSetLSN(page, lsn);
-
-	MarkBufferDirty(buffer);
-	UnlockReleaseBuffer(buffer);
-}
-
 /* redo page deletion */
 static void
 gistRedoPageDelete(XLogReaderState *record)
@@ -594,9 +575,6 @@ gist_redo(XLogReaderState *record)
 		case XLOG_GIST_PAGE_SPLIT:
 			gistRedoPageSplitRecord(record);
 			break;
-		case XLOG_GIST_CREATE_INDEX:
-			gistRedoCreateIndex(record);
-			break;
 		case XLOG_GIST_PAGE_DELETE:
 			gistRedoPageDelete(record);
 			break;
diff --git a/src/backend/access/rmgrdesc/gistdesc.c b/src/backend/access/rmgrdesc/gistdesc.c
index 3ff4f83d38..eb308c72d6 100644
--- a/src/backend/access/rmgrdesc/gistdesc.c
+++ b/src/backend/access/rmgrdesc/gistdesc.c
@@ -71,8 +71,6 @@ gist_desc(StringInfo buf, XLogReaderState *record)
 		case XLOG_GIST_PAGE_SPLIT:
 			out_gistxlogPageSplit(buf, (gistxlogPageSplit *) rec);
 			break;
-		case XLOG_GIST_CREATE_INDEX:
-			break;
 		case XLOG_GIST_PAGE_DELETE:
 			out_gistxlogPageDelete(buf, (gistxlogPageDelete *) rec);
 			break;
@@ -98,9 +96,6 @@ gist_identify(uint8 info)
 		case XLOG_GIST_PAGE_SPLIT:
 			id = "PAGE_SPLIT";
 			break;
-		case XLOG_GIST_CREATE_INDEX:
-			id = "CREATE_INDEX";
-			break;
 		case XLOG_GIST_PAGE_DELETE:
 			id = "PAGE_DELETE";
 			break;
diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h
index 02dc285a78..78e2e3fb31 100644
--- a/src/include/access/gist_private.h
+++ b/src/include/access/gist_private.h
@@ -244,6 +244,7 @@ typedef struct
 	Relation	r;
 	Relation	heapRel;
 	Size		freespace;		/* free space to be left */
+	bool		is_build;
 
 	GISTInsertStack *stack;
 } GISTInsertState;
@@ -393,7 +394,8 @@ extern void gistdoinsert(Relation r,
 			 IndexTuple itup,
 			 Size freespace,
 			 GISTSTATE *GISTstate,
-			 Relation heapRel);
+			 Relation heapRel,
+			 bool is_build);
 
 /* A List of these is returned from gistplacetopage() in *splitinfo */
 typedef struct
@@ -409,7 +411,8 @@ extern bool gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
 				Buffer leftchildbuf,
 				List **splitinfo,
 				bool markleftchild,
-				Relation heapRel);
+				Relation heapRel,
+				bool is_build);
 
 extern SplitedPageLayout *gistSplit(Relation r, Page page, IndexTuple *itup,
 		  int len, GISTSTATE *giststate);
diff --git a/src/include/access/gistxlog.h b/src/include/access/gistxlog.h
index 2f87b67a53..80931497ca 100644
--- a/src/include/access/gistxlog.h
+++ b/src/include/access/gistxlog.h
@@ -23,7 +23,6 @@
 										  * FSM */
 #define XLOG_GIST_PAGE_SPLIT		0x30
  /* #define XLOG_GIST_INSERT_COMPLETE	 0x40 */	/* not used anymore */
-#define XLOG_GIST_CREATE_INDEX		0x50
 #define XLOG_GIST_PAGE_DELETE		0x60
 
 /*
-- 
2.17.1

