From 679aa38c664eecba4a0d049feffe2537be3ebe21 Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepikhov@postgrespro.ru>
Date: Wed, 6 Feb 2019 14:41:08 +0500
Subject: [PATCH 3/4] GiST-Optimal-WAL-Usage

---
 src/backend/access/gist/gist.c         | 46 ++++++++++++++++++--------
 src/backend/access/gist/gistbuild.c    | 32 ++++++++++--------
 src/backend/access/gist/gistutil.c     |  2 +-
 src/backend/access/gist/gistxlog.c     | 22 ------------
 src/backend/access/rmgrdesc/gistdesc.c |  5 ---
 src/include/access/gist_private.h      |  7 ++--
 src/include/access/gistxlog.h          |  1 -
 7 files changed, 56 insertions(+), 59 deletions(-)

diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c
index b75b3a8dac..c8f4e41db9 100644
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -173,7 +173,7 @@ gistinsert(Relation r, Datum *values, bool *isnull,
 						 values, isnull, true /* size is currently bogus */ );
 	itup->t_tid = *ht_ctid;
 
-	gistdoinsert(r, itup, 0, giststate, heapRel);
+	gistdoinsert(r, itup, 0, giststate, heapRel, false);
 
 	/* cleanup */
 	MemoryContextSwitchTo(oldCxt);
@@ -220,7 +220,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
 				Buffer leftchildbuf,
 				List **splitinfo,
 				bool markfollowright,
-				Relation heapRel)
+				Relation heapRel,
+				bool is_build)
 {
 	BlockNumber blkno = BufferGetBlockNumber(buffer);
 	Page		page = BufferGetPage(buffer);
@@ -459,7 +460,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
 		 * insertion for that. NB: The number of pages and data segments
 		 * specified here must match the calculations in gistXLogSplit()!
 		 */
-		if (RelationNeedsWAL(rel))
+		if (RelationNeedsWAL(rel) && !is_build)
 			XLogEnsureRecordSpace(npage, 1 + npage * 2);
 
 		START_CRIT_SECTION();
@@ -480,18 +481,20 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
 		PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer));
 		dist->page = BufferGetPage(dist->buffer);
 
-		/* Write the WAL record */
-		if (RelationNeedsWAL(rel))
+		/*
+		 * Write the WAL record.
+		 * Do not write a WAL record if the insertion is caused by
+		 * the index build process; a fake LSN is used instead.
+		 */
+		if (RelationNeedsWAL(rel) && !is_build)
 			recptr = gistXLogSplit(is_leaf,
-								   dist, oldrlink, oldnsn, leftchildbuf,
-								   markfollowright);
+								dist, oldrlink, oldnsn, leftchildbuf,
+								markfollowright);
 		else
 			recptr = gistGetFakeLSN(rel);
 
 		for (ptr = dist; ptr; ptr = ptr->next)
-		{
 			PageSetLSN(ptr->page, recptr);
-		}
 
 		/*
 		 * Return the new child buffers to the caller.
@@ -545,7 +548,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
 		if (BufferIsValid(leftchildbuf))
 			MarkBufferDirty(leftchildbuf);
 
-		if (RelationNeedsWAL(rel))
+
+		if (RelationNeedsWAL(rel) && !is_build)
 		{
 			OffsetNumber ndeloffs = 0,
 						deloffs[1];
@@ -568,6 +572,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
 			PageSetLSN(page, recptr);
 		}
 
+
 		if (newblkno)
 			*newblkno = blkno;
 	}
@@ -584,17 +589,28 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
 	 * the full page image. There's a chicken-and-egg problem: if we updated
 	 * the child pages first, we wouldn't know the recptr of the WAL record
 	 * we're about to write.
+	 *
+	 * We use fakeLSNs for insertions caused by index build. And when it is
+	 * finished, we write a generic_xlog entry for each index page and update
+	 * all LSNs. In order to keep NSNs less than LSNs after this update, we
+	 * set NSN to InvalidXLogRecPtr, which is the smallest possible NSN.
 	 */
+
 	if (BufferIsValid(leftchildbuf))
 	{
 		Page		leftpg = BufferGetPage(leftchildbuf);
+		XLogRecPtr	fakerecptr = InvalidXLogRecPtr;
 
-		GistPageSetNSN(leftpg, recptr);
-		GistClearFollowRight(leftpg);
+		if (!is_build)
+			GistPageSetNSN(leftpg, recptr);
+		else
+			GistPageSetNSN(leftpg, fakerecptr);
 
+		GistClearFollowRight(leftpg);
 		PageSetLSN(leftpg, recptr);
 	}
 
+
 	END_CRIT_SECTION();
 
 	return is_split;
@@ -607,7 +623,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
  */
 void
 gistdoinsert(Relation r, IndexTuple itup, Size freespace,
-			 GISTSTATE *giststate, Relation heapRel)
+			 GISTSTATE *giststate, Relation heapRel, bool is_build)
 {
 	ItemId		iid;
 	IndexTuple	idxtuple;
@@ -620,6 +636,7 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace,
 	state.freespace = freespace;
 	state.r = r;
 	state.heapRel = heapRel;
+	state.is_build = is_build;
 
 	/* Start from the root */
 	firststack.blkno = GIST_ROOT_BLKNO;
@@ -1237,7 +1254,8 @@ gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
 							   leftchild,
 							   &splitinfo,
 							   true,
-							   state->heapRel);
+							   state->heapRel,
+							   state->is_build);
 
 	/*
 	 * Before recursing up in case the page was split, release locks on the
diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c
index bd142a3560..2e98e55187 100644
--- a/src/backend/access/gist/gistbuild.c
+++ b/src/backend/access/gist/gistbuild.c
@@ -17,6 +17,7 @@
 #include <math.h>
 
 #include "access/genam.h"
+#include "access/generic_xlog.h"
 #include "access/gist_private.h"
 #include "access/gistxlog.h"
 #include "access/xloginsert.h"
@@ -180,18 +181,12 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 
 	MarkBufferDirty(buffer);
 
-	if (RelationNeedsWAL(index))
-	{
-		XLogRecPtr	recptr;
-
-		XLogBeginInsert();
-		XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
-
-		recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX);
-		PageSetLSN(page, recptr);
-	}
-	else
-		PageSetLSN(page, gistGetFakeLSN(heap));
+	/*
+	 * Do not write index pages to WAL until the index build is finished.
+	 * But we still need increasing LSNs on each page, so use FakeLSN,
+	 * even for relations which eventually need WAL.
+	 */
+	PageSetLSN(page, gistGetFakeLSN(heap));
 
 	UnlockReleaseBuffer(buffer);
 
@@ -224,6 +219,15 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 
 	freeGISTstate(buildstate.giststate);
 
+	/*
+	 * Create generic WAL records for all pages of the relation, if necessary.
+	 * It seems reasonable not to generate WAL if we received an interrupt
+	 * signal.
+	 */
+	CHECK_FOR_INTERRUPTS();
+	if (RelationNeedsWAL(index))
+		generic_log_relation(index);
+
 	/*
 	 * Return statistics
 	 */
@@ -486,7 +490,7 @@ gistBuildCallback(Relation index,
 		 * locked, we call gistdoinsert directly.
 		 */
 		gistdoinsert(index, itup, buildstate->freespace,
-					 buildstate->giststate, buildstate->heaprel);
+					 buildstate->giststate, buildstate->heaprel, true);
 	}
 
 	/* Update tuple count and total size. */
@@ -693,7 +697,7 @@ gistbufferinginserttuples(GISTBuildState *buildstate, Buffer buffer, int level,
 							   InvalidBuffer,
 							   &splitinfo,
 							   false,
-							   buildstate->heaprel);
+							   buildstate->heaprel, true);
 
 	/*
 	 * If this is a root split, update the root path item kept in memory. This
diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c
index 8d3dfad27b..f7088e2d94 100644
--- a/src/backend/access/gist/gistutil.c
+++ b/src/backend/access/gist/gistutil.c
@@ -949,6 +949,7 @@ gistproperty(Oid index_oid, int attno,
  * Temporary and unlogged GiST indexes are not WAL-logged, but we need LSNs
  * to detect concurrent page splits anyway. This function provides a fake
  * sequence of LSNs for that purpose.
+ * Persistent relations are also not WAL-logged while the index is being built.
  */
 XLogRecPtr
 gistGetFakeLSN(Relation rel)
@@ -969,7 +970,6 @@ gistGetFakeLSN(Relation rel)
 		 * Unlogged relations are accessible from other backends, and survive
 		 * (clean) restarts. GetFakeLSNForUnloggedRel() handles that for us.
 		 */
-		Assert(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED);
 		return GetFakeLSNForUnloggedRel();
 	}
 }
diff --git a/src/backend/access/gist/gistxlog.c b/src/backend/access/gist/gistxlog.c
index 408bd5390a..66d5bb831f 100644
--- a/src/backend/access/gist/gistxlog.c
+++ b/src/backend/access/gist/gistxlog.c
@@ -489,25 +489,6 @@ gistRedoPageSplitRecord(XLogReaderState *record)
 	UnlockReleaseBuffer(firstbuffer);
 }
 
-static void
-gistRedoCreateIndex(XLogReaderState *record)
-{
-	XLogRecPtr	lsn = record->EndRecPtr;
-	Buffer		buffer;
-	Page		page;
-
-	buffer = XLogInitBufferForRedo(record, 0);
-	Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
-	page = (Page) BufferGetPage(buffer);
-
-	GISTInitBuffer(buffer, F_LEAF);
-
-	PageSetLSN(page, lsn);
-
-	MarkBufferDirty(buffer);
-	UnlockReleaseBuffer(buffer);
-}
-
 void
 gist_redo(XLogReaderState *record)
 {
@@ -532,9 +513,6 @@ gist_redo(XLogReaderState *record)
 		case XLOG_GIST_PAGE_SPLIT:
 			gistRedoPageSplitRecord(record);
 			break;
-		case XLOG_GIST_CREATE_INDEX:
-			gistRedoCreateIndex(record);
-			break;
 		default:
 			elog(PANIC, "gist_redo: unknown op code %u", info);
 	}
diff --git a/src/backend/access/rmgrdesc/gistdesc.c b/src/backend/access/rmgrdesc/gistdesc.c
index e468c9e15a..75f1ed747e 100644
--- a/src/backend/access/rmgrdesc/gistdesc.c
+++ b/src/backend/access/rmgrdesc/gistdesc.c
@@ -52,8 +52,6 @@ gist_desc(StringInfo buf, XLogReaderState *record)
 		case XLOG_GIST_PAGE_SPLIT:
 			out_gistxlogPageSplit(buf, (gistxlogPageSplit *) rec);
 			break;
-		case XLOG_GIST_CREATE_INDEX:
-			break;
 	}
 }
 
@@ -73,9 +71,6 @@ gist_identify(uint8 info)
 		case XLOG_GIST_PAGE_SPLIT:
 			id = "PAGE_SPLIT";
 			break;
-		case XLOG_GIST_CREATE_INDEX:
-			id = "CREATE_INDEX";
-			break;
 	}
 
 	return id;
diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h
index 3698942f9d..b1680bfd6d 100644
--- a/src/include/access/gist_private.h
+++ b/src/include/access/gist_private.h
@@ -242,6 +242,7 @@ typedef struct
 	Relation	r;
 	Relation	heapRel;
 	Size		freespace;		/* free space to be left */
+	bool		is_build;
 
 	GISTInsertStack *stack;
 } GISTInsertState;
@@ -391,7 +392,8 @@ extern void gistdoinsert(Relation r,
 			 IndexTuple itup,
 			 Size freespace,
 			 GISTSTATE *GISTstate,
-			 Relation heapRel);
+			 Relation heapRel,
+			 bool is_build);
 
 /* A List of these is returned from gistplacetopage() in *splitinfo */
 typedef struct
@@ -407,7 +409,8 @@ extern bool gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
 				Buffer leftchildbuf,
 				List **splitinfo,
 				bool markleftchild,
-				Relation heapRel);
+				Relation heapRel,
+				bool is_build);
 
 extern SplitedPageLayout *gistSplit(Relation r, Page page, IndexTuple *itup,
 		  int len, GISTSTATE *giststate);
diff --git a/src/include/access/gistxlog.h b/src/include/access/gistxlog.h
index 5117aabf1a..2ee80e244e 100644
--- a/src/include/access/gistxlog.h
+++ b/src/include/access/gistxlog.h
@@ -22,7 +22,6 @@
  /* #define XLOG_GIST_NEW_ROOT			 0x20 */	/* not used anymore */
 #define XLOG_GIST_PAGE_SPLIT		0x30
  /* #define XLOG_GIST_INSERT_COMPLETE	 0x40 */	/* not used anymore */
-#define XLOG_GIST_CREATE_INDEX		0x50
  /* #define XLOG_GIST_PAGE_DELETE		 0x60 */	/* not used anymore */
 
 /*
-- 
2.17.1

