From fdd15f7dfe1ca66efba5eb4724d08574aa3e02ce Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Thu, 7 Nov 2013 19:38:01 +0200
Subject: [PATCH 4/4] Get rid of the post-recovery cleanup step of GIN page
 splits.

Replace it with an approach similar to what GiST uses: when a page is split,
the left sibling is marked with a flag indicating that the parent hasn't been
updated yet. When the parent is updated, the flag is cleared. If an insertion
steps on a page with the flag set, it will finish the split before proceeding
with the insertion.

The post-recovery cleanup mechanism was never totally reliable, as insertion
to the parent could fail e.g. because of running out of memory or disk space,
leaving the tree in an inconsistent state.
---
 src/backend/access/gin/ginbtree.c     | 529 ++++++++++++++++++++++++----------
 src/backend/access/gin/gindatapage.c  | 207 ++++++-------
 src/backend/access/gin/ginentrypage.c | 125 +++-----
 src/backend/access/gin/gininsert.c    |   9 +-
 src/backend/access/gin/ginxlog.c      | 482 ++++++++++++++-----------------
 src/backend/access/rmgrdesc/gindesc.c |  45 ++-
 src/include/access/gin.h              |   1 -
 src/include/access/gin_private.h      | 108 +++++--
 src/include/access/rmgrlist.h         |   2 +-
 9 files changed, 835 insertions(+), 673 deletions(-)

diff --git a/src/backend/access/gin/ginbtree.c b/src/backend/access/gin/ginbtree.c
index 7248b06..41e460e 100644
--- a/src/backend/access/gin/ginbtree.c
+++ b/src/backend/access/gin/ginbtree.c
@@ -18,8 +18,16 @@
 #include "miscadmin.h"
 #include "utils/rel.h"
 
+static void ginFindParents(GinBtree btree, GinBtreeStack *stack,
+			   BlockNumber rootBlkno);
+static bool ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
+			   void *insertdata, BlockNumber updateblkno,
+			   Buffer childbuf, GinStatsData *buildStats);
+static void ginFinishSplit(GinBtree btree, BlockNumber rootBlkno,
+			   GinBtreeStack *stack, bool freestack, GinStatsData *buildStats);
+
 /*
- * Locks buffer by needed method for search.
+ * Lock buffer by needed method for search.
  */
 static int
 ginTraverseLock(Buffer buffer, bool searchMode)
@@ -53,7 +61,7 @@ ginTraverseLock(Buffer buffer, bool searchMode)
 }
 
 /*
- * Descends the tree to the leaf page that contains or would contain the
+ * Descend the tree to the leaf page that contains, or would contain, the
  * key we're searching for. The key should already be filled in 'btree',
  * in tree-type specific manner. If btree->fullScan is true, descends to the
  * leftmost leaf page.
@@ -86,6 +94,13 @@ ginFindLeafPage(GinBtree btree, BlockNumber rootBlkno, bool searchMode)
 		access = ginTraverseLock(stack->buffer, searchMode);
 
 		/*
+		 * If we're going to modify the tree, finish any incomplete splits we
+		 * encounter on the way.
+		 */
+		if (!searchMode && GinPageIsIncompleteSplit(page))
+			ginFinishSplit(btree, rootBlkno, stack, false, NULL);
+
+		/*
 		 * ok, page is correctly locked, we should check to move right ..,
 		 * root never has a right link, so small optimization
 		 */
@@ -101,6 +116,9 @@ ginFindLeafPage(GinBtree btree, BlockNumber rootBlkno, bool searchMode)
 			stack->buffer = ginStepRight(stack->buffer, btree->index, access);
 			stack->blkno = rightlink;
 			page = BufferGetPage(stack->buffer);
+
+			if (!searchMode && GinPageIsIncompleteSplit(page))
+				ginFinishSplit(btree, rootBlkno, stack, false, NULL);
 		}
 
 		if (GinPageIsLeaf(page))	/* we found, return locked page */
@@ -188,67 +206,62 @@ freeGinBtreeStack(GinBtreeStack *stack)
  * Function should never release root page to prevent conflicts
  * with vacuum process
  */
-void
-ginFindParents(GinBtree btree, GinBtreeStack *stack,
-			   BlockNumber rootBlkno)
+static void
+ginFindParents(GinBtree btree, GinBtreeStack *stack, BlockNumber rootBlkno)
 {
 	Page		page;
 	Buffer		buffer;
 	BlockNumber blkno,
 				leftmostBlkno;
 	OffsetNumber offset;
-	GinBtreeStack *root = stack->parent;
+	GinBtreeStack *root;
 	GinBtreeStack *ptr;
 
-	if (!root)
+	/*
+	 * Unwind the stack all the way up to the root, leaving only the root
+	 * item.
+	 *
+	 * Be careful not to release the pin on the root page! The pin on root
+	 * page is required to lock out concurrent vacuums on the tree.
+	 */
+	root = stack->parent;
+	while (root->parent)
 	{
-		/* XLog mode... */
-		root = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
-		root->blkno = rootBlkno;
-		root->buffer = ReadBuffer(btree->index, rootBlkno);
-		LockBuffer(root->buffer, GIN_EXCLUSIVE);
-		root->parent = NULL;
+		ReleaseBuffer(root->buffer);
+		root = root->parent;
 	}
-	else
-	{
-		/*
-		 * find root, we should not release root page until update is
-		 * finished!!
-		 */
-		while (root->parent)
-		{
-			ReleaseBuffer(root->buffer);
-			root = root->parent;
-		}
 
-		Assert(root->blkno == rootBlkno);
-		Assert(BufferGetBlockNumber(root->buffer) == rootBlkno);
-		LockBuffer(root->buffer, GIN_EXCLUSIVE);
-	}
+	Assert(root->blkno == rootBlkno);
+	Assert(BufferGetBlockNumber(root->buffer) == rootBlkno);
 	root->off = InvalidOffsetNumber;
 
-	page = BufferGetPage(root->buffer);
-	Assert(!GinPageIsLeaf(page));
-
-	/* check trivial case */
-	if ((root->off = btree->findChildPtr(btree, page, stack->blkno, InvalidOffsetNumber)) != InvalidOffsetNumber)
-	{
-		stack->parent = root;
-		return;
-	}
+	blkno = root->blkno;
+	buffer = root->buffer;
+	offset = InvalidOffsetNumber;
 
-	leftmostBlkno = blkno = btree->getLeftMostChild(btree, page);
-	LockBuffer(root->buffer, GIN_UNLOCK);
-	Assert(blkno != InvalidBlockNumber);
+	ptr = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
 
 	for (;;)
 	{
-		buffer = ReadBuffer(btree->index, blkno);
 		LockBuffer(buffer, GIN_EXCLUSIVE);
 		page = BufferGetPage(buffer);
 		if (GinPageIsLeaf(page))
 			elog(ERROR, "Lost path");
 
+		if (GinPageIsIncompleteSplit(page))
+		{
+			Assert(blkno != rootBlkno);
+			ptr->blkno = blkno;
+			ptr->buffer = buffer;
+			/*
+			 * parent may be wrong, but if so, the ginFinishSplit call will
+			 * recurse to call ginFindParents again to fix it.
+			 */
+			ptr->parent = root;
+			ptr->off = InvalidOffsetNumber;
+			ginFinishSplit(btree, rootBlkno, ptr, false, NULL);
+		}
+
 		leftmostBlkno = btree->getLeftMostChild(btree, page);
 
 		while ((offset = btree->findChildPtr(btree, page, stack->blkno, InvalidOffsetNumber)) == InvalidOffsetNumber)
@@ -261,50 +274,146 @@ ginFindParents(GinBtree btree, GinBtreeStack *stack,
 			}
 			buffer = ginStepRight(buffer, btree->index, GIN_EXCLUSIVE);
 			page = BufferGetPage(buffer);
+
+			/* finish any incomplete splits, as above */
+			if (GinPageIsIncompleteSplit(page))
+			{
+				Assert(blkno != rootBlkno);
+				ptr->blkno = blkno;
+				ptr->buffer = buffer;
+				ptr->parent = root;
+				ptr->off = InvalidOffsetNumber;
+				ginFinishSplit(btree, rootBlkno, ptr, false, NULL);
+			}
 		}
 
 		if (blkno != InvalidBlockNumber)
 		{
-			ptr = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
 			ptr->blkno = blkno;
 			ptr->buffer = buffer;
-			ptr->parent = root; /* it's may be wrong, but in next call we will
+			ptr->parent = root; /* it may be wrong, but in next call we will
 								 * correct */
 			ptr->off = offset;
 			stack->parent = ptr;
 			return;
 		}
 
+		/* Descend down to next level */
 		blkno = leftmostBlkno;
+		buffer = ReadBuffer(btree->index, blkno);
 	}
 }
 
 /*
- * Returns true if the insertion is done, false if the page was split and
- * downlink insertion is pending.
+ * Insert a new item to a page.
+ *
+ * Returns true if the insertion was finished. On false, the page was split and
+ * the parent needs to be updated. (a root update returns true as it doesn't
+ * need any further action by the caller to complete)
  *
  * stack->buffer is locked on entry, and is kept locked.
+ *
+ * When inserting a downlink to an internal page, 'childbuf' contains the
+ * child page that was split. Its GIN_INCOMPLETE_SPLIT flag will be cleared
+ * atomically with the insert. Also, the existing item at the given location
+ * is updated to point to 'updateblkno'.
  */
 static bool
-ginPlaceToPage(GinBtree btree, BlockNumber rootBlkno, GinBtreeStack *stack,
-			   GinStatsData *buildStats)
+ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
+			   void *insertdata, BlockNumber updateblkno,
+			   Buffer childbuf, GinStatsData *buildStats)
 {
 	Page		page = BufferGetPage(stack->buffer);
-	XLogRecData *rdata;
+	XLogRecData *payloadrdata;
 	bool		fit;
+	uint16		xlflags = 0;
+	Page		childpage = NULL;
+
+	if (GinPageIsData(page))
+		xlflags |= GIN_SPLIT_ISDATA;
+	if (GinPageIsLeaf(page))
+	{
+		xlflags |= GIN_SPLIT_ISLEAF;
+		Assert(!BufferIsValid(childbuf));
+		Assert(updateblkno == InvalidBlockNumber);
+	}
+	else
+	{
+		Assert(BufferIsValid(childbuf));
+		Assert(updateblkno != InvalidBlockNumber);
+		childpage = BufferGetPage(childbuf);
+	}
 
+	/*
+	 * Try to put the incoming tuple on the page. If it doesn't fit,
+	 * placeToPage method will return false and leave the page unmodified,
+	 * and we'll have to split the page.
+	 */
 	START_CRIT_SECTION();
-	fit = btree->placeToPage(btree, stack->buffer, stack->off, &rdata);
+	fit = btree->placeToPage(btree, stack->buffer, stack->off,
+							 insertdata, updateblkno,
+							 &payloadrdata);
 	if (fit)
 	{
 		MarkBufferDirty(stack->buffer);
 
+		/* An insert to an internal page finishes the split of the child. */
+		if (childbuf != InvalidBuffer)
+		{
+			GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT;
+			MarkBufferDirty(childbuf);
+		}
+
 		if (RelationNeedsWAL(btree->index))
 		{
 			XLogRecPtr	recptr;
+			XLogRecData rdata[3];
+			ginxlogInsert xlrec;
+
+			xlrec.node = btree->index->rd_node;
+			xlrec.blkno = BufferGetBlockNumber(stack->buffer);
+			xlrec.offset = stack->off;
+			xlrec.flags = xlflags;
+
+			rdata[0].buffer = InvalidBuffer;
+			rdata[0].data = (char *) &xlrec;
+			rdata[0].len = sizeof(ginxlogInsert);
+
+			/*
+			 * Log information about child if this was an insertion of a
+			 * downlink.
+			 */
+			if (childbuf != InvalidBuffer)
+			{
+				struct
+				{
+					BlockNumber left;
+					BlockNumber right;
+				} children;
+
+				rdata[0].next = &rdata[1];
+
+				children.left = BufferGetBlockNumber(childbuf);
+				children.right = GinPageGetOpaque(childpage)->rightlink;
+
+				rdata[1].buffer = InvalidBuffer;
+				rdata[1].data = (char *) &children;
+				rdata[1].len = sizeof(BlockNumber) * 2;
+				rdata[1].next = &rdata[2];
+
+				rdata[2].buffer = childbuf;
+				rdata[2].buffer_std = false;
+				rdata[2].data = NULL;
+				rdata[2].len = 0;
+				rdata[2].next = payloadrdata;
+			}
+			else
+				rdata[0].next = payloadrdata;
 
 			recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT, rdata);
 			PageSetLSN(page, recptr);
+			if (childbuf != InvalidBuffer)
+				PageSetLSN(childpage, recptr);
 		}
 
 		END_CRIT_SECTION();
@@ -317,13 +426,23 @@ ginPlaceToPage(GinBtree btree, BlockNumber rootBlkno, GinBtreeStack *stack,
 		Buffer		rbuffer;
 		Page		newlpage;
 		BlockNumber savedRightLink;
-		GinBtreeStack *parent;
-		Page		lpage,
-					rpage;
+		Page		rpage;
+		XLogRecData rdata[2];
+		ginxlogSplit data;
+		Buffer		lbuffer = InvalidBuffer;
+		Page		newrootpg = NULL;
 
 		END_CRIT_SECTION();
 
 		rbuffer = GinNewBuffer(btree->index);
+		/* During index build, count the new page */
+		if (buildStats)
+		{
+			if (btree->isData)
+				buildStats->nDataPages++;
+			else
+				buildStats->nEntryPages++;
+		}
 
 		savedRightLink = GinPageGetOpaque(page)->rightlink;
 
@@ -331,64 +450,48 @@ ginPlaceToPage(GinBtree btree, BlockNumber rootBlkno, GinBtreeStack *stack,
 		 * newlpage is a pointer to memory page, it is not associated with
 		 * a buffer. stack->buffer is not touched yet.
 		 */
-		newlpage = btree->splitPage(btree, stack->buffer, rbuffer, stack->off, &rdata);
+		newlpage = btree->splitPage(btree, stack->buffer, rbuffer, stack->off,
+									insertdata, updateblkno,
+									&payloadrdata);
+
+		data.node = btree->index->rd_node;
+		data.rblkno = BufferGetBlockNumber(rbuffer);
+		data.flags = xlflags;
+		if (childbuf != InvalidBuffer)
+		{
+			Page childpage = BufferGetPage(childbuf);
+			data.leftChildBlkno = BufferGetBlockNumber(childbuf);
+			data.rightChildBlkno = GinPageGetOpaque(childpage)->rightlink;
+		}
+		else
+			data.leftChildBlkno = data.rightChildBlkno = InvalidBlockNumber;
 
-		((ginxlogSplit *) (rdata->data))->rootBlkno = rootBlkno;
+		rdata[0].buffer = InvalidBuffer;
+		rdata[0].data = (char *) &data;
+		rdata[0].len = sizeof(ginxlogSplit);
 
-		/* During index build, count the newly-split page */
-		if (buildStats)
+		if (childbuf != InvalidBuffer)
 		{
-			if (btree->isData)
-				buildStats->nDataPages++;
-			else
-				buildStats->nEntryPages++;
+			rdata[0].next = &rdata[1];
+
+			rdata[1].buffer = childbuf;
+			rdata[1].buffer_std = false;
+			rdata[1].data = NULL;
+			rdata[1].len = 0;
+			rdata[1].next = payloadrdata;
 		}
+		else
+			rdata[0].next = payloadrdata;
 
-		parent = stack->parent;
+		rpage = BufferGetPage(rbuffer);
 
-		if (parent == NULL)
+		if (stack->parent == NULL)
 		{
 			/*
 			 * split root, so we need to allocate new left page and place
 			 * pointer on root to left and right page
 			 */
-			Buffer		lbuffer = GinNewBuffer(btree->index);
-
-			((ginxlogSplit *) (rdata->data))->isRootSplit = TRUE;
-			((ginxlogSplit *) (rdata->data))->rrlink = InvalidBlockNumber;
-
-			lpage = BufferGetPage(lbuffer);
-			rpage = BufferGetPage(rbuffer);
-
-			GinPageGetOpaque(rpage)->rightlink = InvalidBlockNumber;
-			GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);
-			((ginxlogSplit *) (rdata->data))->lblkno = BufferGetBlockNumber(lbuffer);
-
-			START_CRIT_SECTION();
-
-			GinInitBuffer(stack->buffer, GinPageGetOpaque(newlpage)->flags & ~GIN_LEAF);
-			PageRestoreTempPage(newlpage, lpage);
-			btree->fillRoot(btree, stack->buffer, lbuffer, rbuffer);
-
-			MarkBufferDirty(rbuffer);
-			MarkBufferDirty(lbuffer);
-			MarkBufferDirty(stack->buffer);
-
-			if (RelationNeedsWAL(btree->index))
-			{
-				XLogRecPtr	recptr;
-
-				recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, rdata);
-				PageSetLSN(page, recptr);
-				PageSetLSN(lpage, recptr);
-				PageSetLSN(rpage, recptr);
-			}
-
-			UnlockReleaseBuffer(rbuffer);
-			UnlockReleaseBuffer(lbuffer);
-			END_CRIT_SECTION();
-
-			/* During index build, count the newly-added root page */
+			lbuffer = GinNewBuffer(btree->index);
 			if (buildStats)
 			{
 				if (btree->isData)
@@ -397,87 +500,139 @@ ginPlaceToPage(GinBtree btree, BlockNumber rootBlkno, GinBtreeStack *stack,
 					buildStats->nEntryPages++;
 			}
 
-			return true;
+			/*
+			 * root never has a right-link, so we borrow the rrlink field to
+			 * store the root block number.
+			 */
+			data.rrlink = BufferGetBlockNumber(stack->buffer);
+			data.lblkno = BufferGetBlockNumber(lbuffer);
+			data.flags |= GIN_SPLIT_ROOT;
+
+			GinPageGetOpaque(rpage)->rightlink = InvalidBlockNumber;
+			GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);
+
+			/*
+			 * Construct a new root page containing downlinks to the new left
+			 * and right pages. (do this in a temporary copy first rather
+			 * than overwriting the original page directly, so that we can still
+			 * abort gracefully if this fails.)
+			 */
+			newrootpg = PageGetTempPage(rpage);
+			GinInitPage(newrootpg, GinPageGetOpaque(newlpage)->flags & ~GIN_LEAF, BLCKSZ);
+
+			btree->fillRoot(btree, newrootpg,
+							BufferGetBlockNumber(lbuffer), newlpage,
+							BufferGetBlockNumber(rbuffer), rpage);
 		}
 		else
 		{
 			/* split non-root page */
-			((ginxlogSplit *) (rdata->data))->isRootSplit = FALSE;
-			((ginxlogSplit *) (rdata->data))->rrlink = savedRightLink;
-
-			lpage = BufferGetPage(stack->buffer);
-			rpage = BufferGetPage(rbuffer);
+			data.rrlink = savedRightLink;
+			data.lblkno = BufferGetBlockNumber(stack->buffer);
 
 			GinPageGetOpaque(rpage)->rightlink = savedRightLink;
+			GinPageGetOpaque(newlpage)->flags |= GIN_INCOMPLETE_SPLIT;
 			GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);
+		}
 
-			START_CRIT_SECTION();
-			PageRestoreTempPage(newlpage, lpage);
+		/*
+		 * Ok, we have the new contents of the left page in a temporary copy
+		 * now (newlpage), and the newly-allocated right block has been filled
+		 * in. The original page is still unchanged.
+		 *
+		 * If this is a root split, we also have a temporary page containing
+		 * the new contents of the root. Copy the new left page to a
+		 * newly-allocated block, and initialize the (original) root page with
+		 * the new copy. Otherwise, copy the temporary copy of the new left
+		 * page over the old left page.
+		 */
 
-			MarkBufferDirty(rbuffer);
-			MarkBufferDirty(stack->buffer);
+		START_CRIT_SECTION();
 
-			if (RelationNeedsWAL(btree->index))
-			{
-				XLogRecPtr	recptr;
+		MarkBufferDirty(rbuffer);
 
-				recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, rdata);
-				PageSetLSN(lpage, recptr);
-				PageSetLSN(rpage, recptr);
-			}
-			UnlockReleaseBuffer(rbuffer);
-			END_CRIT_SECTION();
+		if (stack->parent == NULL)
+		{
+			PageRestoreTempPage(newlpage, BufferGetPage(lbuffer));
+			MarkBufferDirty(lbuffer);
+			newlpage = newrootpg;
+		}
 
-			return false;
+		PageRestoreTempPage(newlpage, BufferGetPage(stack->buffer));
+		MarkBufferDirty(stack->buffer);
+
+		/* write WAL record */
+		if (RelationNeedsWAL(btree->index))
+		{
+			XLogRecPtr	recptr;
+
+			recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, rdata);
+			PageSetLSN(BufferGetPage(stack->buffer), recptr);
+			PageSetLSN(rpage, recptr);
+			if (stack->parent == NULL)
+				PageSetLSN(BufferGetPage(lbuffer), recptr);
 		}
+		END_CRIT_SECTION();
+
+		/*
+		 * We can release the lock on the right page now, but keep the
+		 * original buffer locked.
+		 */
+		UnlockReleaseBuffer(rbuffer);
+		if (stack->parent == NULL)
+			UnlockReleaseBuffer(lbuffer);
+
+		/*
+		 * If we split the root, we're done. Otherwise the split is not
+		 * complete until the downlink for the new page has been inserted to
+		 * the parent.
+		 */
+		if (stack->parent == NULL)
+			return true;
+		else
+			return false;
 	}
 }
 
 /*
- * Insert value (stored in GinBtree) to tree described by stack
+ * Finish a split by inserting the downlink for the new page to parent.
  *
- * During an index build, buildStats is non-null and the counters
- * it contains are incremented as needed.
+ * On entry, stack->buffer is exclusively locked.
  *
- * NB: the passed-in stack is freed, as though by freeGinBtreeStack.
+ * If freestack is true, all the buffers are released and unlocked as we
+ * crawl up the tree, and 'stack' is freed. Otherwise stack is unmodified,
+ * and stack->buffer is kept locked.
  */
-void
-ginInsertValue(GinBtree btree, GinBtreeStack *stack, GinStatsData *buildStats)
+static void
+ginFinishSplit(GinBtree btree, BlockNumber rootBlkno, GinBtreeStack *stack,
+			   bool freestack, GinStatsData *buildStats)
 {
-	GinBtreeStack *parent;
-	BlockNumber rootBlkno;
 	Page		page;
-
-	/* extract root BlockNumber from stack */
-	Assert(stack != NULL);
-	parent = stack;
-	while (parent->parent)
-		parent = parent->parent;
-	rootBlkno = parent->blkno;
-	Assert(BlockNumberIsValid(rootBlkno));
+	bool		done;
+	bool		first = true;
 
 	/* this loop crawls up the stack until the insertion is complete */
-	for (;;)
+	do
 	{
-		bool done;
-
-		done = ginPlaceToPage(btree, rootBlkno, stack, buildStats);
-
-		/* just to be extra sure we don't delete anything by accident... */
-		btree->isDelete = FALSE;
-
-		if (done)
-		{
-			LockBuffer(stack->buffer, GIN_UNLOCK);
-			freeGinBtreeStack(stack);
-			break;
-		}
-
-		btree->prepareDownlink(btree, stack->buffer);
+		GinBtreeStack *parent = stack->parent;
+		void	   *insertdata;
+		BlockNumber updateblkno;
 
 		/* search parent to lock */
 		LockBuffer(parent->buffer, GIN_EXCLUSIVE);
 
+		/*
+		 * If the parent page was incompletely split, finish that split first,
+		 * then continue with the current one.
+		 *
+		 * Note: we have to finish *all* incomplete splits we encounter, even
+		 * if we have to move right. Otherwise we might choose as the target
+		 * a page that has no downlink in the parent, and splitting it further
+		 * would fail.
+		 */
+		if (GinPageIsIncompleteSplit(BufferGetPage(parent->buffer)))
+			ginFinishSplit(btree, rootBlkno, parent, false, buildStats);
+
 		/* move right if it's needed */
 		page = BufferGetPage(parent->buffer);
 		while ((parent->off = btree->findChildPtr(btree, page, stack->blkno, parent->off)) == InvalidOffsetNumber)
@@ -500,10 +655,78 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack, GinStatsData *buildStats)
 			parent->buffer = ginStepRight(parent->buffer, btree->index, GIN_EXCLUSIVE);
 			parent->blkno = rightlink;
 			page = BufferGetPage(parent->buffer);
+
+			if (GinPageIsIncompleteSplit(BufferGetPage(parent->buffer)))
+				ginFinishSplit(btree, rootBlkno, parent, false, buildStats);
 		}
 
-		UnlockReleaseBuffer(stack->buffer);
-		pfree(stack);
+		insertdata = btree->prepareDownlink(btree, stack->buffer);
+		updateblkno = GinPageGetOpaque(BufferGetPage(stack->buffer))->rightlink;
+		done = ginPlaceToPage(btree, parent,
+							  insertdata, updateblkno,
+							  stack->buffer, buildStats);
+		pfree(insertdata);
+
+		/*
+		 * If the caller requested to free the stack, unlock and release the
+		 * child buffer now. Otherwise keep it pinned and locked, but if
+		 * we have to recurse up the tree, we can unlock the upper pages,
+		 * only keeping the page at the bottom of the stack locked.
+		 */
+		if (!first || freestack)
+			LockBuffer(stack->buffer, GIN_UNLOCK);
+		if (freestack)
+		{
+			ReleaseBuffer(stack->buffer);
+			pfree(stack);
+		}
 		stack = parent;
+
+		first = false;
+	} while (!done);
+
+	if (freestack)
+	{
+		LockBuffer(stack->buffer, GIN_UNLOCK);
+		freeGinBtreeStack(stack);
 	}
 }
+
+/*
+ * Insert value (passed in 'insertdata') to tree described by stack
+ *
+ * During an index build, buildStats is non-null and the counters
+ * it contains are incremented as needed.
+ *
+ * NB: the passed-in stack is freed, as though by freeGinBtreeStack.
+ */
+void
+ginInsertValue(GinBtree btree, GinBtreeStack *stack, void *insertdata,
+			   GinStatsData *buildStats)
+{
+	GinBtreeStack *parent;
+	BlockNumber rootBlkno;
+	bool		done;
+
+	/* extract root BlockNumber from stack */
+	Assert(stack != NULL);
+	parent = stack;
+	while (parent->parent)
+		parent = parent->parent;
+	rootBlkno = parent->blkno;
+	Assert(BlockNumberIsValid(rootBlkno));
+
+	/* If the leaf page was incompletely split, finish the split first */
+	if (GinPageIsIncompleteSplit(BufferGetPage(stack->buffer)))
+		ginFinishSplit(btree, rootBlkno, stack, false, buildStats);
+
+	done = ginPlaceToPage(btree, stack, insertdata, InvalidBlockNumber,
+						  InvalidBuffer, buildStats);
+	if (done)
+	{
+		LockBuffer(stack->buffer, GIN_UNLOCK);
+		freeGinBtreeStack(stack);
+	}
+	else
+		ginFinishSplit(btree, rootBlkno, stack, true, buildStats);
+}
diff --git a/src/backend/access/gin/gindatapage.c b/src/backend/access/gin/gindatapage.c
index 908e28f..d4458f7 100644
--- a/src/backend/access/gin/gindatapage.c
+++ b/src/backend/access/gin/gindatapage.c
@@ -30,7 +30,7 @@ dataIsMoveRight(GinBtree btree, Page page)
 	if (GinPageRightMost(page))
 		return FALSE;
 
-	return (ginCompareItemPointers(btree->items + btree->curitem, iptr) > 0) ? TRUE : FALSE;
+	return (ginCompareItemPointers(&btree->itemptr, iptr) > 0) ? TRUE : FALSE;
 }
 
 /*
@@ -80,7 +80,7 @@ dataLocateItem(GinBtree btree, GinBtreeStack *stack)
 		else
 		{
 			pitem = GinDataPageGetPostingItem(page, mid);
-			result = ginCompareItemPointers(btree->items + btree->curitem, &(pitem->key));
+			result = ginCompareItemPointers(&btree->itemptr, &(pitem->key));
 		}
 
 		if (result == 0)
@@ -138,7 +138,7 @@ dataLocateLeafItem(GinBtree btree, GinBtreeStack *stack)
 	{
 		OffsetNumber mid = low + ((high - low) / 2);
 
-		result = ginCompareItemPointers(btree->items + btree->curitem,
+		result = ginCompareItemPointers(&btree->itemptr,
 										GinDataPageGetItemPointer(page, mid));
 
 		if (result == 0)
@@ -298,18 +298,18 @@ GinPageDeletePostingItem(Page page, OffsetNumber offset)
  * item pointer never deletes!
  */
 static bool
-dataIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off)
+dataIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off, void *insertdata)
 {
 	Page		page = BufferGetPage(buf);
 
 	Assert(GinPageIsData(page));
-	Assert(!btree->isDelete);
 
 	if (GinPageIsLeaf(page))
 	{
+		GinBtreeDataLeafInsertData *items = insertdata;
 		if (GinPageRightMost(page) && off > GinPageGetOpaque(page)->maxoff)
 		{
-			if ((btree->nitem - btree->curitem) * sizeof(ItemPointerData) <= GinDataPageGetFreeSpace(page))
+			if ((items->nitem - items->curitem) * sizeof(ItemPointerData) <= GinDataPageGetFreeSpace(page))
 				return true;
 		}
 		else if (sizeof(ItemPointerData) <= GinDataPageGetFreeSpace(page))
@@ -322,31 +322,6 @@ dataIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off)
 }
 
 /*
- * In case of previous split update old child blkno to
- * new right page
- * item pointer never deletes!
- */
-static BlockNumber
-dataPrepareData(GinBtree btree, Page page, OffsetNumber off)
-{
-	BlockNumber ret = InvalidBlockNumber;
-
-	Assert(GinPageIsData(page));
-
-	if (!GinPageIsLeaf(page) && btree->rightblkno != InvalidBlockNumber)
-	{
-		PostingItem *pitem = GinDataPageGetPostingItem(page, off);
-
-		PostingItemSetBlockNumber(pitem, btree->rightblkno);
-		ret = btree->rightblkno;
-	}
-
-	btree->rightblkno = InvalidBlockNumber;
-
-	return ret;
-}
-
-/*
  * Places keys to page and fills WAL record. In case leaf page and
  * build mode puts all ItemPointers to page.
  *
@@ -354,85 +329,74 @@ dataPrepareData(GinBtree btree, Page page, OffsetNumber off)
  */
 static bool
 dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off,
+				void *insertdata, BlockNumber updateblkno,
 				XLogRecData **prdata)
 {
 	Page		page = BufferGetPage(buf);
 	int			sizeofitem = GinSizeOfDataPageItem(page);
-	int			cnt = 0;
 
 	/* these must be static so they can be returned to caller */
 	static XLogRecData rdata[3];
-	static ginxlogInsert data;
 
 	/* quick exit if it doesn't fit */
-	if (!dataIsEnoughSpace(btree, buf, off))
+	if (!dataIsEnoughSpace(btree, buf, off, insertdata))
 		return false;
 
 	*prdata = rdata;
 	Assert(GinPageIsData(page));
 
-	data.updateBlkno = dataPrepareData(btree, page, off);
-
-	data.node = btree->index->rd_node;
-	data.blkno = BufferGetBlockNumber(buf);
-	data.offset = off;
-	data.nitem = 1;
-	data.isDelete = FALSE;
-	data.isData = TRUE;
-	data.isLeaf = GinPageIsLeaf(page) ? TRUE : FALSE;
-
-	/*
-	 * Prevent full page write if child's split occurs. That is needed to
-	 * remove incomplete splits while replaying WAL
-	 *
-	 * data.updateBlkno contains new block number (of newly created right
-	 * page) for recently splited page.
-	 */
-	if (data.updateBlkno == InvalidBlockNumber)
+	/* Update existing downlink to point to next page (on internal page) */
+	if (!GinPageIsLeaf(page))
 	{
-		rdata[0].buffer = buf;
-		rdata[0].buffer_std = FALSE;
-		rdata[0].data = NULL;
-		rdata[0].len = 0;
-		rdata[0].next = &rdata[1];
-		cnt++;
+		PostingItem *pitem = GinDataPageGetPostingItem(page, off);
+		PostingItemSetBlockNumber(pitem, updateblkno);
 	}
 
-	rdata[cnt].buffer = InvalidBuffer;
-	rdata[cnt].data = (char *) &data;
-	rdata[cnt].len = sizeof(ginxlogInsert);
-	rdata[cnt].next = &rdata[cnt + 1];
-	cnt++;
-
-	rdata[cnt].buffer = InvalidBuffer;
-	rdata[cnt].data = (GinPageIsLeaf(page)) ? ((char *) (btree->items + btree->curitem)) : ((char *) &(btree->pitem));
-	rdata[cnt].len = sizeofitem;
-	rdata[cnt].next = NULL;
-
 	if (GinPageIsLeaf(page))
 	{
+		GinBtreeDataLeafInsertData *items = insertdata;
+		static ginxlogInsertDataLeaf data;
+		data.nitem = 1;
+
 		if (GinPageRightMost(page) && off > GinPageGetOpaque(page)->maxoff)
 		{
 			/* usually, create index... */
-			uint32		savedPos = btree->curitem;
+			uint32		savedPos = items->curitem;
 
-			while (btree->curitem < btree->nitem)
+			while (items->curitem < items->nitem)
 			{
-				GinDataPageAddItemPointer(page, btree->items + btree->curitem, off);
+				GinDataPageAddItemPointer(page, items->items + items->curitem, off);
 				off++;
-				btree->curitem++;
+				items->curitem++;
 			}
-			data.nitem = btree->curitem - savedPos;
-			rdata[cnt].len = sizeofitem * data.nitem;
+			data.nitem = items->curitem - savedPos;
 		}
 		else
 		{
-			GinDataPageAddItemPointer(page, btree->items + btree->curitem, off);
-			btree->curitem++;
+			GinDataPageAddItemPointer(page, items->items + items->curitem, off);
+			items->curitem++;
 		}
+		/* curitem was advanced above; log the nitem items actually inserted */
+		rdata[0].buffer = InvalidBuffer;
+		rdata[0].data = (char *) &data;
+		rdata[0].len = offsetof(ginxlogInsertDataLeaf, items);
+		rdata[0].next = &rdata[1];
+
+		rdata[1].buffer = InvalidBuffer;
+		rdata[1].data = (char *) (items->items + items->curitem - data.nitem);
+		rdata[1].len = data.nitem * sizeofitem;
+		rdata[1].next = NULL;
 	}
 	else
-		GinDataPageAddPostingItem(page, &(btree->pitem), off);
+	{
+		PostingItem *pitem = insertdata;
+		GinDataPageAddPostingItem(page, pitem, off);
+
+		rdata[0].buffer = InvalidBuffer;
+		rdata[0].data = (char *) pitem;
+		rdata[0].len = sizeof(PostingItem);
+		rdata[0].next = NULL;
+	}
 
 	return true;
 }
@@ -444,7 +408,8 @@ dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off,
  * left page
  */
 static Page
-dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRecData **prdata)
+dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off,
+			  void *insertdata, BlockNumber updateblkno, XLogRecData **prdata)
 {
 	char	   *ptr;
 	OffsetNumber separator;
@@ -457,20 +422,23 @@ dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRe
 	Page		rpage = BufferGetPage(rbuf);
 	Size		pageSize = PageGetPageSize(lpage);
 	Size		freeSpace;
-	uint32		nCopied = 1;
 
 	/* these must be static so they can be returned to caller */
-	static ginxlogSplit data;
-	static XLogRecData rdata[4];
+	static ginxlogSplitData data;
+	static XLogRecData rdata[2];
 	static char vector[2 * BLCKSZ];
 
 	GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
 	freeSpace = GinDataPageGetFreeSpace(rpage);
 
 	*prdata = rdata;
-	data.leftChildBlkno = (GinPageIsLeaf(lpage)) ?
-		InvalidOffsetNumber : PostingItemGetBlockNumber(&(btree->pitem));
-	data.updateBlkno = dataPrepareData(btree, lpage, off);
+
+	/* Update existing downlink to point to next page (on internal page) */
+	if (!isleaf)
+	{
+		PostingItem *pitem = GinDataPageGetPostingItem(lpage, off);
+		PostingItemSetBlockNumber(pitem, updateblkno);
+	}
 
 	if (isleaf)
 	{
@@ -487,16 +455,15 @@ dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRe
 
 	if (isleaf && GinPageRightMost(lpage) && off > GinPageGetOpaque(lpage)->maxoff)
 	{
-		nCopied = 0;
-		while (btree->curitem < btree->nitem &&
+		GinBtreeDataLeafInsertData *items = insertdata;
+		while (items->curitem < items->nitem &&
 			   maxoff * sizeof(ItemPointerData) < 2 * (freeSpace - sizeof(ItemPointerData)))
 		{
 			memcpy(vector + maxoff * sizeof(ItemPointerData),
-				   btree->items + btree->curitem,
+				   items->items + items->curitem,
 				   sizeof(ItemPointerData));
 			maxoff++;
-			nCopied++;
-			btree->curitem++;
+			items->curitem++;
 		}
 	}
 	else
@@ -506,11 +473,15 @@ dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRe
 			memmove(ptr + sizeofitem, ptr, (maxoff - off + 1) * sizeofitem);
 		if (isleaf)
 		{
-			memcpy(ptr, btree->items + btree->curitem, sizeofitem);
-			btree->curitem++;
+			GinBtreeDataLeafInsertData *items = insertdata;
+			memcpy(ptr, items->items + items->curitem, sizeofitem);
+			items->curitem++;
 		}
 		else
-			memcpy(ptr, &(btree->pitem), sizeofitem);
+		{
+			PostingItem *pitem = insertdata;
+			memcpy(ptr, pitem, sizeofitem);
+		}
 
 		maxoff++;
 	}
@@ -559,20 +530,13 @@ dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRe
 	bound = GinDataPageGetRightBound(rpage);
 	*bound = oldbound;
 
-	data.node = btree->index->rd_node;
-	data.rootBlkno = InvalidBlockNumber;
-	data.lblkno = BufferGetBlockNumber(lbuf);
-	data.rblkno = BufferGetBlockNumber(rbuf);
 	data.separator = separator;
 	data.nitem = maxoff;
-	data.isData = TRUE;
-	data.isLeaf = GinPageIsLeaf(lpage) ? TRUE : FALSE;
-	data.isRootSplit = FALSE;
 	data.rightbound = oldbound;
 
 	rdata[0].buffer = InvalidBuffer;
 	rdata[0].data = (char *) &data;
-	rdata[0].len = sizeof(ginxlogSplit);
+	rdata[0].len = sizeof(ginxlogSplitData);
 	rdata[0].next = &rdata[1];
 
 	rdata[1].buffer = InvalidBuffer;
@@ -586,14 +550,16 @@ dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRe
 /*
  * Prepare the state in 'btree' for inserting a downlink for given buffer.
  */
-static void
+static void *
 dataPrepareDownlink(GinBtree btree, Buffer lbuf)
 {
+	PostingItem	*pitem = palloc(sizeof(PostingItem));
 	Page		lpage = BufferGetPage(lbuf);
 
-	PostingItemSetBlockNumber(&(btree->pitem), BufferGetBlockNumber(lbuf));
-	btree->pitem.key = *GinDataPageGetRightBound(lpage);
-	btree->rightblkno = GinPageGetOpaque(lpage)->rightlink;
+	PostingItemSetBlockNumber(pitem, BufferGetBlockNumber(lbuf));
+	pitem->key = *GinDataPageGetRightBound(lpage);
+
+	return pitem;
 }
 
 /*
@@ -601,21 +567,18 @@ dataPrepareDownlink(GinBtree btree, Buffer lbuf)
  * Also called from ginxlog, should not use btree
  */
 void
-ginDataFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf)
+ginDataFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage)
 {
-	Page		page = BufferGetPage(root),
-				lpage = BufferGetPage(lbuf),
-				rpage = BufferGetPage(rbuf);
 	PostingItem li,
 				ri;
 
 	li.key = *GinDataPageGetRightBound(lpage);
-	PostingItemSetBlockNumber(&li, BufferGetBlockNumber(lbuf));
-	GinDataPageAddPostingItem(page, &li, InvalidOffsetNumber);
+	PostingItemSetBlockNumber(&li, lblkno);
+	GinDataPageAddPostingItem(root, &li, InvalidOffsetNumber);
 
 	ri.key = *GinDataPageGetRightBound(rpage);
-	PostingItemSetBlockNumber(&ri, BufferGetBlockNumber(rbuf));
-	GinDataPageAddPostingItem(page, &ri, InvalidOffsetNumber);
+	PostingItemSetBlockNumber(&ri, rblkno);
+	GinDataPageAddPostingItem(root, &ri, InvalidOffsetNumber);
 }
 
 /*
@@ -715,7 +678,6 @@ ginPrepareDataScan(GinBtree btree, Relation index)
 	btree->prepareDownlink = dataPrepareDownlink;
 
 	btree->isData = TRUE;
-	btree->isDelete = FALSE;
 	btree->fullScan = FALSE;
 	btree->isBuild = FALSE;
 }
@@ -729,29 +691,32 @@ ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
 					  GinStatsData *buildStats)
 {
 	GinBtreeData btree;
+	GinBtreeDataLeafInsertData insertdata;
 	GinBtreeStack *stack;
 
 	ginPrepareDataScan(&btree, index);
 	btree.isBuild = (buildStats != NULL);
-	btree.items = items;
-	btree.nitem = nitem;
-	btree.curitem = 0;
+	insertdata.items = items;
+	insertdata.nitem = nitem;
+	insertdata.curitem = 0;
 
-	while (btree.curitem < btree.nitem)
+	while (insertdata.curitem < insertdata.nitem)
 	{
+		/* search for the leaf page where the first item should go to */
+		btree.itemptr = insertdata.items[insertdata.curitem];
 		stack = ginFindLeafPage(&btree, rootBlkno, false);
 
 		if (btree.findItem(&btree, stack))
 		{
 			/*
-			 * btree.items[btree.curitem] already exists in index
+			 * Current item already exists in index.
 			 */
-			btree.curitem++;
+			insertdata.curitem++;
 			LockBuffer(stack->buffer, GIN_UNLOCK);
 			freeGinBtreeStack(stack);
 		}
 		else
-			ginInsertValue(&btree, stack, buildStats);
+			ginInsertValue(&btree, stack, &insertdata, buildStats);
 	}
 }
 
diff --git a/src/backend/access/gin/ginentrypage.c b/src/backend/access/gin/ginentrypage.c
index 378bce1..c6096c2 100644
--- a/src/backend/access/gin/ginentrypage.c
+++ b/src/backend/access/gin/ginentrypage.c
@@ -431,22 +431,23 @@ entryGetLeftMostPage(GinBtree btree, Page page)
 }
 
 static bool
-entryIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off)
+entryIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off,
+				   GinBtreeEntryInsertData *entry)
 {
 	Size		itupsz = 0;
 	Page		page = BufferGetPage(buf);
 
-	Assert(btree->entry);
+	Assert(entry->entry);
 	Assert(!GinPageIsData(page));
 
-	if (btree->isDelete)
+	if (entry->isDelete)
 	{
 		IndexTuple	itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
 
 		itupsz = MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData);
 	}
 
-	if (PageGetFreeSpace(page) + itupsz >= MAXALIGN(IndexTupleSize(btree->entry)) + sizeof(ItemIdData))
+	if (PageGetFreeSpace(page) + itupsz >= MAXALIGN(IndexTupleSize(entry->entry)) + sizeof(ItemIdData))
 		return true;
 
 	return false;
@@ -457,31 +458,25 @@ entryIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off)
  * should update it, update old child blkno to new right page
  * if child split occurred
  */
-static BlockNumber
-entryPreparePage(GinBtree btree, Page page, OffsetNumber off)
+static void
+entryPreparePage(GinBtree btree, Page page, OffsetNumber off,
+				 GinBtreeEntryInsertData *entry, BlockNumber updateblkno)
 {
-	BlockNumber ret = InvalidBlockNumber;
-
-	Assert(btree->entry);
+	Assert(entry->entry);
 	Assert(!GinPageIsData(page));
 
-	if (btree->isDelete)
+	if (entry->isDelete)
 	{
 		Assert(GinPageIsLeaf(page));
 		PageIndexTupleDelete(page, off);
 	}
 
-	if (!GinPageIsLeaf(page) && btree->rightblkno != InvalidBlockNumber)
+	if (!GinPageIsLeaf(page) && updateblkno != InvalidBlockNumber)
 	{
 		IndexTuple	itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
 
-		GinSetDownlink(itup, btree->rightblkno);
-		ret = btree->rightblkno;
+		GinSetDownlink(itup, updateblkno);
 	}
-
-	btree->rightblkno = InvalidBlockNumber;
-
-	return ret;
 }
 
 /*
@@ -491,66 +486,43 @@ entryPreparePage(GinBtree btree, Page page, OffsetNumber off)
  */
 static bool
 entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off,
+				 void *insertdata, BlockNumber updateblkno,
 				 XLogRecData **prdata)
 {
+	GinBtreeEntryInsertData *entry = insertdata;
 	Page		page = BufferGetPage(buf);
 	OffsetNumber placed;
 	int			cnt = 0;
 
 	/* these must be static so they can be returned to caller */
 	static XLogRecData rdata[3];
-	static ginxlogInsert data;
+	static ginxlogInsertEntry data;
 
 	/* quick exit if it doesn't fit */
-	if (!entryIsEnoughSpace(btree, buf, off))
+	if (!entryIsEnoughSpace(btree, buf, off, entry))
 		return false;
 
 	*prdata = rdata;
-	data.updateBlkno = entryPreparePage(btree, page, off);
+	entryPreparePage(btree, page, off, entry, updateblkno);
 
-	placed = PageAddItem(page, (Item) btree->entry, IndexTupleSize(btree->entry), off, false, false);
+	placed = PageAddItem(page, (Item) entry->entry, IndexTupleSize(entry->entry), off, false, false);
 	if (placed != off)
 		elog(ERROR, "failed to add item to index page in \"%s\"",
 			 RelationGetRelationName(btree->index));
 
-	data.node = btree->index->rd_node;
-	data.blkno = BufferGetBlockNumber(buf);
-	data.offset = off;
-	data.nitem = 1;
-	data.isDelete = btree->isDelete;
-	data.isData = false;
-	data.isLeaf = GinPageIsLeaf(page) ? TRUE : FALSE;
-
-	/*
-	 * Prevent full page write if child's split occurs. That is needed to
-	 * remove incomplete splits while replaying WAL
-	 *
-	 * data.updateBlkno contains new block number (of newly created right
-	 * page) for recently splited page.
-	 */
-	if (data.updateBlkno == InvalidBlockNumber)
-	{
-		rdata[0].buffer = buf;
-		rdata[0].buffer_std = TRUE;
-		rdata[0].data = NULL;
-		rdata[0].len = 0;
-		rdata[0].next = &rdata[1];
-		cnt++;
-	}
+	data.isDelete = entry->isDelete;
 
 	rdata[cnt].buffer = InvalidBuffer;
 	rdata[cnt].data = (char *) &data;
-	rdata[cnt].len = sizeof(ginxlogInsert);
+	rdata[cnt].len = offsetof(ginxlogInsertEntry, tuple);
 	rdata[cnt].next = &rdata[cnt + 1];
 	cnt++;
 
 	rdata[cnt].buffer = InvalidBuffer;
-	rdata[cnt].data = (char *) btree->entry;
-	rdata[cnt].len = IndexTupleSize(btree->entry);
+	rdata[cnt].data = (char *) entry->entry;
+	rdata[cnt].len = IndexTupleSize(entry->entry);
 	rdata[cnt].next = NULL;
 
-	btree->entry = NULL;
-
 	return true;
 }
 
@@ -561,8 +533,10 @@ entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off,
  * an equal number!
  */
 static Page
-entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRecData **prdata)
+entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off,
+			   void *insertdata, BlockNumber updateblkno, XLogRecData **prdata)
 {
+	GinBtreeEntryInsertData *entry = insertdata;
 	OffsetNumber i,
 				maxoff,
 				separator = InvalidOffsetNumber;
@@ -578,13 +552,11 @@ entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogR
 
 	/* these must be static so they can be returned to caller */
 	static XLogRecData rdata[2];
-	static ginxlogSplit data;
+	static ginxlogSplitEntry data;
 	static char tupstore[2 * BLCKSZ];
 
 	*prdata = rdata;
-	data.leftChildBlkno = (GinPageIsLeaf(lpage)) ?
-		InvalidOffsetNumber : GinGetDownlink(btree->entry);
-	data.updateBlkno = entryPreparePage(btree, lpage, off);
+	entryPreparePage(btree, lpage, off, entry, updateblkno);
 
 	maxoff = PageGetMaxOffsetNumber(lpage);
 	ptr = tupstore;
@@ -593,8 +565,8 @@ entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogR
 	{
 		if (i == off)
 		{
-			size = MAXALIGN(IndexTupleSize(btree->entry));
-			memcpy(ptr, btree->entry, size);
+			size = MAXALIGN(IndexTupleSize(entry->entry));
+			memcpy(ptr, entry->entry, size);
 			ptr += size;
 			totalsize += size + sizeof(ItemIdData);
 		}
@@ -608,8 +580,8 @@ entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogR
 
 	if (off == maxoff + 1)
 	{
-		size = MAXALIGN(IndexTupleSize(btree->entry));
-		memcpy(ptr, btree->entry, size);
+		size = MAXALIGN(IndexTupleSize(entry->entry));
+		memcpy(ptr, entry->entry, size);
 		ptr += size;
 		totalsize += size + sizeof(ItemIdData);
 	}
@@ -643,15 +615,8 @@ entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogR
 		ptr += MAXALIGN(IndexTupleSize(itup));
 	}
 
-	data.node = btree->index->rd_node;
-	data.rootBlkno = InvalidBlockNumber;
-	data.lblkno = BufferGetBlockNumber(lbuf);
-	data.rblkno = BufferGetBlockNumber(rbuf);
 	data.separator = separator;
 	data.nitem = maxoff;
-	data.isData = FALSE;
-	data.isLeaf = GinPageIsLeaf(lpage) ? TRUE : FALSE;
-	data.isRootSplit = FALSE;
 
 	rdata[0].buffer = InvalidBuffer;
 	rdata[0].data = (char *) &data;
@@ -669,17 +634,19 @@ entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogR
 /*
  * Prepare the state in 'btree' for inserting a downlink for given buffer.
  */
-static void
+static void *
 entryPrepareDownlink(GinBtree btree, Buffer lbuf)
 {
+	GinBtreeEntryInsertData *entry = palloc(sizeof(GinBtreeEntryInsertData));
 	Page		lpage = BufferGetPage(lbuf);
 	IndexTuple	itup;
 
 	itup = getRightMostTuple(lpage);
-
-	btree->entry = GinFormInteriorTuple(itup, lpage,
+	entry->entry = GinFormInteriorTuple(itup, lpage,
 										BufferGetBlockNumber(lbuf));
-	btree->rightblkno = GinPageGetOpaque(lpage)->rightlink;
+	entry->isDelete = false;
+
+	return entry;
 }
 
 /*
@@ -687,22 +654,19 @@ entryPrepareDownlink(GinBtree btree, Buffer lbuf)
  * Also called from ginxlog, should not use btree
  */
 void
-ginEntryFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf)
+ginEntryFillRoot(GinBtree btree, Page root,
+				 BlockNumber lblkno, Page lpage,
+				 BlockNumber rblkno, Page rpage)
 {
-	Page		page = BufferGetPage(root);
-	Page		lpage = BufferGetPage(lbuf);
-	Page		rpage = BufferGetPage(rbuf);
 	IndexTuple	itup;
 
-	itup = GinFormInteriorTuple(getRightMostTuple(lpage), lpage,
-								BufferGetBlockNumber(lbuf));
-	if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
+	itup = GinFormInteriorTuple(getRightMostTuple(lpage), lpage, lblkno);
+	if (PageAddItem(root, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
 		elog(ERROR, "failed to add item to index root page");
 	pfree(itup);
 
-	itup = GinFormInteriorTuple(getRightMostTuple(rpage), rpage,
-								BufferGetBlockNumber(rbuf));
-	if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
+	itup = GinFormInteriorTuple(getRightMostTuple(rpage), rpage, rblkno);
+	if (PageAddItem(root, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
 		elog(ERROR, "failed to add item to index root page");
 	pfree(itup);
 }
@@ -740,5 +704,4 @@ ginPrepareEntryScan(GinBtree btree, OffsetNumber attnum,
 	btree->entryAttnum = attnum;
 	btree->entryKey = key;
 	btree->entryCategory = category;
-	btree->isDelete = FALSE;
 }
diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c
index 0a2883a..cb58052 100644
--- a/src/backend/access/gin/gininsert.c
+++ b/src/backend/access/gin/gininsert.c
@@ -163,10 +163,13 @@ ginEntryInsert(GinState *ginstate,
 			   GinStatsData *buildStats)
 {
 	GinBtreeData btree;
+	GinBtreeEntryInsertData insertdata;
 	GinBtreeStack *stack;
 	IndexTuple	itup;
 	Page		page;
 
+	insertdata.isDelete = FALSE;
+
 	/* During index build, count the to-be-inserted entry */
 	if (buildStats)
 		buildStats->nEntries++;
@@ -201,7 +204,7 @@ ginEntryInsert(GinState *ginstate,
 		itup = addItemPointersToLeafTuple(ginstate, itup,
 										  items, nitem, buildStats);
 
-		btree.isDelete = TRUE;
+		insertdata.isDelete = TRUE;
 	}
 	else
 	{
@@ -211,8 +214,8 @@ ginEntryInsert(GinState *ginstate,
 	}
 
 	/* Insert the new or modified leaf tuple */
-	btree.entry = itup;
-	ginInsertValue(&btree, stack, buildStats);
+	insertdata.entry = itup;
+	ginInsertValue(&btree, stack, &insertdata, buildStats);
 	pfree(itup);
 }
 
diff --git a/src/backend/access/gin/ginxlog.c b/src/backend/access/gin/ginxlog.c
index ddac343..cae5251 100644
--- a/src/backend/access/gin/ginxlog.c
+++ b/src/backend/access/gin/ginxlog.c
@@ -18,55 +18,27 @@
 #include "utils/memutils.h"
 
 static MemoryContext opCtx;		/* working memory for operations */
-static MemoryContext topCtx;
-
-typedef struct ginIncompleteSplit
-{
-	RelFileNode node;
-	BlockNumber leftBlkno;
-	BlockNumber rightBlkno;
-	BlockNumber rootBlkno;
-} ginIncompleteSplit;
-
-static List *incomplete_splits;
 
 static void
-pushIncompleteSplit(RelFileNode node, BlockNumber leftBlkno, BlockNumber rightBlkno, BlockNumber rootBlkno)
+ginRedoClearIncompleteSplit(XLogRecPtr lsn, RelFileNode node, BlockNumber blkno)
 {
-	ginIncompleteSplit *split;
-
-	MemoryContextSwitchTo(topCtx);
-
-	split = palloc(sizeof(ginIncompleteSplit));
-
-	split->node = node;
-	split->leftBlkno = leftBlkno;
-	split->rightBlkno = rightBlkno;
-	split->rootBlkno = rootBlkno;
-
-	incomplete_splits = lappend(incomplete_splits, split);
-
-	MemoryContextSwitchTo(opCtx);
-}
+	Buffer		buffer;
+	Page		page;
 
-static void
-forgetIncompleteSplit(RelFileNode node, BlockNumber leftBlkno, BlockNumber updateBlkno)
-{
-	ListCell   *l;
+	buffer = XLogReadBuffer(node, blkno, false);
+	if (!BufferIsValid(buffer))
+		return;					/* page was deleted, nothing to do */
+	page = (Page) BufferGetPage(buffer);
 
-	foreach(l, incomplete_splits)
+	if (lsn > PageGetLSN(page))
 	{
-		ginIncompleteSplit *split = (ginIncompleteSplit *) lfirst(l);
+		GinPageGetOpaque(page)->flags &= ~GIN_INCOMPLETE_SPLIT;
 
-		if (RelFileNodeEquals(node, split->node) &&
-			leftBlkno == split->leftBlkno &&
-			updateBlkno == split->rightBlkno)
-		{
-			incomplete_splits = list_delete_ptr(incomplete_splits, split);
-			pfree(split);
-			break;
-		}
+		PageSetLSN(page, lsn);
+		MarkBufferDirty(buffer);
 	}
+
+	UnlockReleaseBuffer(buffer);
 }
 
 static void
@@ -128,44 +100,104 @@ ginRedoCreatePTree(XLogRecPtr lsn, XLogRecord *record)
 }
 
 static void
-ginRedoInsert(XLogRecPtr lsn, XLogRecord *record)
+ginRedoInsertEntry(Buffer buffer, OffsetNumber offset, BlockNumber rightblkno,
+				   void *rdata)
 {
-	ginxlogInsert *data = (ginxlogInsert *) XLogRecGetData(record);
-	Buffer		buffer;
-	Page		page;
+	Page		page = BufferGetPage(buffer);
+	ginxlogInsertEntry *data = (ginxlogInsertEntry *) rdata;
+	IndexTuple	itup;
 
-	/* first, forget any incomplete split this insertion completes */
-	if (data->isData)
+	if (rightblkno != InvalidBlockNumber)
 	{
-		Assert(data->isDelete == FALSE);
-		if (!data->isLeaf && data->updateBlkno != InvalidBlockNumber)
-		{
-			PostingItem *pitem;
+		/* update link to right page after split */
+		Assert(!GinPageIsLeaf(page));
+		Assert(offset >= FirstOffsetNumber && offset <= PageGetMaxOffsetNumber(page));
+		itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offset));
+		GinSetDownlink(itup, rightblkno);
+	}
 
-			pitem = (PostingItem *) (XLogRecGetData(record) + sizeof(ginxlogInsert));
-			forgetIncompleteSplit(data->node,
-								  PostingItemGetBlockNumber(pitem),
-								  data->updateBlkno);
-		}
+	if (data->isDelete)
+	{
+		Assert(GinPageIsLeaf(page));
+		Assert(offset >= FirstOffsetNumber && offset <= PageGetMaxOffsetNumber(page));
+		PageIndexTupleDelete(page, offset);
+	}
+
+	itup = &data->tuple;
+
+	if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), offset, false, false) == InvalidOffsetNumber)
+	{
+		RelFileNode node;
+		ForkNumber forknum;
+		BlockNumber blknum;
 
+		BufferGetTag(buffer, &node, &forknum, &blknum);
+		elog(ERROR, "failed to add item to index page in %u/%u/%u",
+			 node.spcNode, node.dbNode, node.relNode);
+	}
+}
+
+static void
+ginRedoInsertData(Buffer buffer, OffsetNumber offset, BlockNumber rightblkno,
+				  void *rdata)
+{
+	Page		page = BufferGetPage(buffer);
+
+	if (GinPageIsLeaf(page))
+	{
+		ginxlogInsertDataLeaf *data = (ginxlogInsertDataLeaf *) rdata;
+		ItemPointerData *items = data->items;
+		OffsetNumber i;
+
+		for (i = 0; i < data->nitem; i++)
+			GinDataPageAddItemPointer(page, &items[i], offset + i);
 	}
 	else
 	{
-		if (!data->isLeaf && data->updateBlkno != InvalidBlockNumber)
-		{
-			IndexTuple	itup;
+		PostingItem *oldpitem;
 
-			itup = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogInsert));
-			forgetIncompleteSplit(data->node,
-								  GinGetDownlink(itup),
-								  data->updateBlkno);
-		}
+		/* update link to right page after split */
+		oldpitem = GinDataPageGetPostingItem(page, offset);
+		PostingItemSetBlockNumber(oldpitem, rightblkno);
+
+		GinDataPageAddPostingItem(page, (PostingItem *) rdata, offset);
+	}
+}
+
+static void
+ginRedoInsert(XLogRecPtr lsn, XLogRecord *record)
+{
+	ginxlogInsert *data = (ginxlogInsert *) XLogRecGetData(record);
+	Buffer		buffer;
+	Page		page;
+	char	   *payload;
+	BlockNumber leftChildBlkno = InvalidBlockNumber;
+	BlockNumber rightChildBlkno = InvalidBlockNumber;
+	bool		isLeaf = (data->flags & GIN_SPLIT_ISLEAF) != 0;
+
+	payload = XLogRecGetData(record) + sizeof(ginxlogInsert);
+
+	/*
+	 * First clear incomplete-split flag on child page if this finishes
+	 * a split
+	 */
+	if (!isLeaf)
+	{
+		memcpy(&leftChildBlkno, payload, sizeof(BlockNumber));
+		payload += sizeof(BlockNumber);
+		memcpy(&rightChildBlkno, payload, sizeof(BlockNumber));
+		payload += sizeof(BlockNumber);
+
+		if (record->xl_info & XLR_BKP_BLOCK(0))
+			(void) RestoreBackupBlock(lsn, record, 0, false, false);
+		else
+			ginRedoClearIncompleteSplit(lsn, data->node, leftChildBlkno);
 	}
 
 	/* If we have a full-page image, restore it and we're done */
-	if (record->xl_info & XLR_BKP_BLOCK(0))
+	if (record->xl_info & XLR_BKP_BLOCK(isLeaf ? 0 : 1))
 	{
-		(void) RestoreBackupBlock(lsn, record, 0, false, false);
+		(void) RestoreBackupBlock(lsn, record, isLeaf ? 0 : 1, false, false);
 		return;
 	}
 
@@ -176,74 +208,88 @@ ginRedoInsert(XLogRecPtr lsn, XLogRecord *record)
 
 	if (lsn > PageGetLSN(page))
 	{
-		if (data->isData)
+		/* How to insert the payload is tree-type specific */
+		if (data->flags & GIN_SPLIT_ISDATA)
 		{
 			Assert(GinPageIsData(page));
-
-			if (data->isLeaf)
-			{
-				OffsetNumber i;
-				ItemPointerData *items = (ItemPointerData *) (XLogRecGetData(record) + sizeof(ginxlogInsert));
-
-				Assert(GinPageIsLeaf(page));
-				Assert(data->updateBlkno == InvalidBlockNumber);
-
-				for (i = 0; i < data->nitem; i++)
-					GinDataPageAddItemPointer(page, &items[i], data->offset + i);
-			}
-			else
-			{
-				PostingItem *pitem;
-
-				Assert(!GinPageIsLeaf(page));
-
-				if (data->updateBlkno != InvalidBlockNumber)
-				{
-					/* update link to right page after split */
-					pitem = GinDataPageGetPostingItem(page, data->offset);
-					PostingItemSetBlockNumber(pitem, data->updateBlkno);
-				}
-
-				pitem = (PostingItem *) (XLogRecGetData(record) + sizeof(ginxlogInsert));
-
-				GinDataPageAddPostingItem(page, pitem, data->offset);
-			}
+			ginRedoInsertData(buffer, data->offset, rightChildBlkno, payload);
 		}
 		else
 		{
-			IndexTuple	itup;
-
 			Assert(!GinPageIsData(page));
+			ginRedoInsertEntry(buffer, data->offset, rightChildBlkno, payload);
+		}
 
-			if (data->updateBlkno != InvalidBlockNumber)
-			{
-				/* update link to right page after split */
-				Assert(!GinPageIsLeaf(page));
-				Assert(data->offset >= FirstOffsetNumber && data->offset <= PageGetMaxOffsetNumber(page));
-				itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, data->offset));
-				GinSetDownlink(itup, data->updateBlkno);
-			}
+		PageSetLSN(page, lsn);
+		MarkBufferDirty(buffer);
+	}
 
-			if (data->isDelete)
-			{
-				Assert(GinPageIsLeaf(page));
-				Assert(data->offset >= FirstOffsetNumber && data->offset <= PageGetMaxOffsetNumber(page));
-				PageIndexTupleDelete(page, data->offset);
-			}
+	UnlockReleaseBuffer(buffer);
+}
 
-			itup = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogInsert));
+static void
+ginRedoSplitEntry(Buffer lbuffer, Buffer rbuffer, void *rdata)
+{
+	Page		lpage = BufferGetPage(lbuffer);
+	Page		rpage = BufferGetPage(rbuffer);
+	ginxlogSplitEntry *data = (ginxlogSplitEntry *) rdata;
+	IndexTuple	itup = (IndexTuple) ((char *) rdata + sizeof(ginxlogSplitEntry));
+	OffsetNumber i;
 
-			if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), data->offset, false, false) == InvalidOffsetNumber)
-				elog(ERROR, "failed to add item to index page in %u/%u/%u",
-				  data->node.spcNode, data->node.dbNode, data->node.relNode);
-		}
+	for (i = 0; i < data->separator; i++)
+	{
+		if (PageAddItem(lpage, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
+			elog(ERROR, "failed to add item to gin index page");
+		itup = (IndexTuple) (((char *) itup) + MAXALIGN(IndexTupleSize(itup)));
+	}
 
-		PageSetLSN(page, lsn);
+	for (i = data->separator; i < data->nitem; i++)
+	{
+		if (PageAddItem(rpage, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
+			elog(ERROR, "failed to add item to gin index page");
+		itup = (IndexTuple) (((char *) itup) + MAXALIGN(IndexTupleSize(itup)));
+	}
+}
 
-		MarkBufferDirty(buffer);
+static void
+ginRedoSplitData(Buffer lbuffer, Buffer rbuffer, void *rdata)
+{
+	Page		lpage = BufferGetPage(lbuffer);
+	Page		rpage = BufferGetPage(rbuffer);
+	ginxlogSplitData *data = (ginxlogSplitData *) rdata;
+	bool		isleaf = GinPageIsLeaf(lpage);
+	char	   *ptr = (char *) rdata + sizeof(ginxlogSplitData);	/* items follow the data-split header */
+	Size		sizeofitem = isleaf ? sizeof(ItemPointerData) : sizeof(PostingItem);
+	OffsetNumber i;
+	ItemPointer bound;
+
+	for (i = 0; i < data->separator; i++)
+	{
+		if (isleaf)
+			GinDataPageAddItemPointer(lpage, (ItemPointer) ptr, InvalidOffsetNumber);
+		else
+			GinDataPageAddPostingItem(lpage, (PostingItem *) ptr, InvalidOffsetNumber);
+		ptr += sizeofitem;
 	}
 
-	UnlockReleaseBuffer(buffer);
+	for (i = data->separator; i < data->nitem; i++)
+	{
+		if (isleaf)
+			GinDataPageAddItemPointer(rpage, (ItemPointer) ptr, InvalidOffsetNumber);
+		else
+			GinDataPageAddPostingItem(rpage, (PostingItem *) ptr, InvalidOffsetNumber);
+		ptr += sizeofitem;
+	}
+
+	/* set up right key */
+	bound = GinDataPageGetRightBound(lpage);
+	if (isleaf)
+		*bound = *GinDataPageGetItemPointer(lpage, GinPageGetOpaque(lpage)->maxoff);
+	else
+		*bound = GinDataPageGetPostingItem(lpage, GinPageGetOpaque(lpage)->maxoff)->key;
+
+	bound = GinDataPageGetRightBound(rpage);
+	*bound = data->rightbound;
 
 static void
@@ -255,14 +301,30 @@ ginRedoSplit(XLogRecPtr lsn, XLogRecord *record)
 	Page		lpage,
 				rpage;
 	uint32		flags = 0;
+	char	   *payload;
+	bool		isLeaf = (data->flags & GIN_SPLIT_ISLEAF) != 0;
+	bool		isData = (data->flags & GIN_SPLIT_ISDATA) != 0;
+	bool		isRoot = (data->flags & GIN_SPLIT_ROOT) != 0;
+
+	payload = XLogRecGetData(record) + sizeof(ginxlogSplit);
 
-	if (data->isLeaf)
+	/*
+	 * First clear incomplete-split flag on child page if this finishes
+	 * a split
+	 */
+	if (!isLeaf)
+	{
+		if (record->xl_info & XLR_BKP_BLOCK(0))
+			(void) RestoreBackupBlock(lsn, record, 0, false, false);
+		else
+			ginRedoClearIncompleteSplit(lsn, data->node, data->leftChildBlkno);
+	}
+
+	if (isLeaf)
 		flags |= GIN_LEAF;
-	if (data->isData)
-		flags |= GIN_DATA;
 
-	/* Backup blocks are not used in split records */
-	Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+	if (isData)
+		flags |= GIN_DATA;
 
 	lbuffer = XLogReadBuffer(data->node, data->lblkno, true);
 	Assert(BufferIsValid(lbuffer));
@@ -277,62 +339,11 @@ ginRedoSplit(XLogRecPtr lsn, XLogRecord *record)
 	GinPageGetOpaque(lpage)->rightlink = BufferGetBlockNumber(rbuffer);
 	GinPageGetOpaque(rpage)->rightlink = data->rrlink;
 
-	if (data->isData)
-	{
-		char	   *ptr = XLogRecGetData(record) + sizeof(ginxlogSplit);
-		Size		sizeofitem = GinSizeOfDataPageItem(lpage);
-		OffsetNumber i;
-		ItemPointer bound;
-
-		for (i = 0; i < data->separator; i++)
-		{
-			if (data->isLeaf)
-				GinDataPageAddItemPointer(lpage, (ItemPointer) ptr, InvalidOffsetNumber);
-			else
-				GinDataPageAddPostingItem(lpage, (PostingItem *) ptr, InvalidOffsetNumber);
-			ptr += sizeofitem;
-		}
-
-		for (i = data->separator; i < data->nitem; i++)
-		{
-			if (data->isLeaf)
-				GinDataPageAddItemPointer(rpage, (ItemPointer) ptr, InvalidOffsetNumber);
-			else
-				GinDataPageAddPostingItem(rpage, (PostingItem *) ptr, InvalidOffsetNumber);
-			ptr += sizeofitem;
-		}
-
-		/* set up right key */
-		bound = GinDataPageGetRightBound(lpage);
-		if (data->isLeaf)
-			*bound = *GinDataPageGetItemPointer(lpage, GinPageGetOpaque(lpage)->maxoff);
-		else
-			*bound = GinDataPageGetPostingItem(lpage, GinPageGetOpaque(lpage)->maxoff)->key;
-
-		bound = GinDataPageGetRightBound(rpage);
-		*bound = data->rightbound;
-	}
+	/* Do the tree-type specific portion to restore the page contents */
+	if (data->flags & GIN_SPLIT_ISDATA)
+		ginRedoSplitData(lbuffer, rbuffer, payload);
 	else
-	{
-		IndexTuple	itup = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogSplit));
-		OffsetNumber i;
-
-		for (i = 0; i < data->separator; i++)
-		{
-			if (PageAddItem(lpage, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
-				elog(ERROR, "failed to add item to index page in %u/%u/%u",
-				  data->node.spcNode, data->node.dbNode, data->node.relNode);
-			itup = (IndexTuple) (((char *) itup) + MAXALIGN(IndexTupleSize(itup)));
-		}
-
-		for (i = data->separator; i < data->nitem; i++)
-		{
-			if (PageAddItem(rpage, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
-				elog(ERROR, "failed to add item to index page in %u/%u/%u",
-				  data->node.spcNode, data->node.dbNode, data->node.relNode);
-			itup = (IndexTuple) (((char *) itup) + MAXALIGN(IndexTupleSize(itup)));
-		}
-	}
+		ginRedoSplitEntry(lbuffer, rbuffer, payload);
 
 	PageSetLSN(rpage, lsn);
 	MarkBufferDirty(rbuffer);
@@ -340,25 +351,31 @@ ginRedoSplit(XLogRecPtr lsn, XLogRecord *record)
 	PageSetLSN(lpage, lsn);
 	MarkBufferDirty(lbuffer);
 
-	if (!data->isLeaf && data->updateBlkno != InvalidBlockNumber)
-		forgetIncompleteSplit(data->node, data->leftChildBlkno, data->updateBlkno);
-
-	if (data->isRootSplit)
+	if (isRoot)
 	{
-		Buffer		rootBuf = XLogReadBuffer(data->node, data->rootBlkno, true);
+		BlockNumber	rootBlkno = data->rrlink;
+		Buffer		rootBuf = XLogReadBuffer(data->node, rootBlkno, true);
 		Page		rootPage = BufferGetPage(rootBuf);
 
 		GinInitBuffer(rootBuf, flags & ~GIN_LEAF);
 
-		if (data->isData)
+		if (isData)
 		{
-			Assert(data->rootBlkno != GIN_ROOT_BLKNO);
-			ginDataFillRoot(NULL, rootBuf, lbuffer, rbuffer);
+			Assert(rootBlkno != GIN_ROOT_BLKNO);
+			ginDataFillRoot(NULL, BufferGetPage(rootBuf),
+							BufferGetBlockNumber(lbuffer),
+							BufferGetPage(lbuffer),
+							BufferGetBlockNumber(rbuffer),
+							BufferGetPage(rbuffer));
 		}
 		else
 		{
-			Assert(data->rootBlkno == GIN_ROOT_BLKNO);
-			ginEntryFillRoot(NULL, rootBuf, lbuffer, rbuffer);
+			Assert(rootBlkno == GIN_ROOT_BLKNO);
+			ginEntryFillRoot(NULL, BufferGetPage(rootBuf),
+							 BufferGetBlockNumber(lbuffer),
+							 BufferGetPage(lbuffer),
+							 BufferGetBlockNumber(rbuffer),
+							 BufferGetPage(rbuffer));
 		}
 
 		PageSetLSN(rootPage, lsn);
@@ -366,8 +383,6 @@ ginRedoSplit(XLogRecPtr lsn, XLogRecord *record)
 		MarkBufferDirty(rootBuf);
 		UnlockReleaseBuffer(rootBuf);
 	}
-	else
-		pushIncompleteSplit(data->node, data->lblkno, data->rblkno, data->rootBlkno);
 
 	UnlockReleaseBuffer(rbuffer);
 	UnlockReleaseBuffer(lbuffer);
@@ -711,6 +726,7 @@ void
 gin_redo(XLogRecPtr lsn, XLogRecord *record)
 {
 	uint8		info = record->xl_info & ~XLR_INFO_MASK;
+	MemoryContext oldCtx;
 
 	/*
 	 * GIN indexes do not require any conflict processing. NB: If we ever
@@ -718,7 +734,7 @@ gin_redo(XLogRecPtr lsn, XLogRecord *record)
 	 * killed tuples outside VACUUM, we'll need to handle that here.
 	 */
 
-	topCtx = MemoryContextSwitchTo(opCtx);
+	oldCtx = MemoryContextSwitchTo(opCtx);
 	switch (info)
 	{
 		case XLOG_GIN_CREATE_INDEX:
@@ -751,15 +767,13 @@ gin_redo(XLogRecPtr lsn, XLogRecord *record)
 		default:
 			elog(PANIC, "gin_redo: unknown op code %u", info);
 	}
-	MemoryContextSwitchTo(topCtx);
+	MemoryContextSwitchTo(oldCtx);
 	MemoryContextReset(opCtx);
 }
 
 void
 gin_xlog_startup(void)
 {
-	incomplete_splits = NIL;
-
 	opCtx = AllocSetContextCreate(CurrentMemoryContext,
 								  "GIN recovery temporary context",
 								  ALLOCSET_DEFAULT_MINSIZE,
@@ -767,84 +781,8 @@ gin_xlog_startup(void)
 								  ALLOCSET_DEFAULT_MAXSIZE);
 }
 
-static void
-ginContinueSplit(ginIncompleteSplit *split)
-{
-	GinBtreeData btree;
-	GinState	ginstate;
-	Relation	reln;
-	Buffer		buffer;
-	GinBtreeStack stack;
-
-	/*
-	 * elog(NOTICE,"ginContinueSplit root:%u l:%u r:%u",  split->rootBlkno,
-	 * split->leftBlkno, split->rightBlkno);
-	 */
-	buffer = XLogReadBuffer(split->node, split->leftBlkno, false);
-
-	/*
-	 * Failure should be impossible here, because we wrote the page earlier.
-	 */
-	if (!BufferIsValid(buffer))
-		elog(PANIC, "ginContinueSplit: left block %u not found",
-			 split->leftBlkno);
-
-	reln = CreateFakeRelcacheEntry(split->node);
-
-	if (split->rootBlkno == GIN_ROOT_BLKNO)
-	{
-		MemSet(&ginstate, 0, sizeof(ginstate));
-		ginstate.index = reln;
-
-		ginPrepareEntryScan(&btree,
-							InvalidOffsetNumber, (Datum) 0, GIN_CAT_NULL_KEY,
-							&ginstate);
-	}
-	else
-	{
-		ginPrepareDataScan(&btree, reln);
-	}
-
-	stack.blkno = split->leftBlkno;
-	stack.buffer = buffer;
-	stack.off = InvalidOffsetNumber;
-	stack.parent = NULL;
-
-	ginFindParents(&btree, &stack, split->rootBlkno);
-
-	btree.prepareDownlink(&btree, buffer);
-	ginInsertValue(&btree, stack.parent, NULL);
-
-	FreeFakeRelcacheEntry(reln);
-
-	UnlockReleaseBuffer(buffer);
-}
-
 void
 gin_xlog_cleanup(void)
 {
-	ListCell   *l;
-	MemoryContext topCtx;
-
-	topCtx = MemoryContextSwitchTo(opCtx);
-
-	foreach(l, incomplete_splits)
-	{
-		ginIncompleteSplit *split = (ginIncompleteSplit *) lfirst(l);
-
-		ginContinueSplit(split);
-		MemoryContextReset(opCtx);
-	}
-
-	MemoryContextSwitchTo(topCtx);
 	MemoryContextDelete(opCtx);
-	incomplete_splits = NIL;
-}
-
-bool
-gin_safe_restartpoint(void)
-{
-	if (incomplete_splits)
-		return false;
-	return true;
 }
diff --git a/src/backend/access/rmgrdesc/gindesc.c b/src/backend/access/rmgrdesc/gindesc.c
index 391f75f..c534c3a 100644
--- a/src/backend/access/rmgrdesc/gindesc.c
+++ b/src/backend/access/rmgrdesc/gindesc.c
@@ -41,20 +41,45 @@ gin_desc(StringInfo buf, uint8 xl_info, char *rec)
 			desc_node(buf, ((ginxlogCreatePostingTree *) rec)->node, ((ginxlogCreatePostingTree *) rec)->blkno);
 			break;
 		case XLOG_GIN_INSERT:
-			appendStringInfoString(buf, "Insert item, ");
-			desc_node(buf, ((ginxlogInsert *) rec)->node, ((ginxlogInsert *) rec)->blkno);
-			appendStringInfo(buf, " offset: %u nitem: %u isdata: %c isleaf %c isdelete %c updateBlkno:%u",
-							 ((ginxlogInsert *) rec)->offset,
-							 ((ginxlogInsert *) rec)->nitem,
-							 (((ginxlogInsert *) rec)->isData) ? 'T' : 'F',
-							 (((ginxlogInsert *) rec)->isLeaf) ? 'T' : 'F',
-							 (((ginxlogInsert *) rec)->isDelete) ? 'T' : 'F',
-							 ((ginxlogInsert *) rec)->updateBlkno);
+			{
+				ginxlogInsert *xlrec = (ginxlogInsert *) rec;
+				char	*payload = rec + sizeof(ginxlogInsert);
+
+				appendStringInfoString(buf, "Insert item, ");
+				desc_node(buf, xlrec->node, xlrec->blkno);
+				appendStringInfo(buf, " offset: %u isdata: %c isleaf: %c",
+								 xlrec->offset,
+								 (xlrec->flags & GIN_SPLIT_ISDATA) ? 'T' : 'F',
+								 (xlrec->flags & GIN_SPLIT_ISLEAF) ? 'T' : 'F');
+				if (!(xlrec->flags & GIN_SPLIT_ISLEAF))
+				{
+					BlockNumber leftChildBlkno;		/* fetched with memcpy: the block */
+					BlockNumber rightChildBlkno;	/* numbers may be unaligned */
+
+					memcpy(&leftChildBlkno, payload, sizeof(BlockNumber));
+					payload += sizeof(BlockNumber);
+					memcpy(&rightChildBlkno, payload, sizeof(BlockNumber));
+					payload += sizeof(BlockNumber);
+					appendStringInfo(buf, " children: %u/%u",
+									 leftChildBlkno, rightChildBlkno);
+				}
+				if (!(xlrec->flags & GIN_SPLIT_ISDATA))
+					appendStringInfo(buf, " isdelete: %c",
+									 (((ginxlogInsertEntry *) payload)->isDelete) ? 'T' : 'F');
+				else if (xlrec->flags & GIN_SPLIT_ISLEAF)
+					appendStringInfo(buf, " nitem: %u",
+									 ((ginxlogInsertDataLeaf *) payload)->nitem);
+				else
+					appendStringInfo(buf, " pitem: %u-%u/%u",
+									 PostingItemGetBlockNumber((PostingItem *) payload),
+									 ItemPointerGetBlockNumber(&((PostingItem *) payload)->key),
+									 ItemPointerGetOffsetNumber(&((PostingItem *) payload)->key));
+			}
 			break;
 		case XLOG_GIN_SPLIT:
 			appendStringInfoString(buf, "Page split, ");
 			desc_node(buf, ((ginxlogSplit *) rec)->node, ((ginxlogSplit *) rec)->lblkno);
-			appendStringInfo(buf, " isrootsplit: %c", (((ginxlogSplit *) rec)->isRootSplit) ? 'T' : 'F');
+			appendStringInfo(buf, " isrootsplit: %c", (((ginxlogSplit *) rec)->flags & GIN_SPLIT_ROOT) ? 'T' : 'F');
 			break;
 		case XLOG_GIN_VACUUM_PAGE:
 			appendStringInfoString(buf, "Vacuum page, ");
diff --git a/src/include/access/gin.h b/src/include/access/gin.h
index b6cb48d..7dcb0e0 100644
--- a/src/include/access/gin.h
+++ b/src/include/access/gin.h
@@ -58,6 +58,5 @@ extern void gin_redo(XLogRecPtr lsn, XLogRecord *record);
 extern void gin_desc(StringInfo buf, uint8 xl_info, char *rec);
 extern void gin_xlog_startup(void);
 extern void gin_xlog_cleanup(void);
-extern bool gin_safe_restartpoint(void);
 
 #endif   /* GIN_H */
diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h
index 3952935..9bce245 100644
--- a/src/include/access/gin_private.h
+++ b/src/include/access/gin_private.h
@@ -48,6 +48,7 @@ typedef GinPageOpaqueData *GinPageOpaque;
 #define GIN_META		  (1 << 3)
 #define GIN_LIST		  (1 << 4)
 #define GIN_LIST_FULLROW  (1 << 5)		/* makes sense only on GIN_LIST page */
+#define GIN_INCOMPLETE_SPLIT (1 << 6)	/* page was split, but parent not updated */
 
 /* Page numbers of fixed-location pages */
 #define GIN_METAPAGE_BLKNO	(0)
@@ -119,6 +120,7 @@ typedef struct GinMetaPageData
 #define GinPageIsDeleted(page) ( GinPageGetOpaque(page)->flags & GIN_DELETED)
 #define GinPageSetDeleted(page)    ( GinPageGetOpaque(page)->flags |= GIN_DELETED)
 #define GinPageSetNonDeleted(page) ( GinPageGetOpaque(page)->flags &= ~GIN_DELETED)
+#define GinPageIsIncompleteSplit(page) ( GinPageGetOpaque(page)->flags & GIN_INCOMPLETE_SPLIT)
 
 #define GinPageRightMost(page) ( GinPageGetOpaque(page)->rightlink == InvalidBlockNumber)
 
@@ -336,41 +338,77 @@ typedef struct ginxlogInsert
 {
 	RelFileNode node;
 	BlockNumber blkno;
-	BlockNumber updateBlkno;
+	uint16		flags;		/* GIN_SPLIT_ISLEAF and/or GIN_SPLIT_ISDATA */
 	OffsetNumber offset;
-	bool		isDelete;
-	bool		isData;
-	bool		isLeaf;
-	OffsetNumber nitem;
 
 	/*
-	 * follows: tuples or ItemPointerData or PostingItem or list of
-	 * ItemPointerData
+	 * FOLLOWS:
+	 * if GIN_SPLIT_ISLEAF is not set in flags, the block numbers of the left
+	 * and right child pages whose split this insertion finishes. (non-aligned!)
+	 */
+
+	/*
+	 * follows: one of the following structs, depending on tree type.
+	 *
+	 * NB: the below structs are only 16-bit aligned when appended to a
+	 * ginxlogInsert struct! Beware of adding fields to them that require
+	 * stricter alignment.
 	 */
 } ginxlogInsert;
 
+typedef struct
+{
+	bool		isDelete;
+	IndexTupleData tuple;	/* variable length */
+} ginxlogInsertEntry;
+
+typedef struct
+{
+	OffsetNumber nitem;
+	ItemPointerData items[1]; /* variable length */
+} ginxlogInsertDataLeaf;
+
+/* In an insert to an internal data page, the payload is a PostingItem */
+
+
 #define XLOG_GIN_SPLIT	0x30
 
 typedef struct ginxlogSplit
 {
 	RelFileNode node;
 	BlockNumber lblkno;
-	BlockNumber rootBlkno;
 	BlockNumber rblkno;
-	BlockNumber rrlink;
+	BlockNumber rrlink;				/* right link, or root's blocknumber if root split */
+	BlockNumber	leftChildBlkno;		/* valid on a non-leaf split */
+	BlockNumber	rightChildBlkno;
+	uint16		flags;
+
+	/* follows: one of the following structs */
+} ginxlogSplit;
+
+/*
+ * Flags used in ginxlogInsert and ginxlogSplit records
+ */
+#define GIN_SPLIT_ISDATA	0x01
+#define GIN_SPLIT_ISLEAF	0x02
+#define GIN_SPLIT_ROOT		0x04
+
+typedef struct
+{
 	OffsetNumber separator;
 	OffsetNumber nitem;
 
-	bool		isData;
-	bool		isLeaf;
-	bool		isRootSplit;
+	/* FOLLOWS: IndexTuples */
+} ginxlogSplitEntry;
 
-	BlockNumber leftChildBlkno;
-	BlockNumber updateBlkno;
+typedef struct
+{
+	OffsetNumber separator;
+	OffsetNumber nitem;
+	ItemPointerData rightbound;
 
-	ItemPointerData rightbound; /* used only in posting tree */
-	/* follows: list of tuple or ItemPointerData or PostingItem */
-} ginxlogSplit;
+	/* FOLLOWS: array of ItemPointers (for leaf) or PostingItems (non-leaf) */
+} ginxlogSplitData;
 
 #define XLOG_GIN_VACUUM_PAGE	0x40
 
@@ -485,10 +523,10 @@ typedef struct GinBtreeData
 
 	/* insert methods */
 	OffsetNumber (*findChildPtr) (GinBtree, Page, BlockNumber, OffsetNumber);
-	bool		(*placeToPage) (GinBtree, Buffer, OffsetNumber, XLogRecData **);
-	Page		(*splitPage) (GinBtree, Buffer, Buffer, OffsetNumber, XLogRecData **);
-	void		(*prepareDownlink) (GinBtree, Buffer);
-	void		(*fillRoot) (GinBtree, Buffer, Buffer, Buffer);
+	bool		(*placeToPage) (GinBtree, Buffer, OffsetNumber, void *, BlockNumber, XLogRecData **);
+	Page		(*splitPage) (GinBtree, Buffer, Buffer, OffsetNumber, void *, BlockNumber, XLogRecData **);
+	void		*(*prepareDownlink) (GinBtree, Buffer);
+	void		(*fillRoot) (GinBtree, Page, BlockNumber, Page, BlockNumber, Page);
 
 	bool		isData;
 
@@ -497,29 +535,37 @@ typedef struct GinBtreeData
 	bool		fullScan;
 	bool		isBuild;
 
-	BlockNumber rightblkno;
-
-	/* Entry options */
+	/* Search key for Entry tree */
 	OffsetNumber entryAttnum;
 	Datum		entryKey;
 	GinNullCategory entryCategory;
+
+	/* Search key for data tree (posting tree) */
+	ItemPointerData itemptr;
+} GinBtreeData;
+
+/* Entry options */
+typedef struct
+{
 	IndexTuple	entry;
 	bool		isDelete;
+} GinBtreeEntryInsertData;
 
-	/* Data (posting tree) options */
+/* For data (posting tree) leaves */
+typedef struct
+{
 	ItemPointerData *items;
 	uint32		nitem;
 	uint32		curitem;
+} GinBtreeDataLeafInsertData;
 
-	PostingItem pitem;
-} GinBtreeData;
+/* For internal data (posting tree) pages, use PostingItem */
 
 extern GinBtreeStack *ginFindLeafPage(GinBtree btree, BlockNumber rootBlkno, bool searchMode);
 extern Buffer ginStepRight(Buffer buffer, Relation index, int lockmode);
 extern void freeGinBtreeStack(GinBtreeStack *stack);
 extern void ginInsertValue(GinBtree btree, GinBtreeStack *stack,
-			   GinStatsData *buildStats);
-extern void ginFindParents(GinBtree btree, GinBtreeStack *stack, BlockNumber rootBlkno);
+			   void *insertdata, GinStatsData *buildStats);
 
 /* ginentrypage.c */
 extern IndexTuple GinFormTuple(GinState *ginstate,
@@ -529,7 +575,7 @@ extern void GinShortenTuple(IndexTuple itup, uint32 nipd);
 extern void ginPrepareEntryScan(GinBtree btree, OffsetNumber attnum,
 					Datum key, GinNullCategory category,
 					GinState *ginstate);
-extern void ginEntryFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf);
+extern void ginEntryFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage);
 
 /* gindatapage.c */
 extern BlockNumber createPostingTree(Relation index,
@@ -542,7 +588,7 @@ extern void ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
 					  ItemPointerData *items, uint32 nitem,
 					  GinStatsData *buildStats);
 extern GinBtreeStack *ginScanBeginPostingTree(Relation index, BlockNumber rootBlkno);
-extern void ginDataFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf);
+extern void ginDataFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage);
 extern void ginPrepareDataScan(GinBtree btree, Relation index);
 
 /* ginscan.c */
diff --git a/src/include/access/rmgrlist.h b/src/include/access/rmgrlist.h
index 7ad71b3..166689d 100644
--- a/src/include/access/rmgrlist.h
+++ b/src/include/access/rmgrlist.h
@@ -38,7 +38,7 @@ PG_RMGR(RM_HEAP2_ID, "Heap2", heap2_redo, heap2_desc, NULL, NULL, NULL)
 PG_RMGR(RM_HEAP_ID, "Heap", heap_redo, heap_desc, NULL, NULL, NULL)
 PG_RMGR(RM_BTREE_ID, "Btree", btree_redo, btree_desc, btree_xlog_startup, btree_xlog_cleanup, btree_safe_restartpoint)
 PG_RMGR(RM_HASH_ID, "Hash", hash_redo, hash_desc, NULL, NULL, NULL)
-PG_RMGR(RM_GIN_ID, "Gin", gin_redo, gin_desc, gin_xlog_startup, gin_xlog_cleanup, gin_safe_restartpoint)
+PG_RMGR(RM_GIN_ID, "Gin", gin_redo, gin_desc, gin_xlog_startup, gin_xlog_cleanup, NULL)
 PG_RMGR(RM_GIST_ID, "Gist", gist_redo, gist_desc, gist_xlog_startup, gist_xlog_cleanup, NULL)
 PG_RMGR(RM_SEQ_ID, "Sequence", seq_redo, seq_desc, NULL, NULL, NULL)
 PG_RMGR(RM_SPGIST_ID, "SPGist", spg_redo, spg_desc, spg_xlog_startup, spg_xlog_cleanup, NULL)
-- 
1.8.4.rc3

