From 91975d892b47f4b3180218d753f462933b9faf49 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Thu, 27 Jan 2022 21:24:49 +0200
Subject: [PATCH 2/3] Use the buffer cache when initializing an unlogged index.

Some of the ambuildempty functions used smgrwrite() directly, followed
by smgrimmedsync(). A few small problems with that:

Firstly, one is supposed to use smgrextend() when extending the
relation, not smgrwrite(). That doesn't make much difference in
practice.  smgrextend() updates the relation size cache, so you miss
that, but we don't use that for the init fork of an unlogged relation.
But if you compile with CHECK_WRITE_VS_EXTEND, you get an assertion
failure.

Secondly, the smgrwrite() calls were performed before WAL-logging, so
that the page image written to disk had 0/0 as the LSN, not the LSN of
the WAL record. That's harmless in practice, but seems sloppy.

Thirdly, it's better to use the buffer cache, because then you don't
need to smgrimmedsync() the relation to disk, which adds latency.
Bypassing the cache makes sense for bulk operations like index
creation, but not when you're just initializing an empty index.
Creation of unlogged tables is hardly performance bottleneck in any
real world applications, but nevertheless.
---
 contrib/bloom/blinsert.c              | 29 ++----------
 contrib/bloom/bloom.h                 |  2 +-
 contrib/bloom/blutils.c               |  8 ++--
 src/backend/access/nbtree/nbtree.c    | 41 ++++++++--------
 src/backend/access/spgist/spginsert.c | 68 ++++++++++++---------------
 5 files changed, 59 insertions(+), 89 deletions(-)

diff --git a/contrib/bloom/blinsert.c b/contrib/bloom/blinsert.c
index c94cf34e698..b20694d6250 100644
--- a/contrib/bloom/blinsert.c
+++ b/contrib/bloom/blinsert.c
@@ -129,7 +129,7 @@ blbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 			 RelationGetRelationName(index));
 
 	/* Initialize the meta page */
-	BloomInitMetapage(index);
+	BloomInitMetapage(index, MAIN_FORKNUM);
 
 	/* Initialize the bloom build state */
 	memset(&buildstate, 0, sizeof(buildstate));
@@ -163,31 +163,8 @@ blbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 void
 blbuildempty(Relation index)
 {
-	Page		metapage;
-
-	/* Construct metapage. */
-	metapage = (Page) palloc(BLCKSZ);
-	BloomFillMetapage(index, metapage);
-
-	/*
-	 * Write the page and log it.  It might seem that an immediate sync would
-	 * be sufficient to guarantee that the file exists on disk, but recovery
-	 * itself might remove it while replaying, for example, an
-	 * XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE record.  Therefore, we need
-	 * this even when wal_level=minimal.
-	 */
-	PageSetChecksumInplace(metapage, BLOOM_METAPAGE_BLKNO);
-	smgrwrite(RelationGetSmgr(index), INIT_FORKNUM, BLOOM_METAPAGE_BLKNO,
-			  (char *) metapage, true);
-	log_newpage(&(RelationGetSmgr(index))->smgr_rnode.node, INIT_FORKNUM,
-				BLOOM_METAPAGE_BLKNO, metapage, true);
-
-	/*
-	 * An immediate sync is required even if we xlog'd the page, because the
-	 * write did not go through shared_buffers and therefore a concurrent
-	 * checkpoint may have moved the redo pointer past our xlog record.
-	 */
-	smgrimmedsync(RelationGetSmgr(index), INIT_FORKNUM);
+	/* Initialize the meta page */
+	BloomInitMetapage(index, INIT_FORKNUM);
 }
 
 /*
diff --git a/contrib/bloom/bloom.h b/contrib/bloom/bloom.h
index 9966f3f46e1..e3259b4d126 100644
--- a/contrib/bloom/bloom.h
+++ b/contrib/bloom/bloom.h
@@ -178,7 +178,7 @@ typedef BloomScanOpaqueData *BloomScanOpaque;
 extern void _PG_init(void);
 extern void initBloomState(BloomState *state, Relation index);
 extern void BloomFillMetapage(Relation index, Page metaPage);
-extern void BloomInitMetapage(Relation index);
+extern void BloomInitMetapage(Relation index, ForkNumber forknum);
 extern void BloomInitPage(Page page, uint16 flags);
 extern Buffer BloomNewBuffer(Relation index);
 extern void signValue(BloomState *state, BloomSignatureWord *sign, Datum value, int attno);
diff --git a/contrib/bloom/blutils.c b/contrib/bloom/blutils.c
index a434cf93efd..6ff6dad5cb6 100644
--- a/contrib/bloom/blutils.c
+++ b/contrib/bloom/blutils.c
@@ -451,7 +451,7 @@ BloomFillMetapage(Relation index, Page metaPage)
  * Initialize metapage for bloom index.
  */
 void
-BloomInitMetapage(Relation index)
+BloomInitMetapage(Relation index, ForkNumber forknum)
 {
 	Buffer		metaBuffer;
 	Page		metaPage;
@@ -459,9 +459,11 @@ BloomInitMetapage(Relation index)
 
 	/*
 	 * Make a new page; since it is first page it should be associated with
-	 * block number 0 (BLOOM_METAPAGE_BLKNO).
+	 * block number 0 (BLOOM_METAPAGE_BLKNO).  No need to hold the extension
+	 * lock because there cannot be concurrent inserters yet.
 	 */
-	metaBuffer = BloomNewBuffer(index);
+	metaBuffer = ReadBufferExtended(index, forknum, P_NEW, RBM_NORMAL, NULL);
+	LockBuffer(metaBuffer, BUFFER_LOCK_EXCLUSIVE);
 	Assert(BufferGetBlockNumber(metaBuffer) == BLOOM_METAPAGE_BLKNO);
 
 	/* Initialize contents of meta page */
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 13024af2faa..a3ef3f50cea 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -150,31 +150,32 @@ bthandler(PG_FUNCTION_ARGS)
 void
 btbuildempty(Relation index)
 {
+	Buffer		metabuf;
 	Page		metapage;
 
-	/* Construct metapage. */
-	metapage = (Page) palloc(BLCKSZ);
-	_bt_initmetapage(metapage, P_NONE, 0, _bt_allequalimage(index, false));
-
 	/*
-	 * Write the page and log it.  It might seem that an immediate sync would
-	 * be sufficient to guarantee that the file exists on disk, but recovery
-	 * itself might remove it while replaying, for example, an
-	 * XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE record.  Therefore, we need
-	 * this even when wal_level=minimal.
+	 * Initalize the metapage.
+	 *
+	 * Regular index build bypasses the buffer manager and uses smgr functions
+	 * directly, with an smgrimmedsync() call at the end.  That makes sense
+	 * when the index is large, but for an empty index, it's better to use
+	 * the buffer cache to avoid the smgrimmedsync().
 	 */
-	PageSetChecksumInplace(metapage, BTREE_METAPAGE);
-	smgrwrite(RelationGetSmgr(index), INIT_FORKNUM, BTREE_METAPAGE,
-			  (char *) metapage, true);
-	log_newpage(&RelationGetSmgr(index)->smgr_rnode.node, INIT_FORKNUM,
-				BTREE_METAPAGE, metapage, true);
+	metabuf = ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
+	Assert(BufferGetBlockNumber(metabuf) == BTREE_METAPAGE);
+	_bt_lockbuf(index, metabuf, BT_WRITE);
 
-	/*
-	 * An immediate sync is required even if we xlog'd the page, because the
-	 * write did not go through shared_buffers and therefore a concurrent
-	 * checkpoint may have moved the redo pointer past our xlog record.
-	 */
-	smgrimmedsync(RelationGetSmgr(index), INIT_FORKNUM);
+	START_CRIT_SECTION();
+
+	metapage = BufferGetPage(metabuf);
+	_bt_initmetapage(metapage, P_NONE, 0, _bt_allequalimage(index, false));
+	MarkBufferDirty(metabuf);
+	log_newpage_buffer(metabuf, true);
+
+	END_CRIT_SECTION();
+
+	_bt_unlockbuf(index, metabuf);
+	ReleaseBuffer(metabuf);
 }
 
 /*
diff --git a/src/backend/access/spgist/spginsert.c b/src/backend/access/spgist/spginsert.c
index bfb74049d0c..2676b0ad74a 100644
--- a/src/backend/access/spgist/spginsert.c
+++ b/src/backend/access/spgist/spginsert.c
@@ -155,49 +155,39 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 void
 spgbuildempty(Relation index)
 {
-	Page		page;
-
-	/* Construct metapage. */
-	page = (Page) palloc(BLCKSZ);
-	SpGistInitMetapage(page);
+	Buffer		metabuffer,
+				rootbuffer,
+				nullbuffer;
 
 	/*
-	 * Write the page and log it unconditionally.  This is important
-	 * particularly for indexes created on tablespaces and databases whose
-	 * creation happened after the last redo pointer as recovery removes any
-	 * of their existing content when the corresponding create records are
-	 * replayed.
+	 * Initialize the meta page and root pages
 	 */
-	PageSetChecksumInplace(page, SPGIST_METAPAGE_BLKNO);
-	smgrwrite(RelationGetSmgr(index), INIT_FORKNUM, SPGIST_METAPAGE_BLKNO,
-			  (char *) page, true);
-	log_newpage(&(RelationGetSmgr(index))->smgr_rnode.node, INIT_FORKNUM,
-				SPGIST_METAPAGE_BLKNO, page, true);
-
-	/* Likewise for the root page. */
-	SpGistInitPage(page, SPGIST_LEAF);
-
-	PageSetChecksumInplace(page, SPGIST_ROOT_BLKNO);
-	smgrwrite(RelationGetSmgr(index), INIT_FORKNUM, SPGIST_ROOT_BLKNO,
-			  (char *) page, true);
-	log_newpage(&(RelationGetSmgr(index))->smgr_rnode.node, INIT_FORKNUM,
-				SPGIST_ROOT_BLKNO, page, true);
-
-	/* Likewise for the null-tuples root page. */
-	SpGistInitPage(page, SPGIST_LEAF | SPGIST_NULLS);
-
-	PageSetChecksumInplace(page, SPGIST_NULL_BLKNO);
-	smgrwrite(RelationGetSmgr(index), INIT_FORKNUM, SPGIST_NULL_BLKNO,
-			  (char *) page, true);
-	log_newpage(&(RelationGetSmgr(index))->smgr_rnode.node, INIT_FORKNUM,
-				SPGIST_NULL_BLKNO, page, true);
+	metabuffer = ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
+	rootbuffer = ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
+	nullbuffer = ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
 
-	/*
-	 * An immediate sync is required even if we xlog'd the pages, because the
-	 * writes did not go through shared buffers and therefore a concurrent
-	 * checkpoint may have moved the redo pointer past our xlog record.
-	 */
-	smgrimmedsync(RelationGetSmgr(index), INIT_FORKNUM);
+	Assert(BufferGetBlockNumber(metabuffer) == SPGIST_METAPAGE_BLKNO);
+	Assert(BufferGetBlockNumber(rootbuffer) == SPGIST_ROOT_BLKNO);
+	Assert(BufferGetBlockNumber(nullbuffer) == SPGIST_NULL_BLKNO);
+
+	START_CRIT_SECTION();
+
+	SpGistInitMetapage(BufferGetPage(metabuffer));
+	MarkBufferDirty(metabuffer);
+	SpGistInitBuffer(rootbuffer, SPGIST_LEAF);
+	MarkBufferDirty(rootbuffer);
+	SpGistInitBuffer(nullbuffer, SPGIST_LEAF | SPGIST_NULLS);
+	MarkBufferDirty(nullbuffer);
+
+	log_newpage_buffer(metabuffer, true);
+	log_newpage_buffer(rootbuffer, true);
+	log_newpage_buffer(nullbuffer, true);
+
+	END_CRIT_SECTION();
+
+	UnlockReleaseBuffer(metabuffer);
+	UnlockReleaseBuffer(rootbuffer);
+	UnlockReleaseBuffer(nullbuffer);
 }
 
 /*
-- 
2.30.2

