From fdaa0873a37545cf42a90b9ede562bcbc2d72947 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Thu, 29 Jun 2023 12:27:52 +1200
Subject: [PATCH 2/2] Use ReadRecentBuffer() for btree root page.

The root page of a btree is accessed on every index scan, so it gets
very hot.  We can measure a speed-up on many workloads by pinning it
with ReadRecentBuffer() instead of ReadBuffer(), after remembering where
it was last time in the AM-private cache space in rel->rd_amcache.

Rearrange the existing use of rd_amcache into a new struct
BTAMCacheData.  It's likely that we'll find more things to put in there
in future work.

Discussion: https://postgr.es/m/20230627020546.t6z4tntmj7wmjrfh%40awork3.anarazel.de
---
 src/backend/access/nbtree/nbtpage.c | 93 +++++++++++++++++++++--------
 src/include/access/nbtree.h         | 10 ++++
 src/tools/pgindent/typedefs.list    |  1 +
 3 files changed, 80 insertions(+), 24 deletions(-)

diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c
index d78971bfe8..bf270874d2 100644
--- a/src/backend/access/nbtree/nbtpage.c
+++ b/src/backend/access/nbtree/nbtpage.c
@@ -311,6 +311,29 @@ _bt_set_cleanup_info(Relation rel, BlockNumber num_delpages)
 	_bt_relbuf(rel, metabuf);
 }
 
+/*
+ * Get our private per-relation cache area.
+ */
+static inline BTAMCacheData *
+_bt_getcache(Relation rel)
+{
+	BTAMCacheData *amcache;
+
+	if (unlikely(rel->rd_amcache == NULL))
+	{
+		/* Set up cache on first time through. */
+		amcache = (BTAMCacheData *)
+			MemoryContextAlloc(rel->rd_indexcxt, sizeof(*amcache));
+		amcache->meta_page_is_valid = false;
+		amcache->recent_root_buffer = InvalidBuffer;
+		rel->rd_amcache = amcache;
+	}
+	else
+		amcache = (BTAMCacheData *) rel->rd_amcache;
+
+	return amcache;
+}
+
 /*
  *	_bt_getroot() -- Get the root page of the btree.
  *
@@ -350,17 +373,21 @@ _bt_getroot(Relation rel, Relation heaprel, int access)
 	BlockNumber rootblkno;
 	uint32		rootlevel;
 	BTMetaPageData *metad;
+	BTAMCacheData *amcache;
 
 	Assert(access == BT_READ || heaprel != NULL);
 
+	amcache = _bt_getcache(rel);
+
 	/*
 	 * Try to use previously-cached metapage data to find the root.  This
 	 * normally saves one buffer access per index search, which is a very
 	 * helpful savings in bufmgr traffic and hence contention.
 	 */
-	if (rel->rd_amcache != NULL)
+	if (amcache->meta_page_is_valid)
 	{
-		metad = (BTMetaPageData *) rel->rd_amcache;
+		metad = &amcache->meta_page;
+
 		/* We shouldn't have cached it if any of these fail */
 		Assert(metad->btm_magic == BTREE_MAGIC);
 		Assert(metad->btm_version >= BTREE_MIN_VERSION);
@@ -373,7 +400,25 @@ _bt_getroot(Relation rel, Relation heaprel, int access)
 		Assert(rootblkno != P_NONE);
 		rootlevel = metad->btm_fastlevel;
 
-		rootbuf = _bt_getbuf(rel, rootblkno, BT_READ);
+		/* Try to find the root page in the buffer it was last seen in. */
+		if (BufferIsValid(amcache->recent_root_buffer) &&
+			ReadRecentBuffer(rel->rd_locator, MAIN_FORKNUM, rootblkno,
+							 amcache->recent_root_buffer))
+		{
+			/*
+			 * It's in the same buffer as last time, and we avoided a trip
+			 * through the buffer map.
+			 */
+			rootbuf = amcache->recent_root_buffer;
+			_bt_lockbuf(rel, rootbuf, BT_READ);
+			_bt_checkpage(rel, rootbuf);
+		}
+		else
+		{
+			/* Slow path.  Remember where it is for next time. */
+			rootbuf = _bt_getbuf(rel, rootblkno, BT_READ);
+			amcache->recent_root_buffer = rootbuf;
+		}
 		rootpage = BufferGetPage(rootbuf);
 		rootopaque = BTPageGetOpaque(rootpage);
 
@@ -393,10 +438,8 @@ _bt_getroot(Relation rel, Relation heaprel, int access)
 			return rootbuf;
 		}
 		_bt_relbuf(rel, rootbuf);
-		/* Cache is stale, throw it away */
-		if (rel->rd_amcache)
-			pfree(rel->rd_amcache);
-		rel->rd_amcache = NULL;
+		/* Cache is stale, mark it invalid. */
+		amcache->meta_page_is_valid = false;
 	}
 
 	metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ);
@@ -523,9 +566,8 @@ _bt_getroot(Relation rel, Relation heaprel, int access)
 		/*
 		 * Cache the metapage data for next time
 		 */
-		rel->rd_amcache = MemoryContextAlloc(rel->rd_indexcxt,
-											 sizeof(BTMetaPageData));
-		memcpy(rel->rd_amcache, metad, sizeof(BTMetaPageData));
+		amcache->meta_page = *metad;
+		amcache->meta_page_is_valid = true;
 
 		/*
 		 * We are done with the metapage; arrange to release it via first
@@ -588,16 +630,16 @@ _bt_gettrueroot(Relation rel)
 	BlockNumber rootblkno;
 	uint32		rootlevel;
 	BTMetaPageData *metad;
+	BTAMCacheData *amcache;
 
 	/*
 	 * We don't try to use cached metapage data here, since (a) this path is
 	 * not performance-critical, and (b) if we are here it suggests our cache
 	 * is out-of-date anyway.  In light of point (b), it's probably safest to
-	 * actively flush any cached metapage info.
+	 * actively invalidate any cached metapage info.
 	 */
-	if (rel->rd_amcache)
-		pfree(rel->rd_amcache);
-	rel->rd_amcache = NULL;
+	amcache = _bt_getcache(rel);
+	amcache->meta_page_is_valid = false;
 
 	metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ);
 	metapg = BufferGetPage(metabuf);
@@ -674,9 +716,12 @@ _bt_gettrueroot(Relation rel)
 int
 _bt_getrootheight(Relation rel)
 {
+	BTAMCacheData *amcache;
 	BTMetaPageData *metad;
 
-	if (rel->rd_amcache == NULL)
+	amcache = _bt_getcache(rel);
+
+	if (!amcache->meta_page_is_valid)
 	{
 		Buffer		metabuf;
 
@@ -697,14 +742,13 @@ _bt_getrootheight(Relation rel)
 		/*
 		 * Cache the metapage data for next time
 		 */
-		rel->rd_amcache = MemoryContextAlloc(rel->rd_indexcxt,
-											 sizeof(BTMetaPageData));
-		memcpy(rel->rd_amcache, metad, sizeof(BTMetaPageData));
+		amcache->meta_page = *metad;
+		amcache->meta_page_is_valid = true;
 		_bt_relbuf(rel, metabuf);
 	}
 
 	/* Get cached page */
-	metad = (BTMetaPageData *) rel->rd_amcache;
+	metad = &amcache->meta_page;
 	/* We shouldn't have cached it if any of these fail */
 	Assert(metad->btm_magic == BTREE_MAGIC);
 	Assert(metad->btm_version >= BTREE_MIN_VERSION);
@@ -738,9 +782,11 @@ _bt_getrootheight(Relation rel)
 void
 _bt_metaversion(Relation rel, bool *heapkeyspace, bool *allequalimage)
 {
+	BTAMCacheData *amcache;
 	BTMetaPageData *metad;
 
-	if (rel->rd_amcache == NULL)
+	amcache = _bt_getcache(rel);
+	if (!amcache->meta_page_is_valid)
 	{
 		Buffer		metabuf;
 
@@ -770,14 +816,13 @@ _bt_metaversion(Relation rel, bool *heapkeyspace, bool *allequalimage)
 		 * from version 2 to version 3, both of which are !heapkeyspace
 		 * versions.
 		 */
-		rel->rd_amcache = MemoryContextAlloc(rel->rd_indexcxt,
-											 sizeof(BTMetaPageData));
-		memcpy(rel->rd_amcache, metad, sizeof(BTMetaPageData));
+		amcache->meta_page = *metad;
+		amcache->meta_page_is_valid = true;
 		_bt_relbuf(rel, metabuf);
 	}
 
 	/* Get cached page */
-	metad = (BTMetaPageData *) rel->rd_amcache;
+	metad = &amcache->meta_page;
 	/* We shouldn't have cached it if any of these fail */
 	Assert(metad->btm_magic == BTREE_MAGIC);
 	Assert(metad->btm_version >= BTREE_MIN_VERSION);
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index 8891fa7973..85cab606a3 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -151,6 +151,16 @@ typedef struct BTMetaPageData
 #define BTREE_MIN_VERSION	2	/* minimum supported version */
 #define BTREE_NOVAC_VERSION	3	/* version with all meta fields set */
 
+/*
+ * Cache space, stored in rel->rd_amcache.
+ */
+typedef struct BTAMCacheData
+{
+	BTMetaPageData meta_page;
+	bool		meta_page_is_valid;
+	Buffer		recent_root_buffer;
+} BTAMCacheData;
+
 /*
  * Maximum size of a btree index entry, including its tuple header.
  *
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 260854747b..b75d9a5cb2 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -187,6 +187,7 @@ BOOL
 BOOLEAN
 BOX
+BTAMCacheData
 BTArrayKeyInfo
 BTBuildState
 BTCycleId
 BTDedupInterval
-- 
2.40.1

