From 0b162ec9707a2790f92e2e237cb4fa807157c19c Mon Sep 17 00:00:00 2001
From: Rishu Bagga <bagrishu@amazon.com>
Date: Thu, 15 Sep 2022 00:55:25 +0000
Subject: [PATCH] slru to buffercache with page headers, and upgrade logic

---
 contrib/amcheck/verify_nbtree.c             |    2 +-
 contrib/bloom/blinsert.c                    |    6 +-
 contrib/pg_prewarm/autoprewarm.c            |    2 +-
 contrib/pg_prewarm/pg_prewarm.c             |    4 +-
 contrib/pg_visibility/pg_visibility.c       |    5 +-
 src/backend/access/gist/gistbuild.c         |   11 +-
 src/backend/access/hash/hashpage.c          |    2 +-
 src/backend/access/heap/heapam_handler.c    |   32 +-
 src/backend/access/heap/rewriteheap.c       |    6 +-
 src/backend/access/heap/visibilitymap.c     |   41 +-
 src/backend/access/nbtree/nbtree.c          |    6 +-
 src/backend/access/nbtree/nbtsort.c         |    8 +-
 src/backend/access/spgist/spginsert.c       |   17 +-
 src/backend/access/table/tableam.c          |    4 +-
 src/backend/access/transam/clog.c           |  225 +--
 src/backend/access/transam/commit_ts.c      |  199 +--
 src/backend/access/transam/multixact.c      |  508 +++----
 src/backend/access/transam/slru.c           | 1484 ++-----------------
 src/backend/access/transam/subtrans.c       |  114 +-
 src/backend/access/transam/xact.c           |    2 +
 src/backend/access/transam/xlog.c           |   15 +-
 src/backend/access/transam/xlogprefetcher.c |   28 +-
 src/backend/access/transam/xlogutils.c      |   17 +-
 src/backend/catalog/catalog.c               |   53 +-
 src/backend/catalog/index.c                 |    4 +-
 src/backend/catalog/storage.c               |  205 ++-
 src/backend/commands/async.c                |   97 +-
 src/backend/commands/dbcommands.c           |    8 +-
 src/backend/commands/sequence.c             |    8 +-
 src/backend/commands/tablecmds.c            |   19 +-
 src/backend/storage/buffer/buf_init.c       |   17 +-
 src/backend/storage/buffer/bufmgr.c         |  379 ++---
 src/backend/storage/buffer/localbuf.c       |   25 +-
 src/backend/storage/freespace/freespace.c   |   41 +-
 src/backend/storage/ipc/ipci.c              |    4 -
 src/backend/storage/lmgr/predicate.c        |   72 +-
 src/backend/storage/page/bufpage.c          |   23 +-
 src/backend/storage/smgr/md.c               |  366 ++---
 src/backend/storage/smgr/smgr.c             |  399 +++--
 src/backend/storage/sync/sync.c             |   20 +-
 src/backend/utils/cache/inval.c             |   17 +-
 src/backend/utils/cache/relcache.c          |   27 +-
 src/backend/utils/mmgr/mcxt.c               |   13 +-
 src/bin/pg_upgrade/file.c                   |  175 ++-
 src/bin/pg_upgrade/function.c               |   66 +
 src/bin/pg_upgrade/pg_upgrade.c             |   76 +-
 src/bin/pg_upgrade/pg_upgrade.h             |   19 +-
 src/common/relpath.c                        |   31 +-
 src/include/access/clog.h                   |    6 -
 src/include/access/commit_ts.h              |    3 -
 src/include/access/multixact.h              |    3 -
 src/include/access/slru.h                   |  181 +--
 src/include/access/slrudefs.h               |   19 +
 src/include/access/slrulist.h               |   30 +
 src/include/access/subtrans.h               |    3 -
 src/include/catalog/catversion.h            |    4 +-
 src/include/catalog/storage.h               |   11 +-
 src/include/common/relpath.h                |   15 +-
 src/include/storage/buf_internals.h         |   11 +-
 src/include/storage/bufmgr.h                |   38 +-
 src/include/storage/bufpage.h               |   17 +
 src/include/storage/md.h                    |   28 +-
 src/include/storage/relfilelocator.h        |   29 +-
 src/include/storage/smgr.h                  |   99 +-
 src/include/utils/inval.h                   |    2 +-
 src/include/utils/rel.h                     |   33 +-
 66 files changed, 2175 insertions(+), 3259 deletions(-)
 create mode 100644 src/include/access/slrudefs.h
 create mode 100644 src/include/access/slrulist.h

diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c
index 9021d156eb7..2722f121219 100644
--- a/contrib/amcheck/verify_nbtree.c
+++ b/contrib/amcheck/verify_nbtree.c
@@ -324,7 +324,7 @@ bt_index_check_internal(Oid indrelid, bool parentcheck, bool heapallindexed,
 		bool		heapkeyspace,
 					allequalimage;
 
-		if (!smgrexists(RelationGetSmgr(indrel), MAIN_FORKNUM))
+		if (!smgrexists(RelationGetSmgr(indrel, MAIN_FORKNUM)))
 			ereport(ERROR,
 					(errcode(ERRCODE_INDEX_CORRUPTED),
 					 errmsg("index \"%s\" lacks a main relation fork",
diff --git a/contrib/bloom/blinsert.c b/contrib/bloom/blinsert.c
index dd26d6ac29a..d068a243e8f 100644
--- a/contrib/bloom/blinsert.c
+++ b/contrib/bloom/blinsert.c
@@ -177,9 +177,9 @@ blbuildempty(Relation index)
 	 * this even when wal_level=minimal.
 	 */
 	PageSetChecksumInplace(metapage, BLOOM_METAPAGE_BLKNO);
-	smgrwrite(RelationGetSmgr(index), INIT_FORKNUM, BLOOM_METAPAGE_BLKNO,
+	smgrwrite(RelationGetSmgr(index, INIT_FORKNUM), BLOOM_METAPAGE_BLKNO,
 			  (char *) metapage, true);
-	log_newpage(&(RelationGetSmgr(index))->smgr_rlocator.locator, INIT_FORKNUM,
+	log_newpage(&index->rd_locator, INIT_FORKNUM,
 				BLOOM_METAPAGE_BLKNO, metapage, true);
 
 	/*
@@ -187,7 +187,7 @@ blbuildempty(Relation index)
 	 * write did not go through shared_buffers and therefore a concurrent
 	 * checkpoint may have moved the redo pointer past our xlog record.
 	 */
-	smgrimmedsync(RelationGetSmgr(index), INIT_FORKNUM);
+	smgrimmedsync(RelationGetSmgr(index, INIT_FORKNUM));
 }
 
 /*
diff --git a/contrib/pg_prewarm/autoprewarm.c b/contrib/pg_prewarm/autoprewarm.c
index d02a6a1ba04..de07bc593c7 100644
--- a/contrib/pg_prewarm/autoprewarm.c
+++ b/contrib/pg_prewarm/autoprewarm.c
@@ -535,7 +535,7 @@ autoprewarm_database_main(Datum main_arg)
 			 */
 			if (blk->forknum > InvalidForkNumber &&
 				blk->forknum <= MAX_FORKNUM &&
-				smgrexists(RelationGetSmgr(rel), blk->forknum))
+				smgrexists(RelationGetSmgr(rel, blk->forknum)))
 				nblocks = RelationGetNumberOfBlocksInFork(rel, blk->forknum);
 			else
 				nblocks = 0;
diff --git a/contrib/pg_prewarm/pg_prewarm.c b/contrib/pg_prewarm/pg_prewarm.c
index caff5c4a80f..1e57481ef92 100644
--- a/contrib/pg_prewarm/pg_prewarm.c
+++ b/contrib/pg_prewarm/pg_prewarm.c
@@ -109,7 +109,7 @@ pg_prewarm(PG_FUNCTION_ARGS)
 		aclcheck_error(aclresult, get_relkind_objtype(rel->rd_rel->relkind), get_rel_name(relOid));
 
 	/* Check that the fork exists. */
-	if (!smgrexists(RelationGetSmgr(rel), forkNumber))
+	if (!smgrexists(RelationGetSmgr(rel, forkNumber)))
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("fork \"%s\" does not exist for this relation",
@@ -177,7 +177,7 @@ pg_prewarm(PG_FUNCTION_ARGS)
 		for (block = first_block; block <= last_block; ++block)
 		{
 			CHECK_FOR_INTERRUPTS();
-			smgrread(RelationGetSmgr(rel), forkNumber, block, blockbuffer.data);
+			smgrread(RelationGetSmgr(rel, forkNumber), block, blockbuffer.data);
 			++blocks_done;
 		}
 	}
diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c
index a95f73ec796..38cb56c9bce 100644
--- a/contrib/pg_visibility/pg_visibility.c
+++ b/contrib/pg_visibility/pg_visibility.c
@@ -388,13 +388,14 @@ pg_truncate_visibility_map(PG_FUNCTION_ARGS)
 	check_relation_relkind(rel);
 
 	/* Forcibly reset cached file size */
-	RelationGetSmgr(rel)->smgr_cached_nblocks[VISIBILITYMAP_FORKNUM] = InvalidBlockNumber;
+	RelationGetSmgr(rel, VISIBILITYMAP_FORKNUM)->smgr_cached_nblocks = InvalidBlockNumber;
 
 	block = visibilitymap_prepare_truncate(rel, 0);
 	if (BlockNumberIsValid(block))
 	{
 		fork = VISIBILITYMAP_FORKNUM;
-		smgrtruncate(RelationGetSmgr(rel), &fork, 1, &block);
+		DropRelationBuffers(rel->rd_locator, rel->rd_backend, &fork, 1, &block);
+		smgrtruncate_multi(rel->rd_locator, rel->rd_backend, &fork, 1, &block);
 	}
 
 	if (RelationNeedsWAL(rel))
diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c
index fb0f466708c..a78527769e6 100644
--- a/src/backend/access/gist/gistbuild.c
+++ b/src/backend/access/gist/gistbuild.c
@@ -416,7 +416,7 @@ gist_indexsortbuild(GISTBuildState *state)
 	 * replaced with the real root page at the end.
 	 */
 	page = palloc0(BLCKSZ);
-	smgrextend(RelationGetSmgr(state->indexrel), MAIN_FORKNUM, GIST_ROOT_BLKNO,
+	smgrextend(RelationGetSmgr(state->indexrel, MAIN_FORKNUM), GIST_ROOT_BLKNO,
 			   page, true);
 	state->pages_allocated++;
 	state->pages_written++;
@@ -460,7 +460,7 @@ gist_indexsortbuild(GISTBuildState *state)
 	/* Write out the root */
 	PageSetLSN(levelstate->pages[0], GistBuildLSN);
 	PageSetChecksumInplace(levelstate->pages[0], GIST_ROOT_BLKNO);
-	smgrwrite(RelationGetSmgr(state->indexrel), MAIN_FORKNUM, GIST_ROOT_BLKNO,
+	smgrwrite(RelationGetSmgr(state->indexrel, MAIN_FORKNUM), GIST_ROOT_BLKNO,
 			  levelstate->pages[0], true);
 	if (RelationNeedsWAL(state->indexrel))
 		log_newpage(&state->indexrel->rd_locator, MAIN_FORKNUM, GIST_ROOT_BLKNO,
@@ -479,7 +479,7 @@ gist_indexsortbuild(GISTBuildState *state)
 	 * still not be on disk when the crash occurs.
 	 */
 	if (RelationNeedsWAL(state->indexrel))
-		smgrimmedsync(RelationGetSmgr(state->indexrel), MAIN_FORKNUM);
+		smgrimmedsync(RelationGetSmgr(state->indexrel, MAIN_FORKNUM));
 }
 
 /*
@@ -657,7 +657,7 @@ gist_indexsortbuild_flush_ready_pages(GISTBuildState *state)
 
 		PageSetLSN(page, GistBuildLSN);
 		PageSetChecksumInplace(page, blkno);
-		smgrextend(RelationGetSmgr(state->indexrel), MAIN_FORKNUM, blkno, page,
+		smgrextend(RelationGetSmgr(state->indexrel, MAIN_FORKNUM), blkno, page,
 				   true);
 
 		state->pages_written++;
@@ -943,8 +943,7 @@ gistBuildCallback(Relation index,
 	 */
 	if ((buildstate->buildMode == GIST_BUFFERING_AUTO &&
 		 buildstate->indtuples % BUFFERING_MODE_SWITCH_CHECK_STEP == 0 &&
-		 effective_cache_size < smgrnblocks(RelationGetSmgr(index),
-											MAIN_FORKNUM)) ||
+		 effective_cache_size < smgrnblocks(RelationGetSmgr(index, MAIN_FORKNUM))) ||
 		(buildstate->buildMode == GIST_BUFFERING_STATS &&
 		 buildstate->indtuples >= BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET))
 	{
diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c
index 55b2929ad51..4aa87d475d5 100644
--- a/src/backend/access/hash/hashpage.c
+++ b/src/backend/access/hash/hashpage.c
@@ -1030,7 +1030,7 @@ _hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks)
 					true);
 
 	PageSetChecksumInplace(page, lastblock);
-	smgrextend(RelationGetSmgr(rel), MAIN_FORKNUM, lastblock, zerobuf.data,
+	smgrextend(RelationGetSmgr(rel, MAIN_FORKNUM), lastblock, zerobuf.data,
 			   false);
 
 	return true;
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index ab1bcf3522d..0601d4735fb 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -572,7 +572,7 @@ heapam_relation_set_new_filelocator(Relation rel,
 									TransactionId *freezeXid,
 									MultiXactId *minmulti)
 {
-	SMgrRelation srel;
+	SMgrFileHandle sfile;
 
 	/*
 	 * Initialize to the minimum XID that could put tuples in the table. We
@@ -591,7 +591,7 @@ heapam_relation_set_new_filelocator(Relation rel,
 	 */
 	*minmulti = GetOldestMultiXactId();
 
-	srel = RelationCreateStorage(*newrlocator, persistence, true);
+	sfile = RelationCreateStorage(*newrlocator, persistence, true);
 
 	/*
 	 * If required, set up an init fork for an unlogged table so that it can
@@ -604,15 +604,18 @@ heapam_relation_set_new_filelocator(Relation rel,
 	 */
 	if (persistence == RELPERSISTENCE_UNLOGGED)
 	{
+		SMgrFileHandle sfile_init;
+
 		Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
 			   rel->rd_rel->relkind == RELKIND_MATVIEW ||
 			   rel->rd_rel->relkind == RELKIND_TOASTVALUE);
-		smgrcreate(srel, INIT_FORKNUM, false);
+		sfile_init = smgropen(*newrlocator, InvalidBackendId, INIT_FORKNUM);
+		smgrcreate(sfile_init, false);
 		log_smgrcreate(newrlocator, INIT_FORKNUM);
-		smgrimmedsync(srel, INIT_FORKNUM);
+		smgrimmedsync(sfile_init);	/* fsync the init fork, not main */
 	}
 
-	smgrclose(srel);
+	smgrclose(sfile);
 }
 
 static void
@@ -624,9 +627,7 @@ heapam_relation_nontransactional_truncate(Relation rel)
 static void
 heapam_relation_copy_data(Relation rel, const RelFileLocator *newrlocator)
 {
-	SMgrRelation dstrel;
-
-	dstrel = smgropen(*newrlocator, rel->rd_backend);
+	SMgrFileHandle dstmain;
 
 	/*
 	 * Since we copy the file directly without looking at the shared buffers,
@@ -646,16 +647,21 @@ heapam_relation_copy_data(Relation rel, const RelFileLocator *newrlocator)
 	RelationCreateStorage(*newrlocator, rel->rd_rel->relpersistence, true);
 
 	/* copy main fork */
-	RelationCopyStorage(RelationGetSmgr(rel), dstrel, MAIN_FORKNUM,
+	dstmain = smgropen(*newrlocator, rel->rd_backend, MAIN_FORKNUM);
+	RelationCopyStorage(RelationGetSmgr(rel, MAIN_FORKNUM), dstmain,
 						rel->rd_rel->relpersistence);
 
 	/* copy those extra forks that exist */
 	for (ForkNumber forkNum = MAIN_FORKNUM + 1;
 		 forkNum <= MAX_FORKNUM; forkNum++)
 	{
-		if (smgrexists(RelationGetSmgr(rel), forkNum))
+		SMgrFileHandle src_fork = RelationGetSmgr(rel, forkNum);
+
+		if (smgrexists(src_fork))
 		{
-			smgrcreate(dstrel, forkNum, false);
+			SMgrFileHandle dst_fork = smgropen(*newrlocator, rel->rd_backend, forkNum);
+
+			smgrcreate(dst_fork, false);
 
 			/*
 			 * WAL log creation if the relation is persistent, or this is the
@@ -665,7 +671,7 @@ heapam_relation_copy_data(Relation rel, const RelFileLocator *newrlocator)
 				(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
 				 forkNum == INIT_FORKNUM))
 				log_smgrcreate(newrlocator, forkNum);
-			RelationCopyStorage(RelationGetSmgr(rel), dstrel, forkNum,
+			RelationCopyStorage(RelationGetSmgr(rel, forkNum), dst_fork,
 								rel->rd_rel->relpersistence);
 		}
 	}
@@ -673,7 +679,7 @@ heapam_relation_copy_data(Relation rel, const RelFileLocator *newrlocator)
 
 	/* drop old relation, and close new one */
 	RelationDropStorage(rel);
-	smgrclose(dstrel);
+	smgrclose(dstmain);
 }
 
 static void
diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c
index 2fe9e48e500..0eeef205a04 100644
--- a/src/backend/access/heap/rewriteheap.c
+++ b/src/backend/access/heap/rewriteheap.c
@@ -325,7 +325,7 @@ end_heap_rewrite(RewriteState state)
 
 		PageSetChecksumInplace(state->rs_buffer, state->rs_blockno);
 
-		smgrextend(RelationGetSmgr(state->rs_new_rel), MAIN_FORKNUM,
+		smgrextend(RelationGetSmgr(state->rs_new_rel, MAIN_FORKNUM),
 				   state->rs_blockno, (char *) state->rs_buffer, true);
 	}
 
@@ -337,7 +337,7 @@ end_heap_rewrite(RewriteState state)
 	 * wrote before the checkpoint.
 	 */
 	if (RelationNeedsWAL(state->rs_new_rel))
-		smgrimmedsync(RelationGetSmgr(state->rs_new_rel), MAIN_FORKNUM);
+		smgrimmedsync(RelationGetSmgr(state->rs_new_rel, MAIN_FORKNUM));
 
 	logical_end_heap_rewrite(state);
 
@@ -691,7 +691,7 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
 			 */
 			PageSetChecksumInplace(page, state->rs_blockno);
 
-			smgrextend(RelationGetSmgr(state->rs_new_rel), MAIN_FORKNUM,
+			smgrextend(RelationGetSmgr(state->rs_new_rel, MAIN_FORKNUM),
 					   state->rs_blockno, (char *) page, true);
 
 			state->rs_blockno++;
diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c
index 4ed70275e22..f48416d1e06 100644
--- a/src/backend/access/heap/visibilitymap.c
+++ b/src/backend/access/heap/visibilitymap.c
@@ -465,7 +465,7 @@ visibilitymap_prepare_truncate(Relation rel, BlockNumber nheapblocks)
 	 * If no visibility map has been created yet for this relation, there's
 	 * nothing to truncate.
 	 */
-	if (!smgrexists(RelationGetSmgr(rel), VISIBILITYMAP_FORKNUM))
+	if (!smgrexists(RelationGetSmgr(rel, VISIBILITYMAP_FORKNUM)))
 		return InvalidBlockNumber;
 
 	/*
@@ -532,7 +532,7 @@ visibilitymap_prepare_truncate(Relation rel, BlockNumber nheapblocks)
 	else
 		newnblocks = truncBlock;
 
-	if (smgrnblocks(RelationGetSmgr(rel), VISIBILITYMAP_FORKNUM) <= newnblocks)
+	if (smgrnblocks(RelationGetSmgr(rel, VISIBILITYMAP_FORKNUM)) <= newnblocks)
 	{
 		/* nothing to do, the file was already smaller than requested size */
 		return InvalidBlockNumber;
@@ -551,29 +551,29 @@ static Buffer
 vm_readbuf(Relation rel, BlockNumber blkno, bool extend)
 {
 	Buffer		buf;
-	SMgrRelation reln;
+	SMgrFileHandle vm_sfile;
 
 	/*
 	 * Caution: re-using this smgr pointer could fail if the relcache entry
 	 * gets closed.  It's safe as long as we only do smgr-level operations
 	 * between here and the last use of the pointer.
 	 */
-	reln = RelationGetSmgr(rel);
+	vm_sfile = RelationGetSmgr(rel, VISIBILITYMAP_FORKNUM);
 
 	/*
 	 * If we haven't cached the size of the visibility map fork yet, check it
 	 * first.
 	 */
-	if (reln->smgr_cached_nblocks[VISIBILITYMAP_FORKNUM] == InvalidBlockNumber)
+	if (vm_sfile->smgr_cached_nblocks == InvalidBlockNumber)
 	{
-		if (smgrexists(reln, VISIBILITYMAP_FORKNUM))
-			smgrnblocks(reln, VISIBILITYMAP_FORKNUM);
+		if (smgrexists(vm_sfile))
+			smgrnblocks(vm_sfile);
 		else
-			reln->smgr_cached_nblocks[VISIBILITYMAP_FORKNUM] = 0;
+			vm_sfile->smgr_cached_nblocks = 0;
 	}
 
 	/* Handle requests beyond EOF */
-	if (blkno >= reln->smgr_cached_nblocks[VISIBILITYMAP_FORKNUM])
+	if (blkno >= vm_sfile->smgr_cached_nblocks)
 	{
 		if (extend)
 			vm_extend(rel, blkno + 1);
@@ -600,8 +600,7 @@ vm_readbuf(Relation rel, BlockNumber blkno, bool extend)
 	 * long as it doesn't depend on the page header having correct contents.
 	 * Current usage is safe because PageGetContents() does not require that.
 	 */
-	buf = ReadBufferExtended(rel, VISIBILITYMAP_FORKNUM, blkno,
-							 RBM_ZERO_ON_ERROR, NULL);
+	buf = ReadBufferExtended(rel, VISIBILITYMAP_FORKNUM, blkno, RBM_ZERO_ON_ERROR, NULL);
 	if (PageIsNew(BufferGetPage(buf)))
 	{
 		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
@@ -621,7 +620,7 @@ vm_extend(Relation rel, BlockNumber vm_nblocks)
 {
 	BlockNumber vm_nblocks_now;
 	PGAlignedBlock pg;
-	SMgrRelation reln;
+	SMgrFileHandle vm_sfile;
 
 	PageInit((Page) pg.data, BLCKSZ, 0);
 
@@ -642,27 +641,27 @@ vm_extend(Relation rel, BlockNumber vm_nblocks)
 	 * gets closed.  It's safe as long as we only do smgr-level operations
 	 * between here and the last use of the pointer.
 	 */
-	reln = RelationGetSmgr(rel);
+	vm_sfile = RelationGetSmgr(rel, VISIBILITYMAP_FORKNUM);
 
 	/*
 	 * Create the file first if it doesn't exist.  If smgr_vm_nblocks is
 	 * positive then it must exist, no need for an smgrexists call.
 	 */
-	if ((reln->smgr_cached_nblocks[VISIBILITYMAP_FORKNUM] == 0 ||
-		 reln->smgr_cached_nblocks[VISIBILITYMAP_FORKNUM] == InvalidBlockNumber) &&
-		!smgrexists(reln, VISIBILITYMAP_FORKNUM))
-		smgrcreate(reln, VISIBILITYMAP_FORKNUM, false);
+	if ((vm_sfile->smgr_cached_nblocks == 0 ||
+		 vm_sfile->smgr_cached_nblocks == InvalidBlockNumber) &&
+		!smgrexists(vm_sfile))
+		smgrcreate(vm_sfile, false);
 
 	/* Invalidate cache so that smgrnblocks() asks the kernel. */
-	reln->smgr_cached_nblocks[VISIBILITYMAP_FORKNUM] = InvalidBlockNumber;
-	vm_nblocks_now = smgrnblocks(reln, VISIBILITYMAP_FORKNUM);
+	vm_sfile->smgr_cached_nblocks = InvalidBlockNumber;
+	vm_nblocks_now = smgrnblocks(vm_sfile);
 
 	/* Now extend the file */
 	while (vm_nblocks_now < vm_nblocks)
 	{
 		PageSetChecksumInplace((Page) pg.data, vm_nblocks_now);
 
-		smgrextend(reln, VISIBILITYMAP_FORKNUM, vm_nblocks_now, pg.data, false);
+		smgrextend(vm_sfile, vm_nblocks_now, pg.data, false);
 		vm_nblocks_now++;
 	}
 
@@ -673,7 +672,7 @@ vm_extend(Relation rel, BlockNumber vm_nblocks)
 	 * to keep checking for creation or extension of the file, which happens
 	 * infrequently.
 	 */
-	CacheInvalidateSmgr(reln->smgr_rlocator);
+	CacheInvalidateSmgr(rel->rd_locator, rel->rd_backend);
 
 	UnlockRelationForExtension(rel, ExclusiveLock);
 }
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index b52eca8f38b..644431a7e3c 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -164,9 +164,9 @@ btbuildempty(Relation index)
 	 * this even when wal_level=minimal.
 	 */
 	PageSetChecksumInplace(metapage, BTREE_METAPAGE);
-	smgrwrite(RelationGetSmgr(index), INIT_FORKNUM, BTREE_METAPAGE,
+	smgrwrite(RelationGetSmgr(index, INIT_FORKNUM), BTREE_METAPAGE,
 			  (char *) metapage, true);
-	log_newpage(&RelationGetSmgr(index)->smgr_rlocator.locator, INIT_FORKNUM,
+	log_newpage(&index->rd_locator, INIT_FORKNUM,
 				BTREE_METAPAGE, metapage, true);
 
 	/*
@@ -174,7 +174,7 @@ btbuildempty(Relation index)
 	 * write did not go through shared_buffers and therefore a concurrent
 	 * checkpoint may have moved the redo pointer past our xlog record.
 	 */
-	smgrimmedsync(RelationGetSmgr(index), INIT_FORKNUM);
+	smgrimmedsync(RelationGetSmgr(index, INIT_FORKNUM));
 }
 
 /*
diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c
index 501e011ce1e..87bb9f52b4d 100644
--- a/src/backend/access/nbtree/nbtsort.c
+++ b/src/backend/access/nbtree/nbtsort.c
@@ -662,7 +662,7 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
 		if (!wstate->btws_zeropage)
 			wstate->btws_zeropage = (Page) palloc0(BLCKSZ);
 		/* don't set checksum for all-zero page */
-		smgrextend(RelationGetSmgr(wstate->index), MAIN_FORKNUM,
+		smgrextend(RelationGetSmgr(wstate->index, MAIN_FORKNUM),
 				   wstate->btws_pages_written++,
 				   (char *) wstate->btws_zeropage,
 				   true);
@@ -677,14 +677,14 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
 	if (blkno == wstate->btws_pages_written)
 	{
 		/* extending the file... */
-		smgrextend(RelationGetSmgr(wstate->index), MAIN_FORKNUM, blkno,
+		smgrextend(RelationGetSmgr(wstate->index, MAIN_FORKNUM), blkno,
 				   (char *) page, true);
 		wstate->btws_pages_written++;
 	}
 	else
 	{
 		/* overwriting a block we zero-filled before */
-		smgrwrite(RelationGetSmgr(wstate->index), MAIN_FORKNUM, blkno,
+		smgrwrite(RelationGetSmgr(wstate->index, MAIN_FORKNUM), blkno,
 				  (char *) page, true);
 	}
 
@@ -1431,7 +1431,7 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2)
 	 * still not be on disk when the crash occurs.
 	 */
 	if (wstate->btws_use_wal)
-		smgrimmedsync(RelationGetSmgr(wstate->index), MAIN_FORKNUM);
+		smgrimmedsync(RelationGetSmgr(wstate->index, MAIN_FORKNUM));
 }
 
 /*
diff --git a/src/backend/access/spgist/spginsert.c b/src/backend/access/spgist/spginsert.c
index c6821b59524..75eb09543d9 100644
--- a/src/backend/access/spgist/spginsert.c
+++ b/src/backend/access/spgist/spginsert.c
@@ -155,8 +155,11 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 void
 spgbuildempty(Relation index)
 {
+	SMgrFileHandle sfile;
 	Page		page;
 
+	sfile = RelationGetSmgr(index, INIT_FORKNUM);
+
 	/* Construct metapage. */
 	page = (Page) palloc(BLCKSZ);
 	SpGistInitMetapage(page);
@@ -169,27 +172,27 @@ spgbuildempty(Relation index)
 	 * replayed.
 	 */
 	PageSetChecksumInplace(page, SPGIST_METAPAGE_BLKNO);
-	smgrwrite(RelationGetSmgr(index), INIT_FORKNUM, SPGIST_METAPAGE_BLKNO,
+	smgrwrite(sfile, SPGIST_METAPAGE_BLKNO,
 			  (char *) page, true);
-	log_newpage(&(RelationGetSmgr(index))->smgr_rlocator.locator, INIT_FORKNUM,
+	log_newpage(&index->rd_locator, INIT_FORKNUM,
 				SPGIST_METAPAGE_BLKNO, page, true);
 
 	/* Likewise for the root page. */
 	SpGistInitPage(page, SPGIST_LEAF);
 
 	PageSetChecksumInplace(page, SPGIST_ROOT_BLKNO);
-	smgrwrite(RelationGetSmgr(index), INIT_FORKNUM, SPGIST_ROOT_BLKNO,
+	smgrwrite(sfile, SPGIST_ROOT_BLKNO,
 			  (char *) page, true);
-	log_newpage(&(RelationGetSmgr(index))->smgr_rlocator.locator, INIT_FORKNUM,
+	log_newpage(&index->rd_locator, INIT_FORKNUM,
 				SPGIST_ROOT_BLKNO, page, true);
 
 	/* Likewise for the null-tuples root page. */
 	SpGistInitPage(page, SPGIST_LEAF | SPGIST_NULLS);
 
 	PageSetChecksumInplace(page, SPGIST_NULL_BLKNO);
-	smgrwrite(RelationGetSmgr(index), INIT_FORKNUM, SPGIST_NULL_BLKNO,
+	smgrwrite(sfile, SPGIST_NULL_BLKNO,
 			  (char *) page, true);
-	log_newpage(&(RelationGetSmgr(index))->smgr_rlocator.locator, INIT_FORKNUM,
+	log_newpage(&index->rd_locator, INIT_FORKNUM,
 				SPGIST_NULL_BLKNO, page, true);
 
 	/*
@@ -197,7 +200,7 @@ spgbuildempty(Relation index)
 	 * writes did not go through shared buffers and therefore a concurrent
 	 * checkpoint may have moved the redo pointer past our xlog record.
 	 */
-	smgrimmedsync(RelationGetSmgr(index), INIT_FORKNUM);
+	smgrimmedsync(sfile);
 }
 
 /*
diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c
index 094b24c7c9c..5bc2c3726f1 100644
--- a/src/backend/access/table/tableam.c
+++ b/src/backend/access/table/tableam.c
@@ -631,10 +631,10 @@ table_block_relation_size(Relation rel, ForkNumber forkNumber)
 	if (forkNumber == InvalidForkNumber)
 	{
 		for (int i = 0; i < MAX_FORKNUM; i++)
-			nblocks += smgrnblocks(RelationGetSmgr(rel), i);
+			nblocks += smgrnblocks(RelationGetSmgr(rel, i));
 	}
 	else
-		nblocks = smgrnblocks(RelationGetSmgr(rel), forkNumber);
+		nblocks = smgrnblocks(RelationGetSmgr(rel, forkNumber));
 
 	return nblocks * BLCKSZ;
 }
diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c
index 77d9894dab3..b6af01fcaa1 100644
--- a/src/backend/access/transam/clog.c
+++ b/src/backend/access/transam/clog.c
@@ -41,6 +41,8 @@
 #include "miscadmin.h"
 #include "pg_trace.h"
 #include "pgstat.h"
+#include "storage/bufmgr.h"
+#include "storage/buf_internals.h"
 #include "storage/proc.h"
 #include "storage/sync.h"
 
@@ -59,7 +61,7 @@
 /* We need two bits per xact, so four xacts fit in a byte */
 #define CLOG_BITS_PER_XACT	2
 #define CLOG_XACTS_PER_BYTE 4
-#define CLOG_XACTS_PER_PAGE (BLCKSZ * CLOG_XACTS_PER_BYTE)
+#define CLOG_XACTS_PER_PAGE ((BLCKSZ - SizeOfPageHeaderData) * CLOG_XACTS_PER_BYTE)
 #define CLOG_XACT_BITMASK	((1 << CLOG_BITS_PER_XACT) - 1)
 
 #define TransactionIdToPage(xid)	((xid) / (TransactionId) CLOG_XACTS_PER_PAGE)
@@ -81,17 +83,10 @@
  */
 #define THRESHOLD_SUBTRANS_CLOG_OPT	5
 
-/*
- * Link to shared-memory data structures for CLOG control
- */
-static SlruCtlData XactCtlData;
-
-#define XactCtl (&XactCtlData)
 
-
-static int	ZeroCLOGPage(int pageno, bool writeXlog);
+static Buffer ZeroCLOGPage(int pageno, bool writeXlog);
 static bool CLOGPagePrecedes(int page1, int page2);
-static void WriteZeroPageXlogRec(int pageno);
+static XLogRecPtr WriteZeroPageXlogRec(int pageno);
 static void WriteTruncateXlogRec(int pageno, TransactionId oldestXact,
 								 Oid oldestXactDb);
 static void TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
@@ -99,7 +94,7 @@ static void TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
 									   XLogRecPtr lsn, int pageno,
 									   bool all_xact_same_page);
 static void TransactionIdSetStatusBit(TransactionId xid, XidStatus status,
-									  XLogRecPtr lsn, int slotno);
+									  XLogRecPtr lsn, Buffer buffer);
 static void set_status_by_pages(int nsubxids, TransactionId *subxids,
 								XidStatus status, XLogRecPtr lsn);
 static bool TransactionGroupUpdateXidStatus(TransactionId xid,
@@ -339,13 +334,12 @@ TransactionIdSetPageStatusInternal(TransactionId xid, int nsubxids,
 								   TransactionId *subxids, XidStatus status,
 								   XLogRecPtr lsn, int pageno)
 {
-	int			slotno;
+	Buffer		buffer;
 	int			i;
 
 	Assert(status == TRANSACTION_STATUS_COMMITTED ||
 		   status == TRANSACTION_STATUS_ABORTED ||
 		   (status == TRANSACTION_STATUS_SUB_COMMITTED && !TransactionIdIsValid(xid)));
-	Assert(LWLockHeldByMeInMode(XactSLRULock, LW_EXCLUSIVE));
 
 	/*
 	 * If we're doing an async commit (ie, lsn is valid), then we must wait
@@ -356,7 +350,8 @@ TransactionIdSetPageStatusInternal(TransactionId xid, int nsubxids,
 	 * write-busy, since we don't care if the update reaches disk sooner than
 	 * we think.
 	 */
-	slotno = SimpleLruReadPage(XactCtl, pageno, XLogRecPtrIsInvalid(lsn), xid);
+	buffer = ReadSlruBuffer(SLRU_CLOG_ID, pageno, RBM_NORMAL);
+	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 
 	/*
 	 * Set the main transaction id, if any.
@@ -374,25 +369,27 @@ TransactionIdSetPageStatusInternal(TransactionId xid, int nsubxids,
 		{
 			for (i = 0; i < nsubxids; i++)
 			{
-				Assert(XactCtl->shared->page_number[slotno] == TransactionIdToPage(subxids[i]));
+				Assert(pageno == TransactionIdToPage(subxids[i]));
 				TransactionIdSetStatusBit(subxids[i],
 										  TRANSACTION_STATUS_SUB_COMMITTED,
-										  lsn, slotno);
+										  lsn, buffer);
 			}
 		}
 
 		/* ... then the main transaction */
-		TransactionIdSetStatusBit(xid, status, lsn, slotno);
+		TransactionIdSetStatusBit(xid, status, lsn, buffer);
 	}
 
 	/* Set the subtransactions */
 	for (i = 0; i < nsubxids; i++)
 	{
-		Assert(XactCtl->shared->page_number[slotno] == TransactionIdToPage(subxids[i]));
-		TransactionIdSetStatusBit(subxids[i], status, lsn, slotno);
+		Assert(pageno == TransactionIdToPage(subxids[i]));
+		TransactionIdSetStatusBit(subxids[i], status, lsn, buffer);
 	}
 
-	XactCtl->shared->page_dirty[slotno] = true;
+	/* Mark the page dirty, then drop the content lock and the pin. */
+	MarkBufferDirty(buffer);
+	UnlockReleaseBuffer(buffer);
 }
 
 /*
@@ -566,7 +563,7 @@ TransactionGroupUpdateXidStatus(TransactionId xid, XidStatus status,
- * Must be called with XactSLRULock held
+ * Must be called with the CLOG buffer's content lock held exclusively
  */
 static void
-TransactionIdSetStatusBit(TransactionId xid, XidStatus status, XLogRecPtr lsn, int slotno)
+TransactionIdSetStatusBit(TransactionId xid, XidStatus status, XLogRecPtr lsn, Buffer buffer)
 {
 	int			byteno = TransactionIdToByte(xid);
 	int			bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT;
@@ -574,7 +571,10 @@ TransactionIdSetStatusBit(TransactionId xid, XidStatus status, XLogRecPtr lsn, i
 	char		byteval;
 	char		curval;
 
-	byteptr = XactCtl->shared->page_buffer[slotno] + byteno;
+	Assert(LWLockHeldByMeInMode(BufferDescriptorGetContentLock(GetBufferDescriptor(buffer - 1)),
+								LW_EXCLUSIVE));
+
+	byteptr = PageGetContents(BufferGetPage(buffer)) + byteno;
 	curval = (*byteptr >> bshift) & CLOG_XACT_BITMASK;
 
 	/*
@@ -602,8 +602,9 @@ TransactionIdSetStatusBit(TransactionId xid, XidStatus status, XLogRecPtr lsn, i
 	byteval |= (status << bshift);
 	*byteptr = byteval;
 
+
 	/*
-	 * Update the group LSN if the transaction completion LSN is higher.
+	 * Update the buffer LSN if the transaction completion LSN is higher.
 	 *
 	 * Note: lsn will be invalid when supplied during InRecovery processing,
 	 * so we don't need to do anything special to avoid LSN updates during
@@ -612,10 +613,8 @@ TransactionIdSetStatusBit(TransactionId xid, XidStatus status, XLogRecPtr lsn, i
 	 */
 	if (!XLogRecPtrIsInvalid(lsn))
 	{
-		int			lsnindex = GetLSNIndex(slotno, xid);
-
-		if (XactCtl->shared->group_lsn[lsnindex] < lsn)
-			XactCtl->shared->group_lsn[lsnindex] = lsn;
+		if (PageGetLSN(BufferGetPage(buffer)) < lsn)
+			PageSetLSN(BufferGetPage(buffer), lsn);
 	}
 }
 
@@ -640,67 +639,28 @@ TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn)
 	int			pageno = TransactionIdToPage(xid);
 	int			byteno = TransactionIdToByte(xid);
 	int			bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT;
-	int			slotno;
-	int			lsnindex;
 	char	   *byteptr;
 	XidStatus	status;
+	Buffer		buffer;
 
-	/* lock is acquired by SimpleLruReadPage_ReadOnly */
-
-	slotno = SimpleLruReadPage_ReadOnly(XactCtl, pageno, xid);
-	byteptr = XactCtl->shared->page_buffer[slotno] + byteno;
+	/*
+	 * Pin the page and take its content lock in share mode.  Writers update
+	 * both the status bits and the page LSN under the exclusive content lock
+	 * (see TransactionIdSetStatusBit), so reading them with only a pin could
+	 * observe a partially updated LSN.
+	 */
+	buffer = ReadSlruBuffer(SLRU_CLOG_ID, pageno, RBM_NORMAL);
+	LockBuffer(buffer, BUFFER_LOCK_SHARE);
+	byteptr = PageGetContents(BufferGetPage(buffer)) + byteno;
 
 	status = (*byteptr >> bshift) & CLOG_XACT_BITMASK;
+	*lsn = PageGetLSN(BufferGetPage(buffer));
 
-	lsnindex = GetLSNIndex(slotno, xid);
-	*lsn = XactCtl->shared->group_lsn[lsnindex];
-
-	LWLockRelease(XactSLRULock);
+	UnlockReleaseBuffer(buffer);
 
 	return status;
 }
 
-/*
- * Number of shared CLOG buffers.
- *
- * On larger multi-processor systems, it is possible to have many CLOG page
- * requests in flight at one time which could lead to disk access for CLOG
- * page if the required page is not found in memory.  Testing revealed that we
- * can get the best performance by having 128 CLOG buffers, more than that it
- * doesn't improve performance.
- *
- * Unconditionally keeping the number of CLOG buffers to 128 did not seem like
- * a good idea, because it would increase the minimum amount of shared memory
- * required to start, which could be a problem for people running very small
- * configurations.  The following formula seems to represent a reasonable
- * compromise: people with very low values for shared_buffers will get fewer
- * CLOG buffers as well, and everyone else will get 128.
- */
-Size
-CLOGShmemBuffers(void)
-{
-	return Min(128, Max(4, NBuffers / 512));
-}
-
-/*
- * Initialization of shared memory for CLOG
- */
-Size
-CLOGShmemSize(void)
-{
-	return SimpleLruShmemSize(CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE);
-}
-
-void
-CLOGShmemInit(void)
-{
-	XactCtl->PagePrecedes = CLOGPagePrecedes;
-	SimpleLruInit(XactCtl, "Xact", CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE,
-				  XactSLRULock, "pg_xact", LWTRANCHE_XACT_BUFFER,
-				  SYNC_HANDLER_CLOG);
-	SlruPagePrecedesUnitTests(XactCtl, CLOG_XACTS_PER_PAGE);
-}
-
 /*
  * This func must be called ONCE on system install.  It creates
  * the initial CLOG segment.  (The CLOG directory is assumed to
@@ -710,18 +663,15 @@ CLOGShmemInit(void)
 void
 BootStrapCLOG(void)
 {
-	int			slotno;
-
-	LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);
+	Buffer		buffer;
 
 	/* Create and zero the first page of the commit log */
-	slotno = ZeroCLOGPage(0, false);
+	buffer = ZeroCLOGPage(0, false);
 
 	/* Make sure it's written out */
-	SimpleLruWritePage(XactCtl, slotno);
-	Assert(!XactCtl->shared->page_dirty[slotno]);
+	FlushOneBuffer(buffer);
 
-	LWLockRelease(XactSLRULock);
+	UnlockReleaseBuffer(buffer);
 }
 
 /*
@@ -733,17 +683,24 @@ BootStrapCLOG(void)
  *
- * Control lock must be held at entry, and will be held at exit.
+ * The zeroed buffer is handed back to the caller, which is responsible for
+ * releasing it (callers in this file use UnlockReleaseBuffer).
  */
-static int
+static Buffer
 ZeroCLOGPage(int pageno, bool writeXlog)
 {
-	int			slotno;
+	XLogRecPtr	lsn = InvalidXLogRecPtr;
+	Buffer		buffer = ZeroSlruBuffer(SLRU_CLOG_ID, pageno);
+	Page		page = BufferGetPage(buffer);
 
-	slotno = SimpleLruZeroPage(XactCtl, pageno);
+	PageInitSLRU(page, BLCKSZ, 0);
 
 	if (writeXlog)
-		WriteZeroPageXlogRec(pageno);
+		lsn = WriteZeroPageXlogRec(pageno);
+
+	/* Without a WAL record the header is stamped with an invalid (zero) LSN */
+	PageSetHeaderDataNonRel(page, pageno, lsn, BLCKSZ, PG_METAPAGE_LAYOUT_VERSION);
+	MarkBufferDirty(buffer);
 
-	return slotno;
+	return buffer;
 }
 
 /*
@@ -753,17 +715,6 @@ ZeroCLOGPage(int pageno, bool writeXlog)
 void
 StartupCLOG(void)
 {
-	TransactionId xid = XidFromFullTransactionId(ShmemVariableCache->nextXid);
-	int			pageno = TransactionIdToPage(xid);
-
-	LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);
-
-	/*
-	 * Initialize our idea of the latest page number.
-	 */
-	XactCtl->shared->latest_page_number = pageno;
-
-	LWLockRelease(XactSLRULock);
 }
 
 /*
@@ -775,8 +726,6 @@ TrimCLOG(void)
 	TransactionId xid = XidFromFullTransactionId(ShmemVariableCache->nextXid);
 	int			pageno = TransactionIdToPage(xid);
 
-	LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);
-
 	/*
 	 * Zero out the remainder of the current clog page.  Under normal
 	 * circumstances it should be zeroes already, but it seems at least
@@ -793,40 +742,33 @@ TrimCLOG(void)
 	{
 		int			byteno = TransactionIdToByte(xid);
 		int			bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT;
-		int			slotno;
 		char	   *byteptr;
+		char	   *pageend;
+		Buffer		buffer;
 
-		slotno = SimpleLruReadPage(XactCtl, pageno, false, xid);
-		byteptr = XactCtl->shared->page_buffer[slotno] + byteno;
+		buffer = ReadSlruBuffer(SLRU_CLOG_ID, pageno, RBM_TRIM);
+		LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+		byteptr = PageGetContents(BufferGetPage(buffer)) + byteno;
+		pageend = (char *) BufferGetPage(buffer) + BLCKSZ;
 
 		/* Zero so-far-unused positions in the current byte */
 		*byteptr &= (1 << bshift) - 1;
-		/* Zero the rest of the page */
-		MemSet(byteptr + 1, 0, BLCKSZ - byteno - 1);
+
+		/*
+		 * Zero the rest of the page.  byteptr is an offset into the page
+		 * contents, which begin after the page header, so the length has to
+		 * be measured up to the physical end of the block; the old
+		 * "BLCKSZ - byteno - 1" would overrun the buffer by the header size.
+		 */
+		MemSet(byteptr + 1, 0, pageend - (byteptr + 1));
 
-		XactCtl->shared->page_dirty[slotno] = true;
-	}
 
-	LWLockRelease(XactSLRULock);
-}
+		MarkBufferDirty(buffer);
 
-/*
- * Perform a checkpoint --- either during shutdown, or on-the-fly
- */
-void
-CheckPointCLOG(void)
-{
-	/*
-	 * Write dirty CLOG pages to disk.  This may result in sync requests
-	 * queued for later handling by ProcessSyncRequests(), as part of the
-	 * checkpoint.
-	 */
-	TRACE_POSTGRESQL_CLOG_CHECKPOINT_START(true);
-	SimpleLruWriteAll(XactCtl, true);
-	TRACE_POSTGRESQL_CLOG_CHECKPOINT_DONE(true);
+		UnlockReleaseBuffer(buffer);
+	}
 }
 
-
 /*
  * Make sure that CLOG has room for a newly-allocated XID.
  *
@@ -850,12 +784,8 @@ ExtendCLOG(TransactionId newestXact)
 
 	pageno = TransactionIdToPage(newestXact);
 
-	LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);
-
 	/* Zero the page and make an XLOG entry about it */
-	ZeroCLOGPage(pageno, true);
-
-	LWLockRelease(XactSLRULock);
+	UnlockReleaseBuffer(ZeroCLOGPage(pageno, true));
 }
 
 
@@ -886,7 +816,8 @@ TruncateCLOG(TransactionId oldestXact, Oid oldestxid_datoid)
 	cutoffPage = TransactionIdToPage(oldestXact);
 
 	/* Check to see if there's any files that could be removed */
-	if (!SlruScanDirectory(XactCtl, SlruScanDirCbReportPresence, &cutoffPage))
+	if (!SlruScanDirectory(SLRU_CLOG_ID, CLOGPagePrecedes,
+						   SlruScanDirCbReportPresence, &cutoffPage))
 		return;					/* nothing to remove */
 
 	/*
@@ -907,7 +838,7 @@ TruncateCLOG(TransactionId oldestXact, Oid oldestxid_datoid)
 	WriteTruncateXlogRec(cutoffPage, oldestXact, oldestxid_datoid);
 
 	/* Now we can remove the old CLOG segment(s) */
-	SimpleLruTruncate(XactCtl, cutoffPage);
+	SimpleLruTruncate(SLRU_CLOG_ID, CLOGPagePrecedes, cutoffPage);
 }
 
 
@@ -948,12 +879,14 @@ CLOGPagePrecedes(int page1, int page2)
 /*
  * Write a ZEROPAGE xlog record
  */
-static void
+static XLogRecPtr
 WriteZeroPageXlogRec(int pageno)
 {
 	XLogBeginInsert();
 	XLogRegisterData((char *) (&pageno), sizeof(int));
-	(void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE);
+
+	/* Report the record's LSN so the caller can stamp it on the page. */
+	return XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE);
 }
 
 /*
@@ -992,17 +926,13 @@ clog_redo(XLogReaderState *record)
 	if (info == CLOG_ZEROPAGE)
 	{
 		int			pageno;
-		int			slotno;
+		Buffer		buffer;
 
 		memcpy(&pageno, XLogRecGetData(record), sizeof(int));
 
-		LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);
-
-		slotno = ZeroCLOGPage(pageno, false);
-		SimpleLruWritePage(XactCtl, slotno);
-		Assert(!XactCtl->shared->page_dirty[slotno]);
-
-		LWLockRelease(XactSLRULock);
+		buffer = ZeroCLOGPage(pageno, false);
+		FlushOneBuffer(buffer);
+		UnlockReleaseBuffer(buffer);
 	}
 	else if (info == CLOG_TRUNCATE)
 	{
@@ -1012,17 +942,8 @@ clog_redo(XLogReaderState *record)
 
 		AdvanceOldestClogXid(xlrec.oldestXact);
 
-		SimpleLruTruncate(XactCtl, xlrec.pageno);
+		SimpleLruTruncate(SLRU_CLOG_ID, CLOGPagePrecedes, xlrec.pageno);
 	}
 	else
 		elog(PANIC, "clog_redo: unknown op code %u", info);
 }
-
-/*
- * Entrypoint for sync.c to sync clog files.
- */
-int
-clogsyncfiletag(const FileTag *ftag, char *path)
-{
-	return SlruSyncFileTag(XactCtl, ftag, path);
-}
diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c
index 9aa4675cb79..af88abd5c84 100644
--- a/src/backend/access/transam/commit_ts.c
+++ b/src/backend/access/transam/commit_ts.c
@@ -63,19 +63,14 @@ typedef struct CommitTimestampEntry
 									sizeof(RepOriginId))
 
 #define COMMIT_TS_XACTS_PER_PAGE \
-	(BLCKSZ / SizeOfCommitTimestampEntry)
+	((BLCKSZ - SizeOfPageHeaderData) / SizeOfCommitTimestampEntry)
 
 #define TransactionIdToCTsPage(xid) \
 	((xid) / (TransactionId) COMMIT_TS_XACTS_PER_PAGE)
 #define TransactionIdToCTsEntry(xid)	\
 	((xid) % (TransactionId) COMMIT_TS_XACTS_PER_PAGE)
 
-/*
- * Link to shared-memory data structures for CommitTs control
- */
-static SlruCtlData CommitTsCtlData;
 
-#define CommitTsCtl (&CommitTsCtlData)
 
 /*
  * We keep a cache of the last value set in shared memory.
@@ -107,11 +102,12 @@ static void SetXidCommitTsInPage(TransactionId xid, int nsubxids,
 static void TransactionIdSetCommitTs(TransactionId xid, TimestampTz ts,
-									 RepOriginId nodeid, int slotno);
+									 RepOriginId nodeid, Buffer buffer);
 static void error_commit_ts_disabled(void);
-static int	ZeroCommitTsPage(int pageno, bool writeXlog);
+static Buffer ZeroCommitTsPage(int pageno, bool writeXlog);
 static bool CommitTsPagePrecedes(int page1, int page2);
 static void ActivateCommitTs(void);
 static void DeactivateCommitTs(void);
-static void WriteZeroPageXlogRec(int pageno);
+static XLogRecPtr WriteZeroPageXlogRec(int pageno);
+
 static void WriteTruncateXlogRec(int pageno, TransactionId oldestXid);
 
 /*
@@ -216,32 +212,33 @@ SetXidCommitTsInPage(TransactionId xid, int nsubxids,
 					 TransactionId *subxids, TimestampTz ts,
 					 RepOriginId nodeid, int pageno)
 {
-	int			slotno;
 	int			i;
+	Buffer		buffer;
 
-	LWLockAcquire(CommitTsSLRULock, LW_EXCLUSIVE);
+	buffer = ReadSlruBuffer(SLRU_COMMIT_TS_ID, pageno, RBM_NORMAL);
+	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 
-	slotno = SimpleLruReadPage(CommitTsCtl, pageno, true, xid);
-
-	TransactionIdSetCommitTs(xid, ts, nodeid, slotno);
+	TransactionIdSetCommitTs(xid, ts, nodeid, buffer);
 	for (i = 0; i < nsubxids; i++)
-		TransactionIdSetCommitTs(subxids[i], ts, nodeid, slotno);
+		TransactionIdSetCommitTs(subxids[i], ts, nodeid, buffer);
 
-	CommitTsCtl->shared->page_dirty[slotno] = true;
+	MarkBufferDirty(buffer);
 
-	LWLockRelease(CommitTsSLRULock);
+	UnlockReleaseBuffer(buffer);
 }
 
 /*
  * Sets the commit timestamp of a single transaction.
  *
- * Must be called with CommitTsSLRULock held
+ * The caller must hold an exclusive content lock on "buffer", as
+ * SetXidCommitTsInPage does.
  */
 static void
 TransactionIdSetCommitTs(TransactionId xid, TimestampTz ts,
-						 RepOriginId nodeid, int slotno)
+						 RepOriginId nodeid, Buffer buffer)
 {
 	int			entryno = TransactionIdToCTsEntry(xid);
+	int			pageno PG_USED_FOR_ASSERTS_ONLY = TransactionIdToCTsPage(xid);
 	CommitTimestampEntry entry;
 
 	Assert(TransactionIdIsNormal(xid));
@@ -249,9 +244,12 @@ TransactionIdSetCommitTs(TransactionId xid, TimestampTz ts,
 	entry.time = ts;
 	entry.nodeid = nodeid;
 
-	memcpy(CommitTsCtl->shared->page_buffer[slotno] +
+	Assert(xid == pageno * COMMIT_TS_XACTS_PER_PAGE + entryno);
+
+	memcpy(PageGetContents(BufferGetPage(buffer)) + \
 		   SizeOfCommitTimestampEntry * entryno,
 		   &entry, SizeOfCommitTimestampEntry);
+
 }
 
 /*
@@ -268,10 +266,10 @@ TransactionIdGetCommitTsData(TransactionId xid, TimestampTz *ts,
 {
 	int			pageno = TransactionIdToCTsPage(xid);
 	int			entryno = TransactionIdToCTsEntry(xid);
-	int			slotno;
 	CommitTimestampEntry entry;
 	TransactionId oldestCommitTsXid;
 	TransactionId newestCommitTsXid;
+	Buffer		buffer;
 
 	if (!TransactionIdIsValid(xid))
 		ereport(ERROR,
@@ -325,18 +323,19 @@ TransactionIdGetCommitTsData(TransactionId xid, TimestampTz *ts,
 		return false;
 	}
 
-	/* lock is acquired by SimpleLruReadPage_ReadOnly */
-	slotno = SimpleLruReadPage_ReadOnly(CommitTsCtl, pageno, xid);
+	buffer = ReadSlruBuffer(SLRU_COMMIT_TS_ID, pageno, RBM_NORMAL);
+	LockBuffer(buffer, BUFFER_LOCK_SHARE);
+
 	memcpy(&entry,
-		   CommitTsCtl->shared->page_buffer[slotno] +
-		   SizeOfCommitTimestampEntry * entryno,
-		   SizeOfCommitTimestampEntry);
+			PageGetContents(BufferGetPage(buffer)) + \
+			SizeOfCommitTimestampEntry * entryno,
+			SizeOfCommitTimestampEntry);
 
 	*ts = entry.time;
 	if (nodeid)
 		*nodeid = entry.nodeid;
 
-	LWLockRelease(CommitTsSLRULock);
+	UnlockReleaseBuffer(buffer);
 	return *ts != 0;
 }
 
@@ -505,27 +504,13 @@ pg_xact_commit_timestamp_origin(PG_FUNCTION_ARGS)
 	PG_RETURN_DATUM(HeapTupleGetDatum(htup));
 }
 
-/*
- * Number of shared CommitTS buffers.
- *
- * We use a very similar logic as for the number of CLOG buffers (except we
- * scale up twice as fast with shared buffers, and the maximum is twice as
- * high); see comments in CLOGShmemBuffers.
- */
-Size
-CommitTsShmemBuffers(void)
-{
-	return Min(256, Max(4, NBuffers / 256));
-}
-
 /*
  * Shared memory sizing for CommitTs
  */
 Size
 CommitTsShmemSize(void)
 {
-	return SimpleLruShmemSize(CommitTsShmemBuffers(), 0) +
-		sizeof(CommitTimestampShared);
+	return sizeof(CommitTimestampShared);
 }
 
 /*
@@ -537,12 +522,7 @@ CommitTsShmemInit(void)
 {
 	bool		found;
 
-	CommitTsCtl->PagePrecedes = CommitTsPagePrecedes;
-	SimpleLruInit(CommitTsCtl, "CommitTs", CommitTsShmemBuffers(), 0,
-				  CommitTsSLRULock, "pg_commit_ts",
-				  LWTRANCHE_COMMITTS_BUFFER,
-				  SYNC_HANDLER_COMMIT_TS);
-	SlruPagePrecedesUnitTests(CommitTsCtl, COMMIT_TS_XACTS_PER_PAGE);
+	SlruPagePrecedesUnitTests(CommitTsPagePrecedes, COMMIT_TS_XACTS_PER_PAGE);
 
 	commitTsShared = ShmemInitStruct("CommitTs shared",
 									 sizeof(CommitTimestampShared),
@@ -586,17 +566,28 @@ BootStrapCommitTs(void)
  *
  * Control lock must be held at entry, and will be held at exit.
  */
-static int
+static Buffer
 ZeroCommitTsPage(int pageno, bool writeXlog)
 {
-	int			slotno;
-
-	slotno = SimpleLruZeroPage(CommitTsCtl, pageno);
-
+	Buffer		buffer;
+	Page		page;
+	XLogRecPtr	lsn = InvalidXLogRecPtr;
+
+	buffer = ZeroSlruBuffer(SLRU_COMMIT_TS_ID, pageno);
+	page = BufferGetPage(buffer);
+	PageInitSLRU(page, BLCKSZ, 0);
+
 	if (writeXlog)
-		WriteZeroPageXlogRec(pageno);
+		lsn = WriteZeroPageXlogRec(pageno);
+
+	/*
+	 * Stamp the page header whether or not WAL was written, matching
+	 * ZeroCLOGPage; without a record the LSN is simply left invalid.
+	 */
+	PageSetHeaderDataNonRel(page, pageno, lsn, BLCKSZ, PG_METAPAGE_LAYOUT_VERSION);
+	MarkBufferDirty(buffer);
 
-	return slotno;
+	return buffer;
 }
 
 /*
@@ -694,13 +683,6 @@ ActivateCommitTs(void)
 	xid = XidFromFullTransactionId(ShmemVariableCache->nextXid);
 	pageno = TransactionIdToCTsPage(xid);
 
-	/*
-	 * Re-Initialize our idea of the latest page number.
-	 */
-	LWLockAcquire(CommitTsSLRULock, LW_EXCLUSIVE);
-	CommitTsCtl->shared->latest_page_number = pageno;
-	LWLockRelease(CommitTsSLRULock);
-
 	/*
 	 * If CommitTs is enabled, but it wasn't in the previous server run, we
 	 * need to set the oldest and newest values to the next Xid; that way, we
@@ -723,15 +705,19 @@ ActivateCommitTs(void)
 	LWLockRelease(CommitTsLock);
 
 	/* Create the current segment file, if necessary */
-	if (!SimpleLruDoesPhysicalPageExist(CommitTsCtl, pageno))
+	if (!SimpleLruDoesPhysicalPageExist(SLRU_COMMIT_TS_ID, pageno))
 	{
-		int			slotno;
+		Buffer		buffer;
+		Page 		page;
 
-		LWLockAcquire(CommitTsSLRULock, LW_EXCLUSIVE);
-		slotno = ZeroCommitTsPage(pageno, false);
-		SimpleLruWritePage(CommitTsCtl, slotno);
-		Assert(!CommitTsCtl->shared->page_dirty[slotno]);
-		LWLockRelease(CommitTsSLRULock);
+		buffer = ZeroSlruBuffer(SLRU_COMMIT_TS_ID, pageno);
+		
+		page = BufferGetPage(buffer);
+		PageInitSLRU(page, BLCKSZ, 0);
+
+		MarkBufferDirty(buffer);
+		FlushOneBuffer(buffer);
+		UnlockReleaseBuffer(buffer);
 	}
 
 	/* Change the activation status in shared memory. */
@@ -780,23 +766,9 @@ DeactivateCommitTs(void)
 	 * be overwritten anyway when we wrap around, but it seems better to be
 	 * tidy.)
 	 */
-	LWLockAcquire(CommitTsSLRULock, LW_EXCLUSIVE);
-	(void) SlruScanDirectory(CommitTsCtl, SlruScanDirCbDeleteAll, NULL);
-	LWLockRelease(CommitTsSLRULock);
-}
-
-/*
- * Perform a checkpoint --- either during shutdown, or on-the-fly
- */
-void
-CheckPointCommitTs(void)
-{
-	/*
-	 * Write dirty CommitTs pages to disk.  This may result in sync requests
-	 * queued for later handling by ProcessSyncRequests(), as part of the
-	 * checkpoint.
-	 */
-	SimpleLruWriteAll(CommitTsCtl, true);
+	(void) SlruScanDirectory(SLRU_COMMIT_TS_ID,
+							 CommitTsPagePrecedes,
+							 SlruScanDirCbDeleteAll, NULL);
 }
 
 /*
@@ -834,12 +806,8 @@ ExtendCommitTs(TransactionId newestXact)
 
 	pageno = TransactionIdToCTsPage(newestXact);
 
-	LWLockAcquire(CommitTsSLRULock, LW_EXCLUSIVE);
-
 	/* Zero the page and make an XLOG entry about it */
-	ZeroCommitTsPage(pageno, !InRecovery);
-
-	LWLockRelease(CommitTsSLRULock);
+	UnlockReleaseBuffer(ZeroCommitTsPage(pageno, !InRecovery));
 }
 
 /*
@@ -860,7 +828,9 @@ TruncateCommitTs(TransactionId oldestXact)
 	cutoffPage = TransactionIdToCTsPage(oldestXact);
 
 	/* Check to see if there's any files that could be removed */
-	if (!SlruScanDirectory(CommitTsCtl, SlruScanDirCbReportPresence,
+	if (!SlruScanDirectory(SLRU_COMMIT_TS_ID,
+						   CommitTsPagePrecedes,
+						   SlruScanDirCbReportPresence,
 						   &cutoffPage))
 		return;					/* nothing to remove */
 
@@ -868,7 +838,7 @@ TruncateCommitTs(TransactionId oldestXact)
 	WriteTruncateXlogRec(cutoffPage, oldestXact);
 
 	/* Now we can remove the old CommitTs segment(s) */
-	SimpleLruTruncate(CommitTsCtl, cutoffPage);
+	SimpleLruTruncate(SLRU_COMMIT_TS_ID, CommitTsPagePrecedes, cutoffPage);
 }
 
 /*
@@ -954,12 +924,16 @@ CommitTsPagePrecedes(int page1, int page2)
 /*
  * Write a ZEROPAGE xlog record
  */
-static void
+static XLogRecPtr 
 WriteZeroPageXlogRec(int pageno)
 {
+	XLogRecPtr lsn;
+	
 	XLogBeginInsert();
 	XLogRegisterData((char *) (&pageno), sizeof(int));
-	(void) XLogInsert(RM_COMMIT_TS_ID, COMMIT_TS_ZEROPAGE);
+	lsn = XLogInsert(RM_COMMIT_TS_ID, COMMIT_TS_ZEROPAGE);
+	
+	return lsn;
 }
 
 /*
@@ -992,17 +966,18 @@ commit_ts_redo(XLogReaderState *record)
 	if (info == COMMIT_TS_ZEROPAGE)
 	{
 		int			pageno;
-		int			slotno;
+		Buffer		buffer;
 
 		memcpy(&pageno, XLogRecGetData(record), sizeof(int));
 
-		LWLockAcquire(CommitTsSLRULock, LW_EXCLUSIVE);
-
-		slotno = ZeroCommitTsPage(pageno, false);
-		SimpleLruWritePage(CommitTsCtl, slotno);
-		Assert(!CommitTsCtl->shared->page_dirty[slotno]);
-
-		LWLockRelease(CommitTsSLRULock);
+		/*
+		 * Re-create the zeroed page without emitting WAL (we are replaying
+		 * the record that describes it), then flush it before letting go of
+		 * the buffer.
+		 */
+		buffer = ZeroCommitTsPage(pageno, false);
+		FlushOneBuffer(buffer);
+		UnlockReleaseBuffer(buffer);
 	}
 	else if (info == COMMIT_TS_TRUNCATE)
 	{
@@ -1010,23 +986,8 @@ commit_ts_redo(XLogReaderState *record)
 
 		AdvanceOldestCommitTsXid(trunc->oldestXid);
 
-		/*
-		 * During XLOG replay, latest_page_number isn't set up yet; insert a
-		 * suitable value to bypass the sanity test in SimpleLruTruncate.
-		 */
-		CommitTsCtl->shared->latest_page_number = trunc->pageno;
-
-		SimpleLruTruncate(CommitTsCtl, trunc->pageno);
+		SimpleLruTruncate(SLRU_COMMIT_TS_ID, CommitTsPagePrecedes, trunc->pageno);
 	}
 	else
 		elog(PANIC, "commit_ts_redo: unknown op code %u", info);
 }
-
-/*
- * Entrypoint for sync.c to sync commit_ts files.
- */
-int
-committssyncfiletag(const FileTag *ftag, char *path)
-{
-	return SlruSyncFileTag(CommitTsCtl, ftag, path);
-}
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index e1191a7564c..358c67fa65c 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -106,7 +106,7 @@
  */
 
 /* We need four bytes per offset */
-#define MULTIXACT_OFFSETS_PER_PAGE (BLCKSZ / sizeof(MultiXactOffset))
+#define MULTIXACT_OFFSETS_PER_PAGE ((BLCKSZ - SizeOfPageHeaderData)/ sizeof(MultiXactOffset))
 
 #define MultiXactIdToOffsetPage(xid) \
 	((xid) / (MultiXactOffset) MULTIXACT_OFFSETS_PER_PAGE)
@@ -138,7 +138,7 @@
 /* size in bytes of a complete group */
 #define MULTIXACT_MEMBERGROUP_SIZE \
 	(sizeof(TransactionId) * MULTIXACT_MEMBERS_PER_MEMBERGROUP + MULTIXACT_FLAGBYTES_PER_GROUP)
-#define MULTIXACT_MEMBERGROUPS_PER_PAGE (BLCKSZ / MULTIXACT_MEMBERGROUP_SIZE)
+#define MULTIXACT_MEMBERGROUPS_PER_PAGE ((BLCKSZ - SizeOfPageHeaderData) / MULTIXACT_MEMBERGROUP_SIZE)
 #define MULTIXACT_MEMBERS_PER_PAGE	\
 	(MULTIXACT_MEMBERGROUPS_PER_PAGE * MULTIXACT_MEMBERS_PER_MEMBERGROUP)
 
@@ -161,9 +161,9 @@
 
 /* Location (byte offset within page) of flag word for a given member */
 #define MXOffsetToFlagsOffset(xid) \
-	((((xid) / (TransactionId) MULTIXACT_MEMBERS_PER_MEMBERGROUP) % \
+	(((((xid) / (TransactionId) MULTIXACT_MEMBERS_PER_MEMBERGROUP) % \
 	  (TransactionId) MULTIXACT_MEMBERGROUPS_PER_PAGE) * \
-	 (TransactionId) MULTIXACT_MEMBERGROUP_SIZE)
+	 (TransactionId) MULTIXACT_MEMBERGROUP_SIZE))
 #define MXOffsetToFlagsBitShift(xid) \
 	(((xid) % (TransactionId) MULTIXACT_MEMBERS_PER_MEMBERGROUP) * \
 	 MXACT_MEMBER_BITS_PER_XACT)
@@ -181,15 +181,6 @@
 #define PreviousMultiXactId(xid) \
 	((xid) == FirstMultiXactId ? MaxMultiXactId : (xid) - 1)
 
-/*
- * Links to shared-memory data structures for MultiXact control
- */
-static SlruCtlData MultiXactOffsetCtlData;
-static SlruCtlData MultiXactMemberCtlData;
-
-#define MultiXactOffsetCtl	(&MultiXactOffsetCtlData)
-#define MultiXactMemberCtl	(&MultiXactMemberCtlData)
-
 /*
  * MultiXact state shared across all backends.  All this state is protected
  * by MultiXactGenLock.  (We also use MultiXactOffsetSLRULock and
@@ -339,8 +330,8 @@ static MemoryContext MXactContext = NULL;
 /* internal MultiXactId management */
 static void MultiXactIdSetOldestVisible(void);
 static void RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
-							   int nmembers, MultiXactMember *members);
-static MultiXactId GetNewMultiXactId(int nmembers, MultiXactOffset *offset);
+							   int nmembers, MultiXactMember *members, Buffer * offset_buf_ptr, Buffer * member_bufs);
+static MultiXactId GetNewMultiXactId(int nmembers, MultiXactOffset *offset, Buffer * offset_buf, Buffer ** member_bufs);
 
 /* MultiXact cache management */
 static int	mxactMemberComparator(const void *arg1, const void *arg2);
@@ -352,19 +343,18 @@ static void mXactCachePut(MultiXactId multi, int nmembers,
 static char *mxstatus_to_string(MultiXactStatus status);
 
 /* management of SLRU infrastructure */
-static int	ZeroMultiXactOffsetPage(int pageno, bool writeXlog);
-static int	ZeroMultiXactMemberPage(int pageno, bool writeXlog);
+static Buffer ZeroMultiXactOffsetPage(int pageno, bool writeXlog);
+static Buffer ZeroMultiXactMemberPage(int pageno, bool writeXlog);
 static bool MultiXactOffsetPagePrecedes(int page1, int page2);
-static bool MultiXactMemberPagePrecedes(int page1, int page2);
 static bool MultiXactOffsetPrecedes(MultiXactOffset offset1,
 									MultiXactOffset offset2);
-static void ExtendMultiXactOffset(MultiXactId multi);
-static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers);
+static void ExtendMultiXactOffset(MultiXactId multi, Buffer * buffer);
+static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers, Buffer ** member_buffers);
 static bool MultiXactOffsetWouldWrap(MultiXactOffset boundary,
 									 MultiXactOffset start, uint32 distance);
 static bool SetOffsetVacuumLimit(bool is_startup);
 static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result);
-static void WriteMZeroPageXlogRec(int pageno, uint8 info);
+static XLogRecPtr WriteMZeroPageXlogRec(int pageno, uint8 info);
 static void WriteMTruncateXlogRec(Oid oldestMultiDB,
 								  MultiXactId startTruncOff,
 								  MultiXactId endTruncOff,
@@ -765,6 +755,9 @@ ReadMultiXactIdRange(MultiXactId *oldest, MultiXactId *next)
 MultiXactId
 MultiXactIdCreateFromMembers(int nmembers, MultiXactMember *members)
 {
+	Buffer * member_bufs;
+	Buffer offset_buff;
+
 	MultiXactId multi;
 	MultiXactOffset offset;
 	xl_multixact_create xlrec;
@@ -818,7 +811,8 @@ MultiXactIdCreateFromMembers(int nmembers, MultiXactMember *members)
 	 * in vacuum.  During vacuum, in particular, it would be unacceptable to
 	 * keep OldestMulti set, in case it runs for long.
 	 */
-	multi = GetNewMultiXactId(nmembers, &offset);
+
+	multi = GetNewMultiXactId(nmembers, &offset, &offset_buff, &member_bufs);
 
 	/* Make an XLOG entry describing the new MXID. */
 	xlrec.mid = multi;
@@ -838,8 +832,8 @@ MultiXactIdCreateFromMembers(int nmembers, MultiXactMember *members)
 	(void) XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_CREATE_ID);
 
 	/* Now enter the information into the OFFSETs and MEMBERs logs */
-	RecordNewMultiXact(multi, offset, nmembers, members);
-
+	RecordNewMultiXact(multi, offset, nmembers, members, &offset_buff, member_bufs);
+	
 	/* Done with critical section */
 	END_CRIT_SECTION();
 
@@ -860,40 +854,38 @@ MultiXactIdCreateFromMembers(int nmembers, MultiXactMember *members)
  */
 static void
 RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
-				   int nmembers, MultiXactMember *members)
+				   int nmembers, MultiXactMember *members, Buffer * offset_buf_ptr, Buffer * member_bufs)
 {
 	int			pageno;
 	int			prev_pageno;
+	int 		min_pageno;
 	int			entryno;
-	int			slotno;
 	MultiXactOffset *offptr;
 	int			i;
-
-	LWLockAcquire(MultiXactOffsetSLRULock, LW_EXCLUSIVE);
+	Buffer		buffer;
+	Buffer		offset_buf;
 
 	pageno = MultiXactIdToOffsetPage(multi);
 	entryno = MultiXactIdToOffsetEntry(multi);
 
-	/*
-	 * Note: we pass the MultiXactId to SimpleLruReadPage as the "transaction"
-	 * to complain about if there's any I/O error.  This is kinda bogus, but
-	 * since the errors will always give the full pathname, it should be clear
-	 * enough that a MultiXactId is really involved.  Perhaps someday we'll
-	 * take the trouble to generalize the slru.c error reporting code.
-	 */
-	slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, multi);
-	offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
-	offptr += entryno;
+	if (offset_buf_ptr)
+		offset_buf = *offset_buf_ptr;
+	else 
+		offset_buf = ReadSlruBuffer(SLRU_MULTIXACT_OFFSET_ID, pageno, RBM_NORMAL);
 
-	*offptr = offset;
+	LockBuffer(offset_buf, BUFFER_LOCK_EXCLUSIVE);
+						
 
-	MultiXactOffsetCtl->shared->page_dirty[slotno] = true;
+	offptr = (MultiXactOffset *) PageGetContents(BufferGetPage(offset_buf));
+	offptr += entryno;
 
-	/* Exchange our lock */
-	LWLockRelease(MultiXactOffsetSLRULock);
+	*offptr = offset;
 
-	LWLockAcquire(MultiXactMemberSLRULock, LW_EXCLUSIVE);
+	MarkBufferDirty(offset_buf);
+	UnlockReleaseBuffer(offset_buf);
+	buffer = InvalidBuffer;
 
+	min_pageno = MXOffsetToMemberPage(offset);
 	prev_pageno = -1;
 
 	for (i = 0; i < nmembers; i++, offset++)
@@ -914,27 +906,35 @@ RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
 
 		if (pageno != prev_pageno)
 		{
-			slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true, multi);
+			if (BufferIsValid(buffer))
+				UnlockReleaseBuffer(buffer);
+
+			if (member_bufs)
+				buffer = member_bufs[pageno - min_pageno];
+			else
+				buffer = ReadSlruBuffer(SLRU_MULTIXACT_MEMBER_ID, pageno, RBM_NORMAL);
+
+			LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 			prev_pageno = pageno;
 		}
 
-		memberptr = (TransactionId *)
-			(MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
+		memberptr = (TransactionId *) (PageGetContents(BufferGetPage(buffer)) + memberoff);
 
 		*memberptr = members[i].xid;
 
-		flagsptr = (uint32 *)
-			(MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff);
+		flagsptr = (uint32 *) (PageGetContents(BufferGetPage(buffer)) + flagsoff);
 
 		flagsval = *flagsptr;
 		flagsval &= ~(((1 << MXACT_MEMBER_BITS_PER_XACT) - 1) << bshift);
 		flagsval |= (members[i].status << bshift);
 		*flagsptr = flagsval;
 
-		MultiXactMemberCtl->shared->page_dirty[slotno] = true;
+		MarkBufferDirty(buffer);
 	}
 
-	LWLockRelease(MultiXactMemberSLRULock);
+	UnlockReleaseBuffer(buffer);
+	if (member_bufs != NULL)
+		pfree(member_bufs);
 }
 
 /*
@@ -953,8 +953,11 @@ RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
  * caller must end the critical section after writing SLRU data.
  */
 static MultiXactId
-GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
+GetNewMultiXactId(int nmembers, MultiXactOffset *offset, Buffer * offset_buf, Buffer ** member_bufs)
 {
+	int min_pageno;
+	int max_pageno;
+
 	MultiXactId result;
 	MultiXactOffset nextOffset;
 
@@ -1072,7 +1075,7 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
 	}
 
 	/* Make sure there is room for the MXID in the file.  */
-	ExtendMultiXactOffset(result);
+	ExtendMultiXactOffset(result, offset_buf);
 
 	/*
 	 * Reserve the members space, similarly to above.  Also, be careful not to
@@ -1160,7 +1163,12 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
 							   MultiXactState->offsetStopLimit - nextOffset + nmembers),
 				 errhint("Execute a database-wide VACUUM in that database with reduced vacuum_multixact_freeze_min_age and vacuum_multixact_freeze_table_age settings.")));
 
-	ExtendMultiXactMember(nextOffset, nmembers);
+	min_pageno = MXOffsetToMemberPage(nextOffset);
+	max_pageno = MXOffsetToMemberPage(nextOffset + nmembers - 1);
+
+	*member_bufs = (Buffer *) palloc(sizeof(Buffer) * (max_pageno - min_pageno + 1));
+
+	ExtendMultiXactMember(nextOffset, nmembers, member_bufs);
 
 	/*
 	 * Critical section from here until caller has written the data into the
@@ -1226,7 +1234,6 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
 	int			pageno;
 	int			prev_pageno;
 	int			entryno;
-	int			slotno;
 	MultiXactOffset *offptr;
 	MultiXactOffset offset;
 	int			length;
@@ -1237,6 +1244,7 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
 	MultiXactId tmpMXact;
 	MultiXactOffset nextOffset;
 	MultiXactMember *ptr;
+	Buffer		buffer;
 
 	debug_elog3(DEBUG2, "GetMembers: asked for %u", multi);
 
@@ -1340,13 +1348,12 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
 	 * time on every multixact creation.
 	 */
 retry:
-	LWLockAcquire(MultiXactOffsetSLRULock, LW_EXCLUSIVE);
-
 	pageno = MultiXactIdToOffsetPage(multi);
 	entryno = MultiXactIdToOffsetEntry(multi);
 
-	slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, multi);
-	offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
+	buffer = ReadSlruBuffer(SLRU_MULTIXACT_OFFSET_ID, pageno, RBM_NORMAL);
+	LockBuffer(buffer, BUFFER_LOCK_SHARE);
+	offptr = (MultiXactOffset *) (PageGetContents(BufferGetPage(buffer)));
 	offptr += entryno;
 	offset = *offptr;
 
@@ -1377,16 +1384,20 @@ retry:
 		entryno = MultiXactIdToOffsetEntry(tmpMXact);
 
 		if (pageno != prev_pageno)
-			slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, tmpMXact);
+		{
+			UnlockReleaseBuffer(buffer);
+			buffer = ReadSlruBuffer(SLRU_MULTIXACT_OFFSET_ID, pageno, RBM_NORMAL);
+			LockBuffer(buffer, BUFFER_LOCK_SHARE);
+		}
 
-		offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
+		offptr = (MultiXactOffset *) (PageGetContents(BufferGetPage(buffer)));
 		offptr += entryno;
 		nextMXOffset = *offptr;
 
 		if (nextMXOffset == 0)
 		{
 			/* Corner case 2: next multixact is still being filled in */
-			LWLockRelease(MultiXactOffsetSLRULock);
+			UnlockReleaseBuffer(buffer);
 			CHECK_FOR_INTERRUPTS();
 			pg_usleep(1000L);
 			goto retry;
@@ -1394,14 +1405,11 @@ retry:
 
 		length = nextMXOffset - offset;
 	}
-
-	LWLockRelease(MultiXactOffsetSLRULock);
+	UnlockReleaseBuffer(buffer);
+	buffer = InvalidBuffer;
 
 	ptr = (MultiXactMember *) palloc(length * sizeof(MultiXactMember));
 
-	/* Now get the members themselves. */
-	LWLockAcquire(MultiXactMemberSLRULock, LW_EXCLUSIVE);
-
 	truelength = 0;
 	prev_pageno = -1;
 	for (i = 0; i < length; i++, offset++)
@@ -1417,12 +1425,14 @@ retry:
 
 		if (pageno != prev_pageno)
 		{
-			slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true, multi);
+			if (BufferIsValid(buffer))
+				UnlockReleaseBuffer(buffer);
+			buffer = ReadSlruBuffer(SLRU_MULTIXACT_MEMBER_ID, pageno, RBM_NORMAL);
+			LockBuffer(buffer, BUFFER_LOCK_SHARE);
 			prev_pageno = pageno;
 		}
 
-		xactptr = (TransactionId *)
-			(MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
+		xactptr = (TransactionId *) (PageGetContents(BufferGetPage(buffer)) + memberoff);
 
 		if (!TransactionIdIsValid(*xactptr))
 		{
@@ -1433,14 +1443,16 @@ retry:
 
 		flagsoff = MXOffsetToFlagsOffset(offset);
 		bshift = MXOffsetToFlagsBitShift(offset);
-		flagsptr = (uint32 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff);
+		flagsptr = (uint32 *) (PageGetContents(BufferGetPage(buffer)) + flagsoff);
 
 		ptr[truelength].xid = *xactptr;
 		ptr[truelength].status = (*flagsptr >> bshift) & MXACT_MEMBER_XACT_BITMASK;
 		truelength++;
 	}
 
-	LWLockRelease(MultiXactMemberSLRULock);
+	/* With length == 0 the loop never read a page, so there is nothing to release */
+	if (BufferIsValid(buffer))
+		UnlockReleaseBuffer(buffer);
 
 	/* A multixid with zero members should not happen */
 	Assert(truelength > 0);
@@ -1832,8 +1841,6 @@ MultiXactShmemSize(void)
 			 mul_size(sizeof(MultiXactId) * 2, MaxOldestSlot))
 
 	size = SHARED_MULTIXACT_STATE_SIZE;
-	size = add_size(size, SimpleLruShmemSize(NUM_MULTIXACTOFFSET_BUFFERS, 0));
-	size = add_size(size, SimpleLruShmemSize(NUM_MULTIXACTMEMBER_BUFFERS, 0));
 
 	return size;
 }
@@ -1845,22 +1852,6 @@ MultiXactShmemInit(void)
 
 	debug_elog2(DEBUG2, "Shared Memory Init for MultiXact");
 
-	MultiXactOffsetCtl->PagePrecedes = MultiXactOffsetPagePrecedes;
-	MultiXactMemberCtl->PagePrecedes = MultiXactMemberPagePrecedes;
-
-	SimpleLruInit(MultiXactOffsetCtl,
-				  "MultiXactOffset", NUM_MULTIXACTOFFSET_BUFFERS, 0,
-				  MultiXactOffsetSLRULock, "pg_multixact/offsets",
-				  LWTRANCHE_MULTIXACTOFFSET_BUFFER,
-				  SYNC_HANDLER_MULTIXACT_OFFSET);
-	SlruPagePrecedesUnitTests(MultiXactOffsetCtl, MULTIXACT_OFFSETS_PER_PAGE);
-	SimpleLruInit(MultiXactMemberCtl,
-				  "MultiXactMember", NUM_MULTIXACTMEMBER_BUFFERS, 0,
-				  MultiXactMemberSLRULock, "pg_multixact/members",
-				  LWTRANCHE_MULTIXACTMEMBER_BUFFER,
-				  SYNC_HANDLER_MULTIXACT_MEMBER);
-	/* doesn't call SimpleLruTruncate() or meet criteria for unit tests */
-
 	/* Initialize our shared state struct */
 	MultiXactState = ShmemInitStruct("Shared MultiXact State",
 									 SHARED_MULTIXACT_STATE_SIZE,
@@ -1891,29 +1882,17 @@ MultiXactShmemInit(void)
 void
 BootStrapMultiXact(void)
 {
-	int			slotno;
-
-	LWLockAcquire(MultiXactOffsetSLRULock, LW_EXCLUSIVE);
+	Buffer		buffer;
 
 	/* Create and zero the first page of the offsets log */
-	slotno = ZeroMultiXactOffsetPage(0, false);
-
-	/* Make sure it's written out */
-	SimpleLruWritePage(MultiXactOffsetCtl, slotno);
-	Assert(!MultiXactOffsetCtl->shared->page_dirty[slotno]);
-
-	LWLockRelease(MultiXactOffsetSLRULock);
-
-	LWLockAcquire(MultiXactMemberSLRULock, LW_EXCLUSIVE);
+	buffer = ZeroMultiXactOffsetPage(0, false);
+	FlushOneBuffer(buffer);
+	UnlockReleaseBuffer(buffer);
 
 	/* Create and zero the first page of the members log */
-	slotno = ZeroMultiXactMemberPage(0, false);
-
-	/* Make sure it's written out */
-	SimpleLruWritePage(MultiXactMemberCtl, slotno);
-	Assert(!MultiXactMemberCtl->shared->page_dirty[slotno]);
-
-	LWLockRelease(MultiXactMemberSLRULock);
+	buffer = ZeroMultiXactMemberPage(0, false);
+	FlushOneBuffer(buffer);
+	UnlockReleaseBuffer(buffer);
 }
 
 /*
@@ -1925,33 +1904,54 @@ BootStrapMultiXact(void)
  *
- * Control lock must be held at entry, and will be held at exit.
+ * The buffer is returned pinned and locked; the caller must release it.
  */
-static int
+static Buffer
 ZeroMultiXactOffsetPage(int pageno, bool writeXlog)
 {
-	int			slotno;
+	XLogRecPtr	recptr = InvalidXLogRecPtr;
+	Buffer		buffer;
+	Page		page;
 
-	slotno = SimpleLruZeroPage(MultiXactOffsetCtl, pageno);
+	/* Obtain the buffer for this offsets page and initialize it as an SLRU page */
+	buffer = ZeroSlruBuffer(SLRU_MULTIXACT_OFFSET_ID, pageno);
+	page = BufferGetPage(buffer);
+	PageInitSLRU(page, BLCKSZ, 0);
 
+	/* When no WAL record is requested, recptr stays InvalidXLogRecPtr */
 	if (writeXlog)
-		WriteMZeroPageXlogRec(pageno, XLOG_MULTIXACT_ZERO_OFF_PAGE);
+		recptr = WriteMZeroPageXlogRec(pageno, XLOG_MULTIXACT_ZERO_OFF_PAGE);
+
+	/* Fill in the page header from pageno and recptr, then dirty the buffer */
+	PageSetHeaderDataNonRel(page, pageno, recptr, BLCKSZ, PG_METAPAGE_LAYOUT_VERSION);
+	MarkBufferDirty(buffer);
 
-	return slotno;
+	/* Callers unlock and/or release the returned buffer themselves */
+	return buffer;
 }
 
 /*
  * Ditto, for MultiXactMember
  */
-static int
+static Buffer
 ZeroMultiXactMemberPage(int pageno, bool writeXlog)
 {
-	int			slotno;
-
-	slotno = SimpleLruZeroPage(MultiXactMemberCtl, pageno);
-
+	XLogRecPtr	recptr = InvalidXLogRecPtr;
+	Buffer		buffer;
+	Page		page;
+
+	buffer = ZeroSlruBuffer(SLRU_MULTIXACT_MEMBER_ID, pageno);
+	page = BufferGetPage(buffer);
+	PageInitSLRU(page, BLCKSZ, 0);
+
+	/* When no WAL record is requested, recptr stays InvalidXLogRecPtr */
 	if (writeXlog)
-		WriteMZeroPageXlogRec(pageno, XLOG_MULTIXACT_ZERO_MEM_PAGE);
-
-	return slotno;
+		recptr = WriteMZeroPageXlogRec(pageno, XLOG_MULTIXACT_ZERO_MEM_PAGE);
+
+	/* Fill in the page header from pageno and recptr, then dirty the buffer */
+	PageSetHeaderDataNonRel(page, pageno, recptr, BLCKSZ, PG_METAPAGE_LAYOUT_VERSION);
+	MarkBufferDirty(buffer);
+
+	/* Callers unlock and/or release the returned buffer themselves */
+	return buffer;
 }
 
 /*
@@ -1976,22 +1976,14 @@ MaybeExtendOffsetSlru(void)
 
 	pageno = MultiXactIdToOffsetPage(MultiXactState->nextMXact);
 
-	LWLockAcquire(MultiXactOffsetSLRULock, LW_EXCLUSIVE);
-
-	if (!SimpleLruDoesPhysicalPageExist(MultiXactOffsetCtl, pageno))
+	if (!SimpleLruDoesPhysicalPageExist(SLRU_MULTIXACT_OFFSET_ID, pageno))
 	{
-		int			slotno;
+		Buffer			buffer;
 
-		/*
-		 * Fortunately for us, SimpleLruWritePage is already prepared to deal
-		 * with creating a new segment file even if the page we're writing is
-		 * not the first in it, so this is enough.
-		 */
-		slotno = ZeroMultiXactOffsetPage(pageno, false);
-		SimpleLruWritePage(MultiXactOffsetCtl, slotno);
+		buffer = ZeroMultiXactOffsetPage(pageno, false);
+		FlushOneBuffer(buffer);
+		UnlockReleaseBuffer(buffer);
 	}
-
-	LWLockRelease(MultiXactOffsetSLRULock);
 }
 
 /*
@@ -2005,21 +1997,6 @@ MaybeExtendOffsetSlru(void)
 void
 StartupMultiXact(void)
 {
-	MultiXactId multi = MultiXactState->nextMXact;
-	MultiXactOffset offset = MultiXactState->nextOffset;
-	int			pageno;
-
-	/*
-	 * Initialize offset's idea of the latest page number.
-	 */
-	pageno = MultiXactIdToOffsetPage(multi);
-	MultiXactOffsetCtl->shared->latest_page_number = pageno;
-
-	/*
-	 * Initialize member's idea of the latest page number.
-	 */
-	pageno = MXOffsetToMemberPage(offset);
-	MultiXactMemberCtl->shared->latest_page_number = pageno;
 }
 
 /*
@@ -2043,14 +2020,8 @@ TrimMultiXact(void)
 	oldestMXactDB = MultiXactState->oldestMultiXactDB;
 	LWLockRelease(MultiXactGenLock);
 
-	/* Clean up offsets state */
-	LWLockAcquire(MultiXactOffsetSLRULock, LW_EXCLUSIVE);
-
-	/*
-	 * (Re-)Initialize our idea of the latest page number for offsets.
-	 */
 	pageno = MultiXactIdToOffsetPage(nextMXact);
-	MultiXactOffsetCtl->shared->latest_page_number = pageno;
+	
 
 	/*
 	 * Zero out the remainder of the current offsets page.  See notes in
@@ -2063,46 +2034,39 @@ TrimMultiXact(void)
 	entryno = MultiXactIdToOffsetEntry(nextMXact);
 	if (entryno != 0)
 	{
-		int			slotno;
 		MultiXactOffset *offptr;
+		Buffer		buffer;
 
-		slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, nextMXact);
-		offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
+		buffer = ReadSlruBuffer(SLRU_MULTIXACT_OFFSET_ID, pageno, RBM_TRIM);
+		LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+		offptr = (MultiXactOffset *) (PageGetContents(BufferGetPage(buffer)));
 		offptr += entryno;
 
-		MemSet(offptr, 0, BLCKSZ - (entryno * sizeof(MultiXactOffset)));
+		MemSet(offptr, 0, BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - (entryno * sizeof(MultiXactOffset)));
 
-		MultiXactOffsetCtl->shared->page_dirty[slotno] = true;
+		MarkBufferDirty(buffer);
+		UnlockReleaseBuffer(buffer);
 	}
 
-	LWLockRelease(MultiXactOffsetSLRULock);
-
-	/* And the same for members */
-	LWLockAcquire(MultiXactMemberSLRULock, LW_EXCLUSIVE);
-
-	/*
-	 * (Re-)Initialize our idea of the latest page number for members.
-	 */
-	pageno = MXOffsetToMemberPage(offset);
-	MultiXactMemberCtl->shared->latest_page_number = pageno;
-
 	/*
 	 * Zero out the remainder of the current members page.  See notes in
 	 * TrimCLOG() for motivation.
 	 */
+
+	pageno = MXOffsetToMemberPage(offset);
 	flagsoff = MXOffsetToFlagsOffset(offset);
 	if (flagsoff != 0)
 	{
-		int			slotno;
 		TransactionId *xidptr;
 		int			memberoff;
+		Buffer		buffer;
 
 		memberoff = MXOffsetToMemberOffset(offset);
-		slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true, offset);
-		xidptr = (TransactionId *)
-			(MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
+		buffer = ReadSlruBuffer(SLRU_MULTIXACT_MEMBER_ID, pageno, RBM_TRIM);
+		LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+		xidptr = (TransactionId *) (PageGetContents(BufferGetPage(buffer)) + memberoff);
 
-		MemSet(xidptr, 0, BLCKSZ - memberoff);
+		MemSet(xidptr, 0, BLCKSZ - memberoff - MAXALIGN(SizeOfPageHeaderData));
 
 		/*
 		 * Note: we don't need to zero out the flag bits in the remaining
@@ -2110,11 +2074,10 @@ TrimMultiXact(void)
 		 * writing.
 		 */
 
-		MultiXactMemberCtl->shared->page_dirty[slotno] = true;
+		MarkBufferDirty(buffer);
+		UnlockReleaseBuffer(buffer);
 	}
 
-	LWLockRelease(MultiXactMemberSLRULock);
-
 	/* signal that we're officially up */
 	LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
 	MultiXactState->finishedStartup = true;
@@ -2146,25 +2109,6 @@ MultiXactGetCheckptMulti(bool is_shutdown,
 				*nextMulti, *nextMultiOffset, *oldestMulti, *oldestMultiDB);
 }
 
-/*
- * Perform a checkpoint --- either during shutdown, or on-the-fly
- */
-void
-CheckPointMultiXact(void)
-{
-	TRACE_POSTGRESQL_MULTIXACT_CHECKPOINT_START(true);
-
-	/*
-	 * Write dirty MultiXact pages to disk.  This may result in sync requests
-	 * queued for later handling by ProcessSyncRequests(), as part of the
-	 * checkpoint.
-	 */
-	SimpleLruWriteAll(MultiXactOffsetCtl, true);
-	SimpleLruWriteAll(MultiXactMemberCtl, true);
-
-	TRACE_POSTGRESQL_MULTIXACT_CHECKPOINT_DONE(true);
-}
-
 /*
  * Set the next-to-be-assigned MultiXactId and offset
  *
@@ -2399,26 +2343,30 @@ MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
  * room in shared memory.
  */
 static void
-ExtendMultiXactOffset(MultiXactId multi)
+ExtendMultiXactOffset(MultiXactId multi, Buffer *buffer)
 {
 	int			pageno;
 
 	/*
-	 * No work except at first MultiXactId of a page.  But beware: just after
-	 * wraparound, the first MultiXactId of page zero is FirstMultiXactId.
+	 * Pin the offsets page for multi up front, so that nothing needs to be
+	 * allocated later on.  When multi is the first MultiXactId of its page
+	 * the page is zeroed as well.  But beware: just after wraparound, the
+	 * first MultiXactId of page zero is FirstMultiXactId.
 	 */
+	pageno = MultiXactIdToOffsetPage(multi);
+
 	if (MultiXactIdToOffsetEntry(multi) != 0 &&
 		multi != FirstMultiXactId)
+	{
+		/* make a read buffer call to enlarge the resource owner */
+		*buffer = ReadSlruBuffer(SLRU_MULTIXACT_OFFSET_ID, pageno, RBM_NORMAL);
 		return;
-
-	pageno = MultiXactIdToOffsetPage(multi);
-
-	LWLockAcquire(MultiXactOffsetSLRULock, LW_EXCLUSIVE);
-
-	/* Zero the page and make an XLOG entry about it */
-	ZeroMultiXactOffsetPage(pageno, true);
-
-	LWLockRelease(MultiXactOffsetSLRULock);
+	}
+
+	/* Zero the page and make an XLOG entry about it */
+	*buffer = ZeroMultiXactOffsetPage(pageno, true);
+	/* release lock but don't unpin */
+	LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
 }
 
 /*
@@ -2429,7 +2377,7 @@ ExtendMultiXactOffset(MultiXactId multi)
  * same comments apply.
  */
 static void
-ExtendMultiXactMember(MultiXactOffset offset, int nmembers)
+ExtendMultiXactMember(MultiXactOffset offset, int nmembers, Buffer ** buffers)
 {
 	/*
 	 * It's possible that the members span more than one page of the members
@@ -2437,10 +2385,17 @@ ExtendMultiXactMember(MultiXactOffset offset, int nmembers)
 	 * optimal if the members span several pages, but that seems unusual
 	 * enough to not worry much about.
 	 */
+	int min_pageno;
+
+	min_pageno = MXOffsetToMemberPage(offset);
 	while (nmembers > 0)
 	{
+		Buffer buf;
+
 		int			flagsoff;
 		int			flagsbit;
+		int			pageno;
+
 		uint32		difference;
 
 		/*
@@ -2448,20 +2403,24 @@ ExtendMultiXactMember(MultiXactOffset offset, int nmembers)
 		 */
 		flagsoff = MXOffsetToFlagsOffset(offset);
 		flagsbit = MXOffsetToFlagsBitShift(offset);
+		pageno = MXOffsetToMemberPage(offset);
+		
+		
+		
 		if (flagsoff == 0 && flagsbit == 0)
 		{
-			int			pageno;
-
-			pageno = MXOffsetToMemberPage(offset);
-
-			LWLockAcquire(MultiXactMemberSLRULock, LW_EXCLUSIVE);
-
 			/* Zero the page and make an XLOG entry about it */
-			ZeroMultiXactMemberPage(pageno, true);
-
-			LWLockRelease(MultiXactMemberSLRULock);
+			buf = ZeroMultiXactMemberPage(pageno, true);
+			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+		} else
+		{
+			/* do a read buffer call to allocate space beforehand */
+			buf = ReadSlruBuffer(SLRU_MULTIXACT_MEMBER_ID, pageno, RBM_NORMAL);
 		}
 
+		if (buffers)
+			(*buffers)[pageno - min_pageno] = buf;
+
 		/*
 		 * Compute the number of items till end of current page.  Careful: if
 		 * addition of unsigned ints wraps around, we're at the last page of
@@ -2734,8 +2693,8 @@ find_multixact_start(MultiXactId multi, MultiXactOffset *result)
 	MultiXactOffset offset;
 	int			pageno;
 	int			entryno;
-	int			slotno;
 	MultiXactOffset *offptr;
+	Buffer		buffer;
 
 	Assert(MultiXactState->finishedStartup);
 
@@ -2743,20 +2702,19 @@ find_multixact_start(MultiXactId multi, MultiXactOffset *result)
 	entryno = MultiXactIdToOffsetEntry(multi);
 
 	/*
-	 * Write out dirty data, so PhysicalPageExists can work correctly.
+	 * Cope with missing/bogus oldest MultiXact in inconsistent states (see
+	 * commit 068cfadf9).
 	 */
-	SimpleLruWriteAll(MultiXactOffsetCtl, true);
-	SimpleLruWriteAll(MultiXactMemberCtl, true);
-
-	if (!SimpleLruDoesPhysicalPageExist(MultiXactOffsetCtl, pageno))
+	if (!ProbeSlruBuffer(SLRU_MULTIXACT_OFFSET_ID, pageno) &&
+		!SimpleLruDoesPhysicalPageExist(SLRU_MULTIXACT_OFFSET_ID, pageno))
 		return false;
 
-	/* lock is acquired by SimpleLruReadPage_ReadOnly */
-	slotno = SimpleLruReadPage_ReadOnly(MultiXactOffsetCtl, pageno, multi);
-	offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
+	buffer = ReadSlruBuffer(SLRU_MULTIXACT_OFFSET_ID, pageno, RBM_NORMAL);
+	LockBuffer(buffer, BUFFER_LOCK_SHARE);
+	offptr = (MultiXactOffset *) (PageGetContents(BufferGetPage(buffer)));
 	offptr += entryno;
 	offset = *offptr;
-	LWLockRelease(MultiXactOffsetSLRULock);
+	UnlockReleaseBuffer(buffer);
 
 	*result = offset;
 	return true;
@@ -2863,12 +2821,13 @@ typedef struct mxtruncinfo
  *		This callback determines the earliest existing page number.
  */
 static bool
-SlruScanDirCbFindEarliest(SlruCtl ctl, char *filename, int segpage, void *data)
+SlruScanDirCbFindEarliest(int slru_id, SlruPagePrecedesFunction PagePrecedes,
+						  char *filename, int segpage, void *data)
 {
 	mxtruncinfo *trunc = (mxtruncinfo *) data;
 
 	if (trunc->earliestExistingPage == -1 ||
-		ctl->PagePrecedes(segpage, trunc->earliestExistingPage))
+		PagePrecedes(segpage, trunc->earliestExistingPage))
 	{
 		trunc->earliestExistingPage = segpage;
 	}
@@ -2900,7 +2859,7 @@ PerformMembersTruncation(MultiXactOffset oldestOffset, MultiXactOffset newOldest
 	while (segment != endsegment)
 	{
 		elog(DEBUG2, "truncating multixact members segment %x", segment);
-		SlruDeleteSegment(MultiXactMemberCtl, segment);
+		SlruDeleteSegment(SLRU_MULTIXACT_MEMBER_ID, segment);
 
 		/* move to next segment, handling wraparound correctly */
 		if (segment == maxsegment)
@@ -2923,7 +2882,8 @@ PerformOffsetsTruncation(MultiXactId oldestMulti, MultiXactId newOldestMulti)
 	 * didn't subtract one, we'd trigger SimpleLruTruncate's wraparound
 	 * detection.
 	 */
-	SimpleLruTruncate(MultiXactOffsetCtl,
+	SimpleLruTruncate(SLRU_MULTIXACT_OFFSET_ID,
+					  MultiXactOffsetPagePrecedes,
 					  MultiXactIdToOffsetPage(PreviousMultiXactId(newOldestMulti)));
 }
 
@@ -2997,7 +2957,9 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
 	 * been truncated away, and we crashed before updating oldestMulti.
 	 */
 	trunc.earliestExistingPage = -1;
-	SlruScanDirectory(MultiXactOffsetCtl, SlruScanDirCbFindEarliest, &trunc);
+	SlruScanDirectory(SLRU_MULTIXACT_OFFSET_ID,
+					  MultiXactOffsetPagePrecedes,
+					  SlruScanDirCbFindEarliest, &trunc);
 	earliest = trunc.earliestExistingPage * MULTIXACT_OFFSETS_PER_PAGE;
 	if (earliest < FirstMultiXactId)
 		earliest = FirstMultiXactId;
@@ -3129,24 +3091,6 @@ MultiXactOffsetPagePrecedes(int page1, int page2)
 								multi2 + MULTIXACT_OFFSETS_PER_PAGE - 1));
 }
 
-/*
- * Decide whether a MultiXactMember page number is "older" for truncation
- * purposes.  There is no "invalid offset number" so use the numbers verbatim.
- */
-static bool
-MultiXactMemberPagePrecedes(int page1, int page2)
-{
-	MultiXactOffset offset1;
-	MultiXactOffset offset2;
-
-	offset1 = ((MultiXactOffset) page1) * MULTIXACT_MEMBERS_PER_PAGE;
-	offset2 = ((MultiXactOffset) page2) * MULTIXACT_MEMBERS_PER_PAGE;
-
-	return (MultiXactOffsetPrecedes(offset1, offset2) &&
-			MultiXactOffsetPrecedes(offset1,
-									offset2 + MULTIXACT_MEMBERS_PER_PAGE - 1));
-}
-
 /*
  * Decide which of two MultiXactIds is earlier.
  *
@@ -3191,12 +3135,16 @@ MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2)
  * Write an xlog record reflecting the zeroing of either a MEMBERs or
  * OFFSETs page (info shows which)
  */
-static void
+static XLogRecPtr
 WriteMZeroPageXlogRec(int pageno, uint8 info)
 {
+	/*
+	 * The page number is the record's only payload.  XLogInsert's result is
+	 * returned so that callers can store it in the zeroed page's header.
+	 */
 	XLogBeginInsert();
 	XLogRegisterData((char *) (&pageno), sizeof(int));
-	(void) XLogInsert(RM_MULTIXACT_ID, info);
+	return XLogInsert(RM_MULTIXACT_ID, info);
 }
 
 /*
@@ -3241,32 +3189,18 @@ multixact_redo(XLogReaderState *record)
 	if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE)
 	{
 		int			pageno;
-		int			slotno;
 
 		memcpy(&pageno, XLogRecGetData(record), sizeof(int));
 
-		LWLockAcquire(MultiXactOffsetSLRULock, LW_EXCLUSIVE);
-
-		slotno = ZeroMultiXactOffsetPage(pageno, false);
-		SimpleLruWritePage(MultiXactOffsetCtl, slotno);
-		Assert(!MultiXactOffsetCtl->shared->page_dirty[slotno]);
-
-		LWLockRelease(MultiXactOffsetSLRULock);
+		UnlockReleaseBuffer(ZeroMultiXactOffsetPage(pageno, false));
 	}
 	else if (info == XLOG_MULTIXACT_ZERO_MEM_PAGE)
 	{
 		int			pageno;
-		int			slotno;
 
 		memcpy(&pageno, XLogRecGetData(record), sizeof(int));
 
-		LWLockAcquire(MultiXactMemberSLRULock, LW_EXCLUSIVE);
-
-		slotno = ZeroMultiXactMemberPage(pageno, false);
-		SimpleLruWritePage(MultiXactMemberCtl, slotno);
-		Assert(!MultiXactMemberCtl->shared->page_dirty[slotno]);
-
-		LWLockRelease(MultiXactMemberSLRULock);
+		UnlockReleaseBuffer(ZeroMultiXactMemberPage(pageno, false));
 	}
 	else if (info == XLOG_MULTIXACT_CREATE_ID)
 	{
@@ -3277,7 +3211,7 @@ multixact_redo(XLogReaderState *record)
 
 		/* Store the data back into the SLRU files */
 		RecordNewMultiXact(xlrec->mid, xlrec->moff, xlrec->nmembers,
-						   xlrec->members);
+						   xlrec->members, NULL, NULL);
 
 		/* Make sure nextMXact/nextOffset are beyond what this record has */
 		MultiXactAdvanceNextMXact(xlrec->mid + 1,
@@ -3300,7 +3234,6 @@ multixact_redo(XLogReaderState *record)
 	else if (info == XLOG_MULTIXACT_TRUNCATE_ID)
 	{
 		xl_multixact_truncate xlrec;
-		int			pageno;
 
 		memcpy(&xlrec, XLogRecGetData(record),
 			   SizeOfMultiXactTruncate);
@@ -3326,13 +3259,6 @@ multixact_redo(XLogReaderState *record)
 
 		PerformMembersTruncation(xlrec.startTruncMemb, xlrec.endTruncMemb);
 
-		/*
-		 * During XLOG replay, latest_page_number isn't necessarily set up
-		 * yet; insert a suitable value to bypass the sanity test in
-		 * SimpleLruTruncate.
-		 */
-		pageno = MultiXactIdToOffsetPage(xlrec.endTruncOff);
-		MultiXactOffsetCtl->shared->latest_page_number = pageno;
 		PerformOffsetsTruncation(xlrec.startTruncOff, xlrec.endTruncOff);
 
 		LWLockRelease(MultiXactTruncationLock);
@@ -3405,21 +3331,3 @@ pg_get_multixact_members(PG_FUNCTION_ARGS)
 
 	SRF_RETURN_DONE(funccxt);
 }
-
-/*
- * Entrypoint for sync.c to sync offsets files.
- */
-int
-multixactoffsetssyncfiletag(const FileTag *ftag, char *path)
-{
-	return SlruSyncFileTag(MultiXactOffsetCtl, ftag, path);
-}
-
-/*
- * Entrypoint for sync.c to sync members files.
- */
-int
-multixactmemberssyncfiletag(const FileTag *ftag, char *path)
-{
-	return SlruSyncFileTag(MultiXactMemberCtl, ftag, path);
-}
diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index 6feda87f574..2d8445e1307 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -1,41 +1,9 @@
 /*-------------------------------------------------------------------------
  *
  * slru.c
- *		Simple LRU buffering for transaction status logfiles
+ *		Simple buffering for transaction status logfiles
  *
- * We use a simple least-recently-used scheme to manage a pool of page
- * buffers.  Under ordinary circumstances we expect that write
- * traffic will occur mostly to the latest page (and to the just-prior
- * page, soon after a page transition).  Read traffic will probably touch
- * a larger span of pages, but in any case a fairly small number of page
- * buffers should be sufficient.  So, we just search the buffers using plain
- * linear search; there's no need for a hashtable or anything fancy.
- * The management algorithm is straight LRU except that we will never swap
- * out the latest page (since we know it's going to be hit again eventually).
- *
- * We use a control LWLock to protect the shared data structures, plus
- * per-buffer LWLocks that synchronize I/O for each buffer.  The control lock
- * must be held to examine or modify any shared state.  A process that is
- * reading in or writing out a page buffer does not hold the control lock,
- * only the per-buffer lock for the buffer it is working on.
- *
- * "Holding the control lock" means exclusive lock in all cases except for
- * SimpleLruReadPage_ReadOnly(); see comments for SlruRecentlyUsed() for
- * the implications of that.
- *
- * When initiating I/O on a buffer, we acquire the per-buffer lock exclusively
- * before releasing the control lock.  The per-buffer lock is released after
- * completing the I/O, re-acquiring the control lock, and updating the shared
- * state.  (Deadlock is not possible here, because we never try to initiate
- * I/O when someone else is already doing I/O on the same buffer.)
- * To wait for I/O to complete, release the control lock, acquire the
- * per-buffer lock in shared mode, immediately release the per-buffer lock,
- * reacquire the control lock, and then recheck state (since arbitrary things
- * could have happened while we didn't have the lock).
- *
- * As with the regular buffer manager, it is possible for another process
- * to re-dirty a page that is currently being written out.  This is handled
- * by re-setting the page's page_dirty flag.
+ * XXX write me
  *
  *
  * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
@@ -60,562 +28,34 @@
 #include "storage/fd.h"
 #include "storage/shmem.h"
 
-#define SlruFileName(ctl, path, seg) \
-	snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir, seg)
-
 /*
- * During SimpleLruWriteAll(), we will usually not need to write more than one
- * or two physical files, but we may need to write several pages per file.  We
- * can consolidate the I/O requests by leaving files open until control returns
- * to SimpleLruWriteAll().  This data structure remembers which files are open.
+ * SLRU ID to path mapping
  */
-#define MAX_WRITEALL_BUFFERS	16
+#define PG_SLRU(symname,name,path,synchronize) \
+	path,
 
-typedef struct SlruWriteAllData
+static char *slru_dirs[] =
 {
-	int			num_files;		/* # files actually open */
-	int			fd[MAX_WRITEALL_BUFFERS];	/* their FD's */
-	int			segno[MAX_WRITEALL_BUFFERS];	/* their log seg#s */
-} SlruWriteAllData;
-
-typedef struct SlruWriteAllData *SlruWriteAll;
+#include "access/slrulist.h"
+};
 
 /*
- * Populate a file tag describing a segment file.  We only use the segment
- * number, since we can derive everything else we need by having separate
- * sync handler functions for clog, multixact etc.
+ * We'll maintain a little cache of recently seen buffers, to try to avoid the
+ * buffer mapping table on repeat access (ie the busy end of the CLOG).  One
+ * entry per SLRU.
  */
-#define INIT_SLRUFILETAG(a,xx_handler,xx_segno) \
-( \
-	memset(&(a), 0, sizeof(FileTag)), \
-	(a).handler = (xx_handler), \
-	(a).segno = (xx_segno) \
-)
-
-/*
- * Macro to mark a buffer slot "most recently used".  Note multiple evaluation
- * of arguments!
- *
- * The reason for the if-test is that there are often many consecutive
- * accesses to the same page (particularly the latest page).  By suppressing
- * useless increments of cur_lru_count, we reduce the probability that old
- * pages' counts will "wrap around" and make them appear recently used.
- *
- * We allow this code to be executed concurrently by multiple processes within
- * SimpleLruReadPage_ReadOnly().  As long as int reads and writes are atomic,
- * this should not cause any completely-bogus values to enter the computation.
- * However, it is possible for either cur_lru_count or individual
- * page_lru_count entries to be "reset" to lower values than they should have,
- * in case a process is delayed while it executes this macro.  With care in
- * SlruSelectLRUPage(), this does little harm, and in any case the absolute
- * worst possible consequence is a nonoptimal choice of page to evict.  The
- * gain from allowing concurrent reads of SLRU pages seems worth it.
- */
-#define SlruRecentlyUsed(shared, slotno)	\
-	do { \
-		int		new_lru_count = (shared)->cur_lru_count; \
-		if (new_lru_count != (shared)->page_lru_count[slotno]) { \
-			(shared)->cur_lru_count = ++new_lru_count; \
-			(shared)->page_lru_count[slotno] = new_lru_count; \
-		} \
-	} while (0)
-
-/* Saved info for SlruReportIOError */
-typedef enum
-{
-	SLRU_OPEN_FAILED,
-	SLRU_SEEK_FAILED,
-	SLRU_READ_FAILED,
-	SLRU_WRITE_FAILED,
-	SLRU_FSYNC_FAILED,
-	SLRU_CLOSE_FAILED
-} SlruErrorCause;
+struct SlruRecentBuffer {
+	int			pageno;
+	Buffer		recent_buffer;
+};
 
-static SlruErrorCause slru_errcause;
-static int	slru_errno;
+static struct SlruRecentBuffer slru_recent_buffers[SLRU_NEXT_ID];
 
-
-static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno);
-static void SimpleLruWaitIO(SlruCtl ctl, int slotno);
-static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruWriteAll fdata);
-static bool SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno);
-static bool SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno,
-								  SlruWriteAll fdata);
-static void SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid);
-static int	SlruSelectLRUPage(SlruCtl ctl, int pageno);
-
-static bool SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename,
+static bool SlruScanDirCbDeleteCutoff(int slru_id,
+									  SlruPagePrecedesFunction PagePrecedes,
+									  char *filename,
 									  int segpage, void *data);
-static void SlruInternalDeleteSegment(SlruCtl ctl, int segno);
-
-/*
- * Initialization of shared memory
- */
-
-Size
-SimpleLruShmemSize(int nslots, int nlsns)
-{
-	Size		sz;
-
-	/* we assume nslots isn't so large as to risk overflow */
-	sz = MAXALIGN(sizeof(SlruSharedData));
-	sz += MAXALIGN(nslots * sizeof(char *));	/* page_buffer[] */
-	sz += MAXALIGN(nslots * sizeof(SlruPageStatus));	/* page_status[] */
-	sz += MAXALIGN(nslots * sizeof(bool));	/* page_dirty[] */
-	sz += MAXALIGN(nslots * sizeof(int));	/* page_number[] */
-	sz += MAXALIGN(nslots * sizeof(int));	/* page_lru_count[] */
-	sz += MAXALIGN(nslots * sizeof(LWLockPadded));	/* buffer_locks[] */
-
-	if (nlsns > 0)
-		sz += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr));	/* group_lsn[] */
-
-	return BUFFERALIGN(sz) + BLCKSZ * nslots;
-}
-
-/*
- * Initialize, or attach to, a simple LRU cache in shared memory.
- *
- * ctl: address of local (unshared) control structure.
- * name: name of SLRU.  (This is user-visible, pick with care!)
- * nslots: number of page slots to use.
- * nlsns: number of LSN groups per page (set to zero if not relevant).
- * ctllock: LWLock to use to control access to the shared control structure.
- * subdir: PGDATA-relative subdirectory that will contain the files.
- * tranche_id: LWLock tranche ID to use for the SLRU's per-buffer LWLocks.
- * sync_handler: which set of functions to use to handle sync requests
- */
-void
-SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
-			  LWLock *ctllock, const char *subdir, int tranche_id,
-			  SyncRequestHandler sync_handler)
-{
-	SlruShared	shared;
-	bool		found;
-
-	shared = (SlruShared) ShmemInitStruct(name,
-										  SimpleLruShmemSize(nslots, nlsns),
-										  &found);
-
-	if (!IsUnderPostmaster)
-	{
-		/* Initialize locks and shared memory area */
-		char	   *ptr;
-		Size		offset;
-		int			slotno;
-
-		Assert(!found);
-
-		memset(shared, 0, sizeof(SlruSharedData));
-
-		shared->ControlLock = ctllock;
-
-		shared->num_slots = nslots;
-		shared->lsn_groups_per_page = nlsns;
-
-		shared->cur_lru_count = 0;
-
-		/* shared->latest_page_number will be set later */
-
-		shared->slru_stats_idx = pgstat_get_slru_index(name);
-
-		ptr = (char *) shared;
-		offset = MAXALIGN(sizeof(SlruSharedData));
-		shared->page_buffer = (char **) (ptr + offset);
-		offset += MAXALIGN(nslots * sizeof(char *));
-		shared->page_status = (SlruPageStatus *) (ptr + offset);
-		offset += MAXALIGN(nslots * sizeof(SlruPageStatus));
-		shared->page_dirty = (bool *) (ptr + offset);
-		offset += MAXALIGN(nslots * sizeof(bool));
-		shared->page_number = (int *) (ptr + offset);
-		offset += MAXALIGN(nslots * sizeof(int));
-		shared->page_lru_count = (int *) (ptr + offset);
-		offset += MAXALIGN(nslots * sizeof(int));
-
-		/* Initialize LWLocks */
-		shared->buffer_locks = (LWLockPadded *) (ptr + offset);
-		offset += MAXALIGN(nslots * sizeof(LWLockPadded));
-
-		if (nlsns > 0)
-		{
-			shared->group_lsn = (XLogRecPtr *) (ptr + offset);
-			offset += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr));
-		}
-
-		ptr += BUFFERALIGN(offset);
-		for (slotno = 0; slotno < nslots; slotno++)
-		{
-			LWLockInitialize(&shared->buffer_locks[slotno].lock,
-							 tranche_id);
-
-			shared->page_buffer[slotno] = ptr;
-			shared->page_status[slotno] = SLRU_PAGE_EMPTY;
-			shared->page_dirty[slotno] = false;
-			shared->page_lru_count[slotno] = 0;
-			ptr += BLCKSZ;
-		}
-
-		/* Should fit to estimated shmem size */
-		Assert(ptr - (char *) shared <= SimpleLruShmemSize(nslots, nlsns));
-	}
-	else
-		Assert(found);
-
-	/*
-	 * Initialize the unshared control struct, including directory path. We
-	 * assume caller set PagePrecedes.
-	 */
-	ctl->shared = shared;
-	ctl->sync_handler = sync_handler;
-	strlcpy(ctl->Dir, subdir, sizeof(ctl->Dir));
-}
-
-/*
- * Initialize (or reinitialize) a page to zeroes.
- *
- * The page is not actually written, just set up in shared memory.
- * The slot number of the new page is returned.
- *
- * Control lock must be held at entry, and will be held at exit.
- */
-int
-SimpleLruZeroPage(SlruCtl ctl, int pageno)
-{
-	SlruShared	shared = ctl->shared;
-	int			slotno;
-
-	/* Find a suitable buffer slot for the page */
-	slotno = SlruSelectLRUPage(ctl, pageno);
-	Assert(shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
-		   (shared->page_status[slotno] == SLRU_PAGE_VALID &&
-			!shared->page_dirty[slotno]) ||
-		   shared->page_number[slotno] == pageno);
-
-	/* Mark the slot as containing this page */
-	shared->page_number[slotno] = pageno;
-	shared->page_status[slotno] = SLRU_PAGE_VALID;
-	shared->page_dirty[slotno] = true;
-	SlruRecentlyUsed(shared, slotno);
-
-	/* Set the buffer to zeroes */
-	MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
-
-	/* Set the LSNs for this new page to zero */
-	SimpleLruZeroLSNs(ctl, slotno);
-
-	/* Assume this page is now the latest active page */
-	shared->latest_page_number = pageno;
-
-	/* update the stats counter of zeroed pages */
-	pgstat_count_slru_page_zeroed(shared->slru_stats_idx);
-
-	return slotno;
-}
-
-/*
- * Zero all the LSNs we store for this slru page.
- *
- * This should be called each time we create a new page, and each time we read
- * in a page from disk into an existing buffer.  (Such an old page cannot
- * have any interesting LSNs, since we'd have flushed them before writing
- * the page in the first place.)
- *
- * This assumes that InvalidXLogRecPtr is bitwise-all-0.
- */
-static void
-SimpleLruZeroLSNs(SlruCtl ctl, int slotno)
-{
-	SlruShared	shared = ctl->shared;
-
-	if (shared->lsn_groups_per_page > 0)
-		MemSet(&shared->group_lsn[slotno * shared->lsn_groups_per_page], 0,
-			   shared->lsn_groups_per_page * sizeof(XLogRecPtr));
-}
-
-/*
- * Wait for any active I/O on a page slot to finish.  (This does not
- * guarantee that new I/O hasn't been started before we return, though.
- * In fact the slot might not even contain the same page anymore.)
- *
- * Control lock must be held at entry, and will be held at exit.
- */
-static void
-SimpleLruWaitIO(SlruCtl ctl, int slotno)
-{
-	SlruShared	shared = ctl->shared;
-
-	/* See notes at top of file */
-	LWLockRelease(shared->ControlLock);
-	LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_SHARED);
-	LWLockRelease(&shared->buffer_locks[slotno].lock);
-	LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
-
-	/*
-	 * If the slot is still in an io-in-progress state, then either someone
-	 * already started a new I/O on the slot, or a previous I/O failed and
-	 * neglected to reset the page state.  That shouldn't happen, really, but
-	 * it seems worth a few extra cycles to check and recover from it. We can
-	 * cheaply test for failure by seeing if the buffer lock is still held (we
-	 * assume that transaction abort would release the lock).
-	 */
-	if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS ||
-		shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS)
-	{
-		if (LWLockConditionalAcquire(&shared->buffer_locks[slotno].lock, LW_SHARED))
-		{
-			/* indeed, the I/O must have failed */
-			if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS)
-				shared->page_status[slotno] = SLRU_PAGE_EMPTY;
-			else				/* write_in_progress */
-			{
-				shared->page_status[slotno] = SLRU_PAGE_VALID;
-				shared->page_dirty[slotno] = true;
-			}
-			LWLockRelease(&shared->buffer_locks[slotno].lock);
-		}
-	}
-}
-
-/*
- * Find a page in a shared buffer, reading it in if necessary.
- * The page number must correspond to an already-initialized page.
- *
- * If write_ok is true then it is OK to return a page that is in
- * WRITE_IN_PROGRESS state; it is the caller's responsibility to be sure
- * that modification of the page is safe.  If write_ok is false then we
- * will not return the page until it is not undergoing active I/O.
- *
- * The passed-in xid is used only for error reporting, and may be
- * InvalidTransactionId if no specific xid is associated with the action.
- *
- * Return value is the shared-buffer slot number now holding the page.
- * The buffer's LRU access info is updated.
- *
- * Control lock must be held at entry, and will be held at exit.
- */
-int
-SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok,
-				  TransactionId xid)
-{
-	SlruShared	shared = ctl->shared;
-
-	/* Outer loop handles restart if we must wait for someone else's I/O */
-	for (;;)
-	{
-		int			slotno;
-		bool		ok;
-
-		/* See if page already is in memory; if not, pick victim slot */
-		slotno = SlruSelectLRUPage(ctl, pageno);
-
-		/* Did we find the page in memory? */
-		if (shared->page_number[slotno] == pageno &&
-			shared->page_status[slotno] != SLRU_PAGE_EMPTY)
-		{
-			/*
-			 * If page is still being read in, we must wait for I/O.  Likewise
-			 * if the page is being written and the caller said that's not OK.
-			 */
-			if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS ||
-				(shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS &&
-				 !write_ok))
-			{
-				SimpleLruWaitIO(ctl, slotno);
-				/* Now we must recheck state from the top */
-				continue;
-			}
-			/* Otherwise, it's ready to use */
-			SlruRecentlyUsed(shared, slotno);
-
-			/* update the stats counter of pages found in the SLRU */
-			pgstat_count_slru_page_hit(shared->slru_stats_idx);
-
-			return slotno;
-		}
-
-		/* We found no match; assert we selected a freeable slot */
-		Assert(shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
-			   (shared->page_status[slotno] == SLRU_PAGE_VALID &&
-				!shared->page_dirty[slotno]));
-
-		/* Mark the slot read-busy */
-		shared->page_number[slotno] = pageno;
-		shared->page_status[slotno] = SLRU_PAGE_READ_IN_PROGRESS;
-		shared->page_dirty[slotno] = false;
-
-		/* Acquire per-buffer lock (cannot deadlock, see notes at top) */
-		LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_EXCLUSIVE);
-
-		/* Release control lock while doing I/O */
-		LWLockRelease(shared->ControlLock);
-
-		/* Do the read */
-		ok = SlruPhysicalReadPage(ctl, pageno, slotno);
-
-		/* Set the LSNs for this newly read-in page to zero */
-		SimpleLruZeroLSNs(ctl, slotno);
-
-		/* Re-acquire control lock and update page state */
-		LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
-
-		Assert(shared->page_number[slotno] == pageno &&
-			   shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS &&
-			   !shared->page_dirty[slotno]);
-
-		shared->page_status[slotno] = ok ? SLRU_PAGE_VALID : SLRU_PAGE_EMPTY;
-
-		LWLockRelease(&shared->buffer_locks[slotno].lock);
-
-		/* Now it's okay to ereport if we failed */
-		if (!ok)
-			SlruReportIOError(ctl, pageno, xid);
-
-		SlruRecentlyUsed(shared, slotno);
-
-		/* update the stats counter of pages not found in SLRU */
-		pgstat_count_slru_page_read(shared->slru_stats_idx);
-
-		return slotno;
-	}
-}
-
-/*
- * Find a page in a shared buffer, reading it in if necessary.
- * The page number must correspond to an already-initialized page.
- * The caller must intend only read-only access to the page.
- *
- * The passed-in xid is used only for error reporting, and may be
- * InvalidTransactionId if no specific xid is associated with the action.
- *
- * Return value is the shared-buffer slot number now holding the page.
- * The buffer's LRU access info is updated.
- *
- * Control lock must NOT be held at entry, but will be held at exit.
- * It is unspecified whether the lock will be shared or exclusive.
- */
-int
-SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid)
-{
-	SlruShared	shared = ctl->shared;
-	int			slotno;
-
-	/* Try to find the page while holding only shared lock */
-	LWLockAcquire(shared->ControlLock, LW_SHARED);
-
-	/* See if page is already in a buffer */
-	for (slotno = 0; slotno < shared->num_slots; slotno++)
-	{
-		if (shared->page_number[slotno] == pageno &&
-			shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
-			shared->page_status[slotno] != SLRU_PAGE_READ_IN_PROGRESS)
-		{
-			/* See comments for SlruRecentlyUsed macro */
-			SlruRecentlyUsed(shared, slotno);
-
-			/* update the stats counter of pages found in the SLRU */
-			pgstat_count_slru_page_hit(shared->slru_stats_idx);
-
-			return slotno;
-		}
-	}
-
-	/* No luck, so switch to normal exclusive lock and do regular read */
-	LWLockRelease(shared->ControlLock);
-	LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
-
-	return SimpleLruReadPage(ctl, pageno, true, xid);
-}
-
-/*
- * Write a page from a shared buffer, if necessary.
- * Does nothing if the specified slot is not dirty.
- *
- * NOTE: only one write attempt is made here.  Hence, it is possible that
- * the page is still dirty at exit (if someone else re-dirtied it during
- * the write).  However, we *do* attempt a fresh write even if the page
- * is already being written; this is for checkpoints.
- *
- * Control lock must be held at entry, and will be held at exit.
- */
-static void
-SlruInternalWritePage(SlruCtl ctl, int slotno, SlruWriteAll fdata)
-{
-	SlruShared	shared = ctl->shared;
-	int			pageno = shared->page_number[slotno];
-	bool		ok;
-
-	/* If a write is in progress, wait for it to finish */
-	while (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS &&
-		   shared->page_number[slotno] == pageno)
-	{
-		SimpleLruWaitIO(ctl, slotno);
-	}
-
-	/*
-	 * Do nothing if page is not dirty, or if buffer no longer contains the
-	 * same page we were called for.
-	 */
-	if (!shared->page_dirty[slotno] ||
-		shared->page_status[slotno] != SLRU_PAGE_VALID ||
-		shared->page_number[slotno] != pageno)
-		return;
-
-	/*
-	 * Mark the slot write-busy, and clear the dirtybit.  After this point, a
-	 * transaction status update on this page will mark it dirty again.
-	 */
-	shared->page_status[slotno] = SLRU_PAGE_WRITE_IN_PROGRESS;
-	shared->page_dirty[slotno] = false;
-
-	/* Acquire per-buffer lock (cannot deadlock, see notes at top) */
-	LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_EXCLUSIVE);
-
-	/* Release control lock while doing I/O */
-	LWLockRelease(shared->ControlLock);
-
-	/* Do the write */
-	ok = SlruPhysicalWritePage(ctl, pageno, slotno, fdata);
-
-	/* If we failed, and we're in a flush, better close the files */
-	if (!ok && fdata)
-	{
-		int			i;
-
-		for (i = 0; i < fdata->num_files; i++)
-			CloseTransientFile(fdata->fd[i]);
-	}
-
-	/* Re-acquire control lock and update page state */
-	LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
-
-	Assert(shared->page_number[slotno] == pageno &&
-		   shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS);
-
-	/* If we failed to write, mark the page dirty again */
-	if (!ok)
-		shared->page_dirty[slotno] = true;
-
-	shared->page_status[slotno] = SLRU_PAGE_VALID;
-
-	LWLockRelease(&shared->buffer_locks[slotno].lock);
-
-	/* Now it's okay to ereport if we failed */
-	if (!ok)
-		SlruReportIOError(ctl, pageno, InvalidTransactionId);
-
-	/* If part of a checkpoint, count this as a buffer written. */
-	if (fdata)
-		CheckpointStats.ckpt_bufs_written++;
-}
-
-/*
- * Wrapper of SlruInternalWritePage, for external callers.
- * fdata is always passed a NULL here.
- */
-void
-SimpleLruWritePage(SlruCtl ctl, int slotno)
-{
-	SlruInternalWritePage(ctl, slotno, NULL);
-}
+static void SlruInternalDeleteSegment(int slru_id, int segno);
 
 /*
  * Return whether the given page exists on disk.
@@ -624,592 +64,24 @@ SimpleLruWritePage(SlruCtl ctl, int slotno)
  * large enough to contain the given page.
  */
 bool
-SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno)
+SimpleLruDoesPhysicalPageExist(int slru_id, int pageno)
 {
-	int			segno = pageno / SLRU_PAGES_PER_SEGMENT;
-	int			rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
-	int			offset = rpageno * BLCKSZ;
-	char		path[MAXPGPATH];
-	int			fd;
-	bool		result;
-	off_t		endpos;
-
-	/* update the stats counter of checked pages */
-	pgstat_count_slru_page_exists(ctl->shared->slru_stats_idx);
-
-	SlruFileName(ctl, path, segno);
-
-	fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
-	if (fd < 0)
-	{
-		/* expected: file doesn't exist */
-		if (errno == ENOENT)
-			return false;
-
-		/* report error normally */
-		slru_errcause = SLRU_OPEN_FAILED;
-		slru_errno = errno;
-		SlruReportIOError(ctl, pageno, 0);
-	}
-
-	if ((endpos = lseek(fd, 0, SEEK_END)) < 0)
-	{
-		slru_errcause = SLRU_SEEK_FAILED;
-		slru_errno = errno;
-		SlruReportIOError(ctl, pageno, 0);
-	}
-
-	result = endpos >= (off_t) (offset + BLCKSZ);
-
-	if (CloseTransientFile(fd) != 0)
-	{
-		slru_errcause = SLRU_CLOSE_FAILED;
-		slru_errno = errno;
-		return false;
-	}
-
-	return result;
-}
-
-/*
- * Physical read of a (previously existing) page into a buffer slot
- *
- * On failure, we cannot just ereport(ERROR) since caller has put state in
- * shared memory that must be undone.  So, we return false and save enough
- * info in static variables to let SlruReportIOError make the report.
- *
- * For now, assume it's not worth keeping a file pointer open across
- * read/write operations.  We could cache one virtual file pointer ...
- */
-static bool
-SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
-{
-	SlruShared	shared = ctl->shared;
-	int			segno = pageno / SLRU_PAGES_PER_SEGMENT;
-	int			rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
-	off_t		offset = rpageno * BLCKSZ;
-	char		path[MAXPGPATH];
-	int			fd;
-
-	SlruFileName(ctl, path, segno);
-
-	/*
-	 * In a crash-and-restart situation, it's possible for us to receive
-	 * commands to set the commit status of transactions whose bits are in
-	 * already-truncated segments of the commit log (see notes in
-	 * SlruPhysicalWritePage).  Hence, if we are InRecovery, allow the case
-	 * where the file doesn't exist, and return zeroes instead.
-	 */
-	fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
-	if (fd < 0)
-	{
-		if (errno != ENOENT || !InRecovery)
-		{
-			slru_errcause = SLRU_OPEN_FAILED;
-			slru_errno = errno;
-			return false;
-		}
-
-		ereport(LOG,
-				(errmsg("file \"%s\" doesn't exist, reading as zeroes",
-						path)));
-		MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
-		return true;
-	}
-
-	errno = 0;
-	pgstat_report_wait_start(WAIT_EVENT_SLRU_READ);
-	if (pg_pread(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ)
-	{
-		pgstat_report_wait_end();
-		slru_errcause = SLRU_READ_FAILED;
-		slru_errno = errno;
-		CloseTransientFile(fd);
-		return false;
-	}
-	pgstat_report_wait_end();
-
-	if (CloseTransientFile(fd) != 0)
-	{
-		slru_errcause = SLRU_CLOSE_FAILED;
-		slru_errno = errno;
-		return false;
-	}
-
-	return true;
-}
-
-/*
- * Physical write of a page from a buffer slot
- *
- * On failure, we cannot just ereport(ERROR) since caller has put state in
- * shared memory that must be undone.  So, we return false and save enough
- * info in static variables to let SlruReportIOError make the report.
- *
- * For now, assume it's not worth keeping a file pointer open across
- * independent read/write operations.  We do batch operations during
- * SimpleLruWriteAll, though.
- *
- * fdata is NULL for a standalone write, pointer to open-file info during
- * SimpleLruWriteAll.
- */
-static bool
-SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruWriteAll fdata)
-{
-	SlruShared	shared = ctl->shared;
 	int			segno = pageno / SLRU_PAGES_PER_SEGMENT;
 	int			rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
 	off_t		offset = rpageno * BLCKSZ;
-	char		path[MAXPGPATH];
-	int			fd = -1;
-
-	/* update the stats counter of written pages */
-	pgstat_count_slru_page_written(shared->slru_stats_idx);
-
-	/*
-	 * Honor the write-WAL-before-data rule, if appropriate, so that we do not
-	 * write out data before associated WAL records.  This is the same action
-	 * performed during FlushBuffer() in the main buffer manager.
-	 */
-	if (shared->group_lsn != NULL)
-	{
-		/*
-		 * We must determine the largest async-commit LSN for the page. This
-		 * is a bit tedious, but since this entire function is a slow path
-		 * anyway, it seems better to do this here than to maintain a per-page
-		 * LSN variable (which'd need an extra comparison in the
-		 * transaction-commit path).
-		 */
-		XLogRecPtr	max_lsn;
-		int			lsnindex,
-					lsnoff;
-
-		lsnindex = slotno * shared->lsn_groups_per_page;
-		max_lsn = shared->group_lsn[lsnindex++];
-		for (lsnoff = 1; lsnoff < shared->lsn_groups_per_page; lsnoff++)
-		{
-			XLogRecPtr	this_lsn = shared->group_lsn[lsnindex++];
-
-			if (max_lsn < this_lsn)
-				max_lsn = this_lsn;
-		}
-
-		if (!XLogRecPtrIsInvalid(max_lsn))
-		{
-			/*
-			 * As noted above, elog(ERROR) is not acceptable here, so if
-			 * XLogFlush were to fail, we must PANIC.  This isn't much of a
-			 * restriction because XLogFlush is just about all critical
-			 * section anyway, but let's make sure.
-			 */
-			START_CRIT_SECTION();
-			XLogFlush(max_lsn);
-			END_CRIT_SECTION();
-		}
-	}
-
-	/*
-	 * During a WriteAll, we may already have the desired file open.
-	 */
-	if (fdata)
-	{
-		int			i;
-
-		for (i = 0; i < fdata->num_files; i++)
-		{
-			if (fdata->segno[i] == segno)
-			{
-				fd = fdata->fd[i];
-				break;
-			}
-		}
-	}
-
-	if (fd < 0)
-	{
-		/*
-		 * If the file doesn't already exist, we should create it.  It is
-		 * possible for this to need to happen when writing a page that's not
-		 * first in its segment; we assume the OS can cope with that. (Note:
-		 * it might seem that it'd be okay to create files only when
-		 * SimpleLruZeroPage is called for the first page of a segment.
-		 * However, if after a crash and restart the REDO logic elects to
-		 * replay the log from a checkpoint before the latest one, then it's
-		 * possible that we will get commands to set transaction status of
-		 * transactions that have already been truncated from the commit log.
-		 * Easiest way to deal with that is to accept references to
-		 * nonexistent files here and in SlruPhysicalReadPage.)
-		 *
-		 * Note: it is possible for more than one backend to be executing this
-		 * code simultaneously for different pages of the same file. Hence,
-		 * don't use O_EXCL or O_TRUNC or anything like that.
-		 */
-		SlruFileName(ctl, path, segno);
-		fd = OpenTransientFile(path, O_RDWR | O_CREAT | PG_BINARY);
-		if (fd < 0)
-		{
-			slru_errcause = SLRU_OPEN_FAILED;
-			slru_errno = errno;
-			return false;
-		}
-
-		if (fdata)
-		{
-			if (fdata->num_files < MAX_WRITEALL_BUFFERS)
-			{
-				fdata->fd[fdata->num_files] = fd;
-				fdata->segno[fdata->num_files] = segno;
-				fdata->num_files++;
-			}
-			else
-			{
-				/*
-				 * In the unlikely event that we exceed MAX_FLUSH_BUFFERS,
-				 * fall back to treating it as a standalone write.
-				 */
-				fdata = NULL;
-			}
-		}
-	}
-
-	errno = 0;
-	pgstat_report_wait_start(WAIT_EVENT_SLRU_WRITE);
-	if (pg_pwrite(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ)
-	{
-		pgstat_report_wait_end();
-		/* if write didn't set errno, assume problem is no disk space */
-		if (errno == 0)
-			errno = ENOSPC;
-		slru_errcause = SLRU_WRITE_FAILED;
-		slru_errno = errno;
-		if (!fdata)
-			CloseTransientFile(fd);
-		return false;
-	}
-	pgstat_report_wait_end();
-
-	/* Queue up a sync request for the checkpointer. */
-	if (ctl->sync_handler != SYNC_HANDLER_NONE)
-	{
-		FileTag		tag;
-
-		INIT_SLRUFILETAG(tag, ctl->sync_handler, segno);
-		if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false))
-		{
-			/* No space to enqueue sync request.  Do it synchronously. */
-			pgstat_report_wait_start(WAIT_EVENT_SLRU_SYNC);
-			if (pg_fsync(fd) != 0)
-			{
-				pgstat_report_wait_end();
-				slru_errcause = SLRU_FSYNC_FAILED;
-				slru_errno = errno;
-				CloseTransientFile(fd);
-				return false;
-			}
-			pgstat_report_wait_end();
-		}
-	}
-
-	/* Close file, unless part of flush request. */
-	if (!fdata)
-	{
-		if (CloseTransientFile(fd) != 0)
-		{
-			slru_errcause = SLRU_CLOSE_FAILED;
-			slru_errno = errno;
-			return false;
-		}
-	}
-
-	return true;
-}
-
-/*
- * Issue the error message after failure of SlruPhysicalReadPage or
- * SlruPhysicalWritePage.  Call this after cleaning up shared-memory state.
- */
-static void
-SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid)
-{
-	int			segno = pageno / SLRU_PAGES_PER_SEGMENT;
-	int			rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
-	int			offset = rpageno * BLCKSZ;
-	char		path[MAXPGPATH];
-
-	SlruFileName(ctl, path, segno);
-	errno = slru_errno;
-	switch (slru_errcause)
-	{
-		case SLRU_OPEN_FAILED:
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not access status of transaction %u", xid),
-					 errdetail("Could not open file \"%s\": %m.", path)));
-			break;
-		case SLRU_SEEK_FAILED:
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not access status of transaction %u", xid),
-					 errdetail("Could not seek in file \"%s\" to offset %d: %m.",
-							   path, offset)));
-			break;
-		case SLRU_READ_FAILED:
-			if (errno)
-				ereport(ERROR,
-						(errcode_for_file_access(),
-						 errmsg("could not access status of transaction %u", xid),
-						 errdetail("Could not read from file \"%s\" at offset %d: %m.",
-								   path, offset)));
-			else
-				ereport(ERROR,
-						(errmsg("could not access status of transaction %u", xid),
-						 errdetail("Could not read from file \"%s\" at offset %d: read too few bytes.", path, offset)));
-			break;
-		case SLRU_WRITE_FAILED:
-			if (errno)
-				ereport(ERROR,
-						(errcode_for_file_access(),
-						 errmsg("could not access status of transaction %u", xid),
-						 errdetail("Could not write to file \"%s\" at offset %d: %m.",
-								   path, offset)));
-			else
-				ereport(ERROR,
-						(errmsg("could not access status of transaction %u", xid),
-						 errdetail("Could not write to file \"%s\" at offset %d: wrote too few bytes.",
-								   path, offset)));
-			break;
-		case SLRU_FSYNC_FAILED:
-			ereport(data_sync_elevel(ERROR),
-					(errcode_for_file_access(),
-					 errmsg("could not access status of transaction %u", xid),
-					 errdetail("Could not fsync file \"%s\": %m.",
-							   path)));
-			break;
-		case SLRU_CLOSE_FAILED:
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not access status of transaction %u", xid),
-					 errdetail("Could not close file \"%s\": %m.",
-							   path)));
-			break;
-		default:
-			/* can't get here, we trust */
-			elog(ERROR, "unrecognized SimpleLru error cause: %d",
-				 (int) slru_errcause);
-			break;
-	}
-}
-
-/*
- * Select the slot to re-use when we need a free slot.
- *
- * The target page number is passed because we need to consider the
- * possibility that some other process reads in the target page while
- * we are doing I/O to free a slot.  Hence, check or recheck to see if
- * any slot already holds the target page, and return that slot if so.
- * Thus, the returned slot is *either* a slot already holding the pageno
- * (could be any state except EMPTY), *or* a freeable slot (state EMPTY
- * or CLEAN).
- *
- * Control lock must be held at entry, and will be held at exit.
- */
-static int
-SlruSelectLRUPage(SlruCtl ctl, int pageno)
-{
-	SlruShared	shared = ctl->shared;
-
-	/* Outer loop handles restart after I/O */
-	for (;;)
-	{
-		int			slotno;
-		int			cur_count;
-		int			bestvalidslot = 0;	/* keep compiler quiet */
-		int			best_valid_delta = -1;
-		int			best_valid_page_number = 0; /* keep compiler quiet */
-		int			bestinvalidslot = 0;	/* keep compiler quiet */
-		int			best_invalid_delta = -1;
-		int			best_invalid_page_number = 0;	/* keep compiler quiet */
-
-		/* See if page already has a buffer assigned */
-		for (slotno = 0; slotno < shared->num_slots; slotno++)
-		{
-			if (shared->page_number[slotno] == pageno &&
-				shared->page_status[slotno] != SLRU_PAGE_EMPTY)
-				return slotno;
-		}
-
-		/*
-		 * If we find any EMPTY slot, just select that one. Else choose a
-		 * victim page to replace.  We normally take the least recently used
-		 * valid page, but we will never take the slot containing
-		 * latest_page_number, even if it appears least recently used.  We
-		 * will select a slot that is already I/O busy only if there is no
-		 * other choice: a read-busy slot will not be least recently used once
-		 * the read finishes, and waiting for an I/O on a write-busy slot is
-		 * inferior to just picking some other slot.  Testing shows the slot
-		 * we pick instead will often be clean, allowing us to begin a read at
-		 * once.
-		 *
-		 * Normally the page_lru_count values will all be different and so
-		 * there will be a well-defined LRU page.  But since we allow
-		 * concurrent execution of SlruRecentlyUsed() within
-		 * SimpleLruReadPage_ReadOnly(), it is possible that multiple pages
-		 * acquire the same lru_count values.  In that case we break ties by
-		 * choosing the furthest-back page.
-		 *
-		 * Notice that this next line forcibly advances cur_lru_count to a
-		 * value that is certainly beyond any value that will be in the
-		 * page_lru_count array after the loop finishes.  This ensures that
-		 * the next execution of SlruRecentlyUsed will mark the page newly
-		 * used, even if it's for a page that has the current counter value.
-		 * That gets us back on the path to having good data when there are
-		 * multiple pages with the same lru_count.
-		 */
-		cur_count = (shared->cur_lru_count)++;
-		for (slotno = 0; slotno < shared->num_slots; slotno++)
-		{
-			int			this_delta;
-			int			this_page_number;
-
-			if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
-				return slotno;
-			this_delta = cur_count - shared->page_lru_count[slotno];
-			if (this_delta < 0)
-			{
-				/*
-				 * Clean up in case shared updates have caused cur_count
-				 * increments to get "lost".  We back off the page counts,
-				 * rather than trying to increase cur_count, to avoid any
-				 * question of infinite loops or failure in the presence of
-				 * wrapped-around counts.
-				 */
-				shared->page_lru_count[slotno] = cur_count;
-				this_delta = 0;
-			}
-			this_page_number = shared->page_number[slotno];
-			if (this_page_number == shared->latest_page_number)
-				continue;
-			if (shared->page_status[slotno] == SLRU_PAGE_VALID)
-			{
-				if (this_delta > best_valid_delta ||
-					(this_delta == best_valid_delta &&
-					 ctl->PagePrecedes(this_page_number,
-									   best_valid_page_number)))
-				{
-					bestvalidslot = slotno;
-					best_valid_delta = this_delta;
-					best_valid_page_number = this_page_number;
-				}
-			}
-			else
-			{
-				if (this_delta > best_invalid_delta ||
-					(this_delta == best_invalid_delta &&
-					 ctl->PagePrecedes(this_page_number,
-									   best_invalid_page_number)))
-				{
-					bestinvalidslot = slotno;
-					best_invalid_delta = this_delta;
-					best_invalid_page_number = this_page_number;
-				}
-			}
-		}
-
-		/*
-		 * If all pages (except possibly the latest one) are I/O busy, we'll
-		 * have to wait for an I/O to complete and then retry.  In that
-		 * unhappy case, we choose to wait for the I/O on the least recently
-		 * used slot, on the assumption that it was likely initiated first of
-		 * all the I/Os in progress and may therefore finish first.
-		 */
-		if (best_valid_delta < 0)
-		{
-			SimpleLruWaitIO(ctl, bestinvalidslot);
-			continue;
-		}
-
-		/*
-		 * If the selected page is clean, we're set.
-		 */
-		if (!shared->page_dirty[bestvalidslot])
-			return bestvalidslot;
-
-		/*
-		 * Write the page.
-		 */
-		SlruInternalWritePage(ctl, bestvalidslot, NULL);
-
-		/*
-		 * Now loop back and try again.  This is the easiest way of dealing
-		 * with corner cases such as the victim page being re-dirtied while we
-		 * wrote it.
-		 */
-	}
-}
-
-/*
- * Write dirty pages to disk during checkpoint or database shutdown.  Flushing
- * is deferred until the next call to ProcessSyncRequests(), though we do fsync
- * the containing directory here to make sure that newly created directory
- * entries are on disk.
- */
-void
-SimpleLruWriteAll(SlruCtl ctl, bool allow_redirtied)
-{
-	SlruShared	shared = ctl->shared;
-	SlruWriteAllData fdata;
-	int			slotno;
-	int			pageno = 0;
-	int			i;
-	bool		ok;
-
-	/* update the stats counter of flushes */
-	pgstat_count_slru_flush(shared->slru_stats_idx);
-
-	/*
-	 * Find and write dirty pages
-	 */
-	fdata.num_files = 0;
+	off_t		size;
+	RelFileLocator rlocator = SlruRelFileLocator(slru_id, segno);
+	SMgrFileHandle sfile = smgropen(rlocator, InvalidBackendId, MAIN_FORKNUM);
 
-	LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
-
-	for (slotno = 0; slotno < shared->num_slots; slotno++)
-	{
-		SlruInternalWritePage(ctl, slotno, &fdata);
-
-		/*
-		 * In some places (e.g. checkpoints), we cannot assert that the slot
-		 * is clean now, since another process might have re-dirtied it
-		 * already.  That's okay.
-		 */
-		Assert(allow_redirtied ||
-			   shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
-			   (shared->page_status[slotno] == SLRU_PAGE_VALID &&
-				!shared->page_dirty[slotno]));
-	}
-
-	LWLockRelease(shared->ControlLock);
+	/* update the stats counter of checked pages */
+	pgstat_count_slru_page_exists(slru_id);
 
-	/*
-	 * Now close any files that were open
-	 */
-	ok = true;
-	for (i = 0; i < fdata.num_files; i++)
-	{
-		if (CloseTransientFile(fdata.fd[i]) != 0)
-		{
-			slru_errcause = SLRU_CLOSE_FAILED;
-			slru_errno = errno;
-			pageno = fdata.segno[i] * SLRU_PAGES_PER_SEGMENT;
-			ok = false;
-		}
-	}
-	if (!ok)
-		SlruReportIOError(ctl, pageno, InvalidTransactionId);
+	if (smgrexists(sfile))
+		size = (off_t) smgrnblocks(sfile) * BLCKSZ;	/* smgrnblocks() counts blocks; compare in bytes */
+	else
+		size = 0;
 
-	/* Ensure that directory entries for new files are on disk. */
-	if (ctl->sync_handler != SYNC_HANDLER_NONE)
-		fsync_fname(ctl->Dir, true);
+	return size >= offset + BLCKSZ;
 }
 
 /*
@@ -1224,75 +96,14 @@ SimpleLruWriteAll(SlruCtl ctl, bool allow_redirtied)
  * after it has accrued freshly-written data.
  */
 void
-SimpleLruTruncate(SlruCtl ctl, int cutoffPage)
+SimpleLruTruncate(int slru_id, SlruPagePrecedesFunction PagePrecedes, int cutoffPage)
 {
-	SlruShared	shared = ctl->shared;
-	int			slotno;
-
 	/* update the stats counter of truncates */
-	pgstat_count_slru_truncate(shared->slru_stats_idx);
-
-	/*
-	 * Scan shared memory and remove any pages preceding the cutoff page, to
-	 * ensure we won't rewrite them later.  (Since this is normally called in
-	 * or just after a checkpoint, any dirty pages should have been flushed
-	 * already ... we're just being extra careful here.)
-	 */
-	LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
-
-restart:
-
-	/*
-	 * While we are holding the lock, make an important safety check: the
-	 * current endpoint page must not be eligible for removal.
-	 */
-	if (ctl->PagePrecedes(shared->latest_page_number, cutoffPage))
-	{
-		LWLockRelease(shared->ControlLock);
-		ereport(LOG,
-				(errmsg("could not truncate directory \"%s\": apparent wraparound",
-						ctl->Dir)));
-		return;
-	}
-
-	for (slotno = 0; slotno < shared->num_slots; slotno++)
-	{
-		if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
-			continue;
-		if (!ctl->PagePrecedes(shared->page_number[slotno], cutoffPage))
-			continue;
-
-		/*
-		 * If page is clean, just change state to EMPTY (expected case).
-		 */
-		if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
-			!shared->page_dirty[slotno])
-		{
-			shared->page_status[slotno] = SLRU_PAGE_EMPTY;
-			continue;
-		}
-
-		/*
-		 * Hmm, we have (or may have) I/O operations acting on the page, so
-		 * we've got to wait for them to finish and then start again. This is
-		 * the same logic as in SlruSelectLRUPage.  (XXX if page is dirty,
-		 * wouldn't it be OK to just discard it without writing it?
-		 * SlruMayDeleteSegment() uses a stricter qualification, so we might
-		 * not delete this page in the end; even if we don't delete it, we
-		 * won't have cause to read its data again.  For now, keep the logic
-		 * the same as it was.)
-		 */
-		if (shared->page_status[slotno] == SLRU_PAGE_VALID)
-			SlruInternalWritePage(ctl, slotno, NULL);
-		else
-			SimpleLruWaitIO(ctl, slotno);
-		goto restart;
-	}
-
-	LWLockRelease(shared->ControlLock);
+	pgstat_count_slru_truncate(slru_id);
 
 	/* Now we can remove the old segment(s) */
-	(void) SlruScanDirectory(ctl, SlruScanDirCbDeleteCutoff, &cutoffPage);
+	(void) SlruScanDirectory(slru_id, PagePrecedes, SlruScanDirCbDeleteCutoff,
+							 &cutoffPage);
 }
 
 /*
@@ -1302,77 +113,22 @@ restart:
  * they either can't yet contain anything, or have already been cleaned out.
  */
 static void
-SlruInternalDeleteSegment(SlruCtl ctl, int segno)
+SlruInternalDeleteSegment(int slru_id, int segno)
 {
-	char		path[MAXPGPATH];
-
-	/* Forget any fsync requests queued for this segment. */
-	if (ctl->sync_handler != SYNC_HANDLER_NONE)
-	{
-		FileTag		tag;
-
-		INIT_SLRUFILETAG(tag, ctl->sync_handler, segno);
-		RegisterSyncRequest(&tag, SYNC_FORGET_REQUEST, true);
-	}
+	RelFileLocator rlocator = SlruRelFileLocator(slru_id, segno);
+	SMgrFileHandle sfile = smgropen(rlocator, InvalidBackendId, MAIN_FORKNUM);
 
 	/* Unlink the file. */
-	SlruFileName(ctl, path, segno);
-	ereport(DEBUG2, (errmsg_internal("removing file \"%s\"", path)));
-	unlink(path);
+	smgrunlink(sfile, false);
 }
 
 /*
  * Delete an individual SLRU segment, identified by the segment number.
  */
 void
-SlruDeleteSegment(SlruCtl ctl, int segno)
+SlruDeleteSegment(int slru_id, int segno)
 {
-	SlruShared	shared = ctl->shared;
-	int			slotno;
-	bool		did_write;
-
-	/* Clean out any possibly existing references to the segment. */
-	LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
-restart:
-	did_write = false;
-	for (slotno = 0; slotno < shared->num_slots; slotno++)
-	{
-		int			pagesegno = shared->page_number[slotno] / SLRU_PAGES_PER_SEGMENT;
-
-		if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
-			continue;
-
-		/* not the segment we're looking for */
-		if (pagesegno != segno)
-			continue;
-
-		/* If page is clean, just change state to EMPTY (expected case). */
-		if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
-			!shared->page_dirty[slotno])
-		{
-			shared->page_status[slotno] = SLRU_PAGE_EMPTY;
-			continue;
-		}
-
-		/* Same logic as SimpleLruTruncate() */
-		if (shared->page_status[slotno] == SLRU_PAGE_VALID)
-			SlruInternalWritePage(ctl, slotno, NULL);
-		else
-			SimpleLruWaitIO(ctl, slotno);
-
-		did_write = true;
-	}
-
-	/*
-	 * Be extra careful and re-check. The IO functions release the control
-	 * lock, so new pages could have been read in.
-	 */
-	if (did_write)
-		goto restart;
-
-	SlruInternalDeleteSegment(ctl, segno);
-
-	LWLockRelease(shared->ControlLock);
+	SlruInternalDeleteSegment(slru_id, segno);
 }
 
 /*
@@ -1389,19 +145,21 @@ restart:
  * first>=cutoff && last>=cutoff: no; every page of this segment is too young
  */
 static bool
-SlruMayDeleteSegment(SlruCtl ctl, int segpage, int cutoffPage)
+SlruMayDeleteSegment(SlruPagePrecedesFunction PagePrecedes,
+					 int segpage, int cutoffPage)
 {
 	int			seg_last_page = segpage + SLRU_PAGES_PER_SEGMENT - 1;
 
 	Assert(segpage % SLRU_PAGES_PER_SEGMENT == 0);
 
-	return (ctl->PagePrecedes(segpage, cutoffPage) &&
-			ctl->PagePrecedes(seg_last_page, cutoffPage));
+	return (PagePrecedes(segpage, cutoffPage) &&
+			PagePrecedes(seg_last_page, cutoffPage));
 }
 
 #ifdef USE_ASSERT_CHECKING
 static void
-SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset)
+SlruPagePrecedesTestOffset(SlruPagePrecedesFunction PagePrecedes,
+						   int per_page, uint32 offset)
 {
 	TransactionId lhs,
 				rhs;
@@ -1426,19 +184,19 @@ SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset)
 	Assert(!TransactionIdPrecedes(rhs, lhs + 1));
 	Assert(!TransactionIdFollowsOrEquals(lhs, rhs));
 	Assert(!TransactionIdFollowsOrEquals(rhs, lhs));
-	Assert(!ctl->PagePrecedes(lhs / per_page, lhs / per_page));
-	Assert(!ctl->PagePrecedes(lhs / per_page, rhs / per_page));
-	Assert(!ctl->PagePrecedes(rhs / per_page, lhs / per_page));
-	Assert(!ctl->PagePrecedes((lhs - per_page) / per_page, rhs / per_page));
-	Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 3 * per_page) / per_page));
-	Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 2 * per_page) / per_page));
-	Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 1 * per_page) / per_page)
+	Assert(!PagePrecedes(lhs / per_page, lhs / per_page));
+	Assert(!PagePrecedes(lhs / per_page, rhs / per_page));
+	Assert(!PagePrecedes(rhs / per_page, lhs / per_page));
+	Assert(!PagePrecedes((lhs - per_page) / per_page, rhs / per_page));
+	Assert(PagePrecedes(rhs / per_page, (lhs - 3 * per_page) / per_page));
+	Assert(PagePrecedes(rhs / per_page, (lhs - 2 * per_page) / per_page));
+	Assert(PagePrecedes(rhs / per_page, (lhs - 1 * per_page) / per_page)
 		   || (1U << 31) % per_page != 0);	/* See CommitTsPagePrecedes() */
-	Assert(ctl->PagePrecedes((lhs + 1 * per_page) / per_page, rhs / per_page)
+	Assert(PagePrecedes((lhs + 1 * per_page) / per_page, rhs / per_page)
 		   || (1U << 31) % per_page != 0);
-	Assert(ctl->PagePrecedes((lhs + 2 * per_page) / per_page, rhs / per_page));
-	Assert(ctl->PagePrecedes((lhs + 3 * per_page) / per_page, rhs / per_page));
-	Assert(!ctl->PagePrecedes(rhs / per_page, (lhs + per_page) / per_page));
+	Assert(PagePrecedes((lhs + 2 * per_page) / per_page, rhs / per_page));
+	Assert(PagePrecedes((lhs + 3 * per_page) / per_page, rhs / per_page));
+	Assert(!PagePrecedes(rhs / per_page, (lhs + per_page) / per_page));
 
 	/*
 	 * GetNewTransactionId() has assigned the last XID it can safely use, and
@@ -1451,7 +209,7 @@ SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset)
 	oldestXact = newestXact + 1;
 	oldestXact -= 1U << 31;
 	oldestPage = oldestXact / per_page;
-	Assert(!SlruMayDeleteSegment(ctl,
+	Assert(!SlruMayDeleteSegment(PagePrecedes,
 								 (newestPage -
 								  newestPage % SLRU_PAGES_PER_SEGMENT),
 								 oldestPage));
@@ -1467,7 +225,7 @@ SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset)
 	oldestXact = newestXact + 1;
 	oldestXact -= 1U << 31;
 	oldestPage = oldestXact / per_page;
-	Assert(!SlruMayDeleteSegment(ctl,
+	Assert(!SlruMayDeleteSegment(PagePrecedes,
 								 (newestPage -
 								  newestPage % SLRU_PAGES_PER_SEGMENT),
 								 oldestPage));
@@ -1483,12 +241,12 @@ SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset)
  * do not apply to them.)
  */
 void
-SlruPagePrecedesUnitTests(SlruCtl ctl, int per_page)
+SlruPagePrecedesUnitTests(SlruPagePrecedesFunction PagePrecedes, int per_page)
 {
 	/* Test first, middle and last entries of a page. */
-	SlruPagePrecedesTestOffset(ctl, per_page, 0);
-	SlruPagePrecedesTestOffset(ctl, per_page, per_page / 2);
-	SlruPagePrecedesTestOffset(ctl, per_page, per_page - 1);
+	SlruPagePrecedesTestOffset(PagePrecedes, per_page, 0);
+	SlruPagePrecedesTestOffset(PagePrecedes, per_page, per_page / 2);
+	SlruPagePrecedesTestOffset(PagePrecedes, per_page, per_page - 1);
 }
 #endif
 
@@ -1498,11 +256,12 @@ SlruPagePrecedesUnitTests(SlruCtl ctl, int per_page)
  *		one containing the page passed as "data".
  */
 bool
-SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, int segpage, void *data)
+SlruScanDirCbReportPresence(int slru_id, SlruPagePrecedesFunction PagePrecedes,
+							char *filename, int segpage, void *data)
 {
 	int			cutoffPage = *(int *) data;
 
-	if (SlruMayDeleteSegment(ctl, segpage, cutoffPage))
+	if (SlruMayDeleteSegment(PagePrecedes, segpage, cutoffPage))
 		return true;			/* found one; don't iterate any more */
 
 	return false;				/* keep going */
@@ -1513,12 +272,15 @@ SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, int segpage, void *data
  *		This callback deletes segments prior to the one passed in as "data".
  */
 static bool
-SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data)
+SlruScanDirCbDeleteCutoff(int slru_id, SlruPagePrecedesFunction PagePrecedes,
+						  char *filename, int segpage, void *data)
 {
 	int			cutoffPage = *(int *) data;
 
-	if (SlruMayDeleteSegment(ctl, segpage, cutoffPage))
-		SlruInternalDeleteSegment(ctl, segpage / SLRU_PAGES_PER_SEGMENT);
+	if (SlruMayDeleteSegment(PagePrecedes, segpage, cutoffPage))
+	{
+		SlruDeleteSegment(slru_id, segpage / SLRU_PAGES_PER_SEGMENT);
+	}
 
 	return false;				/* keep going */
 }
@@ -1528,9 +290,10 @@ SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data)
  *		This callback deletes all segments.
  */
 bool
-SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int segpage, void *data)
+SlruScanDirCbDeleteAll(int slru_id, SlruPagePrecedesFunction PagePrecedes,
+					   char *filename, int segpage, void *data)
 {
-	SlruInternalDeleteSegment(ctl, segpage / SLRU_PAGES_PER_SEGMENT);
+	SlruInternalDeleteSegment(slru_id, segpage / SLRU_PAGES_PER_SEGMENT);
 
 	return false;				/* keep going */
 }
@@ -1551,16 +314,20 @@ SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int segpage, void *data)
  * Note that no locking is applied.
  */
 bool
-SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data)
+SlruScanDirectory(int slru_id, SlruPagePrecedesFunction PagePrecedes,
+				  SlruScanCallback callback, void *data)
 {
 	bool		retval = false;
 	DIR		   *cldir;
 	struct dirent *clde;
 	int			segno;
 	int			segpage;
+	const char *path;
+
+	path = slru_dirs[slru_id];
 
-	cldir = AllocateDir(ctl->Dir);
-	while ((clde = ReadDir(cldir, ctl->Dir)) != NULL)
+	cldir = AllocateDir(path);
+	while ((clde = ReadDir(cldir, path)) != NULL)
 	{
 		size_t		len;
 
@@ -1573,8 +340,8 @@ SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data)
 			segpage = segno * SLRU_PAGES_PER_SEGMENT;
 
 			elog(DEBUG2, "SlruScanDirectory invoking callback on %s/%s",
-				 ctl->Dir, clde->d_name);
-			retval = callback(ctl, clde->d_name, segpage, data);
+				 path, clde->d_name);
+			retval = callback(slru_id, PagePrecedes, clde->d_name, segpage, data);
 			if (retval)
 				break;
 		}
@@ -1585,29 +352,74 @@ SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data)
 }
 
 /*
- * Individual SLRUs (clog, ...) have to provide a sync.c handler function so
- * that they can provide the correct "SlruCtl" (otherwise we don't know how to
- * build the path), but they just forward to this common implementation that
- * performs the fsync.
+ * Read a buffer.  Buffer is pinned on return.
+ */
+Buffer
+ReadSlruBuffer(int slru_id, int pageno, ReadBufferMode mode)
+{
+	int			segno = pageno / SLRU_PAGES_PER_SEGMENT;
+	int			rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
+	RelFileLocator rlocator = SlruRelFileLocator(slru_id, segno);
+	Buffer		buffer;
+	bool		hit;
+
+	/* Avoid a mapping-table lookup; buffer tags use the in-segment page. */
+	buffer = slru_recent_buffers[slru_id].recent_buffer;
+	if (slru_recent_buffers[slru_id].pageno == pageno &&
+		BufferIsValid(buffer) &&
+		ReadRecentBuffer(rlocator, MAIN_FORKNUM, rpageno, buffer))
+	{
+		pgstat_count_slru_page_hit(slru_id);
+		return buffer;
+	}
+
+	/* Regular lookup. */
+	buffer = ReadBufferWithoutRelcacheWithHit(rlocator, MAIN_FORKNUM, rpageno,
+											  mode, NULL, true, &hit);
+
+	/* Remember where this page is for next time. */
+	slru_recent_buffers[slru_id].pageno = pageno;
+	slru_recent_buffers[slru_id].recent_buffer = buffer;
+
+	if (hit)
+		pgstat_count_slru_page_hit(slru_id);
+
+	return buffer;
+}
+
+/*
+ * Zero-initialize a buffer.  Buffer is pinned and exclusively locked on return.
  */
-int
-SlruSyncFileTag(SlruCtl ctl, const FileTag *ftag, char *path)
+Buffer
+ZeroSlruBuffer(int slru_id, int pageno)
 {
-	int			fd;
-	int			save_errno;
-	int			result;
+	int			segno = pageno / SLRU_PAGES_PER_SEGMENT;
+	int			rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
+	RelFileLocator rlocator = SlruRelFileLocator(slru_id, segno);
+	Buffer		buffer;
+	SMgrFileHandle sfile;
 
-	SlruFileName(ctl, path, ftag->segno);
+	sfile = smgropen(rlocator, InvalidBackendId, MAIN_FORKNUM);
+	if (!smgrexists(sfile))
+		smgrcreate(sfile, false);
+
+	buffer = ReadBufferWithoutRelcache(rlocator, MAIN_FORKNUM, rpageno, RBM_ZERO_AND_LOCK, NULL, true);
 
-	fd = OpenTransientFile(path, O_RDWR | PG_BINARY);
-	if (fd < 0)
-		return -1;
+	/* Remember where this page is for next time. */
+	slru_recent_buffers[slru_id].pageno = pageno;
+	slru_recent_buffers[slru_id].recent_buffer = buffer;
 
-	result = pg_fsync(fd);
-	save_errno = errno;
+	pgstat_count_slru_page_zeroed(slru_id);
 
-	CloseTransientFile(fd);
+	return buffer;
+}
+
+bool
+ProbeSlruBuffer(int slru_id, int pageno)
+{
+	int			segno = pageno / SLRU_PAGES_PER_SEGMENT;
+	int			rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
+	RelFileLocator rlocator = SlruRelFileLocator(slru_id, segno);
 
-	errno = save_errno;
-	return result;
+	return BufferProbe(rlocator, MAIN_FORKNUM, rpageno);
 }
diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c
index 66d35481552..47e00c18766 100644
--- a/src/backend/access/transam/subtrans.c
+++ b/src/backend/access/transam/subtrans.c
@@ -32,6 +32,7 @@
 #include "access/subtrans.h"
 #include "access/transam.h"
 #include "pg_trace.h"
+#include "storage/bufmgr.h"
 #include "utils/snapmgr.h"
 
 
@@ -49,21 +50,13 @@
  */
 
 /* We need four bytes per xact */
-#define SUBTRANS_XACTS_PER_PAGE (BLCKSZ / sizeof(TransactionId))
+#define SUBTRANS_XACTS_PER_PAGE ((BLCKSZ - SizeOfPageHeaderData) / sizeof(TransactionId))
 
 #define TransactionIdToPage(xid) ((xid) / (TransactionId) SUBTRANS_XACTS_PER_PAGE)
 #define TransactionIdToEntry(xid) ((xid) % (TransactionId) SUBTRANS_XACTS_PER_PAGE)
 
 
-/*
- * Link to shared-memory data structures for SUBTRANS control
- */
-static SlruCtlData SubTransCtlData;
-
-#define SubTransCtl  (&SubTransCtlData)
-
-
-static int	ZeroSUBTRANSPage(int pageno);
+static Buffer ZeroSUBTRANSPage(int pageno);
 static bool SubTransPagePrecedes(int page1, int page2);
 
 
@@ -75,16 +68,15 @@ SubTransSetParent(TransactionId xid, TransactionId parent)
 {
 	int			pageno = TransactionIdToPage(xid);
 	int			entryno = TransactionIdToEntry(xid);
-	int			slotno;
 	TransactionId *ptr;
+	Buffer		buffer;
 
 	Assert(TransactionIdIsValid(parent));
 	Assert(TransactionIdFollows(xid, parent));
 
-	LWLockAcquire(SubtransSLRULock, LW_EXCLUSIVE);
-
-	slotno = SimpleLruReadPage(SubTransCtl, pageno, true, xid);
-	ptr = (TransactionId *) SubTransCtl->shared->page_buffer[slotno];
+	buffer = ReadSlruBuffer(SLRU_SUBTRANS_ID, pageno, RBM_NORMAL);
+	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+	ptr = (TransactionId *) PageGetContents(BufferGetPage(buffer));
 	ptr += entryno;
 
 	/*
@@ -96,10 +88,10 @@ SubTransSetParent(TransactionId xid, TransactionId parent)
 	{
 		Assert(*ptr == InvalidTransactionId);
 		*ptr = parent;
-		SubTransCtl->shared->page_dirty[slotno] = true;
+		MarkBufferDirty(buffer);
 	}
 
-	LWLockRelease(SubtransSLRULock);
+	UnlockReleaseBuffer(buffer);
 }
 
 /*
@@ -110,9 +102,9 @@ SubTransGetParent(TransactionId xid)
 {
 	int			pageno = TransactionIdToPage(xid);
 	int			entryno = TransactionIdToEntry(xid);
-	int			slotno;
 	TransactionId *ptr;
 	TransactionId parent;
+	Buffer		buffer;
 
 	/* Can't ask about stuff that might not be around anymore */
 	Assert(TransactionIdFollowsOrEquals(xid, TransactionXmin));
@@ -121,15 +113,14 @@ SubTransGetParent(TransactionId xid)
 	if (!TransactionIdIsNormal(xid))
 		return InvalidTransactionId;
 
-	/* lock is acquired by SimpleLruReadPage_ReadOnly */
+	buffer = ReadSlruBuffer(SLRU_SUBTRANS_ID, pageno, RBM_NORMAL);
 
-	slotno = SimpleLruReadPage_ReadOnly(SubTransCtl, pageno, xid);
-	ptr = (TransactionId *) SubTransCtl->shared->page_buffer[slotno];
+	ptr = (TransactionId *) PageGetContents(BufferGetPage(buffer));
 	ptr += entryno;
 
 	parent = *ptr;
 
-	LWLockRelease(SubtransSLRULock);
+	ReleaseBuffer(buffer);
 
 	return parent;
 }
@@ -177,26 +168,6 @@ SubTransGetTopmostTransaction(TransactionId xid)
 	return previousXid;
 }
 
-
-/*
- * Initialization of shared memory for SUBTRANS
- */
-Size
-SUBTRANSShmemSize(void)
-{
-	return SimpleLruShmemSize(NUM_SUBTRANS_BUFFERS, 0);
-}
-
-void
-SUBTRANSShmemInit(void)
-{
-	SubTransCtl->PagePrecedes = SubTransPagePrecedes;
-	SimpleLruInit(SubTransCtl, "Subtrans", NUM_SUBTRANS_BUFFERS, 0,
-				  SubtransSLRULock, "pg_subtrans",
-				  LWTRANCHE_SUBTRANS_BUFFER, SYNC_HANDLER_NONE);
-	SlruPagePrecedesUnitTests(SubTransCtl, SUBTRANS_XACTS_PER_PAGE);
-}
-
 /*
  * This func must be called ONCE on system install.  It creates
  * the initial SUBTRANS segment.  (The SUBTRANS directory is assumed to
@@ -210,18 +181,16 @@ SUBTRANSShmemInit(void)
 void
 BootStrapSUBTRANS(void)
 {
-	int			slotno;
+	Buffer		buffer;
 
-	LWLockAcquire(SubtransSLRULock, LW_EXCLUSIVE);
+	SlruPagePrecedesUnitTests(SubTransPagePrecedes, SUBTRANS_XACTS_PER_PAGE);
 
 	/* Create and zero the first page of the subtrans log */
-	slotno = ZeroSUBTRANSPage(0);
+	buffer = ZeroSUBTRANSPage(0);
 
 	/* Make sure it's written out */
-	SimpleLruWritePage(SubTransCtl, slotno);
-	Assert(!SubTransCtl->shared->page_dirty[slotno]);
-
-	LWLockRelease(SubtransSLRULock);
+	FlushOneBuffer(buffer);
+	UnlockReleaseBuffer(buffer);
 }
 
 /*
@@ -232,10 +201,19 @@ BootStrapSUBTRANS(void)
  *
- * Control lock must be held at entry, and will be held at exit.
+ * Buffer is returned pinned, exclusively locked and dirty; caller releases it.
  */
-static int
+static Buffer
 ZeroSUBTRANSPage(int pageno)
 {
-	return SimpleLruZeroPage(SubTransCtl, pageno);
+	Buffer		buffer;
+	Page		page;
+
+	buffer = ZeroSlruBuffer(SLRU_SUBTRANS_ID, pageno);
+	page = BufferGetPage(buffer);
+	PageInitSLRU(page, BLCKSZ, 0);
+
+	MarkBufferDirty(buffer);
+
+	return buffer;
 }
 
 /*
@@ -258,7 +236,6 @@ StartupSUBTRANS(TransactionId oldestActiveXID)
 	 * Whenever we advance into a new page, ExtendSUBTRANS will likewise zero
 	 * the new page without regard to whatever was previously on disk.
 	 */
-	LWLockAcquire(SubtransSLRULock, LW_EXCLUSIVE);
 
 	startPage = TransactionIdToPage(oldestActiveXID);
 	nextXid = ShmemVariableCache->nextXid;
@@ -266,36 +243,15 @@ StartupSUBTRANS(TransactionId oldestActiveXID)
 
 	while (startPage != endPage)
 	{
-		(void) ZeroSUBTRANSPage(startPage);
+		UnlockReleaseBuffer(ZeroSUBTRANSPage(startPage));
 		startPage++;
 		/* must account for wraparound */
 		if (startPage > TransactionIdToPage(MaxTransactionId))
 			startPage = 0;
 	}
-	(void) ZeroSUBTRANSPage(startPage);
-
-	LWLockRelease(SubtransSLRULock);
-}
-
-/*
- * Perform a checkpoint --- either during shutdown, or on-the-fly
- */
-void
-CheckPointSUBTRANS(void)
-{
-	/*
-	 * Write dirty SUBTRANS pages to disk
-	 *
-	 * This is not actually necessary from a correctness point of view. We do
-	 * it merely to improve the odds that writing of dirty pages is done by
-	 * the checkpoint process and not by backends.
-	 */
-	TRACE_POSTGRESQL_SUBTRANS_CHECKPOINT_START(true);
-	SimpleLruWriteAll(SubTransCtl, true);
-	TRACE_POSTGRESQL_SUBTRANS_CHECKPOINT_DONE(true);
+	UnlockReleaseBuffer(ZeroSUBTRANSPage(startPage));
 }
 
-
 /*
  * Make sure that SUBTRANS has room for a newly-allocated XID.
  *
@@ -319,12 +275,8 @@ ExtendSUBTRANS(TransactionId newestXact)
 
 	pageno = TransactionIdToPage(newestXact);
 
-	LWLockAcquire(SubtransSLRULock, LW_EXCLUSIVE);
-
 	/* Zero the page */
-	ZeroSUBTRANSPage(pageno);
-
-	LWLockRelease(SubtransSLRULock);
+	UnlockReleaseBuffer(ZeroSUBTRANSPage(pageno));
 }
 
 
@@ -350,7 +302,7 @@ TruncateSUBTRANS(TransactionId oldestXact)
 	TransactionIdRetreat(oldestXact);
 	cutoffPage = TransactionIdToPage(oldestXact);
 
-	SimpleLruTruncate(SubTransCtl, cutoffPage);
+	SimpleLruTruncate(SLRU_SUBTRANS_ID, SubTransPagePrecedes, cutoffPage);
 }
 
 
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index 8086b857b96..d792186cf97 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -67,6 +67,7 @@
 #include "utils/inval.h"
 #include "utils/memutils.h"
 #include "utils/relmapper.h"
+#include "utils/resowner_private.h"
 #include "utils/snapmgr.h"
 #include "utils/timeout.h"
 #include "utils/timestamp.h"
@@ -1396,6 +1397,7 @@ RecordTransactionCommit(void)
 		 * are delaying the checkpoint a bit fuzzy, but it doesn't matter.
 		 */
 		Assert((MyProc->delayChkptFlags & DELAY_CHKPT_START) == 0);
+
 		START_CRIT_SECTION();
 		MyProc->delayChkptFlags |= DELAY_CHKPT_START;
 
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index a31fbbff78d..9e2ac0d5392 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -4645,6 +4645,7 @@ BootStrapXLOG(void)
 	uint64		sysidentifier;
 	struct timeval tv;
 	pg_crc32c	crc;
+	ResourceOwner resowner;
 
 	/* allow ordinary WAL segment creation, like StartupXLOG() would */
 	SetInstallXLogFileSegmentActive();
@@ -4784,10 +4785,14 @@ BootStrapXLOG(void)
 	WriteControlFile();
 
 	/* Bootstrap the commit log, too */
+	resowner = ResourceOwnerCreate(NULL, "bootstrap resowner");
+	CurrentResourceOwner = resowner;
 	BootStrapCLOG();
 	BootStrapCommitTs();
 	BootStrapSUBTRANS();
 	BootStrapMultiXact();
+	CurrentResourceOwner = NULL;
+	ResourceOwnerDelete(resowner);
 
 	pfree(buffer);
 
@@ -4796,6 +4801,8 @@ BootStrapXLOG(void)
 	 * otherwise never run the checks and GUC related initializations therein.
 	 */
 	ReadControlFile();
+
+	smgrcloseall();
 }
 
 static char *
@@ -7004,15 +7011,11 @@ CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
 	CheckPointSnapBuild();
 	CheckPointLogicalRewriteHeap();
 	CheckPointReplicationOrigin();
+	CheckPointPredicate();
 
-	/* Write out all dirty data in SLRUs and the main buffer pool */
+	/* Write out all dirty data in the buffer pool */
 	TRACE_POSTGRESQL_BUFFER_CHECKPOINT_START(flags);
 	CheckpointStats.ckpt_write_t = GetCurrentTimestamp();
-	CheckPointCLOG();
-	CheckPointCommitTs();
-	CheckPointSUBTRANS();
-	CheckPointMultiXact();
-	CheckPointPredicate();
 	CheckPointBuffers(flags);
 
 	/* Perform all queued up fsyncs */
diff --git a/src/backend/access/transam/xlogprefetcher.c b/src/backend/access/transam/xlogprefetcher.c
index 0cf03945eec..e7b2a2f9272 100644
--- a/src/backend/access/transam/xlogprefetcher.c
+++ b/src/backend/access/transam/xlogprefetcher.c
@@ -652,7 +652,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn)
 		{
 			int			block_id = prefetcher->next_block_id++;
 			DecodedBkpBlock *block = &record->blocks[block_id];
-			SMgrRelation reln;
+			SMgrFileHandle sfile;
 			PrefetchBufferResult result;
 
 			if (!block->in_use)
@@ -722,7 +722,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn)
 			 * same relation (with some scheme to handle invalidations
 			 * safely), but for now we'll call smgropen() every time.
 			 */
-			reln = smgropen(block->rlocator, InvalidBackendId);
+			sfile = smgropen(block->rlocator, InvalidBackendId, block->forknum);
 
 			/*
 			 * If the relation file doesn't exist on disk, for example because
@@ -731,14 +731,14 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn)
 			 * further prefetching in the relation until this record is
 			 * replayed.
 			 */
-			if (!smgrexists(reln, MAIN_FORKNUM))
+			if (!smgrexists(sfile))
 			{
 #ifdef XLOGPREFETCHER_DEBUG_LEVEL
 				elog(XLOGPREFETCHER_DEBUG_LEVEL,
 					 "suppressing all prefetch in relation %u/%u/%u until %X/%X is replayed, because the relation does not exist on disk",
-					 reln->smgr_rlocator.locator.spcOid,
-					 reln->smgr_rlocator.locator.dbOid,
-					 reln->smgr_rlocator.locator.relNumber,
+					 block->rlocator.spcOid,
+					 block->rlocator.dbOid,
+					 block->rlocator.relNumber,
 					 LSN_FORMAT_ARGS(record->lsn));
 #endif
 				XLogPrefetcherAddFilter(prefetcher, block->rlocator, 0,
@@ -752,14 +752,14 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn)
 			 * block yet, suppress prefetching of this block and higher until
 			 * this record is replayed.
 			 */
-			if (block->blkno >= smgrnblocks(reln, block->forknum))
+			if (block->blkno >= smgrnblocks(sfile))
 			{
 #ifdef XLOGPREFETCHER_DEBUG_LEVEL
 				elog(XLOGPREFETCHER_DEBUG_LEVEL,
 					 "suppressing prefetch in relation %u/%u/%u from block %u until %X/%X is replayed, because the relation is too small",
-					 reln->smgr_rlocator.locator.spcOid,
-					 reln->smgr_rlocator.locator.dbOid,
-					 reln->smgr_rlocator.locator.relNumber,
+					 block->rlocator.spcOid,
+					 block->rlocator.dbOid,
+					 block->rlocator.relNumber,
 					 block->blkno,
 					 LSN_FORMAT_ARGS(record->lsn));
 #endif
@@ -770,7 +770,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn)
 			}
 
 			/* Try to initiate prefetching. */
-			result = PrefetchSharedBuffer(reln, block->forknum, block->blkno);
+			result = PrefetchSharedBuffer(sfile, block->blkno);
 			if (BufferIsValid(result.recent_buffer))
 			{
 				/* Cache hit, nothing to do. */
@@ -796,9 +796,9 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn)
 				 */
 				elog(ERROR,
 					 "could not prefetch relation %u/%u/%u block %u",
-					 reln->smgr_rlocator.locator.spcOid,
-					 reln->smgr_rlocator.locator.dbOid,
-					 reln->smgr_rlocator.locator.relNumber,
+					 block->rlocator.spcOid,
+					 block->rlocator.dbOid,
+					 block->rlocator.relNumber,
 					 block->blkno);
 			}
 		}
diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c
index 563cba258dd..ab59bfe66c9 100644
--- a/src/backend/access/transam/xlogutils.c
+++ b/src/backend/access/transam/xlogutils.c
@@ -477,7 +477,7 @@ XLogReadBufferExtended(RelFileLocator rlocator, ForkNumber forknum,
 {
 	BlockNumber lastblock;
 	Buffer		buffer;
-	SMgrRelation smgr;
+	SMgrFileHandle sfile;
 
 	Assert(blkno != P_NEW);
 
@@ -491,7 +491,7 @@ XLogReadBufferExtended(RelFileLocator rlocator, ForkNumber forknum,
 	}
 
 	/* Open the relation at smgr level */
-	smgr = smgropen(rlocator, InvalidBackendId);
+	sfile = smgropen(rlocator, InvalidBackendId, forknum);
 
 	/*
 	 * Create the target file if it doesn't already exist.  This lets us cope
@@ -501,9 +501,9 @@ XLogReadBufferExtended(RelFileLocator rlocator, ForkNumber forknum,
 	 * filesystem loses an inode during a crash.  Better to write the data
 	 * until we are actually told to delete the file.)
 	 */
-	smgrcreate(smgr, forknum, true);
+	smgrcreate(sfile, true);
 
-	lastblock = smgrnblocks(smgr, forknum);
+	lastblock = smgrnblocks(sfile);
 
 	if (blkno < lastblock)
 	{
@@ -631,7 +631,7 @@ CreateFakeRelcacheEntry(RelFileLocator rlocator)
 	rel->rd_lockInfo.lockRelId.dbId = rlocator.dbOid;
 	rel->rd_lockInfo.lockRelId.relId = rlocator.relNumber;
 
-	rel->rd_smgr = NULL;
+	MemSet(rel->rd_smgr, 0, sizeof(rel->rd_smgr));
 
 	return rel;
 }
@@ -643,8 +643,11 @@ void
 FreeFakeRelcacheEntry(Relation fakerel)
 {
 	/* make sure the fakerel is not referenced by the SmgrRelation anymore */
-	if (fakerel->rd_smgr != NULL)
-		smgrclearowner(&fakerel->rd_smgr, fakerel->rd_smgr);
+	for (int i = 0; i <= MAX_FORKNUM; i++)
+	{
+		if (fakerel->rd_smgr[i] != NULL)
+			smgrclearowner(&fakerel->rd_smgr[i], fakerel->rd_smgr[i]);
+	}
 	pfree(fakerel);
 }
 
diff --git a/src/backend/catalog/catalog.c b/src/backend/catalog/catalog.c
index 2abd6b007a2..f536cfc2a9b 100644
--- a/src/backend/catalog/catalog.c
+++ b/src/backend/catalog/catalog.c
@@ -501,10 +501,10 @@ GetNewOidWithIndex(Relation relation, Oid indexId, AttrNumber oidcolumn)
 RelFileNumber
 GetNewRelFileNumber(Oid reltablespace, Relation pg_class, char relpersistence)
 {
-	RelFileLocatorBackend rlocator;
-	char	   *rpath;
-	bool		collides;
+	RelFileLocator rlocator;
 	BackendId	backend;
+	SMgrFileHandle sfile;
+	bool		collides;
 
 	/*
 	 * If we ever get here during pg_upgrade, there's something wrong; all
@@ -513,6 +513,11 @@ GetNewRelFileNumber(Oid reltablespace, Relation pg_class, char relpersistence)
 	 */
 	Assert(!IsBinaryUpgrade);
 
+	/*
+	 * The relpath will vary based on the backend ID, so we must initialize
+	 * that properly here to make sure that any collisions based on filename
+	 * are properly detected.
+	 */
 	switch (relpersistence)
 	{
 		case RELPERSISTENCE_TEMP:
@@ -528,53 +533,29 @@ GetNewRelFileNumber(Oid reltablespace, Relation pg_class, char relpersistence)
 	}
 
 	/* This logic should match RelationInitPhysicalAddr */
-	rlocator.locator.spcOid = reltablespace ? reltablespace : MyDatabaseTableSpace;
-	rlocator.locator.dbOid =
-		(rlocator.locator.spcOid == GLOBALTABLESPACE_OID) ?
+	rlocator.spcOid = reltablespace ? reltablespace : MyDatabaseTableSpace;
+	rlocator.dbOid =
+		(rlocator.spcOid == GLOBALTABLESPACE_OID) ?
 		InvalidOid : MyDatabaseId;
 
-	/*
-	 * The relpath will vary based on the backend ID, so we must initialize
-	 * that properly here to make sure that any collisions based on filename
-	 * are properly detected.
-	 */
-	rlocator.backend = backend;
-
 	do
 	{
 		CHECK_FOR_INTERRUPTS();
 
 		/* Generate the OID */
 		if (pg_class)
-			rlocator.locator.relNumber = GetNewOidWithIndex(pg_class, ClassOidIndexId,
+			rlocator.relNumber = GetNewOidWithIndex(pg_class, ClassOidIndexId,
 															Anum_pg_class_oid);
 		else
-			rlocator.locator.relNumber = GetNewObjectId();
+			rlocator.relNumber = GetNewObjectId();
 
 		/* Check for existing file of same name */
-		rpath = relpath(rlocator, MAIN_FORKNUM);
-
-		if (access(rpath, F_OK) == 0)
-		{
-			/* definite collision */
-			collides = true;
-		}
-		else
-		{
-			/*
-			 * Here we have a little bit of a dilemma: if errno is something
-			 * other than ENOENT, should we declare a collision and loop? In
-			 * practice it seems best to go ahead regardless of the errno.  If
-			 * there is a colliding file we will get an smgr failure when we
-			 * attempt to create the new relation file.
-			 */
-			collides = false;
-		}
-
-		pfree(rpath);
+		sfile = smgropen(rlocator, backend, MAIN_FORKNUM);
+		collides = smgrexists(sfile);
+		smgrclose(sfile);
 	} while (collides);
 
-	return rlocator.locator.relNumber;
+	return rlocator.relNumber;
 }
 
 /*
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 61f1d3926a9..8ae943c4914 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -3029,9 +3029,9 @@ index_build(Relation heapRelation,
 	 * relfilenumber won't change, and nothing needs to be done here.
 	 */
 	if (indexRelation->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
-		!smgrexists(RelationGetSmgr(indexRelation), INIT_FORKNUM))
+		!smgrexists(RelationGetSmgr(indexRelation, INIT_FORKNUM)))
 	{
-		smgrcreate(RelationGetSmgr(indexRelation), INIT_FORKNUM, false);
+		smgrcreate(RelationGetSmgr(indexRelation, INIT_FORKNUM), false);
 		indexRelation->rd_indam->ambuildempty(indexRelation);
 	}
 
diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c
index d708af19ed2..5d47864e3a9 100644
--- a/src/backend/catalog/storage.c
+++ b/src/backend/catalog/storage.c
@@ -116,11 +116,11 @@ AddPendingSync(const RelFileLocator *rlocator)
  * that does not want the storage to be destroyed in case of an abort may
  * pass register_delete = false.
  */
-SMgrRelation
+SMgrFileHandle
 RelationCreateStorage(RelFileLocator rlocator, char relpersistence,
 					  bool register_delete)
 {
-	SMgrRelation srel;
+	SMgrFileHandle sfile;
 	BackendId	backend;
 	bool		needs_wal;
 
@@ -145,11 +145,11 @@ RelationCreateStorage(RelFileLocator rlocator, char relpersistence,
 			return NULL;		/* placate compiler */
 	}
 
-	srel = smgropen(rlocator, backend);
-	smgrcreate(srel, MAIN_FORKNUM, false);
+	sfile = smgropen(rlocator, backend, MAIN_FORKNUM);
+	smgrcreate(sfile, false);
 
 	if (needs_wal)
-		log_smgrcreate(&srel->smgr_rlocator.locator, MAIN_FORKNUM);
+		log_smgrcreate(&rlocator, MAIN_FORKNUM);
 
 	/*
 	 * Add the relation to the list of stuff to delete at abort, if we are
@@ -175,7 +175,7 @@ RelationCreateStorage(RelFileLocator rlocator, char relpersistence,
 		AddPendingSync(&rlocator);
 	}
 
-	return srel;
+	return sfile;
 }
 
 /*
@@ -292,16 +292,18 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
 	ForkNumber	forks[MAX_FORKNUM];
 	BlockNumber blocks[MAX_FORKNUM];
 	int			nforks = 0;
-	SMgrRelation reln;
 
 	/*
 	 * Make sure smgr_targblock etc aren't pointing somewhere past new end.
-	 * (Note: don't rely on this reln pointer below this loop.)
+	 * (Note: the per-fork handles are fetched afresh and not kept past this loop.)
 	 */
-	reln = RelationGetSmgr(rel);
-	reln->smgr_targblock = InvalidBlockNumber;
-	for (int i = 0; i <= MAX_FORKNUM; ++i)
-		reln->smgr_cached_nblocks[i] = InvalidBlockNumber;
+	for (int i = 0; i <= MAX_FORKNUM; i++)
+	{
+		SMgrFileHandle sfile = RelationGetSmgr(rel, i);
+
+		sfile->smgr_targblock = InvalidBlockNumber;
+		sfile->smgr_cached_nblocks = InvalidBlockNumber;
+	}
 
 	/* Prepare for truncation of MAIN fork of the relation */
 	forks[nforks] = MAIN_FORKNUM;
@@ -309,7 +311,7 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
 	nforks++;
 
 	/* Prepare for truncation of the FSM if it exists */
-	fsm = smgrexists(RelationGetSmgr(rel), FSM_FORKNUM);
+	fsm = smgrexists(RelationGetSmgr(rel, FSM_FORKNUM));
 	if (fsm)
 	{
 		blocks[nforks] = FreeSpaceMapPrepareTruncateRel(rel, nblocks);
@@ -322,7 +324,7 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
 	}
 
 	/* Prepare for truncation of the visibility map too if it exists */
-	vm = smgrexists(RelationGetSmgr(rel), VISIBILITYMAP_FORKNUM);
+	vm = smgrexists(RelationGetSmgr(rel, VISIBILITYMAP_FORKNUM));
 	if (vm)
 	{
 		blocks[nforks] = visibilitymap_prepare_truncate(rel, nblocks);
@@ -390,11 +392,12 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
 	}
 
 	/*
-	 * This will first remove any buffers from the buffer pool that should no
+	 * First remove any buffers from the buffer pool that should no
 	 * longer exist after truncation is complete, and then truncate the
 	 * corresponding files on disk.
 	 */
-	smgrtruncate(RelationGetSmgr(rel), forks, nforks, blocks);
+	DropRelationBuffers(rel->rd_locator, rel->rd_backend, forks, nforks, blocks);
+	smgrtruncate_multi(rel->rd_locator, rel->rd_backend, forks, nforks, blocks);
 
 	/* We've done all the critical work, so checkpoints are OK now. */
 	MyProc->delayChkptFlags &= ~DELAY_CHKPT_COMPLETE;
@@ -428,7 +431,7 @@ RelationPreTruncate(Relation rel)
 		return;
 
 	pending = hash_search(pendingSyncHash,
-						  &(RelationGetSmgr(rel)->smgr_rlocator.locator),
+						  &rel->rd_locator,
 						  HASH_FIND, NULL);
 	if (pending)
 		pending->is_truncated = true;
@@ -444,12 +447,12 @@ RelationPreTruncate(Relation rel)
  * Also note that this is frequently called via locutions such as
  *		RelationCopyStorage(RelationGetSmgr(rel), ...);
  * That's safe only because we perform only smgr and WAL operations here.
- * If we invoked anything else, a relcache flush could cause our SMgrRelation
+ * If we invoked anything else, a relcache flush could cause our SMgrFileHandle
  * argument to become a dangling pointer.
  */
 void
-RelationCopyStorage(SMgrRelation src, SMgrRelation dst,
-					ForkNumber forkNum, char relpersistence)
+RelationCopyStorage(SMgrFileHandle src, SMgrFileHandle dst,
+					char relpersistence)
 {
 	PGAlignedBlock buf;
 	Page		page;
@@ -466,7 +469,7 @@ RelationCopyStorage(SMgrRelation src, SMgrRelation dst,
 	 * it needs to be synced to disk.
 	 */
 	copying_initfork = relpersistence == RELPERSISTENCE_UNLOGGED &&
-		forkNum == INIT_FORKNUM;
+		src->smgr_locator.forknum == INIT_FORKNUM;
 
 	/*
 	 * We need to log the copied data in WAL iff WAL archiving/streaming is
@@ -477,14 +480,14 @@ RelationCopyStorage(SMgrRelation src, SMgrRelation dst,
 	use_wal = XLogIsNeeded() &&
 		(relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork);
 
-	nblocks = smgrnblocks(src, forkNum);
+	nblocks = smgrnblocks(src);
 
 	for (blkno = 0; blkno < nblocks; blkno++)
 	{
 		/* If we got a cancel signal during the copy of the data, quit */
 		CHECK_FOR_INTERRUPTS();
 
-		smgrread(src, forkNum, blkno, buf.data);
+		smgrread(src, blkno, buf.data);
 
 		if (!PageIsVerifiedExtended(page, blkno,
 									PIV_LOG_WARNING | PIV_REPORT_STAT))
@@ -496,9 +499,7 @@ RelationCopyStorage(SMgrRelation src, SMgrRelation dst,
 			 * (errcontext callbacks shouldn't be risking any such thing, but
 			 * people have been known to forget that rule.)
 			 */
-			char	   *relpath = relpathbackend(src->smgr_rlocator.locator,
-												 src->smgr_rlocator.backend,
-												 forkNum);
+			char	   *relpath = smgrfilepath(src->smgr_locator);
 
 			ereport(ERROR,
 					(errcode(ERRCODE_DATA_CORRUPTED),
@@ -512,7 +513,7 @@ RelationCopyStorage(SMgrRelation src, SMgrRelation dst,
 		 * space.
 		 */
 		if (use_wal)
-			log_newpage(&dst->smgr_rlocator.locator, forkNum, blkno, page, false);
+			log_newpage(&dst->smgr_locator.locator, dst->smgr_locator.forknum, blkno, page, false);
 
 		PageSetChecksumInplace(page, blkno);
 
@@ -521,7 +522,7 @@ RelationCopyStorage(SMgrRelation src, SMgrRelation dst,
 		 * need for smgr to schedule an fsync for this write; we'll do it
 		 * ourselves below.
 		 */
-		smgrextend(dst, forkNum, blkno, buf.data, true);
+		smgrextend(dst, blkno, buf.data, true);
 	}
 
 	/*
@@ -534,7 +535,7 @@ RelationCopyStorage(SMgrRelation src, SMgrRelation dst,
 	 * they might still not be on disk when the crash occurs.
 	 */
 	if (use_wal || copying_initfork)
-		smgrimmedsync(dst, forkNum);
+		smgrimmedsync(dst);
 }
 
 /*
@@ -653,9 +654,9 @@ smgrDoPendingDeletes(bool isCommit)
 	PendingRelDelete *pending;
 	PendingRelDelete *prev;
 	PendingRelDelete *next;
-	int			nrels = 0,
-				maxrels = 0;
-	SMgrRelation *srels = NULL;
+	int			nlocators = 0,
+				maxlocators = 0;
+	RelFileLocatorBackend *locators = NULL;
 
 	prev = NULL;
 	for (pending = pendingDeletes; pending != NULL; pending = next)
@@ -676,23 +677,21 @@ smgrDoPendingDeletes(bool isCommit)
 			/* do deletion if called for */
 			if (pending->atCommit == isCommit)
 			{
-				SMgrRelation srel;
-
-				srel = smgropen(pending->rlocator, pending->backend);
+				RelFileLocatorBackend rlocator = { pending->rlocator, pending->backend };
 
 				/* allocate the initial array, or extend it, if needed */
-				if (maxrels == 0)
+				if (maxlocators == 0)
 				{
-					maxrels = 8;
-					srels = palloc(sizeof(SMgrRelation) * maxrels);
+					maxlocators = 8;
+					locators = palloc(sizeof(RelFileLocatorBackend) * maxlocators);
 				}
-				else if (maxrels <= nrels)
+				else if (maxlocators <= nlocators)
 				{
-					maxrels *= 2;
-					srels = repalloc(srels, sizeof(SMgrRelation) * maxrels);
+					maxlocators *= 2;
+					locators = repalloc(locators, sizeof(RelFileLocatorBackend) * maxlocators);
 				}
 
-				srels[nrels++] = srel;
+				locators[nlocators++] = rlocator;
 			}
 			/* must explicitly free the list entry */
 			pfree(pending);
@@ -700,15 +699,64 @@ smgrDoPendingDeletes(bool isCommit)
 		}
 	}
 
-	if (nrels > 0)
+	if (nlocators > 0)
 	{
-		smgrdounlinkall(srels, nrels, false);
+		ForkNumber forks[MAX_FORKNUM + 1];
+
+		for (int i = 0; i <= MAX_FORKNUM; i++)
+			forks[i] = i;
+
+		/*
+		 * Get rid of any remaining buffers for the relations.  bufmgr will just
+		 * drop them without bothering to write the contents.
+		 */
+		DropRelationsAllBuffers(locators, nlocators);
 
-		for (int i = 0; i < nrels; i++)
-			smgrclose(srels[i]);
+		for (int i = 0; i < nlocators; i++)
+			smgrunlink_multi(locators[i].locator, locators[i].backend, forks, MAX_FORKNUM + 1, false);
+		pfree(locators);
+	}
+}
+
+/*
+ * DropRelationFiles -- drop files of all given relations
+ *
+ * Discards any buffers still held for the relations and then unlinks every
+ * fork of each one.  isRedo is forwarded to the unlink step; when it is set,
+ * XLogDropRelation() is also called for each fork beforehand.
+ */
+void
+DropRelationFiles(RelFileLocator *delrels, int ndelrels, bool isRedo)
+{
+	RelFileLocatorBackend *locators;
+	int			i;
+	ForkNumber	all_forks[MAX_FORKNUM + 1];
 
-		pfree(srels);
+	locators = palloc(sizeof(RelFileLocatorBackend) * ndelrels);
+	for (i = 0; i < ndelrels; i++)
+	{
+		if (isRedo)
+		{
+			for (int fork = 0; fork <= MAX_FORKNUM; fork++)
+				XLogDropRelation(delrels[i], fork);
+		}
+		locators[i].locator = delrels[i];
+		locators[i].backend = InvalidBackendId;
 	}
+
+	/*
+	 * Get rid of any remaining buffers for the relations.  bufmgr will just
+	 * drop them without bothering to write the contents.
+	 */
+	DropRelationsAllBuffers(locators, ndelrels);
+
+	for (int fork = 0; fork <= MAX_FORKNUM; fork++)
+		all_forks[fork] = fork;
+
+	for (i = 0; i < ndelrels; i++)
+		smgrunlink_multi(locators[i].locator, locators[i].backend, all_forks, MAX_FORKNUM + 1, isRedo);
+
+	pfree(locators);
 }
 
 /*
@@ -718,9 +760,9 @@ void
 smgrDoPendingSyncs(bool isCommit, bool isParallelWorker)
 {
 	PendingRelDelete *pending;
-	int			nrels = 0,
-				maxrels = 0;
-	SMgrRelation *srels = NULL;
+	int			nlocators = 0,
+				maxlocators = 0;
+	RelFileLocator *locators = NULL;
 	HASH_SEQ_STATUS scan;
 	PendingRelSync *pendingsync;
 
@@ -757,9 +799,6 @@ smgrDoPendingSyncs(bool isCommit, bool isParallelWorker)
 		ForkNumber	fork;
 		BlockNumber nblocks[MAX_FORKNUM + 1];
 		BlockNumber total_blocks = 0;
-		SMgrRelation srel;
-
-		srel = smgropen(pendingsync->rlocator, InvalidBackendId);
 
 		/*
 		 * We emit newpage WAL records for smaller relations.
@@ -773,9 +812,12 @@ smgrDoPendingSyncs(bool isCommit, bool isParallelWorker)
 		{
 			for (fork = 0; fork <= MAX_FORKNUM; fork++)
 			{
-				if (smgrexists(srel, fork))
+				SMgrFileHandle sfile;
+
+				sfile = smgropen(pendingsync->rlocator, InvalidBackendId, fork);
+				if (smgrexists(sfile))
 				{
-					BlockNumber n = smgrnblocks(srel, fork);
+					BlockNumber n = smgrnblocks(sfile);
 
 					/* we shouldn't come here for unlogged relations */
 					Assert(fork != INIT_FORKNUM);
@@ -803,18 +845,19 @@ smgrDoPendingSyncs(bool isCommit, bool isParallelWorker)
 			total_blocks * BLCKSZ / 1024 >= wal_skip_threshold)
 		{
 			/* allocate the initial array, or extend it, if needed */
-			if (maxrels == 0)
+			if (maxlocators == 0)
 			{
-				maxrels = 8;
-				srels = palloc(sizeof(SMgrRelation) * maxrels);
+				maxlocators = 8;
+				locators = palloc(sizeof(RelFileLocator) * maxlocators);
 			}
-			else if (maxrels <= nrels)
+			else if (maxlocators <= nlocators)
 			{
-				maxrels *= 2;
-				srels = repalloc(srels, sizeof(SMgrRelation) * maxrels);
+				maxlocators *= 2;
+				locators = repalloc(locators, sizeof(RelFileLocator) * maxlocators);
 			}
 
-			srels[nrels++] = srel;
+			locators[nlocators] = pendingsync->rlocator;
+			nlocators++;
 		}
 		else
 		{
@@ -833,7 +876,7 @@ smgrDoPendingSyncs(bool isCommit, bool isParallelWorker)
 				 * page including any unused space.  ReadBufferExtended()
 				 * counts some pgstat events; unfortunately, we discard them.
 				 */
-				rel = CreateFakeRelcacheEntry(srel->smgr_rlocator.locator);
+				rel = CreateFakeRelcacheEntry(pendingsync->rlocator);
 				log_newpage_range(rel, fork, 0, n, false);
 				FreeFakeRelcacheEntry(rel);
 			}
@@ -842,11 +885,20 @@ smgrDoPendingSyncs(bool isCommit, bool isParallelWorker)
 
 	pendingSyncHash = NULL;
 
-	if (nrels > 0)
+	FlushRelationsAllBuffers(locators, nlocators);
+
+	for (int i = 0; i < nlocators; i++)
 	{
-		smgrdosyncall(srels, nrels);
-		pfree(srels);
+		for (int fork = 0; fork <= MAX_FORKNUM; fork++)
+		{
+			SMgrFileHandle sfile = smgropen(locators[i], InvalidBackendId, fork);
+
+			if (smgrexists(sfile))
+				smgrimmedsync(sfile);
+		}
 	}
+	if (locators != NULL)
+		pfree(locators);
 }
 
 /*
@@ -966,22 +1018,22 @@ smgr_redo(XLogReaderState *record)
 	if (info == XLOG_SMGR_CREATE)
 	{
 		xl_smgr_create *xlrec = (xl_smgr_create *) XLogRecGetData(record);
-		SMgrRelation reln;
+		SMgrFileHandle sfile;
 
-		reln = smgropen(xlrec->rlocator, InvalidBackendId);
-		smgrcreate(reln, xlrec->forkNum, true);
+		sfile = smgropen(xlrec->rlocator, InvalidBackendId, xlrec->forkNum);
+		smgrcreate(sfile, true);
 	}
 	else if (info == XLOG_SMGR_TRUNCATE)
 	{
 		xl_smgr_truncate *xlrec = (xl_smgr_truncate *) XLogRecGetData(record);
-		SMgrRelation reln;
+		SMgrFileHandle sfile;
 		Relation	rel;
 		ForkNumber	forks[MAX_FORKNUM];
 		BlockNumber blocks[MAX_FORKNUM];
 		int			nforks = 0;
 		bool		need_fsm_vacuum = false;
 
-		reln = smgropen(xlrec->rlocator, InvalidBackendId);
+		sfile = smgropen(xlrec->rlocator, InvalidBackendId, MAIN_FORKNUM);
 
 		/*
 		 * Forcibly create relation if it doesn't exist (which suggests that
@@ -989,7 +1041,7 @@ smgr_redo(XLogReaderState *record)
 		 * XLogReadBufferForRedo, we prefer to recreate the rel and replay the
 		 * log as best we can until the drop is seen.
 		 */
-		smgrcreate(reln, MAIN_FORKNUM, true);
+		smgrcreate(sfile, true);
 
 		/*
 		 * Before we perform the truncation, update minimum recovery point to
@@ -1022,8 +1074,8 @@ smgr_redo(XLogReaderState *record)
 		/* Prepare for truncation of FSM and VM too */
 		rel = CreateFakeRelcacheEntry(xlrec->rlocator);
 
 		if ((xlrec->flags & SMGR_TRUNCATE_FSM) != 0 &&
-			smgrexists(reln, FSM_FORKNUM))
+			smgrexists(smgropen(xlrec->rlocator, InvalidBackendId, FSM_FORKNUM)))
 		{
 			blocks[nforks] = FreeSpaceMapPrepareTruncateRel(rel, xlrec->blkno);
 			if (BlockNumberIsValid(blocks[nforks]))
@@ -1034,7 +1088,7 @@ smgr_redo(XLogReaderState *record)
 			}
 		}
 		if ((xlrec->flags & SMGR_TRUNCATE_VM) != 0 &&
-			smgrexists(reln, VISIBILITYMAP_FORKNUM))
+			smgrexists(smgropen(xlrec->rlocator, InvalidBackendId, VISIBILITYMAP_FORKNUM)))
 		{
 			blocks[nforks] = visibilitymap_prepare_truncate(rel, xlrec->blkno);
 			if (BlockNumberIsValid(blocks[nforks]))
@@ -1046,7 +1100,10 @@ smgr_redo(XLogReaderState *record)
 
 		/* Do the real work to truncate relation forks */
 		if (nforks > 0)
-			smgrtruncate(reln, forks, nforks, blocks);
+		{
+			DropRelationBuffers(xlrec->rlocator, InvalidBackendId, forks, nforks, blocks);
+			smgrtruncate_multi(xlrec->rlocator, InvalidBackendId, forks, nforks, blocks);
+		}
 
 		/*
 		 * Update upper-level FSM pages to account for the truncation. This is
diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c
index 3e1b92df030..b9e362ccab0 100644
--- a/src/backend/commands/async.c
+++ b/src/backend/commands/async.c
@@ -141,6 +141,7 @@
 #include "libpq/libpq.h"
 #include "libpq/pqformat.h"
 #include "miscadmin.h"
+#include "storage/bufmgr.h"
 #include "storage/ipc.h"
 #include "storage/lmgr.h"
 #include "storage/proc.h"
@@ -163,7 +164,7 @@
  * than that, so changes in that data structure won't affect user-visible
  * restrictions.
  */
-#define NOTIFY_PAYLOAD_MAX_LENGTH	(BLCKSZ - NAMEDATALEN - 128)
+#define NOTIFY_PAYLOAD_MAX_LENGTH	(BLCKSZ - NAMEDATALEN - SizeOfPageHeaderData - 128)
 
 /*
  * Struct representing an entry in the global notify queue
@@ -213,7 +214,7 @@ typedef struct QueuePosition
 	((x).page == (y).page && (x).offset == (y).offset)
 
 #define QUEUE_POS_IS_ZERO(x) \
-	((x).page == 0 && (x).offset == 0)
+	((x).page == 0 && (x).offset == MAXALIGN(SizeOfPageHeaderData))
 
 /* choose logically smaller QueuePosition */
 #define QUEUE_POS_MIN(x,y) \
@@ -305,12 +306,6 @@ static AsyncQueueControl *asyncQueueControl;
 #define QUEUE_NEXT_LISTENER(i)		(asyncQueueControl->backend[i].nextListener)
 #define QUEUE_BACKEND_POS(i)		(asyncQueueControl->backend[i].pos)
 
-/*
- * The SLRU buffer area through which we access the notification queue
- */
-static SlruCtlData NotifyCtlData;
-
-#define NotifyCtl					(&NotifyCtlData)
 #define QUEUE_PAGESIZE				BLCKSZ
 #define QUEUE_FULL_WARN_INTERVAL	5000	/* warn at most once every 5s */
 
@@ -521,8 +516,6 @@ AsyncShmemSize(void)
 	size = mul_size(MaxBackends + 1, sizeof(QueueBackendStatus));
 	size = add_size(size, offsetof(AsyncQueueControl, backend));
 
-	size = add_size(size, SimpleLruShmemSize(NUM_NOTIFY_BUFFERS, 0));
-
 	return size;
 }
 
@@ -550,8 +543,8 @@ AsyncShmemInit(void)
 	if (!found)
 	{
 		/* First time through, so initialize it */
-		SET_QUEUE_POS(QUEUE_HEAD, 0, 0);
-		SET_QUEUE_POS(QUEUE_TAIL, 0, 0);
+		SET_QUEUE_POS(QUEUE_HEAD, 0, MAXALIGN(SizeOfPageHeaderData));
+		SET_QUEUE_POS(QUEUE_TAIL, 0, MAXALIGN(SizeOfPageHeaderData));
 		QUEUE_STOP_PAGE = 0;
 		QUEUE_FIRST_LISTENER = InvalidBackendId;
 		asyncQueueControl->lastQueueFillWarn = 0;
@@ -561,24 +554,17 @@ AsyncShmemInit(void)
 			QUEUE_BACKEND_PID(i) = InvalidPid;
 			QUEUE_BACKEND_DBOID(i) = InvalidOid;
 			QUEUE_NEXT_LISTENER(i) = InvalidBackendId;
-			SET_QUEUE_POS(QUEUE_BACKEND_POS(i), 0, 0);
+			SET_QUEUE_POS(QUEUE_BACKEND_POS(i), 0, MAXALIGN(SizeOfPageHeaderData));
 		}
 	}
 
-	/*
-	 * Set up SLRU management of the pg_notify data.
-	 */
-	NotifyCtl->PagePrecedes = asyncQueuePagePrecedes;
-	SimpleLruInit(NotifyCtl, "Notify", NUM_NOTIFY_BUFFERS, 0,
-				  NotifySLRULock, "pg_notify", LWTRANCHE_NOTIFY_BUFFER,
-				  SYNC_HANDLER_NONE);
-
 	if (!found)
 	{
 		/*
 		 * During start or reboot, clean out the pg_notify directory.
 		 */
-		(void) SlruScanDirectory(NotifyCtl, SlruScanDirCbDeleteAll, NULL);
+		(void) SlruScanDirectory(SLRU_NOTIFY_ID, asyncQueuePagePrecedes,
+								 SlruScanDirCbDeleteAll, NULL);
 	}
 }
 
@@ -1345,19 +1331,19 @@ asyncQueueAdvance(volatile QueuePosition *position, int entryLength)
 	 * written or read.
 	 */
 	offset += entryLength;
-	Assert(offset <= QUEUE_PAGESIZE);
+	Assert(offset <= QUEUE_PAGESIZE - MAXALIGN(SizeOfPageHeaderData));
 
 	/*
 	 * In a second step check if another entry can possibly be written to the
 	 * page. If so, stay here, we have reached the next position. If not, then
 	 * we need to move on to the next page.
 	 */
-	if (offset + QUEUEALIGN(AsyncQueueEntryEmptySize) > QUEUE_PAGESIZE)
+	if (offset + QUEUEALIGN(AsyncQueueEntryEmptySize) > QUEUE_PAGESIZE - MAXALIGN(SizeOfPageHeaderData))
 	{
 		pageno++;
 		if (pageno > QUEUE_MAX_PAGE)
 			pageno = 0;			/* wrap around */
-		offset = 0;
+		offset = MAXALIGN(SizeOfPageHeaderData); /* start at SizeOfPageHeaderData */
 		pageJump = true;
 	}
 
@@ -1411,10 +1397,7 @@ asyncQueueAddEntries(ListCell *nextNotify)
 	QueuePosition queue_head;
 	int			pageno;
 	int			offset;
-	int			slotno;
-
-	/* We hold both NotifyQueueLock and NotifySLRULock during this operation */
-	LWLockAcquire(NotifySLRULock, LW_EXCLUSIVE);
+	Buffer		buffer;
 
 	/*
 	 * We work with a local copy of QUEUE_HEAD, which we write back to shared
@@ -1439,13 +1422,20 @@ asyncQueueAddEntries(ListCell *nextNotify)
 	 */
 	pageno = QUEUE_POS_PAGE(queue_head);
 	if (QUEUE_POS_IS_ZERO(queue_head))
-		slotno = SimpleLruZeroPage(NotifyCtl, pageno);
+	{
+		buffer = ZeroSlruBuffer(SLRU_NOTIFY_ID, pageno);
+		PageSetHeaderDataNonRel(BufferGetPage(buffer), pageno,
+								InvalidXLogRecPtr, BLCKSZ,
+								PG_METAPAGE_LAYOUT_VERSION);
+	}
 	else
-		slotno = SimpleLruReadPage(NotifyCtl, pageno, true,
-								   InvalidTransactionId);
+	{
+		buffer = ReadSlruBuffer(SLRU_NOTIFY_ID, pageno, RBM_NORMAL);
+		LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+	}
 
 	/* Note we mark the page dirty before writing in it */
-	NotifyCtl->shared->page_dirty[slotno] = true;
+	MarkBufferDirty(buffer);
 
 	while (nextNotify != NULL)
 	{
@@ -1457,7 +1447,7 @@ asyncQueueAddEntries(ListCell *nextNotify)
 		offset = QUEUE_POS_OFFSET(queue_head);
 
 		/* Check whether the entry really fits on the current page */
-		if (offset + qe.length <= QUEUE_PAGESIZE)
+		if (offset + qe.length <= QUEUE_PAGESIZE - MAXALIGN(SizeOfPageHeaderData))
 		{
 			/* OK, so advance nextNotify past this item */
 			nextNotify = lnext(pendingNotifies->events, nextNotify);
@@ -1469,17 +1459,18 @@ asyncQueueAddEntries(ListCell *nextNotify)
 			 * only check dboid and since it won't match any reader's database
 			 * OID, they will ignore this entry and move on.
 			 */
-			qe.length = QUEUE_PAGESIZE - offset;
+			qe.length = QUEUE_PAGESIZE - MAXALIGN(SizeOfPageHeaderData) - offset;
 			qe.dboid = InvalidOid;
 			qe.data[0] = '\0';	/* empty channel */
 			qe.data[1] = '\0';	/* empty payload */
 		}
 
 		/* Now copy qe into the shared buffer page */
-		memcpy(NotifyCtl->shared->page_buffer[slotno] + offset,
+		memcpy(PageGetContents(BufferGetPage(buffer)) + offset,
 			   &qe,
 			   qe.length);
 
+
 		/* Advance queue_head appropriately, and detect if page is full */
 		if (asyncQueueAdvance(&(queue_head), qe.length))
 		{
@@ -1491,7 +1482,16 @@ asyncQueueAddEntries(ListCell *nextNotify)
 			 * asyncQueueIsFull() ensured that there is room to create this
 			 * page without overrunning the queue.
 			 */
-			slotno = SimpleLruZeroPage(NotifyCtl, QUEUE_POS_PAGE(queue_head));
+			UnlockReleaseBuffer(buffer);
+			buffer = ZeroSlruBuffer(SLRU_NOTIFY_ID,
+									QUEUE_POS_PAGE(queue_head));
+
+			/* Stamp a page header on it, as for the first queue page above */
+			PageSetHeaderDataNonRel(BufferGetPage(buffer),
+									QUEUE_POS_PAGE(queue_head),
+									InvalidXLogRecPtr, BLCKSZ,
+									PG_METAPAGE_LAYOUT_VERSION);
+			MarkBufferDirty(buffer);
 
 			/*
 			 * If the new page address is a multiple of QUEUE_CLEANUP_DELAY,
@@ -1505,12 +1499,11 @@ asyncQueueAddEntries(ListCell *nextNotify)
 			break;
 		}
 	}
+	UnlockReleaseBuffer(buffer);
 
 	/* Success, so update the global QUEUE_HEAD */
 	QUEUE_HEAD = queue_head;
 
-	LWLockRelease(NotifySLRULock);
-
 	return nextNotify;
 }
 
@@ -1983,17 +1976,16 @@ asyncQueueReadAllNotifications(void)
 		{
 			int			curpage = QUEUE_POS_PAGE(pos);
 			int			curoffset = QUEUE_POS_OFFSET(pos);
-			int			slotno;
 			int			copysize;
+			Buffer		buffer;
 
 			/*
-			 * We copy the data from SLRU into a local buffer, so as to avoid
-			 * holding the NotifySLRULock while we are examining the entries
-			 * and possibly transmitting them to our frontend.  Copy only the
-			 * part of the page we will actually inspect.
+			 * We copy the data into a local buffer, so as to avoid holding a
+			 * buffer pin while we are examining the entries and possibly
+			 * transmitting them to our frontend.  Copy only the part of the
+			 * page we will actually inspect.
 			 */
-			slotno = SimpleLruReadPage_ReadOnly(NotifyCtl, curpage,
-												InvalidTransactionId);
+			buffer = ReadSlruBuffer(SLRU_NOTIFY_ID, curpage, RBM_NORMAL);
 			if (curpage == QUEUE_POS_PAGE(head))
 			{
 				/* we only want to read as far as head */
@@ -2004,13 +1996,12 @@ asyncQueueReadAllNotifications(void)
 			else
 			{
 				/* fetch all the rest of the page */
-				copysize = QUEUE_PAGESIZE - curoffset;
+				copysize = QUEUE_PAGESIZE - MAXALIGN(SizeOfPageHeaderData) - curoffset;
 			}
-			memcpy(page_buffer.buf + curoffset,
-				   NotifyCtl->shared->page_buffer[slotno] + curoffset,
+			memcpy(PageGetContents(page_buffer.buf) +  curoffset,
+				   PageGetContents(BufferGetPage(buffer)) + curoffset,
 				   copysize);
-			/* Release lock that we got from SimpleLruReadPage_ReadOnly() */
-			LWLockRelease(NotifySLRULock);
+			ReleaseBuffer(buffer);
 
 			/*
 			 * Process messages up to the stop position, end of page, or an
@@ -2078,7 +2069,7 @@ asyncQueueProcessPageEntries(volatile QueuePosition *current,
 		if (QUEUE_POS_EQUAL(thisentry, stop))
 			break;
 
-		qe = (AsyncQueueEntry *) (page_buffer + QUEUE_POS_OFFSET(thisentry));
+		qe = (AsyncQueueEntry *) (PageGetContents(page_buffer) + QUEUE_POS_OFFSET(thisentry));
 
 		/*
 		 * Advance *current over this message, possibly to the next page. As
@@ -2207,7 +2198,7 @@ asyncQueueAdvanceTail(void)
 		 * SimpleLruTruncate() will ask for NotifySLRULock but will also
 		 * release the lock again.
 		 */
-		SimpleLruTruncate(NotifyCtl, newtailpage);
+		SimpleLruTruncate(SLRU_NOTIFY_ID, asyncQueuePagePrecedes, newtailpage);
 
 		/*
 		 * Update QUEUE_STOP_PAGE.  This changes asyncQueueIsFull()'s verdict
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 6eb87427181..70241e6f4c7 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -259,7 +259,7 @@ ScanSourceDatabasePgClass(Oid tbid, Oid dbid, char *srcpath)
 	List	   *rlocatorlist = NIL;
 	LockRelId	relid;
 	Snapshot	snapshot;
-	SMgrRelation	smgr;
+	SMgrFileHandle sfile;
 	BufferAccessStrategy bstrategy;
 
 	/* Get pg_class relfilenumber. */
@@ -276,9 +276,9 @@ ScanSourceDatabasePgClass(Oid tbid, Oid dbid, char *srcpath)
 	rlocator.dbOid = dbid;
 	rlocator.relNumber = relfilenumber;
 
-	smgr = smgropen(rlocator, InvalidBackendId);
-	nblocks = smgrnblocks(smgr, MAIN_FORKNUM);
-	smgrclose(smgr);
+	sfile = smgropen(rlocator, InvalidBackendId, MAIN_FORKNUM);
+	nblocks = smgrnblocks(sfile);
+	smgrclose(sfile);
 
 	/* Use a buffer access strategy since this is a bulk read operation. */
 	bstrategy = GetAccessStrategy(BAS_BULKREAD);
diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c
index 99c9f91cba5..ee11ef63c0e 100644
--- a/src/backend/commands/sequence.c
+++ b/src/backend/commands/sequence.c
@@ -353,14 +353,14 @@ fill_seq_with_data(Relation rel, HeapTuple tuple)
 
 	if (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED)
 	{
-		SMgrRelation srel;
+		SMgrFileHandle sfile;
 
-		srel = smgropen(rel->rd_locator, InvalidBackendId);
-		smgrcreate(srel, INIT_FORKNUM, false);
+		sfile = smgropen(rel->rd_locator, InvalidBackendId, INIT_FORKNUM);
+		smgrcreate(sfile, false);
 		log_smgrcreate(&rel->rd_locator, INIT_FORKNUM);
 		fill_seq_fork_with_data(rel, tuple, INIT_FORKNUM);
 		FlushRelationBuffers(rel);
-		smgrclose(srel);
+		smgrclose(sfile);
 	}
 }
 
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index ee88e87d76d..06862dfa346 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -14675,9 +14675,7 @@ AlterTableMoveAll(AlterTableMoveAllStmt *stmt)
 static void
 index_copy_data(Relation rel, RelFileLocator newrlocator)
 {
-	SMgrRelation dstrel;
-
-	dstrel = smgropen(newrlocator, rel->rd_backend);
+	SMgrFileHandle dstmain;
 
 	/*
 	 * Since we copy the file directly without looking at the shared buffers,
@@ -14697,16 +14695,20 @@ index_copy_data(Relation rel, RelFileLocator newrlocator)
 	RelationCreateStorage(newrlocator, rel->rd_rel->relpersistence, true);
 
 	/* copy main fork */
-	RelationCopyStorage(RelationGetSmgr(rel), dstrel, MAIN_FORKNUM,
+	dstmain = smgropen(newrlocator, rel->rd_backend, MAIN_FORKNUM);
+	RelationCopyStorage(RelationGetSmgr(rel, MAIN_FORKNUM), dstmain,
 						rel->rd_rel->relpersistence);
 
 	/* copy those extra forks that exist */
 	for (ForkNumber forkNum = MAIN_FORKNUM + 1;
 		 forkNum <= MAX_FORKNUM; forkNum++)
 	{
-		if (smgrexists(RelationGetSmgr(rel), forkNum))
+		if (smgrexists(RelationGetSmgr(rel, forkNum)))
 		{
-			smgrcreate(dstrel, forkNum, false);
+			SMgrFileHandle src_fork = RelationGetSmgr(rel, forkNum);
+			SMgrFileHandle dst_fork = smgropen(newrlocator, rel->rd_backend, forkNum);
+
+			smgrcreate(dst_fork, false);
 
 			/*
 			 * WAL log creation if the relation is persistent, or this is the
@@ -14716,14 +14718,15 @@ index_copy_data(Relation rel, RelFileLocator newrlocator)
 				(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
 				 forkNum == INIT_FORKNUM))
 				log_smgrcreate(&newrlocator, forkNum);
-			RelationCopyStorage(RelationGetSmgr(rel), dstrel, forkNum,
+			RelationCopyStorage(src_fork, dst_fork,
 								rel->rd_rel->relpersistence);
+			smgrclose(dst_fork);
 		}
 	}
 
 	/* drop old relation, and close new one */
 	RelationDropStorage(rel);
-	smgrclose(dstrel);
+	smgrclose(dstmain);
 }
 
 /*
diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c
index 6b6264854e6..41819c590e0 100644
--- a/src/backend/storage/buffer/buf_init.c
+++ b/src/backend/storage/buffer/buf_init.c
@@ -20,6 +20,7 @@
 
 BufferDescPadded *BufferDescriptors;
 char	   *BufferBlocks;
+XLogRecPtr *BufferExternalLSNs;
 ConditionVariableMinimallyPadded *BufferIOCVArray;
 WritebackContext BackendWritebackContext;
 CkptSortItem *CkptBufferIds;
@@ -69,9 +70,11 @@ InitBufferPool(void)
 {
 	bool		foundBufs,
 				foundDescs,
+				foundLSNs,
 				foundIOCV,
 				foundBufCkpt;
 
+
 	/* Align descriptors to a cacheline boundary. */
 	BufferDescriptors = (BufferDescPadded *)
 		ShmemInitStruct("Buffer Descriptors",
@@ -88,6 +91,11 @@ InitBufferPool(void)
 						NBuffers * sizeof(ConditionVariableMinimallyPadded),
 						&foundIOCV);
 
+	BufferExternalLSNs = (XLogRecPtr *)
+		ShmemInitStruct("Buffer External LSNs",
+						NBuffers * sizeof(XLogRecPtr),
+						&foundLSNs);
+
 	/*
 	 * The array used to sort to-be-checkpointed buffer ids is located in
 	 * shared memory, to avoid having to allocate significant amounts of
@@ -99,10 +107,10 @@ InitBufferPool(void)
 		ShmemInitStruct("Checkpoint BufferIds",
 						NBuffers * sizeof(CkptSortItem), &foundBufCkpt);
 
-	if (foundDescs || foundBufs || foundIOCV || foundBufCkpt)
+	if (foundDescs || foundBufs || foundIOCV || foundBufCkpt || foundLSNs)
 	{
 		/* should find all of these, or none of them */
-		Assert(foundDescs && foundBufs && foundIOCV && foundBufCkpt);
+		Assert(foundDescs && foundBufs && foundIOCV && foundBufCkpt && foundLSNs);
 		/* note: this path is only taken in EXEC_BACKEND case */
 	}
 	else
@@ -133,6 +141,8 @@ InitBufferPool(void)
 							 LWTRANCHE_BUFFER_CONTENT);
 
 			ConditionVariableInit(BufferDescriptorGetIOCV(buf));
+
+			BufferExternalLSNs[i] = InvalidXLogRecPtr;
 		}
 
 		/* Correct last entry of linked list */
@@ -166,6 +176,9 @@ BufferShmemSize(void)
 	/* size of data pages */
 	size = add_size(size, mul_size(NBuffers, BLCKSZ));
 
+	/* size of external LSNs */
+	size = add_size(size, mul_size(NBuffers, sizeof(XLogRecPtr)));
+
 	/* size of stuff controlled by freelist.c */
 	size = add_size(size, StrategyShmemSize());
 
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 73d30bf6191..8154e4b013f 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -57,10 +57,17 @@
 #include "utils/resowner_private.h"
 #include "utils/timestamp.h"
 
+/*
+ * XXX Ideally we'd switch to standard pages for SLRU data, but in the
+ * meantime we need some way to identify buffers that hold raw data (no
+ * invasive LSN, no checksums).
+ */
 
 /* Note: these two macros only work on shared buffers, not local ones! */
 #define BufHdrGetBlock(bufHdr)	((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))
-#define BufferGetLSN(bufHdr)	(PageGetLSN(BufHdrGetBlock(bufHdr)))
+
+#define BufferGetLSN(bufHdr) \
+		(PageGetLSN(BufHdrGetBlock(bufHdr)))
 
 /* Note: this macro only works on local buffers, not shared ones! */
 #define LocalBufHdrGetBlock(bufHdr) \
@@ -117,19 +124,6 @@ typedef struct CkptTsStatus
 	int			index;
 } CkptTsStatus;
 
-/*
- * Type for array used to sort SMgrRelations
- *
- * FlushRelationsAllBuffers shares the same comparator function with
- * DropRelationsAllBuffers. Pointer to this struct and RelFileLocator must be
- * compatible.
- */
-typedef struct SMgrSortArray
-{
-	RelFileLocator rlocator;	/* This must be the first member */
-	SMgrRelation srel;
-} SMgrSortArray;
-
 /* GUC variables */
 bool		zero_damaged_pages = false;
 int			bgwriter_lru_maxpages = 100;
@@ -459,8 +453,8 @@ ForgetPrivateRefCountEntry(PrivateRefCountEntry *ref)
 )
 
 
-static Buffer ReadBuffer_common(SMgrRelation smgr, char relpersistence,
-								ForkNumber forkNum, BlockNumber blockNum,
+static Buffer ReadBuffer_common(SMgrFileHandle sfile, char relpersistence,
+								BlockNumber blockNum,
 								ReadBufferMode mode, BufferAccessStrategy strategy,
 								bool *hit);
 static bool PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy);
@@ -476,20 +470,19 @@ static void TerminateBufferIO(BufferDesc *buf, bool clear_dirty,
 							  uint32 set_flag_bits);
 static void shared_buffer_write_error_callback(void *arg);
 static void local_buffer_write_error_callback(void *arg);
-static BufferDesc *BufferAlloc(SMgrRelation smgr,
+static BufferDesc *BufferAlloc(SMgrFileHandle smgr,
 							   char relpersistence,
-							   ForkNumber forkNum,
 							   BlockNumber blockNum,
 							   BufferAccessStrategy strategy,
 							   bool *foundPtr);
-static void FlushBuffer(BufferDesc *buf, SMgrRelation reln);
+static void FlushBuffer(BufferDesc *buf, SMgrFileHandle sfile);
 static void FindAndDropRelationBuffers(RelFileLocator rlocator,
-									   ForkNumber forkNum,
+									   ForkNumber forknum,
 									   BlockNumber nForkBlock,
 									   BlockNumber firstDelBlock);
 static void RelationCopyStorageUsingBuffer(RelFileLocator srclocator,
 										   RelFileLocator dstlocator,
-										   ForkNumber forkNum, bool permanent);
+										   ForkNumber forknum, bool permanent);
 static void AtProcExit_Buffers(int code, Datum arg);
 static void CheckForBufferLeaks(void);
 static int	rlocator_comparator(const void *p1, const void *p2);
@@ -502,9 +495,7 @@ static int	ts_ckpt_progress_comparator(Datum a, Datum b, void *arg);
  * Implementation of PrefetchBuffer() for shared buffers.
  */
 PrefetchBufferResult
-PrefetchSharedBuffer(SMgrRelation smgr_reln,
-					 ForkNumber forkNum,
-					 BlockNumber blockNum)
+PrefetchSharedBuffer(SMgrFileHandle sfile, BlockNumber blockNum)
 {
 	PrefetchBufferResult result = {InvalidBuffer, false};
 	BufferTag	newTag;			/* identity of requested block */
@@ -515,8 +506,8 @@ PrefetchSharedBuffer(SMgrRelation smgr_reln,
 	Assert(BlockNumberIsValid(blockNum));
 
 	/* create a tag so we can lookup the buffer */
-	InitBufferTag(&newTag, &smgr_reln->smgr_rlocator.locator,
-				   forkNum, blockNum);
+	InitBufferTag(&newTag, &sfile->smgr_locator.locator,
+				   sfile->smgr_locator.forknum, blockNum);
 
 	/* determine its hash code and partition lock ID */
 	newHash = BufTableHashCode(&newTag);
@@ -535,7 +526,7 @@ PrefetchSharedBuffer(SMgrRelation smgr_reln,
 		 * Try to initiate an asynchronous read.  This returns false in
 		 * recovery if the relation file doesn't exist.
 		 */
-		if (smgrprefetch(smgr_reln, forkNum, blockNum))
+		if (smgrprefetch(sfile, blockNum))
 			result.initiated_io = true;
 #endif							/* USE_PREFETCH */
 	}
@@ -589,7 +580,7 @@ PrefetchSharedBuffer(SMgrRelation smgr_reln,
  * recovery, an error is raised).
  */
 PrefetchBufferResult
-PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
+PrefetchBuffer(Relation reln, ForkNumber forknum, BlockNumber blockNum)
 {
 	Assert(RelationIsValid(reln));
 	Assert(BlockNumberIsValid(blockNum));
@@ -603,12 +594,12 @@ PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
 					 errmsg("cannot access temporary tables of other sessions")));
 
 		/* pass it off to localbuf.c */
-		return PrefetchLocalBuffer(RelationGetSmgr(reln), forkNum, blockNum);
+		return PrefetchLocalBuffer(RelationGetSmgr(reln, forknum), blockNum);
 	}
 	else
 	{
 		/* pass it to the shared buffer version */
-		return PrefetchSharedBuffer(RelationGetSmgr(reln), forkNum, blockNum);
+		return PrefetchSharedBuffer(RelationGetSmgr(reln, forknum), blockNum);
 	}
 }
 
@@ -620,7 +611,7 @@ PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
  * tag.  In that case, the buffer is pinned and the usage count is bumped.
  */
 bool
-ReadRecentBuffer(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum,
+ReadRecentBuffer(RelFileLocator rlocator, ForkNumber forknum, BlockNumber blockNum,
 				 Buffer recent_buffer)
 {
 	BufferDesc *bufHdr;
@@ -632,7 +623,7 @@ ReadRecentBuffer(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockN
 
 	ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
 	ReservePrivateRefCountEntry();
-	InitBufferTag(&tag, &rlocator, forkNum, blockNum);
+	InitBufferTag(&tag, &rlocator, forknum, blockNum);
 
 	if (BufferIsLocal(recent_buffer))
 	{
@@ -756,7 +747,7 @@ ReadBuffer(Relation reln, BlockNumber blockNum)
  * See buffer/README for details.
  */
 Buffer
-ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum,
+ReadBufferExtended(Relation reln, ForkNumber forknum, BlockNumber blockNum,
 				   ReadBufferMode mode, BufferAccessStrategy strategy)
 {
 	bool		hit;
@@ -777,8 +768,9 @@ ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum,
 	 * miss.
 	 */
 	pgstat_count_buffer_read(reln);
-	buf = ReadBuffer_common(RelationGetSmgr(reln), reln->rd_rel->relpersistence,
-							forkNum, blockNum, mode, strategy, &hit);
+	buf = ReadBuffer_common(RelationGetSmgr(reln, forknum),
+							reln->rd_rel->relpersistence,
+							blockNum, mode, strategy, &hit);
 	if (hit)
 		pgstat_count_buffer_hit(reln);
 	return buf;
@@ -796,19 +788,31 @@ ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum,
  * BackendId).
  */
 Buffer
-ReadBufferWithoutRelcache(RelFileLocator rlocator, ForkNumber forkNum,
+ReadBufferWithoutRelcache(RelFileLocator rlocator, ForkNumber forknum,
 						  BlockNumber blockNum, ReadBufferMode mode,
 						  BufferAccessStrategy strategy, bool permanent)
 {
 	bool		hit;
 
-	SMgrRelation smgr = smgropen(rlocator, InvalidBackendId);
+	SMgrFileHandle sfile = smgropen(rlocator, InvalidBackendId, forknum);
 
-	return ReadBuffer_common(smgr, permanent ? RELPERSISTENCE_PERMANENT :
-							 RELPERSISTENCE_UNLOGGED, forkNum, blockNum,
+	return ReadBuffer_common(sfile, permanent ? RELPERSISTENCE_PERMANENT :
+							 RELPERSISTENCE_UNLOGGED, blockNum,
 							 mode, strategy, &hit);
 }
 
+Buffer
+ReadBufferWithoutRelcacheWithHit(RelFileLocator rlocator, ForkNumber forknum,
+								 BlockNumber blockNum, ReadBufferMode mode,
+								 BufferAccessStrategy strategy, bool permanent, bool *hit)
+{
+	SMgrFileHandle sfile = smgropen(rlocator, InvalidBackendId, forknum);
+
+	return ReadBuffer_common(sfile, permanent ? RELPERSISTENCE_PERMANENT :
+							 RELPERSISTENCE_UNLOGGED, blockNum,
+							 mode, strategy, hit);
+}
+
 
 /*
  * ReadBuffer_common -- common logic for all ReadBuffer variants
@@ -816,7 +820,7 @@ ReadBufferWithoutRelcache(RelFileLocator rlocator, ForkNumber forkNum,
  * *hit is set to true if the request was satisfied from shared buffer cache.
  */
 static Buffer
-ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
+ReadBuffer_common(SMgrFileHandle sfile, char relpersistence,
 				  BlockNumber blockNum, ReadBufferMode mode,
 				  BufferAccessStrategy strategy, bool *hit)
 {
@@ -824,7 +828,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 	Block		bufBlock;
 	bool		found;
 	bool		isExtend;
-	bool		isLocalBuf = SmgrIsTemp(smgr);
+	bool		isLocalBuf = SmgrIsTemp(sfile);
 
 	*hit = false;
 
@@ -833,29 +837,30 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 
 	isExtend = (blockNum == P_NEW);
 
-	TRACE_POSTGRESQL_BUFFER_READ_START(forkNum, blockNum,
-									   smgr->smgr_rlocator.locator.spcOid,
-									   smgr->smgr_rlocator.locator.dbOid,
-									   smgr->smgr_rlocator.locator.relNumber,
-									   smgr->smgr_rlocator.backend,
+	TRACE_POSTGRESQL_BUFFER_READ_START(sfile->smgr_locator.forknum,
+									   blockNum,
+									   sfile->smgr_locator.locator.spcOid,
+									   sfile->smgr_locator.locator.dbOid,
+									   sfile->smgr_locator.locator.relNumber,
+									   sfile->smgr_locator.backend,
 									   isExtend);
 
 	/* Substitute proper block number if caller asked for P_NEW */
 	if (isExtend)
 	{
-		blockNum = smgrnblocks(smgr, forkNum);
+		blockNum = smgrnblocks(sfile);
 		/* Fail if relation is already at maximum possible length */
 		if (blockNum == P_NEW)
 			ereport(ERROR,
 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 					 errmsg("cannot extend relation %s beyond %u blocks",
-							relpath(smgr->smgr_rlocator, forkNum),
+							smgrfilepath(sfile->smgr_locator),
 							P_NEW)));
 	}
 
 	if (isLocalBuf)
 	{
-		bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found);
+		bufHdr = LocalBufferAlloc(sfile, blockNum, &found);
 		if (found)
 			pgBufferUsage.local_blks_hit++;
 		else if (isExtend)
@@ -870,7 +875,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 		 * lookup the buffer.  IO_IN_PROGRESS is set if the requested block is
 		 * not currently in memory.
 		 */
-		bufHdr = BufferAlloc(smgr, relpersistence, forkNum, blockNum,
+		bufHdr = BufferAlloc(sfile, relpersistence, blockNum,
 							 strategy, &found);
 		if (found)
 			pgBufferUsage.shared_blks_hit++;
@@ -895,11 +900,12 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 			if (VacuumCostActive)
 				VacuumCostBalance += VacuumCostPageHit;
 
-			TRACE_POSTGRESQL_BUFFER_READ_DONE(forkNum, blockNum,
-											  smgr->smgr_rlocator.locator.spcOid,
-											  smgr->smgr_rlocator.locator.dbOid,
-											  smgr->smgr_rlocator.locator.relNumber,
-											  smgr->smgr_rlocator.backend,
+			TRACE_POSTGRESQL_BUFFER_READ_DONE(sfile->smgr_locator.forknum,
+											  blockNum,
+											  sfile->smgr_locator.locator.spcOid,
+											  sfile->smgr_locator.locator.dbOid,
+											  sfile->smgr_locator.locator.relNumber,
+											  sfile->smgr_locator.backend,
 											  isExtend,
 											  found);
 
@@ -936,7 +942,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 		if (!PageIsNew((Page) bufBlock))
 			ereport(ERROR,
 					(errmsg("unexpected data beyond EOF in block %u of relation %s",
-							blockNum, relpath(smgr->smgr_rlocator, forkNum)),
+							blockNum, smgrfilepath(sfile->smgr_locator)),
 					 errhint("This has been seen to occur with buggy kernels; consider updating your system.")));
 
 		/*
@@ -993,7 +999,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 		/* new buffers are zero-filled */
 		MemSet((char *) bufBlock, 0, BLCKSZ);
 		/* don't set checksum for all-zero page */
-		smgrextend(smgr, forkNum, blockNum, (char *) bufBlock, false);
+		smgrextend(sfile, blockNum, (char *) bufBlock, false);
 
 		/*
 		 * NB: we're *not* doing a ScheduleBufferTagForWriteback here;
@@ -1018,7 +1024,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 			if (track_io_timing)
 				INSTR_TIME_SET_CURRENT(io_start);
 
-			smgrread(smgr, forkNum, blockNum, (char *) bufBlock);
+			smgrread(sfile, blockNum, (char *) bufBlock);
 
 			if (track_io_timing)
 			{
@@ -1029,8 +1035,8 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 			}
 
 			/* check for garbage data */
-			if (!PageIsVerifiedExtended((Page) bufBlock, blockNum,
-										PIV_LOG_WARNING | PIV_REPORT_STAT))
+			if ((!(mode == RBM_TRIM)) && (!PageIsVerifiedExtended((Page) bufBlock, blockNum,
+										PIV_LOG_WARNING | PIV_REPORT_STAT)))
 			{
 				if (mode == RBM_ZERO_ON_ERROR || zero_damaged_pages)
 				{
@@ -1038,7 +1044,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 							(errcode(ERRCODE_DATA_CORRUPTED),
 							 errmsg("invalid page in block %u of relation %s; zeroing out page",
 									blockNum,
-									relpath(smgr->smgr_rlocator, forkNum))));
+									smgrfilepath(sfile->smgr_locator))));
 					MemSet((char *) bufBlock, 0, BLCKSZ);
 				}
 				else
@@ -1046,7 +1052,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 							(errcode(ERRCODE_DATA_CORRUPTED),
 							 errmsg("invalid page in block %u of relation %s",
 									blockNum,
-									relpath(smgr->smgr_rlocator, forkNum))));
+									smgrfilepath(sfile->smgr_locator))));
 			}
 		}
 	}
@@ -1085,11 +1091,12 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 	if (VacuumCostActive)
 		VacuumCostBalance += VacuumCostPageMiss;
 
-	TRACE_POSTGRESQL_BUFFER_READ_DONE(forkNum, blockNum,
-									  smgr->smgr_rlocator.locator.spcOid,
-									  smgr->smgr_rlocator.locator.dbOid,
-									  smgr->smgr_rlocator.locator.relNumber,
-									  smgr->smgr_rlocator.backend,
+	TRACE_POSTGRESQL_BUFFER_READ_DONE(sfile->smgr_locator.forknum,
+									  blockNum,
+									  sfile->smgr_locator.locator.spcOid,
+									  sfile->smgr_locator.locator.dbOid,
+									  sfile->smgr_locator.locator.relNumber,
+									  sfile->smgr_locator.backend,
 									  isExtend,
 									  found);
 
@@ -1116,7 +1123,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
  * No locks are held either at entry or exit.
  */
 static BufferDesc *
-BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
+BufferAlloc(SMgrFileHandle sfile, char relpersistence,
 			BlockNumber blockNum,
 			BufferAccessStrategy strategy,
 			bool *foundPtr)
@@ -1134,7 +1141,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 	uint32		buf_state;
 
 	/* create a tag so we can lookup the buffer */
-	InitBufferTag(&newTag, &smgr->smgr_rlocator.locator, forkNum, blockNum);
+	InitBufferTag(&newTag, &sfile->smgr_locator.locator, sfile->smgr_locator.forknum, blockNum);
 
 	/* determine its hash code and partition lock ID */
 	newHash = BufTableHashCode(&newTag);
@@ -1264,10 +1271,11 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 				}
 
 				/* OK, do the I/O */
-				TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_START(forkNum, blockNum,
-														  smgr->smgr_rlocator.locator.spcOid,
-														  smgr->smgr_rlocator.locator.dbOid,
-														  smgr->smgr_rlocator.locator.relNumber);
+				TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_START(sfile->smgr_locator.forknum,
+														  blockNum,
+														  sfile->smgr_locator.locator.spcOid,
+														  sfile->smgr_locator.locator.dbOid,
+														  sfile->smgr_locator.locator.relNumber);
 
 				FlushBuffer(buf, NULL);
 				LWLockRelease(BufferDescriptorGetContentLock(buf));
@@ -1275,10 +1283,11 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 				ScheduleBufferTagForWriteback(&BackendWritebackContext,
 											  &buf->tag);
 
-				TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_DONE(forkNum, blockNum,
-														 smgr->smgr_rlocator.locator.spcOid,
-														 smgr->smgr_rlocator.locator.dbOid,
-														 smgr->smgr_rlocator.locator.relNumber);
+				TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_DONE(sfile->smgr_locator.forknum,
+														 blockNum,
+														 sfile->smgr_locator.locator.spcOid,
+														 sfile->smgr_locator.locator.dbOid,
+														 sfile->smgr_locator.locator.relNumber);
 			}
 			else
 			{
@@ -1434,7 +1443,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 	buf_state &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED |
 				   BM_CHECKPOINT_NEEDED | BM_IO_ERROR | BM_PERMANENT |
 				   BUF_USAGECOUNT_MASK);
-	if (relpersistence == RELPERSISTENCE_PERMANENT || forkNum == INIT_FORKNUM)
+	if (relpersistence == RELPERSISTENCE_PERMANENT || sfile->smgr_locator.forknum == INIT_FORKNUM)
 		buf_state |= BM_TAG_VALID | BM_PERMANENT | BUF_USAGECOUNT_ONE;
 	else
 		buf_state |= BM_TAG_VALID | BUF_USAGECOUNT_ONE;
@@ -1647,7 +1656,7 @@ ReleaseAndReadBuffer(Buffer buffer,
 					 Relation relation,
 					 BlockNumber blockNum)
 {
-	ForkNumber	forkNum = MAIN_FORKNUM;
+	ForkNumber	forknum = MAIN_FORKNUM;
 	BufferDesc *bufHdr;
 
 	if (BufferIsValid(buffer))
@@ -1658,7 +1667,7 @@ ReleaseAndReadBuffer(Buffer buffer,
 			bufHdr = GetLocalBufferDescriptor(-buffer - 1);
 			if (bufHdr->tag.blockNum == blockNum &&
 				BufTagMatchesRelFileLocator(&bufHdr->tag, &relation->rd_locator) &&
-				BufTagGetForkNum(&bufHdr->tag) == forkNum)
+				BufTagGetForkNum(&bufHdr->tag) == forknum)
 				return buffer;
 			ResourceOwnerForgetBuffer(CurrentResourceOwner, buffer);
 			LocalRefCount[-buffer - 1]--;
@@ -1669,7 +1678,7 @@ ReleaseAndReadBuffer(Buffer buffer,
 			/* we have pin, so it's ok to examine tag without spinlock */
 			if (bufHdr->tag.blockNum == blockNum &&
 				BufTagMatchesRelFileLocator(&bufHdr->tag, &relation->rd_locator) &&
-				BufTagGetForkNum(&bufHdr->tag) == forkNum)
+				BufTagGetForkNum(&bufHdr->tag) == forknum)
 				return buffer;
 			UnpinBuffer(bufHdr);
 		}
@@ -2820,7 +2829,7 @@ BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum,
  * as the second parameter.  If not, pass NULL.
  */
 static void
-FlushBuffer(BufferDesc *buf, SMgrRelation reln)
+FlushBuffer(BufferDesc *buf, SMgrFileHandle sfile)
 {
 	XLogRecPtr	recptr;
 	ErrorContextCallback errcallback;
@@ -2845,14 +2854,14 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln)
 	error_context_stack = &errcallback;
 
 	/* Find smgr relation for buffer */
-	if (reln == NULL)
-		reln = smgropen(BufTagGetRelFileLocator(&buf->tag), InvalidBackendId);
+	if (sfile == NULL)
+		sfile = smgropen(BufTagGetRelFileLocator(&buf->tag), InvalidBackendId, BufTagGetForkNum(&buf->tag));
 
-	TRACE_POSTGRESQL_BUFFER_FLUSH_START(BufTagGetForkNum(&buf->tag),
+	TRACE_POSTGRESQL_BUFFER_FLUSH_START(sfile->smgr_locator.forknum,
 										buf->tag.blockNum,
-										reln->smgr_rlocator.locator.spcOid,
-										reln->smgr_rlocator.locator.dbOid,
-										reln->smgr_rlocator.locator.relNumber);
+										sfile->smgr_locator.locator.spcOid,
+										sfile->smgr_locator.locator.dbOid,
+										sfile->smgr_locator.locator.relNumber);
 
 	buf_state = LockBufHdr(buf);
 
@@ -2906,8 +2915,7 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln)
 	/*
 	 * bufToWrite is either the shared buffer or a copy, as appropriate.
 	 */
-	smgrwrite(reln,
-			  BufTagGetForkNum(&buf->tag),
+	smgrwrite(sfile,
 			  buf->tag.blockNum,
 			  bufToWrite,
 			  false);
@@ -2928,11 +2936,11 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln)
 	 */
 	TerminateBufferIO(buf, true, 0);
 
-	TRACE_POSTGRESQL_BUFFER_FLUSH_DONE(BufTagGetForkNum(&buf->tag),
+	TRACE_POSTGRESQL_BUFFER_FLUSH_DONE(sfile->smgr_locator.forknum,
 									   buf->tag.blockNum,
-									   reln->smgr_rlocator.locator.spcOid,
-									   reln->smgr_rlocator.locator.dbOid,
-									   reln->smgr_rlocator.locator.relNumber);
+									   sfile->smgr_locator.locator.spcOid,
+									   sfile->smgr_locator.locator.dbOid,
+									   sfile->smgr_locator.locator.relNumber);
 
 	/* Pop the error context stack */
 	error_context_stack = errcallback.previous;
@@ -2947,7 +2955,7 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln)
  * it might not be.
  */
 BlockNumber
-RelationGetNumberOfBlocksInFork(Relation relation, ForkNumber forkNum)
+RelationGetNumberOfBlocksInFork(Relation relation, ForkNumber forknum)
 {
 	if (RELKIND_HAS_TABLE_AM(relation->rd_rel->relkind))
 	{
@@ -2959,13 +2967,13 @@ RelationGetNumberOfBlocksInFork(Relation relation, ForkNumber forkNum)
 		 */
 		uint64		szbytes;
 
-		szbytes = table_relation_size(relation, forkNum);
+		szbytes = table_relation_size(relation, forknum);
 
 		return (szbytes + (BLCKSZ - 1)) / BLCKSZ;
 	}
 	else if (RELKIND_HAS_STORAGE(relation->rd_rel->relkind))
 	{
-		return smgrnblocks(RelationGetSmgr(relation), forkNum);
+		return smgrnblocks(RelationGetSmgr(relation, forknum));
 	}
 	else
 		Assert(false);
@@ -3028,6 +3036,7 @@ BufferGetLSNAtomic(Buffer buffer)
 
 	buf_state = LockBufHdr(bufHdr);
 	lsn = PageGetLSN(page);
+	
 	UnlockBufHdr(bufHdr, buf_state);
 
 	return lsn;
@@ -3055,26 +3064,23 @@ BufferGetLSNAtomic(Buffer buffer)
  * --------------------------------------------------------------------
  */
 void
-DropRelationBuffers(SMgrRelation smgr_reln, ForkNumber *forkNum,
+DropRelationBuffers(RelFileLocator rlocator, BackendId backend, ForkNumber *forknum,
 					int nforks, BlockNumber *firstDelBlock)
 {
 	int			i;
 	int			j;
-	RelFileLocatorBackend rlocator;
 	BlockNumber nForkBlock[MAX_FORKNUM];
 	uint64		nBlocksToInvalidate = 0;
 
-	rlocator = smgr_reln->smgr_rlocator;
-
 	/* If it's a local relation, it's localbuf.c's problem. */
-	if (RelFileLocatorBackendIsTemp(rlocator))
+	if (backend != InvalidBackendId)
 	{
-		if (rlocator.backend == MyBackendId)
+		if (backend == MyBackendId)
 		{
 			for (j = 0; j < nforks; j++)
-				DropRelationLocalBuffers(rlocator.locator, forkNum[j],
+				DropRelationLocalBuffers(rlocator, forknum[j],
 										 firstDelBlock[j]);
 		}
 		return;
 	}
 
@@ -3103,7 +3106,10 @@ DropRelationBuffers(SMgrRelation smgr_reln, ForkNumber *forkNum,
 	for (i = 0; i < nforks; i++)
 	{
 		/* Get the number of blocks for a relation's fork */
-		nForkBlock[i] = smgrnblocks_cached(smgr_reln, forkNum[i]);
+		SMgrFileHandle sfile;
+
+		sfile = smgropen(rlocator, backend, forknum[i]);
+		nForkBlock[i] = smgrnblocks_cached(sfile);
 
 		if (nForkBlock[i] == InvalidBlockNumber)
 		{
@@ -3123,7 +3129,7 @@ DropRelationBuffers(SMgrRelation smgr_reln, ForkNumber *forkNum,
 		nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD)
 	{
 		for (j = 0; j < nforks; j++)
-			FindAndDropRelationBuffers(rlocator.locator, forkNum[j],
+			FindAndDropRelationBuffers(rlocator, forknum[j],
 									   nForkBlock[j], firstDelBlock[j]);
 		return;
 	}
@@ -3146,18 +3152,18 @@ DropRelationBuffers(SMgrRelation smgr_reln, ForkNumber *forkNum,
 		 * false positives are safe because we'll recheck after getting the
 		 * buffer lock.
 		 *
-		 * We could check forkNum and blockNum as well as the rlocator, but
+		 * We could check forknum and blockNum as well as the rlocator, but
 		 * the incremental win from doing so seems small.
 		 */
-		if (!BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator.locator))
+		if (!BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator))
 			continue;
 
 		buf_state = LockBufHdr(bufHdr);
 
 		for (j = 0; j < nforks; j++)
 		{
-			if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator.locator) &&
-				BufTagGetForkNum(&bufHdr->tag) == forkNum[j] &&
+			if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator) &&
+				BufTagGetForkNum(&bufHdr->tag) == forknum[j] &&
 				bufHdr->tag.blockNum >= firstDelBlock[j])
 			{
 				InvalidateBuffer(bufHdr);	/* releases spinlock */
@@ -3178,11 +3184,10 @@ DropRelationBuffers(SMgrRelation smgr_reln, ForkNumber *forkNum,
  *		--------------------------------------------------------------------
  */
 void
-DropRelationsAllBuffers(SMgrRelation *smgr_reln, int nlocators)
+DropRelationsAllBuffers(RelFileLocatorBackend *rlocators, int nlocators)
 {
 	int			i;
 	int			n = 0;
-	SMgrRelation *rels;
 	BlockNumber (*block)[MAX_FORKNUM + 1];
 	uint64		nBlocksToInvalidate = 0;
 	RelFileLocator *locators;
@@ -3192,18 +3197,18 @@ DropRelationsAllBuffers(SMgrRelation *smgr_reln, int nlocators)
 	if (nlocators == 0)
 		return;
 
-	rels = palloc(sizeof(SMgrRelation) * nlocators);	/* non-local relations */
+	locators = palloc(sizeof(RelFileLocator) * nlocators);	/* non-local relations */
 
 	/* If it's a local relation, it's localbuf.c's problem. */
 	for (i = 0; i < nlocators; i++)
 	{
-		if (RelFileLocatorBackendIsTemp(smgr_reln[i]->smgr_rlocator))
+		if (rlocators[i].backend != InvalidBackendId)
 		{
-			if (smgr_reln[i]->smgr_rlocator.backend == MyBackendId)
-				DropRelationAllLocalBuffers(smgr_reln[i]->smgr_rlocator.locator);
+			if (rlocators[i].backend == MyBackendId)
+				DropRelationAllLocalBuffers(rlocators[i].locator);
 		}
 		else
-			rels[n++] = smgr_reln[i];
+			locators[n++] = rlocators[i].locator;
 	}
 
 	/*
@@ -3212,7 +3217,7 @@ DropRelationsAllBuffers(SMgrRelation *smgr_reln, int nlocators)
 	 */
 	if (n == 0)
 	{
-		pfree(rels);
+		pfree(locators);
 		return;
 	}
 
@@ -3232,12 +3237,13 @@ DropRelationsAllBuffers(SMgrRelation *smgr_reln, int nlocators)
 		for (int j = 0; j <= MAX_FORKNUM; j++)
 		{
 			/* Get the number of blocks for a relation's fork. */
-			block[i][j] = smgrnblocks_cached(rels[i], j);
+			SMgrFileHandle sfile = smgropen(locators[i], InvalidBackendId, j);
+			block[i][j] = smgrnblocks_cached(sfile);
 
 			/* We need to only consider the relation forks that exists. */
 			if (block[i][j] == InvalidBlockNumber)
 			{
-				if (!smgrexists(rels[i], j))
+				if (!smgrexists(sfile))
 					continue;
 				cached = false;
 				break;
@@ -3263,20 +3269,17 @@ DropRelationsAllBuffers(SMgrRelation *smgr_reln, int nlocators)
 					continue;
 
 				/* drop all the buffers for a particular relation fork */
-				FindAndDropRelationBuffers(rels[i]->smgr_rlocator.locator,
+				FindAndDropRelationBuffers(locators[i],
 										   j, block[i][j], 0);
 			}
 		}
 
 		pfree(block);
-		pfree(rels);
+		pfree(locators);
 		return;
 	}
 
 	pfree(block);
-	locators = palloc(sizeof(RelFileLocator) * n);	/* non-local relations */
-	for (i = 0; i < n; i++)
-		locators[i] = rels[i]->smgr_rlocator.locator;
 
 	/*
 	 * For low number of relations to drop just use a simple walk through, to
@@ -3336,7 +3339,6 @@ DropRelationsAllBuffers(SMgrRelation *smgr_reln, int nlocators)
 	}
 
 	pfree(locators);
-	pfree(rels);
 }
 
 /* ---------------------------------------------------------------------
@@ -3349,7 +3351,7 @@ DropRelationsAllBuffers(SMgrRelation *smgr_reln, int nlocators)
  * --------------------------------------------------------------------
  */
 static void
-FindAndDropRelationBuffers(RelFileLocator rlocator, ForkNumber forkNum,
+FindAndDropRelationBuffers(RelFileLocator rlocator, ForkNumber forknum,
 						   BlockNumber nForkBlock,
 						   BlockNumber firstDelBlock)
 {
@@ -3365,7 +3367,7 @@ FindAndDropRelationBuffers(RelFileLocator rlocator, ForkNumber forkNum,
 		uint32		buf_state;
 
 		/* create a tag so we can lookup the buffer */
-		InitBufferTag(&bufTag, &rlocator, forkNum, curBlock);
+		InitBufferTag(&bufTag, &rlocator, forknum, curBlock);
 
 		/* determine its hash code and partition lock ID */
 		bufHash = BufTableHashCode(&bufTag);
@@ -3390,7 +3392,7 @@ FindAndDropRelationBuffers(RelFileLocator rlocator, ForkNumber forkNum,
 		buf_state = LockBufHdr(bufHdr);
 
 		if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator) &&
-			BufTagGetForkNum(&bufHdr->tag) == forkNum &&
+			BufTagGetForkNum(&bufHdr->tag) == forknum &&
 			bufHdr->tag.blockNum >= firstDelBlock)
 			InvalidateBuffer(bufHdr);	/* releases spinlock */
 		else
@@ -3545,8 +3547,7 @@ FlushRelationBuffers(Relation rel)
 
 				PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
 
-				smgrwrite(RelationGetSmgr(rel),
-						  BufTagGetForkNum(&bufHdr->tag),
+				smgrwrite(RelationGetSmgr(rel, BufTagGetForkNum(&bufHdr->tag)),
 						  bufHdr->tag.blockNum,
 						  localpage,
 						  false);
@@ -3586,7 +3587,7 @@ FlushRelationBuffers(Relation rel)
 		{
 			PinBuffer_Locked(bufHdr);
 			LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
-			FlushBuffer(bufHdr, RelationGetSmgr(rel));
+			FlushBuffer(bufHdr, RelationGetSmgr(rel, BufTagGetForkNum(&bufHdr->tag)));
 			LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
 			UnpinBuffer(bufHdr);
 		}
@@ -3605,25 +3606,20 @@ FlushRelationBuffers(Relation rel)
  * --------------------------------------------------------------------
  */
 void
-FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels)
+FlushRelationsAllBuffers(RelFileLocator *rels, int nrels)
 {
 	int			i;
-	SMgrSortArray *srels;
+	RelFileLocator *locators = NULL;
 	bool		use_bsearch;
 
 	if (nrels == 0)
 		return;
 
 	/* fill-in array for qsort */
-	srels = palloc(sizeof(SMgrSortArray) * nrels);
+	locators = palloc(sizeof(RelFileLocator) * nrels);
 
 	for (i = 0; i < nrels; i++)
-	{
-		Assert(!RelFileLocatorBackendIsTemp(smgrs[i]->smgr_rlocator));
-
-		srels[i].rlocator = smgrs[i]->smgr_rlocator.locator;
-		srels[i].srel = smgrs[i];
-	}
+		locators[i] = rels[i];
 
 	/*
 	 * Save the bsearch overhead for low number of relations to sync. See
@@ -3631,16 +3627,16 @@ FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels)
 	 */
 	use_bsearch = nrels > RELS_BSEARCH_THRESHOLD;
 
-	/* sort the list of SMgrRelations if necessary */
+	/* sort the list of locators if necessary */
 	if (use_bsearch)
-		pg_qsort(srels, nrels, sizeof(SMgrSortArray), rlocator_comparator);
+		pg_qsort(locators, nrels, sizeof(RelFileLocator), rlocator_comparator);
 
 	/* Make sure we can handle the pin inside the loop */
 	ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
 
 	for (i = 0; i < NBuffers; i++)
 	{
-		SMgrSortArray *srelent = NULL;
+		RelFileLocator *found = NULL;
 		BufferDesc *bufHdr = GetBufferDescriptor(i);
 		uint32		buf_state;
 
@@ -3655,9 +3651,9 @@ FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels)
 
 			for (j = 0; j < nrels; j++)
 			{
-				if (BufTagMatchesRelFileLocator(&bufHdr->tag, &srels[j].rlocator))
+				if (BufTagMatchesRelFileLocator(&bufHdr->tag, &locators[j]))
 				{
-					srelent = &srels[j];
+					found = &locators[j];
 					break;
 				}
 			}
@@ -3667,24 +3663,26 @@ FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels)
 			RelFileLocator rlocator;
 
 			rlocator = BufTagGetRelFileLocator(&bufHdr->tag);
-			srelent = bsearch((const void *) &(rlocator),
-							  srels, nrels, sizeof(SMgrSortArray),
+			found = bsearch((const void *) &(rlocator),
+							  locators, nrels, sizeof(RelFileLocator),
 							  rlocator_comparator);
 		}
 
 		/* buffer doesn't belong to any of the given relfilelocators; skip it */
-		if (srelent == NULL)
+		if (found == NULL)
 			continue;
 
+		/* FIXME: cache SMgrFileHandles for the rels, and pass to FlushBuffer */
+
 		ReservePrivateRefCountEntry();
 
 		buf_state = LockBufHdr(bufHdr);
-		if (BufTagMatchesRelFileLocator(&bufHdr->tag, &srelent->rlocator) &&
+		if (BufTagMatchesRelFileLocator(&bufHdr->tag, found) &&
 			(buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
 		{
 			PinBuffer_Locked(bufHdr);
 			LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
-			FlushBuffer(bufHdr, srelent->srel);
+			FlushBuffer(bufHdr, NULL);
 			LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
 			UnpinBuffer(bufHdr);
 		}
@@ -3692,7 +3690,7 @@ FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels)
 			UnlockBufHdr(bufHdr, buf_state);
 	}
 
-	pfree(srels);
+	pfree(locators);
 }
 
 /* ---------------------------------------------------------------------
@@ -3708,7 +3706,7 @@ FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels)
 static void
 RelationCopyStorageUsingBuffer(RelFileLocator srclocator,
 							   RelFileLocator dstlocator,
-							   ForkNumber forkNum, bool permanent)
+							   ForkNumber forknum, bool permanent)
 {
 	Buffer		srcBuf;
 	Buffer		dstBuf;
@@ -3726,11 +3724,11 @@ RelationCopyStorageUsingBuffer(RelFileLocator srclocator,
 	 * can skip it when copying any fork of an unlogged relation other than
 	 * the init fork.
 	 */
-	use_wal = XLogIsNeeded() && (permanent || forkNum == INIT_FORKNUM);
+	use_wal = XLogIsNeeded() && (permanent || forknum == INIT_FORKNUM);
 
 	/* Get number of blocks in the source relation. */
-	nblocks = smgrnblocks(smgropen(srclocator, InvalidBackendId),
-						  forkNum);
+	nblocks = smgrnblocks(smgropen(srclocator, InvalidBackendId,
+								   forknum));
 
 	/* Nothing to copy; just return. */
 	if (nblocks == 0)
@@ -3741,7 +3739,7 @@ RelationCopyStorageUsingBuffer(RelFileLocator srclocator,
 	 * relation before starting to copy block by block.
 	 */
 	memset(buf.data, 0, BLCKSZ);
-	smgrextend(smgropen(dstlocator, InvalidBackendId), forkNum, nblocks - 1,
+	smgrextend(smgropen(dstlocator, InvalidBackendId, forknum), nblocks - 1,
 			   buf.data, true);
 
 	/* This is a bulk operation, so use buffer access strategies. */
@@ -3754,14 +3752,14 @@ RelationCopyStorageUsingBuffer(RelFileLocator srclocator,
 		CHECK_FOR_INTERRUPTS();
 
 		/* Read block from source relation. */
-		srcBuf = ReadBufferWithoutRelcache(srclocator, forkNum, blkno,
+		srcBuf = ReadBufferWithoutRelcache(srclocator, forknum, blkno,
 										   RBM_NORMAL, bstrategy_src,
 										   permanent);
 		LockBuffer(srcBuf, BUFFER_LOCK_SHARE);
 		srcPage = BufferGetPage(srcBuf);
 
 		/* Use P_NEW to extend the destination relation. */
-		dstBuf = ReadBufferWithoutRelcache(dstlocator, forkNum, blkno,
+		dstBuf = ReadBufferWithoutRelcache(dstlocator, forknum, blkno,
 										   RBM_NORMAL, bstrategy_dst,
 										   permanent);
 		LockBuffer(dstBuf, BUFFER_LOCK_EXCLUSIVE);
@@ -3799,7 +3797,6 @@ void
 CreateAndCopyRelationData(RelFileLocator src_rlocator,
 						  RelFileLocator dst_rlocator, bool permanent)
 {
-	RelFileLocatorBackend rlocator;
 	char		relpersistence;
 
 	/* Set the relpersistence. */
@@ -3819,34 +3816,30 @@ CreateAndCopyRelationData(RelFileLocator src_rlocator,
 								   permanent);
 
 	/* copy those extra forks that exist */
-	for (ForkNumber forkNum = MAIN_FORKNUM + 1;
-		 forkNum <= MAX_FORKNUM; forkNum++)
+	for (ForkNumber forknum = MAIN_FORKNUM + 1;
+		 forknum <= MAX_FORKNUM; forknum++)
 	{
-		if (smgrexists(smgropen(src_rlocator, InvalidBackendId), forkNum))
+		if (smgrexists(smgropen(src_rlocator, InvalidBackendId, forknum)))
 		{
-			smgrcreate(smgropen(dst_rlocator, InvalidBackendId), forkNum, false);
+			smgrcreate(smgropen(dst_rlocator, InvalidBackendId, forknum), false);
 
 			/*
 			 * WAL log creation if the relation is persistent, or this is the
 			 * init fork of an unlogged relation.
 			 */
-			if (permanent || forkNum == INIT_FORKNUM)
-				log_smgrcreate(&dst_rlocator, forkNum);
+			if (permanent || forknum == INIT_FORKNUM)
+				log_smgrcreate(&dst_rlocator, forknum);
 
 			/* Copy a fork's data, block by block. */
-			RelationCopyStorageUsingBuffer(src_rlocator, dst_rlocator, forkNum,
+			RelationCopyStorageUsingBuffer(src_rlocator, dst_rlocator, forknum,
 										   permanent);
 		}
 	}
 
-	/* close source and destination smgr if exists. */
-	rlocator.backend = InvalidBackendId;
 
-	rlocator.locator = src_rlocator;
-	smgrcloserellocator(rlocator);
+	smgrcloserellocator(src_rlocator, InvalidBackendId);
 
-	rlocator.locator = dst_rlocator;
-	smgrcloserellocator(rlocator);
+	smgrcloserellocator(dst_rlocator, InvalidBackendId);
 }
 
 /* ---------------------------------------------------------------------
@@ -4967,7 +4960,7 @@ IssuePendingWritebacks(WritebackContext *context)
 	{
 		PendingWriteback *cur;
 		PendingWriteback *next;
-		SMgrRelation reln;
+		SMgrFileHandle sfile;
 		int			ahead;
 		BufferTag	tag;
 		RelFileLocator currlocator;
@@ -5007,8 +5000,8 @@ IssuePendingWritebacks(WritebackContext *context)
 		i += ahead;
 
 		/* and finally tell the kernel to write the data to storage */
-		reln = smgropen(currlocator, InvalidBackendId);
-		smgrwriteback(reln, BufTagGetForkNum(&tag), tag.blockNum, nblocks);
+		sfile = smgropen(currlocator, InvalidBackendId, BufTagGetForkNum(&tag));
+		smgrwriteback(sfile, tag.blockNum, nblocks);
 	}
 
 	context->nr_pending = 0;
@@ -5030,3 +5023,29 @@ TestForOldSnapshot_impl(Snapshot snapshot, Relation relation)
 				(errcode(ERRCODE_SNAPSHOT_TOO_OLD),
 				 errmsg("snapshot too old")));
 }
+
+/*
+ * Check if a buffer tag is currently mapped, i.e. BufTableLookup() finds it.
+ * No pin is taken and the lock is released first, so the result may be stale.
+ * XXX Dubious semantics; needed only for multixact's handling for
+ * inconsistent states.
+ */
+bool
+BufferProbe(RelFileLocator rlocator, ForkNumber forknum, BlockNumber blockNum)
+{
+	BufferTag	tag;
+	uint32		hash;
+	LWLock	   *partitionLock;
+	int			buf_id;
+
+	InitBufferTag(&tag, &rlocator, forknum, blockNum);
+
+	hash = BufTableHashCode(&tag);
+	partitionLock = BufMappingPartitionLock(hash);
+
+	LWLockAcquire(partitionLock, LW_SHARED);
+	buf_id = BufTableLookup(&tag, hash);
+	LWLockRelease(partitionLock);
+
+	return buf_id >= 0;
+}
diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c
index 30d67d1c40d..a5629532d72 100644
--- a/src/backend/storage/buffer/localbuf.c
+++ b/src/backend/storage/buffer/localbuf.c
@@ -61,14 +61,13 @@ static Block GetLocalBufferStorage(void);
  * No-op if prefetching isn't compiled in.
  */
 PrefetchBufferResult
-PrefetchLocalBuffer(SMgrRelation smgr, ForkNumber forkNum,
-					BlockNumber blockNum)
+PrefetchLocalBuffer(SMgrFileHandle sfile, BlockNumber blockNum)
 {
 	PrefetchBufferResult result = {InvalidBuffer, false};
 	BufferTag	newTag;			/* identity of requested block */
 	LocalBufferLookupEnt *hresult;
 
-	InitBufferTag(&newTag, &smgr->smgr_rlocator.locator, forkNum, blockNum);
+	InitBufferTag(&newTag, &sfile->smgr_locator.locator, sfile->smgr_locator.forknum, blockNum);
 
 	/* Initialize local buffers if first request in this session */
 	if (LocalBufHash == NULL)
@@ -87,7 +86,7 @@ PrefetchLocalBuffer(SMgrRelation smgr, ForkNumber forkNum,
 	{
 #ifdef USE_PREFETCH
 		/* Not in buffers, so initiate prefetch */
-		smgrprefetch(smgr, forkNum, blockNum);
+		smgrprefetch(sfile, blockNum);
 		result.initiated_io = true;
 #endif							/* USE_PREFETCH */
 	}
@@ -106,8 +105,7 @@ PrefetchLocalBuffer(SMgrRelation smgr, ForkNumber forkNum,
  * (hence, usage_count is always advanced).
  */
 BufferDesc *
-LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
-				 bool *foundPtr)
+LocalBufferAlloc(SMgrFileHandle sfile, BlockNumber blockNum, bool *foundPtr)
 {
 	BufferTag	newTag;			/* identity of requested block */
 	LocalBufferLookupEnt *hresult;
@@ -117,7 +115,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
 	bool		found;
 	uint32		buf_state;
 
-	InitBufferTag(&newTag, &smgr->smgr_rlocator.locator, forkNum, blockNum);
+	InitBufferTag(&newTag, &sfile->smgr_locator.locator, sfile->smgr_locator.forknum, blockNum);
 
 	/* Initialize local buffers if first request in this session */
 	if (LocalBufHash == NULL)
@@ -134,7 +132,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
 		Assert(BufferTagsEqual(&bufHdr->tag, &newTag));
 #ifdef LBDEBUG
 		fprintf(stderr, "LB ALLOC (%u,%d,%d) %d\n",
-				smgr->smgr_rlocator.locator.relNumber, forkNum, blockNum, -b - 1);
+				sfile->smgr_locator.locator.relNumber, sfile->smgr_locator.forknum, blockNum, -b - 1);
 #endif
 		buf_state = pg_atomic_read_u32(&bufHdr->state);
 
@@ -162,7 +160,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
 
 #ifdef LBDEBUG
 	fprintf(stderr, "LB ALLOC (%u,%d,%d) %d\n",
-			smgr->smgr_rlocator.locator.relNumber, forkNum, blockNum,
+			sfile->smgr_locator.locator.relNumber, sfile->smgr_locator.forknum, blockNum,
 			-nextFreeLocalBuf - 1);
 #endif
 
@@ -211,17 +209,16 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
 	 */
 	if (buf_state & BM_DIRTY)
 	{
-		SMgrRelation oreln;
+		SMgrFileHandle ofile;
 		Page		localpage = (char *) LocalBufHdrGetBlock(bufHdr);
 
-		/* Find smgr relation for buffer */
-		oreln = smgropen(BufTagGetRelFileLocator(&bufHdr->tag), MyBackendId);
+		/* Find smgr file handle for buffer */
+		ofile = smgropen(BufTagGetRelFileLocator(&bufHdr->tag), MyBackendId, BufTagGetForkNum(&bufHdr->tag));
 
 		PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
 
 		/* And write... */
-		smgrwrite(oreln,
-				  BufTagGetForkNum(&bufHdr->tag),
+		smgrwrite(ofile,
 				  bufHdr->tag.blockNum,
 				  localpage,
 				  false);
diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c
index a6b05331032..8636f6bec43 100644
--- a/src/backend/storage/freespace/freespace.c
+++ b/src/backend/storage/freespace/freespace.c
@@ -271,7 +271,7 @@ FreeSpaceMapPrepareTruncateRel(Relation rel, BlockNumber nblocks)
 	 * If no FSM has been created yet for this relation, there's nothing to
 	 * truncate.
 	 */
-	if (!smgrexists(RelationGetSmgr(rel), FSM_FORKNUM))
+	if (!smgrexists(RelationGetSmgr(rel, FSM_FORKNUM)))
 		return InvalidBlockNumber;
 
 	/* Get the location in the FSM of the first removed heap block */
@@ -317,7 +317,7 @@ FreeSpaceMapPrepareTruncateRel(Relation rel, BlockNumber nblocks)
 	else
 	{
 		new_nfsmblocks = fsm_logical_to_physical(first_removed_address);
-		if (smgrnblocks(RelationGetSmgr(rel), FSM_FORKNUM) <= new_nfsmblocks)
+		if (smgrnblocks(RelationGetSmgr(rel, FSM_FORKNUM)) <= new_nfsmblocks)
 			return InvalidBlockNumber;	/* nothing to do; the FSM was already
 										 * smaller */
 	}
@@ -532,14 +532,14 @@ fsm_readbuf(Relation rel, FSMAddress addr, bool extend)
 {
 	BlockNumber blkno = fsm_logical_to_physical(addr);
 	Buffer		buf;
-	SMgrRelation reln;
+	SMgrFileHandle fsm_file;
 
 	/*
 	 * Caution: re-using this smgr pointer could fail if the relcache entry
 	 * gets closed.  It's safe as long as we only do smgr-level operations
 	 * between here and the last use of the pointer.
 	 */
-	reln = RelationGetSmgr(rel);
+	fsm_file = RelationGetSmgr(rel, FSM_FORKNUM);
 
 	/*
 	 * If we haven't cached the size of the FSM yet, check it first.  Also
@@ -547,19 +547,19 @@ fsm_readbuf(Relation rel, FSMAddress addr, bool extend)
 	 * value might be stale.  (We send smgr inval messages on truncation, but
 	 * not on extension.)
 	 */
-	if (reln->smgr_cached_nblocks[FSM_FORKNUM] == InvalidBlockNumber ||
-		blkno >= reln->smgr_cached_nblocks[FSM_FORKNUM])
+	if (fsm_file->smgr_cached_nblocks == InvalidBlockNumber ||
+		blkno >= fsm_file->smgr_cached_nblocks)
 	{
 		/* Invalidate the cache so smgrnblocks asks the kernel. */
-		reln->smgr_cached_nblocks[FSM_FORKNUM] = InvalidBlockNumber;
-		if (smgrexists(reln, FSM_FORKNUM))
-			smgrnblocks(reln, FSM_FORKNUM);
+		fsm_file->smgr_cached_nblocks = InvalidBlockNumber;
+		if (smgrexists(fsm_file))
+			smgrnblocks(fsm_file);
 		else
-			reln->smgr_cached_nblocks[FSM_FORKNUM] = 0;
+			fsm_file->smgr_cached_nblocks = 0;
 	}
 
 	/* Handle requests beyond EOF */
-	if (blkno >= reln->smgr_cached_nblocks[FSM_FORKNUM])
+	if (blkno >= fsm_file->smgr_cached_nblocks)
 	{
 		if (extend)
 			fsm_extend(rel, blkno + 1);
@@ -609,7 +609,7 @@ fsm_extend(Relation rel, BlockNumber fsm_nblocks)
 {
 	BlockNumber fsm_nblocks_now;
 	PGAlignedBlock pg;
-	SMgrRelation reln;
+	SMgrFileHandle fsm_file;
 
 	PageInit((Page) pg.data, BLCKSZ, 0);
 
@@ -630,29 +630,28 @@ fsm_extend(Relation rel, BlockNumber fsm_nblocks)
 	 * gets closed.  It's safe as long as we only do smgr-level operations
 	 * between here and the last use of the pointer.
 	 */
-	reln = RelationGetSmgr(rel);
+	fsm_file = RelationGetSmgr(rel, FSM_FORKNUM);
 
 	/*
 	 * Create the FSM file first if it doesn't exist.  If
-	 * smgr_cached_nblocks[FSM_FORKNUM] is positive then it must exist, no
+	 * the handle's smgr_cached_nblocks is positive then it must exist, no
 	 * need for an smgrexists call.
 	 */
-	if ((reln->smgr_cached_nblocks[FSM_FORKNUM] == 0 ||
-		 reln->smgr_cached_nblocks[FSM_FORKNUM] == InvalidBlockNumber) &&
-		!smgrexists(reln, FSM_FORKNUM))
-		smgrcreate(reln, FSM_FORKNUM, false);
+	if ((fsm_file->smgr_cached_nblocks == 0 ||
+		 fsm_file->smgr_cached_nblocks == InvalidBlockNumber) &&
+		!smgrexists(fsm_file))
+		smgrcreate(fsm_file, false);
 
 	/* Invalidate cache so that smgrnblocks() asks the kernel. */
-	reln->smgr_cached_nblocks[FSM_FORKNUM] = InvalidBlockNumber;
-	fsm_nblocks_now = smgrnblocks(reln, FSM_FORKNUM);
+	fsm_file->smgr_cached_nblocks = InvalidBlockNumber;
+	fsm_nblocks_now = smgrnblocks(fsm_file);
 
 	/* Extend as needed. */
 	while (fsm_nblocks_now < fsm_nblocks)
 	{
 		PageSetChecksumInplace((Page) pg.data, fsm_nblocks_now);
 
-		smgrextend(reln, FSM_FORKNUM, fsm_nblocks_now,
-				   pg.data, false);
+		smgrextend(fsm_file, fsm_nblocks_now, pg.data, false);
 		fsm_nblocks_now++;
 	}
 
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c
index b204ecdbc32..240f0e1a3ff 100644
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -117,9 +117,7 @@ CalculateShmemSize(int *num_semaphores)
 	size = add_size(size, XLogPrefetchShmemSize());
 	size = add_size(size, XLOGShmemSize());
 	size = add_size(size, XLogRecoveryShmemSize());
-	size = add_size(size, CLOGShmemSize());
 	size = add_size(size, CommitTsShmemSize());
-	size = add_size(size, SUBTRANSShmemSize());
 	size = add_size(size, TwoPhaseShmemSize());
 	size = add_size(size, BackgroundWorkerShmemSize());
 	size = add_size(size, MultiXactShmemSize());
@@ -241,9 +239,7 @@ CreateSharedMemoryAndSemaphores(void)
 	XLOGShmemInit();
 	XLogPrefetchShmemInit();
 	XLogRecoveryShmemInit();
-	CLOGShmemInit();
 	CommitTsShmemInit();
-	SUBTRANSShmemInit();
 	MultiXactShmemInit();
 	InitBufferPool();
 
diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c
index df1c0d72e97..9c1674ebb0d 100644
--- a/src/backend/storage/lmgr/predicate.c
+++ b/src/backend/storage/lmgr/predicate.c
@@ -313,17 +313,9 @@
 	((targethash) ^ ((uint32) PointerGetDatum((predicatelocktag)->myXact)) \
 	 << LOG2_NUM_PREDICATELOCK_PARTITIONS)
 
-
-/*
- * The SLRU buffer area through which we access the old xids.
- */
-static SlruCtlData SerialSlruCtlData;
-
-#define SerialSlruCtl			(&SerialSlruCtlData)
-
 #define SERIAL_PAGESIZE			BLCKSZ
 #define SERIAL_ENTRYSIZE			sizeof(SerCommitSeqNo)
-#define SERIAL_ENTRIESPERPAGE	(SERIAL_PAGESIZE / SERIAL_ENTRYSIZE)
+#define SERIAL_ENTRIESPERPAGE	((SERIAL_PAGESIZE - SizeOfPageHeaderData) / SERIAL_ENTRYSIZE)
 
 /*
  * Set maximum pages based on the number needed to track all transactions.
@@ -332,12 +324,13 @@ static SlruCtlData SerialSlruCtlData;
 
 #define SerialNextPage(page) (((page) >= SERIAL_MAX_PAGE) ? 0 : (page) + 1)
 
-#define SerialValue(slotno, xid) (*((SerCommitSeqNo *) \
-	(SerialSlruCtl->shared->page_buffer[slotno] + \
+#define SerialValue(buffer, xid) (*((SerCommitSeqNo *) \
+	(PageGetContents(BufferGetPage(buffer)) + \
 	((((uint32) (xid)) % SERIAL_ENTRIESPERPAGE) * SERIAL_ENTRYSIZE))))
 
 #define SerialPage(xid)	(((uint32) (xid)) / SERIAL_ENTRIESPERPAGE)
 
+
 typedef struct SerialControlData
 {
 	int			headPage;		/* newest initialized page */
@@ -849,10 +842,14 @@ SerialPagePrecedesLogicallyUnitTests(void)
 	 * requires burning ~2B XIDs in single-user mode, a negligible
 	 * possibility.  Moreover, if it does happen, the consequence would be
 	 * mild, namely a new transaction failing in SimpleLruReadPage().
+	 *
+	 * NOTE: After adding page headers, the defect affects two pages, so we
+	 * now assert correct treatment of the page two before targetPage
+	 * (targetPage - 2) instead of the immediately preceding one.
 	 */
 	headPage = oldestPage;
 	targetPage = newestPage;
-	Assert(SerialPagePrecedesLogically(headPage, targetPage - 1));
+	Assert(SerialPagePrecedesLogically(headPage, targetPage - 2));
 #if 0
 	Assert(SerialPagePrecedesLogically(headPage, targetPage));
 #endif
@@ -867,17 +864,10 @@ SerialInit(void)
 {
 	bool		found;
 
-	/*
-	 * Set up SLRU management of the pg_serial data.
-	 */
-	SerialSlruCtl->PagePrecedes = SerialPagePrecedesLogically;
-	SimpleLruInit(SerialSlruCtl, "Serial",
-				  NUM_SERIAL_BUFFERS, 0, SerialSLRULock, "pg_serial",
-				  LWTRANCHE_SERIAL_BUFFER, SYNC_HANDLER_NONE);
 #ifdef USE_ASSERT_CHECKING
 	SerialPagePrecedesLogicallyUnitTests();
 #endif
-	SlruPagePrecedesUnitTests(SerialSlruCtl, SERIAL_ENTRIESPERPAGE);
+	SlruPagePrecedesUnitTests(SerialPagePrecedesLogically, SERIAL_ENTRIESPERPAGE);
 
 	/*
 	 * Create or attach to the SerialControl structure.
@@ -907,9 +897,9 @@ SerialAdd(TransactionId xid, SerCommitSeqNo minConflictCommitSeqNo)
 {
 	TransactionId tailXid;
 	int			targetPage;
-	int			slotno;
 	int			firstZeroPage;
 	bool		isNewPage;
+	Buffer		buffer;
 
 	Assert(TransactionIdIsValid(xid));
 
@@ -954,16 +944,23 @@ SerialAdd(TransactionId xid, SerCommitSeqNo minConflictCommitSeqNo)
 		/* Initialize intervening pages. */
 		while (firstZeroPage != targetPage)
 		{
-			(void) SimpleLruZeroPage(SerialSlruCtl, firstZeroPage);
+			buffer = ZeroSlruBuffer(SLRU_SERIAL_ID, firstZeroPage);
+			PageSetHeaderDataNonRel(BufferGetPage(buffer), firstZeroPage, InvalidXLogRecPtr, BLCKSZ, PG_METAPAGE_LAYOUT_VERSION);
+			MarkBufferDirty(buffer);
+			UnlockReleaseBuffer(buffer);
 			firstZeroPage = SerialNextPage(firstZeroPage);
 		}
-		slotno = SimpleLruZeroPage(SerialSlruCtl, targetPage);
+		buffer = ZeroSlruBuffer(SLRU_SERIAL_ID, targetPage);
 	}
 	else
-		slotno = SimpleLruReadPage(SerialSlruCtl, targetPage, true, xid);
+	{
+		buffer = ReadSlruBuffer(SLRU_SERIAL_ID, targetPage, RBM_NORMAL);
+		LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+	}
 
-	SerialValue(slotno, xid) = minConflictCommitSeqNo;
-	SerialSlruCtl->shared->page_dirty[slotno] = true;
+	SerialValue(buffer, xid) = minConflictCommitSeqNo;
+	MarkBufferDirty(buffer);
+	UnlockReleaseBuffer(buffer);
 
 	LWLockRelease(SerialSLRULock);
 }
@@ -979,7 +976,7 @@ SerialGetMinConflictCommitSeqNo(TransactionId xid)
 	TransactionId headXid;
 	TransactionId tailXid;
 	SerCommitSeqNo val;
-	int			slotno;
+	Buffer		buffer;
 
 	Assert(TransactionIdIsValid(xid));
 
@@ -1001,9 +998,10 @@ SerialGetMinConflictCommitSeqNo(TransactionId xid)
-	 * The following function must be called without holding SerialSLRULock,
-	 * but will return with that lock held, which must then be released.
+	 * SerialSLRULock is not held here, and ReadSlruBuffer() does not take it
+	 * (SerialAdd() calls it with the lock already held).  Read the entry under
+	 * a shared content lock, pairing with SerialAdd()'s exclusive lock.
 	 */
-	slotno = SimpleLruReadPage_ReadOnly(SerialSlruCtl,
-										SerialPage(xid), xid);
-	val = SerialValue(slotno, xid);
-	LWLockRelease(SerialSLRULock);
+	buffer = ReadSlruBuffer(SLRU_SERIAL_ID, SerialPage(xid), RBM_NORMAL);
+	LockBuffer(buffer, BUFFER_LOCK_SHARE);
+	val = SerialValue(buffer, xid);
+	UnlockReleaseBuffer(buffer);
 	return val;
 }
@@ -1122,19 +1120,7 @@ CheckPointPredicate(void)
 	LWLockRelease(SerialSLRULock);
 
 	/* Truncate away pages that are no longer required */
-	SimpleLruTruncate(SerialSlruCtl, tailPage);
-
-	/*
-	 * Write dirty SLRU pages to disk
-	 *
-	 * This is not actually necessary from a correctness point of view. We do
-	 * it merely as a debugging aid.
-	 *
-	 * We're doing this after the truncation to avoid writing pages right
-	 * before deleting the file in which they sit, which would be completely
-	 * pointless.
-	 */
-	SimpleLruWriteAll(SerialSlruCtl, true);
+	SimpleLruTruncate(SLRU_SERIAL_ID, SerialPagePrecedesLogically, tailPage);
 }
 
 /*------------------------------------------------------------------------*/
@@ -1396,7 +1382,6 @@ PredicateLockShmemSize(void)
 
 	/* Shared memory structures for SLRU tracking of old committed xids. */
 	size = add_size(size, sizeof(SerialControlData));
-	size = add_size(size, SimpleLruShmemSize(NUM_SERIAL_BUFFERS, 0));
 
 	return size;
 }
diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c
index 8b617c7e79d..857cce9a28c 100644
--- a/src/backend/storage/page/bufpage.c
+++ b/src/backend/storage/page/bufpage.c
@@ -59,6 +59,35 @@ PageInit(Page page, Size pageSize, Size specialSize)
 	/* p->pd_prune_xid = InvalidTransactionId;		done by above MemSet */
 }
 
+/*
+ * PageInitSLRU
+ *		Initialize the contents of an SLRU page.
+ *
+ * Zeroes the whole page, then sets pd_lower to just past the page header and
+ * pd_upper/pd_special to the start of the (MAXALIGN'd) special space.  The
+ * page is stamped with PG_METAPAGE_LAYOUT_VERSION.  pageSize must be BLCKSZ.
+ */
+void
+PageInitSLRU(Page page, Size pageSize, Size specialSize)
+{
+	PageHeader	p = (PageHeader) page;
+
+	specialSize = MAXALIGN(specialSize);
+
+	Assert(pageSize == BLCKSZ);
+	Assert(pageSize > specialSize + SizeOfPageHeaderData);
+
+	/* Make sure all fields of page are zero, as well as unused space */
+	MemSet(p, 0, pageSize);
+
+	p->pd_flags = 0;
+	p->pd_lower = SizeOfPageHeaderData;
+	p->pd_upper = pageSize - specialSize;
+	p->pd_special = pageSize - specialSize;
+	PageSetPageSizeAndVersion(page, pageSize, PG_METAPAGE_LAYOUT_VERSION);
+	/* p->pd_prune_xid = InvalidTransactionId;		done by above MemSet */
+}
+
 
 /*
  * PageIsVerifiedExtended
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 14b6fa0fd90..b9a41cb9427 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -66,10 +66,10 @@
  *	out to an unlinked old copy of a segment file that will eventually
  *	disappear.
  *
- *	File descriptors are stored in the per-fork md_seg_fds arrays inside
- *	SMgrRelation. The length of these arrays is stored in md_num_open_segs.
- *	Note that a fork's md_num_open_segs having a specific value does not
- *	necessarily mean the relation doesn't have additional segments; we may
+ *	File descriptors are stored in the md_seg_fds array inside
+ *	SMgrFileData. The length of the array is stored in md_num_open_segs.
+ *	Note that md_num_open_segs having a specific value does not
+ *	necessarily mean the file doesn't have additional segments; we may
  *	just not have opened the next segment yet.  (We could not have "all
  *	segments are in the array" as an invariant anyway, since another backend
  *	could extend the relation while we aren't looking.)  We do not have
@@ -121,26 +121,18 @@ static MemoryContext MdCxt;		/* context for all MdfdVec objects */
 
 
 /* local routines */
-static void mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forknum,
-						 bool isRedo);
-static MdfdVec *mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior);
-static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum,
-								   MdfdVec *seg);
-static void register_unlink_segment(RelFileLocatorBackend rlocator, ForkNumber forknum,
-									BlockNumber segno);
-static void register_forget_request(RelFileLocatorBackend rlocator, ForkNumber forknum,
-									BlockNumber segno);
-static void _fdvec_resize(SMgrRelation reln,
-						  ForkNumber forknum,
-						  int nseg);
-static char *_mdfd_segpath(SMgrRelation reln, ForkNumber forknum,
-						   BlockNumber segno);
-static MdfdVec *_mdfd_openseg(SMgrRelation reln, ForkNumber forknum,
+static void mdunlinkfork(SMgrFileLocator slocator, bool isRedo);
+static MdfdVec *mdopenfork(SMgrFileHandle sfile, int behavior);
+static void register_dirty_segment(SMgrFileHandle sfile, MdfdVec *seg);
+static void register_unlink_segment(SMgrFileLocator slocator, BlockNumber segno);
+static void register_forget_request(SMgrFileLocator slocator, BlockNumber segno);
+static void _fdvec_resize(SMgrFileHandle sfile, int nseg);
+static char *_mdfd_segpath(SMgrFileHandle sfile, BlockNumber segno);
+static MdfdVec *_mdfd_openseg(SMgrFileHandle sfile,
 							  BlockNumber segno, int oflags);
-static MdfdVec *_mdfd_getseg(SMgrRelation reln, ForkNumber forknum,
+static MdfdVec *_mdfd_getseg(SMgrFileHandle sfile,
 							 BlockNumber blkno, bool skipFsync, int behavior);
-static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum,
-							  MdfdVec *seg);
+static BlockNumber _mdnblocks(SMgrFileHandle sfile, MdfdVec *seg);
 
 
 /*
@@ -160,7 +152,7 @@ mdinit(void)
  * Note: this will return true for lingering files, with pending deletions
  */
 bool
-mdexists(SMgrRelation reln, ForkNumber forknum)
+mdexists(SMgrFileHandle sfile)
 {
 	/*
 	 * Close it first, to ensure that we notice if the fork has been unlinked
@@ -168,9 +160,9 @@ mdexists(SMgrRelation reln, ForkNumber forknum)
 	 * which already closes relations when dropping them.
 	 */
 	if (!InRecovery)
-		mdclose(reln, forknum);
+		mdclose(sfile);
 
-	return (mdopenfork(reln, forknum, EXTENSION_RETURN_NULL) != NULL);
+	return (mdopenfork(sfile, EXTENSION_RETURN_NULL) != NULL);
 }
 
 /*
@@ -179,16 +171,16 @@ mdexists(SMgrRelation reln, ForkNumber forknum)
  * If isRedo is true, it's okay for the relation to exist already.
  */
 void
-mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
+mdcreate(SMgrFileHandle sfile, bool isRedo)
 {
 	MdfdVec    *mdfd;
 	char	   *path;
 	File		fd;
 
-	if (isRedo && reln->md_num_open_segs[forknum] > 0)
+	if (isRedo && sfile->md_num_open_segs > 0)
 		return;					/* created and opened already... */
 
-	Assert(reln->md_num_open_segs[forknum] == 0);
+	Assert(sfile->md_num_open_segs == 0);
 
 	/*
 	 * We may be using the target table space for the first time in this
@@ -199,11 +191,14 @@ mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
 	 * should be here and not in commands/tablespace.c?  But that would imply
 	 * importing a lot of stuff that smgr.c oughtn't know, either.
 	 */
-	TablespaceCreateDbspace(reln->smgr_rlocator.locator.spcOid,
-							reln->smgr_rlocator.locator.dbOid,
-							isRedo);
+	if (sfile->smgr_locator.locator.spcOid != SLRU_SPC_OID)
+	{
+		TablespaceCreateDbspace(sfile->smgr_locator.locator.spcOid,
+								sfile->smgr_locator.locator.dbOid,
+								isRedo);
+	}
 
-	path = relpath(reln->smgr_rlocator, forknum);
+	path = smgrfilepath(sfile->smgr_locator);
 
 	fd = PathNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY);
 
@@ -225,8 +220,8 @@ mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
 
 	pfree(path);
 
-	_fdvec_resize(reln, forknum, 1);
-	mdfd = &reln->md_seg_fds[forknum][0];
+	_fdvec_resize(sfile, 1);
+	mdfd = &sfile->md_seg_fds[0];
 	mdfd->mdfd_vfd = fd;
 	mdfd->mdfd_segno = 0;
 }
@@ -293,16 +288,9 @@ mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
  * we are usually not in a transaction anymore when this is called.
  */
 void
-mdunlink(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
+mdunlink(SMgrFileLocator slocator, bool isRedo)
 {
-	/* Now do the per-fork work */
-	if (forknum == InvalidForkNumber)
-	{
-		for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
-			mdunlinkfork(rlocator, forknum, isRedo);
-	}
-	else
-		mdunlinkfork(rlocator, forknum, isRedo);
+	mdunlinkfork(slocator, isRedo);
 }
 
 /*
@@ -330,29 +318,29 @@ do_truncate(const char *path)
 }
 
 static void
-mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
+mdunlinkfork(SMgrFileLocator slocator, bool isRedo)
 {
 	char	   *path;
 	int			ret;
 	int			save_errno;
 
-	path = relpath(rlocator, forknum);
+	path = smgrfilepath(slocator);
 
 	/*
 	 * Truncate and then unlink the first segment, or just register a request
 	 * to unlink it later, as described in the comments for mdunlink().
 	 */
-	if (isRedo || IsBinaryUpgrade || forknum != MAIN_FORKNUM ||
-		RelFileLocatorBackendIsTemp(rlocator))
+	if (isRedo || IsBinaryUpgrade || slocator.forknum != MAIN_FORKNUM ||
+		SMgrFileLocatorIsTemp(slocator))
 	{
-		if (!RelFileLocatorBackendIsTemp(rlocator))
+		if (!SMgrFileLocatorIsTemp(slocator))
 		{
 			/* Prevent other backends' fds from holding on to the disk space */
 			ret = do_truncate(path);
 
 			/* Forget any pending sync requests for the first segment */
 			save_errno = errno;
-			register_forget_request(rlocator, forknum, 0 /* first seg */ );
+			register_forget_request(slocator, 0 /* first seg */ );
 			errno = save_errno;
 		}
 		else
@@ -379,7 +367,7 @@ mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
 
 		/* Register request to unlink first segment later */
 		save_errno = errno;
-		register_unlink_segment(rlocator, forknum, 0 /* first seg */ );
+		register_unlink_segment(slocator, 0 /* first seg */ );
 		errno = save_errno;
 	}
 
@@ -404,7 +392,7 @@ mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
 		{
 			sprintf(segpath, "%s.%u", path, segno);
 
-			if (!RelFileLocatorBackendIsTemp(rlocator))
+			if (!SMgrFileLocatorIsTemp(slocator))
 			{
 				/*
 				 * Prevent other backends' fds from holding on to the disk
@@ -417,7 +405,7 @@ mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
 				 * Forget any pending sync requests for this segment before we
 				 * try to unlink.
 				 */
-				register_forget_request(rlocator, forknum, segno);
+				register_forget_request(slocator, segno);
 			}
 
 			if (unlink(segpath) < 0)
@@ -446,7 +434,7 @@ mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
  *		causes intervening file space to become filled with zeroes.
  */
 void
-mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
+mdextend(SMgrFileHandle sfile, BlockNumber blocknum,
 		 char *buffer, bool skipFsync)
 {
 	off_t		seekpos;
@@ -455,7 +443,7 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 
 	/* This assert is too expensive to have on normally ... */
 #ifdef CHECK_WRITE_VS_EXTEND
-	Assert(blocknum >= mdnblocks(reln, forknum));
+	Assert(blocknum >= mdnblocks(sfile));
 #endif
 
 	/*
@@ -468,10 +456,10 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 		ereport(ERROR,
 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 				 errmsg("cannot extend file \"%s\" beyond %u blocks",
-						relpath(reln->smgr_rlocator, forknum),
+						smgrfilepath(sfile->smgr_locator),
 						InvalidBlockNumber)));
 
-	v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_CREATE);
+	v = _mdfd_getseg(sfile, blocknum, skipFsync, EXTENSION_CREATE);
 
 	seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
 
@@ -494,10 +482,10 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 				 errhint("Check free disk space.")));
 	}
 
-	if (!skipFsync && !SmgrIsTemp(reln))
-		register_dirty_segment(reln, forknum, v);
+	if (!skipFsync && !SmgrIsTemp(sfile))
+		register_dirty_segment(sfile, v);
 
-	Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
+	Assert(_mdnblocks(sfile, v) <= ((BlockNumber) RELSEG_SIZE));
 }
 
 /*
@@ -511,17 +499,17 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
  * invent one out of whole cloth.
  */
 static MdfdVec *
-mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior)
+mdopenfork(SMgrFileHandle sfile, int behavior)
 {
 	MdfdVec    *mdfd;
 	char	   *path;
 	File		fd;
 
 	/* No work if already open */
-	if (reln->md_num_open_segs[forknum] > 0)
-		return &reln->md_seg_fds[forknum][0];
+	if (sfile->md_num_open_segs > 0)
+		return &sfile->md_seg_fds[0];
 
-	path = relpath(reln->smgr_rlocator, forknum);
+	path = smgrfilepath(sfile->smgr_locator);
 
 	fd = PathNameOpenFile(path, O_RDWR | PG_BINARY);
 
@@ -540,12 +528,12 @@ mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior)
 
 	pfree(path);
 
-	_fdvec_resize(reln, forknum, 1);
-	mdfd = &reln->md_seg_fds[forknum][0];
+	_fdvec_resize(sfile, 1);
+	mdfd = &sfile->md_seg_fds[0];
 	mdfd->mdfd_vfd = fd;
 	mdfd->mdfd_segno = 0;
 
-	Assert(_mdnblocks(reln, forknum, mdfd) <= ((BlockNumber) RELSEG_SIZE));
+	Assert(_mdnblocks(sfile, mdfd) <= ((BlockNumber) RELSEG_SIZE));
 
 	return mdfd;
 }
@@ -554,20 +542,19 @@ mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior)
  *  mdopen() -- Initialize newly-opened relation.
  */
 void
-mdopen(SMgrRelation reln)
+mdopen(SMgrFileHandle sfile)
 {
 	/* mark it not open */
-	for (int forknum = 0; forknum <= MAX_FORKNUM; forknum++)
-		reln->md_num_open_segs[forknum] = 0;
+	sfile->md_num_open_segs = 0;
 }
 
 /*
  *	mdclose() -- Close the specified relation, if it isn't closed already.
  */
 void
-mdclose(SMgrRelation reln, ForkNumber forknum)
+mdclose(SMgrFileHandle sfile)
 {
-	int			nopensegs = reln->md_num_open_segs[forknum];
+	int			nopensegs = sfile->md_num_open_segs;
 
 	/* No work if already closed */
 	if (nopensegs == 0)
@@ -576,10 +563,10 @@ mdclose(SMgrRelation reln, ForkNumber forknum)
 	/* close segments starting from the end */
 	while (nopensegs > 0)
 	{
-		MdfdVec    *v = &reln->md_seg_fds[forknum][nopensegs - 1];
+		MdfdVec    *v = &sfile->md_seg_fds[nopensegs - 1];
 
 		FileClose(v->mdfd_vfd);
-		_fdvec_resize(reln, forknum, nopensegs - 1);
+		_fdvec_resize(sfile, nopensegs - 1);
 		nopensegs--;
 	}
 }
@@ -588,13 +575,13 @@ mdclose(SMgrRelation reln, ForkNumber forknum)
  *	mdprefetch() -- Initiate asynchronous read of the specified block of a relation
  */
 bool
-mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
+mdprefetch(SMgrFileHandle sfile, BlockNumber blocknum)
 {
 #ifdef USE_PREFETCH
 	off_t		seekpos;
 	MdfdVec    *v;
 
-	v = _mdfd_getseg(reln, forknum, blocknum, false,
+	v = _mdfd_getseg(sfile, blocknum, false,
 					 InRecovery ? EXTENSION_RETURN_NULL : EXTENSION_FAIL);
 	if (v == NULL)
 		return false;
@@ -616,8 +603,7 @@ mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
  * considerably more efficient than doing so individually.
  */
 void
-mdwriteback(SMgrRelation reln, ForkNumber forknum,
-			BlockNumber blocknum, BlockNumber nblocks)
+mdwriteback(SMgrFileHandle sfile, BlockNumber blocknum, BlockNumber nblocks)
 {
 	/*
 	 * Issue flush requests in as few requests as possible; have to split at
@@ -631,7 +617,7 @@ mdwriteback(SMgrRelation reln, ForkNumber forknum,
 		int			segnum_start,
 					segnum_end;
 
-		v = _mdfd_getseg(reln, forknum, blocknum, true /* not used */ ,
+		v = _mdfd_getseg(sfile, blocknum, true /* not used */ ,
 						 EXTENSION_DONT_OPEN);
 
 		/*
@@ -668,20 +654,21 @@ mdwriteback(SMgrRelation reln, ForkNumber forknum,
  *	mdread() -- Read the specified block from a relation.
  */
 void
-mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
+mdread(SMgrFileHandle sfile, BlockNumber blocknum,
 	   char *buffer)
 {
 	off_t		seekpos;
 	int			nbytes;
 	MdfdVec    *v;
 
-	TRACE_POSTGRESQL_SMGR_MD_READ_START(forknum, blocknum,
-										reln->smgr_rlocator.locator.spcOid,
-										reln->smgr_rlocator.locator.dbOid,
-										reln->smgr_rlocator.locator.relNumber,
-										reln->smgr_rlocator.backend);
+	TRACE_POSTGRESQL_SMGR_MD_READ_START(sfile->smgr_locator.forknum,
+										blocknum,
+										sfile->smgr_locator.locator.spcOid,
+										sfile->smgr_locator.locator.dbOid,
+										sfile->smgr_locator.locator.relNumber,
+										sfile->smgr_locator.backend);
 
-	v = _mdfd_getseg(reln, forknum, blocknum, false,
+	v = _mdfd_getseg(sfile, blocknum, false,
 					 EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
 
 	seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
@@ -690,11 +677,12 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 
 	nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_READ);
 
-	TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum,
-									   reln->smgr_rlocator.locator.spcOid,
-									   reln->smgr_rlocator.locator.dbOid,
-									   reln->smgr_rlocator.locator.relNumber,
-									   reln->smgr_rlocator.backend,
+	TRACE_POSTGRESQL_SMGR_MD_READ_DONE(sfile->smgr_locator.forknum,
+									   blocknum,
+									   sfile->smgr_locator.locator.spcOid,
+									   sfile->smgr_locator.locator.dbOid,
+									   sfile->smgr_locator.locator.relNumber,
+									   sfile->smgr_locator.backend,
 									   nbytes,
 									   BLCKSZ);
 
@@ -733,7 +721,7 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
  *		use mdextend().
  */
 void
-mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
+mdwrite(SMgrFileHandle sfile, BlockNumber blocknum,
 		char *buffer, bool skipFsync)
 {
 	off_t		seekpos;
@@ -742,16 +730,17 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 
 	/* This assert is too expensive to have on normally ... */
 #ifdef CHECK_WRITE_VS_EXTEND
-	Assert(blocknum < mdnblocks(reln, forknum));
+	Assert(blocknum < mdnblocks(sfile));
 #endif
 
-	TRACE_POSTGRESQL_SMGR_MD_WRITE_START(forknum, blocknum,
-										 reln->smgr_rlocator.locator.spcOid,
-										 reln->smgr_rlocator.locator.dbOid,
-										 reln->smgr_rlocator.locator.relNumber,
-										 reln->smgr_rlocator.backend);
+	TRACE_POSTGRESQL_SMGR_MD_WRITE_START(sfile->smgr_locator.forknum,
+										 blocknum,
+										 sfile->smgr_locator.locator.spcOid,
+										 sfile->smgr_locator.locator.dbOid,
+										 sfile->smgr_locator.locator.relNumber,
+										 sfile->smgr_locator.backend);
 
-	v = _mdfd_getseg(reln, forknum, blocknum, skipFsync,
+	v = _mdfd_getseg(sfile, blocknum, skipFsync,
 					 EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
 
 	seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
@@ -760,11 +749,12 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 
 	nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_WRITE);
 
-	TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum,
-										reln->smgr_rlocator.locator.spcOid,
-										reln->smgr_rlocator.locator.dbOid,
-										reln->smgr_rlocator.locator.relNumber,
-										reln->smgr_rlocator.backend,
+	TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(sfile->smgr_locator.forknum,
+										blocknum,
+										sfile->smgr_locator.locator.spcOid,
+										sfile->smgr_locator.locator.dbOid,
+										sfile->smgr_locator.locator.relNumber,
+										sfile->smgr_locator.backend,
 										nbytes,
 										BLCKSZ);
 
@@ -785,8 +775,8 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 				 errhint("Check free disk space.")));
 	}
 
-	if (!skipFsync && !SmgrIsTemp(reln))
-		register_dirty_segment(reln, forknum, v);
+	if (!skipFsync && !SmgrIsTemp(sfile))
+		register_dirty_segment(sfile, v);
 }
 
 /*
@@ -798,16 +788,16 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
  *		are present in the array.
  */
 BlockNumber
-mdnblocks(SMgrRelation reln, ForkNumber forknum)
+mdnblocks(SMgrFileHandle sfile)
 {
 	MdfdVec    *v;
 	BlockNumber nblocks;
 	BlockNumber segno;
 
-	mdopenfork(reln, forknum, EXTENSION_FAIL);
+	mdopenfork(sfile, EXTENSION_FAIL);
 
 	/* mdopen has opened the first segment */
-	Assert(reln->md_num_open_segs[forknum] > 0);
+	Assert(sfile->md_num_open_segs > 0);
 
 	/*
 	 * Start from the last open segments, to avoid redundant seeks.  We have
@@ -822,12 +812,12 @@ mdnblocks(SMgrRelation reln, ForkNumber forknum)
 	 * that's OK because the checkpointer never needs to compute relation
 	 * size.)
 	 */
-	segno = reln->md_num_open_segs[forknum] - 1;
-	v = &reln->md_seg_fds[forknum][segno];
+	segno = sfile->md_num_open_segs - 1;
+	v = &sfile->md_seg_fds[segno];
 
 	for (;;)
 	{
-		nblocks = _mdnblocks(reln, forknum, v);
+		nblocks = _mdnblocks(sfile, v);
 		if (nblocks > ((BlockNumber) RELSEG_SIZE))
 			elog(FATAL, "segment too big");
 		if (nblocks < ((BlockNumber) RELSEG_SIZE))
@@ -845,7 +835,7 @@ mdnblocks(SMgrRelation reln, ForkNumber forknum)
 		 * undermines _mdfd_getseg's attempts to notice and report an error
 		 * upon access to a missing segment.
 		 */
-		v = _mdfd_openseg(reln, forknum, segno, 0);
+		v = _mdfd_openseg(sfile, segno, 0);
 		if (v == NULL)
 			return segno * ((BlockNumber) RELSEG_SIZE);
 	}
@@ -855,7 +845,7 @@ mdnblocks(SMgrRelation reln, ForkNumber forknum)
  *	mdtruncate() -- Truncate relation to specified number of blocks.
  */
 void
-mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
+mdtruncate(SMgrFileHandle sfile, BlockNumber nblocks)
 {
 	BlockNumber curnblk;
 	BlockNumber priorblocks;
@@ -865,7 +855,7 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
 	 * NOTE: mdnblocks makes sure we have opened all active segments, so that
 	 * truncation loop will get them all!
 	 */
-	curnblk = mdnblocks(reln, forknum);
+	curnblk = mdnblocks(sfile);
 	if (nblocks > curnblk)
 	{
 		/* Bogus request ... but no complaint if InRecovery */
@@ -873,7 +863,7 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
 			return;
 		ereport(ERROR,
 				(errmsg("could not truncate file \"%s\" to %u blocks: it's only %u blocks now",
-						relpath(reln->smgr_rlocator, forknum),
+						smgrfilepath(sfile->smgr_locator),
 						nblocks, curnblk)));
 	}
 	if (nblocks == curnblk)
@@ -883,14 +873,14 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
 	 * Truncate segments, starting at the last one. Starting at the end makes
 	 * managing the memory for the fd array easier, should there be errors.
 	 */
-	curopensegs = reln->md_num_open_segs[forknum];
+	curopensegs = sfile->md_num_open_segs;
 	while (curopensegs > 0)
 	{
 		MdfdVec    *v;
 
 		priorblocks = (curopensegs - 1) * RELSEG_SIZE;
 
-		v = &reln->md_seg_fds[forknum][curopensegs - 1];
+		v = &sfile->md_seg_fds[curopensegs - 1];
 
 		if (priorblocks > nblocks)
 		{
@@ -904,14 +894,14 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
 						 errmsg("could not truncate file \"%s\": %m",
 								FilePathName(v->mdfd_vfd))));
 
-			if (!SmgrIsTemp(reln))
-				register_dirty_segment(reln, forknum, v);
+			if (!SmgrIsTemp(sfile))
+				register_dirty_segment(sfile, v);
 
 			/* we never drop the 1st segment */
-			Assert(v != &reln->md_seg_fds[forknum][0]);
+			Assert(v != &sfile->md_seg_fds[0]);
 
 			FileClose(v->mdfd_vfd);
-			_fdvec_resize(reln, forknum, curopensegs - 1);
+			_fdvec_resize(sfile, curopensegs - 1);
 		}
 		else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks)
 		{
@@ -930,8 +920,8 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
 						 errmsg("could not truncate file \"%s\" to %u blocks: %m",
 								FilePathName(v->mdfd_vfd),
 								nblocks)));
-			if (!SmgrIsTemp(reln))
-				register_dirty_segment(reln, forknum, v);
+			if (!SmgrIsTemp(sfile))
+				register_dirty_segment(sfile, v);
 		}
 		else
 		{
@@ -957,7 +947,7 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
  * segment may survive recovery, reintroducing unwanted data into the table.
  */
 void
-mdimmedsync(SMgrRelation reln, ForkNumber forknum)
+mdimmedsync(SMgrFileHandle sfile)
 {
 	int			segno;
 	int			min_inactive_seg;
@@ -966,9 +956,9 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum)
 	 * NOTE: mdnblocks makes sure we have opened all active segments, so that
 	 * fsync loop will get them all!
 	 */
-	mdnblocks(reln, forknum);
+	mdnblocks(sfile);
 
-	min_inactive_seg = segno = reln->md_num_open_segs[forknum];
+	min_inactive_seg = segno = sfile->md_num_open_segs;
 
 	/*
 	 * Temporarily open inactive segments, then close them after sync.  There
@@ -976,12 +966,12 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum)
 	 * is harmless.  We don't bother to clean them up and take a risk of
 	 * further trouble.  The next mdclose() will soon close them.
 	 */
-	while (_mdfd_openseg(reln, forknum, segno, 0) != NULL)
+	while (_mdfd_openseg(sfile, segno, 0) != NULL)
 		segno++;
 
 	while (segno > 0)
 	{
-		MdfdVec    *v = &reln->md_seg_fds[forknum][segno - 1];
+		MdfdVec    *v = &sfile->md_seg_fds[segno - 1];
 
 		if (FileSync(v->mdfd_vfd, WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC) < 0)
 			ereport(data_sync_elevel(ERROR),
@@ -993,7 +983,7 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum)
 		if (segno > min_inactive_seg)
 		{
 			FileClose(v->mdfd_vfd);
-			_fdvec_resize(reln, forknum, segno - 1);
+			_fdvec_resize(sfile, segno - 1);
 		}
 
 		segno--;
@@ -1010,14 +1000,14 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum)
  * enough to be a performance problem).
  */
 static void
-register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
+register_dirty_segment(SMgrFileHandle sfile, MdfdVec *seg)
 {
 	FileTag		tag;
 
-	INIT_MD_FILETAG(tag, reln->smgr_rlocator.locator, forknum, seg->mdfd_segno);
+	INIT_MD_FILETAG(tag, sfile->smgr_locator.locator, sfile->smgr_locator.forknum, seg->mdfd_segno);
 
 	/* Temp relations should never be fsync'd */
-	Assert(!SmgrIsTemp(reln));
+	Assert(!SmgrIsTemp(sfile));
 
 	if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false /* retryOnError */ ))
 	{
@@ -1036,15 +1026,14 @@ register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
  * register_unlink_segment() -- Schedule a file to be deleted after next checkpoint
  */
 static void
-register_unlink_segment(RelFileLocatorBackend rlocator, ForkNumber forknum,
-						BlockNumber segno)
+register_unlink_segment(SMgrFileLocator slocator, BlockNumber segno)
 {
 	FileTag		tag;
 
-	INIT_MD_FILETAG(tag, rlocator.locator, forknum, segno);
+	INIT_MD_FILETAG(tag, slocator.locator, slocator.forknum, segno);
 
 	/* Should never be used with temp relations */
-	Assert(!RelFileLocatorBackendIsTemp(rlocator));
+	Assert(!SMgrFileLocatorIsTemp(slocator));
 
 	RegisterSyncRequest(&tag, SYNC_UNLINK_REQUEST, true /* retryOnError */ );
 }
@@ -1053,12 +1042,11 @@ register_unlink_segment(RelFileLocatorBackend rlocator, ForkNumber forknum,
  * register_forget_request() -- forget any fsyncs for a relation fork's segment
  */
 static void
-register_forget_request(RelFileLocatorBackend rlocator, ForkNumber forknum,
-						BlockNumber segno)
+register_forget_request(SMgrFileLocator slocator, BlockNumber segno)
 {
 	FileTag		tag;
 
-	INIT_MD_FILETAG(tag, rlocator.locator, forknum, segno);
+	INIT_MD_FILETAG(tag, slocator.locator, slocator.forknum, segno);
 
 	RegisterSyncRequest(&tag, SYNC_FORGET_REQUEST, true /* retryOnError */ );
 }
@@ -1081,57 +1069,25 @@ ForgetDatabaseSyncRequests(Oid dbid)
 	RegisterSyncRequest(&tag, SYNC_FILTER_REQUEST, true /* retryOnError */ );
 }
 
-/*
- * DropRelationFiles -- drop files of all given relations
- */
-void
-DropRelationFiles(RelFileLocator *delrels, int ndelrels, bool isRedo)
-{
-	SMgrRelation *srels;
-	int			i;
-
-	srels = palloc(sizeof(SMgrRelation) * ndelrels);
-	for (i = 0; i < ndelrels; i++)
-	{
-		SMgrRelation srel = smgropen(delrels[i], InvalidBackendId);
-
-		if (isRedo)
-		{
-			ForkNumber	fork;
-
-			for (fork = 0; fork <= MAX_FORKNUM; fork++)
-				XLogDropRelation(delrels[i], fork);
-		}
-		srels[i] = srel;
-	}
-
-	smgrdounlinkall(srels, ndelrels, isRedo);
-
-	for (i = 0; i < ndelrels; i++)
-		smgrclose(srels[i]);
-	pfree(srels);
-}
-
 
 /*
  *	_fdvec_resize() -- Resize the fork's open segments array
  */
 static void
-_fdvec_resize(SMgrRelation reln,
-			  ForkNumber forknum,
+_fdvec_resize(SMgrFileHandle sfile,
 			  int nseg)
 {
 	if (nseg == 0)
 	{
-		if (reln->md_num_open_segs[forknum] > 0)
+		if (sfile->md_num_open_segs > 0)
 		{
-			pfree(reln->md_seg_fds[forknum]);
-			reln->md_seg_fds[forknum] = NULL;
+			pfree(sfile->md_seg_fds);
+			sfile->md_seg_fds = NULL;
 		}
 	}
-	else if (reln->md_num_open_segs[forknum] == 0)
+	else if (sfile->md_num_open_segs == 0)
 	{
-		reln->md_seg_fds[forknum] =
+		sfile->md_seg_fds =
 			MemoryContextAlloc(MdCxt, sizeof(MdfdVec) * nseg);
 	}
 	else
@@ -1142,12 +1098,12 @@ _fdvec_resize(SMgrRelation reln,
 		 * FileClose(), and the memory context internally will sometimes avoid
 		 * doing an actual reallocation.
 		 */
-		reln->md_seg_fds[forknum] =
-			repalloc(reln->md_seg_fds[forknum],
+		sfile->md_seg_fds =
+			repalloc(sfile->md_seg_fds,
 					 sizeof(MdfdVec) * nseg);
 	}
 
-	reln->md_num_open_segs[forknum] = nseg;
+	sfile->md_num_open_segs = nseg;
 }
 
 /*
@@ -1155,12 +1111,12 @@ _fdvec_resize(SMgrRelation reln,
  * returned string is palloc'd.
  */
 static char *
-_mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno)
+_mdfd_segpath(SMgrFileHandle sfile, BlockNumber segno)
 {
 	char	   *path,
 			   *fullpath;
 
-	path = relpath(reln->smgr_rlocator, forknum);
+	path = smgrfilepath(sfile->smgr_locator);
 
 	if (segno > 0)
 	{
@@ -1178,14 +1134,14 @@ _mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno)
  * and make a MdfdVec object for it.  Returns NULL on failure.
  */
 static MdfdVec *
-_mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno,
+_mdfd_openseg(SMgrFileHandle sfile, BlockNumber segno,
 			  int oflags)
 {
 	MdfdVec    *v;
 	File		fd;
 	char	   *fullpath;
 
-	fullpath = _mdfd_segpath(reln, forknum, segno);
+	fullpath = _mdfd_segpath(sfile, segno);
 
 	/* open the file */
 	fd = PathNameOpenFile(fullpath, O_RDWR | PG_BINARY | oflags);
@@ -1199,16 +1155,16 @@ _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno,
 	 * Segments are always opened in order from lowest to highest, so we must
 	 * be adding a new one at the end.
 	 */
-	Assert(segno == reln->md_num_open_segs[forknum]);
+	Assert(segno == sfile->md_num_open_segs);
 
-	_fdvec_resize(reln, forknum, segno + 1);
+	_fdvec_resize(sfile, segno + 1);
 
 	/* fill the entry */
-	v = &reln->md_seg_fds[forknum][segno];
+	v = &sfile->md_seg_fds[segno];
 	v->mdfd_vfd = fd;
 	v->mdfd_segno = segno;
 
-	Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
+	Assert(_mdnblocks(sfile, v) <= ((BlockNumber) RELSEG_SIZE));
 
 	/* all done */
 	return v;
@@ -1223,7 +1179,7 @@ _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno,
  * EXTENSION_CREATE case.
  */
 static MdfdVec *
-_mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
+_mdfd_getseg(SMgrFileHandle sfile, BlockNumber blkno,
 			 bool skipFsync, int behavior)
 {
 	MdfdVec    *v;
@@ -1238,9 +1194,9 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
 	targetseg = blkno / ((BlockNumber) RELSEG_SIZE);
 
 	/* if an existing and opened segment, we're done */
-	if (targetseg < reln->md_num_open_segs[forknum])
+	if (targetseg < sfile->md_num_open_segs)
 	{
-		v = &reln->md_seg_fds[forknum][targetseg];
+		v = &sfile->md_seg_fds[targetseg];
 		return v;
 	}
 
@@ -1255,19 +1211,19 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
 	 * 'behavior'). Start with either the last opened, or the first segment if
 	 * none was opened before.
 	 */
-	if (reln->md_num_open_segs[forknum] > 0)
-		v = &reln->md_seg_fds[forknum][reln->md_num_open_segs[forknum] - 1];
+	if (sfile->md_num_open_segs > 0)
+		v = &sfile->md_seg_fds[sfile->md_num_open_segs - 1];
 	else
 	{
-		v = mdopenfork(reln, forknum, behavior);
+		v = mdopenfork(sfile, behavior);
 		if (!v)
 			return NULL;		/* if behavior & EXTENSION_RETURN_NULL */
 	}
 
-	for (nextsegno = reln->md_num_open_segs[forknum];
+	for (nextsegno = sfile->md_num_open_segs;
 		 nextsegno <= targetseg; nextsegno++)
 	{
-		BlockNumber nblocks = _mdnblocks(reln, forknum, v);
+		BlockNumber nblocks = _mdnblocks(sfile, v);
 		int			flags = 0;
 
 		Assert(nextsegno == v->mdfd_segno + 1);
@@ -1296,7 +1252,7 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
 			{
 				char	   *zerobuf = palloc0(BLCKSZ);
 
-				mdextend(reln, forknum,
+				mdextend(sfile,
 						 nextsegno * ((BlockNumber) RELSEG_SIZE) - 1,
 						 zerobuf, skipFsync);
 				pfree(zerobuf);
@@ -1327,11 +1283,11 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
 			ereport(ERROR,
 					(errcode_for_file_access(),
 					 errmsg("could not open file \"%s\" (target block %u): previous segment is only %u blocks",
-							_mdfd_segpath(reln, forknum, nextsegno),
+							_mdfd_segpath(sfile, nextsegno),
 							blkno, nblocks)));
 		}
 
-		v = _mdfd_openseg(reln, forknum, nextsegno, flags);
+		v = _mdfd_openseg(sfile, nextsegno, flags);
 
 		if (v == NULL)
 		{
@@ -1341,7 +1297,7 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
 			ereport(ERROR,
 					(errcode_for_file_access(),
 					 errmsg("could not open file \"%s\" (target block %u): %m",
-							_mdfd_segpath(reln, forknum, nextsegno),
+							_mdfd_segpath(sfile, nextsegno),
 							blkno)));
 		}
 	}
@@ -1353,7 +1309,7 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
  * Get number of blocks present in a single disk file
  */
 static BlockNumber
-_mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
+_mdnblocks(SMgrFileHandle sfile, MdfdVec *seg)
 {
 	off_t		len;
 
@@ -1376,16 +1332,16 @@ _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
 int
 mdsyncfiletag(const FileTag *ftag, char *path)
 {
-	SMgrRelation reln = smgropen(ftag->rlocator, InvalidBackendId);
+	SMgrFileHandle sfile = smgropen(ftag->rlocator, InvalidBackendId, ftag->forknum);
 	File		file;
 	bool		need_to_close;
 	int			result,
 				save_errno;
 
 	/* See if we already have the file open, or need to open it. */
-	if (ftag->segno < reln->md_num_open_segs[ftag->forknum])
+	if (ftag->segno < sfile->md_num_open_segs)
 	{
-		file = reln->md_seg_fds[ftag->forknum][ftag->segno].mdfd_vfd;
+		file = sfile->md_seg_fds[ftag->segno].mdfd_vfd;
 		strlcpy(path, FilePathName(file), MAXPGPATH);
 		need_to_close = false;
 	}
@@ -1393,7 +1349,7 @@ mdsyncfiletag(const FileTag *ftag, char *path)
 	{
 		char	   *p;
 
-		p = _mdfd_segpath(reln, ftag->forknum, ftag->segno);
+		p = _mdfd_segpath(sfile, ftag->segno);
 		strlcpy(path, p, MAXPGPATH);
 		pfree(p);
 
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index c1a5febcbfd..de103320574 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -41,27 +41,24 @@ typedef struct f_smgr
 {
 	void		(*smgr_init) (void);	/* may be NULL */
 	void		(*smgr_shutdown) (void);	/* may be NULL */
-	void		(*smgr_open) (SMgrRelation reln);
-	void		(*smgr_close) (SMgrRelation reln, ForkNumber forknum);
-	void		(*smgr_create) (SMgrRelation reln, ForkNumber forknum,
-								bool isRedo);
-	bool		(*smgr_exists) (SMgrRelation reln, ForkNumber forknum);
-	void		(*smgr_unlink) (RelFileLocatorBackend rlocator, ForkNumber forknum,
-								bool isRedo);
-	void		(*smgr_extend) (SMgrRelation reln, ForkNumber forknum,
+	void		(*smgr_open) (SMgrFileHandle sfile);
+	void		(*smgr_close) (SMgrFileHandle sfile);
+	void		(*smgr_create) (SMgrFileHandle sfile, bool isRedo);
+	bool		(*smgr_exists) (SMgrFileHandle sfile);
+	void		(*smgr_unlink) (SMgrFileLocator slocator, bool isRedo);
+	void		(*smgr_extend) (SMgrFileHandle sfile,
 								BlockNumber blocknum, char *buffer, bool skipFsync);
-	bool		(*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum,
+	bool		(*smgr_prefetch) (SMgrFileHandle sfile,
 								  BlockNumber blocknum);
-	void		(*smgr_read) (SMgrRelation reln, ForkNumber forknum,
+	void		(*smgr_read) (SMgrFileHandle sfile,
 							  BlockNumber blocknum, char *buffer);
-	void		(*smgr_write) (SMgrRelation reln, ForkNumber forknum,
+	void		(*smgr_write) (SMgrFileHandle sfile,
 							   BlockNumber blocknum, char *buffer, bool skipFsync);
-	void		(*smgr_writeback) (SMgrRelation reln, ForkNumber forknum,
+	void		(*smgr_writeback) (SMgrFileHandle sfile,
 								   BlockNumber blocknum, BlockNumber nblocks);
-	BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum);
-	void		(*smgr_truncate) (SMgrRelation reln, ForkNumber forknum,
-								  BlockNumber nblocks);
-	void		(*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum);
+	BlockNumber (*smgr_nblocks) (SMgrFileHandle sfile);
+	void		(*smgr_truncate) (SMgrFileHandle sfile, BlockNumber nblocks);
+	void		(*smgr_immedsync) (SMgrFileHandle sfile);
 } f_smgr;
 
 static const f_smgr smgrsw[] = {
@@ -88,12 +85,12 @@ static const f_smgr smgrsw[] = {
 static const int NSmgr = lengthof(smgrsw);
 
 /*
- * Each backend has a hashtable that stores all extant SMgrRelation objects.
- * In addition, "unowned" SMgrRelation objects are chained together in a list.
+ * Each backend has a hashtable that stores all extant SMgrFileData objects.
+ * In addition, "unowned" SMgrFileData objects are chained together in a list.
  */
-static HTAB *SMgrRelationHash = NULL;
+static HTAB *SMgrFileHash = NULL;
 
-static dlist_head unowned_relns;
+static dlist_head unowned_sfiles;
 
 /* local function prototypes */
 static void smgrshutdown(int code, Datum arg);
@@ -142,50 +139,50 @@ smgrshutdown(int code, Datum arg)
  *
  *		This does not attempt to actually open the underlying file.
  */
-SMgrRelation
-smgropen(RelFileLocator rlocator, BackendId backend)
+SMgrFileHandle
+smgropen(RelFileLocator rlocator, BackendId backend, ForkNumber forkNum)
 {
-	RelFileLocatorBackend brlocator;
-	SMgrRelation reln;
+	SMgrFileLocator slocator;
+	SMgrFileHandle sfile;
 	bool		found;
 
-	if (SMgrRelationHash == NULL)
+	if (SMgrFileHash == NULL)
 	{
 		/* First time through: initialize the hash table */
 		HASHCTL		ctl;
 
-		ctl.keysize = sizeof(RelFileLocatorBackend);
-		ctl.entrysize = sizeof(SMgrRelationData);
-		SMgrRelationHash = hash_create("smgr relation table", 400,
+		ctl.keysize = sizeof(SMgrFileLocator);
+		ctl.entrysize = sizeof(SMgrFileData);
+		SMgrFileHash = hash_create("smgr relation table", 400,
 									   &ctl, HASH_ELEM | HASH_BLOBS);
-		dlist_init(&unowned_relns);
+		dlist_init(&unowned_sfiles);
 	}
 
 	/* Look up or create an entry */
-	brlocator.locator = rlocator;
-	brlocator.backend = backend;
-	reln = (SMgrRelation) hash_search(SMgrRelationHash,
-									  (void *) &brlocator,
-									  HASH_ENTER, &found);
+	slocator.locator = rlocator;
+	slocator.backend = backend;
+	slocator.forknum = forkNum;
+	sfile = (SMgrFileHandle) hash_search(SMgrFileHash,
+										 (void *) &slocator,
+										 HASH_ENTER, &found);
 
 	/* Initialize it if not present before */
 	if (!found)
 	{
 		/* hash_search already filled in the lookup key */
-		reln->smgr_owner = NULL;
-		reln->smgr_targblock = InvalidBlockNumber;
-		for (int i = 0; i <= MAX_FORKNUM; ++i)
-			reln->smgr_cached_nblocks[i] = InvalidBlockNumber;
-		reln->smgr_which = 0;	/* we only have md.c at present */
+		sfile->smgr_owner = NULL;
+		sfile->smgr_targblock = InvalidBlockNumber;
+		sfile->smgr_cached_nblocks = InvalidBlockNumber;
+		sfile->smgr_which = 0;	/* we only have md.c at present */
 
 		/* implementation-specific initialization */
-		smgrsw[reln->smgr_which].smgr_open(reln);
+		smgrsw[sfile->smgr_which].smgr_open(sfile);
 
 		/* it has no owner yet */
-		dlist_push_tail(&unowned_relns, &reln->node);
+		dlist_push_tail(&unowned_sfiles, &sfile->node);
 	}
 
-	return reln;
+	return sfile;
 }
 
 /*
@@ -195,7 +192,7 @@ smgropen(RelFileLocator rlocator, BackendId backend)
  * the only such owners exist in the relcache.
  */
 void
-smgrsetowner(SMgrRelation *owner, SMgrRelation reln)
+smgrsetowner(SMgrFileHandle *owner, SMgrFileHandle sfile)
 {
 	/* We don't support "disowning" an SMgrRelation here, use smgrclearowner */
 	Assert(owner != NULL);
@@ -206,68 +203,66 @@ smgrsetowner(SMgrRelation *owner, SMgrRelation reln)
 	 * depending on the order of processing.  It's ok to close the old
 	 * relcache entry early in that case.)
 	 *
-	 * If there isn't an old owner, then the reln should be in the unowned
+	 * If there isn't an old owner, then the sfile should be in the unowned
 	 * list, and we need to remove it.
 	 */
-	if (reln->smgr_owner)
-		*(reln->smgr_owner) = NULL;
+	if (sfile->smgr_owner)
+		*(sfile->smgr_owner) = NULL;
 	else
-		dlist_delete(&reln->node);
+		dlist_delete(&sfile->node);
 
 	/* Now establish the ownership relationship. */
-	reln->smgr_owner = owner;
-	*owner = reln;
+	sfile->smgr_owner = owner;
+	*owner = sfile;
 }
 
 /*
- * smgrclearowner() -- Remove long-lived reference to an SMgrRelation object
+ * smgrclearowner() -- Remove long-lived reference to an SMgrFileHandle object
  *					   if one exists
  */
 void
-smgrclearowner(SMgrRelation *owner, SMgrRelation reln)
+smgrclearowner(SMgrFileHandle *owner, SMgrFileHandle sfile)
 {
 	/* Do nothing if the SMgrRelation object is not owned by the owner */
-	if (reln->smgr_owner != owner)
+	if (sfile->smgr_owner != owner)
 		return;
 
 	/* unset the owner's reference */
 	*owner = NULL;
 
 	/* unset our reference to the owner */
-	reln->smgr_owner = NULL;
+	sfile->smgr_owner = NULL;
 
 	/* add to list of unowned relations */
-	dlist_push_tail(&unowned_relns, &reln->node);
+	dlist_push_tail(&unowned_sfiles, &sfile->node);
 }
 
 /*
- *	smgrexists() -- Does the underlying file for a fork exist?
+ *	smgrexists() -- Does the underlying file exist?
  */
 bool
-smgrexists(SMgrRelation reln, ForkNumber forknum)
+smgrexists(SMgrFileHandle sfile)
 {
-	return smgrsw[reln->smgr_which].smgr_exists(reln, forknum);
+	return smgrsw[sfile->smgr_which].smgr_exists(sfile);
 }
 
 /*
- *	smgrclose() -- Close and delete an SMgrRelation object.
+ *	smgrclose() -- Close and delete an SMgrFileData object.
  */
 void
-smgrclose(SMgrRelation reln)
+smgrclose(SMgrFileHandle sfile)
 {
-	SMgrRelation *owner;
-	ForkNumber	forknum;
+	SMgrFileHandle *owner;
 
-	for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
-		smgrsw[reln->smgr_which].smgr_close(reln, forknum);
+	smgrsw[sfile->smgr_which].smgr_close(sfile);
 
-	owner = reln->smgr_owner;
+	owner = sfile->smgr_owner;
 
 	if (!owner)
-		dlist_delete(&reln->node);
+		dlist_delete(&sfile->node);
 
-	if (hash_search(SMgrRelationHash,
-					(void *) &(reln->smgr_rlocator),
+	if (hash_search(SMgrFileHash,
+					(void *) &(sfile->smgr_locator),
 					HASH_REMOVE, NULL) == NULL)
 		elog(ERROR, "SMgrRelation hashtable corrupted");
 
@@ -284,14 +279,11 @@ smgrclose(SMgrRelation reln)
  *
  *	The object remains valid.
  */
-void
-smgrrelease(SMgrRelation reln)
+static void
+smgrrelease(SMgrFileHandle sfile)
 {
-	for (ForkNumber forknum = 0; forknum <= MAX_FORKNUM; forknum++)
-	{
-		smgrsw[reln->smgr_which].smgr_close(reln, forknum);
-		reln->smgr_cached_nblocks[forknum] = InvalidBlockNumber;
-	}
+	smgrsw[sfile->smgr_which].smgr_close(sfile);
+	sfile->smgr_cached_nblocks = InvalidBlockNumber;
 }
 
 /*
@@ -303,16 +295,16 @@ void
 smgrreleaseall(void)
 {
 	HASH_SEQ_STATUS status;
-	SMgrRelation reln;
+	SMgrFileHandle sfile;
 
 	/* Nothing to do if hashtable not set up */
-	if (SMgrRelationHash == NULL)
+	if (SMgrFileHash == NULL)
 		return;
 
-	hash_seq_init(&status, SMgrRelationHash);
+	hash_seq_init(&status, SMgrFileHash);
 
-	while ((reln = (SMgrRelation) hash_seq_search(&status)) != NULL)
-		smgrrelease(reln);
+	while ((sfile = (SMgrFileHandle) hash_seq_search(&status)) != NULL)
+		smgrrelease(sfile);
 }
 
 /*
@@ -322,16 +314,16 @@ void
 smgrcloseall(void)
 {
 	HASH_SEQ_STATUS status;
-	SMgrRelation reln;
+	SMgrFileHandle sfile;
 
 	/* Nothing to do if hashtable not set up */
-	if (SMgrRelationHash == NULL)
+	if (SMgrFileHash == NULL)
 		return;
 
-	hash_seq_init(&status, SMgrRelationHash);
+	hash_seq_init(&status, SMgrFileHash);
 
-	while ((reln = (SMgrRelation) hash_seq_search(&status)) != NULL)
-		smgrclose(reln);
+	while ((sfile = (SMgrFileHandle) hash_seq_search(&status)) != NULL)
+		smgrclose(sfile);
 }
 
 /*
@@ -343,111 +335,112 @@ smgrcloseall(void)
  * such entry exists already.
  */
 void
-smgrcloserellocator(RelFileLocatorBackend rlocator)
+smgrcloserellocator(RelFileLocator rlocator, BackendId backend)
 {
-	SMgrRelation reln;
+	SMgrFileHandle sfile;
 
 	/* Nothing to do if hashtable not set up */
-	if (SMgrRelationHash == NULL)
+	if (SMgrFileHash == NULL)
 		return;
 
-	reln = (SMgrRelation) hash_search(SMgrRelationHash,
-									  (void *) &rlocator,
-									  HASH_FIND, NULL);
-	if (reln != NULL)
-		smgrclose(reln);
+	for (int i = 0; i <= MAX_FORKNUM; i++)
+	{
+		SMgrFileLocator slocator = { rlocator, backend, i };
+
+		sfile = (SMgrFileHandle) hash_search(SMgrFileHash,
+											 (void *) &slocator,
+											 HASH_FIND, NULL);
+		if (sfile != NULL)
+			smgrclose(sfile);
+	}
 }
 
 /*
- *	smgrcreate() -- Create a new relation.
+ *	smgrcreate() -- Create a new file.
  *
- *		Given an already-created (but presumably unused) SMgrRelation,
+ *		Given an already-created (but presumably unused) SMgrFileHandle,
  *		cause the underlying disk file or other storage for the fork
  *		to be created.
  */
 void
-smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
+smgrcreate(SMgrFileHandle sfile, bool isRedo)
 {
-	smgrsw[reln->smgr_which].smgr_create(reln, forknum, isRedo);
+	smgrsw[sfile->smgr_which].smgr_create(sfile, isRedo);
 }
 
 /*
- *	smgrdosyncall() -- Immediately sync all forks of all given relations
+ *	smgrunlink_multi() -- Immediately unlink given forks of given relation
  *
- *		All forks of all given relations are synced out to the store.
+ *		The given forks of the relation are removed from the store.  This
+ *		should not be used during transactional operations, since it can't be
+ *		undone.
+ *
+ *		This handles multiple forks in one call, because the cache invalidation
+ *		happens at relation granularity. If we had an smgrunlink() function
+ *		to unlink just a single smgr file, and the caller wanted to delete
+ *		multiple forks of a single relation, each call would send a new
+ *		cache invalidation event, which would be wasteful.
  *
- *		This is equivalent to FlushRelationBuffers() for each smgr relation,
- *		then calling smgrimmedsync() for all forks of each relation, but it's
- *		significantly quicker so should be preferred when possible.
+ *		If isRedo is true, it is okay for the underlying file(s) to be gone
+ *		already.
  */
 void
-smgrdosyncall(SMgrRelation *rels, int nrels)
+smgrunlink_multi(RelFileLocator rlocator, BackendId backend, ForkNumber *forks, int nforks, bool isRedo)
 {
+	int			which;
 	int			i = 0;
-	ForkNumber	forknum;
 
-	if (nrels == 0)
-		return;
+	which = 0;	/* we only have md.c at present */
 
-	FlushRelationsAllBuffers(rels, nrels);
+	/* Close the forks at smgr level */
+	smgrcloserellocator(rlocator, backend);
 
 	/*
-	 * Sync the physical file(s).
+	 * Send a shared-inval message to force other backends to close any
+	 * dangling smgr references they may have for these rels.  We should do
+	 * this before starting the actual unlinking, in case we fail partway
+	 * through that step.  Note that the sinval messages will eventually come
+	 * back to this backend, too, and thereby provide a backstop that we
+	 * closed our own smgr rel.
 	 */
-	for (i = 0; i < nrels; i++)
-	{
-		int			which = rels[i]->smgr_which;
+	CacheInvalidateSmgr(rlocator, backend);
 
-		for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
-		{
-			if (smgrsw[which].smgr_exists(rels[i], forknum))
-				smgrsw[which].smgr_immedsync(rels[i], forknum);
-		}
+	/*
+	 * Delete the physical file(s).
+	 *
+	 * Note: smgr_unlink must treat deletion failure as a WARNING, not an
+	 * ERROR, because we've already decided to commit or abort the current
+	 * xact.
+	 */
+	for (i = 0; i < nforks; i++)
+	{
+		SMgrFileLocator slocator = { rlocator, backend, forks[i] };
+		smgrsw[which].smgr_unlink(slocator, isRedo);
 	}
 }
 
 /*
- *	smgrdounlinkall() -- Immediately unlink all forks of all given relations
- *
- *		All forks of all given relations are removed from the store.  This
- *		should not be used during transactional operations, since it can't be
- *		undone.
+ *	smgrunlink() -- Immediately unlink a file
  *
  *		If isRedo is true, it is okay for the underlying file(s) to be gone
  *		already.
+ *
+ * To remove a relation transactionally, see RelationDropStorage() instead.
+ * This will cause cache invalidation of all forks of the relation, not just
+ * this one.
  */
 void
-smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
+smgrunlink(SMgrFileHandle sfile, bool isRedo)
 {
-	int			i = 0;
-	RelFileLocatorBackend *rlocators;
-	ForkNumber	forknum;
-
-	if (nrels == 0)
-		return;
-
-	/*
-	 * Get rid of any remaining buffers for the relations.  bufmgr will just
-	 * drop them without bothering to write the contents.
-	 */
-	DropRelationsAllBuffers(rels, nrels);
-
-	/*
-	 * create an array which contains all relations to be dropped, and close
-	 * each relation's forks at the smgr level while at it
-	 */
-	rlocators = palloc(sizeof(RelFileLocatorBackend) * nrels);
-	for (i = 0; i < nrels; i++)
-	{
-		RelFileLocatorBackend rlocator = rels[i]->smgr_rlocator;
-		int			which = rels[i]->smgr_which;
+	SMgrFileLocator locator;
+	int			which;
 
-		rlocators[i] = rlocator;
+	/* remember before closing */
+	which = sfile->smgr_which;
+	locator = sfile->smgr_locator;
 
-		/* Close the forks at smgr level */
-		for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
-			smgrsw[which].smgr_close(rels[i], forknum);
-	}
+	/* Close the file at smgr level */
+	smgrclose(sfile);
 
 	/*
 	 * Send a shared-inval message to force other backends to close any
@@ -457,8 +450,7 @@ smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
 	 * back to this backend, too, and thereby provide a backstop that we
 	 * closed our own smgr rel.
 	 */
-	for (i = 0; i < nrels; i++)
-		CacheInvalidateSmgr(rlocators[i]);
+	CacheInvalidateSmgr(locator.locator, locator.backend);
 
 	/*
 	 * Delete the physical file(s).
@@ -467,16 +459,7 @@ smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
 	 * ERROR, because we've already decided to commit or abort the current
 	 * xact.
 	 */
-
-	for (i = 0; i < nrels; i++)
-	{
-		int			which = rels[i]->smgr_which;
-
-		for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
-			smgrsw[which].smgr_unlink(rlocators[i], forknum, isRedo);
-	}
-
-	pfree(rlocators);
+	smgrsw[which].smgr_unlink(locator, isRedo);
 }
 
 
@@ -490,21 +473,21 @@ smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
  *		causes intervening file space to become filled with zeroes.
  */
 void
-smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
+smgrextend(SMgrFileHandle sfile, BlockNumber blocknum,
 		   char *buffer, bool skipFsync)
 {
-	smgrsw[reln->smgr_which].smgr_extend(reln, forknum, blocknum,
-										 buffer, skipFsync);
+	smgrsw[sfile->smgr_which].smgr_extend(sfile, blocknum,
+										  buffer, skipFsync);
 
 	/*
 	 * Normally we expect this to increase nblocks by one, but if the cached
 	 * value isn't as expected, just invalidate it so the next call asks the
 	 * kernel.
 	 */
-	if (reln->smgr_cached_nblocks[forknum] == blocknum)
-		reln->smgr_cached_nblocks[forknum] = blocknum + 1;
+	if (sfile->smgr_cached_nblocks == blocknum)
+		sfile->smgr_cached_nblocks = blocknum + 1;
 	else
-		reln->smgr_cached_nblocks[forknum] = InvalidBlockNumber;
+		sfile->smgr_cached_nblocks = InvalidBlockNumber;
 }
 
 /*
@@ -515,13 +498,13 @@ smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
  *		record).
  */
 bool
-smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
+smgrprefetch(SMgrFileHandle sfile, BlockNumber blocknum)
 {
-	return smgrsw[reln->smgr_which].smgr_prefetch(reln, forknum, blocknum);
+	return smgrsw[sfile->smgr_which].smgr_prefetch(sfile, blocknum);
 }
 
 /*
- *	smgrread() -- read a particular block from a relation into the supplied
+ *	smgrread() -- read a particular block from a file into the supplied
  *				  buffer.
  *
  *		This routine is called from the buffer manager in order to
@@ -529,10 +512,9 @@ smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
  *		return pages in the format that POSTGRES expects.
  */
 void
-smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
-		 char *buffer)
+smgrread(SMgrFileHandle sfile, BlockNumber blocknum, char *buffer)
 {
-	smgrsw[reln->smgr_which].smgr_read(reln, forknum, blocknum, buffer);
+	smgrsw[sfile->smgr_which].smgr_read(sfile, blocknum, buffer);
 }
 
 /*
@@ -551,11 +533,11 @@ smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
  *		do not require fsync.
  */
 void
-smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
+smgrwrite(SMgrFileHandle sfile, BlockNumber blocknum,
 		  char *buffer, bool skipFsync)
 {
-	smgrsw[reln->smgr_which].smgr_write(reln, forknum, blocknum,
-										buffer, skipFsync);
+	smgrsw[sfile->smgr_which].smgr_write(sfile, blocknum,
+										 buffer, skipFsync);
 }
 
 
@@ -564,11 +546,11 @@ smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
  *					   blocks.
  */
 void
-smgrwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
+smgrwriteback(SMgrFileHandle sfile, BlockNumber blocknum,
 			  BlockNumber nblocks)
 {
-	smgrsw[reln->smgr_which].smgr_writeback(reln, forknum, blocknum,
-											nblocks);
+	smgrsw[sfile->smgr_which].smgr_writeback(sfile, blocknum,
+											 nblocks);
 }
 
 /*
@@ -576,18 +558,18 @@ smgrwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
  *					 supplied relation.
  */
 BlockNumber
-smgrnblocks(SMgrRelation reln, ForkNumber forknum)
+smgrnblocks(SMgrFileHandle sfile)
 {
 	BlockNumber result;
 
 	/* Check and return if we get the cached value for the number of blocks. */
-	result = smgrnblocks_cached(reln, forknum);
+	result = smgrnblocks_cached(sfile);
 	if (result != InvalidBlockNumber)
 		return result;
 
-	result = smgrsw[reln->smgr_which].smgr_nblocks(reln, forknum);
+	result = smgrsw[sfile->smgr_which].smgr_nblocks(sfile);
 
-	reln->smgr_cached_nblocks[forknum] = result;
+	sfile->smgr_cached_nblocks = result;
 
 	return result;
 }
@@ -600,38 +582,41 @@ smgrnblocks(SMgrRelation reln, ForkNumber forknum)
  * fork size is not cached.
  */
 BlockNumber
-smgrnblocks_cached(SMgrRelation reln, ForkNumber forknum)
+smgrnblocks_cached(SMgrFileHandle sfile)
 {
 	/*
 	 * For now, we only use cached values in recovery due to lack of a shared
 	 * invalidation mechanism for changes in file size.
 	 */
-	if (InRecovery && reln->smgr_cached_nblocks[forknum] != InvalidBlockNumber)
-		return reln->smgr_cached_nblocks[forknum];
+	if (InRecovery && sfile->smgr_cached_nblocks != InvalidBlockNumber)
+		return sfile->smgr_cached_nblocks;
 
 	return InvalidBlockNumber;
 }
 
 /*
- *	smgrtruncate() -- Truncate the given forks of supplied relation to
- *					  each specified numbers of blocks
+ *	smgrtruncate_multi() -- Truncate the given forks of supplied relation to
+ *							each specified numbers of blocks
  *
  * The truncation is done immediately, so this can't be rolled back.
  *
  * The caller must hold AccessExclusiveLock on the relation, to ensure that
  * other backends receive the smgr invalidation event that this function sends
  * before they access any forks of the relation again.
+ *
+ * Like smgrunlink_multi(), this handles multiple forks in one call because
+ * the cache invalidation happens at relation granularity.
+ *
+ * NB: The caller is responsible for dropping buffers! Before v16, this
+ * function did it.
  */
 void
-smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nblocks)
+smgrtruncate_multi(RelFileLocator rlocator, BackendId backend, ForkNumber *forks,
+				   int nforks, BlockNumber *nblocks)
 {
 	int			i;
 
-	/*
-	 * Get rid of any buffers for the about-to-be-deleted blocks. bufmgr will
-	 * just drop them without bothering to write the contents.
-	 */
-	DropRelationBuffers(reln, forknum, nforks, nblocks);
+	Assert(nforks < MAX_FORKNUM + 1);
 
 	/*
 	 * Send a shared-inval message to force other backends to close any smgr
@@ -643,15 +628,19 @@ smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nb
 	 * is a performance-critical path.)  As in the unlink code, we want to be
 	 * sure the message is sent before we start changing things on-disk.
 	 */
-	CacheInvalidateSmgr(reln->smgr_rlocator);
+	CacheInvalidateSmgr(rlocator, backend);
 
-	/* Do the truncation */
+	/* Do the truncations */
 	for (i = 0; i < nforks; i++)
 	{
+		SMgrFileHandle sfile;
+
+		sfile = smgropen(rlocator, backend, forks[i]);
+
 		/* Make the cached size is invalid if we encounter an error. */
-		reln->smgr_cached_nblocks[forknum[i]] = InvalidBlockNumber;
+		sfile->smgr_cached_nblocks = InvalidBlockNumber;
 
-		smgrsw[reln->smgr_which].smgr_truncate(reln, forknum[i], nblocks[i]);
+		smgrsw[sfile->smgr_which].smgr_truncate(sfile, nblocks[i]);
 
 		/*
 		 * We might as well update the local smgr_cached_nblocks values. The
@@ -660,7 +649,7 @@ smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nb
 		 * smgr_vm_nblocks, and these ones too at the next command boundary.
 		 * But these ensure they aren't outright wrong until then.
 		 */
-		reln->smgr_cached_nblocks[forknum[i]] = nblocks[i];
+		sfile->smgr_cached_nblocks = nblocks[i];
 	}
 }
 
@@ -688,9 +677,9 @@ smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nb
  *		otherwise the sync is not very meaningful.
  */
 void
-smgrimmedsync(SMgrRelation reln, ForkNumber forknum)
+smgrimmedsync(SMgrFileHandle sfile)
 {
-	smgrsw[reln->smgr_which].smgr_immedsync(reln, forknum);
+	smgrsw[sfile->smgr_which].smgr_immedsync(sfile);
 }
 
 /*
@@ -714,14 +703,14 @@ AtEOXact_SMgr(void)
 	 * Zap all unowned SMgrRelations.  We rely on smgrclose() to remove each
 	 * one from the list.
 	 */
-	dlist_foreach_modify(iter, &unowned_relns)
+	dlist_foreach_modify(iter, &unowned_sfiles)
 	{
-		SMgrRelation rel = dlist_container(SMgrRelationData, node,
-										   iter.cur);
+		SMgrFileHandle sfile = dlist_container(SMgrFileData, node,
+											   iter.cur);
 
-		Assert(rel->smgr_owner == NULL);
+		Assert(sfile->smgr_owner == NULL);
 
-		smgrclose(rel);
+		smgrclose(sfile);
 	}
 }
 
diff --git a/src/backend/storage/sync/sync.c b/src/backend/storage/sync/sync.c
index 9d6a9e91090..3572078ba79 100644
--- a/src/backend/storage/sync/sync.c
+++ b/src/backend/storage/sync/sync.c
@@ -18,9 +18,7 @@
 #include <fcntl.h>
 #include <sys/file.h>
 
-#include "access/commit_ts.h"
-#include "access/clog.h"
-#include "access/multixact.h"
+#include "access/slru.h"
 #include "access/xlog.h"
 #include "access/xlogutils.h"
 #include "commands/tablespace.h"
@@ -106,22 +104,6 @@ static const SyncOps syncsw[] = {
 		.sync_unlinkfiletag = mdunlinkfiletag,
 		.sync_filetagmatches = mdfiletagmatches
 	},
-	/* pg_xact */
-	[SYNC_HANDLER_CLOG] = {
-		.sync_syncfiletag = clogsyncfiletag
-	},
-	/* pg_commit_ts */
-	[SYNC_HANDLER_COMMIT_TS] = {
-		.sync_syncfiletag = committssyncfiletag
-	},
-	/* pg_multixact/offsets */
-	[SYNC_HANDLER_MULTIXACT_OFFSET] = {
-		.sync_syncfiletag = multixactoffsetssyncfiletag
-	},
-	/* pg_multixact/members */
-	[SYNC_HANDLER_MULTIXACT_MEMBER] = {
-		.sync_syncfiletag = multixactmemberssyncfiletag
-	}
 };
 
 /*
diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c
index eb5782f82a4..bd435215dca 100644
--- a/src/backend/utils/cache/inval.c
+++ b/src/backend/utils/cache/inval.c
@@ -661,11 +661,12 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
 		 * We could have smgr entries for relations of other databases, so no
 		 * short-circuit test is possible here.
 		 */
-		RelFileLocatorBackend rlocator;
+		RelFileLocator rlocator;
+		BackendId backend;
 
-		rlocator.locator = msg->sm.rlocator;
-		rlocator.backend = (msg->sm.backend_hi << 16) | (int) msg->sm.backend_lo;
-		smgrcloserellocator(rlocator);
+		rlocator = msg->sm.rlocator;
+		backend = (msg->sm.backend_hi << 16) | (int) msg->sm.backend_lo;
+		smgrcloserellocator(rlocator, backend);
 	}
 	else if (msg->id == SHAREDINVALRELMAP_ID)
 	{
@@ -1459,14 +1460,14 @@ CacheInvalidateRelcacheByRelid(Oid relid)
  * Thus, the maximum possible backend ID is 2^23-1.
  */
 void
-CacheInvalidateSmgr(RelFileLocatorBackend rlocator)
+CacheInvalidateSmgr(RelFileLocator rlocator, BackendId backend)
 {
 	SharedInvalidationMessage msg;
 
 	msg.sm.id = SHAREDINVALSMGR_ID;
-	msg.sm.backend_hi = rlocator.backend >> 16;
-	msg.sm.backend_lo = rlocator.backend & 0xffff;
-	msg.sm.rlocator = rlocator.locator;
+	msg.sm.backend_hi = backend >> 16;
+	msg.sm.backend_lo = backend & 0xffff;
+	msg.sm.rlocator = rlocator;
 	/* check AddCatcacheInvalidationMessage() for an explanation */
 	VALGRIND_MAKE_MEM_DEFINED(&msg, sizeof(msg));
 
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 450e5124a5a..9e5cd7922ce 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -419,7 +419,7 @@ AllocateRelationDesc(Form_pg_class relp)
 	relation = (Relation) palloc0(sizeof(RelationData));
 
 	/* make sure relation is marked as having no open file yet */
-	relation->rd_smgr = NULL;
+	MemSet(relation->rd_smgr, 0, sizeof(relation->rd_smgr));
 
 	/*
 	 * Copy the relation tuple form
@@ -1248,7 +1248,7 @@ retry:
 	RelationInitPhysicalAddr(relation);
 
 	/* make sure relation is marked as having no open file yet */
-	relation->rd_smgr = NULL;
+	MemSet(relation->rd_smgr, 0, sizeof(relation->rd_smgr));
 
 	/*
 	 * now we can free the memory allocated for pg_class_tuple
@@ -1877,7 +1877,7 @@ formrdesc(const char *relationName, Oid relationReltype,
 	relation = (Relation) palloc0(sizeof(RelationData));
 
 	/* make sure relation is marked as having no open file yet */
-	relation->rd_smgr = NULL;
+	MemSet(relation->rd_smgr, 0, sizeof(relation->rd_smgr));
 
 	/*
 	 * initialize reference count: 1 because it is nailed in cache
@@ -2701,7 +2701,8 @@ RelationClearRelation(Relation relation, bool rebuild)
 		}
 
 		/* rd_smgr must not be swapped, due to back-links from smgr level */
-		SWAPFIELD(SMgrRelation, rd_smgr);
+		for (int i = 0; i <= MAX_FORKNUM; i++)
+			SWAPFIELD(SMgrFileHandle, rd_smgr[i]);
 		/* rd_refcnt must be preserved */
 		SWAPFIELD(int, rd_refcnt);
 		/* isnailed shouldn't change */
@@ -3532,7 +3533,7 @@ RelationBuildLocalRelation(const char *relname,
 	rel = (Relation) palloc0(sizeof(RelationData));
 
 	/* make sure relation is marked as having no open file yet */
-	rel->rd_smgr = NULL;
+	MemSet(rel->rd_smgr, 0, sizeof(rel->rd_smgr));
 
 	/* mark it nailed if appropriate */
 	rel->rd_isnailed = nailit;
@@ -3764,7 +3765,6 @@ RelationSetNewRelfilenumber(Relation relation, char persistence)
 	 */
 	if (IsBinaryUpgrade)
 	{
-		SMgrRelation	srel;
 
 		/*
 		 * During a binary upgrade, we use this code path to ensure that
@@ -3781,9 +3781,14 @@ RelationSetNewRelfilenumber(Relation relation, char persistence)
 		 * fails at this stage, the new cluster will need to be recreated
 		 * anyway.
 		 */
-		srel = smgropen(relation->rd_locator, relation->rd_backend);
-		smgrdounlinkall(&srel, 1, false);
-		smgrclose(srel);
+		ForkNumber forks[MAX_FORKNUM + 1];
+		for (int i = 0; i <= MAX_FORKNUM; i ++) 
+		{
+			smgropen(relation->rd_locator, relation->rd_backend, i);
+			forks[i] = i;
+		}
+		
+		smgrunlink_multi(relation->rd_locator, relation->rd_backend, forks, MAX_FORKNUM + 1, false);
 	}
 	else
 	{
@@ -3811,7 +3816,7 @@ RelationSetNewRelfilenumber(Relation relation, char persistence)
 	else if (RELKIND_HAS_STORAGE(relation->rd_rel->relkind))
 	{
 		/* handle these directly, at least for now */
-		SMgrRelation srel;
+		SMgrFileHandle srel;
 
 		srel = RelationCreateStorage(newrlocator, persistence, true);
 		smgrclose(srel);
@@ -6298,7 +6303,7 @@ load_relcache_init_file(bool shared)
 		/*
 		 * Reset transient-state fields in the relcache entry
 		 */
-		rel->rd_smgr = NULL;
+		MemSet(rel->rd_smgr, 0, sizeof(rel->rd_smgr));
 		if (rel->rd_isnailed)
 			rel->rd_refcnt = 1;
 		else
diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c
index 57bd6690ca0..f9137e36411 100644
--- a/src/backend/utils/mmgr/mcxt.c
+++ b/src/backend/utils/mmgr/mcxt.c
@@ -151,9 +151,20 @@ static void MemoryContextStatsPrint(MemoryContext context, void *passthru,
  * You should not do memory allocations within a critical section, because
  * an out-of-memory error will be escalated to a PANIC. To enforce that
  * rule, the allocation functions Assert that.
+ *
+ * FIXME: bypass this for the critical section in RecordTransactionCommit()
+ * for now. It does a lot of things that can allocate:
+ * - calls TransactionIdCommitTree, which pins buffers, which requires
+ *   space in the ResourceOwner for the pin (ResourceOwnerEnlargeBuffers())
+ * - same for TransactionTreeSetCommitTsData() call.
+ * - reading a page can require flushing other pages, which in turn
+ *   can call CompactCheckpointerRequestQueue(), which allocates
+ * - reading a page calls smgropen(), which allocates the SMgrFile entry
+ *   if it's not open already
  */
 #define AssertNotInCriticalSection(context) \
-	Assert(CritSectionCount == 0 || (context)->allowInCritSection)
+	Assert(CritSectionCount == 0 || (context)->allowInCritSection || \
+		   (MyProc != NULL && (MyProc->delayChkptFlags & DELAY_CHKPT_START) != 0))
 
 /*
  * Call the given function in the MemoryContextMethods for the memory context
diff --git a/src/bin/pg_upgrade/file.c b/src/bin/pg_upgrade/file.c
index 079fbda8389..6b76a6bf7e5 100644
--- a/src/bin/pg_upgrade/file.c
+++ b/src/bin/pg_upgrade/file.c
@@ -20,6 +20,7 @@
 #endif
 
 #include "access/visibilitymapdefs.h"
+#include "access/slrudefs.h"
 #include "common/file_perm.h"
 #include "pg_upgrade.h"
 #include "storage/bufpage.h"
@@ -139,7 +140,6 @@ copyFile(const char *src, const char *dst,
 #endif							/* WIN32 */
 }
 
-
 /*
  * linkFile()
  *
@@ -316,6 +316,201 @@ rewriteVisibilityMap(const char *fromfile, const char *tofile,
 	close(src_fd);
 }
 
+
+/*
+ * init_new_slru_file()
+ *
+ * Extend a freshly created segment file to the size of a full SLRU segment
+ * (SLRU_PAGES_PER_SEGMENT blocks of BLCKSZ bytes).  Returns the result of
+ * ftruncate(): 0 on success, -1 on failure.
+ */
+static int
+init_new_slru_file(FILE *fp)
+{
+	return ftruncate(fileno(fp), BLCKSZ * SLRU_PAGES_PER_SEGMENT);
+}
+
+/*
+ * State for writing new-format SLRU segment files one page at a time.
+ */
+typedef struct SlruPageWriter
+{
+	const char *dir;			/* directory receiving the new segments */
+	FILE	   *fp;				/* currently open segment file, or NULL */
+	int			segno;			/* segment number to write to */
+	size_t		pageno;			/* next free page slot in that segment */
+	BlockNumber blkno;			/* absolute block number of that page */
+} SlruPageWriter;
+
+/*
+ * slru_writer_open()
+ *
+ * Create (or overwrite) and pre-size the segment file for writer->segno.
+ * Returns false if the file cannot be created or sized.
+ */
+static bool
+slru_writer_open(SlruPageWriter *writer)
+{
+	char		path[MAXPGPATH];
+
+	snprintf(path, sizeof(path), "%s/%04X", writer->dir, writer->segno);
+	writer->fp = fopen(path, "wb");
+	if (writer->fp == NULL)
+		return false;
+	if (init_new_slru_file(writer->fp) < 0)
+	{
+		fclose(writer->fp);
+		writer->fp = NULL;
+		return false;
+	}
+	return true;
+}
+
+/*
+ * slru_writer_emit()
+ *
+ * Stamp the page header and checksum onto "page" and store the whole block
+ * in the next slot of the current segment, opening the segment first if
+ * necessary and closing it once it holds SLRU_PAGES_PER_SEGMENT pages.
+ * Returns false on any I/O failure.
+ */
+static bool
+slru_writer_emit(SlruPageWriter *writer, char *page)
+{
+	PageHeader	hdr = (PageHeader) page;
+
+	if (writer->fp == NULL && !slru_writer_open(writer))
+		return false;
+
+	hdr->pd_lower = SizeOfPageHeaderData;
+	hdr->pd_upper = BLCKSZ;
+	hdr->pd_special = BLCKSZ;
+	/* checksum last, so that it covers the complete page contents */
+	hdr->pd_checksum = pg_checksum_page(page, writer->blkno);
+
+	if (fseek(writer->fp, (long) (writer->pageno * BLCKSZ), SEEK_SET) != 0 ||
+		fwrite(page, BLCKSZ, 1, writer->fp) != 1)
+		return false;
+
+	writer->blkno++;
+	if (++writer->pageno == SLRU_PAGES_PER_SEGMENT)
+	{
+		FILE	   *done = writer->fp;
+
+		/* segment is full; the next page starts a new file */
+		writer->fp = NULL;
+		writer->pageno = 0;
+		writer->segno++;
+		if (fclose(done) != 0)
+			return false;
+	}
+	return true;
+}
+
+/*
+ * copy_to_new_format()
+ *
+ * Rewrite the SLRU segment files found in old_subdir of the old cluster into
+ * new_subdir of the new cluster, inserting a page header at the start of
+ * every block.  The old files are read in the order produced by
+ * get_sorted_hex_files() and treated as one continuous stream of
+ * element_size-byte items; each new page receives as many whole items as fit
+ * behind the header.  New segments are numbered consecutively from 0000.
+ *
+ * Returns 0 on success and -1 on failure.
+ *
+ * NOTE(review): because output always starts at segment 0000, an old
+ * directory whose first segment is not 0000 ends up at different page
+ * numbers -- confirm that this is what the callers expect.
+ */
+int
+copy_to_new_format(const char *old_subdir, const char *new_subdir, int element_size)
+{
+	char		old_path[MAXPGPATH];
+	char		new_path[MAXPGPATH];
+	PGAlignedBlock pg_buf;
+	SlruPageWriter writer = {0};
+	struct dirent **all_dirents;
+	int			total_read_files;
+	FILE	   *old_fp = NULL;
+	size_t		n_items;		/* items per new-format page */
+	size_t		page_items = 0; /* items collected for the current page */
+	int			result = -1;
+
+	if (element_size <= 0 ||
+		(size_t) element_size > BLCKSZ - SizeOfPageHeaderData)
+		return -1;
+	n_items = (BLCKSZ - SizeOfPageHeaderData) / element_size;
+
+	snprintf(old_path, sizeof(old_path), "%s/%s", old_cluster.pgdata, old_subdir);
+	snprintf(new_path, sizeof(new_path), "%s/%s", new_cluster.pgdata, new_subdir);
+
+	all_dirents = get_sorted_hex_files(old_path, &total_read_files);
+	if (all_dirents == NULL)
+		return -1;
+
+	/* always create the first segment, even if there is nothing to copy */
+	writer.dir = new_path;
+	if (!slru_writer_open(&writer))
+		goto done;
+
+	memset(pg_buf.data, 0, BLCKSZ);
+
+	for (int i = 0; i < total_read_files; i++)
+	{
+		const char *fname = all_dirents[i]->d_name;
+		char		read_file[MAXPGPATH];
+
+		/* skip ".", ".." and anything else that is not a segment file */
+		if (fname[0] == '\0' ||
+			fname[strspn(fname, "0123456789ABCDEF")] != '\0')
+			continue;
+
+		snprintf(read_file, sizeof(read_file), "%s/%s", old_path, fname);
+		old_fp = fopen(read_file, "rb");
+		if (old_fp == NULL)
+			goto done;
+
+		for (;;)
+		{
+			char	   *dst = pg_buf.data + SizeOfPageHeaderData +
+				page_items * element_size;
+
+			page_items += fread(dst, element_size, n_items - page_items, old_fp);
+			if (page_items < n_items)
+			{
+				/* short read: either an error or the end of this file */
+				if (ferror(old_fp))
+					goto done;
+				break;			/* keep filling the page from the next file */
+			}
+
+			if (!slru_writer_emit(&writer, pg_buf.data))
+				goto done;
+			memset(pg_buf.data, 0, BLCKSZ);
+			page_items = 0;
+		}
+
+		fclose(old_fp);
+		old_fp = NULL;
+	}
+
+	/* flush the final, partially filled page */
+	if (page_items > 0 && !slru_writer_emit(&writer, pg_buf.data))
+		goto done;
+
+	result = 0;
+
+done:
+	if (old_fp != NULL)
+		fclose(old_fp);
+	/* a failed close can mean lost writes, so report it */
+	if (writer.fp != NULL && fclose(writer.fp) != 0)
+		result = -1;
+	cleanup_dirents(all_dirents, total_read_files);
+	return result;
+}
+
 void
 check_file_clone(void)
 {
diff --git a/src/bin/pg_upgrade/function.c b/src/bin/pg_upgrade/function.c
index 93d975864ba..9141c30b165 100644
--- a/src/bin/pg_upgrade/function.c
+++ b/src/bin/pg_upgrade/function.c
@@ -7,6 +7,7 @@
  *	src/bin/pg_upgrade/function.c
  */
 
+
 #include "postgres_fe.h"
 
 #include "access/transam.h"
@@ -42,6 +43,43 @@ library_name_compare(const void *p1, const void *p2)
 			((const LibraryInfo *) p2)->dbnum;
 }
 
+/*
+ * scandir() comparator ordering directory entries by the numeric value of
+ * their hexadecimal names.  "." and ".." sort first, as -1, and compare
+ * equal to each other.
+ */
+static int
+file_name_compare(const struct dirent ** de_1, const struct dirent ** de_2)
+{
+	const char *names[2];
+	long		keys[2];
+
+	names[0] = (*de_1)->d_name;
+	names[1] = (*de_2)->d_name;
+
+	for (int i = 0; i < 2; i++)
+	{
+		/*
+		 * A name must differ from BOTH "." and ".." to be parsed as a
+		 * segment number; testing with || was true for every name.
+		 */
+		if (strcmp(names[i], ".") != 0 && strcmp(names[i], "..") != 0)
+			keys[i] = strtol(names[i], NULL, 16);
+		else
+			keys[i] = -1;
+	}
+
+	/*
+	 * Report the ordering without subtracting, which could overflow, and
+	 * return 0 for equal keys so that the comparator is consistent:
+	 * cmp(a, b) and cmp(b, a) must never both claim "greater".
+	 */
+	if (keys[0] < keys[1])
+		return -1;
+	if (keys[0] > keys[1])
+		return 1;
+	return 0;
+}
 
 /*
  * get_loadable_libraries()
@@ -109,6 +147,34 @@ get_loadable_libraries(void)
 }
 
 
+/*
+ * get_sorted_hex_files()
+ *
+ * Scan directory "dr" and return its entries ordered by file_name_compare().
+ * No filter is applied, so "." and ".." are part of the result.  *size
+ * receives scandir()'s return value; NULL is returned when that is negative.
+ */
+struct dirent **
+get_sorted_hex_files(char * dr, int * size)
+{
+	struct dirent **found = NULL;
+	int			nfound = scandir(dr, &found, NULL, file_name_compare);
+
+	*size = nfound;
+
+	/* scandir() failed: there is no list to hand back */
+	return (nfound < 0) ? NULL : found;
+}
+
+/* Free a scandir() result: total_read_files entries, then the array itself. */
+void
+cleanup_dirents(struct dirent ** all_dirents, int total_read_files)
+{
+	for (int i = total_read_files; i-- > 0;)
+		free(all_dirents[i]);
+
+	free(all_dirents);
+}
 /*
  * check_loadable_libraries()
  *
diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c
index 115faa222e3..bf01b1df5e1 100644
--- a/src/bin/pg_upgrade/pg_upgrade.c
+++ b/src/bin/pg_upgrade/pg_upgrade.c
@@ -38,27 +38,35 @@
 
 #include "postgres_fe.h"
 
+#include <dirent.h>
 #include <time.h>
+#include <unistd.h>
 
 #ifdef HAVE_LANGINFO_H
 #include <langinfo.h>
 #endif
 
+#include "access/slrudefs.h"
 #include "catalog/pg_class_d.h"
 #include "common/file_perm.h"
 #include "common/logging.h"
 #include "common/restricted_token.h"
 #include "fe_utils/string_utils.h"
 #include "pg_upgrade.h"
+#include "storage/bufpage.h"
 
 static void prepare_new_cluster(void);
 static void prepare_new_globals(void);
 static void create_new_objects(void);
 static void copy_xact_xlog_xid(void);
-static void set_frozenxids(bool minmxid_only);
+static void set_frozenxids(bool minmxid_only);;
 static void make_outputdirs(char *pgdata);
 static void setup(char *argv0, bool *live_check);
 
+
+#define MAXBUFSIZE (BLCKSZ - SizeOfPageHeaderData)
+#define SLRU_PAGES_PER_SEGMENT 32
+
 ClusterInfo old_cluster,
 			new_cluster;
 OSInfo		os_info;
@@ -573,11 +581,36 @@ copy_xact_xlog_xid(void)
 	 * Copy old commit logs to new data dir. pg_clog has been renamed to
 	 * pg_xact in post-10 clusters.
 	 */
-	copy_subdir_files(GET_MAJOR_VERSION(old_cluster.major_version) <= 906 ?
-					  "pg_clog" : "pg_xact",
-					  GET_MAJOR_VERSION(new_cluster.major_version) <= 906 ?
-					  "pg_clog" : "pg_xact");
+	 
+	/*
+	 * Clusters older than CLOG_FORMATCHANGE_CAT_VER store clog pages without
+	 * a page header.  Exactly one of the branches below must run: testing
+	 * both clusters with inclusive bounds let the rewrite and a plain copy
+	 * both happen when a catalog version equalled the cutoff.
+	 */
+	if (new_cluster.controldata.cat_ver < CLOG_FORMATCHANGE_CAT_VER)
+	{
+		/* neither cluster uses page headers: plain copy */
+		copy_subdir_files(GET_MAJOR_VERSION(old_cluster.major_version) <= 906 ?
+						  "pg_clog" : "pg_xact",
+						  GET_MAJOR_VERSION(new_cluster.major_version) <= 906 ?
+						  "pg_clog" : "pg_xact");
+	}
+	else if (old_cluster.controldata.cat_ver >= CLOG_FORMATCHANGE_CAT_VER)
+	{
+		/* both clusters use page headers: plain copy */
+		copy_subdir_files("pg_xact", "pg_xact");
+	}
+	else
+	{
+		/* only the new cluster uses page headers: rewrite every page */
+		if (copy_to_new_format(GET_MAJOR_VERSION(old_cluster.major_version) <= 906 ?
+							   "pg_clog" : "pg_xact",
+							   "pg_xact", 1) < 0)
+			pg_fatal("could not reformat clog files");
+	}
+
+	
 	prep_status("Setting oldest XID for new cluster");
 	exec_prog(UTILITY_LOG_FILE, NULL, true, true,
 			  "\"%s/pg_resetwal\" -f -u %u \"%s\"",
@@ -633,23 +666,44 @@ copy_xact_xlog_xid(void)
 	}
 	else if (new_cluster.controldata.cat_ver >= MULTIXACT_FORMATCHANGE_CAT_VER)
 	{
+		/* each directory is repacked using its own entry size; failure is fatal */
+		if (copy_to_new_format("pg_multixact/offsets", "pg_multixact/offsets", MULTIXACT_OFFSET_ENTRY_SIZE) < 0 ||
+			copy_to_new_format("pg_multixact/members", "pg_multixact/members", MULTIXACT_MEMBER_ENTRY_SIZE) < 0)
+			pg_fatal("could not reformat multixact files");
+		prep_status("Setting next multixact ID and offset for new cluster");
+
 		/*
+		 * we preserve all files and contents, so we must preserve both "next"
+		 * counters here and the oldest multi present on system.
+		 */
+		exec_prog(UTILITY_LOG_FILE, NULL, true, true,
+				  "\"%s/pg_resetwal\" -O %u -m %u,%u \"%s\"",
+				  new_cluster.bindir,
+				  old_cluster.controldata.chkpnt_nxtmxoff,
+				  old_cluster.controldata.chkpnt_nxtmulti,
+				  old_cluster.controldata.chkpnt_oldstMulti,
+				  new_cluster.pgdata);
+		check_ok();
+
+	}
+	/*else if (new_cluster.controldata.cat_ver >= MULTIXACT_FORMATCHANGE_CAT_VER)
+	{
+		*
 		 * Remove offsets/0000 file created by initdb that no longer matches
 		 * the new multi-xid value.  "members" starts at zero so no need to
 		 * remove it.
-		 */
+		 *
 		remove_new_subdir("pg_multixact/offsets", false);
 
 		prep_status("Setting oldest multixact ID in new cluster");
-
-		/*
+ 		*
 		 * We don't preserve files in this case, but it's important that the
 		 * oldest multi is set to the latest value used by the old system, so
 		 * that multixact.c returns the empty set for multis that might be
 		 * present on disk.  We set next multi to the value following that; it
 		 * might end up wrapped around (i.e. 0) if the old cluster had
 		 * next=MaxMultiXactId, but multixact.c can cope with that just fine.
-		 */
+		 *
 		exec_prog(UTILITY_LOG_FILE, NULL, true, true,
 				  "\"%s/pg_resetwal\" -m %u,%u \"%s\"",
 				  new_cluster.bindir,
@@ -657,7 +711,8 @@ copy_xact_xlog_xid(void)
 				  old_cluster.controldata.chkpnt_nxtmulti,
 				  new_cluster.pgdata);
 		check_ok();
-	}
+		
+	}*/
 
 	/* now reset the wal archives in the new cluster */
 	prep_status("Resetting WAL archives");
@@ -669,7 +724,6 @@ copy_xact_xlog_xid(void)
 	check_ok();
 }
 
-
 /*
  *	set_frozenxids()
  *
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index 31589b0fdc4..26bd56cc429 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -7,6 +7,7 @@
 
 #include <unistd.h>
 #include <assert.h>
+#include <dirent.h>
 #include <sys/stat.h>
 #include <sys/time.h>
 
@@ -113,7 +114,16 @@ extern char *output_files[];
  * version to this value.  pg_upgrade behavior depends on whether old and new
  * server versions are both newer than this, or only the new one is.
  */
-#define MULTIXACT_FORMATCHANGE_CAT_VER 201301231
+
+/*
+ * page header format change
+ */
+#define MULTIXACT_FORMATCHANGE_CAT_VER 202209141
+
+/* 
+ * page header format change 
+ */
+#define CLOG_FORMATCHANGE_CAT_VER 202210141
 
 /*
  * large object chunk size added to pg_controldata,
@@ -122,7 +132,7 @@ extern char *output_files[];
 #define LARGE_OBJECT_SIZE_PG_CONTROL_VER 942
 
 /*
- * change in JSONB format during 9.4 beta
+ * change in JSONB format during 9.4 beta
  */
 #define JSONB_FORMAT_CHANGE_CAT_VER 201409291
 
@@ -378,6 +388,9 @@ void		rewriteVisibilityMap(const char *fromfile, const char *tofile,
 void		check_file_clone(void);
 void		check_hard_link(void);
 
+
+int 	copy_to_new_format(const char *old_subdir, const char *new_subdir, int element_size);
+
 /* fopen_priv() is no longer different from fopen() */
 #define fopen_priv(path, mode)	fopen(path, mode)
 
@@ -385,6 +398,8 @@ void		check_hard_link(void);
 
 void		get_loadable_libraries(void);
 void		check_loadable_libraries(void);
+void 		cleanup_dirents(struct dirent ** all_dirents, int total_read_files);
+struct dirent** 	get_sorted_hex_files(char * dr, int * size);
 
 /* info.c */
 
diff --git a/src/common/relpath.c b/src/common/relpath.c
index 1b6b620ce83..4ec0e36d556 100644
--- a/src/common/relpath.c
+++ b/src/common/relpath.c
@@ -22,6 +22,16 @@
 #include "common/relpath.h"
 #include "storage/backendid.h"
 
+/*
+ * SLRU ID to path mapping
+ */
+#define PG_SLRU(symname,name,path,synchronize) \
+	path,
+
+static char *slru_dirs[] =
+{
+#include "access/slrulist.h"
+};
 
 /*
  * Lookup table of fork name by fork number.
@@ -129,7 +139,7 @@ GetDatabasePath(Oid dbOid, Oid spcOid)
 }
 
 /*
- * GetRelationPath - construct path to a relation's file
+ * GetSMgrFilePath - construct path to a relation's file
  *
  * Result is a palloc'd string.
  *
@@ -138,12 +148,27 @@ GetDatabasePath(Oid dbOid, Oid spcOid)
  * the trouble considering BackendId is just int anyway.
  */
 char *
-GetRelationPath(Oid dbOid, Oid spcOid, RelFileNumber relNumber,
+GetSMgrFilePath(Oid dbOid, Oid spcOid, RelFileNumber relNumber,
 				int backendId, ForkNumber forkNumber)
 {
 	char	   *path;
 
-	if (spcOid == GLOBALTABLESPACE_OID)
+	if (spcOid == SLRU_SPC_OID)
+	{
+		if (dbOid >= lengthof(slru_dirs) || forkNumber != 0 || backendId != InvalidBackendId)
+		{
+#ifndef FRONTEND
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("invalid SLRU file locator %u/%u/%u/%u/%u",
+							spcOid, dbOid, relNumber, backendId, forkNumber)));
+#else
+			return NULL;
+#endif
+		}
+		path = psprintf("%s/%04X", slru_dirs[dbOid], relNumber);
+	}
+	else if (spcOid == GLOBALTABLESPACE_OID)
 	{
 		/* Shared system relations live in {datadir}/global */
 		Assert(dbOid == 0);
diff --git a/src/include/access/clog.h b/src/include/access/clog.h
index 543f2e2643a..af16a2afcb9 100644
--- a/src/include/access/clog.h
+++ b/src/include/access/clog.h
@@ -40,18 +40,12 @@ extern void TransactionIdSetTreeStatus(TransactionId xid, int nsubxids,
 									   TransactionId *subxids, XidStatus status, XLogRecPtr lsn);
 extern XidStatus TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn);
 
-extern Size CLOGShmemBuffers(void);
-extern Size CLOGShmemSize(void);
-extern void CLOGShmemInit(void);
 extern void BootStrapCLOG(void);
 extern void StartupCLOG(void);
 extern void TrimCLOG(void);
-extern void CheckPointCLOG(void);
 extern void ExtendCLOG(TransactionId newestXact);
 extern void TruncateCLOG(TransactionId oldestXact, Oid oldestxid_datoid);
 
-extern int	clogsyncfiletag(const FileTag *ftag, char *path);
-
 /* XLOG stuff */
 #define CLOG_ZEROPAGE		0x00
 #define CLOG_TRUNCATE		0x10
diff --git a/src/include/access/commit_ts.h b/src/include/access/commit_ts.h
index 7662f8e1a9c..4e986fc023d 100644
--- a/src/include/access/commit_ts.h
+++ b/src/include/access/commit_ts.h
@@ -27,7 +27,6 @@ extern bool TransactionIdGetCommitTsData(TransactionId xid,
 extern TransactionId GetLatestCommitTsData(TimestampTz *ts,
 										   RepOriginId *nodeid);
 
-extern Size CommitTsShmemBuffers(void);
 extern Size CommitTsShmemSize(void);
 extern void CommitTsShmemInit(void);
 extern void BootStrapCommitTs(void);
@@ -41,8 +40,6 @@ extern void SetCommitTsLimit(TransactionId oldestXact,
 							 TransactionId newestXact);
 extern void AdvanceOldestCommitTsXid(TransactionId oldestXact);
 
-extern int	committssyncfiletag(const FileTag *ftag, char *path);
-
 /* XLOG stuff */
 #define COMMIT_TS_ZEROPAGE		0x00
 #define COMMIT_TS_TRUNCATE		0x10
diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h
index 4cbe17de7bd..96f8323f4e5 100644
--- a/src/include/access/multixact.h
+++ b/src/include/access/multixact.h
@@ -118,9 +118,6 @@ extern bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2);
 extern bool MultiXactIdPrecedesOrEquals(MultiXactId multi1,
 										MultiXactId multi2);
 
-extern int	multixactoffsetssyncfiletag(const FileTag *ftag, char *path);
-extern int	multixactmemberssyncfiletag(const FileTag *ftag, char *path);
-
 extern void AtEOXact_MultiXact(void);
 extern void AtPrepare_MultiXact(void);
 extern void PostPrepare_MultiXact(TransactionId xid);
diff --git a/src/include/access/slru.h b/src/include/access/slru.h
index 130c41c8632..04fc078e8c3 100644
--- a/src/include/access/slru.h
+++ b/src/include/access/slru.h
@@ -1,7 +1,7 @@
 /*-------------------------------------------------------------------------
  *
  * slru.h
- *		Simple LRU buffering for transaction status logfiles
+ *		Buffering for transaction status logfiles
  *
  * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
@@ -14,9 +14,36 @@
 #define SLRU_H
 
 #include "access/xlogdefs.h"
+#include "catalog/pg_tablespace_d.h"
+#include "storage/buf.h"
+#include "storage/bufmgr.h"
 #include "storage/lwlock.h"
+#include "storage/smgr.h"
 #include "storage/sync.h"
 
+/* Pseudo tablespace ID used for SLRU data; same value as SLRU_SPC_OID. */
+#define SLRU_SPC_ID 9
+
+/* Pseudo database IDs used by each cache. */
+#define PG_SLRU(symname,name,path, synchronize) \
+	symname,
+
+typedef enum SlruIds
+{
+#include "access/slrulist.h"
+	SLRU_NEXT_ID
+}			SlruIds;
+#undef PG_SLRU
+
+typedef bool (*SlruPagePrecedesFunction) (int, int);
+
+static inline RelFileLocator
+SlruRelFileLocator(uint32 slru_db_id, uint32 segment_id)
+{
+	RelFileLocator rlocator = {SLRU_SPC_ID, slru_db_id, segment_id};
+	return rlocator;
+}
+
 
 /*
  * Define SLRU segment size.  A page is the same BLCKSZ as is used everywhere
@@ -33,142 +60,40 @@
  */
 #define SLRU_PAGES_PER_SEGMENT	32
 
-/*
- * Page status codes.  Note that these do not include the "dirty" bit.
- * page_dirty can be true only in the VALID or WRITE_IN_PROGRESS states;
- * in the latter case it implies that the page has been re-dirtied since
- * the write started.
- */
-typedef enum
-{
-	SLRU_PAGE_EMPTY,			/* buffer is not in use */
-	SLRU_PAGE_READ_IN_PROGRESS, /* page is being read in */
-	SLRU_PAGE_VALID,			/* page is valid and not being written */
-	SLRU_PAGE_WRITE_IN_PROGRESS /* page is being written out */
-} SlruPageStatus;
-
-/*
- * Shared-memory state
- */
-typedef struct SlruSharedData
-{
-	LWLock	   *ControlLock;
-
-	/* Number of buffers managed by this SLRU structure */
-	int			num_slots;
-
-	/*
-	 * Arrays holding info for each buffer slot.  Page number is undefined
-	 * when status is EMPTY, as is page_lru_count.
-	 */
-	char	  **page_buffer;
-	SlruPageStatus *page_status;
-	bool	   *page_dirty;
-	int		   *page_number;
-	int		   *page_lru_count;
-	LWLockPadded *buffer_locks;
-
-	/*
-	 * Optional array of WAL flush LSNs associated with entries in the SLRU
-	 * pages.  If not zero/NULL, we must flush WAL before writing pages (true
-	 * for pg_xact, false for multixact, pg_subtrans, pg_notify).  group_lsn[]
-	 * has lsn_groups_per_page entries per buffer slot, each containing the
-	 * highest LSN known for a contiguous group of SLRU entries on that slot's
-	 * page.
-	 */
-	XLogRecPtr *group_lsn;
-	int			lsn_groups_per_page;
-
-	/*----------
-	 * We mark a page "most recently used" by setting
-	 *		page_lru_count[slotno] = ++cur_lru_count;
-	 * The oldest page is therefore the one with the highest value of
-	 *		cur_lru_count - page_lru_count[slotno]
-	 * The counts will eventually wrap around, but this calculation still
-	 * works as long as no page's age exceeds INT_MAX counts.
-	 *----------
-	 */
-	int			cur_lru_count;
-
-	/*
-	 * latest_page_number is the page number of the current end of the log;
-	 * this is not critical data, since we use it only to avoid swapping out
-	 * the latest page.
-	 */
-	int			latest_page_number;
-
-	/* SLRU's index for statistics purposes (might not be unique) */
-	int			slru_stats_idx;
-} SlruSharedData;
-
-typedef SlruSharedData *SlruShared;
-
-/*
- * SlruCtlData is an unshared structure that points to the active information
- * in shared memory.
- */
-typedef struct SlruCtlData
-{
-	SlruShared	shared;
-
-	/*
-	 * Which sync handler function to use when handing sync requests over to
-	 * the checkpointer.  SYNC_HANDLER_NONE to disable fsync (eg pg_notify).
-	 */
-	SyncRequestHandler sync_handler;
-
-	/*
-	 * Decide whether a page is "older" for truncation and as a hint for
-	 * evicting pages in LRU order.  Return true if every entry of the first
-	 * argument is older than every entry of the second argument.  Note that
-	 * !PagePrecedes(a,b) && !PagePrecedes(b,a) need not imply a==b; it also
-	 * arises when some entries are older and some are not.  For SLRUs using
-	 * SimpleLruTruncate(), this must use modular arithmetic.  (For others,
-	 * the behavior of this callback has no functional implications.)  Use
-	 * SlruPagePrecedesUnitTests() in SLRUs meeting its criteria.
-	 */
-	bool		(*PagePrecedes) (int, int);
-
-	/*
-	 * Dir is set during SimpleLruInit and does not change thereafter. Since
-	 * it's always the same, it doesn't need to be in shared memory.
-	 */
-	char		Dir[64];
-} SlruCtlData;
-
-typedef SlruCtlData *SlruCtl;
-
-
-extern Size SimpleLruShmemSize(int nslots, int nlsns);
-extern void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
-						  LWLock *ctllock, const char *subdir, int tranche_id,
-						  SyncRequestHandler sync_handler);
-extern int	SimpleLruZeroPage(SlruCtl ctl, int pageno);
-extern int	SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok,
-							  TransactionId xid);
-extern int	SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno,
-									   TransactionId xid);
-extern void SimpleLruWritePage(SlruCtl ctl, int slotno);
-extern void SimpleLruWriteAll(SlruCtl ctl, bool allow_redirtied);
 #ifdef USE_ASSERT_CHECKING
-extern void SlruPagePrecedesUnitTests(SlruCtl ctl, int per_page);
+extern void SlruPagePrecedesUnitTests(SlruPagePrecedesFunction PagePrecedes,
+									  int per_page);
 #else
 #define SlruPagePrecedesUnitTests(ctl, per_page) do {} while (0)
 #endif
-extern void SimpleLruTruncate(SlruCtl ctl, int cutoffPage);
-extern bool SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno);
+extern void SimpleLruTruncate(int slru_id, SlruPagePrecedesFunction PagePrecedes,
+							  int cutoffPage);
+extern bool SimpleLruDoesPhysicalPageExist(int slru_id, int pageno);
 
-typedef bool (*SlruScanCallback) (SlruCtl ctl, char *filename, int segpage,
+typedef bool (*SlruScanCallback) (int slru_id,
+								  SlruPagePrecedesFunction PagePrecedes,
+								  char *filename, int segpage,
 								  void *data);
-extern bool SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data);
-extern void SlruDeleteSegment(SlruCtl ctl, int segno);
-
-extern int	SlruSyncFileTag(SlruCtl ctl, const FileTag *ftag, char *path);
+extern bool SlruScanDirectory(int slru_id, SlruPagePrecedesFunction PagePrecedes,
+							  SlruScanCallback callback, void *data);
+extern void SlruDeleteSegment(int slru_id, int segno);
 
 /* SlruScanDirectory public callbacks */
-extern bool SlruScanDirCbReportPresence(SlruCtl ctl, char *filename,
+extern bool SlruScanDirCbReportPresence(int slru_id,
+										SlruPagePrecedesFunction PagePrecedes,
+										char *filename,
 										int segpage, void *data);
-extern bool SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int segpage,
+extern bool SlruScanDirCbDeleteAll(int slru_id, SlruPagePrecedesFunction PagePrecedes,
+								   char *filename, int segpage,
 								   void *data);
 
+/* Buffer access */
+extern Buffer ReadSlruBuffer(int slru_id, int pageno, ReadBufferMode mode);
+extern Buffer ZeroSlruBuffer(int slru_id, int pageno);
+extern bool ProbeSlruBuffer(int slru_id, int pageno);
+
+/* Interfaces used by the stats view */
+extern Oid SlruRelIdByName(const char *name);
+extern const char *SlruName(int slru_id);
+
 #endif							/* SLRU_H */
diff --git a/src/include/access/slrudefs.h b/src/include/access/slrudefs.h
new file mode 100644
index 00000000000..49cd78d923d
--- /dev/null
+++ b/src/include/access/slrudefs.h
@@ -0,0 +1,23 @@
+/*-------------------------------------------------------------------------
+ *
+ * slrudefs.h
+ *		macros for accessing contents of "slru" pages
+ *
+ *
+ * Copyright (c) 2021-2022, PostgreSQL Global Development Group
+ *
+ * src/include/access/slrudefs.h
+ *
+ *--------------------------------------------------------------------------
+ */
+#ifndef SLRUDEFS_H
+#define SLRUDEFS_H
+
+/* Must stay in sync with SLRU_PAGES_PER_SEGMENT in access/slru.h. */
+#define SLRU_PAGES_PER_SEGMENT 32
+
+#define MULTIXACT_MEMBER_ENTRY_SIZE 20
+
+#define MULTIXACT_OFFSET_ENTRY_SIZE 8
+
+#endif							/* SLRUDEFS_H */
diff --git a/src/include/access/slrulist.h b/src/include/access/slrulist.h
new file mode 100644
index 00000000000..c1289a1326c
--- /dev/null
+++ b/src/include/access/slrulist.h
@@ -0,0 +1,30 @@
+/*---------------------------------------------------------------------------
+ * slrulist.h
+ *
+ * The SLRU list is kept in its own source file for possible
+ * use by automatic tools.  The exact representation of an SLRU is determined
+ * by the PG_SLRU macro, which is not defined in this file; it can be
+ * defined by the caller for special purposes.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/access/slrulist.h
+ *---------------------------------------------------------------------------
+ */
+
+/* there is deliberately not an #ifndef SLRULIST_H here */
+
+/*
+ * List of SLRU entries.  Note that order of entries defines the
+ * numerical values of each SLRU's ID, which is used in in-memory structures.
+ */
+
+/* symbol name, textual name, path, synchronize */
+PG_SLRU(SLRU_CLOG_ID, "Xact", "pg_xact", true)
+PG_SLRU(SLRU_SUBTRANS_ID, "Subtrans", "pg_subtrans", false)
+PG_SLRU(SLRU_MULTIXACT_OFFSET_ID, "MultiXactOffset", "pg_multixact/offsets", true)
+PG_SLRU(SLRU_MULTIXACT_MEMBER_ID, "MultiXactMember", "pg_multixact/members", true)
+PG_SLRU(SLRU_COMMIT_TS_ID, "CommitTs", "pg_commit_ts", true)
+PG_SLRU(SLRU_SERIAL_ID, "Serial", "pg_serial", false)
+PG_SLRU(SLRU_NOTIFY_ID, "Notify", "pg_notify", false)
diff --git a/src/include/access/subtrans.h b/src/include/access/subtrans.h
index f94e116640b..4685a05bc92 100644
--- a/src/include/access/subtrans.h
+++ b/src/include/access/subtrans.h
@@ -18,11 +18,8 @@ extern void SubTransSetParent(TransactionId xid, TransactionId parent);
 extern TransactionId SubTransGetParent(TransactionId xid);
 extern TransactionId SubTransGetTopmostTransaction(TransactionId xid);
 
-extern Size SUBTRANSShmemSize(void);
-extern void SUBTRANSShmemInit(void);
 extern void BootStrapSUBTRANS(void);
 extern void StartupSUBTRANS(TransactionId oldestActiveXID);
-extern void CheckPointSUBTRANS(void);
 extern void ExtendSUBTRANS(TransactionId newestXact);
 extern void TruncateSUBTRANS(TransactionId oldestXact);
 
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 501a434b904..5af804509ce 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -57,6 +57,8 @@
  */
 
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	202212092
+
+
+#define CATALOG_VERSION_NO	202212093
 
 #endif
diff --git a/src/include/catalog/storage.h b/src/include/catalog/storage.h
index 9964c312aa2..04114305547 100644
--- a/src/include/catalog/storage.h
+++ b/src/include/catalog/storage.h
@@ -22,19 +22,20 @@
 /* GUC variables */
 extern PGDLLIMPORT int wal_skip_threshold;
 
-extern SMgrRelation RelationCreateStorage(RelFileLocator rlocator,
-										  char relpersistence,
-										  bool register_delete);
+extern SMgrFileHandle RelationCreateStorage(RelFileLocator rlocator,
+											char relpersistence,
+											bool register_delete);
 extern void RelationDropStorage(Relation rel);
 extern void RelationPreserveStorage(RelFileLocator rlocator, bool atCommit);
 extern void RelationPreTruncate(Relation rel);
 extern void RelationTruncate(Relation rel, BlockNumber nblocks);
-extern void RelationCopyStorage(SMgrRelation src, SMgrRelation dst,
-								ForkNumber forkNum, char relpersistence);
+extern void RelationCopyStorage(SMgrFileHandle src, SMgrFileHandle dst,
+								char relpersistence);
 extern bool RelFileLocatorSkippingWAL(RelFileLocator rlocator);
 extern Size EstimatePendingSyncsSpace(void);
 extern void SerializePendingSyncs(Size maxSize, char *startAddress);
 extern void RestorePendingSyncs(char *startAddress);
+extern void DropRelationFiles(RelFileLocator *delrels, int ndelrels, bool isRedo);
 
 /*
  * These functions used to be in storage/smgr/smgr.c, which explains the
diff --git a/src/include/common/relpath.h b/src/include/common/relpath.h
index 4bbd94393c8..606d0e8c64f 100644
--- a/src/include/common/relpath.h
+++ b/src/include/common/relpath.h
@@ -63,6 +63,9 @@ typedef enum ForkNumber
 
 #define FORKNAMECHARS	4		/* max chars for a fork name */
 
+/* Pseudo tablespace ID used for SLRUs. */
+#define SLRU_SPC_OID 9
+
 extern PGDLLIMPORT const char *const forkNames[];
 
 extern ForkNumber forkname_to_number(const char *forkName);
@@ -73,25 +76,25 @@ extern int	forkname_chars(const char *str, ForkNumber *fork);
  */
 extern char *GetDatabasePath(Oid dbOid, Oid spcOid);
 
-extern char *GetRelationPath(Oid dbOid, Oid spcOid, RelFileNumber relNumber,
+extern char *GetSMgrFilePath(Oid dbOid, Oid spcOid, RelFileNumber relNumber,
 							 int backendId, ForkNumber forkNumber);
 
 /*
  * Wrapper macros for GetRelationPath.  Beware of multiple
- * evaluation of the RelFileLocator or RelFileLocatorBackend argument!
+ * evaluation of the RelFileLocator or SMgrFileLocator argument!
  */
 
 /* First argument is a RelFileLocator */
 #define relpathbackend(rlocator, backend, forknum) \
-	GetRelationPath((rlocator).dbOid, (rlocator).spcOid, (rlocator).relNumber, \
+	GetSMgrFilePath((rlocator).dbOid, (rlocator).spcOid, (rlocator).relNumber, \
 					backend, forknum)
 
 /* First argument is a RelFileLocator */
 #define relpathperm(rlocator, forknum) \
 	relpathbackend(rlocator, InvalidBackendId, forknum)
 
-/* First argument is a RelFileLocatorBackend */
-#define relpath(rlocator, forknum) \
-	relpathbackend((rlocator).locator, (rlocator).backend, forknum)
+/* First argument is a SMgrFileLocator */
+#define smgrfilepath(slocator) \
+	GetSMgrFilePath((slocator).locator.dbOid, (slocator).locator.spcOid, (slocator).locator.relNumber, (slocator).backend, (slocator).forknum)
 
 #endif							/* RELPATH_H */
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index 406db6be783..e676692a30a 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -281,6 +281,11 @@ typedef union BufferDescPadded
 	char		pad[BUFFERDESC_PAD_TO_SIZE];
 } BufferDescPadded;
 
+#define BufferGetExternalLSN(bufHdr) \
+	BufferExternalLSNs[(bufHdr)->buf_id]
+#define BufferSetExternalLSN(bufHdr, lsn) \
+	BufferExternalLSNs[(bufHdr)->buf_id] = (lsn)
+
 /*
  * The PendingWriteback & WritebackContext structure are used to keep
  * information about pending flush requests to be issued to the OS.
@@ -307,6 +312,7 @@ typedef struct WritebackContext
 /* in buf_init.c */
 extern PGDLLIMPORT BufferDescPadded *BufferDescriptors;
 extern PGDLLIMPORT ConditionVariableMinimallyPadded *BufferIOCVArray;
+extern PGDLLIMPORT XLogRecPtr *BufferExternalLSNs;
 extern PGDLLIMPORT WritebackContext BackendWritebackContext;
 
 /* in localbuf.c */
@@ -413,10 +419,9 @@ extern int	BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id);
 extern void BufTableDelete(BufferTag *tagPtr, uint32 hashcode);
 
 /* localbuf.c */
-extern PrefetchBufferResult PrefetchLocalBuffer(SMgrRelation smgr,
-												ForkNumber forkNum,
+extern PrefetchBufferResult PrefetchLocalBuffer(SMgrFileHandle smgr,
 												BlockNumber blockNum);
-extern BufferDesc *LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum,
+extern BufferDesc *LocalBufferAlloc(SMgrFileHandle smgr,
 									BlockNumber blockNum, bool *foundPtr);
 extern void MarkLocalBufferDirty(Buffer buffer);
 extern void DropRelationLocalBuffers(RelFileLocator rlocator,
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index e1bd22441b0..4cddb59f500 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -42,8 +42,11 @@ typedef enum
 	RBM_ZERO_AND_CLEANUP_LOCK,	/* Like RBM_ZERO_AND_LOCK, but locks the page
 								 * in "cleanup" mode */
 	RBM_ZERO_ON_ERROR,			/* Read, but return an all-zeros page on error */
-	RBM_NORMAL_NO_LOG			/* Don't log page as invalid during WAL
+	RBM_NORMAL_NO_LOG,			/* Don't log page as invalid during WAL
 								 * replay; otherwise same as RBM_NORMAL */
+
+	RBM_TRIM					/* Read for TRIM functions in CLOG / MultiXact;
+								 * don't validate checksum or zero */
 } ReadBufferMode;
 
 /*
@@ -59,7 +62,7 @@ typedef struct PrefetchBufferResult
 struct WritebackContext;
 
 /* forward declared, to avoid including smgr.h here */
-struct SMgrRelationData;
+struct SMgrFileData;
 
 /* in globals.c ... this duplicates miscadmin.h */
 extern PGDLLIMPORT int NBuffers;
@@ -110,8 +113,7 @@ extern PGDLLIMPORT int32 *LocalRefCount;
 /*
  * prototypes for functions in bufmgr.c
  */
-extern PrefetchBufferResult PrefetchSharedBuffer(struct SMgrRelationData *smgr_reln,
-												 ForkNumber forkNum,
+extern PrefetchBufferResult PrefetchSharedBuffer(struct SMgrFileData *smgr_file,
 												 BlockNumber blockNum);
 extern PrefetchBufferResult PrefetchBuffer(Relation reln, ForkNumber forkNum,
 										   BlockNumber blockNum);
@@ -125,13 +127,19 @@ extern Buffer ReadBufferWithoutRelcache(RelFileLocator rlocator,
 										ForkNumber forkNum, BlockNumber blockNum,
 										ReadBufferMode mode, BufferAccessStrategy strategy,
 										bool permanent);
+extern Buffer ReadBufferWithoutRelcacheWithHit(RelFileLocator rlocator,
+										ForkNumber forkNum, BlockNumber blockNum,
+										ReadBufferMode mode, BufferAccessStrategy strategy,
+											   bool permanent, bool *hit);
 extern void ReleaseBuffer(Buffer buffer);
 extern void UnlockReleaseBuffer(Buffer buffer);
 extern void MarkBufferDirty(Buffer buffer);
 extern void IncrBufferRefCount(Buffer buffer);
 extern Buffer ReleaseAndReadBuffer(Buffer buffer, Relation relation,
 								   BlockNumber blockNum);
-
+extern bool BufferProbe(RelFileLocator rlocator, ForkNumber forkNum,
+						BlockNumber blockNum);
+ 
 extern void InitBufferPoolAccess(void);
 extern void AtEOXact_Buffers(bool isCommit);
 extern void PrintBufferLeakWarning(Buffer buffer);
@@ -139,18 +147,26 @@ extern void CheckPointBuffers(int flags);
 extern BlockNumber BufferGetBlockNumber(Buffer buffer);
 extern BlockNumber RelationGetNumberOfBlocksInFork(Relation relation,
 												   ForkNumber forkNum);
-extern void FlushOneBuffer(Buffer buffer);
-extern void FlushRelationBuffers(Relation rel);
-extern void FlushRelationsAllBuffers(struct SMgrRelationData **smgrs, int nrels);
 extern void CreateAndCopyRelationData(RelFileLocator src_rlocator,
 									  RelFileLocator dst_rlocator,
 									  bool permanent);
+
+extern void FlushOneBuffer(Buffer buffer);
+extern void FlushRelationBuffers(Relation rel);
+extern void FlushRelationsAllBuffers(RelFileLocator *locators, int nlocators);
 extern void FlushDatabaseBuffers(Oid dbid);
-extern void DropRelationBuffers(struct SMgrRelationData *smgr_reln,
+
+extern void DropRelationBuffers(RelFileLocator rlocator, BackendId backend,
 								ForkNumber *forkNum,
 								int nforks, BlockNumber *firstDelBlock);
-extern void DropRelationsAllBuffers(struct SMgrRelationData **smgr_reln,
-									int nlocators);
+
+typedef struct RelFileLocatorBackend
+{
+	RelFileLocator locator;
+	BackendId	backend;
+} RelFileLocatorBackend;
+
+extern void DropRelationsAllBuffers(RelFileLocatorBackend *locators, int nlocators);
 extern void DropDatabaseBuffers(Oid dbid);
 
 #define RelationGetNumberOfBlocks(reln) \
diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h
index 2708c4b683d..3871a386d4b 100644
--- a/src/include/storage/bufpage.h
+++ b/src/include/storage/bufpage.h
@@ -201,6 +201,7 @@ typedef PageHeaderData *PageHeader;
  * handling pages.
  */
 #define PG_PAGE_LAYOUT_VERSION		4
+#define PG_METAPAGE_LAYOUT_VERSION  1
 #define PG_DATA_CHECKSUM_VERSION	1
 
 /* ----------------------------------------------------------------
@@ -302,6 +303,20 @@ PageSetPageSizeAndVersion(Page page, Size size, uint8 version)
 	((PageHeader) page)->pd_pagesize_version = size | version;
 }
 
+/*
+ * PageSetHeaderDataNonRel
+ * Sets the LSN, page size and version, and checksum
+ */
+#define PageSetHeaderDataNonRel(page, pageno, lsn, size, version) \
+( \
+	PageSetLSN(page, lsn), \
+	PageSetPageSizeAndVersion(page, size, version), \
+	PageClearHasFreeLinePointers(page), \
+	PageSetChecksumInplace(page, pageno) \
+)
+
+
+
 /* ----------------
  *		page special data functions
  * ----------------
@@ -486,6 +501,8 @@ StaticAssertDecl(BLCKSZ == ((BLCKSZ / sizeof(size_t)) * sizeof(size_t)),
 				 "BLCKSZ has to be a multiple of sizeof(size_t)");
 
 extern void PageInit(Page page, Size pageSize, Size specialSize);
+extern void PageInitSLRU(Page page, Size pageSize, Size specialSize);
+
 extern bool PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags);
 extern OffsetNumber PageAddItemExtended(Page page, Item item, Size size,
 										OffsetNumber offsetNumber, int flags);
diff --git a/src/include/storage/md.h b/src/include/storage/md.h
index 10aa1b0109b..bcb87d56295 100644
--- a/src/include/storage/md.h
+++ b/src/include/storage/md.h
@@ -21,28 +21,26 @@
 
 /* md storage manager functionality */
 extern void mdinit(void);
-extern void mdopen(SMgrRelation reln);
-extern void mdclose(SMgrRelation reln, ForkNumber forknum);
-extern void mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo);
-extern bool mdexists(SMgrRelation reln, ForkNumber forknum);
-extern void mdunlink(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo);
-extern void mdextend(SMgrRelation reln, ForkNumber forknum,
+extern void mdopen(SMgrFileHandle sfile);
+extern void mdclose(SMgrFileHandle sfile);
+extern void mdcreate(SMgrFileHandle sfile, bool isRedo);
+extern bool mdexists(SMgrFileHandle sfile);
+extern void mdunlink(SMgrFileLocator slocator, bool isRedo);
+extern void mdextend(SMgrFileHandle sfile,
 					 BlockNumber blocknum, char *buffer, bool skipFsync);
-extern bool mdprefetch(SMgrRelation reln, ForkNumber forknum,
+extern bool mdprefetch(SMgrFileHandle sfile,
 					   BlockNumber blocknum);
-extern void mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
+extern void mdread(SMgrFileHandle sfile, BlockNumber blocknum,
 				   char *buffer);
-extern void mdwrite(SMgrRelation reln, ForkNumber forknum,
+extern void mdwrite(SMgrFileHandle sfile,
 					BlockNumber blocknum, char *buffer, bool skipFsync);
-extern void mdwriteback(SMgrRelation reln, ForkNumber forknum,
+extern void mdwriteback(SMgrFileHandle sfile,
 						BlockNumber blocknum, BlockNumber nblocks);
-extern BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum);
-extern void mdtruncate(SMgrRelation reln, ForkNumber forknum,
-					   BlockNumber nblocks);
-extern void mdimmedsync(SMgrRelation reln, ForkNumber forknum);
+extern BlockNumber mdnblocks(SMgrFileHandle sfile);
+extern void mdtruncate(SMgrFileHandle sfile, BlockNumber nblocks);
+extern void mdimmedsync(SMgrFileHandle sfile);
 
 extern void ForgetDatabaseSyncRequests(Oid dbid);
-extern void DropRelationFiles(RelFileLocator *delrels, int ndelrels, bool isRedo);
 
 /* md sync callbacks */
 extern int	mdsyncfiletag(const FileTag *ftag, char *path);
diff --git a/src/include/storage/relfilelocator.h b/src/include/storage/relfilelocator.h
index 10f41f3abb3..296cb1b8e51 100644
--- a/src/include/storage/relfilelocator.h
+++ b/src/include/storage/relfilelocator.h
@@ -53,6 +53,8 @@
  * Note: various places use RelFileLocator in hashtable keys.  Therefore,
  * there *must not* be any unused padding bytes in this struct.  That
  * should be safe as long as all the fields are of type Oid.
+ *
+ * See also SMgrFileLocator in smgr.h.
  */
 typedef struct RelFileLocator
 {
@@ -62,38 +64,15 @@ typedef struct RelFileLocator
 } RelFileLocator;
 
 /*
- * Augmenting a relfilelocator with the backend ID provides all the information
- * we need to locate the physical storage.  The backend ID is InvalidBackendId
- * for regular relations (those accessible to more than one backend), or the
- * owning backend's ID for backend-local relations.  Backend-local relations
- * are always transient and removed in case of a database crash; they are
- * never WAL-logged or fsync'd.
- */
-typedef struct RelFileLocatorBackend
-{
-	RelFileLocator locator;
-	BackendId	backend;
-} RelFileLocatorBackend;
-
-#define RelFileLocatorBackendIsTemp(rlocator) \
-	((rlocator).backend != InvalidBackendId)
-
-/*
- * Note: RelFileLocatorEquals and RelFileLocatorBackendEquals compare relNumber
+ * Note: RelFileLocatorEquals compares relNumber
  * first since that is most likely to be different in two unequal
  * RelFileLocators.  It is probably redundant to compare spcOid if the other
  * fields are found equal, but do it anyway to be sure.  Likewise for checking
- * the backend ID in RelFileLocatorBackendEquals.
+ * the backend ID in SMgrFileLocatorBackendEquals.
  */
 #define RelFileLocatorEquals(locator1, locator2) \
 	((locator1).relNumber == (locator2).relNumber && \
 	 (locator1).dbOid == (locator2).dbOid && \
 	 (locator1).spcOid == (locator2).spcOid)
 
-#define RelFileLocatorBackendEquals(locator1, locator2) \
-	((locator1).locator.relNumber == (locator2).locator.relNumber && \
-	 (locator1).locator.dbOid == (locator2).locator.dbOid && \
-	 (locator1).backend == (locator2).backend && \
-	 (locator1).locator.spcOid == (locator2).locator.spcOid)
-
 #endif							/* RELFILELOCATOR_H */
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index a07715356ba..0ed569b2836 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -18,31 +18,56 @@
 #include "storage/block.h"
 #include "storage/relfilelocator.h"
 
+
 /*
- * smgr.c maintains a table of SMgrRelation objects, which are essentially
- * cached file handles.  An SMgrRelation is created (if not already present)
+ * SMgrFileLocator contains all the information needed to locate the physical
+ * storage of a relation fork, or some other file that is managed by the buffer
+ * manager.
+ *
+ * The backend ID is InvalidBackendId for regular relations (those accessible
+ * to more than one backend), or the owning backend's ID for backend-local
+ * relations. Backend-local relations are always transient and removed in
+ * case of a database crash; they are never WAL-logged or fsync'd.
+ */
+typedef struct SMgrFileLocator
+{
+	RelFileLocator locator;
+	BackendId	backend;
+	ForkNumber	forknum;
+} SMgrFileLocator;
+
+#define SMgrFileLocatorIsTemp(slocator) \
+	((slocator).backend != InvalidBackendId)
+
+/*
+ * smgr.c maintains a table of SMgrFileData objects, which are essentially
+ * cached file handles.  An SMgrFile is created (if not already present)
  * by smgropen(), and destroyed by smgrclose().  Note that neither of these
  * operations imply I/O, they just create or destroy a hashtable entry.
  * (But smgrclose() may release associated resources, such as OS-level file
  * descriptors.)
  *
- * An SMgrRelation may have an "owner", which is just a pointer to it from
- * somewhere else; smgr.c will clear this pointer if the SMgrRelation is
+ * An SMgrFile may have an "owner", which is just a pointer to it from
+ * somewhere else; smgr.c will clear this pointer if the SMgrFile is
  * closed.  We use this to avoid dangling pointers from relcache to smgr
  * without having to make the smgr explicitly aware of relcache.  There
  * can't be more than one "owner" pointer per SMgrRelation, but that's
  * all we need.
  *
- * SMgrRelations that do not have an "owner" are considered to be transient,
+ * SMgrFiles that do not have an "owner" are considered to be transient,
  * and are deleted at end of transaction.
+ *
+ * A file that is represented by an SMgrFile can be managed by the buffer
+ * manager. It is used for relation files and, via the SLRU_SPC_OID pseudo
+ * tablespace, for SLRU files.
  */
-typedef struct SMgrRelationData
+typedef struct SMgrFileData
 {
-	/* rlocator is the hashtable lookup key, so it must be first! */
-	RelFileLocatorBackend smgr_rlocator;	/* relation physical identifier */
+	/* locator is the hashtable lookup key, so must be first! */
+	SMgrFileLocator smgr_locator;	/* file physical identifier */
 
 	/* pointer to owning pointer, or NULL if none */
-	struct SMgrRelationData **smgr_owner;
+	struct SMgrFileData **smgr_owner;
 
 	/*
 	 * The following fields are reset to InvalidBlockNumber upon a cache flush
@@ -51,7 +76,7 @@ typedef struct SMgrRelationData
 	 * invalidation for fork extension.
 	 */
 	BlockNumber smgr_targblock; /* current insertion target block */
-	BlockNumber smgr_cached_nblocks[MAX_FORKNUM + 1];	/* last known size */
+	BlockNumber smgr_cached_nblocks;	/* last known size */
 
 	/* additional public fields may someday exist here */
 
@@ -65,46 +90,46 @@ typedef struct SMgrRelationData
 	 * for md.c; per-fork arrays of the number of open segments
 	 * (md_num_open_segs) and the segments themselves (md_seg_fds).
 	 */
-	int			md_num_open_segs[MAX_FORKNUM + 1];
-	struct _MdfdVec *md_seg_fds[MAX_FORKNUM + 1];
+	int			md_num_open_segs;
+	struct _MdfdVec *md_seg_fds;
 
-	/* if unowned, list link in list of all unowned SMgrRelations */
+	/* if unowned, list link in list of all unowned SMgrFiles */
 	dlist_node	node;
-} SMgrRelationData;
+} SMgrFileData;
 
-typedef SMgrRelationData *SMgrRelation;
+typedef SMgrFileData *SMgrFileHandle;
 
 #define SmgrIsTemp(smgr) \
-	RelFileLocatorBackendIsTemp((smgr)->smgr_rlocator)
+	SMgrFileLocatorIsTemp((smgr)->smgr_locator)
 
 extern void smgrinit(void);
-extern SMgrRelation smgropen(RelFileLocator rlocator, BackendId backend);
-extern bool smgrexists(SMgrRelation reln, ForkNumber forknum);
-extern void smgrsetowner(SMgrRelation *owner, SMgrRelation reln);
-extern void smgrclearowner(SMgrRelation *owner, SMgrRelation reln);
-extern void smgrclose(SMgrRelation reln);
+extern SMgrFileHandle smgropen(RelFileLocator rlocator, BackendId backend, ForkNumber forkNum);
+extern bool smgrexists(SMgrFileHandle sfile);
+extern void smgrsetowner(SMgrFileHandle *owner, SMgrFileHandle sfile);
+extern void smgrclearowner(SMgrFileHandle *owner, SMgrFileHandle sfile);
+extern void smgrclose(SMgrFileHandle sfile);
 extern void smgrcloseall(void);
-extern void smgrcloserellocator(RelFileLocatorBackend rlocator);
-extern void smgrrelease(SMgrRelation reln);
 extern void smgrreleaseall(void);
-extern void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo);
-extern void smgrdosyncall(SMgrRelation *rels, int nrels);
-extern void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo);
-extern void smgrextend(SMgrRelation reln, ForkNumber forknum,
+extern void smgrcreate(SMgrFileHandle sfile, bool isRedo);
+extern void smgrextend(SMgrFileHandle sfile,
 					   BlockNumber blocknum, char *buffer, bool skipFsync);
-extern bool smgrprefetch(SMgrRelation reln, ForkNumber forknum,
-						 BlockNumber blocknum);
-extern void smgrread(SMgrRelation reln, ForkNumber forknum,
+extern bool smgrprefetch(SMgrFileHandle sfile, BlockNumber blocknum);
+extern void smgrread(SMgrFileHandle sfile,
 					 BlockNumber blocknum, char *buffer);
-extern void smgrwrite(SMgrRelation reln, ForkNumber forknum,
+extern void smgrwrite(SMgrFileHandle sfile,
 					  BlockNumber blocknum, char *buffer, bool skipFsync);
-extern void smgrwriteback(SMgrRelation reln, ForkNumber forknum,
+extern void smgrwriteback(SMgrFileHandle sfile,
 						  BlockNumber blocknum, BlockNumber nblocks);
-extern BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum);
-extern BlockNumber smgrnblocks_cached(SMgrRelation reln, ForkNumber forknum);
-extern void smgrtruncate(SMgrRelation reln, ForkNumber *forknum,
-						 int nforks, BlockNumber *nblocks);
-extern void smgrimmedsync(SMgrRelation reln, ForkNumber forknum);
+extern BlockNumber smgrnblocks(SMgrFileHandle sfile);
+extern BlockNumber smgrnblocks_cached(SMgrFileHandle sfile);
+extern void smgrimmedsync(SMgrFileHandle sfile);
+extern void smgrunlink(SMgrFileHandle sfile, bool isRedo);
+
+extern void smgrtruncate_multi(RelFileLocator rlocator, BackendId backend, ForkNumber *forks, int nforks, BlockNumber *nblocks);
+extern void smgrunlink_multi(RelFileLocator rlocator, BackendId backend, ForkNumber *forks, int nforks, bool isRedo);
+
+extern void smgrcloserellocator(RelFileLocator rlocator, BackendId backend);
+
 extern void AtEOXact_SMgr(void);
 extern bool ProcessBarrierSmgrRelease(void);
 
diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h
index 23748b72caf..c494c01056f 100644
--- a/src/include/utils/inval.h
+++ b/src/include/utils/inval.h
@@ -48,7 +48,7 @@ extern void CacheInvalidateRelcacheByTuple(HeapTuple classTuple);
 
 extern void CacheInvalidateRelcacheByRelid(Oid relid);
 
-extern void CacheInvalidateSmgr(RelFileLocatorBackend rlocator);
+extern void CacheInvalidateSmgr(RelFileLocator rlocator, BackendId backend);
 
 extern void CacheInvalidateRelmap(Oid databaseId);
 
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index f383a2fca9e..290838ab8c5 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -54,7 +54,7 @@ typedef LockInfoData *LockInfo;
 typedef struct RelationData
 {
 	RelFileLocator rd_locator;	/* relation physical identifier */
-	SMgrRelation rd_smgr;		/* cached file handle, or NULL */
+	SMgrFileHandle rd_smgr[MAX_FORKNUM + 1];		/* per-fork cached file handles, or NULL */
 	int			rd_refcnt;		/* reference count */
 	BackendId	rd_backend;		/* owning backend id, if temporary relation */
 	bool		rd_islocaltemp; /* rel is a temp rel of this session */
@@ -562,16 +562,16 @@ typedef struct ViewOptions
  * Note: since a relcache flush can cause the file handle to be closed again,
  * it's unwise to hold onto the pointer returned by this function for any
  * long period.  Recommended practice is to just re-execute RelationGetSmgr
- * each time you need to access the SMgrRelation.  It's quite cheap in
+ * each time you need to access the SMgrFileHandle.  It's quite cheap in
  * comparison to whatever an smgr function is going to do.
  */
-static inline SMgrRelation
-RelationGetSmgr(Relation rel)
+static inline SMgrFileHandle
+RelationGetSmgr(Relation rel, ForkNumber forkNum)
 {
-	if (unlikely(rel->rd_smgr == NULL))
-		smgrsetowner(&(rel->rd_smgr), smgropen(rel->rd_locator, rel->rd_backend));
-	return rel->rd_smgr;
+	if (unlikely(rel->rd_smgr[forkNum] == NULL))
+		smgrsetowner(&(rel->rd_smgr[forkNum]), smgropen(rel->rd_locator, rel->rd_backend, forkNum));
+	return rel->rd_smgr[forkNum];
 }
 
 /*
  * RelationCloseSmgr
@@ -580,13 +580,16 @@ RelationGetSmgr(Relation rel)
 static inline void
 RelationCloseSmgr(Relation relation)
 {
-	if (relation->rd_smgr != NULL)
-		smgrclose(relation->rd_smgr);
-
-	/* smgrclose should unhook from owner pointer */
-	Assert(relation->rd_smgr == NULL);
+	for (int i = 0; i <= MAX_FORKNUM; i++)
+	{
+		if (relation->rd_smgr[i] == NULL)
+			continue;
+		smgrclose(relation->rd_smgr[i]);
+		/* smgrclose should unhook from owner pointer */
+		Assert(relation->rd_smgr[i] == NULL);
+	}
 }
 #endif							/* !FRONTEND */
 
 /*
  * RelationGetTargetBlock
@@ -597,7 +600,7 @@ RelationCloseSmgr(Relation relation)
  * so there's no need to re-open the smgr handle if it's not currently open.
  */
 #define RelationGetTargetBlock(relation) \
-	( (relation)->rd_smgr != NULL ? (relation)->rd_smgr->smgr_targblock : InvalidBlockNumber )
+	( (relation)->rd_smgr[MAIN_FORKNUM] != NULL ? (relation)->rd_smgr[MAIN_FORKNUM]->smgr_targblock : InvalidBlockNumber )
 
 /*
  * RelationSetTargetBlock
@@ -605,7 +608,7 @@ RelationCloseSmgr(Relation relation)
  */
 #define RelationSetTargetBlock(relation, targblock) \
 	do { \
-		RelationGetSmgr(relation)->smgr_targblock = (targblock); \
+		RelationGetSmgr(relation, MAIN_FORKNUM)->smgr_targblock = (targblock); \
 	} while (0)
 
 /*
-- 
2.38.1