From 931fbc4daff61c60520c3865963b3c59c68450d4 Mon Sep 17 00:00:00 2001 From: Zsolt Parragi Date: Thu, 22 Jan 2026 20:49:29 +0000 Subject: [PATCH v5 4/4] Refactor smgr API: mdcreate needs the old relfilelocator Sometimes mdcreate creates a new file because the data is being moved to a new location. For example, ALTER TABLE can cause a complete rewrite (create a new file, write the new data, then delete the original file), and TRUNCATE can simply create a new file without copying anything from the previous one. Now that extensions can provide their own storage managers, some of them might want to track these changes: if they previously stored additional metadata keyed by the old RelFileLocator, they might want to reuse or move that data under the new RelFileLocator. With this change, smgrcreate() and the smgr_create callback (and therefore mdcreate) receive the old RelFileLocator along with the new one for operations that create a new file for an existing relation. When there is no previous file (for example a brand-new relation, or WAL replay), callers pass the relation's own locator as the old one. --- src/backend/access/heap/heapam_handler.c | 10 +- src/backend/access/transam/xlogutils.c | 2 +- src/backend/catalog/heap.c | 2 +- src/backend/catalog/index.c | 2 +- src/backend/catalog/storage.c | 8 +- src/backend/commands/sequence.c | 2 +- src/backend/commands/tablecmds.c | 4 +- src/backend/postmaster/postmaster.c | 2 + src/backend/storage/buffer/bufmgr.c | 7 +- src/backend/storage/smgr/md.c | 116 +++++--- src/backend/storage/smgr/smgr.c | 281 ++++++++++++++---- src/backend/tcop/postgres.c | 2 + src/backend/utils/cache/relcache.c | 2 +- src/backend/utils/init/miscinit.c | 63 +++- src/backend/utils/misc/guc_parameters.dat | 8 +- src/backend/utils/misc/postgresql.conf.sample | 1 + src/include/catalog/storage.h | 3 +- src/include/miscadmin.h | 2 + src/include/storage/md.h | 32 -- src/include/storage/smgr.h | 108 +++++-- .../fsync_checker/fsync_checker_smgr.c | 61 ++-- src/tools/pgindent/typedefs.list | 1 + 22 files changed, 516 insertions(+), 203 deletions(-) diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index
cbef73e5d4b..4466d926a44 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -588,6 +588,8 @@ heapam_relation_set_new_filelocator(Relation rel, { SMgrRelation srel; + RelFileLocator oldlocator = rel->rd_locator; + /* * Initialize to the minimum XID that could put tuples in the table. We * know that no xacts older than RecentXmin are still running, so that @@ -605,7 +607,7 @@ heapam_relation_set_new_filelocator(Relation rel, */ *minmulti = GetOldestMultiXactId(); - srel = RelationCreateStorage(*newrlocator, persistence, true); + srel = RelationCreateStorage(oldlocator, *newrlocator, persistence, true); /* * If required, set up an init fork for an unlogged table so that it can @@ -615,7 +617,7 @@ heapam_relation_set_new_filelocator(Relation rel, { Assert(rel->rd_rel->relkind == RELKIND_RELATION || rel->rd_rel->relkind == RELKIND_TOASTVALUE); - smgrcreate(srel, INIT_FORKNUM, false); + smgrcreate(oldlocator, srel, INIT_FORKNUM, false); log_smgrcreate(newrlocator, INIT_FORKNUM); } @@ -648,7 +650,7 @@ heapam_relation_copy_data(Relation rel, const RelFileLocator *newrlocator) * NOTE: any conflict in relfilenumber value will be caught in * RelationCreateStorage(). 
*/ - dstrel = RelationCreateStorage(*newrlocator, rel->rd_rel->relpersistence, true); + dstrel = RelationCreateStorage(rel->rd_locator, *newrlocator, rel->rd_rel->relpersistence, true); /* copy main fork */ RelationCopyStorage(RelationGetSmgr(rel), dstrel, MAIN_FORKNUM, @@ -660,7 +662,7 @@ heapam_relation_copy_data(Relation rel, const RelFileLocator *newrlocator) { if (smgrexists(RelationGetSmgr(rel), forkNum)) { - smgrcreate(dstrel, forkNum, false); + smgrcreate(rel->rd_locator, dstrel, forkNum, false); /* * WAL log creation if the relation is persistent, or this is the diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c index 5fbe39133b8..42ff9f7820e 100644 --- a/src/backend/access/transam/xlogutils.c +++ b/src/backend/access/transam/xlogutils.c @@ -487,7 +487,7 @@ XLogReadBufferExtended(RelFileLocator rlocator, ForkNumber forknum, * filesystem loses an inode during a crash. Better to write the data * until we are actually told to delete the file.) 
*/ - smgrcreate(smgr, forknum, true); + smgrcreate(rlocator, smgr, forknum, true); lastblock = smgrnblocks(smgr, forknum); diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 606434823cf..0ddc8402ff1 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -386,7 +386,7 @@ heap_create(const char *relname, relpersistence, relfrozenxid, relminmxid); else if (RELKIND_HAS_STORAGE(rel->rd_rel->relkind)) - RelationCreateStorage(rel->rd_locator, relpersistence, true); + RelationCreateStorage(rel->rd_locator, rel->rd_locator, relpersistence, true); else Assert(false); } diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 43de42ce39e..ca3ff5e9a85 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -3087,7 +3087,7 @@ index_build(Relation heapRelation, if (indexRelation->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED && !smgrexists(RelationGetSmgr(indexRelation), INIT_FORKNUM)) { - smgrcreate(RelationGetSmgr(indexRelation), INIT_FORKNUM, false); + smgrcreate(indexRelation->rd_locator, RelationGetSmgr(indexRelation), INIT_FORKNUM, false); log_smgrcreate(&indexRelation->rd_locator, INIT_FORKNUM); indexRelation->rd_indam->ambuildempty(indexRelation); } diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c index e443a4993c5..9b1b0f3a44a 100644 --- a/src/backend/catalog/storage.c +++ b/src/backend/catalog/storage.c @@ -119,7 +119,7 @@ AddPendingSync(const RelFileLocator *rlocator) * pass register_delete = false. 
*/ SMgrRelation -RelationCreateStorage(RelFileLocator rlocator, char relpersistence, +RelationCreateStorage(RelFileLocator oldlocator, RelFileLocator rlocator, char relpersistence, bool register_delete) { SMgrRelation srel; @@ -148,7 +148,7 @@ RelationCreateStorage(RelFileLocator rlocator, char relpersistence, } srel = smgropen(rlocator, procNumber); - smgrcreate(srel, MAIN_FORKNUM, false); + smgrcreate(oldlocator, srel, MAIN_FORKNUM, false); if (needs_wal) log_smgrcreate(&srel->smgr_rlocator.locator, MAIN_FORKNUM); @@ -992,7 +992,7 @@ smgr_redo(XLogReaderState *record) SMgrRelation reln; reln = smgropen(xlrec->rlocator, INVALID_PROC_NUMBER); - smgrcreate(reln, xlrec->forkNum, true); + smgrcreate(xlrec->rlocator, reln, xlrec->forkNum, true); } else if (info == XLOG_SMGR_TRUNCATE) { @@ -1013,7 +1013,7 @@ smgr_redo(XLogReaderState *record) * XLogReadBufferForRedo, we prefer to recreate the rel and replay the * log as best we can until the drop is seen. */ - smgrcreate(reln, MAIN_FORKNUM, true); + smgrcreate(xlrec->rlocator, reln, MAIN_FORKNUM, true); /* * Before we perform the truncation, update minimum recovery point to diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c index e1b808bbb60..9b3f2384720 100644 --- a/src/backend/commands/sequence.c +++ b/src/backend/commands/sequence.c @@ -337,7 +337,7 @@ fill_seq_with_data(Relation rel, HeapTuple tuple) SMgrRelation srel; srel = smgropen(rel->rd_locator, INVALID_PROC_NUMBER); - smgrcreate(srel, INIT_FORKNUM, false); + smgrcreate(rel->rd_locator, srel, INIT_FORKNUM, false); log_smgrcreate(&rel->rd_locator, INIT_FORKNUM); fill_seq_fork_with_data(rel, tuple, INIT_FORKNUM); FlushRelationBuffers(rel); diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index f976c0e5c7e..f7f4b29551d 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -17193,7 +17193,7 @@ index_copy_data(Relation rel, RelFileLocator newrlocator) * NOTE: any conflict 
in relfilenumber value will be caught in * RelationCreateStorage(). */ - dstrel = RelationCreateStorage(newrlocator, rel->rd_rel->relpersistence, true); + dstrel = RelationCreateStorage(rel->rd_locator, newrlocator, rel->rd_rel->relpersistence, true); /* copy main fork */ RelationCopyStorage(RelationGetSmgr(rel), dstrel, MAIN_FORKNUM, @@ -17205,7 +17205,7 @@ index_copy_data(Relation rel, RelFileLocator newrlocator) { if (smgrexists(RelationGetSmgr(rel), forkNum)) { - smgrcreate(dstrel, forkNum, false); + smgrcreate(rel->rd_locator, dstrel, forkNum, false); /* * WAL log creation if the relation is persistent, or this is the diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index fbd726d39da..76ade8aa4b6 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -939,6 +939,8 @@ PostmasterMain(int argc, char *argv[]) */ process_shared_preload_libraries(); + process_smgr_chain(); + /* * Initialize SSL library, if specified. */ diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 6f935648ae9..60b2757c6f1 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -1055,7 +1055,7 @@ ExtendBufferedRelTo(BufferManagerRelation bmr, /* recheck, fork might have been created concurrently */ if (!smgrexists(BMR_GET_SMGR(bmr), fork)) - smgrcreate(BMR_GET_SMGR(bmr), fork, flags & EB_PERFORMING_RECOVERY); + smgrcreate(bmr.rel->rd_locator, BMR_GET_SMGR(bmr), fork, flags & EB_PERFORMING_RECOVERY); UnlockRelationForExtension(bmr.rel, ExclusiveLock); } @@ -5394,7 +5394,7 @@ CreateAndCopyRelationData(RelFileLocator src_rlocator, * directory. Therefore, each individual relation doesn't need to be * registered for cleanup. */ - RelationCreateStorage(dst_rlocator, relpersistence, false); + RelationCreateStorage(src_rlocator, dst_rlocator, relpersistence, false); /* copy main fork. 
*/ RelationCopyStorageUsingBuffer(src_rlocator, dst_rlocator, MAIN_FORKNUM, @@ -5406,7 +5406,8 @@ CreateAndCopyRelationData(RelFileLocator src_rlocator, { if (smgrexists(src_rel, forkNum)) { - smgrcreate(dst_rel, forkNum, false); + /* TODO: confirm that the source relation's locator is the right "old" locator to pass when creating the destination's extra forks */ + smgrcreate(src_rel->smgr_rlocator.locator, dst_rel, forkNum, false); /* * WAL log creation if the relation is persistent, or this is the diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index d6822ff03f3..bea0b8229f5 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -135,6 +135,38 @@ typedef MdSMgrRelationData *MdSMgrRelation; /* don't try to open a segment, if not already open */ #define EXTENSION_DONT_OPEN (1 << 5) +/* md storage manager functionality */ +static void mdinit(void); +static void mdopen(SMgrRelation reln, SmgrChainIndex chain_index); +static void mdclose(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index); +static void mdcreate(RelFileLocator relold, SMgrRelation reln, ForkNumber forknum, bool isRedo, SmgrChainIndex chain_index); +static bool mdexists(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index); +static void mdunlink(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo, SmgrChainIndex chain_index); +static void mdextend(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, const void *buffer, bool skipFsync, SmgrChainIndex chain_index); +static void mdzeroextend(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, int nblocks, bool skipFsync, SmgrChainIndex chain_index); +static bool mdprefetch(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, int nblocks, SmgrChainIndex chain_index); +static uint32 mdmaxcombine(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, SmgrChainIndex chain_index); +static void mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + void **buffers, BlockNumber nblocks, SmgrChainIndex chain_index); +static void
mdwritev(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, + const void **buffers, BlockNumber nblocks, bool skipFsync, SmgrChainIndex chain_index); +static void mdwriteback(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, BlockNumber nblocks, SmgrChainIndex chain_index); +static BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index); +static void mdtruncate(SMgrRelation reln, ForkNumber forknum, + BlockNumber old_blocks, BlockNumber nblocks, SmgrChainIndex chain_index); +static void mdimmedsync(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index); +static void mdregistersync(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index); +static void mdstartreadv(PgAioHandle *ioh, SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, void **buffers, BlockNumber nblocks, + SmgrChainIndex chain_index); +static int mdfd(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + uint32 *off, SmgrChainIndex chain_index); /* * Fixed-length string to represent paths to files that need to be built by @@ -161,6 +193,7 @@ mdsmgr_register(void) /* magnetic disk */ f_smgr md_smgr = (f_smgr) { .name = "md", + .chain_position = SMGR_CHAIN_TAIL, .smgr_init = mdinit, .smgr_shutdown = NULL, .smgr_open = mdopen, @@ -231,7 +264,7 @@ _mdfd_open_flags(void) /* * mdinit() -- Initialize private state for magnetic disk storage manager. */ -void +static void mdinit(void) { MdCxt = AllocSetContextCreate(TopMemoryContext, @@ -244,8 +277,8 @@ mdinit(void) * * Note: this will return true for lingering files, with pending deletions */ -bool -mdexists(SMgrRelation reln, ForkNumber forknum) +static bool +mdexists(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index) { MdSMgrRelation mdreln = (MdSMgrRelation) reln; @@ -255,7 +288,7 @@ mdexists(SMgrRelation reln, ForkNumber forknum) * which already closes relations when dropping them. 
*/ if (!InRecovery) - mdclose(reln, forknum); + mdclose(reln, forknum, chain_index); /* forward our own chain slot rather than a hard-coded 0 */ return (mdopenfork(mdreln, forknum, EXTENSION_RETURN_NULL) != NULL); } @@ -265,8 +298,8 @@ mdexists(SMgrRelation reln, ForkNumber forknum) * * If isRedo is true, it's okay for the relation to exist already. */ -void -mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo) +static void +mdcreate(RelFileLocator relold, SMgrRelation reln, ForkNumber forknum, bool isRedo, SmgrChainIndex chain_index) { MdSMgrRelation mdreln = (MdSMgrRelation) reln; MdfdVec *mdfd; @@ -381,8 +414,8 @@ mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo) * Note: any failure should be reported as WARNING not ERROR, because * we are usually not in a transaction anymore when this is called. */ -void -mdunlink(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo) +static void +mdunlink(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo, SmgrChainIndex chain_index) { /* Now do the per-fork work */ if (forknum == InvalidForkNumber) @@ -531,9 +564,9 @@ mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo) * EOF). Note that we assume writing a block beyond current EOF * causes intervening file space to become filled with zeroes. */ -void +static void mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - const void *buffer, bool skipFsync) + const void *buffer, bool skipFsync, SmgrChainIndex chain_index) { MdSMgrRelation mdreln = (MdSMgrRelation) reln; pgoff_t seekpos; @@ -597,9 +630,9 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, * Similar to mdextend(), except the relation can be extended by multiple * blocks at once and the added blocks will be filled with zeroes.
*/ -void +static void mdzeroextend(SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, int nblocks, bool skipFsync) + BlockNumber blocknum, int nblocks, bool skipFsync, SmgrChainIndex chain_index) { MdSMgrRelation mdreln = (MdSMgrRelation) reln; MdfdVec *v; @@ -748,8 +781,8 @@ mdopenfork(MdSMgrRelation reln, ForkNumber forknum, int behavior) /* * mdopen() -- Initialize newly-opened relation. */ -void -mdopen(SMgrRelation reln) +static void +mdopen(SMgrRelation reln, SmgrChainIndex chain_index) { MdSMgrRelation mdreln = (MdSMgrRelation) reln; @@ -761,8 +794,8 @@ mdopen(SMgrRelation reln) /* * mdclose() -- Close the specified relation, if it isn't closed already. */ -void -mdclose(SMgrRelation reln, ForkNumber forknum) +static void +mdclose(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index) { MdSMgrRelation mdreln = (MdSMgrRelation) reln; int nopensegs = mdreln->md_num_open_segs[forknum]; @@ -785,9 +818,9 @@ mdclose(SMgrRelation reln, ForkNumber forknum) /* * mdprefetch() -- Initiate asynchronous read of the specified blocks of a relation */ -bool +static bool mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - int nblocks) + int nblocks, SmgrChainIndex chain_index) { #ifdef USE_PREFETCH MdSMgrRelation mdreln = (MdSMgrRelation) reln; @@ -883,9 +916,9 @@ buffers_to_iovec(struct iovec *iov, void **buffers, int nblocks) * mdmaxcombine() -- Return the maximum number of total blocks that can be * combined with an IO starting at blocknum. */ -uint32 +static uint32 mdmaxcombine(SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum) + BlockNumber blocknum, SmgrChainIndex chain_index) { BlockNumber segoff; @@ -897,9 +930,9 @@ mdmaxcombine(SMgrRelation reln, ForkNumber forknum, /* * mdreadv() -- Read the specified blocks from a relation.
*/ -void +static void mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - void **buffers, BlockNumber nblocks) + void **buffers, BlockNumber nblocks, SmgrChainIndex chain_index) { MdSMgrRelation mdreln = (MdSMgrRelation) reln; @@ -1037,10 +1070,10 @@ mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, /* * mdstartreadv() -- Asynchronous version of mdreadv(). */ -void +static void mdstartreadv(PgAioHandle *ioh, SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - void **buffers, BlockNumber nblocks) + void **buffers, BlockNumber nblocks, SmgrChainIndex chain_index) { MdSMgrRelation mdreln = (MdSMgrRelation) reln; pgoff_t seekpos; @@ -1112,9 +1145,9 @@ mdstartreadv(PgAioHandle *ioh, * relation (ie, those before the current EOF). To extend a relation, * use mdextend(). */ -void +static void mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - const void **buffers, BlockNumber nblocks, bool skipFsync) + const void **buffers, BlockNumber nblocks, bool skipFsync, SmgrChainIndex chain_index) { MdSMgrRelation mdreln = (MdSMgrRelation) reln; @@ -1219,9 +1252,9 @@ mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, * This accepts a range of blocks because flushing several pages at once is * considerably more efficient than doing so individually. */ -void +static void mdwriteback(SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, BlockNumber nblocks) + BlockNumber blocknum, BlockNumber nblocks, SmgrChainIndex chain_index) { MdSMgrRelation mdreln = (MdSMgrRelation) reln; @@ -1280,8 +1313,8 @@ mdwriteback(SMgrRelation reln, ForkNumber forknum, * called, then only segments up to the last one actually touched * are present in the array. 
*/ -BlockNumber -mdnblocks(SMgrRelation reln, ForkNumber forknum) +static BlockNumber +mdnblocks(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index) { MdSMgrRelation mdreln = (MdSMgrRelation) reln; MdfdVec *v; @@ -1348,9 +1381,9 @@ mdnblocks(SMgrRelation reln, ForkNumber forknum) * If nblocks > curnblk, the request is ignored when we are InRecovery, * otherwise, an error is raised. */ -void +static void mdtruncate(SMgrRelation reln, ForkNumber forknum, - BlockNumber curnblk, BlockNumber nblocks) + BlockNumber curnblk, BlockNumber nblocks, SmgrChainIndex chain_index) { MdSMgrRelation mdreln = (MdSMgrRelation) reln; BlockNumber priorblocks; @@ -1438,8 +1471,8 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, /* * mdregistersync() -- Mark whole relation as needing fsync */ -void -mdregistersync(SMgrRelation reln, ForkNumber forknum) +static void +mdregistersync(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index) { MdSMgrRelation mdreln = (MdSMgrRelation) reln; int segno; @@ -1449,7 +1482,7 @@ mdregistersync(SMgrRelation reln, ForkNumber forknum) * NOTE: mdnblocks makes sure we have opened all active segments, so that * the loop below will get them all! */ - mdnblocks(reln, forknum); + mdnblocks(reln, forknum, chain_index); /* forward our own chain slot rather than a hard-coded 0 */ min_inactive_seg = segno = mdreln->md_num_open_segs[forknum]; @@ -1490,8 +1523,8 @@ mdregistersync(SMgrRelation reln, ForkNumber forknum) * crash before the next checkpoint syncs the newly-inactive segment, that * segment may survive recovery, reintroducing unwanted data into the table. */ -void -mdimmedsync(SMgrRelation reln, ForkNumber forknum) +static void +mdimmedsync(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index) { MdSMgrRelation mdreln = (MdSMgrRelation) reln; int segno; @@ -1501,7 +1534,7 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum) * NOTE: mdnblocks makes sure we have opened all active segments, so that * the loop below will get them all!
*/ - mdnblocks(reln, forknum); + mdnblocks(reln, forknum, chain_index); /* forward our own chain slot rather than a hard-coded 0 */ min_inactive_seg = segno = mdreln->md_num_open_segs[forknum]; @@ -1544,8 +1577,9 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum) } } -int -mdfd(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, uint32 *off) +static int +mdfd(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, uint32 *off, + SmgrChainIndex chain_index) { MdSMgrRelation mdreln = (MdSMgrRelation) reln; MdfdVec *v = mdopenfork(mdreln, forknum, EXTENSION_FAIL); @@ -1882,7 +1916,7 @@ _mdfd_getseg(MdSMgrRelation reln, ForkNumber forknum, BlockNumber blkno, mdextend((SMgrRelation) reln, forknum, nextsegno * ((BlockNumber) RELSEG_SIZE) - 1, - zerobuf, skipFsync); + zerobuf, skipFsync, 0); pfree(zerobuf); } flags = O_CREAT; diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index 36e0b4f4a54..8938ffc9794 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -76,13 +76,13 @@ #include "utils/inval.h" #include "utils/memutils.h" -static f_smgr *smgrsw; +f_smgr *smgrsw; static int NSmgr = 0; static Size LargestSMgrRelationSize = 0; -SMgrId storage_manager_id; +SMgrChain storage_manager_chain; /* * Each backend has a hashtable that stores all extant SMgrRelation objects.
@@ -121,23 +121,26 @@ smgr_register(const f_smgr *smgr, Size smgrrelation_size) if (smgr->name == NULL || *smgr->name == 0) elog(FATAL, "smgr registered with invalid name"); - Assert(smgr->smgr_init != NULL); - Assert(smgr->smgr_open != NULL); - Assert(smgr->smgr_close != NULL); - Assert(smgr->smgr_create != NULL); - Assert(smgr->smgr_exists != NULL); - Assert(smgr->smgr_unlink != NULL); - Assert(smgr->smgr_extend != NULL); - Assert(smgr->smgr_zeroextend != NULL); - Assert(smgr->smgr_prefetch != NULL); - Assert(smgr->smgr_readv != NULL); - Assert(smgr->smgr_startreadv != NULL); - Assert(smgr->smgr_writev != NULL); - Assert(smgr->smgr_writeback != NULL); - Assert(smgr->smgr_nblocks != NULL); - Assert(smgr->smgr_truncate != NULL); - Assert(smgr->smgr_registersync != NULL); - Assert(smgr->smgr_immedsync != NULL); + if (smgr->chain_position == SMGR_CHAIN_TAIL) /* only a tail manager is required to provide every callback listed here */ + { + Assert(smgr->smgr_init != NULL); + Assert(smgr->smgr_open != NULL); + Assert(smgr->smgr_close != NULL); + Assert(smgr->smgr_create != NULL); + Assert(smgr->smgr_exists != NULL); + Assert(smgr->smgr_unlink != NULL); + Assert(smgr->smgr_extend != NULL); + Assert(smgr->smgr_zeroextend != NULL); + Assert(smgr->smgr_prefetch != NULL); + Assert(smgr->smgr_readv != NULL); + Assert(smgr->smgr_startreadv != NULL); + Assert(smgr->smgr_writev != NULL); + Assert(smgr->smgr_writeback != NULL); + Assert(smgr->smgr_nblocks != NULL); + Assert(smgr->smgr_truncate != NULL); + Assert(smgr->smgr_registersync != NULL); + Assert(smgr->smgr_immedsync != NULL); + } old = MemoryContextSwitchTo(TopMemoryContext); @@ -164,6 +167,17 @@ smgr_register(const f_smgr *smgr, Size smgrrelation_size) return my_id; } +SMgrId +smgr_lookup(const char *name) /* map a registered storage manager name to its index in smgrsw */ +{ + for (int i = 0; i < NSmgr; i++) + { + if (strcmp(smgrsw[i].name, name) == 0) + return i; + } + elog(FATAL, "storage manager \"%s\" not found", name); /* FATAL does not return */ +} + /* * smgrinit(), smgrshutdown() -- Initialize or shut down storage * managers.
@@ -210,6 +224,22 @@ smgrshutdown(int code, Datum arg) RESUME_INTERRUPTS(); } +#define SMGR_CHAIN_LOOKUP(SMGR_METHOD) /* uses the caller's reln and chain_index; advances chain_index to the next manager providing SMGR_METHOD */ \ + do \ + { \ + while (chain_index < reln->smgr_chain.size && smgrsw[reln->smgr_chain.chain[chain_index]].SMGR_METHOD == NULL) \ + chain_index++; \ + if (chain_index >= reln->smgr_chain.size) elog(ERROR, "no storage manager in the chain implements %s", #SMGR_METHOD); /* checked in all builds: running off the chain would index smgrsw out of bounds */ \ + } while (0) + +void +smgr_open_next(SMgrRelation reln, SmgrChainIndex chain_index) +{ + SMGR_CHAIN_LOOKUP(smgr_open); + + smgrsw[reln->smgr_chain.chain[chain_index]].smgr_open(reln, chain_index); +} + /* * smgropen() -- Return an SMgrRelation object, creating it if need be. * @@ -265,14 +295,14 @@ smgropen(RelFileLocator rlocator, ProcNumber backend) for (int i = 0; i <= MAX_FORKNUM; ++i) reln->smgr_cached_nblocks[i] = InvalidBlockNumber; - reln->smgr_which = storage_manager_id; + memcpy(&reln->smgr_chain, &storage_manager_chain, sizeof(SMgrChain)); /* it is not pinned yet */ reln->pincount = 0; dlist_push_tail(&unpinned_relns, &reln->node); /* implementation-specific initialization */ - smgrsw[reln->smgr_which].smgr_open(reln); + smgr_open_next(reln, 0); } RESUME_INTERRUPTS(); @@ -308,6 +338,14 @@ smgrunpin(SMgrRelation reln) dlist_push_tail(&unpinned_relns, &reln->node); } +void +smgr_close_next(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index) +{ + SMGR_CHAIN_LOOKUP(smgr_close); + + smgrsw[reln->smgr_chain.chain[chain_index]].smgr_close(reln, forknum, chain_index); +} + /* * smgrdestroy() -- Delete an SMgrRelation object.
*/ @@ -321,7 +359,7 @@ smgrdestroy(SMgrRelation reln) HOLD_INTERRUPTS(); for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) - smgrsw[reln->smgr_which].smgr_close(reln, forknum); + smgr_close_next(reln, forknum, 0); /* 0: start the search at the first entry of the relation's chain */ dlist_delete(&reln->node); @@ -345,7 +383,7 @@ smgrrelease(SMgrRelation reln) for (ForkNumber forknum = 0; forknum <= MAX_FORKNUM; forknum++) { - smgrsw[reln->smgr_which].smgr_close(reln, forknum); + smgr_close_next(reln, forknum, 0); reln->smgr_cached_nblocks[forknum] = InvalidBlockNumber; } reln->smgr_targblock = InvalidBlockNumber; @@ -447,6 +485,14 @@ smgrreleaserellocator(RelFileLocatorBackend rlocator) smgrrelease(reln); } +bool +smgr_exists_next(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index) +{ + SMGR_CHAIN_LOOKUP(smgr_exists); /* skips chain entries whose smgr_exists callback is NULL */ + + return smgrsw[reln->smgr_chain.chain[chain_index]].smgr_exists(reln, forknum, chain_index); +} + /* * smgrexists() -- Does the underlying file for a fork exist? */ @@ -456,12 +502,20 @@ smgrexists(SMgrRelation reln, ForkNumber forknum) bool ret; HOLD_INTERRUPTS(); - ret = smgrsw[reln->smgr_which].smgr_exists(reln, forknum); + ret = smgr_exists_next(reln, forknum, 0); RESUME_INTERRUPTS(); return ret; } +void +smgr_create_next(RelFileLocator relold, SMgrRelation reln, ForkNumber forknum, bool isRedo, SmgrChainIndex chain_index) +{ + SMGR_CHAIN_LOOKUP(smgr_create); /* skips chain entries whose smgr_create callback is NULL */ + + smgrsw[reln->smgr_chain.chain[chain_index]].smgr_create(relold, reln, forknum, isRedo, chain_index); /* relold is forwarded unchanged */ +} + /* * smgrcreate() -- Create a new relation. * @@ -470,13 +524,21 @@ smgrexists(SMgrRelation reln, ForkNumber forknum) * to be created.
*/ void -smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo) +smgrcreate(RelFileLocator relold, SMgrRelation reln, ForkNumber forknum, bool isRedo) { HOLD_INTERRUPTS(); - smgrsw[reln->smgr_which].smgr_create(reln, forknum, isRedo); + smgr_create_next(relold, reln, forknum, isRedo, 0); /* relold: locator of the file being replaced; callers with no previous file pass reln's own locator */ RESUME_INTERRUPTS(); } +void +smgr_immedsync_next(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index) +{ + SMGR_CHAIN_LOOKUP(smgr_immedsync); /* skips chain entries whose smgr_immedsync callback is NULL */ + + smgrsw[reln->smgr_chain.chain[chain_index]].smgr_immedsync(reln, forknum, chain_index); +} + /* * smgrdosyncall() -- Immediately sync all forks of all given relations * @@ -504,18 +566,24 @@ smgrdosyncall(SMgrRelation *rels, int nrels) */ for (i = 0; i < nrels; i++) { - int which = rels[i]->smgr_which; - for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) { - if (smgrsw[which].smgr_exists(rels[i], forknum)) - smgrsw[which].smgr_immedsync(rels[i], forknum); + if (smgr_exists_next(rels[i], forknum, 0)) + smgr_immedsync_next(rels[i], forknum, 0); } } RESUME_INTERRUPTS(); } +void +smgr_unlink_next(SMgrRelation reln, RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo, SmgrChainIndex chain_index) +{ + SMGR_CHAIN_LOOKUP(smgr_unlink); /* reln is used only to find the chain; the callback itself takes rlocator */ + + smgrsw[reln->smgr_chain.chain[chain_index]].smgr_unlink(rlocator, forknum, isRedo, chain_index); +} + /* * smgrdounlinkall() -- Immediately unlink all forks of all given relations * @@ -557,13 +625,12 @@ smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo) for (i = 0; i < nrels; i++) { RelFileLocatorBackend rlocator = rels[i]->smgr_rlocator; - int which = rels[i]->smgr_which; rlocators[i] = rlocator; /* Close the forks at smgr level */ for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) - smgrsw[which].smgr_close(rels[i], forknum); + smgr_close_next(rels[i], forknum, 0); } /* @@ -587,10 +654,8 @@ smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo) for (i = 0; i < nrels; i++) { - int which = rels[i]->smgr_which; - for (forknum = 0; forknum <= MAX_FORKNUM;
forknum++) - smgrsw[which].smgr_unlink(rlocators[i], forknum, isRedo); + smgr_unlink_next(rels[i], rlocators[i], forknum, isRedo, 0); /* rels[i] supplies the chain, rlocators[i] the files to remove */ } pfree(rlocators); @@ -598,6 +663,15 @@ smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo) RESUME_INTERRUPTS(); } +void +smgr_extend_next(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + const void *buffer, bool skipFsync, SmgrChainIndex chain_index) +{ + SMGR_CHAIN_LOOKUP(smgr_extend); /* skips chain entries whose smgr_extend callback is NULL */ + + smgrsw[reln->smgr_chain.chain[chain_index]].smgr_extend(reln, forknum, blocknum, + buffer, skipFsync, chain_index); +} /* * smgrextend() -- Add a new block to a file. @@ -614,8 +688,7 @@ smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, { HOLD_INTERRUPTS(); - smgrsw[reln->smgr_which].smgr_extend(reln, forknum, blocknum, - buffer, skipFsync); + smgr_extend_next(reln, forknum, blocknum, buffer, skipFsync, 0); /* * Normally we expect this to increase nblocks by one, but if the cached @@ -630,6 +703,16 @@ smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, RESUME_INTERRUPTS(); } +void +smgr_zeroextend_next(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + int nblocks, bool skipFsync, SmgrChainIndex chain_index) +{ + SMGR_CHAIN_LOOKUP(smgr_zeroextend); /* skips chain entries whose smgr_zeroextend callback is NULL */ + + smgrsw[reln->smgr_chain.chain[chain_index]].smgr_zeroextend(reln, forknum, blocknum, + nblocks, skipFsync, chain_index); +} + /* * smgrzeroextend() -- Add new zeroed out blocks to a file.
* @@ -643,8 +726,7 @@ smgrzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, { HOLD_INTERRUPTS(); - smgrsw[reln->smgr_which].smgr_zeroextend(reln, forknum, blocknum, - nblocks, skipFsync); + smgr_zeroextend_next(reln, forknum, blocknum, nblocks, skipFsync, 0); /* * Normally we expect this to increase the fork size by nblocks, but if @@ -659,6 +741,16 @@ smgrzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, RESUME_INTERRUPTS(); } +bool +smgr_prefetch_next(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + int nblocks, SmgrChainIndex chain_index) +{ + SMGR_CHAIN_LOOKUP(smgr_prefetch); /* skips chain entries whose smgr_prefetch callback is NULL */ + + return smgrsw[reln->smgr_chain.chain[chain_index]].smgr_prefetch(reln, forknum, blocknum, + nblocks, chain_index); +} + /* * smgrprefetch() -- Initiate asynchronous read of the specified block of a relation. * @@ -673,12 +765,21 @@ smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, bool ret; HOLD_INTERRUPTS(); - ret = smgrsw[reln->smgr_which].smgr_prefetch(reln, forknum, blocknum, nblocks); + ret = smgr_prefetch_next(reln, forknum, blocknum, nblocks, 0); RESUME_INTERRUPTS(); return ret; } +uint32 +smgr_maxcombine_next(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, SmgrChainIndex chain_index) +{ + SMGR_CHAIN_LOOKUP(smgr_maxcombine); /* NOTE(review): smgr_register does not assert smgr_maxcombine for the tail manager - confirm some chain member always provides it */ + + return smgrsw[reln->smgr_chain.chain[chain_index]].smgr_maxcombine(reln, forknum, blocknum, chain_index); +} + /* * smgrmaxcombine() - Return the maximum number of total blocks that can be * combined with an IO starting at blocknum.
@@ -692,12 +793,22 @@ smgrmaxcombine(SMgrRelation reln, ForkNumber forknum, uint32 ret; HOLD_INTERRUPTS(); - ret = smgrsw[reln->smgr_which].smgr_maxcombine(reln, forknum, blocknum); + ret = smgr_maxcombine_next(reln, forknum, blocknum, 0); RESUME_INTERRUPTS(); return ret; } +void +smgr_readv_next(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + void **buffers, BlockNumber nblocks, SmgrChainIndex chain_index) +{ + SMGR_CHAIN_LOOKUP(smgr_readv); + + smgrsw[reln->smgr_chain.chain[chain_index]].smgr_readv(reln, forknum, blocknum, + buffers, nblocks, chain_index); +} + /* * smgrreadv() -- read a particular block range from a relation into the * supplied buffers. @@ -714,8 +825,7 @@ smgrreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks) { HOLD_INTERRUPTS(); - smgrsw[reln->smgr_which].smgr_readv(reln, forknum, blocknum, buffers, - nblocks); + smgr_readv_next(reln, forknum, blocknum, buffers, nblocks, 0); RESUME_INTERRUPTS(); } @@ -741,18 +851,37 @@ smgrreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, * - Under Valgrind, the "buffers" memory may or may not change status to * DEFINED, depending on io_method and concurrent activity. 
*/ +void +smgr_startreadv_next(PgAioHandle *ioh, SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, void **buffers, BlockNumber nblocks, + SmgrChainIndex chain_index) +{ + SMGR_CHAIN_LOOKUP(smgr_startreadv); + + smgrsw[reln->smgr_chain.chain[chain_index]].smgr_startreadv(ioh, reln, forknum, blocknum, + buffers, nblocks, chain_index); +} + void smgrstartreadv(PgAioHandle *ioh, SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks) { HOLD_INTERRUPTS(); - smgrsw[reln->smgr_which].smgr_startreadv(ioh, - reln, forknum, blocknum, buffers, - nblocks); + smgr_startreadv_next(ioh, reln, forknum, blocknum, buffers, nblocks, 0); RESUME_INTERRUPTS(); } +void +smgr_writev_next(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + const void **buffers, BlockNumber nblocks, bool skipFsync, SmgrChainIndex chain_index) +{ + SMGR_CHAIN_LOOKUP(smgr_writev); + + smgrsw[reln->smgr_chain.chain[chain_index]].smgr_writev(reln, forknum, blocknum, + buffers, nblocks, skipFsync, chain_index); +} + /* * smgrwritev() -- Write the supplied buffers out. * @@ -784,11 +913,20 @@ smgrwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void **buffers, BlockNumber nblocks, bool skipFsync) { HOLD_INTERRUPTS(); - smgrsw[reln->smgr_which].smgr_writev(reln, forknum, blocknum, - buffers, nblocks, skipFsync); + smgr_writev_next(reln, forknum, blocknum, + buffers, nblocks, skipFsync, 0); RESUME_INTERRUPTS(); } +void +smgr_writeback_next(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + BlockNumber nblocks, SmgrChainIndex chain_index) +{ + SMGR_CHAIN_LOOKUP(smgr_writeback); + + smgrsw[reln->smgr_chain.chain[chain_index]].smgr_writeback(reln, forknum, blocknum, nblocks, chain_index); +} + /* * smgrwriteback() -- Trigger kernel writeback for the supplied range of * blocks. 
@@ -798,11 +936,18 @@ smgrwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks) { HOLD_INTERRUPTS(); - smgrsw[reln->smgr_which].smgr_writeback(reln, forknum, blocknum, - nblocks); + smgr_writeback_next(reln, forknum, blocknum, nblocks, 0); RESUME_INTERRUPTS(); } +extern BlockNumber +smgr_nblocks_next(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index) +{ + SMGR_CHAIN_LOOKUP(smgr_nblocks); + + return smgrsw[reln->smgr_chain.chain[chain_index]].smgr_nblocks(reln, forknum, chain_index); +} + /* * smgrnblocks() -- Calculate the number of blocks in the * supplied relation. @@ -819,7 +964,7 @@ smgrnblocks(SMgrRelation reln, ForkNumber forknum) HOLD_INTERRUPTS(); - result = smgrsw[reln->smgr_which].smgr_nblocks(reln, forknum); + result = smgr_nblocks_next(reln, forknum, 0); reln->smgr_cached_nblocks[forknum] = result; @@ -849,6 +994,14 @@ smgrnblocks_cached(SMgrRelation reln, ForkNumber forknum) return InvalidBlockNumber; } +void +smgr_truncate_next(SMgrRelation reln, ForkNumber forknum, BlockNumber curnblk, BlockNumber nblocks, SmgrChainIndex chain_index) +{ + SMGR_CHAIN_LOOKUP(smgr_truncate); + + smgrsw[reln->smgr_chain.chain[chain_index]].smgr_truncate(reln, forknum, curnblk, nblocks, chain_index); +} + /* * smgrtruncate() -- Truncate the given forks of supplied relation to * each specified numbers of blocks @@ -893,8 +1046,7 @@ smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, /* Make the cached size is invalid if we encounter an error. */ reln->smgr_cached_nblocks[forknum[i]] = InvalidBlockNumber; - smgrsw[reln->smgr_which].smgr_truncate(reln, forknum[i], - old_nblocks[i], nblocks[i]); + smgr_truncate_next(reln, forknum[i], old_nblocks[i], nblocks[i], 0); /* * We might as well update the local smgr_cached_nblocks values. 
The @@ -916,6 +1068,14 @@ smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, } } +void +smgr_registersync_next(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index) +{ + SMGR_CHAIN_LOOKUP(smgr_registersync); + + smgrsw[reln->smgr_chain.chain[chain_index]].smgr_registersync(reln, forknum, chain_index); +} + /* * smgrregistersync() -- Request a relation to be sync'd at next checkpoint * @@ -932,7 +1092,7 @@ void smgrregistersync(SMgrRelation reln, ForkNumber forknum) { HOLD_INTERRUPTS(); - smgrsw[reln->smgr_which].smgr_registersync(reln, forknum); + smgr_registersync_next(reln, forknum, 0); RESUME_INTERRUPTS(); } @@ -966,10 +1126,19 @@ void smgrimmedsync(SMgrRelation reln, ForkNumber forknum) { HOLD_INTERRUPTS(); - smgrsw[reln->smgr_which].smgr_immedsync(reln, forknum); + smgr_immedsync_next(reln, forknum, 0); RESUME_INTERRUPTS(); } +int +smgr_fd_next(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + uint32 *off, SmgrChainIndex chain_index) +{ + SMGR_CHAIN_LOOKUP(smgr_fd); + + return smgrsw[reln->smgr_chain.chain[chain_index]].smgr_fd(reln, forknum, blocknum, off, chain_index); +} + /* * Return fd for the specified block number and update *off to the appropriate * position. 
@@ -988,7 +1157,7 @@ smgrfd(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, uint32 *off)
 	 */
 	Assert(!INTERRUPTS_CAN_BE_PROCESSED());
 
-	fd = smgrsw[reln->smgr_which].smgr_fd(reln, forknum, blocknum, off);
+	fd = smgr_fd_next(reln, forknum, blocknum, off, 0);
 
 	return fd;
 }
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index e54bf1e760f..9085a558e2d 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -4116,6 +4116,8 @@ PostgresSingleUserMain(int argc, char *argv[],
 	 */
 	process_shared_preload_libraries();
 
+	process_smgr_chain();
+
 	/* Initialize MaxBackends */
 	InitializeMaxBackends();
 
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 6b634c9fff1..8ad1d27b4f1 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -3875,7 +3875,7 @@ RelationSetNewRelfilenumber(Relation relation, char persistence)
 		/* handle these directly, at least for now */
 		SMgrRelation srel;
 
-		srel = RelationCreateStorage(newrlocator, persistence, true);
+		srel = RelationCreateStorage(relation->rd_locator, newrlocator, persistence, true);
 		smgrclose(srel);
 	}
 	else
diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c
index c6cd4445fff..e2af2eb3f9d 100644
--- a/src/backend/utils/init/miscinit.c
+++ b/src/backend/utils/init/miscinit.c
@@ -56,6 +56,7 @@
 #include "utils/pidfile.h"
 #include "utils/syscache.h"
 #include "utils/varlena.h"
+#include "storage/smgr.h"
 
 
 #define DIRECTORY_LOCK_FILE		"postmaster.pid"
@@ -1786,6 +1787,8 @@ char	   *session_preload_libraries_string = NULL;
 char	   *shared_preload_libraries_string = NULL;
 char	   *local_preload_libraries_string = NULL;
 
+char	   *smgr_chain_string = NULL;
+
 /* Flag telling that we are loading shared_preload_libraries */
 bool		process_shared_preload_libraries_in_progress = false;
 bool		process_shared_preload_libraries_done = false;
@@ -1862,6 +1865,91 @@ process_shared_preload_libraries(void)
 	process_shared_preload_libraries_done = true;
 }
 
+/*
+ * process_smgr_chain
+ *		Parse the smgr_chain GUC into storage_manager_chain.
+ *
+ * The setting is a comma-separated list of storage manager names, each of
+ * which is resolved with smgr_lookup().  Every entry but the last must be
+ * registered as SMGR_CHAIN_MODIFIER and the last one as SMGR_CHAIN_TAIL;
+ * a chain that violates this is a FATAL error.  An empty setting leaves the
+ * current chain untouched; a list syntax error is logged and ignored.
+ */
+void
+process_smgr_chain(void)
+{
+	char	   *rawstring;
+	List	   *elemlist;
+	ListCell   *l;
+	int			nelems;
+	int			idx = 0;
+
+	if (smgr_chain_string == NULL || smgr_chain_string[0] == '\0')
+		return;					/* nothing to do */
+
+	/* Need a modifiable copy of string */
+	rawstring = pstrdup(smgr_chain_string);
+
+	/* Parse string into list of storage manager names */
+	if (!SplitIdentifierString(rawstring, ',', &elemlist))
+	{
+		/* syntax error in list */
+		list_free(elemlist);
+		pfree(rawstring);
+		ereport(LOG,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("invalid list syntax in parameter \"%s\"",
+						"smgr_chain")));
+		return;
+	}
+
+	nelems = list_length(elemlist);
+
+	/* A list with no entries is treated like an empty setting */
+	if (nelems == 0)
+	{
+		list_free(elemlist);
+		pfree(rawstring);
+		return;
+	}
+
+	/* storage_manager_chain.chain[] only has room for MAX_SMGR_CHAIN ids */
+	if (nelems > MAX_SMGR_CHAIN)
+		ereport(FATAL,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("too many storage managers in parameter \"%s\": %d, maximum is %d",
+						"smgr_chain", nelems, MAX_SMGR_CHAIN)));
+
+	foreach(l, elemlist)
+	{
+		char	   *smgrname = (char *) lfirst(l);
+		SMgrId		id = smgr_lookup(smgrname);
+
+		storage_manager_chain.chain[idx++] = id;
+
+		ereport(DEBUG1,
+				(errmsg_internal("using storage manager in chain \"%s\"", smgrname)));
+	}
+
+	for (int i = 0; i < idx; ++i)
+	{
+		int			chain_position = smgrsw[storage_manager_chain.chain[i]].chain_position;
+
+		if (i == idx - 1 && chain_position != SMGR_CHAIN_TAIL)
+			ereport(FATAL,
+					(errmsg_internal("smgr_chain: the last element should be a `tail` implementation, not a modifier.")));
+
+		if (i != idx - 1 && chain_position != SMGR_CHAIN_MODIFIER)
+			ereport(FATAL,
+					(errmsg_internal("smgr_chain: element %i/%i %s is not a modifier.", i, idx, smgrsw[storage_manager_chain.chain[i]].name)));
+	}
+
+	storage_manager_chain.size = idx;
+
+	list_free(elemlist);
+	pfree(rawstring);
+}
+
 /*
  * process any libraries that should be preloaded at backend start
  */
@@ -1884,7 +1972,9 @@ register_builtin_dynamic_managers(void)
 {
 	mdsmgr_register();
 
-	storage_manager_id = MdSMgrId;
+	/* setup a dummy chain with md, for tools */
+	storage_manager_chain.chain[0] = MdSMgrId;
+	storage_manager_chain.size = 1;
 }
 
 /*
diff --git a/src/backend/utils/misc/guc_parameters.dat b/src/backend/utils/misc/guc_parameters.dat
index f0260e6e412..f19f1ee0f2f 100644
--- a/src/backend/utils/misc/guc_parameters.dat
+++ 
b/src/backend/utils/misc/guc_parameters.dat @@ -1429,7 +1429,6 @@ variable => 'jit_provider', boot_val => '"llvmjit"', }, - { name => 'jit_tuple_deforming', type => 'bool', context => 'PGC_USERSET', group => 'DEVELOPER_OPTIONS', short_desc => 'Allow JIT compilation of tuple deforming.', flags => 'GUC_NOT_IN_SAMPLE', @@ -2633,6 +2632,13 @@ boot_val => '""', }, + +{ name => 'smgr_chain', type => 'string', context => 'PGC_POSTMASTER', group => 'CLIENT_CONN_PRELOAD', + short_desc => 'Lists storage managers used by the server, in order.', + flags => 'GUC_LIST_INPUT | GUC_LIST_QUOTE | GUC_SUPERUSER_ONLY', + variable => 'smgr_chain_string', + boot_val => '"md"', +}, { name => 'ssl', type => 'bool', context => 'PGC_SIGHUP', group => 'CONN_AUTH_SSL', short_desc => 'Enables SSL connections.', variable => 'EnableSSL', diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index c4f92fcdac8..57ed38aa406 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -816,6 +816,7 @@ #session_preload_libraries = '' #shared_preload_libraries = '' # (change requires restart) #jit_provider = 'llvmjit' # JIT library to use +#smgr_chain = 'md' # SMGR implementations to use # - Other Defaults - diff --git a/src/include/catalog/storage.h b/src/include/catalog/storage.h index 70f619a6d6f..ecb4ba91bfc 100644 --- a/src/include/catalog/storage.h +++ b/src/include/catalog/storage.h @@ -22,7 +22,8 @@ /* GUC variables */ extern PGDLLIMPORT int wal_skip_threshold; -extern SMgrRelation RelationCreateStorage(RelFileLocator rlocator, +extern SMgrRelation RelationCreateStorage(RelFileLocator oldlocator, + RelFileLocator rlocator, char relpersistence, bool register_delete); extern void RelationDropStorage(Relation rel); diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 11ac53a048b..11f5471cea3 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -516,6 +516,7 @@ 
extern PGDLLIMPORT bool process_shmem_requests_in_progress; extern PGDLLIMPORT char *session_preload_libraries_string; extern PGDLLIMPORT char *shared_preload_libraries_string; extern PGDLLIMPORT char *local_preload_libraries_string; +extern PGDLLIMPORT char *smgr_chain_string; extern void CreateDataDirLockFile(bool amPostmaster); extern void CreateSocketLockFile(const char *socketfile, bool amPostmaster, @@ -526,6 +527,7 @@ extern bool RecheckDataDirLockFile(void); extern void ValidatePgVersion(const char *path); extern void register_builtin_dynamic_managers(void); extern void process_shared_preload_libraries(void); +extern void process_smgr_chain(void); extern void process_session_preload_libraries(void); extern void process_shmem_requests(void); extern void pg_bindtextdomain(const char *domain); diff --git a/src/include/storage/md.h b/src/include/storage/md.h index c6958d92a68..5a044ad9aee 100644 --- a/src/include/storage/md.h +++ b/src/include/storage/md.h @@ -26,38 +26,6 @@ extern PGDLLIMPORT const PgAioHandleCallbacks aio_md_readv_cb; extern void mdsmgr_register(void); extern SMgrId MdSMgrId; -/* md storage manager functionality */ -extern void mdinit(void); -extern void mdopen(SMgrRelation reln); -extern void mdclose(SMgrRelation reln, ForkNumber forknum); -extern void mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo); -extern bool mdexists(SMgrRelation reln, ForkNumber forknum); -extern void mdunlink(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo); -extern void mdextend(SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, const void *buffer, bool skipFsync); -extern void mdzeroextend(SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, int nblocks, bool skipFsync); -extern bool mdprefetch(SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, int nblocks); -extern uint32 mdmaxcombine(SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum); -extern void mdreadv(SMgrRelation reln, ForkNumber 
forknum, BlockNumber blocknum, - void **buffers, BlockNumber nblocks); -extern void mdstartreadv(PgAioHandle *ioh, - SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - void **buffers, BlockNumber nblocks); -extern void mdwritev(SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, - const void **buffers, BlockNumber nblocks, bool skipFsync); -extern void mdwriteback(SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, BlockNumber nblocks); -extern BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum); -extern void mdtruncate(SMgrRelation reln, ForkNumber forknum, - BlockNumber curnblk, BlockNumber nblocks); -extern void mdimmedsync(SMgrRelation reln, ForkNumber forknum); -extern void mdregistersync(SMgrRelation reln, ForkNumber forknum); -extern int mdfd(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, uint32 *off); - extern void ForgetDatabaseSyncRequests(Oid dbid); extern void DropRelationFiles(RelFileLocator *delrels, int ndelrels, bool isRedo); diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h index b170038708a..85915aa1384 100644 --- a/src/include/storage/smgr.h +++ b/src/include/storage/smgr.h @@ -21,7 +21,17 @@ typedef uint8 SMgrId; -extern PGDLLIMPORT SMgrId storage_manager_id; +typedef uint8 SmgrChainIndex; + +#define MAX_SMGR_CHAIN 15 + +typedef struct +{ + SMgrId chain[MAX_SMGR_CHAIN]; /* storage manager selector */ + uint8 size; +} SMgrChain; + +extern PGDLLIMPORT SMgrChain storage_manager_chain; /* * smgr.c maintains a table of SMgrRelation objects, which are essentially @@ -56,7 +66,7 @@ typedef struct SMgrRelationData * Fields below here are intended to be private to smgr.c and its * submodules. Do not touch them from elsewhere. */ - SMgrId smgr_which; /* storage manager selector */ + SMgrChain smgr_chain; /* selected storage manager chain */ /* * Pinning support. If unpinned (ie. 
pincount == 0), 'node' is a list @@ -73,6 +83,9 @@ typedef SMgrRelationData *SMgrRelation; extern PGDLLIMPORT const PgAioTargetInfo aio_smgr_target_info; +#define SMGR_CHAIN_TAIL 1 +#define SMGR_CHAIN_MODIFIER 2 + /* * This struct of function pointers defines the API between smgr.c and * any individual storage manager module. Note that smgr subfunctions are @@ -86,45 +99,49 @@ extern PGDLLIMPORT const PgAioTargetInfo aio_smgr_target_info; typedef struct f_smgr { const char *name; + int chain_position; void (*smgr_init) (void); /* may be NULL */ void (*smgr_shutdown) (void); /* may be NULL */ - void (*smgr_open) (SMgrRelation reln); - void (*smgr_close) (SMgrRelation reln, ForkNumber forknum); - void (*smgr_create) (SMgrRelation reln, ForkNumber forknum, - bool isRedo); - bool (*smgr_exists) (SMgrRelation reln, ForkNumber forknum); + void (*smgr_open) (SMgrRelation reln, SmgrChainIndex chain_index); + void (*smgr_close) (SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index); + void (*smgr_create) (RelFileLocator relold, SMgrRelation reln, ForkNumber forknum, + bool isRedo, SmgrChainIndex chain_index); + bool (*smgr_exists) (SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index); void (*smgr_unlink) (RelFileLocatorBackend rlocator, ForkNumber forknum, - bool isRedo); + bool isRedo, SmgrChainIndex chain_index); void (*smgr_extend) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, const void *buffer, bool skipFsync); + BlockNumber blocknum, const void *buffer, bool skipFsync, SmgrChainIndex chain_index); void (*smgr_zeroextend) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, int nblocks, bool skipFsync); + BlockNumber blocknum, int nblocks, bool skipFsync, SmgrChainIndex chain_index); bool (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, int nblocks); + BlockNumber blocknum, int nblocks, SmgrChainIndex chain_index); uint32 (*smgr_maxcombine) (SMgrRelation reln, ForkNumber forknum, - 
BlockNumber blocknum); + BlockNumber blocknum, SmgrChainIndex chain_index); void (*smgr_readv) (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - void **buffers, BlockNumber nblocks); + void **buffers, BlockNumber nblocks, SmgrChainIndex chain_index); void (*smgr_startreadv) (PgAioHandle *ioh, SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - void **buffers, BlockNumber nblocks); + void **buffers, BlockNumber nblocks, SmgrChainIndex chain_index); void (*smgr_writev) (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void **buffers, BlockNumber nblocks, - bool skipFsync); + bool skipFsync, SmgrChainIndex chain_index); void (*smgr_writeback) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, BlockNumber nblocks); - BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum); + BlockNumber blocknum, BlockNumber nblocks, SmgrChainIndex chain_index); + BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index); void (*smgr_truncate) (SMgrRelation reln, ForkNumber forknum, - BlockNumber old_blocks, BlockNumber nblocks); - void (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum); - void (*smgr_registersync) (SMgrRelation reln, ForkNumber forknum); - int (*smgr_fd) (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, uint32 *off); + BlockNumber old_blocks, BlockNumber nblocks, SmgrChainIndex chain_index); + void (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index); + void (*smgr_registersync) (SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index); + int (*smgr_fd) (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, uint32 *off, SmgrChainIndex chain_index); } f_smgr; extern SMgrId smgr_register(const f_smgr *smgr, Size smgrrelation_size); +extern SMgrId smgr_lookup(const char *name); + +extern f_smgr *smgrsw; extern void smgrinit(void); extern SMgrRelation smgropen(RelFileLocator rlocator, ProcNumber 
backend); @@ -136,7 +153,7 @@ extern void smgrdestroyall(void); extern void smgrrelease(SMgrRelation reln); extern void smgrreleaseall(void); extern void smgrreleaserellocator(RelFileLocatorBackend rlocator); -extern void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo); +extern void smgrcreate(RelFileLocator relold, SMgrRelation reln, ForkNumber forknum, bool isRedo); extern void smgrdosyncall(SMgrRelation *rels, int nrels); extern void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo); extern void smgrextend(SMgrRelation reln, ForkNumber forknum, @@ -170,6 +187,53 @@ extern void smgrregistersync(SMgrRelation reln, ForkNumber forknum); extern void AtEOXact_SMgr(void); extern bool ProcessBarrierSmgrRelease(void); +extern void + smgr_open_next(SMgrRelation reln, SmgrChainIndex chain_index); +extern void + smgr_close_next(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index); +extern bool + smgr_exists_next(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index); +extern void + smgr_create_next(RelFileLocator relold, SMgrRelation reln, ForkNumber forknum, bool isRedo, SmgrChainIndex chain_index); +extern void + smgr_immedsync_next(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index); +extern void + smgr_unlink_next(SMgrRelation reln, RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo, SmgrChainIndex chain_index); +extern void + smgr_extend_next(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + const void *buffer, bool skipFsync, SmgrChainIndex chain_index); +extern void + smgr_zeroextend_next(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + int nblocks, bool skipFsync, SmgrChainIndex chain_index); +extern bool + smgr_prefetch_next(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + int nblocks, SmgrChainIndex chain_index); +extern uint32 + smgr_maxcombine_next(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, SmgrChainIndex chain_index); 
+extern void + smgr_readv_next(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + void **buffers, BlockNumber nblocks, SmgrChainIndex chain_index); +extern void + smgr_writev_next(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + const void **buffers, BlockNumber nblocks, bool skipFsync, SmgrChainIndex chain_index); +extern void + smgr_writeback_next(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + BlockNumber nblocks, SmgrChainIndex chain_index); +extern BlockNumber + smgr_nblocks_next(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index); +extern void + smgr_truncate_next(SMgrRelation reln, ForkNumber forknum, BlockNumber curnblk, BlockNumber nblocks, SmgrChainIndex chain_index); +extern void + smgr_registersync_next(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index); +extern void + smgr_startreadv_next(PgAioHandle *ioh, SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, void **buffers, BlockNumber nblocks, + SmgrChainIndex chain_index); +extern int + smgr_fd_next(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + uint32 *off, SmgrChainIndex chain_index); + static inline void smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void *buffer) diff --git a/src/test/modules/fsync_checker/fsync_checker_smgr.c b/src/test/modules/fsync_checker/fsync_checker_smgr.c index 56126bef01e..017abdd9a45 100644 --- a/src/test/modules/fsync_checker/fsync_checker_smgr.c +++ b/src/test/modules/fsync_checker/fsync_checker_smgr.c @@ -27,15 +27,15 @@ typedef struct void _PG_init(void); static void fsync_checker_extend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - const void *buffer, bool skipFsync); -static void fsync_checker_immedsync(SMgrRelation reln, ForkNumber forknum); + const void *buffer, bool skipFsync, SmgrChainIndex chain_index); +static void fsync_checker_immedsync(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index); static void 
fsync_checker_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void **buffers, - BlockNumber nblocks, bool skipFsync); + BlockNumber nblocks, bool skipFsync, SmgrChainIndex chain_index); static void fsync_checker_writeback(SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, BlockNumber nblocks); + BlockNumber blocknum, BlockNumber nblocks, SmgrChainIndex chain_index); static void fsync_checker_zeroextend(SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, int nblocks, bool skipFsync); + BlockNumber blocknum, int nblocks, bool skipFsync, SmgrChainIndex chain_index); static void fsync_checker_checkpoint_create(const CheckPoint *checkPoint); static void fsync_checker_shmem_request(void); @@ -47,26 +47,27 @@ static void remove_reln(SMgrRelation reln, ForkNumber forknum); static SMgrId fsync_checker_smgr_id; static const struct f_smgr fsync_checker_smgr = { .name = "fsync_checker", - .smgr_init = mdinit, + .chain_position = SMGR_CHAIN_MODIFIER, + .smgr_init = NULL, .smgr_shutdown = NULL, - .smgr_open = mdopen, - .smgr_close = mdclose, - .smgr_create = mdcreate, - .smgr_exists = mdexists, - .smgr_unlink = mdunlink, + .smgr_open = NULL, + .smgr_close = NULL, + .smgr_create = NULL, + .smgr_exists = NULL, + .smgr_unlink = NULL, .smgr_extend = fsync_checker_extend, .smgr_zeroextend = fsync_checker_zeroextend, - .smgr_prefetch = mdprefetch, - .smgr_maxcombine = mdmaxcombine, - .smgr_readv = mdreadv, - .smgr_startreadv = mdstartreadv, + .smgr_prefetch = NULL, + .smgr_maxcombine = NULL, + .smgr_readv = NULL, + .smgr_startreadv = NULL, .smgr_writev = fsync_checker_writev, .smgr_writeback = fsync_checker_writeback, - .smgr_nblocks = mdnblocks, - .smgr_truncate = mdtruncate, + .smgr_nblocks = NULL, + .smgr_truncate = NULL, .smgr_immedsync = fsync_checker_immedsync, - .smgr_registersync = mdregistersync, - .smgr_fd = mdfd, + .smgr_registersync = NULL, + .smgr_fd = NULL, }; static HTAB *volatile_relns; @@ -93,8 +94,6 @@ _PG_init(void) * 
could use MdSmgrRelation as the parent. */ fsync_checker_smgr_id = smgr_register(&fsync_checker_smgr, 0); - - storage_manager_id = fsync_checker_smgr_id; } static void @@ -202,50 +201,50 @@ remove_reln(SMgrRelation reln, ForkNumber forknum) static void fsync_checker_extend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - const void *buffer, bool skipFsync) + const void *buffer, bool skipFsync, SmgrChainIndex chain_index) { if (!SmgrIsTemp(reln) && !skipFsync) add_reln(reln, forknum); - mdextend(reln, forknum, blocknum, buffer, skipFsync); + smgr_extend_next(reln, forknum, blocknum, buffer, skipFsync, chain_index + 1); } static void -fsync_checker_immedsync(SMgrRelation reln, ForkNumber forknum) +fsync_checker_immedsync(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index) { if (!SmgrIsTemp(reln)) remove_reln(reln, forknum); - mdimmedsync(reln, forknum); + smgr_immedsync_next(reln, forknum, chain_index + 1); } static void fsync_checker_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void **buffers, - BlockNumber nblocks, bool skipFsync) + BlockNumber nblocks, bool skipFsync, SmgrChainIndex chain_index) { if (!SmgrIsTemp(reln) && !skipFsync) add_reln(reln, forknum); - mdwritev(reln, forknum, blocknum, buffers, nblocks, skipFsync); + smgr_writev_next(reln, forknum, blocknum, buffers, nblocks, skipFsync, chain_index + 1); } static void fsync_checker_writeback(SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, BlockNumber nblocks) + BlockNumber blocknum, BlockNumber nblocks, SmgrChainIndex chain_index) { if (!SmgrIsTemp(reln)) remove_reln(reln, forknum); - mdwriteback(reln, forknum, blocknum, nblocks); + smgr_writeback_next(reln, forknum, blocknum, nblocks, chain_index + 1); } static void fsync_checker_zeroextend(SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, int nblocks, bool skipFsync) + BlockNumber blocknum, int nblocks, bool skipFsync, SmgrChainIndex chain_index) { if (!SmgrIsTemp(reln) 
&& !skipFsync) add_reln(reln, forknum); - mdzeroextend(reln, forknum, blocknum, nblocks, skipFsync); + smgr_zeroextend_next(reln, forknum, blocknum, nblocks, skipFsync, chain_index + 1); } diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index a8457135f33..41d271cb900 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2664,6 +2664,7 @@ SID_IDENTIFIER_AUTHORITY SID_NAME_USE SISeg SIZE_T +SMgrChain SMgrRelation SMgrRelationData SMgrSortArray -- 2.43.0