From b48d181b48a4ddc958f9256bfac02814878149b6 Mon Sep 17 00:00:00 2001 From: Matthias van de Meent Date: Tue, 27 Jun 2023 15:59:23 +0200 Subject: [PATCH v5 1/4] Expose f_smgr to extensions for manual implementation There are various reasons why one would want to create their own implementation of a storage manager, among which are block-level compression, encryption and offloading to cold storage. This is the first patch in the series; it allows extensions to register their own SMgr and to override the global storage manager. --- src/backend/postmaster/postmaster.c | 5 + src/backend/storage/smgr/md.c | 196 +++++++++++++++++++--------- src/backend/storage/smgr/smgr.c | 148 ++++++++++----------- src/backend/utils/init/miscinit.c | 15 +++ src/include/miscadmin.h | 1 + src/include/storage/md.h | 4 + src/include/storage/smgr.h | 66 ++++++++-- src/tools/pgindent/typedefs.list | 1 + 8 files changed, 285 insertions(+), 151 deletions(-) diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index d6133bfebc6..fbd726d39da 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -929,6 +929,11 @@ PostmasterMain(int argc, char *argv[]) */ ApplyLauncherRegister(); + /* + * Register built-in managers that are not part of static arrays + */ + register_builtin_dynamic_managers(); + /* * process any libraries that should be preloaded at postmaster start */ diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index a2625871185..d6822ff03f3 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -95,6 +95,21 @@ typedef struct _MdfdVec } MdfdVec; static MemoryContext MdCxt; /* context for all MdfdVec objects */ +SMgrId MdSMgrId; + +typedef struct +{ + SMgrRelationData reln; /* parent data */ + + /* + * for md.c; per-fork arrays of the number of open segments + * (md_num_open_segs) and the segments themselves (md_seg_fds). 
+ */ + int md_num_open_segs[MAX_FORKNUM + 1]; + MdfdVec *md_seg_fds[MAX_FORKNUM + 1]; +} MdSMgrRelationData; + +typedef MdSMgrRelationData *MdSMgrRelation; /* Populate a file tag describing an md.c segment file. */ @@ -140,27 +155,57 @@ typedef struct MdPathStr char str[MD_PATH_STR_MAXLEN + 1]; } MdPathStr; +void +mdsmgr_register(void) +{ + /* magnetic disk */ + f_smgr md_smgr = (f_smgr) { + .name = "md", + .smgr_init = mdinit, + .smgr_shutdown = NULL, + .smgr_open = mdopen, + .smgr_close = mdclose, + .smgr_create = mdcreate, + .smgr_exists = mdexists, + .smgr_unlink = mdunlink, + .smgr_extend = mdextend, + .smgr_zeroextend = mdzeroextend, + .smgr_prefetch = mdprefetch, + .smgr_maxcombine = mdmaxcombine, + .smgr_readv = mdreadv, + .smgr_startreadv = mdstartreadv, + .smgr_writev = mdwritev, + .smgr_writeback = mdwriteback, + .smgr_nblocks = mdnblocks, + .smgr_truncate = mdtruncate, + .smgr_immedsync = mdimmedsync, + .smgr_registersync = mdregistersync, + .smgr_fd = mdfd, + }; + + MdSMgrId = smgr_register(&md_smgr, sizeof(MdSMgrRelationData)); +} /* local routines */ static void mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo); -static MdfdVec *mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior); -static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum, +static MdfdVec *mdopenfork(MdSMgrRelation reln, ForkNumber forknum, int behavior); +static void register_dirty_segment(MdSMgrRelation reln, ForkNumber forknum, MdfdVec *seg); static void register_unlink_segment(RelFileLocatorBackend rlocator, ForkNumber forknum, BlockNumber segno); static void register_forget_request(RelFileLocatorBackend rlocator, ForkNumber forknum, BlockNumber segno); -static void _fdvec_resize(SMgrRelation reln, +static void _fdvec_resize(MdSMgrRelation reln, ForkNumber forknum, int nseg); -static MdPathStr _mdfd_segpath(SMgrRelation reln, ForkNumber forknum, +static MdPathStr _mdfd_segpath(MdSMgrRelation reln, ForkNumber forknum, 
BlockNumber segno); -static MdfdVec *_mdfd_openseg(SMgrRelation reln, ForkNumber forknum, +static MdfdVec *_mdfd_openseg(MdSMgrRelation reln, ForkNumber forknum, BlockNumber segno, int oflags); -static MdfdVec *_mdfd_getseg(SMgrRelation reln, ForkNumber forknum, +static MdfdVec *_mdfd_getseg(MdSMgrRelation reln, ForkNumber forknum, BlockNumber blkno, bool skipFsync, int behavior); -static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, +static BlockNumber _mdnblocks(MdSMgrRelation reln, ForkNumber forknum, MdfdVec *seg); static PgAioResult md_readv_complete(PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data); @@ -202,6 +247,8 @@ mdinit(void) bool mdexists(SMgrRelation reln, ForkNumber forknum) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; + /* * Close it first, to ensure that we notice if the fork has been unlinked * since we opened it. As an optimization, we can skip that in recovery, @@ -210,7 +257,7 @@ mdexists(SMgrRelation reln, ForkNumber forknum) if (!InRecovery) mdclose(reln, forknum); - return (mdopenfork(reln, forknum, EXTENSION_RETURN_NULL) != NULL); + return (mdopenfork(mdreln, forknum, EXTENSION_RETURN_NULL) != NULL); } /* @@ -221,14 +268,15 @@ mdexists(SMgrRelation reln, ForkNumber forknum) void mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; MdfdVec *mdfd; RelPathStr path; File fd; - if (isRedo && reln->md_num_open_segs[forknum] > 0) + if (isRedo && mdreln->md_num_open_segs[forknum] > 0) return; /* created and opened already... 
*/ - Assert(reln->md_num_open_segs[forknum] == 0); + Assert(mdreln->md_num_open_segs[forknum] == 0); /* * We may be using the target table space for the first time in this @@ -263,13 +311,13 @@ mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo) } } - _fdvec_resize(reln, forknum, 1); - mdfd = &reln->md_seg_fds[forknum][0]; + _fdvec_resize(mdreln, forknum, 1); + mdfd = &mdreln->md_seg_fds[forknum][0]; mdfd->mdfd_vfd = fd; mdfd->mdfd_segno = 0; if (!SmgrIsTemp(reln)) - register_dirty_segment(reln, forknum, mdfd); + register_dirty_segment(mdreln, forknum, mdfd); } /* @@ -487,6 +535,7 @@ void mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; pgoff_t seekpos; int nbytes; MdfdVec *v; @@ -513,7 +562,7 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, relpath(reln->smgr_rlocator, forknum).str, InvalidBlockNumber))); - v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_CREATE); + v = _mdfd_getseg(mdreln, forknum, blocknum, skipFsync, EXTENSION_CREATE); seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); @@ -537,9 +586,9 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, } if (!skipFsync && !SmgrIsTemp(reln)) - register_dirty_segment(reln, forknum, v); + register_dirty_segment(mdreln, forknum, v); - Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE)); + Assert(_mdnblocks(mdreln, forknum, v) <= ((BlockNumber) RELSEG_SIZE)); } /* @@ -552,6 +601,7 @@ void mdzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; MdfdVec *v; BlockNumber curblocknum = blocknum; int remblocks = nblocks; @@ -586,7 +636,7 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum, else numblocks = remblocks; - v = _mdfd_getseg(reln, forknum, curblocknum, skipFsync, EXTENSION_CREATE); + v = 
_mdfd_getseg(mdreln, forknum, curblocknum, skipFsync, EXTENSION_CREATE); Assert(segstartblock < RELSEG_SIZE); Assert(segstartblock + numblocks <= RELSEG_SIZE); @@ -641,9 +691,9 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum, } if (!skipFsync && !SmgrIsTemp(reln)) - register_dirty_segment(reln, forknum, v); + register_dirty_segment(mdreln, forknum, v); - Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE)); + Assert(_mdnblocks(mdreln, forknum, v) <= ((BlockNumber) RELSEG_SIZE)); remblocks -= numblocks; curblocknum += numblocks; @@ -661,7 +711,7 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum, * invent one out of whole cloth. */ static MdfdVec * -mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior) +mdopenfork(MdSMgrRelation reln, ForkNumber forknum, int behavior) { MdfdVec *mdfd; RelPathStr path; @@ -671,7 +721,7 @@ mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior) if (reln->md_num_open_segs[forknum] > 0) return &reln->md_seg_fds[forknum][0]; - path = relpath(reln->smgr_rlocator, forknum); + path = relpath(reln->reln.smgr_rlocator, forknum); fd = PathNameOpenFile(path.str, _mdfd_open_flags()); @@ -701,9 +751,11 @@ mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior) void mdopen(SMgrRelation reln) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; + /* mark it not open */ for (int forknum = 0; forknum <= MAX_FORKNUM; forknum++) - reln->md_num_open_segs[forknum] = 0; + mdreln->md_num_open_segs[forknum] = 0; } /* @@ -712,7 +764,8 @@ mdopen(SMgrRelation reln) void mdclose(SMgrRelation reln, ForkNumber forknum) { - int nopensegs = reln->md_num_open_segs[forknum]; + MdSMgrRelation mdreln = (MdSMgrRelation) reln; + int nopensegs = mdreln->md_num_open_segs[forknum]; /* No work if already closed */ if (nopensegs == 0) @@ -721,10 +774,10 @@ mdclose(SMgrRelation reln, ForkNumber forknum) /* close segments starting from the end */ while (nopensegs > 0) { - MdfdVec *v = &reln->md_seg_fds[forknum][nopensegs - 1]; + 
MdfdVec *v = &mdreln->md_seg_fds[forknum][nopensegs - 1]; FileClose(v->mdfd_vfd); - _fdvec_resize(reln, forknum, nopensegs - 1); + _fdvec_resize(mdreln, forknum, nopensegs - 1); nopensegs--; } } @@ -737,6 +790,7 @@ mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks) { #ifdef USE_PREFETCH + MdSMgrRelation mdreln = (MdSMgrRelation) reln; Assert((io_direct_flags & IO_DIRECT_DATA) == 0); @@ -749,7 +803,7 @@ mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, MdfdVec *v; int nblocks_this_segment; - v = _mdfd_getseg(reln, forknum, blocknum, false, + v = _mdfd_getseg(mdreln, forknum, blocknum, false, InRecovery ? EXTENSION_RETURN_NULL : EXTENSION_FAIL); if (v == NULL) return false; @@ -847,6 +901,8 @@ void mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; + while (nblocks > 0) { struct iovec iov[PG_IOV_MAX]; @@ -858,7 +914,7 @@ mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, size_t transferred_this_segment; size_t size_this_segment; - v = _mdfd_getseg(reln, forknum, blocknum, false, + v = _mdfd_getseg(mdreln, forknum, blocknum, false, EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY); seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); @@ -986,6 +1042,7 @@ mdstartreadv(PgAioHandle *ioh, SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; pgoff_t seekpos; MdfdVec *v; BlockNumber nblocks_this_segment; @@ -993,7 +1050,7 @@ mdstartreadv(PgAioHandle *ioh, int iovcnt; int ret; - v = _mdfd_getseg(reln, forknum, blocknum, false, + v = _mdfd_getseg(mdreln, forknum, blocknum, false, EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY); seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); @@ -1059,6 +1116,8 @@ void mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, 
const void **buffers, BlockNumber nblocks, bool skipFsync) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; + /* This assert is too expensive to have on normally ... */ #ifdef CHECK_WRITE_VS_EXTEND Assert((uint64) blocknum + (uint64) nblocks <= (uint64) mdnblocks(reln, forknum)); @@ -1075,7 +1134,7 @@ mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, size_t transferred_this_segment; size_t size_this_segment; - v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, + v = _mdfd_getseg(mdreln, forknum, blocknum, skipFsync, EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY); seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); @@ -1145,7 +1204,7 @@ mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, } if (!skipFsync && !SmgrIsTemp(reln)) - register_dirty_segment(reln, forknum, v); + register_dirty_segment(mdreln, forknum, v); nblocks -= nblocks_this_segment; buffers += nblocks_this_segment; @@ -1164,6 +1223,8 @@ void mdwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; + Assert((io_direct_flags & IO_DIRECT_DATA) == 0); /* @@ -1178,7 +1239,7 @@ mdwriteback(SMgrRelation reln, ForkNumber forknum, int segnum_start, segnum_end; - v = _mdfd_getseg(reln, forknum, blocknum, true /* not used */ , + v = _mdfd_getseg(mdreln, forknum, blocknum, true /* not used */ , EXTENSION_DONT_OPEN); /* @@ -1222,14 +1283,15 @@ mdwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; MdfdVec *v; BlockNumber nblocks; BlockNumber segno; - mdopenfork(reln, forknum, EXTENSION_FAIL); + mdopenfork(mdreln, forknum, EXTENSION_FAIL); /* mdopen has opened the first segment */ - Assert(reln->md_num_open_segs[forknum] > 0); + Assert(mdreln->md_num_open_segs[forknum] > 0); /* * Start from the last open segments, to avoid redundant seeks. 
We have @@ -1244,12 +1306,12 @@ mdnblocks(SMgrRelation reln, ForkNumber forknum) * that's OK because the checkpointer never needs to compute relation * size.) */ - segno = reln->md_num_open_segs[forknum] - 1; - v = &reln->md_seg_fds[forknum][segno]; + segno = mdreln->md_num_open_segs[forknum] - 1; + v = &mdreln->md_seg_fds[forknum][segno]; for (;;) { - nblocks = _mdnblocks(reln, forknum, v); + nblocks = _mdnblocks(mdreln, forknum, v); if (nblocks > ((BlockNumber) RELSEG_SIZE)) elog(FATAL, "segment too big"); if (nblocks < ((BlockNumber) RELSEG_SIZE)) @@ -1267,7 +1329,7 @@ mdnblocks(SMgrRelation reln, ForkNumber forknum) * undermines _mdfd_getseg's attempts to notice and report an error * upon access to a missing segment. */ - v = _mdfd_openseg(reln, forknum, segno, 0); + v = _mdfd_openseg(mdreln, forknum, segno, 0); if (v == NULL) return segno * ((BlockNumber) RELSEG_SIZE); } @@ -1290,6 +1352,7 @@ void mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber curnblk, BlockNumber nblocks) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; BlockNumber priorblocks; int curopensegs; @@ -1310,14 +1373,14 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, * Truncate segments, starting at the last one. Starting at the end makes * managing the memory for the fd array easier, should there be errors. 
*/ - curopensegs = reln->md_num_open_segs[forknum]; + curopensegs = mdreln->md_num_open_segs[forknum]; while (curopensegs > 0) { MdfdVec *v; priorblocks = (curopensegs - 1) * RELSEG_SIZE; - v = &reln->md_seg_fds[forknum][curopensegs - 1]; + v = &mdreln->md_seg_fds[forknum][curopensegs - 1]; if (priorblocks > nblocks) { @@ -1332,13 +1395,13 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, FilePathName(v->mdfd_vfd)))); if (!SmgrIsTemp(reln)) - register_dirty_segment(reln, forknum, v); + register_dirty_segment(mdreln, forknum, v); /* we never drop the 1st segment */ - Assert(v != &reln->md_seg_fds[forknum][0]); + Assert(v != &mdreln->md_seg_fds[forknum][0]); FileClose(v->mdfd_vfd); - _fdvec_resize(reln, forknum, curopensegs - 1); + _fdvec_resize(mdreln, forknum, curopensegs - 1); } else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks) { @@ -1358,7 +1421,7 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, FilePathName(v->mdfd_vfd), nblocks))); if (!SmgrIsTemp(reln)) - register_dirty_segment(reln, forknum, v); + register_dirty_segment(mdreln, forknum, v); } else { @@ -1378,6 +1441,7 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, void mdregistersync(SMgrRelation reln, ForkNumber forknum) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; int segno; int min_inactive_seg; @@ -1387,7 +1451,7 @@ mdregistersync(SMgrRelation reln, ForkNumber forknum) */ mdnblocks(reln, forknum); - min_inactive_seg = segno = reln->md_num_open_segs[forknum]; + min_inactive_seg = segno = mdreln->md_num_open_segs[forknum]; /* * Temporarily open inactive segments, then close them after sync. There @@ -1395,20 +1459,20 @@ mdregistersync(SMgrRelation reln, ForkNumber forknum) * harmless. We don't bother to clean them up and take a risk of further * trouble. The next mdclose() will soon close them. 
*/ - while (_mdfd_openseg(reln, forknum, segno, 0) != NULL) + while (_mdfd_openseg(mdreln, forknum, segno, 0) != NULL) segno++; while (segno > 0) { - MdfdVec *v = &reln->md_seg_fds[forknum][segno - 1]; + MdfdVec *v = &mdreln->md_seg_fds[forknum][segno - 1]; - register_dirty_segment(reln, forknum, v); + register_dirty_segment(mdreln, forknum, v); /* Close inactive segments immediately */ if (segno > min_inactive_seg) { FileClose(v->mdfd_vfd); - _fdvec_resize(reln, forknum, segno - 1); + _fdvec_resize(mdreln, forknum, segno - 1); } segno--; @@ -1429,6 +1493,7 @@ mdregistersync(SMgrRelation reln, ForkNumber forknum) void mdimmedsync(SMgrRelation reln, ForkNumber forknum) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; int segno; int min_inactive_seg; @@ -1438,7 +1503,7 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum) */ mdnblocks(reln, forknum); - min_inactive_seg = segno = reln->md_num_open_segs[forknum]; + min_inactive_seg = segno = mdreln->md_num_open_segs[forknum]; /* * Temporarily open inactive segments, then close them after sync. There @@ -1446,12 +1511,12 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum) * is harmless. We don't bother to clean them up and take a risk of * further trouble. The next mdclose() will soon close them. 
*/ - while (_mdfd_openseg(reln, forknum, segno, 0) != NULL) + while (_mdfd_openseg(mdreln, forknum, segno, 0) != NULL) segno++; while (segno > 0) { - MdfdVec *v = &reln->md_seg_fds[forknum][segno - 1]; + MdfdVec *v = &mdreln->md_seg_fds[forknum][segno - 1]; /* * fsyncs done through mdimmedsync() should be tracked in a separate @@ -1472,7 +1537,7 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum) if (segno > min_inactive_seg) { FileClose(v->mdfd_vfd); - _fdvec_resize(reln, forknum, segno - 1); + _fdvec_resize(mdreln, forknum, segno - 1); } segno--; @@ -1482,9 +1547,10 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum) int mdfd(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, uint32 *off) { - MdfdVec *v = mdopenfork(reln, forknum, EXTENSION_FAIL); + MdSMgrRelation mdreln = (MdSMgrRelation) reln; + MdfdVec *v = mdopenfork(mdreln, forknum, EXTENSION_FAIL); - v = _mdfd_getseg(reln, forknum, blocknum, false, + v = _mdfd_getseg(mdreln, forknum, blocknum, false, EXTENSION_FAIL); *off = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); @@ -1504,14 +1570,14 @@ mdfd(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, uint32 *off) * enough to be a performance problem). 
*/ static void -register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg) +register_dirty_segment(MdSMgrRelation reln, ForkNumber forknum, MdfdVec *seg) { FileTag tag; - INIT_MD_FILETAG(tag, reln->smgr_rlocator.locator, forknum, seg->mdfd_segno); + INIT_MD_FILETAG(tag, reln->reln.smgr_rlocator.locator, forknum, seg->mdfd_segno); /* Temp relations should never be fsync'd */ - Assert(!SmgrIsTemp(reln)); + Assert(!SmgrIsTemp(&reln->reln)); if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false /* retryOnError */ )) { @@ -1629,7 +1695,7 @@ DropRelationFiles(RelFileLocator *delrels, int ndelrels, bool isRedo) * _fdvec_resize() -- Resize the fork's open segments array */ static void -_fdvec_resize(SMgrRelation reln, +_fdvec_resize(MdSMgrRelation reln, ForkNumber forknum, int nseg) { @@ -1677,12 +1743,12 @@ _fdvec_resize(SMgrRelation reln, * returned string is palloc'd. */ static MdPathStr -_mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno) +_mdfd_segpath(MdSMgrRelation reln, ForkNumber forknum, BlockNumber segno) { RelPathStr path; MdPathStr fullpath; - path = relpath(reln->smgr_rlocator, forknum); + path = relpath(reln->reln.smgr_rlocator, forknum); if (segno > 0) sprintf(fullpath.str, "%s.%u", path.str, segno); @@ -1697,7 +1763,7 @@ _mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno) * and make a MdfdVec object for it. Returns NULL on failure. */ static MdfdVec * -_mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno, +_mdfd_openseg(MdSMgrRelation reln, ForkNumber forknum, BlockNumber segno, int oflags) { MdfdVec *v; @@ -1740,7 +1806,7 @@ _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno, * EXTENSION_CREATE case. 
*/ static MdfdVec * -_mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, +_mdfd_getseg(MdSMgrRelation reln, ForkNumber forknum, BlockNumber blkno, bool skipFsync, int behavior) { MdfdVec *v; @@ -1814,7 +1880,7 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, char *zerobuf = palloc_aligned(BLCKSZ, PG_IO_ALIGN_SIZE, MCXT_ALLOC_ZERO); - mdextend(reln, forknum, + mdextend((SMgrRelation) reln, forknum, nextsegno * ((BlockNumber) RELSEG_SIZE) - 1, zerobuf, skipFsync); pfree(zerobuf); @@ -1869,7 +1935,7 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, * Get number of blocks present in a single disk file */ static BlockNumber -_mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg) +_mdnblocks(MdSMgrRelation reln, ForkNumber forknum, MdfdVec *seg) { pgoff_t len; @@ -1892,7 +1958,7 @@ _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg) int mdsyncfiletag(const FileTag *ftag, char *path) { - SMgrRelation reln = smgropen(ftag->rlocator, INVALID_PROC_NUMBER); + MdSMgrRelation reln = (MdSMgrRelation) smgropen(ftag->rlocator, INVALID_PROC_NUMBER); File file; instr_time io_start; bool need_to_close; diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index 378c2a03f39..36e0b4f4a54 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -71,87 +71,18 @@ #include "storage/ipc.h" #include "storage/md.h" #include "storage/smgr.h" +#include "port/atomics.h" #include "utils/hsearch.h" #include "utils/inval.h" +#include "utils/memutils.h" +static f_smgr *smgrsw; -/* - * This struct of function pointers defines the API between smgr.c and - * any individual storage manager module. Note that smgr subfunctions are - * generally expected to report problems via elog(ERROR). 
An exception is - * that smgr_unlink should use elog(WARNING), rather than erroring out, - * because we normally unlink relations during post-commit/abort cleanup, - * and so it's too late to raise an error. Also, various conditions that - * would normally be errors should be allowed during bootstrap and/or WAL - * recovery --- see comments in md.c for details. - */ -typedef struct f_smgr -{ - void (*smgr_init) (void); /* may be NULL */ - void (*smgr_shutdown) (void); /* may be NULL */ - void (*smgr_open) (SMgrRelation reln); - void (*smgr_close) (SMgrRelation reln, ForkNumber forknum); - void (*smgr_create) (SMgrRelation reln, ForkNumber forknum, - bool isRedo); - bool (*smgr_exists) (SMgrRelation reln, ForkNumber forknum); - void (*smgr_unlink) (RelFileLocatorBackend rlocator, ForkNumber forknum, - bool isRedo); - void (*smgr_extend) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, const void *buffer, bool skipFsync); - void (*smgr_zeroextend) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, int nblocks, bool skipFsync); - bool (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, int nblocks); - uint32 (*smgr_maxcombine) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum); - void (*smgr_readv) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, - void **buffers, BlockNumber nblocks); - void (*smgr_startreadv) (PgAioHandle *ioh, - SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, - void **buffers, BlockNumber nblocks); - void (*smgr_writev) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, - const void **buffers, BlockNumber nblocks, - bool skipFsync); - void (*smgr_writeback) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, BlockNumber nblocks); - BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum); - void (*smgr_truncate) (SMgrRelation reln, ForkNumber forknum, - BlockNumber old_blocks, BlockNumber nblocks); - void 
(*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum); - void (*smgr_registersync) (SMgrRelation reln, ForkNumber forknum); - int (*smgr_fd) (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, uint32 *off); -} f_smgr; - -static const f_smgr smgrsw[] = { - /* magnetic disk */ - { - .smgr_init = mdinit, - .smgr_shutdown = NULL, - .smgr_open = mdopen, - .smgr_close = mdclose, - .smgr_create = mdcreate, - .smgr_exists = mdexists, - .smgr_unlink = mdunlink, - .smgr_extend = mdextend, - .smgr_zeroextend = mdzeroextend, - .smgr_prefetch = mdprefetch, - .smgr_maxcombine = mdmaxcombine, - .smgr_readv = mdreadv, - .smgr_startreadv = mdstartreadv, - .smgr_writev = mdwritev, - .smgr_writeback = mdwriteback, - .smgr_nblocks = mdnblocks, - .smgr_truncate = mdtruncate, - .smgr_immedsync = mdimmedsync, - .smgr_registersync = mdregistersync, - .smgr_fd = mdfd, - } -}; +static int NSmgr = 0; -static const int NSmgr = lengthof(smgrsw); +static Size LargestSMgrRelationSize = 0; + +SMgrId storage_manager_id; /* * Each backend has a hashtable that stores all extant SMgrRelation objects. 
@@ -175,6 +106,63 @@ const PgAioTargetInfo aio_smgr_target_info = { .describe_identity = smgr_aio_describe_identity, }; +#define MaxSMgrId UINT8_MAX + +/* + * smgr_register() -- add a storage manager to smgrsw[] and return its id. + * Not allowed once shared_preload_libraries processing is done. + * smgrrelation_size is the per-relation struct size this manager needs; + * the largest registered value sizes the SMgrRelation hash entries. + */ +SMgrId +smgr_register(const f_smgr *smgr, Size smgrrelation_size) +{ + SMgrId my_id; + MemoryContext old; + + if (process_shared_preload_libraries_done) + elog(FATAL, "SMgrs must be registered in the shared_preload_libraries phase"); + if (NSmgr == MaxSMgrId) + elog(FATAL, "Too many smgrs registered"); + if (smgr->name == NULL || *smgr->name == 0) + elog(FATAL, "smgr registered with invalid name"); + + /* smgr_init and smgr_shutdown may be NULL; every other callback is required */ + Assert(smgr->smgr_open != NULL); + Assert(smgr->smgr_close != NULL); + Assert(smgr->smgr_create != NULL); + Assert(smgr->smgr_exists != NULL); + Assert(smgr->smgr_unlink != NULL); + Assert(smgr->smgr_extend != NULL); + Assert(smgr->smgr_zeroextend != NULL); + Assert(smgr->smgr_prefetch != NULL); + Assert(smgr->smgr_maxcombine != NULL); + Assert(smgr->smgr_readv != NULL); + Assert(smgr->smgr_startreadv != NULL); + Assert(smgr->smgr_writev != NULL); + Assert(smgr->smgr_writeback != NULL); + Assert(smgr->smgr_nblocks != NULL); + Assert(smgr->smgr_truncate != NULL); + Assert(smgr->smgr_registersync != NULL); + Assert(smgr->smgr_immedsync != NULL); + Assert(smgr->smgr_fd != NULL); + + /* palloc/repalloc ereport(ERROR) on OOM, so no NULL check is needed */ + old = MemoryContextSwitchTo(TopMemoryContext); + my_id = NSmgr; + if (my_id == 0) + smgrsw = palloc_array(f_smgr, 1); + else + smgrsw = repalloc_array(smgrsw, f_smgr, NSmgr + 1); + MemoryContextSwitchTo(old); + NSmgr++; + + smgrsw[my_id] = *smgr; + + LargestSMgrRelationSize = Max(LargestSMgrRelationSize, smgrrelation_size); + + return my_id; +} /* * smgrinit(), smgrshutdown() -- Initialize or shut down storage @@ -252,8 +240,11 @@ smgropen(RelFileLocator rlocator, ProcNumber backend) /* First time through: initialize the hash table */ HASHCTL ctl; + LargestSMgrRelationSize = MAXALIGN(LargestSMgrRelationSize); + Assert(NSmgr > 0); + ctl.keysize = sizeof(RelFileLocatorBackend); - 
ctl.entrysize = sizeof(SMgrRelationData); + ctl.entrysize = LargestSMgrRelationSize; SMgrRelationHash = hash_create("smgr relation table", 400, &ctl, HASH_ELEM | HASH_BLOBS); dlist_init(&unpinned_relns); @@ -273,7 +264,8 @@ smgropen(RelFileLocator rlocator, ProcNumber backend) reln->smgr_targblock = InvalidBlockNumber; for (int i = 0; i <= MAX_FORKNUM; ++i) reln->smgr_cached_nblocks[i] = InvalidBlockNumber; - reln->smgr_which = 0; /* we only have md.c at present */ + + reln->smgr_which = storage_manager_id; /* it is not pinned yet */ reln->pincount = 0; diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c index 563f20374ff..c6cd4445fff 100644 --- a/src/backend/utils/init/miscinit.c +++ b/src/backend/utils/init/miscinit.c @@ -43,6 +43,7 @@ #include "replication/slotsync.h" #include "storage/fd.h" #include "storage/ipc.h" +#include "storage/md.h" #include "storage/latch.h" #include "storage/pg_shmem.h" #include "storage/pmsignal.h" @@ -192,6 +193,9 @@ InitStandaloneProcess(const char *argv0) InitProcessLocalLatch(); InitializeLatchWaitSet(); + /* Initialize smgrs */ + register_builtin_dynamic_managers(); + /* * For consistency with InitPostmasterChild, initialize signal mask here. * But we don't unblock SIGQUIT or provide a default handler for it. @@ -1872,6 +1876,17 @@ process_session_preload_libraries(void) true); } +/* + * Register any internal managers. 
+ */ +void +register_builtin_dynamic_managers(void) +{ + mdsmgr_register(); + + storage_manager_id = MdSMgrId; +} + /* * process any shared memory requests from preloaded libraries */ diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index db559b39c4d..11ac53a048b 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -524,6 +524,7 @@ extern void TouchSocketLockFiles(void); extern void AddToDataDirLockFile(int target_line, const char *str); extern bool RecheckDataDirLockFile(void); extern void ValidatePgVersion(const char *path); +extern void register_builtin_dynamic_managers(void); extern void process_shared_preload_libraries(void); extern void process_session_preload_libraries(void); extern void process_shmem_requests(void); diff --git a/src/include/storage/md.h b/src/include/storage/md.h index b8d10329eb8..c6958d92a68 100644 --- a/src/include/storage/md.h +++ b/src/include/storage/md.h @@ -22,6 +22,10 @@ extern PGDLLIMPORT const PgAioHandleCallbacks aio_md_readv_cb; +/* registration function for the md storage manager, and the id it was assigned */ +extern void mdsmgr_register(void); +extern PGDLLIMPORT SMgrId MdSMgrId; + /* md storage manager functionality */ extern void mdinit(void); extern void mdopen(SMgrRelation reln); diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h index 09bd42fcf4b..b170038708a 100644 --- a/src/include/storage/smgr.h +++ b/src/include/storage/smgr.h @@ -19,6 +19,10 @@ #include "storage/block.h" #include "storage/relfilelocator.h" +typedef uint8 SMgrId; + +extern PGDLLIMPORT SMgrId storage_manager_id; + /* * smgr.c maintains a table of SMgrRelation objects, which are essentially * cached file handles. An SMgrRelation is created (if not already present) @@ -52,14 +56,7 @@ typedef struct SMgrRelationData * Fields below here are intended to be private to smgr.c and its * submodules. Do not touch them from elsewhere. 
*/ - int smgr_which; /* storage manager selector */ - - /* - * for md.c; per-fork arrays of the number of open segments - * (md_num_open_segs) and the segments themselves (md_seg_fds). - */ - int md_num_open_segs[MAX_FORKNUM + 1]; - struct _MdfdVec *md_seg_fds[MAX_FORKNUM + 1]; + SMgrId smgr_which; /* storage manager selector */ /* * Pinning support. If unpinned (ie. pincount == 0), 'node' is a list @@ -76,6 +73,59 @@ typedef SMgrRelationData *SMgrRelation; extern PGDLLIMPORT const PgAioTargetInfo aio_smgr_target_info; +/* + * This struct of function pointers defines the API between smgr.c and + * any individual storage manager module. Note that smgr subfunctions are + * generally expected to report problems via elog(ERROR). An exception is + * that smgr_unlink should use elog(WARNING), rather than erroring out, + * because we normally unlink relations during post-commit/abort cleanup, + * and so it's too late to raise an error. Also, various conditions that + * would normally be errors should be allowed during bootstrap and/or WAL + * recovery --- see comments in md.c for details. 
+ */ +typedef struct f_smgr +{ + const char *name; /* required, non-empty (checked by smgr_register) */ + void (*smgr_init) (void); /* may be NULL */ + void (*smgr_shutdown) (void); /* may be NULL */ + void (*smgr_open) (SMgrRelation reln); + void (*smgr_close) (SMgrRelation reln, ForkNumber forknum); + void (*smgr_create) (SMgrRelation reln, ForkNumber forknum, + bool isRedo); + bool (*smgr_exists) (SMgrRelation reln, ForkNumber forknum); + void (*smgr_unlink) (RelFileLocatorBackend rlocator, ForkNumber forknum, + bool isRedo); + void (*smgr_extend) (SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, const void *buffer, bool skipFsync); + void (*smgr_zeroextend) (SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, int nblocks, bool skipFsync); + bool (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, int nblocks); + uint32 (*smgr_maxcombine) (SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum); + void (*smgr_readv) (SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, + void **buffers, BlockNumber nblocks); + void (*smgr_startreadv) (PgAioHandle *ioh, + SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, + void **buffers, BlockNumber nblocks); + void (*smgr_writev) (SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, + const void **buffers, BlockNumber nblocks, + bool skipFsync); + void (*smgr_writeback) (SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, BlockNumber nblocks); + BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum); + void (*smgr_truncate) (SMgrRelation reln, ForkNumber forknum, + BlockNumber old_blocks, BlockNumber nblocks); + void (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum); + void (*smgr_registersync) (SMgrRelation reln, ForkNumber forknum); + int (*smgr_fd) (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, uint32 *off); +} f_smgr; + +extern SMgrId smgr_register(const f_smgr *smgr, Size smgrrelation_size); + extern void smgrinit(void); extern
SMgrRelation smgropen(RelFileLocator rlocator, ProcNumber backend); extern bool smgrexists(SMgrRelation reln, ForkNumber forknum); diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 1c8610fd46c..c2fd081ef68 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -1689,6 +1689,7 @@ Material MaterialPath MaterialState MdPathStr +MdSMgrRelationData MdfdVec Memoize MemoizeEntry base-commit: a5b40d156edaafa4fe93cdae96592f26bc865bd3 -- 2.43.0