From e17e79bfe150fa137d345dc7c6848c4e596c2fa4 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Sun, 5 Dec 2021 15:07:28 -0800 Subject: [PATCH v1 1/5] Introduce custodian. The custodian process is a new auxiliary process that is intended to help offload tasks that could otherwise delay startup and checkpointing. This commit simply adds the new process; it does not yet do anything useful. --- src/backend/postmaster/Makefile | 1 + src/backend/postmaster/auxprocess.c | 8 ++ src/backend/postmaster/custodian.c | 210 ++++++++++++++++++++++++++++++++ src/backend/postmaster/postmaster.c | 44 ++++++- src/backend/storage/lmgr/proc.c | 1 + src/backend/utils/activity/wait_event.c | 3 + src/backend/utils/init/miscinit.c | 3 + src/include/miscadmin.h | 3 + src/include/postmaster/custodian.h | 17 +++ src/include/storage/proc.h | 11 +- src/include/utils/wait_event.h | 1 + 11 files changed, 297 insertions(+), 5 deletions(-) create mode 100644 src/backend/postmaster/custodian.c create mode 100644 src/include/postmaster/custodian.h diff --git a/src/backend/postmaster/Makefile b/src/backend/postmaster/Makefile index 787c6a2c3b..7ec7b23467 100644 --- a/src/backend/postmaster/Makefile +++ b/src/backend/postmaster/Makefile @@ -18,6 +18,7 @@ OBJS = \ bgworker.o \ bgwriter.o \ checkpointer.o \ + custodian.o \ fork_process.o \ interrupt.o \ pgarch.o \ diff --git a/src/backend/postmaster/auxprocess.c b/src/backend/postmaster/auxprocess.c index 7452f908b2..c55cc84490 100644 --- a/src/backend/postmaster/auxprocess.c +++ b/src/backend/postmaster/auxprocess.c @@ -20,6 +20,7 @@ #include "pgstat.h" #include "postmaster/auxprocess.h" #include "postmaster/bgwriter.h" +#include "postmaster/custodian.h" #include "postmaster/startup.h" #include "postmaster/walwriter.h" #include "replication/walreceiver.h" @@ -74,6 +75,9 @@ AuxiliaryProcessMain(AuxProcType auxtype) case CheckpointerProcess: MyBackendType = B_CHECKPOINTER; break; + case CustodianProcess: + MyBackendType = B_CUSTODIAN; + break; case
WalWriterProcess: MyBackendType = B_WAL_WRITER; break; @@ -153,6 +157,10 @@ AuxiliaryProcessMain(AuxProcType auxtype) CheckpointerMain(); proc_exit(1); + case CustodianProcess: + CustodianMain(); + proc_exit(1); + case WalWriterProcess: InitXLOGAccess(); WalWriterMain(); diff --git a/src/backend/postmaster/custodian.c b/src/backend/postmaster/custodian.c new file mode 100644 index 0000000000..0ba59949bb --- /dev/null +++ b/src/backend/postmaster/custodian.c @@ -0,0 +1,210 @@ +/*------------------------------------------------------------------------- + * + * custodian.c + * + * The custodian process is new as of Postgres 15. Its main purpose is to + * offload tasks that could otherwise delay startup and checkpointing, but + * it needn't be restricted to just those things. Offloaded tasks should + * not be synchronous (e.g., checkpointing shouldn't need to wait for the + * custodian to complete a task before proceeding). Also, ensure that any + * offloaded tasks are either not required during single-user mode or are + * performed separately during single-user mode. + * + * The custodian is not an essential process and can shut down quickly when + * requested. The custodian will wake up approximately once every 5 + * minutes to perform its tasks, but backends can (and should) set its + * latch to wake it up sooner. + * + * Normal termination is by SIGTERM, which instructs the custodian to + * exit(0). Emergency termination is by SIGQUIT; like any backend, the + * custodian will simply abort and exit on SIGQUIT. + * + * If the custodian exits unexpectedly, the postmaster treats that the same + * as a backend crash: shared memory may be corrupted, so remaining + * backends should be killed by SIGQUIT and then a recovery cycle started.
+ * + * + * Copyright (c) 2021, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/postmaster/custodian.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "libpq/pqsignal.h" +#include "pgstat.h" +#include "postmaster/custodian.h" +#include "postmaster/interrupt.h" +#include "storage/bufmgr.h" +#include "storage/condition_variable.h" +#include "storage/proc.h" +#include "storage/procsignal.h" +#include "utils/memutils.h" + +#define CUSTODIAN_TIMEOUT_S (300) /* 5 minutes */ + +/* + * Main entry point for custodian process + * + * This is invoked from AuxiliaryProcessMain, which has already created the + * basic execution environment, but not enabled signals yet. + */ +void +CustodianMain(void) +{ + sigjmp_buf local_sigjmp_buf; + MemoryContext custodian_context; + + /* + * Properly accept or ignore signals that might be sent to us. + */ + pqsignal(SIGHUP, SignalHandlerForConfigReload); + pqsignal(SIGINT, SignalHandlerForShutdownRequest); + pqsignal(SIGTERM, SignalHandlerForShutdownRequest); + /* SIGQUIT handler was already set up by InitPostmasterChild */ + pqsignal(SIGALRM, SIG_IGN); + pqsignal(SIGPIPE, SIG_IGN); + pqsignal(SIGUSR1, procsignal_sigusr1_handler); + pqsignal(SIGUSR2, SIG_IGN); + + /* + * Reset some signals that are accepted by postmaster but not here + */ + pqsignal(SIGCHLD, SIG_DFL); + + /* + * Create a memory context that we will do all our work in. We do this so + * that we can reset the context during error recovery and thereby avoid + * possible memory leaks. + */ + custodian_context = AllocSetContextCreate(TopMemoryContext, + "Custodian", + ALLOCSET_DEFAULT_SIZES); + MemoryContextSwitchTo(custodian_context); + + /* + * If an exception is encountered, processing resumes here. + * + * You might wonder why this isn't coded as an infinite loop around a + * PG_TRY construct.
The reason is that this is the bottom of the + * exception stack, and so with PG_TRY there would be no exception handler + * in force at all during the CATCH part. By leaving the outermost setjmp + * always active, we have at least some chance of recovering from an error + * during error recovery. (If we get into an infinite loop thereby, it + * will soon be stopped by overflow of elog.c's internal state stack.) + * + * Note that we use sigsetjmp(..., 1), so that the prevailing signal mask + * (to wit, BlockSig) will be restored when longjmp'ing to here. Thus, + * signals other than SIGQUIT will be blocked until we complete error + * recovery. It might seem that this policy makes the HOLD_INTERRUPTS() + * call redundant, but it is not since InterruptPending might be set + * already. + */ + if (sigsetjmp(local_sigjmp_buf, 1) != 0) + { + /* Since not using PG_TRY, must reset error stack by hand */ + error_context_stack = NULL; + + /* Prevent interrupts while cleaning up */ + HOLD_INTERRUPTS(); + + /* Report the error to the server log */ + EmitErrorReport(); + + /* These operations are really just a minimal subset of + * AbortTransaction(). We don't have very many resources to worry + * about. + */ + LWLockReleaseAll(); + ConditionVariableCancelSleep(); + pgstat_report_wait_end(); + AbortBufferIO(); + UnlockBuffers(); + ReleaseAuxProcessResources(false); + AtEOXact_Buffers(false); + AtEOXact_SMgr(); + AtEOXact_Files(false); + AtEOXact_HashTables(false); + + /* + * Now return to normal top-level context and clear ErrorContext for + * next time. + */ + MemoryContextSwitchTo(custodian_context); + FlushErrorState(); + + /* Flush any leaked data in the top-level context */ + MemoryContextResetAndDeleteChildren(custodian_context); + + /* Now we can allow interrupts again */ + RESUME_INTERRUPTS(); + + /* + * Sleep at least 1 second after any error. A write error is likely + * to be repeated, and we don't want to be filling the error logs as + * fast as we can.
+ */ + pg_usleep(1000000L); + + /* + * Close all open files after any error. This is helpful on Windows, + * where holding deleted files open causes various strange errors. + * It's not clear we need it elsewhere, but shouldn't hurt. + */ + smgrcloseall(); + + /* Report wait end here, when there is no further possibility of wait */ + pgstat_report_wait_end(); + } + + /* We can now handle ereport(ERROR) */ + PG_exception_stack = &local_sigjmp_buf; + + /* + * Unblock signals (they were blocked when the postmaster forked us) + */ + PG_SETMASK(&UnBlockSig); + + /* + * Advertise our latch that backends can use to wake us up while we're + * sleeping. + */ + ProcGlobal->custodianLatch = &MyProc->procLatch; + + /* + * Loop forever + */ + for (;;) + { + pg_time_t start_time; + pg_time_t end_time; + int elapsed_secs; + int cur_timeout; + + /* Clear any already-pending wakeups */ + ResetLatch(MyLatch); + + HandleMainLoopInterrupts(); + + start_time = (pg_time_t) time(NULL); + + /* TODO: offloaded tasks go here */ + + /* Calculate how long to sleep */ + end_time = (pg_time_t) time(NULL); + elapsed_secs = end_time - start_time; + if (elapsed_secs >= CUSTODIAN_TIMEOUT_S) + continue; /* no sleep for us */ + cur_timeout = CUSTODIAN_TIMEOUT_S - elapsed_secs; + + (void) WaitLatch(MyLatch, + WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, + cur_timeout * 1000L /* convert to ms */ , + WAIT_EVENT_CUSTODIAN_MAIN); + } + + pg_unreachable(); +} diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 328ecafa8c..635313cdb7 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -250,6 +250,7 @@ bool remove_temp_files_after_crash = true; static pid_t StartupPID = 0, BgWriterPID = 0, CheckpointerPID = 0, + CustodianPID = 0, WalWriterPID = 0, WalReceiverPID = 0, AutoVacPID = 0, @@ -556,6 +557,7 @@ static void ShmemBackendArrayRemove(Backend *bn); #define StartArchiver() StartChildProcess(ArchiverProcess) #define
StartBackgroundWriter() StartChildProcess(BgWriterProcess) #define StartCheckpointer() StartChildProcess(CheckpointerProcess) +#define StartCustodian() StartChildProcess(CustodianProcess) #define StartWalWriter() StartChildProcess(WalWriterProcess) #define StartWalReceiver() StartChildProcess(WalReceiverProcess) @@ -1819,13 +1821,16 @@ ServerLoop(void) /* * If no background writer process is running, and we are not in a * state that prevents it, start one. It doesn't matter if this - * fails, we'll just try again later. Likewise for the checkpointer. + * fails, we'll just try again later. Likewise for the checkpointer + * and custodian. */ if (pmState == PM_RUN || pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY || pmState == PM_STARTUP) { if (CheckpointerPID == 0) CheckpointerPID = StartCheckpointer(); + if (CustodianPID == 0) + CustodianPID = StartCustodian(); if (BgWriterPID == 0) BgWriterPID = StartBackgroundWriter(); } @@ -2782,6 +2787,8 @@ SIGHUP_handler(SIGNAL_ARGS) signal_child(BgWriterPID, SIGHUP); if (CheckpointerPID != 0) signal_child(CheckpointerPID, SIGHUP); + if (CustodianPID != 0) + signal_child(CustodianPID, SIGHUP); if (WalWriterPID != 0) signal_child(WalWriterPID, SIGHUP); if (WalReceiverPID != 0) @@ -3109,6 +3116,8 @@ reaper(SIGNAL_ARGS) */ if (CheckpointerPID == 0) CheckpointerPID = StartCheckpointer(); + if (CustodianPID == 0) + CustodianPID = StartCustodian(); if (BgWriterPID == 0) BgWriterPID = StartBackgroundWriter(); if (WalWriterPID == 0) @@ -3211,6 +3220,20 @@ reaper(SIGNAL_ARGS) continue; } + /* + * Was it the custodian? Normal exit can be ignored; we'll start a + * new one at the next iteration of the postmaster's main loop, if + * necessary. Any other exit condition is treated as a crash. + */ + if (pid == CustodianPID) + { + CustodianPID = 0; + if (!EXIT_STATUS_0(exitstatus)) + HandleChildCrash(pid, exitstatus, + _("custodian process")); + continue; + } + /* * Was it the wal writer? 
Normal exit can be ignored; we'll start a * new one at the next iteration of the postmaster's main loop, if @@ -3684,6 +3707,18 @@ HandleChildCrash(int pid, int exitstatus, const char *procname) signal_child(CheckpointerPID, (SendStop ? SIGSTOP : SIGQUIT)); } + /* Take care of the custodian too */ + if (pid == CustodianPID) + CustodianPID = 0; + else if (CustodianPID != 0 && take_action) + { + ereport(DEBUG2, + (errmsg_internal("sending %s to process %d", + (SendStop ? "SIGSTOP" : "SIGQUIT"), + (int) CustodianPID))); + signal_child(CustodianPID, (SendStop ? SIGSTOP : SIGQUIT)); + } + /* Take care of the walwriter too */ if (pid == WalWriterPID) WalWriterPID = 0; @@ -3887,6 +3922,9 @@ PostmasterStateMachine(void) /* and the bgwriter too */ if (BgWriterPID != 0) signal_child(BgWriterPID, SIGTERM); + /* and the custodian too */ + if (CustodianPID != 0) + signal_child(CustodianPID, SIGTERM); /* and the walwriter too */ if (WalWriterPID != 0) signal_child(WalWriterPID, SIGTERM); @@ -3924,6 +3962,7 @@ PostmasterStateMachine(void) BgWriterPID == 0 && (CheckpointerPID == 0 || (!FatalError && Shutdown < ImmediateShutdown)) && + CustodianPID == 0 && WalWriterPID == 0 && AutoVacPID == 0) { @@ -4017,6 +4056,7 @@ PostmasterStateMachine(void) Assert(WalReceiverPID == 0); Assert(BgWriterPID == 0); Assert(CheckpointerPID == 0); + Assert(CustodianPID == 0); Assert(WalWriterPID == 0); Assert(AutoVacPID == 0); /* syslogger is not considered here */ @@ -4222,6 +4262,8 @@ TerminateChildren(int signal) signal_child(BgWriterPID, signal); if (CheckpointerPID != 0) signal_child(CheckpointerPID, signal); + if (CustodianPID != 0) + signal_child(CustodianPID, signal); if (WalWriterPID != 0) signal_child(WalWriterPID, signal); if (WalReceiverPID != 0) diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index b7d9da0aa9..a86a05adb4 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -182,6 +182,7 @@ InitProcGlobal(void) 
ProcGlobal->startupBufferPinWaitBufId = -1; ProcGlobal->walwriterLatch = NULL; ProcGlobal->checkpointerLatch = NULL; + ProcGlobal->custodianLatch = NULL; pg_atomic_init_u32(&ProcGlobal->procArrayGroupFirst, INVALID_PGPROCNO); pg_atomic_init_u32(&ProcGlobal->clogGroupFirst, INVALID_PGPROCNO); diff --git a/src/backend/utils/activity/wait_event.c b/src/backend/utils/activity/wait_event.c index 4d53f040e8..530af294d9 100644 --- a/src/backend/utils/activity/wait_event.c +++ b/src/backend/utils/activity/wait_event.c @@ -224,6 +224,9 @@ pgstat_get_wait_activity(WaitEventActivity w) case WAIT_EVENT_CHECKPOINTER_MAIN: event_name = "CheckpointerMain"; break; + case WAIT_EVENT_CUSTODIAN_MAIN: + event_name = "CustodianMain"; + break; case WAIT_EVENT_LOGICAL_APPLY_MAIN: event_name = "LogicalApplyMain"; break; diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c index 88801374b5..90c4160d42 100644 --- a/src/backend/utils/init/miscinit.c +++ b/src/backend/utils/init/miscinit.c @@ -273,6 +273,9 @@ GetBackendTypeDesc(BackendType backendType) case B_CHECKPOINTER: backendDesc = "checkpointer"; break; + case B_CUSTODIAN: + backendDesc = "custodian"; + break; case B_STARTUP: backendDesc = "startup"; break; diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 90a3016065..83089d23ff 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -329,6 +329,7 @@ typedef enum BackendType B_BG_WORKER, B_BG_WRITER, B_CHECKPOINTER, + B_CUSTODIAN, B_STARTUP, B_WAL_RECEIVER, B_WAL_SENDER, @@ -433,6 +434,7 @@ typedef enum BgWriterProcess, ArchiverProcess, CheckpointerProcess, + CustodianProcess, WalWriterProcess, WalReceiverProcess, @@ -445,6 +447,7 @@ extern AuxProcType MyAuxProcType; #define AmBackgroundWriterProcess() (MyAuxProcType == BgWriterProcess) #define AmArchiverProcess() (MyAuxProcType == ArchiverProcess) #define AmCheckpointerProcess() (MyAuxProcType == CheckpointerProcess) +#define AmCustodianProcess() (MyAuxProcType == 
CustodianProcess) #define AmWalWriterProcess() (MyAuxProcType == WalWriterProcess) #define AmWalReceiverProcess() (MyAuxProcType == WalReceiverProcess) diff --git a/src/include/postmaster/custodian.h b/src/include/postmaster/custodian.h new file mode 100644 index 0000000000..e8ac2ad3dd --- /dev/null +++ b/src/include/postmaster/custodian.h @@ -0,0 +1,17 @@ +/*------------------------------------------------------------------------- + * + * custodian.h + * Exports from postmaster/custodian.c. + * + * Copyright (c) 2021, PostgreSQL Global Development Group + * + * src/include/postmaster/custodian.h + * + *------------------------------------------------------------------------- + */ +#ifndef _CUSTODIAN_H +#define _CUSTODIAN_H + +extern void CustodianMain(void) pg_attribute_noreturn(); + +#endif /* _CUSTODIAN_H */ diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index cfabfdbedf..1fc4599941 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -357,6 +357,8 @@ typedef struct PROC_HDR Latch *walwriterLatch; /* Checkpointer process's latch */ Latch *checkpointerLatch; + /* Custodian process's latch */ + Latch *custodianLatch; /* Current shared estimate of appropriate spins_per_delay value */ int spins_per_delay; /* The proc of the Startup process, since not in ProcArray */ @@ -377,11 +379,12 @@ extern PGPROC *PreparedXactProcs; * We set aside some extra PGPROC structures for auxiliary processes, * ie things that aren't full-fledged backends but need shmem access. * - * Background writer, checkpointer, WAL writer and archiver run during normal - * operation. Startup process and WAL receiver also consume 2 slots, but WAL - * writer is launched only after startup has exited, so we only need 5 slots. + * Background writer, checkpointer, custodian, WAL writer and archiver run + * during normal operation. 
Startup process and WAL receiver also consume 2 + * slots, but WAL writer is launched only after startup has exited, so we only + * need 6 slots. */ -#define NUM_AUXILIARY_PROCS 5 +#define NUM_AUXILIARY_PROCS 6 /* configurable options */ extern PGDLLIMPORT int DeadlockTimeout; diff --git a/src/include/utils/wait_event.h b/src/include/utils/wait_event.h index 8785a8e12c..08dc9d5caa 100644 --- a/src/include/utils/wait_event.h +++ b/src/include/utils/wait_event.h @@ -40,6 +40,7 @@ typedef enum WAIT_EVENT_BGWRITER_HIBERNATE, WAIT_EVENT_BGWRITER_MAIN, WAIT_EVENT_CHECKPOINTER_MAIN, + WAIT_EVENT_CUSTODIAN_MAIN, WAIT_EVENT_LOGICAL_APPLY_MAIN, WAIT_EVENT_LOGICAL_LAUNCHER_MAIN, WAIT_EVENT_PGSTAT_MAIN, -- 2.16.6