diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 0d494e2..eb28848 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -561,8 +561,8 @@ static bool reachedMinRecoveryPoint = false; static bool InRedo = false; -/* Have we launched bgwriter during recovery? */ -static bool bgwriterLaunched = false; +/* Have we launched the checkpointer process during recovery? */ +static bool checkpointerLaunched = false; /* * Information logged when we detect a change in one of the parameters @@ -597,6 +597,8 @@ static void recoveryPausesHere(void); static void SetLatestXTime(TimestampTz xtime); static void CheckRequiredParameterValues(void); static void XLogReportParameters(void); +static void LogEndOfRecovery(void); +static void StartHotStandbyImmediately(TransactionId nextXid); static void LocalSetXLogInsertAllowed(void); static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags); static void KeepLogSeg(XLogRecPtr recptr, uint32 *logId, uint32 *logSeg); @@ -6382,7 +6384,7 @@ StartupXLOG(void) PublishStartupProcessInformation(); SetForwardFsyncRequests(); SendPostmasterSignal(PMSIGNAL_RECOVERY_STARTED); - bgwriterLaunched = true; + checkpointerLaunched = true; } /* @@ -6744,17 +6746,19 @@ StartupXLOG(void) /* * Perform a checkpoint to update all our recovery activity to disk. + * If we have a checkpointer process, we perform the checkpoint later. * - * Note that we write a shutdown checkpoint rather than an on-line - * one. This is not particularly critical, but since we may be - * assigning a new TLI, using a shutdown checkpoint allows us to have - * the rule that TLI only changes in shutdown checkpoints, which - * allows some extra error checking in xlog_redo. + * Note that we write a shutdown checkpoint rather than an on-line one. + * + * We mark end of recovery here, either by an end-of-recovery checkpoint + * or by an end-of-recovery WAL record. 
Either way, this is the only + * place the TLI of WAL files is allowed to change. + * + * We are able to do this because we know that all WAL files are + * available locally. XXX perhaps we should actually check that... */ - if (bgwriterLaunched) - RequestCheckpoint(CHECKPOINT_END_OF_RECOVERY | - CHECKPOINT_IMMEDIATE | - CHECKPOINT_WAIT); + if (checkpointerLaunched) + LogEndOfRecovery(); else CreateCheckPoint(CHECKPOINT_END_OF_RECOVERY | CHECKPOINT_IMMEDIATE); @@ -6867,6 +6871,19 @@ StartupXLOG(void) xlogctl->SharedRecoveryInProgress = false; SpinLockRelease(&xlogctl->info_lck); } + + /* + * If we skipped a shutdown checkpoint earlier, request a checkpoint now. + * This is a normal checkpoint now, not a restartpoint or end-of-recovery + * checkpoint. This is safe only because we know we have all required + * WAL files in the pg_xlog directory if we crash before writing the + * final part of this checkpoint. Checkpoint here is immediate, so that we + * minimise the time before the next checkpoint ends. Yet we don't wait for + * completion of the checkpoint, since that slows down getting normal + * users connected in case of a failover. 
+ */ + if (checkpointerLaunched) + RequestCheckpoint(CHECKPOINT_IMMEDIATE); } /* @@ -8285,6 +8302,57 @@ XLogReportParameters(void) } } +static void +LogEndOfRecovery(void) +{ + XLogRecData rdata; + EndOfRecovery end; + + end.nextXid = ControlFile->checkPointCopy.nextXid; + end.nextXidEpoch = ControlFile->checkPointCopy.nextXidEpoch; + end.ThisTimeLineID = ThisTimeLineID; + end.time = (pg_time_t) time(NULL); + + rdata.buffer = InvalidBuffer; + rdata.data = (char *) &end; + rdata.len = sizeof(EndOfRecovery); + rdata.next = NULL; + + XLogInsert(RM_XLOG_ID, XLOG_END_OF_RECOVERY, &rdata); +} + +static void +StartHotStandbyImmediately(TransactionId nextXid) +{ + TransactionId *xids; + int nxids; + TransactionId oldestActiveXID; + TransactionId latestCompletedXid; + RunningTransactionsData running; + + oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids); + + /* + * Construct a RunningTransactions snapshot representing a shut + * down server, with only prepared transactions still alive. We're + * never overflowed at this point because all subxids are listed + * with their parent prepared transactions. + */ + running.xcnt = nxids; + running.subxid_overflow = false; + running.nextXid = nextXid; + running.oldestRunningXid = oldestActiveXID; + latestCompletedXid = nextXid; + TransactionIdRetreat(latestCompletedXid); + Assert(TransactionIdIsNormal(latestCompletedXid)); + running.latestCompletedXid = latestCompletedXid; + running.xids = xids; + + ProcArrayApplyRecoveryInfo(&running); + + StandbyRecoverPreparedTransactions(true); +} + /* * XLOG resource manager's routines * @@ -8310,6 +8378,40 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record) ShmemVariableCache->oidCount = 0; } } + else if (info == XLOG_END_OF_RECOVERY) + { + EndOfRecovery end; + + memcpy(&end, XLogRecGetData(record), sizeof(EndOfRecovery)); + + /* + * If we see an end of recovery record, we know that no write transactions + * are active that will commit. 
So fake-up an empty running-xacts + * record and use that here and now. Recover additional standby state + * for prepared transactions. + */ + if (standbyState >= STANDBY_INITIALIZED) + StartHotStandbyImmediately(end.nextXid); + + /* ControlFile->checkPointCopy always tracks the latest ckpt XID */ + ControlFile->checkPointCopy.nextXidEpoch = end.nextXidEpoch; + ControlFile->checkPointCopy.nextXid = end.nextXid; + + /* + * TLI may change in an end of recovery, but it shouldn't decrease + */ + if (end.ThisTimeLineID != ThisTimeLineID) + { + if (end.ThisTimeLineID < ThisTimeLineID || + !list_member_int(expectedTLIs, + (int) end.ThisTimeLineID)) + ereport(PANIC, + (errmsg("unexpected timeline ID %u (after %u) in end of recovery record", + end.ThisTimeLineID, ThisTimeLineID))); + /* Following WAL records should be run with new TLI */ + ThisTimeLineID = end.ThisTimeLineID; + } + } else if (info == XLOG_CHECKPOINT_SHUTDOWN) { CheckPoint checkPoint; @@ -8340,35 +8442,7 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record) * for prepared transactions. */ if (standbyState >= STANDBY_INITIALIZED) - { - TransactionId *xids; - int nxids; - TransactionId oldestActiveXID; - TransactionId latestCompletedXid; - RunningTransactionsData running; - - oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids); - - /* - * Construct a RunningTransactions snapshot representing a shut - * down server, with only prepared transactions still alive. We're - * never overflowed at this point because all subxids are listed - * with their parent prepared transactions. 
- */ - running.xcnt = nxids; - running.subxid_overflow = false; - running.nextXid = checkPoint.nextXid; - running.oldestRunningXid = oldestActiveXID; - latestCompletedXid = checkPoint.nextXid; - TransactionIdRetreat(latestCompletedXid); - Assert(TransactionIdIsNormal(latestCompletedXid)); - running.latestCompletedXid = latestCompletedXid; - running.xids = xids; - - ProcArrayApplyRecoveryInfo(&running); - - StandbyRecoverPreparedTransactions(true); - } + StartHotStandbyImmediately(checkPoint.nextXid); /* ControlFile->checkPointCopy always tracks the latest ckpt XID */ ControlFile->checkPointCopy.nextXidEpoch = checkPoint.nextXidEpoch; @@ -8524,6 +8598,15 @@ xlog_desc(StringInfo buf, uint8 xl_info, char *rec) checkpoint->oldestActiveXid, (info == XLOG_CHECKPOINT_SHUTDOWN) ? "shutdown" : "online"); } + else if (info == XLOG_END_OF_RECOVERY) + { + EndOfRecovery *end = (EndOfRecovery *) rec; + + appendStringInfo(buf, "end of recovery: tli %u; xid %u/%u", + end->ThisTimeLineID, + end->nextXidEpoch, + end->nextXid); + } else if (info == XLOG_NOOP) { appendStringInfo(buf, "xlog no-op"); @@ -9563,7 +9646,7 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt, * Request a restartpoint if we've replayed too much * xlog since the last one. */ - if (StandbyMode && bgwriterLaunched) + if (StandbyMode && checkpointerLaunched) { if (XLogCheckpointNeeded(readId, readSeg)) { diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h index 6688c19..baa7fc2 100644 --- a/src/include/catalog/pg_control.h +++ b/src/include/catalog/pg_control.h @@ -60,7 +60,18 @@ typedef struct CheckPoint #define XLOG_BACKUP_END 0x50 #define XLOG_PARAMETER_CHANGE 0x60 #define XLOG_RESTORE_POINT 0x70 +#define XLOG_END_OF_RECOVERY 0x80 +/* + * Body of EndOfRecovery XLOG records - only some of the fields of a checkpoint. 
+ */ +typedef struct EndOfRecovery +{ + TimeLineID ThisTimeLineID; /* current TLI */ + uint32 nextXidEpoch; /* higher-order bits of nextXid */ + TransactionId nextXid; /* next free XID */ + pg_time_t time; /* time stamp of end-of-recovery record */ +} EndOfRecovery; /* * System status indicator. Note this is stored in pg_control; if you change