diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 958220c495..ab8a4d9a1b 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -9658,6 +9658,7 @@ CreateRestartPoint(int flags) XLogRecPtr endptr; XLogSegNo _logSegNo; TimestampTz xtime; + bool chkpt_was_latest = false; /* Get a local copy of the last safe checkpoint record. */ SpinLockAcquire(&XLogCtl->info_lck); @@ -9752,44 +9753,69 @@ CreateRestartPoint(int flags) PriorRedoPtr = ControlFile->checkPointCopy.redo; /* - * Update pg_control, using current time. Check that it still shows - * DB_IN_ARCHIVE_RECOVERY state and an older checkpoint, else do nothing; - * this is a quick hack to make sure nothing really bad happens if somehow - * we get here after the end-of-recovery checkpoint. + * Update pg_control, using current time if no later checkpoints have been + * performed. */ LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); - if (ControlFile->state == DB_IN_ARCHIVE_RECOVERY && - ControlFile->checkPointCopy.redo < lastCheckPoint.redo) + if (ControlFile->checkPointCopy.redo < lastCheckPoint.redo) { + chkpt_was_latest = true; ControlFile->checkPoint = lastCheckPointRecPtr; ControlFile->checkPointCopy = lastCheckPoint; /* - * Ensure minRecoveryPoint is past the checkpoint record. Normally, - * this will have happened already while writing out dirty buffers, - * but not necessarily - e.g. because no buffers were dirtied. We do - * this because a non-exclusive base backup uses minRecoveryPoint to - * determine which WAL files must be included in the backup, and the - * file (or files) containing the checkpoint record must be included, - * at a minimum. Note that for an ordinary restart of recovery there's - * no value in having the minimum recovery point any earlier than this + * Ensure minRecoveryPoint is past the checkpoint record while archive + * recovery is still ongoing. 
Normally, this will have happened + * already while writing out dirty buffers, but not necessarily - + * e.g. because no buffers were dirtied. We do this because a + * non-exclusive base backup uses minRecoveryPoint to determine which + * WAL files must be included in the backup, and the file (or files) + * containing the checkpoint record must be included, at a + * minimum. Note that for an ordinary restart of recovery there's no + * value in having the minimum recovery point any earlier than this * anyway, because redo will begin just after the checkpoint record. */ - if (ControlFile->minRecoveryPoint < lastCheckPointEndPtr) + if (ControlFile->state == DB_IN_ARCHIVE_RECOVERY) { - ControlFile->minRecoveryPoint = lastCheckPointEndPtr; - ControlFile->minRecoveryPointTLI = lastCheckPoint.ThisTimeLineID; + if (ControlFile->minRecoveryPoint < lastCheckPointEndPtr) + { + ControlFile->minRecoveryPoint = lastCheckPointEndPtr; + ControlFile->minRecoveryPointTLI = lastCheckPoint.ThisTimeLineID; - /* update local copy */ - minRecoveryPoint = ControlFile->minRecoveryPoint; - minRecoveryPointTLI = ControlFile->minRecoveryPointTLI; + /* update local copy */ + minRecoveryPoint = ControlFile->minRecoveryPoint; + minRecoveryPointTLI = ControlFile->minRecoveryPointTLI; + } + if (flags & CHECKPOINT_IS_SHUTDOWN) + ControlFile->state = DB_SHUTDOWNED_IN_RECOVERY; + } + else + { + /* + * We have exited from archive-recovery mode after starting. From + * now on, crash recovery should always recover to the end of WAL. + */ + ControlFile->minRecoveryPoint = InvalidXLogRecPtr; + ControlFile->minRecoveryPointTLI = 0; } - if (flags & CHECKPOINT_IS_SHUTDOWN) - ControlFile->state = DB_SHUTDOWNED_IN_RECOVERY; UpdateControlFile(); } LWLockRelease(ControlFileLock); + /* + * Skip all post-checkpoint work if others have done that with later + * checkpoints. + */ + if (!chkpt_was_latest) + { + ereport((log_checkpoints ? 
LOG : DEBUG2), + (errmsg("post-restartpoint cleanup is skpped at %X/%X, because later restartpoints have been already performed", + LSN_FORMAT_ARGS(lastCheckPoint.redo)))); + + /* this checkpoint has not been established */ + return false; + } + /* * Update the average distance between checkpoints/restartpoints if the * prior checkpoint exists.