From cf39fc78537af29931b9e8051f4a680f0b671dcf Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Tue, 28 Mar 2017 14:09:13 +0900 Subject: [PATCH 1/2] Change detection of corrupted 2PC files as FATAL When scanning 2PC files, be it for initializing a hot standby node or finding the XID range at the end of recovery, running into a corrupted 2PC file is reported as a WARNING and the file is blindly removed. If the file treated as corrupted was actually legitimate, there is a risk of corruption, so switch that to a hard failure. --- src/backend/access/transam/twophase.c | 25 ++++++++++--------------- src/backend/access/transam/xlog.c | 10 +++++++--- 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index 83169cccc3..e1e4052253 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -1689,6 +1689,10 @@ CheckPointTwoPhase(XLogRecPtr redo_horizon) * write a WAL entry, and so there might be no evidence in WAL of those * subxact XIDs. * + * On corrupted two-phase files, fail immediately. Keeping around broken + * entries and letting replay continue causes harm on the system, and likely + * a new backup should be rolled in. + * * Our other responsibility is to determine and return the oldest valid XID * among the prepared xacts (if none, return ShmemVariableCache->nextXid). * This is needed to synchronize pg_subtrans startup properly. 
@@ -1740,25 +1744,16 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p) /* Read and validate file */ buf = ReadTwoPhaseFile(xid, true); if (buf == NULL) - { - ereport(WARNING, - (errmsg("removing corrupt two-phase state file \"%s\"", - clde->d_name))); - RemoveTwoPhaseFile(xid, true); - continue; - } + ereport(FATAL, + (errmsg("corrupted two-phase state file \"%s\"", + clde->d_name))); /* Deconstruct header */ hdr = (TwoPhaseFileHeader *) buf; if (!TransactionIdEquals(hdr->xid, xid)) - { - ereport(WARNING, - (errmsg("removing corrupt two-phase state file \"%s\"", - clde->d_name))); - RemoveTwoPhaseFile(xid, true); - pfree(buf); - continue; - } + ereport(FATAL, + (errmsg("corrupted two-phase state file \"%s\"", + clde->d_name))); /* * OK, we think this file is valid. Incorporate xid into the diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 61ca81d1d2..af40b0497e 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7371,6 +7371,13 @@ StartupXLOG(void) } /* + * Pre-scan prepared transactions to find out the range of XIDs present. + * We don't need this information quite yet, but if it fails for some + * reason, better to fail before we make any on-disk changes. + */ + oldestActiveXID = PrescanPreparedTransactions(NULL, NULL); + + /* * Consider whether we need to assign a new timeline ID. * * If we are doing an archive recovery, we always assign a new ID. This @@ -7493,9 +7500,6 @@ StartupXLOG(void) XLogCtl->LogwrtRqst.Write = EndOfLog; XLogCtl->LogwrtRqst.Flush = EndOfLog; - /* Pre-scan prepared transactions to find out the range of XIDs present */ - oldestActiveXID = PrescanPreparedTransactions(NULL, NULL); - /* * Update full_page_writes in shared memory and write an XLOG_FPW_CHANGE * record before resource manager writes cleanup WAL records or checkpoint -- 2.12.2