diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 24165ab03e..b0f18e4e5e 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -111,6 +111,7 @@ int CommitSiblings = 5; /* # concurrent xacts needed to sleep */ int wal_retrieve_retry_interval = 5000; int max_slot_wal_keep_size_mb = -1; bool track_wal_io_timing = false; +bool contrec_aborted = false; #ifdef WAL_DEBUG bool XLOG_DEBUG = false; @@ -586,6 +587,7 @@ typedef struct XLogCtlData XLogRecPtr replicationSlotMinLSN; /* oldest LSN needed by any slot */ XLogSegNo lastRemovedSegNo; /* latest removed/recycled XLOG segment */ + XLogRecPtr contAbortedRecPtr; /* Fake LSN counter, for unlogged relations. Protected by ulsn_lck. */ XLogRecPtr unloggedLSN; @@ -735,6 +737,10 @@ typedef struct XLogCtlData XLogSegNo latestSegBoundary; XLogRecPtr latestSegBoundaryEndPtr; + /* BEGIN: FOR DEBUGGING-CRASH USE*/ + bool crossseg; + /* END: DEBUGGING-CRASH USE*/ + slock_t segtrack_lck; /* locks shared variables shown above */ } XLogCtlData; @@ -860,6 +866,7 @@ static XLogSource XLogReceiptSource = XLOG_FROM_ANY; /* State information for XLOG reading */ static XLogRecPtr ReadRecPtr; /* start of last record read */ static XLogRecPtr EndRecPtr; /* end+1 of last record read */ +static XLogRecPtr ContRecAbortPtr; /* end+1 of last aborted contrec */ /* * Local copies of equivalent fields in the control file. When running @@ -1178,16 +1185,10 @@ XLogInsertRecord(XLogRecData *rdata, XLByteToSeg(StartPos, StartSeg, wal_segment_size); XLByteToSeg(EndPos, EndSeg, wal_segment_size); - /* - * Register our crossing the segment boundary if that occurred. - * - * Note that we did not use XLByteToPrevSeg() for determining the - * ending segment. This is so that a record that fits perfectly into - * the end of the segment causes the latter to get marked ready for - * archival immediately. 
- */ - if (StartSeg != EndSeg && XLogArchivingActive()) - RegisterSegmentBoundary(EndSeg, EndPos); + /* BEGIN: FOR DEBUGGING-CRASH USE */ + if (StartSeg != EndSeg) + XLogCtl->crossseg = true; + /* END: FOR DEBUGGING-CRASH USE */ /* * Advance LogwrtRqst.Write so that it includes new block(s). @@ -2292,6 +2293,27 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, bool opportunistic) if (!Insert->forcePageWrites) NewPage->xlp_info |= XLP_BKP_REMOVABLE; + /* + * If the last page ended with an aborted partial continuation record, + * mark it to tell the partial record is omittable. Since this happens + * only at the end of crash recovery, there is no race condition here. + */ + if (XLogCtl->contAbortedRecPtr >= NewPageBeginPtr) + { + if (XLogCtl->contAbortedRecPtr == NewPageBeginPtr) + { + NewPage->xlp_info |= XLP_FIRST_IS_ABORT_PARTIAL; + elog(LOG, "#### set XLP_FIRST_IS_ABORT_PARTIAL@%X/%X", + LSN_FORMAT_ARGS(NewPageBeginPtr)); + } + else + elog(LOG, "### incosistent abort location %X/%X, expected %X/%X", + LSN_FORMAT_ARGS(XLogCtl->contAbortedRecPtr), + LSN_FORMAT_ARGS(NewPageBeginPtr)); + + XLogCtl->contAbortedRecPtr = InvalidXLogRecPtr; + } + /* * If first page of an XLOG segment file, make it a long header.
*/ @@ -2644,6 +2666,17 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) { issue_xlog_fsync(openLogFile, openLogSegNo); + /* BEGIN: FOR DEBUGGING-CRASH USE */ + if (XLogCtl->crossseg) + { + static int c = 0; + struct stat b; + + if (stat("/tmp/hoge", &b) == 0) + Assert (c++ < 1); + } + /* END: FOR DEBUGGING-CRASH USE */ + /* signal that we need to wakeup walsenders later */ WalSndWakeupRequest(); @@ -4568,6 +4601,7 @@ ReadRecord(XLogReaderState *xlogreader, int emode, record = XLogReadRecord(xlogreader, &errormsg); ReadRecPtr = xlogreader->ReadRecPtr; EndRecPtr = xlogreader->EndRecPtr; + ContRecAbortPtr = xlogreader->ContRecAbortPtr; if (record == NULL) { if (readFile >= 0) @@ -7873,12 +7907,26 @@ StartupXLOG(void) StandbyMode = false; /* - * Re-fetch the last valid or last applied record, so we can identify the - * exact endpoint of what we consider the valid portion of WAL. + * The last record may be an immature continuation record at the end of a + * page. We continue writing from ContRecAbortPtr instead of EndRecPtr in + * that case. */ - XLogBeginRead(xlogreader, LastRec); - record = ReadRecord(xlogreader, PANIC, false); - EndOfLog = EndRecPtr; + elog(LOG, "#### Recovery finished: ContRecAbort: %X/%X (EndRecPtr: %X/%X)", LSN_FORMAT_ARGS(ContRecAbortPtr), LSN_FORMAT_ARGS(EndRecPtr)); + if (XLogRecPtrIsInvalid(ContRecAbortPtr)) + { + /* + * Re-fetch the last valid or last applied record, so we can identify + * the exact endpoint of what we consider the valid portion of WAL.
+ */ + XLogBeginRead(xlogreader, LastRec); + record = ReadRecord(xlogreader, PANIC, false); + EndOfLog = EndRecPtr; + } + else + { + EndOfLog = ContRecAbortPtr; + XLogCtl->contAbortedRecPtr = ContRecAbortPtr; + } /* * EndOfLogTLI is the TLI in the filename of the XLOG segment containing @@ -8013,7 +8061,8 @@ StartupXLOG(void) Insert = &XLogCtl->Insert; Insert->PrevBytePos = XLogRecPtrToBytePos(LastRec); Insert->CurrBytePos = XLogRecPtrToBytePos(EndOfLog); - + elog(LOG, "#### EndOfLog=%X/%X", LSN_FORMAT_ARGS(EndOfLog)); + /* * Tricky point here: readBuf contains the *last* block that the LastRec * record spans, not the one it starts in. The last block is indeed the diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c index 5cf74e181a..404db7ce4d 100644 --- a/src/backend/access/transam/xlogreader.c +++ b/src/backend/access/transam/xlogreader.c @@ -294,6 +294,7 @@ XLogReadRecord(XLogReaderState *state, char **errormsg) ResetDecoder(state); + state->ContRecAbortPtr = InvalidXLogRecPtr; RecPtr = state->EndRecPtr; if (state->ReadRecPtr != InvalidXLogRecPtr) @@ -319,6 +320,7 @@ XLogReadRecord(XLogReaderState *state, char **errormsg) randAccess = true; } +retry: state->currRecPtr = RecPtr; targetPagePtr = RecPtr - (RecPtr % XLOG_BLCKSZ); @@ -444,12 +446,27 @@ XLogReadRecord(XLogReaderState *state, char **errormsg) XLOG_BLCKSZ)); if (readOff < 0) - goto err; + goto err_partial_contrec; Assert(SizeOfXLogShortPHD <= readOff); /* Check that the continuation on next page looks valid */ pageHeader = (XLogPageHeader) state->readBuf; + if (pageHeader->xlp_info & XLP_FIRST_IS_ABORT_PARTIAL) + { + if (pageHeader->xlp_info & XLP_FIRST_IS_CONTRECORD) + { + report_invalid_record(state, + "both XLP_FIRST_IS_CONTRECORD and XLP_FIRST_IS_ABORT_PARTIAL are set at %X/%X", + LSN_FORMAT_ARGS(RecPtr)); + goto err; + } + + fprintf(stderr, "#### aborted partial continuation record found at %X/%X, continue from %X/%X\n", LSN_FORMAT_ARGS(RecPtr), 
LSN_FORMAT_ARGS(targetPagePtr)); + ResetDecoder(state); + RecPtr = targetPagePtr; + goto retry; + } if (!(pageHeader->xlp_info & XLP_FIRST_IS_CONTRECORD)) { report_invalid_record(state, @@ -550,6 +567,10 @@ XLogReadRecord(XLogReaderState *state, char **errormsg) else return NULL; +err_partial_contrec: + state->ContRecAbortPtr = targetPagePtr; + fprintf(stderr, "contrec aborted@%X/%X\n", LSN_FORMAT_ARGS(state->ContRecAbortPtr)); + err: /* diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h index 3b5eceff65..6390812a5a 100644 --- a/src/include/access/xlog_internal.h +++ b/src/include/access/xlog_internal.h @@ -76,8 +76,10 @@ typedef XLogLongPageHeaderData *XLogLongPageHeader; #define XLP_LONG_HEADER 0x0002 /* This flag indicates backup blocks starting in this page are optional */ #define XLP_BKP_REMOVABLE 0x0004 +/* This flag indicates the first record in this page breaks a contrecord */ +#define XLP_FIRST_IS_ABORT_PARTIAL 0x0008 /* All defined flag bits in xlp_info (used for validity checking of header) */ -#define XLP_ALL_FLAGS 0x0007 +#define XLP_ALL_FLAGS 0x000F #define XLogPageHeaderSize(hdr) \ (((hdr)->xlp_info & XLP_LONG_HEADER) ? 
SizeOfXLogLongPHD : SizeOfXLogShortPHD) diff --git a/src/include/access/xlogreader.h b/src/include/access/xlogreader.h index 21d200d3df..00a03a628c 100644 --- a/src/include/access/xlogreader.h +++ b/src/include/access/xlogreader.h @@ -175,6 +175,8 @@ struct XLogReaderState XLogRecPtr ReadRecPtr; /* start of last record read */ XLogRecPtr EndRecPtr; /* end+1 of last record read */ + XLogRecPtr ContRecAbortPtr; /* end+1 of aborted partial contrecord if + * any */ /* ---------------------------------------- * Decoded representation of current record diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h index e3f48158ce..26fc123cdb 100644 --- a/src/include/catalog/pg_control.h +++ b/src/include/catalog/pg_control.h @@ -76,6 +76,7 @@ typedef struct CheckPoint #define XLOG_END_OF_RECOVERY 0x90 #define XLOG_FPI_FOR_HINT 0xA0 #define XLOG_FPI 0xB0 +#define XLOG_ABORT_CONTRECORD 0xC0 /*