From d84cab5275d36a4eb857067f05c63391fdcc661d Mon Sep 17 00:00:00 2001 From: Dilip Kumar Date: Thu, 15 Jun 2023 12:23:06 +0530 Subject: [PATCH v1] New WAL record to identify checkpoint redo location Currently, the checkpoint-redo LSN cannot be accurately detected while processing the WAL. Although the checkpoint WAL record contains the exact redo LSN, other WAL records may be inserted between the checkpoint-redo LSN and the actual checkpoint record. So if we want to stop processing WAL exactly at the checkpoint-redo location, we cannot: by the time we read the checkpoint record and learn the redo LSN, we have already processed the extra records that were added after the redo LSN. This patch inserts a special WAL record named CHECKPOINT_REDO, which is located exactly at the checkpoint-redo location. We can guarantee that this record is exactly at the checkpoint-redo point because we already hold the exclusive WAL insertion lock while identifying the checkpoint redo point, and we insert this special record while still holding that lock, so there are no concurrent WAL insertions. 
--- src/backend/access/rmgrdesc/xlogdesc.c | 7 +++ src/backend/access/transam/xlog.c | 60 +++++++++++++++++++++--- src/backend/replication/logical/decode.c | 1 + src/include/catalog/pg_control.h | 1 + 4 files changed, 62 insertions(+), 7 deletions(-) diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c index f390c177e4..7868ec7633 100644 --- a/src/backend/access/rmgrdesc/xlogdesc.c +++ b/src/backend/access/rmgrdesc/xlogdesc.c @@ -148,6 +148,10 @@ xlog_desc(StringInfo buf, XLogReaderState *record) LSN_FORMAT_ARGS(xlrec.overwritten_lsn), timestamptz_to_str(xlrec.overwrite_time)); } + else if(info == XLOG_CHECKPOINT_REDO) + { + /* No details to write out */ + } } const char * @@ -196,6 +200,9 @@ xlog_identify(uint8 info) case XLOG_FPI_FOR_HINT: id = "FPI_FOR_HINT"; break; + case XLOG_CHECKPOINT_REDO: + id = "CHECKPOINT_REDO"; + break; } return id; diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index b2430f617c..a025fb91e2 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -744,6 +744,7 @@ XLogInsertRecord(XLogRecData *rdata, XLogRecPtr StartPos; XLogRecPtr EndPos; bool prevDoPageWrites = doPageWrites; + bool callerHoldingExlock = holdingAllLocks; TimeLineID insertTLI; /* we assume that all of the record header is in the first chunk */ @@ -792,10 +793,18 @@ XLogInsertRecord(XLogRecData *rdata, *---------- */ START_CRIT_SECTION(); - if (isLogSwitch) - WALInsertLockAcquireExclusive(); - else - WALInsertLockAcquire(); + + /* + * Acquire wal insertion lock, nothing to do if the caller is already + * holding the exclusive lock. + */ + if (!callerHoldingExlock) + { + if (isLogSwitch) + WALInsertLockAcquireExclusive(); + else + WALInsertLockAcquire(); + } /* * Check to see if my copy of RedoRecPtr is out of date. 
If so, may have @@ -828,7 +837,10 @@ XLogInsertRecord(XLogRecData *rdata, * Oops, some buffer now needs to be backed up that the caller didn't * back up. Start over. */ - WALInsertLockRelease(); + + /* release the wal insertion lock if we have acquired it here */ + if (!callerHoldingExlock) + WALInsertLockRelease(); END_CRIT_SECTION(); return InvalidXLogRecPtr; } @@ -886,9 +898,12 @@ XLogInsertRecord(XLogRecData *rdata, } /* - * Done! Let others know that we're finished. + * Done! Let others know that we're finished. But if we haven't acquire + * the lock in this function then don't release it now, the caller will + * take care of that. */ - WALInsertLockRelease(); + if (!callerHoldingExlock) + WALInsertLockRelease(); END_CRIT_SECTION(); @@ -6597,6 +6612,32 @@ CreateCheckPoint(int flags) */ RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo; + /* + * Insert a special purpose CHECKPOINT_REDO record as the first record at + * checkpoint redo lsn. Although we have the checkpoint record that + * contains the exact redo lsn, there might have been some other records + * those got inserted between the redo lsn and the actual checkpoint + * record. So when processing the wal, we cannot rely on the checkpoint + * record if we want to stop at the checkpoint-redo LSN. + * + * This special record, however, is not required when we doing a shutdown + * checkpoint, as there will be no concurrent wal insertions during that + * time. So, the shutdown checkpoint LSN will be the same as + * checkpoint-redo LSN. + * + * This record is guaranteed to be the first record at checkpoint redo lsn + * because we are inserting this while holding the exclusive wal insertion + * lock. + */ + if (!shutdown) + { + int dummy = 0; + + XLogBeginInsert(); + XLogRegisterData((char *) &dummy, sizeof(dummy)); + recptr = XLogInsert(RM_XLOG_ID, XLOG_CHECKPOINT_REDO); + } + /* * Now we can release the WAL insertion locks, allowing other xacts to * proceed while we are flushing disk buffers. 
@@ -8059,6 +8100,11 @@ xlog_redo(XLogReaderState *record) /* Keep track of full_page_writes */ lastFullPageWrites = fpw; } + else if (info == XLOG_CHECKPOINT_REDO) + { + /* nothing to do here */ + } + } /* diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c index d91055a440..a126dc3c18 100644 --- a/src/backend/replication/logical/decode.c +++ b/src/backend/replication/logical/decode.c @@ -190,6 +190,7 @@ xlog_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) case XLOG_FPI_FOR_HINT: case XLOG_FPI: case XLOG_OVERWRITE_CONTRECORD: + case XLOG_CHECKPOINT_REDO: break; default: elog(ERROR, "unexpected RM_XLOG_ID record type: %u", info); diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h index dc953977c5..1136613259 100644 --- a/src/include/catalog/pg_control.h +++ b/src/include/catalog/pg_control.h @@ -78,6 +78,7 @@ typedef struct CheckPoint #define XLOG_FPI 0xB0 /* 0xC0 is used in Postgres 9.5-11 */ #define XLOG_OVERWRITE_CONTRECORD 0xD0 +#define XLOG_CHECKPOINT_REDO 0xE0 /* -- 2.39.0