From b33e9b435954f4bc8cda481df697bdb4b1e04b9e Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Fri, 19 May 2023 16:20:55 -0700 Subject: [PATCH v1 1/2] nbtree VACUUM: cope with right sibling link corruption. Avoid "right sibling's left-link doesn't match" errors when vacuuming a corrupt nbtree index: just LOG the issue and press on with vacuuming. This ERROR is seen in the field from time to time (it's not just a theoretical possibility), even though the relevant check takes place after various similar checks ran without detecting any problems. It's worth avoiding such errors, even in the event of corruption. Anything that makes VACUUM unable to make forward progress against one table/index ultimately risks making the system enter xidStopLimit mode. There is no good reason to take any chances here. This is similar to the work from commit 5b861baa (later backpatched as commit 43e409ce), which taught nbtree to press on with vacuuming an index when page deletion fails to "re-find" a downlink in the target page's parent (or in some page to the right of the parent). Both cases involve throwing errors in nbtree page deletion that correspond to similar errors at a corresponding point during nbtree page splits. And in both cases the solution is to demote the ERROR to a LOG message, and press on (without changing anything about the page split errors). Note that _bt_unlink_halfdead_page() has long been able to bail on page deletion when the target page's left sibling was in an inconsistent state (that goes all the way back to the initial introduction of page deletion by commit 88dc31e3f2). _bt_unlink_halfdead_page() can now bail on page deletion in much the same way if the sibling link corruption happens to be in the right sibling page's link. While there was already significant overlap between each of the two sibling checks (elevel notwithstanding), there is no reason to take any chances here. --- src/backend/access/nbtree/nbtpage.c | 36 ++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 41aa1c4cc..def0d7926 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -2412,16 +2412,16 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno, } else { - /* we have only a pin on leafbuf */ + /* we have only a pin on target, which is also leafbuf */ ReleaseBuffer(leafbuf); } ereport(LOG, (errcode(ERRCODE_INDEX_CORRUPTED), errmsg_internal("valid left sibling for deletion target could not be located: " - "left sibling %u of target %u with leafblkno %u and scanblkno %u in index \"%s\"", + "left sibling %u of target %u with leafblkno %u and scanblkno %u on level %u of index \"%s\"", leftsib, target, leafblkno, scanblkno, - RelationGetRelationName(rel)))); + targetlevel, RelationGetRelationName(rel)))); return false; } @@ -2496,13 +2496,37 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno, rbuf = _bt_getbuf(rel, vstate->info->heaprel, rightsib, BT_WRITE); page = BufferGetPage(rbuf); opaque = BTPageGetOpaque(page); + + /* + * Validate target's right sibling page's left link points back to target. + * + * This is known to fail in the field in the presence of index corruption, + * so we go to the trouble of avoiding a hard ERROR. This is fairly close + * to what we did earlier on when we located the target's left sibling + * (iff target has a left sibling). + */ if (opaque->btpo_prev != target) - ereport(ERROR, + { + ereport(LOG, (errcode(ERRCODE_INDEX_CORRUPTED), errmsg_internal("right sibling's left-link doesn't match: " - "block %u links to %u instead of expected %u in index \"%s\"", + "block %u links to %u instead of expected %u on level %u of index \"%s\"", rightsib, opaque->btpo_prev, target, - RelationGetRelationName(rel)))); + targetlevel, RelationGetRelationName(rel)))); + /* release siblings */ + if (BufferIsValid(lbuf)) + _bt_relbuf(rel, lbuf); + _bt_relbuf(rel, rbuf); + + /* release target */ + _bt_relbuf(rel, buf); + /* release leafbuf when it isn't also the target */ + if (target != leafblkno) + _bt_relbuf(rel, leafbuf); + + return false; + } + rightsib_is_rightmost = P_RIGHTMOST(opaque); *rightsib_empty = (P_FIRSTDATAKEY(opaque) > PageGetMaxOffsetNumber(page)); -- 2.40.1