From 836b57525c3c221ef4f2826abed9b32d7f48ad8e Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Fri, 19 May 2023 16:20:55 -0700 Subject: [PATCH v2 1/2] nbtree VACUUM: cope with right sibling link corruption. Avoid "right sibling's left-link doesn't match" errors when vacuuming a corrupt nbtree index: just LOG the issue and press on with vacuuming. This ERROR is seen in the field from time to time (it's not just a theoretical possibility), even though the relevant check takes place after various similar checks ran without detecting any problems. It's worth avoiding such errors, even in the event of corruption. Anything that makes VACUUM unable to make forward progress against one table/index ultimately risks making the system enter xidStopLimit mode. There is no good reason to take any chances here. This is similar to the work from commit 5b861baa (later backpatched as commit 43e409ce), which taught nbtree to press on with vacuuming an index when page deletion fails to "re-find" a downlink in the target page's parent (or in some page to the right of the parent). Both cases involve throwing errors in nbtree page deletion that correspond to similar errors at a corresponding point during nbtree page splits. And in both cases the solution is to demote the ERROR to a LOG message, and press on (without changing anything about the page split errors). Note that _bt_unlink_halfdead_page() has long been able to bail on page deletion when the target page's left sibling was in an inconsistent state (that goes all the way back to the initial introduction of page deletion by commit 88dc31e3f2). _bt_unlink_halfdead_page() can now bail on page deletion in much the same way if the sibling link corruption happens to be in the right sibling page's link. While there was already significant overlap between each of the two sibling checks (elevel notwithstanding), there is no reason to take any chances here. Author: Peter Geoghegan Reviewed-By: Heikki Linnakangas Discussion: https://postgr.es/m/CAH2-Wzko2q2kP1+UvgJyP9g0mF4hopK0NtQZcxwvMv9_ytGhkQ@mail.gmail.com Backpatch: 11- (all supported versions). --- src/backend/access/nbtree/nbtpage.c | 62 ++++++++++++++++++++--------- src/backend/access/nbtree/nbtree.c | 3 +- 2 files changed, 45 insertions(+), 20 deletions(-) diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 6be891522..486d1563d 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -2404,24 +2404,22 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno, if (!leftsibvalid) { - if (target != leafblkno) - { - /* we have only a pin on target, but pin+lock on leafbuf */ - ReleaseBuffer(buf); - _bt_relbuf(rel, leafbuf); - } - else - { - /* we have only a pin on leafbuf */ - ReleaseBuffer(leafbuf); - } - + /* + * This is known to fail in the field; sibling link corruption + * is relatively common. Press on with vacuuming rather than + * just throwing an ERROR. + */ ereport(LOG, (errcode(ERRCODE_INDEX_CORRUPTED), errmsg_internal("valid left sibling for deletion target could not be located: " - "left sibling %u of target %u with leafblkno %u and scanblkno %u in index \"%s\"", + "left sibling %u of target %u with leafblkno %u and scanblkno %u on level %u of index \"%s\"", leftsib, target, leafblkno, scanblkno, - RelationGetRelationName(rel)))); + targetlevel, RelationGetRelationName(rel)))); + + /* Must release all pins and locks on failure exit */ + ReleaseBuffer(buf); + if (target != leafblkno) + _bt_relbuf(rel, leafbuf); return false; } @@ -2496,13 +2494,40 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno, rbuf = _bt_getbuf(rel, vstate->info->heaprel, rightsib, BT_WRITE); page = BufferGetPage(rbuf); opaque = BTPageGetOpaque(page); + + /* + * Validate target's right sibling page. Its left link must point back to + * the target page. + */ if (opaque->btpo_prev != target) - ereport(ERROR, + { + /* + * This is known to fail in the field; sibling link corruption is + * relatively common. Press on with vacuuming rather than just + * throwing an ERROR (same approach used for left-sibling's-right-link + * validation check a moment ago). + */ + ereport(LOG, (errcode(ERRCODE_INDEX_CORRUPTED), errmsg_internal("right sibling's left-link doesn't match: " - "block %u links to %u instead of expected %u in index \"%s\"", - rightsib, opaque->btpo_prev, target, - RelationGetRelationName(rel)))); + "right sibling %u of target %u with leafblkno %u " + "and scanblkno %u spuriously links to non-target %u " + "on level %u of index \"%s\"", + rightsib, target, leafblkno, + scanblkno, opaque->btpo_prev, + targetlevel, RelationGetRelationName(rel)))); + + /* Must release all pins and locks on failure exit */ + if (BufferIsValid(lbuf)) + _bt_relbuf(rel, lbuf); + _bt_relbuf(rel, rbuf); + _bt_relbuf(rel, buf); + if (target != leafblkno) + _bt_relbuf(rel, leafbuf); + + return false; + } + rightsib_is_rightmost = P_RIGHTMOST(opaque); *rightsib_empty = (P_FIRSTDATAKEY(opaque) > PageGetMaxOffsetNumber(page)); @@ -2727,6 +2752,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno, */ _bt_pendingfsm_add(vstate, target, safexid); + /* Success - hold on to lock on leafbuf (might also have been target) */ return true; } diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 2df884985..1ce5b1519 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -1093,8 +1093,7 @@ backtrack: * can't be half-dead because only an interrupted VACUUM process can * leave pages in that state, so we'd definitely have dealt with it * back when the page was the scanblkno page (half-dead pages are - * always marked fully deleted by _bt_pagedel()). This assumes that - * there can be only one vacuum process running at a time. + * always marked fully deleted by _bt_pagedel(), barring corruption). */ if (!opaque || !P_ISLEAF(opaque) || P_ISHALFDEAD(opaque)) { -- 2.40.1