From 7723ba395dbf15a0badd6cdafda2752c19356baa Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Mon, 6 Nov 2023 11:07:35 +0200 Subject: [PATCH] Fix false reports in pg_visibility --- contrib/pg_visibility/pg_visibility.c | 75 ++++++++++++++++++++++++--- 1 file changed, 69 insertions(+), 6 deletions(-) diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c index 2a4acfd1eee..036a9fd7681 100644 --- a/contrib/pg_visibility/pg_visibility.c +++ b/contrib/pg_visibility/pg_visibility.c @@ -19,6 +19,7 @@ #include "funcapi.h" #include "miscadmin.h" #include "storage/bufmgr.h" +#include "storage/proc.h" #include "storage/procarray.h" #include "storage/smgr.h" #include "utils/rel.h" @@ -532,6 +533,58 @@ collect_visibility_data(Oid relid, bool include_pd) return info; } +/* + * The "strict" version of GetOldestNonRemovableTransactionId(). The + * pg_visiblity check can tolerale false positives (don't report some of the + * errors), but can't toletate false negatives (report false errors). + * This makes us fundamentally unhappy with ComputeXidHorizons(). Normally, + * horozons moves forwards, but there are cases when it could move backwards + * (see comment for ComputeXidHorizons()). + * + * This is why we have to implement our own function for xid horizon, which + * would be guaranteed to be newer or equal to any xid horizon computed before. + * We have to do the following to achieve this. + * + * 1. Ignore processes xmin's, because they take into account connection to + * other databases which were ignored before. + * 2. Ignore KnownAssignedXids, because they are not database-aware. While + * primary could compute its horizons database-aware. + * 3. Ignore walsender xmin, because it could go backward if some replication + * connections don't use replication slots. + * + * Surely these would significantly sacrifice accuracy. But we have to do so + * in order to avoid reporting false errors. + */ +static TransactionId +get_strict_xid_horizon(void) +{ + TransactionId xmin = InvalidTransactionId; + + LWLockAcquire(ProcArrayLock, LW_SHARED); + for (int index = 0; index < ProcGlobal->allProcCount; index++) + { + TransactionId xid = (uint32) (*((volatile uint32 *) &ProcGlobal->xids[index])); + PGPROC *proc = &ProcGlobal->allProcs[index]; + uint8 statusFlags; + + if (likely(xid == InvalidTransactionId)) + continue; + + statusFlags = ProcGlobal->statusFlags[index]; + if (statusFlags & (PROC_IN_LOGICAL_DECODING | PROC_IN_VACUUM)) + continue; + + if (proc->databaseId == MyDatabaseId || + (statusFlags & PROC_AFFECTS_ALL_HORIZONS)) + { + xmin = TransactionIdOlder(xmin, xid); + } + } + LWLockRelease(ProcArrayLock); + + return xmin; +} + /* * Returns a list of items whose visibility map information does not match * the status of the tuples on the page. @@ -562,8 +615,14 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen) /* Only some relkinds have a visibility map */ check_relation_relkind(rel); + /* Mark ourselves as PROC_IN_VACUUM to exclude our influence on xmin's */ + LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + MyProc->statusFlags |= PROC_IN_VACUUM; + ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags; + LWLockRelease(ProcArrayLock); + if (all_visible) - OldestXmin = GetOldestNonRemovableTransactionId(rel); + OldestXmin = get_strict_xid_horizon(); nblocks = RelationGetNumberOfBlocks(rel); @@ -670,12 +729,11 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen) * From a concurrency point of view, it sort of sucks to * retake ProcArrayLock here while we're holding the buffer * exclusively locked, but it should be safe against - * deadlocks, because surely - * GetOldestNonRemovableTransactionId() should never take a - * buffer lock. And this shouldn't happen often, so it's worth - * being careful so as to avoid false positives. + * deadlocks, because surely get_strict_xid_horizon() should + * never take a buffer lock. And this shouldn't happen often, + * so it's worth being careful so as to avoid false positives. */ - RecomputedOldestXmin = GetOldestNonRemovableTransactionId(rel); + RecomputedOldestXmin = get_strict_xid_horizon(); if (!TransactionIdPrecedes(OldestXmin, RecomputedOldestXmin)) record_corrupt_item(items, &tuple.t_self); @@ -715,6 +773,11 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen) items->count = items->next; items->next = 0; + LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + MyProc->statusFlags &= ~PROC_IN_VACUUM; + ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags; + LWLockRelease(ProcArrayLock); + return items; } -- 2.39.3 (Apple Git-145)