commit 6a8e6f99b08a0a03ea8e415b5eaf24ceb398f4b4 Author: Simon Riggs Date: Mon Sep 26 14:51:49 2022 +0100 v11 diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c index 3d9088a704..d690774f33 100644 --- a/src/backend/access/transam/clog.c +++ b/src/backend/access/transam/clog.c @@ -107,7 +107,18 @@ static bool TransactionGroupUpdateXidStatus(TransactionId xid, static void TransactionIdSetPageStatusInternal(TransactionId xid, int nsubxids, TransactionId *subxids, XidStatus status, XLogRecPtr lsn, int pageno); +/* + * Run locally by a backend to establish whether or not it needs to call + * SubTransSetParent for subxid. + */ +bool +TransactionIdsAreOnSameXactPage(TransactionId topxid, TransactionId subxid) +{ + int toppageno = TransactionIdToPage(topxid); + int subpageno = TransactionIdToPage(subxid); + return (toppageno == subpageno); +} /* * TransactionIdSetTreeStatus @@ -133,7 +144,7 @@ static void TransactionIdSetPageStatusInternal(TransactionId xid, int nsubxids, * only once, and the status will be set to committed directly. Otherwise * we must * 1. set sub-committed all subxids that are not on the same page as the - * main xid + * main xid (see TransactionIdsAreOnSameXactPage()) * 2. atomically set committed the main xid and the subxids on the same page * 3. go over the first bunch again and set them committed * Note that as far as concurrent checkers are concerned, main transaction diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c index 66d3548155..140ad78530 100644 --- a/src/backend/access/transam/subtrans.c +++ b/src/backend/access/transam/subtrans.c @@ -137,14 +137,8 @@ SubTransGetParent(TransactionId xid) /* * SubTransGetTopmostTransaction * - * Returns the topmost transaction of the given transaction id. - * - * Because we cannot look back further than TransactionXmin, it is possible - * that this function will lie and return an intermediate subtransaction ID - * instead of the true topmost parent ID. This is OK, because in practice - * we only care about detecting whether the topmost parent is still running - * or is part of a current snapshot's list of still-running transactions. - * Therefore, any XID before TransactionXmin is as good as any other. + * Returns the topmost transaction of the given transaction id, if one has + * been recorded in pg_subtrans. */ TransactionId SubTransGetTopmostTransaction(TransactionId xid) @@ -177,7 +171,6 @@ SubTransGetTopmostTransaction(TransactionId xid) return previousXid; } - /* * Initialization of shared memory for SUBTRANS */ diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 2bb975943c..ff58d19595 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -693,8 +693,53 @@ AssignTransactionId(TransactionState s) XactTopFullTransactionId = s->fullTransactionId; if (isSubXact) - SubTransSetParent(XidFromFullTransactionId(s->fullTransactionId), - XidFromFullTransactionId(s->parent->fullTransactionId)); + { + TransactionId subxid = XidFromFullTransactionId(s->fullTransactionId); + TransactionId topxid = GetTopTransactionId(); + + /* + * Subtrans entries are only required in specific circumstances: + * + * 1. When there's no room in PG_PROC, as mentioned above. + * During XactLockTableWait() we sometimes need to know the topxid. + * If there is room in PG_PROC we can get a subxid's topxid direct + * from the procarray if the topxid is still running, using + * GetTopmostTransactionIdFromProcArray(). So we only ever need to + * call SubTransGetTopmostTransaction() if that xact overflowed; + * since that is our current transaction, we know whether or not to + * log the xid for future use. + * This occurs only when large number of subxids are requested by + * app user. + * + * 2. When IsolationIsSerializable() we sometimes need to access topxid. + * This occurs only when SERIALIZABLE is requested by app user. + * + * 3. When TransactionIdSetTreeStatus() will use status SUB_COMMITTED, + * which then requires us to consult subtrans to find parent, which + * is needed to avoid race condition. In this case we ask Clog/Xact + * module if TransactionIdsAreOnSameXactPage(). Since we start a new + * clog page every 32000 xids, this is usually <<1% of subxids, + * depending upon how far apart the subxacts assign subxids. + */ + if (MyProc->subxidStatus.overflowed || + IsolationIsSerializable() || + !TransactionIdsAreOnSameXactPage(topxid, subxid)) + { + /* + * Insert entries into subtrans for this xid, noting that the entry + * points directly to the topxid, not the immediate parent. This is + * done for two reasons: + * (1) so it is faster in a long chain of subxids, because the + * algorithm is then O(1), no matter how many subxids are assigned. + * (2) so that we don't need to set subxids for unregistered parents. + * Previously when we set the parent to be the immediate parent, + * we then had to set the parent in all cases to maintain the chain + * of values to reach the topxid. If all subxids point to topxid, + * then they are independent of each other and we can skip some. + */ + SubTransSetParent(subxid, topxid); + } + } /* * If it's a top-level transaction, the predicate locking system needs to diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index 207c4b27fd..fc42d865be 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -262,6 +262,13 @@ static ProcArrayStruct *procArray; static PGPROC *allProcs; +/* + * Remember the last call to TransactionIdIsInProgress() to avoid need to call + * SubTransGetTopMostTransaction() when the subxid is present in the procarray. + */ +static TransactionId LastCallXidIsInProgressSubXid = InvalidTransactionId; +static TransactionId LastCallXidIsInProgressParentXid = InvalidTransactionId; + /* * Cache to reduce overhead of repeated calls to TransactionIdIsInProgress() */ @@ -1310,14 +1317,14 @@ ProcArrayApplyXidAssignment(TransactionId topxid, /* * Notice that we update pg_subtrans with the top-level xid, rather than - * the parent xid. This is a difference between normal processing and - * recovery, yet is still correct in all cases. The reason is that + * the parent xid, which is correct in all cases. The reason is that * subtransaction commit is not marked in clog until commit processing, so * all aborted subtransactions have already been clearly marked in clog. * As a result we are able to refer directly to the top-level * transaction's state rather than skipping through all the intermediate * states in the subtransaction tree. This should be the first time we - * have attempted to SubTransSetParent(). + * have attempted to SubTransSetParent() for this xid in recovery. + * XXX this may be optimized later, but we don't have good test coverage. */ for (i = 0; i < nsubxids; i++) SubTransSetParent(subxids[i], topxid); @@ -1441,6 +1448,8 @@ TransactionIdIsInProgress(TransactionId xid) other_xids = ProcGlobal->xids; other_subxidstates = ProcGlobal->subxidStates; + LastCallXidIsInProgressSubXid = LastCallXidIsInProgressParentXid = InvalidTransactionId; + LWLockAcquire(ProcArrayLock, LW_SHARED); /* @@ -1509,6 +1518,15 @@ TransactionIdIsInProgress(TransactionId xid) { LWLockRelease(ProcArrayLock); xc_by_child_xid_inc(); + + /* + * Remember the parent xid, for use during XactLockTableWait(). + * We do this because it is cheaper than looking up pg_subtrans, + * and also allows us to reduce calls to subtrans. + */ + LastCallXidIsInProgressSubXid = xid; + LastCallXidIsInProgressParentXid = pxid; + return true; } } @@ -1589,12 +1607,38 @@ TransactionIdIsInProgress(TransactionId xid) Assert(TransactionIdIsValid(topxid)); if (!TransactionIdEquals(topxid, xid) && pg_lfind32(topxid, xids, nxids)) + { + LastCallXidIsInProgressSubXid = xid; + LastCallXidIsInProgressParentXid = topxid; return true; + } cachedXidIsNotInProgress = xid; return false; } +/* + * Allow the topmost xid to be accessed from the last call to + * TransactionIdIsInProgress(). Specifically designed for use in + * XactLockTableWait(). + */ +bool +GetTopmostTransactionIdFromProcArray(TransactionId xid, TransactionId *pxid) +{ + bool found = false; + + Assert(TransactionIdIsNormal(xid)); + + if (LastCallXidIsInProgressSubXid == xid) + { + Assert(TransactionIdIsNormal(LastCallXidIsInProgressParentXid)); + *pxid = LastCallXidIsInProgressParentXid; + found = true; + } + + return found; +} + /* * TransactionIdIsActive -- is xid the top-level XID of an active backend? * @@ -2366,27 +2410,34 @@ GetSnapshotData(Snapshot snapshot) * * Again, our own XIDs are not included in the snapshot. */ - if (!suboverflowed) + if (subxidStates[pgxactoff].overflowed) + suboverflowed = true; + + /* + * Include all subxids, whether or not we overflowed. This is + * important because it can reduce the number of accesses to SUBTRANS + * when we search snapshots, which is important for scalability, + * especially in the presence of both overflowed and long running xacts. + * This is true when fraction of subxids held in subxip is a large + * fraction of the total subxids in use. In the case where one or more + * transactions had more subxids in progress than the total size of + * the cache this might not be true, but we consider that case to be + * unlikely, even if it is possible. + */ { + int nsubxids = subxidStates[pgxactoff].count; - if (subxidStates[pgxactoff].overflowed) - suboverflowed = true; - else + if (nsubxids > 0) { - int nsubxids = subxidStates[pgxactoff].count; - - if (nsubxids > 0) - { - int pgprocno = pgprocnos[pgxactoff]; - PGPROC *proc = &allProcs[pgprocno]; + int pgprocno = pgprocnos[pgxactoff]; + PGPROC *proc = &allProcs[pgprocno]; - pg_read_barrier(); /* pairs with GetNewTransactionId */ + pg_read_barrier(); /* pairs with GetNewTransactionId */ - memcpy(snapshot->subxip + subcount, - (void *) proc->subxids.xids, - nsubxids * sizeof(TransactionId)); - subcount += nsubxids; - } + memcpy(snapshot->subxip + subcount, + (void *) proc->subxids.xids, + nsubxids * sizeof(TransactionId)); + subcount += nsubxids; } } } diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c index 1043068bac..3e529e1bc2 100644 --- a/src/backend/storage/lmgr/lmgr.c +++ b/src/backend/storage/lmgr/lmgr.c @@ -694,6 +694,8 @@ XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid, for (;;) { + TransactionId pxid = InvalidTransactionId; + Assert(TransactionIdIsValid(xid)); Assert(!TransactionIdEquals(xid, GetTopTransactionIdIfAny())); @@ -703,6 +705,13 @@ XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid, LockRelease(&tag, ShareLock, false); + /* + * If a transaction has no lock, it might be a top-level transaction, + * in which case the procarray will show it as not in progress. + * + * If a transaction is a subtransaction, then it could have committed + * or aborted, yet the top-level transaction may still be in progress. + */ if (!TransactionIdIsInProgress(xid)) break; @@ -724,7 +733,27 @@ XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid, if (!first) pg_usleep(1000L); first = false; - xid = SubTransGetTopmostTransaction(xid); + + /* + * In most cases, we can get the parent xid from our prior call to + * TransactionIdIsInProgress(), except in hot standby. If not, we have + * to ask subtrans for the parent. + */ + if (GetTopmostTransactionIdFromProcArray(xid, &pxid)) + { + /* TESTED-BY src/test/isolation/specs/subx-overflow.spec test3 */ + Assert(TransactionIdIsValid(pxid)); + xid = pxid; + } + else + { + /* + * We can get here if RecoveryInProgress() during the last call to + * TransactionIdIsInProgress(), but we don't Assert that here since + * that would create a race window against the end of recovery. + */ + xid = SubTransGetTopmostTransaction(xid); + } } if (oper != XLTW_None) @@ -745,6 +774,8 @@ ConditionalXactLockTableWait(TransactionId xid) for (;;) { + TransactionId pxid = InvalidTransactionId; + Assert(TransactionIdIsValid(xid)); Assert(!TransactionIdEquals(xid, GetTopTransactionIdIfAny())); @@ -762,7 +793,15 @@ ConditionalXactLockTableWait(TransactionId xid) if (!first) pg_usleep(1000L); first = false; - xid = SubTransGetTopmostTransaction(xid); + + /* See XactLockTableWait about this case */ + if (GetTopmostTransactionIdFromProcArray(xid, &pxid)) + { + Assert(TransactionIdIsValid(pxid)); + xid = pxid; + } + else + xid = SubTransGetTopmostTransaction(xid); } return true; diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c index f1f2ddac17..b8d4e1d8dc 100644 --- a/src/backend/utils/time/snapmgr.c +++ b/src/backend/utils/time/snapmgr.c @@ -639,13 +639,9 @@ CopySnapshot(Snapshot snapshot) newsnap->xip = NULL; /* - * Setup subXID array. Don't bother to copy it if it had overflowed, - * though, because it's not used anywhere in that case. Except if it's a - * snapshot taken during recovery; all the top-level XIDs are in subxip as - * well in that case, so we mustn't lose them. + * Setup subXID array. */ - if (snapshot->subxcnt > 0 && - (!snapshot->suboverflowed || snapshot->takenDuringRecovery)) + if (snapshot->subxcnt > 0) { newsnap->subxip = (TransactionId *) ((char *) newsnap + subxipoff); memcpy(newsnap->subxip, snapshot->subxip, @@ -1240,8 +1236,10 @@ ExportSnapshot(Snapshot snapshot) snapshot->subxcnt + nchildren > GetMaxSnapshotSubxidCount()) appendStringInfoString(&buf, "sof:1\n"); else - { appendStringInfoString(&buf, "sof:0\n"); + + /* then unconditionally, since we always include all subxids */ + { appendStringInfo(&buf, "sxcnt:%d\n", snapshot->subxcnt + nchildren); for (i = 0; i < snapshot->subxcnt; i++) appendStringInfo(&buf, "sxp:%u\n", snapshot->subxip[i]); @@ -1492,7 +1490,7 @@ ImportSnapshot(const char *idstr) snapshot.suboverflowed = parseIntFromText("sof:", &filebuf, path); - if (!snapshot.suboverflowed) + /* then unconditionally, since we always include all subxids */ { snapshot.subxcnt = xcnt = parseIntFromText("sxcnt:", &filebuf, path); @@ -1506,11 +1504,6 @@ ImportSnapshot(const char *idstr) for (i = 0; i < xcnt; i++) snapshot.subxip[i] = parseXidFromText("sxp:", &filebuf, path); } - else - { - snapshot.subxcnt = 0; - snapshot.subxip = NULL; - } snapshot.takenDuringRecovery = parseIntFromText("rec:", &filebuf, path); @@ -2286,6 +2279,8 @@ RestoreTransactionSnapshot(Snapshot snapshot, void *source_pgproc) bool XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot) { + bool have_parent = false; + /* * Make a quick range check to eliminate most XIDs without looking at the * xip arrays. Note that this is OK even if we convert a subxact XID to @@ -2301,80 +2296,76 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot) if (TransactionIdFollowsOrEquals(xid, snapshot->xmax)) return true; + /* + * This patch reorders the operations in XidMVCCSnapshot, so as to reduce + * calls to SubTransGetParent to the absolute minimum needed. + * The previous code was neat, but not efficient for the overflow case. + */ +retry_search: + /* * Snapshot information is stored slightly differently in snapshots taken - * during recovery. + * during recovery. xip is empty on standbys. */ if (!snapshot->takenDuringRecovery) { + if (pg_lfind32(xid, snapshot->xip, snapshot->xcnt)) + return true; + /* - * If the snapshot contains full subxact data, the fastest way to - * check things is just to compare the given XID against both subxact - * XIDs and top-level XIDs. If the snapshot overflowed, we have to - * use pg_subtrans to convert a subxact XID to its parent XID, but - * then we need only look at top-level XIDs not subxacts. + * If we have the parent xid, then the xid is not in snapshot */ - if (!snapshot->suboverflowed) - { - /* we have full data, so search subxip */ - if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt)) - return true; - - /* not there, fall through to search xip[] */ - } - else - { - /* - * Snapshot overflowed, so convert xid to top-level. This is safe - * because we eliminated too-old XIDs above. - */ - xid = SubTransGetTopmostTransaction(xid); + if (have_parent) + return false; + } - /* - * If xid was indeed a subxact, we might now have an xid < xmin, - * so recheck to avoid an array scan. No point in rechecking - * xmax. - */ - if (TransactionIdPrecedes(xid, snapshot->xmin)) - return false; - } + /* + * Now search subxip, which contains first 64 subxids of each topxid. + */ + if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt)) + return true; - if (pg_lfind32(xid, snapshot->xip, snapshot->xcnt)) - return true; - } - else + if (!have_parent && snapshot->suboverflowed) { + TransactionId pxid; + + /* TESTED-BY src/test/isolation/specs/subx-overflow.spec test1 and test2 */ + /* - * In recovery we store all xids in the subxact array because it is by - * far the bigger array, and we mostly don't know which xids are - * top-level and which are subxacts. The xip array is empty. + * If we haven't found xid yet, it might be because it is a subxid + * that is not present because we overflowed, but it might also be + * because the xid is not in the snapshot. * - * We start by searching subtrans, if we overflowed. + * It is important we do this step last because it is expensive, + * and if everybody does this then SubTransSLRU glows white hot. */ - if (snapshot->suboverflowed) - { - /* - * Snapshot overflowed, so convert xid to top-level. This is safe - * because we eliminated too-old XIDs above. - */ - xid = SubTransGetTopmostTransaction(xid); - /* - * If xid was indeed a subxact, we might now have an xid < xmin, - * so recheck to avoid an array scan. No point in rechecking - * xmax. - */ - if (TransactionIdPrecedes(xid, snapshot->xmin)) - return false; - } + /* + * Snapshot overflowed, so convert xid to top-level. This is safe + * because we eliminated too-old XIDs above. Every overflowed subxid + * will always have a parent recorded during AssignTransactionId() + * so this should always return a valid TransactionId, if a subxact. + */ + pxid = SubTransGetTopmostTransaction(xid); /* - * We now have either a top-level xid higher than xmin or an - * indeterminate xid. We don't know whether it's top level or subxact - * but it doesn't matter. If it's present, the xid is visible. + * If xid was indeed a subxact, we might now have an xid < xmin, + * so recheck to avoid an array scan. No point in rechecking + * xmax. If it wasn't a subxact, pxid will be invalid, so this test + * will do the right thing also. */ - if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt)) - return true; + if (TransactionIdPrecedes(pxid, snapshot->xmin)) + return false; + + /* + * Check whether xid was a subxact. If we now have a parent xid, loop. + */ + if (TransactionIdPrecedes(pxid, xid)) + { + xid = pxid; + have_parent = true; + goto retry_search; /* search arrays again, now we have parent */ + } } return false; diff --git a/src/include/access/transam.h b/src/include/access/transam.h index 775471d2a7..f404db552f 100644 --- a/src/include/access/transam.h +++ b/src/include/access/transam.h @@ -301,6 +301,9 @@ extern void AssertTransactionIdInAllowableRange(TransactionId xid); #define AssertTransactionIdInAllowableRange(xid) ((void)true) #endif +/* in transam/clog.c */ +extern bool TransactionIdsAreOnSameXactPage(TransactionId topxid, TransactionId subxid); + /* * Some frontend programs include this header. For compilers that emit static * inline functions even when they're unused, that leads to unsatisfied diff --git a/src/include/storage/procarray.h b/src/include/storage/procarray.h index 9761f5374c..8f344dab6f 100644 --- a/src/include/storage/procarray.h +++ b/src/include/storage/procarray.h @@ -52,6 +52,8 @@ extern bool ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC *proc); extern RunningTransactions GetRunningTransactionData(void); extern bool TransactionIdIsInProgress(TransactionId xid); +extern bool GetTopmostTransactionIdFromProcArray(TransactionId xid, TransactionId *pxid); + extern bool TransactionIdIsActive(TransactionId xid); extern TransactionId GetOldestNonRemovableTransactionId(Relation rel); extern TransactionId GetOldestTransactionIdConsideredRunning(void);