diff --git a/doc/src/sgml/mvcc.sgml b/doc/src/sgml/mvcc.sgml index f8c9655111..ab73463b53 100644 --- a/doc/src/sgml/mvcc.sgml +++ b/doc/src/sgml/mvcc.sgml @@ -238,7 +238,7 @@ Not possible - Allowed, but not in PG + Allowed, but not generally useful Possible @@ -268,11 +268,10 @@ In PostgreSQL, you can request any of - the four standard transaction isolation levels, but internally only - three distinct isolation levels are implemented, i.e. PostgreSQL's - Read Uncommitted mode behaves like Read Committed. This is because - it is the only sensible way to map the standard isolation levels to - PostgreSQL's multiversion concurrency control architecture. + the four standard transaction isolation levels, but Read Uncommitted + is mostly irrelevant because there are few use cases where this + isolation level does anything useful for applications in comparison + with PostgreSQL's multiversion concurrency control architecture. @@ -784,6 +783,54 @@ ERROR: could not serialize access due to read/write dependencies among transact + + + Read Uncommitted Isolation Level + + + transaction isolation level + read uncommitted + + + + read uncommitted + + + + In PostgreSQL's MVCC + architecture, readers are not blocked by writers, so in general + you should have no need for this transaction isolation level. + + + + In general, read uncommitted will return inconsistent results and + wrong answers. If you look at the changes made by a transaction + while it continues to make changes then you may get partial results + from queries, or you may miss index entries that haven't yet been + written. However, if you are reading transactions that are paused + at the end of their execution for whatever reason then you can + see a consistent result. + + + + The main use case for this transaction isolation level is for + investigating or recovering data. 
Examples of this would be when + inspecting the writes made by a locked or hanging transaction, when + you are running queries on a standby node that is currently paused, + such as when a standby node has halted at a recovery target with + recovery_target_inclusive = false, or when you + need to inspect changes made by an in-doubt prepared transaction to + decide whether to commit or abort that transaction. + + + + In PostgreSQL, read uncommitted mode gives + a consistent snapshot of the currently running transactions at the + time the snapshot was taken. Transactions starting after that time + will not be visible, even though they are not yet committed. + + + diff --git a/src/backend/access/heap/heapam_visibility.c b/src/backend/access/heap/heapam_visibility.c index 3e3646716f..d669a83418 100644 --- a/src/backend/access/heap/heapam_visibility.c +++ b/src/backend/access/heap/heapam_visibility.c @@ -1058,7 +1058,7 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple))) SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, HeapTupleHeaderGetRawXmin(tuple)); - else + else if (XactIsoLevel != XACT_READ_UNCOMMITTED) { /* it must have aborted or crashed */ SetHintBits(tuple, buffer, HEAP_XMIN_INVALID, @@ -1103,6 +1103,8 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, } if (XidInMVCCSnapshot(xmax, snapshot)) return true; + if (XactIsoLevel == XACT_READ_UNCOMMITTED) + return false; if (TransactionIdDidCommit(xmax)) return false; /* updating transaction committed */ /* it must have aborted or crashed */ @@ -1122,6 +1124,9 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, if (XidInMVCCSnapshot(HeapTupleHeaderGetRawXmax(tuple), snapshot)) return true; + if (XactIsoLevel == XACT_READ_UNCOMMITTED) + return false; + if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple))) { /* it must have aborted or crashed */ diff --git a/src/backend/storage/ipc/procarray.c 
b/src/backend/storage/ipc/procarray.c index 13bcbe77de..2335be5306 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -1743,10 +1743,45 @@ GetSnapshotData(Snapshot snapshot) RecentXmin = xmin; snapshot->xmin = xmin; - snapshot->xmax = xmax; - snapshot->xcnt = count; - snapshot->subxcnt = subcount; - snapshot->suboverflowed = suboverflowed; + if (XactIsoLevel == XACT_READ_UNCOMMITTED) + { + /* + * In XACT_READ_UNCOMMITTED we want a consistent snapshot, just + * one that can see data written by transactions currently in + * progress. So any transactions started AFTER this point will + * still be invisible to us. We don't use the normal latest + * Committed+1 because that misses many currently executing xids. + * This is safe since we read the value atomically, so we + * don't need XidGenLock. + * + * This is a useful definition of a consistent snapshot when + * we want to see the effects of unresolved 2PC transactions + * or when recovery has paused. In other cases, transactions + * might continue to write and so the results might still be + * inconsistent in many cases; caveat emptor. + */ + snapshot->xmax = XidFromFullTransactionId(ShmemVariableCache->nextFullXid); + + /* + * We still need to calculate xmin correctly, so we respect the + * normal limits for cleaning up as we scan. This is needed in + * recovery in case we want to keep using this snapshot after + * the standby is promoted. + * + * Other values must be zeroed otherwise the snapshot wouldn't + * be able to see the uncommitted transactions. + */ + snapshot->xcnt = 0; + snapshot->subxcnt = 0; + snapshot->suboverflowed = false; + } + else + { + snapshot->xmax = xmax; + snapshot->xcnt = count; + snapshot->subxcnt = subcount; + snapshot->suboverflowed = suboverflowed; + } snapshot->curcid = GetCurrentCommandId(false);