From 1fbe8648abc70d97c6773b14c833f3fcf40411cc Mon Sep 17 00:00:00 2001 From: Jasper Smit Date: Thu, 9 Oct 2025 16:23:38 +0200 Subject: [PATCH] DEF0734066 Delay marking aborted tuples unused, until no visibile tuple has a ctid link to that tuple --- contrib/pgstattuple/pgstatapprox.c | 1 + src/backend/access/heap/heapam.c | 1 + src/backend/access/heap/heapam_handler.c | 3 ++ src/backend/access/heap/heapam_visibility.c | 39 +++++++++++++-------- src/backend/access/heap/pruneheap.c | 18 +++++++++- src/backend/access/heap/vacuumlazy.c | 2 ++ src/include/access/heapam.h | 3 +- 7 files changed, 51 insertions(+), 16 deletions(-) diff --git a/contrib/pgstattuple/pgstatapprox.c b/contrib/pgstattuple/pgstatapprox.c index a59ff4e9d4f..50092793b99 100644 --- a/contrib/pgstattuple/pgstatapprox.c +++ b/contrib/pgstattuple/pgstatapprox.c @@ -156,6 +156,7 @@ statapprox_heap(Relation rel, output_type *stat) break; case HEAPTUPLE_DEAD: case HEAPTUPLE_RECENTLY_DEAD: + case HEAPTUPLE_RECENTLY_ABORTED: case HEAPTUPLE_INSERT_IN_PROGRESS: stat->dead_tuple_len += tuple.t_len; stat->dead_tuple_count++; diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 0dcd6ee817e..43e8d343de1 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -9264,6 +9264,7 @@ HeapCheckForSerializableConflictOut(bool visible, Relation relation, return; xid = HeapTupleHeaderGetXmin(tuple->t_data); break; + case HEAPTUPLE_RECENTLY_ABORTED: case HEAPTUPLE_RECENTLY_DEAD: case HEAPTUPLE_DELETE_IN_PROGRESS: if (visible) diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index cb4bc35c93e..29df8dbc372 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -846,6 +846,7 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, isdead = true; break; case HEAPTUPLE_RECENTLY_DEAD: + case HEAPTUPLE_RECENTLY_ABORTED: *tups_recently_dead += 1; /* fall through */ case HEAPTUPLE_LIVE: @@ -1079,6 +1080,7 @@ heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, case HEAPTUPLE_DEAD: case HEAPTUPLE_RECENTLY_DEAD: + case HEAPTUPLE_RECENTLY_ABORTED: /* Count dead and recently-dead rows */ *deadrows += 1; break; @@ -1427,6 +1429,7 @@ heapam_index_build_range_scan(Relation heapRelation, /* Count it as live, too */ reltuples += 1; break; + case HEAPTUPLE_RECENTLY_ABORTED: case HEAPTUPLE_RECENTLY_DEAD: /* diff --git a/src/backend/access/heap/heapam_visibility.c b/src/backend/access/heap/heapam_visibility.c index 05f6946fe60..ac6f7dc14a8 100644 --- a/src/backend/access/heap/heapam_visibility.c +++ b/src/backend/access/heap/heapam_visibility.c @@ -1176,7 +1176,7 @@ HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, res = HeapTupleSatisfiesVacuumHorizon(htup, buffer, &dead_after); - if (res == HEAPTUPLE_RECENTLY_DEAD) + if (res == HEAPTUPLE_RECENTLY_DEAD || res == HEAPTUPLE_RECENTLY_ABORTED) { Assert(TransactionIdIsValid(dead_after)); @@ -1195,7 +1195,7 @@ HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, * In contrast to HeapTupleSatisfiesVacuum this routine, when encountering a * tuple that could still be visible to some backend, stores the xid that * needs to be compared with the horizon in *dead_after, and returns - * HEAPTUPLE_RECENTLY_DEAD. The caller then can perform the comparison with + * HEAPTUPLE_RECENTLY_DEAD || HEAPTUPLE_RECENTLY_ABORTED. The caller then can perform the comparison with * the horizon. This is e.g. useful when comparing with different horizons. * * Note: HEAPTUPLE_DEAD can still be returned here, e.g. if the inserting @@ -1215,13 +1215,20 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de /* * Has inserting transaction committed? * - * If the inserting transaction aborted, then the tuple was never visible - * to any other transaction, so we can delete it immediately. + * For aborted and committed transactions we need to take care of dead_after, + * as there can be someone taking a tuple lock on it and in the process of traversing the tuple chain. + * If we allow the tuple to be deleted immediately, it can be that another tuple is placed there. */ if (!HeapTupleHeaderXminCommitted(tuple)) { - if (HeapTupleHeaderXminInvalid(tuple)) + TransactionId xmin = HeapTupleHeaderGetRawXmin(tuple); + if (HeapTupleHeaderXminInvalid(tuple)) { + if (TransactionIdIsNormal(xmin)) { + *dead_after = xmin; + return HEAPTUPLE_RECENTLY_ABORTED; + } return HEAPTUPLE_DEAD; + } /* Used by pre-9.0 binary upgrades */ else if (tuple->t_infomask & HEAP_MOVED_OFF) { @@ -1256,10 +1263,11 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de { SetHintBits(tuple, buffer, HEAP_XMIN_INVALID, InvalidTransactionId); + // TODO: should also here treat HEAPTUPLE_RECENTLY_ABORTED return HEAPTUPLE_DEAD; } } - else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsCurrentTransactionId(xmin)) { if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ return HEAPTUPLE_INSERT_IN_PROGRESS; @@ -1273,7 +1281,7 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de /* deleting subtransaction must have aborted */ return HEAPTUPLE_INSERT_IN_PROGRESS; } - else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsInProgress(xmin)) { /* * It'd be possible to discern between INSERT/DELETE in progress @@ -1285,17 +1293,20 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de */ return HEAPTUPLE_INSERT_IN_PROGRESS; } - else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple))) - SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, - HeapTupleHeaderGetRawXmin(tuple)); + else if (TransactionIdDidCommit(xmin)) + SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, xmin); else { /* * Not in Progress, Not Committed, so either Aborted or crashed */ - SetHintBits(tuple, buffer, HEAP_XMIN_INVALID, - InvalidTransactionId); - return HEAPTUPLE_DEAD; + SetHintBits(tuple, buffer, HEAP_XMIN_INVALID, xmin); + if(TransactionIdIsNormal(xmin)) { + *dead_after = xmin; + return HEAPTUPLE_RECENTLY_ABORTED; + } + else + return HEAPTUPLE_DEAD; } /* @@ -1443,7 +1454,7 @@ HeapTupleSatisfiesNonVacuumable(HeapTuple htup, Snapshot snapshot, res = HeapTupleSatisfiesVacuumHorizon(htup, buffer, &dead_after); - if (res == HEAPTUPLE_RECENTLY_DEAD) + if (res == HEAPTUPLE_RECENTLY_DEAD || res == HEAPTUPLE_RECENTLY_ABORTED) { Assert(TransactionIdIsValid(dead_after)); diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c index a8025889be0..ecda8daf0e3 100644 --- a/src/backend/access/heap/pruneheap.c +++ b/src/backend/access/heap/pruneheap.c @@ -921,7 +921,7 @@ heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer) res = HeapTupleSatisfiesVacuumHorizon(tup, buffer, &dead_after); - if (res != HEAPTUPLE_RECENTLY_DEAD) + if (res != HEAPTUPLE_RECENTLY_DEAD && res != HEAPTUPLE_RECENTLY_ABORTED) return res; /* @@ -1108,6 +1108,9 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff, */ break; + case HEAPTUPLE_RECENTLY_ABORTED: + break; + case HEAPTUPLE_DELETE_IN_PROGRESS: case HEAPTUPLE_LIVE: case HEAPTUPLE_INSERT_IN_PROGRESS: @@ -1415,6 +1418,19 @@ heap_prune_record_unchanged_lp_normal(Page page, PruneState *prstate, OffsetNumb } break; + case HEAPTUPLE_RECENTLY_ABORTED: + prstate->recently_dead_tuples++; + prstate->all_visible = false; + + /* + * This tuple will soon become DEAD. Update the hint field so + * that the page is reconsidered for pruning in future. + */ + heap_prune_record_prunable(prstate, + HeapTupleHeaderGetRawXmin(htup)); + break; + + case HEAPTUPLE_RECENTLY_DEAD: prstate->recently_dead_tuples++; prstate->all_visible = false; diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 0fef8e49e2b..def04eb758c 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -2352,6 +2352,7 @@ lazy_scan_noprune(LVRelState *vacrel, */ missed_dead_tuples++; break; + case HEAPTUPLE_RECENTLY_ABORTED: case HEAPTUPLE_RECENTLY_DEAD: /* @@ -3694,6 +3695,7 @@ heap_page_is_all_visible(LVRelState *vacrel, Buffer buf, break; case HEAPTUPLE_DEAD: + case HEAPTUPLE_RECENTLY_ABORTED: case HEAPTUPLE_RECENTLY_DEAD: case HEAPTUPLE_INSERT_IN_PROGRESS: case HEAPTUPLE_DELETE_IN_PROGRESS: diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 3a9424c19c9..139f1f01037 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -123,7 +123,8 @@ typedef enum { HEAPTUPLE_DEAD, /* tuple is dead and deletable */ HEAPTUPLE_LIVE, /* tuple is live (committed, no deleter) */ - HEAPTUPLE_RECENTLY_DEAD, /* tuple is dead, but not deletable yet */ + HEAPTUPLE_RECENTLY_DEAD, /* tuple is dead, but not deletable yet; check xmax for when to delete */ + HEAPTUPLE_RECENTLY_ABORTED, /* tuple is aborted, but not deletable yet; check xmin for when to delete */ HEAPTUPLE_INSERT_IN_PROGRESS, /* inserting xact is still in progress */ HEAPTUPLE_DELETE_IN_PROGRESS, /* deleting xact is still in progress */ } HTSV_Result; -- 2.39.5