From 774376dc8f7ed76657660e6beb0f5191d1bdaae4 Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Fri, 3 Nov 2017 07:52:29 -0700
Subject: [PATCH] Fix pruning of locked and updated tuples.

Previously it was possible that a tuple was not pruned during vacuum,
even though it's update xmax (i.e. the updating xid in a multixact
with both key share lockers and an updater) was below the cutoff
horizon.

As the freezing code assumed, rightly so, that that's not supposed to
happen, xmax would be preserved (as a member of a new multixact or
xmax directly). That causes two problems: For one the tuple is below
the xmin horizon, which can cause problems if the clog is truncated or
once there's an xid wraparound. The bigger problem is that that will
break HOT chains, leading to index lookups failing, which in turn can
lead to constraints being violated.

Fix the problem by recognizing that tuples can be DEAD instead of
RECENTLY_DEAD, even if the multixactid has alive members, if the
update_xid is below the xmin horizon. That's safe because newer
versions of the tuple will contain the locking xids.

As the wrong freezing of xmax can cause data corruption, extend sanity
checks and promote them to elogs rather than assertions.

Per-Discussion: Robert Haas and Freund
Reported-By: Daniel Wood
Discussion:
    https://postgr.es/m/E5711E62-8FDF-4DCA-A888-C200BF6B5742@amazon.com
    https://postgr.es/m/20171102112019.33wb7g5wp4zpjelu@alap3.anarazel.de
---
 src/backend/access/heap/heapam.c      | 34 ++++++++++++++++------
 src/backend/commands/vacuumlazy.c     | 13 +++++++++
 src/backend/utils/time/tqual.c        | 53 +++++++++++++++--------------------
 src/test/isolation/isolation_schedule |  1 +
 4 files changed, 62 insertions(+), 39 deletions(-)

diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 765750b8743..9b963e0cd0d 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -6412,7 +6412,8 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
 			 */
 			if (TransactionIdPrecedes(xid, cutoff_xid))
 			{
-				Assert(!TransactionIdDidCommit(xid));
+				if (TransactionIdDidCommit(xid))
+					elog(ERROR, "can't freeze committed xmax");
 				*flags |= FRM_INVALIDATE_XMAX;
 				xid = InvalidTransactionId; /* not strictly necessary */
 			}
@@ -6484,6 +6485,8 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
 		{
 			TransactionId xid = members[i].xid;
 
+			Assert(TransactionIdIsValid(xid));
+
 			/*
 			 * It's an update; should we keep it?  If the transaction is known
 			 * aborted or crashed then it's okay to ignore it, otherwise not.
@@ -6512,18 +6515,24 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
 				update_committed = true;
 				update_xid = xid;
 			}
+			else
+			{
+				/*
+				 * Not in progress, not committed -- must be aborted or crashed;
+				 * we can ignore it.
+				 */
+			}
 
-			/*
-			 * Not in progress, not committed -- must be aborted or crashed;
-			 * we can ignore it.
-			 */
 
 			/*
 			 * Since the tuple wasn't marked HEAPTUPLE_DEAD by vacuum, the
-			 * update Xid cannot possibly be older than the xid cutoff.
+			 * update Xid cannot possibly be older than the xid cutoff. The
+			 * presence of such a tuple would cause corruption, so be paranoid
+			 * and check.
 			 */
-			Assert(!TransactionIdIsValid(update_xid) ||
-				   !TransactionIdPrecedes(update_xid, cutoff_xid));
+			if (TransactionIdIsValid(update_xid) &&
+				TransactionIdPrecedes(update_xid, cutoff_xid))
+				elog(ERROR, "encountered tuple from before xid cutoff");
 
 			/*
 			 * If we determined that it's an Xid corresponding to an update
@@ -6714,7 +6723,16 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
 	else if (TransactionIdIsNormal(xid))
 	{
 		if (TransactionIdPrecedes(xid, cutoff_xid))
+		{
+			/*
+			 * If we freeze xmax, make absolutely sure that it's not an XID
+			 * that is important.
+			 */
+			if (!(tuple->t_infomask & HEAP_XMAX_LOCK_ONLY) &&
+				TransactionIdDidCommit(xid))
+				elog(ERROR, "can't freeze committed xmax");
 			freeze_xmax = true;
+		}
 		else
 			totally_frozen = false;
 	}
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
index 6587db77acf..93e3ba31214 100644
--- a/src/backend/commands/vacuumlazy.c
+++ b/src/backend/commands/vacuumlazy.c
@@ -1007,7 +1007,20 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
 					 */
 					if (HeapTupleIsHotUpdated(&tuple) ||
 						HeapTupleIsHeapOnly(&tuple))
+					{
 						nkeep += 1;
+
+						/*
+						 * If this were to happen for a tuple that actually
+						 * needed to be frozen, we'd be in trouble, because
+						 * it'd leave a tuple below the relation's xmin
+						 * horizon alive.
+						 */
+						if (heap_tuple_needs_freeze(tuple.t_data, FreezeLimit,
+													MultiXactCutoff, buf))
+							elog(ERROR, "encountered tuple from before xid cutoff");
+
+					}
 					else
 						tupgone = true; /* we can delete the tuple */
 					all_visible = false;
diff --git a/src/backend/utils/time/tqual.c b/src/backend/utils/time/tqual.c
index b7aab0dd190..6209b4e17cc 100644
--- a/src/backend/utils/time/tqual.c
+++ b/src/backend/utils/time/tqual.c
@@ -1311,49 +1311,40 @@ HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin,
 
 	if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
 	{
-		TransactionId xmax;
+		TransactionId xmax = HeapTupleGetUpdateXid(tuple);
 
-		if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
-		{
-			/* already checked above */
-			Assert(!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask));
-
-			xmax = HeapTupleGetUpdateXid(tuple);
-
-			/* not LOCKED_ONLY, so it has to have an xmax */
-			Assert(TransactionIdIsValid(xmax));
-
-			if (TransactionIdIsInProgress(xmax))
-				return HEAPTUPLE_DELETE_IN_PROGRESS;
-			else if (TransactionIdDidCommit(xmax))
-				/* there are still lockers around -- can't return DEAD here */
-				return HEAPTUPLE_RECENTLY_DEAD;
-			/* updating transaction aborted */
-			return HEAPTUPLE_LIVE;
-		}
-
-		Assert(!(tuple->t_infomask & HEAP_XMAX_COMMITTED));
-
-		xmax = HeapTupleGetUpdateXid(tuple);
+		/* already checked above */
+		Assert(!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask));
 
 		/* not LOCKED_ONLY, so it has to have an xmax */
 		Assert(TransactionIdIsValid(xmax));
 
-		/* multi is not running -- updating xact cannot be */
-		Assert(!TransactionIdIsInProgress(xmax));
-		if (TransactionIdDidCommit(xmax))
+		if (TransactionIdIsInProgress(xmax))
+			return HEAPTUPLE_DELETE_IN_PROGRESS;
+		else if (TransactionIdDidCommit(xmax))
 		{
+			/*
+			 * The multixact might still be running due to lockers. If the
+			 * updater is below the horizon we have to return DEAD regardless
+			 * - otherwise we could end up with a tuple where the updater has
+			 * to be removed due to the horizon, but is not pruned away. It's
+			 * not a problem to prune that tuple because all the lockers will
+			 * also be present in the newer tuple version.
+			 */
 			if (!TransactionIdPrecedes(xmax, OldestXmin))
 				return HEAPTUPLE_RECENTLY_DEAD;
 			else
 				return HEAPTUPLE_DEAD;
 		}
+		else if (!MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
+		{
+			/*
+			 * Not in Progress, Not Committed, so either Aborted or crashed.
+			 * Remove the Xmax.
+			 */
+			SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId);
+		}
 
-		/*
-		 * Not in Progress, Not Committed, so either Aborted or crashed.
-		 * Remove the Xmax.
-		 */
-		SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId);
 		return HEAPTUPLE_LIVE;
 	}
 
diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule
index 32c965b2a02..7dad3c23163 100644
--- a/src/test/isolation/isolation_schedule
+++ b/src/test/isolation/isolation_schedule
@@ -44,6 +44,7 @@ test: update-locked-tuple
 test: propagate-lock-delete
 test: tuplelock-conflict
 test: tuplelock-update
+test: freeze-the-dead
 test: nowait
 test: nowait-2
 test: nowait-3
-- 
2.14.1.536.g6867272d5b.dirty

