>From c9440c1004539c40398c341d2fe6455c6cb9e49a Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Sat, 31 May 2014 00:26:22 +0200
Subject: [PATCH] Fix longstanding bug in HeapTupleSatisfiesVacuum().

HeapTupleSatisfiesVacuum() didn't properly discern between
DELETE_IN_PROGRESS and INSERT_IN_PROGRESS for rows that have been
inserted in the current transaction and deleted in a aborted
subtransaction of the current backend. At the very least that caused
problems for CLUSTER and CREATE INDEX in transactions that had
aborting subtransactions producing rows, leading to warnings like:
WARNING:  concurrent delete in progress within table "..."
possibly in an endless, uninterruptible, loop.

Instead of treating *InProgress xmins the same as *IsCurrent ones,
treat them distinctly like the other visibility routines. As
implemented this separatation can cause a behaviour change for rows
that have been inserted and deleted in another, still running,
transaction. HTSV will now return INSERT_IN_PROGRESS instead of
DELETE_IN_PROGRESS for those. That's both, more in line with the other
visibility routines and arguably more correct. The latter because a
INSERT_IN_PROGRESS will make callers look at/wait for xmin, instead of
xmax.
The only current caller where that's possibly worse than the old
behaviour is heap_prune_chain() which now won't mark the page as
prunable if a row has concurrently been inserted and deleted. That
seems harmless enough.

As a cautionary measure insert a interrupt check before the gotos in
IndexBuildHeapScan() that lead to the uninterruptible loop.

As this bug goes back to the introduction of subtransactions in
573a71a5da, backpatch to all supported releases.

Reported-By: Sandro Santilli
---
 src/backend/catalog/index.c                |  2 ++
 src/backend/utils/time/tqual.c             | 17 +++++++++--
 src/test/regress/expected/create_index.out | 45 ++++++++++++++++++++++++++++++
 src/test/regress/sql/create_index.sql      | 27 ++++++++++++++++++
 4 files changed, 89 insertions(+), 2 deletions(-)

diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 80acc0e..a5a204e 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -2298,6 +2298,7 @@ IndexBuildHeapScan(Relation heapRelation,
 							XactLockTableWait(xwait, heapRelation,
 											  &heapTuple->t_data->t_ctid,
 											  XLTW_InsertIndexUnique);
+							CHECK_FOR_INTERRUPTS();
 							goto recheck;
 						}
 					}
@@ -2346,6 +2347,7 @@ IndexBuildHeapScan(Relation heapRelation,
 							XactLockTableWait(xwait, heapRelation,
 											  &heapTuple->t_data->t_ctid,
 											  XLTW_InsertIndexUnique);
+							CHECK_FOR_INTERRUPTS();
 							goto recheck;
 						}
 
diff --git a/src/backend/utils/time/tqual.c b/src/backend/utils/time/tqual.c
index 75cd53e..33f5953 100644
--- a/src/backend/utils/time/tqual.c
+++ b/src/backend/utils/time/tqual.c
@@ -1166,7 +1166,7 @@ HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin,
 				return HEAPTUPLE_DEAD;
 			}
 		}
-		else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple)))
+		else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple)))
 		{
 			if (tuple->t_infomask & HEAP_XMAX_INVALID)	/* xid invalid */
 				return HEAPTUPLE_INSERT_IN_PROGRESS;
@@ -1175,7 +1175,20 @@ HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin,
 				HeapTupleHeaderIsOnlyLocked(tuple))
 				return HEAPTUPLE_INSERT_IN_PROGRESS;
 			/* inserted and then deleted by same xact */
-			return HEAPTUPLE_DELETE_IN_PROGRESS;
+			if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple)))
+				return HEAPTUPLE_DELETE_IN_PROGRESS;
+			/* deleting subtransaction must have aborted */
+			return HEAPTUPLE_INSERT_IN_PROGRESS;
+		}
+		else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple)))
+		{
+			/*
+			 * It'd be possible to discern between INSERT/DELETE in progress
+			 * here by looking at xmax, but that doesn't seem beneficial for
+			 * the majority of callers. We'd rather have callers look/wait for
+			 * xmin than xmax.
+			 */
+			return HEAPTUPLE_INSERT_IN_PROGRESS;
 		}
 		else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple)))
 			SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index f6f5516..be38db5 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -2782,3 +2782,48 @@ explain (costs off)
    Index Cond: ((thousand = 1) AND (tenthous = 1001))
 (2 rows)
 
+--
+-- Check index creation with rows that have been inserted/deleted in the same
+-- xact as the index. That used to be buggy for a long time.
+--
+CREATE TABLE sametrans1(a int);
+INSERT INTO sametrans1 VALUES (10);
+BEGIN;
+INSERT INTO sametrans1 VALUES (20);
+INSERT INTO sametrans1 VALUES (30);
+INSERT INTO sametrans1 VALUES (40);
+SAVEPOINT a;
+DELETE FROM sametrans1 WHERE a = 20;
+ROLLBACK TO a;
+SAVEPOINT a;
+DELETE FROM sametrans1 WHERE a = 30;
+RELEASE SAVEPOINT a;
+CREATE UNIQUE INDEX ON sametrans1 (a);
+COMMIT;
+SET enable_seqscan = off;
+EXPLAIN (COSTS OFF) SELECT * FROM sametrans1 WHERE a = 10;
+                      QUERY PLAN                      
+------------------------------------------------------
+ Index Only Scan using sametrans1_a_idx on sametrans1
+   Index Cond: (a = 10)
+(2 rows)
+
+SELECT * FROM sametrans1 WHERE a = 10;
+ a  
+----
+ 10
+(1 row)
+
+SELECT * FROM sametrans1 WHERE a = 20;
+ a  
+----
+ 20
+(1 row)
+
+SELECT * FROM sametrans1 WHERE a = 30;
+ a 
+---
+(0 rows)
+
+SET enable_seqscan = on;
+DROP TABLE sametrans1;
diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql
index d4d24ef..0858bdd 100644
--- a/src/test/regress/sql/create_index.sql
+++ b/src/test/regress/sql/create_index.sql
@@ -938,3 +938,30 @@ ORDER BY thousand;
 
 explain (costs off)
   select * from tenk1 where (thousand, tenthous) in ((1,1001), (null,null));
+
+--
+-- Check index creation with rows that have been inserted/deleted in the same
+-- xact as the index. That used to be buggy for a long time.
+--
+CREATE TABLE sametrans1(a int);
+
+INSERT INTO sametrans1 VALUES (10);
+BEGIN;
+INSERT INTO sametrans1 VALUES (20);
+INSERT INTO sametrans1 VALUES (30);
+INSERT INTO sametrans1 VALUES (40);
+SAVEPOINT a;
+DELETE FROM sametrans1 WHERE a = 20;
+ROLLBACK TO a;
+SAVEPOINT a;
+DELETE FROM sametrans1 WHERE a = 30;
+RELEASE SAVEPOINT a;
+CREATE UNIQUE INDEX ON sametrans1 (a);
+COMMIT;
+SET enable_seqscan = off;
+EXPLAIN (COSTS OFF) SELECT * FROM sametrans1 WHERE a = 10;
+SELECT * FROM sametrans1 WHERE a = 10;
+SELECT * FROM sametrans1 WHERE a = 20;
+SELECT * FROM sametrans1 WHERE a = 30;
+SET enable_seqscan = on;
+DROP TABLE sametrans1;
-- 
2.0.0.rc2.4.g1dc51c6.dirty

