From 648de70f226831af04e1d31329118c325161da0b Mon Sep 17 00:00:00 2001
From: Peter Geoghegan <pg@bowt.ie>
Date: Wed, 19 Oct 2016 13:59:20 -0700
Subject: [PATCH] Fix ON CONFLICT bugs at higher isolation levels.

When used at higher isolation levels, ON CONFLICT DO NOTHING could raise
spurious serialization failure errors.  This happened when duplicate
values were proposed for insertion within a single ON CONFLICT DO
NOTHING command. (Similar ON CONFLICT DO UPDATE cases typically raised
valid cardinality violation errors instead.)

Separately, though in the same code path, insufficient care was taken
when a visibility check was performed on some existing, conflicting
tuple.  At least a shared buffer lock is required on any buffer
containing a tuple whose visibility is checked through tqual.c routines,
which may set hint bits (a buffer pin is therefore insufficient).

To fix the first issue, check if tuple was created by current
transaction as a further condition of raising a serialization failure
in relevant test.  To fix the second issue, outsource visibility test to
preexisting heap_fetch() call.

Patch by Thomas Munroe and Peter Geoghegan.  First bug reported by Jason
Dusek.  Second bug reported by Konstantin Knizhnik.

Report: <CAO3NbwOycQjt2Oqy2VW-eLTq2M5uGMyHnGm=RNga4mjqcYD7gQ@mail.gmail.com>
Report: <57EE93C8.8080504@postgrespro.ru>
Backpatch-through: 9.5
---
 src/backend/executor/nodeModifyTable.c             | 41 ++++++++++------------
 .../expected/insert-conflict-do-nothing-2.out      | 29 +++++++++++++++
 src/test/isolation/isolation_schedule              |  1 +
 .../specs/insert-conflict-do-nothing-2.spec        | 34 ++++++++++++++++++
 4 files changed, 82 insertions(+), 23 deletions(-)
 create mode 100644 src/test/isolation/expected/insert-conflict-do-nothing-2.out
 create mode 100644 src/test/isolation/specs/insert-conflict-do-nothing-2.spec

diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index af7b26c..3c4f458 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -179,7 +179,7 @@ ExecProcessReturning(ResultRelInfo *resultRelInfo,
 }
 
 /*
- * ExecCheckHeapTupleVisible -- verify heap tuple is visible
+ * ExecCheckTIDVisible -- fetch tuple, and verify its visibility
  *
  * It would not be consistent with guarantees of the higher isolation levels to
  * proceed with avoiding insertion (taking speculative insertion's alternative
@@ -187,23 +187,6 @@ ExecProcessReturning(ResultRelInfo *resultRelInfo,
  * Check for the need to raise a serialization failure, and do so as necessary.
  */
 static void
-ExecCheckHeapTupleVisible(EState *estate,
-						  HeapTuple tuple,
-						  Buffer buffer)
-{
-	if (!IsolationUsesXactSnapshot())
-		return;
-
-	if (!HeapTupleSatisfiesVisibility(tuple, estate->es_snapshot, buffer))
-		ereport(ERROR,
-				(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
-			 errmsg("could not serialize access due to concurrent update")));
-}
-
-/*
- * ExecCheckTIDVisible -- convenience variant of ExecCheckHeapTupleVisible()
- */
-static void
 ExecCheckTIDVisible(EState *estate,
 					ResultRelInfo *relinfo,
 					ItemPointer tid)
@@ -212,14 +195,26 @@ ExecCheckTIDVisible(EState *estate,
 	Buffer		buffer;
 	HeapTupleData tuple;
 
-	/* Redundantly check isolation level */
 	if (!IsolationUsesXactSnapshot())
 		return;
 
 	tuple.t_self = *tid;
-	if (!heap_fetch(rel, SnapshotAny, &tuple, &buffer, false, NULL))
-		elog(ERROR, "failed to fetch conflicting tuple for ON CONFLICT");
-	ExecCheckHeapTupleVisible(estate, &tuple, buffer);
+	if (!heap_fetch(rel, estate->es_snapshot, &tuple, &buffer, true, NULL))
+	{
+		/*
+		 * Must not raise serialization failure when multiple values are
+		 * proposed for insertion by the same command with duplication among
+		 * values (this is at least possible with ON CONFLICT DO NOTHING, where
+		 * cardinality violation errors are never raised).  Avoid this with
+		 * final check that determines if tuple was inserted by another
+		 * transaction.
+		 */
+		if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple.t_data)))
+			ereport(ERROR,
+					(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+				 errmsg("could not serialize access due to concurrent update")));
+	}
+
 	ReleaseBuffer(buffer);
 }
 
@@ -1170,7 +1165,7 @@ ExecOnConflictUpdate(ModifyTableState *mtstate,
 	 * snapshot.  This is in line with the way UPDATE deals with newer tuple
 	 * versions.
 	 */
-	ExecCheckHeapTupleVisible(estate, &tuple, buffer);
+	ExecCheckTIDVisible(estate, resultRelInfo, &tuple.t_self);
 
 	/* Store target's existing tuple in the state's dedicated slot */
 	ExecStoreTuple(&tuple, mtstate->mt_existing, buffer, false);
diff --git a/src/test/isolation/expected/insert-conflict-do-nothing-2.out b/src/test/isolation/expected/insert-conflict-do-nothing-2.out
new file mode 100644
index 0000000..b379ab1
--- /dev/null
+++ b/src/test/isolation/expected/insert-conflict-do-nothing-2.out
@@ -0,0 +1,29 @@
+Parsed test spec with 2 sessions
+
+starting permutation: donothing1 c1 donothing2 c2
+step donothing1: INSERT INTO ints(key, val) VALUES(1, 'donothing1') ON CONFLICT DO NOTHING;
+step c1: COMMIT;
+step donothing2: INSERT INTO ints(key, val) VALUES(1, 'donothing2'), (1, 'donothing2') ON CONFLICT DO NOTHING;
+step c2: COMMIT;
+
+starting permutation: donothing2 c2 donothing1 c1
+step donothing2: INSERT INTO ints(key, val) VALUES(1, 'donothing2'), (1, 'donothing2') ON CONFLICT DO NOTHING;
+step c2: COMMIT;
+step donothing1: INSERT INTO ints(key, val) VALUES(1, 'donothing1') ON CONFLICT DO NOTHING;
+step c1: COMMIT;
+
+starting permutation: donothing1 donothing2 c1 c2
+step donothing1: INSERT INTO ints(key, val) VALUES(1, 'donothing1') ON CONFLICT DO NOTHING;
+step donothing2: INSERT INTO ints(key, val) VALUES(1, 'donothing2'), (1, 'donothing2') ON CONFLICT DO NOTHING; <waiting ...>
+step c1: COMMIT;
+step donothing2: <... completed>
+error in steps c1 donothing2: ERROR:  could not serialize access due to concurrent update
+step c2: COMMIT;
+
+starting permutation: donothing2 donothing1 c2 c1
+step donothing2: INSERT INTO ints(key, val) VALUES(1, 'donothing2'), (1, 'donothing2') ON CONFLICT DO NOTHING;
+step donothing1: INSERT INTO ints(key, val) VALUES(1, 'donothing1') ON CONFLICT DO NOTHING; <waiting ...>
+step c2: COMMIT;
+step donothing1: <... completed>
+error in steps c2 donothing1: ERROR:  could not serialize access due to concurrent update
+step c1: COMMIT;
diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule
index a96a318..2606a27 100644
--- a/src/test/isolation/isolation_schedule
+++ b/src/test/isolation/isolation_schedule
@@ -25,6 +25,7 @@ test: eval-plan-qual
 test: lock-update-delete
 test: lock-update-traversal
 test: insert-conflict-do-nothing
+test: insert-conflict-do-nothing-2
 test: insert-conflict-do-update
 test: insert-conflict-do-update-2
 test: insert-conflict-do-update-3
diff --git a/src/test/isolation/specs/insert-conflict-do-nothing-2.spec b/src/test/isolation/specs/insert-conflict-do-nothing-2.spec
new file mode 100644
index 0000000..bbe92f7
--- /dev/null
+++ b/src/test/isolation/specs/insert-conflict-do-nothing-2.spec
@@ -0,0 +1,34 @@
+# INSERT...ON CONFLICT DO NOTHING test with multiple rows in higher isolation
+
+setup
+{
+  CREATE TABLE ints (key int primary key, val text);
+}
+
+teardown
+{
+  DROP TABLE ints;
+}
+
+session "s1"
+setup
+{
+  BEGIN ISOLATION LEVEL REPEATABLE READ;
+}
+step "donothing1" { INSERT INTO ints(key, val) VALUES(1, 'donothing1') ON CONFLICT DO NOTHING; }
+step "c1" { COMMIT; }
+step "a1" { ABORT; }
+
+session "s2"
+setup
+{
+  BEGIN ISOLATION LEVEL REPEATABLE READ;
+}
+step "donothing2" { INSERT INTO ints(key, val) VALUES(1, 'donothing2'), (1, 'donothing2') ON CONFLICT DO NOTHING; }
+step "c2" { COMMIT; }
+step "a2" { ABORT; }
+
+permutation "donothing1" "c1" "donothing2" "c2"
+permutation "donothing2" "c2" "donothing1" "c1"
+permutation "donothing1" "donothing2" "c1" "c2"
+permutation "donothing2" "donothing1" "c2" "c1"
-- 
2.7.4

