From fc30da97cf60bb9e1e50972acc0f58cb7329f639 Mon Sep 17 00:00:00 2001
From: Peter Geoghegan <pg@heroku.com>
Date: Wed, 27 Aug 2014 15:01:32 -0700
Subject: [PATCH 2/4] Support INSERT ... ON CONFLICT {UPDATE | IGNORE}

This non-standard INSERT clause allows DML statement authors to specify
that in the event of any of the tuples being inserted
duplicating an existing tuple in terms of a value or set of values
constrained by a unique index, an alternative path may be taken.  The
statement may alternatively IGNORE the tuple being inserted without
raising an error, or go to UPDATE the existing tuple whose value is
duplicated by a value within one single tuple proposed for insertion.
The implementation loops until either an insert or an UPDATE/IGNORE
occurs.  No existing tuple may be affected more than once per INSERT.

This is implemented using a new infrastructure called "speculative
insertion".  A speculative inserter establishes the right to insert
ahead of time.  In order to proceed with insertion, consensus must be
reached between all unique indexes on the table, or else the alternative
path is taken.  Finally, heap tuple insertion occurs in the conventional
manner, followed by insertion of index tuples, where index tuple
insertion picks up where value locking left off, performing essentially
the conventional insertion process in a staggered fashion.
Alternatively, we may go to UPDATE, using the EvalPlanQual() mechanism
to execute a special auxiliary plan.

READ COMMITTED isolation level is permitted to UPDATE a tuple even where
no version is visible to the command's MVCC snapshot.  Similarly, any
query predicate associated with the UPDATE portion of the new statement
need only satisfy an already locked, conclusively committed and visible
conflict tuple.  When the predicate isn't satisfied, the tuple is still
locked, which implies that at READ COMMITTED, a tuple may be locked
without any version being visible to the command's MVCC snapshot.

Users may optionally specify a single unique index to merge on with a
WITHIN `unique_index` specification, which is useful when there is a
concern about spuriously merging on the wrong unique index due to there
being more than one would-be unique violation.  Otherwise, we UPDATE (or
IGNORE) based on the first would-be unique violation detected, on the
assumption that that is the only unique index where a violation could
appear.

The auxiliary ModifyTable plan used by the UPDATE portion of the new
statement is not formally a subplan of its parent INSERT ModifyTable
plan.  Rather, it's an independently planned subquery, whose execution
is tightly driven by its parent.  Special auxiliary state pertaining to
the auxiliary UPDATE is tracked by its parent through all stages of
query execution.

The optimizer imposes some restrictions on child auxiliary UPDATE plans,
which make the plans comport with their parent to the extent required
during the executor stage.  One user-visible consequence of this is
that the special auxiliary UPDATE query cannot have subselects within
its targetlist or WHERE clause.  UPDATEs may not reference any other
table, and UPDATE FROM is disallowed.  INSERT's RETURNING clause
continues to only project tuples actually inserted.
---
 contrib/pg_stat_statements/pg_stat_statements.c |   4 +
 src/backend/access/heap/heapam.c                |  22 +-
 src/backend/access/heap/tuptoaster.c            |   2 +-
 src/backend/access/index/indexam.c              | 153 +++++-
 src/backend/access/nbtree/nbtinsert.c           | 612 ++++++++++++++++++++++--
 src/backend/access/nbtree/nbtree.c              |  71 ++-
 src/backend/catalog/index.c                     |   6 +-
 src/backend/catalog/indexing.c                  |   3 +-
 src/backend/commands/constraint.c               |   2 +-
 src/backend/commands/copy.c                     |   4 +-
 src/backend/commands/explain.c                  |   9 +-
 src/backend/executor/execMain.c                 |  11 +-
 src/backend/executor/execUtils.c                | 293 ++++++++++--
 src/backend/executor/nodeLockRows.c             |   9 +-
 src/backend/executor/nodeModifyTable.c          | 427 ++++++++++++++++-
 src/backend/nodes/copyfuncs.c                   |  23 +
 src/backend/nodes/equalfuncs.c                  |  16 +
 src/backend/nodes/nodeFuncs.c                   |   5 +
 src/backend/nodes/outfuncs.c                    |  19 +
 src/backend/nodes/readfuncs.c                   |   3 +
 src/backend/optimizer/path/costsize.c           |   8 +-
 src/backend/optimizer/plan/createplan.c         |  11 +-
 src/backend/optimizer/plan/planner.c            |  42 ++
 src/backend/optimizer/plan/setrefs.c            |   8 +
 src/backend/optimizer/plan/subselect.c          |   2 +
 src/backend/parser/analyze.c                    |  58 ++-
 src/backend/parser/gram.y                       |  65 ++-
 src/backend/parser/parse_clause.c               |  42 +-
 src/backend/parser/parse_relation.c             |   8 +-
 src/backend/rewrite/rewriteHandler.c            |   5 +
 src/backend/utils/time/tqual.c                  |  23 +
 src/include/access/genam.h                      | 100 +++-
 src/include/access/nbtree.h                     |  49 +-
 src/include/catalog/pg_am.h                     |  44 +-
 src/include/catalog/pg_proc.h                   |   2 +
 src/include/executor/executor.h                 |   4 +-
 src/include/nodes/execnodes.h                   |   3 +
 src/include/nodes/nodes.h                       |  13 +
 src/include/nodes/parsenodes.h                  |  24 +
 src/include/nodes/plannodes.h                   |   3 +
 src/include/optimizer/planmain.h                |   3 +-
 src/include/parser/kwlist.h                     |   2 +
 src/include/parser/parse_clause.h               |   1 +
 src/include/parser/parse_relation.h             |   4 +-
 src/include/utils/rel.h                         |   1 +
 src/include/utils/tqual.h                       |   1 +
 46 files changed, 2058 insertions(+), 162 deletions(-)

diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 799242b..c8ce44b 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2198,6 +2198,10 @@ JumbleQuery(pgssJumbleState *jstate, Query *query)
 	JumbleRangeTable(jstate, query->rtable);
 	JumbleExpr(jstate, (Node *) query->jointree);
 	JumbleExpr(jstate, (Node *) query->targetList);
+	APP_JUMB(query->specClause);
+	APP_JUMB(query->mergeIndex);
+	if (query->onConflict)
+		JumbleQuery(jstate, (Query *) query->onConflict);
 	JumbleExpr(jstate, (Node *) query->returningList);
 	JumbleExpr(jstate, (Node *) query->groupClause);
 	JumbleExpr(jstate, query->havingQual);
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 4d7575b..b0c5d62 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -4101,14 +4101,15 @@ get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
  *
  * Function result may be:
  *	HeapTupleMayBeUpdated: lock was successfully acquired
+ *	HeapTupleInvisible: lock failed because tuple instantaneously invisible
  *	HeapTupleSelfUpdated: lock failed because tuple updated by self
  *	HeapTupleUpdated: lock failed because tuple updated by other xact
  *
- * In the failure cases, the routine fills *hufd with the tuple's t_ctid,
- * t_xmax (resolving a possible MultiXact, if necessary), and t_cmax
- * (the last only for HeapTupleSelfUpdated, since we
- * cannot obtain cmax from a combocid generated by another transaction).
- * See comments for struct HeapUpdateFailureData for additional info.
+ * In the failure cases other than HeapTupleInvisible, the routine fills *hufd
+ * with the tuple's t_ctid, t_xmax (resolving a possible MultiXact, if
+ * necessary), and t_cmax (the last only for HeapTupleSelfUpdated, since we
+ * cannot obtain cmax from a combocid generated by another transaction).  See
+ * comments for struct HeapUpdateFailureData for additional info.
  *
  * See README.tuplock for a thorough explanation of this mechanism.
  */
@@ -4145,8 +4146,15 @@ l3:
 
 	if (result == HeapTupleInvisible)
 	{
-		UnlockReleaseBuffer(*buffer);
-		elog(ERROR, "attempted to lock invisible tuple");
+		LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
+
+		/*
+		 * This is possible, but only when locking a tuple for speculative
+		 * insertion.  We return this value here rather than throwing an error
+		 * in order to give that case the opportunity to throw a more specific
+		 * error.
+		 */
+		return HeapTupleInvisible;
 	}
 	else if (result == HeapTupleBeingUpdated)
 	{
diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c
index ce44bbd..114553e 100644
--- a/src/backend/access/heap/tuptoaster.c
+++ b/src/backend/access/heap/tuptoaster.c
@@ -1530,7 +1530,7 @@ toast_save_datum(Relation rel, Datum value,
 							 &(toasttup->t_self),
 							 toastrel,
 							 toastidxs[i]->rd_index->indisunique ?
-							 UNIQUE_CHECK_YES : UNIQUE_CHECK_NO);
+							 UNIQUE_CHECK_YES : UNIQUE_CHECK_NO, NULL);
 		}
 
 		/*
diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c
index 53cf96f..1b5c1fc 100644
--- a/src/backend/access/index/indexam.c
+++ b/src/backend/access/index/indexam.c
@@ -18,8 +18,12 @@
  *		index_rescan	- restart a scan of an index
  *		index_endscan	- end a scan
  *		index_insert	- insert an index tuple into a relation
+ *		index_lock		- lock index values speculatively
  *		index_markpos	- mark a scan position
  *		index_restrpos	- restore a scan position
+ *		index_proceed	- proceed with insert?
+ *		index_release	- release index locks
+ *		index_tid		- get duplicate/conflict tid from speculative state
  *		index_getnext_tid	- get the next TID from a scan
  *		index_fetch_heap		- get the scan's next heap tuple
  *		index_getnext	- get the next heap tuple from a scan
@@ -98,6 +102,13 @@
 	AssertMacro(!ReindexIsProcessingIndex(RelationGetRelid(indexRelation))) \
 )
 
+#define SPECULATIVE_CHECKS \
+( \
+	AssertMacro(!state || \
+				RelationGetRelid(indexRelation) ==    \
+				RelationGetRelid(state->uniqueIndex)) \
+)
+
 #define SCAN_CHECKS \
 ( \
 	AssertMacro(IndexScanIsValid(scan)), \
@@ -199,6 +210,46 @@ index_close(Relation relation, LOCKMODE lockmode)
 }
 
 /* ----------------
+ * index_lock - lock index values speculatively.
+ *
+ * Returns palloc'd state indicating if the insertion would have proceeded were
+ * this not just a "dry-run".  The underlying amcanunique implementation
+ * performs locking.  Callers are obligated to pass this state back in a
+ * subsequent index_insert, or use the callback set here to release locks and
+ * other resources.
+ * ----------------
+ */
+SpeculativeState *
+index_lock(Relation indexRelation,
+		   Datum *values,
+		   bool *isnull,
+		   Relation heapRelation,
+		   List *otherSpecStates,
+		   bool priorConflict)
+{
+	FmgrInfo   *procedure;
+
+	RELATION_CHECKS;
+	GET_REL_PROCEDURE(amlock);
+
+	if (!(indexRelation->rd_am->ampredlocks))
+		CheckForSerializableConflictIn(indexRelation,
+									   (HeapTuple) NULL,
+									   InvalidBuffer);
+
+	/*
+	 * call am's lock proc.
+	 */
+	return (SpeculativeState *) DatumGetPointer(FunctionCall6(procedure,
+												PointerGetDatum(indexRelation),
+												PointerGetDatum(values),
+												PointerGetDatum(isnull),
+												PointerGetDatum(heapRelation),
+												PointerGetDatum(otherSpecStates),
+												BoolGetDatum(priorConflict)));
+}
+
+/* ----------------
  *		index_insert - insert an index tuple into a relation
  * ----------------
  */
@@ -208,11 +259,14 @@ index_insert(Relation indexRelation,
 			 bool *isnull,
 			 ItemPointer heap_t_ctid,
 			 Relation heapRelation,
-			 IndexUniqueCheck checkUnique)
+			 IndexUniqueCheck checkUnique,
+			 SpeculativeState *state)
 {
 	FmgrInfo   *procedure;
+	bool		satisfies;
 
 	RELATION_CHECKS;
+	SPECULATIVE_CHECKS;
 	GET_REL_PROCEDURE(aminsert);
 
 	if (!(indexRelation->rd_am->ampredlocks))
@@ -223,13 +277,106 @@ index_insert(Relation indexRelation,
 	/*
 	 * have the am's insert proc do all the work.
 	 */
-	return DatumGetBool(FunctionCall6(procedure,
+	satisfies = DatumGetBool(FunctionCall7(procedure,
 									  PointerGetDatum(indexRelation),
 									  PointerGetDatum(values),
 									  PointerGetDatum(isnull),
 									  PointerGetDatum(heap_t_ctid),
 									  PointerGetDatum(heapRelation),
-									  Int32GetDatum((int32) checkUnique)));
+									  Int32GetDatum((int32) checkUnique),
+									  PointerGetDatum(state)));
+
+	/*
+	 * AM will have released private speculative insertion state and locks, but
+	 * the responsibility to release generic state rests here
+	 */
+	if (state)
+		pfree(state);
+
+	return satisfies;
+}
+
+/* ----------------
+ * index_proceed - should we proceed with a speculative insertion?
+ *
+ * Returns whether or not to proceed (states may be NIL).  A would-be duplicate
+ * key violation on any unique index is grounds for not proceeding with the
+ * second phase of speculative insertion (as is there never being a request for
+ * speculative insertion).  Total consensus is required.
+ * ----------------
+ */
+bool
+index_proceed(List *specStates)
+{
+	ListCell   *lc;
+
+	foreach(lc, specStates)
+	{
+		SpeculativeState   *state = lfirst(lc);
+		SpecStatus			outcome = state->outcome;
+
+		Assert(outcome != INSERT_TRY_AGAIN);
+
+		if (outcome == INSERT_NO_PROCEED)
+			return false;
+	}
+
+	return true;
+}
+
+/* ----------------
+ * index_release - release resources from speculative insertion.
+ *
+ * Unique index value locks and other resources are freed here.  This is called
+ * just prior to the latter phase of speculative insertion, in respect of a
+ * single slot.
+ *
+ * The AM must free its own private resources here (principally, value locks).
+ * We release the AM-generic state itself.
+ *
+ * It's possible that the AM will call this function itself directly, passing
+ * back the list of other states that that particular AM-invocation is itself
+ * passed.  The need to release all value locks immediately prior to waiting on
+ * the outcome of another, conflicting xact may arise.
+ *
+ * Returns a heap item pointer for locking where applicable.
+ * ----------------
+ */
+ItemPointer
+index_release(List *specStates, bool free)
+{
+	ListCell	   *lc;
+	ItemPointer		conflict = NULL;
+
+	/*
+	 * Note that unlocking occurs in the same order as insertion would have,
+	 * and so is in the opposite order to the original lock acquisition (i.e.
+	 * it is "right-way-around")
+	 */
+	foreach(lc, specStates)
+	{
+		SpeculativeState *state = lfirst(lc);
+
+		/* Only expecting one tuple to lock */
+		Assert(!conflict || state->outcome != INSERT_NO_PROCEED);
+		if (state->outcome == INSERT_NO_PROCEED)
+		{
+			Assert(ItemPointerIsValid(state->conflictTid));
+			conflict = state->conflictTid;
+		}
+
+		/*
+		 * Only release value locks (and other resources) for unique indexes
+		 * that have indicated they require it
+		 */
+		if (state->unlock)
+			(*state->unlock) (state);
+
+		if (free)
+			pfree(state);
+	}
+
+	return conflict;
 }
 
 /*
diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c
index ecee5ac..3bd4e71 100644
--- a/src/backend/access/nbtree/nbtinsert.c
+++ b/src/backend/access/nbtree/nbtinsert.c
@@ -47,10 +47,16 @@ typedef struct
 
 static Buffer _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf);
 
+static void _bt_speccleanup(SpeculativeState *specState);
+static void _bt_insertinitpg(Relation rel, IndexUniqueCheck checkUnique,
+					 ScanKey itup_scankey, Buffer *buf,
+					 BlockNumber *bufblock, BTStack stack,
+					 BTSpecOpaque btspec);
 static TransactionId _bt_check_unique(Relation rel, IndexTuple itup,
 				 Relation heapRel, Buffer buf, OffsetNumber offset,
 				 ScanKey itup_scankey,
-				 IndexUniqueCheck checkUnique, bool *is_unique);
+				 IndexUniqueCheck checkUnique, bool *is_unique,
+				 ItemPointer dup_htid);
 static void _bt_findinsertloc(Relation rel,
 				  Buffer *bufptr,
 				  OffsetNumber *offsetptr,
@@ -84,56 +90,359 @@ static void _bt_vacuum_one_page(Relation rel, Buffer buffer, Relation heapRel);
 
 
 /*
+ * _bt_speccleanup --- speculative insertion won't proceed callback.
+ *
+ * This is called by the core code when it turns out that an insertion of a
+ * btree index tuple will not proceed, despite the fact that an individual
+ * phased-lock call (the one whose state is passed) believed that it should.
+ *
+ * Sometimes a call that instructs the core code to call this method for each
+ * unique index (or an analogous method for possible other amcanunique access
+ * methods) may come from nbtree itself.
+ *
+ * In general, we prefer to clean up directly if it is immediately clear that
+ * speculative insertion won't proceed.
+ */
+static void
+_bt_speccleanup(SpeculativeState *specState)
+{
+	BTSpecOpaque	btspec = (BTSpecOpaque) specState->privState;
+
+	if (specState->outcome == INSERT_NO_PROCEED)
+	{
+		/*
+		 * We already found duplicate, and already released hwlock.  Just have
+		 * pin to release, but not yet.
+		 *
+		 * There will be a final callback for this unique index, since it is
+		 * used to merge values.  It is necessary to sit on the page buffer
+		 * pin, to interlock against VACUUM.
+		 *
+		 * XXX: The index tuple whose heap tuple we are intent on locking may
+		 * actually be on a later page than the currently pinned page.
+		 * Discussion on VACUUM interlocking should revisit the need for this.
+		 */
+		specState->outcome = INSERT_NEED_UNPIN;
+	}
+	else
+	{
+		ReleaseBuffer(btspec->lockedBuf);
+		/* Unset callback that brought control here (not strictly necessary) */
+		specState->unlock = NULL;
+		/* If already released all state other than pin, don't try it again */
+		if (specState->outcome == INSERT_NEED_UNPIN)
+			return;
+		/* Free private state memory */
+		_bt_freestack(btspec->stack);
+		pfree(btspec->itup);
+		pfree(btspec->itupScankey);
+		pfree(btspec);
+	}
+}
+
+/*
+ * _bt_lockinsert() -- Lock values for speculative insertion
+ *
+ *		Obtain an exclusive heavyweight lock on appropriate leaf page, to
+ *		implement a limited form of value locking that prevents concurrent
+ *		insertion of conflicting values for an instant.
+ *
+ *		It is the caller's responsibility to subsequently ask us to release
+ *		value locks (by passing the state back to _bt_doinsert, or perhaps
+ *		through our callback), though we can ask the core code to ask all other
+ *		unique indexes to release just before we wait, and in general we don't
+ *		hold locks if we know insertion will not proceed.
+ *
+ *		specState is passed by the calling indexam proc, but responsibility for
+ *		initializing its fields (with the sole exception of the cached index
+ *		tuple) rests here.  In addition, a list of states of those unique
+ *		indexes participating in speculative insertion that have already
+ *		committed to insertion is passed, in case it proves necessary to
+ *		release values locks on all unique indexes and restart the first stage
+ *		of speculative insertion.
+ */
+void
+_bt_lockinsert(Relation rel, Relation heapRel, SpeculativeState *specState,
+			   List *otherSpecStates, bool priorConflict)
+{
+	int				natts = rel->rd_rel->relnatts;
+	BTSpecOpaque 	btspec = specState->privState;
+	BTPageOpaque 	opaque;
+	ScanKey			itup_scankey;
+	BTStack			stack;
+	bool			is_unique;
+	/* Heavyweight page lock and exclusive buffer lock acquisition avoidance */
+	bool			hwlocked;
+	OffsetNumber	nitems;
+	/* Buffer and pinned page for value locking */
+	Buffer			buf;
+	BlockNumber		bufblock;
+	Page			bufpage;
+	Buffer			presplitbuf;
+	OffsetNumber	offset;
+	TransactionId	xwait;
+
+	/* build insertion scan key */
+	itup_scankey = _bt_mkscankey(rel, btspec->itup);
+	/* find the first page containing this key */
+	stack = _bt_search(rel, natts, itup_scankey, false, &buf, BT_WRITE);
+	/* set up state needed to consider skipping most locking */
+	bufblock = BufferGetBlockNumber(buf);
+	bufpage = BufferGetPage(buf);
+	opaque = (BTPageOpaque) PageGetSpecialPointer(bufpage);
+	nitems = PageGetMaxOffsetNumber(bufpage);
+
+	/*
+	 * Consider trading in our read lock for a write lock.  Checking if the
+	 * value already exists is typically relatively cheap, so the threshold is,
+	 * in a word, low.  If the core code indicated a prior conflict, that means
+	 * that an earlier call here returned recently, having either indicated
+	 * that there was a conflicting xact or a conflicting row.  A recent, known
+	 * conflict against the value proposed for insertion is by far the most
+	 * compelling reason to apply the optimization.
+	 */
+	if (priorConflict || nitems > BTREE_PITEMS_NOLOCK)
+	{
+		/*
+		 * Chances are good that a conflicting row will be returned (typically
+		 * to be locked by caller).  Optimistically try to find likely
+		 * duplicate without acquiring hwlock, and without swapping read buffer
+		 * lock for write buffer lock.
+		 */
+		hwlocked = false;
+	}
+	else
+	{
+		LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+		/* now lock page of still pinned buffer, and write-lock buffer */
+hwlock:
+		/*
+		 * Lock avoidance optimization did not initially appear promising, or
+		 * has already been unsuccessfully attempted once, or there was a
+		 * concurrent page split
+		 */
+		hwlocked = true;
+		LockPage(rel, bufblock, ExclusiveLock);
+		LockBuffer(buf, BT_WRITE);
+
+		opaque = (BTPageOpaque) PageGetSpecialPointer(BufferGetPage(buf));
+
+		/*
+		 * Consider the need to move right in the tree (see comments at end of
+		 * _bt_insertinitpg), but defend against concurrent page splits by
+		 * retrying.  Respect the general protocol for heavyweight locking
+		 * nbtree pages, which is that pages must be locked before any buffer
+		 * is locked, and released after all buffer locks are released.
+		 */
+		presplitbuf = buf;
+		buf = _bt_moveright(rel, buf, natts, itup_scankey, false, true, stack,
+							BT_WRITE);
+
+		if (buf != presplitbuf)
+		{
+			/*
+			 * A page split between unlocking and relocking the first page that
+			 * a would-be duplicate value could be on necessitates reacquiring
+			 * all locks.  The existing heavyweight lock is superficially a
+			 * sufficient choke point for value locking against concurrent
+			 * insertions of the value proposed for insertion, but it is
+			 * actually necessary to hold only a single pinned buffer in which
+			 * the heavyweight locked page is allocated, since the page is
+			 * marked as locked with a flag.  When regular inserters that hope
+			 * to mostly avoid heavyweight lock acquisition are considered,
+			 * just holding the existing lock is insufficient, since they rely
+			 * on this flag to indicate that a heavyweight lock may be held by
+			 * another backend on the same buffer's page.
+			 *
+			 * We unlock the buffer newly locked by _bt_moveright(), but keep
+			 * the pin on that same buffer for next iteration.
+			 */
+			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+			UnlockPage(rel, bufblock, ExclusiveLock);
+			bufblock = BufferGetBlockNumber(buf);
+			goto hwlock;
+		}
+	}
+
+	/*
+	 * Indicate that heavyweight lock is held.  The flag bit may already be
+	 * set, due to an aborted transaction setting it without having the
+	 * opportunity to unset it, but this should be rare.
+	 *
+	 * Since we never release our buffer pin, we don't MarkBufferDirty().
+	 */
+	if (hwlocked)
+		opaque->btpo_flags |= BTP_IS_LOCKED;
+
+	/* get page from buffer */
+	offset = _bt_binsrch(rel, buf, natts, itup_scankey, false);
+
+	/* check for duplicates in a similar fashion to _bt_insert */
+	xwait = _bt_check_unique(rel, btspec->itup, heapRel, buf, offset,
+							 itup_scankey, UNIQUE_CHECK_SPEC, &is_unique,
+							 specState->conflictTid);
+
+	if (TransactionIdIsValid(xwait))
+	{
+		if (hwlocked)
+			opaque->btpo_flags &= ~BTP_IS_LOCKED;
+		_bt_relbuf(rel, buf);
+		if (hwlocked)
+			UnlockPage(rel, bufblock, ExclusiveLock);
+		/* Free all state and amcanunique value locks -- not just our own */
+		index_release(otherSpecStates, true);
+		_bt_freestack(stack);
+		_bt_freeskey(itup_scankey);
+
+		/*
+		 * Have the core code retry, re-locking any previous unique index
+		 * values as required
+		 */
+		specState->outcome = INSERT_TRY_AGAIN;
+		XactLockTableWait(xwait, heapRel, specState->conflictTid, XLTW_Lock);
+		return;
+	}
+
+	if (!hwlocked && is_unique)
+	{
+		/*
+		 * A heavyweight page lock is not held.
+		 *
+		 * It was incorrectly predicted that insertion would not proceed.  Now
+		 * that it's clear that it could have safely proceeded if only the
+		 * appropriate locks were held, restart.  This strategy is abandoned
+		 * from here (although when INSERT_TRY_AGAIN is passed back to core
+		 * code, it may later inform this routine that there was a recent
+		 * conflict in respect of the same unique index, which is always
+		 * treated as a valid reason to apply the optimization, even
+		 * repeatedly).
+		 */
+		LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+
+		/*
+		 * Since the buffer lock is now released, it's unlikely though possible
+		 * that upon reacquiring locks suitable for proceeding with insertion,
+		 * insertion will turn out to be unnecessary.
+		 */
+		goto hwlock;
+	}
+
+	/*
+	 * This unique index individually assents to insertion proceeding, or vetoes
+	 * insertion from proceeding (though if row locking in the executor later
+	 * fails, all bets are off and a fresh attempt at gaining consensus to
+	 * proceed with "insertion proper" begins)
+	 */
+	specState->outcome = is_unique? INSERT_PROCEED : INSERT_NO_PROCEED;
+	/* Stash stack for insertion proper */
+	btspec->stack = stack;
+	/* Return state to core code */
+	specState->uniqueIndex = rel;
+	btspec->lockedBuf = buf;
+
+	/* Free locks and other state no longer needed as appropriate */
+	if (specState->outcome == INSERT_NO_PROCEED)
+	{
+		/* Release buffer write lock and heavyweight lock here */
+		if (hwlocked)
+			opaque->btpo_flags &= ~BTP_IS_LOCKED;
+		/* However, keep pin */
+		LockBuffer(btspec->lockedBuf, BUFFER_LOCK_UNLOCK);
+		if (hwlocked)
+			UnlockPage(rel, bufblock, ExclusiveLock);
+		_bt_freestack(btspec->stack);
+	}
+	else
+	{
+		Assert(hwlocked);
+
+		/*
+		 * Unlock buffer, but keep pin and heavyweight lock.  Holding on to the
+		 * buffer pin is at least necessary because we don't want to have a
+		 * non-speculative inserter fail to observe that they require the
+		 * heavyweight page lock associated with this leaf page (we could also
+		 * mark the buffer dirty, but since we'll almost never hold a
+		 * heavyweight page lock for more than an instant anyway, we prefer not
+		 * to).
+		 */
+		LockBuffer(btspec->lockedBuf, BUFFER_LOCK_UNLOCK);
+		/* Cache scankey for second phase too */
+		btspec->itupScankey = itup_scankey;
+	}
+
+	/* Give core code ability to undo/release */
+	specState->unlock = _bt_speccleanup;
+}
+
+/*
  *	_bt_doinsert() -- Handle insertion of a single index tuple in the tree.
  *
- *		This routine is called by the public interface routine, btinsert.
- *		By here, itup is filled in, including the TID.
+ *		This routine is called by the public interface routines, btbuild and
+ *		btinsert.  By here, itup is filled in, including the TID, unlike
+ *		_bt_lockinsert where the heap tuple was not yet inserted (though we
+ *		still use cached itup for speculative insertion).
+ *
+ *		If checkUnique is UNIQUE_CHECK_NO or UNIQUE_CHECK_PARTIAL, this will
+ *		allow duplicates.	UNIQUE_CHECK_YES or UNIQUE_CHECK_EXISTING will
+ *		throw errors for a duplicate.  For UNIQUE_CHECK_EXISTING we merely run
+ *		the duplicate check, and don't actually insert.  In the
+ *		UNIQUE_CHECK_SPEC case, a call here represents speculative "insertion
+ *		proper", with insertion already committed to, so uniqueness checking is
+ *		redundant.
  *
- *		If checkUnique is UNIQUE_CHECK_NO or UNIQUE_CHECK_PARTIAL, this
- *		will allow duplicates.  Otherwise (UNIQUE_CHECK_YES or
- *		UNIQUE_CHECK_EXISTING) it will throw error for a duplicate.
- *		For UNIQUE_CHECK_EXISTING we merely run the duplicate check, and
- *		don't actually insert.
+ *		specState is state for the speculative insertion locking, and will be
+ *		NULL for regular index tuple inserts.  If speculative insertion reaches
+ *		here, consensus has been reached to proceed and "insertion proper"
+ *		finishing without unique constraint violations is a foregone
+ *		conclusion.
  *
  *		The result value is only significant for UNIQUE_CHECK_PARTIAL:
  *		it must be TRUE if the entry is known unique, else FALSE.
  *		(In the current implementation we'll also return TRUE after a
- *		successful UNIQUE_CHECK_YES or UNIQUE_CHECK_EXISTING call, but
- *		that's just a coding artifact.)
+ *		successful UNIQUE_CHECK_YES, UNIQUE_CHECK_EXISTING or UNIQUE_CHECK_SPEC
+ *		call, but that's just a coding artifact.)
  */
 bool
 _bt_doinsert(Relation rel, IndexTuple itup,
-			 IndexUniqueCheck checkUnique, Relation heapRel)
+			 IndexUniqueCheck checkUnique, Relation heapRel,
+			 SpeculativeState *specState)
 {
 	bool		is_unique = false;
 	int			natts = rel->rd_rel->relnatts;
 	ScanKey		itup_scankey;
 	BTStack		stack;
 	Buffer		buf;
-	OffsetNumber offset;
-
-	/* we need an insertion scan key to do our search, so build one */
-	itup_scankey = _bt_mkscankey(rel, itup);
+	OffsetNumber	offset = InvalidOffsetNumber;
+	BlockNumber		bufblock = InvalidBlockNumber;
+	BTSpecOpaque	btspec = NULL;
 
+	if (checkUnique != UNIQUE_CHECK_SPEC)
+	{
+		/* we need an insertion scan key to do our search, so build one */
+		itup_scankey = _bt_mkscankey(rel, itup);
 top:
-	/* find the first page containing this key */
-	stack = _bt_search(rel, natts, itup_scankey, false, &buf, BT_WRITE);
-
-	offset = InvalidOffsetNumber;
-
-	/* trade in our read lock for a write lock */
-	LockBuffer(buf, BUFFER_LOCK_UNLOCK);
-	LockBuffer(buf, BT_WRITE);
+		/* find the first page containing this key */
+		stack = _bt_search(rel, natts, itup_scankey, false, &buf, BT_WRITE);
+	}
+	else
+	{
+		/* retrieve private state */
+		btspec = specState->privState;
+		/* pick up from value locking */
+		itup = btspec->itup;
+		itup_scankey = btspec->itupScankey;
+		stack = btspec->stack;
+		/* this value has already been determined to be unique */
+		is_unique = true;
+	}
 
 	/*
-	 * If the page was split between the time that we surrendered our read
-	 * lock and acquired our write lock, then this page may no longer be the
-	 * right place for the key we want to insert.  In this case, we need to
-	 * move right in the tree.  See Lehman and Yao for an excruciatingly
-	 * precise description.
+	 * Convert read lock (buffer read lock or heavyweight exclusive lock that
+	 * effectively functions as an intent exclusive lock) on first page
+	 * containing this key to write lock as appropriate
 	 */
-	buf = _bt_moveright(rel, buf, natts, itup_scankey, false,
-						true, stack, BT_WRITE);
+	_bt_insertinitpg(rel, checkUnique, itup_scankey, &buf, &bufblock, stack,
+					 btspec);
 
 	/*
 	 * If we're not allowing duplicates, make sure the key isn't already in
@@ -142,32 +451,54 @@ top:
 	 * NOTE: obviously, _bt_check_unique can only detect keys that are already
 	 * in the index; so it cannot defend against concurrent insertions of the
 	 * same key.  We protect against that by means of holding a write lock on
-	 * the target page.  Any other would-be inserter of the same key must
-	 * acquire a write lock on the same target page, so only one would-be
+	 * the target page's buffer.  Any other would-be inserter of the same key
+	 * must acquire a write lock on the same target page, so only one would-be
 	 * inserter can be making the check at one time.  Furthermore, once we are
 	 * past the check we hold write locks continuously until we have performed
-	 * our insertion, so no later inserter can fail to see our insertion.
-	 * (This requires some care in _bt_insertonpg.)
 	 * our insertion, so no later inserter (or, at the analogous point in the
 	 * function _btlock, the undecided speculative inserter) can fail to see
 	 * our insertion.  (This requires some care in _bt_insertonpg.)  Speculative
+	 * insertion staggers this process, and converts the buffer lock to a page
+	 * heavyweight lock, so that if and when control reaches here, unique
+	 * checking has already been performed, and the right to insert has already
+	 * been established.
 	 *
 	 * If we must wait for another xact, we release the lock while waiting,
 	 * and then must start over completely.
 	 *
 	 * For a partial uniqueness check, we don't wait for the other xact. Just
 	 * let the tuple in and return false for possibly non-unique, or true for
-	 * definitely unique.
 	 * definitely unique.  This work is redundant for speculative insertion,
 	 * and so that case also skips the check, although assert-enabled builds
 	 * verify that speculative insertion got things right.
 	 */
-	if (checkUnique != UNIQUE_CHECK_NO)
+	if (checkUnique != UNIQUE_CHECK_SPEC &&
+		checkUnique != UNIQUE_CHECK_NO)
 	{
-		TransactionId xwait;
+		TransactionId xwait = InvalidTransactionId;
 
 		offset = _bt_binsrch(rel, buf, natts, itup_scankey, false);
 		xwait = _bt_check_unique(rel, itup, heapRel, buf, offset, itup_scankey,
-								 checkUnique, &is_unique);
+								 checkUnique, &is_unique, NULL);
 
 		if (TransactionIdIsValid(xwait))
 		{
 			/* Have to wait for the other guy ... */
 			_bt_relbuf(rel, buf);
+			/* Release heavyweight lock if held */
+			if (bufblock != InvalidBlockNumber)
+				UnlockPage(rel, bufblock, ExclusiveLock);
+
+			/*
+			 * It may be necessary to sit on an already acquired page
+			 * heavyweight "value lock" (on another unique index) if a
+			 * speculative insertion has already established the right to
+			 * insert into a single, user specified unique index, and the right
+			 * to (non-speculatively) insert into this unique index is in the
+			 * process of being established.  It is assumed that this case is
+			 * rare enough to make it not worth teaching this routine and
+			 * associated executor code to release its heavyweight value lock.
+			 */
 			XactLockTableWait(xwait, rel, &itup->t_tid, XLTW_InsertIndex);
 			/* start over... */
 			_bt_freestack(stack);
@@ -196,6 +527,10 @@ top:
 		_bt_relbuf(rel, buf);
 	}
 
+	/* Release heavyweight lock if held */
+	if (bufblock != InvalidBlockNumber)
+		UnlockPage(rel, bufblock, ExclusiveLock);
+
 	/* be tidy */
 	_bt_freestack(stack);
 	_bt_freeskey(itup_scankey);
@@ -204,6 +539,139 @@ top:
 }
 
 /*
+ *	_bt_insertinitpg() -- Handle insertion's page write lock initialization
+ *
+ * This routine handles minutiae relating to how each IndexUniqueCheck case must
+ * lock and unlock buffers, and perhaps the buffer's page.  Some existing type
+ * of lock on *buf is converted to a buffer write lock.
+ *
+ * Caller passes a pinned buffer with the first page the scankey's value could
+ * be on.  The buffer is generally already read locked, though not when the
+ * routine is called as part of the second stage of speculative insertion (i.e.
+ * UNIQUE_CHECK_SPEC), where no buffer lock is held, but rather a heavyweight
+ * exclusive page lock that similarly needs to be correctly converted to a
+ * buffer write lock.  This routine is also tasked with considering the
+ * necessity of acquiring a heavyweight lock, and doing so, for relevant
+ * non-speculative insertion cases.
+ */
+static void
+_bt_insertinitpg(Relation rel, IndexUniqueCheck checkUnique,
+				 ScanKey itup_scankey, Buffer *buf, BlockNumber *bufblock,
+				 BTStack stack, BTSpecOpaque btspec)
+{
+	bool			hwlock;
+	int				natts = rel->rd_rel->relnatts;
+	BTPageOpaque 	opaque;
+
+	switch (checkUnique)
+	{
+		case UNIQUE_CHECK_NO:
+		/* Deferred unique constraints don't support speculative insertion */
+		case UNIQUE_CHECK_PARTIAL:
+		case UNIQUE_CHECK_EXISTING:
+			/* trade in our read lock for a write lock */
+			LockBuffer(*buf, BUFFER_LOCK_UNLOCK);
+			LockBuffer(*buf, BT_WRITE);
+
+			/* Break and consider the need to move right. */
+			break;
+		case UNIQUE_CHECK_YES:
+			/*
+			 * The non-speculative unique index tuple insertion case.  As with
+			 * the prior case, some additional initialization is required that
+			 * is already taken care of in the speculative insertion case, but
+			 * unlike that case the implementation must be wary of speculative
+			 * inserters.
+			 */
+			opaque = (BTPageOpaque) PageGetSpecialPointer(BufferGetPage(*buf));
+
+			/* Trade in our read lock for a write lock */
+			hwlock = P_IS_LOCKED(opaque) != 0;
+do_hwlock:
+			LockBuffer(*buf, BUFFER_LOCK_UNLOCK);
+			if (hwlock)
+			{
+				/*
+				 * Speculative insertion is underway for this page/buffer, so
+				 * must obtain heavyweight lock to interlock against
+				 * speculative inserters.  Some may be between locking and
+				 * insertion proper for this choke point page/buffer/value, and
+				 * have only a heavyweight lock.
+				 */
+				*bufblock = BufferGetBlockNumber(*buf);
+				LockPage(rel, *bufblock, ExclusiveLock);
+			}
+
+			LockBuffer(*buf, BT_WRITE);
+
+			/*
+			 * When hwlock is held, unset the flag, preventing unnecessary
+			 * future hwlock acquisition by this type of inserter.
+			 */
+			if (hwlock)
+			{
+				opaque->btpo_flags &= ~BTP_IS_LOCKED;
+			}
+			else if (P_IS_LOCKED(opaque))
+			{
+				/*
+				 * Race -- make sure that lock does not need to be acquired
+				 * once more.  This should virtually never occur because the
+				 * window is so small.
+				 */
+				hwlock = true;
+				goto do_hwlock;
+			}
+
+			/*
+			 * Finally, break and consider the need to move right.  When
+			 * speculative insertion is currently treating the page/buffer as a
+			 * choke point, we may move right, but the existing heavyweight
+			 * lock suffices because other non-speculative inserters will block
+			 * on the heavyweight lock now held by this backend.
+			 */
+			break;
+		case UNIQUE_CHECK_SPEC:
+			/*
+			 * Speculative insertion case ("insertion proper").
+			 *
+			 * We already have a heavyweight page lock corresponding to this
+			 * buffer from earlier value locking.  Lock the buffer that the
+			 * page is allocated into, acquiring a buffer lock equivalent to
+			 * the one dropped at the end of value locking.
+			 */
+			LockBuffer(btspec->lockedBuf, BT_WRITE);
+#ifndef USE_ASSERT_CHECKING
+			*buf = btspec->lockedBuf;
+#else
+			*buf = _bt_moveright(rel, btspec->lockedBuf, natts, itup_scankey,
+								 false, true, btspec->stack, BT_WRITE);
+#endif
+			*bufblock = BufferGetBlockNumber(*buf);
+			opaque = (BTPageOpaque) PageGetSpecialPointer(BufferGetPage(*buf));
+			Assert(P_IS_LOCKED(opaque));
+			/*
+			 * Mark the page as no longer heavyweight locked.  From here we can
+			 * rely on buffer locks to ensure no duplicates are inserted.
+			 */
+			opaque->btpo_flags &= ~BTP_IS_LOCKED;
+			/* No break; no need to consider necessity of move to right */
+			Assert(*buf == btspec->lockedBuf);
+			return;
+	}
+
+	/*
+	 * If the page was split between the time that we surrendered our read lock
+	 * and acquired our write lock, then this page may no longer be the right
+	 * place for the key we want to insert.  In this case, we need to move
+	 * right in the tree.	See Lehman and Yao for an excruciatingly precise
+	 * description.
+	 */
+	*buf = _bt_moveright(rel, *buf, natts, itup_scankey, false, true, stack,
+						 BT_WRITE);
+}
+
+/*
  *	_bt_check_unique() -- Check for violation of unique index constraint
  *
  * offset points to the first possible item that could conflict. It can
@@ -212,7 +680,8 @@ top:
  *
  * Returns InvalidTransactionId if there is no conflict, else an xact ID
  * we must wait for to see if it commits a conflicting tuple.   If an actual
- * conflict is detected, no return --- just ereport().
+ * conflict is detected, return when checkUnique == UNIQUE_CHECK_SPEC.
+ * Otherwise, just ereport().
  *
  * However, if checkUnique == UNIQUE_CHECK_PARTIAL, we always return
  * InvalidTransactionId because we don't want to wait.  In this case we
@@ -222,7 +691,8 @@ top:
 static TransactionId
 _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel,
 				 Buffer buf, OffsetNumber offset, ScanKey itup_scankey,
-				 IndexUniqueCheck checkUnique, bool *is_unique)
+				 IndexUniqueCheck checkUnique, bool *is_unique,
+				 ItemPointer dup_htid)
 {
 	TupleDesc	itupdesc = RelationGetDescr(rel);
 	int			natts = rel->rd_rel->relnatts;
@@ -291,6 +761,9 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel,
 				curitup = (IndexTuple) PageGetItem(page, curitemid);
 				htid = curitup->t_tid;
 
+				if (dup_htid)
+					*dup_htid = htid;
+
 				/*
 				 * If we are doing a recheck, we expect to find the tuple we
 				 * are rechecking.  It's not a duplicate, but we have to keep
@@ -312,6 +785,9 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel,
 				{
 					TransactionId xwait;
 
+					if (dup_htid)
+						*dup_htid = htid;
+
 					/*
 					 * It is a duplicate. If we are only doing a partial
 					 * check, then don't bother checking if the tuple is being
@@ -338,7 +814,7 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel,
 					{
 						if (nbuf != InvalidBuffer)
 							_bt_relbuf(rel, nbuf);
-						/* Tell _bt_doinsert to wait... */
+						/* Tell _bt_doinsert or _bt_lockinsert to wait... */
 						return xwait;
 					}
 
@@ -349,6 +825,16 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel,
 					 * This is a waste of time in normal scenarios but we must
 					 * do it to support CREATE INDEX CONCURRENTLY.
 					 *
+					 * Naturally, we don't bother with this if there is no
+					 * heap tuple that might have committed dead (as with
+					 * speculative tuple insertion, during value locking).
+					 * Furthermore, in that scenario we don't have to worry
+					 * about this at all during speculative "insertion proper".
+					 * This is because we now know that there never will be a
+					 * heap tuple.  Indeed, there won't even be a real
+					 * insertion (unless row locking fails, in which case all
+					 * bets are off).
+					 *
 					 * We must follow HOT-chains here because during
 					 * concurrent index build, we insert the root TID though
 					 * the actual tuple may be somewhere in the HOT-chain.
@@ -360,9 +846,18 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel,
 					 * entry.
 					 */
 					htid = itup->t_tid;
-					if (heap_hot_search(&htid, heapRel, SnapshotSelf, NULL))
+					if (checkUnique == UNIQUE_CHECK_SPEC ||
+						heap_hot_search(&htid, heapRel, SnapshotSelf, NULL))
 					{
-						/* Normal case --- it's still live */
+						/*
+						 * Normal case --- it's still live, or there is no heap
+						 * tuple pointed to by itup->t_tid to begin with as
+						 * this is speculative insertion value locking.  Note
+						 * that speculative insertion (which is now almost done
+						 * with this attempt at uniqueness verification) will
+						 * not save the NULL heap pointer, but htid is still
+						 * set for the benefit of the case handled here.
+						 */
 					}
 					else
 					{
@@ -379,10 +874,28 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel,
 					 * release the buffer locks we're holding ---
 					 * BuildIndexValueDescription could make catalog accesses,
 					 * which in the worst case might touch this same index and
-					 * cause deadlocks.
+					 * cause deadlocks.  Speculative insertion also requires
+					 * that this buffer be released.
 					 */
 					if (nbuf != InvalidBuffer)
 						_bt_relbuf(rel, nbuf);
+
+					*is_unique = false;
+
+					/*
+					 * Done, but don't necessarily raise error.  Where
+					 * applicable, tell caller to report duplicate TID back to
+					 * core code.  Since it's a definite conflict TID, no xact
+					 * to wait on.
+					 */
+					if (checkUnique == UNIQUE_CHECK_SPEC)
+						return InvalidTransactionId;
+
+					/*
+					 * The UNIQUE_CHECK_SPEC case needs the buffer to remain
+					 * locked, but otherwise it must be released lest the
+					 * deadlock scenario described above occur.
+					 */
 					_bt_relbuf(rel, buf);
 
 					{
@@ -1010,10 +1523,15 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
 	isroot = P_ISROOT(oopaque);
 	isleaf = P_ISLEAF(oopaque);
 
-	/* if we're splitting this page, it won't be the root when we're done */
-	/* also, clear the SPLIT_END and HAS_GARBAGE flags in both pages */
+	/*
+	 * if we're splitting this page, it won't be the root when we're done.
+	 *
+	 * also, clear the SPLIT_END, HAS_GARBAGE and BTP_IS_LOCKED flags in both
+	 * pages.
+	 */
 	lopaque->btpo_flags = oopaque->btpo_flags;
-	lopaque->btpo_flags &= ~(BTP_ROOT | BTP_SPLIT_END | BTP_HAS_GARBAGE);
+	lopaque->btpo_flags &= ~(BTP_ROOT | BTP_SPLIT_END | BTP_HAS_GARBAGE |
+							 BTP_IS_LOCKED);
 	ropaque->btpo_flags = lopaque->btpo_flags;
 	/* set flag in left page indicating that the right page has no downlink */
 	lopaque->btpo_flags |= BTP_INCOMPLETE_SPLIT;
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 117b18e..d77cb00 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -219,10 +219,52 @@ btbuildempty(PG_FUNCTION_ARGS)
 }
 
 /*
+ *	btlock() -- First phase of speculative index tuple insertion.
+ *
+ *		Descend the tree recursively, find the appropriate location for our new
+ *		tuple, or an existing tuple with the same value as the value argument
+ *		passed.  Lock the value for an instant (the implementation performs
+ *		locking at a granularity coarser than strictly necessary), preventing
+ *		concurrent insertion of would-be duplicates, and let the caller know if
+ *		insertion can proceed.  Value locks may still be held, and exact
+ *		details of who frees them and other state, and when, are arbitrated by
+ *		implementation (sometimes it pro-actively releases its own locks) and
+ *		is fully described within _bt_lockinsert().
+ */
+Datum
+btlock(PG_FUNCTION_ARGS)
+{
+	Relation			rel = (Relation) PG_GETARG_POINTER(0);
+	Datum			   *values = (Datum *) PG_GETARG_POINTER(1);
+	bool			   *isnull = (bool *) PG_GETARG_POINTER(2);
+	Relation			heapRel = (Relation) PG_GETARG_POINTER(3);
+	List			   *otherSpecStates = (List *) PG_GETARG_POINTER(4);
+	bool				priorConflict = PG_GETARG_BOOL(5);
+	SpeculativeState   *specState = palloc(sizeof(SpeculativeState));
+	BTSpecOpaque		btspec = palloc(sizeof(BTSpecOpaqueData));
+
+	/* allocate conflict tid space */
+	specState->conflictTid = palloc0(sizeof(ItemPointerData));
+	/* generate an index tuple */
+	btspec->itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
+	/* set private state, set by _bt_lockinsert, for return to caller */
+	specState->privState = btspec;
+
+	_bt_lockinsert(rel, heapRel, specState, otherSpecStates, priorConflict);
+
+	PG_RETURN_POINTER(specState);
+}
+
+/*
  *	btinsert() -- insert an index tuple into a btree.
  *
- *		Descend the tree recursively, find the appropriate location for our
- *		new tuple, and put it there.
+ *		Descend the tree recursively, find the appropriate location for our new
+ *		tuple (though in the speculative insertion case, btlock will have taken
+ *		care of much of that for us already), and put it there.  This can be
+ *		called for a conventional insertion, or the latter phase of speculative
+ *		insertion (it is called in the same manner as always for non-unique
+ *		indexes during what the executor would formally consider to be the
+ *		second phase of speculative insertion).
  */
 Datum
 btinsert(PG_FUNCTION_ARGS)
@@ -233,17 +275,34 @@ btinsert(PG_FUNCTION_ARGS)
 	ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);
 	Relation	heapRel = (Relation) PG_GETARG_POINTER(4);
 	IndexUniqueCheck checkUnique = (IndexUniqueCheck) PG_GETARG_INT32(5);
+	SpeculativeState *specState = (SpeculativeState *) PG_GETARG_POINTER(6);
+	BTSpecOpaque btspec;
 	bool		result;
-	IndexTuple	itup;
+	IndexTuple	itup = NULL;
 
-	/* generate an index tuple */
-	itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
+	if (!specState)
+	{
+		/* non-speculative insertion, generate an index tuple from scratch */
+		itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
+	}
+	else
+	{
+		btspec = (BTSpecOpaque) specState->privState;
+		/* Use cached index tuple from first phase (value locking) */
+		itup = btspec->itup;
+	}
 	itup->t_tid = *ht_ctid;
 
-	result = _bt_doinsert(rel, itup, checkUnique, heapRel);
+	Assert(ItemPointerIsValid(&itup->t_tid));
+
+	result = _bt_doinsert(rel, itup, checkUnique, heapRel, specState);
 
 	pfree(itup);
 
+	/* free private state only - caller should always free specState */
+	if (specState)
+		pfree(specState->privState);
+
 	PG_RETURN_BOOL(result);
 }
 
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index ee10594..68bfca2 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -1648,6 +1648,9 @@ BuildIndexInfo(Relation index)
 	ii->ii_Unique = indexStruct->indisunique;
 	ii->ii_ReadyForInserts = IndexIsReady(indexStruct);
 
+	/* merge on all indexes until we hear otherwise */
+	ii->ii_MergeOn = true;
+
 	/* initialize index-build state to default */
 	ii->ii_Concurrent = false;
 	ii->ii_BrokenHotChain = false;
@@ -2959,7 +2962,8 @@ validate_index_heapscan(Relation heapRelation,
 						 &rootTuple,
 						 heapRelation,
 						 indexInfo->ii_Unique ?
-						 UNIQUE_CHECK_YES : UNIQUE_CHECK_NO);
+						 UNIQUE_CHECK_YES : UNIQUE_CHECK_NO,
+						 NULL);
 
 			state->tups_inserted += 1;
 		}
diff --git a/src/backend/catalog/indexing.c b/src/backend/catalog/indexing.c
index 05aa56e..4c82d0d 100644
--- a/src/backend/catalog/indexing.c
+++ b/src/backend/catalog/indexing.c
@@ -139,7 +139,8 @@ CatalogIndexInsert(CatalogIndexState indstate, HeapTuple heapTuple)
 					 &(heapTuple->t_self),		/* tid of heap tuple */
 					 heapRelation,
 					 relationDescs[i]->rd_index->indisunique ?
-					 UNIQUE_CHECK_YES : UNIQUE_CHECK_NO);
+					 UNIQUE_CHECK_YES : UNIQUE_CHECK_NO,
+					 NULL);
 	}
 
 	ExecDropSingleTupleTableSlot(slot);
diff --git a/src/backend/commands/constraint.c b/src/backend/commands/constraint.c
index b0cad46..183a69d 100644
--- a/src/backend/commands/constraint.c
+++ b/src/backend/commands/constraint.c
@@ -162,7 +162,7 @@ unique_key_recheck(PG_FUNCTION_ARGS)
 		 * that has already been inserted is unique.
 		 */
 		index_insert(indexRel, values, isnull, &(new_row->t_self),
-					 trigdata->tg_relation, UNIQUE_CHECK_EXISTING);
+					 trigdata->tg_relation, UNIQUE_CHECK_EXISTING, NULL);
 	}
 	else
 	{
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index eb2844a..54cba48 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -2333,7 +2333,7 @@ CopyFrom(CopyState cstate)
 
 				if (resultRelInfo->ri_NumIndices > 0)
 					recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
-														   estate);
+														   estate, NIL);
 
 				/* AFTER ROW INSERT Triggers */
 				ExecARInsertTriggers(estate, resultRelInfo, tuple,
@@ -2440,7 +2440,7 @@ CopyFromInsertBatch(CopyState cstate, EState *estate, CommandId mycid,
 			ExecStoreTuple(bufferedTuples[i], myslot, InvalidBuffer, false);
 			recheckIndexes =
 				ExecInsertIndexTuples(myslot, &(bufferedTuples[i]->t_self),
-									  estate);
+									  estate, NIL);
 			ExecARInsertTriggers(estate, resultRelInfo,
 								 bufferedTuples[i],
 								 recheckIndexes);
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 781a736..1546fa8 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -850,6 +850,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 	const char *operation = NULL;
 	int			save_indent = es->indent;
 	bool		haschildren;
+	ModifyTable *mtplan;
 
 	switch (nodeTag(plan))
 	{
@@ -858,10 +859,14 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			break;
 		case T_ModifyTable:
 			sname = "ModifyTable";
-			switch (((ModifyTable *) plan)->operation)
+			mtplan = (ModifyTable *) plan;
+			switch (mtplan->operation)
 			{
 				case CMD_INSERT:
-					pname = operation = "Insert";
+					if (!mtplan->onConflictPlan)
+						pname = operation = "Insert";
+					else
+						pname = operation = "Insert or Update";
 					break;
 				case CMD_UPDATE:
 					pname = operation = "Update";
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index d71e9b8..df77573 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -2063,11 +2063,12 @@ EvalPlanQualFetch(EState *estate, Relation relation, int lockmode,
 					 * case, so as to avoid the "Halloween problem" of
 					 * repeated update attempts.  In the latter case it might
 					 * be sensible to fetch the updated tuple instead, but
-					 * doing so would require changing heap_lock_tuple as well
-					 * as heap_update and heap_delete to not complain about
-					 * updating "invisible" tuples, which seems pretty scary.
-					 * So for now, treat the tuple as deleted and do not
-					 * process.
+					 * doing so would require changing heap_update and
+					 * heap_delete to not complain about updating "invisible"
+					 * tuples, which seems pretty scary (heap_lock_tuple will
+					 * not complain, but few callers expect HeapTupleInvisible,
+					 * and we're not one of them).  So for now, treat the tuple
+					 * as deleted and do not process.
 					 */
 					ReleaseBuffer(buffer);
 					return NULL;
diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c
index d5e1273..05187e9 100644
--- a/src/backend/executor/execUtils.c
+++ b/src/backend/executor/execUtils.c
@@ -30,6 +30,7 @@
  *
  *		ExecOpenIndices			\
  *		ExecCloseIndices		 | referenced by InitPlan, EndPlan,
+ *		ExecLockIndexValues		 | (optionally lock indexes first)
  *		ExecInsertIndexTuples	/  ExecInsert, ExecUpdate
  *
  *		RegisterExprContextCallback    Register function shutdown callback
@@ -54,6 +55,8 @@
 
 
 static bool get_last_attnums(Node *node, ProjectionInfo *projInfo);
+static bool ExecCheckPartialPredicate(IndexInfo *info, EState *estate,
+						 ExprContext *econtext);
 static bool index_recheck_constraint(Relation index, Oid *constr_procs,
 						 Datum *existing_values, bool *existing_isnull,
 						 Datum *new_values);
@@ -978,6 +981,236 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo)
 }
 
 /* ----------------------------------------------------------------
+ * ExecCheckPartialPredicate
+ *
+ *		Verify if predicate is satisfied -- if not, callers can skip
+ *		index tuple insertion altogether
+ * ----------------------------------------------------------------
+ */
+static bool
+ExecCheckPartialPredicate(IndexInfo *info, EState *estate,
+						  ExprContext *econtext)
+{
+	List	   *predicate;
+
+	/* No predicate, insertion must be required */
+	if (info->ii_Predicate == NIL)
+		return true;
+
+	/*
+	 * Check for partial index.
+	 *
+	 * If predicate state not set up yet, create it (in the estate's per-query
+	 * context).
+	 */
+	predicate = info->ii_PredicateState;
+	if (predicate == NIL)
+	{
+		predicate = (List *)
+			ExecPrepareExpr((Expr *) info->ii_Predicate,
+							estate);
+		info->ii_PredicateState = predicate;
+	}
+
+	return ExecQual(predicate, econtext, false);
+}
+
+/* ----------------------------------------------------------------
+ *		ExecLockIndexValues
+ *
+ *		Acquire value locks on values that are proposed for insertion
+ *		into unique indexes.  Value locking prevents the concurrent
+ *		insertion of conflicting index tuples into unique indexes.
+ *		This is the first phase of speculative index tuple insertion.
+ *
+ *		Lock values for each unique index such that it is possible to
+ *		commit the core system to inserting a slot without any unique
+ *		index violations, or to inform the core system to not proceed
+ *		with heap insertion at all in respect of this slot.
+ *
+ *		Returns a list of unique indexes that were locked. *conflict
+ *		is set to the index offset of any index on which a conflict
+ *		occurs, if any, and may be read back before being set locally
+ *		to check for previous conflicts.
+ * ----------------------------------------------------------------
+ */
+List *
+ExecLockIndexValues(TupleTableSlot *slot, EState *estate,
+					SpecType spec, int *conflict)
+{
+	ResultRelInfo *resultRelInfo;
+	int			i;
+	int			numIndices;
+	RelationPtr relationDescs;
+	Relation	heapRelation;
+	IndexInfo **indexInfoArray;
+	ExprContext *econtext;
+	Datum		values[INDEX_MAX_KEYS];
+	bool		isnull[INDEX_MAX_KEYS];
+	List	   *result = NIL;
+
+	/*
+	 * Get information from the result relation info structure.
+	 */
+	resultRelInfo = estate->es_result_relation_info;
+	numIndices = resultRelInfo->ri_NumIndices;
+	relationDescs = resultRelInfo->ri_IndexRelationDescs;
+	indexInfoArray = resultRelInfo->ri_IndexRelationInfo;
+	heapRelation = resultRelInfo->ri_RelationDesc;
+
+	/*
+	 * We will use the EState's per-tuple context for evaluating predicates
+	 * and index expressions (creating it if it's not already there).
+	 */
+	econtext = GetPerTupleExprContext(estate);
+
+	/* Arrange for econtext's scan tuple to be the tuple under test */
+	econtext->ecxt_scantuple = slot;
+
+lindex:
+
+	/*
+	 * For each relevant unique index, form and lock the index tuple (this can
+	 * be cached by amcanunique access methods for later use by
+	 * ExecInsertIndexTuples())
+	 *
+	 * We lock the indexes in the opposite order to their usual
+	 * ExecInsertIndexTuples()/release order here.  That routine will always
+	 * insert into any unique indexes first, before finally inserting into
+	 * non-unique indexes, so as to minimize the window of lock contention.
+	 * The fact that indexes are sorted in this way while locking in opposite
+	 * order often implies that the locking window for the primary key is
+	 * lowest of all, though this isn't strictly guaranteed.
+	 */
+	for (i = numIndices - 1; i >= 0; i--)
+	{
+		Relation	indexRelation = relationDescs[i];
+		IndexInfo  *indexInfo;
+		SpeculativeState *state;
+
+		if (indexRelation == NULL)
+			continue;
+
+		indexInfo = indexInfoArray[i];
+
+		/* If the index is marked as read-only, ignore it */
+		if (!indexInfo->ii_ReadyForInserts)
+			continue;
+
+		/* Only value lock unique indexes */
+		if (!indexInfo->ii_Unique)
+			continue;
+
+		/* May be limited to merging on particular index */
+		if (!indexInfo->ii_MergeOn)
+			continue;
+
+		if (!indexRelation->rd_index->indimmediate)
+		{
+			switch (spec)
+			{
+				case SPEC_IGNORE:
+					ereport(ERROR,
+							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+							 errmsg("unsupported use of ON CONFLICT IGNORE with deferred unique constraint \"%s\"",
+									RelationGetRelationName(indexRelation)),
+							 errhint("ON CONFLICT IGNORE performs immediate verification."),
+							 errtableconstraint(heapRelation,
+												RelationGetRelationName(indexRelation))));
+				case SPEC_INSERT:
+					ereport(ERROR,
+							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+							 errmsg("unsupported use of ON CONFLICT UPDATE with deferred unique constraint \"%s\"",
+									RelationGetRelationName(indexRelation)),
+							 errhint("ON CONFLICT UPDATE performs immediate verification."),
+							 errtableconstraint(heapRelation,
+												RelationGetRelationName(indexRelation))));
+				case SPEC_UPDATE:
+				case SPEC_NONE:
+					  elog(ERROR, "invalid speculative insertion specification");
+			}
+		}
+
+		if (!ExecCheckPartialPredicate(indexInfo, estate, econtext))
+			continue;
+
+		/*
+		 * FormIndexDatum fills in its values and isnull parameters with the
+		 * appropriate values for the column(s) of the index.
+		 */
+		FormIndexDatum(indexInfo,
+					   slot,
+					   estate,
+					   values,
+					   isnull);
+
+		/*
+		 * The index AM does uniqueness checking.
+		 */
+		state = index_lock(indexRelation,
+						   values,
+						   isnull,
+						   heapRelation,
+						   result,
+						   *conflict == i);
+
+		if (state->outcome == INSERT_TRY_AGAIN)
+		{
+			/*
+			 * AM indicated that we must try again, typically because it had to
+			 * wait pending the outcome of another transaction.
+			 *
+			 * The AM lock function will have already used the list of
+			 * locked-so-far states passed to it to free all locks and other
+			 * resources for all previously locked indexes just before waiting.
+			 *
+			 * The lock starvation hazards here are minimal, since no sensible
+			 * usage of speculative insertion is likely to see conflicts on
+			 * multiple unique indexes at once when row locking (we only really
+			 * concurrently lock all unique index values proposed for insertion
+			 * to save the DML statement's author from specifying which unique
+			 * index they intend to merge on; it will ordinarily be obvious to
+			 * a person with application domain knowledge).  When clients wish
+			 * to avoid a tuple slot's insertion when a duplicate key violation
+			 * would otherwise occur (i.e. IGNORE), only one
+			 * conclusively-present conflict is required to give up, so lock
+			 * starvation is improbable there too.
+			 */
+			list_free(result);
+			result = NIL;
+
+			/* Record conflict, to hint to AM to expect this next time */
+			*conflict = i;
+
+			goto lindex;
+		}
+
+		/*
+		 * Prepend the list, because later stages expect to find the state in
+		 * right-way-around order, and we're working backwards
+		 */
+		result = lcons(state, result);
+
+		/*
+		 * There is no point in proceeding if we already have one would-be
+		 * violation -- we require total consensus
+		 */
+		if (state->outcome == INSERT_NO_PROCEED)
+		{
+			/*
+			 * Record conflict, to hint to AM to expect this if row locking
+			 * ultimately indicates that a full restart is required.  Should
+			 * control once again reach this function, the hint still carries.
+			 */
+			*conflict = i;
+			break;
+		}
+	}
+
+	return result;
+}
+
+/* ----------------------------------------------------------------
  *		ExecInsertIndexTuples
  *
  *		This routine takes care of inserting index tuples
@@ -990,7 +1223,10 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo)
  *
  *		This returns a list of index OIDs for any unique or exclusion
  *		constraints that are deferred and that had
- *		potential (unconfirmed) conflicts.
+ *		potential (unconfirmed) conflicts.  With speculative
+ *		insertion, successful insertion without unique violations is
+ *		already assured, and specStates allows the second phase to
+ *		perform "insertion proper".
  *
  *		CAUTION: this must not be called for a HOT update.
  *		We can't defend against that here for lack of info.
@@ -1000,11 +1236,13 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo)
 List *
 ExecInsertIndexTuples(TupleTableSlot *slot,
 					  ItemPointer tupleid,
-					  EState *estate)
+					  EState *estate,
+					  List *specStates)
 {
 	List	   *result = NIL;
 	ResultRelInfo *resultRelInfo;
 	int			i;
+	int			sn = 0;
 	int			numIndices;
 	RelationPtr relationDescs;
 	Relation	heapRelation;
@@ -1040,6 +1278,7 @@ ExecInsertIndexTuples(TupleTableSlot *slot,
 		IndexInfo  *indexInfo;
 		IndexUniqueCheck checkUnique;
 		bool		satisfiesConstraint;
+		SpeculativeState *state;
 
 		if (indexRelation == NULL)
 			continue;
@@ -1050,38 +1289,22 @@ ExecInsertIndexTuples(TupleTableSlot *slot,
 		if (!indexInfo->ii_ReadyForInserts)
 			continue;
 
-		/* Check for partial index */
-		if (indexInfo->ii_Predicate != NIL)
-		{
-			List	   *predicate;
-
-			/*
-			 * If predicate state not set up yet, create it (in the estate's
-			 * per-query context)
-			 */
-			predicate = indexInfo->ii_PredicateState;
-			if (predicate == NIL)
-			{
-				predicate = (List *)
-					ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
-									estate);
-				indexInfo->ii_PredicateState = predicate;
-			}
+		if (!ExecCheckPartialPredicate(indexInfo, estate, econtext))
+			continue;
 
-			/* Skip this index-update if the predicate isn't satisfied */
-			if (!ExecQual(predicate, econtext, false))
-				continue;
-		}
+		state = specStates && indexInfo->ii_Unique && indexInfo->ii_MergeOn ?
+			list_nth(specStates, sn++) : NULL;
 
 		/*
 		 * FormIndexDatum fills in its values and isnull parameters with the
-		 * appropriate values for the column(s) of the index.
+		 * appropriate values for the column(s) of the index, where required.
 		 */
-		FormIndexDatum(indexInfo,
-					   slot,
-					   estate,
-					   values,
-					   isnull);
+		if (!state)
+			FormIndexDatum(indexInfo,
+						   slot,
+						   estate,
+						   values,
+						   isnull);
 
 		/*
 		 * The index AM does the actual insertion, plus uniqueness checking.
@@ -1089,12 +1312,21 @@ ExecInsertIndexTuples(TupleTableSlot *slot,
 		 * For an immediate-mode unique index, we just tell the index AM to
 		 * throw error if not unique.
 		 *
+		 * For the latter phase of speculative insertion, uniqueness
+		 * checks will already have been performed if we arrive here, and
+		 * should not be repeated here.  However, the user may have specified
+		 * that speculative insertion is in respect of a particular unique
+		 * index, in which case only that unique index will now have value
+		 * locks; only tell the AM to insert that unique index speculatively.
+		 *
 		 * For a deferrable unique index, we tell the index AM to just detect
 		 * possible non-uniqueness, and we add the index OID to the result
 		 * list if further checking is needed.
 		 */
 		if (!indexRelation->rd_index->indisunique)
 			checkUnique = UNIQUE_CHECK_NO;
+		else if (specStates && indexInfo->ii_MergeOn)
+			checkUnique = UNIQUE_CHECK_SPEC;
 		else if (indexRelation->rd_index->indimmediate)
 			checkUnique = UNIQUE_CHECK_YES;
 		else
@@ -1106,7 +1338,8 @@ ExecInsertIndexTuples(TupleTableSlot *slot,
 						 isnull,	/* null flags */
 						 tupleid,		/* tid of heap tuple */
 						 heapRelation,	/* heap relation */
-						 checkUnique);	/* type of uniqueness check to do */
+						 checkUnique,	/* type of uniqueness check to do */
+						 state);		/* speculative state, if any */
 
 		/*
 		 * If the index has an associated exclusion constraint, check that.
diff --git a/src/backend/executor/nodeLockRows.c b/src/backend/executor/nodeLockRows.c
index 298d4b4..fbafcc5 100644
--- a/src/backend/executor/nodeLockRows.c
+++ b/src/backend/executor/nodeLockRows.c
@@ -147,10 +147,11 @@ lnext:
 				 * case, so as to avoid the "Halloween problem" of repeated
 				 * update attempts.  In the latter case it might be sensible
 				 * to fetch the updated tuple instead, but doing so would
-				 * require changing heap_lock_tuple as well as heap_update and
-				 * heap_delete to not complain about updating "invisible"
-				 * tuples, which seems pretty scary.  So for now, treat the
-				 * tuple as deleted and do not process.
+				 * require changing heap_update and heap_delete to not complain
+				 * about updating "invisible" tuples, which seems pretty scary
+				 * (heap_lock_tuple will not complain, but few callers expect
+				 * HeapTupleInvisible, and we're not one of them).  So for now,
+				 * treat the tuple as deleted and do not process.
 				 */
 				goto lnext;
 
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 8ac6047..2350f60 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -52,6 +52,12 @@
 #include "utils/tqual.h"
 
 
+static bool ExecLockUpdateTuple(EState *estate,
+								ResultRelInfo *relinfo,
+								ItemPointer tid,
+								TupleTableSlot *planSlot,
+								ModifyTableState *onConflict);
+
 /*
  * Verify that the tuples to be produced by INSERT or UPDATE match the
  * target relation's rowtype
@@ -152,6 +158,37 @@ ExecProcessReturning(ProjectionInfo *projectReturning,
 }
 
 /* ----------------------------------------------------------------
+ * Verify that heap tuple is visible to transaction snapshot at higher
+ * isolation levels.
+ *
+ * It would not represent serializable behavior to proceed with avoiding
+ * insertion on the basis of another tuple that is not visible (at
+ * higher isolation levels).
+ * ----------------------------------------------------------------
+ */
+static void
+ExecCheckHeapTupleVisible(EState *estate,
+						  ResultRelInfo *relinfo,
+						  ItemPointer tid)
+{
+	Relation	relation = relinfo->ri_RelationDesc;
+	Buffer		buffer;
+	HeapTupleData tuple;
+
+	if (!IsolationUsesXactSnapshot())
+		return;
+
+	tuple.t_self = *tid;
+	if (!heap_fetch(relation, estate->es_snapshot, &tuple, &buffer, false,
+					NULL))
+		ereport(ERROR,
+				(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+				 errmsg("could not proceed on basis of version originating in still in progress transaction")));
+
+	ReleaseBuffer(buffer);
+}
+
+/* ----------------------------------------------------------------
  *		ExecInsert
  *
  *		For INSERT, we have to insert the tuple into the target relation
@@ -163,6 +201,8 @@ ExecProcessReturning(ProjectionInfo *projectReturning,
 static TupleTableSlot *
 ExecInsert(TupleTableSlot *slot,
 		   TupleTableSlot *planSlot,
+		   ModifyTableState *onConflict,
+		   SpecType spec,
 		   EState *estate,
 		   bool canSetTag)
 {
@@ -171,6 +211,8 @@ ExecInsert(TupleTableSlot *slot,
 	Relation	resultRelationDesc;
 	Oid			newId;
 	List	   *recheckIndexes = NIL;
+	List	   *specStates = NIL;
+	int			conflict;
 
 	/*
 	 * get the heap tuple out of the tuple table slot, making sure we have a
@@ -228,6 +270,11 @@ ExecInsert(TupleTableSlot *slot,
 	}
 	else if (resultRelInfo->ri_FdwRoutine)
 	{
+		if (spec != SPEC_NONE)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("INSERT...ON CONFLICT is not supported on foreign tables")));
+
 		/*
 		 * insert into foreign table: let the FDW do it
 		 */
@@ -259,6 +306,94 @@ ExecInsert(TupleTableSlot *slot,
 			ExecConstraints(resultRelInfo, slot, estate);
 
 		/*
+		 * For now, if we're performing a speculative insertion, no conflict on
+		 * any particular unique index is considered particularly likely.  As
+		 * and when conflicts emerge for this slot, further future conflicts on
+		 * the same unique index are anticipated, which is hinted to the
+		 * underlying AM's implementation in ExecLockIndexValues(), to
+		 * facilitate optimizations around lock strength.
+		 */
+		conflict = -1;
+ilock:
+		/*
+		 * Lock unique index values, as required when performing speculative
+		 * insertion
+		 */
+		if (resultRelInfo->ri_NumIndices > 0 && spec != SPEC_NONE)
+			specStates = ExecLockIndexValues(slot, estate, spec, &conflict);
+
+		/*
+		 * Check if it's required to proceed with the second phase ("insertion
+		 * proper") of speculative insertion in respect of the slot.  If
+		 * insertion should not proceed, no firing of AFTER ROW INSERT triggers
+		 * occurs.
+		 *
+		 * We don't suppress the effects (or, perhaps, side-effects) of BEFORE
+		 * ROW INSERT triggers.  This isn't ideal, but then we cannot proceed
+		 * with even considering uniqueness violations until these triggers
+		 * fire on the one hand, but on the other hand they have the ability to
+		 * execute arbitrary user-defined code which may perform operations
+		 * entirely outside the system's ability to nullify.
+		 */
+		if (!index_proceed(specStates))
+		{
+			ItemPointer		htup;
+
+			/*
+			 * Release index value locks -- if insertion had proceeded, this
+			 * would occur during index tuple insertion instead, as each index
+			 * tuple is individually inserted...
+			 */
+			htup = index_release(specStates, false);
+
+			/*
+			 * ...locks were only needed to ensure that insertion could proceed
+			 * without hindrance from concurrent insertions.  Now that it's
+			 * clear that insertion won't proceed (unless there is a later row
+			 * lock conflict due to a concurrent deletion) there is no
+			 * advantage in holding value locks until after row locks are
+			 * acquired.  Continuing to hold locks would not imply that there'd
+			 * be fewer row locking conflicts in ExecLockUpdateTuple(), plus
+			 * doing so introduces the risk of unprincipled deadlocks (i.e.
+			 * deadlocks that the user cannot reasonably avoid).
+			 *
+			 * Value locks can only prevent concurrent unique index tuple
+			 * insertion from completing, but UPDATEs and DELETEs still cause
+			 * conflicts for row locking (even non-HOT updates).
+			 */
+			switch (spec)
+			{
+				case SPEC_IGNORE:
+					ExecCheckHeapTupleVisible(estate, resultRelInfo, htup);
+					break;
+				case SPEC_INSERT:
+					if (!ExecLockUpdateTuple(estate, resultRelInfo, htup,
+											 planSlot, onConflict))
+					{
+						/* Couldn't lock row due to conflict.  Restart. */
+						index_release(specStates, true);
+						list_free(specStates);
+						goto ilock;
+					}
+					break;
+				case SPEC_UPDATE:
+				case SPEC_NONE:
+					elog(ERROR, "invalid speculative insertion specification");
+			}
+
+			/*
+			 * Finally, unpin value lock buffer
+			 */
+			index_release(specStates, true);
+
+			/*
+			 * Speculative insertion does not project RETURNING tuples in
+			 * respect of rows that were updated/ignored
+			 */
+			return NULL;
+		}
+
+		/*
 		 * insert the tuple
 		 *
 		 * Note: heap_insert returns the tid (location) of the new tuple in
@@ -269,10 +404,14 @@ ExecInsert(TupleTableSlot *slot,
 
 		/*
 		 * insert index entries for tuple
+		 *
+		 * Locks will be acquired if needed, or the locks acquired by
+		 * ExecLockIndexTuples() may be used instead.
 		 */
 		if (resultRelInfo->ri_NumIndices > 0)
 			recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
-												   estate);
+												   estate,
+												   specStates);
 	}
 
 	if (canSetTag)
@@ -285,6 +424,7 @@ ExecInsert(TupleTableSlot *slot,
 	/* AFTER ROW INSERT Triggers */
 	ExecARInsertTriggers(estate, resultRelInfo, tuple, recheckIndexes);
 
+	list_free(specStates);
 	list_free(recheckIndexes);
 
 	/* Check any WITH CHECK OPTION constraints */
@@ -768,7 +908,7 @@ lreplace:;
 		 */
 		if (resultRelInfo->ri_NumIndices > 0 && !HeapTupleIsHeapOnly(tuple))
 			recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
-												   estate);
+												   estate, NIL);
 	}
 
 	if (canSetTag)
@@ -793,6 +933,213 @@ lreplace:;
 }
 
 
+/* ----------------------------------------------------------------
+ * Try to lock tuple for update as part of speculative insertion.  If
+ * a qual originating from ON CONFLICT UPDATE is satisfied, update
+ * (but still lock row, even though it may not satisfy estate's
+ * snapshot).
+ *
+ * Parameterized plans appearing in auxiliary update plan are rejected
+ * outright.  Therefore, it is safe to perform update without
+ * parameters that may be intended for SPEC_INSERT plan.  This
+ * effectively prevents any cases where the EvalPlanQual re-scanning
+ * performed here would be problematic (including, for example,
+ * subqueries in targetlists or quals).  Some of these cases might
+ * still be possible (such as when only an initplan is required), but
+ * restrictions are applied more broadly during parsing and planning.
+ *
+ * Returns value indicating if we're done (with or without an
+ * update), or if the executor must start from scratch.
+ * ----------------------------------------------------------------
+ */
+static bool
+ExecLockUpdateTuple(EState *estate,
+					ResultRelInfo *relinfo,
+					ItemPointer tid,
+					TupleTableSlot *planSlot,
+					ModifyTableState *onConflict)
+{
+	Relation				relation = relinfo->ri_RelationDesc;
+	HeapTupleData			tuple;
+	HeapTuple				copyTuple = NULL;
+	HeapUpdateFailureData 	hufd;
+	HTSU_Result 			test;
+	Buffer					buffer;
+	TupleTableSlot		   *slot;
+
+	Assert(ItemPointerIsValid(tid));
+
+	/*
+	 * Lock tuple for update.
+	 *
+	 * Like EvalPlanQualFetch(), don't follow updates.  There is no actual
+	 * benefit to doing so, since a conflict invalidates our previous
+	 * conclusion that the tuple is the conclusively committed conflicting
+	 * tuple.
+	 */
+	tuple.t_self = *tid;
+	test = heap_lock_tuple(relation, &tuple,
+						   estate->es_output_cid,
+						   LockTupleExclusive, false, /* wait */
+						   false, &buffer, &hufd);
+
+	if (test == HeapTupleMayBeUpdated)
+		copyTuple = heap_copytuple(&tuple);
+
+	switch (test)
+	{
+		case HeapTupleInvisible:
+			/*
+			 * This may occur when an instantaneously invisible tuple is blamed
+			 * as a conflict because multiple rows are inserted with the same
+			 * constrained values.
+			 *
+			 * We cannot proceed, because to do so would leave users open to
+			 * the risk that the same row will be updated a second time in the
+			 * same command;  allowing a second update affecting a single row
+			 * within the same command a second time would leave the update
+			 * order undefined.  It is the user's responsibility to resolve
+			 * these self-duplicates in advance of proposing for insertion a
+			 * set of tuples, but warn them.  These problems are why SQL-2003
+			 * similarly specifies that for SQL MERGE, an exception must be
+			 * raised in the event of an attempt to update the same row twice.
+			 *
+			 * XXX:  It might be preferable to do something similar when a
+			 * row is locked twice (and not updated twice) by the same
+			 * speculative insertion, as if to take each lock acquisition as a
+			 * indication of a discrete, unfulfilled intent to update (perhaps
+			 * in some later command of the same xact).  This does not seem
+			 * feasible, though.
+			 */
+			if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple.t_data)))
+				ereport(ERROR,
+						(errcode(ERRCODE_UNIQUE_VIOLATION),
+						 errmsg("could not lock instantaneously invisible tuple inserted in same transaction"),
+						 errhint("Ensure that no rows proposed for insertion in the same command have constrained values that duplicate each other.")));
+
+			/* This shouldn't happen */
+			elog(ERROR, "attempted to lock invisible tuple");
+			return false;		/* keep compiler quiet */
+		case HeapTupleSelfUpdated:
+			if (hufd.cmax != estate->es_output_cid)
+			{
+				/*
+				 * A later command updated or deleted the tuple.  Throw an
+				 * error for the same reasons enumerated in ExecUpdate and
+				 * ExecDelete.
+				 */
+				ereport(ERROR,
+						(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
+						 errmsg("tuple to be updated was already modified by an operation triggered by the current command"),
+						 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
+			}
+			else
+			{
+				/*
+				 * Handle this case as equivalent to the HeapTupleInvisible
+				 * case.
+				 *
+				 * XXX: In practice this is dead code, and the problem will
+				 * always be handled by the HeapTupleInvisible case, since if
+				 * we updated this tuple, speculative insertion's first
+				 * phase/the amcanunique AM wouldn't have concluded that it was
+				 * a conflict tuple, since a dirty snapshot is used to do so,
+				 * which would have seen the effects of that would-be update
+				 * (i.e. this tuple would not have been visible to a dirty
+				 * snapshot immediately before now).
+				 */
+				ereport(ERROR,
+						(errcode(ERRCODE_UNIQUE_VIOLATION),
+						 errmsg("could not lock tuple already updated or deleted by the current command"),
+						 errhint("Ensure that no rows proposed for insertion in the same command have constrained values that duplicate each other.")));
+			}
+			return false;		/* keep compiler quiet */
+		case HeapTupleMayBeUpdated:
+			/*
+			 * Success -- we're done, as tuple is locked.  Verify that the
+			 * tuple is known to be visible to our snapshot under conventional
+			 * MVCC rules if the current isolation level mandates that.  In
+			 * READ COMMITTED mode, we can lock + update a tuple still in
+			 * progress according to our snapshot, but higher isolation levels
+			 * cannot avail of that, and must actively defend against doing so.
+			 *
+			 * This loosening of snapshot isolation for the benefit of READ
+			 * COMMITTED speculative insertions is used consistently:
+			 * speculative quals are only tested in respect of already locked
+			 * tuples.  It would be rather inconsistent to UPDATE when no tuple
+			 * version is visible on the one hand, while also failing to UPDATE
+			 * only on the basis of the non-conclusive tuple version that
+			 * happens to be the version visible to the estate snapshot.
+			 */
+			if (IsolationUsesXactSnapshot() &&
+				XidInProgress(HeapTupleHeaderGetXmin(tuple.t_data),
+							  estate->es_snapshot))
+				ereport(ERROR,
+						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+						 errmsg("could not update row version originating in still in progress transaction")));
+
+			/*
+			 * Conceptually, the parent ModifyTable is like a relation scan
+			 * node that uses a dirty snapshot, returning rows which the
+			 * auxiliary plan must operate on (if only to lock all such rows).
+			 * EvalPlanQual() is involved in the evaluation of their UPDATE,
+			 * regardless of whether or not the tuple is visible to the
+			 * command's MVCC Snapshot.
+			 */
+			EvalPlanQualBegin(&onConflict->mt_epqstate, onConflict->ps.state);
+			EvalPlanQualSetTuple(&onConflict->mt_epqstate,
+								 onConflict->ps.state->es_result_relation_info->ri_RangeTableIndex,
+								 copyTuple);
+			slot = EvalPlanQualNext(&onConflict->mt_epqstate);
+
+			if (!TupIsNull(slot))
+				ExecUpdate(&tuple.t_data->t_ctid, NULL, slot, planSlot,
+						   &onConflict->mt_epqstate, onConflict->ps.state, false);
+
+			ReleaseBuffer(buffer);
+
+			/*
+			 * As when executing a ModifyTable node in the conventional manner,
+			 * reset the per-output-tuple exprcontext
+			 */
+			ResetPerTupleExprContext(onConflict->ps.state);
+			return true;
+		case HeapTupleUpdated:
+			if (IsolationUsesXactSnapshot())
+				ereport(ERROR,
+						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+						 errmsg("could not serialize access due to concurrent update")));
+
+			/*
+			 * Tell caller to try again from the very start.  We don't use the
+			 * usual EvalPlanQual looping pattern here, fundamentally because
+			 * we don't have a useful qual to verify the next tuple with.
+			 *
+			 * We might devise a means of verifying, by way of binary equality
+			 * in a similar manner to HOT codepaths, if any unique indexed
+			 * columns changed, but this would only serve to ameliorate the
+			 * fundamental problem.  It might well not be good enough, because
+			 * those columns could change too.  It's not clear that doing any
+			 * better here would be worth it.
+			 *
+			 * At this point, all bets are off -- it might actually turn out to
+			 * be okay to proceed with insertion instead of locking now (the
+			 * tuple we attempted to lock could have been deleted, for
+			 * example).  On the other hand, it might not be okay, but for an
+			 * entirely different reason, with an entirely separate TID to
+			 * blame and lock.  This TID may not even be part of the same
+			 * update chain.
+			 */
+			ReleaseBuffer(buffer);
+			return false;
+		default:
+			elog(ERROR, "unrecognized heap_lock_tuple status: %u", test);
+	}
+
+	return false;
+}
+
+
 /*
  * Process BEFORE EACH STATEMENT triggers
  */
@@ -852,6 +1199,8 @@ ExecModifyTable(ModifyTableState *node)
 {
 	EState	   *estate = node->ps.state;
 	CmdType		operation = node->operation;
+	ModifyTableState  *onConflict = (ModifyTableState *) node->onConflict;
+	SpecType	spec = node->spec;
 	ResultRelInfo *saved_resultRelInfo;
 	ResultRelInfo *resultRelInfo;
 	PlanState  *subplanstate;
@@ -1022,7 +1371,8 @@ ExecModifyTable(ModifyTableState *node)
 		switch (operation)
 		{
 			case CMD_INSERT:
-				slot = ExecInsert(slot, planSlot, estate, node->canSetTag);
+				slot = ExecInsert(slot, planSlot, onConflict, spec, estate,
+								  node->canSetTag);
 				break;
 			case CMD_UPDATE:
 				slot = ExecUpdate(tupleid, oldtuple, slot, planSlot,
@@ -1070,6 +1420,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
 {
 	ModifyTableState *mtstate;
 	CmdType		operation = node->operation;
+	Plan	   *onConflictPlan = node->onConflictPlan;
 	int			nplans = list_length(node->plans);
 	ResultRelInfo *saved_resultRelInfo;
 	ResultRelInfo *resultRelInfo;
@@ -1097,6 +1448,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
 	mtstate->resultRelInfo = estate->es_result_relations + node->resultRelIndex;
 	mtstate->mt_arowmarks = (List **) palloc0(sizeof(List *) * nplans);
 	mtstate->mt_nplans = nplans;
+	mtstate->spec = node->spec;
 
 	/* set up epqstate with dummy subplan data for the moment */
 	EvalPlanQualInit(&mtstate->mt_epqstate, estate, NULL, NIL, node->epqParam);
@@ -1135,8 +1487,39 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
 		if (resultRelInfo->ri_RelationDesc->rd_rel->relhasindex &&
 			operation != CMD_DELETE &&
 			resultRelInfo->ri_IndexRelationDescs == NULL)
+		{
 			ExecOpenIndices(resultRelInfo);
 
+			/*
+			 * If explicit ON CONFLICT WITHIN unique index was specified,
+			 * represent that value locking should only occur in that index
+			 */
+			if (node->spec != SPEC_NONE && node->mergeIndex != InvalidOid)
+			{
+				RelationPtr relationDescs = resultRelInfo->ri_IndexRelationDescs;
+				IndexInfo **indexInfoArray;
+				int 		numIndices = resultRelInfo->ri_NumIndices;
+				int			j;
+				IndexInfo  *indexInfo;
+
+				indexInfoArray = resultRelInfo->ri_IndexRelationInfo;
+
+				resultRelInfo = mtstate->resultRelInfo;	/* XXX(review): resets the outer loop's resultRelInfo to the first result rel; appears harmless only because ON CONFLICT rejects inheritance (single result rel) -- confirm intent */
+				for (j = 0; j < numIndices; j++)
+				{
+					Relation	indexRelation = relationDescs[j];
+					indexInfo = indexInfoArray[j];
+
+					if (indexRelation->rd_id != node->mergeIndex)
+						indexInfo->ii_MergeOn = false;
+					else if (!indexInfo->ii_Unique)
+						ereport(ERROR,
+								(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+								 errmsg("ON CONFLICT WITHIN index is not a unique index")));
+				}
+			}
+		}
+
 		/* Now init the plan for this result rel */
 		estate->es_result_relation_info = resultRelInfo;
 		mtstate->mt_plans[i] = ExecInitNode(subplan, estate, eflags);
@@ -1331,7 +1714,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
 							resultRelInfo->ri_RelationDesc->rd_att->tdhasoid,
 									   ExecInitExtraTupleSlot(estate));
 
-				if (operation == CMD_UPDATE || operation == CMD_DELETE)
+				if ((operation == CMD_UPDATE || operation == CMD_DELETE) &&
+					node->spec == SPEC_NONE)
 				{
 					/* For UPDATE/DELETE, find the appropriate junk attr now */
 					char		relkind;
@@ -1373,6 +1757,27 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
 	}
 
 	/*
+	 * Initialize auxiliary ModifyTable node for INSERT...ON CONFLICT UPDATE.
+	 *
+	 * The UPDATE portion of the query is essentially represented as auxiliary
+	 * to INSERT state at all stages of query processing, with a representation
+	 * at each stage that is analogous to a regular UPDATE, but it is not
+	 * directly represented in user-visible output such as EXPLAIN.
+	 *
+	 * ExecModifyTable() is never called in respect of an auxiliary
+	 * ModifyTableState.  Execution of the auxiliary plan is driven by its
+	 * parent in an ad-hoc fashion.
+	 */
+	if (onConflictPlan)
+	{
+		PlanState *pstate;
+
+		Assert(mtstate->spec == SPEC_INSERT);
+		pstate = ExecInitNode(onConflictPlan, estate, eflags);
+		mtstate->onConflict = pstate;
+	}
+
+	/*
 	 * Set up a tuple table slot for use for trigger output tuples. In a plan
 	 * containing multiple ModifyTable nodes, all can share one such slot, so
 	 * we keep it in the estate.
@@ -1387,11 +1792,19 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
 	 * ModifyTable node too, but there's no need.)  Note the use of lcons not
 	 * lappend: we need later-initialized ModifyTable nodes to be shut down
 	 * before earlier ones.  This ensures that we don't throw away RETURNING
-	 * rows that need to be seen by a later CTE subplan.
+	 * rows that need to be seen by a later CTE subplan.  We do not want to
+	 * append an auxiliary ON CONFLICT UPDATE node, since it must have a
+	 * primary ModifyTable node that merely uses its state to selectively
+	 * execute some portions of the would-be unqualified UPDATE.  There are
+	 * restrictions imposed upon the UPDATE that make the primary and auxiliary
+	 * plans isomorphic.  This is sufficient to make ExecLockUpdateTuple()
+	 * safe.
 	 */
-	if (!mtstate->canSetTag)
+	if (!mtstate->canSetTag && mtstate->spec != SPEC_UPDATE)
+	{
 		estate->es_auxmodifytables = lcons(mtstate,
 										   estate->es_auxmodifytables);
+	}
 
 	return mtstate;
 }
@@ -1442,6 +1855,8 @@ ExecEndModifyTable(ModifyTableState *node)
 	 */
 	for (i = 0; i < node->mt_nplans; i++)
 		ExecEndNode(node->mt_plans[i]);
+
+	ExecEndNode(node->onConflict);
 }
 
 void
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 9f5bcd7..079b3aa 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -178,6 +178,9 @@ _copyModifyTable(const ModifyTable *from)
 	COPY_NODE_FIELD(resultRelations);
 	COPY_SCALAR_FIELD(resultRelIndex);
 	COPY_NODE_FIELD(plans);
+	COPY_NODE_FIELD(onConflictPlan);
+	COPY_SCALAR_FIELD(spec);
+	COPY_SCALAR_FIELD(mergeIndex);
 	COPY_NODE_FIELD(withCheckOptionLists);
 	COPY_NODE_FIELD(returningLists);
 	COPY_NODE_FIELD(fdwPrivLists);
@@ -2090,6 +2093,19 @@ _copyWithClause(const WithClause *from)
 	return newnode;
 }
 
+static ConflictClause *
+_copyConflictClause(const ConflictClause *from)
+{
+	ConflictClause *newnode = makeNode(ConflictClause);
+
+	COPY_SCALAR_FIELD(specClause);
+	COPY_NODE_FIELD(stmt);
+	COPY_NODE_FIELD(relation);
+	COPY_LOCATION_FIELD(location);
+
+	return newnode;
+}
+
 static CommonTableExpr *
 _copyCommonTableExpr(const CommonTableExpr *from)
 {
@@ -2494,6 +2510,9 @@ _copyQuery(const Query *from)
 	COPY_NODE_FIELD(jointree);
 	COPY_NODE_FIELD(targetList);
 	COPY_NODE_FIELD(withCheckOptions);
+	COPY_NODE_FIELD(onConflict);
+	COPY_SCALAR_FIELD(specClause);
+	COPY_SCALAR_FIELD(mergeIndex);
 	COPY_NODE_FIELD(returningList);
 	COPY_NODE_FIELD(groupClause);
 	COPY_NODE_FIELD(havingQual);
@@ -2518,6 +2537,7 @@ _copyInsertStmt(const InsertStmt *from)
 	COPY_NODE_FIELD(cols);
 	COPY_NODE_FIELD(selectStmt);
 	COPY_NODE_FIELD(returningList);
+	COPY_NODE_FIELD(confClause);
 	COPY_NODE_FIELD(withClause);
 
 	return newnode;
@@ -4653,6 +4673,9 @@ copyObject(const void *from)
 		case T_WithClause:
 			retval = _copyWithClause(from);
 			break;
+		case T_ConflictClause:
+			retval = _copyConflictClause(from);
+			break;
 		case T_CommonTableExpr:
 			retval = _copyCommonTableExpr(from);
 			break;
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index 8d13c69..1ec7192 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -862,6 +862,9 @@ _equalQuery(const Query *a, const Query *b)
 	COMPARE_NODE_FIELD(jointree);
 	COMPARE_NODE_FIELD(targetList);
 	COMPARE_NODE_FIELD(withCheckOptions);
+	COMPARE_NODE_FIELD(onConflict);
+	COMPARE_SCALAR_FIELD(specClause);
+	COMPARE_SCALAR_FIELD(mergeIndex);
 	COMPARE_NODE_FIELD(returningList);
 	COMPARE_NODE_FIELD(groupClause);
 	COMPARE_NODE_FIELD(havingQual);
@@ -2400,6 +2403,17 @@ _equalWithClause(const WithClause *a, const WithClause *b)
 }
 
 static bool
+_equalConflictClause(const ConflictClause *a, const ConflictClause *b)
+{
+	COMPARE_SCALAR_FIELD(specClause);
+	COMPARE_NODE_FIELD(stmt);
+	COMPARE_NODE_FIELD(relation);
+	COMPARE_LOCATION_FIELD(location);
+
+	return true;
+}
+
+static bool
 _equalCommonTableExpr(const CommonTableExpr *a, const CommonTableExpr *b)
 {
 	COMPARE_STRING_FIELD(ctename);
@@ -3117,6 +3130,9 @@ equal(const void *a, const void *b)
 		case T_WithClause:
 			retval = _equalWithClause(a, b);
 			break;
+		case T_ConflictClause:
+			retval = _equalConflictClause(a, b);
+			break;
 		case T_CommonTableExpr:
 			retval = _equalCommonTableExpr(a, b);
 			break;
diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c
index 41e973b..12735ab 100644
--- a/src/backend/nodes/nodeFuncs.c
+++ b/src/backend/nodes/nodeFuncs.c
@@ -1474,6 +1474,9 @@ exprLocation(const Node *expr)
 		case T_WithClause:
 			loc = ((const WithClause *) expr)->location;
 			break;
+		case T_ConflictClause:
+			loc = ((const ConflictClause *) expr)->location;
+			break;
 		case T_CommonTableExpr:
 			loc = ((const CommonTableExpr *) expr)->location;
 			break;
@@ -1958,6 +1961,8 @@ query_tree_walker(Query *query,
 		return true;
 	if (walker((Node *) query->withCheckOptions, context))
 		return true;
+	if (walker(query->onConflict, context))
+		return true;
 	if (walker((Node *) query->returningList, context))
 		return true;
 	if (walker((Node *) query->jointree, context))
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index da75e29..3c35e4f 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -332,6 +332,9 @@ _outModifyTable(StringInfo str, const ModifyTable *node)
 	WRITE_NODE_FIELD(resultRelations);
 	WRITE_INT_FIELD(resultRelIndex);
 	WRITE_NODE_FIELD(plans);
+	WRITE_NODE_FIELD(onConflictPlan);
+	WRITE_ENUM_FIELD(spec, SpecType);
+	WRITE_OID_FIELD(mergeIndex);
 	WRITE_NODE_FIELD(withCheckOptionLists);
 	WRITE_NODE_FIELD(returningLists);
 	WRITE_NODE_FIELD(fdwPrivLists);
@@ -2268,6 +2271,9 @@ _outQuery(StringInfo str, const Query *node)
 	WRITE_NODE_FIELD(jointree);
 	WRITE_NODE_FIELD(targetList);
 	WRITE_NODE_FIELD(withCheckOptions);
+	WRITE_NODE_FIELD(onConflict);
+	WRITE_ENUM_FIELD(specClause, SpecType);
+	WRITE_OID_FIELD(mergeIndex);
 	WRITE_NODE_FIELD(returningList);
 	WRITE_NODE_FIELD(groupClause);
 	WRITE_NODE_FIELD(havingQual);
@@ -2341,6 +2347,17 @@ _outWithClause(StringInfo str, const WithClause *node)
 }
 
 static void
+_outConflictClause(StringInfo str, const ConflictClause *node)
+{
+	WRITE_NODE_TYPE("CONFLICTCLAUSE");
+
+	WRITE_ENUM_FIELD(specClause, SpecType);
+	WRITE_NODE_FIELD(stmt);
+	WRITE_NODE_FIELD(relation);
+	WRITE_LOCATION_FIELD(location);
+}
+
+static void
 _outCommonTableExpr(StringInfo str, const CommonTableExpr *node)
 {
 	WRITE_NODE_TYPE("COMMONTABLEEXPR");
@@ -3187,6 +3203,9 @@ _outNode(StringInfo str, const void *obj)
 			case T_WithClause:
 				_outWithClause(str, obj);
 				break;
+			case T_ConflictClause:
+				_outConflictClause(str, obj);
+				break;
 			case T_CommonTableExpr:
 				_outCommonTableExpr(str, obj);
 				break;
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index 65584d1..62c0f3e 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -213,6 +213,9 @@ _readQuery(void)
 	READ_NODE_FIELD(jointree);
 	READ_NODE_FIELD(targetList);
 	READ_NODE_FIELD(withCheckOptions);
+	READ_NODE_FIELD(onConflict);
+	READ_ENUM_FIELD(specClause, SpecType);
+	READ_OID_FIELD(mergeIndex);
 	READ_NODE_FIELD(returningList);
 	READ_NODE_FIELD(groupClause);
 	READ_NODE_FIELD(havingQual);
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 0cdb790..de2c171 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -280,7 +280,13 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count)
 		allclauses = baserel->baserestrictinfo;
 	}
 
-	if (!enable_indexscan)
+	/*
+	 * TODO: Clean this up.  There is no obvious way to generalize from the
+	 * example of isCurrentOf, which seems more appropriate, because there
+	 * isn't necessarily something to add disable_cost to within
+	 * cost_qual_eval().
+	 */
+	if (!enable_indexscan || root->parse->specClause == SPEC_UPDATE)
 		startup_cost += disable_cost;
 	/* we don't need to check enable_indexonlyscan; indxpath.c does that */
 
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 4b641a2..b5966d0 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -4719,7 +4719,8 @@ make_modifytable(PlannerInfo *root,
 				 CmdType operation, bool canSetTag,
 				 List *resultRelations, List *subplans,
 				 List *withCheckOptionLists, List *returningLists,
-				 List *rowMarks, int epqParam)
+				 List *rowMarks, Plan *onConflictPlan, SpecType spec,
+				 Oid mergeIndex, int epqParam)
 {
 	ModifyTable *node = makeNode(ModifyTable);
 	Plan	   *plan = &node->plan;
@@ -4735,6 +4736,11 @@ make_modifytable(PlannerInfo *root,
 	Assert(returningLists == NIL ||
 		   list_length(resultRelations) == list_length(returningLists));
 
+	if (spec != SPEC_NONE && list_length(resultRelations) > 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("INSERT...ON CONFLICT does not support table inheritance")));
+
 	/*
 	 * Compute cost as sum of subplan costs.
 	 */
@@ -4768,6 +4774,9 @@ make_modifytable(PlannerInfo *root,
 	node->resultRelations = resultRelations;
 	node->resultRelIndex = -1;	/* will be set correctly in setrefs.c */
 	node->plans = subplans;
+	node->onConflictPlan = onConflictPlan;
+	node->spec = spec;
+	node->mergeIndex = mergeIndex;
 	node->withCheckOptionLists = withCheckOptionLists;
 	node->returningLists = returningLists;
 	node->rowMarks = rowMarks;
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index e1480cd..282b9b2 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -610,7 +610,41 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
 											 withCheckOptionLists,
 											 returningLists,
 											 rowMarks,
+											 NULL,
+											 parse->specClause,
+											 parse->mergeIndex,
 											 SS_assign_special_param(root));
+
+			if (parse->onConflict)
+			{
+				Query		   *conflictQry = (Query*) parse->onConflict;
+				ModifyTable	   *parent = (ModifyTable *) plan;
+				Plan		   *planOnConflict;
+
+				/*
+				 * An ON CONFLICT UPDATE query is a subquery of its parent
+				 * INSERT ModifyTable, but isn't formally a subplan.
+				 *
+				 * XXX:  It may be worth blending the costs associated with
+				 * this plan into its parent.  Since it isn't formally a
+				 * subplan, that does not occur at present.
+				 */
+				planOnConflict = subquery_planner(glob, conflictQry, root,
+												  hasRecursion, 0, NULL);
+
+				if (contain_subplans((Node *) conflictQry->targetList))
+					ereport(ERROR,
+							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+							 errmsg("UPDATE portion of ON CONFLICT contains subplans"),
+							 errhint("Only trivial targetlist entries and predicates are supported.")));
+
+				if (bms_num_members(planOnConflict->allParam) > 0)
+					ereport(ERROR,
+							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+							 errmsg("parameterized auxiliary UPDATE queries are unsupported")));
+
+				parent->onConflictPlan = planOnConflict;
+			}
 		}
 	}
 
@@ -1045,6 +1079,11 @@ inheritance_planner(PlannerInfo *root)
 	else
 		rowMarks = root->rowMarks;
 
+	if (parse->onConflict)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("ON CONFLICT UPDATE is not supported within inheritance hierarchy")));
+
 	/* And last, tack on a ModifyTable node to do the UPDATE/DELETE work */
 	return (Plan *) make_modifytable(root,
 									 parse->commandType,
@@ -1054,6 +1093,9 @@ inheritance_planner(PlannerInfo *root)
 									 withCheckOptionLists,
 									 returningLists,
 									 rowMarks,
+									 NULL,
+									 parse->specClause,
+									 InvalidOid,
 									 SS_assign_special_param(root));
 }
 
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index 4d717df..fb378b2 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -765,6 +765,14 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
 				root->glob->resultRelations =
 					list_concat(root->glob->resultRelations,
 								list_copy(splan->resultRelations));
+
+				if (splan->onConflictPlan)
+				{
+					splan->onConflictPlan = (Plan *) set_plan_refs(root,
+											  (Plan *) splan->onConflictPlan,
+											  rtoffset);
+				}
+
 			}
 			break;
 		case T_Append:
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index 3e7dc85..81b90b0 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -2305,6 +2305,8 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
 													  valid_params,
 													  scan_params));
 				}
+
+				/* No need to directly handle onConflict here */
 			}
 			break;
 
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index c0b1fe3..cdc0ab2 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -391,6 +391,8 @@ transformDeleteStmt(ParseState *pstate, DeleteStmt *stmt)
 	/* done building the range table and jointree */
 	qry->rtable = pstate->p_rtable;
 	qry->jointree = makeFromExpr(pstate->p_joinlist, qual);
+	qry->specClause = SPEC_NONE;
+	qry->onConflict = NULL;
 
 	qry->hasSubLinks = pstate->p_hasSubLinks;
 	qry->hasWindowFuncs = pstate->p_hasWindowFuncs;
@@ -412,6 +414,7 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt)
 {
 	Query	   *qry = makeNode(Query);
 	SelectStmt *selectStmt = (SelectStmt *) stmt->selectStmt;
+	SpecType	spec = stmt->confClause? stmt->confClause->specClause : SPEC_NONE;
 	List	   *exprList = NIL;
 	bool		isGeneralSelect;
 	List	   *sub_rtable;
@@ -482,8 +485,9 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt)
 	 * mentioned in the SELECT part.  Note that the target table is not added
 	 * to the joinlist or namespace.
 	 */
-	qry->resultRelation = setTargetTable(pstate, stmt->relation,
-										 false, false, ACL_INSERT);
+	qry->resultRelation = setTargetTable(pstate, stmt->relation, false, false,
+										 ACL_INSERT |
+										 (spec == SPEC_INSERT ? ACL_UPDATE : 0));
 
 	/* Validate stmt->cols list, or build default list if no list given */
 	icolumns = checkInsertTargets(pstate, stmt->cols, &attrnos);
@@ -762,8 +766,52 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt)
 	/* done building the range table and jointree */
 	qry->rtable = pstate->p_rtable;
 	qry->jointree = makeFromExpr(pstate->p_joinlist, NULL);
-
+	qry->specClause = spec;
 	qry->hasSubLinks = pstate->p_hasSubLinks;
+	qry->onConflict = NULL;
+
+	if (stmt->confClause)
+	{
+		/*
+		 * ON CONFLICT UPDATE requires special parse analysis of auxiliary
+		 * update Query
+		 */
+		if (stmt->confClause->stmt)
+		{
+			UpdateStmt	   *pupd  = (UpdateStmt *) stmt->confClause->stmt;
+			ParseState	   *sub_pstate = make_parsestate(pstate);
+			Query		   *dqry;
+			RangeTblEntry  *subTarget;
+
+			if (!IsA(pupd, UpdateStmt))
+				elog(ERROR, "unrecognized statement in ON CONFLICT clause");
+
+			/* Assign same target relation as parent InsertStmt */
+			pupd->relation = stmt->relation;
+
+			dqry = transformStmt(sub_pstate, (Node *) pupd);
+			dqry->specClause = SPEC_UPDATE;
+			dqry->canSetTag = false;
+
+			/* Save auxiliary query */
+			qry->onConflict = (Node *) dqry;
+
+			/*
+			 * Mark parent Query as requiring appropriate UPDATE/SELECT
+			 * privileges
+			 */
+			subTarget = sub_pstate->p_target_rangetblentry;
+
+			rte->updatedCols = bms_copy(subTarget->updatedCols);
+			rte->selectedCols = bms_union(rte->selectedCols,
+										  subTarget->selectedCols);
+
+			free_parsestate(sub_pstate);
+		}
+
+		/* Look up index to limit speculative insertion merge to */
+		qry->mergeIndex = transformConflictWithinClause(pstate, stmt);
+	}
 
 	assign_query_collations(pstate, qry);
 
@@ -1010,6 +1058,8 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
 
 	qry->rtable = pstate->p_rtable;
 	qry->jointree = makeFromExpr(pstate->p_joinlist, qual);
+	qry->specClause = SPEC_NONE;
+	qry->onConflict = NULL;
 
 	qry->hasSubLinks = pstate->p_hasSubLinks;
 	qry->hasWindowFuncs = pstate->p_hasWindowFuncs;
@@ -1951,6 +2001,8 @@ transformUpdateStmt(ParseState *pstate, UpdateStmt *stmt)
 
 	qry->rtable = pstate->p_rtable;
 	qry->jointree = makeFromExpr(pstate->p_joinlist, qual);
+	qry->specClause = SPEC_NONE;
+	qry->onConflict = NULL;
 
 	qry->hasSubLinks = pstate->p_hasSubLinks;
 
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 6f4d645..275600a 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -215,6 +215,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 	RangeVar			*range;
 	IntoClause			*into;
 	WithClause			*with;
+	ConflictClause			*conf;
 	A_Indices			*aind;
 	ResTarget			*target;
 	struct PrivTarget	*privtarget;
@@ -315,7 +316,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 				opt_class opt_inline_handler opt_validator validator_clause
 				opt_collate
 
-%type <range>	qualified_name OptConstrFromTable
+%type <range>	qualified_name OptConstrFromTable OptConfWithinIndex
 
 %type <str>		all_Op MathOp
 
@@ -410,6 +411,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type <defelt>	SeqOptElem
 
 %type <istmt>	insert_rest
+%type <conf>	opt_on_conflict
 
 %type <vsetstmt> generic_set set_rest set_rest_more SetResetClause FunctionSetResetClause
 
@@ -507,6 +509,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type <list>	cte_list
 
 %type <list>	within_group_clause
+%type <node>	UpdateInsertStmt
 %type <node>	filter_clause
 %type <list>	window_clause window_definition_list opt_partition_clause
 %type <windef>	window_definition over_clause window_specification
@@ -545,8 +548,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 	CACHE CALLED CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P
 	CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE
 	CLUSTER COALESCE COLLATE COLLATION COLUMN COMMENT COMMENTS COMMIT
-	COMMITTED CONCURRENTLY CONFIGURATION CONNECTION CONSTRAINT CONSTRAINTS
-	CONTENT_P CONTINUE_P CONVERSION_P COPY COST CREATE
+	COMMITTED CONCURRENTLY CONFIGURATION CONFLICT CONNECTION CONSTRAINT
+	CONSTRAINTS CONTENT_P CONTINUE_P CONVERSION_P COPY COST CREATE
 	CROSS CSV CURRENT_P
 	CURRENT_CATALOG CURRENT_DATE CURRENT_ROLE CURRENT_SCHEMA
 	CURRENT_TIME CURRENT_TIMESTAMP CURRENT_USER CURSOR CYCLE
@@ -566,7 +569,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 
 	HANDLER HAVING HEADER_P HOLD HOUR_P
 
-	IDENTITY_P IF_P ILIKE IMMEDIATE IMMUTABLE IMPLICIT_P IMPORT_P IN_P
+	IDENTITY_P IF_P IGNORE ILIKE IMMEDIATE IMMUTABLE IMPLICIT_P IMPORT_P IN_P
 	INCLUDING INCREMENT INDEX INDEXES INHERIT INHERITS INITIALLY INLINE_P
 	INNER_P INOUT INPUT_P INSENSITIVE INSERT INSTEAD INT_P INTEGER
 	INTERSECT INTERVAL INTO INVOKER IS ISNULL ISOLATION
@@ -646,6 +649,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %nonassoc	OVERLAPS
 %nonassoc	BETWEEN
 %nonassoc	IN_P
+%nonassoc	DISTINCT
+%nonassoc	ON
 %left		POSTFIXOP		/* dummy for postfix Op rules */
 /*
  * To support target_el without AS, we must give IDENT an explicit priority
@@ -9095,11 +9100,13 @@ DeallocateStmt: DEALLOCATE name
  *****************************************************************************/
 
 InsertStmt:
-			opt_with_clause INSERT INTO qualified_name insert_rest returning_clause
+			opt_with_clause INSERT INTO qualified_name insert_rest
+			opt_on_conflict returning_clause
 				{
 					$5->relation = $4;
-					$5->returningList = $6;
+					$5->returningList = $7;
 					$5->withClause = $1;
+					$5->confClause = $6;
 					$$ = (Node *) $5;
 				}
 		;
@@ -9143,6 +9150,35 @@ insert_column_item:
 				}
 		;
 
+opt_on_conflict:
+			ON CONFLICT OptConfWithinIndex UpdateInsertStmt
+				{
+					$$ = makeNode(ConflictClause);
+					$$->relation = $3;
+					$$->stmt = $4;
+					$$->specClause = SPEC_INSERT;
+					$$->location = @1;
+				}
+			|
+			ON CONFLICT OptConfWithinIndex IGNORE
+				{
+					$$ = makeNode(ConflictClause);
+					$$->relation = $3;
+					$$->stmt = NULL;
+					$$->specClause = SPEC_IGNORE;
+					$$->location = @1;
+				}
+			| /*EMPTY*/
+				{
+					$$ = NULL;
+				}
+		;
+
+OptConfWithinIndex:
+			WITHIN qualified_name		{ $$ = $2; }
+			| /*EMPTY*/				{ $$ = NULL; }
+		;
+
 returning_clause:
 			RETURNING target_list		{ $$ = $2; }
 			| /* EMPTY */				{ $$ = NIL; }
@@ -9236,6 +9272,21 @@ UpdateStmt: opt_with_clause UPDATE relation_expr_opt_alias
 				}
 		;
 
+UpdateInsertStmt: UPDATE
+			SET set_clause_list
+			where_clause
+				{
+					UpdateStmt *n = makeNode(UpdateStmt);
+					n->relation = NULL;
+					n->targetList = $3;
+					n->fromClause = NULL;
+					n->whereClause = $4;
+					n->returningList = NULL;
+					n->withClause = NULL;
+					$$ = (Node *)n;
+				}
+		;
+
 set_clause_list:
 			set_clause							{ $$ = $1; }
 			| set_clause_list ',' set_clause	{ $$ = list_concat($1,$3); }
@@ -12898,6 +12949,7 @@ unreserved_keyword:
 			| COMMIT
 			| COMMITTED
 			| CONFIGURATION
+			| CONFLICT
 			| CONNECTION
 			| CONSTRAINTS
 			| CONTENT_P
@@ -12957,6 +13009,7 @@ unreserved_keyword:
 			| HOUR_P
 			| IDENTITY_P
 			| IF_P
+			| IGNORE
 			| IMMEDIATE
 			| IMMUTABLE
 			| IMPLICIT_P
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index 4931dca..c3d4e0c 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -17,6 +17,7 @@
 
 #include "access/heapam.h"
 #include "catalog/heap.h"
+#include "catalog/index.h"
 #include "catalog/pg_type.h"
 #include "commands/defrem.h"
 #include "nodes/makefuncs.h"
@@ -177,8 +178,8 @@ setTargetTable(ParseState *pstate, RangeVar *relation,
 	 * free_parsestate() will eventually do the corresponding heap_close(),
 	 * but *not* release the lock.
 	 */
-	pstate->p_target_relation = parserOpenTable(pstate, relation,
-												RowExclusiveLock);
+	pstate->p_target_relation = parserOpenRelation(pstate, relation,
+												   RowExclusiveLock);
 
 	/*
 	 * Now build an RTE.
@@ -2166,6 +2167,43 @@ get_matching_location(int sortgroupref, List *sortgrouprefs, List *exprs)
 }
 
 /*
+ * transformConflictWithinClause -
+ *		transform WITHIN portion of ON CONFLICT UPDATE/IGNORE.
+ *
+ * Handles adding named unique index relation to range table.  Returns Oid of
+ * index relation.
+ */
+Oid
+transformConflictWithinClause(ParseState *pstate, InsertStmt *stmt)
+{
+	RangeTblEntry *uniqueIndex;
+	Oid				heapOid;
+	RangeVar	   *indexVar = stmt->confClause->relation;
+
+	if (!stmt->confClause->relation)
+		return InvalidOid;
+
+	uniqueIndex = addRangeTableEntry(pstate, indexVar, NULL, false, false);
+
+	addRTEtoQuery(pstate, uniqueIndex, false, true, false);
+
+	heapOid = IndexGetRelation(uniqueIndex->relid, true);
+
+	if (heapOid != RelationGetRelid(pstate->p_target_relation))
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("ON CONFLICT WITHIN relation \"%s\" is not an index on target relation \"%s\"",
+						indexVar->relname, RelationGetRelationName(pstate->p_target_relation)),
+				 parser_errposition(pstate,
+									exprLocation((Node *) stmt->confClause->relation))));
+	}
+
+	/* assume new rte is at end */
+	return uniqueIndex->relid;
+}
+
+/*
  * addTargetToSortList
  *		If the given targetlist entry isn't already in the SortGroupClause
  *		list, add it to the end of the list, using the given sort ordering
diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c
index 364ced0..0fa9245 100644
--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -949,13 +949,13 @@ chooseScalarFunctionAlias(Node *funcexpr, char *funcname,
  * LOCKMODE is typedef'd as int anyway, that seems like overkill.
  */
 Relation
-parserOpenTable(ParseState *pstate, const RangeVar *relation, int lockmode)
+parserOpenRelation(ParseState *pstate, const RangeVar *relation, int lockmode)
 {
 	Relation	rel;
 	ParseCallbackState pcbstate;
 
 	setup_parser_errposition_callback(&pcbstate, pstate, relation->location);
-	rel = heap_openrv_extended(relation, lockmode, true);
+	rel = relation_openrv_extended(relation, lockmode, true);
 	if (rel == NULL)
 	{
 		if (relation->schemaname)
@@ -1021,7 +1021,7 @@ addRangeTableEntry(ParseState *pstate,
 	 * depending on whether we're doing SELECT FOR UPDATE/SHARE.
 	 */
 	lockmode = isLockedRefname(pstate, refname) ? RowShareLock : AccessShareLock;
-	rel = parserOpenTable(pstate, relation, lockmode);
+	rel = parserOpenRelation(pstate, relation, lockmode);
 	rte->relid = RelationGetRelid(rel);
 	rte->relkind = rel->rd_rel->relkind;
 
@@ -1037,7 +1037,7 @@ addRangeTableEntry(ParseState *pstate,
 	 * so that the table can't be deleted or have its schema modified
 	 * underneath us.
 	 */
-	heap_close(rel, NoLock);
+	relation_close(rel, NoLock);
 
 	/*
 	 * Set flags and access permissions.
diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c
index cc967f0..244085c 100644
--- a/src/backend/rewrite/rewriteHandler.c
+++ b/src/backend/rewrite/rewriteHandler.c
@@ -3089,6 +3089,11 @@ RewriteQuery(Query *parsetree, List *rewrite_events)
 			rt_entry_relation->rd_rel->relkind == RELKIND_VIEW &&
 			!view_has_instead_trigger(rt_entry_relation, event))
 		{
+			if (parsetree->specClause == SPEC_INSERT)
+				ereport(ERROR,
+						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED ),
+						 errmsg("INSERT ON CONFLICT is not supported on updatable views")));
+
 			/*
 			 * This throws an error if the view can't be automatically
 			 * updated, but that's OK since the query would fail at runtime
diff --git a/src/backend/utils/time/tqual.c b/src/backend/utils/time/tqual.c
index 5c3f5ad..03fd12c 100644
--- a/src/backend/utils/time/tqual.c
+++ b/src/backend/utils/time/tqual.c
@@ -1525,6 +1525,29 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
 }
 
 /*
+ * XidInProgress
+ *		Public variant of XidInMVCCSnapshot.
+ *
+ * This routine indicates if a transaction is still-in-progress to a snapshot
+ * according to a classic notion of MVCC.  Unlike XidInMVCCSnapshot, it
+ * accounts for the current subtransaction, as well as ancestor and child
+ * subcommitted transactions.
+ *
+ * Speculative insertion effectively avails of a special MVCC exception,
+ * because a tuple may be locked/updated that has no version visible to the
+ * updating command's MVCC snapshot.  It's okay for READ COMMITTED mode to not
+ * care about having availed of this special exception (that's what it's there
+ * for), but higher isolation levels must not, and should actively call this
+ * routine to ensure that the issue has not occurred.
+ */
+bool
+XidInProgress(TransactionId xid, Snapshot snapshot)
+{
+	return !TransactionIdIsCurrentTransactionId(xid) &&
+				XidInMVCCSnapshot(xid, snapshot);
+}
+
+/*
  * Is the tuple really only locked?  That is, is it not updated?
  *
  * It's easy to check just infomask bits if the locker is not a multi; but
diff --git a/src/include/access/genam.h b/src/include/access/genam.h
index d99158f..37bbac8 100644
--- a/src/include/access/genam.h
+++ b/src/include/access/genam.h
@@ -102,15 +102,103 @@ typedef struct SysScanDescData *SysScanDesc;
  * call is made with UNIQUE_CHECK_EXISTING.  The tuple is already in the
  * index in this case, so it should not be inserted again.  Rather, just
  * check for conflicting live tuples (possibly blocking).
+ *
+ * INSERT...ON CONFLICT UPDATE operations involve "speculative" insertion of
+ * tuples.  There is a call to establish the uniqueness of a tuple and take
+ * appropriate value locks (generally once per unique index per table slot).
+ * These locks prevent concurrent insertion of conflicting key values, and will
+ * only be held for an instant.  Values are locked in the abstract;  existing
+ * index tuples are not locked, because they don't yet exist.  Subsequently,
+ * there may be a second, corresponding phase where insertion proper actually
+ * occurs -- index_insert() calls are made with UNIQUE_CHECK_SPEC, where
+ * insertion proper picks up from the first phase, and is guaranteed to be
+ * successful (unless "WITHIN named_unique_index" was originally specified, in
+ * which case only "named_unique_index" has been value locked, implying that
+ * other unique indexes could have duplicate key violations when index_insert()
+ * is called).
  */
 typedef enum IndexUniqueCheck
 {
 	UNIQUE_CHECK_NO,			/* Don't do any uniqueness checking */
 	UNIQUE_CHECK_YES,			/* Enforce uniqueness at insertion time */
 	UNIQUE_CHECK_PARTIAL,		/* Test uniqueness, but no error */
-	UNIQUE_CHECK_EXISTING		/* Check if existing tuple is unique */
+	UNIQUE_CHECK_EXISTING,		/* Check if existing tuple is unique */
+	UNIQUE_CHECK_SPEC			/* Speculative phased locking insertion */
 } IndexUniqueCheck;
 
+/*
+ * For speculative insertion's phased locking, it is necessary for the core
+ * system to have callbacks to amcanunique AMs that allow them to clean-up in
+ * the duplicate found case.  This is because one first-phase call in respect
+ * of some unique index might indicate that it's okay to proceed, while another
+ * such call relating to another unique index (but the same executor-level
+ * tuple slot) indicates that it is not.  In general, we cannot rely on
+ * actually reaching the second phase even if one check in the first phase says
+ * that it assents to insertion proceeding, because we need total consensus
+ * from all unique indexes that may be inserted into as part of proceeding with
+ * inserting the tuple slot (assuming there was no WITHIN unique index
+ * specification).
+ *
+ * It is the responsibility of supporting AMs to set the callback if there is
+ * clean-up to be done when not proceeding.	 Note, however, that the executor
+ * will not call the callback if insertion of the relevant index tuple
+ * proceeds, because the AM should take care of this itself in the second,
+ * final, optional step ("insertion proper").
+ *
+ * The callback is called twice when the core code goes to UPDATE.  The first
+ * call releases all functional value locks, which is necessary to prevent
+ * unprincipled deadlocks, while the second post-lock/update call releases an
+ * interlock against VACUUM (typically a buffer pin) on the merge-on unique
+ * index.
+ *
+ * We must interlock against VACUUM because the executor may need to
+ * lock/UPDATE a tuple not visible to the command's MVCC snapshot, and it would
+ * be bad news to lock/update a heap tuple distinct from the one that the AM
+ * made a determination about in the first phase just because it happened to
+ * occupy the same heap slot. XXX: Perhaps not.  Discussion on VACUUM
+ * interlocking should revisit the need for this.
+ */
+struct SpeculativeState;
+typedef void (*ReleaseIndexCallback) (struct SpeculativeState *state);
+
+/*
+ * Speculative status returned.
+ *
+ * It may be necessary to start the first phase of speculative insertion from
+ * scratch, in the event of needing to block pending the completion of another
+ * transaction, where the AM cannot reasonably sit on value locks (associated
+ * with some other, previously locked amcanunique indexes) indefinitely.
+ */
+typedef enum
+{
+	INSERT_TRY_AGAIN,	/* had xact conflict - restart from first index */
+	INSERT_NO_PROCEED,	/* duplicate conclusively found */
+	INSERT_PROCEED,		/* no duplicate key in any index */
+	INSERT_NEED_UNPIN	/* only unpin buffer */
+} SpecStatus;
+
+/*
+ * Struct representing state passed to and from clients for speculative phased
+ * insertion.  This is allocated by amcanunique access methods, and passed back
+ * and forth for phased index locking.
+ *
+ * There is one such piece of state associated with every unique index
+ * participating in insertion of a slot (or a single named unique index in the
+ * event of a WITHIN specification).
+ */
+typedef struct SpeculativeState
+{
+	/* Index under consideration */
+	Relation				uniqueIndex;
+	/* Opaque amcanunique state */
+	void				   *privState;
+	/* Callback to tell AM to clean-up */
+	ReleaseIndexCallback	unlock;
+	/* Outcome of first phase (current attempt) for uniqueIndex */
+	SpecStatus				outcome;
+	/* Conflicting heap tuple, if any */
+	ItemPointer				conflictTid;
+}	SpeculativeState;
 
 /*
  * generalized index_ interface routines (in indexam.c)
@@ -125,11 +213,17 @@ typedef enum IndexUniqueCheck
 extern Relation index_open(Oid relationId, LOCKMODE lockmode);
 extern void index_close(Relation relation, LOCKMODE lockmode);
 
+extern SpeculativeState *index_lock(Relation indexRelation,
+			 Datum *values, bool *isnull,
+			 Relation heapRelation,
+			 List *otherSpecStates, bool priorConflict);
 extern bool index_insert(Relation indexRelation,
 			 Datum *values, bool *isnull,
 			 ItemPointer heap_t_ctid,
-			 Relation heapRelation,
-			 IndexUniqueCheck checkUnique);
+			 Relation heapRelation, IndexUniqueCheck checkUnique,
+			 SpeculativeState * state);
+extern bool index_proceed(List *specStates);
+extern ItemPointer index_release(List *specStates, bool free);
 
 extern IndexScanDesc index_beginscan(Relation heapRelation,
 				Relation indexRelation,
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index 9fa943f..ccb673b 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -74,6 +74,7 @@ typedef BTPageOpaqueData *BTPageOpaque;
 #define BTP_SPLIT_END	(1 << 5)	/* rightmost page of split group */
 #define BTP_HAS_GARBAGE (1 << 6)	/* page has LP_DEAD tuples */
 #define BTP_INCOMPLETE_SPLIT (1 << 7)	/* right sibling's downlink is missing */
+#define BTP_IS_LOCKED	(1 << 8)	/* page is heavyweight locked */
 
 /*
  * The max allowed value of a cycle ID is a bit less than 64K.  This is
@@ -132,6 +133,24 @@ typedef struct BTMetaPageData
 #define BTREE_NONLEAF_FILLFACTOR	70
 
 /*
+ * BTREE_PITEMS_NOLOCK is a number of items on leaf page, representing a
+ * threshold after which speculative insertion prefers to attempt lock
+ * avoidance.
+ *
+ * This value may seem suspect, because it does not account for btree leaf
+ * fillfactor, nor BLCKSZ;  in general it does not carefully consider how many
+ * items present on a leaf page meet some particular definition of "almost
+ * full", and therefore that a value proposed for insertion is likely to be
+ * found rather than newly inserted.  Perhaps most woolly of all, the page
+ * under consideration may not be the only page inspected for a would-be
+ * duplicate.  The value is only intended as a very rough approximation of the
+ * tipping point at which the optimization becomes profitable, and there are
+ * other, more important factors that will frequently independently make the
+ * optimization applicable.
+ */
+#define BTREE_PITEMS_NOLOCK			150
+
+/*
  *	Test whether two btree entries are "the same".
  *
  *	Old comments:
@@ -180,6 +199,7 @@ typedef struct BTMetaPageData
 #define P_IGNORE(opaque)		((opaque)->btpo_flags & (BTP_DELETED|BTP_HALF_DEAD))
 #define P_HAS_GARBAGE(opaque)	((opaque)->btpo_flags & BTP_HAS_GARBAGE)
 #define P_INCOMPLETE_SPLIT(opaque)	((opaque)->btpo_flags & BTP_INCOMPLETE_SPLIT)
+#define P_IS_LOCKED(opaque)		((opaque)->btpo_flags & BTP_IS_LOCKED)
 
 /*
  *	Lehman and Yao's algorithm requires a ``high key'' on every non-rightmost
@@ -611,6 +631,28 @@ typedef struct BTScanOpaqueData
 typedef BTScanOpaqueData *BTScanOpaque;
 
 /*
+ * BTSpecOpaqueStateData is the btree-private state that is managed as part of
+ * speculative index tuple insertion, particular to this implementation.  In
+ * the first phase, the implementation stashes this private state.  The state
+ * is passed back during the second phase, or resources are freed using a
+ * callback.
+ *
+ * lockedBuf is always pinned when passed back to the caller at the end of the
+ * first phase, unless the current attempt to get consensus was unsuccessful.
+ * An exclusive heavyweight lock will also be held on the page which lockedBuf
+ * is allocated into.
+ */
+typedef struct BTSpecOpaqueData
+{
+	Buffer		lockedBuf;		/* Buffer whose page (a leaf page) is locked */
+	BTStack		stack;			/* Saved stack in case of page split */
+	IndexTuple	itup;			/* Cached index tuple */
+	ScanKey		itupScankey;	/* Cached insertion scankey - redundant */
+} BTSpecOpaqueData;
+
+typedef BTSpecOpaqueData *BTSpecOpaque;
+
+/*
  * We use some private sk_flags bits in preprocessed scan keys.  We're allowed
  * to use bits 16-31 (see skey.h).  The uppermost bits are copied from the
  * index's indoption[] array entry for the index attribute.
@@ -626,6 +668,7 @@ typedef BTScanOpaqueData *BTScanOpaque;
  */
 extern Datum btbuild(PG_FUNCTION_ARGS);
 extern Datum btbuildempty(PG_FUNCTION_ARGS);
+extern Datum btlock(PG_FUNCTION_ARGS);
 extern Datum btinsert(PG_FUNCTION_ARGS);
 extern Datum btbeginscan(PG_FUNCTION_ARGS);
 extern Datum btgettuple(PG_FUNCTION_ARGS);
@@ -642,8 +685,12 @@ extern Datum btoptions(PG_FUNCTION_ARGS);
 /*
  * prototypes for functions in nbtinsert.c
  */
+extern void _bt_lockinsert(Relation rel, Relation heapRel,
+			 SpeculativeState *specState, List *otherSpecStates,
+			 bool priorConflict);
 extern bool _bt_doinsert(Relation rel, IndexTuple itup,
-			 IndexUniqueCheck checkUnique, Relation heapRel);
+			 IndexUniqueCheck checkUnique, Relation heapRel,
+			 SpeculativeState *specState);
 extern Buffer _bt_getstackbuf(Relation rel, BTStack stack, int access);
 extern void _bt_finish_split(Relation rel, Buffer bbuf, BTStack stack);
 
diff --git a/src/include/catalog/pg_am.h b/src/include/catalog/pg_am.h
index 759ea70..f3b17c5 100644
--- a/src/include/catalog/pg_am.h
+++ b/src/include/catalog/pg_am.h
@@ -52,6 +52,7 @@ CATALOG(pg_am,2601)
 	bool		amclusterable;	/* does AM support cluster command? */
 	bool		ampredlocks;	/* does AM handle predicate locks? */
 	Oid			amkeytype;		/* type of data in index, or InvalidOid */
+	regproc		amlock;			/* "speculative insertion" function */
 	regproc		aminsert;		/* "insert this tuple" function */
 	regproc		ambeginscan;	/* "prepare for index scan" function */
 	regproc		amgettuple;		/* "next valid tuple" function, or 0 */
@@ -80,7 +81,7 @@ typedef FormData_pg_am *Form_pg_am;
  *		compiler constants for pg_am
  * ----------------
  */
-#define Natts_pg_am						30
+#define Natts_pg_am						31
 #define Anum_pg_am_amname				1
 #define Anum_pg_am_amstrategies			2
 #define Anum_pg_am_amsupport			3
@@ -96,40 +97,41 @@ typedef FormData_pg_am *Form_pg_am;
 #define Anum_pg_am_amclusterable		13
 #define Anum_pg_am_ampredlocks			14
 #define Anum_pg_am_amkeytype			15
-#define Anum_pg_am_aminsert				16
-#define Anum_pg_am_ambeginscan			17
-#define Anum_pg_am_amgettuple			18
-#define Anum_pg_am_amgetbitmap			19
-#define Anum_pg_am_amrescan				20
-#define Anum_pg_am_amendscan			21
-#define Anum_pg_am_ammarkpos			22
-#define Anum_pg_am_amrestrpos			23
-#define Anum_pg_am_ambuild				24
-#define Anum_pg_am_ambuildempty			25
-#define Anum_pg_am_ambulkdelete			26
-#define Anum_pg_am_amvacuumcleanup		27
-#define Anum_pg_am_amcanreturn			28
-#define Anum_pg_am_amcostestimate		29
-#define Anum_pg_am_amoptions			30
+#define Anum_pg_am_amlock				16
+#define Anum_pg_am_aminsert				17
+#define Anum_pg_am_ambeginscan			18
+#define Anum_pg_am_amgettuple			19
+#define Anum_pg_am_amgetbitmap			20
+#define Anum_pg_am_amrescan				21
+#define Anum_pg_am_amendscan			22
+#define Anum_pg_am_ammarkpos			23
+#define Anum_pg_am_amrestrpos			24
+#define Anum_pg_am_ambuild				25
+#define Anum_pg_am_ambuildempty			26
+#define Anum_pg_am_ambulkdelete			27
+#define Anum_pg_am_amvacuumcleanup		28
+#define Anum_pg_am_amcanreturn			29
+#define Anum_pg_am_amcostestimate		30
+#define Anum_pg_am_amoptions			31
 
 /* ----------------
  *		initial contents of pg_am
  * ----------------
  */
 
-DATA(insert OID = 403 (  btree		5 2 t f t t t t t t f t t 0 btinsert btbeginscan btgettuple btgetbitmap btrescan btendscan btmarkpos btrestrpos btbuild btbuildempty btbulkdelete btvacuumcleanup btcanreturn btcostestimate btoptions ));
+DATA(insert OID = 403 (  btree		5 2 t f t t t t t t f t t 0 btlock btinsert btbeginscan btgettuple btgetbitmap btrescan btendscan btmarkpos btrestrpos btbuild btbuildempty btbulkdelete btvacuumcleanup btcanreturn btcostestimate btoptions ));
 DESCR("b-tree index access method");
 #define BTREE_AM_OID 403
-DATA(insert OID = 405 (  hash		1 1 f f t f f f f f f f f 23 hashinsert hashbeginscan hashgettuple hashgetbitmap hashrescan hashendscan hashmarkpos hashrestrpos hashbuild hashbuildempty hashbulkdelete hashvacuumcleanup - hashcostestimate hashoptions ));
+DATA(insert OID = 405 (  hash		1 1 f f t f f f f f f f f 23 - hashinsert hashbeginscan hashgettuple hashgetbitmap hashrescan hashendscan hashmarkpos hashrestrpos hashbuild hashbuildempty hashbulkdelete hashvacuumcleanup - hashcostestimate hashoptions ));
 DESCR("hash index access method");
 #define HASH_AM_OID 405
-DATA(insert OID = 783 (  gist		0 8 f t f f t t f t t t f 0 gistinsert gistbeginscan gistgettuple gistgetbitmap gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbuildempty gistbulkdelete gistvacuumcleanup - gistcostestimate gistoptions ));
+DATA(insert OID = 783 (  gist		0 8 f t f f t t f t t t f 0 - gistinsert gistbeginscan gistgettuple gistgetbitmap gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbuildempty gistbulkdelete gistvacuumcleanup - gistcostestimate gistoptions ));
 DESCR("GiST index access method");
 #define GIST_AM_OID 783
-DATA(insert OID = 2742 (  gin		0 6 f f f f t t f f t f f 0 gininsert ginbeginscan - gingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbuildempty ginbulkdelete ginvacuumcleanup - gincostestimate ginoptions ));
+DATA(insert OID = 2742 (  gin		0 6 f f f f t t f f t f f 0 - gininsert ginbeginscan - gingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbuildempty ginbulkdelete ginvacuumcleanup - gincostestimate ginoptions ));
 DESCR("GIN index access method");
 #define GIN_AM_OID 2742
-DATA(insert OID = 4000 (  spgist	0 5 f f f f f t f t f f f 0 spginsert spgbeginscan spggettuple spggetbitmap spgrescan spgendscan spgmarkpos spgrestrpos spgbuild spgbuildempty spgbulkdelete spgvacuumcleanup spgcanreturn spgcostestimate spgoptions ));
+DATA(insert OID = 4000 (  spgist	0 5 f f f f f t f t f f f 0 - spginsert spgbeginscan spggettuple spggetbitmap spgrescan spgendscan spgmarkpos spgrestrpos spgbuild spgbuildempty spgbulkdelete spgvacuumcleanup spgcanreturn spgcostestimate spgoptions ));
 DESCR("SP-GiST index access method");
 #define SPGIST_AM_OID 4000
 
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index a84595e..eb7f203 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -538,6 +538,8 @@ DATA(insert OID = 330 (  btgettuple		   PGNSP PGUID 12 1 0 0 0 f f f f t f v 2 0
 DESCR("btree(internal)");
 DATA(insert OID = 636 (  btgetbitmap	   PGNSP PGUID 12 1 0 0 0 f f f f t f v 2 0 20 "2281 2281" _null_ _null_ _null_ _null_	btgetbitmap _null_ _null_ _null_ ));
 DESCR("btree(internal)");
+DATA(insert OID = 3218 (  btlock		   PGNSP PGUID 12 1 0 0 0 f f f f t f v 6 0 2278 "2281 2281 2281 2281 2281 16" _null_ _null_ _null_ _null_	btlock _null_ _null_ _null_ ));
+DESCR("btree(internal)");
 DATA(insert OID = 331 (  btinsert		   PGNSP PGUID 12 1 0 0 0 f f f f t f v 6 0 16 "2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_	btinsert _null_ _null_ _null_ ));
 DESCR("btree(internal)");
 DATA(insert OID = 333 (  btbeginscan	   PGNSP PGUID 12 1 0 0 0 f f f f t f v 3 0 2281 "2281 2281 2281" _null_ _null_ _null_ _null_	btbeginscan _null_ _null_ _null_ ));
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 239aff3..1237c82 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -349,8 +349,10 @@ extern void ExecCloseScanRelation(Relation scanrel);
 
 extern void ExecOpenIndices(ResultRelInfo *resultRelInfo);
 extern void ExecCloseIndices(ResultRelInfo *resultRelInfo);
+extern List *ExecLockIndexValues(TupleTableSlot *slot, EState *estate,
+						   SpecType specReason, int *conflict);
 extern List *ExecInsertIndexTuples(TupleTableSlot *slot, ItemPointer tupleid,
-					  EState *estate);
+						   EState *estate, List *specStates);
 extern bool check_exclusion_constraint(Relation heap, Relation index,
 						   IndexInfo *indexInfo,
 						   ItemPointer tupleid,
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index b271f21..efb171d 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -63,6 +63,7 @@ typedef struct IndexInfo
 	Oid		   *ii_ExclusionProcs;		/* array with one entry per column */
 	uint16	   *ii_ExclusionStrats;		/* array with one entry per column */
 	bool		ii_Unique;
+	bool		ii_MergeOn;
 	bool		ii_ReadyForInserts;
 	bool		ii_Concurrent;
 	bool		ii_BrokenHotChain;
@@ -1087,6 +1088,8 @@ typedef struct ModifyTableState
 	int			mt_whichplan;	/* which one is being executed (0..n-1) */
 	ResultRelInfo *resultRelInfo;		/* per-subplan target relations */
 	List	  **mt_arowmarks;	/* per-subplan ExecAuxRowMark lists */
+	SpecType	spec;			/* reason for speculative insertion */
+	PlanState  *onConflict;		/* state of ON CONFLICT UPDATE auxiliary plan */
 	EPQState	mt_epqstate;	/* for evaluating EvalPlanQual rechecks */
 	bool		fireBSTriggers; /* do we need to fire stmt triggers? */
 } ModifyTableState;
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index a031b88..f173d11 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -407,6 +407,7 @@ typedef enum NodeTag
 	T_RowMarkClause,
 	T_XmlSerialize,
 	T_WithClause,
+	T_ConflictClause,
 	T_CommonTableExpr,
 
 	/*
@@ -619,4 +620,16 @@ typedef enum JoinType
 	   (1 << JOIN_RIGHT) | \
 	   (1 << JOIN_ANTI))) != 0)
 
+/*
+ * SpecType - "Speculative insertion" clause variant.  Shared across the
+ * parser, planner, and executor subsystems.
+ */
+typedef enum
+{
+	SPEC_NONE,		/* Not involved in speculative insertion */
+	SPEC_IGNORE,	/* INSERT of "ON CONFLICT IGNORE" */
+	SPEC_INSERT,	/* INSERT of "ON CONFLICT UPDATE" */
+	SPEC_UPDATE		/* UPDATE of "ON CONFLICT UPDATE" */
+} SpecType;
+
 #endif   /* NODES_H */
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 2c5d842..21b81ef 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -130,6 +130,14 @@ typedef struct Query
 
 	List	   *withCheckOptions;		/* a list of WithCheckOption's */
 
+	Node	   *onConflict;		/* ON CONFLICT Query */
+
+	SpecType	specClause;		/* speculative insertion clause */
+
+	Oid			mergeIndex;		/* OID of unique index relation
+								 * for speculative insertion; InvalidOid
+								 * when unspecified */
+
 	List	   *returningList;	/* return-values list (of TargetEntry) */
 
 	List	   *groupClause;	/* a list of SortGroupClause's */
@@ -996,6 +1004,21 @@ typedef struct WithClause
 } WithClause;
 
 /*
+ * ConflictClause -
+ * 		representation of ON CONFLICT clause
+ *
+ * Note: ConflictClause does not propagate into the Query representation
+ */
+typedef struct ConflictClause
+{
+	NodeTag		type;
+	SpecType	specClause;		/* Variant specified */
+	Node	   *stmt;			/* Update/Delete parse stmt */
+	RangeVar   *relation;		/* unique index to merge on, or NULL */
+	int			location;		/* token location, or -1 if unknown */
+} ConflictClause;
+
+/*
  * CommonTableExpr -
  *	   representation of WITH list element
  *
@@ -1046,6 +1069,7 @@ typedef struct InsertStmt
 	List	   *cols;			/* optional: names of the target columns */
 	Node	   *selectStmt;		/* the source SELECT/VALUES, or NULL */
 	List	   *returningList;	/* list of expressions to return */
+	ConflictClause  *confClause;	/* ON CONFLICT clause */
 	WithClause *withClause;		/* WITH clause */
 } InsertStmt;
 
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 3b9c683..2ff83ad 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -172,6 +172,9 @@ typedef struct ModifyTable
 	List	   *resultRelations;	/* integer list of RT indexes */
 	int			resultRelIndex; /* index of first resultRel in plan's list */
 	List	   *plans;			/* plan(s) producing source data */
+	Plan	   *onConflictPlan;	/* Plan for ON CONFLICT UPDATE auxiliary query */
+	SpecType	spec;			/* speculative insertion specification */
+	Oid			mergeIndex;		/* Oid of merge index relation */
 	List	   *withCheckOptionLists;	/* per-target-table WCO lists */
 	List	   *returningLists; /* per-target-table RETURNING tlists */
 	List	   *fdwPrivLists;	/* per-target-table FDW private data lists */
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index 4504250..bd93f67 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -84,7 +84,8 @@ extern ModifyTable *make_modifytable(PlannerInfo *root,
 				 CmdType operation, bool canSetTag,
 				 List *resultRelations, List *subplans,
 				 List *withCheckOptionLists, List *returningLists,
-				 List *rowMarks, int epqParam);
+				 List *rowMarks, Plan *onConflictPlan, SpecType spec,
+				 Oid mergeIndex, int epqParam);
 extern bool is_projection_capable_plan(Plan *plan);
 
 /*
diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h
index 17888ad..d5d0589 100644
--- a/src/include/parser/kwlist.h
+++ b/src/include/parser/kwlist.h
@@ -87,6 +87,7 @@ PG_KEYWORD("commit", COMMIT, UNRESERVED_KEYWORD)
 PG_KEYWORD("committed", COMMITTED, UNRESERVED_KEYWORD)
 PG_KEYWORD("concurrently", CONCURRENTLY, TYPE_FUNC_NAME_KEYWORD)
 PG_KEYWORD("configuration", CONFIGURATION, UNRESERVED_KEYWORD)
+PG_KEYWORD("conflict", CONFLICT, UNRESERVED_KEYWORD)
 PG_KEYWORD("connection", CONNECTION, UNRESERVED_KEYWORD)
 PG_KEYWORD("constraint", CONSTRAINT, RESERVED_KEYWORD)
 PG_KEYWORD("constraints", CONSTRAINTS, UNRESERVED_KEYWORD)
@@ -180,6 +181,7 @@ PG_KEYWORD("hold", HOLD, UNRESERVED_KEYWORD)
 PG_KEYWORD("hour", HOUR_P, UNRESERVED_KEYWORD)
 PG_KEYWORD("identity", IDENTITY_P, UNRESERVED_KEYWORD)
 PG_KEYWORD("if", IF_P, UNRESERVED_KEYWORD)
+PG_KEYWORD("ignore", IGNORE, UNRESERVED_KEYWORD)
 PG_KEYWORD("ilike", ILIKE, TYPE_FUNC_NAME_KEYWORD)
 PG_KEYWORD("immediate", IMMEDIATE, UNRESERVED_KEYWORD)
 PG_KEYWORD("immutable", IMMUTABLE, UNRESERVED_KEYWORD)
diff --git a/src/include/parser/parse_clause.h b/src/include/parser/parse_clause.h
index e9e7cdc..6c7cf67 100644
--- a/src/include/parser/parse_clause.h
+++ b/src/include/parser/parse_clause.h
@@ -41,6 +41,7 @@ extern List *transformDistinctClause(ParseState *pstate,
 						List **targetlist, List *sortClause, bool is_agg);
 extern List *transformDistinctOnClause(ParseState *pstate, List *distinctlist,
 						  List **targetlist, List *sortClause);
+extern Oid transformConflictWithinClause(ParseState *pstate, InsertStmt *stmt);
 
 extern List *addTargetToSortList(ParseState *pstate, TargetEntry *tle,
 					List *sortlist, List *targetlist, SortBy *sortby,
diff --git a/src/include/parser/parse_relation.h b/src/include/parser/parse_relation.h
index d8b9493..56cd10e 100644
--- a/src/include/parser/parse_relation.h
+++ b/src/include/parser/parse_relation.h
@@ -40,8 +40,8 @@ extern Node *colNameToVar(ParseState *pstate, char *colname, bool localonly,
 			 int location);
 extern void markVarForSelectPriv(ParseState *pstate, Var *var,
 					 RangeTblEntry *rte);
-extern Relation parserOpenTable(ParseState *pstate, const RangeVar *relation,
-				int lockmode);
+extern Relation parserOpenRelation(ParseState *pstate,
+					const RangeVar *relation, int lockmode);
 extern RangeTblEntry *addRangeTableEntry(ParseState *pstate,
 				   RangeVar *relation,
 				   Alias *alias,
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 37b6cbb..8f8583c 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -52,6 +52,7 @@ typedef LockInfoData *LockInfo;
  */
 typedef struct RelationAmInfo
 {
+	FmgrInfo	amlock;
 	FmgrInfo	aminsert;
 	FmgrInfo	ambeginscan;
 	FmgrInfo	amgettuple;
diff --git a/src/include/utils/tqual.h b/src/include/utils/tqual.h
index ae285c3..3e8ed88 100644
--- a/src/include/utils/tqual.h
+++ b/src/include/utils/tqual.h
@@ -89,6 +89,7 @@ extern bool HeapTupleIsSurelyDead(HeapTuple htup,
 extern void HeapTupleSetHintBits(HeapTupleHeader tuple, Buffer buffer,
 					 uint16 infomask, TransactionId xid);
 extern bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple);
+extern bool XidInProgress(TransactionId xid, Snapshot snapshot);
 
 /*
  * To avoid leaking to much knowledge about reorderbuffer implementation
-- 
1.9.1

