From acd49444d49835da01aeb31f74872a081e608c53 Mon Sep 17 00:00:00 2001
From: amit <amitlangote09@gmail.com>
Date: Mon, 3 Apr 2017 15:37:00 +0900
Subject: [PATCH] Fix possibility of deadlock when dropping partitions

The partition relation is locked much earlier than
heap_drop_with_catalog, which is the first point in the drop command's
processing where the parent relation is locked.  That reverses the
order in which the locking should happen, which is to lock the parent
before any of its partitions.
---
 src/backend/catalog/heap.c       | 30 +++++++++++++++++-------------
 src/backend/commands/tablecmds.c | 30 ++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+), 13 deletions(-)

diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index 1cbe7f907f..2dc533ba43 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -1758,29 +1758,33 @@ void
 heap_drop_with_catalog(Oid relid)
 {
 	Relation	rel;
+	HeapTuple	tuple;
 	Oid			parentOid;
 	Relation	parent = NULL;
 
 	/*
-	 * Open and lock the relation.
+	 * To drop a partition safely, we must grab exclusive lock on its parent,
+	 * because another backend might be about to execute a query on the parent
+	 * table.  If it relies on previously cached partition descriptor, then
+	 * it could attempt to access the just-dropped relation as its partition.
+	 * We must therefore take a table lock strong enough to prevent all
+	 * queries on the table from proceeding until we commit and send out a
+	 * shared-cache-inval notice that will make them update their index lists.
 	 */
-	rel = relation_open(relid, AccessExclusiveLock);
-
-	/*
-	 * If the relation is a partition, we must grab exclusive lock on its
-	 * parent because we need to update its partition descriptor. We must take
-	 * a table lock strong enough to prevent all queries on the parent from
-	 * proceeding until we commit and send out a shared-cache-inval notice
-	 * that will make them update their partition descriptor. Sometimes, doing
-	 * this is cycles spent uselessly, especially if the parent will be
-	 * dropped as part of the same command anyway.
-	 */
-	if (rel->rd_rel->relispartition)
+	tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
+	if (((Form_pg_class) GETSTRUCT(tuple))->relispartition)
 	{
 		parentOid = get_partition_parent(relid);
 		parent = heap_open(parentOid, AccessExclusiveLock);
 	}
 
+	ReleaseSysCache(tuple);
+
+	/*
+	 * Open and lock the relation.
+	 */
+	rel = relation_open(relid, AccessExclusiveLock);
+
 	/*
 	 * There can no longer be anyone *else* touching the relation, but we
 	 * might still have open queries or cursors, or pending trigger events, in
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index d418d56b54..622a572580 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -271,6 +271,7 @@ struct DropRelationCallbackState
 {
 	char		relkind;
 	Oid			heapOid;
+	Oid			partParentOid;
 	bool		concurrent;
 };
 
@@ -1041,6 +1042,7 @@ RemoveRelations(DropStmt *drop)
 		/* Look up the appropriate relation using namespace search. */
 		state.relkind = relkind;
 		state.heapOid = InvalidOid;
+		state.partParentOid = InvalidOid;
 		state.concurrent = drop->concurrent;
 		relOid = RangeVarGetRelidExtended(rel, lockmode, true,
 										  false,
@@ -1070,6 +1072,8 @@ RemoveRelations(DropStmt *drop)
 /*
  * Before acquiring a table lock, check whether we have sufficient rights.
  * In the case of DROP INDEX, also try to lock the table before the index.
+ * Also, if the table to be dropped is a partition, we try to lock the parent
+ * first.
  */
 static void
 RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
@@ -1079,6 +1083,7 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
 	struct DropRelationCallbackState *state;
 	char		relkind;
 	char		expected_relkind;
+	bool		is_partition;
 	Form_pg_class classform;
 	LOCKMODE	heap_lockmode;
 
@@ -1098,6 +1103,17 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
 		state->heapOid = InvalidOid;
 	}
 
+	/*
+	 * Similarly, if we previously locked some other partition's heap, and
+	 * the name we're looking up no longer refers to that relation, release
+	 * the now-useless lock.
+	 */
+	if (relOid != oldRelOid && OidIsValid(state->partParentOid))
+	{
+		UnlockRelationOid(state->partParentOid, AccessExclusiveLock);
+		state->partParentOid = InvalidOid;
+	}
+
 	/* Didn't find a relation, so no need for locking or permission checks. */
 	if (!OidIsValid(relOid))
 		return;
@@ -1106,6 +1122,7 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
 	if (!HeapTupleIsValid(tuple))
 		return;					/* concurrently dropped, so nothing to do */
 	classform = (Form_pg_class) GETSTRUCT(tuple);
+	is_partition = classform->relispartition;
 
 	/*
 	 * Both RELKIND_RELATION and RELKIND_PARTITIONED_TABLE are OBJECT_TABLE,
@@ -1149,6 +1166,19 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
 		if (OidIsValid(state->heapOid))
 			LockRelationOid(state->heapOid, heap_lockmode);
 	}
+
+	/*
+	 * Similarly, if the relation is a partition, we must acquire lock on its
+	 * parent before locking the partition.  That's because queries lock the
+	 * parent before its partitions, so we risk deadlock it we do it the other
+	 * way around.
+	 */
+	if (is_partition && relOid != oldRelOid)
+	{
+		state->partParentOid = get_partition_parent(relOid);
+		if (OidIsValid(state->partParentOid))
+			LockRelationOid(state->partParentOid, AccessExclusiveLock);
+	}
 }
 
 /*
-- 
2.11.0

