From e9ad9f947d6d553ce8ec29feb8560dff48f166b6 Mon Sep 17 00:00:00 2001
From: amit <amitlangote09@gmail.com>
Date: Wed, 16 Aug 2017 11:36:14 +0900
Subject: [PATCH 1/3] Relieve RelationGetPartitionDispatchInfo() of doing any
 locking

Anyone who wants to call RelationGetPartitionDispatchInfo() must first
acquire locks using find_all_inheritors.

Doing it this way gets rid of the possibility of a deadlock when partitions
are concurrently locked, because RelationGetPartitionDispatchInfo would lock
the partitions in one order and find_all_inheritors would in another.

Reported-by: Amit Khandekar, Robert Haas
Reports: https://postgr.es/m/CAJ3gD9fdjk2O8aPMXidCeYeB-mFB%3DwY9ZLfe8cQOfG4bTqVGyQ%40mail.gmail.com
         https://postgr.es/m/CA%2BTgmobwbh12OJerqAGyPEjb_%2B2y7T0nqRKTcjed6L4NTET6Fg%40mail.gmail.com
---
 src/backend/catalog/partition.c |   52 ++++++++++++++++++---------------------
 src/backend/executor/execMain.c |    7 +++---
 src/include/catalog/partition.h |    3 +--
 3 files changed, 29 insertions(+), 33 deletions(-)

diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c
index c1a307c..e5dc42d 100644
--- a/src/backend/catalog/partition.c
+++ b/src/backend/catalog/partition.c
@@ -998,14 +998,22 @@ get_partition_qual_relid(Oid relid)
 /*
  * RelationGetPartitionDispatchInfo
  *		Returns information necessary to route tuples down a partition tree
+ *		rooted at "rel" as an array of PartitionDispatch entries.
  *
- * All the partitions will be locked with lockmode, unless it is NoLock.
- * A list of the OIDs of all the leaf partitions of rel is returned in
- * *leaf_part_oids.
+ * The array contains as many entries as the number of partitioned tables in
+ * the partition tree. The number of entries is returned in "num_parted". The
+ * functions also returns a list of the OIDs of all the leaf partitions of rel
+ * in "leaf_part_oids".
+ *
+ * The function traverses the the partition tree using relcaches of partitioned
+ * tables within it. Hence all the relations in the partition tree including
+ * the root must have been locked (with at least AccessShareLock) by the caller
+ * typically using find_all_inheritors() to preserve the locking order to avoid
+ * deadlocks.
  */
 PartitionDispatch *
-RelationGetPartitionDispatchInfo(Relation rel, int lockmode,
-								 int *num_parted, List **leaf_part_oids)
+RelationGetPartitionDispatchInfo(Relation rel, int *num_parted,
+								 List **leaf_part_oids)
 {
 	PartitionDispatchData **pd;
 	List	   *all_parts = NIL,
@@ -1019,14 +1027,12 @@ RelationGetPartitionDispatchInfo(Relation rel, int lockmode,
 				offset;
 
 	/*
-	 * Lock partitions and make a list of the partitioned ones to prepare
-	 * their PartitionDispatch objects below.
-	 *
-	 * Cannot use find_all_inheritors() here, because then the order of OIDs
-	 * in parted_rels list would be unknown, which does not help, because we
-	 * assign indexes within individual PartitionDispatch in an order that is
-	 * predetermined (determined by the order of OIDs in individual partition
-	 * descriptors).
+	 * For every partitioned table in the tree, starting with the root
+	 * partitioned table, add its relcache entry to parted_rels, while also
+	 * queuing its partitions (in the order in which they appear in the
+	 * partition descriptor) to be looked at later in the same loop.  This is
+	 * a bit tricky but works because the foreach() macro doesn't fetch the
+	 * next list element until the bottom of the loop.
 	 */
 	*num_parted = 1;
 	parted_rels = list_make1(rel);
@@ -1035,29 +1041,19 @@ RelationGetPartitionDispatchInfo(Relation rel, int lockmode,
 	APPEND_REL_PARTITION_OIDS(rel, all_parts, all_parents);
 	forboth(lc1, all_parts, lc2, all_parents)
 	{
-		Relation	partrel = heap_open(lfirst_oid(lc1), lockmode);
+		Oid			partrelid = lfirst_oid(lc1);
 		Relation	parent = lfirst(lc2);
-		PartitionDesc partdesc = RelationGetPartitionDesc(partrel);
 
-		/*
-		 * If this partition is a partitioned table, add its children to the
-		 * end of the list, so that they are processed as well.
-		 */
-		if (partdesc)
+		if (get_rel_relkind(partrelid) == RELKIND_PARTITIONED_TABLE)
 		{
+			/* Already locked by the caller. */
+			Relation	partrel = heap_open(partrelid, NoLock);
+
 			(*num_parted)++;
 			parted_rels = lappend(parted_rels, partrel);
 			parted_rel_parents = lappend(parted_rel_parents, parent);
 			APPEND_REL_PARTITION_OIDS(partrel, all_parts, all_parents);
 		}
-		else
-			heap_close(partrel, NoLock);
-
-		/*
-		 * We keep the partitioned ones open until we're done using the
-		 * information being collected here (for example, see
-		 * ExecEndModifyTable).
-		 */
 	}
 
 	/*
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index 6671a25..91a3766 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -43,6 +43,7 @@
 #include "access/xact.h"
 #include "catalog/namespace.h"
 #include "catalog/partition.h"
+#include "catalog/pg_inherits_fn.h"
 #include "catalog/pg_publication.h"
 #include "commands/matview.h"
 #include "commands/trigger.h"
@@ -3249,9 +3250,9 @@ ExecSetupPartitionTupleRouting(Relation rel,
 	int			i;
 	ResultRelInfo *leaf_part_rri;
 
-	/* Get the tuple-routing information and lock partitions */
-	*pd = RelationGetPartitionDispatchInfo(rel, RowExclusiveLock, num_parted,
-										   &leaf_parts);
+	/* Get the tuple-routing information after locking all the partitions */
+	find_all_inheritors(RelationGetRelid(rel), RowExclusiveLock, NULL);
+	*pd = RelationGetPartitionDispatchInfo(rel, num_parted, &leaf_parts);
 	*num_partitions = list_length(leaf_parts);
 	*partitions = (ResultRelInfo *) palloc(*num_partitions *
 										   sizeof(ResultRelInfo));
diff --git a/src/include/catalog/partition.h b/src/include/catalog/partition.h
index bef7a0f..2283c67 100644
--- a/src/include/catalog/partition.h
+++ b/src/include/catalog/partition.h
@@ -88,8 +88,7 @@ extern Expr *get_partition_qual_relid(Oid relid);
 
 /* For tuple routing */
 extern PartitionDispatch *RelationGetPartitionDispatchInfo(Relation rel,
-								 int lockmode, int *num_parted,
-								 List **leaf_part_oids);
+								 int *num_parted, List **leaf_part_oids);
 extern void FormPartitionKeyDatum(PartitionDispatch pd,
 					  TupleTableSlot *slot,
 					  EState *estate,
-- 
1.7.9.5

