From 4fc03463d51cc55de676813a7a8ac7321ecc7be7 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Wed, 6 Apr 2022 07:23:39 -0500
Subject: [PATCH v13 2/2] cluster: early ownership check of partitions

Similar to a556549d7, check the ownership of partitions before queueing them to
be clustered.  Otherwise, we may later wait for an exclusive lock on a partition
only for it to fail the ownership check anyway, causing other queries to wait
behind CLUSTER in the meantime.
---
 src/backend/commands/cluster.c        | 19 +++++++++++++++----
 src/test/regress/expected/cluster.out | 21 +++++++++++++++++++++
 src/test/regress/sql/cluster.sql      | 15 +++++++++++++++
 3 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index 0f0a6e9f018..671ea3f45a2 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -1663,9 +1663,6 @@ get_tables_to_cluster_partitioned(MemoryContext cluster_context, Oid indexOid)
 	/* Do not lock the children until they're processed */
 	inhoids = find_all_inheritors(indexOid, NoLock, NULL);
 
-	/* Use a permanent memory context for the result list */
-	old_context = MemoryContextSwitchTo(cluster_context);
-
 	foreach(lc, inhoids)
 	{
 		Oid			indexrelid = lfirst_oid(lc);
@@ -1676,12 +1673,26 @@ get_tables_to_cluster_partitioned(MemoryContext cluster_context, Oid indexOid)
 		if (get_rel_relkind(indexrelid) != RELKIND_INDEX)
 			continue;
 
+		/*
+		 * Silently skip partitions which the user has no access to.  They'll
+		 * be skipped later anyway, but this avoids blocking before the
+		 * permission check if another session is reading the table.
+		 */
+		if (!pg_class_ownercheck(relid, GetUserId()) &&
+				(!pg_database_ownercheck(MyDatabaseId, GetUserId()) ||
+				 IsSharedRelation(relid)))
+			continue;
+
+		/* Use a permanent memory context for the result list */
+		old_context = MemoryContextSwitchTo(cluster_context);
+
 		rtc = (RelToCluster *) palloc(sizeof(RelToCluster));
 		rtc->tableOid = relid;
 		rtc->indexOid = indexrelid;
 		rtcs = lappend(rtcs, rtc);
+
+		MemoryContextSwitchTo(old_context);
 	}
 
-	MemoryContextSwitchTo(old_context);
 	return rtcs;
 }
diff --git a/src/test/regress/expected/cluster.out b/src/test/regress/expected/cluster.out
index 953818c74e1..08e10d89dcf 100644
--- a/src/test/regress/expected/cluster.out
+++ b/src/test/regress/expected/cluster.out
@@ -493,6 +493,27 @@ ERROR:  cannot mark index clustered in partitioned table
 ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
 ERROR:  cannot mark index clustered in partitioned table
 DROP TABLE clstrpart;
+-- Ownership of partitions is checked
+CREATE TABLE ptnowner(i int unique) PARTITION BY LIST (i);
+CREATE INDEX ptnowner_i_idx ON ptnowner(i);
+CREATE TABLE ptnowner1 PARTITION OF ptnowner FOR VALUES IN (1);
+CREATE ROLE ptnowner;
+CREATE TABLE ptnowner2 PARTITION OF ptnowner FOR VALUES IN (2);
+ALTER TABLE ptnowner1 OWNER TO ptnowner;
+ALTER TABLE ptnowner OWNER TO ptnowner;
+SET SESSION AUTHORIZATION ptnowner;
+CREATE TEMP TABLE ptnowner_oldnodes AS SELECT oid, relname, relfilenode FROM pg_partition_tree('ptnowner') AS tree JOIN pg_class AS c ON c.oid=tree.relid;
+CLUSTER ptnowner USING ptnowner_i_idx;
+SELECT a.relname, a.relfilenode=b.relfilenode FROM pg_class a JOIN ptnowner_oldnodes b USING (oid) ORDER BY a.relname COLLATE "C";
+  relname  | ?column? 
+-----------+----------
+ ptnowner  | t
+ ptnowner1 | f
+ ptnowner2 | t
+(3 rows)
+
+RESET SESSION AUTHORIZATION;
+DROP TABLE ptnowner;
 -- Test CLUSTER with external tuplesorting
 create table clstr_4 as select * from tenk1;
 create index cluster_sort on clstr_4 (hundred, thousand, tenthous);
diff --git a/src/test/regress/sql/cluster.sql b/src/test/regress/sql/cluster.sql
index 5601684ee3f..631eec1b739 100644
--- a/src/test/regress/sql/cluster.sql
+++ b/src/test/regress/sql/cluster.sql
@@ -229,6 +229,21 @@ ALTER TABLE clstrpart SET WITHOUT CLUSTER;
 ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
 DROP TABLE clstrpart;
 
+-- Ownership of partitions is checked
+CREATE TABLE ptnowner(i int unique) PARTITION BY LIST (i);
+CREATE INDEX ptnowner_i_idx ON ptnowner(i);
+CREATE TABLE ptnowner1 PARTITION OF ptnowner FOR VALUES IN (1);
+CREATE ROLE ptnowner;
+CREATE TABLE ptnowner2 PARTITION OF ptnowner FOR VALUES IN (2);
+ALTER TABLE ptnowner1 OWNER TO ptnowner;
+ALTER TABLE ptnowner OWNER TO ptnowner;
+SET SESSION AUTHORIZATION ptnowner;
+CREATE TEMP TABLE ptnowner_oldnodes AS SELECT oid, relname, relfilenode FROM pg_partition_tree('ptnowner') AS tree JOIN pg_class AS c ON c.oid=tree.relid;
+CLUSTER ptnowner USING ptnowner_i_idx;
+SELECT a.relname, a.relfilenode=b.relfilenode FROM pg_class a JOIN ptnowner_oldnodes b USING (oid) ORDER BY a.relname COLLATE "C";
+RESET SESSION AUTHORIZATION;
+DROP TABLE ptnowner;
+
 -- Test CLUSTER with external tuplesorting
 
 create table clstr_4 as select * from tenk1;
-- 
2.17.1

