CREATE INDEX CONCURRENTLY on partitioned index

Started by Justin Pryzby about 5 years ago — 40 messages
#1 Justin Pryzby
pryzby@telsasoft.com
3 attachment(s)

Forking this thread, since the existing CFs have been closed.
/messages/by-id/20200914143102.GX18552@telsasoft.com

The strategy is to create catalog entries for all tables with indisvalid=false,
and then process them like REINDEX CONCURRENTLY. If it's interrupted, it
leaves INVALID indexes, which can be cleaned up with DROP or REINDEX, same as
CIC on a plain table.

On Sat, Aug 08, 2020 at 01:37:44AM -0500, Justin Pryzby wrote:

On Mon, Jun 15, 2020 at 09:37:42PM +0900, Michael Paquier wrote:

On Mon, Jun 15, 2020 at 08:15:05PM +0800, 李杰(慎追) wrote:

As shown above, an error occurred while creating an index in the second partition.
It can be clearly seen that the index of the partitioned table is invalid
and the index of the first partition is normal, the second partition is invalid,
and the Third Partition index does not exist at all.

That's a problem. I really think that we should make the steps of the
concurrent operation consistent across all relations, meaning that all
the indexes should be created as invalid for all the parts of the
partition tree, including partitioned tables as well as their
partitions, in the same transaction. Then a second new transaction
gets used for the index build, followed by a third one for the
validation that switches the indexes to become valid.

Note that the mentioned problem wasn't serious: there was a missing index on
a child table, therefore the parent index was invalid, as intended. However, I
agree that it's not nice that the command can fail so easily and leave behind
some indexes created successfully, some failed, and some not created at all.

But I took your advice of initially creating invalid indexes.

...

That gave me the idea to layer CIC on top of Reindex, since I think it does
exactly what's needed.

On Sat, Sep 26, 2020 at 02:56:55PM -0500, Justin Pryzby wrote:

On Thu, Sep 24, 2020 at 05:11:03PM +0900, Michael Paquier wrote:

It would be good also to check if
we have a partition index tree that maps partially with a partition
table tree (i.e. not all table partitions have a partition index), where
these don't get clustered because there is no index to work on.

This should not happen, since an incomplete partitioned index is "invalid".

--
Justin

Attachments:

v10-0001-Allow-CREATE-INDEX-CONCURRENTLY-on-partitioned-t.patch (text/x-diff; charset=us-ascii) [Download]
From 9f146ace80596f0177163de34556d8ef581fd2e3 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Sat, 6 Jun 2020 17:42:23 -0500
Subject: [PATCH v10 1/3] Allow CREATE INDEX CONCURRENTLY on partitioned table

Note, this effectively reverts 050098b14, so take care to not reintroduce the
bug it fixed.

XXX: does pgstat_progress_update_param() break other commands progress ?
---
 doc/src/sgml/ref/create_index.sgml     |   9 --
 src/backend/commands/indexcmds.c       | 141 ++++++++++++++++++-------
 src/test/regress/expected/indexing.out |  60 ++++++++++-
 src/test/regress/sql/indexing.sql      |  18 +++-
 4 files changed, 172 insertions(+), 56 deletions(-)

diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml
index 749db2845e..ba4424d379 100644
--- a/doc/src/sgml/ref/create_index.sgml
+++ b/doc/src/sgml/ref/create_index.sgml
@@ -661,15 +661,6 @@ Indexes:
     cannot.
    </para>
 
-   <para>
-    Concurrent builds for indexes on partitioned tables are currently not
-    supported.  However, you may concurrently build the index on each
-    partition individually and then finally create the partitioned index
-    non-concurrently in order to reduce the time where writes to the
-    partitioned table will be locked out.  In this case, building the
-    partitioned index is a metadata only operation.
-   </para>
-
   </refsect2>
  </refsect1>
 
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 996f1ed070..c1dd4c8362 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -93,6 +93,7 @@ static bool ReindexRelationConcurrently(Oid relationOid, int options);
 
 static void ReindexPartitions(Oid relid, int options, bool isTopLevel);
 static void ReindexMultipleInternal(List *relids, int options);
+static void reindex_invalid_child_indexes(Oid indexRelationId);
 static void reindex_error_callback(void *args);
 static void update_relispartition(Oid relationId, bool newval);
 static bool CompareOpclassOptions(Datum *opts1, Datum *opts2, int natts);
@@ -667,17 +668,6 @@ DefineIndex(Oid relationId,
 	partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE;
 	if (partitioned)
 	{
-		/*
-		 * Note: we check 'stmt->concurrent' rather than 'concurrent', so that
-		 * the error is thrown also for temporary tables.  Seems better to be
-		 * consistent, even though we could do it on temporary table because
-		 * we're not actually doing it concurrently.
-		 */
-		if (stmt->concurrent)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("cannot create index on partitioned table \"%s\" concurrently",
-							RelationGetRelationName(rel))));
 		if (stmt->excludeOpNames)
 			ereport(ERROR,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
@@ -1111,6 +1101,11 @@ DefineIndex(Oid relationId,
 		if (pd->nparts != 0)
 			flags |= INDEX_CREATE_INVALID;
 	}
+	else if (concurrent && OidIsValid(parentIndexId))
+	{
+		/* If concurrent, initially build index partitions as "invalid" */
+		flags |= INDEX_CREATE_INVALID;
+	}
 
 	if (stmt->deferrable)
 		constr_flags |= INDEX_CONSTR_CREATE_DEFERRABLE;
@@ -1163,6 +1158,14 @@ DefineIndex(Oid relationId,
 		 */
 		if (!stmt->relation || stmt->relation->inh)
 		{
+			/*
+			 * Need to close the relation before recursing into children, so
+			 * copy needed data into a longlived context.
+			 */
+
+			MemoryContext	ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX",
+					ALLOCSET_DEFAULT_SIZES);
+			MemoryContext	oldcontext = MemoryContextSwitchTo(ind_context);
 			PartitionDesc partdesc = RelationGetPartitionDesc(rel);
 			int			nparts = partdesc->nparts;
 			Oid		   *part_oids = palloc(sizeof(Oid) * nparts);
@@ -1170,12 +1173,15 @@ DefineIndex(Oid relationId,
 			TupleDesc	parentDesc;
 			Oid		   *opfamOids;
 
+			// If concurrent, maybe this should be done after excluding indexes which already exist ?
 			pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_TOTAL,
 										 nparts);
 
 			memcpy(part_oids, partdesc->oids, sizeof(Oid) * nparts);
+			parentDesc = CreateTupleDescCopy(RelationGetDescr(rel));
+			table_close(rel, NoLock);
+			MemoryContextSwitchTo(oldcontext);
 
-			parentDesc = RelationGetDescr(rel);
 			opfamOids = palloc(sizeof(Oid) * numberOfKeyAttributes);
 			for (i = 0; i < numberOfKeyAttributes; i++)
 				opfamOids[i] = get_opclass_family(classObjectId[i]);
@@ -1218,10 +1224,12 @@ DefineIndex(Oid relationId,
 					continue;
 				}
 
+				oldcontext = MemoryContextSwitchTo(ind_context);
 				childidxs = RelationGetIndexList(childrel);
 				attmap =
 					build_attrmap_by_name(RelationGetDescr(childrel),
 										  parentDesc);
+				MemoryContextSwitchTo(oldcontext);
 
 				foreach(cell, childidxs)
 				{
@@ -1292,10 +1300,14 @@ DefineIndex(Oid relationId,
 				 */
 				if (!found)
 				{
-					IndexStmt  *childStmt = copyObject(stmt);
+					IndexStmt  *childStmt;
 					bool		found_whole_row;
 					ListCell   *lc;
 
+					oldcontext = MemoryContextSwitchTo(ind_context);
+					childStmt = copyObject(stmt);
+					MemoryContextSwitchTo(oldcontext);
+
 					/*
 					 * We can't use the same index name for the child index,
 					 * so clear idxname to let the recursive invocation choose
@@ -1347,10 +1359,18 @@ DefineIndex(Oid relationId,
 								createdConstraintId,
 								is_alter_table, check_rights, check_not_in_use,
 								skip_build, quiet);
+					if (concurrent)
+					{
+						PopActiveSnapshot();
+						PushActiveSnapshot(GetTransactionSnapshot());
+						invalidate_parent = true;
+					}
 				}
 
-				pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
-											 i + 1);
+				/* For concurrent build, this is a catalog-only stage */
+				if (!concurrent)
+					pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
+												 i + 1);
 				free_attrmap(attmap);
 			}
 
@@ -1360,51 +1380,42 @@ DefineIndex(Oid relationId,
 			 * invalid, this is incorrect, so update our row to invalid too.
 			 */
 			if (invalidate_parent)
-			{
-				Relation	pg_index = table_open(IndexRelationId, RowExclusiveLock);
-				HeapTuple	tup,
-							newtup;
-
-				tup = SearchSysCache1(INDEXRELID,
-									  ObjectIdGetDatum(indexRelationId));
-				if (!HeapTupleIsValid(tup))
-					elog(ERROR, "cache lookup failed for index %u",
-						 indexRelationId);
-				newtup = heap_copytuple(tup);
-				((Form_pg_index) GETSTRUCT(newtup))->indisvalid = false;
-				CatalogTupleUpdate(pg_index, &tup->t_self, newtup);
-				ReleaseSysCache(tup);
-				table_close(pg_index, RowExclusiveLock);
-				heap_freetuple(newtup);
-			}
-		}
+				index_set_state_flags(indexRelationId, INDEX_DROP_CLEAR_VALID);
+		} else
+			table_close(rel, NoLock);
 
 		/*
 		 * Indexes on partitioned tables are not themselves built, so we're
 		 * done here.
 		 */
-		table_close(rel, NoLock);
 		if (!OidIsValid(parentIndexId))
+		{
+			if (concurrent)
+				reindex_invalid_child_indexes(indexRelationId);
+
 			pgstat_progress_end_command();
+		}
+
 		return address;
 	}
 
-	if (!concurrent)
+	table_close(rel, NoLock);
+	if (!concurrent || OidIsValid(parentIndexId))
 	{
-		/* Close the heap and we're done, in the non-concurrent case */
-		table_close(rel, NoLock);
+		/*
+		 * We're done if this is the top-level index,
+		 * or the catalog-only phase of a partition built concurrently
+		 */
 
-		/* If this is the top-level index, we're done. */
 		if (!OidIsValid(parentIndexId))
 			pgstat_progress_end_command();
 
 		return address;
 	}
 
-	/* save lockrelid and locktag for below, then close rel */
+	/* save lockrelid and locktag for below */
 	heaprelid = rel->rd_lockInfo.lockRelId;
 	SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
-	table_close(rel, NoLock);
 
 	/*
 	 * For a concurrent build, it's important to make the catalog entries
@@ -1586,6 +1597,56 @@ DefineIndex(Oid relationId,
 	return address;
 }
 
+/* Reindex invalid child indexes created earlier */
+static void
+reindex_invalid_child_indexes(Oid indexRelationId)
+{
+	ListCell *lc;
+	int		npart = 0;
+
+	MemoryContext	ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX",
+			ALLOCSET_DEFAULT_SIZES);
+	MemoryContext	oldcontext;
+	List		*childs = find_inheritance_children(indexRelationId, ShareLock);
+	List		*partitions = NIL;
+
+	PreventInTransactionBlock(true, "REINDEX INDEX");
+
+	foreach (lc, childs)
+	{
+		Oid			partoid = lfirst_oid(lc);
+
+		pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
+									 npart++);
+
+		if (get_index_isvalid(partoid) ||
+				!RELKIND_HAS_STORAGE(get_rel_relkind(partoid)))
+			continue;
+
+		/* Save partition OID */
+		oldcontext = MemoryContextSwitchTo(ind_context);
+		partitions = lappend_oid(partitions, partoid);
+		MemoryContextSwitchTo(oldcontext);
+	}
+
+	/*
+	 * Process each partition listed in a separate transaction.  Note that
+	 * this commits and then starts a new transaction immediately.
+	 */
+	ReindexMultipleInternal(partitions, REINDEXOPT_CONCURRENTLY);
+
+	/*
+	 * CIC needs to mark a partitioned index as VALID, which itself
+	 * requires setting READY, which is unset for CIC (even though
+	 * it's meaningless for an index without storage).
+	 * This must be done only while holding a lock which precludes adding
+	 * partitions.
+	 * See also: validatePartitionedIndex().
+	 */
+	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
+	CommandCounterIncrement();
+	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
+}
 
 /*
  * CheckMutability
diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out
index 49b6f7c18f..2fb00a2fa7 100644
--- a/src/test/regress/expected/indexing.out
+++ b/src/test/regress/expected/indexing.out
@@ -50,11 +50,63 @@ select relname, relkind, relhassubclass, inhparent::regclass
 (8 rows)
 
 drop table idxpart;
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
-ERROR:  cannot create index on partitioned table "idxpart" concurrently
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart (a); -- partitioned
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart2 (a); -- leaf
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+ERROR:  could not create unique index "idxpart2_a_idx2_ccnew"
+DETAIL:  Key (a)=(10) is duplicated.
+\d idxpart
+        Partitioned table "public.idxpart"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition key: RANGE (a)
+Indexes:
+    "idxpart_a_idx" btree (a)
+    "idxpart_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 2 (Use \d+ to list them.)
+
+\d idxpart1
+        Partitioned table "public.idxpart1"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart1_a_idx" btree (a) INVALID
+    "idxpart1_a_idx1" btree (a)
+    "idxpart1_a_idx2" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart2
+              Table "public.idxpart2"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (10) TO (20)
+Indexes:
+    "idxpart2_a_idx" btree (a)
+    "idxpart2_a_idx1" btree (a)
+    "idxpart2_a_idx2" UNIQUE, btree (a) INVALID
+    "idxpart2_a_idx2_ccnew" UNIQUE, btree (a) INVALID
+
 drop table idxpart;
 -- Verify bugfix with query on indexed partitioned table with no partitions
 -- https://postgr.es/m/20180124162006.pmapfiznhgngwtjf@alvherre.pgsql
diff --git a/src/test/regress/sql/indexing.sql b/src/test/regress/sql/indexing.sql
index fc1479dca6..610c12de9e 100644
--- a/src/test/regress/sql/indexing.sql
+++ b/src/test/regress/sql/indexing.sql
@@ -29,10 +29,22 @@ select relname, relkind, relhassubclass, inhparent::regclass
 	where relname like 'idxpart%' order by relname;
 drop table idxpart;
 
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart (a); -- partitioned
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart2 (a); -- leaf
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+\d idxpart
+\d idxpart1
+\d idxpart2
 drop table idxpart;
 
 -- Verify bugfix with query on indexed partitioned table with no partitions
-- 
2.17.0

v10-0002-Add-SKIPVALID-flag-for-more-integration.patch (text/x-diff; charset=us-ascii) [Download]
From dad382a194d5c7c82204fb011f1fd08507d20d8b Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Fri, 30 Oct 2020 16:23:02 -0500
Subject: [PATCH v10 2/3] Add SKIPVALID flag for more integration

---
 src/backend/commands/indexcmds.c | 54 +++++++++++---------------------
 src/include/nodes/parsenodes.h   |  7 +++--
 2 files changed, 22 insertions(+), 39 deletions(-)

diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index c1dd4c8362..f4d49f5bf6 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -1597,53 +1597,30 @@ DefineIndex(Oid relationId,
 	return address;
 }
 
-/* Reindex invalid child indexes created earlier */
+/*
+ * Reindex invalid child indexes created earlier thereby validating
+ * the parent index.
+ */
 static void
 reindex_invalid_child_indexes(Oid indexRelationId)
 {
-	ListCell *lc;
-	int		npart = 0;
-
-	MemoryContext	ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX",
-			ALLOCSET_DEFAULT_SIZES);
-	MemoryContext	oldcontext;
-	List		*childs = find_inheritance_children(indexRelationId, ShareLock);
-	List		*partitions = NIL;
-
-	PreventInTransactionBlock(true, "REINDEX INDEX");
-
-	foreach (lc, childs)
-	{
-		Oid			partoid = lfirst_oid(lc);
-
-		pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
-									 npart++);
-
-		if (get_index_isvalid(partoid) ||
-				!RELKIND_HAS_STORAGE(get_rel_relkind(partoid)))
-			continue;
-
-		/* Save partition OID */
-		oldcontext = MemoryContextSwitchTo(ind_context);
-		partitions = lappend_oid(partitions, partoid);
-		MemoryContextSwitchTo(oldcontext);
-	}
-
-	/*
-	 * Process each partition listed in a separate transaction.  Note that
-	 * this commits and then starts a new transaction immediately.
-	 */
-	ReindexMultipleInternal(partitions, REINDEXOPT_CONCURRENTLY);
-
 	/*
 	 * CIC needs to mark a partitioned index as VALID, which itself
 	 * requires setting READY, which is unset for CIC (even though
 	 * it's meaningless for an index without storage).
 	 * This must be done only while holding a lock which precludes adding
 	 * partitions.
-	 * See also: validatePartitionedIndex().
 	 */
+	CommandCounterIncrement();
 	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
+
+	/*
+	 * Process each partition listed in a separate transaction.  Note that
+	 * this commits and then starts a new transaction immediately.
+	 */
+	ReindexPartitions(indexRelationId,
+			REINDEXOPT_CONCURRENTLY | REINDEXOPT_SKIPVALID, true);
+
 	CommandCounterIncrement();
 	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
 }
@@ -2927,6 +2904,11 @@ ReindexPartitions(Oid relid, int options, bool isTopLevel)
 		if (!RELKIND_HAS_STORAGE(partkind))
 			continue;
 
+		/* Skip valid indexes, if requested */
+		if ((options & REINDEXOPT_SKIPVALID) != 0 &&
+				get_index_isvalid(partoid))
+			continue;
+
 		Assert(partkind == RELKIND_INDEX ||
 			   partkind == RELKIND_RELATION);
 
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 93fd796163..d099378951 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -3356,10 +3356,11 @@ typedef struct ConstraintsSetStmt
  */
 
 /* Reindex options */
-#define REINDEXOPT_VERBOSE (1 << 0) /* print progress info */
+#define REINDEXOPT_VERBOSE	(1 << 0) /* print progress info */
 #define REINDEXOPT_REPORT_PROGRESS (1 << 1) /* report pgstat progress */
-#define REINDEXOPT_MISSING_OK (1 << 2)	/* skip missing relations */
-#define REINDEXOPT_CONCURRENTLY (1 << 3)	/* concurrent mode */
+#define REINDEXOPT_MISSING_OK	(1 << 2)	/* skip missing relations */
+#define REINDEXOPT_CONCURRENTLY	(1 << 3)	/* concurrent mode */
+#define REINDEXOPT_SKIPVALID	(1 << 4)	/* skip valid indexes */
 
 typedef enum ReindexObjectType
 {
-- 
2.17.0

v10-0003-ReindexPartitions-to-set-indisvalid.patch (text/x-diff; charset=us-ascii) [Download]
From 2335546e703996d1f4fdf7a9f3c5a930f74d4bb7 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Fri, 30 Oct 2020 23:52:31 -0500
Subject: [PATCH v10 3/3] ReindexPartitions() to set indisvalid..

Something like this should probably have been included in
a6642b3ae060976b42830b7dc8f29ec190ab05e4

See also 71a05b223, which mentioned the absence of any way to validate an
index.
---
 src/backend/commands/indexcmds.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index f4d49f5bf6..77390f0c1e 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -1608,8 +1608,6 @@ reindex_invalid_child_indexes(Oid indexRelationId)
 	 * CIC needs to mark a partitioned index as VALID, which itself
 	 * requires setting READY, which is unset for CIC (even though
 	 * it's meaningless for an index without storage).
-	 * This must be done only while holding a lock which precludes adding
-	 * partitions.
 	 */
 	CommandCounterIncrement();
 	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
@@ -1620,9 +1618,6 @@ reindex_invalid_child_indexes(Oid indexRelationId)
 	 */
 	ReindexPartitions(indexRelationId,
 			REINDEXOPT_CONCURRENTLY | REINDEXOPT_SKIPVALID, true);
-
-	CommandCounterIncrement();
-	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
 }
 
 /*
@@ -2924,6 +2919,24 @@ ReindexPartitions(Oid relid, int options, bool isTopLevel)
 	 */
 	ReindexMultipleInternal(partitions, options);
 
+	/*
+	 * If indexes exist on all of the partitioned table's children, and we
+	 * just reindexed them, then we know they're valid, and so can mark the
+	 * parent index as valid.
+	 * This handles the case of CREATE INDEX CONCURRENTLY.
+	 * See also: validatePartitionedIndex().
+	 */
+	if (get_rel_relkind(relid) == RELKIND_PARTITIONED_INDEX
+			&& !get_index_isvalid(relid))
+	{
+		Oid	tableoid = IndexGetRelation(relid, false);
+		List	*child_tables = find_all_inheritors(tableoid, ShareLock, NULL);
+
+		/* Both lists include their parent relation as well as any intermediate partitioned rels */
+		if (list_length(inhoids) == list_length(child_tables))
+			index_set_state_flags(relid, INDEX_CREATE_SET_VALID);
+	}
+
 	/*
 	 * Clean up working storage --- note we must do this after
 	 * StartTransactionCommand, else we might be trying to delete the active
-- 
2.17.0

#2 Justin Pryzby
pryzby@telsasoft.com
In reply to: Justin Pryzby (#1)
3 attachment(s)
Re: CREATE INDEX CONCURRENTLY on partitioned index

On Sat, Oct 31, 2020 at 01:31:17AM -0500, Justin Pryzby wrote:

Forking this thread, since the existing CFs have been closed.
/messages/by-id/20200914143102.GX18552@telsasoft.com

The strategy is to create catalog entries for all tables with indisvalid=false,
and then process them like REINDEX CONCURRENTLY. If it's interrupted, it
leaves INVALID indexes, which can be cleaned up with DROP or REINDEX, same as
CIC on a plain table.

On Sat, Aug 08, 2020 at 01:37:44AM -0500, Justin Pryzby wrote:

On Mon, Jun 15, 2020 at 09:37:42PM +0900, Michael Paquier wrote:

On Mon, Jun 15, 2020 at 08:15:05PM +0800, 李杰(慎追) wrote:

As shown above, an error occurred while creating an index in the second partition.
It can be clearly seen that the index of the partitioned table is invalid
and the index of the first partition is normal, the second partition is invalid,
and the Third Partition index does not exist at all.

That's a problem. I really think that we should make the steps of the
concurrent operation consistent across all relations, meaning that all
the indexes should be created as invalid for all the parts of the
partition tree, including partitioned tables as well as their
partitions, in the same transaction. Then a second new transaction
gets used for the index build, followed by a third one for the
validation that switches the indexes to become valid.

Note that the mentioned problem wasn't serious: there was a missing index on
a child table, therefore the parent index was invalid, as intended. However, I
agree that it's not nice that the command can fail so easily and leave behind
some indexes created successfully, some failed, and some not created at all.

But I took your advice of initially creating invalid indexes.

...

That gave me the idea to layer CIC on top of Reindex, since I think it does
exactly what's needed.

On Sat, Sep 26, 2020 at 02:56:55PM -0500, Justin Pryzby wrote:

On Thu, Sep 24, 2020 at 05:11:03PM +0900, Michael Paquier wrote:

It would be good also to check if
we have a partition index tree that maps partially with a partition
table tree (i.e. not all table partitions have a partition index), where
these don't get clustered because there is no index to work on.

This should not happen, since an incomplete partitioned index is "invalid".

@cfbot: rebased over recent changes to indexcmds.c

--
Justin

Attachments:

v11-0001-Allow-CREATE-INDEX-CONCURRENTLY-on-partitioned-t.patch (text/x-diff; charset=us-ascii) [Download]
From 05cd4feaa5a1d77a1ae8e3a167a68b2a21af1fc6 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Sat, 6 Jun 2020 17:42:23 -0500
Subject: [PATCH v11 1/9] Allow CREATE INDEX CONCURRENTLY on partitioned table

Note, this effectively reverts 050098b14, so take care to not reintroduce the
bug it fixed.

XXX: does pgstat_progress_update_param() break other commands progress ?
---
 doc/src/sgml/ref/create_index.sgml     |   9 --
 src/backend/commands/indexcmds.c       | 141 ++++++++++++++++++-------
 src/test/regress/expected/indexing.out |  60 ++++++++++-
 src/test/regress/sql/indexing.sql      |  18 +++-
 4 files changed, 172 insertions(+), 56 deletions(-)

diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml
index 29dee5689e..bd4431a3ce 100644
--- a/doc/src/sgml/ref/create_index.sgml
+++ b/doc/src/sgml/ref/create_index.sgml
@@ -661,15 +661,6 @@ Indexes:
     cannot.
    </para>
 
-   <para>
-    Concurrent builds for indexes on partitioned tables are currently not
-    supported.  However, you may concurrently build the index on each
-    partition individually and then finally create the partitioned index
-    non-concurrently in order to reduce the time where writes to the
-    partitioned table will be locked out.  In this case, building the
-    partitioned index is a metadata only operation.
-   </para>
-
   </refsect2>
  </refsect1>
 
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index ca24620fd0..76219381c1 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -68,6 +68,7 @@
 
 
 /* non-export function prototypes */
+static void reindex_invalid_child_indexes(Oid indexRelationId);
 static bool CompareOpclassOptions(Datum *opts1, Datum *opts2, int natts);
 static void CheckPredicate(Expr *predicate);
 static void ComputeIndexAttrs(IndexInfo *indexInfo,
@@ -671,17 +672,6 @@ DefineIndex(Oid relationId,
 	partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE;
 	if (partitioned)
 	{
-		/*
-		 * Note: we check 'stmt->concurrent' rather than 'concurrent', so that
-		 * the error is thrown also for temporary tables.  Seems better to be
-		 * consistent, even though we could do it on temporary table because
-		 * we're not actually doing it concurrently.
-		 */
-		if (stmt->concurrent)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("cannot create index on partitioned table \"%s\" concurrently",
-							RelationGetRelationName(rel))));
 		if (stmt->excludeOpNames)
 			ereport(ERROR,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
@@ -1119,6 +1109,11 @@ DefineIndex(Oid relationId,
 		if (pd->nparts != 0)
 			flags |= INDEX_CREATE_INVALID;
 	}
+	else if (concurrent && OidIsValid(parentIndexId))
+	{
+		/* If concurrent, initially build index partitions as "invalid" */
+		flags |= INDEX_CREATE_INVALID;
+	}
 
 	if (stmt->deferrable)
 		constr_flags |= INDEX_CONSTR_CREATE_DEFERRABLE;
@@ -1171,6 +1166,14 @@ DefineIndex(Oid relationId,
 		 */
 		if (!stmt->relation || stmt->relation->inh)
 		{
+			/*
+			 * Need to close the relation before recursing into children, so
+			 * copy needed data into a longlived context.
+			 */
+
+			MemoryContext	ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX",
+					ALLOCSET_DEFAULT_SIZES);
+			MemoryContext	oldcontext = MemoryContextSwitchTo(ind_context);
 			PartitionDesc partdesc = RelationGetPartitionDesc(rel);
 			int			nparts = partdesc->nparts;
 			Oid		   *part_oids = palloc(sizeof(Oid) * nparts);
@@ -1178,12 +1181,15 @@ DefineIndex(Oid relationId,
 			TupleDesc	parentDesc;
 			Oid		   *opfamOids;
 
+			// If concurrent, maybe this should be done after excluding indexes which already exist ?
 			pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_TOTAL,
 										 nparts);
 
 			memcpy(part_oids, partdesc->oids, sizeof(Oid) * nparts);
+			parentDesc = CreateTupleDescCopy(RelationGetDescr(rel));
+			table_close(rel, NoLock);
+			MemoryContextSwitchTo(oldcontext);
 
-			parentDesc = RelationGetDescr(rel);
 			opfamOids = palloc(sizeof(Oid) * numberOfKeyAttributes);
 			for (i = 0; i < numberOfKeyAttributes; i++)
 				opfamOids[i] = get_opclass_family(classObjectId[i]);
@@ -1226,10 +1232,12 @@ DefineIndex(Oid relationId,
 					continue;
 				}
 
+				oldcontext = MemoryContextSwitchTo(ind_context);
 				childidxs = RelationGetIndexList(childrel);
 				attmap =
 					build_attrmap_by_name(RelationGetDescr(childrel),
 										  parentDesc);
+				MemoryContextSwitchTo(oldcontext);
 
 				foreach(cell, childidxs)
 				{
@@ -1300,10 +1308,14 @@ DefineIndex(Oid relationId,
 				 */
 				if (!found)
 				{
-					IndexStmt  *childStmt = copyObject(stmt);
+					IndexStmt  *childStmt;
 					bool		found_whole_row;
 					ListCell   *lc;
 
+					oldcontext = MemoryContextSwitchTo(ind_context);
+					childStmt = copyObject(stmt);
+					MemoryContextSwitchTo(oldcontext);
+
 					/*
 					 * We can't use the same index name for the child index,
 					 * so clear idxname to let the recursive invocation choose
@@ -1355,10 +1367,18 @@ DefineIndex(Oid relationId,
 								createdConstraintId,
 								is_alter_table, check_rights, check_not_in_use,
 								skip_build, quiet);
+					if (concurrent)
+					{
+						PopActiveSnapshot();
+						PushActiveSnapshot(GetTransactionSnapshot());
+						invalidate_parent = true;
+					}
 				}
 
-				pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
-											 i + 1);
+				/* For concurrent build, this is a catalog-only stage */
+				if (!concurrent)
+					pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
+												 i + 1);
 				free_attrmap(attmap);
 			}
 
@@ -1368,51 +1388,42 @@ DefineIndex(Oid relationId,
 			 * invalid, this is incorrect, so update our row to invalid too.
 			 */
 			if (invalidate_parent)
-			{
-				Relation	pg_index = table_open(IndexRelationId, RowExclusiveLock);
-				HeapTuple	tup,
-							newtup;
-
-				tup = SearchSysCache1(INDEXRELID,
-									  ObjectIdGetDatum(indexRelationId));
-				if (!HeapTupleIsValid(tup))
-					elog(ERROR, "cache lookup failed for index %u",
-						 indexRelationId);
-				newtup = heap_copytuple(tup);
-				((Form_pg_index) GETSTRUCT(newtup))->indisvalid = false;
-				CatalogTupleUpdate(pg_index, &tup->t_self, newtup);
-				ReleaseSysCache(tup);
-				table_close(pg_index, RowExclusiveLock);
-				heap_freetuple(newtup);
-			}
-		}
+				index_set_state_flags(indexRelationId, INDEX_DROP_CLEAR_VALID);
+		} else
+			table_close(rel, NoLock);
 
 		/*
 		 * Indexes on partitioned tables are not themselves built, so we're
 		 * done here.
 		 */
-		table_close(rel, NoLock);
 		if (!OidIsValid(parentIndexId))
+		{
+			if (concurrent)
+				reindex_invalid_child_indexes(indexRelationId);
+
 			pgstat_progress_end_command();
+		}
+
 		return address;
 	}
 
-	if (!concurrent)
+	table_close(rel, NoLock);
+	if (!concurrent || OidIsValid(parentIndexId))
 	{
-		/* Close the heap and we're done, in the non-concurrent case */
-		table_close(rel, NoLock);
+		/*
+		 * We're done if this is the top-level index,
+		 * or the catalog-only phase of a partition built concurrently
+		 */
 
-		/* If this is the top-level index, we're done. */
 		if (!OidIsValid(parentIndexId))
 			pgstat_progress_end_command();
 
 		return address;
 	}
 
-	/* save lockrelid and locktag for below, then close rel */
+	/* save lockrelid and locktag for below */
 	heaprelid = rel->rd_lockInfo.lockRelId;
 	SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
-	table_close(rel, NoLock);
 
 	/*
 	 * For a concurrent build, it's important to make the catalog entries
@@ -1606,6 +1617,56 @@ DefineIndex(Oid relationId,
 	return address;
 }
 
+/* Reindex invalid child indexes created earlier */
+static void
+reindex_invalid_child_indexes(Oid indexRelationId)
+{
+	ListCell *lc;
+	int		npart = 0;
+
+	MemoryContext	ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX",
+			ALLOCSET_DEFAULT_SIZES);
+	MemoryContext	oldcontext;
+	List		*childs = find_inheritance_children(indexRelationId, ShareLock);
+	List		*partitions = NIL;
+
+	PreventInTransactionBlock(true, "REINDEX INDEX");
+
+	foreach (lc, childs)
+	{
+		Oid			partoid = lfirst_oid(lc);
+
+		pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
+									 npart++);
+
+		if (get_index_isvalid(partoid) ||
+				!RELKIND_HAS_STORAGE(get_rel_relkind(partoid)))
+			continue;
+
+		/* Save partition OID */
+		oldcontext = MemoryContextSwitchTo(ind_context);
+		partitions = lappend_oid(partitions, partoid);
+		MemoryContextSwitchTo(oldcontext);
+	}
+
+	/*
+	 * Process each partition listed in a separate transaction.  Note that
+	 * this commits and then starts a new transaction immediately.
+	 */
+	ReindexMultipleInternal(partitions, REINDEXOPT_CONCURRENTLY);
+
+	/*
+	 * CIC needs to mark a partitioned index as VALID, which itself
+	 * requires setting READY, which is unset for CIC (even though
+	 * it's meaningless for an index without storage).
+	 * This must be done only while holding a lock which precludes adding
+	 * partitions.
+	 * See also: validatePartitionedIndex().
+	 */
+	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
+	CommandCounterIncrement();
+	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
+}
 
 /*
  * CheckMutability
diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out
index c93f4470c9..f04abc6897 100644
--- a/src/test/regress/expected/indexing.out
+++ b/src/test/regress/expected/indexing.out
@@ -50,11 +50,63 @@ select relname, relkind, relhassubclass, inhparent::regclass
 (8 rows)
 
 drop table idxpart;
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
-ERROR:  cannot create index on partitioned table "idxpart" concurrently
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart (a); -- partitioned
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart2 (a); -- leaf
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+ERROR:  could not create unique index "idxpart2_a_idx2_ccnew"
+DETAIL:  Key (a)=(10) is duplicated.
+\d idxpart
+        Partitioned table "public.idxpart"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition key: RANGE (a)
+Indexes:
+    "idxpart_a_idx" btree (a)
+    "idxpart_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 2 (Use \d+ to list them.)
+
+\d idxpart1
+        Partitioned table "public.idxpart1"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart1_a_idx" btree (a) INVALID
+    "idxpart1_a_idx1" btree (a)
+    "idxpart1_a_idx2" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart2
+              Table "public.idxpart2"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (10) TO (20)
+Indexes:
+    "idxpart2_a_idx" btree (a)
+    "idxpart2_a_idx1" btree (a)
+    "idxpart2_a_idx2" UNIQUE, btree (a) INVALID
+    "idxpart2_a_idx2_ccnew" UNIQUE, btree (a) INVALID
+
 drop table idxpart;
 -- Verify bugfix with query on indexed partitioned table with no partitions
 -- https://postgr.es/m/20180124162006.pmapfiznhgngwtjf@alvherre.pgsql
diff --git a/src/test/regress/sql/indexing.sql b/src/test/regress/sql/indexing.sql
index 42f398b67c..3d4b6e9bc9 100644
--- a/src/test/regress/sql/indexing.sql
+++ b/src/test/regress/sql/indexing.sql
@@ -29,10 +29,22 @@ select relname, relkind, relhassubclass, inhparent::regclass
 	where relname like 'idxpart%' order by relname;
 drop table idxpart;
 
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart (a); -- partitioned
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart2 (a); -- leaf
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+\d idxpart
+\d idxpart1
+\d idxpart2
 drop table idxpart;
 
 -- Verify bugfix with query on indexed partitioned table with no partitions
-- 
2.17.0

v11-0002-Add-SKIPVALID-flag-for-more-integration.patchtext/x-diff; charset=us-asciiDownload
From ba40cea72e3f47e5e4d187316766083db5963fc8 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Fri, 30 Oct 2020 16:23:02 -0500
Subject: [PATCH v11 2/9] Add SKIPVALID flag for more integration

---
 src/backend/commands/indexcmds.c | 54 +++++++++++---------------------
 src/include/nodes/parsenodes.h   |  7 +++--
 2 files changed, 22 insertions(+), 39 deletions(-)

diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 76219381c1..ffc166206b 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -1617,53 +1617,30 @@ DefineIndex(Oid relationId,
 	return address;
 }
 
-/* Reindex invalid child indexes created earlier */
+/*
+ * Reindex invalid child indexes created earlier thereby validating
+ * the parent index.
+ */
 static void
 reindex_invalid_child_indexes(Oid indexRelationId)
 {
-	ListCell *lc;
-	int		npart = 0;
-
-	MemoryContext	ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX",
-			ALLOCSET_DEFAULT_SIZES);
-	MemoryContext	oldcontext;
-	List		*childs = find_inheritance_children(indexRelationId, ShareLock);
-	List		*partitions = NIL;
-
-	PreventInTransactionBlock(true, "REINDEX INDEX");
-
-	foreach (lc, childs)
-	{
-		Oid			partoid = lfirst_oid(lc);
-
-		pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
-									 npart++);
-
-		if (get_index_isvalid(partoid) ||
-				!RELKIND_HAS_STORAGE(get_rel_relkind(partoid)))
-			continue;
-
-		/* Save partition OID */
-		oldcontext = MemoryContextSwitchTo(ind_context);
-		partitions = lappend_oid(partitions, partoid);
-		MemoryContextSwitchTo(oldcontext);
-	}
-
-	/*
-	 * Process each partition listed in a separate transaction.  Note that
-	 * this commits and then starts a new transaction immediately.
-	 */
-	ReindexMultipleInternal(partitions, REINDEXOPT_CONCURRENTLY);
-
 	/*
 	 * CIC needs to mark a partitioned index as VALID, which itself
 	 * requires setting READY, which is unset for CIC (even though
 	 * it's meaningless for an index without storage).
 	 * This must be done only while holding a lock which precludes adding
 	 * partitions.
-	 * See also: validatePartitionedIndex().
 	 */
+	CommandCounterIncrement();
 	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
+
+	/*
+	 * Process each partition listed in a separate transaction.  Note that
+	 * this commits and then starts a new transaction immediately.
+	 */
+	ReindexPartitions(indexRelationId,
+			REINDEXOPT_CONCURRENTLY | REINDEXOPT_SKIPVALID, true);
+
 	CommandCounterIncrement();
 	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
 }
@@ -2947,6 +2924,11 @@ ReindexPartitions(Oid relid, int options, bool isTopLevel)
 		if (!RELKIND_HAS_STORAGE(partkind))
 			continue;
 
+		/* Skip valid indexes, if requested */
+		if ((options & REINDEXOPT_SKIPVALID) != 0 &&
+				get_index_isvalid(partoid))
+			continue;
+
 		Assert(partkind == RELKIND_INDEX ||
 			   partkind == RELKIND_RELATION);
 
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index d1f9ef29ca..1373b57ae7 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -3347,10 +3347,11 @@ typedef struct ConstraintsSetStmt
  */
 
 /* Reindex options */
-#define REINDEXOPT_VERBOSE (1 << 0) /* print progress info */
+#define REINDEXOPT_VERBOSE	(1 << 0) /* print progress info */
 #define REINDEXOPT_REPORT_PROGRESS (1 << 1) /* report pgstat progress */
-#define REINDEXOPT_MISSING_OK (1 << 2)	/* skip missing relations */
-#define REINDEXOPT_CONCURRENTLY (1 << 3)	/* concurrent mode */
+#define REINDEXOPT_MISSING_OK	(1 << 2)	/* skip missing relations */
+#define REINDEXOPT_CONCURRENTLY	(1 << 3)	/* concurrent mode */
+#define REINDEXOPT_SKIPVALID	(1 << 4)	/* skip valid indexes */
 
 typedef enum ReindexObjectType
 {
-- 
2.17.0

v11-0003-ReindexPartitions-to-set-indisvalid.patchtext/x-diff; charset=us-asciiDownload
From b1ff160a715a7e87c93bf4f7179086e406f5dad1 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Fri, 30 Oct 2020 23:52:31 -0500
Subject: [PATCH v11 3/9] ReindexPartitions() to set indisvalid..

Something like this should probably have been included in
a6642b3ae060976b42830b7dc8f29ec190ab05e4

See also 71a05b223, which mentioned the absence of any way to validate an
index.
---
 src/backend/commands/indexcmds.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index ffc166206b..c6a75e887f 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -1628,8 +1628,6 @@ reindex_invalid_child_indexes(Oid indexRelationId)
 	 * CIC needs to mark a partitioned index as VALID, which itself
 	 * requires setting READY, which is unset for CIC (even though
 	 * it's meaningless for an index without storage).
-	 * This must be done only while holding a lock which precludes adding
-	 * partitions.
 	 */
 	CommandCounterIncrement();
 	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
@@ -1640,9 +1638,6 @@ reindex_invalid_child_indexes(Oid indexRelationId)
 	 */
 	ReindexPartitions(indexRelationId,
 			REINDEXOPT_CONCURRENTLY | REINDEXOPT_SKIPVALID, true);
-
-	CommandCounterIncrement();
-	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
 }
 
 /*
@@ -2944,6 +2939,24 @@ ReindexPartitions(Oid relid, int options, bool isTopLevel)
 	 */
 	ReindexMultipleInternal(partitions, options);
 
+	/*
+	 * If indexes exist on all of the partitioned table's children, and we
+	 * just reindexed them, then we know they're valid, and so can mark the
+	 * parent index as valid.
+	 * This handles the case of CREATE INDEX CONCURRENTLY.
+	 * See also: validatePartitionedIndex().
+	 */
+	if (get_rel_relkind(relid) == RELKIND_PARTITIONED_INDEX
+			&& !get_index_isvalid(relid))
+	{
+		Oid	tableoid = IndexGetRelation(relid, false);
+		List	*child_tables = find_all_inheritors(tableoid, ShareLock, NULL);
+
+		/* Both lists include their parent relation as well as any intermediate partitioned rels */
+		if (list_length(inhoids) == list_length(child_tables))
+			index_set_state_flags(relid, INDEX_CREATE_SET_VALID);
+	}
+
 	/*
 	 * Clean up working storage --- note we must do this after
 	 * StartTransactionCommand, else we might be trying to delete the active
-- 
2.17.0

#3Masahiko Sawada
sawada.mshk@gmail.com
In reply to: Justin Pryzby (#2)
Re: CREATE INDEX CONCURRENTLY on partitioned index

On Mon, Nov 30, 2020 at 5:22 AM Justin Pryzby <pryzby@telsasoft.com> wrote:

On Sat, Oct 31, 2020 at 01:31:17AM -0500, Justin Pryzby wrote:

Forking this thread, since the existing CFs have been closed.
/messages/by-id/20200914143102.GX18552@telsasoft.com

The strategy is to create catalog entries for all tables with indisvalid=false,
and then process them like REINDEX CONCURRENTLY. If it's interrupted, it
leaves INVALID indexes, which can be cleaned up with DROP or REINDEX, same as
CIC on a plain table.

On Sat, Aug 08, 2020 at 01:37:44AM -0500, Justin Pryzby wrote:

On Mon, Jun 15, 2020 at 09:37:42PM +0900, Michael Paquier wrote:

On Mon, Jun 15, 2020 at 08:15:05PM +0800, 李杰(慎追) wrote:

As shown above, an error occurred while creating an index in the second partition.
It can be clearly seen that the index of the partitioned table is invalid
and the index of the first partition is normal, the second partition is invalid,
and the Third Partition index does not exist at all.

That's a problem. I really think that we should make the steps of the
concurrent operation consistent across all relations, meaning that all
the indexes should be created as invalid for all the parts of the
partition tree, including partitioned tables as well as their
partitions, in the same transaction. Then a second new transaction
gets used for the index build, followed by a third one for the
validation that switches the indexes to become valid.

Note that the mentioned problem wasn't serious: there was a missing index on
a child table, therefore the parent index was invalid, as intended. However, I
agree that it's not nice that the command can fail so easily and leave behind
some indexes created successfully, some failed, and some not created at all.

But I took your advice of initially creating invalid indexes.

...

That gave me the idea to layer CIC on top of Reindex, since I think it does
exactly what's needed.

On Sat, Sep 26, 2020 at 02:56:55PM -0500, Justin Pryzby wrote:

On Thu, Sep 24, 2020 at 05:11:03PM +0900, Michael Paquier wrote:

It would be good also to check if
we have a partition index tree that maps partially with a partition
table tree (aka not all table partitions have a partition index), where
these don't get clustered because there is no index to work on.

This should not happen, since an incomplete partitioned index is "invalid".

@cfbot: rebased over recent changes to indexcmds.c

Status update for a commitfest entry.

This patch has not been updated and has been in "Waiting on Author" status
since Nov 30. Are you still planning to work on this, Justin? If not, I'm
going to set this entry to "Returned with Feedback" barring
objections.

Regards,

--
Masahiko Sawada
EDB: https://www.enterprisedb.com/

#4Justin Pryzby
pryzby@telsasoft.com
In reply to: Masahiko Sawada (#3)
5 attachment(s)
Re: CREATE INDEX CONCURRENTLY on partitioned index

On Thu, Jan 28, 2021 at 09:51:51PM +0900, Masahiko Sawada wrote:

On Mon, Nov 30, 2020 at 5:22 AM Justin Pryzby <pryzby@telsasoft.com> wrote:

On Sat, Oct 31, 2020 at 01:31:17AM -0500, Justin Pryzby wrote:

Forking this thread, since the existing CFs have been closed.
/messages/by-id/20200914143102.GX18552@telsasoft.com

The strategy is to create catalog entries for all tables with indisvalid=false,
and then process them like REINDEX CONCURRENTLY. If it's interrupted, it
leaves INVALID indexes, which can be cleaned up with DROP or REINDEX, same as
CIC on a plain table.

On Sat, Aug 08, 2020 at 01:37:44AM -0500, Justin Pryzby wrote:

On Mon, Jun 15, 2020 at 09:37:42PM +0900, Michael Paquier wrote:
Note that the mentioned problem wasn't serious: there was a missing index on
a child table, therefore the parent index was invalid, as intended. However, I
agree that it's not nice that the command can fail so easily and leave behind
some indexes created successfully, some failed, and some not created at all.

But I took your advice of initially creating invalid indexes.

...

That gave me the idea to layer CIC on top of Reindex, since I think it does
exactly what's needed.

On Sat, Sep 26, 2020 at 02:56:55PM -0500, Justin Pryzby wrote:

On Thu, Sep 24, 2020 at 05:11:03PM +0900, Michael Paquier wrote:

It would be good also to check if
we have a partition index tree that maps partially with a partition
table tree (aka not all table partitions have a partition index), where
these don't get clustered because there is no index to work on.

This should not happen, since an incomplete partitioned index is "invalid".

@cfbot: rebased over recent changes to indexcmds.c

Status update for a commitfest entry.

This patch has not been updated and has been in "Waiting on Author" status
since Nov 30. Are you still planning to work on this, Justin? If not, I'm
going to set this entry to "Returned with Feedback" barring
objections.

I had been waiting to rebase since there haven't been any review comments and I
expected additional, future conflicts.

--
Justin

Attachments:

v12-0001-Allow-CREATE-INDEX-CONCURRENTLY-on-partitioned-t.patchtext/x-diff; charset=us-asciiDownload
From 2840a6d355961ea6fdd29c2851b9c333c17c849f Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Sat, 6 Jun 2020 17:42:23 -0500
Subject: [PATCH v12 1/5] Allow CREATE INDEX CONCURRENTLY on partitioned table

Note, this effectively reverts 050098b14, so take care to not reintroduce the
bug it fixed.

XXX: does pgstat_progress_update_param() break other commands progress ?
---
 doc/src/sgml/ref/create_index.sgml     |   9 --
 src/backend/commands/indexcmds.c       | 142 ++++++++++++++++++-------
 src/test/regress/expected/indexing.out |  60 ++++++++++-
 src/test/regress/sql/indexing.sql      |  18 +++-
 4 files changed, 173 insertions(+), 56 deletions(-)

diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml
index a5271a9f8f..6869a18968 100644
--- a/doc/src/sgml/ref/create_index.sgml
+++ b/doc/src/sgml/ref/create_index.sgml
@@ -686,15 +686,6 @@ Indexes:
     cannot.
    </para>
 
-   <para>
-    Concurrent builds for indexes on partitioned tables are currently not
-    supported.  However, you may concurrently build the index on each
-    partition individually and then finally create the partitioned index
-    non-concurrently in order to reduce the time where writes to the
-    partitioned table will be locked out.  In this case, building the
-    partitioned index is a metadata only operation.
-   </para>
-
   </refsect2>
  </refsect1>
 
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index f9f3ff3b62..c513e8a6bd 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -68,6 +68,7 @@
 
 
 /* non-export function prototypes */
+static void reindex_invalid_child_indexes(Oid indexRelationId);
 static bool CompareOpclassOptions(Datum *opts1, Datum *opts2, int natts);
 static void CheckPredicate(Expr *predicate);
 static void ComputeIndexAttrs(IndexInfo *indexInfo,
@@ -680,17 +681,6 @@ DefineIndex(Oid relationId,
 	partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE;
 	if (partitioned)
 	{
-		/*
-		 * Note: we check 'stmt->concurrent' rather than 'concurrent', so that
-		 * the error is thrown also for temporary tables.  Seems better to be
-		 * consistent, even though we could do it on temporary table because
-		 * we're not actually doing it concurrently.
-		 */
-		if (stmt->concurrent)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("cannot create index on partitioned table \"%s\" concurrently",
-							RelationGetRelationName(rel))));
 		if (stmt->excludeOpNames)
 			ereport(ERROR,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
@@ -1128,6 +1118,11 @@ DefineIndex(Oid relationId,
 		if (pd->nparts != 0)
 			flags |= INDEX_CREATE_INVALID;
 	}
+	else if (concurrent && OidIsValid(parentIndexId))
+	{
+		/* If concurrent, initially build index partitions as "invalid" */
+		flags |= INDEX_CREATE_INVALID;
+	}
 
 	if (stmt->deferrable)
 		constr_flags |= INDEX_CONSTR_CREATE_DEFERRABLE;
@@ -1183,18 +1178,29 @@ DefineIndex(Oid relationId,
 		partdesc = RelationGetPartitionDesc(rel);
 		if ((!stmt->relation || stmt->relation->inh) && partdesc->nparts > 0)
 		{
+			/*
+			 * Need to close the relation before recursing into children, so
+			 * copy needed data into a longlived context.
+			 */
+
+			MemoryContext	ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX",
+					ALLOCSET_DEFAULT_SIZES);
+			MemoryContext	oldcontext = MemoryContextSwitchTo(ind_context);
 			int			nparts = partdesc->nparts;
 			Oid		   *part_oids = palloc(sizeof(Oid) * nparts);
 			bool		invalidate_parent = false;
 			TupleDesc	parentDesc;
 			Oid		   *opfamOids;
 
+			// If concurrent, maybe this should be done after excluding indexes which already exist ?
 			pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_TOTAL,
 										 nparts);
 
 			memcpy(part_oids, partdesc->oids, sizeof(Oid) * nparts);
+			parentDesc = CreateTupleDescCopy(RelationGetDescr(rel));
+			table_close(rel, NoLock);
+			MemoryContextSwitchTo(oldcontext);
 
-			parentDesc = RelationGetDescr(rel);
 			opfamOids = palloc(sizeof(Oid) * numberOfKeyAttributes);
 			for (i = 0; i < numberOfKeyAttributes; i++)
 				opfamOids[i] = get_opclass_family(classObjectId[i]);
@@ -1237,10 +1243,12 @@ DefineIndex(Oid relationId,
 					continue;
 				}
 
+				oldcontext = MemoryContextSwitchTo(ind_context);
 				childidxs = RelationGetIndexList(childrel);
 				attmap =
 					build_attrmap_by_name(RelationGetDescr(childrel),
 										  parentDesc);
+				MemoryContextSwitchTo(oldcontext);
 
 				foreach(cell, childidxs)
 				{
@@ -1311,10 +1319,14 @@ DefineIndex(Oid relationId,
 				 */
 				if (!found)
 				{
-					IndexStmt  *childStmt = copyObject(stmt);
+					IndexStmt  *childStmt;
 					bool		found_whole_row;
 					ListCell   *lc;
 
+					oldcontext = MemoryContextSwitchTo(ind_context);
+					childStmt = copyObject(stmt);
+					MemoryContextSwitchTo(oldcontext);
+
 					/*
 					 * We can't use the same index name for the child index,
 					 * so clear idxname to let the recursive invocation choose
@@ -1366,10 +1378,18 @@ DefineIndex(Oid relationId,
 								createdConstraintId,
 								is_alter_table, check_rights, check_not_in_use,
 								skip_build, quiet);
+					if (concurrent)
+					{
+						PopActiveSnapshot();
+						PushActiveSnapshot(GetTransactionSnapshot());
+						invalidate_parent = true;
+					}
 				}
 
-				pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
-											 i + 1);
+				/* For concurrent build, this is a catalog-only stage */
+				if (!concurrent)
+					pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
+												 i + 1);
 				free_attrmap(attmap);
 			}
 
@@ -1379,51 +1399,42 @@ DefineIndex(Oid relationId,
 			 * invalid, this is incorrect, so update our row to invalid too.
 			 */
 			if (invalidate_parent)
-			{
-				Relation	pg_index = table_open(IndexRelationId, RowExclusiveLock);
-				HeapTuple	tup,
-							newtup;
-
-				tup = SearchSysCache1(INDEXRELID,
-									  ObjectIdGetDatum(indexRelationId));
-				if (!HeapTupleIsValid(tup))
-					elog(ERROR, "cache lookup failed for index %u",
-						 indexRelationId);
-				newtup = heap_copytuple(tup);
-				((Form_pg_index) GETSTRUCT(newtup))->indisvalid = false;
-				CatalogTupleUpdate(pg_index, &tup->t_self, newtup);
-				ReleaseSysCache(tup);
-				table_close(pg_index, RowExclusiveLock);
-				heap_freetuple(newtup);
-			}
-		}
+				index_set_state_flags(indexRelationId, INDEX_DROP_CLEAR_VALID);
+		} else
+			table_close(rel, NoLock);
 
 		/*
 		 * Indexes on partitioned tables are not themselves built, so we're
 		 * done here.
 		 */
-		table_close(rel, NoLock);
 		if (!OidIsValid(parentIndexId))
+		{
+			if (concurrent)
+				reindex_invalid_child_indexes(indexRelationId);
+
 			pgstat_progress_end_command();
+		}
+
 		return address;
 	}
 
-	if (!concurrent)
+	table_close(rel, NoLock);
+	if (!concurrent || OidIsValid(parentIndexId))
 	{
-		/* Close the heap and we're done, in the non-concurrent case */
-		table_close(rel, NoLock);
+		/*
+		 * We're done if this is the top-level index,
+		 * or the catalog-only phase of a partition built concurrently
+		 */
 
-		/* If this is the top-level index, we're done. */
 		if (!OidIsValid(parentIndexId))
 			pgstat_progress_end_command();
 
 		return address;
 	}
 
-	/* save lockrelid and locktag for below, then close rel */
+	/* save lockrelid and locktag for below */
 	heaprelid = rel->rd_lockInfo.lockRelId;
 	SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
-	table_close(rel, NoLock);
 
 	/*
 	 * For a concurrent build, it's important to make the catalog entries
@@ -1617,6 +1628,57 @@ DefineIndex(Oid relationId,
 	return address;
 }
 
+/* Reindex invalid child indexes created earlier */
+static void
+reindex_invalid_child_indexes(Oid indexRelationId)
+{
+	ListCell *lc;
+	int		npart = 0;
+	ReindexParams params = { .options = REINDEXOPT_CONCURRENTLY };
+
+	MemoryContext	ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX",
+			ALLOCSET_DEFAULT_SIZES);
+	MemoryContext	oldcontext;
+	List		*childs = find_inheritance_children(indexRelationId, ShareLock);
+	List		*partitions = NIL;
+
+	PreventInTransactionBlock(true, "REINDEX INDEX");
+
+	foreach (lc, childs)
+	{
+		Oid			partoid = lfirst_oid(lc);
+
+		pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
+									 npart++);
+
+		if (get_index_isvalid(partoid) ||
+				!RELKIND_HAS_STORAGE(get_rel_relkind(partoid)))
+			continue;
+
+		/* Save partition OID */
+		oldcontext = MemoryContextSwitchTo(ind_context);
+		partitions = lappend_oid(partitions, partoid);
+		MemoryContextSwitchTo(oldcontext);
+	}
+
+	/*
+	 * Process each partition listed in a separate transaction.  Note that
+	 * this commits and then starts a new transaction immediately.
+	 */
+	ReindexMultipleInternal(partitions, &params);
+
+	/*
+	 * CIC needs to mark a partitioned index as VALID, which itself
+	 * requires setting READY, which is unset for CIC (even though
+	 * it's meaningless for an index without storage).
+	 * This must be done only while holding a lock which precludes adding
+	 * partitions.
+	 * See also: validatePartitionedIndex().
+	 */
+	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
+	CommandCounterIncrement();
+	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
+}
 
 /*
  * CheckMutability
diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out
index c93f4470c9..f04abc6897 100644
--- a/src/test/regress/expected/indexing.out
+++ b/src/test/regress/expected/indexing.out
@@ -50,11 +50,63 @@ select relname, relkind, relhassubclass, inhparent::regclass
 (8 rows)
 
 drop table idxpart;
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
-ERROR:  cannot create index on partitioned table "idxpart" concurrently
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart (a); -- partitioned
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart2 (a); -- leaf
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+ERROR:  could not create unique index "idxpart2_a_idx2_ccnew"
+DETAIL:  Key (a)=(10) is duplicated.
+\d idxpart
+        Partitioned table "public.idxpart"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition key: RANGE (a)
+Indexes:
+    "idxpart_a_idx" btree (a)
+    "idxpart_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 2 (Use \d+ to list them.)
+
+\d idxpart1
+        Partitioned table "public.idxpart1"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart1_a_idx" btree (a) INVALID
+    "idxpart1_a_idx1" btree (a)
+    "idxpart1_a_idx2" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart2
+              Table "public.idxpart2"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (10) TO (20)
+Indexes:
+    "idxpart2_a_idx" btree (a)
+    "idxpart2_a_idx1" btree (a)
+    "idxpart2_a_idx2" UNIQUE, btree (a) INVALID
+    "idxpart2_a_idx2_ccnew" UNIQUE, btree (a) INVALID
+
 drop table idxpart;
 -- Verify bugfix with query on indexed partitioned table with no partitions
 -- https://postgr.es/m/20180124162006.pmapfiznhgngwtjf@alvherre.pgsql
diff --git a/src/test/regress/sql/indexing.sql b/src/test/regress/sql/indexing.sql
index 42f398b67c..3d4b6e9bc9 100644
--- a/src/test/regress/sql/indexing.sql
+++ b/src/test/regress/sql/indexing.sql
@@ -29,10 +29,22 @@ select relname, relkind, relhassubclass, inhparent::regclass
 	where relname like 'idxpart%' order by relname;
 drop table idxpart;
 
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart (a); -- partitioned
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart2 (a); -- leaf
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+\d idxpart
+\d idxpart1
+\d idxpart2
 drop table idxpart;
 
 -- Verify bugfix with query on indexed partitioned table with no partitions
-- 
2.17.0

v12-0002-Add-SKIPVALID-flag-for-more-integration.patchtext/x-diff; charset=us-asciiDownload
From 2ce39870407fa8bb486df75f51bc9f20dd626045 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Fri, 30 Oct 2020 16:23:02 -0500
Subject: [PATCH v12 2/5] Add SKIPVALID flag for more integration

---
 src/backend/commands/indexcmds.c | 52 +++++++++++---------------------
 src/include/catalog/index.h      |  1 +
 2 files changed, 18 insertions(+), 35 deletions(-)

diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index c513e8a6bd..89c2f62eda 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -1628,54 +1628,31 @@ DefineIndex(Oid relationId,
 	return address;
 }
 
-/* Reindex invalid child indexes created earlier */
+/*
+ * Reindex invalid child indexes created earlier thereby validating
+ * the parent index.
+ */
 static void
 reindex_invalid_child_indexes(Oid indexRelationId)
 {
-	ListCell *lc;
-	int		npart = 0;
 	ReindexParams params = { .options = REINDEXOPT_CONCURRENTLY };
 
-	MemoryContext	ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX",
-			ALLOCSET_DEFAULT_SIZES);
-	MemoryContext	oldcontext;
-	List		*childs = find_inheritance_children(indexRelationId, ShareLock);
-	List		*partitions = NIL;
-
-	PreventInTransactionBlock(true, "REINDEX INDEX");
-
-	foreach (lc, childs)
-	{
-		Oid			partoid = lfirst_oid(lc);
-
-		pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
-									 npart++);
-
-		if (get_index_isvalid(partoid) ||
-				!RELKIND_HAS_STORAGE(get_rel_relkind(partoid)))
-			continue;
-
-		/* Save partition OID */
-		oldcontext = MemoryContextSwitchTo(ind_context);
-		partitions = lappend_oid(partitions, partoid);
-		MemoryContextSwitchTo(oldcontext);
-	}
-
-	/*
-	 * Process each partition listed in a separate transaction.  Note that
-	 * this commits and then starts a new transaction immediately.
-	 */
-	ReindexMultipleInternal(partitions, &params);
-
 	/*
 	 * CIC needs to mark a partitioned index as VALID, which itself
 	 * requires setting READY, which is unset for CIC (even though
 	 * it's meaningless for an index without storage).
 	 * This must be done only while holding a lock which precludes adding
 	 * partitions.
-	 * See also: validatePartitionedIndex().
 	 */
+	CommandCounterIncrement();
 	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
+
+	/*
+	 * Process each partition listed in a separate transaction.  Note that
+	 * this commits and then starts a new transaction immediately.
+	 */
+	ReindexPartitions(indexRelationId, &params, true);
+
 	CommandCounterIncrement();
 	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
 }
@@ -3037,6 +3014,11 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 		if (!RELKIND_HAS_STORAGE(partkind))
 			continue;
 
+		/* Skip invalid indexes, if requested */
+		if ((params->options & REINDEXOPT_SKIPVALID) != 0 &&
+				get_index_isvalid(partoid))
+			continue;
+
 		Assert(partkind == RELKIND_INDEX ||
 			   partkind == RELKIND_RELATION);
 
diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h
index 266f8950dc..6673122ec2 100644
--- a/src/include/catalog/index.h
+++ b/src/include/catalog/index.h
@@ -40,6 +40,7 @@ typedef struct ReindexParams
 #define REINDEXOPT_REPORT_PROGRESS 0x02 /* report pgstat progress */
 #define REINDEXOPT_MISSING_OK 	0x04	/* skip missing relations */
 #define REINDEXOPT_CONCURRENTLY	0x08	/* concurrent mode */
+#define REINDEXOPT_SKIPVALID	0x10	/* skip valid indexes */
 
 /* state info for validate_index bulkdelete callback */
 typedef struct ValidateIndexState
-- 
2.17.0

v12-0003-ReindexPartitions-to-set-indisvalid.patchtext/x-diff; charset=us-asciiDownload
From 15643b52a721f4c43d00296cd87f152543b018f7 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Fri, 30 Oct 2020 23:52:31 -0500
Subject: [PATCH v12 3/5] ReindexPartitions() to set indisvalid..

Something like this should probably have been included in
a6642b3ae060976b42830b7dc8f29ec190ab05e4

See also 71a05b223, which mentioned the absence of any way to validate an
index.
---
 src/backend/commands/indexcmds.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 89c2f62eda..739bd14001 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -1641,8 +1641,6 @@ reindex_invalid_child_indexes(Oid indexRelationId)
 	 * CIC needs to mark a partitioned index as VALID, which itself
 	 * requires setting READY, which is unset for CIC (even though
 	 * it's meaningless for an index without storage).
-	 * This must be done only while holding a lock which precludes adding
-	 * partitions.
 	 */
 	CommandCounterIncrement();
 	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
@@ -1652,9 +1650,6 @@ reindex_invalid_child_indexes(Oid indexRelationId)
 	 * this commits and then starts a new transaction immediately.
 	 */
 	ReindexPartitions(indexRelationId, &params, true);
-
-	CommandCounterIncrement();
-	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
 }
 
 /*
@@ -3034,6 +3029,24 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 	 */
 	ReindexMultipleInternal(partitions, params);
 
+	/*
+	 * If indexes exist on all of the partitioned table's children, and we
+	 * just reindexed them, then we know they're valid, and so can mark the
+	 * parent index as valid.
+	 * This handles the case of CREATE INDEX CONCURRENTLY.
+	 * See also: validatePartitionedIndex().
+	 */
+	if (get_rel_relkind(relid) == RELKIND_PARTITIONED_INDEX
+			&& !get_index_isvalid(relid))
+	{
+		Oid	tableoid = IndexGetRelation(relid, false);
+		List	*child_tables = find_all_inheritors(tableoid, ShareLock, NULL);
+
+		/* Both lists include their parent relation as well as any intermediate partitioned rels */
+		if (list_length(inhoids) == list_length(child_tables))
+			index_set_state_flags(relid, INDEX_CREATE_SET_VALID);
+	}
+
 	/*
 	 * Clean up working storage --- note we must do this after
 	 * StartTransactionCommand, else we might be trying to delete the active
-- 
2.17.0

v12-0004-Refactor-to-allow-reindexing-all-index-partition.patchtext/x-diff; charset=us-asciiDownload
From 66c0421963c7f98b004c21ab1b29dc2cd2713d2e Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Sun, 1 Nov 2020 12:25:15 -0600
Subject: [PATCH v12 4/5] Refactor to allow reindexing all index partitions at
 once

---
 src/backend/commands/indexcmds.c           | 262 ++++++++++++++-------
 src/test/regress/expected/create_index.out |   4 +-
 2 files changed, 185 insertions(+), 81 deletions(-)

diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 739bd14001..08d44a1999 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -102,6 +102,8 @@ static void ReindexMultipleInternal(List *relids,
 									ReindexParams *params);
 static bool ReindexRelationConcurrently(Oid relationOid,
 										ReindexParams *params);
+static List *ReindexIndexesConcurrently(List *indexIds, List *heapRelationIds,
+										int options, MemoryContext private_context);
 static void update_relispartition(Oid relationId, bool newval);
 static inline void set_indexsafe_procflags(void);
 
@@ -124,6 +126,15 @@ typedef struct ReindexErrorInfo
 	char		relkind;
 } ReindexErrorInfo;
 
+/* ReindexIndexesConcurrently takes a List * of these as its indexIds argument */
+typedef struct ReindexIndexInfo
+{
+	Oid			indexId;
+	Oid			tableId;
+	Oid			amId;
+	bool		safe;		/* for set_indexsafe_procflags */
+} ReindexIndexInfo;
+
 /*
  * CheckIndexCompatible
  *		Determine whether an existing index definition is compatible with a
@@ -1635,7 +1646,9 @@ DefineIndex(Oid relationId,
 static void
 reindex_invalid_child_indexes(Oid indexRelationId)
 {
-	ReindexParams params = { .options = REINDEXOPT_CONCURRENTLY };
+	ReindexParams params = {
+		.options = REINDEXOPT_CONCURRENTLY | REINDEXOPT_SKIPVALID,
+	};
 
 	/*
 	 * CIC needs to mark a partitioned index as VALID, which itself
@@ -2607,7 +2620,15 @@ ReindexIndex(RangeVar *indexRelation, ReindexParams *params, bool isTopLevel)
 		ReindexPartitions(indOid, params, isTopLevel);
 	else if ((params->options & REINDEXOPT_CONCURRENTLY) != 0 &&
 			 persistence != RELPERSISTENCE_TEMP)
-		ReindexRelationConcurrently(indOid, params);
+	{
+		ReindexIndexInfo idxinfo = {
+			.indexId = indOid,
+			/* other fields set later */
+		};
+		ReindexIndexesConcurrently(list_make1(&idxinfo),
+				list_make1_oid(IndexGetRelation(indOid, false)),
+				params->options, CurrentMemoryContext);
+	}
 	else
 	{
 		ReindexParams newparams = *params;
@@ -2938,20 +2959,69 @@ reindex_error_callback(void *arg)
 				   errinfo->relnamespace, errinfo->relname);
 }
 
+
+/*
+ * Given a list of index oids, return a list of leaf partitions by removing
+ * any intermediate parents.  heaprels is populated with the corresponding
+ * tables.
+ */
+static List *
+leaf_partitions(List *inhoids, int options, List **heaprels)
+{
+	List		*partitions = NIL;
+	ListCell	*lc;
+
+	foreach(lc, inhoids)
+	{
+		Oid			partoid = lfirst_oid(lc);
+		Oid			tableoid;
+		Relation	table;
+		char		partkind = get_rel_relkind(partoid);
+
+		/*
+		 * This discards partitioned indexes and foreign tables.
+		 */
+		if (!RELKIND_HAS_STORAGE(partkind))
+			continue;
+
+		Assert(partkind == RELKIND_INDEX);
+
+		/* Skip valid indexes, if requested */
+		if ((options & REINDEXOPT_SKIPVALID) != 0 &&
+				get_index_isvalid(partoid))
+			continue;
+
+		/* (try to) Open the table, with lock */
+		tableoid = IndexGetRelation(partoid, false);
+		table = table_open(tableoid, ShareLock);
+		table_close(table, NoLock);
+
+		/* Save partition OID in current MemoryContext */
+		partitions = lappend_oid(partitions, partoid);
+		*heaprels = lappend_oid(*heaprels, tableoid);
+	}
+
+	return partitions;
+}
+
+
 /*
  * ReindexPartitions
  *
  * Reindex a set of partitions, per the partitioned index or table given
  * by the caller.
+ * XXX: should be further refactored with logic from ReindexRelationConcurrently
  */
 static void
 ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 {
-	List	   *partitions = NIL;
+	List	   *partitions = NIL,
+			*heaprels = NIL;
 	char		relkind = get_rel_relkind(relid);
 	char	   *relname = get_rel_name(relid);
 	char	   *relnamespace = get_namespace_name(get_rel_namespace(relid));
 	MemoryContext reindex_context;
+	MemoryContext old_context;
 	List	   *inhoids;
 	ListCell   *lc;
 	ErrorContextCallback errcallback;
@@ -2996,38 +3066,58 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 	 * The list of relations to reindex are the physical partitions of the
 	 * tree so discard any partitioned table or index.
 	 */
-	foreach(lc, inhoids)
-	{
-		Oid			partoid = lfirst_oid(lc);
-		char		partkind = get_rel_relkind(partoid);
-		MemoryContext old_context;
 
-		/*
-		 * This discards partitioned tables, partitioned indexes and foreign
-		 * tables.
-		 */
-		if (!RELKIND_HAS_STORAGE(partkind))
-			continue;
-
-		/* Skip invalid indexes, if requested */
-		if ((params->options & REINDEXOPT_SKIPVALID) != 0 &&
-				get_index_isvalid(partoid))
-			continue;
+	if (relkind == RELKIND_PARTITIONED_INDEX)
+	{
+		old_context = MemoryContextSwitchTo(reindex_context);
+		partitions = leaf_partitions(inhoids, params->options, &heaprels);
+		MemoryContextSwitchTo(old_context);
+	} else {
+		/* Loop over parent tables */
+		foreach(lc, inhoids)
+		{
+			Oid		partoid = lfirst_oid(lc);
+			Relation parttable;
+			List	*partindexes;
+
+			parttable = table_open(partoid, ShareLock);
+			old_context = MemoryContextSwitchTo(reindex_context);
+			partindexes = RelationGetIndexList(parttable);
+			partindexes = leaf_partitions(partindexes, params->options, &heaprels);
+			partitions = list_concat(partitions, partindexes);
+
+			MemoryContextSwitchTo(old_context);
+			table_close(parttable, ShareLock);
+		}
+	}
 
-		Assert(partkind == RELKIND_INDEX ||
-			   partkind == RELKIND_RELATION);
+	if ((params->options & REINDEXOPT_CONCURRENTLY) != 0 &&
+		relkind == RELKIND_PARTITIONED_INDEX &&
+		get_rel_persistence(relid) != RELPERSISTENCE_TEMP)
+	{
+		List			   *idxinfos = NIL;
+		ReindexIndexInfo	*idxinfo;
 
-		/* Save partition OID */
 		old_context = MemoryContextSwitchTo(reindex_context);
-		partitions = lappend_oid(partitions, partoid);
+		foreach (lc, partitions)
+		{
+			Oid partoid = lfirst_oid(lc);
+			idxinfo = palloc(sizeof(ReindexIndexInfo));
+			idxinfo->indexId = partoid;
+			/* other fields set later */
+			idxinfos = lappend(idxinfos, idxinfo);
+		}
 		MemoryContextSwitchTo(old_context);
-	}
 
-	/*
-	 * Process each partition listed in a separate transaction.  Note that
-	 * this commits and then starts a new transaction immediately.
-	 */
-	ReindexMultipleInternal(partitions, params);
+		/* Process all indexes in a single loop */
+		ReindexIndexesConcurrently(idxinfos, heaprels, params->options, reindex_context);
+	} else {
+		/*
+		 * Process each partition listed in a separate transaction.  Note that
+		 * this commits and then starts a new transaction immediately.
+		 */
+		ReindexMultipleInternal(partitions, params);
+	}
 
 	/*
 	 * If indexes exist on all of the partitioned table's children, and we
@@ -3172,18 +3262,9 @@ ReindexMultipleInternal(List *relids, ReindexParams *params)
 static bool
 ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 {
-	typedef struct ReindexIndexInfo
-	{
-		Oid			indexId;
-		Oid			tableId;
-		Oid			amId;
-		bool		safe;		/* for set_indexsafe_procflags */
-	} ReindexIndexInfo;
 	List	   *heapRelationIds = NIL;
 	List	   *indexIds = NIL;
 	List	   *newIndexIds = NIL;
-	List	   *relationLocks = NIL;
-	List	   *lockTags = NIL;
 	ListCell   *lc,
 			   *lc2;
 	MemoryContext private_context;
@@ -3192,13 +3273,6 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 	char	   *relationName = NULL;
 	char	   *relationNamespace = NULL;
 	PGRUsage	ru0;
-	const int	progress_index[] = {
-		PROGRESS_CREATEIDX_COMMAND,
-		PROGRESS_CREATEIDX_PHASE,
-		PROGRESS_CREATEIDX_INDEX_OID,
-		PROGRESS_CREATEIDX_ACCESS_METHOD_OID
-	};
-	int64		progress_vals[4];
 
 	/*
 	 * Create a memory context that will survive forced transaction commits we
@@ -3449,6 +3523,69 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 
 	Assert(heapRelationIds != NIL);
 
+	/* Do the work */
+	newIndexIds = ReindexIndexesConcurrently(indexIds, heapRelationIds, params->options, private_context);
+
+	/* Log what we did */
+	if ((params->options & REINDEXOPT_VERBOSE) != 0)
+	{
+		if (relkind == RELKIND_INDEX)
+			ereport(INFO,
+					(errmsg("index \"%s.%s\" was reindexed",
+							relationNamespace, relationName),
+					 errdetail("%s.",
+							   pg_rusage_show(&ru0))));
+		else
+		{
+			foreach(lc, newIndexIds)
+			{
+				Oid			indOid = lfirst_oid(lc);
+				ereport(INFO,
+						(errmsg("index \"%s.%s\" was reindexed",
+								get_namespace_name(get_rel_namespace(indOid)),
+								get_rel_name(indOid))));
+				/* Don't show rusage here, since it's not per index. */
+			}
+
+			ereport(INFO,
+					(errmsg("table \"%s.%s\" was reindexed",
+							relationNamespace, relationName),
+					 errdetail("%s.",
+							   pg_rusage_show(&ru0))));
+		}
+	}
+
+
+	MemoryContextDelete(private_context);
+
+	return true;
+}
+
+/*
+ * Reindex concurrently for an arbitrary list of index relations
+ * This is called by ReindexRelationConcurrently and
+ */
+static List *
+ReindexIndexesConcurrently(List *indexIds, List *heapRelationIds, int options,
+		MemoryContext private_context)
+{
+	List	   *newIndexIds = NIL;
+	List	   *relationLocks = NIL;
+	List	   *lockTags = NIL;
+
+	ListCell   *lc,
+			   *lc2;
+
+	MemoryContext oldcontext;
+
+	const int	progress_index[] = {
+		PROGRESS_CREATEIDX_COMMAND,
+		PROGRESS_CREATEIDX_PHASE,
+		PROGRESS_CREATEIDX_INDEX_OID,
+		PROGRESS_CREATEIDX_ACCESS_METHOD_OID
+	};
+	int64		progress_vals[4];
+
 	/*-----
 	 * Now we have all the indexes we want to process in indexIds.
 	 *
@@ -3913,42 +4050,9 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 	/* Start a new transaction to finish process properly */
 	StartTransactionCommand();
 
-	/* Log what we did */
-	if ((params->options & REINDEXOPT_VERBOSE) != 0)
-	{
-		if (relkind == RELKIND_INDEX)
-			ereport(INFO,
-					(errmsg("index \"%s.%s\" was reindexed",
-							relationNamespace, relationName),
-					 errdetail("%s.",
-							   pg_rusage_show(&ru0))));
-		else
-		{
-			foreach(lc, newIndexIds)
-			{
-				ReindexIndexInfo *idx = lfirst(lc);
-				Oid			indOid = idx->indexId;
-
-				ereport(INFO,
-						(errmsg("index \"%s.%s\" was reindexed",
-								get_namespace_name(get_rel_namespace(indOid)),
-								get_rel_name(indOid))));
-				/* Don't show rusage here, since it's not per index. */
-			}
-
-			ereport(INFO,
-					(errmsg("table \"%s.%s\" was reindexed",
-							relationNamespace, relationName),
-					 errdetail("%s.",
-							   pg_rusage_show(&ru0))));
-		}
-	}
-
-	MemoryContextDelete(private_context);
-
 	pgstat_progress_end_command();
 
-	return true;
+	return newIndexIds;
 }
 
 /*
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index fc6afab58a..4a03ab2abb 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -2470,12 +2470,12 @@ COMMIT;
 REINDEX TABLE CONCURRENTLY pg_class; -- no catalog relation
 ERROR:  cannot reindex system catalogs concurrently
 REINDEX INDEX CONCURRENTLY pg_class_oid_index; -- no catalog index
-ERROR:  cannot reindex system catalogs concurrently
+ERROR:  concurrent index creation on system catalog tables is not supported
 -- These are the toast table and index of pg_authid.
 REINDEX TABLE CONCURRENTLY pg_toast.pg_toast_1260; -- no catalog toast table
 ERROR:  cannot reindex system catalogs concurrently
 REINDEX INDEX CONCURRENTLY pg_toast.pg_toast_1260_index; -- no catalog toast index
-ERROR:  cannot reindex system catalogs concurrently
+ERROR:  concurrent index creation on system catalog tables is not supported
 REINDEX SYSTEM CONCURRENTLY postgres; -- not allowed for SYSTEM
 ERROR:  cannot reindex system catalogs concurrently
 -- Warns about catalog relations
-- 
2.17.0

v12-0005-More-refactoring.patchtext/x-diff; charset=us-asciiDownload
From fa128228d21fce54babf6f736e833eea3d2f82a3 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Sun, 1 Nov 2020 13:46:18 -0600
Subject: [PATCH v12 5/5] More refactoring

---
 src/backend/commands/indexcmds.c           | 183 +++++++++------------
 src/test/regress/expected/create_index.out |   4 +-
 2 files changed, 83 insertions(+), 104 deletions(-)

diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 08d44a1999..4586942960 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -102,8 +102,8 @@ static void ReindexMultipleInternal(List *relids,
 									ReindexParams *params);
 static bool ReindexRelationConcurrently(Oid relationOid,
 										ReindexParams *params);
-static List *ReindexIndexesConcurrently(List *indexIds, List *heapRelationIds,
-										int options, MemoryContext private_context);
+static List *ReindexIndexesConcurrently(List *indexIds, int options,
+										MemoryContext private_context);
 static void update_relispartition(Oid relationId, bool newval);
 static inline void set_indexsafe_procflags(void);
 
@@ -2625,8 +2625,8 @@ ReindexIndex(RangeVar *indexRelation, ReindexParams *params, bool isTopLevel)
 			.indexId = indOid,
 			/* other fields set later */
 		};
+
 		ReindexIndexesConcurrently(list_make1(&idxinfo),
-				list_make1_oid(IndexGetRelation(indOid, false)),
 				params->options, CurrentMemoryContext);
 	}
 	else
@@ -2961,12 +2961,11 @@ reindex_error_callback(void *arg)
 
 
 /*
- * Given a list of index oids, return a list of leaf partitions by removing
- * any intermediate parents.  heaprels is populated with the corresponding
- * tables.
+ * Given a list of index oids, return a new list of leaf partitions by
+ * excluding any intermediate parents.
  */
 static List *
-leaf_partitions(List *inhoids, int options, List **heaprels)
+leaf_partitions(List *inhoids, int options)
 {
 	List		*partitions = NIL;
 	ListCell	*lc;
@@ -2998,7 +2997,6 @@ leaf_partitions(List *inhoids, int options, List **heaprels)
 
 		/* Save partition OID in current MemoryContext */
 		partitions = lappend_oid(partitions, partoid);
-		*heaprels = lappend_oid(*heaprels, tableoid);
 	}
 
 	return partitions;
@@ -3010,13 +3008,11 @@ leaf_partitions(List *inhoids, int options, List **heaprels)
  *
  * Reindex a set of partitions, per the partitioned index or table given
  * by the caller.
- * XXX: should be further refactored with logic from ReindexRelationConcurrently
  */
 static void
 ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 {
-	List	   *partitions = NIL,
-			*heaprels = NIL;
+	List	   *partitions = NIL;
 	char		relkind = get_rel_relkind(relid);
 	char	   *relname = get_rel_name(relid);
 	char	   *relnamespace = get_namespace_name(get_rel_namespace(relid));
@@ -3070,7 +3066,7 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 	if (relkind == RELKIND_PARTITIONED_INDEX)
 	{
 		old_context = MemoryContextSwitchTo(reindex_context);
-		partitions = leaf_partitions(inhoids, params->options, &heaprels);
+		partitions = leaf_partitions(inhoids, params->options);
 		MemoryContextSwitchTo(old_context);
 	} else {
 		/* Loop over parent tables */
@@ -3083,7 +3079,7 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 			parttable = table_open(partoid, ShareLock);
 			old_context = MemoryContextSwitchTo(reindex_context);
 			partindexes = RelationGetIndexList(parttable);
-			partindexes = leaf_partitions(partindexes, params->options, &heaprels);
+			partindexes = leaf_partitions(partindexes, params->options);
 			partitions = list_concat(partitions, partindexes);
 
 			MemoryContextSwitchTo(old_context);
@@ -3092,10 +3088,9 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 	}
 
 	if ((params->options & REINDEXOPT_CONCURRENTLY) != 0 &&
-		relkind == RELKIND_PARTITIONED_INDEX &&
 		get_rel_persistence(relid) != RELPERSISTENCE_TEMP)
 	{
-		List			   *idxinfos = NIL;
+		List			    *idxinfos = NIL;
 		ReindexIndexInfo	*idxinfo;
 
 		old_context = MemoryContextSwitchTo(reindex_context);
@@ -3110,7 +3105,7 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 		MemoryContextSwitchTo(old_context);
 
 		/* Process all indexes in a single loop */
-		ReindexIndexesConcurrently(idxinfos, heaprels, params->options, reindex_context);
+		ReindexIndexesConcurrently(idxinfos, params->options, reindex_context);
 	} else {
 		/*
 		 * Process each partition listed in a separate transaction.  Note that
@@ -3262,7 +3257,6 @@ ReindexMultipleInternal(List *relids, ReindexParams *params)
 static bool
 ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 {
-	List	   *heapRelationIds = NIL;
 	List	   *indexIds = NIL;
 	List	   *newIndexIds = NIL;
 	ListCell   *lc,
@@ -3315,14 +3309,6 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 				 */
 				Relation	heapRelation;
 
-				/* Save the list of relation OIDs in private context */
-				oldcontext = MemoryContextSwitchTo(private_context);
-
-				/* Track this relation for session locks */
-				heapRelationIds = lappend_oid(heapRelationIds, relationOid);
-
-				MemoryContextSwitchTo(oldcontext);
-
 				if (IsCatalogRelationOid(relationOid))
 					ereport(ERROR,
 							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
@@ -3335,7 +3321,7 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 												  ShareUpdateExclusiveLock);
 					/* leave if relation does not exist */
 					if (!heapRelation)
-						break;
+						break; // XXX: lremove
 				}
 				else
 					heapRelation = table_open(relationOid,
@@ -3386,14 +3372,6 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 					Relation	toastRelation = table_open(toastOid,
 														   ShareUpdateExclusiveLock);
 
-					/* Save the list of relation OIDs in private context */
-					oldcontext = MemoryContextSwitchTo(private_context);
-
-					/* Track this relation for session locks */
-					heapRelationIds = lappend_oid(heapRelationIds, toastOid);
-
-					MemoryContextSwitchTo(oldcontext);
-
 					foreach(lc2, RelationGetIndexList(toastRelation))
 					{
 						Oid			cellOid = lfirst_oid(lc2);
@@ -3434,70 +3412,6 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 				break;
 			}
 		case RELKIND_INDEX:
-			{
-				Oid			heapId = IndexGetRelation(relationOid,
-													  (params->options & REINDEXOPT_MISSING_OK) != 0);
-				Relation	heapRelation;
-				ReindexIndexInfo *idx;
-
-				/* if relation is missing, leave */
-				if (!OidIsValid(heapId))
-					break;
-
-				if (IsCatalogRelationOid(heapId))
-					ereport(ERROR,
-							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-							 errmsg("cannot reindex system catalogs concurrently")));
-
-				/*
-				 * Don't allow reindex for an invalid index on TOAST table, as
-				 * if rebuilt it would not be possible to drop it.  Match
-				 * error message in reindex_index().
-				 */
-				if (IsToastNamespace(get_rel_namespace(relationOid)) &&
-					!get_index_isvalid(relationOid))
-					ereport(ERROR,
-							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-							 errmsg("cannot reindex invalid index on TOAST table")));
-
-				/*
-				 * Check if parent relation can be locked and if it exists,
-				 * this needs to be done at this stage as the list of indexes
-				 * to rebuild is not complete yet, and REINDEXOPT_MISSING_OK
-				 * should not be used once all the session locks are taken.
-				 */
-				if ((params->options & REINDEXOPT_MISSING_OK) != 0)
-				{
-					heapRelation = try_table_open(heapId,
-												  ShareUpdateExclusiveLock);
-					/* leave if relation does not exist */
-					if (!heapRelation)
-						break;
-				}
-				else
-					heapRelation = table_open(heapId,
-											  ShareUpdateExclusiveLock);
-				table_close(heapRelation, NoLock);
-
-				/* Save the list of relation OIDs in private context */
-				oldcontext = MemoryContextSwitchTo(private_context);
-
-				/* Track the heap relation of this index for session locks */
-				heapRelationIds = list_make1_oid(heapId);
-
-				/*
-				 * Save the list of relation OIDs in private context.  Note
-				 * that invalid indexes are allowed here.
-				 */
-				idx = palloc(sizeof(ReindexIndexInfo));
-				idx->indexId = relationOid;
-				indexIds = lappend(indexIds, idx);
-				/* other fields set later */
-
-				MemoryContextSwitchTo(oldcontext);
-				break;
-			}
-
 		case RELKIND_PARTITIONED_TABLE:
 		case RELKIND_PARTITIONED_INDEX:
 		default:
@@ -3521,10 +3435,8 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 		return false;
 	}
 
-	Assert(heapRelationIds != NIL);
-
 	/* Do the work */
-	newIndexIds = ReindexIndexesConcurrently(indexIds, heapRelationIds, params->options, private_context);
+	newIndexIds = ReindexIndexesConcurrently(indexIds, params->options, private_context);
 
 	/* Log what we did */
 	if ((params->options & REINDEXOPT_VERBOSE) != 0)
@@ -3566,9 +3478,10 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
  * This is called by ReindexRelationConcurrently and
  */
 static List *
-ReindexIndexesConcurrently(List *indexIds, List *heapRelationIds, int options,
+ReindexIndexesConcurrently(List *indexIds, int options,
 		MemoryContext private_context)
 {
+	List		*heapRelationIds = NIL;
 	List	   *newIndexIds = NIL;
 	List	   *relationLocks = NIL;
 	List	   *lockTags = NIL;
@@ -3586,6 +3499,72 @@ ReindexIndexesConcurrently(List *indexIds, List *heapRelationIds, int options,
 	};
 	int64		progress_vals[4];
 
+	foreach(lc, indexIds)
+	{
+		ReindexIndexInfo	*idx = lfirst(lc);
+		Oid			indexrelid = idx->indexId;
+		Oid			heapId = IndexGetRelation(indexrelid,
+											  (options & REINDEXOPT_MISSING_OK) != 0);
+		Relation	heapRelation;
+
+		/* if relation is missing, leave */
+		if (!OidIsValid(heapId))
+			break; // XXX: ldelete?
+
+		if (IsCatalogRelationOid(heapId))
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot reindex system catalogs concurrently")));
+
+		/*
+		 * Don't allow reindex for an invalid index on TOAST table, as
+		 * if rebuilt it would not be possible to drop it.  Match
+		 * error message in reindex_index().
+		 */
+		if (IsToastNamespace(get_rel_namespace(indexrelid)) &&
+			!get_index_isvalid(indexrelid))
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot reindex invalid index on TOAST table")));
+
+		if (IsToastNamespace(get_rel_namespace(indexrelid)) &&
+			!get_index_isvalid(indexrelid))
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot reindex invalid index on TOAST table")));
+
+		/*
+		 * Check if parent relation can be locked and if it exists,
+		 * this needs to be done at this stage as the list of indexes
+		 * to rebuild is not complete yet, and REINDEXOPT_MISSING_OK
+		 * should not be used once all the session locks are taken.
+		 */
+		if ((options & REINDEXOPT_MISSING_OK) != 0)
+		{
+			heapRelation = try_table_open(heapId,
+										  ShareUpdateExclusiveLock);
+			/* leave if relation does not exist */
+			if (!heapRelation)
+				break; // ldelete
+		}
+		else
+			heapRelation = table_open(heapId,
+									  ShareUpdateExclusiveLock);
+		table_close(heapRelation, NoLock);
+
+		/* Save the list of relation OIDs in private context */
+		oldcontext = MemoryContextSwitchTo(private_context);
+
+		/* Track the heap relation of this index for session locks */
+		heapRelationIds = lappend_oid(heapRelationIds, heapId);
+		// heapRelationIds = list_make1_oid(heapId);
+
+		/* Note that invalid indexes are allowed here. */
+
+		MemoryContextSwitchTo(oldcontext);
+		// break;
+	}
+
 	/*-----
 	 * Now we have all the indexes we want to process in indexIds.
 	 *
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index 4a03ab2abb..fc6afab58a 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -2470,12 +2470,12 @@ COMMIT;
 REINDEX TABLE CONCURRENTLY pg_class; -- no catalog relation
 ERROR:  cannot reindex system catalogs concurrently
 REINDEX INDEX CONCURRENTLY pg_class_oid_index; -- no catalog index
-ERROR:  concurrent index creation on system catalog tables is not supported
+ERROR:  cannot reindex system catalogs concurrently
 -- These are the toast table and index of pg_authid.
 REINDEX TABLE CONCURRENTLY pg_toast.pg_toast_1260; -- no catalog toast table
 ERROR:  cannot reindex system catalogs concurrently
 REINDEX INDEX CONCURRENTLY pg_toast.pg_toast_1260_index; -- no catalog toast index
-ERROR:  concurrent index creation on system catalog tables is not supported
+ERROR:  cannot reindex system catalogs concurrently
 REINDEX SYSTEM CONCURRENTLY postgres; -- not allowed for SYSTEM
 ERROR:  cannot reindex system catalogs concurrently
 -- Warns about catalog relations
-- 
2.17.0

#5Anastasia Lubennikova
a.lubennikova@postgrespro.ru
In reply to: Justin Pryzby (#4)
Re: CREATE INDEX CONCURRENTLY on partitioned index

On 28.01.2021 17:30, Justin Pryzby wrote:

On Thu, Jan 28, 2021 at 09:51:51PM +0900, Masahiko Sawada wrote:

On Mon, Nov 30, 2020 at 5:22 AM Justin Pryzby <pryzby@telsasoft.com> wrote:

On Sat, Oct 31, 2020 at 01:31:17AM -0500, Justin Pryzby wrote:

Forking this thread, since the existing CFs have been closed.
/messages/by-id/20200914143102.GX18552@telsasoft.com

The strategy is to create catalog entries for all tables with indisvalid=false,
and then process them like REINDEX CONCURRENTLY. If it's interrupted, it
leaves INVALID indexes, which can be cleaned up with DROP or REINDEX, same as
CIC on a plain table.

On Sat, Aug 08, 2020 at 01:37:44AM -0500, Justin Pryzby wrote:

On Mon, Jun 15, 2020 at 09:37:42PM +0900, Michael Paquier wrote:
Note that the mentioned problem wasn't serious: there was a missing index on a
child table, therefore the parent index was invalid, as intended. However, I
agree that it's not nice that the command can fail so easily and leave behind
some indexes created successfully, some failed, and some not created at all.

But I took your advice initially creating invalid inds.

...

That gave me the idea to layer CIC on top of Reindex, since I think it does
exactly what's needed.

On Sat, Sep 26, 2020 at 02:56:55PM -0500, Justin Pryzby wrote:

On Thu, Sep 24, 2020 at 05:11:03PM +0900, Michael Paquier wrote:

It would be good also to check if
we have a partition index tree that maps partially with a partition
table tree (aka not all table partitions have a partition index), where
these don't get clustered because there is no index to work on.

This should not happen, since an incomplete partitioned index is "invalid".

I had been waiting to rebase since there hasn't been any review comments and I
expected additional, future conflicts.

I attempted to review this feature, but the last patch conflicts with
the recent refactoring, so I wasn't able to test it properly.
Could you please send a new version?

Meanwhile, here are my questions about the patch:

1) I don't see a reason to change the logic here. We don't skip counting
existing indexes when create parent index. Why should we skip them in
CONCURRENTLY mode?

            // If concurrent, maybe this should be done after excluding indexes which already exist ?
            pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_TOTAL,
                                         nparts);

2) Here we access relation field after closing the relation. Is it safe?

    /* save lockrelid and locktag for below */
    heaprelid = rel->rd_lockInfo.lockRelId;

3) leaf_partitions() function only handles indexes, so I suggest to name
it more specifically and add a comment about meaning of 'options' parameter.

4) I don't quite understand the idea of the regression test. Why do we
expect to see invalid indexes there?
+    "idxpart_a_idx1" UNIQUE, btree (a) INVALID

5) Speaking of documentation, I think we need to add a paragraph about
CIC on partitioned indexes which will explain that invalid indexes may
appear and what user should do to fix them.

6) ReindexIndexesConcurrently() needs some code cleanup.

--
Anastasia Lubennikova
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

#6Anastasia Lubennikova
a.lubennikova@postgrespro.ru
In reply to: Justin Pryzby (#4)
Re: CREATE INDEX CONCURRENTLY on partitioned index

On 28.01.2021 17:30, Justin Pryzby wrote:

On Thu, Jan 28, 2021 at 09:51:51PM +0900, Masahiko Sawada wrote:

On Mon, Nov 30, 2020 at 5:22 AM Justin Pryzby <pryzby@telsasoft.com> wrote:

On Sat, Oct 31, 2020 at 01:31:17AM -0500, Justin Pryzby wrote:

Forking this thread, since the existing CFs have been closed.
/messages/by-id/20200914143102.GX18552@telsasoft.com

The strategy is to create catalog entries for all tables with indisvalid=false,
and then process them like REINDEX CONCURRENTLY. If it's interrupted, it
leaves INVALID indexes, which can be cleaned up with DROP or REINDEX, same as
CIC on a plain table.

On Sat, Aug 08, 2020 at 01:37:44AM -0500, Justin Pryzby wrote:

On Mon, Jun 15, 2020 at 09:37:42PM +0900, Michael Paquier wrote:
Note that the mentioned problem wasn't serious: there was a missing index on a
child table, therefore the parent index was invalid, as intended. However, I
agree that it's not nice that the command can fail so easily and leave behind
some indexes created successfully, some failed, and some not created at all.

But I took your advice initially creating invalid inds.

...

That gave me the idea to layer CIC on top of Reindex, since I think it does
exactly what's needed.

On Sat, Sep 26, 2020 at 02:56:55PM -0500, Justin Pryzby wrote:

On Thu, Sep 24, 2020 at 05:11:03PM +0900, Michael Paquier wrote:

It would be good also to check if
we have a partition index tree that maps partially with a partition
table tree (aka not all table partitions have a partition index), where
these don't get clustered because there is no index to work on.

This should not happen, since an incomplete partitioned index is "invalid".

I had been waiting to rebase since there hasn't been any review comments and I
expected additional, future conflicts.

I attempted to review this feature, but the last patch conflicts with
the recent refactoring, so I wasn't able to test it properly.
Could you please send a new version?

Meanwhile, here are my questions about the patch:

1) I don't see a reason to change the logic here. We don't skip counting
existing indexes when create parent index. Why should we skip them in
CONCURRENTLY mode?

            // If concurrent, maybe this should be done after excluding indexes which already exist ?
            pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_TOTAL,
                                         nparts);

2) Here we access relation field after closing the relation. Is it safe?

    /* save lockrelid and locktag for below */
    heaprelid = rel->rd_lockInfo.lockRelId;

3) leaf_partitions() function only handles indexes, so I suggest to name
it more specifically and add a comment about meaning of 'options' parameter.

4) I don't quite understand the idea of the regression test. Why do we
expect to see invalid indexes there?
+    "idxpart_a_idx1" UNIQUE, btree (a) INVALID

5) Speaking of documentation, I think we need to add a paragraph about
CIC on partitioned indexes which will explain that invalid indexes may
appear and what user should do to fix them.

6) ReindexIndexesConcurrently() needs some code cleanup.

--
Anastasia Lubennikova
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

#7Justin Pryzby
pryzby@telsasoft.com
In reply to: Anastasia Lubennikova (#5)
6 attachment(s)
Re: CREATE INDEX CONCURRENTLY on partitioned index

On Mon, Feb 15, 2021 at 10:06:47PM +0300, Anastasia Lubennikova wrote:

On 28.01.2021 17:30, Justin Pryzby wrote:

On Thu, Jan 28, 2021 at 09:51:51PM +0900, Masahiko Sawada wrote:

On Mon, Nov 30, 2020 at 5:22 AM Justin Pryzby <pryzby@telsasoft.com> wrote:

On Sat, Oct 31, 2020 at 01:31:17AM -0500, Justin Pryzby wrote:

Forking this thread, since the existing CFs have been closed.
/messages/by-id/20200914143102.GX18552@telsasoft.com

The strategy is to create catalog entries for all tables with indisvalid=false,
and then process them like REINDEX CONCURRENTLY. If it's interrupted, it
leaves INVALID indexes, which can be cleaned up with DROP or REINDEX, same as
CIC on a plain table.

On Sat, Aug 08, 2020 at 01:37:44AM -0500, Justin Pryzby wrote:

On Mon, Jun 15, 2020 at 09:37:42PM +0900, Michael Paquier wrote:
Note that the mentioned problem wasn't serious: there was a missing index on a
child table, therefore the parent index was invalid, as intended. However, I
agree that it's not nice that the command can fail so easily and leave behind
some indexes created successfully, some failed, and some not created at all.

But I took your advice initially creating invalid inds.

...

That gave me the idea to layer CIC on top of Reindex, since I think it does
exactly what's needed.

On Sat, Sep 26, 2020 at 02:56:55PM -0500, Justin Pryzby wrote:

On Thu, Sep 24, 2020 at 05:11:03PM +0900, Michael Paquier wrote:

It would be good also to check if
we have a partition index tree that maps partially with a partition
table tree (aka not all table partitions have a partition index), where
these don't get clustered because there is no index to work on.

This should not happen, since an incomplete partitioned index is "invalid".

I had been waiting to rebase since there hasn't been any review comments and I
expected additional, future conflicts.

I attempted to review this feature, but the last patch conflicts with the
recent refactoring, so I wasn't able to test it properly.
Could you please send a new version?

I rebased this yesterday, so here's my latest.

2) Here we access relation field after closing the relation. Is it safe?
    /* save lockrelid and locktag for below */
    heaprelid = rel->rd_lockInfo.lockRelId;

Thanks, fixed this just now.

3) leaf_partitions() function only handles indexes, so I suggest to name it
more specifically and add a comment about meaning of 'options' parameter.

4) I don't quite understand the idea of the regression test. Why do we
expect to see invalid indexes there?
+    "idxpart_a_idx1" UNIQUE, btree (a) INVALID

Because of the unique failure:
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+ERROR:  could not create unique index "idxpart2_a_idx2_ccnew"
+DETAIL:  Key (a)=(10) is duplicated.
+\d idxpart

This shows that CIC first creates catalog-only INVALID indexes, and then
reindexes them to "validate".

--
Justin

Attachments:

v13-0001-Allow-CREATE-INDEX-CONCURRENTLY-on-partitioned-t.patchtext/x-diff; charset=us-asciiDownload
From c846ddfc287bfeddb9b389de1869aadf7173c068 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Sat, 6 Jun 2020 17:42:23 -0500
Subject: [PATCH v13 01/18] Allow CREATE INDEX CONCURRENTLY on partitioned
 table

Note, this effectively reverts 050098b14, so take care to not reintroduce the
bug it fixed.
---
 doc/src/sgml/ref/create_index.sgml     |   9 --
 src/backend/commands/indexcmds.c       | 143 ++++++++++++++++++-------
 src/test/regress/expected/indexing.out |  60 ++++++++++-
 src/test/regress/sql/indexing.sql      |  18 +++-
 4 files changed, 176 insertions(+), 54 deletions(-)

diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml
index a5271a9f8f..6869a18968 100644
--- a/doc/src/sgml/ref/create_index.sgml
+++ b/doc/src/sgml/ref/create_index.sgml
@@ -686,15 +686,6 @@ Indexes:
     cannot.
    </para>
 
-   <para>
-    Concurrent builds for indexes on partitioned tables are currently not
-    supported.  However, you may concurrently build the index on each
-    partition individually and then finally create the partitioned index
-    non-concurrently in order to reduce the time where writes to the
-    partitioned table will be locked out.  In this case, building the
-    partitioned index is a metadata only operation.
-   </para>
-
   </refsect2>
  </refsect1>
 
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 127ba7835d..4ac1dacd7d 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -68,6 +68,7 @@
 
 
 /* non-export function prototypes */
+static void reindex_invalid_child_indexes(Oid indexRelationId);
 static bool CompareOpclassOptions(Datum *opts1, Datum *opts2, int natts);
 static void CheckPredicate(Expr *predicate);
 static void ComputeIndexAttrs(IndexInfo *indexInfo,
@@ -680,17 +681,6 @@ DefineIndex(Oid relationId,
 	partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE;
 	if (partitioned)
 	{
-		/*
-		 * Note: we check 'stmt->concurrent' rather than 'concurrent', so that
-		 * the error is thrown also for temporary tables.  Seems better to be
-		 * consistent, even though we could do it on temporary table because
-		 * we're not actually doing it concurrently.
-		 */
-		if (stmt->concurrent)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("cannot create index on partitioned table \"%s\" concurrently",
-							RelationGetRelationName(rel))));
 		if (stmt->excludeOpNames)
 			ereport(ERROR,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
@@ -1128,6 +1118,11 @@ DefineIndex(Oid relationId,
 		if (pd->nparts != 0)
 			flags |= INDEX_CREATE_INVALID;
 	}
+	else if (concurrent && OidIsValid(parentIndexId))
+	{
+		/* If concurrent, initially build index partitions as "invalid" */
+		flags |= INDEX_CREATE_INVALID;
+	}
 
 	if (stmt->deferrable)
 		constr_flags |= INDEX_CONSTR_CREATE_DEFERRABLE;
@@ -1183,6 +1178,14 @@ DefineIndex(Oid relationId,
 		partdesc = RelationGetPartitionDesc(rel);
 		if ((!stmt->relation || stmt->relation->inh) && partdesc->nparts > 0)
 		{
+			/*
+			 * Need to close the relation before recursing into children, so
+			 * copy needed data into a longlived context.
+			 */
+
+			MemoryContext	ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX",
+					ALLOCSET_DEFAULT_SIZES);
+			MemoryContext	oldcontext = MemoryContextSwitchTo(ind_context);
 			int			nparts = partdesc->nparts;
 			Oid		   *part_oids = palloc(sizeof(Oid) * nparts);
 			bool		invalidate_parent = false;
@@ -1193,8 +1196,10 @@ DefineIndex(Oid relationId,
 										 nparts);
 
 			memcpy(part_oids, partdesc->oids, sizeof(Oid) * nparts);
+			parentDesc = CreateTupleDescCopy(RelationGetDescr(rel));
+			table_close(rel, NoLock);
+			MemoryContextSwitchTo(oldcontext);
 
-			parentDesc = RelationGetDescr(rel);
 			opfamOids = palloc(sizeof(Oid) * numberOfKeyAttributes);
 			for (i = 0; i < numberOfKeyAttributes; i++)
 				opfamOids[i] = get_opclass_family(classObjectId[i]);
@@ -1237,10 +1242,12 @@ DefineIndex(Oid relationId,
 					continue;
 				}
 
+				oldcontext = MemoryContextSwitchTo(ind_context);
 				childidxs = RelationGetIndexList(childrel);
 				attmap =
 					build_attrmap_by_name(RelationGetDescr(childrel),
 										  parentDesc);
+				MemoryContextSwitchTo(oldcontext);
 
 				foreach(cell, childidxs)
 				{
@@ -1311,10 +1318,14 @@ DefineIndex(Oid relationId,
 				 */
 				if (!found)
 				{
-					IndexStmt  *childStmt = copyObject(stmt);
+					IndexStmt  *childStmt;
 					bool		found_whole_row;
 					ListCell   *lc;
 
+					oldcontext = MemoryContextSwitchTo(ind_context);
+					childStmt = copyObject(stmt);
+					MemoryContextSwitchTo(oldcontext);
+
 					/*
 					 * We can't use the same index name for the child index,
 					 * so clear idxname to let the recursive invocation choose
@@ -1366,10 +1377,18 @@ DefineIndex(Oid relationId,
 								createdConstraintId,
 								is_alter_table, check_rights, check_not_in_use,
 								skip_build, quiet);
+					if (concurrent)
+					{
+						PopActiveSnapshot();
+						PushActiveSnapshot(GetTransactionSnapshot());
+						invalidate_parent = true;
+					}
 				}
 
-				pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
-											 i + 1);
+				/* For concurrent build, this is a catalog-only stage */
+				if (!concurrent)
+					pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
+												 i + 1);
 				free_attrmap(attmap);
 			}
 
@@ -1379,41 +1398,33 @@ DefineIndex(Oid relationId,
 			 * invalid, this is incorrect, so update our row to invalid too.
 			 */
 			if (invalidate_parent)
-			{
-				Relation	pg_index = table_open(IndexRelationId, RowExclusiveLock);
-				HeapTuple	tup,
-							newtup;
-
-				tup = SearchSysCache1(INDEXRELID,
-									  ObjectIdGetDatum(indexRelationId));
-				if (!HeapTupleIsValid(tup))
-					elog(ERROR, "cache lookup failed for index %u",
-						 indexRelationId);
-				newtup = heap_copytuple(tup);
-				((Form_pg_index) GETSTRUCT(newtup))->indisvalid = false;
-				CatalogTupleUpdate(pg_index, &tup->t_self, newtup);
-				ReleaseSysCache(tup);
-				table_close(pg_index, RowExclusiveLock);
-				heap_freetuple(newtup);
-			}
-		}
+				index_set_state_flags(indexRelationId, INDEX_DROP_CLEAR_VALID);
+		} else
+			table_close(rel, NoLock);
 
 		/*
 		 * Indexes on partitioned tables are not themselves built, so we're
 		 * done here.
 		 */
-		table_close(rel, NoLock);
 		if (!OidIsValid(parentIndexId))
+		{
+			if (concurrent)
+				reindex_invalid_child_indexes(indexRelationId);
+
 			pgstat_progress_end_command();
+		}
+
 		return address;
 	}
 
-	if (!concurrent)
+	if (!concurrent || OidIsValid(parentIndexId))
 	{
-		/* Close the heap and we're done, in the non-concurrent case */
-		table_close(rel, NoLock);
+		/*
+		 * We're done if this is the top-level index,
+		 * or the catalog-only phase of a partition built concurrently
+		 */
 
-		/* If this is the top-level index, we're done. */
+		table_close(rel, NoLock);
 		if (!OidIsValid(parentIndexId))
 			pgstat_progress_end_command();
 
@@ -1617,6 +1628,62 @@ DefineIndex(Oid relationId,
 	return address;
 }
 
+/* Reindex invalid child indexes created earlier */
+static void
+reindex_invalid_child_indexes(Oid indexRelationId)
+{
+	ListCell *lc;
+	int		npart = 0;
+	ReindexParams params = {
+		.options = REINDEXOPT_CONCURRENTLY
+	};
+
+	MemoryContext	ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX",
+			ALLOCSET_DEFAULT_SIZES);
+	MemoryContext	oldcontext;
+	List		*childs = find_inheritance_children(indexRelationId, ShareLock);
+	List		*partitions = NIL;
+
+	PreventInTransactionBlock(true, "REINDEX INDEX");
+
+	foreach (lc, childs)
+	{
+		Oid			partoid = lfirst_oid(lc);
+
+		/* XXX: need to retrofit progress reporting into it */
+		// pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
+									 // npart++);
+
+		if (get_index_isvalid(partoid) ||
+				!RELKIND_HAS_STORAGE(get_rel_relkind(partoid)))
+			continue;
+
+		/* Save partition OID */
+		oldcontext = MemoryContextSwitchTo(ind_context);
+		partitions = lappend_oid(partitions, partoid);
+		MemoryContextSwitchTo(oldcontext);
+	}
+
+	/*
+	 * Process each partition listed in a separate transaction.  Note that
+	 * this commits and then starts a new transaction immediately.
+	 * XXX: since this is done in 2*N transactions, it could just as well
+	 * call ReindexRelationConcurrently directly
+	 */
+	ReindexMultipleInternal(partitions, &params);
+
+	/*
+	 * CIC needs to mark a partitioned index as VALID, which itself
+	 * requires setting READY, which is unset for CIC (even though
+	 * it's meaningless for an index without storage).
+	 * This must be done only while holding a lock which precludes adding
+	 * partitions.
+	 * See also: validatePartitionedIndex().
+	 */
+	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
+	CommandCounterIncrement();
+	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
+}
 
 /*
  * CheckMutability
diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out
index c93f4470c9..f04abc6897 100644
--- a/src/test/regress/expected/indexing.out
+++ b/src/test/regress/expected/indexing.out
@@ -50,11 +50,63 @@ select relname, relkind, relhassubclass, inhparent::regclass
 (8 rows)
 
 drop table idxpart;
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
-ERROR:  cannot create index on partitioned table "idxpart" concurrently
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart (a); -- partitioned
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart2 (a); -- leaf
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+ERROR:  could not create unique index "idxpart2_a_idx2_ccnew"
+DETAIL:  Key (a)=(10) is duplicated.
+\d idxpart
+        Partitioned table "public.idxpart"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition key: RANGE (a)
+Indexes:
+    "idxpart_a_idx" btree (a)
+    "idxpart_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 2 (Use \d+ to list them.)
+
+\d idxpart1
+        Partitioned table "public.idxpart1"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart1_a_idx" btree (a) INVALID
+    "idxpart1_a_idx1" btree (a)
+    "idxpart1_a_idx2" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart2
+              Table "public.idxpart2"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (10) TO (20)
+Indexes:
+    "idxpart2_a_idx" btree (a)
+    "idxpart2_a_idx1" btree (a)
+    "idxpart2_a_idx2" UNIQUE, btree (a) INVALID
+    "idxpart2_a_idx2_ccnew" UNIQUE, btree (a) INVALID
+
 drop table idxpart;
 -- Verify bugfix with query on indexed partitioned table with no partitions
 -- https://postgr.es/m/20180124162006.pmapfiznhgngwtjf@alvherre.pgsql
diff --git a/src/test/regress/sql/indexing.sql b/src/test/regress/sql/indexing.sql
index 42f398b67c..3d4b6e9bc9 100644
--- a/src/test/regress/sql/indexing.sql
+++ b/src/test/regress/sql/indexing.sql
@@ -29,10 +29,22 @@ select relname, relkind, relhassubclass, inhparent::regclass
 	where relname like 'idxpart%' order by relname;
 drop table idxpart;
 
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart (a); -- partitioned
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart2 (a); -- leaf
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+\d idxpart
+\d idxpart1
+\d idxpart2
 drop table idxpart;
 
 -- Verify bugfix with query on indexed partitioned table with no partitions
-- 
2.17.0

v13-0002-f-progress-reporting.patchtext/x-diff; charset=us-asciiDownload
From 6cbf4b5f0e3f795e0253896b13d22a67e513b602 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Sun, 14 Feb 2021 18:31:43 -0600
Subject: [PATCH v13 02/18] f! progress reporting

---
 src/backend/commands/indexcmds.c | 33 +++++++-------------------------
 1 file changed, 7 insertions(+), 26 deletions(-)

diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 4ac1dacd7d..d6567ec231 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -1638,40 +1638,20 @@ reindex_invalid_child_indexes(Oid indexRelationId)
 		.options = REINDEXOPT_CONCURRENTLY
 	};
 
-	MemoryContext	ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX",
-			ALLOCSET_DEFAULT_SIZES);
-	MemoryContext	oldcontext;
-	List		*childs = find_inheritance_children(indexRelationId, ShareLock);
-	List		*partitions = NIL;
-
 	PreventInTransactionBlock(true, "REINDEX INDEX");
 
-	foreach (lc, childs)
+	foreach (lc, find_inheritance_children(indexRelationId, ShareLock))
 	{
 		Oid			partoid = lfirst_oid(lc);
 
-		/* XXX: need to retrofit progress reporting into it */
-		// pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
-									 // npart++);
-
-		if (get_index_isvalid(partoid) ||
-				!RELKIND_HAS_STORAGE(get_rel_relkind(partoid)))
-			continue;
+		if (!get_index_isvalid(partoid) &&
+				RELKIND_HAS_STORAGE(get_rel_relkind(partoid)))
+			ReindexRelationConcurrently(partoid, &params);
 
-		/* Save partition OID */
-		oldcontext = MemoryContextSwitchTo(ind_context);
-		partitions = lappend_oid(partitions, partoid);
-		MemoryContextSwitchTo(oldcontext);
+		pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
+									 npart++);
 	}
 
-	/*
-	 * Process each partition listed in a separate transaction.  Note that
-	 * this commits and then starts a new transaction immediately.
-	 * XXX: since this is done in 2*N transactions, it could just as well
-	 * call ReindexRelationConcurrently directly
-	 */
-	ReindexMultipleInternal(partitions, &params);
-
 	/*
 	 * CIC needs to mark a partitioned index as VALID, which itself
 	 * requires setting READY, which is unset for CIC (even though
@@ -1680,6 +1660,7 @@ reindex_invalid_child_indexes(Oid indexRelationId)
 	 * partitions.
 	 * See also: validatePartitionedIndex().
 	 */
+	CommandCounterIncrement();
 	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
 	CommandCounterIncrement();
 	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
-- 
2.17.0

v13-0003-WIP-Add-SKIPVALID-flag-for-more-integration.patchtext/x-diff; charset=us-asciiDownload
From 6dff5512b07735e51c2c7f82cce5b3bd8b9a6faa Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Fri, 30 Oct 2020 16:23:02 -0500
Subject: [PATCH v13 03/18] WIP: Add SKIPVALID flag for more integration

XXX: this breaks progress reporting?
---
 src/backend/commands/indexcmds.c | 36 +++++++++++++++-----------------
 src/include/catalog/index.h      |  1 +
 2 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index d6567ec231..10d4da136f 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -1628,40 +1628,33 @@ DefineIndex(Oid relationId,
 	return address;
 }
 
-/* Reindex invalid child indexes created earlier */
+/*
+ * Reindex invalid child indexes created earlier thereby validating
+ * the parent index.
+ */
 static void
 reindex_invalid_child_indexes(Oid indexRelationId)
 {
-	ListCell *lc;
-	int		npart = 0;
 	ReindexParams params = {
-		.options = REINDEXOPT_CONCURRENTLY
+		.options = REINDEXOPT_CONCURRENTLY | REINDEXOPT_SKIPVALID
 	};
 
-	PreventInTransactionBlock(true, "REINDEX INDEX");
-
-	foreach (lc, find_inheritance_children(indexRelationId, ShareLock))
-	{
-		Oid			partoid = lfirst_oid(lc);
-
-		if (!get_index_isvalid(partoid) &&
-				RELKIND_HAS_STORAGE(get_rel_relkind(partoid)))
-			ReindexRelationConcurrently(partoid, &params);
-
-		pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
-									 npart++);
-	}
-
 	/*
 	 * CIC needs to mark a partitioned index as VALID, which itself
 	 * requires setting READY, which is unset for CIC (even though
 	 * it's meaningless for an index without storage).
 	 * This must be done only while holding a lock which precludes adding
 	 * partitions.
-	 * See also: validatePartitionedIndex().
 	 */
 	CommandCounterIncrement();
 	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
+
+	/*
+	 * Process each partition listed in a separate transaction.  Note that
+	 * this commits and then starts a new transaction immediately.
+	 */
+	ReindexPartitions(indexRelationId, &params, true);
+
 	CommandCounterIncrement();
 	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
 }
@@ -3085,6 +3078,11 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 		if (!RELKIND_HAS_STORAGE(partkind))
 			continue;
 
+		/* Skip valid indexes, if requested */
+		if ((params->options & REINDEXOPT_SKIPVALID) != 0 &&
+				get_index_isvalid(partoid))
+			continue;
+
 		Assert(partkind == RELKIND_INDEX ||
 			   partkind == RELKIND_RELATION);
 
diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h
index e22d506436..994fe94fa1 100644
--- a/src/include/catalog/index.h
+++ b/src/include/catalog/index.h
@@ -42,6 +42,7 @@ typedef struct ReindexParams
 #define REINDEXOPT_REPORT_PROGRESS 0x02 /* report pgstat progress */
 #define REINDEXOPT_MISSING_OK 	0x04	/* skip missing relations */
 #define REINDEXOPT_CONCURRENTLY	0x08	/* concurrent mode */
+#define REINDEXOPT_SKIPVALID	0x10	/* skip valid indexes */
 
 /* state info for validate_index bulkdelete callback */
 typedef struct ValidateIndexState
-- 
2.17.0

v13-0004-ReindexPartitions-to-set-indisvalid.patchtext/x-diff; charset=us-asciiDownload
From 04d6b45f488d67400f7e6d7beb73ad92c00083e3 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Fri, 30 Oct 2020 23:52:31 -0500
Subject: [PATCH v13 04/18] ReindexPartitions() to set indisvalid..

Something like this should probably have been included in
a6642b3ae060976b42830b7dc8f29ec190ab05e4

See also 71a05b223, which mentioned the absence of any way to validate an
index.
---
 src/backend/commands/indexcmds.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 10d4da136f..b21555b6cf 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -1643,8 +1643,6 @@ reindex_invalid_child_indexes(Oid indexRelationId)
 	 * CIC needs to mark a partitioned index as VALID, which itself
 	 * requires setting READY, which is unset for CIC (even though
 	 * it's meaningless for an index without storage).
-	 * This must be done only while holding a lock which precludes adding
-	 * partitions.
 	 */
 	CommandCounterIncrement();
 	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
@@ -1654,9 +1652,6 @@ reindex_invalid_child_indexes(Oid indexRelationId)
 	 * this commits and then starts a new transaction immediately.
 	 */
 	ReindexPartitions(indexRelationId, &params, true);
-
-	CommandCounterIncrement();
-	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
 }
 
 /*
@@ -3098,6 +3093,24 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 	 */
 	ReindexMultipleInternal(partitions, params);
 
+	/*
+	 * If indexes exist on all of the partitioned table's children, and we
+	 * just reindexed them, then we know they're valid, and so can mark the
+	 * parent index as valid.
+	 * This handles the case of CREATE INDEX CONCURRENTLY.
+	 * See also: validatePartitionedIndex().
+	 */
+	if (get_rel_relkind(relid) == RELKIND_PARTITIONED_INDEX
+			&& !get_index_isvalid(relid))
+	{
+		Oid	tableoid = IndexGetRelation(relid, false);
+		List	*child_tables = find_all_inheritors(tableoid, ShareLock, NULL);
+
+		/* Both lists include their parent relation as well as any intermediate partitioned rels */
+		if (list_length(inhoids) == list_length(child_tables))
+			index_set_state_flags(relid, INDEX_CREATE_SET_VALID);
+	}
+
 	/*
 	 * Clean up working storage --- note we must do this after
 	 * StartTransactionCommand, else we might be trying to delete the active
-- 
2.17.0

v13-0005-Refactor-to-allow-reindexing-all-index-partition.patchtext/x-diff; charset=us-asciiDownload
From 0ba6f83c6df1140037016eb100f8c85dd13070fe Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Sun, 1 Nov 2020 12:25:15 -0600
Subject: [PATCH v13 05/18] Refactor to allow reindexing all index partitions
 at once..

The utility of this is to reindex N partitions in 2 transactions, rather than
2*N transactions.
---
 src/backend/commands/indexcmds.c           | 258 ++++++++++++++-------
 src/test/regress/expected/create_index.out |   4 +-
 2 files changed, 182 insertions(+), 80 deletions(-)

diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index b21555b6cf..f5fea14ff4 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -102,6 +102,8 @@ static void ReindexMultipleInternal(List *relids,
 									ReindexParams *params);
 static bool ReindexRelationConcurrently(Oid relationOid,
 										ReindexParams *params);
+static List *ReindexIndexesConcurrently(List *indexIds, List *heapRelationIds,
+		ReindexParams *params, MemoryContext private_context);
 static void update_relispartition(Oid relationId, bool newval);
 static inline void set_indexsafe_procflags(void);
 
@@ -124,6 +126,15 @@ typedef struct ReindexErrorInfo
 	char		relkind;
 } ReindexErrorInfo;
 
+/* Argument to ReindexIndexConcurrently takes a List* of these */
+typedef struct ReindexIndexInfo
+{
+	Oid			indexId;
+	Oid			tableId;
+	Oid			amId;
+	bool		safe;		/* for set_indexsafe_procflags */
+} ReindexIndexInfo;
+
 /*
  * CheckIndexCompatible
  *		Determine whether an existing index definition is compatible with a
@@ -2636,7 +2647,15 @@ ReindexIndex(RangeVar *indexRelation, ReindexParams *params, bool isTopLevel)
 		ReindexPartitions(indOid, params, isTopLevel);
 	else if ((params->options & REINDEXOPT_CONCURRENTLY) != 0 &&
 			 persistence != RELPERSISTENCE_TEMP)
-		ReindexRelationConcurrently(indOid, params);
+	{
+		ReindexIndexInfo idxinfo = {
+			.indexId = indOid,
+			/* other fields set later */
+		};
+		ReindexIndexesConcurrently(list_make1(&idxinfo),
+				list_make1_oid(IndexGetRelation(indOid, false)),
+				params, CurrentMemoryContext);
+	}
 	else
 	{
 		ReindexParams newparams = *params;
@@ -3002,20 +3021,69 @@ reindex_error_callback(void *arg)
 				   errinfo->relnamespace, errinfo->relname);
 }
 
+
+/*
+ * Given a list of index oids, return a list of leaf partitions by removing
+ * any intermediate parents.  heaprels is populated with the corresponding
+ * tables.
+ */
+static List *
+leaf_indexes(List *inhoids, int options, List **heaprels)
+{
+	List		*partitions = NIL;
+	ListCell	*lc;
+
+	foreach(lc, inhoids)
+	{
+		Oid			partoid = lfirst_oid(lc);
+		Oid			tableoid;
+		Relation	table;
+		char		partkind = get_rel_relkind(partoid);
+
+		/*
+		 * This discards partitioned indexes and foreign tables.
+		 */
+		if (!RELKIND_HAS_STORAGE(partkind))
+			continue;
+
+		Assert(partkind == RELKIND_INDEX);
+
+		/* Skip invalid indexes, if requested */
+		if ((options & REINDEXOPT_SKIPVALID) != 0 &&
+				get_index_isvalid(partoid))
+			continue;
+
+		/* (try to) Open the table, with lock */
+		tableoid = IndexGetRelation(partoid, false);
+		table = table_open(tableoid, ShareLock);
+		table_close(table, NoLock);
+
+		/* Save partition OID in current MemoryContext */
+		partitions = lappend_oid(partitions, partoid);
+		*heaprels = lappend_oid(*heaprels, tableoid);
+	}
+
+	return partitions;
+}
+
+
 /*
  * ReindexPartitions
  *
  * Reindex a set of partitions, per the partitioned index or table given
  * by the caller.
+ * XXX: should be further refactored with logic from ReindexRelationConcurrently
  */
 static void
 ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 {
-	List	   *partitions = NIL;
+	List	   *partitions = NIL,
+			*heaprels = NIL;
 	char		relkind = get_rel_relkind(relid);
 	char	   *relname = get_rel_name(relid);
 	char	   *relnamespace = get_namespace_name(get_rel_namespace(relid));
 	MemoryContext reindex_context;
+	MemoryContext old_context;
 	List	   *inhoids;
 	ListCell   *lc;
 	ErrorContextCallback errcallback;
@@ -3060,38 +3128,58 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 	 * The list of relations to reindex are the physical partitions of the
 	 * tree so discard any partitioned table or index.
 	 */
-	foreach(lc, inhoids)
-	{
-		Oid			partoid = lfirst_oid(lc);
-		char		partkind = get_rel_relkind(partoid);
-		MemoryContext old_context;
-
-		/*
-		 * This discards partitioned tables, partitioned indexes and foreign
-		 * tables.
-		 */
-		if (!RELKIND_HAS_STORAGE(partkind))
-			continue;
 
-		/* Skip invalid indexes, if requested */
-		if ((params->options & REINDEXOPT_SKIPVALID) != 0 &&
-				get_index_isvalid(partoid))
-			continue;
+	if (relkind == RELKIND_PARTITIONED_INDEX)
+	{
+		old_context = MemoryContextSwitchTo(reindex_context);
+		partitions = leaf_indexes(inhoids, params->options, &heaprels);
+		MemoryContextSwitchTo(old_context);
+	} else {
+		/* Loop over parent tables */
+		foreach(lc, inhoids)
+		{
+			Oid		partoid = lfirst_oid(lc);
+			Relation parttable;
+			List	*partindexes;
+
+			parttable = table_open(partoid, ShareLock);
+			old_context = MemoryContextSwitchTo(reindex_context);
+			partindexes = RelationGetIndexList(parttable);
+			partindexes = leaf_indexes(partindexes, params->options, &heaprels);
+			partitions = list_concat(partitions, partindexes);
+
+			MemoryContextSwitchTo(old_context);
+			table_close(parttable, ShareLock);
+		}
+	}
 
-		Assert(partkind == RELKIND_INDEX ||
-			   partkind == RELKIND_RELATION);
+	if ((params->options & REINDEXOPT_CONCURRENTLY) != 0 &&
+		relkind == RELKIND_PARTITIONED_INDEX &&
+		get_rel_persistence(relid) != RELPERSISTENCE_TEMP)
+	{
+		List			   *idxinfos = NIL;
+		ReindexIndexInfo	*idxinfo;
 
-		/* Save partition OID */
 		old_context = MemoryContextSwitchTo(reindex_context);
-		partitions = lappend_oid(partitions, partoid);
+		foreach (lc, partitions)
+		{
+			Oid partoid = lfirst_oid(lc);
+			idxinfo = palloc(sizeof(ReindexIndexInfo));
+			idxinfo->indexId = partoid;
+			/* other fields set later */
+			idxinfos = lappend(idxinfos, idxinfo);
+		}
 		MemoryContextSwitchTo(old_context);
-	}
 
-	/*
-	 * Process each partition listed in a separate transaction.  Note that
-	 * this commits and then starts a new transaction immediately.
-	 */
-	ReindexMultipleInternal(partitions, params);
+		/* Process all indexes in a single loop */
+		ReindexIndexesConcurrently(idxinfos, heaprels, params, reindex_context);
+	} else {
+		/*
+		 * Process each partition listed in a separate transaction.  Note that
+		 * this commits and then starts a new transaction immediately.
+		 */
+		ReindexMultipleInternal(partitions, params);
+	}
 
 	/*
 	 * If indexes exist on all of the partitioned table's children, and we
@@ -3254,18 +3342,9 @@ ReindexMultipleInternal(List *relids, ReindexParams *params)
 static bool
 ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 {
-	typedef struct ReindexIndexInfo
-	{
-		Oid			indexId;
-		Oid			tableId;
-		Oid			amId;
-		bool		safe;		/* for set_indexsafe_procflags */
-	} ReindexIndexInfo;
 	List	   *heapRelationIds = NIL;
 	List	   *indexIds = NIL;
 	List	   *newIndexIds = NIL;
-	List	   *relationLocks = NIL;
-	List	   *lockTags = NIL;
 	ListCell   *lc,
 			   *lc2;
 	MemoryContext private_context;
@@ -3274,13 +3353,6 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 	char	   *relationName = NULL;
 	char	   *relationNamespace = NULL;
 	PGRUsage	ru0;
-	const int	progress_index[] = {
-		PROGRESS_CREATEIDX_COMMAND,
-		PROGRESS_CREATEIDX_PHASE,
-		PROGRESS_CREATEIDX_INDEX_OID,
-		PROGRESS_CREATEIDX_ACCESS_METHOD_OID
-	};
-	int64		progress_vals[4];
 
 	/*
 	 * Create a memory context that will survive forced transaction commits we
@@ -3553,6 +3625,69 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 
 	Assert(heapRelationIds != NIL);
 
+	/* Do the work */
+	newIndexIds = ReindexIndexesConcurrently(indexIds, heapRelationIds, params, private_context);
+
+	/* Log what we did */
+	if ((params->options & REINDEXOPT_VERBOSE) != 0)
+	{
+		if (relkind == RELKIND_INDEX)
+			ereport(INFO,
+					(errmsg("index \"%s.%s\" was reindexed",
+							relationNamespace, relationName),
+					 errdetail("%s.",
+							   pg_rusage_show(&ru0))));
+		else
+		{
+			foreach(lc, newIndexIds)
+			{
+				Oid			indOid = lfirst_oid(lc);
+				ereport(INFO,
+						(errmsg("index \"%s.%s\" was reindexed",
+								get_namespace_name(get_rel_namespace(indOid)),
+								get_rel_name(indOid))));
+				/* Don't show rusage here, since it's not per index. */
+			}
+
+			ereport(INFO,
+					(errmsg("table \"%s.%s\" was reindexed",
+							relationNamespace, relationName),
+					 errdetail("%s.",
+							   pg_rusage_show(&ru0))));
+		}
+	}
+
+
+	MemoryContextDelete(private_context);
+
+	return true;
+}
+
+/*
+ * Reindex concurrently for an arbitrary list of index relations
+ * This is called by ReindexRelationConcurrently and
+ */
+static List *
+ReindexIndexesConcurrently(List *indexIds, List *heapRelationIds,
+		ReindexParams *params, MemoryContext private_context)
+{
+	List	   *newIndexIds = NIL;
+	List	   *relationLocks = NIL;
+	List	   *lockTags = NIL;
+
+	ListCell   *lc,
+			   *lc2;
+
+	MemoryContext oldcontext;
+
+	const int	progress_index[] = {
+		PROGRESS_CREATEIDX_COMMAND,
+		PROGRESS_CREATEIDX_PHASE,
+		PROGRESS_CREATEIDX_INDEX_OID,
+		PROGRESS_CREATEIDX_ACCESS_METHOD_OID
+	};
+	int64		progress_vals[4];
+
 	/*-----
 	 * Now we have all the indexes we want to process in indexIds.
 	 *
@@ -4026,42 +4161,9 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 	/* Start a new transaction to finish process properly */
 	StartTransactionCommand();
 
-	/* Log what we did */
-	if ((params->options & REINDEXOPT_VERBOSE) != 0)
-	{
-		if (relkind == RELKIND_INDEX)
-			ereport(INFO,
-					(errmsg("index \"%s.%s\" was reindexed",
-							relationNamespace, relationName),
-					 errdetail("%s.",
-							   pg_rusage_show(&ru0))));
-		else
-		{
-			foreach(lc, newIndexIds)
-			{
-				ReindexIndexInfo *idx = lfirst(lc);
-				Oid			indOid = idx->indexId;
-
-				ereport(INFO,
-						(errmsg("index \"%s.%s\" was reindexed",
-								get_namespace_name(get_rel_namespace(indOid)),
-								get_rel_name(indOid))));
-				/* Don't show rusage here, since it's not per index. */
-			}
-
-			ereport(INFO,
-					(errmsg("table \"%s.%s\" was reindexed",
-							relationNamespace, relationName),
-					 errdetail("%s.",
-							   pg_rusage_show(&ru0))));
-		}
-	}
-
-	MemoryContextDelete(private_context);
-
 	pgstat_progress_end_command();
 
-	return true;
+	return newIndexIds;
 }
 
 /*
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index 830fdddf24..6f41adf736 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -2470,12 +2470,12 @@ COMMIT;
 REINDEX TABLE CONCURRENTLY pg_class; -- no catalog relation
 ERROR:  cannot reindex system catalogs concurrently
 REINDEX INDEX CONCURRENTLY pg_class_oid_index; -- no catalog index
-ERROR:  cannot reindex system catalogs concurrently
+ERROR:  concurrent index creation on system catalog tables is not supported
 -- These are the toast table and index of pg_authid.
 REINDEX TABLE CONCURRENTLY pg_toast.pg_toast_1260; -- no catalog toast table
 ERROR:  cannot reindex system catalogs concurrently
 REINDEX INDEX CONCURRENTLY pg_toast.pg_toast_1260_index; -- no catalog toast index
-ERROR:  cannot reindex system catalogs concurrently
+ERROR:  concurrent index creation on system catalog tables is not supported
 REINDEX SYSTEM CONCURRENTLY postgres; -- not allowed for SYSTEM
 ERROR:  cannot reindex system catalogs concurrently
 -- Warns about catalog relations
-- 
2.17.0

v13-0006-More-refactoring.patchtext/x-diff; charset=us-asciiDownload
From edde23839fe1dafffe804b24f9068c0f294c0651 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Sun, 1 Nov 2020 13:46:18 -0600
Subject: [PATCH v13 06/18] More refactoring

---
 src/backend/commands/indexcmds.c           | 201 ++++++++++-----------
 src/test/regress/expected/create_index.out |   4 +-
 2 files changed, 93 insertions(+), 112 deletions(-)

diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index f5fea14ff4..5c5596cd28 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -102,8 +102,8 @@ static void ReindexMultipleInternal(List *relids,
 									ReindexParams *params);
 static bool ReindexRelationConcurrently(Oid relationOid,
 										ReindexParams *params);
-static List *ReindexIndexesConcurrently(List *indexIds, List *heapRelationIds,
-		ReindexParams *params, MemoryContext private_context);
+static List *ReindexIndexesConcurrently(List *indexIds, ReindexParams *params,
+		MemoryContext private_context);
 static void update_relispartition(Oid relationId, bool newval);
 static inline void set_indexsafe_procflags(void);
 
@@ -2652,8 +2652,8 @@ ReindexIndex(RangeVar *indexRelation, ReindexParams *params, bool isTopLevel)
 			.indexId = indOid,
 			/* other fields set later */
 		};
+
 		ReindexIndexesConcurrently(list_make1(&idxinfo),
-				list_make1_oid(IndexGetRelation(indOid, false)),
 				params, CurrentMemoryContext);
 	}
 	else
@@ -3023,12 +3023,11 @@ reindex_error_callback(void *arg)
 
 
 /*
- * Given a list of index oids, return a list of leaf partitions by removing
- * any intermediate parents.  heaprels is populated with the corresponding
- * tables.
+ * Given a list of index oids, return a new list of leaf partitions by
+ * excluding any intermediate parents.
  */
 static List *
-leaf_indexes(List *inhoids, int options, List **heaprels)
+leaf_indexes(List *inhoids, int options)
 {
 	List		*partitions = NIL;
 	ListCell	*lc;
@@ -3060,7 +3059,6 @@ leaf_indexes(List *inhoids, int options, List **heaprels)
 
 		/* Save partition OID in current MemoryContext */
 		partitions = lappend_oid(partitions, partoid);
-		*heaprels = lappend_oid(*heaprels, tableoid);
 	}
 
 	return partitions;
@@ -3072,13 +3070,11 @@ leaf_indexes(List *inhoids, int options, List **heaprels)
  *
  * Reindex a set of partitions, per the partitioned index or table given
  * by the caller.
- * XXX: should be further refactored with logic from ReindexRelationConcurrently
  */
 static void
 ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 {
-	List	   *partitions = NIL,
-			*heaprels = NIL;
+	List	   *partitions = NIL;
 	char		relkind = get_rel_relkind(relid);
 	char	   *relname = get_rel_name(relid);
 	char	   *relnamespace = get_namespace_name(get_rel_namespace(relid));
@@ -3132,7 +3128,7 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 	if (relkind == RELKIND_PARTITIONED_INDEX)
 	{
 		old_context = MemoryContextSwitchTo(reindex_context);
-		partitions = leaf_indexes(inhoids, params->options, &heaprels);
+		partitions = leaf_indexes(inhoids, params->options);
 		MemoryContextSwitchTo(old_context);
 	} else {
 		/* Loop over parent tables */
@@ -3145,7 +3141,7 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 			parttable = table_open(partoid, ShareLock);
 			old_context = MemoryContextSwitchTo(reindex_context);
 			partindexes = RelationGetIndexList(parttable);
-			partindexes = leaf_indexes(partindexes, params->options, &heaprels);
+			partindexes = leaf_indexes(partindexes, params->options);
 			partitions = list_concat(partitions, partindexes);
 
 			MemoryContextSwitchTo(old_context);
@@ -3154,10 +3150,9 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 	}
 
 	if ((params->options & REINDEXOPT_CONCURRENTLY) != 0 &&
-		relkind == RELKIND_PARTITIONED_INDEX &&
 		get_rel_persistence(relid) != RELPERSISTENCE_TEMP)
 	{
-		List			   *idxinfos = NIL;
+		List			    *idxinfos = NIL;
 		ReindexIndexInfo	*idxinfo;
 
 		old_context = MemoryContextSwitchTo(reindex_context);
@@ -3172,7 +3167,7 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 		MemoryContextSwitchTo(old_context);
 
 		/* Process all indexes in a single loop */
-		ReindexIndexesConcurrently(idxinfos, heaprels, params, reindex_context);
+		ReindexIndexesConcurrently(idxinfos, params, reindex_context);
 	} else {
 		/*
 		 * Process each partition listed in a separate transaction.  Note that
@@ -3342,7 +3337,6 @@ ReindexMultipleInternal(List *relids, ReindexParams *params)
 static bool
 ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 {
-	List	   *heapRelationIds = NIL;
 	List	   *indexIds = NIL;
 	List	   *newIndexIds = NIL;
 	ListCell   *lc,
@@ -3395,14 +3389,6 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 				 */
 				Relation	heapRelation;
 
-				/* Save the list of relation OIDs in private context */
-				oldcontext = MemoryContextSwitchTo(private_context);
-
-				/* Track this relation for session locks */
-				heapRelationIds = lappend_oid(heapRelationIds, relationOid);
-
-				MemoryContextSwitchTo(oldcontext);
-
 				if (IsCatalogRelationOid(relationOid))
 					ereport(ERROR,
 							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
@@ -3415,7 +3401,7 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 												  ShareUpdateExclusiveLock);
 					/* leave if relation does not exist */
 					if (!heapRelation)
-						break;
+						break; // XXX: lremove
 				}
 				else
 					heapRelation = table_open(relationOid,
@@ -3473,14 +3459,6 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 					Relation	toastRelation = table_open(toastOid,
 														   ShareUpdateExclusiveLock);
 
-					/* Save the list of relation OIDs in private context */
-					oldcontext = MemoryContextSwitchTo(private_context);
-
-					/* Track this relation for session locks */
-					heapRelationIds = lappend_oid(heapRelationIds, toastOid);
-
-					MemoryContextSwitchTo(oldcontext);
-
 					foreach(lc2, RelationGetIndexList(toastRelation))
 					{
 						Oid			cellOid = lfirst_oid(lc2);
@@ -3521,78 +3499,6 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 				break;
 			}
 		case RELKIND_INDEX:
-			{
-				Oid			heapId = IndexGetRelation(relationOid,
-													  (params->options & REINDEXOPT_MISSING_OK) != 0);
-				Relation	heapRelation;
-				ReindexIndexInfo *idx;
-
-				/* if relation is missing, leave */
-				if (!OidIsValid(heapId))
-					break;
-
-				if (IsCatalogRelationOid(heapId))
-					ereport(ERROR,
-							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-							 errmsg("cannot reindex system catalogs concurrently")));
-
-				/*
-				 * Don't allow reindex for an invalid index on TOAST table, as
-				 * if rebuilt it would not be possible to drop it.  Match
-				 * error message in reindex_index().
-				 */
-				if (IsToastNamespace(get_rel_namespace(relationOid)) &&
-					!get_index_isvalid(relationOid))
-					ereport(ERROR,
-							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-							 errmsg("cannot reindex invalid index on TOAST table")));
-
-				/*
-				 * Check if parent relation can be locked and if it exists,
-				 * this needs to be done at this stage as the list of indexes
-				 * to rebuild is not complete yet, and REINDEXOPT_MISSING_OK
-				 * should not be used once all the session locks are taken.
-				 */
-				if ((params->options & REINDEXOPT_MISSING_OK) != 0)
-				{
-					heapRelation = try_table_open(heapId,
-												  ShareUpdateExclusiveLock);
-					/* leave if relation does not exist */
-					if (!heapRelation)
-						break;
-				}
-				else
-					heapRelation = table_open(heapId,
-											  ShareUpdateExclusiveLock);
-
-				if (OidIsValid(params->tablespaceOid) &&
-					IsSystemRelation(heapRelation))
-					ereport(ERROR,
-							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-							 errmsg("cannot move system relation \"%s\"",
-									get_rel_name(relationOid))));
-
-				table_close(heapRelation, NoLock);
-
-				/* Save the list of relation OIDs in private context */
-				oldcontext = MemoryContextSwitchTo(private_context);
-
-				/* Track the heap relation of this index for session locks */
-				heapRelationIds = list_make1_oid(heapId);
-
-				/*
-				 * Save the list of relation OIDs in private context.  Note
-				 * that invalid indexes are allowed here.
-				 */
-				idx = palloc(sizeof(ReindexIndexInfo));
-				idx->indexId = relationOid;
-				indexIds = lappend(indexIds, idx);
-				/* other fields set later */
-
-				MemoryContextSwitchTo(oldcontext);
-				break;
-			}
-
 		case RELKIND_PARTITIONED_TABLE:
 		case RELKIND_PARTITIONED_INDEX:
 		default:
@@ -3623,10 +3529,10 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 				 errmsg("cannot move non-shared relation to tablespace \"%s\"",
 						get_tablespace_name(params->tablespaceOid))));
 
-	Assert(heapRelationIds != NIL);
+	// Assert(heapRelationIds != NIL);
 
 	/* Do the work */
-	newIndexIds = ReindexIndexesConcurrently(indexIds, heapRelationIds, params, private_context);
+	newIndexIds = ReindexIndexesConcurrently(indexIds, params, private_context);
 
 	/* Log what we did */
 	if ((params->options & REINDEXOPT_VERBOSE) != 0)
@@ -3668,9 +3574,10 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
  * This is called by ReindexRelationConcurrently and
  */
 static List *
-ReindexIndexesConcurrently(List *indexIds, List *heapRelationIds,
-		ReindexParams *params, MemoryContext private_context)
+ReindexIndexesConcurrently(List *indexIds, ReindexParams *params,
+		MemoryContext private_context)
 {
+	List		*heapRelationIds = NIL;
 	List	   *newIndexIds = NIL;
 	List	   *relationLocks = NIL;
 	List	   *lockTags = NIL;
@@ -3688,6 +3595,80 @@ ReindexIndexesConcurrently(List *indexIds, List *heapRelationIds,
 	};
 	int64		progress_vals[4];
 
+	/* It's not a shared catalog, so refuse to move it to shared tablespace */
+	if (params->tablespaceOid == GLOBALTABLESPACE_OID && false)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot move non-shared relation to tablespace \"%s\"",
+						get_tablespace_name(params->tablespaceOid))));
+
+	foreach(lc, indexIds)
+	{
+		ReindexIndexInfo	*idx = lfirst(lc);
+		Oid			indexrelid = idx->indexId;
+		Oid			heapId = IndexGetRelation(indexrelid,
+											  (params->options & REINDEXOPT_MISSING_OK) != 0);
+		Relation	heapRelation;
+
+		/* if relation is missing, leave */
+		if (!OidIsValid(heapId))
+			break; // XXX: ldelete?
+
+		if (IsCatalogRelationOid(heapId))
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot reindex system catalogs concurrently")));
+
+		/*
+		 * Don't allow reindex for an invalid index on TOAST table, as
+		 * if rebuilt it would not be possible to drop it.  Match
+		 * error message in reindex_index().
+		 */
+		if (IsToastNamespace(get_rel_namespace(indexrelid)) &&
+			!get_index_isvalid(indexrelid))
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot reindex invalid index on TOAST table")));
+
+		if (OidIsValid(params->tablespaceOid) &&
+			IsCatalogRelationOid(indexrelid))
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot move system relation \"%s\"",
+							get_rel_name(indexrelid))));
+
+		/*
+		 * Check if parent relation can be locked and if it exists,
+		 * this needs to be done at this stage as the list of indexes
+		 * to rebuild is not complete yet, and REINDEXOPT_MISSING_OK
+		 * should not be used once all the session locks are taken.
+		 */
+		if ((params->options & REINDEXOPT_MISSING_OK) != 0)
+		{
+			heapRelation = try_table_open(heapId,
+										  ShareUpdateExclusiveLock);
+			/* leave if relation does not exist */
+			if (!heapRelation)
+				break; // ldelete
+		}
+		else
+			heapRelation = table_open(heapId,
+									  ShareUpdateExclusiveLock);
+		table_close(heapRelation, NoLock);
+
+		/* Save the list of relation OIDs in private context */
+		oldcontext = MemoryContextSwitchTo(private_context);
+
+		/* Track the heap relation of this index for session locks */
+		heapRelationIds = lappend_oid(heapRelationIds, heapId);
+		// heapRelationIds = list_make1_oid(heapId);
+
+		/* Note that invalid indexes are allowed here. */
+
+		MemoryContextSwitchTo(oldcontext);
+		// break;
+	}
+
 	/*-----
 	 * Now we have all the indexes we want to process in indexIds.
 	 *
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index 6f41adf736..830fdddf24 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -2470,12 +2470,12 @@ COMMIT;
 REINDEX TABLE CONCURRENTLY pg_class; -- no catalog relation
 ERROR:  cannot reindex system catalogs concurrently
 REINDEX INDEX CONCURRENTLY pg_class_oid_index; -- no catalog index
-ERROR:  concurrent index creation on system catalog tables is not supported
+ERROR:  cannot reindex system catalogs concurrently
 -- These are the toast table and index of pg_authid.
 REINDEX TABLE CONCURRENTLY pg_toast.pg_toast_1260; -- no catalog toast table
 ERROR:  cannot reindex system catalogs concurrently
 REINDEX INDEX CONCURRENTLY pg_toast.pg_toast_1260_index; -- no catalog toast index
-ERROR:  concurrent index creation on system catalog tables is not supported
+ERROR:  cannot reindex system catalogs concurrently
 REINDEX SYSTEM CONCURRENTLY postgres; -- not allowed for SYSTEM
 ERROR:  cannot reindex system catalogs concurrently
 -- Warns about catalog relations
-- 
2.17.0

#8Zhihong Yu
zyu@yugabyte.com
In reply to: Justin Pryzby (#7)
Re: CREATE INDEX CONCURRENTLY on partitioned index

Hi,
For v13-0006-More-refactoring.patch :

+   /* It's not a shared catalog, so refuse to move it to shared tablespace
*/
+   if (params->tablespaceOid == GLOBALTABLESPACE_OID && false)
+       ereport(ERROR,

Do you intend to remove the ineffective check?

+       else
+           heapRelation = table_open(heapId,
+                                     ShareUpdateExclusiveLock);
+       table_close(heapRelation, NoLock);

The table_open() seems to be unnecessary since there is no check after the
open.

+ // heapRelationIds = list_make1_oid(heapId);
If the code is not needed, you can remove the above.

For v13-0005-Refactor-to-allow-reindexing-all-index-partition.patch :

+       /* Skip invalid indexes, if requested */
+       if ((options & REINDEXOPT_SKIPVALID) != 0 &&
+               get_index_isvalid(partoid))

The comment seems to diverge from the name of the flag (which says skip
valid index).

Cheers

On Mon, Feb 15, 2021 at 11:34 AM Justin Pryzby <pryzby@telsasoft.com> wrote:

Show quoted text

On Mon, Feb 15, 2021 at 10:06:47PM +0300, Anastasia Lubennikova wrote:

On 28.01.2021 17:30, Justin Pryzby wrote:

On Thu, Jan 28, 2021 at 09:51:51PM +0900, Masahiko Sawada wrote:

On Mon, Nov 30, 2020 at 5:22 AM Justin Pryzby <pryzby@telsasoft.com>

wrote:

On Sat, Oct 31, 2020 at 01:31:17AM -0500, Justin Pryzby wrote:

Forking this thread, since the existing CFs have been closed.

/messages/by-id/20200914143102.GX18552@telsasoft.com

The strategy is to create catalog entries for all tables with

indisvalid=false,

and then process them like REINDEX CONCURRENTLY. If it's

interrupted, it

leaves INVALID indexes, which can be cleaned up with DROP or

REINDEX, same as

CIC on a plain table.

On Sat, Aug 08, 2020 at 01:37:44AM -0500, Justin Pryzby wrote:

On Mon, Jun 15, 2020 at 09:37:42PM +0900, Michael Paquier

wrote:

Note that the mentioned problem wasn't serious: there was

missing index on

child table, therefore the parent index was invalid, as

intended. However I

agree that it's not nice that the command can fail so easily

and leave behind

some indexes created successfully, some failed, and some not

created at all.

But I took your advice of initially creating invalid indexes.

...

That gave me the idea to layer CIC on top of Reindex, since I

think it does

exactly what's needed.

On Sat, Sep 26, 2020 at 02:56:55PM -0500, Justin Pryzby wrote:

On Thu, Sep 24, 2020 at 05:11:03PM +0900, Michael Paquier

wrote:

It would be good also to check if
we have a partition index tree that maps partially with a

partition

table tree (i.e., not all table partitions have a partition

index), where

these don't get clustered because there is no index to work

on.

This should not happen, since an incomplete partitioned index

is "invalid".

I had been waiting to rebase since there hasn't been any

review comments and I

expected additional, future conflicts.

I attempted to review this feature, but the last patch conflicts with the
recent refactoring, so I wasn't able to test it properly.
Could you please send a new version?

I rebased this yesterday, so here's my latest.

2) Here we access a relation field after closing the relation. Is it safe?
/* save lockrelid and locktag for below */
heaprelid = rel->rd_lockInfo.lockRelId;

Thanks, fixed this just now.

3) The leaf_partitions() function only handles indexes, so I suggest naming

it

more specifically and add a comment about the meaning of the 'options' parameter.

4) I don't quite understand the idea of the regression test. Why do we
expect to see invalid indexes there?
+ "idxpart_a_idx1" UNIQUE, btree (a) INVALID

Because of the unique failure:
+create unique index concurrently on idxpart (a); -- partitioned, unique
failure
+ERROR:  could not create unique index "idxpart2_a_idx2_ccnew"
+DETAIL:  Key (a)=(10) is duplicated.
+\d idxpart

This shows that CIC first creates catalog-only INVALID indexes, and then
reindexes them to "validate".

--
Justin

#9Justin Pryzby
pryzby@telsasoft.com
In reply to: Anastasia Lubennikova (#6)
4 attachment(s)
Re: CREATE INDEX CONCURRENTLY on partitioned index

On Mon, Feb 15, 2021 at 10:07:05PM +0300, Anastasia Lubennikova wrote:

5) Speaking of documentation, I think we need to add a paragraph about CIC
on partitioned indexes which will explain that invalid indexes may appear
and what the user should do to fix them.

I'm not sure about that - it's already documented in general, for
nonpartitioned indexes.

--
Justin

Attachments:

0001-Allow-CREATE-INDEX-CONCURRENTLY-on-partitioned-table.patchtext/x-diff; charset=us-asciiDownload
From fb60da3c0fac8f1699a6caeea57476770c66576d Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Sat, 6 Jun 2020 17:42:23 -0500
Subject: [PATCH 1/5] Allow CREATE INDEX CONCURRENTLY on partitioned table

Note, this effectively reverts 050098b14, so take care to not reintroduce the
bug it fixed.
---
 doc/src/sgml/ref/create_index.sgml     |   9 --
 src/backend/commands/indexcmds.c       | 143 ++++++++++++++++++-------
 src/test/regress/expected/indexing.out |  60 ++++++++++-
 src/test/regress/sql/indexing.sql      |  18 +++-
 4 files changed, 176 insertions(+), 54 deletions(-)

diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml
index 965dcf472c..7c75119d78 100644
--- a/doc/src/sgml/ref/create_index.sgml
+++ b/doc/src/sgml/ref/create_index.sgml
@@ -686,15 +686,6 @@ Indexes:
     cannot.
    </para>
 
-   <para>
-    Concurrent builds for indexes on partitioned tables are currently not
-    supported.  However, you may concurrently build the index on each
-    partition individually and then finally create the partitioned index
-    non-concurrently in order to reduce the time where writes to the
-    partitioned table will be locked out.  In this case, building the
-    partitioned index is a metadata only operation.
-   </para>
-
   </refsect2>
  </refsect1>
 
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 8bc652ecd3..9ab1a66971 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -68,6 +68,7 @@
 
 
 /* non-export function prototypes */
+static void reindex_invalid_child_indexes(Oid indexRelationId);
 static bool CompareOpclassOptions(Datum *opts1, Datum *opts2, int natts);
 static void CheckPredicate(Expr *predicate);
 static void ComputeIndexAttrs(IndexInfo *indexInfo,
@@ -680,17 +681,6 @@ DefineIndex(Oid relationId,
 	partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE;
 	if (partitioned)
 	{
-		/*
-		 * Note: we check 'stmt->concurrent' rather than 'concurrent', so that
-		 * the error is thrown also for temporary tables.  Seems better to be
-		 * consistent, even though we could do it on temporary table because
-		 * we're not actually doing it concurrently.
-		 */
-		if (stmt->concurrent)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("cannot create index on partitioned table \"%s\" concurrently",
-							RelationGetRelationName(rel))));
 		if (stmt->excludeOpNames)
 			ereport(ERROR,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
@@ -1128,6 +1118,11 @@ DefineIndex(Oid relationId,
 		if (pd->nparts != 0)
 			flags |= INDEX_CREATE_INVALID;
 	}
+	else if (concurrent && OidIsValid(parentIndexId))
+	{
+		/* If concurrent, initially build index partitions as "invalid" */
+		flags |= INDEX_CREATE_INVALID;
+	}
 
 	if (stmt->deferrable)
 		constr_flags |= INDEX_CONSTR_CREATE_DEFERRABLE;
@@ -1183,6 +1178,14 @@ DefineIndex(Oid relationId,
 		partdesc = RelationGetPartitionDesc(rel);
 		if ((!stmt->relation || stmt->relation->inh) && partdesc->nparts > 0)
 		{
+			/*
+			 * Need to close the relation before recursing into children, so
+			 * copy needed data into a longlived context.
+			 */
+
+			MemoryContext	ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX",
+					ALLOCSET_DEFAULT_SIZES);
+			MemoryContext	oldcontext = MemoryContextSwitchTo(ind_context);
 			int			nparts = partdesc->nparts;
 			Oid		   *part_oids = palloc(sizeof(Oid) * nparts);
 			bool		invalidate_parent = false;
@@ -1193,8 +1196,10 @@ DefineIndex(Oid relationId,
 										 nparts);
 
 			memcpy(part_oids, partdesc->oids, sizeof(Oid) * nparts);
+			parentDesc = CreateTupleDescCopy(RelationGetDescr(rel));
+			table_close(rel, NoLock);
+			MemoryContextSwitchTo(oldcontext);
 
-			parentDesc = RelationGetDescr(rel);
 			opfamOids = palloc(sizeof(Oid) * numberOfKeyAttributes);
 			for (i = 0; i < numberOfKeyAttributes; i++)
 				opfamOids[i] = get_opclass_family(classObjectId[i]);
@@ -1237,10 +1242,12 @@ DefineIndex(Oid relationId,
 					continue;
 				}
 
+				oldcontext = MemoryContextSwitchTo(ind_context);
 				childidxs = RelationGetIndexList(childrel);
 				attmap =
 					build_attrmap_by_name(RelationGetDescr(childrel),
 										  parentDesc);
+				MemoryContextSwitchTo(oldcontext);
 
 				foreach(cell, childidxs)
 				{
@@ -1311,10 +1318,14 @@ DefineIndex(Oid relationId,
 				 */
 				if (!found)
 				{
-					IndexStmt  *childStmt = copyObject(stmt);
+					IndexStmt  *childStmt;
 					bool		found_whole_row;
 					ListCell   *lc;
 
+					oldcontext = MemoryContextSwitchTo(ind_context);
+					childStmt = copyObject(stmt);
+					MemoryContextSwitchTo(oldcontext);
+
 					/*
 					 * We can't use the same index name for the child index,
 					 * so clear idxname to let the recursive invocation choose
@@ -1366,10 +1377,18 @@ DefineIndex(Oid relationId,
 								createdConstraintId,
 								is_alter_table, check_rights, check_not_in_use,
 								skip_build, quiet);
+					if (concurrent)
+					{
+						PopActiveSnapshot();
+						PushActiveSnapshot(GetTransactionSnapshot());
+						invalidate_parent = true;
+					}
 				}
 
-				pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
-											 i + 1);
+				/* For concurrent build, this is a catalog-only stage */
+				if (!concurrent)
+					pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
+												 i + 1);
 				free_attrmap(attmap);
 			}
 
@@ -1379,41 +1398,33 @@ DefineIndex(Oid relationId,
 			 * invalid, this is incorrect, so update our row to invalid too.
 			 */
 			if (invalidate_parent)
-			{
-				Relation	pg_index = table_open(IndexRelationId, RowExclusiveLock);
-				HeapTuple	tup,
-							newtup;
-
-				tup = SearchSysCache1(INDEXRELID,
-									  ObjectIdGetDatum(indexRelationId));
-				if (!HeapTupleIsValid(tup))
-					elog(ERROR, "cache lookup failed for index %u",
-						 indexRelationId);
-				newtup = heap_copytuple(tup);
-				((Form_pg_index) GETSTRUCT(newtup))->indisvalid = false;
-				CatalogTupleUpdate(pg_index, &tup->t_self, newtup);
-				ReleaseSysCache(tup);
-				table_close(pg_index, RowExclusiveLock);
-				heap_freetuple(newtup);
-			}
-		}
+				index_set_state_flags(indexRelationId, INDEX_DROP_CLEAR_VALID);
+		} else
+			table_close(rel, NoLock);
 
 		/*
 		 * Indexes on partitioned tables are not themselves built, so we're
 		 * done here.
 		 */
-		table_close(rel, NoLock);
 		if (!OidIsValid(parentIndexId))
+		{
+			if (concurrent)
+				reindex_invalid_child_indexes(indexRelationId);
+
 			pgstat_progress_end_command();
+		}
+
 		return address;
 	}
 
-	if (!concurrent)
+	if (!concurrent || OidIsValid(parentIndexId))
 	{
-		/* Close the heap and we're done, in the non-concurrent case */
-		table_close(rel, NoLock);
+		/*
+		 * We're done if this is the top-level index,
+		 * or the catalog-only phase of a partition built concurrently
+		 */
 
-		/* If this is the top-level index, we're done. */
+		table_close(rel, NoLock);
 		if (!OidIsValid(parentIndexId))
 			pgstat_progress_end_command();
 
@@ -1626,6 +1637,62 @@ DefineIndex(Oid relationId,
 	return address;
 }
 
+/* Reindex invalid child indexes created earlier */
+static void
+reindex_invalid_child_indexes(Oid indexRelationId)
+{
+	ListCell *lc;
+	int		npart = 0;
+	ReindexParams params = {
+		.options = REINDEXOPT_CONCURRENTLY
+	};
+
+	MemoryContext	ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX",
+			ALLOCSET_DEFAULT_SIZES);
+	MemoryContext	oldcontext;
+	List		*childs = find_inheritance_children(indexRelationId, ShareLock);
+	List		*partitions = NIL;
+
+	PreventInTransactionBlock(true, "REINDEX INDEX");
+
+	foreach (lc, childs)
+	{
+		Oid			partoid = lfirst_oid(lc);
+
+		/* XXX: need to retrofit progress reporting into it */
+		// pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
+									 // npart++);
+
+		if (get_index_isvalid(partoid) ||
+				!RELKIND_HAS_STORAGE(get_rel_relkind(partoid)))
+			continue;
+
+		/* Save partition OID */
+		oldcontext = MemoryContextSwitchTo(ind_context);
+		partitions = lappend_oid(partitions, partoid);
+		MemoryContextSwitchTo(oldcontext);
+	}
+
+	/*
+	 * Process each partition listed in a separate transaction.  Note that
+	 * this commits and then starts a new transaction immediately.
+	 * XXX: since this is done in 2*N transactions, it could just as well
+	 * call ReindexRelationConcurrently directly
+	 */
+	ReindexMultipleInternal(partitions, &params);
+
+	/*
+	 * CIC needs to mark a partitioned index as VALID, which itself
+	 * requires setting READY, which is unset for CIC (even though
+	 * it's meaningless for an index without storage).
+	 * This must be done only while holding a lock which precludes adding
+	 * partitions.
+	 * See also: validatePartitionedIndex().
+	 */
+	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
+	CommandCounterIncrement();
+	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
+}
 
 /*
  * CheckMutability
diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out
index c93f4470c9..f04abc6897 100644
--- a/src/test/regress/expected/indexing.out
+++ b/src/test/regress/expected/indexing.out
@@ -50,11 +50,63 @@ select relname, relkind, relhassubclass, inhparent::regclass
 (8 rows)
 
 drop table idxpart;
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
-ERROR:  cannot create index on partitioned table "idxpart" concurrently
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart (a); -- partitioned
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart2 (a); -- leaf
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+ERROR:  could not create unique index "idxpart2_a_idx2_ccnew"
+DETAIL:  Key (a)=(10) is duplicated.
+\d idxpart
+        Partitioned table "public.idxpart"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition key: RANGE (a)
+Indexes:
+    "idxpart_a_idx" btree (a)
+    "idxpart_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 2 (Use \d+ to list them.)
+
+\d idxpart1
+        Partitioned table "public.idxpart1"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart1_a_idx" btree (a) INVALID
+    "idxpart1_a_idx1" btree (a)
+    "idxpart1_a_idx2" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart2
+              Table "public.idxpart2"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (10) TO (20)
+Indexes:
+    "idxpart2_a_idx" btree (a)
+    "idxpart2_a_idx1" btree (a)
+    "idxpart2_a_idx2" UNIQUE, btree (a) INVALID
+    "idxpart2_a_idx2_ccnew" UNIQUE, btree (a) INVALID
+
 drop table idxpart;
 -- Verify bugfix with query on indexed partitioned table with no partitions
 -- https://postgr.es/m/20180124162006.pmapfiznhgngwtjf@alvherre.pgsql
diff --git a/src/test/regress/sql/indexing.sql b/src/test/regress/sql/indexing.sql
index 42f398b67c..3d4b6e9bc9 100644
--- a/src/test/regress/sql/indexing.sql
+++ b/src/test/regress/sql/indexing.sql
@@ -29,10 +29,22 @@ select relname, relkind, relhassubclass, inhparent::regclass
 	where relname like 'idxpart%' order by relname;
 drop table idxpart;
 
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart (a); -- partitioned
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart2 (a); -- leaf
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+\d idxpart
+\d idxpart1
+\d idxpart2
 drop table idxpart;
 
 -- Verify bugfix with query on indexed partitioned table with no partitions
-- 
2.17.0

0002-f-progress-reporting.patchtext/x-diff; charset=us-asciiDownload
From 0dfaaf20b8333b816ab2d4501675e9a7dd0fc436 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Sun, 14 Feb 2021 18:31:43 -0600
Subject: [PATCH 2/5] f! progress reporting

---
 src/backend/commands/indexcmds.c | 33 +++++++-------------------------
 1 file changed, 7 insertions(+), 26 deletions(-)

diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 9ab1a66971..8f4eab22eb 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -1647,40 +1647,20 @@ reindex_invalid_child_indexes(Oid indexRelationId)
 		.options = REINDEXOPT_CONCURRENTLY
 	};
 
-	MemoryContext	ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX",
-			ALLOCSET_DEFAULT_SIZES);
-	MemoryContext	oldcontext;
-	List		*childs = find_inheritance_children(indexRelationId, ShareLock);
-	List		*partitions = NIL;
-
 	PreventInTransactionBlock(true, "REINDEX INDEX");
 
-	foreach (lc, childs)
+	foreach (lc, find_inheritance_children(indexRelationId, ShareLock))
 	{
 		Oid			partoid = lfirst_oid(lc);
 
-		/* XXX: need to retrofit progress reporting into it */
-		// pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
-									 // npart++);
-
-		if (get_index_isvalid(partoid) ||
-				!RELKIND_HAS_STORAGE(get_rel_relkind(partoid)))
-			continue;
+		if (!get_index_isvalid(partoid) &&
+				RELKIND_HAS_STORAGE(get_rel_relkind(partoid)))
+			ReindexRelationConcurrently(partoid, &params);
 
-		/* Save partition OID */
-		oldcontext = MemoryContextSwitchTo(ind_context);
-		partitions = lappend_oid(partitions, partoid);
-		MemoryContextSwitchTo(oldcontext);
+		pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
+									 npart++);
 	}
 
-	/*
-	 * Process each partition listed in a separate transaction.  Note that
-	 * this commits and then starts a new transaction immediately.
-	 * XXX: since this is done in 2*N transactions, it could just as well
-	 * call ReindexRelationConcurrently directly
-	 */
-	ReindexMultipleInternal(partitions, &params);
-
 	/*
 	 * CIC needs to mark a partitioned index as VALID, which itself
 	 * requires setting READY, which is unset for CIC (even though
@@ -1689,6 +1669,7 @@ reindex_invalid_child_indexes(Oid indexRelationId)
 	 * partitions.
 	 * See also: validatePartitionedIndex().
 	 */
+	CommandCounterIncrement();
 	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
 	CommandCounterIncrement();
 	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
-- 
2.17.0

0003-WIP-Add-SKIPVALID-flag-for-more-integration.patchtext/x-diff; charset=us-asciiDownload
From 2a24e8bbc6f23d3e688d72a652ba0bd0dd2dc4af Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Fri, 30 Oct 2020 16:23:02 -0500
Subject: [PATCH 3/5] WIP: Add SKIPVALID flag for more integration

XXX: this breaks progress reporting?
---
 src/backend/commands/indexcmds.c | 36 +++++++++++++++-----------------
 src/include/catalog/index.h      |  1 +
 2 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 8f4eab22eb..e54314e9a4 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -1637,40 +1637,33 @@ DefineIndex(Oid relationId,
 	return address;
 }
 
-/* Reindex invalid child indexes created earlier */
+/*
+ * Reindex invalid child indexes created earlier thereby validating
+ * the parent index.
+ */
 static void
 reindex_invalid_child_indexes(Oid indexRelationId)
 {
-	ListCell *lc;
-	int		npart = 0;
 	ReindexParams params = {
-		.options = REINDEXOPT_CONCURRENTLY
+		.options = REINDEXOPT_CONCURRENTLY | REINDEXOPT_SKIPVALID
 	};
 
-	PreventInTransactionBlock(true, "REINDEX INDEX");
-
-	foreach (lc, find_inheritance_children(indexRelationId, ShareLock))
-	{
-		Oid			partoid = lfirst_oid(lc);
-
-		if (!get_index_isvalid(partoid) &&
-				RELKIND_HAS_STORAGE(get_rel_relkind(partoid)))
-			ReindexRelationConcurrently(partoid, &params);
-
-		pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
-									 npart++);
-	}
-
 	/*
 	 * CIC needs to mark a partitioned index as VALID, which itself
 	 * requires setting READY, which is unset for CIC (even though
 	 * it's meaningless for an index without storage).
 	 * This must be done only while holding a lock which precludes adding
 	 * partitions.
-	 * See also: validatePartitionedIndex().
 	 */
 	CommandCounterIncrement();
 	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
+
+	/*
+	 * Process each partition listed in a separate transaction.  Note that
+	 * this commits and then starts a new transaction immediately.
+	 */
+	ReindexPartitions(indexRelationId, &params, true);
+
 	CommandCounterIncrement();
 	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
 }
@@ -3094,6 +3087,11 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 		if (!RELKIND_HAS_STORAGE(partkind))
 			continue;
 
+		/* Skip valid indexes, if requested */
+		if ((params->options & REINDEXOPT_SKIPVALID) != 0 &&
+				get_index_isvalid(partoid))
+			continue;
+
 		Assert(partkind == RELKIND_INDEX ||
 			   partkind == RELKIND_RELATION);
 
diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h
index e22d506436..994fe94fa1 100644
--- a/src/include/catalog/index.h
+++ b/src/include/catalog/index.h
@@ -42,6 +42,7 @@ typedef struct ReindexParams
 #define REINDEXOPT_REPORT_PROGRESS 0x02 /* report pgstat progress */
 #define REINDEXOPT_MISSING_OK 	0x04	/* skip missing relations */
 #define REINDEXOPT_CONCURRENTLY	0x08	/* concurrent mode */
+#define REINDEXOPT_SKIPVALID	0x10	/* skip valid indexes */
 
 /* state info for validate_index bulkdelete callback */
 typedef struct ValidateIndexState
-- 
2.17.0

0004-ReindexPartitions-to-set-indisvalid.patchtext/x-diff; charset=us-asciiDownload
From 2b6fedc7cc9f3a0dfe522d789ffed414a799605c Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Fri, 30 Oct 2020 23:52:31 -0500
Subject: [PATCH 4/5] ReindexPartitions() to set indisvalid..

Something like this should probably have been included in
a6642b3ae060976b42830b7dc8f29ec190ab05e4

See also 71a05b223, which mentioned the absence of any way to validate an
index.
---
 src/backend/commands/indexcmds.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index e54314e9a4..99508b0d36 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -1652,8 +1652,6 @@ reindex_invalid_child_indexes(Oid indexRelationId)
 	 * CIC needs to mark a partitioned index as VALID, which itself
 	 * requires setting READY, which is unset for CIC (even though
 	 * it's meaningless for an index without storage).
-	 * This must be done only while holding a lock which precludes adding
-	 * partitions.
 	 */
 	CommandCounterIncrement();
 	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
@@ -1663,9 +1661,6 @@ reindex_invalid_child_indexes(Oid indexRelationId)
 	 * this commits and then starts a new transaction immediately.
 	 */
 	ReindexPartitions(indexRelationId, &params, true);
-
-	CommandCounterIncrement();
-	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
 }
 
 /*
@@ -3107,6 +3102,24 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 	 */
 	ReindexMultipleInternal(partitions, params);
 
+	/*
+	 * If indexes exist on all of the partitioned table's children, and we
+	 * just reindexed them, then we know they're valid, and so can mark the
+	 * parent index as valid.
+	 * This handles the case of CREATE INDEX CONCURRENTLY.
+	 * See also: validatePartitionedIndex().
+	 */
+	if (get_rel_relkind(relid) == RELKIND_PARTITIONED_INDEX
+			&& !get_index_isvalid(relid))
+	{
+		Oid	tableoid = IndexGetRelation(relid, false);
+		List	*child_tables = find_all_inheritors(tableoid, ShareLock, NULL);
+
+		/* Both lists include their parent relation as well as any intermediate partitioned rels */
+		if (list_length(inhoids) == list_length(child_tables))
+			index_set_state_flags(relid, INDEX_CREATE_SET_VALID);
+	}
+
 	/*
 	 * Clean up working storage --- note we must do this after
 	 * StartTransactionCommand, else we might be trying to delete the active
-- 
2.17.0

#10Alexander Pyhalov
a.pyhalov@postgrespro.ru
In reply to: Justin Pryzby (#9)
4 attachment(s)
Re: CREATE INDEX CONCURRENTLY on partitioned index

Justin Pryzby писал 2021-02-26 21:20:

On Mon, Feb 15, 2021 at 10:07:05PM +0300, Anastasia Lubennikova wrote:

5) Speaking of documentation, I think we need to add a paragraph about CIC
on partitioned indexes which will explain that invalid indexes may appear
and what the user should do to fix them.

I'm not sure about that - it's already documented in general, for
nonpartitioned indexes.

Hi.

I've rebased the patches and tried to fix the issues I've seen. I've fixed
a use of rel after table_close() in the first patch (it can be seen when
building with CPPFLAGS='-DRELCACHE_FORCE_RELEASE'). It seems childidxs
shouldn't live in ind_context, so I moved it out of it. I updated the
documentation to state that CIC can leave invalid or valid indexes on
partitions if it does not succeed. I also merged the old
0002-f-progress-reporting.patch and
0003-WIP-Add-SKIPVALID-flag-for-more-integration.patch. It seems the
first one didn't really fix the issue with progress reporting (as
ReindexRelationConcurrently() uses pgstat_progress_start_command(),
which seems to mess up the effect of this command in DefineIndex()).
Note that the third patch completely removes attempts to report create
index progress correctly (reindex reports on individual commands, not
the whole CREATE INDEX).

So I've added 0003-Try-to-fix-create-index-progress-report.patch, which
tries to fix the mess with the create index progress report. It introduces
a new flag, REINDEXOPT_REPORT_CREATE_PART, in ReindexParams->options. Given
this flag, ReindexRelationConcurrently() will not report the start/stop of
individual operations, but ReindexMultipleInternal() will report the
reindexed partitions. To make matters worse, some partitions can be
handled in ReindexPartitions(), and ReindexMultipleInternal() needs to
know how many in order to correctly update the
PROGRESS_CREATEIDX_PARTITIONS_DONE counter. It also needs IndexOid to
correctly generate the pg_stat_progress_create_index record, so we pass
these parameters to it.
--
Best regards,
Alexander Pyhalov,
Postgres Professional

Attachments:

v1-0001-Allow-CREATE-INDEX-CONCURRENTLY-on-partitioned-table.patchtext/x-diff; name=v1-0001-Allow-CREATE-INDEX-CONCURRENTLY-on-partitioned-table.patchDownload
From eaad7c3ed2fda93fdb91aea60294f60489444bf7 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzby@telsasoft.com>
Date: Mon, 7 Feb 2022 10:28:42 +0300
Subject: [PATCH 1/4] Allow CREATE INDEX CONCURRENTLY on partitioned table

0001-Allow-CREATE-INDEX-CONCURRENTLY-on-partitioned-table.patch from
https://www.postgresql.org/message-id/20210226182019.GU20769@telsasoft.com

Fixes:
  - rel was used after table_close();
  - it seems childidxs shouldn't live in ind_context;
  - updated doc.
---
 doc/src/sgml/ref/create_index.sgml     |  14 +--
 src/backend/commands/indexcmds.c       | 151 ++++++++++++++++++-------
 src/test/regress/expected/indexing.out |  60 +++++++++-
 src/test/regress/sql/indexing.sql      |  18 ++-
 4 files changed, 186 insertions(+), 57 deletions(-)

diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml
index 91eaaabc90f..ffa98692430 100644
--- a/doc/src/sgml/ref/create_index.sgml
+++ b/doc/src/sgml/ref/create_index.sgml
@@ -641,7 +641,10 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] <replaceable class=
    <para>
     If a problem arises while scanning the table, such as a deadlock or a
     uniqueness violation in a unique index, the <command>CREATE INDEX</command>
-    command will fail but leave behind an <quote>invalid</quote> index. This index
+    command will fail but leave behind an <quote>invalid</quote> index.
+    If this happens while creating index concurrently on a partitioned
+    table, the command can also leave behind <quote>valid</quote> or
+    <quote>invalid</quote> indexes on table partitions.  The invalid index
     will be ignored for querying purposes because it might be incomplete;
     however it will still consume update overhead. The <application>psql</application>
     <command>\d</command> command will report such an index as <literal>INVALID</literal>:
@@ -688,15 +691,6 @@ Indexes:
     cannot.
    </para>
 
-   <para>
-    Concurrent builds for indexes on partitioned tables are currently not
-    supported.  However, you may concurrently build the index on each
-    partition individually and then finally create the partitioned index
-    non-concurrently in order to reduce the time where writes to the
-    partitioned table will be locked out.  In this case, building the
-    partitioned index is a metadata only operation.
-   </para>
-
   </refsect2>
  </refsect1>
 
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 560dcc87a2c..666ced8e1d7 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -68,6 +68,7 @@
 
 
 /* non-export function prototypes */
+static void reindex_invalid_child_indexes(Oid indexRelationId);
 static bool CompareOpclassOptions(Datum *opts1, Datum *opts2, int natts);
 static void CheckPredicate(Expr *predicate);
 static void ComputeIndexAttrs(IndexInfo *indexInfo,
@@ -670,17 +671,6 @@ DefineIndex(Oid relationId,
 	partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE;
 	if (partitioned)
 	{
-		/*
-		 * Note: we check 'stmt->concurrent' rather than 'concurrent', so that
-		 * the error is thrown also for temporary tables.  Seems better to be
-		 * consistent, even though we could do it on temporary table because
-		 * we're not actually doing it concurrently.
-		 */
-		if (stmt->concurrent)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("cannot create index on partitioned table \"%s\" concurrently",
-							RelationGetRelationName(rel))));
 		if (stmt->excludeOpNames)
 			ereport(ERROR,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
@@ -1119,6 +1109,11 @@ DefineIndex(Oid relationId,
 		if (pd->nparts != 0)
 			flags |= INDEX_CREATE_INVALID;
 	}
+	else if (concurrent && OidIsValid(parentIndexId))
+	{
+		/* If concurrent, initially build index partitions as "invalid" */
+		flags |= INDEX_CREATE_INVALID;
+	}
 
 	if (stmt->deferrable)
 		constr_flags |= INDEX_CONSTR_CREATE_DEFERRABLE;
@@ -1174,18 +1169,30 @@ DefineIndex(Oid relationId,
 		partdesc = RelationGetPartitionDesc(rel, true);
 		if ((!stmt->relation || stmt->relation->inh) && partdesc->nparts > 0)
 		{
+			/*
+			 * Need to close the relation before recursing into children, so
+			 * copy needed data into a longlived context.
+			 */
+
+			MemoryContext	ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX",
+					ALLOCSET_DEFAULT_SIZES);
+			MemoryContext	oldcontext = MemoryContextSwitchTo(ind_context);
 			int			nparts = partdesc->nparts;
 			Oid		   *part_oids = palloc(sizeof(Oid) * nparts);
 			bool		invalidate_parent = false;
 			TupleDesc	parentDesc;
 			Oid		   *opfamOids;
+			char		*relname;
 
 			pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_TOTAL,
 										 nparts);
 
 			memcpy(part_oids, partdesc->oids, sizeof(Oid) * nparts);
+			parentDesc = CreateTupleDescCopy(RelationGetDescr(rel));
+			relname = pstrdup(RelationGetRelationName(rel));
+			table_close(rel, NoLock);
+			MemoryContextSwitchTo(oldcontext);
 
-			parentDesc = RelationGetDescr(rel);
 			opfamOids = palloc(sizeof(Oid) * numberOfKeyAttributes);
 			for (i = 0; i < numberOfKeyAttributes; i++)
 				opfamOids[i] = get_opclass_family(classObjectId[i]);
@@ -1220,18 +1227,21 @@ DefineIndex(Oid relationId,
 						ereport(ERROR,
 								(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 								 errmsg("cannot create unique index on partitioned table \"%s\"",
-										RelationGetRelationName(rel)),
+										relname),
 								 errdetail("Table \"%s\" contains partitions that are foreign tables.",
-										   RelationGetRelationName(rel))));
+										   relname)));
 
 					table_close(childrel, lockmode);
 					continue;
 				}
 
 				childidxs = RelationGetIndexList(childrel);
+
+				oldcontext = MemoryContextSwitchTo(ind_context);
 				attmap =
 					build_attrmap_by_name(RelationGetDescr(childrel),
 										  parentDesc);
+				MemoryContextSwitchTo(oldcontext);
 
 				foreach(cell, childidxs)
 				{
@@ -1302,10 +1312,14 @@ DefineIndex(Oid relationId,
 				 */
 				if (!found)
 				{
-					IndexStmt  *childStmt = copyObject(stmt);
+					IndexStmt  *childStmt;
 					bool		found_whole_row;
 					ListCell   *lc;
 
+					oldcontext = MemoryContextSwitchTo(ind_context);
+					childStmt = copyObject(stmt);
+					MemoryContextSwitchTo(oldcontext);
+
 					/*
 					 * We can't use the same index name for the child index,
 					 * so clear idxname to let the recursive invocation choose
@@ -1357,12 +1371,21 @@ DefineIndex(Oid relationId,
 								createdConstraintId,
 								is_alter_table, check_rights, check_not_in_use,
 								skip_build, quiet);
+					if (concurrent)
+					{
+						PopActiveSnapshot();
+						PushActiveSnapshot(GetTransactionSnapshot());
+						invalidate_parent = true;
+					}
 				}
 
-				pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
-											 i + 1);
+				/* For concurrent build, this is a catalog-only stage */
+				if (!concurrent)
+					pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
+												 i + 1);
 				free_attrmap(attmap);
 			}
+			pfree(relname);
 
 			/*
 			 * The pg_index row we inserted for this index was marked
@@ -1370,41 +1393,33 @@ DefineIndex(Oid relationId,
 			 * invalid, this is incorrect, so update our row to invalid too.
 			 */
 			if (invalidate_parent)
-			{
-				Relation	pg_index = table_open(IndexRelationId, RowExclusiveLock);
-				HeapTuple	tup,
-							newtup;
-
-				tup = SearchSysCache1(INDEXRELID,
-									  ObjectIdGetDatum(indexRelationId));
-				if (!HeapTupleIsValid(tup))
-					elog(ERROR, "cache lookup failed for index %u",
-						 indexRelationId);
-				newtup = heap_copytuple(tup);
-				((Form_pg_index) GETSTRUCT(newtup))->indisvalid = false;
-				CatalogTupleUpdate(pg_index, &tup->t_self, newtup);
-				ReleaseSysCache(tup);
-				table_close(pg_index, RowExclusiveLock);
-				heap_freetuple(newtup);
-			}
-		}
+				index_set_state_flags(indexRelationId, INDEX_DROP_CLEAR_VALID);
+		} else
+			table_close(rel, NoLock);
 
 		/*
 		 * Indexes on partitioned tables are not themselves built, so we're
 		 * done here.
 		 */
-		table_close(rel, NoLock);
 		if (!OidIsValid(parentIndexId))
+		{
+			if (concurrent)
+				reindex_invalid_child_indexes(indexRelationId);
+
 			pgstat_progress_end_command();
+		}
+
 		return address;
 	}
 
-	if (!concurrent)
+	if (!concurrent || OidIsValid(parentIndexId))
 	{
-		/* Close the heap and we're done, in the non-concurrent case */
-		table_close(rel, NoLock);
+		/*
+		 * We're done if this is the top-level index,
+		 * or the catalog-only phase of a partition built concurrently
+		 */
 
-		/* If this is the top-level index, we're done. */
+		table_close(rel, NoLock);
 		if (!OidIsValid(parentIndexId))
 			pgstat_progress_end_command();
 
@@ -1617,6 +1632,62 @@ DefineIndex(Oid relationId,
 	return address;
 }
 
+/* Reindex invalid child indexes created earlier */
+static void
+reindex_invalid_child_indexes(Oid indexRelationId)
+{
+	ListCell *lc;
+	int		npart = 0;
+	ReindexParams params = {
+		.options = REINDEXOPT_CONCURRENTLY
+	};
+
+	MemoryContext	ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX",
+			ALLOCSET_DEFAULT_SIZES);
+	MemoryContext	oldcontext;
+	List		*childs = find_inheritance_children(indexRelationId, ShareLock);
+	List		*partitions = NIL;
+
+	PreventInTransactionBlock(true, "REINDEX INDEX");
+
+	foreach (lc, childs)
+	{
+		Oid			partoid = lfirst_oid(lc);
+
+		/* XXX: need to retrofit progress reporting into it */
+		// pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
+									 // npart++);
+
+		if (get_index_isvalid(partoid) ||
+				!RELKIND_HAS_STORAGE(get_rel_relkind(partoid)))
+			continue;
+
+		/* Save partition OID */
+		oldcontext = MemoryContextSwitchTo(ind_context);
+		partitions = lappend_oid(partitions, partoid);
+		MemoryContextSwitchTo(oldcontext);
+	}
+
+	/*
+	 * Process each partition listed in a separate transaction.  Note that
+	 * this commits and then starts a new transaction immediately.
+	 * XXX: since this is done in 2*N transactions, it could just as well
+	 * call ReindexRelationConcurrently directly
+	 */
+	ReindexMultipleInternal(partitions, &params);
+
+	/*
+	 * CIC needs to mark a partitioned index as VALID, which itself
+	 * requires setting READY, which is unset for CIC (even though
+	 * it's meaningless for an index without storage).
+	 * This must be done only while holding a lock which precludes adding
+	 * partitions.
+	 * See also: validatePartitionedIndex().
+	 */
+	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
+	CommandCounterIncrement();
+	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
+}
 
 /*
  * CheckMutability
diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out
index 193f7801912..a4ccae50de3 100644
--- a/src/test/regress/expected/indexing.out
+++ b/src/test/regress/expected/indexing.out
@@ -50,11 +50,63 @@ select relname, relkind, relhassubclass, inhparent::regclass
 (8 rows)
 
 drop table idxpart;
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
-ERROR:  cannot create index on partitioned table "idxpart" concurrently
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart (a); -- partitioned
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart2 (a); -- leaf
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+ERROR:  could not create unique index "idxpart2_a_idx2_ccnew"
+DETAIL:  Key (a)=(10) is duplicated.
+\d idxpart
+        Partitioned table "public.idxpart"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition key: RANGE (a)
+Indexes:
+    "idxpart_a_idx" btree (a)
+    "idxpart_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 2 (Use \d+ to list them.)
+
+\d idxpart1
+        Partitioned table "public.idxpart1"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart1_a_idx" btree (a) INVALID
+    "idxpart1_a_idx1" btree (a)
+    "idxpart1_a_idx2" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart2
+              Table "public.idxpart2"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (10) TO (20)
+Indexes:
+    "idxpart2_a_idx" btree (a)
+    "idxpart2_a_idx1" btree (a)
+    "idxpart2_a_idx2" UNIQUE, btree (a) INVALID
+    "idxpart2_a_idx2_ccnew" UNIQUE, btree (a) INVALID
+
 drop table idxpart;
 -- Verify bugfix with query on indexed partitioned table with no partitions
 -- https://postgr.es/m/20180124162006.pmapfiznhgngwtjf@alvherre.pgsql
diff --git a/src/test/regress/sql/indexing.sql b/src/test/regress/sql/indexing.sql
index 42f398b67c2..3d4b6e9bc95 100644
--- a/src/test/regress/sql/indexing.sql
+++ b/src/test/regress/sql/indexing.sql
@@ -29,10 +29,22 @@ select relname, relkind, relhassubclass, inhparent::regclass
 	where relname like 'idxpart%' order by relname;
 drop table idxpart;
 
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart (a); -- partitioned
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart2 (a); -- leaf
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+\d idxpart
+\d idxpart1
+\d idxpart2
 drop table idxpart;
 
 -- Verify bugfix with query on indexed partitioned table with no partitions
-- 
2.25.1

v1-0002-Add-SKIPVALID-flag-for-more-integration.patch (text/x-diff; name=v1-0002-Add-SKIPVALID-flag-for-more-integration.patch) [Download]
From 91bed69a737ca73fb5f79725e7bffc31e617b61b Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzby@telsasoft.com>
Date: Mon, 7 Feb 2022 10:31:40 +0300
Subject: [PATCH 2/4] Add SKIPVALID flag for more integration

Combined
0002-f-progress-reporting.patch and
0003-WIP-Add-SKIPVALID-flag-for-more-integration.patch from
https://www.postgresql.org/message-id/20210226182019.GU20769@telsasoft.com
---
 src/backend/commands/indexcmds.c | 57 ++++++++++----------------------
 src/include/catalog/index.h      |  1 +
 2 files changed, 19 insertions(+), 39 deletions(-)

diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 666ced8e1d7..56e4c0b7575 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -1632,59 +1632,33 @@ DefineIndex(Oid relationId,
 	return address;
 }
 
-/* Reindex invalid child indexes created earlier */
+/*
+ * Reindex invalid child indexes created earlier thereby validating
+ * the parent index.
+ */
 static void
 reindex_invalid_child_indexes(Oid indexRelationId)
 {
-	ListCell *lc;
-	int		npart = 0;
 	ReindexParams params = {
-		.options = REINDEXOPT_CONCURRENTLY
+		.options = REINDEXOPT_CONCURRENTLY | REINDEXOPT_SKIPVALID
 	};
 
-	MemoryContext	ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX",
-			ALLOCSET_DEFAULT_SIZES);
-	MemoryContext	oldcontext;
-	List		*childs = find_inheritance_children(indexRelationId, ShareLock);
-	List		*partitions = NIL;
-
-	PreventInTransactionBlock(true, "REINDEX INDEX");
-
-	foreach (lc, childs)
-	{
-		Oid			partoid = lfirst_oid(lc);
-
-		/* XXX: need to retrofit progress reporting into it */
-		// pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
-									 // npart++);
-
-		if (get_index_isvalid(partoid) ||
-				!RELKIND_HAS_STORAGE(get_rel_relkind(partoid)))
-			continue;
-
-		/* Save partition OID */
-		oldcontext = MemoryContextSwitchTo(ind_context);
-		partitions = lappend_oid(partitions, partoid);
-		MemoryContextSwitchTo(oldcontext);
-	}
-
-	/*
-	 * Process each partition listed in a separate transaction.  Note that
-	 * this commits and then starts a new transaction immediately.
-	 * XXX: since this is done in 2*N transactions, it could just as well
-	 * call ReindexRelationConcurrently directly
-	 */
-	ReindexMultipleInternal(partitions, &params);
-
 	/*
 	 * CIC needs to mark a partitioned index as VALID, which itself
 	 * requires setting READY, which is unset for CIC (even though
 	 * it's meaningless for an index without storage).
 	 * This must be done only while holding a lock which precludes adding
 	 * partitions.
-	 * See also: validatePartitionedIndex().
 	 */
+	CommandCounterIncrement();
 	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
+
+	/*
+	 * Process each partition listed in a separate transaction.  Note that
+	 * this commits and then starts a new transaction immediately.
+	 */
+	ReindexPartitions(indexRelationId, &params, true);
+
 	CommandCounterIncrement();
 	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
 }
@@ -3106,6 +3080,11 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 		if (!RELKIND_HAS_STORAGE(partkind))
 			continue;
 
+		/* Skip valid indexes, if requested */
+		if ((params->options & REINDEXOPT_SKIPVALID) != 0 &&
+				get_index_isvalid(partoid))
+			continue;
+
 		Assert(partkind == RELKIND_INDEX ||
 			   partkind == RELKIND_RELATION);
 
diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h
index a1d6e3b645f..c31b66ad0b9 100644
--- a/src/include/catalog/index.h
+++ b/src/include/catalog/index.h
@@ -42,6 +42,7 @@ typedef struct ReindexParams
 #define REINDEXOPT_REPORT_PROGRESS 0x02 /* report pgstat progress */
 #define REINDEXOPT_MISSING_OK 	0x04	/* skip missing relations */
 #define REINDEXOPT_CONCURRENTLY	0x08	/* concurrent mode */
+#define REINDEXOPT_SKIPVALID	0x10	/* skip valid indexes */
 
 /* state info for validate_index bulkdelete callback */
 typedef struct ValidateIndexState
-- 
2.25.1

v1-0003-Try-to-fix-create-index-progress-report.patch (text/x-diff; name=v1-0003-Try-to-fix-create-index-progress-report.patch) [Download]
From 1b9cb0f8f32c1863a52c5a3fd53103537a938ebd Mon Sep 17 00:00:00 2001
From: Alexander Pyhalov <a.pyhalov@postgrespro.ru>
Date: Tue, 8 Feb 2022 21:15:05 +0300
Subject: [PATCH 3/4] Try to fix create index progress report

---
 src/backend/commands/indexcmds.c | 67 ++++++++++++++++++++++++++------
 src/include/catalog/index.h      |  1 +
 2 files changed, 57 insertions(+), 11 deletions(-)

diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 56e4c0b7575..b237c30bc80 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -99,11 +99,14 @@ static void reindex_error_callback(void *args);
 static void ReindexPartitions(Oid relid, ReindexParams *params,
 							  bool isTopLevel);
 static void ReindexMultipleInternal(List *relids,
-									ReindexParams *params);
+									ReindexParams *params,
+									Oid parent,
+									int npart);
 static bool ReindexRelationConcurrently(Oid relationOid,
 										ReindexParams *params);
 static void update_relispartition(Oid relationId, bool newval);
 static inline void set_indexsafe_procflags(void);
+static void report_create_partition_index_done(Oid parent, int npart);
 
 /*
  * callback argument type for RangeVarCallbackForReindexIndex()
@@ -1184,6 +1187,7 @@ DefineIndex(Oid relationId,
 			Oid		   *opfamOids;
 			char		*relname;
 
+
 			pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_TOTAL,
 										 nparts);
 
@@ -1640,7 +1644,7 @@ static void
 reindex_invalid_child_indexes(Oid indexRelationId)
 {
 	ReindexParams params = {
-		.options = REINDEXOPT_CONCURRENTLY | REINDEXOPT_SKIPVALID
+		.options = REINDEXOPT_CONCURRENTLY | REINDEXOPT_SKIPVALID | REINDEXOPT_REPORT_CREATE_PART
 	};
 
 	/*
@@ -1653,6 +1657,8 @@ reindex_invalid_child_indexes(Oid indexRelationId)
 	CommandCounterIncrement();
 	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
 
+	pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE, PROGRESS_CREATEIDX_PHASE_VALIDATE_IDXSCAN);
+
 	/*
 	 * Process each partition listed in a separate transaction.  Note that
 	 * this commits and then starts a new transaction immediately.
@@ -2987,7 +2993,7 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
 	 * Process each relation listed in a separate transaction.  Note that this
 	 * commits and then starts a new transaction immediately.
 	 */
-	ReindexMultipleInternal(relids, params);
+	ReindexMultipleInternal(relids, params, InvalidOid, 0);
 
 	MemoryContextDelete(private_context);
 }
@@ -3023,6 +3029,7 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 	char		relkind = get_rel_relkind(relid);
 	char	   *relname = get_rel_name(relid);
 	char	   *relnamespace = get_namespace_name(get_rel_namespace(relid));
+	int			npart = 1;
 	MemoryContext reindex_context;
 	List	   *inhoids;
 	ListCell   *lc;
@@ -3083,7 +3090,11 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 		/* Skip valid indexes, if requested */
 		if ((params->options & REINDEXOPT_SKIPVALID) != 0 &&
 				get_index_isvalid(partoid))
+		{
+			if (params->options & REINDEXOPT_REPORT_CREATE_PART)
+				report_create_partition_index_done(relid, npart++);
 			continue;
+		}
 
 		Assert(partkind == RELKIND_INDEX ||
 			   partkind == RELKIND_RELATION);
@@ -3098,7 +3109,7 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 	 * Process each partition listed in a separate transaction.  Note that
 	 * this commits and then starts a new transaction immediately.
 	 */
-	ReindexMultipleInternal(partitions, params);
+	ReindexMultipleInternal(partitions, params, relid, npart);
 
 	/*
 	 * Clean up working storage --- note we must do this after
@@ -3116,7 +3127,7 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
  * and starts a new transaction when finished.
  */
 static void
-ReindexMultipleInternal(List *relids, ReindexParams *params)
+ReindexMultipleInternal(List *relids, ReindexParams *params, Oid parent, int npart)
 {
 	ListCell   *l;
 
@@ -3210,6 +3221,9 @@ ReindexMultipleInternal(List *relids, ReindexParams *params)
 		}
 
 		CommitTransactionCommand();
+
+		if (params->options & REINDEXOPT_REPORT_CREATE_PART)
+			report_create_partition_index_done(parent, npart++);
 	}
 
 	StartTransactionCommand();
@@ -3592,7 +3606,9 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 		if (indexRel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
 			elog(ERROR, "cannot reindex a temporary table concurrently");
 
-		pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
+		/* Don't overwrite CREATE INDEX command */
+		if (!(params->options & REINDEXOPT_REPORT_CREATE_PART))
+			pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
 									  idx->tableId);
 
 		progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY;
@@ -3745,9 +3761,11 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 
 		/*
 		 * Update progress for the index to build, with the correct parent
-		 * table involved.
+		 * table involved.  Don't overwrite CREATE INDEX command.
 		 */
-		pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX, newidx->tableId);
+		if (!(params->options & REINDEXOPT_REPORT_CREATE_PART))
+			pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX, newidx->tableId);
+
 		progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY;
 		progress_vals[1] = PROGRESS_CREATEIDX_PHASE_BUILD;
 		progress_vals[2] = newidx->indexId;
@@ -3809,10 +3827,12 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 
 		/*
 		 * Update progress for the index to build, with the correct parent
-		 * table involved.
+		 * table involved. Don't overwrite CREATE INDEX command.
 		 */
-		pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
+		if (!(params->options & REINDEXOPT_REPORT_CREATE_PART))
+			pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
 									  newidx->tableId);
+
 		progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY;
 		progress_vals[1] = PROGRESS_CREATEIDX_PHASE_VALIDATE_IDXSCAN;
 		progress_vals[2] = newidx->indexId;
@@ -4047,7 +4067,9 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 
 	MemoryContextDelete(private_context);
 
-	pgstat_progress_end_command();
+	/* Don't overwrite CREATE INDEX command. */
+	if (!(params->options & REINDEXOPT_REPORT_CREATE_PART))
+		pgstat_progress_end_command();
 
 	return true;
 }
@@ -4183,6 +4205,29 @@ IndexSetParentIndex(Relation partitionIdx, Oid parentOid)
 	}
 }
 
+/*
+ * Update pgstat progress report to indicate that create index on
+ * partition was finished.
+ */
+static void
+report_create_partition_index_done(Oid index, int npart)
+{
+	const int   progress_cols[] = {
+		PROGRESS_CREATEIDX_COMMAND,
+		PROGRESS_CREATEIDX_INDEX_OID,
+		PROGRESS_CREATEIDX_PHASE,
+		PROGRESS_CREATEIDX_PARTITIONS_DONE
+	};
+	const int64 progress_vals[] = {
+		PROGRESS_CREATEIDX_COMMAND_CREATE_CONCURRENTLY,
+		index,
+		PROGRESS_CREATEIDX_PHASE_VALIDATE_IDXSCAN,
+		npart
+	};
+
+	pgstat_progress_update_multi_param(4, progress_cols, progress_vals);
+}
+
 /*
  * Subroutine of IndexSetParentIndex to update the relispartition flag of the
  * given index to the given value.
diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h
index c31b66ad0b9..b5b0a71e7d4 100644
--- a/src/include/catalog/index.h
+++ b/src/include/catalog/index.h
@@ -43,6 +43,7 @@ typedef struct ReindexParams
 #define REINDEXOPT_MISSING_OK 	0x04	/* skip missing relations */
 #define REINDEXOPT_CONCURRENTLY	0x08	/* concurrent mode */
 #define REINDEXOPT_SKIPVALID	0x10	/* skip valid indexes */
+#define REINDEXOPT_REPORT_CREATE_PART	0x20	/* report that index was created for partition */
 
 /* state info for validate_index bulkdelete callback */
 typedef struct ValidateIndexState
-- 
2.25.1

v1-0004-ReindexPartitions-to-set-indisvalid.patch (text/x-diff; name=v1-0004-ReindexPartitions-to-set-indisvalid.patch) [Download]
From 62f2816188d3d38fe0d76d3212d6f551a59c0ae7 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzby@telsasoft.com>
Date: Mon, 7 Feb 2022 10:39:59 +0300
Subject: [PATCH 4/4] ReindexPartitions() to set indisvalid

0004-ReindexPartitions-to-set-indisvalid.patch from
https://www.postgresql.org/message-id/20210226182019.GU20769@telsasoft.com
---
 src/backend/commands/indexcmds.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index b237c30bc80..355a7626549 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -1651,8 +1651,6 @@ reindex_invalid_child_indexes(Oid indexRelationId)
 	 * CIC needs to mark a partitioned index as VALID, which itself
 	 * requires setting READY, which is unset for CIC (even though
 	 * it's meaningless for an index without storage).
-	 * This must be done only while holding a lock which precludes adding
-	 * partitions.
 	 */
 	CommandCounterIncrement();
 	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
@@ -1664,9 +1662,6 @@ reindex_invalid_child_indexes(Oid indexRelationId)
 	 * this commits and then starts a new transaction immediately.
 	 */
 	ReindexPartitions(indexRelationId, &params, true);
-
-	CommandCounterIncrement();
-	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
 }
 
 /*
@@ -3111,6 +3106,24 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 	 */
 	ReindexMultipleInternal(partitions, params, relid, npart);
 
+	/*
+	 * If indexes exist on all of the partitioned table's children, and we
+	 * just reindexed them, then we know they're valid, and so can mark the
+	 * parent index as valid.
+	 * This handles the case of CREATE INDEX CONCURRENTLY.
+	 * See also: validatePartitionedIndex().
+	 */
+	if (get_rel_relkind(relid) == RELKIND_PARTITIONED_INDEX
+			&& !get_index_isvalid(relid))
+	{
+		Oid	tableoid = IndexGetRelation(relid, false);
+		List	*child_tables = find_all_inheritors(tableoid, ShareLock, NULL);
+
+		/* Both lists include their parent relation as well as any intermediate partitioned rels */
+		if (list_length(inhoids) == list_length(child_tables))
+			index_set_state_flags(relid, INDEX_CREATE_SET_VALID);
+	}
+
 	/*
 	 * Clean up working storage --- note we must do this after
 	 * StartTransactionCommand, else we might be trying to delete the active
-- 
2.25.1

#11 Alexander Pyhalov
a.pyhalov@postgrespro.ru
In reply to: Alexander Pyhalov (#10)
5 attachment(s)
Re: CREATE INDEX CONCURRENTLY on partitioned index

Hi.

I've added 0005-Mark-intermediate-partitioned-indexes-as-valid.patch,
which fixes the following issue: when a partitioned index was created,
indexes on intermediate partitioned tables were left in an invalid
state. I've also added some more tests.
--
Best regards,
Alexander Pyhalov,
Postgres Professional

Attachments:

v2-0001-Allow-CREATE-INDEX-CONCURRENTLY-on-partitioned-table.patch (text/x-diff; name=v2-0001-Allow-CREATE-INDEX-CONCURRENTLY-on-partitioned-table.patch) [Download]
From 18fa3c27a3311294a7abfdc0674ef6143c65423b Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzby@telsasoft.com>
Date: Mon, 7 Feb 2022 10:28:42 +0300
Subject: [PATCH 1/5] Allow CREATE INDEX CONCURRENTLY on partitioned table

0001-Allow-CREATE-INDEX-CONCURRENTLY-on-partitioned-table.patch from
https://www.postgresql.org/message-id/20210226182019.GU20769@telsasoft.com

Fixes:
  - rel was used after table_close();
  - it seems childidxs shouldn't live in ind_context;
  - updated doc.
---
 doc/src/sgml/ref/create_index.sgml     |  14 +--
 src/backend/commands/indexcmds.c       | 151 ++++++++++++++++++-------
 src/test/regress/expected/indexing.out |  60 +++++++++-
 src/test/regress/sql/indexing.sql      |  18 ++-
 4 files changed, 186 insertions(+), 57 deletions(-)

diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml
index 91eaaabc90f..ffa98692430 100644
--- a/doc/src/sgml/ref/create_index.sgml
+++ b/doc/src/sgml/ref/create_index.sgml
@@ -641,7 +641,10 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] <replaceable class=
    <para>
     If a problem arises while scanning the table, such as a deadlock or a
     uniqueness violation in a unique index, the <command>CREATE INDEX</command>
-    command will fail but leave behind an <quote>invalid</quote> index. This index
+    command will fail but leave behind an <quote>invalid</quote> index.
+    If this happens while creating an index concurrently on a partitioned
+    table, the command can also leave behind <quote>valid</quote> or
+    <quote>invalid</quote> indexes on table partitions.  The invalid index
     will be ignored for querying purposes because it might be incomplete;
     however it will still consume update overhead. The <application>psql</application>
     <command>\d</command> command will report such an index as <literal>INVALID</literal>:
@@ -688,15 +691,6 @@ Indexes:
     cannot.
    </para>
 
-   <para>
-    Concurrent builds for indexes on partitioned tables are currently not
-    supported.  However, you may concurrently build the index on each
-    partition individually and then finally create the partitioned index
-    non-concurrently in order to reduce the time where writes to the
-    partitioned table will be locked out.  In this case, building the
-    partitioned index is a metadata only operation.
-   </para>
-
   </refsect2>
  </refsect1>
 
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index cd30f15eba6..a34a1b133a0 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -68,6 +68,7 @@
 
 
 /* non-export function prototypes */
+static void reindex_invalid_child_indexes(Oid indexRelationId);
 static bool CompareOpclassOptions(Datum *opts1, Datum *opts2, int natts);
 static void CheckPredicate(Expr *predicate);
 static void ComputeIndexAttrs(IndexInfo *indexInfo,
@@ -670,17 +671,6 @@ DefineIndex(Oid relationId,
 	partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE;
 	if (partitioned)
 	{
-		/*
-		 * Note: we check 'stmt->concurrent' rather than 'concurrent', so that
-		 * the error is thrown also for temporary tables.  Seems better to be
-		 * consistent, even though we could do it on temporary table because
-		 * we're not actually doing it concurrently.
-		 */
-		if (stmt->concurrent)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("cannot create index on partitioned table \"%s\" concurrently",
-							RelationGetRelationName(rel))));
 		if (stmt->excludeOpNames)
 			ereport(ERROR,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
@@ -1119,6 +1109,11 @@ DefineIndex(Oid relationId,
 		if (pd->nparts != 0)
 			flags |= INDEX_CREATE_INVALID;
 	}
+	else if (concurrent && OidIsValid(parentIndexId))
+	{
+		/* If concurrent, initially build index partitions as "invalid" */
+		flags |= INDEX_CREATE_INVALID;
+	}
 
 	if (stmt->deferrable)
 		constr_flags |= INDEX_CONSTR_CREATE_DEFERRABLE;
@@ -1174,18 +1169,30 @@ DefineIndex(Oid relationId,
 		partdesc = RelationGetPartitionDesc(rel, true);
 		if ((!stmt->relation || stmt->relation->inh) && partdesc->nparts > 0)
 		{
+			/*
+			 * Need to close the relation before recursing into children, so
+			 * copy needed data into a longlived context.
+			 */
+
+			MemoryContext	ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX",
+					ALLOCSET_DEFAULT_SIZES);
+			MemoryContext	oldcontext = MemoryContextSwitchTo(ind_context);
 			int			nparts = partdesc->nparts;
 			Oid		   *part_oids = palloc(sizeof(Oid) * nparts);
 			bool		invalidate_parent = false;
 			TupleDesc	parentDesc;
 			Oid		   *opfamOids;
+			char		*relname;
 
 			pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_TOTAL,
 										 nparts);
 
 			memcpy(part_oids, partdesc->oids, sizeof(Oid) * nparts);
+			parentDesc = CreateTupleDescCopy(RelationGetDescr(rel));
+			relname = pstrdup(RelationGetRelationName(rel));
+			table_close(rel, NoLock);
+			MemoryContextSwitchTo(oldcontext);
 
-			parentDesc = RelationGetDescr(rel);
 			opfamOids = palloc(sizeof(Oid) * numberOfKeyAttributes);
 			for (i = 0; i < numberOfKeyAttributes; i++)
 				opfamOids[i] = get_opclass_family(classObjectId[i]);
@@ -1220,18 +1227,21 @@ DefineIndex(Oid relationId,
 						ereport(ERROR,
 								(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 								 errmsg("cannot create unique index on partitioned table \"%s\"",
-										RelationGetRelationName(rel)),
+										relname),
 								 errdetail("Table \"%s\" contains partitions that are foreign tables.",
-										   RelationGetRelationName(rel))));
+										   relname)));
 
 					table_close(childrel, lockmode);
 					continue;
 				}
 
 				childidxs = RelationGetIndexList(childrel);
+
+				oldcontext = MemoryContextSwitchTo(ind_context);
 				attmap =
 					build_attrmap_by_name(RelationGetDescr(childrel),
 										  parentDesc);
+				MemoryContextSwitchTo(oldcontext);
 
 				foreach(cell, childidxs)
 				{
@@ -1302,10 +1312,14 @@ DefineIndex(Oid relationId,
 				 */
 				if (!found)
 				{
-					IndexStmt  *childStmt = copyObject(stmt);
+					IndexStmt  *childStmt;
 					bool		found_whole_row;
 					ListCell   *lc;
 
+					oldcontext = MemoryContextSwitchTo(ind_context);
+					childStmt = copyObject(stmt);
+					MemoryContextSwitchTo(oldcontext);
+
 					/*
 					 * We can't use the same index name for the child index,
 					 * so clear idxname to let the recursive invocation choose
@@ -1357,12 +1371,21 @@ DefineIndex(Oid relationId,
 								createdConstraintId,
 								is_alter_table, check_rights, check_not_in_use,
 								skip_build, quiet);
+					if (concurrent)
+					{
+						PopActiveSnapshot();
+						PushActiveSnapshot(GetTransactionSnapshot());
+						invalidate_parent = true;
+					}
 				}
 
-				pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
-											 i + 1);
+				/* For concurrent build, this is a catalog-only stage */
+				if (!concurrent)
+					pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
+												 i + 1);
 				free_attrmap(attmap);
 			}
+			pfree(relname);
 
 			/*
 			 * The pg_index row we inserted for this index was marked
@@ -1370,41 +1393,33 @@ DefineIndex(Oid relationId,
 			 * invalid, this is incorrect, so update our row to invalid too.
 			 */
 			if (invalidate_parent)
-			{
-				Relation	pg_index = table_open(IndexRelationId, RowExclusiveLock);
-				HeapTuple	tup,
-							newtup;
-
-				tup = SearchSysCache1(INDEXRELID,
-									  ObjectIdGetDatum(indexRelationId));
-				if (!HeapTupleIsValid(tup))
-					elog(ERROR, "cache lookup failed for index %u",
-						 indexRelationId);
-				newtup = heap_copytuple(tup);
-				((Form_pg_index) GETSTRUCT(newtup))->indisvalid = false;
-				CatalogTupleUpdate(pg_index, &tup->t_self, newtup);
-				ReleaseSysCache(tup);
-				table_close(pg_index, RowExclusiveLock);
-				heap_freetuple(newtup);
-			}
-		}
+				index_set_state_flags(indexRelationId, INDEX_DROP_CLEAR_VALID);
+		} else
+			table_close(rel, NoLock);
 
 		/*
 		 * Indexes on partitioned tables are not themselves built, so we're
 		 * done here.
 		 */
-		table_close(rel, NoLock);
 		if (!OidIsValid(parentIndexId))
+		{
+			if (concurrent)
+				reindex_invalid_child_indexes(indexRelationId);
+
 			pgstat_progress_end_command();
+		}
+
 		return address;
 	}
 
-	if (!concurrent)
+	if (!concurrent || OidIsValid(parentIndexId))
 	{
-		/* Close the heap and we're done, in the non-concurrent case */
-		table_close(rel, NoLock);
+		/*
+		 * We're done if this is the top-level index,
+		 * or the catalog-only phase of a partition built concurrently
+		 */
 
-		/* If this is the top-level index, we're done. */
+		table_close(rel, NoLock);
 		if (!OidIsValid(parentIndexId))
 			pgstat_progress_end_command();
 
@@ -1617,6 +1632,62 @@ DefineIndex(Oid relationId,
 	return address;
 }
 
+/* Reindex invalid child indexes created earlier */
+static void
+reindex_invalid_child_indexes(Oid indexRelationId)
+{
+	ListCell *lc;
+	int		npart = 0;
+	ReindexParams params = {
+		.options = REINDEXOPT_CONCURRENTLY
+	};
+
+	MemoryContext	ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX",
+			ALLOCSET_DEFAULT_SIZES);
+	MemoryContext	oldcontext;
+	List		*childs = find_inheritance_children(indexRelationId, ShareLock);
+	List		*partitions = NIL;
+
+	PreventInTransactionBlock(true, "REINDEX INDEX");
+
+	foreach (lc, childs)
+	{
+		Oid			partoid = lfirst_oid(lc);
+
+		/* XXX: need to retrofit progress reporting into it */
+		// pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
+									 // npart++);
+
+		if (get_index_isvalid(partoid) ||
+				!RELKIND_HAS_STORAGE(get_rel_relkind(partoid)))
+			continue;
+
+		/* Save partition OID */
+		oldcontext = MemoryContextSwitchTo(ind_context);
+		partitions = lappend_oid(partitions, partoid);
+		MemoryContextSwitchTo(oldcontext);
+	}
+
+	/*
+	 * Process each partition listed in a separate transaction.  Note that
+	 * this commits and then starts a new transaction immediately.
+	 * XXX: since this is done in 2*N transactions, it could just as well
+	 * call ReindexRelationConcurrently directly
+	 */
+	ReindexMultipleInternal(partitions, &params);
+
+	/*
+	 * CIC needs to mark a partitioned index as VALID, which itself
+	 * requires setting READY, which is unset for CIC (even though
+	 * it's meaningless for an index without storage).
+	 * This must be done only while holding a lock which precludes adding
+	 * partitions.
+	 * See also: validatePartitionedIndex().
+	 */
+	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
+	CommandCounterIncrement();
+	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
+}
 
 /*
  * CheckMutability
diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out
index 193f7801912..a4ccae50de3 100644
--- a/src/test/regress/expected/indexing.out
+++ b/src/test/regress/expected/indexing.out
@@ -50,11 +50,63 @@ select relname, relkind, relhassubclass, inhparent::regclass
 (8 rows)
 
 drop table idxpart;
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
-ERROR:  cannot create index on partitioned table "idxpart" concurrently
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart (a); -- partitioned
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart2 (a); -- leaf
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+ERROR:  could not create unique index "idxpart2_a_idx2_ccnew"
+DETAIL:  Key (a)=(10) is duplicated.
+\d idxpart
+        Partitioned table "public.idxpart"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition key: RANGE (a)
+Indexes:
+    "idxpart_a_idx" btree (a)
+    "idxpart_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 2 (Use \d+ to list them.)
+
+\d idxpart1
+        Partitioned table "public.idxpart1"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart1_a_idx" btree (a) INVALID
+    "idxpart1_a_idx1" btree (a)
+    "idxpart1_a_idx2" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart2
+              Table "public.idxpart2"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (10) TO (20)
+Indexes:
+    "idxpart2_a_idx" btree (a)
+    "idxpart2_a_idx1" btree (a)
+    "idxpart2_a_idx2" UNIQUE, btree (a) INVALID
+    "idxpart2_a_idx2_ccnew" UNIQUE, btree (a) INVALID
+
 drop table idxpart;
 -- Verify bugfix with query on indexed partitioned table with no partitions
 -- https://postgr.es/m/20180124162006.pmapfiznhgngwtjf@alvherre.pgsql
diff --git a/src/test/regress/sql/indexing.sql b/src/test/regress/sql/indexing.sql
index 42f398b67c2..3d4b6e9bc95 100644
--- a/src/test/regress/sql/indexing.sql
+++ b/src/test/regress/sql/indexing.sql
@@ -29,10 +29,22 @@ select relname, relkind, relhassubclass, inhparent::regclass
 	where relname like 'idxpart%' order by relname;
 drop table idxpart;
 
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart (a); -- partitioned
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart2 (a); -- leaf
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+\d idxpart
+\d idxpart1
+\d idxpart2
 drop table idxpart;
 
 -- Verify bugfix with query on indexed partitioned table with no partitions
-- 
2.25.1

v2-0002-Add-SKIPVALID-flag-for-more-integration.patchtext/x-diff; name=v2-0002-Add-SKIPVALID-flag-for-more-integration.patchDownload
From 1cba217c9293a4dac1b08451000442733c8c78cd Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzby@telsasoft.com>
Date: Mon, 7 Feb 2022 10:31:40 +0300
Subject: [PATCH 2/5] Add SKIPVALID flag for more integration

Combined
0002-f-progress-reporting.patch and
0003-WIP-Add-SKIPVALID-flag-for-more-integration.patch from
https://www.postgresql.org/message-id/20210226182019.GU20769@telsasoft.com
---
 src/backend/commands/indexcmds.c | 57 ++++++++++----------------------
 src/include/catalog/index.h      |  1 +
 2 files changed, 19 insertions(+), 39 deletions(-)

diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index a34a1b133a0..090e792ff47 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -1632,59 +1632,33 @@ DefineIndex(Oid relationId,
 	return address;
 }
 
-/* Reindex invalid child indexes created earlier */
+/*
+ * Reindex invalid child indexes created earlier thereby validating
+ * the parent index.
+ */
 static void
 reindex_invalid_child_indexes(Oid indexRelationId)
 {
-	ListCell *lc;
-	int		npart = 0;
 	ReindexParams params = {
-		.options = REINDEXOPT_CONCURRENTLY
+		.options = REINDEXOPT_CONCURRENTLY | REINDEXOPT_SKIPVALID
 	};
 
-	MemoryContext	ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX",
-			ALLOCSET_DEFAULT_SIZES);
-	MemoryContext	oldcontext;
-	List		*childs = find_inheritance_children(indexRelationId, ShareLock);
-	List		*partitions = NIL;
-
-	PreventInTransactionBlock(true, "REINDEX INDEX");
-
-	foreach (lc, childs)
-	{
-		Oid			partoid = lfirst_oid(lc);
-
-		/* XXX: need to retrofit progress reporting into it */
-		// pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
-									 // npart++);
-
-		if (get_index_isvalid(partoid) ||
-				!RELKIND_HAS_STORAGE(get_rel_relkind(partoid)))
-			continue;
-
-		/* Save partition OID */
-		oldcontext = MemoryContextSwitchTo(ind_context);
-		partitions = lappend_oid(partitions, partoid);
-		MemoryContextSwitchTo(oldcontext);
-	}
-
-	/*
-	 * Process each partition listed in a separate transaction.  Note that
-	 * this commits and then starts a new transaction immediately.
-	 * XXX: since this is done in 2*N transactions, it could just as well
-	 * call ReindexRelationConcurrently directly
-	 */
-	ReindexMultipleInternal(partitions, &params);
-
 	/*
 	 * CIC needs to mark a partitioned index as VALID, which itself
 	 * requires setting READY, which is unset for CIC (even though
 	 * it's meaningless for an index without storage).
 	 * This must be done only while holding a lock which precludes adding
 	 * partitions.
-	 * See also: validatePartitionedIndex().
 	 */
+	CommandCounterIncrement();
 	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
+
+	/*
+	 * Process each partition listed in a separate transaction.  Note that
+	 * this commits and then starts a new transaction immediately.
+	 */
+	ReindexPartitions(indexRelationId, &params, true);
+
 	CommandCounterIncrement();
 	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
 }
@@ -3106,6 +3080,11 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 		if (!RELKIND_HAS_STORAGE(partkind))
 			continue;
 
+		/* Skip valid indexes, if requested */
+		if ((params->options & REINDEXOPT_SKIPVALID) != 0 &&
+				get_index_isvalid(partoid))
+			continue;
+
 		Assert(partkind == RELKIND_INDEX ||
 			   partkind == RELKIND_RELATION);
 
diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h
index a1d6e3b645f..c31b66ad0b9 100644
--- a/src/include/catalog/index.h
+++ b/src/include/catalog/index.h
@@ -42,6 +42,7 @@ typedef struct ReindexParams
 #define REINDEXOPT_REPORT_PROGRESS 0x02 /* report pgstat progress */
 #define REINDEXOPT_MISSING_OK 	0x04	/* skip missing relations */
 #define REINDEXOPT_CONCURRENTLY	0x08	/* concurrent mode */
+#define REINDEXOPT_SKIPVALID	0x10	/* skip valid indexes */
 
 /* state info for validate_index bulkdelete callback */
 typedef struct ValidateIndexState
-- 
2.25.1

v2-0003-Try-to-fix-create-index-progress-report.patchtext/x-diff; name=v2-0003-Try-to-fix-create-index-progress-report.patchDownload
From 1a4e873d247dffc1b1af16a546f0d28714d3fb9b Mon Sep 17 00:00:00 2001
From: Alexander Pyhalov <a.pyhalov@postgrespro.ru>
Date: Tue, 8 Feb 2022 21:15:05 +0300
Subject: [PATCH 3/5] Try to fix create index progress report

---
 src/backend/commands/indexcmds.c | 67 ++++++++++++++++++++++++++------
 src/include/catalog/index.h      |  1 +
 2 files changed, 57 insertions(+), 11 deletions(-)

diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 090e792ff47..57df92985fe 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -99,11 +99,14 @@ static void reindex_error_callback(void *args);
 static void ReindexPartitions(Oid relid, ReindexParams *params,
 							  bool isTopLevel);
 static void ReindexMultipleInternal(List *relids,
-									ReindexParams *params);
+									ReindexParams *params,
+									Oid parent,
+									int npart);
 static bool ReindexRelationConcurrently(Oid relationOid,
 										ReindexParams *params);
 static void update_relispartition(Oid relationId, bool newval);
 static inline void set_indexsafe_procflags(void);
+static void report_create_partition_index_done(Oid parent, int npart);
 
 /*
  * callback argument type for RangeVarCallbackForReindexIndex()
@@ -1184,6 +1187,7 @@ DefineIndex(Oid relationId,
 			Oid		   *opfamOids;
 			char		*relname;
 
+
 			pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_TOTAL,
 										 nparts);
 
@@ -1640,7 +1644,7 @@ static void
 reindex_invalid_child_indexes(Oid indexRelationId)
 {
 	ReindexParams params = {
-		.options = REINDEXOPT_CONCURRENTLY | REINDEXOPT_SKIPVALID
+		.options = REINDEXOPT_CONCURRENTLY | REINDEXOPT_SKIPVALID | REINDEXOPT_REPORT_CREATE_PART
 	};
 
 	/*
@@ -1653,6 +1657,8 @@ reindex_invalid_child_indexes(Oid indexRelationId)
 	CommandCounterIncrement();
 	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
 
+	pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE, PROGRESS_CREATEIDX_PHASE_VALIDATE_IDXSCAN);
+
 	/*
 	 * Process each partition listed in a separate transaction.  Note that
 	 * this commits and then starts a new transaction immediately.
@@ -2987,7 +2993,7 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
 	 * Process each relation listed in a separate transaction.  Note that this
 	 * commits and then starts a new transaction immediately.
 	 */
-	ReindexMultipleInternal(relids, params);
+	ReindexMultipleInternal(relids, params, InvalidOid, 0);
 
 	MemoryContextDelete(private_context);
 }
@@ -3023,6 +3029,7 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 	char		relkind = get_rel_relkind(relid);
 	char	   *relname = get_rel_name(relid);
 	char	   *relnamespace = get_namespace_name(get_rel_namespace(relid));
+	int			npart = 1;
 	MemoryContext reindex_context;
 	List	   *inhoids;
 	ListCell   *lc;
@@ -3083,7 +3090,11 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 		/* Skip valid indexes, if requested */
 		if ((params->options & REINDEXOPT_SKIPVALID) != 0 &&
 				get_index_isvalid(partoid))
+		{
+			if (params->options & REINDEXOPT_REPORT_CREATE_PART)
+				report_create_partition_index_done(relid, npart++);
 			continue;
+		}
 
 		Assert(partkind == RELKIND_INDEX ||
 			   partkind == RELKIND_RELATION);
@@ -3098,7 +3109,7 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 	 * Process each partition listed in a separate transaction.  Note that
 	 * this commits and then starts a new transaction immediately.
 	 */
-	ReindexMultipleInternal(partitions, params);
+	ReindexMultipleInternal(partitions, params, relid, npart);
 
 	/*
 	 * Clean up working storage --- note we must do this after
@@ -3116,7 +3127,7 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
  * and starts a new transaction when finished.
  */
 static void
-ReindexMultipleInternal(List *relids, ReindexParams *params)
+ReindexMultipleInternal(List *relids, ReindexParams *params, Oid parent, int npart)
 {
 	ListCell   *l;
 
@@ -3210,6 +3221,9 @@ ReindexMultipleInternal(List *relids, ReindexParams *params)
 		}
 
 		CommitTransactionCommand();
+
+		if (params->options & REINDEXOPT_REPORT_CREATE_PART)
+			report_create_partition_index_done(parent, npart++);
 	}
 
 	StartTransactionCommand();
@@ -3592,7 +3606,9 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 		if (indexRel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
 			elog(ERROR, "cannot reindex a temporary table concurrently");
 
-		pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
+		/* Don't overwrite CREATE INDEX command */
+		if (!(params->options & REINDEXOPT_REPORT_CREATE_PART))
+			pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
 									  idx->tableId);
 
 		progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY;
@@ -3745,9 +3761,11 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 
 		/*
 		 * Update progress for the index to build, with the correct parent
-		 * table involved.
+		 * table involved.  Don't overwrite CREATE INDEX command.
 		 */
-		pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX, newidx->tableId);
+		if (!(params->options & REINDEXOPT_REPORT_CREATE_PART))
+			pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX, newidx->tableId);
+
 		progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY;
 		progress_vals[1] = PROGRESS_CREATEIDX_PHASE_BUILD;
 		progress_vals[2] = newidx->indexId;
@@ -3809,10 +3827,12 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 
 		/*
 		 * Update progress for the index to build, with the correct parent
-		 * table involved.
+		 * table involved. Don't overwrite CREATE INDEX command.
 		 */
-		pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
+		if (!(params->options & REINDEXOPT_REPORT_CREATE_PART))
+			pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
 									  newidx->tableId);
+
 		progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY;
 		progress_vals[1] = PROGRESS_CREATEIDX_PHASE_VALIDATE_IDXSCAN;
 		progress_vals[2] = newidx->indexId;
@@ -4047,7 +4067,9 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 
 	MemoryContextDelete(private_context);
 
-	pgstat_progress_end_command();
+	/* Don't overwrite CREATE INDEX command. */
+	if (!(params->options & REINDEXOPT_REPORT_CREATE_PART))
+		pgstat_progress_end_command();
 
 	return true;
 }
@@ -4183,6 +4205,29 @@ IndexSetParentIndex(Relation partitionIdx, Oid parentOid)
 	}
 }
 
+/*
+ * Update pgstat progress report to indicate that create index on
+ * partition was finished.
+ */
+static void
+report_create_partition_index_done(Oid index, int npart)
+{
+	const int   progress_cols[] = {
+		PROGRESS_CREATEIDX_COMMAND,
+		PROGRESS_CREATEIDX_INDEX_OID,
+		PROGRESS_CREATEIDX_PHASE,
+		PROGRESS_CREATEIDX_PARTITIONS_DONE
+	};
+	const int64 progress_vals[] = {
+		PROGRESS_CREATEIDX_COMMAND_CREATE_CONCURRENTLY,
+		index,
+		PROGRESS_CREATEIDX_PHASE_VALIDATE_IDXSCAN,
+		npart
+	};
+
+	pgstat_progress_update_multi_param(4, progress_cols, progress_vals);
+}
+
 /*
  * Subroutine of IndexSetParentIndex to update the relispartition flag of the
  * given index to the given value.
diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h
index c31b66ad0b9..b5b0a71e7d4 100644
--- a/src/include/catalog/index.h
+++ b/src/include/catalog/index.h
@@ -43,6 +43,7 @@ typedef struct ReindexParams
 #define REINDEXOPT_MISSING_OK 	0x04	/* skip missing relations */
 #define REINDEXOPT_CONCURRENTLY	0x08	/* concurrent mode */
 #define REINDEXOPT_SKIPVALID	0x10	/* skip valid indexes */
+#define REINDEXOPT_REPORT_CREATE_PART	0x20	/* report that index was created for partition */
 
 /* state info for validate_index bulkdelete callback */
 typedef struct ValidateIndexState
-- 
2.25.1

v2-0004-ReindexPartitions-to-set-indisvalid.patchtext/x-diff; name=v2-0004-ReindexPartitions-to-set-indisvalid.patchDownload
From 44055a8f644d153a6df919eba02db41f8085b9e8 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzby@telsasoft.com>
Date: Mon, 7 Feb 2022 10:39:59 +0300
Subject: [PATCH 4/5] ReindexPartitions() to set indisvalid

0004-ReindexPartitions-to-set-indisvalid.patch from
https://www.postgresql.org/message-id/20210226182019.GU20769@telsasoft.com
---
 src/backend/commands/indexcmds.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 57df92985fe..21f1ceaea63 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -1651,8 +1651,6 @@ reindex_invalid_child_indexes(Oid indexRelationId)
 	 * CIC needs to mark a partitioned index as VALID, which itself
 	 * requires setting READY, which is unset for CIC (even though
 	 * it's meaningless for an index without storage).
-	 * This must be done only while holding a lock which precludes adding
-	 * partitions.
 	 */
 	CommandCounterIncrement();
 	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
@@ -1664,9 +1662,6 @@ reindex_invalid_child_indexes(Oid indexRelationId)
 	 * this commits and then starts a new transaction immediately.
 	 */
 	ReindexPartitions(indexRelationId, &params, true);
-
-	CommandCounterIncrement();
-	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
 }
 
 /*
@@ -3111,6 +3106,24 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 	 */
 	ReindexMultipleInternal(partitions, params, relid, npart);
 
+	/*
+	 * If indexes exist on all of the partitioned table's children, and we
+	 * just reindexed them, then we know they're valid, and so can mark the
+	 * parent index as valid.
+	 * This handles the case of CREATE INDEX CONCURRENTLY.
+	 * See also: validatePartitionedIndex().
+	 */
+	if (get_rel_relkind(relid) == RELKIND_PARTITIONED_INDEX
+			&& !get_index_isvalid(relid))
+	{
+		Oid	tableoid = IndexGetRelation(relid, false);
+		List	*child_tables = find_all_inheritors(tableoid, ShareLock, NULL);
+
+		/* Both lists include their parent relation as well as any intermediate partitioned rels */
+		if (list_length(inhoids) == list_length(child_tables))
+			index_set_state_flags(relid, INDEX_CREATE_SET_VALID);
+	}
+
 	/*
 	 * Clean up working storage --- note we must do this after
 	 * StartTransactionCommand, else we might be trying to delete the active
-- 
2.25.1

v2-0005-Mark-intermediate-partitioned-indexes-as-valid.patchtext/x-diff; name=v2-0005-Mark-intermediate-partitioned-indexes-as-valid.patchDownload
From 75db7db9fae509c64609315b86aa85706e710e2b Mon Sep 17 00:00:00 2001
From: Alexander Pyhalov <a.pyhalov@postgrespro.ru>
Date: Mon, 28 Feb 2022 10:50:58 +0300
Subject: [PATCH 5/5] Mark intermediate partitioned indexes as valid

---
 src/backend/commands/indexcmds.c       | 33 ++++++++++-
 src/test/regress/expected/indexing.out | 80 +++++++++++++++++++++++++-
 src/test/regress/sql/indexing.sql      |  8 +++
 3 files changed, 118 insertions(+), 3 deletions(-)

diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 21f1ceaea63..fabb5b14898 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -3021,6 +3021,7 @@ static void
 ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 {
 	List	   *partitions = NIL;
+	List	   *inhpartindexes = NIL;
 	char		relkind = get_rel_relkind(relid);
 	char	   *relname = get_rel_name(relid);
 	char	   *relnamespace = get_namespace_name(get_rel_namespace(relid));
@@ -3075,6 +3076,17 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 		char		partkind = get_rel_relkind(partoid);
 		MemoryContext old_context;
 
+		/* Create a list of invalid inherited partitioned indexes */
+		if (partkind == RELKIND_PARTITIONED_INDEX)
+		{
+			if (partoid == relid || get_index_isvalid(partoid))
+				continue;
+
+			old_context = MemoryContextSwitchTo(reindex_context);
+			inhpartindexes = lappend_oid(inhpartindexes, partoid);
+			MemoryContextSwitchTo(old_context);
+		}
+
 		/*
 		 * This discards partitioned tables, partitioned indexes and foreign
 		 * tables.
@@ -3119,9 +3131,28 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 		Oid	tableoid = IndexGetRelation(relid, false);
 		List	*child_tables = find_all_inheritors(tableoid, ShareLock, NULL);
 
-		/* Both lists include their parent relation as well as any intermediate partitioned rels */
+		/*
+		 * Both lists include their parent relation as well as any
+		 * intermediate partitioned rels
+		 */
 		if (list_length(inhoids) == list_length(child_tables))
+		{
 			index_set_state_flags(relid, INDEX_CREATE_SET_VALID);
+
+			/* Mark any intermediate partitioned index as valid */
+			foreach(lc, inhpartindexes)
+			{
+				Oid         partoid = lfirst_oid(lc);
+
+				Assert(get_rel_relkind(partoid) == RELKIND_PARTITIONED_INDEX);
+				Assert(!get_index_isvalid(partoid));
+
+				/* Can't mark an index valid without marking it ready */
+				index_set_state_flags(partoid, INDEX_CREATE_SET_READY);
+				CommandCounterIncrement();
+				index_set_state_flags(partoid, INDEX_CREATE_SET_VALID);
+			}
+		}
 	}
 
 	/*
diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out
index a4ccae50de3..b4f1aea6fca 100644
--- a/src/test/regress/expected/indexing.out
+++ b/src/test/regress/expected/indexing.out
@@ -57,6 +57,8 @@ create table idxpart11 partition of idxpart1 for values from (0) to (10) partiti
 create table idxpart111 partition of idxpart11 default partition by range(a);
 create table idxpart1111 partition of idxpart111 default partition by range(a);
 create table idxpart2 partition of idxpart for values from (10) to (20);
+create table idxpart3 partition of idxpart for values from (30) to (40) partition by range(a);
+create table idxpart31 partition of idxpart3 default;
 insert into idxpart2 values(10),(10); -- not unique
 create index concurrently on idxpart (a); -- partitioned
 create index concurrently on idxpart1 (a); -- partitioned and partition
@@ -76,7 +78,7 @@ Partition key: RANGE (a)
 Indexes:
     "idxpart_a_idx" btree (a)
     "idxpart_a_idx1" UNIQUE, btree (a) INVALID
-Number of partitions: 2 (Use \d+ to list them.)
+Number of partitions: 3 (Use \d+ to list them.)
 
 \d idxpart1
         Partitioned table "public.idxpart1"
@@ -88,11 +90,59 @@ Number of partitions: 2 (Use \d+ to list them.)
 Partition of: idxpart FOR VALUES FROM (0) TO (10)
 Partition key: RANGE (a)
 Indexes:
-    "idxpart1_a_idx" btree (a) INVALID
+    "idxpart1_a_idx" btree (a)
     "idxpart1_a_idx1" btree (a)
     "idxpart1_a_idx2" UNIQUE, btree (a) INVALID
 Number of partitions: 1 (Use \d+ to list them.)
 
+\d idxpart11
+       Partitioned table "public.idxpart11"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart1 FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart11_a_idx" btree (a)
+    "idxpart11_a_idx1" btree (a)
+    "idxpart11_a_idx2" btree (a)
+    "idxpart11_a_idx3" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart111
+       Partitioned table "public.idxpart111"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart11 DEFAULT
+Partition key: RANGE (a)
+Indexes:
+    "idxpart111_a_idx" btree (a)
+    "idxpart111_a_idx1" btree (a)
+    "idxpart111_a_idx2" btree (a)
+    "idxpart111_a_idx3" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart1111
+      Partitioned table "public.idxpart1111"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart111 DEFAULT
+Partition key: RANGE (a)
+Indexes:
+    "idxpart1111_a_idx" btree (a)
+    "idxpart1111_a_idx1" btree (a)
+    "idxpart1111_a_idx2" btree (a)
+    "idxpart1111_a_idx3" UNIQUE, btree (a) INVALID
+Number of partitions: 0
+
 \d idxpart2
               Table "public.idxpart2"
  Column |  Type   | Collation | Nullable | Default 
@@ -107,6 +157,32 @@ Indexes:
     "idxpart2_a_idx2" UNIQUE, btree (a) INVALID
     "idxpart2_a_idx2_ccnew" UNIQUE, btree (a) INVALID
 
+\d idxpart3
+        Partitioned table "public.idxpart3"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (30) TO (40)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart3_a_idx" btree (a)
+    "idxpart3_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart31
+             Table "public.idxpart31"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart3 DEFAULT
+Indexes:
+    "idxpart31_a_idx" btree (a)
+    "idxpart31_a_idx1" UNIQUE, btree (a) INVALID
+
 drop table idxpart;
 -- Verify bugfix with query on indexed partitioned table with no partitions
 -- https://postgr.es/m/20180124162006.pmapfiznhgngwtjf@alvherre.pgsql
diff --git a/src/test/regress/sql/indexing.sql b/src/test/regress/sql/indexing.sql
index 3d4b6e9bc95..06673c15199 100644
--- a/src/test/regress/sql/indexing.sql
+++ b/src/test/regress/sql/indexing.sql
@@ -36,6 +36,9 @@ create table idxpart11 partition of idxpart1 for values from (0) to (10) partiti
 create table idxpart111 partition of idxpart11 default partition by range(a);
 create table idxpart1111 partition of idxpart111 default partition by range(a);
 create table idxpart2 partition of idxpart for values from (10) to (20);
+create table idxpart3 partition of idxpart for values from (30) to (40) partition by range(a);
+create table idxpart31 partition of idxpart3 default;
+
 insert into idxpart2 values(10),(10); -- not unique
 create index concurrently on idxpart (a); -- partitioned
 create index concurrently on idxpart1 (a); -- partitioned and partition
@@ -44,7 +47,12 @@ create index concurrently on idxpart2 (a); -- leaf
 create unique index concurrently on idxpart (a); -- partitioned, unique failure
 \d idxpart
 \d idxpart1
+\d idxpart11
+\d idxpart111
+\d idxpart1111
 \d idxpart2
+\d idxpart3
+\d idxpart31
 drop table idxpart;
 
 -- Verify bugfix with query on indexed partitioned table with no partitions
-- 
2.25.1

#12Greg Stark
stark@mit.edu
In reply to: Alexander Pyhalov (#11)
Re: CREATE INDEX CONCURRENTLY on partitioned index

This patch is marked "waiting on author" in the CF. However the most
recent emails have patches and it's not clear to me what's left from
previous reviews that might not be addressed yet. Should this patch be
marked "Needs Review"?

Anastasia and Alexander are marked as reviewers. Are you still able to
review it or are there still pending issues that need to be resolved
from previous reviews?

#13Justin Pryzby
pryzby@telsasoft.com
In reply to: Greg Stark (#12)
Re: CREATE INDEX CONCURRENTLY on partitioned index

On Fri, Mar 25, 2022 at 01:05:49AM -0400, Greg Stark wrote:

This patch is marked "waiting on author" in the CF. However the most
recent emails have patches and it's not clear to me what's left from
previous reviews that might not be addressed yet. Should this patch be
marked "Needs Review"?

Anastasia and Alexander are marked as reviewers. Are you still able to
review it or are there still pending issues that need to be resolved
from previous reviews?

I still haven't responded to Alexander's feedback, so I need to do that.
(Sorry).

However, the patch attracted no attention for 50-some weeks last year, so
now is a weird time to shift attention to it. As such, I will move it to the
next CF.

/messages/by-id/20210226182019.GU20769@telsasoft.com

--
Justin

#14Justin Pryzby
pryzby@telsasoft.com
In reply to: Alexander Pyhalov (#10)
Re: CREATE INDEX CONCURRENTLY on partitioned index

Hi,

On Thu, Feb 10, 2022 at 06:07:08PM +0300, Alexander Pyhalov wrote:

I've rebased patches and tried to fix issues I've seen. I've fixed reference
after table_close() in the first patch (can be seen while building with
CPPFLAGS='-DRELCACHE_FORCE_RELEASE').

Thanks for finding that.

The patches other than 0001 are more experimental, and need someone to check if
it's even a good approach to use, so I kept them separate from the essential
patch.

Your latest 0005 patch (mark intermediate partitioned indexes as valid) is
probably fixing a bug in my SKIPVALID patch, right? I'm not sure whether the
SKIPVALID patch should be merged into 0001, and I've been awaiting feedback on
the main patch before handling progress reporting.

Sorry for not responding sooner. The patch saw no activity for ~11 months so I
wasn't prepared to pick it up in March, at least not without guidance from a
committer.

Would you want to take over this patch? I wrote it following someone's
question, but don't expect that I'd use the feature myself. I can help review
it or try to clarify the organization of my existing patches (but I still
haven't managed to work my way through your amendments to my patches).

Thanks for caring about partitioned DDL ;)

--
Justin

#15Alexander Pyhalov
a.pyhalov@postgrespro.ru
In reply to: Justin Pryzby (#14)
Re: CREATE INDEX CONCURRENTLY on partitioned index

Justin Pryzby писал 2022-06-28 21:33:

Hi,

On Thu, Feb 10, 2022 at 06:07:08PM +0300, Alexander Pyhalov wrote:

I've rebased patches and tried to fix issues I've seen. I've fixed
reference
after table_close() in the first patch (can be seen while building
with
CPPFLAGS='-DRELCACHE_FORCE_RELEASE').

Thanks for finding that.

The patches other than 0001 are more experimental, and need someone to
check if
it's even a good approach to use, so I kept them separate from the
essential
patch.

Your latest 0005 patch (mark intermediate partitioned indexes as valid)
is
probably fixing a bug in my SKIPVALID patch, right ? I'm not sure
whether the
SKIPVALID patch should be merged into 0001, and I've been awaiting
feedback on
the main patch before handling progress reporting.

Hi. I think it's more about fixing the ReindexPartitions-to-set-indisvalid
patch, as we should also mark intermediate indexes as valid when the reindex
succeeds.

Sorry for not responding sooner. The patch saw no activity for ~11
months so I
wasn't prepared to pick it up in March, at least not without guidance
from a
committer.

Would you want to take over this patch ? I wrote it following
someone's
question, but don't expect that I'd use the feature myself. I can help
review
it or try to clarify the organization of my existing patches (but still
haven't
managed to work my way through your amendments to my patches).

Yes, I'm glad to work on the patches, as this is a very important
feature for us.
--
Best regards,
Alexander Pyhalov,
Postgres Professional

#16Alexander Pyhalov
a.pyhalov@postgrespro.ru
In reply to: Justin Pryzby (#14)
5 attachment(s)
Re: CREATE INDEX CONCURRENTLY on partitioned index

Justin Pryzby писал 2022-06-28 21:33:

Hi,

On Thu, Feb 10, 2022 at 06:07:08PM +0300, Alexander Pyhalov wrote:

I've rebased the patches and tried to fix the issues I've seen. I've fixed
a use of rel after table_close() in the first patch (it can be seen when
building with CPPFLAGS='-DRELCACHE_FORCE_RELEASE').

Rebased patches on the current master.
They still require proper review.

--
Best regards,
Alexander Pyhalov,
Postgres Professional

Attachments:

v3-0005-Mark-intermediate-partitioned-indexes-as-valid.patchtext/x-diff; name=v3-0005-Mark-intermediate-partitioned-indexes-as-valid.patchDownload
From 5c11849ceb2a1feb0e44dbdf30cc27de0282a659 Mon Sep 17 00:00:00 2001
From: Alexander Pyhalov <a.pyhalov@postgrespro.ru>
Date: Mon, 28 Feb 2022 10:50:58 +0300
Subject: [PATCH 5/5] Mark intermediate partitioned indexes as valid

---
 src/backend/commands/indexcmds.c       | 33 ++++++++++-
 src/test/regress/expected/indexing.out | 80 +++++++++++++++++++++++++-
 src/test/regress/sql/indexing.sql      |  8 +++
 3 files changed, 118 insertions(+), 3 deletions(-)

diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index d09f0390413..d3ced6265b6 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -3139,6 +3139,7 @@ static void
 ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 {
 	List	   *partitions = NIL;
+	List	   *inhpartindexes = NIL;
 	char		relkind = get_rel_relkind(relid);
 	char	   *relname = get_rel_name(relid);
 	char	   *relnamespace = get_namespace_name(get_rel_namespace(relid));
@@ -3193,6 +3194,17 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 		char		partkind = get_rel_relkind(partoid);
 		MemoryContext old_context;
 
+		/* Create a list of invalid inherited partitioned indexes */
+		if (partkind == RELKIND_PARTITIONED_INDEX)
+		{
+			if (partoid == relid || get_index_isvalid(partoid))
+				continue;
+
+			old_context = MemoryContextSwitchTo(reindex_context);
+			inhpartindexes = lappend_oid(inhpartindexes, partoid);
+			MemoryContextSwitchTo(old_context);
+		}
+
 		/*
 		 * This discards partitioned tables, partitioned indexes and foreign
 		 * tables.
@@ -3237,9 +3249,28 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 		Oid	tableoid = IndexGetRelation(relid, false);
 		List	*child_tables = find_all_inheritors(tableoid, ShareLock, NULL);
 
-		/* Both lists include their parent relation as well as any intermediate partitioned rels */
+		/*
+		 * Both lists include their parent relation as well as any
+		 * intermediate partitioned rels
+		 */
 		if (list_length(inhoids) == list_length(child_tables))
+		{
 			index_set_state_flags(relid, INDEX_CREATE_SET_VALID);
+
+			/* Mark any intermediate partitioned index as valid */
+			foreach(lc, inhpartindexes)
+			{
+				Oid         partoid = lfirst_oid(lc);
+
+				Assert(get_rel_relkind(partoid) == RELKIND_PARTITIONED_INDEX);
+				Assert(!get_index_isvalid(partoid));
+
+				/* Can't mark an index valid without marking it ready */
+				index_set_state_flags(partoid, INDEX_CREATE_SET_READY);
+				CommandCounterIncrement();
+				index_set_state_flags(partoid, INDEX_CREATE_SET_VALID);
+			}
+		}
 	}
 
 	/*
diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out
index a4ccae50de3..b4f1aea6fca 100644
--- a/src/test/regress/expected/indexing.out
+++ b/src/test/regress/expected/indexing.out
@@ -57,6 +57,8 @@ create table idxpart11 partition of idxpart1 for values from (0) to (10) partiti
 create table idxpart111 partition of idxpart11 default partition by range(a);
 create table idxpart1111 partition of idxpart111 default partition by range(a);
 create table idxpart2 partition of idxpart for values from (10) to (20);
+create table idxpart3 partition of idxpart for values from (30) to (40) partition by range(a);
+create table idxpart31 partition of idxpart3 default;
 insert into idxpart2 values(10),(10); -- not unique
 create index concurrently on idxpart (a); -- partitioned
 create index concurrently on idxpart1 (a); -- partitioned and partition
@@ -76,7 +78,7 @@ Partition key: RANGE (a)
 Indexes:
     "idxpart_a_idx" btree (a)
     "idxpart_a_idx1" UNIQUE, btree (a) INVALID
-Number of partitions: 2 (Use \d+ to list them.)
+Number of partitions: 3 (Use \d+ to list them.)
 
 \d idxpart1
         Partitioned table "public.idxpart1"
@@ -88,11 +90,59 @@ Number of partitions: 2 (Use \d+ to list them.)
 Partition of: idxpart FOR VALUES FROM (0) TO (10)
 Partition key: RANGE (a)
 Indexes:
-    "idxpart1_a_idx" btree (a) INVALID
+    "idxpart1_a_idx" btree (a)
     "idxpart1_a_idx1" btree (a)
     "idxpart1_a_idx2" UNIQUE, btree (a) INVALID
 Number of partitions: 1 (Use \d+ to list them.)
 
+\d idxpart11
+       Partitioned table "public.idxpart11"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart1 FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart11_a_idx" btree (a)
+    "idxpart11_a_idx1" btree (a)
+    "idxpart11_a_idx2" btree (a)
+    "idxpart11_a_idx3" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart111
+       Partitioned table "public.idxpart111"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart11 DEFAULT
+Partition key: RANGE (a)
+Indexes:
+    "idxpart111_a_idx" btree (a)
+    "idxpart111_a_idx1" btree (a)
+    "idxpart111_a_idx2" btree (a)
+    "idxpart111_a_idx3" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart1111
+      Partitioned table "public.idxpart1111"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart111 DEFAULT
+Partition key: RANGE (a)
+Indexes:
+    "idxpart1111_a_idx" btree (a)
+    "idxpart1111_a_idx1" btree (a)
+    "idxpart1111_a_idx2" btree (a)
+    "idxpart1111_a_idx3" UNIQUE, btree (a) INVALID
+Number of partitions: 0
+
 \d idxpart2
               Table "public.idxpart2"
  Column |  Type   | Collation | Nullable | Default 
@@ -107,6 +157,32 @@ Indexes:
     "idxpart2_a_idx2" UNIQUE, btree (a) INVALID
     "idxpart2_a_idx2_ccnew" UNIQUE, btree (a) INVALID
 
+\d idxpart3
+        Partitioned table "public.idxpart3"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (30) TO (40)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart3_a_idx" btree (a)
+    "idxpart3_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart31
+             Table "public.idxpart31"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart3 DEFAULT
+Indexes:
+    "idxpart31_a_idx" btree (a)
+    "idxpart31_a_idx1" UNIQUE, btree (a) INVALID
+
 drop table idxpart;
 -- Verify bugfix with query on indexed partitioned table with no partitions
 -- https://postgr.es/m/20180124162006.pmapfiznhgngwtjf@alvherre.pgsql
diff --git a/src/test/regress/sql/indexing.sql b/src/test/regress/sql/indexing.sql
index 3d4b6e9bc95..06673c15199 100644
--- a/src/test/regress/sql/indexing.sql
+++ b/src/test/regress/sql/indexing.sql
@@ -36,6 +36,9 @@ create table idxpart11 partition of idxpart1 for values from (0) to (10) partiti
 create table idxpart111 partition of idxpart11 default partition by range(a);
 create table idxpart1111 partition of idxpart111 default partition by range(a);
 create table idxpart2 partition of idxpart for values from (10) to (20);
+create table idxpart3 partition of idxpart for values from (30) to (40) partition by range(a);
+create table idxpart31 partition of idxpart3 default;
+
 insert into idxpart2 values(10),(10); -- not unique
 create index concurrently on idxpart (a); -- partitioned
 create index concurrently on idxpart1 (a); -- partitioned and partition
@@ -44,7 +47,12 @@ create index concurrently on idxpart2 (a); -- leaf
 create unique index concurrently on idxpart (a); -- partitioned, unique failure
 \d idxpart
 \d idxpart1
+\d idxpart11
+\d idxpart111
+\d idxpart1111
 \d idxpart2
+\d idxpart3
+\d idxpart31
 drop table idxpart;
 
 -- Verify bugfix with query on indexed partitioned table with no partitions
-- 
2.34.1

v3-0004-ReindexPartitions-to-set-indisvalid.patchtext/x-diff; name=v3-0004-ReindexPartitions-to-set-indisvalid.patchDownload
From 71838e0146e5150013c48818710d899e69786dc0 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzby@telsasoft.com>
Date: Mon, 7 Feb 2022 10:39:59 +0300
Subject: [PATCH 4/5] ReindexPartitions() to set indisvalid

0004-ReindexPartitions-to-set-indisvalid.patch from
https://www.postgresql.org/message-id/20210226182019.GU20769@telsasoft.com
---
 src/backend/commands/indexcmds.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index b231a05b8c9..d09f0390413 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -1716,8 +1716,6 @@ reindex_invalid_child_indexes(Oid indexRelationId)
 	 * CIC needs to mark a partitioned index as VALID, which itself
 	 * requires setting READY, which is unset for CIC (even though
 	 * it's meaningless for an index without storage).
-	 * This must be done only while holding a lock which precludes adding
-	 * partitions.
 	 */
 	CommandCounterIncrement();
 	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
@@ -1729,9 +1727,6 @@ reindex_invalid_child_indexes(Oid indexRelationId)
 	 * this commits and then starts a new transaction immediately.
 	 */
 	ReindexPartitions(indexRelationId, &params, true);
-
-	CommandCounterIncrement();
-	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
 }
 
 /*
@@ -3229,6 +3224,24 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 	 */
 	ReindexMultipleInternal(partitions, params, relid, npart);
 
+	/*
+	 * If indexes exist on all of the partitioned table's children, and we
+	 * just reindexed them, then we know they're valid, and so can mark the
+	 * parent index as valid.
+	 * This handles the case of CREATE INDEX CONCURRENTLY.
+	 * See also: validatePartitionedIndex().
+	 */
+	if (get_rel_relkind(relid) == RELKIND_PARTITIONED_INDEX
+			&& !get_index_isvalid(relid))
+	{
+		Oid	tableoid = IndexGetRelation(relid, false);
+		List	*child_tables = find_all_inheritors(tableoid, ShareLock, NULL);
+
+		/* Both lists include their parent relation as well as any intermediate partitioned rels */
+		if (list_length(inhoids) == list_length(child_tables))
+			index_set_state_flags(relid, INDEX_CREATE_SET_VALID);
+	}
+
 	/*
 	 * Clean up working storage --- note we must do this after
 	 * StartTransactionCommand, else we might be trying to delete the active
-- 
2.34.1

v3-0003-Try-to-fix-create-index-progress-report.patchtext/x-diff; name=v3-0003-Try-to-fix-create-index-progress-report.patchDownload
From f5b8afe1aafb78c53a527d379b978ce9f1fe06d6 Mon Sep 17 00:00:00 2001
From: Alexander Pyhalov <a.pyhalov@postgrespro.ru>
Date: Tue, 8 Feb 2022 21:15:05 +0300
Subject: [PATCH 3/5] Try to fix create index progress report

---
 src/backend/commands/indexcmds.c | 67 ++++++++++++++++++++++++++------
 src/include/catalog/index.h      |  1 +
 2 files changed, 57 insertions(+), 11 deletions(-)

diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 149235780b9..b231a05b8c9 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -102,11 +102,14 @@ static void reindex_error_callback(void *args);
 static void ReindexPartitions(Oid relid, ReindexParams *params,
 							  bool isTopLevel);
 static void ReindexMultipleInternal(List *relids,
-									ReindexParams *params);
+									ReindexParams *params,
+									Oid parent,
+									int npart);
 static bool ReindexRelationConcurrently(Oid relationOid,
 										ReindexParams *params);
 static void update_relispartition(Oid relationId, bool newval);
 static inline void set_indexsafe_procflags(void);
+static void report_create_partition_index_done(Oid parent, int npart);
 
 /*
  * callback argument type for RangeVarCallbackForReindexIndex()
@@ -1220,6 +1223,7 @@ DefineIndex(Oid relationId,
 			Oid		   *opfamOids;
 			char		*relname;
 
+
 			pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_TOTAL,
 										 nparts);
 
@@ -1705,7 +1709,7 @@ static void
 reindex_invalid_child_indexes(Oid indexRelationId)
 {
 	ReindexParams params = {
-		.options = REINDEXOPT_CONCURRENTLY | REINDEXOPT_SKIPVALID
+		.options = REINDEXOPT_CONCURRENTLY | REINDEXOPT_SKIPVALID | REINDEXOPT_REPORT_CREATE_PART
 	};
 
 	/*
@@ -1718,6 +1722,8 @@ reindex_invalid_child_indexes(Oid indexRelationId)
 	CommandCounterIncrement();
 	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
 
+	pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE, PROGRESS_CREATEIDX_PHASE_VALIDATE_IDXSCAN);
+
 	/*
 	 * Process each partition listed in a separate transaction.  Note that
 	 * this commits and then starts a new transaction immediately.
@@ -3105,7 +3111,7 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
 	 * Process each relation listed in a separate transaction.  Note that this
 	 * commits and then starts a new transaction immediately.
 	 */
-	ReindexMultipleInternal(relids, params);
+	ReindexMultipleInternal(relids, params, InvalidOid, 0);
 
 	MemoryContextDelete(private_context);
 }
@@ -3141,6 +3147,7 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 	char		relkind = get_rel_relkind(relid);
 	char	   *relname = get_rel_name(relid);
 	char	   *relnamespace = get_namespace_name(get_rel_namespace(relid));
+	int			npart = 1;
 	MemoryContext reindex_context;
 	List	   *inhoids;
 	ListCell   *lc;
@@ -3201,7 +3208,11 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 		/* Skip valid indexes, if requested */
 		if ((params->options & REINDEXOPT_SKIPVALID) != 0 &&
 				get_index_isvalid(partoid))
+		{
+			if (params->options & REINDEXOPT_REPORT_CREATE_PART)
+				report_create_partition_index_done(relid, npart++);
 			continue;
+		}
 
 		Assert(partkind == RELKIND_INDEX ||
 			   partkind == RELKIND_RELATION);
@@ -3216,7 +3227,7 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 	 * Process each partition listed in a separate transaction.  Note that
 	 * this commits and then starts a new transaction immediately.
 	 */
-	ReindexMultipleInternal(partitions, params);
+	ReindexMultipleInternal(partitions, params, relid, npart);
 
 	/*
 	 * Clean up working storage --- note we must do this after
@@ -3234,7 +3245,7 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
  * and starts a new transaction when finished.
  */
 static void
-ReindexMultipleInternal(List *relids, ReindexParams *params)
+ReindexMultipleInternal(List *relids, ReindexParams *params, Oid parent, int npart)
 {
 	ListCell   *l;
 
@@ -3328,6 +3339,9 @@ ReindexMultipleInternal(List *relids, ReindexParams *params)
 		}
 
 		CommitTransactionCommand();
+
+		if (params->options & REINDEXOPT_REPORT_CREATE_PART)
+			report_create_partition_index_done(parent, npart++);
 	}
 
 	StartTransactionCommand();
@@ -3723,7 +3737,9 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 		if (indexRel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
 			elog(ERROR, "cannot reindex a temporary table concurrently");
 
-		pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
+		/* Don't overwrite CREATE INDEX command */
+		if (!(params->options & REINDEXOPT_REPORT_CREATE_PART))
+			pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
 									  idx->tableId);
 
 		progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY;
@@ -3883,9 +3899,11 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 
 		/*
 		 * Update progress for the index to build, with the correct parent
-		 * table involved.
+		 * table involved.  Don't overwrite CREATE INDEX command.
 		 */
-		pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX, newidx->tableId);
+		if (!(params->options & REINDEXOPT_REPORT_CREATE_PART))
+			pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX, newidx->tableId);
+
 		progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY;
 		progress_vals[1] = PROGRESS_CREATEIDX_PHASE_BUILD;
 		progress_vals[2] = newidx->indexId;
@@ -3947,10 +3965,12 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 
 		/*
 		 * Update progress for the index to build, with the correct parent
-		 * table involved.
+		 * table involved. Don't overwrite CREATE INDEX command.
 		 */
-		pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
+		if (!(params->options & REINDEXOPT_REPORT_CREATE_PART))
+			pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
 									  newidx->tableId);
+
 		progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY;
 		progress_vals[1] = PROGRESS_CREATEIDX_PHASE_VALIDATE_IDXSCAN;
 		progress_vals[2] = newidx->indexId;
@@ -4185,7 +4205,9 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 
 	MemoryContextDelete(private_context);
 
-	pgstat_progress_end_command();
+	/* Don't overwrite CREATE INDEX command. */
+	if (!(params->options & REINDEXOPT_REPORT_CREATE_PART))
+		pgstat_progress_end_command();
 
 	return true;
 }
@@ -4321,6 +4343,29 @@ IndexSetParentIndex(Relation partitionIdx, Oid parentOid)
 	}
 }
 
+/*
+ * Update pgstat progress report to indicate that create index on
+ * partition was finished.
+ */
+static void
+report_create_partition_index_done(Oid index, int npart)
+{
+	const int   progress_cols[] = {
+		PROGRESS_CREATEIDX_COMMAND,
+		PROGRESS_CREATEIDX_INDEX_OID,
+		PROGRESS_CREATEIDX_PHASE,
+		PROGRESS_CREATEIDX_PARTITIONS_DONE
+	};
+	const int64 progress_vals[] = {
+		PROGRESS_CREATEIDX_COMMAND_CREATE_CONCURRENTLY,
+		index,
+		PROGRESS_CREATEIDX_PHASE_VALIDATE_IDXSCAN,
+		npart
+	};
+
+	pgstat_progress_update_multi_param(4, progress_cols, progress_vals);
+}
+
 /*
  * Subroutine of IndexSetParentIndex to update the relispartition flag of the
  * given index to the given value.
diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h
index c31b66ad0b9..b5b0a71e7d4 100644
--- a/src/include/catalog/index.h
+++ b/src/include/catalog/index.h
@@ -43,6 +43,7 @@ typedef struct ReindexParams
 #define REINDEXOPT_MISSING_OK 	0x04	/* skip missing relations */
 #define REINDEXOPT_CONCURRENTLY	0x08	/* concurrent mode */
 #define REINDEXOPT_SKIPVALID	0x10	/* skip valid indexes */
+#define REINDEXOPT_REPORT_CREATE_PART	0x20	/* report that index was created for partition */
 
 /* state info for validate_index bulkdelete callback */
 typedef struct ValidateIndexState
-- 
2.34.1

v3-0002-Add-SKIPVALID-flag-for-more-integration.patchtext/x-diff; name=v3-0002-Add-SKIPVALID-flag-for-more-integration.patchDownload
From 4f16e729a835dc8123a8ce593e6ae47542b82c83 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzby@telsasoft.com>
Date: Mon, 7 Feb 2022 10:31:40 +0300
Subject: [PATCH 2/5] Add SKIPVALID flag for more integration

Combined
0002-f-progress-reporting.patch and
0003-WIP-Add-SKIPVALID-flag-for-more-integration.patch from
https://www.postgresql.org/message-id/20210226182019.GU20769@telsasoft.com
---
 src/backend/commands/indexcmds.c | 57 ++++++++++----------------------
 src/include/catalog/index.h      |  1 +
 2 files changed, 19 insertions(+), 39 deletions(-)

diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 784f6d9eb87..149235780b9 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -1697,59 +1697,33 @@ DefineIndex(Oid relationId,
 	return address;
 }
 
-/* Reindex invalid child indexes created earlier */
+/*
+ * Reindex invalid child indexes created earlier thereby validating
+ * the parent index.
+ */
 static void
 reindex_invalid_child_indexes(Oid indexRelationId)
 {
-	ListCell *lc;
-	int		npart = 0;
 	ReindexParams params = {
-		.options = REINDEXOPT_CONCURRENTLY
+		.options = REINDEXOPT_CONCURRENTLY | REINDEXOPT_SKIPVALID
 	};
 
-	MemoryContext	ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX",
-			ALLOCSET_DEFAULT_SIZES);
-	MemoryContext	oldcontext;
-	List		*childs = find_inheritance_children(indexRelationId, ShareLock);
-	List		*partitions = NIL;
-
-	PreventInTransactionBlock(true, "REINDEX INDEX");
-
-	foreach (lc, childs)
-	{
-		Oid			partoid = lfirst_oid(lc);
-
-		/* XXX: need to retrofit progress reporting into it */
-		// pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
-									 // npart++);
-
-		if (get_index_isvalid(partoid) ||
-				!RELKIND_HAS_STORAGE(get_rel_relkind(partoid)))
-			continue;
-
-		/* Save partition OID */
-		oldcontext = MemoryContextSwitchTo(ind_context);
-		partitions = lappend_oid(partitions, partoid);
-		MemoryContextSwitchTo(oldcontext);
-	}
-
-	/*
-	 * Process each partition listed in a separate transaction.  Note that
-	 * this commits and then starts a new transaction immediately.
-	 * XXX: since this is done in 2*N transactions, it could just as well
-	 * call ReindexRelationConcurrently directly
-	 */
-	ReindexMultipleInternal(partitions, &params);
-
 	/*
 	 * CIC needs to mark a partitioned index as VALID, which itself
 	 * requires setting READY, which is unset for CIC (even though
 	 * it's meaningless for an index without storage).
 	 * This must be done only while holding a lock which precludes adding
 	 * partitions.
-	 * See also: validatePartitionedIndex().
 	 */
+	CommandCounterIncrement();
 	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
+
+	/*
+	 * Process each partition listed in a separate transaction.  Note that
+	 * this commits and then starts a new transaction immediately.
+	 */
+	ReindexPartitions(indexRelationId, &params, true);
+
 	CommandCounterIncrement();
 	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
 }
@@ -3224,6 +3198,11 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 		if (!RELKIND_HAS_STORAGE(partkind))
 			continue;
 
+		/* Skip valid indexes, if requested */
+		if ((params->options & REINDEXOPT_SKIPVALID) != 0 &&
+				get_index_isvalid(partoid))
+			continue;
+
 		Assert(partkind == RELKIND_INDEX ||
 			   partkind == RELKIND_RELATION);
 
diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h
index a1d6e3b645f..c31b66ad0b9 100644
--- a/src/include/catalog/index.h
+++ b/src/include/catalog/index.h
@@ -42,6 +42,7 @@ typedef struct ReindexParams
 #define REINDEXOPT_REPORT_PROGRESS 0x02 /* report pgstat progress */
 #define REINDEXOPT_MISSING_OK 	0x04	/* skip missing relations */
 #define REINDEXOPT_CONCURRENTLY	0x08	/* concurrent mode */
+#define REINDEXOPT_SKIPVALID	0x10	/* skip valid indexes */
 
 /* state info for validate_index bulkdelete callback */
 typedef struct ValidateIndexState
-- 
2.34.1

v3-0001-Allow-CREATE-INDEX-CONCURRENTLY-on-partitioned-table.patchtext/x-diff; name=v3-0001-Allow-CREATE-INDEX-CONCURRENTLY-on-partitioned-table.patchDownload
From f4933625541b030fabcb15d1426f496598949898 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzby@telsasoft.com>
Date: Mon, 7 Feb 2022 10:28:42 +0300
Subject: [PATCH 1/5] Allow CREATE INDEX CONCURRENTLY on partitioned table

0001-Allow-CREATE-INDEX-CONCURRENTLY-on-partitioned-table.patch from
https://www.postgresql.org/message-id/20210226182019.GU20769@telsasoft.com

Fixes:
  - rel was used after table_close();
  - it seems childidxs shouldn't live in ind_context;
  - updated doc.
---
 doc/src/sgml/ref/create_index.sgml     |  14 +--
 src/backend/commands/indexcmds.c       | 151 ++++++++++++++++++-------
 src/test/regress/expected/indexing.out |  60 +++++++++-
 src/test/regress/sql/indexing.sql      |  18 ++-
 4 files changed, 186 insertions(+), 57 deletions(-)

diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml
index 9ffcdc629e6..2040b27b685 100644
--- a/doc/src/sgml/ref/create_index.sgml
+++ b/doc/src/sgml/ref/create_index.sgml
@@ -642,7 +642,10 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] <replaceable class=
    <para>
     If a problem arises while scanning the table, such as a deadlock or a
     uniqueness violation in a unique index, the <command>CREATE INDEX</command>
-    command will fail but leave behind an <quote>invalid</quote> index. This index
+    command will fail but leave behind an <quote>invalid</quote> index.
+    If this happens while creating an index concurrently on a partitioned
+    table, the command can also leave behind <quote>valid</quote> or
+    <quote>invalid</quote> indexes on table partitions.  The invalid index
     will be ignored for querying purposes because it might be incomplete;
     however it will still consume update overhead. The <application>psql</application>
     <command>\d</command> command will report such an index as <literal>INVALID</literal>:
@@ -689,15 +692,6 @@ Indexes:
     cannot.
    </para>
 
-   <para>
-    Concurrent builds for indexes on partitioned tables are currently not
-    supported.  However, you may concurrently build the index on each
-    partition individually and then finally create the partitioned index
-    non-concurrently in order to reduce the time where writes to the
-    partitioned table will be locked out.  In this case, building the
-    partitioned index is a metadata only operation.
-   </para>
-
   </refsect2>
  </refsect1>
 
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 99f5ab83c32..784f6d9eb87 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -68,6 +68,7 @@
 
 
 /* non-export function prototypes */
+static void reindex_invalid_child_indexes(Oid indexRelationId);
 static bool CompareOpclassOptions(Datum *opts1, Datum *opts2, int natts);
 static void CheckPredicate(Expr *predicate);
 static void ComputeIndexAttrs(IndexInfo *indexInfo,
@@ -695,17 +696,6 @@ DefineIndex(Oid relationId,
 	partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE;
 	if (partitioned)
 	{
-		/*
-		 * Note: we check 'stmt->concurrent' rather than 'concurrent', so that
-		 * the error is thrown also for temporary tables.  Seems better to be
-		 * consistent, even though we could do it on temporary table because
-		 * we're not actually doing it concurrently.
-		 */
-		if (stmt->concurrent)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("cannot create index on partitioned table \"%s\" concurrently",
-							RelationGetRelationName(rel))));
 		if (stmt->excludeOpNames)
 			ereport(ERROR,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
@@ -1145,6 +1135,11 @@ DefineIndex(Oid relationId,
 		if (pd->nparts != 0)
 			flags |= INDEX_CREATE_INVALID;
 	}
+	else if (concurrent && OidIsValid(parentIndexId))
+	{
+		/* If concurrent, initially build index partitions as "invalid" */
+		flags |= INDEX_CREATE_INVALID;
+	}
 
 	if (stmt->deferrable)
 		constr_flags |= INDEX_CONSTR_CREATE_DEFERRABLE;
@@ -1210,18 +1205,30 @@ DefineIndex(Oid relationId,
 		partdesc = RelationGetPartitionDesc(rel, true);
 		if ((!stmt->relation || stmt->relation->inh) && partdesc->nparts > 0)
 		{
+			/*
+			 * Need to close the relation before recursing into children, so
+			 * copy needed data into a longlived context.
+			 */
+
+			MemoryContext	ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX",
+					ALLOCSET_DEFAULT_SIZES);
+			MemoryContext	oldcontext = MemoryContextSwitchTo(ind_context);
 			int			nparts = partdesc->nparts;
 			Oid		   *part_oids = palloc(sizeof(Oid) * nparts);
 			bool		invalidate_parent = false;
 			TupleDesc	parentDesc;
 			Oid		   *opfamOids;
+			char		*relname;
 
 			pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_TOTAL,
 										 nparts);
 
 			memcpy(part_oids, partdesc->oids, sizeof(Oid) * nparts);
+			parentDesc = CreateTupleDescCopy(RelationGetDescr(rel));
+			relname = pstrdup(RelationGetRelationName(rel));
+			table_close(rel, NoLock);
+			MemoryContextSwitchTo(oldcontext);
 
-			parentDesc = RelationGetDescr(rel);
 			opfamOids = palloc(sizeof(Oid) * numberOfKeyAttributes);
 			for (i = 0; i < numberOfKeyAttributes; i++)
 				opfamOids[i] = get_opclass_family(classObjectId[i]);
@@ -1265,9 +1272,9 @@ DefineIndex(Oid relationId,
 						ereport(ERROR,
 								(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 								 errmsg("cannot create unique index on partitioned table \"%s\"",
-										RelationGetRelationName(rel)),
+										relname),
 								 errdetail("Table \"%s\" contains partitions that are foreign tables.",
-										   RelationGetRelationName(rel))));
+										   relname)));
 
 					AtEOXact_GUC(false, child_save_nestlevel);
 					SetUserIdAndSecContext(child_save_userid,
@@ -1277,9 +1284,12 @@ DefineIndex(Oid relationId,
 				}
 
 				childidxs = RelationGetIndexList(childrel);
+
+				oldcontext = MemoryContextSwitchTo(ind_context);
 				attmap =
 					build_attrmap_by_name(RelationGetDescr(childrel),
 										  parentDesc);
+				MemoryContextSwitchTo(oldcontext);
 
 				foreach(cell, childidxs)
 				{
@@ -1353,10 +1363,14 @@ DefineIndex(Oid relationId,
 				 */
 				if (!found)
 				{
-					IndexStmt  *childStmt = copyObject(stmt);
+					IndexStmt  *childStmt;
 					bool		found_whole_row;
 					ListCell   *lc;
 
+					oldcontext = MemoryContextSwitchTo(ind_context);
+					childStmt = copyObject(stmt);
+					MemoryContextSwitchTo(oldcontext);
+
 					/*
 					 * We can't use the same index name for the child index,
 					 * so clear idxname to let the recursive invocation choose
@@ -1417,12 +1431,21 @@ DefineIndex(Oid relationId,
 								skip_build, quiet);
 					SetUserIdAndSecContext(child_save_userid,
 										   child_save_sec_context);
+					if (concurrent)
+					{
+						PopActiveSnapshot();
+						PushActiveSnapshot(GetTransactionSnapshot());
+						invalidate_parent = true;
+					}
 				}
 
-				pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
-											 i + 1);
+				/* For concurrent build, this is a catalog-only stage */
+				if (!concurrent)
+					pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
+												 i + 1);
 				free_attrmap(attmap);
 			}
+			pfree(relname);
 
 			/*
 			 * The pg_index row we inserted for this index was marked
@@ -1430,24 +1453,9 @@ DefineIndex(Oid relationId,
 			 * invalid, this is incorrect, so update our row to invalid too.
 			 */
 			if (invalidate_parent)
-			{
-				Relation	pg_index = table_open(IndexRelationId, RowExclusiveLock);
-				HeapTuple	tup,
-							newtup;
-
-				tup = SearchSysCache1(INDEXRELID,
-									  ObjectIdGetDatum(indexRelationId));
-				if (!HeapTupleIsValid(tup))
-					elog(ERROR, "cache lookup failed for index %u",
-						 indexRelationId);
-				newtup = heap_copytuple(tup);
-				((Form_pg_index) GETSTRUCT(newtup))->indisvalid = false;
-				CatalogTupleUpdate(pg_index, &tup->t_self, newtup);
-				ReleaseSysCache(tup);
-				table_close(pg_index, RowExclusiveLock);
-				heap_freetuple(newtup);
-			}
-		}
+				index_set_state_flags(indexRelationId, INDEX_DROP_CLEAR_VALID);
+		} else
+			table_close(rel, NoLock);
 
 		/*
 		 * Indexes on partitioned tables are not themselves built, so we're
@@ -1455,21 +1463,28 @@ DefineIndex(Oid relationId,
 		 */
 		AtEOXact_GUC(false, root_save_nestlevel);
 		SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
-		table_close(rel, NoLock);
 		if (!OidIsValid(parentIndexId))
+		{
+			if (concurrent)
+				reindex_invalid_child_indexes(indexRelationId);
+
 			pgstat_progress_end_command();
+		}
+
 		return address;
 	}
 
 	AtEOXact_GUC(false, root_save_nestlevel);
 	SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
 
-	if (!concurrent)
+	if (!concurrent || OidIsValid(parentIndexId))
 	{
-		/* Close the heap and we're done, in the non-concurrent case */
-		table_close(rel, NoLock);
+		/*
+		 * We're done if this is the top-level index,
+		 * or the catalog-only phase of a partition built concurrently
+		 */
 
-		/* If this is the top-level index, we're done. */
+		table_close(rel, NoLock);
 		if (!OidIsValid(parentIndexId))
 			pgstat_progress_end_command();
 
@@ -1682,6 +1697,62 @@ DefineIndex(Oid relationId,
 	return address;
 }
 
+/* Reindex invalid child indexes created earlier */
+static void
+reindex_invalid_child_indexes(Oid indexRelationId)
+{
+	ListCell *lc;
+	int		npart = 0;
+	ReindexParams params = {
+		.options = REINDEXOPT_CONCURRENTLY
+	};
+
+	MemoryContext	ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX",
+			ALLOCSET_DEFAULT_SIZES);
+	MemoryContext	oldcontext;
+	List		*childs = find_inheritance_children(indexRelationId, ShareLock);
+	List		*partitions = NIL;
+
+	PreventInTransactionBlock(true, "REINDEX INDEX");
+
+	foreach (lc, childs)
+	{
+		Oid			partoid = lfirst_oid(lc);
+
+		/* XXX: need to retrofit progress reporting into it */
+		// pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
+									 // npart++);
+
+		if (get_index_isvalid(partoid) ||
+				!RELKIND_HAS_STORAGE(get_rel_relkind(partoid)))
+			continue;
+
+		/* Save partition OID */
+		oldcontext = MemoryContextSwitchTo(ind_context);
+		partitions = lappend_oid(partitions, partoid);
+		MemoryContextSwitchTo(oldcontext);
+	}
+
+	/*
+	 * Process each partition listed in a separate transaction.  Note that
+	 * this commits and then starts a new transaction immediately.
+	 * XXX: since this is done in 2*N transactions, it could just as well
+	 * call ReindexRelationConcurrently directly
+	 */
+	ReindexMultipleInternal(partitions, &params);
+
+	/*
+	 * CIC needs to mark a partitioned index as VALID, which itself
+	 * requires setting READY, which is unset for CIC (even though
+	 * it's meaningless for an index without storage).
+	 * This must be done only while holding a lock which precludes adding
+	 * partitions.
+	 * See also: validatePartitionedIndex().
+	 */
+	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
+	CommandCounterIncrement();
+	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
+}
 
 /*
  * CheckMutability
diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out
index 193f7801912..a4ccae50de3 100644
--- a/src/test/regress/expected/indexing.out
+++ b/src/test/regress/expected/indexing.out
@@ -50,11 +50,63 @@ select relname, relkind, relhassubclass, inhparent::regclass
 (8 rows)
 
 drop table idxpart;
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
-ERROR:  cannot create index on partitioned table "idxpart" concurrently
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart (a); -- partitioned
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart2 (a); -- leaf
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+ERROR:  could not create unique index "idxpart2_a_idx2_ccnew"
+DETAIL:  Key (a)=(10) is duplicated.
+\d idxpart
+        Partitioned table "public.idxpart"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition key: RANGE (a)
+Indexes:
+    "idxpart_a_idx" btree (a)
+    "idxpart_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 2 (Use \d+ to list them.)
+
+\d idxpart1
+        Partitioned table "public.idxpart1"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart1_a_idx" btree (a) INVALID
+    "idxpart1_a_idx1" btree (a)
+    "idxpart1_a_idx2" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart2
+              Table "public.idxpart2"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (10) TO (20)
+Indexes:
+    "idxpart2_a_idx" btree (a)
+    "idxpart2_a_idx1" btree (a)
+    "idxpart2_a_idx2" UNIQUE, btree (a) INVALID
+    "idxpart2_a_idx2_ccnew" UNIQUE, btree (a) INVALID
+
 drop table idxpart;
 -- Verify bugfix with query on indexed partitioned table with no partitions
 -- https://postgr.es/m/20180124162006.pmapfiznhgngwtjf@alvherre.pgsql
diff --git a/src/test/regress/sql/indexing.sql b/src/test/regress/sql/indexing.sql
index 42f398b67c2..3d4b6e9bc95 100644
--- a/src/test/regress/sql/indexing.sql
+++ b/src/test/regress/sql/indexing.sql
@@ -29,10 +29,22 @@ select relname, relkind, relhassubclass, inhparent::regclass
 	where relname like 'idxpart%' order by relname;
 drop table idxpart;
 
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart (a); -- partitioned
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart2 (a); -- leaf
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+\d idxpart
+\d idxpart1
+\d idxpart2
 drop table idxpart;
 
 -- Verify bugfix with query on indexed partitioned table with no partitions
-- 
2.34.1

#17 Justin Pryzby
pryzby@telsasoft.com
In reply to: Alexander Pyhalov (#16)
1 attachment(s)
Re: CREATE INDEX CONCURRENTLY on partitioned index

I finally found time to digest and integrate your changes into my local
branch. This fixes the three issues you reported: FORCE_RELEASE, the issue
with INVALID partitions (for which I adapted your patch into an
earlier patch in my series), and progress reporting. I also rebased the patch.

--
Justin

Attachments:

0001-Allow-CREATE-INDEX-CONCURRENTLY-on-partitioned-table.patch (text/x-diff; charset=us-ascii) Download
From 4ba360eaaac5e1ac169d41c26cf6213b0c6a2432 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Sat, 6 Jun 2020 17:42:23 -0500
Subject: [PATCH] Allow CREATE INDEX CONCURRENTLY on partitioned table

Note, this effectively reverts 050098b14, so take care to not reintroduce the
bug it fixed.
---
 doc/src/sgml/ddl.sgml                  |   4 +-
 doc/src/sgml/ref/create_index.sgml     |   9 --
 src/backend/commands/indexcmds.c       | 214 +++++++++++++++++++------
 src/include/catalog/index.h            |   1 +
 src/test/regress/expected/indexing.out | 136 +++++++++++++++-
 src/test/regress/sql/indexing.sql      |  26 ++-
 6 files changed, 320 insertions(+), 70 deletions(-)

diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml
index 03c01937094..fd56e21ef49 100644
--- a/doc/src/sgml/ddl.sgml
+++ b/doc/src/sgml/ddl.sgml
@@ -4131,9 +4131,7 @@ ALTER TABLE measurement ATTACH PARTITION measurement_y2008m02
      so that they are applied automatically to the entire hierarchy.
      This is very
      convenient, as not only will the existing partitions become indexed, but
-     also any partitions that are created in the future will.  One limitation is
-     that it's not possible to use the <literal>CONCURRENTLY</literal>
-     qualifier when creating such a partitioned index.  To avoid long lock
+     also any partitions that are created in the future will.  To avoid long lock
      times, it is possible to use <command>CREATE INDEX ON ONLY</command>
      the partitioned table; such an index is marked invalid, and the partitions
      do not get the index applied automatically.  The indexes on partitions can
diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml
index 40986aa502f..fc8cda655f0 100644
--- a/doc/src/sgml/ref/create_index.sgml
+++ b/doc/src/sgml/ref/create_index.sgml
@@ -692,15 +692,6 @@ Indexes:
     cannot.
    </para>
 
-   <para>
-    Concurrent builds for indexes on partitioned tables are currently not
-    supported.  However, you may concurrently build the index on each
-    partition individually and then finally create the partitioned index
-    non-concurrently in order to reduce the time where writes to the
-    partitioned table will be locked out.  In this case, building the
-    partitioned index is a metadata only operation.
-   </para>
-
   </refsect2>
  </refsect1>
 
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 91cee27743d..bb98e745267 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -71,6 +71,7 @@
 
 
 /* non-export function prototypes */
+static void reindex_invalid_child_indexes(Oid indexRelationId);
 static bool CompareOpclassOptions(Datum *opts1, Datum *opts2, int natts);
 static void CheckPredicate(Expr *predicate);
 static void ComputeIndexAttrs(IndexInfo *indexInfo,
@@ -104,7 +105,9 @@ static void reindex_error_callback(void *arg);
 static void ReindexPartitions(Oid relid, ReindexParams *params,
 							  bool isTopLevel);
 static void ReindexMultipleInternal(List *relids,
-									ReindexParams *params);
+									ReindexParams *params,
+									Oid parent,
+									int npart);
 static bool ReindexRelationConcurrently(Oid relationOid,
 										ReindexParams *params);
 static void update_relispartition(Oid relationId, bool newval);
@@ -697,17 +700,6 @@ DefineIndex(Oid relationId,
 	partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE;
 	if (partitioned)
 	{
-		/*
-		 * Note: we check 'stmt->concurrent' rather than 'concurrent', so that
-		 * the error is thrown also for temporary tables.  Seems better to be
-		 * consistent, even though we could do it on temporary table because
-		 * we're not actually doing it concurrently.
-		 */
-		if (stmt->concurrent)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("cannot create index on partitioned table \"%s\" concurrently",
-							RelationGetRelationName(rel))));
 		if (stmt->excludeOpNames)
 			ereport(ERROR,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
@@ -1147,6 +1139,11 @@ DefineIndex(Oid relationId,
 		if (pd->nparts != 0)
 			flags |= INDEX_CREATE_INVALID;
 	}
+	else if (concurrent && OidIsValid(parentIndexId))
+	{
+		/* If concurrent, initially build index partitions as "invalid" */
+		flags |= INDEX_CREATE_INVALID;
+	}
 
 	if (stmt->deferrable)
 		constr_flags |= INDEX_CONSTR_CREATE_DEFERRABLE;
@@ -1212,17 +1209,30 @@ DefineIndex(Oid relationId,
 		partdesc = RelationGetPartitionDesc(rel, true);
 		if ((!stmt->relation || stmt->relation->inh) && partdesc->nparts > 0)
 		{
+			/*
+			 * Need to close the relation before recursing into children, so
+			 * copy needed data into a longlived context.
+			 */
+
+			MemoryContext	ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX",
+					ALLOCSET_DEFAULT_SIZES);
+			MemoryContext	oldcontext = MemoryContextSwitchTo(ind_context);
 			int			nparts = partdesc->nparts;
 			Oid		   *part_oids = palloc_array(Oid, nparts);
 			bool		invalidate_parent = false;
 			Relation	parentIndex;
 			TupleDesc	parentDesc;
+			char		*relname;
 
 			pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_TOTAL,
 										 nparts);
 
 			/* Make a local copy of partdesc->oids[], just for safety */
 			memcpy(part_oids, partdesc->oids, sizeof(Oid) * nparts);
+			parentDesc = CreateTupleDescCopy(RelationGetDescr(rel));
+			relname = pstrdup(RelationGetRelationName(rel));
+			table_close(rel, NoLock);
+			MemoryContextSwitchTo(oldcontext);
 
 			/*
 			 * We'll need an IndexInfo describing the parent index.  The one
@@ -1235,8 +1245,6 @@ DefineIndex(Oid relationId,
 			parentIndex = index_open(indexRelationId, lockmode);
 			indexInfo = BuildIndexInfo(parentIndex);
 
-			parentDesc = RelationGetDescr(rel);
-
 			/*
 			 * For each partition, scan all existing indexes; if one matches
 			 * our index definition and is not already attached to some other
@@ -1276,9 +1284,9 @@ DefineIndex(Oid relationId,
 						ereport(ERROR,
 								(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 								 errmsg("cannot create unique index on partitioned table \"%s\"",
-										RelationGetRelationName(rel)),
+										relname),
 								 errdetail("Table \"%s\" contains partitions that are foreign tables.",
-										   RelationGetRelationName(rel))));
+										   relname)));
 
 					AtEOXact_GUC(false, child_save_nestlevel);
 					SetUserIdAndSecContext(child_save_userid,
@@ -1364,10 +1372,14 @@ DefineIndex(Oid relationId,
 				 */
 				if (!found)
 				{
-					IndexStmt  *childStmt = copyObject(stmt);
+					IndexStmt  *childStmt;
 					bool		found_whole_row;
 					ListCell   *lc;
 
+					oldcontext = MemoryContextSwitchTo(ind_context);
+					childStmt = copyObject(stmt);
+					MemoryContextSwitchTo(oldcontext);
+
 					/*
 					 * We can't use the same index name for the child index,
 					 * so clear idxname to let the recursive invocation choose
@@ -1426,14 +1438,24 @@ DefineIndex(Oid relationId,
 								createdConstraintId,
 								is_alter_table, check_rights, check_not_in_use,
 								skip_build, quiet);
+
 					SetUserIdAndSecContext(child_save_userid,
 										   child_save_sec_context);
+					if (concurrent)
+					{
+						PopActiveSnapshot();
+						PushActiveSnapshot(GetTransactionSnapshot());
+						invalidate_parent = true;
+					}
 				}
 
-				pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
-											 i + 1);
+				/* For concurrent build, this is a catalog-only stage */
+				if (!concurrent)
+					pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
+												 i + 1);
 				free_attrmap(attmap);
 			}
+			pfree(relname);
 
 			index_close(parentIndex, lockmode);
 
@@ -1443,46 +1465,40 @@ DefineIndex(Oid relationId,
 			 * invalid, this is incorrect, so update our row to invalid too.
 			 */
 			if (invalidate_parent)
-			{
-				Relation	pg_index = table_open(IndexRelationId, RowExclusiveLock);
-				HeapTuple	tup,
-							newtup;
-
-				tup = SearchSysCache1(INDEXRELID,
-									  ObjectIdGetDatum(indexRelationId));
-				if (!HeapTupleIsValid(tup))
-					elog(ERROR, "cache lookup failed for index %u",
-						 indexRelationId);
-				newtup = heap_copytuple(tup);
-				((Form_pg_index) GETSTRUCT(newtup))->indisvalid = false;
-				CatalogTupleUpdate(pg_index, &tup->t_self, newtup);
-				ReleaseSysCache(tup);
-				table_close(pg_index, RowExclusiveLock);
-				heap_freetuple(newtup);
-			}
-		}
+				index_set_state_flags(indexRelationId, INDEX_DROP_CLEAR_VALID);
+		} else
+			table_close(rel, NoLock);
 
 		/*
 		 * Indexes on partitioned tables are not themselves built, so we're
 		 * done here.
 		 */
+
 		AtEOXact_GUC(false, root_save_nestlevel);
 		SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
-		table_close(rel, NoLock);
+
 		if (!OidIsValid(parentIndexId))
+		{
+			if (concurrent)
+				reindex_invalid_child_indexes(indexRelationId);
+
 			pgstat_progress_end_command();
+		}
+
 		return address;
 	}
 
 	AtEOXact_GUC(false, root_save_nestlevel);
 	SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
 
-	if (!concurrent)
+	if (!concurrent || OidIsValid(parentIndexId))
 	{
-		/* Close the heap and we're done, in the non-concurrent case */
-		table_close(rel, NoLock);
+		/*
+		 * We're done if this is the top-level index,
+		 * or the catalog-only phase of a partition built concurrently
+		 */
 
-		/* If this is the top-level index, we're done. */
+		table_close(rel, NoLock);
 		if (!OidIsValid(parentIndexId))
 			pgstat_progress_end_command();
 
@@ -1695,6 +1711,70 @@ DefineIndex(Oid relationId,
 	return address;
 }
 
+/* Reindex invalid child indexes created earlier */
+static void
+reindex_invalid_child_indexes(Oid indexRelationId)
+{
+	ListCell *lc;
+	List	*parentindexes = NIL;
+	int	npart = 1;
+	ReindexParams params = {
+		.options = REINDEXOPT_CONCURRENTLY | REINDEXOPT_REPORT_CREATE_PART
+	};
+
+	MemoryContext	ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX",
+			ALLOCSET_DEFAULT_SIZES);
+	MemoryContext	oldcontext;
+	List		*childs = find_all_inheritors(indexRelationId, ShareLock, NULL);
+	List		*partitions = NIL;
+
+	PreventInTransactionBlock(true, "REINDEX INDEX");
+
+	foreach (lc, childs)
+	{
+		Oid			partoid = lfirst_oid(lc);
+		char		partkind = get_rel_relkind(partoid);
+
+		if (partkind == RELKIND_PARTITIONED_INDEX)
+		{
+			oldcontext = MemoryContextSwitchTo(ind_context);
+			parentindexes = lappend_oid(parentindexes, partoid);
+			MemoryContextSwitchTo(oldcontext);
+		}
+
+		// npart++ ?
+		if (!RELKIND_HAS_STORAGE(partkind) || get_index_isvalid(partoid))
+			continue;
+
+		/* Save partition OID */
+		oldcontext = MemoryContextSwitchTo(ind_context);
+		partitions = lappend_oid(partitions, partoid);
+		MemoryContextSwitchTo(oldcontext);
+	}
+
+	/*
+	 * Process each partition listed in a separate transaction.  Note that
+	 * this commits and then starts a new transaction immediately.
+	 * XXX: since this is done in 2*N transactions, it could just as well
+	 * call ReindexRelationConcurrently directly
+	 */
+	ReindexMultipleInternal(partitions, &params, indexRelationId, npart);
+
+	/*
+	 * CIC needs to mark a partitioned index as VALID, which itself
+	 * requires setting READY, which is unset for CIC (even though it's
+	 * meaningless for an index without storage).  This must be done only
+	 * while holding a lock which precludes adding partitions.
+	 * See also: validatePartitionedIndex().
+	 */
+	foreach (lc, parentindexes)
+	{
+		Oid	partoid = lfirst_oid(lc);
+		index_set_state_flags(partoid, INDEX_CREATE_SET_READY);
+		CommandCounterIncrement();
+		index_set_state_flags(partoid, INDEX_CREATE_SET_VALID);
+	}
+}
 
 /*
  * CheckMutability
@@ -3084,7 +3164,7 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
 	 * Process each relation listed in a separate transaction.  Note that this
 	 * commits and then starts a new transaction immediately.
 	 */
-	ReindexMultipleInternal(relids, params);
+	ReindexMultipleInternal(relids, params, InvalidOid, 0);
 
 	MemoryContextDelete(private_context);
 }
@@ -3120,6 +3200,7 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 	char		relkind = get_rel_relkind(relid);
 	char	   *relname = get_rel_name(relid);
 	char	   *relnamespace = get_namespace_name(get_rel_namespace(relid));
+	int			npart = 1;
 	MemoryContext reindex_context;
 	List	   *inhoids;
 	ListCell   *lc;
@@ -3190,7 +3271,7 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
 	 * Process each partition listed in a separate transaction.  Note that
 	 * this commits and then starts a new transaction immediately.
 	 */
-	ReindexMultipleInternal(partitions, params);
+	ReindexMultipleInternal(partitions, params, relid, npart);
 
 	/*
 	 * Clean up working storage --- note we must do this after
@@ -3208,7 +3289,7 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
  * and starts a new transaction when finished.
  */
 static void
-ReindexMultipleInternal(List *relids, ReindexParams *params)
+ReindexMultipleInternal(List *relids, ReindexParams *params, Oid parent, int npart)
 {
 	ListCell   *l;
 
@@ -3302,6 +3383,29 @@ ReindexMultipleInternal(List *relids, ReindexParams *params)
 		}
 
 		CommitTransactionCommand();
+
+		if (params->options & REINDEXOPT_REPORT_CREATE_PART)
+		{
+			/*
+			 * Update pgstat progress report to indicate that create index on
+			 * partition was finished.
+			 */
+			const int   progress_cols[] = {
+				PROGRESS_CREATEIDX_COMMAND,
+				PROGRESS_CREATEIDX_INDEX_OID,
+				PROGRESS_CREATEIDX_PHASE,
+				PROGRESS_CREATEIDX_PARTITIONS_DONE
+			};
+			const int64 progress_vals[] = {
+				PROGRESS_CREATEIDX_COMMAND_CREATE_CONCURRENTLY,
+				parent,
+				PROGRESS_CREATEIDX_PHASE_VALIDATE_IDXSCAN,
+				npart++
+			};
+
+			pgstat_progress_update_multi_param(4, progress_cols, progress_vals);
+		}
+
 	}
 
 	StartTransactionCommand();
@@ -3697,7 +3801,9 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 		if (indexRel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
 			elog(ERROR, "cannot reindex a temporary table concurrently");
 
-		pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
+		/* Don't overwrite CREATE INDEX command */
+		if (!(params->options & REINDEXOPT_REPORT_CREATE_PART))
+			pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
 									  idx->tableId);
 
 		progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY;
@@ -3857,9 +3963,11 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 
 		/*
 		 * Update progress for the index to build, with the correct parent
-		 * table involved.
+		 * table involved.  Don't overwrite CREATE INDEX command.
 		 */
-		pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX, newidx->tableId);
+		if (!(params->options & REINDEXOPT_REPORT_CREATE_PART))
+			pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX, newidx->tableId);
+
 		progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY;
 		progress_vals[1] = PROGRESS_CREATEIDX_PHASE_BUILD;
 		progress_vals[2] = newidx->indexId;
@@ -3921,10 +4029,12 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 
 		/*
 		 * Update progress for the index to build, with the correct parent
-		 * table involved.
+		 * table involved. Don't overwrite CREATE INDEX command.
 		 */
-		pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
+		if (!(params->options & REINDEXOPT_REPORT_CREATE_PART))
+			pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
 									  newidx->tableId);
+
 		progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY;
 		progress_vals[1] = PROGRESS_CREATEIDX_PHASE_VALIDATE_IDXSCAN;
 		progress_vals[2] = newidx->indexId;
@@ -4159,7 +4269,9 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 
 	MemoryContextDelete(private_context);
 
-	pgstat_progress_end_command();
+	/* Don't overwrite CREATE INDEX command. */
+	if (!(params->options & REINDEXOPT_REPORT_CREATE_PART))
+		pgstat_progress_end_command();
 
 	return true;
 }
diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h
index 91c28868d47..4142894c230 100644
--- a/src/include/catalog/index.h
+++ b/src/include/catalog/index.h
@@ -42,6 +42,7 @@ typedef struct ReindexParams
 #define REINDEXOPT_REPORT_PROGRESS 0x02 /* report pgstat progress */
 #define REINDEXOPT_MISSING_OK 	0x04	/* skip missing relations */
 #define REINDEXOPT_CONCURRENTLY	0x08	/* concurrent mode */
+#define REINDEXOPT_REPORT_CREATE_PART	0x10	/* report that index was created for partition */
 
 /* state info for validate_index bulkdelete callback */
 typedef struct ValidateIndexState
diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out
index 1bdd430f063..6b2320244b8 100644
--- a/src/test/regress/expected/indexing.out
+++ b/src/test/regress/expected/indexing.out
@@ -50,11 +50,139 @@ select relname, relkind, relhassubclass, inhparent::regclass
 (8 rows)
 
 drop table idxpart;
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
-ERROR:  cannot create index on partitioned table "idxpart" concurrently
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+create table idxpart3 partition of idxpart for values from (30) to (40) partition by range(a);
+create table idxpart31 partition of idxpart3 default;
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart (a); -- partitioned
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart2 (a); -- leaf
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+ERROR:  could not create unique index "idxpart2_a_idx2_ccnew"
+DETAIL:  Key (a)=(10) is duplicated.
+\d idxpart
+        Partitioned table "public.idxpart"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition key: RANGE (a)
+Indexes:
+    "idxpart_a_idx" btree (a)
+    "idxpart_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 3 (Use \d+ to list them.)
+
+\d idxpart1
+        Partitioned table "public.idxpart1"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart1_a_idx" btree (a)
+    "idxpart1_a_idx1" btree (a)
+    "idxpart1_a_idx2" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart11
+       Partitioned table "public.idxpart11"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart1 FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart11_a_idx" btree (a)
+    "idxpart11_a_idx1" btree (a)
+    "idxpart11_a_idx2" btree (a)
+    "idxpart11_a_idx3" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart111
+       Partitioned table "public.idxpart111"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart11 DEFAULT
+Partition key: RANGE (a)
+Indexes:
+    "idxpart111_a_idx" btree (a)
+    "idxpart111_a_idx1" btree (a)
+    "idxpart111_a_idx2" btree (a)
+    "idxpart111_a_idx3" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart1111
+      Partitioned table "public.idxpart1111"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart111 DEFAULT
+Partition key: RANGE (a)
+Indexes:
+    "idxpart1111_a_idx" btree (a)
+    "idxpart1111_a_idx1" btree (a)
+    "idxpart1111_a_idx2" btree (a)
+    "idxpart1111_a_idx3" UNIQUE, btree (a) INVALID
+Number of partitions: 0
+
+\d idxpart2
+              Table "public.idxpart2"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (10) TO (20)
+Indexes:
+    "idxpart2_a_idx" btree (a)
+    "idxpart2_a_idx1" btree (a)
+    "idxpart2_a_idx2" UNIQUE, btree (a) INVALID
+    "idxpart2_a_idx2_ccnew" UNIQUE, btree (a) INVALID
+
+\d idxpart3
+        Partitioned table "public.idxpart3"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (30) TO (40)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart3_a_idx" btree (a)
+    "idxpart3_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart31
+             Table "public.idxpart31"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart3 DEFAULT
+Indexes:
+    "idxpart31_a_idx" btree (a)
+    "idxpart31_a_idx1" UNIQUE, btree (a) INVALID
+
 drop table idxpart;
 -- Verify bugfix with query on indexed partitioned table with no partitions
 -- https://postgr.es/m/20180124162006.pmapfiznhgngwtjf@alvherre.pgsql
diff --git a/src/test/regress/sql/indexing.sql b/src/test/regress/sql/indexing.sql
index 429120e7104..14e0513386c 100644
--- a/src/test/regress/sql/indexing.sql
+++ b/src/test/regress/sql/indexing.sql
@@ -29,10 +29,30 @@ select relname, relkind, relhassubclass, inhparent::regclass
 	where relname like 'idxpart%' order by relname;
 drop table idxpart;
 
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+create table idxpart3 partition of idxpart for values from (30) to (40) partition by range(a);
+create table idxpart31 partition of idxpart3 default;
+
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart (a); -- partitioned
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart2 (a); -- leaf
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+\d idxpart
+\d idxpart1
+\d idxpart11
+\d idxpart111
+\d idxpart1111
+\d idxpart2
+\d idxpart3
+\d idxpart31
 drop table idxpart;
 
 -- Verify bugfix with query on indexed partitioned table with no partitions
-- 
2.25.1

#18Alexander Pyhalov
a.pyhalov@postgrespro.ru
In reply to: Justin Pryzby (#17)
Re: CREATE INDEX CONCURRENTLY on partitioned index

Justin Pryzby писал 2022-11-21 06:00:

I finally found time to digest and integrate your changes into my local
branch. This fixes the three issues you reported: FORCE_RELEASE, the
issue with INVALID partitions (for which I adapted your patch into an
earlier patch in my series), and progress reporting. And rebased.

Hi.

Thank you for the effort.
I've looked through and tested the new patch a bit. Overall it looks good to
me.
The question I have is whether we should update
pg_stat_progress_create_index in reindex_invalid_child_indexes(), when
we skip valid indexes?
--
Best regards,
Alexander Pyhalov,
Postgres Professional

#19Ilya Gladyshev
ilya.v.gladyshev@gmail.com
In reply to: Justin Pryzby (#17)
1 attachment(s)
Re: CREATE INDEX CONCURRENTLY on partitioned index

Hi,

Thank you Justin and Alexander for working on this, I have reviewed and
tested the latest patch, it works well, the problems mentioned
previously are all fixed. I like the idea of sharing code of reindex
and index, but I have noticed some peculiarities as a user. 

The reporting is somewhat confusing as it switches to reporting for
reindex concurrently while building child indexes, this should be fixed
with the simple patch I have attached. Another thing that I have
noticed is that REINDEX, which is used under the hood, creates new
indexes with suffix _ccnew, and if the index building fails, the
indexes that could not be built will have the name with _ccnew suffix.
This can actually be seen in your test:

ERROR: could not create unique index "idxpart2_a_idx2_ccnew"

I find it quite confusing and I don't think that this is the expected
behavior (if it is, I think it should be documented, like it is for
REINDEX). As an example of problems that it might entail, DROP INDEX
will not drop all the invalid indexes in the inheritance tree, because
it will leave _ccnew indexes in place, which is ok for reindex
concurrently, but that's not how C-I-C works now. I think that fixing
this problem requires some heavy code rewrite and I'm not quite sure
how to go about it, if you have any ideas, I will be happy to try them
out.

Thanks,
Ilya

Attachments:

0001-turn-off-reindex-reporting-for-create.patchtext/x-patch; charset=UTF-8; name=0001-turn-off-reindex-reporting-for-create.patchDownload
From 8eb9fd7ce7d34c5c323c47b60a7f883f360ef090 Mon Sep 17 00:00:00 2001
From: Ilya Gladyshev <ilya.v.gladyshev@gmail.com>
Date: Sat, 3 Dec 2022 18:20:03 +0400
Subject: [PATCH] turn off reindex reporting for create

---
 src/backend/commands/indexcmds.c | 36 +++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 15 deletions(-)

diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index a2775931e2..b3c713037f 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -3804,14 +3804,16 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 
 		/* Don't overwrite CREATE INDEX command */
 		if (!(params->options & REINDEXOPT_REPORT_CREATE_PART))
+		{
 			pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
 									  idx->tableId);
 
-		progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY;
-		progress_vals[1] = 0;	/* initializing */
-		progress_vals[2] = idx->indexId;
-		progress_vals[3] = idx->amId;
-		pgstat_progress_update_multi_param(4, progress_index, progress_vals);
+			progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY;
+			progress_vals[1] = 0;	/* initializing */
+			progress_vals[2] = idx->indexId;
+			progress_vals[3] = idx->amId;
+			pgstat_progress_update_multi_param(4, progress_index, progress_vals);
+		}
 
 		/* Choose a temporary relation name for the new index */
 		concurrentName = ChooseRelationName(get_rel_name(idx->indexId),
@@ -3967,13 +3969,15 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 		 * table involved.  Don't overwrite CREATE INDEX command.
 		 */
 		if (!(params->options & REINDEXOPT_REPORT_CREATE_PART))
+		{
 			pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX, newidx->tableId);
 
-		progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY;
-		progress_vals[1] = PROGRESS_CREATEIDX_PHASE_BUILD;
-		progress_vals[2] = newidx->indexId;
-		progress_vals[3] = newidx->amId;
-		pgstat_progress_update_multi_param(4, progress_index, progress_vals);
+			progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY;
+			progress_vals[1] = PROGRESS_CREATEIDX_PHASE_BUILD;
+			progress_vals[2] = newidx->indexId;
+			progress_vals[3] = newidx->amId;
+			pgstat_progress_update_multi_param(4, progress_index, progress_vals);
+		}
 
 		/* Perform concurrent build of new index */
 		index_concurrently_build(newidx->tableId, newidx->indexId);
@@ -4033,14 +4037,16 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 		 * table involved. Don't overwrite CREATE INDEX command.
 		 */
 		if (!(params->options & REINDEXOPT_REPORT_CREATE_PART))
+		{
 			pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
 									  newidx->tableId);
 
-		progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY;
-		progress_vals[1] = PROGRESS_CREATEIDX_PHASE_VALIDATE_IDXSCAN;
-		progress_vals[2] = newidx->indexId;
-		progress_vals[3] = newidx->amId;
-		pgstat_progress_update_multi_param(4, progress_index, progress_vals);
+			progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY;
+			progress_vals[1] = PROGRESS_CREATEIDX_PHASE_VALIDATE_IDXSCAN;
+			progress_vals[2] = newidx->indexId;
+			progress_vals[3] = newidx->amId;
+			pgstat_progress_update_multi_param(4, progress_index, progress_vals);
+		}
 
 		validate_index(newidx->tableId, newidx->indexId, snapshot);
 
-- 
2.30.2

#20Justin Pryzby
pryzby@telsasoft.com
In reply to: Ilya Gladyshev (#19)
1 attachment(s)
Re: CREATE INDEX CONCURRENTLY on partitioned index

On Sat, Dec 03, 2022 at 07:13:30PM +0400, Ilya Gladyshev wrote:

Hi,

Thank you Justin and Alexander for working on this, I have reviewed and
tested the latest patch, it works well, the problems mentioned
previously are all fixed. I like the idea of sharing code of reindex
and index, but I have noticed some peculiarities as a user.

The reporting is somewhat confusing as it switches to reporting for
reindex concurrently while building child indexes, this should be fixed
with the simple patch I have attached. Another thing that I have
noticed is that REINDEX, which is used under the hood, creates new
indexes with suffix _ccnew, and if the index building fails, the
indexes that could not be built will have the name with _ccnew suffix.
This can actually be seen in your test:

ERROR: could not create unique index "idxpart2_a_idx2_ccnew"

I find it quite confusing and I don't think that this is the expected
behavior (if it is, I think it should be documented, like it is for
REINDEX). As an example of problems that it might entail, DROP INDEX
will not drop all the invalid indexes in the inheritance tree, because
it will leave _ccnew indexes in place, which is ok for reindex
concurrently, but that's not how C-I-C works now. I think that fixing
this problem requires some heavy code rewrite and I'm not quite sure

This behavior is fixed. I re-factored and re-implemented to use
DefineIndex() for building indexes concurrently rather than reindexing.
That makes the patch smaller, actually, and has the added benefit of
splitting off the "Concurrently" part of DefineIndex() into a separate
function.

This currently handles partitions with a loop around the whole CIC
implementation, which means that things like WaitForLockers() happen
once for each index, the same as REINDEX CONCURRENTLY on a partitioned
table. Contrast that with ReindexRelationConcurrently(), which handles
all the indexes on a table in one pass by looping around indexes within
each phase.

BTW, it causes the patch to fail to apply in cfbot when you send an
additional (002) supplementary patch without including the original
(001) patch. You can name it *.txt to avoid the issue.
https://wiki.postgresql.org/wiki/Cfbot#Which_attachments_are_considered_to_be_patches.3F

Thanks for looking.

--
Justin

Attachments:

0001-Allow-CREATE-INDEX-CONCURRENTLY-on-partitioned-table.patchtext/x-diff; charset=us-asciiDownload
From e25b15173f4ce939efa54426e369b6996129ff59 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Sat, 6 Jun 2020 17:42:23 -0500
Subject: [PATCH] Allow CREATE INDEX CONCURRENTLY on partitioned table

---
 doc/src/sgml/ddl.sgml                  |   4 +-
 doc/src/sgml/ref/create_index.sgml     |   9 --
 src/backend/commands/indexcmds.c       | 172 +++++++++++++++++--------
 src/test/regress/expected/indexing.out | 127 +++++++++++++++++-
 src/test/regress/sql/indexing.sql      |  26 +++-
 5 files changed, 268 insertions(+), 70 deletions(-)

diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml
index 38618de01c5..cd72b455447 100644
--- a/doc/src/sgml/ddl.sgml
+++ b/doc/src/sgml/ddl.sgml
@@ -4163,9 +4163,7 @@ ALTER TABLE measurement ATTACH PARTITION measurement_y2008m02
      so that they are applied automatically to the entire hierarchy.
      This is very
      convenient, as not only will the existing partitions become indexed, but
-     also any partitions that are created in the future will.  One limitation is
-     that it's not possible to use the <literal>CONCURRENTLY</literal>
-     qualifier when creating such a partitioned index.  To avoid long lock
+     also any partitions that are created in the future will.  To avoid long lock
      times, it is possible to use <command>CREATE INDEX ON ONLY</command>
      the partitioned table; such an index is marked invalid, and the partitions
      do not get the index applied automatically.  The indexes on partitions can
diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml
index 40986aa502f..fc8cda655f0 100644
--- a/doc/src/sgml/ref/create_index.sgml
+++ b/doc/src/sgml/ref/create_index.sgml
@@ -692,15 +692,6 @@ Indexes:
     cannot.
    </para>
 
-   <para>
-    Concurrent builds for indexes on partitioned tables are currently not
-    supported.  However, you may concurrently build the index on each
-    partition individually and then finally create the partitioned index
-    non-concurrently in order to reduce the time where writes to the
-    partitioned table will be locked out.  In this case, building the
-    partitioned index is a metadata only operation.
-   </para>
-
   </refsect2>
  </refsect1>
 
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index b5b860c3abf..cfab45b9992 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -92,6 +92,11 @@ static char *ChooseIndexName(const char *tabname, Oid namespaceId,
 							 bool primary, bool isconstraint);
 static char *ChooseIndexNameAddition(List *colnames);
 static List *ChooseIndexColumnNames(List *indexElems);
+static void DefineIndexConcurrentInternal(Oid relationId,
+										  Oid indexRelationId,
+										  IndexInfo *indexInfo,
+										  LOCKTAG heaplocktag,
+										  LockRelId heaprelid);
 static void ReindexIndex(RangeVar *indexRelation, ReindexParams *params,
 						 bool isTopLevel);
 static void RangeVarCallbackForReindexIndex(const RangeVar *relation,
@@ -551,7 +556,6 @@ DefineIndex(Oid relationId,
 	bool		amcanorder;
 	amoptions_function amoptions;
 	bool		partitioned;
-	bool		safe_index;
 	Datum		reloptions;
 	int16	   *coloptions;
 	IndexInfo  *indexInfo;
@@ -559,12 +563,10 @@ DefineIndex(Oid relationId,
 	bits16		constr_flags;
 	int			numberOfAttributes;
 	int			numberOfKeyAttributes;
-	TransactionId limitXmin;
 	ObjectAddress address;
 	LockRelId	heaprelid;
 	LOCKTAG		heaplocktag;
 	LOCKMODE	lockmode;
-	Snapshot	snapshot;
 	Oid			root_save_userid;
 	int			root_save_sec_context;
 	int			root_save_nestlevel;
@@ -697,17 +699,6 @@ DefineIndex(Oid relationId,
 	partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE;
 	if (partitioned)
 	{
-		/*
-		 * Note: we check 'stmt->concurrent' rather than 'concurrent', so that
-		 * the error is thrown also for temporary tables.  Seems better to be
-		 * consistent, even though we could do it on temporary table because
-		 * we're not actually doing it concurrently.
-		 */
-		if (stmt->concurrent)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("cannot create index on partitioned table \"%s\" concurrently",
-							RelationGetRelationName(rel))));
 		if (stmt->excludeOpNames)
 			ereport(ERROR,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
@@ -1079,10 +1070,6 @@ DefineIndex(Oid relationId,
 		}
 	}
 
-	/* Is index safe for others to ignore?  See set_indexsafe_procflags() */
-	safe_index = indexInfo->ii_Expressions == NIL &&
-		indexInfo->ii_Predicate == NIL;
-
 	/*
 	 * Report index creation if appropriate (delay this till after most of the
 	 * error checks)
@@ -1147,6 +1134,11 @@ DefineIndex(Oid relationId,
 		if (pd->nparts != 0)
 			flags |= INDEX_CREATE_INVALID;
 	}
+	else if (concurrent && OidIsValid(parentIndexId))
+	{
+		/* If concurrent, initially build index partitions as "invalid" */
+		flags |= INDEX_CREATE_INVALID;
+	}
 
 	if (stmt->deferrable)
 		constr_flags |= INDEX_CONSTR_CREATE_DEFERRABLE;
@@ -1427,12 +1419,15 @@ DefineIndex(Oid relationId,
 								createdConstraintId,
 								is_alter_table, check_rights, check_not_in_use,
 								skip_build, quiet);
+
 					SetUserIdAndSecContext(child_save_userid,
 										   child_save_sec_context);
 				}
 
-				pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
-											 i + 1);
+				/* For concurrent build, this is a catalog-only stage */
+				if (!concurrent)
+					pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
+												 i + 1);
 				free_attrmap(attmap);
 			}
 
@@ -1444,46 +1439,39 @@ DefineIndex(Oid relationId,
 			 * invalid, this is incorrect, so update our row to invalid too.
 			 */
 			if (invalidate_parent)
-			{
-				Relation	pg_index = table_open(IndexRelationId, RowExclusiveLock);
-				HeapTuple	tup,
-							newtup;
-
-				tup = SearchSysCache1(INDEXRELID,
-									  ObjectIdGetDatum(indexRelationId));
-				if (!HeapTupleIsValid(tup))
-					elog(ERROR, "cache lookup failed for index %u",
-						 indexRelationId);
-				newtup = heap_copytuple(tup);
-				((Form_pg_index) GETSTRUCT(newtup))->indisvalid = false;
-				CatalogTupleUpdate(pg_index, &tup->t_self, newtup);
-				ReleaseSysCache(tup);
-				table_close(pg_index, RowExclusiveLock);
-				heap_freetuple(newtup);
-			}
+				index_set_state_flags(indexRelationId, INDEX_DROP_CLEAR_VALID);
 		}
 
 		/*
 		 * Indexes on partitioned tables are not themselves built, so we're
-		 * done here.
+		 * done here in the non-concurrent case.
 		 */
-		AtEOXact_GUC(false, root_save_nestlevel);
-		SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
-		table_close(rel, NoLock);
-		if (!OidIsValid(parentIndexId))
-			pgstat_progress_end_command();
-		return address;
+
+		if (!concurrent)
+		{
+			AtEOXact_GUC(false, root_save_nestlevel);
+			SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
+			table_close(rel, NoLock);
+
+			if (!OidIsValid(parentIndexId))
+				pgstat_progress_end_command();
+
+			return address;
+		}
 	}
 
 	AtEOXact_GUC(false, root_save_nestlevel);
 	SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
 
-	if (!concurrent)
+	/*
+	 * All done in the non-concurrent case, and when building catalog entries
+	 * of partitions for CIC.
+	 */
+	if (!concurrent || OidIsValid(parentIndexId))
 	{
-		/* Close the heap and we're done, in the non-concurrent case */
 		table_close(rel, NoLock);
 
-		/* If this is the top-level index, we're done. */
+		/* If this is the top-level index, the command is complete. */
 		if (!OidIsValid(parentIndexId))
 			pgstat_progress_end_command();
 
@@ -1495,6 +1483,92 @@ DefineIndex(Oid relationId,
 	SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
 	table_close(rel, NoLock);
 
+	if (!partitioned)
+	{
+		DefineIndexConcurrentInternal(relationId, indexRelationId,
+									  indexInfo, heaplocktag, heaprelid);
+		pgstat_progress_end_command();
+		return address;
+	}
+	else
+	{
+		/* finish CIC by building indexes on partitions */
+		ListCell   *lc;
+		List	   *childs;
+		int			npart = 0;
+		MemoryContext cic_context,
+					old_context;
+
+		/*
+		 * Create special memory context for cross-transaction storage.
+		 */
+		cic_context = AllocSetContextCreate(PortalContext,
+											"Create index concurrently",
+											ALLOCSET_DEFAULT_SIZES);
+
+		old_context = MemoryContextSwitchTo(cic_context);
+		childs = find_all_inheritors(indexRelationId, ShareLock, NULL);
+		MemoryContextSwitchTo(old_context);
+
+		foreach(lc, childs)
+		{
+			Oid			indrelid = lfirst_oid(lc);
+			Oid			tabrelid = IndexGetRelation(indrelid, false);
+
+			if (RELKIND_HAS_STORAGE(get_rel_relkind(indrelid)) &&
+				!get_index_isvalid(indrelid))
+			{
+				rel = table_open(relationId, ShareUpdateExclusiveLock);
+				heaprelid = rel->rd_lockInfo.lockRelId;
+				table_close(rel, ShareUpdateExclusiveLock);
+				SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
+
+				/* Process each partition in a separate transaction */
+				DefineIndexConcurrentInternal(tabrelid, indrelid, indexInfo,
+											  heaplocktag, heaprelid);
+
+				PushActiveSnapshot(GetTransactionSnapshot());
+			}
+
+			pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
+										 ++npart);
+		}
+
+		/* Set all indexes as valid, including the parent */
+		foreach(lc, childs)
+		{
+			Oid			indrelid = lfirst_oid(lc);
+
+			if (get_rel_relkind(indrelid) != RELKIND_PARTITIONED_INDEX)
+				continue;
+			if (get_index_isvalid(indrelid))
+				continue;
+
+			index_set_state_flags(indrelid, INDEX_CREATE_SET_READY);
+			CommandCounterIncrement();
+			index_set_state_flags(indrelid, INDEX_CREATE_SET_VALID);
+		}
+
+		MemoryContextDelete(cic_context);
+		pgstat_progress_end_command();
+		PopActiveSnapshot();
+		return address;
+	}
+}
+
+
+static void
+DefineIndexConcurrentInternal(Oid relationId,
+							  Oid indexRelationId, IndexInfo *indexInfo,
+							  LOCKTAG heaplocktag, LockRelId heaprelid)
+{
+	TransactionId limitXmin;
+	Snapshot	snapshot;
+
+	/* Is index safe for others to ignore?  See set_indexsafe_procflags() */
+	bool		safe_index = indexInfo->ii_Expressions == NIL &&
+					indexInfo->ii_Predicate == NIL;
+
 	/*
 	 * For a concurrent build, it's important to make the catalog entries
 	 * visible to other transactions before we start to build the index. That
@@ -1690,10 +1764,6 @@ DefineIndex(Oid relationId,
 	 * Last thing to do is release the session-level lock on the parent table.
 	 */
 	UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
-
-	pgstat_progress_end_command();
-
-	return address;
 }
 
 
diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out
index 1bdd430f063..f1beee6d240 100644
--- a/src/test/regress/expected/indexing.out
+++ b/src/test/regress/expected/indexing.out
@@ -50,11 +50,130 @@ select relname, relkind, relhassubclass, inhparent::regclass
 (8 rows)
 
 drop table idxpart;
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
-ERROR:  cannot create index on partitioned table "idxpart" concurrently
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+create table idxpart3 partition of idxpart for values from (30) to (40) partition by range(a);
+create table idxpart31 partition of idxpart3 default;
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart2 (a); -- leaf
+create index concurrently on idxpart (a); -- partitioned
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+ERROR:  could not create unique index "idxpart2_a_idx1"
+DETAIL:  Key (a)=(10) is duplicated.
+\d idxpart
+        Partitioned table "public.idxpart"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition key: RANGE (a)
+Indexes:
+    "idxpart_a_idx" btree (a)
+    "idxpart_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 3 (Use \d+ to list them.)
+
+\d idxpart1
+        Partitioned table "public.idxpart1"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart1_a_idx" btree (a)
+    "idxpart1_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart11
+       Partitioned table "public.idxpart11"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart1 FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart11_a_idx" btree (a)
+    "idxpart11_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart111
+       Partitioned table "public.idxpart111"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart11 DEFAULT
+Partition key: RANGE (a)
+Indexes:
+    "idxpart111_a_idx" btree (a)
+    "idxpart111_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart1111
+      Partitioned table "public.idxpart1111"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart111 DEFAULT
+Partition key: RANGE (a)
+Indexes:
+    "idxpart1111_a_idx" btree (a)
+    "idxpart1111_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 0
+
+\d idxpart2
+              Table "public.idxpart2"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (10) TO (20)
+Indexes:
+    "idxpart2_a_idx" btree (a)
+    "idxpart2_a_idx1" UNIQUE, btree (a) INVALID
+
+\d idxpart3
+        Partitioned table "public.idxpart3"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (30) TO (40)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart3_a_idx" btree (a)
+    "idxpart3_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart31
+             Table "public.idxpart31"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart3 DEFAULT
+Indexes:
+    "idxpart31_a_idx" btree (a)
+    "idxpart31_a_idx1" UNIQUE, btree (a) INVALID
+
 drop table idxpart;
 -- Verify bugfix with query on indexed partitioned table with no partitions
 -- https://postgr.es/m/20180124162006.pmapfiznhgngwtjf@alvherre.pgsql
diff --git a/src/test/regress/sql/indexing.sql b/src/test/regress/sql/indexing.sql
index 429120e7104..fb0baedcc28 100644
--- a/src/test/regress/sql/indexing.sql
+++ b/src/test/regress/sql/indexing.sql
@@ -29,10 +29,30 @@ select relname, relkind, relhassubclass, inhparent::regclass
 	where relname like 'idxpart%' order by relname;
 drop table idxpart;
 
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+create table idxpart3 partition of idxpart for values from (30) to (40) partition by range(a);
+create table idxpart31 partition of idxpart3 default;
+
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart2 (a); -- leaf
+create index concurrently on idxpart (a); -- partitioned
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+\d idxpart
+\d idxpart1
+\d idxpart11
+\d idxpart111
+\d idxpart1111
+\d idxpart2
+\d idxpart3
+\d idxpart31
 drop table idxpart;
 
 -- Verify bugfix with query on indexed partitioned table with no partitions
-- 
2.25.1

#21Ilya Gladyshev
ilya.v.gladyshev@gmail.com
In reply to: Justin Pryzby (#20)
Re: CREATE INDEX CONCURRENTLY on partitioned index

On Sun, 2022-12-04 at 13:09 -0600, Justin Pryzby wrote:

This behavior is fixed.  I re-factored and re-implemented to use
DefineIndex() for building indexes concurrently rather than
reindexing.
That makes the patch smaller, actually, and has the added benefit of
splitting off the "Concurrently" part of DefineIndex() into a
separate
function.

Nice, I think it turned out pretty concise. I played around with the
patch quite a bit, didn't find any major problems, the only minor thing
that I can note is that we should skip the top parent index itself in
the loop not to increment the pg_stat counter, something like this:

diff --git a/src/backend/commands/indexcmds.c
b/src/backend/commands/indexcmds.c
index cfab45b999..9049540b5b 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -1515,6 +1515,9 @@ DefineIndex(Oid relationId,
                        Oid                     indrelid =
lfirst_oid(lc);
                        Oid                     tabrelid =
IndexGetRelation(indrelid, false);
+                       if (indrelid == indexRelationId)
+                               continue;
+
                        if
(RELKIND_HAS_STORAGE(get_rel_relkind(indrelid)) &&
                                !get_index_isvalid(indrelid))
                        {

BTW, it causes the patch to fail to apply in cfbot when you send an
additional (002) supplementary patch without including the original
(001) patch.  You can name it *.txt to avoid the issue.

https://wiki.postgresql.org/wiki/Cfbot#Which_attachments_are_considered_to_be_patches.3F

Thanks for looking.

My bad, didn't know about this, thanks for the link.

On a side note, I noticed that reindex behaviour is strange on
partitioned tables: it doesn't mark partitioned tables as valid after
reindexing children. As far as I could understand from the code and
mailing lists, this is the intended behaviour, but I can't quite
understand the rationale for it. Do you know why it is done this way?

#22Justin Pryzby
pryzby@telsasoft.com
In reply to: Justin Pryzby (#20)
1 attachment(s)
Re: CREATE INDEX CONCURRENTLY on partitioned index

On Sun, Dec 04, 2022 at 01:09:35PM -0600, Justin Pryzby wrote:

This currently handles partitions with a loop around the whole CIC
implementation, which means that things like WaitForLockers() happen
once for each index, the same as REINDEX CONCURRENTLY on a partitioned
table. Contrast that with ReindexRelationConcurrently(), which handles
all the indexes on a table in one pass by looping around indexes within
each phase.

Rebased over the progress reporting fix (27f5c712b).

I added a list of (intermediate) partitioned tables, rather than looping
over the list of inheritors again, to save calling get_rel_relkind().

I think this patch is done.

--
Justin

Attachments:

0001-Allow-CREATE-INDEX-CONCURRENTLY-on-partitioned-table.patchtext/x-diff; charset=us-asciiDownload
From 941f7f930fc18563e2da42143015b6573d5447b1 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Sat, 6 Jun 2020 17:42:23 -0500
Subject: [PATCH] Allow CREATE INDEX CONCURRENTLY on partitioned table

https://www.postgresql.org/message-id/flat/20201031063117.GF3080@telsasoft.com
---
 doc/src/sgml/ddl.sgml                  |   4 +-
 doc/src/sgml/ref/create_index.sgml     |  14 +-
 src/backend/commands/indexcmds.c       | 200 ++++++++++++++++++-------
 src/test/regress/expected/indexing.out | 127 +++++++++++++++-
 src/test/regress/sql/indexing.sql      |  26 +++-
 5 files changed, 297 insertions(+), 74 deletions(-)

diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml
index 91c036d1cbe..64efdf1e879 100644
--- a/doc/src/sgml/ddl.sgml
+++ b/doc/src/sgml/ddl.sgml
@@ -4178,9 +4178,7 @@ ALTER TABLE measurement ATTACH PARTITION measurement_y2008m02
      so that they are applied automatically to the entire hierarchy.
      This is very
      convenient, as not only will the existing partitions become indexed, but
-     also any partitions that are created in the future will.  One limitation is
-     that it's not possible to use the <literal>CONCURRENTLY</literal>
-     qualifier when creating such a partitioned index.  To avoid long lock
+     also any partitions that are created in the future will.  To avoid long lock
      times, it is possible to use <command>CREATE INDEX ON ONLY</command>
      the partitioned table; such an index is marked invalid, and the partitions
      do not get the index applied automatically.  The indexes on partitions can
diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml
index 40986aa502f..b05102efdaf 100644
--- a/doc/src/sgml/ref/create_index.sgml
+++ b/doc/src/sgml/ref/create_index.sgml
@@ -645,7 +645,10 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] <replaceable class=
    <para>
     If a problem arises while scanning the table, such as a deadlock or a
     uniqueness violation in a unique index, the <command>CREATE INDEX</command>
-    command will fail but leave behind an <quote>invalid</quote> index. This index
+    command will fail but leave behind an <quote>invalid</quote> index.
+    If this happens while building an index concurrently on a partitioned
+    table, the command can also leave behind <quote>valid</quote> or
+    <quote>invalid</quote> indexes on table partitions.  The invalid index
     will be ignored for querying purposes because it might be incomplete;
     however it will still consume update overhead. The <application>psql</application>
     <command>\d</command> command will report such an index as <literal>INVALID</literal>:
@@ -692,15 +695,6 @@ Indexes:
     cannot.
    </para>
 
-   <para>
-    Concurrent builds for indexes on partitioned tables are currently not
-    supported.  However, you may concurrently build the index on each
-    partition individually and then finally create the partitioned index
-    non-concurrently in order to reduce the time where writes to the
-    partitioned table will be locked out.  In this case, building the
-    partitioned index is a metadata only operation.
-   </para>
-
   </refsect2>
  </refsect1>
 
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 3ec8b5cca6c..daba8b67dbe 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -93,6 +93,11 @@ static char *ChooseIndexName(const char *tabname, Oid namespaceId,
 							 bool primary, bool isconstraint);
 static char *ChooseIndexNameAddition(List *colnames);
 static List *ChooseIndexColumnNames(List *indexElems);
+static void DefineIndexConcurrentInternal(Oid relationId,
+										  Oid indexRelationId,
+										  IndexInfo *indexInfo,
+										  LOCKTAG heaplocktag,
+										  LockRelId heaprelid);
 static void ReindexIndex(RangeVar *indexRelation, ReindexParams *params,
 						 bool isTopLevel);
 static void RangeVarCallbackForReindexIndex(const RangeVar *relation,
@@ -559,7 +564,6 @@ DefineIndex(Oid relationId,
 	bool		amissummarizing;
 	amoptions_function amoptions;
 	bool		partitioned;
-	bool		safe_index;
 	Datum		reloptions;
 	int16	   *coloptions;
 	IndexInfo  *indexInfo;
@@ -567,12 +571,10 @@ DefineIndex(Oid relationId,
 	bits16		constr_flags;
 	int			numberOfAttributes;
 	int			numberOfKeyAttributes;
-	TransactionId limitXmin;
 	ObjectAddress address;
 	LockRelId	heaprelid;
 	LOCKTAG		heaplocktag;
 	LOCKMODE	lockmode;
-	Snapshot	snapshot;
 	Oid			root_save_userid;
 	int			root_save_sec_context;
 	int			root_save_nestlevel;
@@ -705,17 +707,6 @@ DefineIndex(Oid relationId,
 	partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE;
 	if (partitioned)
 	{
-		/*
-		 * Note: we check 'stmt->concurrent' rather than 'concurrent', so that
-		 * the error is thrown also for temporary tables.  Seems better to be
-		 * consistent, even though we could do it on temporary table because
-		 * we're not actually doing it concurrently.
-		 */
-		if (stmt->concurrent)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("cannot create index on partitioned table \"%s\" concurrently",
-							RelationGetRelationName(rel))));
 		if (stmt->excludeOpNames)
 			ereport(ERROR,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
@@ -1089,10 +1080,6 @@ DefineIndex(Oid relationId,
 		}
 	}
 
-	/* Is index safe for others to ignore?  See set_indexsafe_procflags() */
-	safe_index = indexInfo->ii_Expressions == NIL &&
-		indexInfo->ii_Predicate == NIL;
-
 	/*
 	 * Report index creation if appropriate (delay this till after most of the
 	 * error checks)
@@ -1157,6 +1144,11 @@ DefineIndex(Oid relationId,
 		if (pd->nparts != 0)
 			flags |= INDEX_CREATE_INVALID;
 	}
+	else if (concurrent && OidIsValid(parentIndexId))
+	{
+		/* If concurrent, initially build index partitions as "invalid" */
+		flags |= INDEX_CREATE_INVALID;
+	}
 
 	if (stmt->deferrable)
 		constr_flags |= INDEX_CONSTR_CREATE_DEFERRABLE;
@@ -1494,58 +1486,54 @@ DefineIndex(Oid relationId,
 			 * invalid, this is incorrect, so update our row to invalid too.
 			 */
 			if (invalidate_parent)
-			{
-				Relation	pg_index = table_open(IndexRelationId, RowExclusiveLock);
-				HeapTuple	tup,
-							newtup;
-
-				tup = SearchSysCache1(INDEXRELID,
-									  ObjectIdGetDatum(indexRelationId));
-				if (!HeapTupleIsValid(tup))
-					elog(ERROR, "cache lookup failed for index %u",
-						 indexRelationId);
-				newtup = heap_copytuple(tup);
-				((Form_pg_index) GETSTRUCT(newtup))->indisvalid = false;
-				CatalogTupleUpdate(pg_index, &tup->t_self, newtup);
-				ReleaseSysCache(tup);
-				table_close(pg_index, RowExclusiveLock);
-				heap_freetuple(newtup);
-			}
+				index_set_state_flags(indexRelationId, INDEX_DROP_CLEAR_VALID);
 		}
 
 		/*
 		 * Indexes on partitioned tables are not themselves built, so we're
-		 * done here.
+		 * done here in the non-concurrent case.
 		 */
-		AtEOXact_GUC(false, root_save_nestlevel);
-		SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
-		table_close(rel, NoLock);
-		if (!OidIsValid(parentIndexId))
-			pgstat_progress_end_command();
-		else
+		if (!concurrent)
 		{
-			/* Update progress for an intermediate partitioned index itself */
-			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
-		}
+			AtEOXact_GUC(false, root_save_nestlevel);
+			SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
+			table_close(rel, NoLock);
 
-		return address;
+			if (!OidIsValid(parentIndexId))
+				pgstat_progress_end_command();
+			else
+			{
+				/*
+				 * Update progress for an intermediate partitioned index
+				 * itself
+				 */
+				pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+			}
+
+			return address;
+		}
 	}
 
 	AtEOXact_GUC(false, root_save_nestlevel);
 	SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
 
-	if (!concurrent)
+	/*
+	 * All done in the non-concurrent case, and when building catalog entries
+	 * of partitions for CIC.
+	 */
+	if (!concurrent || OidIsValid(parentIndexId))
 	{
-		/* Close the heap and we're done, in the non-concurrent case */
 		table_close(rel, NoLock);
 
 		/*
 		 * If this is the top-level index, the command is done overall;
-		 * otherwise, increment progress to report one child index is done.
+		 * otherwise (when being called recursively), increment progress to
+		 * report that one child index is done.  Except in the concurrent
+		 * (catalog-only) case, which is handled later.
 		 */
 		if (!OidIsValid(parentIndexId))
 			pgstat_progress_end_command();
-		else
+		else if (!concurrent)
 			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
 
 		return address;
@@ -1556,6 +1544,114 @@ DefineIndex(Oid relationId,
 	SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
 	table_close(rel, NoLock);
 
+	if (!partitioned)
+	{
+		/* CREATE INDEX CONCURRENTLY on a nonpartitioned table */
+		DefineIndexConcurrentInternal(relationId, indexRelationId,
+									  indexInfo, heaplocktag, heaprelid);
+		pgstat_progress_end_command();
+		return address;
+	}
+	else
+	{
+		/*
+		 * For CIC on a partitioned table, finish by building indexes on
+		 * partitions
+		 */
+
+		ListCell   *lc;
+		List	   *childs;
+		List	   *partitioned = NIL;
+		MemoryContext cic_context,
+					old_context;
+
+		/* Create special memory context for cross-transaction storage */
+		cic_context = AllocSetContextCreate(PortalContext,
+											"Create index concurrently",
+											ALLOCSET_DEFAULT_SIZES);
+
+		old_context = MemoryContextSwitchTo(cic_context);
+		childs = find_all_inheritors(indexRelationId, ShareLock, NULL);
+		MemoryContextSwitchTo(old_context);
+
+		foreach(lc, childs)
+		{
+			Oid			indrelid = lfirst_oid(lc);
+			Oid			tabrelid;
+			char		relkind;
+
+			/*
+			 * Pre-existing partitions which were ATTACHED were already
+			 * counted in the progress report.
+			 */
+			if (get_index_isvalid(indrelid))
+				continue;
+
+			/*
+			 * Partitioned indexes are counted in the progress report, but
+			 * don't need to be further processed.
+			 */
+			relkind = get_rel_relkind(indrelid);
+			if (!RELKIND_HAS_STORAGE(relkind))
+			{
+				/* The toplevel index doesn't count towards "partitions done" */
+				if (indrelid != indexRelationId)
+					pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+
+				/*
+				 * Build up a list of all the intermediate partitioned tables
+				 * which will later need to be set valid.
+				 */
+				old_context = MemoryContextSwitchTo(cic_context);
+				partitioned = lappend_oid(partitioned, indrelid);
+				MemoryContextSwitchTo(old_context);
+				continue;
+			}
+
+			rel = table_open(relationId, ShareUpdateExclusiveLock);
+			heaprelid = rel->rd_lockInfo.lockRelId;
+			table_close(rel, ShareUpdateExclusiveLock);
+			SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
+
+			/* Process each partition in a separate transaction */
+			tabrelid = IndexGetRelation(indrelid, false);
+			DefineIndexConcurrentInternal(tabrelid, indrelid, indexInfo,
+										  heaplocktag, heaprelid);
+
+			PushActiveSnapshot(GetTransactionSnapshot());
+			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+		}
+
+		/* Set as valid all partitioned indexes, including the parent */
+		foreach(lc, partitioned)
+		{
+			Oid			indrelid = lfirst_oid(lc);
+
+			index_set_state_flags(indrelid, INDEX_CREATE_SET_READY);
+			CommandCounterIncrement();
+			index_set_state_flags(indrelid, INDEX_CREATE_SET_VALID);
+		}
+
+		MemoryContextDelete(cic_context);
+		pgstat_progress_end_command();
+		PopActiveSnapshot();
+		return address;
+	}
+}
+
+
+static void
+DefineIndexConcurrentInternal(Oid relationId,
+							  Oid indexRelationId, IndexInfo *indexInfo,
+							  LOCKTAG heaplocktag, LockRelId heaprelid)
+{
+	TransactionId limitXmin;
+	Snapshot	snapshot;
+
+	/* Is index safe for others to ignore?  See set_indexsafe_procflags() */
+	bool		safe_index = indexInfo->ii_Expressions == NIL &&
+		indexInfo->ii_Predicate == NIL;
+
 	/*
 	 * For a concurrent build, it's important to make the catalog entries
 	 * visible to other transactions before we start to build the index. That
@@ -1751,10 +1847,6 @@ DefineIndex(Oid relationId,
 	 * Last thing to do is release the session-level lock on the parent table.
 	 */
 	UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
-
-	pgstat_progress_end_command();
-
-	return address;
 }
 
 
diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out
index 1bdd430f063..f1beee6d240 100644
--- a/src/test/regress/expected/indexing.out
+++ b/src/test/regress/expected/indexing.out
@@ -50,11 +50,130 @@ select relname, relkind, relhassubclass, inhparent::regclass
 (8 rows)
 
 drop table idxpart;
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
-ERROR:  cannot create index on partitioned table "idxpart" concurrently
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+create table idxpart3 partition of idxpart for values from (30) to (40) partition by range(a);
+create table idxpart31 partition of idxpart3 default;
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart2 (a); -- leaf
+create index concurrently on idxpart (a); -- partitioned
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+ERROR:  could not create unique index "idxpart2_a_idx1"
+DETAIL:  Key (a)=(10) is duplicated.
+\d idxpart
+        Partitioned table "public.idxpart"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition key: RANGE (a)
+Indexes:
+    "idxpart_a_idx" btree (a)
+    "idxpart_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 3 (Use \d+ to list them.)
+
+\d idxpart1
+        Partitioned table "public.idxpart1"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart1_a_idx" btree (a)
+    "idxpart1_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart11
+       Partitioned table "public.idxpart11"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart1 FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart11_a_idx" btree (a)
+    "idxpart11_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart111
+       Partitioned table "public.idxpart111"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart11 DEFAULT
+Partition key: RANGE (a)
+Indexes:
+    "idxpart111_a_idx" btree (a)
+    "idxpart111_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart1111
+      Partitioned table "public.idxpart1111"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart111 DEFAULT
+Partition key: RANGE (a)
+Indexes:
+    "idxpart1111_a_idx" btree (a)
+    "idxpart1111_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 0
+
+\d idxpart2
+              Table "public.idxpart2"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (10) TO (20)
+Indexes:
+    "idxpart2_a_idx" btree (a)
+    "idxpart2_a_idx1" UNIQUE, btree (a) INVALID
+
+\d idxpart3
+        Partitioned table "public.idxpart3"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (30) TO (40)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart3_a_idx" btree (a)
+    "idxpart3_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart31
+             Table "public.idxpart31"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart3 DEFAULT
+Indexes:
+    "idxpart31_a_idx" btree (a)
+    "idxpart31_a_idx1" UNIQUE, btree (a) INVALID
+
 drop table idxpart;
 -- Verify bugfix with query on indexed partitioned table with no partitions
 -- https://postgr.es/m/20180124162006.pmapfiznhgngwtjf@alvherre.pgsql
diff --git a/src/test/regress/sql/indexing.sql b/src/test/regress/sql/indexing.sql
index 429120e7104..fb0baedcc28 100644
--- a/src/test/regress/sql/indexing.sql
+++ b/src/test/regress/sql/indexing.sql
@@ -29,10 +29,30 @@ select relname, relkind, relhassubclass, inhparent::regclass
 	where relname like 'idxpart%' order by relname;
 drop table idxpart;
 
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+create table idxpart3 partition of idxpart for values from (30) to (40) partition by range(a);
+create table idxpart31 partition of idxpart3 default;
+
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart2 (a); -- leaf
+create index concurrently on idxpart (a); -- partitioned
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+\d idxpart
+\d idxpart1
+\d idxpart11
+\d idxpart111
+\d idxpart1111
+\d idxpart2
+\d idxpart3
+\d idxpart31
 drop table idxpart;
 
 -- Verify bugfix with query on indexed partitioned table with no partitions
-- 
2.34.1

#23Alexander Pyhalov
a.pyhalov@postgrespro.ru
In reply to: Justin Pryzby (#22)
Re: CREATE INDEX CONCURRENTLY on partitioned index

Justin Pryzby писал 2023-03-26 17:51:

On Sun, Dec 04, 2022 at 01:09:35PM -0600, Justin Pryzby wrote:

This currently handles partitions with a loop around the whole CIC
implementation, which means that things like WaitForLockers() happen
once for each index, the same as REINDEX CONCURRENTLY on a partitioned
table. Contrast that with ReindexRelationConcurrently(), which handles
all the indexes on a table in one pass by looping around indexes within
each phase.

Rebased over the progress reporting fix (27f5c712b).

I added a list of (intermediate) partitioned tables, rather than looping
over the list of inheritors again, to save calling get_rel_relkind().

I think this patch is done.

Hi.

Overall looks good to me. However, I think that using 'partitioned' as a
list of partitioned index oids in DefineIndex() is a bit misleading -
we've just used it as a boolean, specifying whether we are dealing with a
partitioned relation.

--
Best regards,
Alexander Pyhalov,
Postgres Professional

#24Justin Pryzby
pryzby@telsasoft.com
In reply to: Alexander Pyhalov (#23)
1 attachment(s)
Re: CREATE INDEX CONCURRENTLY on partitioned index

On Mon, Mar 27, 2023 at 01:28:24PM +0300, Alexander Pyhalov wrote:

Justin Pryzby писал 2023-03-26 17:51:

On Sun, Dec 04, 2022 at 01:09:35PM -0600, Justin Pryzby wrote:

This currently handles partitions with a loop around the whole CIC
implementation, which means that things like WaitForLockers() happen
once for each index, the same as REINDEX CONCURRENTLY on a partitioned
table. Contrast that with ReindexRelationConcurrently(), which handles
all the indexes on a table in one pass by looping around indexes within
each phase.

Rebased over the progress reporting fix (27f5c712b).

I added a list of (intermediate) partitioned tables, rather than looping
over the list of inheritors again, to save calling get_rel_relkind().

I think this patch is done.

Overall looks good to me. However, I think that using 'partitioned' as list
of partitioned index oids in DefineIndex() is a bit misleading - we've just
used it as boolean, specifying if we are dealing with a partitioned
relation.

Right. This is also rebased on 8c852ba9a4 (Allow some exclusion
constraints on partitions).

--
Justin

Attachments:

0001-Allow-CREATE-INDEX-CONCURRENTLY-on-partitioned-table.patchtext/x-diff; charset=us-asciiDownload
From 3f60cbdd12b67115f86854ff60a4009028b8b99f Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Sat, 6 Jun 2020 17:42:23 -0500
Subject: [PATCH] Allow CREATE INDEX CONCURRENTLY on partitioned table

https://www.postgresql.org/message-id/flat/20201031063117.GF3080@telsasoft.com
---
 doc/src/sgml/ddl.sgml                  |   4 +-
 doc/src/sgml/ref/create_index.sgml     |  14 +-
 src/backend/commands/indexcmds.c       | 201 ++++++++++++++++++-------
 src/test/regress/expected/indexing.out | 127 +++++++++++++++-
 src/test/regress/sql/indexing.sql      |  26 +++-
 5 files changed, 297 insertions(+), 75 deletions(-)

diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml
index 58aaa691c6a..afa982154a8 100644
--- a/doc/src/sgml/ddl.sgml
+++ b/doc/src/sgml/ddl.sgml
@@ -4161,9 +4161,7 @@ ALTER TABLE measurement ATTACH PARTITION measurement_y2008m02
      so that they are applied automatically to the entire hierarchy.
      This is very
      convenient, as not only will the existing partitions become indexed, but
-     also any partitions that are created in the future will.  One limitation is
-     that it's not possible to use the <literal>CONCURRENTLY</literal>
-     qualifier when creating such a partitioned index.  To avoid long lock
+     also any partitions that are created in the future will.  To avoid long lock
      times, it is possible to use <command>CREATE INDEX ON ONLY</command>
      the partitioned table; such an index is marked invalid, and the partitions
      do not get the index applied automatically.  The indexes on partitions can
diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml
index 40986aa502f..b05102efdaf 100644
--- a/doc/src/sgml/ref/create_index.sgml
+++ b/doc/src/sgml/ref/create_index.sgml
@@ -645,7 +645,10 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] <replaceable class=
    <para>
     If a problem arises while scanning the table, such as a deadlock or a
     uniqueness violation in a unique index, the <command>CREATE INDEX</command>
-    command will fail but leave behind an <quote>invalid</quote> index. This index
+    command will fail but leave behind an <quote>invalid</quote> index.
+    If this happens while building an index concurrently on a partitioned
+    table, the command can also leave behind <quote>valid</quote> or
+    <quote>invalid</quote> indexes on table partitions.  The invalid index
     will be ignored for querying purposes because it might be incomplete;
     however it will still consume update overhead. The <application>psql</application>
     <command>\d</command> command will report such an index as <literal>INVALID</literal>:
@@ -692,15 +695,6 @@ Indexes:
     cannot.
    </para>
 
-   <para>
-    Concurrent builds for indexes on partitioned tables are currently not
-    supported.  However, you may concurrently build the index on each
-    partition individually and then finally create the partitioned index
-    non-concurrently in order to reduce the time where writes to the
-    partitioned table will be locked out.  In this case, building the
-    partitioned index is a metadata only operation.
-   </para>
-
   </refsect2>
  </refsect1>
 
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index baf3e6e57a5..dfe64052b81 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -92,6 +92,11 @@ static char *ChooseIndexName(const char *tabname, Oid namespaceId,
 							 bool primary, bool isconstraint);
 static char *ChooseIndexNameAddition(List *colnames);
 static List *ChooseIndexColumnNames(List *indexElems);
+static void DefineIndexConcurrentInternal(Oid relationId,
+										  Oid indexRelationId,
+										  IndexInfo *indexInfo,
+										  LOCKTAG heaplocktag,
+										  LockRelId heaprelid);
 static void ReindexIndex(RangeVar *indexRelation, ReindexParams *params,
 						 bool isTopLevel);
 static void RangeVarCallbackForReindexIndex(const RangeVar *relation,
@@ -555,7 +560,6 @@ DefineIndex(Oid relationId,
 	bool		amissummarizing;
 	amoptions_function amoptions;
 	bool		partitioned;
-	bool		safe_index;
 	Datum		reloptions;
 	int16	   *coloptions;
 	IndexInfo  *indexInfo;
@@ -563,12 +567,10 @@ DefineIndex(Oid relationId,
 	bits16		constr_flags;
 	int			numberOfAttributes;
 	int			numberOfKeyAttributes;
-	TransactionId limitXmin;
 	ObjectAddress address;
 	LockRelId	heaprelid;
 	LOCKTAG		heaplocktag;
 	LOCKMODE	lockmode;
-	Snapshot	snapshot;
 	Oid			root_save_userid;
 	int			root_save_sec_context;
 	int			root_save_nestlevel;
@@ -699,20 +701,6 @@ DefineIndex(Oid relationId,
 	 * partition.
 	 */
 	partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE;
-	if (partitioned)
-	{
-		/*
-		 * Note: we check 'stmt->concurrent' rather than 'concurrent', so that
-		 * the error is thrown also for temporary tables.  Seems better to be
-		 * consistent, even though we could do it on temporary table because
-		 * we're not actually doing it concurrently.
-		 */
-		if (stmt->concurrent)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("cannot create index on partitioned table \"%s\" concurrently",
-							RelationGetRelationName(rel))));
-	}
 
 	/*
 	 * Don't try to CREATE INDEX on temp tables of other backends.
@@ -1102,10 +1090,6 @@ DefineIndex(Oid relationId,
 		}
 	}
 
-	/* Is index safe for others to ignore?  See set_indexsafe_procflags() */
-	safe_index = indexInfo->ii_Expressions == NIL &&
-		indexInfo->ii_Predicate == NIL;
-
 	/*
 	 * Report index creation if appropriate (delay this till after most of the
 	 * error checks)
@@ -1170,6 +1154,11 @@ DefineIndex(Oid relationId,
 		if (pd->nparts != 0)
 			flags |= INDEX_CREATE_INVALID;
 	}
+	else if (concurrent && OidIsValid(parentIndexId))
+	{
+		/* If concurrent, initially build index partitions as "invalid" */
+		flags |= INDEX_CREATE_INVALID;
+	}
 
 	if (stmt->deferrable)
 		constr_flags |= INDEX_CONSTR_CREATE_DEFERRABLE;
@@ -1519,21 +1508,7 @@ DefineIndex(Oid relationId,
 			 */
 			if (invalidate_parent)
 			{
-				Relation	pg_index = table_open(IndexRelationId, RowExclusiveLock);
-				HeapTuple	tup,
-							newtup;
-
-				tup = SearchSysCache1(INDEXRELID,
-									  ObjectIdGetDatum(indexRelationId));
-				if (!HeapTupleIsValid(tup))
-					elog(ERROR, "cache lookup failed for index %u",
-						 indexRelationId);
-				newtup = heap_copytuple(tup);
-				((Form_pg_index) GETSTRUCT(newtup))->indisvalid = false;
-				CatalogTupleUpdate(pg_index, &tup->t_self, newtup);
-				ReleaseSysCache(tup);
-				table_close(pg_index, RowExclusiveLock);
-				heap_freetuple(newtup);
+				index_set_state_flags(indexRelationId, INDEX_DROP_CLEAR_VALID);
 
 				/*
 				 * CCI here to make this update visible, in case this recurses
@@ -1545,37 +1520,49 @@ DefineIndex(Oid relationId,
 
 		/*
 		 * Indexes on partitioned tables are not themselves built, so we're
-		 * done here.
+		 * done here in the non-concurrent case.
 		 */
-		AtEOXact_GUC(false, root_save_nestlevel);
-		SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
-		table_close(rel, NoLock);
-		if (!OidIsValid(parentIndexId))
-			pgstat_progress_end_command();
-		else
+		if (!concurrent)
 		{
-			/* Update progress for an intermediate partitioned index itself */
-			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
-		}
+			AtEOXact_GUC(false, root_save_nestlevel);
+			SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
+			table_close(rel, NoLock);
 
-		return address;
+			if (!OidIsValid(parentIndexId))
+				pgstat_progress_end_command();
+			else
+			{
+				/*
+				 * Update progress for an intermediate partitioned index
+				 * itself
+				 */
+				pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+			}
+
+			return address;
+		}
 	}
 
 	AtEOXact_GUC(false, root_save_nestlevel);
 	SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
 
-	if (!concurrent)
+	/*
+	 * All done in the non-concurrent case, and when building catalog entries
+	 * of partitions for CIC.
+	 */
+	if (!concurrent || OidIsValid(parentIndexId))
 	{
-		/* Close the heap and we're done, in the non-concurrent case */
 		table_close(rel, NoLock);
 
 		/*
 		 * If this is the top-level index, the command is done overall;
-		 * otherwise, increment progress to report one child index is done.
+		 * otherwise (when being called recursively), increment progress to
+		 * report that one child index is done.  Except in the concurrent
+		 * (catalog-only) case, which is handled later.
 		 */
 		if (!OidIsValid(parentIndexId))
 			pgstat_progress_end_command();
-		else
+		else if (!concurrent)
 			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
 
 		return address;
@@ -1586,6 +1573,114 @@ DefineIndex(Oid relationId,
 	SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
 	table_close(rel, NoLock);
 
+	if (!partitioned)
+	{
+		/* CREATE INDEX CONCURRENTLY on a nonpartitioned table */
+		DefineIndexConcurrentInternal(relationId, indexRelationId,
+									  indexInfo, heaplocktag, heaprelid);
+		pgstat_progress_end_command();
+		return address;
+	}
+	else
+	{
+		/*
+		 * For CIC on a partitioned table, finish by building indexes on
+		 * partitions
+		 */
+
+		ListCell   *lc;
+		List	   *childs;
+		List	   *tosetvalid = NIL;
+		MemoryContext cic_context,
+					old_context;
+
+		/* Create special memory context for cross-transaction storage */
+		cic_context = AllocSetContextCreate(PortalContext,
+											"Create index concurrently",
+											ALLOCSET_DEFAULT_SIZES);
+
+		old_context = MemoryContextSwitchTo(cic_context);
+		childs = find_all_inheritors(indexRelationId, ShareLock, NULL);
+		MemoryContextSwitchTo(old_context);
+
+		foreach(lc, childs)
+		{
+			Oid			indrelid = lfirst_oid(lc);
+			Oid			tabrelid;
+			char		relkind;
+
+			/*
+			 * Pre-existing partitions which were ATTACHED were already
+			 * counted in the progress report.
+			 */
+			if (get_index_isvalid(indrelid))
+				continue;
+
+			/*
+			 * Partitioned indexes are counted in the progress report, but
+			 * don't need to be further processed.
+			 */
+			relkind = get_rel_relkind(indrelid);
+			if (!RELKIND_HAS_STORAGE(relkind))
+			{
+				/* The toplevel index doesn't count towards "partitions done" */
+				if (indrelid != indexRelationId)
+					pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+
+				/*
+				 * Build up a list of all the intermediate partitioned tables
+				 * which will later need to be set valid.
+				 */
+				old_context = MemoryContextSwitchTo(cic_context);
+				tosetvalid = lappend_oid(tosetvalid, indrelid);
+				MemoryContextSwitchTo(old_context);
+				continue;
+			}
+
+			rel = table_open(relationId, ShareUpdateExclusiveLock);
+			heaprelid = rel->rd_lockInfo.lockRelId;
+			table_close(rel, ShareUpdateExclusiveLock);
+			SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
+
+			/* Process each partition in a separate transaction */
+			tabrelid = IndexGetRelation(indrelid, false);
+			DefineIndexConcurrentInternal(tabrelid, indrelid, indexInfo,
+										  heaplocktag, heaprelid);
+
+			PushActiveSnapshot(GetTransactionSnapshot());
+			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+		}
+
+		/* Set as valid all partitioned indexes, including the parent */
+		foreach(lc, tosetvalid)
+		{
+			Oid			indrelid = lfirst_oid(lc);
+
+			index_set_state_flags(indrelid, INDEX_CREATE_SET_READY);
+			CommandCounterIncrement();
+			index_set_state_flags(indrelid, INDEX_CREATE_SET_VALID);
+		}
+
+		MemoryContextDelete(cic_context);
+		pgstat_progress_end_command();
+		PopActiveSnapshot();
+		return address;
+	}
+}
+
+
+static void
+DefineIndexConcurrentInternal(Oid relationId,
+							  Oid indexRelationId, IndexInfo *indexInfo,
+							  LOCKTAG heaplocktag, LockRelId heaprelid)
+{
+	TransactionId limitXmin;
+	Snapshot	snapshot;
+
+	/* Is index safe for others to ignore?  See set_indexsafe_procflags() */
+	bool		safe_index = indexInfo->ii_Expressions == NIL &&
+		indexInfo->ii_Predicate == NIL;
+
 	/*
 	 * For a concurrent build, it's important to make the catalog entries
 	 * visible to other transactions before we start to build the index. That
@@ -1781,10 +1876,6 @@ DefineIndex(Oid relationId,
 	 * Last thing to do is release the session-level lock on the parent table.
 	 */
 	UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
-
-	pgstat_progress_end_command();
-
-	return address;
 }
 
 
diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out
index 2be8ffa7ec4..aefa203b14f 100644
--- a/src/test/regress/expected/indexing.out
+++ b/src/test/regress/expected/indexing.out
@@ -50,11 +50,130 @@ select relname, relkind, relhassubclass, inhparent::regclass
 (8 rows)
 
 drop table idxpart;
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
-ERROR:  cannot create index on partitioned table "idxpart" concurrently
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+create table idxpart3 partition of idxpart for values from (30) to (40) partition by range(a);
+create table idxpart31 partition of idxpart3 default;
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart2 (a); -- leaf
+create index concurrently on idxpart (a); -- partitioned
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+ERROR:  could not create unique index "idxpart2_a_idx1"
+DETAIL:  Key (a)=(10) is duplicated.
+\d idxpart
+        Partitioned table "public.idxpart"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition key: RANGE (a)
+Indexes:
+    "idxpart_a_idx" btree (a)
+    "idxpart_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 3 (Use \d+ to list them.)
+
+\d idxpart1
+        Partitioned table "public.idxpart1"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart1_a_idx" btree (a)
+    "idxpart1_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart11
+       Partitioned table "public.idxpart11"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart1 FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart11_a_idx" btree (a)
+    "idxpart11_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart111
+       Partitioned table "public.idxpart111"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart11 DEFAULT
+Partition key: RANGE (a)
+Indexes:
+    "idxpart111_a_idx" btree (a)
+    "idxpart111_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart1111
+      Partitioned table "public.idxpart1111"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart111 DEFAULT
+Partition key: RANGE (a)
+Indexes:
+    "idxpart1111_a_idx" btree (a)
+    "idxpart1111_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 0
+
+\d idxpart2
+              Table "public.idxpart2"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (10) TO (20)
+Indexes:
+    "idxpart2_a_idx" btree (a)
+    "idxpart2_a_idx1" UNIQUE, btree (a) INVALID
+
+\d idxpart3
+        Partitioned table "public.idxpart3"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (30) TO (40)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart3_a_idx" btree (a)
+    "idxpart3_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart31
+             Table "public.idxpart31"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart3 DEFAULT
+Indexes:
+    "idxpart31_a_idx" btree (a)
+    "idxpart31_a_idx1" UNIQUE, btree (a) INVALID
+
 drop table idxpart;
 -- Verify bugfix with query on indexed partitioned table with no partitions
 -- https://postgr.es/m/20180124162006.pmapfiznhgngwtjf@alvherre.pgsql
diff --git a/src/test/regress/sql/indexing.sql b/src/test/regress/sql/indexing.sql
index b69c41832ca..5ddeaf1c613 100644
--- a/src/test/regress/sql/indexing.sql
+++ b/src/test/regress/sql/indexing.sql
@@ -29,10 +29,30 @@ select relname, relkind, relhassubclass, inhparent::regclass
 	where relname like 'idxpart%' order by relname;
 drop table idxpart;
 
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+create table idxpart3 partition of idxpart for values from (30) to (40) partition by range(a);
+create table idxpart31 partition of idxpart3 default;
+
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart2 (a); -- leaf
+create index concurrently on idxpart (a); -- partitioned
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+\d idxpart
+\d idxpart1
+\d idxpart11
+\d idxpart111
+\d idxpart1111
+\d idxpart2
+\d idxpart3
+\d idxpart31
 drop table idxpart;
 
 -- Verify bugfix with query on indexed partitioned table with no partitions
-- 
2.34.1

#25Alexander Pyhalov
a.pyhalov@postgrespro.ru
In reply to: Justin Pryzby (#24)
Re: CREATE INDEX CONCURRENTLY on partitioned index

Justin Pryzby писал 2023-07-13 05:27:

On Mon, Mar 27, 2023 at 01:28:24PM +0300, Alexander Pyhalov wrote:

Justin Pryzby писал 2023-03-26 17:51:

On Sun, Dec 04, 2022 at 01:09:35PM -0600, Justin Pryzby wrote:

This currently handles partitions with a loop around the whole CIC
implementation, which means that things like WaitForLockers() happen
once for each index, the same as REINDEX CONCURRENTLY on a partitioned
table. Contrast that with ReindexRelationConcurrently(), which handles
all the indexes on a table in one pass by looping around indexes within
each phase.

Rebased over the progress reporting fix (27f5c712b).

I added a list of (intermediate) partitioned tables, rather than looping
over the list of inheritors again, to save calling rel_get_relkind().

I think this patch is done.

Overall looks good to me. However, I think that using 'partitioned' as a
list of partitioned index oids in DefineIndex() is a bit misleading - we've
just used it as a boolean, specifying whether we are dealing with a
partitioned relation.

Right. This is also rebased on 8c852ba9a4 (Allow some exclusion
constraints on partitions).

Hi.
I have some more questions.
In the following code (indexcmds.c:1640 and later)

1640 rel = table_open(relationId, ShareUpdateExclusiveLock);
1641 heaprelid = rel->rd_lockInfo.lockRelId;
1642 table_close(rel, ShareUpdateExclusiveLock);
1643 SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);

Should we release ShareUpdateExclusiveLock before getting the session lock
in DefineIndexConcurrentInternal()?
Also, we unlock the parent table there between reindexing children, at the
end of DefineIndexConcurrentInternal():

1875 /*
1876 * Last thing to do is release the session-level lock on the parent table.
1877 */
1878 UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
1879 }

Is it safe? Shouldn't we hold session lock on the parent table while
rebuilding child indexes?

--
Best regards,
Alexander Pyhalov,
Postgres Professional

#26Alexander Pyhalov
a.pyhalov@postgrespro.ru
In reply to: Alexander Pyhalov (#25)
1 attachment(s)
Re: CREATE INDEX CONCURRENTLY on partitioned index

Hi.

I've rebased the patch on master and it seems to me there's one more issue -

when we call DefineIndexConcurrentInternal() in partitioned case, it
waits for transactions, locking tableId, not tabrelid - heaprelid
LockRelId is constructed for parent index relation, not for child index
relation.

Attaching fixed version.

Also, I'm not sure what to do with locking of child relations. If we
don't do anything, you can drop one of the partitioned table's children
while CIC is in progress, and get the error

ERROR: cache lookup failed for index 16399

If you try to lock all child tables in CIC session, you'll get
deadlocks.

--
Best regards,
Alexander Pyhalov,
Postgres Professional

Attachments:

0001-Allow-CREATE-INDEX-CONCURRENTLY-on-partitioned-table.patchtext/x-diff; name=0001-Allow-CREATE-INDEX-CONCURRENTLY-on-partitioned-table.patchDownload
From 37a13b7fa1c3277b9d038b7a0c75399ff05b28a7 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzby@telsasoft.com>
Date: Mon, 29 Jan 2024 10:41:01 +0300
Subject: [PATCH] Allow CREATE INDEX CONCURRENTLY on partitioned table

---
 doc/src/sgml/ddl.sgml                  |   4 +-
 doc/src/sgml/ref/create_index.sgml     |  14 +-
 src/backend/commands/indexcmds.c       | 200 ++++++++++++++++++-------
 src/test/regress/expected/indexing.out | 127 +++++++++++++++-
 src/test/regress/sql/indexing.sql      |  26 +++-
 5 files changed, 296 insertions(+), 75 deletions(-)

diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml
index 075ff329912..8ee80c40e3b 100644
--- a/doc/src/sgml/ddl.sgml
+++ b/doc/src/sgml/ddl.sgml
@@ -4194,9 +4194,7 @@ ALTER TABLE measurement ATTACH PARTITION measurement_y2008m02
      so that they are applied automatically to the entire hierarchy.
      This is very
      convenient, as not only will the existing partitions become indexed, but
-     also any partitions that are created in the future will.  One limitation is
-     that it's not possible to use the <literal>CONCURRENTLY</literal>
-     qualifier when creating such a partitioned index.  To avoid long lock
+     also any partitions that are created in the future will.  To avoid long lock
      times, it is possible to use <command>CREATE INDEX ON ONLY</command>
      the partitioned table; such an index is marked invalid, and the partitions
      do not get the index applied automatically.  The indexes on partitions can
diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml
index 40986aa502f..b05102efdaf 100644
--- a/doc/src/sgml/ref/create_index.sgml
+++ b/doc/src/sgml/ref/create_index.sgml
@@ -645,7 +645,10 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] <replaceable class=
    <para>
     If a problem arises while scanning the table, such as a deadlock or a
     uniqueness violation in a unique index, the <command>CREATE INDEX</command>
-    command will fail but leave behind an <quote>invalid</quote> index. This index
+    command will fail but leave behind an <quote>invalid</quote> index.
+    If this happens while building an index concurrently on a partitioned
+    table, the command can also leave behind <quote>valid</quote> or
+    <quote>invalid</quote> indexes on table partitions.  The invalid index
     will be ignored for querying purposes because it might be incomplete;
     however it will still consume update overhead. The <application>psql</application>
     <command>\d</command> command will report such an index as <literal>INVALID</literal>:
@@ -692,15 +695,6 @@ Indexes:
     cannot.
    </para>
 
-   <para>
-    Concurrent builds for indexes on partitioned tables are currently not
-    supported.  However, you may concurrently build the index on each
-    partition individually and then finally create the partitioned index
-    non-concurrently in order to reduce the time where writes to the
-    partitioned table will be locked out.  In this case, building the
-    partitioned index is a metadata only operation.
-   </para>
-
   </refsect2>
  </refsect1>
 
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index ab8b81b3020..65477aeb3a8 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -93,6 +93,11 @@ static char *ChooseIndexName(const char *tabname, Oid namespaceId,
 							 bool primary, bool isconstraint);
 static char *ChooseIndexNameAddition(const List *colnames);
 static List *ChooseIndexColumnNames(const List *indexElems);
+static void DefineIndexConcurrentInternal(Oid relationId,
+							  Oid indexRelationId,
+							  IndexInfo *indexInfo,
+							  LOCKTAG heaplocktag,
+							  LockRelId heaprelid);
 static void ReindexIndex(const RangeVar *indexRelation, const ReindexParams *params,
 						 bool isTopLevel);
 static void RangeVarCallbackForReindexIndex(const RangeVar *relation,
@@ -554,7 +559,6 @@ DefineIndex(Oid tableId,
 	bool		amissummarizing;
 	amoptions_function amoptions;
 	bool		partitioned;
-	bool		safe_index;
 	Datum		reloptions;
 	int16	   *coloptions;
 	IndexInfo  *indexInfo;
@@ -562,12 +566,10 @@ DefineIndex(Oid tableId,
 	bits16		constr_flags;
 	int			numberOfAttributes;
 	int			numberOfKeyAttributes;
-	TransactionId limitXmin;
 	ObjectAddress address;
 	LockRelId	heaprelid;
 	LOCKTAG		heaplocktag;
 	LOCKMODE	lockmode;
-	Snapshot	snapshot;
 	Oid			root_save_userid;
 	int			root_save_sec_context;
 	int			root_save_nestlevel;
@@ -697,20 +699,6 @@ DefineIndex(Oid tableId,
 	 * partition.
 	 */
 	partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE;
-	if (partitioned)
-	{
-		/*
-		 * Note: we check 'stmt->concurrent' rather than 'concurrent', so that
-		 * the error is thrown also for temporary tables.  Seems better to be
-		 * consistent, even though we could do it on temporary table because
-		 * we're not actually doing it concurrently.
-		 */
-		if (stmt->concurrent)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("cannot create index on partitioned table \"%s\" concurrently",
-							RelationGetRelationName(rel))));
-	}
 
 	/*
 	 * Don't try to CREATE INDEX on temp tables of other backends.
@@ -1100,10 +1088,6 @@ DefineIndex(Oid tableId,
 		}
 	}
 
-	/* Is index safe for others to ignore?  See set_indexsafe_procflags() */
-	safe_index = indexInfo->ii_Expressions == NIL &&
-		indexInfo->ii_Predicate == NIL;
-
 	/*
 	 * Report index creation if appropriate (delay this till after most of the
 	 * error checks)
@@ -1168,6 +1152,11 @@ DefineIndex(Oid tableId,
 		if (pd->nparts != 0)
 			flags |= INDEX_CREATE_INVALID;
 	}
+	else if (concurrent && OidIsValid(parentIndexId))
+	{
+		/* If concurrent, initially build index partitions as "invalid" */
+		flags |= INDEX_CREATE_INVALID;
+	}
 
 	if (stmt->deferrable)
 		constr_flags |= INDEX_CONSTR_CREATE_DEFERRABLE;
@@ -1516,21 +1505,7 @@ DefineIndex(Oid tableId,
 			 */
 			if (invalidate_parent)
 			{
-				Relation	pg_index = table_open(IndexRelationId, RowExclusiveLock);
-				HeapTuple	tup,
-							newtup;
-
-				tup = SearchSysCache1(INDEXRELID,
-									  ObjectIdGetDatum(indexRelationId));
-				if (!HeapTupleIsValid(tup))
-					elog(ERROR, "cache lookup failed for index %u",
-						 indexRelationId);
-				newtup = heap_copytuple(tup);
-				((Form_pg_index) GETSTRUCT(newtup))->indisvalid = false;
-				CatalogTupleUpdate(pg_index, &tup->t_self, newtup);
-				ReleaseSysCache(tup);
-				table_close(pg_index, RowExclusiveLock);
-				heap_freetuple(newtup);
+				index_set_state_flags(indexRelationId, INDEX_DROP_CLEAR_VALID);
 
 				/*
 				 * CCI here to make this update visible, in case this recurses
@@ -1542,37 +1517,49 @@ DefineIndex(Oid tableId,
 
 		/*
 		 * Indexes on partitioned tables are not themselves built, so we're
-		 * done here.
+		 * done here in the non-concurrent case.
 		 */
-		AtEOXact_GUC(false, root_save_nestlevel);
-		SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
-		table_close(rel, NoLock);
-		if (!OidIsValid(parentIndexId))
-			pgstat_progress_end_command();
-		else
+		if (!concurrent)
 		{
-			/* Update progress for an intermediate partitioned index itself */
-			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
-		}
+			AtEOXact_GUC(false, root_save_nestlevel);
+			SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
+			table_close(rel, NoLock);
 
-		return address;
+			if (!OidIsValid(parentIndexId))
+				pgstat_progress_end_command();
+			else
+			{
+				/*
+				 * Update progress for an intermediate partitioned index
+				 * itself
+				 */
+				pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+			}
+
+			return address;
+		}
 	}
 
 	AtEOXact_GUC(false, root_save_nestlevel);
 	SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
 
-	if (!concurrent)
+	/*
+	 * All done in the non-concurrent case, and when building catalog entries
+	 * of partitions for CIC.
+	 */
+	if (!concurrent || OidIsValid(parentIndexId))
 	{
-		/* Close the heap and we're done, in the non-concurrent case */
 		table_close(rel, NoLock);
 
 		/*
 		 * If this is the top-level index, the command is done overall;
-		 * otherwise, increment progress to report one child index is done.
+		 * otherwise (when being called recursively), increment progress to
+		 * report that one child index is done.  Except in the concurrent
+		 * (catalog-only) case, which is handled later.
 		 */
 		if (!OidIsValid(parentIndexId))
 			pgstat_progress_end_command();
-		else
+		else if (!concurrent)
 			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
 
 		return address;
@@ -1583,6 +1570,113 @@ DefineIndex(Oid tableId,
 	SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
 	table_close(rel, NoLock);
 
+	if (!partitioned)
+	{
+		/* CREATE INDEX CONCURRENTLY on a nonpartitioned table */
+		DefineIndexConcurrentInternal(tableId, indexRelationId,
+									  indexInfo, heaplocktag, heaprelid);
+		pgstat_progress_end_command();
+		return address;
+	}
+	else
+	{
+		/*
+		 * For CIC on a partitioned table, finish by building indexes on
+		 * partitions
+		 */
+
+		ListCell   *lc;
+		List	   *childs;
+		List	   *tosetvalid = NIL;
+		MemoryContext cic_context,
+					old_context;
+
+		/* Create special memory context for cross-transaction storage */
+		cic_context = AllocSetContextCreate(PortalContext,
+											"Create index concurrently",
+											ALLOCSET_DEFAULT_SIZES);
+
+		old_context = MemoryContextSwitchTo(cic_context);
+		childs = find_all_inheritors(indexRelationId, ShareLock, NULL);
+		MemoryContextSwitchTo(old_context);
+
+		foreach(lc, childs)
+		{
+			Oid			indrelid = lfirst_oid(lc);
+			Oid			tabrelid;
+			char		relkind;
+
+			/*
+			 * Pre-existing partitions which were ATTACHED were already
+			 * counted in the progress report.
+			 */
+			if (get_index_isvalid(indrelid))
+				continue;
+
+			/*
+			 * Partitioned indexes are counted in the progress report, but
+			 * don't need to be further processed.
+			 */
+			relkind = get_rel_relkind(indrelid);
+			if (!RELKIND_HAS_STORAGE(relkind))
+			{
+				/* The toplevel index doesn't count towards "partitions done" */
+				if (indrelid != indexRelationId)
+					pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+
+				/*
+				 * Build up a list of all the intermediate partitioned tables
+				 * which will later need to be set valid.
+				 */
+				old_context = MemoryContextSwitchTo(cic_context);
+				tosetvalid = lappend_oid(tosetvalid, indrelid);
+				MemoryContextSwitchTo(old_context);
+				continue;
+			}
+
+			tabrelid = IndexGetRelation(indrelid, false);
+			rel = table_open(tabrelid, ShareUpdateExclusiveLock);
+			heaprelid = rel->rd_lockInfo.lockRelId;
+			table_close(rel, ShareUpdateExclusiveLock);
+			SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
+
+			/* Process each partition in a separate transaction */
+			DefineIndexConcurrentInternal(tabrelid, indrelid, indexInfo,
+										  heaplocktag, heaprelid);
+
+			PushActiveSnapshot(GetTransactionSnapshot());
+			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+		}
+
+		/* Set as valid all partitioned indexes, including the parent */
+		foreach(lc, tosetvalid)
+		{
+			Oid			indrelid = lfirst_oid(lc);
+
+			index_set_state_flags(indrelid, INDEX_CREATE_SET_READY);
+			CommandCounterIncrement();
+			index_set_state_flags(indrelid, INDEX_CREATE_SET_VALID);
+		}
+
+		MemoryContextDelete(cic_context);
+		pgstat_progress_end_command();
+		PopActiveSnapshot();
+		return address;
+	}
+}
+
+
+static void
+DefineIndexConcurrentInternal(Oid tableId, Oid indexRelationId, IndexInfo *indexInfo,
+							  LOCKTAG heaplocktag, LockRelId heaprelid)
+{
+	TransactionId limitXmin;
+	Snapshot	snapshot;
+
+	/* Is index safe for others to ignore?  See set_indexsafe_procflags() */
+	bool		safe_index = indexInfo->ii_Expressions == NIL &&
+		indexInfo->ii_Predicate == NIL;
+
 	/*
 	 * For a concurrent build, it's important to make the catalog entries
 	 * visible to other transactions before we start to build the index. That
@@ -1778,10 +1872,6 @@ DefineIndex(Oid tableId,
 	 * Last thing to do is release the session-level lock on the parent table.
 	 */
 	UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
-
-	pgstat_progress_end_command();
-
-	return address;
 }
 
 
diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out
index 087f955b1e6..5d529454993 100644
--- a/src/test/regress/expected/indexing.out
+++ b/src/test/regress/expected/indexing.out
@@ -50,11 +50,130 @@ select relname, relkind, relhassubclass, inhparent::regclass
 (8 rows)
 
 drop table idxpart;
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
-ERROR:  cannot create index on partitioned table "idxpart" concurrently
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+create table idxpart3 partition of idxpart for values from (30) to (40) partition by range(a);
+create table idxpart31 partition of idxpart3 default;
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart2 (a); -- leaf
+create index concurrently on idxpart (a); -- partitioned
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+ERROR:  could not create unique index "idxpart2_a_idx1"
+DETAIL:  Key (a)=(10) is duplicated.
+\d idxpart
+        Partitioned table "public.idxpart"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition key: RANGE (a)
+Indexes:
+    "idxpart_a_idx" btree (a)
+    "idxpart_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 3 (Use \d+ to list them.)
+
+\d idxpart1
+        Partitioned table "public.idxpart1"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart1_a_idx" btree (a)
+    "idxpart1_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart11
+       Partitioned table "public.idxpart11"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart1 FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart11_a_idx" btree (a)
+    "idxpart11_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart111
+       Partitioned table "public.idxpart111"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart11 DEFAULT
+Partition key: RANGE (a)
+Indexes:
+    "idxpart111_a_idx" btree (a)
+    "idxpart111_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart1111
+      Partitioned table "public.idxpart1111"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart111 DEFAULT
+Partition key: RANGE (a)
+Indexes:
+    "idxpart1111_a_idx" btree (a)
+    "idxpart1111_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 0
+
+\d idxpart2
+              Table "public.idxpart2"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (10) TO (20)
+Indexes:
+    "idxpart2_a_idx" btree (a)
+    "idxpart2_a_idx1" UNIQUE, btree (a) INVALID
+
+\d idxpart3
+        Partitioned table "public.idxpart3"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (30) TO (40)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart3_a_idx" btree (a)
+    "idxpart3_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart31
+             Table "public.idxpart31"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart3 DEFAULT
+Indexes:
+    "idxpart31_a_idx" btree (a)
+    "idxpart31_a_idx1" UNIQUE, btree (a) INVALID
+
 drop table idxpart;
 -- Verify bugfix with query on indexed partitioned table with no partitions
 -- https://postgr.es/m/20180124162006.pmapfiznhgngwtjf@alvherre.pgsql
diff --git a/src/test/regress/sql/indexing.sql b/src/test/regress/sql/indexing.sql
index 44f6788915c..96c63615e9a 100644
--- a/src/test/regress/sql/indexing.sql
+++ b/src/test/regress/sql/indexing.sql
@@ -29,10 +29,30 @@ select relname, relkind, relhassubclass, inhparent::regclass
 	where relname like 'idxpart%' order by relname;
 drop table idxpart;
 
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+create table idxpart3 partition of idxpart for values from (30) to (40) partition by range(a);
+create table idxpart31 partition of idxpart3 default;
+
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart2 (a); -- leaf
+create index concurrently on idxpart (a); -- partitioned
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+\d idxpart
+\d idxpart1
+\d idxpart11
+\d idxpart111
+\d idxpart1111
+\d idxpart2
+\d idxpart3
+\d idxpart31
 drop table idxpart;
 
 -- Verify bugfix with query on indexed partitioned table with no partitions
-- 
2.34.1

#27Ilya Gladyshev
ilya.v.gladyshev@gmail.com
In reply to: Alexander Pyhalov (#26)
1 attachment(s)
Re: CREATE INDEX CONCURRENTLY on partitioned index

Hi,

I think it's well worth the effort to revive the patch, so I rebased it
on master, updated it, and will return it to the commitfest.
Alexander, Justin, feel free to add yourselves as authors.

On 29.01.2024 12:43, Alexander Pyhalov wrote:

Hi.

I've rebased the patch on master and it seems to me there's one more issue -

when we call DefineIndexConcurrentInternal() in partitioned case, it
waits for transactions, locking tableId, not tabrelid - heaprelid
LockRelId is constructed for parent index relation, not for child
index relation.

Attaching fixed version.

Also I'm not sure what to do about locking of child relations. If we
don't do anything, you can drop one of the partitioned table's children
while CIC is in progress, and get the error

ERROR:  cache lookup failed for index 16399

I agree that we need to do something about it, in particular, I think we
should lock all the partitions inside the transaction that builds the
catalog entries. Fixed this in the new version.

If you try to lock all child tables in CIC session, you'll get deadlocks.

Do you mean the deadlock between the transaction that drops a partition
and the transaction doing CIC? I think this is unavoidable and can be
reproduced even without partitioning.

Also, I'm not sure why the list of child relations was obtained with
ShareLock, which CIC is supposed to avoid so as not to block writes; I
changed that to ShareUpdateExclusive.

Regards,

Ilya

Attachments:

v2-0001-Allow-CREATE-INDEX-CONCURRENTLY-on-partitioned-ta.patchtext/x-patch; charset=UTF-8; name=v2-0001-Allow-CREATE-INDEX-CONCURRENTLY-on-partitioned-ta.patchDownload
From 1cb76c8c19b1c0549fbba70febc32017bc04c0a2 Mon Sep 17 00:00:00 2001
From: Ilya Gladyshev <ilya.v.gladyshev@gmail.com>
Date: Thu, 23 May 2024 18:13:41 +0100
Subject: [PATCH v2] Allow CREATE INDEX CONCURRENTLY on partitioned table

---
 doc/src/sgml/ddl.sgml                  |   7 +-
 doc/src/sgml/ref/create_index.sgml     |  14 +-
 src/backend/commands/indexcmds.c       | 293 ++++++++++++++++++-------
 src/test/regress/expected/indexing.out | 127 ++++++++++-
 src/test/regress/sql/indexing.sql      |  26 ++-
 5 files changed, 367 insertions(+), 100 deletions(-)

diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml
index 6aab79e901..f1d4a59a99 100644
--- a/doc/src/sgml/ddl.sgml
+++ b/doc/src/sgml/ddl.sgml
@@ -4314,10 +4314,9 @@ ALTER TABLE measurement ATTACH PARTITION measurement_y2008m02
      As mentioned earlier, it is possible to create indexes on partitioned
      tables so that they are applied automatically to the entire hierarchy.
      This can be very convenient as not only will all existing partitions be
-     indexed, but any future partitions will be as well.  However, one
-     limitation when creating new indexes on partitioned tables is that it
-     is not possible to use the <literal>CONCURRENTLY</literal>
-     qualifier, which could lead to long lock times.  To avoid this, you can
+     indexed, but any future partitions will be as well.
+     <command>CREATE INDEX ... CONCURRENTLY</command> can incur long lock times
+     on huge partitioned tables, to avoid that you can
      use <command>CREATE INDEX ON ONLY</command> the partitioned table, which
      creates the new index marked as invalid, preventing automatic application
      to existing partitions.  Instead, indexes can then be created individually
diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml
index 621bc0e253..2366cfd9b5 100644
--- a/doc/src/sgml/ref/create_index.sgml
+++ b/doc/src/sgml/ref/create_index.sgml
@@ -645,7 +645,10 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] <replaceable class=
    <para>
     If a problem arises while scanning the table, such as a deadlock or a
     uniqueness violation in a unique index, the <command>CREATE INDEX</command>
-    command will fail but leave behind an <quote>invalid</quote> index. This index
+    command will fail but leave behind an <quote>invalid</quote> index.
+    If this happens while building an index concurrently on a partitioned
+    table, the command can also leave behind <quote>valid</quote> or
+    <quote>invalid</quote> indexes on table partitions.  The invalid index
     will be ignored for querying purposes because it might be incomplete;
     however it will still consume update overhead. The <application>psql</application>
     <command>\d</command> command will report such an index as <literal>INVALID</literal>:
@@ -692,15 +695,6 @@ Indexes:
     cannot.
    </para>
 
-   <para>
-    Concurrent builds for indexes on partitioned tables are currently not
-    supported.  However, you may concurrently build the index on each
-    partition individually and then finally create the partitioned index
-    non-concurrently in order to reduce the time where writes to the
-    partitioned table will be locked out.  In this case, building the
-    partitioned index is a metadata only operation.
-   </para>
-
   </refsect2>
  </refsect1>
 
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 309389e20d..5806eeb8ef 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -95,6 +95,11 @@ static char *ChooseIndexName(const char *tabname, Oid namespaceId,
 							 bool primary, bool isconstraint);
 static char *ChooseIndexNameAddition(const List *colnames);
 static List *ChooseIndexColumnNames(const List *indexElems);
+static void DefineIndexConcurrentInternal(Oid relationId,
+										  Oid indexRelationId,
+										  IndexInfo *indexInfo,
+										  LOCKTAG heaplocktag,
+										  LockRelId *heaprelid);
 static void ReindexIndex(const ReindexStmt *stmt, const ReindexParams *params,
 						 bool isTopLevel);
 static void RangeVarCallbackForReindexIndex(const RangeVar *relation,
@@ -561,7 +566,6 @@ DefineIndex(Oid tableId,
 	bool		amissummarizing;
 	amoptions_function amoptions;
 	bool		partitioned;
-	bool		safe_index;
 	Datum		reloptions;
 	int16	   *coloptions;
 	IndexInfo  *indexInfo;
@@ -569,12 +573,10 @@ DefineIndex(Oid tableId,
 	bits16		constr_flags;
 	int			numberOfAttributes;
 	int			numberOfKeyAttributes;
-	TransactionId limitXmin;
 	ObjectAddress address;
 	LockRelId	heaprelid;
 	LOCKTAG		heaplocktag;
 	LOCKMODE	lockmode;
-	Snapshot	snapshot;
 	Oid			root_save_userid;
 	int			root_save_sec_context;
 	int			root_save_nestlevel;
@@ -706,20 +708,6 @@ DefineIndex(Oid tableId,
 	 * partition.
 	 */
 	partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE;
-	if (partitioned)
-	{
-		/*
-		 * Note: we check 'stmt->concurrent' rather than 'concurrent', so that
-		 * the error is thrown also for temporary tables.  Seems better to be
-		 * consistent, even though we could do it on temporary table because
-		 * we're not actually doing it concurrently.
-		 */
-		if (stmt->concurrent)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("cannot create index on partitioned table \"%s\" concurrently",
-							RelationGetRelationName(rel))));
-	}
 
 	/*
 	 * Don't try to CREATE INDEX on temp tables of other backends.
@@ -1116,10 +1104,6 @@ DefineIndex(Oid tableId,
 		}
 	}
 
-	/* Is index safe for others to ignore?  See set_indexsafe_procflags() */
-	safe_index = indexInfo->ii_Expressions == NIL &&
-		indexInfo->ii_Predicate == NIL;
-
 	/*
 	 * Report index creation if appropriate (delay this till after most of the
 	 * error checks)
@@ -1184,6 +1168,11 @@ DefineIndex(Oid tableId,
 		if (pd->nparts != 0)
 			flags |= INDEX_CREATE_INVALID;
 	}
+	else if (concurrent && OidIsValid(parentIndexId))
+	{
+		/* If concurrent, initially build index partitions as "invalid" */
+		flags |= INDEX_CREATE_INVALID;
+	}
 
 	if (stmt->deferrable)
 		constr_flags |= INDEX_CONSTR_CREATE_DEFERRABLE;
@@ -1533,21 +1522,7 @@ DefineIndex(Oid tableId,
 			 */
 			if (invalidate_parent)
 			{
-				Relation	pg_index = table_open(IndexRelationId, RowExclusiveLock);
-				HeapTuple	tup,
-							newtup;
-
-				tup = SearchSysCache1(INDEXRELID,
-									  ObjectIdGetDatum(indexRelationId));
-				if (!HeapTupleIsValid(tup))
-					elog(ERROR, "cache lookup failed for index %u",
-						 indexRelationId);
-				newtup = heap_copytuple(tup);
-				((Form_pg_index) GETSTRUCT(newtup))->indisvalid = false;
-				CatalogTupleUpdate(pg_index, &tup->t_self, newtup);
-				ReleaseSysCache(tup);
-				table_close(pg_index, RowExclusiveLock);
-				heap_freetuple(newtup);
+				index_set_state_flags(indexRelationId, INDEX_DROP_CLEAR_VALID);
 
 				/*
 				 * CCI here to make this update visible, in case this recurses
@@ -1559,37 +1534,49 @@ DefineIndex(Oid tableId,
 
 		/*
 		 * Indexes on partitioned tables are not themselves built, so we're
-		 * done here.
+		 * done here in the non-concurrent case.
 		 */
-		AtEOXact_GUC(false, root_save_nestlevel);
-		SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
-		table_close(rel, NoLock);
-		if (!OidIsValid(parentIndexId))
-			pgstat_progress_end_command();
-		else
+		if (!concurrent)
 		{
-			/* Update progress for an intermediate partitioned index itself */
-			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
-		}
+			AtEOXact_GUC(false, root_save_nestlevel);
+			SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
+			table_close(rel, NoLock);
 
-		return address;
+			if (!OidIsValid(parentIndexId))
+				pgstat_progress_end_command();
+			else
+			{
+				/*
+				 * Update progress for an intermediate partitioned index
+				 * itself
+				 */
+				pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+			}
+
+			return address;
+		}
 	}
 
 	AtEOXact_GUC(false, root_save_nestlevel);
 	SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
 
-	if (!concurrent)
+	/*
+	 * All done in the non-concurrent case, and when building catalog entries
+	 * of partitions for CIC.
+	 */
+	if (!concurrent || OidIsValid(parentIndexId))
 	{
-		/* Close the heap and we're done, in the non-concurrent case */
 		table_close(rel, NoLock);
 
 		/*
 		 * If this is the top-level index, the command is done overall;
-		 * otherwise, increment progress to report one child index is done.
+		 * otherwise (when being called recursively), increment progress to
+		 * report that one child index is done.  Except in the concurrent
+		 * (catalog-only) case, which is handled later.
 		 */
 		if (!OidIsValid(parentIndexId))
 			pgstat_progress_end_command();
-		else
+		else if (!concurrent)
 			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
 
 		return address;
@@ -1600,28 +1587,180 @@ DefineIndex(Oid tableId,
 	SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
 	table_close(rel, NoLock);
 
-	/*
-	 * For a concurrent build, it's important to make the catalog entries
-	 * visible to other transactions before we start to build the index. That
-	 * will prevent them from making incompatible HOT updates.  The new index
-	 * will be marked not indisready and not indisvalid, so that no one else
-	 * tries to either insert into it or use it for queries.
-	 *
-	 * We must commit our current transaction so that the index becomes
-	 * visible; then start another.  Note that all the data structures we just
-	 * built are lost in the commit.  The only data we keep past here are the
-	 * relation IDs.
-	 *
-	 * Before committing, get a session-level lock on the table, to ensure
-	 * that neither it nor the index can be dropped before we finish. This
-	 * cannot block, even if someone else is waiting for access, because we
-	 * already have the same lock within our transaction.
-	 *
-	 * Note: we don't currently bother with a session lock on the index,
-	 * because there are no operations that could change its state while we
-	 * hold lock on the parent table.  This might need to change later.
-	 */
-	LockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
+	if (!partitioned)
+	{
+		/*
+		 * For a concurrent build, it's important to make the catalog entries
+		 * visible to other transactions before we start to build the index.
+		 * That will prevent them from making incompatible HOT updates.  The
+		 * new index will be marked not indisready and not indisvalid, so that
+		 * no one else tries to either insert into it or use it for queries.
+		 *
+		 * DefineIndexConcurrentInternal will commit our current transaction
+		 * so that the index becomes visible; then start another.  Note that
+		 * all the data structures we just built are lost in the commit.  The
+		 * only data we keep past here are the relation IDs.
+		 *
+		 * Before committing, get a session-level lock on the table, to ensure
+		 * that neither it nor the index can be dropped before we finish. This
+		 * cannot block, even if someone else is waiting for access, because
+		 * we already have the same lock within our transaction.
+		 *
+		 * Note: we don't currently bother with a session lock on the index,
+		 * because there are no operations that could change its state while
+		 * we hold lock on the parent table.  This might need to change later.
+		 */
+
+		LockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
+
+		/* CREATE INDEX CONCURRENTLY on a nonpartitioned table */
+		DefineIndexConcurrentInternal(tableId, indexRelationId,
+									  indexInfo, heaplocktag, &heaprelid);
+		pgstat_progress_end_command();
+		return address;
+	}
+	else
+	{
+		/*
+		 * For CIC on a partitioned table, finish by building indexes on
+		 * partitions
+		 */
+
+		ListCell   *lc;
+		ListCell   *lc2;
+		List	   *childs;
+		List	   *part_idxs = NIL;
+		List	   *leaf_idxs = NIL;
+		List	   *leaf_idx_lockids = NIL;
+		List	   *part_idx_lockids = NIL;
+		MemoryContext cic_context,
+					old_context;
+
+		/* Create special memory context for cross-transaction storage */
+		cic_context = AllocSetContextCreate(PortalContext,
+											"Create index concurrently",
+											ALLOCSET_DEFAULT_SIZES);
+
+		old_context = MemoryContextSwitchTo(cic_context);
+		childs = find_all_inheritors(indexRelationId, ShareUpdateExclusiveLock, NULL);
+		MemoryContextSwitchTo(old_context);
+
+		foreach(lc, childs)
+		{
+			Oid			indrelid = lfirst_oid(lc);
+			char		relkind;
+			LockRelId  *lockrelid;
+			Oid			tabrelid;
+
+			/*
+			 * Pre-existing partitions which were ATTACHED were already
+			 * counted in the progress report.
+			 */
+			if (get_index_isvalid(indrelid))
+				continue;
+
+			tabrelid = IndexGetRelation(indrelid, false);
+			rel = table_open(tabrelid, ShareUpdateExclusiveLock);
+			lockrelid = palloc_object(LockRelId);
+			*lockrelid = rel->rd_lockInfo.lockRelId;
+			table_close(rel, ShareUpdateExclusiveLock);
+
+			/*
+			 * Split partitions in 2 lists: partitioned tables that just need
+			 * to be marked as valid and leaf tables that actually need to
+			 * have their indexes built.
+			 */
+			relkind = get_rel_relkind(indrelid);
+			if (!RELKIND_HAS_STORAGE(relkind))
+			{
+				/* The toplevel index doesn't count towards "partitions done" */
+				if (indrelid != indexRelationId)
+					pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+
+				/*
+				 * Build up a list of all the intermediate partitioned tables
+				 * which will later need to be set valid.
+				 */
+				old_context = MemoryContextSwitchTo(cic_context);
+				part_idxs = lappend_oid(part_idxs, indrelid);
+				part_idx_lockids = lappend(part_idx_lockids, lockrelid);
+				MemoryContextSwitchTo(old_context);
+			}
+			else
+			{
+				old_context = MemoryContextSwitchTo(cic_context);
+
+				leaf_idxs = lappend_oid(leaf_idxs, indrelid);
+				leaf_idx_lockids = lappend(leaf_idx_lockids, lockrelid);
+
+				MemoryContextSwitchTo(old_context);
+
+			}
+
+			/*
+			 * All partitions, including top-level parent, need to be locked
+			 * for the session before proceeding with any index rebuilds as
+			 * each of those are done in a separate transaction. After each
+			 * leaf index is built, its corresponding table will be unlocked,
+			 * all the partitioned tables will be unlocked at the very end.
+			 */
+
+			LockRelationIdForSession(lockrelid, ShareUpdateExclusiveLock);
+
+		}
+
+		forboth(lc, leaf_idxs, lc2, leaf_idx_lockids)
+		{
+			Oid			indrelid = lfirst_oid(lc);
+			LockRelId  *heaprelid = lfirst(lc2);
+			Oid			tabrelid = IndexGetRelation(indrelid, false);
+
+			SET_LOCKTAG_RELATION(heaplocktag, heaprelid->dbId, heaprelid->relId);
+
+			/* Process each partition in a separate transaction */
+			DefineIndexConcurrentInternal(tabrelid, indrelid, indexInfo,
+										  heaplocktag, heaprelid);
+
+			PushActiveSnapshot(GetTransactionSnapshot());
+			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+		}
+
+		/* Set as valid all partitioned indexes, including the parent */
+		foreach(lc, part_idxs)
+		{
+			Oid			indrelid = lfirst_oid(lc);
+
+			index_set_state_flags(indrelid, INDEX_CREATE_SET_READY);
+			CommandCounterIncrement();
+			index_set_state_flags(indrelid, INDEX_CREATE_SET_VALID);
+		}
+
+		foreach(lc, part_idx_lockids)
+		{
+			LockRelId  *lockrelid = lfirst(lc);
+
+			UnlockRelationIdForSession(lockrelid, ShareUpdateExclusiveLock);
+		}
+
+
+		MemoryContextDelete(cic_context);
+		pgstat_progress_end_command();
+		PopActiveSnapshot();
+		return address;
+	}
+}
+
+
+static void
+DefineIndexConcurrentInternal(Oid tableId, Oid indexRelationId, IndexInfo *indexInfo,
+							  LOCKTAG heaplocktag, LockRelId *heaprelid)
+{
+	TransactionId limitXmin;
+	Snapshot	snapshot;
+
+	/* Is index safe for others to ignore?  See set_indexsafe_procflags() */
+	bool		safe_index = indexInfo->ii_Expressions == NIL &&
+		indexInfo->ii_Predicate == NIL;
 
 	PopActiveSnapshot();
 	CommitTransactionCommand();
@@ -1789,16 +1928,12 @@ DefineIndex(Oid tableId,
 	 * would be useful.  (Note that our earlier commits did not create reasons
 	 * to replan; so relcache flush on the index itself was sufficient.)
 	 */
-	CacheInvalidateRelcacheByRelid(heaprelid.relId);
+	CacheInvalidateRelcacheByRelid(heaprelid->relId);
 
 	/*
 	 * Last thing to do is release the session-level lock on the parent table.
 	 */
-	UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
-
-	pgstat_progress_end_command();
-
-	return address;
+	UnlockRelationIdForSession(heaprelid, ShareUpdateExclusiveLock);
 }
 
 
diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out
index f25723da92..44a6cf39df 100644
--- a/src/test/regress/expected/indexing.out
+++ b/src/test/regress/expected/indexing.out
@@ -50,11 +50,130 @@ select relname, relkind, relhassubclass, inhparent::regclass
 (8 rows)
 
 drop table idxpart;
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
-ERROR:  cannot create index on partitioned table "idxpart" concurrently
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+create table idxpart3 partition of idxpart for values from (30) to (40) partition by range(a);
+create table idxpart31 partition of idxpart3 default;
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart2 (a); -- leaf
+create index concurrently on idxpart (a); -- partitioned
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+ERROR:  could not create unique index "idxpart2_a_idx1"
+DETAIL:  Key (a)=(10) is duplicated.
+\d idxpart
+        Partitioned table "public.idxpart"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition key: RANGE (a)
+Indexes:
+    "idxpart_a_idx" btree (a)
+    "idxpart_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 3 (Use \d+ to list them.)
+
+\d idxpart1
+        Partitioned table "public.idxpart1"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart1_a_idx" btree (a)
+    "idxpart1_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart11
+       Partitioned table "public.idxpart11"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart1 FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart11_a_idx" btree (a)
+    "idxpart11_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart111
+       Partitioned table "public.idxpart111"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart11 DEFAULT
+Partition key: RANGE (a)
+Indexes:
+    "idxpart111_a_idx" btree (a)
+    "idxpart111_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart1111
+      Partitioned table "public.idxpart1111"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart111 DEFAULT
+Partition key: RANGE (a)
+Indexes:
+    "idxpart1111_a_idx" btree (a)
+    "idxpart1111_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 0
+
+\d idxpart2
+              Table "public.idxpart2"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (10) TO (20)
+Indexes:
+    "idxpart2_a_idx" btree (a)
+    "idxpart2_a_idx1" UNIQUE, btree (a) INVALID
+
+\d idxpart3
+        Partitioned table "public.idxpart3"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (30) TO (40)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart3_a_idx" btree (a)
+    "idxpart3_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart31
+             Table "public.idxpart31"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart3 DEFAULT
+Indexes:
+    "idxpart31_a_idx" btree (a)
+    "idxpart31_a_idx1" UNIQUE, btree (a) INVALID
+
 drop table idxpart;
 -- Verify bugfix with query on indexed partitioned table with no partitions
 -- https://postgr.es/m/20180124162006.pmapfiznhgngwtjf@alvherre.pgsql
diff --git a/src/test/regress/sql/indexing.sql b/src/test/regress/sql/indexing.sql
index 5f1f4b80c9..38a730d877 100644
--- a/src/test/regress/sql/indexing.sql
+++ b/src/test/regress/sql/indexing.sql
@@ -29,10 +29,30 @@ select relname, relkind, relhassubclass, inhparent::regclass
 	where relname like 'idxpart%' order by relname;
 drop table idxpart;
 
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+create table idxpart3 partition of idxpart for values from (30) to (40) partition by range(a);
+create table idxpart31 partition of idxpart3 default;
+
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart2 (a); -- leaf
+create index concurrently on idxpart (a); -- partitioned
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+\d idxpart
+\d idxpart1
+\d idxpart11
+\d idxpart111
+\d idxpart1111
+\d idxpart2
+\d idxpart3
+\d idxpart31
 drop table idxpart;
 
 -- Verify bugfix with query on indexed partitioned table with no partitions
-- 
2.34.1

#28Alexander Pyhalov
a.pyhalov@postgrespro.ru
In reply to: Ilya Gladyshev (#27)
Re: CREATE INDEX CONCURRENTLY on partitioned index

Ilya Gladyshev писал(а) 2024-05-24 00:14:

Hi,

Hi.

I think it's well worth the effort to revive the patch, so I rebased it
on master, updated it and will return it back to the commitfest.
Alexander, Justin feel free to add yourselves as authors

On 29.01.2024 12:43, Alexander Pyhalov wrote:

Hi.

I've rebased the patch on master and it seems to me there's one more issue
-

when we call DefineIndexConcurrentInternal() in partitioned case, it
waits for transactions, locking tableId, not tabrelid - heaprelid
LockRelId is constructed for parent index relation, not for child
index relation.

Attaching fixed version.

Also I'm not sure what to do with locking of child relations. If we
don't do anything, you can drop one of the partitioned table childs
while CIC is in progress, and get error

ERROR:  cache lookup failed for index 16399

I agree that we need to do something about it, in particular, I think
we should lock all the partitions inside the transaction that builds
the catalog entries. Fixed this in the new version.

If you try to lock all child tables in CIC session, you'll get
deadlocks.

Do you mean the deadlock between the transaction that drops a partition
and the transaction doing CIC? I think this is unavoidable and can be
reproduced even without partitioning.

Yes, it seems we trade this error for a possible deadlock between the
transaction dropping a partition and CIC.

Also not sure why a list of children relation was obtained with
ShareLock that CIC is supposed to avoid not to block writes, changed
that to ShareUpdateExclusive.

I expect that it wasn't an issue due to the fact that it's held for a
brief period until DefineIndexConcurrentInternal() commits for the first
time. But it seems more correct to use a ShareUpdateExclusive lock
here.

Also I'd like to note that in new patch version there's a strange
wording in documentation:

"This can be very convenient as not only will all existing partitions be
indexed, but any future partitions will be as well.
<command>CREATE INDEX ... CONCURRENTLY</command> can incur long lock
times
on huge partitioned tables, to avoid that you can
use <command>CREATE INDEX ON ONLY</command> the partitioned table,
which
creates the new index marked as invalid, preventing automatic
application
to existing partitions."

The whole point of CIC is to avoid long lock times. So it seems this
paragraph should be rewritten in the following way:

"To avoid long lock times, you can use <command>CREATE INDEX CONCURRENTLY</command> or
<command>CREATE INDEX ON ONLY</command> the partitioned table..."

--
Best regards,
Alexander Pyhalov,
Postgres Professional

#29Ilya Gladyshev
ilya.v.gladyshev@gmail.com
In reply to: Alexander Pyhalov (#28)
1 attachment(s)
Re: CREATE INDEX CONCURRENTLY on partitioned index

On 24.05.2024 10:04, Alexander Pyhalov wrote:

Ilya Gladyshev писал(а) 2024-05-24 00:14:

Hi,

Hi.

I think it's well worth the effort to revive the patch, so I rebased
it on master, updated it and will return it back to the commitfest.
Alexander, Justin feel free to add yourselves as authors

On 29.01.2024 12:43, Alexander Pyhalov wrote:

Hi.

I've rebased the patch on master and it seems to me there's one more
issue -

when we call DefineIndexConcurrentInternal() in partitioned case, it
waits for transactions, locking tableId, not tabrelid - heaprelid
LockRelId is constructed for parent index relation, not for child
index relation.

Attaching fixed version.

Also I'm not sure what to do with locking of child relations. If we
don't do anything, you can drop one of the partitioned table childs
while CIC is in progress, and get error

ERROR:  cache lookup failed for index 16399

I agree that we need to do something about it, in particular, I think
we should lock all the partitions inside the transaction that builds
the catalog entries. Fixed this in the new version.

If you try to lock all child tables in CIC session, you'll get
deadlocks.

Do you mean the deadlock between the transaction that drops a
partition and the transaction doing CIC? I think this is unavoidable
and can be reproduced even without partitioning.

Yes, it seems we trade this error for possible deadlock between
transaction, dropping a partition, and CIC.

Also not sure why a list of children relation was obtained with
ShareLock that CIC is supposed to avoid not to block writes, changed
that to ShareUpdateExclusive.

I expect that it wasn't an issue due to the fact that it's held for a
brief period until DefineIndexConcurrentInternal() commits for the
first time. But it seems, it's more correct to use
ShareUpdateExclusive lock here.

Also I'd like to note that in new patch version there's a strange
wording in documentation:

"This can be very convenient as not only will all existing partitions be
 indexed, but any future partitions will be as well.
 <command>CREATE INDEX ... CONCURRENTLY</command> can incur long lock
times
 on huge partitioned tables, to avoid that you can
 use <command>CREATE INDEX ON ONLY</command> the partitioned table, which
 creates the new index marked as invalid, preventing automatic
application
 to existing partitions."

All the point of CIC is to avoid long lock times. So it seems this
paragraph should be rewritten in the following way:

"To avoid long lock times, you can use <command>CREATE INDEX CONCURRENTLY</command> or
<command>CREATE INDEX ON ONLY</command> the partitioned table..."

True, the current wording doesn't look right. Right now CREATE INDEX ON
ONLY is described as a workaround for the missing CIC. I think it makes
more sense to say that it gives more fine-grained control over partition
locking than both CIC and ordinary CREATE INDEX. See the updated patch.

Attachments:

v3-0001-Allow-CREATE-INDEX-CONCURRENTLY-on-partitioned-ta.patchtext/x-patch; charset=UTF-8; name=v3-0001-Allow-CREATE-INDEX-CONCURRENTLY-on-partitioned-ta.patchDownload
From 7687b4c3ba10fc1df5ca4c8f50198dfb269be8bb Mon Sep 17 00:00:00 2001
From: Ilya Gladyshev <ilya.v.gladyshev@gmail.com>
Date: Thu, 23 May 2024 18:13:41 +0100
Subject: [PATCH v3] Allow CREATE INDEX CONCURRENTLY on partitioned table

---
 doc/src/sgml/ddl.sgml                  |  10 +-
 doc/src/sgml/ref/create_index.sgml     |  14 +-
 src/backend/commands/indexcmds.c       | 293 ++++++++++++++++++-------
 src/test/regress/expected/indexing.out | 127 ++++++++++-
 src/test/regress/sql/indexing.sql      |  26 ++-
 5 files changed, 368 insertions(+), 102 deletions(-)

diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml
index 6aab79e901..904978c6e5 100644
--- a/doc/src/sgml/ddl.sgml
+++ b/doc/src/sgml/ddl.sgml
@@ -4314,14 +4314,12 @@ ALTER TABLE measurement ATTACH PARTITION measurement_y2008m02
      As mentioned earlier, it is possible to create indexes on partitioned
      tables so that they are applied automatically to the entire hierarchy.
      This can be very convenient as not only will all existing partitions be
-     indexed, but any future partitions will be as well.  However, one
-     limitation when creating new indexes on partitioned tables is that it
-     is not possible to use the <literal>CONCURRENTLY</literal>
-     qualifier, which could lead to long lock times.  To avoid this, you can
-     use <command>CREATE INDEX ON ONLY</command> the partitioned table, which
+     indexed, but any future partitions will be as well. For more control over
+     locking of the partitions you can use <command>CREATE INDEX ON ONLY</command>
+     on the partitioned table, which
      creates the new index marked as invalid, preventing automatic application
      to existing partitions.  Instead, indexes can then be created individually
-     on each partition using <literal>CONCURRENTLY</literal> and
+     on each partition and
      <firstterm>attached</firstterm> to the partitioned index on the parent
      using <command>ALTER INDEX ... ATTACH PARTITION</command>.  Once indexes for
      all the partitions are attached to the parent index, the parent index will
diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml
index 621bc0e253..2366cfd9b5 100644
--- a/doc/src/sgml/ref/create_index.sgml
+++ b/doc/src/sgml/ref/create_index.sgml
@@ -645,7 +645,10 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] <replaceable class=
    <para>
     If a problem arises while scanning the table, such as a deadlock or a
     uniqueness violation in a unique index, the <command>CREATE INDEX</command>
-    command will fail but leave behind an <quote>invalid</quote> index. This index
+    command will fail but leave behind an <quote>invalid</quote> index.
+    If this happens while building an index concurrently on a partitioned
+    table, the command can also leave behind <quote>valid</quote> or
+    <quote>invalid</quote> indexes on table partitions.  The invalid index
     will be ignored for querying purposes because it might be incomplete;
     however it will still consume update overhead. The <application>psql</application>
     <command>\d</command> command will report such an index as <literal>INVALID</literal>:
@@ -692,15 +695,6 @@ Indexes:
     cannot.
    </para>
 
-   <para>
-    Concurrent builds for indexes on partitioned tables are currently not
-    supported.  However, you may concurrently build the index on each
-    partition individually and then finally create the partitioned index
-    non-concurrently in order to reduce the time where writes to the
-    partitioned table will be locked out.  In this case, building the
-    partitioned index is a metadata only operation.
-   </para>
-
   </refsect2>
  </refsect1>
 
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 309389e20d..5806eeb8ef 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -95,6 +95,11 @@ static char *ChooseIndexName(const char *tabname, Oid namespaceId,
 							 bool primary, bool isconstraint);
 static char *ChooseIndexNameAddition(const List *colnames);
 static List *ChooseIndexColumnNames(const List *indexElems);
+static void DefineIndexConcurrentInternal(Oid relationId,
+										  Oid indexRelationId,
+										  IndexInfo *indexInfo,
+										  LOCKTAG heaplocktag,
+										  LockRelId *heaprelid);
 static void ReindexIndex(const ReindexStmt *stmt, const ReindexParams *params,
 						 bool isTopLevel);
 static void RangeVarCallbackForReindexIndex(const RangeVar *relation,
@@ -561,7 +566,6 @@ DefineIndex(Oid tableId,
 	bool		amissummarizing;
 	amoptions_function amoptions;
 	bool		partitioned;
-	bool		safe_index;
 	Datum		reloptions;
 	int16	   *coloptions;
 	IndexInfo  *indexInfo;
@@ -569,12 +573,10 @@ DefineIndex(Oid tableId,
 	bits16		constr_flags;
 	int			numberOfAttributes;
 	int			numberOfKeyAttributes;
-	TransactionId limitXmin;
 	ObjectAddress address;
 	LockRelId	heaprelid;
 	LOCKTAG		heaplocktag;
 	LOCKMODE	lockmode;
-	Snapshot	snapshot;
 	Oid			root_save_userid;
 	int			root_save_sec_context;
 	int			root_save_nestlevel;
@@ -706,20 +708,6 @@ DefineIndex(Oid tableId,
 	 * partition.
 	 */
 	partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE;
-	if (partitioned)
-	{
-		/*
-		 * Note: we check 'stmt->concurrent' rather than 'concurrent', so that
-		 * the error is thrown also for temporary tables.  Seems better to be
-		 * consistent, even though we could do it on temporary table because
-		 * we're not actually doing it concurrently.
-		 */
-		if (stmt->concurrent)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("cannot create index on partitioned table \"%s\" concurrently",
-							RelationGetRelationName(rel))));
-	}
 
 	/*
 	 * Don't try to CREATE INDEX on temp tables of other backends.
@@ -1116,10 +1104,6 @@ DefineIndex(Oid tableId,
 		}
 	}
 
-	/* Is index safe for others to ignore?  See set_indexsafe_procflags() */
-	safe_index = indexInfo->ii_Expressions == NIL &&
-		indexInfo->ii_Predicate == NIL;
-
 	/*
 	 * Report index creation if appropriate (delay this till after most of the
 	 * error checks)
@@ -1184,6 +1168,11 @@ DefineIndex(Oid tableId,
 		if (pd->nparts != 0)
 			flags |= INDEX_CREATE_INVALID;
 	}
+	else if (concurrent && OidIsValid(parentIndexId))
+	{
+		/* If concurrent, initially build index partitions as "invalid" */
+		flags |= INDEX_CREATE_INVALID;
+	}
 
 	if (stmt->deferrable)
 		constr_flags |= INDEX_CONSTR_CREATE_DEFERRABLE;
@@ -1533,21 +1522,7 @@ DefineIndex(Oid tableId,
 			 */
 			if (invalidate_parent)
 			{
-				Relation	pg_index = table_open(IndexRelationId, RowExclusiveLock);
-				HeapTuple	tup,
-							newtup;
-
-				tup = SearchSysCache1(INDEXRELID,
-									  ObjectIdGetDatum(indexRelationId));
-				if (!HeapTupleIsValid(tup))
-					elog(ERROR, "cache lookup failed for index %u",
-						 indexRelationId);
-				newtup = heap_copytuple(tup);
-				((Form_pg_index) GETSTRUCT(newtup))->indisvalid = false;
-				CatalogTupleUpdate(pg_index, &tup->t_self, newtup);
-				ReleaseSysCache(tup);
-				table_close(pg_index, RowExclusiveLock);
-				heap_freetuple(newtup);
+				index_set_state_flags(indexRelationId, INDEX_DROP_CLEAR_VALID);
 
 				/*
 				 * CCI here to make this update visible, in case this recurses
@@ -1559,37 +1534,49 @@ DefineIndex(Oid tableId,
 
 		/*
 		 * Indexes on partitioned tables are not themselves built, so we're
-		 * done here.
+		 * done here in the non-concurrent case.
 		 */
-		AtEOXact_GUC(false, root_save_nestlevel);
-		SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
-		table_close(rel, NoLock);
-		if (!OidIsValid(parentIndexId))
-			pgstat_progress_end_command();
-		else
+		if (!concurrent)
 		{
-			/* Update progress for an intermediate partitioned index itself */
-			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
-		}
+			AtEOXact_GUC(false, root_save_nestlevel);
+			SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
+			table_close(rel, NoLock);
 
-		return address;
+			if (!OidIsValid(parentIndexId))
+				pgstat_progress_end_command();
+			else
+			{
+				/*
+				 * Update progress for an intermediate partitioned index
+				 * itself
+				 */
+				pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+			}
+
+			return address;
+		}
 	}
 
 	AtEOXact_GUC(false, root_save_nestlevel);
 	SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
 
-	if (!concurrent)
+	/*
+	 * All done in the non-concurrent case, and when building catalog entries
+	 * of partitions for CIC.
+	 */
+	if (!concurrent || OidIsValid(parentIndexId))
 	{
-		/* Close the heap and we're done, in the non-concurrent case */
 		table_close(rel, NoLock);
 
 		/*
 		 * If this is the top-level index, the command is done overall;
-		 * otherwise, increment progress to report one child index is done.
+		 * otherwise (when being called recursively), increment progress to
+		 * report that one child index is done.  Except in the concurrent
+		 * (catalog-only) case, which is handled later.
 		 */
 		if (!OidIsValid(parentIndexId))
 			pgstat_progress_end_command();
-		else
+		else if (!concurrent)
 			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
 
 		return address;
@@ -1600,28 +1587,180 @@ DefineIndex(Oid tableId,
 	SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
 	table_close(rel, NoLock);
 
-	/*
-	 * For a concurrent build, it's important to make the catalog entries
-	 * visible to other transactions before we start to build the index. That
-	 * will prevent them from making incompatible HOT updates.  The new index
-	 * will be marked not indisready and not indisvalid, so that no one else
-	 * tries to either insert into it or use it for queries.
-	 *
-	 * We must commit our current transaction so that the index becomes
-	 * visible; then start another.  Note that all the data structures we just
-	 * built are lost in the commit.  The only data we keep past here are the
-	 * relation IDs.
-	 *
-	 * Before committing, get a session-level lock on the table, to ensure
-	 * that neither it nor the index can be dropped before we finish. This
-	 * cannot block, even if someone else is waiting for access, because we
-	 * already have the same lock within our transaction.
-	 *
-	 * Note: we don't currently bother with a session lock on the index,
-	 * because there are no operations that could change its state while we
-	 * hold lock on the parent table.  This might need to change later.
-	 */
-	LockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
+	if (!partitioned)
+	{
+		/*
+		 * For a concurrent build, it's important to make the catalog entries
+		 * visible to other transactions before we start to build the index.
+		 * That will prevent them from making incompatible HOT updates.  The
+		 * new index will be marked not indisready and not indisvalid, so that
+		 * no one else tries to either insert into it or use it for queries.
+		 *
+		 * DefineIndexConcurrentInternal will commit our current transaction
+		 * so that the index becomes visible; then start another.  Note that
+		 * all the data structures we just built are lost in the commit.  The
+		 * only data we keep past here are the relation IDs.
+		 *
+		 * Before committing, get a session-level lock on the table, to ensure
+		 * that neither it nor the index can be dropped before we finish. This
+		 * cannot block, even if someone else is waiting for access, because
+		 * we already have the same lock within our transaction.
+		 *
+		 * Note: we don't currently bother with a session lock on the index,
+		 * because there are no operations that could change its state while
+		 * we hold lock on the parent table.  This might need to change later.
+		 */
+
+		LockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
+
+		/* CREATE INDEX CONCURRENTLY on a nonpartitioned table */
+		DefineIndexConcurrentInternal(tableId, indexRelationId,
+									  indexInfo, heaplocktag, &heaprelid);
+		pgstat_progress_end_command();
+		return address;
+	}
+	else
+	{
+		/*
+		 * For CIC on a partitioned table, finish by building indexes on
+		 * partitions
+		 */
+
+		ListCell   *lc;
+		ListCell   *lc2;
+		List	   *childs;
+		List	   *part_idxs = NIL;
+		List	   *leaf_idxs = NIL;
+		List	   *leaf_idx_lockids = NIL;
+		List	   *part_idx_lockids = NIL;
+		MemoryContext cic_context,
+					old_context;
+
+		/* Create special memory context for cross-transaction storage */
+		cic_context = AllocSetContextCreate(PortalContext,
+											"Create index concurrently",
+											ALLOCSET_DEFAULT_SIZES);
+
+		old_context = MemoryContextSwitchTo(cic_context);
+		childs = find_all_inheritors(indexRelationId, ShareUpdateExclusiveLock, NULL);
+		MemoryContextSwitchTo(old_context);
+
+		foreach(lc, childs)
+		{
+			Oid			indrelid = lfirst_oid(lc);
+			char		relkind;
+			LockRelId  *lockrelid;
+			Oid			tabrelid;
+
+			/*
+			 * Pre-existing partitions which were ATTACHED were already
+			 * counted in the progress report.
+			 */
+			if (get_index_isvalid(indrelid))
+				continue;
+
+			tabrelid = IndexGetRelation(indrelid, false);
+			rel = table_open(tabrelid, ShareUpdateExclusiveLock);
+			lockrelid = palloc_object(LockRelId);
+			*lockrelid = rel->rd_lockInfo.lockRelId;
+			table_close(rel, ShareUpdateExclusiveLock);
+
+			/*
+			 * Split partitions in 2 lists: partitioned tables that just need
+			 * to be marked as valid and leaf tables that actually need to
+			 * have their indexes built.
+			 */
+			relkind = get_rel_relkind(indrelid);
+			if (!RELKIND_HAS_STORAGE(relkind))
+			{
+				/* The toplevel index doesn't count towards "partitions done" */
+				if (indrelid != indexRelationId)
+					pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+
+				/*
+				 * Build up a list of all the intermediate partitioned tables
+				 * which will later need to be set valid.
+				 */
+				old_context = MemoryContextSwitchTo(cic_context);
+				part_idxs = lappend_oid(part_idxs, indrelid);
+				part_idx_lockids = lappend(part_idx_lockids, lockrelid);
+				MemoryContextSwitchTo(old_context);
+			}
+			else
+			{
+				old_context = MemoryContextSwitchTo(cic_context);
+
+				leaf_idxs = lappend_oid(leaf_idxs, indrelid);
+				leaf_idx_lockids = lappend(leaf_idx_lockids, lockrelid);
+
+				MemoryContextSwitchTo(old_context);
+
+			}
+
+			/*
+			 * All partitions, including the top-level parent, need to be
+			 * locked for the session before proceeding with any index builds,
+			 * as each of those is done in a separate transaction.  After each
+			 * leaf index is built, its corresponding table is unlocked; the
+			 * partitioned tables are all unlocked at the very end.
+			 */
+
+			LockRelationIdForSession(lockrelid, ShareUpdateExclusiveLock);
+
+		}
+
+		forboth(lc, leaf_idxs, lc2, leaf_idx_lockids)
+		{
+			Oid			indrelid = lfirst_oid(lc);
+			LockRelId  *heaprelid = lfirst(lc2);
+			Oid			tabrelid = IndexGetRelation(indrelid, false);
+
+			SET_LOCKTAG_RELATION(heaplocktag, heaprelid->dbId, heaprelid->relId);
+
+			/* Process each partition in a separate transaction */
+			DefineIndexConcurrentInternal(tabrelid, indrelid, indexInfo,
+										  heaplocktag, heaprelid);
+
+			PushActiveSnapshot(GetTransactionSnapshot());
+			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+		}
+
+		/* Set as valid all partitioned indexes, including the parent */
+		foreach(lc, part_idxs)
+		{
+			Oid			indrelid = lfirst_oid(lc);
+
+			index_set_state_flags(indrelid, INDEX_CREATE_SET_READY);
+			CommandCounterIncrement();
+			index_set_state_flags(indrelid, INDEX_CREATE_SET_VALID);
+		}
+
+		foreach(lc, part_idx_lockids)
+		{
+			LockRelId  *lockrelid = lfirst(lc);
+
+			UnlockRelationIdForSession(lockrelid, ShareUpdateExclusiveLock);
+		}
+
+
+		MemoryContextDelete(cic_context);
+		pgstat_progress_end_command();
+		PopActiveSnapshot();
+		return address;
+	}
+}
+
+
+static void
+DefineIndexConcurrentInternal(Oid tableId, Oid indexRelationId, IndexInfo *indexInfo,
+							  LOCKTAG heaplocktag, LockRelId *heaprelid)
+{
+	TransactionId limitXmin;
+	Snapshot	snapshot;
+
+	/* Is index safe for others to ignore?  See set_indexsafe_procflags() */
+	bool		safe_index = indexInfo->ii_Expressions == NIL &&
+		indexInfo->ii_Predicate == NIL;
 
 	PopActiveSnapshot();
 	CommitTransactionCommand();
@@ -1789,16 +1928,12 @@ DefineIndex(Oid tableId,
 	 * would be useful.  (Note that our earlier commits did not create reasons
 	 * to replan; so relcache flush on the index itself was sufficient.)
 	 */
-	CacheInvalidateRelcacheByRelid(heaprelid.relId);
+	CacheInvalidateRelcacheByRelid(heaprelid->relId);
 
 	/*
 	 * Last thing to do is release the session-level lock on the parent table.
 	 */
-	UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
-
-	pgstat_progress_end_command();
-
-	return address;
+	UnlockRelationIdForSession(heaprelid, ShareUpdateExclusiveLock);
 }
 
 
diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out
index f25723da92..44a6cf39df 100644
--- a/src/test/regress/expected/indexing.out
+++ b/src/test/regress/expected/indexing.out
@@ -50,11 +50,130 @@ select relname, relkind, relhassubclass, inhparent::regclass
 (8 rows)
 
 drop table idxpart;
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
-ERROR:  cannot create index on partitioned table "idxpart" concurrently
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+create table idxpart3 partition of idxpart for values from (30) to (40) partition by range(a);
+create table idxpart31 partition of idxpart3 default;
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart2 (a); -- leaf
+create index concurrently on idxpart (a); -- partitioned
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+ERROR:  could not create unique index "idxpart2_a_idx1"
+DETAIL:  Key (a)=(10) is duplicated.
+\d idxpart
+        Partitioned table "public.idxpart"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition key: RANGE (a)
+Indexes:
+    "idxpart_a_idx" btree (a)
+    "idxpart_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 3 (Use \d+ to list them.)
+
+\d idxpart1
+        Partitioned table "public.idxpart1"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart1_a_idx" btree (a)
+    "idxpart1_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart11
+       Partitioned table "public.idxpart11"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart1 FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart11_a_idx" btree (a)
+    "idxpart11_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart111
+       Partitioned table "public.idxpart111"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart11 DEFAULT
+Partition key: RANGE (a)
+Indexes:
+    "idxpart111_a_idx" btree (a)
+    "idxpart111_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart1111
+      Partitioned table "public.idxpart1111"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart111 DEFAULT
+Partition key: RANGE (a)
+Indexes:
+    "idxpart1111_a_idx" btree (a)
+    "idxpart1111_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 0
+
+\d idxpart2
+              Table "public.idxpart2"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (10) TO (20)
+Indexes:
+    "idxpart2_a_idx" btree (a)
+    "idxpart2_a_idx1" UNIQUE, btree (a) INVALID
+
+\d idxpart3
+        Partitioned table "public.idxpart3"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (30) TO (40)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart3_a_idx" btree (a)
+    "idxpart3_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart31
+             Table "public.idxpart31"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart3 DEFAULT
+Indexes:
+    "idxpart31_a_idx" btree (a)
+    "idxpart31_a_idx1" UNIQUE, btree (a) INVALID
+
 drop table idxpart;
 -- Verify bugfix with query on indexed partitioned table with no partitions
 -- https://postgr.es/m/20180124162006.pmapfiznhgngwtjf@alvherre.pgsql
diff --git a/src/test/regress/sql/indexing.sql b/src/test/regress/sql/indexing.sql
index 5f1f4b80c9..38a730d877 100644
--- a/src/test/regress/sql/indexing.sql
+++ b/src/test/regress/sql/indexing.sql
@@ -29,10 +29,30 @@ select relname, relkind, relhassubclass, inhparent::regclass
 	where relname like 'idxpart%' order by relname;
 drop table idxpart;
 
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+create table idxpart3 partition of idxpart for values from (30) to (40) partition by range(a);
+create table idxpart31 partition of idxpart3 default;
+
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart2 (a); -- leaf
+create index concurrently on idxpart (a); -- partitioned
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+\d idxpart
+\d idxpart1
+\d idxpart11
+\d idxpart111
+\d idxpart1111
+\d idxpart2
+\d idxpart3
+\d idxpart31
 drop table idxpart;
 
 -- Verify bugfix with query on indexed partitioned table with no partitions
-- 
2.34.1

#30Alexander Pyhalov
a.pyhalov@postgrespro.ru
In reply to: Ilya Gladyshev (#29)
Re: CREATE INDEX CONCURRENTLY on partitioned index

Ilya Gladyshev писал(а) 2024-05-28 02:52:

Also I'd like to note that in new patch version there's a strange
wording in documentation:

"This can be very convenient as not only will all existing partitions
be
 indexed, but any future partitions will be as well.
 <command>CREATE INDEX ... CONCURRENTLY</command> can incur long lock
times
 on huge partitioned tables, to avoid that you can
 use <command>CREATE INDEX ON ONLY</command> the partitioned table,
which
 creates the new index marked as invalid, preventing automatic
application
 to existing partitions."

The whole point of CIC is to avoid long lock times. So it seems this
paragraph should be rewritten in the following way:

"To avoid long lock times, you can use <command>CREATE INDEX CONCURRENTLY</command>
or <command>CREATE INDEX ON ONLY</command> the partitioned table..."

True, the current wording doesn't look right. Right now CREATE INDEX ON
ONLY is described as a workaround for the missing CIC. I think it
rather makes sense to say that it gives more fine-grained control of
partition locking than both CIC and ordinary CREATE INDEX. See the
updated patch.

Hi.

Not sure if it's worth removing the mention of CIC in

       creates the new index marked as invalid, preventing automatic 
application
       to existing partitions.  Instead, indexes can then be created 
individually
-     on each partition using <literal>CONCURRENTLY</literal> and
+     on each partition and
       <firstterm>attached</firstterm> to the partitioned index on the 
parent
       using <command>ALTER INDEX ... ATTACH PARTITION</command>.  Once 
indexes for
       all the partitions are attached to the parent index, the parent 
index will

but at least now it looks better.
--
Best regards,
Alexander Pyhalov,
Postgres Professional

#31Ilya Gladyshev
ilya.v.gladyshev@gmail.com
In reply to: Alexander Pyhalov (#30)
2 attachment(s)
Re: CREATE INDEX CONCURRENTLY on partitioned index

On 28.05.2024 07:05, Alexander Pyhalov wrote:

Ilya Gladyshev писал(а) 2024-05-28 02:52:

Also I'd like to note that in new patch version there's a strange
wording in documentation:

"This can be very convenient as not only will all existing
partitions be
 indexed, but any future partitions will be as well.
 <command>CREATE INDEX ... CONCURRENTLY</command> can incur long
lock times
 on huge partitioned tables, to avoid that you can
 use <command>CREATE INDEX ON ONLY</command> the partitioned table,
which
 creates the new index marked as invalid, preventing automatic
application
 to existing partitions."

The whole point of CIC is to avoid long lock times. So it seems this
paragraph should be rewritten in the following way:

"To avoid long lock times, you can use <command>CREATE INDEX CONCURRENTLY</command>
or <command>CREATE INDEX ON ONLY</command> the partitioned table..."

True, the current wording doesn't look right. Right now CREATE INDEX
ON ONLY is described as a workaround for the missing CIC. I think it
rather makes sense to say that it gives more fine-grained control of
partition locking than both CIC and ordinary CREATE INDEX. See the
updated patch.

Hi.

Not sure if it's worth removing the mention of CIC in

      creates the new index marked as invalid, preventing automatic 
application
      to existing partitions.  Instead, indexes can then be created 
individually
-     on each partition using <literal>CONCURRENTLY</literal> and
+     on each partition and
      <firstterm>attached</firstterm> to the partitioned index on the 
parent
      using <command>ALTER INDEX ... ATTACH PARTITION</command>.  Once 
indexes for
      all the partitions are attached to the parent index, the parent 
index will

but at least now it looks better.

The current patch version locks all the partitions in the first
transaction up until each of them is built, which makes for long lock
times for partitions that are built last. Having looked at the
implementation of REINDEX CONCURRENTLY for partitioned tables, I think
we can improve this by using the same approach of just skipping the
relations that we find out are dropped when trying to lock them.
Incidentally, this implementation in the new patch version is also simpler.

In addition, while looking at the REINDEX implementation, I noticed that
progress tracking is once again broken for partitioned tables; I am
attaching a second patch to fix it.

Attachments:

v4-0002-Fix-progress-report-for-partitioned-REINDEX.patchtext/x-patch; charset=UTF-8; name=v4-0002-Fix-progress-report-for-partitioned-REINDEX.patchDownload
From 884be03aaeabee5c6eeb5a3f639ac9afe712c24b Mon Sep 17 00:00:00 2001
From: Ilya Gladyshev <ilya.v.gladyshev@gmail.com>
Date: Tue, 11 Jun 2024 17:48:08 +0100
Subject: [PATCH v4 2/2] Fix progress report for partitioned REINDEX

---
 src/backend/catalog/index.c      | 11 ++++--
 src/backend/commands/indexcmds.c | 63 +++++++++++++++++++++++++++++---
 src/include/catalog/index.h      |  1 +
 3 files changed, 66 insertions(+), 9 deletions(-)

diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 55fdde4b24..c5bc72b350 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -3559,6 +3559,7 @@ reindex_index(const ReindexStmt *stmt, Oid indexId,
 	volatile bool skipped_constraint = false;
 	PGRUsage	ru0;
 	bool		progress = ((params->options & REINDEXOPT_REPORT_PROGRESS) != 0);
+	bool		partition = ((params->options & REINDEXOPT_PARTITION) != 0);
 	bool		set_tablespace = false;
 
 	pg_rusage_init(&ru0);
@@ -3604,8 +3605,9 @@ reindex_index(const ReindexStmt *stmt, Oid indexId,
 			indexId
 		};
 
-		pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
-									  heapId);
+		if (!partition)
+			pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
+										  heapId);
 		pgstat_progress_update_multi_param(2, progress_cols, progress_vals);
 	}
 
@@ -3845,8 +3847,11 @@ reindex_index(const ReindexStmt *stmt, Oid indexId,
 	index_close(iRel, NoLock);
 	table_close(heapRelation, NoLock);
 
-	if (progress)
+	if (progress && !partition)
+	{
+		/* progress for partitions is tracked in the caller */
 		pgstat_progress_end_command();
+	}
 }
 
 /*
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 5da2df2d3b..17b30ad6aa 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -118,6 +118,7 @@ static bool ReindexRelationConcurrently(const ReindexStmt *stmt,
 										const ReindexParams *params);
 static void update_relispartition(Oid relationId, bool newval);
 static inline void set_indexsafe_procflags(void);
+static inline void progress_index_partition_done(void);
 
 /*
  * callback argument type for RangeVarCallbackForReindexIndex()
@@ -1550,7 +1551,7 @@ DefineIndex(Oid tableId,
 				 * Update progress for an intermediate partitioned index
 				 * itself
 				 */
-				pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+				progress_index_partition_done();
 			}
 
 			return address;
@@ -1577,7 +1578,7 @@ DefineIndex(Oid tableId,
 		if (!OidIsValid(parentIndexId))
 			pgstat_progress_end_command();
 		else if (!concurrent)
-			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+			progress_index_partition_done();
 
 		return address;
 	}
@@ -1682,7 +1683,7 @@ DefineIndex(Oid tableId,
 										  heaplocktag, heaprelid);
 
 			PushActiveSnapshot(GetTransactionSnapshot());
-			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+			progress_index_partition_done();
 		}
 
 		/* Set as valid all partitioned indexes, including the parent */
@@ -3327,6 +3328,14 @@ ReindexPartitions(const ReindexStmt *stmt, Oid relid, const ReindexParams *param
 	ListCell   *lc;
 	ErrorContextCallback errcallback;
 	ReindexErrorInfo errinfo;
+	ReindexParams newparams;
+	int			progress_params[3] = {
+		PROGRESS_CREATEIDX_COMMAND,
+		PROGRESS_CREATEIDX_PHASE,
+		PROGRESS_CREATEIDX_PARTITIONS_TOTAL
+	};
+	int64		progress_values[3];
+	Oid			heapId = relid;
 
 	Assert(RELKIND_HAS_PARTITIONS(relkind));
 
@@ -3388,11 +3397,28 @@ ReindexPartitions(const ReindexStmt *stmt, Oid relid, const ReindexParams *param
 		MemoryContextSwitchTo(old_context);
 	}
 
+	if (relkind == RELKIND_PARTITIONED_INDEX)
+	{
+		heapId = IndexGetRelation(relid, true);
+	}
+
+	progress_values[0] = (params->options & REINDEXOPT_CONCURRENTLY) != 0 ?
+		PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY :
+		PROGRESS_CREATEIDX_COMMAND_REINDEX;
+	progress_values[1] = 0;
+	progress_values[2] = list_length(partitions);
+	pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX, heapId);
+	pgstat_progress_update_multi_param(3, progress_params, progress_values);
+
 	/*
 	 * Process each partition listed in a separate transaction.  Note that
 	 * this commits and then starts a new transaction immediately.
 	 */
-	ReindexMultipleInternal(stmt, partitions, params);
+	newparams = *params;
+	newparams.options |= REINDEXOPT_PARTITION;
+	ReindexMultipleInternal(stmt, partitions, &newparams);
+
+	pgstat_progress_end_command();
 
 	/*
 	 * Clean up working storage --- note we must do this after
@@ -3413,6 +3439,7 @@ static void
 ReindexMultipleInternal(const ReindexStmt *stmt, const List *relids, const ReindexParams *params)
 {
 	ListCell   *l;
+	bool		partitions = ((params->options & REINDEXOPT_PARTITION) != 0);
 
 	PopActiveSnapshot();
 	CommitTransactionCommand();
@@ -3506,6 +3533,9 @@ ReindexMultipleInternal(const ReindexStmt *stmt, const List *relids, const Reind
 		}
 
 		CommitTransactionCommand();
+
+		if (partitions)
+			progress_index_partition_done();
 	}
 
 	StartTransactionCommand();
@@ -3558,6 +3588,7 @@ ReindexRelationConcurrently(const ReindexStmt *stmt, Oid relationOid, const Rein
 	char	   *relationName = NULL;
 	char	   *relationNamespace = NULL;
 	PGRUsage	ru0;
+	bool		partition = ((params->options & REINDEXOPT_PARTITION) != 0);
 	const int	progress_index[] = {
 		PROGRESS_CREATEIDX_COMMAND,
 		PROGRESS_CREATEIDX_PHASE,
@@ -3901,7 +3932,8 @@ ReindexRelationConcurrently(const ReindexStmt *stmt, Oid relationOid, const Rein
 		if (indexRel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
 			elog(ERROR, "cannot reindex a temporary table concurrently");
 
-		pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX, idx->tableId);
+		if (!partition)
+			pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX, idx->tableId);
 
 		progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY;
 		progress_vals[1] = 0;	/* initializing */
@@ -4375,7 +4407,8 @@ ReindexRelationConcurrently(const ReindexStmt *stmt, Oid relationOid, const Rein
 
 	MemoryContextDelete(private_context);
 
-	pgstat_progress_end_command();
+	if (!partition)
+		pgstat_progress_end_command();
 
 	return true;
 }
@@ -4565,3 +4598,21 @@ set_indexsafe_procflags(void)
 	ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
 	LWLockRelease(ProcArrayLock);
 }
+
+static inline void
+progress_index_partition_done(void)
+{
+	int			nparam = 6;
+	const int	progress_idx[] = {
+		PROGRESS_CREATEIDX_INDEX_OID,
+		PROGRESS_CREATEIDX_ACCESS_METHOD_OID,
+		PROGRESS_CREATEIDX_PHASE,
+		PROGRESS_CREATEIDX_SUBPHASE,
+		PROGRESS_CREATEIDX_TUPLES_TOTAL,
+		PROGRESS_CREATEIDX_TUPLES_DONE
+	};
+	const int64 progress_vals[] = {0, 0, 0, 0, 0, 0};
+
+	pgstat_progress_update_multi_param(nparam, progress_idx, progress_vals);
+	pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+}
diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h
index 7d434f8e65..ccba65fbbf 100644
--- a/src/include/catalog/index.h
+++ b/src/include/catalog/index.h
@@ -42,6 +42,7 @@ typedef struct ReindexParams
 #define REINDEXOPT_REPORT_PROGRESS 0x02 /* report pgstat progress */
 #define REINDEXOPT_MISSING_OK 	0x04	/* skip missing relations */
 #define REINDEXOPT_CONCURRENTLY	0x08	/* concurrent mode */
+#define REINDEXOPT_PARTITION 0x10 /* reindexing is done as part of partitioned table/index reindex */
 
 /* state info for validate_index bulkdelete callback */
 typedef struct ValidateIndexState
-- 
2.43.0

v4-0001-Allow-CREATE-INDEX-CONCURRENTLY-on-partitioned-ta.patch (text/x-patch; charset=UTF-8; name=v4-0001-Allow-CREATE-INDEX-CONCURRENTLY-on-partitioned-ta.patch) [Download]
From 45576292012468f2d3deea0e568ee74891cb73b1 Mon Sep 17 00:00:00 2001
From: Ilya Gladyshev <ilya.v.gladyshev@gmail.com>
Date: Thu, 23 May 2024 18:13:41 +0100
Subject: [PATCH v4 1/2] Allow CREATE INDEX CONCURRENTLY on partitioned table

---
 doc/src/sgml/ddl.sgml                  |  10 +-
 doc/src/sgml/ref/create_index.sgml     |  14 +-
 src/backend/commands/indexcmds.c       | 224 +++++++++++++++++++------
 src/test/regress/expected/indexing.out | 127 +++++++++++++-
 src/test/regress/sql/indexing.sql      |  26 ++-
 5 files changed, 323 insertions(+), 78 deletions(-)

diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml
index 6aab79e901..904978c6e5 100644
--- a/doc/src/sgml/ddl.sgml
+++ b/doc/src/sgml/ddl.sgml
@@ -4314,14 +4314,12 @@ ALTER TABLE measurement ATTACH PARTITION measurement_y2008m02
      As mentioned earlier, it is possible to create indexes on partitioned
      tables so that they are applied automatically to the entire hierarchy.
      This can be very convenient as not only will all existing partitions be
-     indexed, but any future partitions will be as well.  However, one
-     limitation when creating new indexes on partitioned tables is that it
-     is not possible to use the <literal>CONCURRENTLY</literal>
-     qualifier, which could lead to long lock times.  To avoid this, you can
-     use <command>CREATE INDEX ON ONLY</command> the partitioned table, which
+     indexed, but any future partitions will be as well.  For more control over
+     locking of the partitions, you can use <command>CREATE INDEX ON ONLY</command>
+     the partitioned table, which
      creates the new index marked as invalid, preventing automatic application
      to existing partitions.  Instead, indexes can then be created individually
-     on each partition using <literal>CONCURRENTLY</literal> and
+     on each partition and
      <firstterm>attached</firstterm> to the partitioned index on the parent
      using <command>ALTER INDEX ... ATTACH PARTITION</command>.  Once indexes for
      all the partitions are attached to the parent index, the parent index will
diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml
index 621bc0e253..2366cfd9b5 100644
--- a/doc/src/sgml/ref/create_index.sgml
+++ b/doc/src/sgml/ref/create_index.sgml
@@ -645,7 +645,10 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] <replaceable class=
    <para>
     If a problem arises while scanning the table, such as a deadlock or a
     uniqueness violation in a unique index, the <command>CREATE INDEX</command>
-    command will fail but leave behind an <quote>invalid</quote> index. This index
+    command will fail but leave behind an <quote>invalid</quote> index.
+    If this happens while building an index concurrently on a partitioned
+    table, the command can also leave behind <quote>valid</quote> or
+    <quote>invalid</quote> indexes on table partitions.  The invalid index
     will be ignored for querying purposes because it might be incomplete;
     however it will still consume update overhead. The <application>psql</application>
     <command>\d</command> command will report such an index as <literal>INVALID</literal>:
@@ -692,15 +695,6 @@ Indexes:
     cannot.
    </para>
 
-   <para>
-    Concurrent builds for indexes on partitioned tables are currently not
-    supported.  However, you may concurrently build the index on each
-    partition individually and then finally create the partitioned index
-    non-concurrently in order to reduce the time where writes to the
-    partitioned table will be locked out.  In this case, building the
-    partitioned index is a metadata only operation.
-   </para>
-
   </refsect2>
  </refsect1>
 
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 309389e20d..5da2df2d3b 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -95,6 +95,11 @@ static char *ChooseIndexName(const char *tabname, Oid namespaceId,
 							 bool primary, bool isconstraint);
 static char *ChooseIndexNameAddition(const List *colnames);
 static List *ChooseIndexColumnNames(const List *indexElems);
+static void DefineIndexConcurrentInternal(Oid relationId,
+										  Oid indexRelationId,
+										  IndexInfo *indexInfo,
+										  LOCKTAG heaplocktag,
+										  LockRelId heaprelid);
 static void ReindexIndex(const ReindexStmt *stmt, const ReindexParams *params,
 						 bool isTopLevel);
 static void RangeVarCallbackForReindexIndex(const RangeVar *relation,
@@ -561,7 +566,6 @@ DefineIndex(Oid tableId,
 	bool		amissummarizing;
 	amoptions_function amoptions;
 	bool		partitioned;
-	bool		safe_index;
 	Datum		reloptions;
 	int16	   *coloptions;
 	IndexInfo  *indexInfo;
@@ -569,12 +573,10 @@ DefineIndex(Oid tableId,
 	bits16		constr_flags;
 	int			numberOfAttributes;
 	int			numberOfKeyAttributes;
-	TransactionId limitXmin;
 	ObjectAddress address;
 	LockRelId	heaprelid;
 	LOCKTAG		heaplocktag;
 	LOCKMODE	lockmode;
-	Snapshot	snapshot;
 	Oid			root_save_userid;
 	int			root_save_sec_context;
 	int			root_save_nestlevel;
@@ -706,20 +708,6 @@ DefineIndex(Oid tableId,
 	 * partition.
 	 */
 	partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE;
-	if (partitioned)
-	{
-		/*
-		 * Note: we check 'stmt->concurrent' rather than 'concurrent', so that
-		 * the error is thrown also for temporary tables.  Seems better to be
-		 * consistent, even though we could do it on temporary table because
-		 * we're not actually doing it concurrently.
-		 */
-		if (stmt->concurrent)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("cannot create index on partitioned table \"%s\" concurrently",
-							RelationGetRelationName(rel))));
-	}
 
 	/*
 	 * Don't try to CREATE INDEX on temp tables of other backends.
@@ -1116,10 +1104,6 @@ DefineIndex(Oid tableId,
 		}
 	}
 
-	/* Is index safe for others to ignore?  See set_indexsafe_procflags() */
-	safe_index = indexInfo->ii_Expressions == NIL &&
-		indexInfo->ii_Predicate == NIL;
-
 	/*
 	 * Report index creation if appropriate (delay this till after most of the
 	 * error checks)
@@ -1184,6 +1168,11 @@ DefineIndex(Oid tableId,
 		if (pd->nparts != 0)
 			flags |= INDEX_CREATE_INVALID;
 	}
+	else if (concurrent && OidIsValid(parentIndexId))
+	{
+		/* If concurrent, initially build index partitions as "invalid" */
+		flags |= INDEX_CREATE_INVALID;
+	}
 
 	if (stmt->deferrable)
 		constr_flags |= INDEX_CONSTR_CREATE_DEFERRABLE;
@@ -1533,21 +1522,7 @@ DefineIndex(Oid tableId,
 			 */
 			if (invalidate_parent)
 			{
-				Relation	pg_index = table_open(IndexRelationId, RowExclusiveLock);
-				HeapTuple	tup,
-							newtup;
-
-				tup = SearchSysCache1(INDEXRELID,
-									  ObjectIdGetDatum(indexRelationId));
-				if (!HeapTupleIsValid(tup))
-					elog(ERROR, "cache lookup failed for index %u",
-						 indexRelationId);
-				newtup = heap_copytuple(tup);
-				((Form_pg_index) GETSTRUCT(newtup))->indisvalid = false;
-				CatalogTupleUpdate(pg_index, &tup->t_self, newtup);
-				ReleaseSysCache(tup);
-				table_close(pg_index, RowExclusiveLock);
-				heap_freetuple(newtup);
+				index_set_state_flags(indexRelationId, INDEX_DROP_CLEAR_VALID);
 
 				/*
 				 * CCI here to make this update visible, in case this recurses
@@ -1559,37 +1534,49 @@ DefineIndex(Oid tableId,
 
 		/*
 		 * Indexes on partitioned tables are not themselves built, so we're
-		 * done here.
+		 * done here in the non-concurrent case.
 		 */
-		AtEOXact_GUC(false, root_save_nestlevel);
-		SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
-		table_close(rel, NoLock);
-		if (!OidIsValid(parentIndexId))
-			pgstat_progress_end_command();
-		else
+		if (!concurrent)
 		{
-			/* Update progress for an intermediate partitioned index itself */
-			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
-		}
+			AtEOXact_GUC(false, root_save_nestlevel);
+			SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
+			table_close(rel, NoLock);
 
-		return address;
+			if (!OidIsValid(parentIndexId))
+				pgstat_progress_end_command();
+			else
+			{
+				/*
+				 * Update progress for an intermediate partitioned index
+				 * itself
+				 */
+				pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+			}
+
+			return address;
+		}
 	}
 
 	AtEOXact_GUC(false, root_save_nestlevel);
 	SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
 
-	if (!concurrent)
+	/*
+	 * All done in the non-concurrent case, and when building catalog entries
+	 * of partitions for CIC.
+	 */
+	if (!concurrent || OidIsValid(parentIndexId))
 	{
-		/* Close the heap and we're done, in the non-concurrent case */
 		table_close(rel, NoLock);
 
 		/*
 		 * If this is the top-level index, the command is done overall;
-		 * otherwise, increment progress to report one child index is done.
+		 * otherwise (when being called recursively), increment progress to
+		 * report that one child index is done.  Except in the concurrent
+		 * (catalog-only) case, which is handled later.
 		 */
 		if (!OidIsValid(parentIndexId))
 			pgstat_progress_end_command();
-		else
+		else if (!concurrent)
 			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
 
 		return address;
@@ -1600,6 +1587,137 @@ DefineIndex(Oid tableId,
 	SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
 	table_close(rel, NoLock);
 
+	if (!partitioned)
+	{
+		/* CREATE INDEX CONCURRENTLY on a nonpartitioned table */
+		DefineIndexConcurrentInternal(tableId, indexRelationId,
+									  indexInfo, heaplocktag, heaprelid);
+		pgstat_progress_end_command();
+		return address;
+	}
+	else
+	{
+		/*
+		 * For CIC on a partitioned table, finish by building indexes on
+		 * partitions
+		 */
+
+		ListCell   *lc;
+		List	   *childs;
+		List	   *tosetvalid = NIL;
+		MemoryContext cic_context,
+					old_context;
+
+		/* Create special memory context for cross-transaction storage */
+		cic_context = AllocSetContextCreate(PortalContext,
+											"Create index concurrently",
+											ALLOCSET_DEFAULT_SIZES);
+
+		old_context = MemoryContextSwitchTo(cic_context);
+		childs = find_all_inheritors(indexRelationId, ShareUpdateExclusiveLock, NULL);
+		MemoryContextSwitchTo(old_context);
+
+		foreach(lc, childs)
+		{
+			Oid			indrelid = lfirst_oid(lc);
+			Oid			tabrelid;
+			char		relkind;
+
+			/*
+			 * Pre-existing partitions which were ATTACHED were already
+			 * counted in the progress report.
+			 */
+			if (get_index_isvalid(indrelid))
+				continue;
+
+			/*
+			 * Partitioned indexes are counted in the progress report, but
+			 * don't need to be further processed.
+			 */
+			relkind = get_rel_relkind(indrelid);
+			if (!RELKIND_HAS_STORAGE(relkind))
+			{
+				/* The toplevel index doesn't count towards "partitions done" */
+				if (indrelid != indexRelationId)
+					pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+
+				/*
+				 * Build up a list of the partitioned indexes (intermediate and
+				 * top-level) which will later need to be set valid.
+				 */
+				old_context = MemoryContextSwitchTo(cic_context);
+				tosetvalid = lappend_oid(tosetvalid, indrelid);
+				MemoryContextSwitchTo(old_context);
+				continue;
+			}
+
+			/*
+			 * Partition could have been dropped, since we looked it up. In
+			 * this case consider it done and go to the next one.
+			 */
+			tabrelid = IndexGetRelation(indrelid, true);
+			if (!tabrelid)
+			{
+				pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+				continue;
+			}
+			rel = try_table_open(tabrelid, ShareUpdateExclusiveLock);
+			if (!rel)
+			{
+				pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+				continue;
+			}
+
+			heaprelid = rel->rd_lockInfo.lockRelId;
+
+			/*
+			 * Close the table but retain the lock, that should be extended to
+			 * session level in DefineIndexConcurrentInternal.
+			 */
+			table_close(rel, NoLock);
+			SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
+
+			/* Process each partition in a separate transaction */
+			DefineIndexConcurrentInternal(tabrelid, indrelid, indexInfo,
+										  heaplocktag, heaprelid);
+
+			PushActiveSnapshot(GetTransactionSnapshot());
+			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+		}
+
+		/* Set as valid all partitioned indexes, including the parent */
+		foreach(lc, tosetvalid)
+		{
+			Oid			indrelid = lfirst_oid(lc);
+			Relation	indrel = try_index_open(indrelid, ShareUpdateExclusiveLock);
+
+			if (!indrel)
+				continue;
+			index_set_state_flags(indrelid, INDEX_CREATE_SET_READY);
+			CommandCounterIncrement();
+			index_set_state_flags(indrelid, INDEX_CREATE_SET_VALID);
+			index_close(indrel, ShareUpdateExclusiveLock);
+		}
+
+		MemoryContextDelete(cic_context);
+		pgstat_progress_end_command();
+		PopActiveSnapshot();
+		return address;
+	}
+}
+
+
+static void
+DefineIndexConcurrentInternal(Oid tableId, Oid indexRelationId, IndexInfo *indexInfo,
+							  LOCKTAG heaplocktag, LockRelId heaprelid)
+{
+	TransactionId limitXmin;
+	Snapshot	snapshot;
+
+	/* Is index safe for others to ignore?  See set_indexsafe_procflags() */
+	bool		safe_index = indexInfo->ii_Expressions == NIL &&
+		indexInfo->ii_Predicate == NIL;
+
 	/*
 	 * For a concurrent build, it's important to make the catalog entries
 	 * visible to other transactions before we start to build the index. That
@@ -1795,10 +1913,6 @@ DefineIndex(Oid tableId,
 	 * Last thing to do is release the session-level lock on the parent table.
 	 */
 	UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
-
-	pgstat_progress_end_command();
-
-	return address;
 }
 
 
diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out
index f25723da92..44a6cf39df 100644
--- a/src/test/regress/expected/indexing.out
+++ b/src/test/regress/expected/indexing.out
@@ -50,11 +50,130 @@ select relname, relkind, relhassubclass, inhparent::regclass
 (8 rows)
 
 drop table idxpart;
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
-ERROR:  cannot create index on partitioned table "idxpart" concurrently
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+create table idxpart3 partition of idxpart for values from (30) to (40) partition by range(a);
+create table idxpart31 partition of idxpart3 default;
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart2 (a); -- leaf
+create index concurrently on idxpart (a); -- partitioned
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+ERROR:  could not create unique index "idxpart2_a_idx1"
+DETAIL:  Key (a)=(10) is duplicated.
+\d idxpart
+        Partitioned table "public.idxpart"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition key: RANGE (a)
+Indexes:
+    "idxpart_a_idx" btree (a)
+    "idxpart_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 3 (Use \d+ to list them.)
+
+\d idxpart1
+        Partitioned table "public.idxpart1"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart1_a_idx" btree (a)
+    "idxpart1_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart11
+       Partitioned table "public.idxpart11"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart1 FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart11_a_idx" btree (a)
+    "idxpart11_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart111
+       Partitioned table "public.idxpart111"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart11 DEFAULT
+Partition key: RANGE (a)
+Indexes:
+    "idxpart111_a_idx" btree (a)
+    "idxpart111_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart1111
+      Partitioned table "public.idxpart1111"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart111 DEFAULT
+Partition key: RANGE (a)
+Indexes:
+    "idxpart1111_a_idx" btree (a)
+    "idxpart1111_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 0
+
+\d idxpart2
+              Table "public.idxpart2"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (10) TO (20)
+Indexes:
+    "idxpart2_a_idx" btree (a)
+    "idxpart2_a_idx1" UNIQUE, btree (a) INVALID
+
+\d idxpart3
+        Partitioned table "public.idxpart3"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (30) TO (40)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart3_a_idx" btree (a)
+    "idxpart3_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart31
+             Table "public.idxpart31"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart3 DEFAULT
+Indexes:
+    "idxpart31_a_idx" btree (a)
+    "idxpart31_a_idx1" UNIQUE, btree (a) INVALID
+
 drop table idxpart;
 -- Verify bugfix with query on indexed partitioned table with no partitions
 -- https://postgr.es/m/20180124162006.pmapfiznhgngwtjf@alvherre.pgsql
diff --git a/src/test/regress/sql/indexing.sql b/src/test/regress/sql/indexing.sql
index 5f1f4b80c9..38a730d877 100644
--- a/src/test/regress/sql/indexing.sql
+++ b/src/test/regress/sql/indexing.sql
@@ -29,10 +29,30 @@ select relname, relkind, relhassubclass, inhparent::regclass
 	where relname like 'idxpart%' order by relname;
 drop table idxpart;
 
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+create table idxpart3 partition of idxpart for values from (30) to (40) partition by range(a);
+create table idxpart31 partition of idxpart3 default;
+
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart2 (a); -- leaf
+create index concurrently on idxpart (a); -- partitioned
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+\d idxpart
+\d idxpart1
+\d idxpart11
+\d idxpart111
+\d idxpart1111
+\d idxpart2
+\d idxpart3
+\d idxpart31
 drop table idxpart;
 
 -- Verify bugfix with query on indexed partitioned table with no partitions
-- 
2.43.0

#32Justin Pryzby
pryzby@telsasoft.com
In reply to: Ilya Gladyshev (#27)
Re: CREATE INDEX CONCURRENTLY on partitioned index

On Thu, May 23, 2024 at 10:14:57PM +0100, Ilya Gladyshev wrote:

Hi,

I think it's well worth the effort to revive the patch, so I rebased it on
master, updated it and will return it back to the commitfest. Alexander,
Justin feel free to add yourselves as authors

Thanks -- I was intending to write about this.

I realized that the patch will need some isolation tests to exercise its
concurrent behavior.

--
Justin

#33Ilya Gladyshev
ilya.v.gladyshev@gmail.com
In reply to: Justin Pryzby (#32)
2 attachment(s)
Re: CREATE INDEX CONCURRENTLY on partitioned index

On 15.06.2024 20:40, Justin Pryzby wrote:

On Thu, May 23, 2024 at 10:14:57PM +0100, Ilya Gladyshev wrote:

Hi,

I think it's well worth the effort to revive the patch, so I rebased it on
master, updated it and will return it back to the commitfest. Alexander,
Justin feel free to add yourselves as authors

Thanks -- I was intending to write about this.

I realized that the patch will need some isolation tests to exercise its
concurrent behavior.

Thanks for the suggestion. I added an isolation test that verifies the
behaviour of partitioned CIC while a partition is concurrently being
dropped or detached. I also fixed some issues in the new patch that I
found while writing the test.

Attachments:

v5-0002-Fix-progress-report-for-partitioned-REINDEX.patch (text/x-patch; charset=UTF-8; name=v5-0002-Fix-progress-report-for-partitioned-REINDEX.patch) [Download]
From 45f2ec9ee57a5337b77b66db3c8c5092f305a176 Mon Sep 17 00:00:00 2001
From: Ilya Gladyshev <ilya.v.gladyshev@gmail.com>
Date: Tue, 11 Jun 2024 17:48:08 +0100
Subject: [PATCH v5 2/2] Fix progress report for partitioned REINDEX

---
 src/backend/catalog/index.c      | 11 ++++--
 src/backend/commands/indexcmds.c | 63 +++++++++++++++++++++++++++++---
 src/include/catalog/index.h      |  1 +
 3 files changed, 66 insertions(+), 9 deletions(-)

diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 55fdde4b24..c5bc72b350 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -3559,6 +3559,7 @@ reindex_index(const ReindexStmt *stmt, Oid indexId,
 	volatile bool skipped_constraint = false;
 	PGRUsage	ru0;
 	bool		progress = ((params->options & REINDEXOPT_REPORT_PROGRESS) != 0);
+	bool		partition = ((params->options & REINDEXOPT_PARTITION) != 0);
 	bool		set_tablespace = false;
 
 	pg_rusage_init(&ru0);
@@ -3604,8 +3605,9 @@ reindex_index(const ReindexStmt *stmt, Oid indexId,
 			indexId
 		};
 
-		pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
-									  heapId);
+		if (!partition)
+			pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
+										  heapId);
 		pgstat_progress_update_multi_param(2, progress_cols, progress_vals);
 	}
 
@@ -3845,8 +3847,11 @@ reindex_index(const ReindexStmt *stmt, Oid indexId,
 	index_close(iRel, NoLock);
 	table_close(heapRelation, NoLock);
 
-	if (progress)
+	if (progress && !partition)
+	{
+		/* progress for partitions is tracked in the caller */
 		pgstat_progress_end_command();
+	}
 }
 
 /*
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index dcb4ea89e9..6abe1f017c 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -118,6 +118,7 @@ static bool ReindexRelationConcurrently(const ReindexStmt *stmt,
 										const ReindexParams *params);
 static void update_relispartition(Oid relationId, bool newval);
 static inline void set_indexsafe_procflags(void);
+static inline void progress_index_partition_done(void);
 
 /*
  * callback argument type for RangeVarCallbackForReindexIndex()
@@ -1550,7 +1551,7 @@ DefineIndex(Oid tableId,
 				 * Update progress for an intermediate partitioned index
 				 * itself
 				 */
-				pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+				progress_index_partition_done();
 			}
 
 			return address;
@@ -1577,7 +1578,7 @@ DefineIndex(Oid tableId,
 		if (!OidIsValid(parentIndexId))
 			pgstat_progress_end_command();
 		else if (!concurrent)
-			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+			progress_index_partition_done();
 
 		return address;
 	}
@@ -1686,7 +1687,7 @@ DefineIndex(Oid tableId,
 										  heaplocktag, heaprelid);
 
 			PushActiveSnapshot(GetTransactionSnapshot());
-			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+			progress_index_partition_done();
 		}
 
 		/* Set as valid all partitioned indexes, including the parent */
@@ -3331,6 +3332,14 @@ ReindexPartitions(const ReindexStmt *stmt, Oid relid, const ReindexParams *param
 	ListCell   *lc;
 	ErrorContextCallback errcallback;
 	ReindexErrorInfo errinfo;
+	ReindexParams newparams;
+	int			progress_params[3] = {
+		PROGRESS_CREATEIDX_COMMAND,
+		PROGRESS_CREATEIDX_PHASE,
+		PROGRESS_CREATEIDX_PARTITIONS_TOTAL
+	};
+	int64		progress_values[3];
+	Oid			heapId = relid;
 
 	Assert(RELKIND_HAS_PARTITIONS(relkind));
 
@@ -3392,11 +3401,28 @@ ReindexPartitions(const ReindexStmt *stmt, Oid relid, const ReindexParams *param
 		MemoryContextSwitchTo(old_context);
 	}
 
+	if (relkind == RELKIND_PARTITIONED_INDEX)
+	{
+		heapId = IndexGetRelation(relid, true);
+	}
+
+	progress_values[0] = (params->options & REINDEXOPT_CONCURRENTLY) != 0 ?
+		PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY :
+		PROGRESS_CREATEIDX_COMMAND_REINDEX;
+	progress_values[1] = 0;
+	progress_values[2] = list_length(partitions);
+	pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX, heapId);
+	pgstat_progress_update_multi_param(3, progress_params, progress_values);
+
 	/*
 	 * Process each partition listed in a separate transaction.  Note that
 	 * this commits and then starts a new transaction immediately.
 	 */
-	ReindexMultipleInternal(stmt, partitions, params);
+	newparams = *params;
+	newparams.options |= REINDEXOPT_PARTITION;
+	ReindexMultipleInternal(stmt, partitions, &newparams);
+
+	pgstat_progress_end_command();
 
 	/*
 	 * Clean up working storage --- note we must do this after
@@ -3417,6 +3443,7 @@ static void
 ReindexMultipleInternal(const ReindexStmt *stmt, const List *relids, const ReindexParams *params)
 {
 	ListCell   *l;
+	bool		partitions = ((params->options & REINDEXOPT_PARTITION) != 0);
 
 	PopActiveSnapshot();
 	CommitTransactionCommand();
@@ -3510,6 +3537,9 @@ ReindexMultipleInternal(const ReindexStmt *stmt, const List *relids, const Reind
 		}
 
 		CommitTransactionCommand();
+
+		if (partitions)
+			progress_index_partition_done();
 	}
 
 	StartTransactionCommand();
@@ -3562,6 +3592,7 @@ ReindexRelationConcurrently(const ReindexStmt *stmt, Oid relationOid, const Rein
 	char	   *relationName = NULL;
 	char	   *relationNamespace = NULL;
 	PGRUsage	ru0;
+	bool		partition = ((params->options & REINDEXOPT_PARTITION) != 0);
 	const int	progress_index[] = {
 		PROGRESS_CREATEIDX_COMMAND,
 		PROGRESS_CREATEIDX_PHASE,
@@ -3905,7 +3936,8 @@ ReindexRelationConcurrently(const ReindexStmt *stmt, Oid relationOid, const Rein
 		if (indexRel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
 			elog(ERROR, "cannot reindex a temporary table concurrently");
 
-		pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX, idx->tableId);
+		if (!partition)
+			pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX, idx->tableId);
 
 		progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY;
 		progress_vals[1] = 0;	/* initializing */
@@ -4379,7 +4411,8 @@ ReindexRelationConcurrently(const ReindexStmt *stmt, Oid relationOid, const Rein
 
 	MemoryContextDelete(private_context);
 
-	pgstat_progress_end_command();
+	if (!partition)
+		pgstat_progress_end_command();
 
 	return true;
 }
@@ -4569,3 +4602,21 @@ set_indexsafe_procflags(void)
 	ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
 	LWLockRelease(ProcArrayLock);
 }
+
+static inline void
+progress_index_partition_done(void)
+{
+	int			nparam = 6;
+	const int	progress_idx[] = {
+		PROGRESS_CREATEIDX_INDEX_OID,
+		PROGRESS_CREATEIDX_ACCESS_METHOD_OID,
+		PROGRESS_CREATEIDX_PHASE,
+		PROGRESS_CREATEIDX_SUBPHASE,
+		PROGRESS_CREATEIDX_TUPLES_TOTAL,
+		PROGRESS_CREATEIDX_TUPLES_DONE
+	};
+	const int64 progress_vals[] = {0, 0, 0, 0, 0, 0};
+
+	pgstat_progress_update_multi_param(nparam, progress_idx, progress_vals);
+	pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+}
diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h
index 7d434f8e65..ccba65fbbf 100644
--- a/src/include/catalog/index.h
+++ b/src/include/catalog/index.h
@@ -42,6 +42,7 @@ typedef struct ReindexParams
 #define REINDEXOPT_REPORT_PROGRESS 0x02 /* report pgstat progress */
 #define REINDEXOPT_MISSING_OK 	0x04	/* skip missing relations */
 #define REINDEXOPT_CONCURRENTLY	0x08	/* concurrent mode */
+#define REINDEXOPT_PARTITION 0x10 /* reindexing is done as part of partitioned table/index reindex */
 
 /* state info for validate_index bulkdelete callback */
 typedef struct ValidateIndexState
-- 
2.43.0

v5-0001-Allow-CREATE-INDEX-CONCURRENTLY-on-partitioned-ta.patchtext/x-patch; charset=UTF-8; name=v5-0001-Allow-CREATE-INDEX-CONCURRENTLY-on-partitioned-ta.patchDownload
From acf5cf5d4a984c0f8635a25e03c23409601c0c93 Mon Sep 17 00:00:00 2001
From: Ilya Gladyshev <ilya.v.gladyshev@gmail.com>
Date: Thu, 23 May 2024 18:13:41 +0100
Subject: [PATCH v5 1/2] Allow CREATE INDEX CONCURRENTLY on partitioned table

---
 doc/src/sgml/ddl.sgml                         |  10 +-
 doc/src/sgml/ref/create_index.sgml            |  14 +-
 src/backend/commands/indexcmds.c              | 228 +++++++++++++-----
 .../isolation/expected/partitioned-cic.out    | 135 +++++++++++
 src/test/isolation/isolation_schedule         |   1 +
 src/test/isolation/specs/partitioned-cic.spec |  57 +++++
 src/test/regress/expected/indexing.out        | 127 +++++++++-
 src/test/regress/sql/indexing.sql             |  26 +-
 8 files changed, 520 insertions(+), 78 deletions(-)
 create mode 100644 src/test/isolation/expected/partitioned-cic.out
 create mode 100644 src/test/isolation/specs/partitioned-cic.spec

diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml
index 6aab79e901..904978c6e5 100644
--- a/doc/src/sgml/ddl.sgml
+++ b/doc/src/sgml/ddl.sgml
@@ -4314,14 +4314,12 @@ ALTER TABLE measurement ATTACH PARTITION measurement_y2008m02
      As mentioned earlier, it is possible to create indexes on partitioned
      tables so that they are applied automatically to the entire hierarchy.
      This can be very convenient as not only will all existing partitions be
-     indexed, but any future partitions will be as well.  However, one
-     limitation when creating new indexes on partitioned tables is that it
-     is not possible to use the <literal>CONCURRENTLY</literal>
-     qualifier, which could lead to long lock times.  To avoid this, you can
-     use <command>CREATE INDEX ON ONLY</command> the partitioned table, which
+     indexed, but any future partitions will be as well. For more control over
+     locking of the partitions you can use <command>CREATE INDEX ON ONLY</command>
+     on the partitioned table, which
      creates the new index marked as invalid, preventing automatic application
      to existing partitions.  Instead, indexes can then be created individually
-     on each partition using <literal>CONCURRENTLY</literal> and
+     on each partition and
      <firstterm>attached</firstterm> to the partitioned index on the parent
      using <command>ALTER INDEX ... ATTACH PARTITION</command>.  Once indexes for
      all the partitions are attached to the parent index, the parent index will
diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml
index 621bc0e253..2366cfd9b5 100644
--- a/doc/src/sgml/ref/create_index.sgml
+++ b/doc/src/sgml/ref/create_index.sgml
@@ -645,7 +645,10 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] <replaceable class=
    <para>
     If a problem arises while scanning the table, such as a deadlock or a
     uniqueness violation in a unique index, the <command>CREATE INDEX</command>
-    command will fail but leave behind an <quote>invalid</quote> index. This index
+    command will fail but leave behind an <quote>invalid</quote> index.
+    If this happens while building an index concurrently on a partitioned
+    table, the command can also leave behind <quote>valid</quote> or
+    <quote>invalid</quote> indexes on table partitions.  The invalid index
     will be ignored for querying purposes because it might be incomplete;
     however it will still consume update overhead. The <application>psql</application>
     <command>\d</command> command will report such an index as <literal>INVALID</literal>:
@@ -692,15 +695,6 @@ Indexes:
     cannot.
    </para>
 
-   <para>
-    Concurrent builds for indexes on partitioned tables are currently not
-    supported.  However, you may concurrently build the index on each
-    partition individually and then finally create the partitioned index
-    non-concurrently in order to reduce the time where writes to the
-    partitioned table will be locked out.  In this case, building the
-    partitioned index is a metadata only operation.
-   </para>
-
   </refsect2>
  </refsect1>
 
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 309389e20d..dcb4ea89e9 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -95,6 +95,11 @@ static char *ChooseIndexName(const char *tabname, Oid namespaceId,
 							 bool primary, bool isconstraint);
 static char *ChooseIndexNameAddition(const List *colnames);
 static List *ChooseIndexColumnNames(const List *indexElems);
+static void DefineIndexConcurrentInternal(Oid relationId,
+										  Oid indexRelationId,
+										  IndexInfo *indexInfo,
+										  LOCKTAG heaplocktag,
+										  LockRelId heaprelid);
 static void ReindexIndex(const ReindexStmt *stmt, const ReindexParams *params,
 						 bool isTopLevel);
 static void RangeVarCallbackForReindexIndex(const RangeVar *relation,
@@ -561,7 +566,6 @@ DefineIndex(Oid tableId,
 	bool		amissummarizing;
 	amoptions_function amoptions;
 	bool		partitioned;
-	bool		safe_index;
 	Datum		reloptions;
 	int16	   *coloptions;
 	IndexInfo  *indexInfo;
@@ -569,12 +573,10 @@ DefineIndex(Oid tableId,
 	bits16		constr_flags;
 	int			numberOfAttributes;
 	int			numberOfKeyAttributes;
-	TransactionId limitXmin;
 	ObjectAddress address;
 	LockRelId	heaprelid;
 	LOCKTAG		heaplocktag;
 	LOCKMODE	lockmode;
-	Snapshot	snapshot;
 	Oid			root_save_userid;
 	int			root_save_sec_context;
 	int			root_save_nestlevel;
@@ -706,20 +708,6 @@ DefineIndex(Oid tableId,
 	 * partition.
 	 */
 	partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE;
-	if (partitioned)
-	{
-		/*
-		 * Note: we check 'stmt->concurrent' rather than 'concurrent', so that
-		 * the error is thrown also for temporary tables.  Seems better to be
-		 * consistent, even though we could do it on temporary table because
-		 * we're not actually doing it concurrently.
-		 */
-		if (stmt->concurrent)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("cannot create index on partitioned table \"%s\" concurrently",
-							RelationGetRelationName(rel))));
-	}
 
 	/*
 	 * Don't try to CREATE INDEX on temp tables of other backends.
@@ -1116,10 +1104,6 @@ DefineIndex(Oid tableId,
 		}
 	}
 
-	/* Is index safe for others to ignore?  See set_indexsafe_procflags() */
-	safe_index = indexInfo->ii_Expressions == NIL &&
-		indexInfo->ii_Predicate == NIL;
-
 	/*
 	 * Report index creation if appropriate (delay this till after most of the
 	 * error checks)
@@ -1184,6 +1168,11 @@ DefineIndex(Oid tableId,
 		if (pd->nparts != 0)
 			flags |= INDEX_CREATE_INVALID;
 	}
+	else if (concurrent && OidIsValid(parentIndexId))
+	{
+		/* If concurrent, initially build index partitions as "invalid" */
+		flags |= INDEX_CREATE_INVALID;
+	}
 
 	if (stmt->deferrable)
 		constr_flags |= INDEX_CONSTR_CREATE_DEFERRABLE;
@@ -1533,21 +1522,7 @@ DefineIndex(Oid tableId,
 			 */
 			if (invalidate_parent)
 			{
-				Relation	pg_index = table_open(IndexRelationId, RowExclusiveLock);
-				HeapTuple	tup,
-							newtup;
-
-				tup = SearchSysCache1(INDEXRELID,
-									  ObjectIdGetDatum(indexRelationId));
-				if (!HeapTupleIsValid(tup))
-					elog(ERROR, "cache lookup failed for index %u",
-						 indexRelationId);
-				newtup = heap_copytuple(tup);
-				((Form_pg_index) GETSTRUCT(newtup))->indisvalid = false;
-				CatalogTupleUpdate(pg_index, &tup->t_self, newtup);
-				ReleaseSysCache(tup);
-				table_close(pg_index, RowExclusiveLock);
-				heap_freetuple(newtup);
+				index_set_state_flags(indexRelationId, INDEX_DROP_CLEAR_VALID);
 
 				/*
 				 * CCI here to make this update visible, in case this recurses
@@ -1559,37 +1534,49 @@ DefineIndex(Oid tableId,
 
 		/*
 		 * Indexes on partitioned tables are not themselves built, so we're
-		 * done here.
+		 * done here in the non-concurrent case.
 		 */
-		AtEOXact_GUC(false, root_save_nestlevel);
-		SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
-		table_close(rel, NoLock);
-		if (!OidIsValid(parentIndexId))
-			pgstat_progress_end_command();
-		else
+		if (!concurrent)
 		{
-			/* Update progress for an intermediate partitioned index itself */
-			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
-		}
+			AtEOXact_GUC(false, root_save_nestlevel);
+			SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
+			table_close(rel, NoLock);
 
-		return address;
+			if (!OidIsValid(parentIndexId))
+				pgstat_progress_end_command();
+			else
+			{
+				/*
+				 * Update progress for an intermediate partitioned index
+				 * itself
+				 */
+				pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+			}
+
+			return address;
+		}
 	}
 
 	AtEOXact_GUC(false, root_save_nestlevel);
 	SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
 
-	if (!concurrent)
+	/*
+	 * All done in the non-concurrent case, and when building catalog entries
+	 * of partitions for CIC.
+	 */
+	if (!concurrent || OidIsValid(parentIndexId))
 	{
-		/* Close the heap and we're done, in the non-concurrent case */
 		table_close(rel, NoLock);
 
 		/*
 		 * If this is the top-level index, the command is done overall;
-		 * otherwise, increment progress to report one child index is done.
+		 * otherwise (when being called recursively), increment progress to
+		 * report that one child index is done.  Except in the concurrent
+		 * (catalog-only) case, which is handled later.
 		 */
 		if (!OidIsValid(parentIndexId))
 			pgstat_progress_end_command();
-		else
+		else if (!concurrent)
 			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
 
 		return address;
@@ -1600,6 +1587,141 @@ DefineIndex(Oid tableId,
 	SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
 	table_close(rel, NoLock);
 
+	if (!partitioned)
+	{
+		/* CREATE INDEX CONCURRENTLY on a nonpartitioned table */
+		DefineIndexConcurrentInternal(tableId, indexRelationId,
+									  indexInfo, heaplocktag, heaprelid);
+		pgstat_progress_end_command();
+		return address;
+	}
+	else
+	{
+		/*
+		 * For CIC on a partitioned table, finish by building indexes on
+		 * partitions
+		 */
+
+		ListCell   *lc;
+		List	   *childs;
+		List	   *tosetvalid = NIL;
+		MemoryContext cic_context,
+					old_context;
+
+		/* Create special memory context for cross-transaction storage */
+		cic_context = AllocSetContextCreate(PortalContext,
+											"Create index concurrently",
+											ALLOCSET_DEFAULT_SIZES);
+
+		old_context = MemoryContextSwitchTo(cic_context);
+		childs = find_all_inheritors(indexRelationId, ShareUpdateExclusiveLock, NULL);
+		MemoryContextSwitchTo(old_context);
+
+		foreach(lc, childs)
+		{
+			Oid			indrelid = lfirst_oid(lc);
+			Oid			tabrelid;
+			char		relkind;
+
+			/*
+			 * Partition could have been dropped, since we looked it up. In
+			 * this case consider it done and go to the next one.
+			 */
+			tabrelid = IndexGetRelation(indrelid, true);
+			if (!tabrelid)
+			{
+				pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+				continue;
+			}
+			rel = try_table_open(tabrelid, ShareUpdateExclusiveLock);
+			if (!rel)
+			{
+				pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+				continue;
+			}
+
+			/*
+			 * Pre-existing partitions which were ATTACHED were already
+			 * counted in the progress report.
+			 */
+			if (get_index_isvalid(indrelid))
+			{
+				table_close(rel, ShareUpdateExclusiveLock);
+				continue;
+			}
+
+			/*
+			 * Partitioned indexes are counted in the progress report, but
+			 * don't need to be further processed.
+			 */
+			relkind = get_rel_relkind(indrelid);
+			if (!RELKIND_HAS_STORAGE(relkind))
+			{
+				/* The toplevel index doesn't count towards "partitions done" */
+				if (indrelid != indexRelationId)
+					pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+
+				/*
+				 * Build up a list of all the partitioned indexes which will
+				 * later need to be set valid.
+				 */
+				old_context = MemoryContextSwitchTo(cic_context);
+				tosetvalid = lappend_oid(tosetvalid, indrelid);
+				MemoryContextSwitchTo(old_context);
+				table_close(rel, ShareUpdateExclusiveLock);
+				continue;
+			}
+
+			heaprelid = rel->rd_lockInfo.lockRelId;
+
+			/*
+			 * Close the table but retain the lock, that should be extended to
+			 * session level in DefineIndexConcurrentInternal.
+			 */
+			table_close(rel, NoLock);
+			SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
+
+			/* Process each partition in a separate transaction */
+			DefineIndexConcurrentInternal(tabrelid, indrelid, indexInfo,
+										  heaplocktag, heaprelid);
+
+			PushActiveSnapshot(GetTransactionSnapshot());
+			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+		}
+
+		/* Set as valid all partitioned indexes, including the parent */
+		foreach(lc, tosetvalid)
+		{
+			Oid			indrelid = lfirst_oid(lc);
+			Relation	indrel = try_index_open(indrelid, ShareUpdateExclusiveLock);
+
+			if (!indrel)
+				continue;
+			index_set_state_flags(indrelid, INDEX_CREATE_SET_READY);
+			CommandCounterIncrement();
+			index_set_state_flags(indrelid, INDEX_CREATE_SET_VALID);
+			index_close(indrel, ShareUpdateExclusiveLock);
+		}
+
+		MemoryContextDelete(cic_context);
+		pgstat_progress_end_command();
+		PopActiveSnapshot();
+		return address;
+	}
+}
+
+
+static void
+DefineIndexConcurrentInternal(Oid tableId, Oid indexRelationId, IndexInfo *indexInfo,
+							  LOCKTAG heaplocktag, LockRelId heaprelid)
+{
+	TransactionId limitXmin;
+	Snapshot	snapshot;
+
+	/* Is index safe for others to ignore?  See set_indexsafe_procflags() */
+	bool		safe_index = indexInfo->ii_Expressions == NIL &&
+		indexInfo->ii_Predicate == NIL;
+
 	/*
 	 * For a concurrent build, it's important to make the catalog entries
 	 * visible to other transactions before we start to build the index. That
@@ -1795,10 +1917,6 @@ DefineIndex(Oid tableId,
 	 * Last thing to do is release the session-level lock on the parent table.
 	 */
 	UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
-
-	pgstat_progress_end_command();
-
-	return address;
 }
 
 
diff --git a/src/test/isolation/expected/partitioned-cic.out b/src/test/isolation/expected/partitioned-cic.out
new file mode 100644
index 0000000000..b66acc6f6a
--- /dev/null
+++ b/src/test/isolation/expected/partitioned-cic.out
@@ -0,0 +1,135 @@
+Parsed test spec with 3 sessions
+
+starting permutation: lock_p1 cic insert drop2 commit chk_content
+step lock_p1: lock cictab_part_1 in row exclusive mode;
+step cic: CREATE INDEX CONCURRENTLY ON cictab(i); <waiting ...>
+step insert: insert into cictab values (1, 1), (11, 1);
+step drop2: DROP TABLE cictab_part_2;
+step commit: COMMIT;
+step cic: <... completed>
+step chk_content: 
+  set enable_seqscan to off;
+  explain (costs off) select * from cictab where i > 0;
+  select * from cictab where i > 0;
+
+QUERY PLAN                                                  
+------------------------------------------------------------
+Index Scan using cictab_part_1_i_idx on cictab_part_1 cictab
+  Index Cond: (i > 0)                                       
+(2 rows)
+
+i|j
+-+-
+1|0
+1|1
+(2 rows)
+
+
+starting permutation: lock_p2 cic insert drop1 commit chk_content
+step lock_p2: lock cictab_part_2 in row exclusive mode;
+step cic: CREATE INDEX CONCURRENTLY ON cictab(i); <waiting ...>
+step insert: insert into cictab values (1, 1), (11, 1);
+step drop1: DROP TABLE cictab_part_1;
+step commit: COMMIT;
+step cic: <... completed>
+step chk_content: 
+  set enable_seqscan to off;
+  explain (costs off) select * from cictab where i > 0;
+  select * from cictab where i > 0;
+
+QUERY PLAN                                                  
+------------------------------------------------------------
+Index Scan using cictab_part_2_i_idx on cictab_part_2 cictab
+  Index Cond: (i > 0)                                       
+(2 rows)
+
+ i|j
+--+-
+11|0
+11|1
+(2 rows)
+
+
+starting permutation: lock_p1 cic insert detach2 commit chk_content chk_content_part2
+step lock_p1: lock cictab_part_1 in row exclusive mode;
+step cic: CREATE INDEX CONCURRENTLY ON cictab(i); <waiting ...>
+step insert: insert into cictab values (1, 1), (11, 1);
+step detach2: ALTER TABLE cictab DETACH PARTITION cictab_part_2;
+step commit: COMMIT;
+step cic: <... completed>
+step chk_content: 
+  set enable_seqscan to off;
+  explain (costs off) select * from cictab where i > 0;
+  select * from cictab where i > 0;
+
+QUERY PLAN                                                  
+------------------------------------------------------------
+Index Scan using cictab_part_1_i_idx on cictab_part_1 cictab
+  Index Cond: (i > 0)                                       
+(2 rows)
+
+i|j
+-+-
+1|0
+1|1
+(2 rows)
+
+step chk_content_part2: 
+  set enable_seqscan to off;
+  explain (costs off) select * from cictab_part_2 where i > 0;
+  select * from cictab_part_2 where i > 0;
+
+QUERY PLAN                                           
+-----------------------------------------------------
+Index Scan using cictab_part_2_i_idx on cictab_part_2
+  Index Cond: (i > 0)                                
+(2 rows)
+
+ i|j
+--+-
+11|0
+11|1
+(2 rows)
+
+
+starting permutation: lock_p2 cic insert detach1 commit chk_content chk_content_part1
+step lock_p2: lock cictab_part_2 in row exclusive mode;
+step cic: CREATE INDEX CONCURRENTLY ON cictab(i); <waiting ...>
+step insert: insert into cictab values (1, 1), (11, 1);
+step detach1: ALTER TABLE cictab DETACH PARTITION cictab_part_1;
+step commit: COMMIT;
+step cic: <... completed>
+step chk_content: 
+  set enable_seqscan to off;
+  explain (costs off) select * from cictab where i > 0;
+  select * from cictab where i > 0;
+
+QUERY PLAN                                                  
+------------------------------------------------------------
+Index Scan using cictab_part_2_i_idx on cictab_part_2 cictab
+  Index Cond: (i > 0)                                       
+(2 rows)
+
+ i|j
+--+-
+11|0
+11|1
+(2 rows)
+
+step chk_content_part1: 
+  set enable_seqscan to off;
+  explain (costs off) select * from cictab_part_1 where i > 0;
+  select * from cictab_part_1 where i > 0;
+
+QUERY PLAN                                           
+-----------------------------------------------------
+Index Scan using cictab_part_1_i_idx on cictab_part_1
+  Index Cond: (i > 0)                                
+(2 rows)
+
+i|j
+-+-
+1|0
+1|1
+(2 rows)
+
diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule
index 0342eb39e4..57b7948687 100644
--- a/src/test/isolation/isolation_schedule
+++ b/src/test/isolation/isolation_schedule
@@ -114,3 +114,4 @@ test: serializable-parallel-2
 test: serializable-parallel-3
 test: matview-write-skew
 test: lock-nowait
+test: partitioned-cic
diff --git a/src/test/isolation/specs/partitioned-cic.spec b/src/test/isolation/specs/partitioned-cic.spec
new file mode 100644
index 0000000000..95f0bb2b47
--- /dev/null
+++ b/src/test/isolation/specs/partitioned-cic.spec
@@ -0,0 +1,57 @@
+# Test the ability to drop/detach partitions while CREATE INDEX CONCURRENTLY is running.
+# To achieve this, start a transaction that will pause CIC in progress by
+# locking a partition in row exclusive mode, giving us a chance to drop/detach another partition.
+# Dropping/detaching is tested for each partition to test two scenarios:
+# when the partition has already been indexed and when it's yet to be indexed.
+
+setup {
+  create table cictab(i int, j int) partition by range(i);
+  create table cictab_part_1 partition of cictab for values from (0) to (10);
+  create table cictab_part_2 partition of cictab for values from (10) to (20);
+
+  insert into cictab values (1, 0), (11, 0);
+}
+
+teardown {
+    drop table if exists cictab_part_1;
+    drop table if exists cictab_part_2;
+    drop table cictab;
+}
+
+session s1
+setup {BEGIN;}
+step lock_p1 { lock cictab_part_1 in row exclusive mode; }
+step lock_p2 { lock cictab_part_2 in row exclusive mode; }
+step commit { COMMIT; }
+
+session s2
+step cic { CREATE INDEX CONCURRENTLY ON cictab(i); }
+step chk_content {
+  set enable_seqscan to off;
+  explain (costs off) select * from cictab where i > 0;
+  select * from cictab where i > 0;
+}
+
+step chk_content_part1 {
+  set enable_seqscan to off;
+  explain (costs off) select * from cictab_part_1 where i > 0;
+  select * from cictab_part_1 where i > 0;
+}
+
+step chk_content_part2 {
+  set enable_seqscan to off;
+  explain (costs off) select * from cictab_part_2 where i > 0;
+  select * from cictab_part_2 where i > 0;
+}
+
+session s3
+step detach1 { ALTER TABLE cictab DETACH PARTITION cictab_part_1; }
+step detach2 { ALTER TABLE cictab DETACH PARTITION cictab_part_2; }
+step drop1 { DROP TABLE cictab_part_1; }
+step drop2 { DROP TABLE cictab_part_2; }
+step insert { insert into cictab values (1, 1), (11, 1); }
+
+permutation lock_p1 cic insert drop2 commit chk_content
+permutation lock_p2 cic insert drop1 commit chk_content
+permutation lock_p1 cic insert detach2 commit chk_content chk_content_part2
+permutation lock_p2 cic insert detach1 commit chk_content chk_content_part1
diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out
index f25723da92..44a6cf39df 100644
--- a/src/test/regress/expected/indexing.out
+++ b/src/test/regress/expected/indexing.out
@@ -50,11 +50,130 @@ select relname, relkind, relhassubclass, inhparent::regclass
 (8 rows)
 
 drop table idxpart;
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
-ERROR:  cannot create index on partitioned table "idxpart" concurrently
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+create table idxpart3 partition of idxpart for values from (30) to (40) partition by range(a);
+create table idxpart31 partition of idxpart3 default;
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart2 (a); -- leaf
+create index concurrently on idxpart (a); -- partitioned
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+ERROR:  could not create unique index "idxpart2_a_idx1"
+DETAIL:  Key (a)=(10) is duplicated.
+\d idxpart
+        Partitioned table "public.idxpart"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition key: RANGE (a)
+Indexes:
+    "idxpart_a_idx" btree (a)
+    "idxpart_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 3 (Use \d+ to list them.)
+
+\d idxpart1
+        Partitioned table "public.idxpart1"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart1_a_idx" btree (a)
+    "idxpart1_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart11
+       Partitioned table "public.idxpart11"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart1 FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart11_a_idx" btree (a)
+    "idxpart11_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart111
+       Partitioned table "public.idxpart111"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart11 DEFAULT
+Partition key: RANGE (a)
+Indexes:
+    "idxpart111_a_idx" btree (a)
+    "idxpart111_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart1111
+      Partitioned table "public.idxpart1111"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart111 DEFAULT
+Partition key: RANGE (a)
+Indexes:
+    "idxpart1111_a_idx" btree (a)
+    "idxpart1111_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 0
+
+\d idxpart2
+              Table "public.idxpart2"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (10) TO (20)
+Indexes:
+    "idxpart2_a_idx" btree (a)
+    "idxpart2_a_idx1" UNIQUE, btree (a) INVALID
+
+\d idxpart3
+        Partitioned table "public.idxpart3"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (30) TO (40)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart3_a_idx" btree (a)
+    "idxpart3_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart31
+             Table "public.idxpart31"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart3 DEFAULT
+Indexes:
+    "idxpart31_a_idx" btree (a)
+    "idxpart31_a_idx1" UNIQUE, btree (a) INVALID
+
 drop table idxpart;
 -- Verify bugfix with query on indexed partitioned table with no partitions
 -- https://postgr.es/m/20180124162006.pmapfiznhgngwtjf@alvherre.pgsql
diff --git a/src/test/regress/sql/indexing.sql b/src/test/regress/sql/indexing.sql
index 5f1f4b80c9..38a730d877 100644
--- a/src/test/regress/sql/indexing.sql
+++ b/src/test/regress/sql/indexing.sql
@@ -29,10 +29,30 @@ select relname, relkind, relhassubclass, inhparent::regclass
 	where relname like 'idxpart%' order by relname;
 drop table idxpart;
 
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+create table idxpart3 partition of idxpart for values from (30) to (40) partition by range(a);
+create table idxpart31 partition of idxpart3 default;
+
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart2 (a); -- leaf
+create index concurrently on idxpart (a); -- partitioned
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+\d idxpart
+\d idxpart1
+\d idxpart11
+\d idxpart111
+\d idxpart1111
+\d idxpart2
+\d idxpart3
+\d idxpart31
 drop table idxpart;
 
 -- Verify bugfix with query on indexed partitioned table with no partitions
-- 
2.43.0

#34Justin Pryzby
pryzby@telsasoft.com
In reply to: Ilya Gladyshev (#31)
Re: CREATE INDEX CONCURRENTLY on partitioned index

On Sat, Jun 15, 2024 at 07:56:38PM +0100, Ilya Gladyshev wrote:

In addition, I noticed that progress tracking is once again broken for
partitioned tables, while looking at REINDEX implementation, attaching the
second patch to fix it.

Thanks for the fixes, I started reviewing them but need some more time
to digest.

Do you mean that progress reporting is broken in master, for REINDEX, or
just with this patch ?

--
Justin

#35Ilya Gladyshev
ilya.v.gladyshev@gmail.com
In reply to: Justin Pryzby (#34)
Re: CREATE INDEX CONCURRENTLY on partitioned index

It is broken in master, I just didn’t want to create a separate thread, but it can be fixed independently. As I remember, the problem is that progress is tracked for each table in the hierarchy as if the table is processed separately, without ever setting partitions_total and partitions_done counters.

Show quoted text

11 июля 2024 г., в 13:31, Justin Pryzby <pryzby@telsasoft.com> написал(а):

On Sat, Jun 15, 2024 at 07:56:38PM +0100, Ilya Gladyshev wrote:

In addition, I noticed that progress tracking is once again broken for
partitioned tables, while looking at REINDEX implementation, attaching the
second patch to fix it.

Thanks for the fixes, I started reviewing them but need some more time
to digest.

Do you mean that progress reporting is broken in master, for REINDEX, or
just with this patch ?

--
Justin

#36Michael Paquier
michael@paquier.xyz
In reply to: Ilya Gladyshev (#35)
Re: CREATE INDEX CONCURRENTLY on partitioned index

On Thu, Jul 11, 2024 at 09:35:24PM +0100, Ilya Gladyshev wrote:

It is broken in master, I just didn’t want to create a separate
thread, but it can be fixed independently. As I remember, the
problem is that progress is tracked for each table in the hierarchy
as if the table is processed separately, without ever setting
partitions_total and partitions_done counters.

Please let's move this point to its own thread and deal with it with
an independent patch. Hiding that in a thread that's already quite
long is not a good idea. This needs proper review, and a separate
thread with a good subject to describe the problem will attract a
better audience to deal with the problem you are seeing.

I was not paying much attention, until you've mentioned that this was
an issue with HEAD.
--
Michael

#37Ilya Gladyshev
ilya.v.gladyshev@gmail.com
In reply to: Michael Paquier (#36)
1 attachment(s)
Re: CREATE INDEX CONCURRENTLY on partitioned index

On 12.07.2024 01:01, Michael Paquier wrote:

Please let's move this point to its own thread and deal with it with
an independent patch. Hiding that in a thread that's already quite
long is not a good idea. This needs proper review, and a separate
thread with a good subject to describe the problem will attract a
better audience to deal with the problem you are seeing.

I was not paying much attention, until you've mentioned that this was
an issue with HEAD.
--
Michael

Sure, created a separate thread [1]. Please disregard the second patch
in this thread. Duplicating the last version of the relevant patch here
to avoid any confusion.

[1]: /messages/by-id/b72f2d89-820a-4fa2-9058-b155cf646f4f@gmail.com
/messages/by-id/b72f2d89-820a-4fa2-9058-b155cf646f4f@gmail.com

Attachments:

v5-0001-Allow-CREATE-INDEX-CONCURRENTLY-on-partitioned-ta.patch (text/x-patch; charset=UTF-8; name=v5-0001-Allow-CREATE-INDEX-CONCURRENTLY-on-partitioned-ta.patch) Download
From acf5cf5d4a984c0f8635a25e03c23409601c0c93 Mon Sep 17 00:00:00 2001
From: Ilya Gladyshev <ilya.v.gladyshev@gmail.com>
Date: Thu, 23 May 2024 18:13:41 +0100
Subject: [PATCH v5 1/2] Allow CREATE INDEX CONCURRENTLY on partitioned table

---
 doc/src/sgml/ddl.sgml                         |  10 +-
 doc/src/sgml/ref/create_index.sgml            |  14 +-
 src/backend/commands/indexcmds.c              | 228 +++++++++++++-----
 .../isolation/expected/partitioned-cic.out    | 135 +++++++++++
 src/test/isolation/isolation_schedule         |   1 +
 src/test/isolation/specs/partitioned-cic.spec |  57 +++++
 src/test/regress/expected/indexing.out        | 127 +++++++++-
 src/test/regress/sql/indexing.sql             |  26 +-
 8 files changed, 520 insertions(+), 78 deletions(-)
 create mode 100644 src/test/isolation/expected/partitioned-cic.out
 create mode 100644 src/test/isolation/specs/partitioned-cic.spec

diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml
index 6aab79e901..904978c6e5 100644
--- a/doc/src/sgml/ddl.sgml
+++ b/doc/src/sgml/ddl.sgml
@@ -4314,14 +4314,12 @@ ALTER TABLE measurement ATTACH PARTITION measurement_y2008m02
      As mentioned earlier, it is possible to create indexes on partitioned
      tables so that they are applied automatically to the entire hierarchy.
      This can be very convenient as not only will all existing partitions be
-     indexed, but any future partitions will be as well.  However, one
-     limitation when creating new indexes on partitioned tables is that it
-     is not possible to use the <literal>CONCURRENTLY</literal>
-     qualifier, which could lead to long lock times.  To avoid this, you can
-     use <command>CREATE INDEX ON ONLY</command> the partitioned table, which
+     indexed, but any future partitions will be as well. For more control over
+     locking of the partitions you can use <command>CREATE INDEX ON ONLY</command>
+     on the partitioned table, which
      creates the new index marked as invalid, preventing automatic application
      to existing partitions.  Instead, indexes can then be created individually
-     on each partition using <literal>CONCURRENTLY</literal> and
+     on each partition and
      <firstterm>attached</firstterm> to the partitioned index on the parent
      using <command>ALTER INDEX ... ATTACH PARTITION</command>.  Once indexes for
      all the partitions are attached to the parent index, the parent index will
diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml
index 621bc0e253..2366cfd9b5 100644
--- a/doc/src/sgml/ref/create_index.sgml
+++ b/doc/src/sgml/ref/create_index.sgml
@@ -645,7 +645,10 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] <replaceable class=
    <para>
     If a problem arises while scanning the table, such as a deadlock or a
     uniqueness violation in a unique index, the <command>CREATE INDEX</command>
-    command will fail but leave behind an <quote>invalid</quote> index. This index
+    command will fail but leave behind an <quote>invalid</quote> index.
+    If this happens while building an index concurrently on a partitioned
+    table, the command can also leave behind <quote>valid</quote> or
+    <quote>invalid</quote> indexes on table partitions.  The invalid index
     will be ignored for querying purposes because it might be incomplete;
     however it will still consume update overhead. The <application>psql</application>
     <command>\d</command> command will report such an index as <literal>INVALID</literal>:
@@ -692,15 +695,6 @@ Indexes:
     cannot.
    </para>
 
-   <para>
-    Concurrent builds for indexes on partitioned tables are currently not
-    supported.  However, you may concurrently build the index on each
-    partition individually and then finally create the partitioned index
-    non-concurrently in order to reduce the time where writes to the
-    partitioned table will be locked out.  In this case, building the
-    partitioned index is a metadata only operation.
-   </para>
-
   </refsect2>
  </refsect1>
 
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 309389e20d..dcb4ea89e9 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -95,6 +95,11 @@ static char *ChooseIndexName(const char *tabname, Oid namespaceId,
 							 bool primary, bool isconstraint);
 static char *ChooseIndexNameAddition(const List *colnames);
 static List *ChooseIndexColumnNames(const List *indexElems);
+static void DefineIndexConcurrentInternal(Oid relationId,
+										  Oid indexRelationId,
+										  IndexInfo *indexInfo,
+										  LOCKTAG heaplocktag,
+										  LockRelId heaprelid);
 static void ReindexIndex(const ReindexStmt *stmt, const ReindexParams *params,
 						 bool isTopLevel);
 static void RangeVarCallbackForReindexIndex(const RangeVar *relation,
@@ -561,7 +566,6 @@ DefineIndex(Oid tableId,
 	bool		amissummarizing;
 	amoptions_function amoptions;
 	bool		partitioned;
-	bool		safe_index;
 	Datum		reloptions;
 	int16	   *coloptions;
 	IndexInfo  *indexInfo;
@@ -569,12 +573,10 @@ DefineIndex(Oid tableId,
 	bits16		constr_flags;
 	int			numberOfAttributes;
 	int			numberOfKeyAttributes;
-	TransactionId limitXmin;
 	ObjectAddress address;
 	LockRelId	heaprelid;
 	LOCKTAG		heaplocktag;
 	LOCKMODE	lockmode;
-	Snapshot	snapshot;
 	Oid			root_save_userid;
 	int			root_save_sec_context;
 	int			root_save_nestlevel;
@@ -706,20 +708,6 @@ DefineIndex(Oid tableId,
 	 * partition.
 	 */
 	partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE;
-	if (partitioned)
-	{
-		/*
-		 * Note: we check 'stmt->concurrent' rather than 'concurrent', so that
-		 * the error is thrown also for temporary tables.  Seems better to be
-		 * consistent, even though we could do it on temporary table because
-		 * we're not actually doing it concurrently.
-		 */
-		if (stmt->concurrent)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("cannot create index on partitioned table \"%s\" concurrently",
-							RelationGetRelationName(rel))));
-	}
 
 	/*
 	 * Don't try to CREATE INDEX on temp tables of other backends.
@@ -1116,10 +1104,6 @@ DefineIndex(Oid tableId,
 		}
 	}
 
-	/* Is index safe for others to ignore?  See set_indexsafe_procflags() */
-	safe_index = indexInfo->ii_Expressions == NIL &&
-		indexInfo->ii_Predicate == NIL;
-
 	/*
 	 * Report index creation if appropriate (delay this till after most of the
 	 * error checks)
@@ -1184,6 +1168,11 @@ DefineIndex(Oid tableId,
 		if (pd->nparts != 0)
 			flags |= INDEX_CREATE_INVALID;
 	}
+	else if (concurrent && OidIsValid(parentIndexId))
+	{
+		/* If concurrent, initially build index partitions as "invalid" */
+		flags |= INDEX_CREATE_INVALID;
+	}
 
 	if (stmt->deferrable)
 		constr_flags |= INDEX_CONSTR_CREATE_DEFERRABLE;
@@ -1533,21 +1522,7 @@ DefineIndex(Oid tableId,
 			 */
 			if (invalidate_parent)
 			{
-				Relation	pg_index = table_open(IndexRelationId, RowExclusiveLock);
-				HeapTuple	tup,
-							newtup;
-
-				tup = SearchSysCache1(INDEXRELID,
-									  ObjectIdGetDatum(indexRelationId));
-				if (!HeapTupleIsValid(tup))
-					elog(ERROR, "cache lookup failed for index %u",
-						 indexRelationId);
-				newtup = heap_copytuple(tup);
-				((Form_pg_index) GETSTRUCT(newtup))->indisvalid = false;
-				CatalogTupleUpdate(pg_index, &tup->t_self, newtup);
-				ReleaseSysCache(tup);
-				table_close(pg_index, RowExclusiveLock);
-				heap_freetuple(newtup);
+				index_set_state_flags(indexRelationId, INDEX_DROP_CLEAR_VALID);
 
 				/*
 				 * CCI here to make this update visible, in case this recurses
@@ -1559,37 +1534,49 @@ DefineIndex(Oid tableId,
 
 		/*
 		 * Indexes on partitioned tables are not themselves built, so we're
-		 * done here.
+		 * done here in the non-concurrent case.
 		 */
-		AtEOXact_GUC(false, root_save_nestlevel);
-		SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
-		table_close(rel, NoLock);
-		if (!OidIsValid(parentIndexId))
-			pgstat_progress_end_command();
-		else
+		if (!concurrent)
 		{
-			/* Update progress for an intermediate partitioned index itself */
-			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
-		}
+			AtEOXact_GUC(false, root_save_nestlevel);
+			SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
+			table_close(rel, NoLock);
 
-		return address;
+			if (!OidIsValid(parentIndexId))
+				pgstat_progress_end_command();
+			else
+			{
+				/*
+				 * Update progress for an intermediate partitioned index
+				 * itself
+				 */
+				pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+			}
+
+			return address;
+		}
 	}
 
 	AtEOXact_GUC(false, root_save_nestlevel);
 	SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
 
-	if (!concurrent)
+	/*
+	 * All done in the non-concurrent case, and when building catalog entries
+	 * of partitions for CIC.
+	 */
+	if (!concurrent || OidIsValid(parentIndexId))
 	{
-		/* Close the heap and we're done, in the non-concurrent case */
 		table_close(rel, NoLock);
 
 		/*
 		 * If this is the top-level index, the command is done overall;
-		 * otherwise, increment progress to report one child index is done.
+		 * otherwise (when being called recursively), increment progress to
+		 * report that one child index is done.  Except in the concurrent
+		 * (catalog-only) case, which is handled later.
 		 */
 		if (!OidIsValid(parentIndexId))
 			pgstat_progress_end_command();
-		else
+		else if (!concurrent)
 			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
 
 		return address;
@@ -1600,6 +1587,141 @@ DefineIndex(Oid tableId,
 	SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
 	table_close(rel, NoLock);
 
+	if (!partitioned)
+	{
+		/* CREATE INDEX CONCURRENTLY on a nonpartitioned table */
+		DefineIndexConcurrentInternal(tableId, indexRelationId,
+									  indexInfo, heaplocktag, heaprelid);
+		pgstat_progress_end_command();
+		return address;
+	}
+	else
+	{
+		/*
+		 * For CIC on a partitioned table, finish by building indexes on
+		 * partitions
+		 */
+
+		ListCell   *lc;
+		List	   *childs;
+		List	   *tosetvalid = NIL;
+		MemoryContext cic_context,
+					old_context;
+
+		/* Create special memory context for cross-transaction storage */
+		cic_context = AllocSetContextCreate(PortalContext,
+											"Create index concurrently",
+											ALLOCSET_DEFAULT_SIZES);
+
+		old_context = MemoryContextSwitchTo(cic_context);
+		childs = find_all_inheritors(indexRelationId, ShareUpdateExclusiveLock, NULL);
+		MemoryContextSwitchTo(old_context);
+
+		foreach(lc, childs)
+		{
+			Oid			indrelid = lfirst_oid(lc);
+			Oid			tabrelid;
+			char		relkind;
+
+			/*
+			 * Partition could have been dropped, since we looked it up. In
+			 * this case consider it done and go to the next one.
+			 */
+			tabrelid = IndexGetRelation(indrelid, true);
+			if (!tabrelid)
+			{
+				pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+				continue;
+			}
+			rel = try_table_open(tabrelid, ShareUpdateExclusiveLock);
+			if (!rel)
+			{
+				pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+				continue;
+			}
+
+			/*
+			 * Pre-existing partitions which were ATTACHED were already
+			 * counted in the progress report.
+			 */
+			if (get_index_isvalid(indrelid))
+			{
+				table_close(rel, ShareUpdateExclusiveLock);
+				continue;
+			}
+
+			/*
+			 * Partitioned indexes are counted in the progress report, but
+			 * don't need to be further processed.
+			 */
+			relkind = get_rel_relkind(indrelid);
+			if (!RELKIND_HAS_STORAGE(relkind))
+			{
+				/* The toplevel index doesn't count towards "partitions done" */
+				if (indrelid != indexRelationId)
+					pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+
+				/*
+				 * Build up a list of all the intermediate partitioned tables
+				 * which will later need to be set valid.
+				 */
+				old_context = MemoryContextSwitchTo(cic_context);
+				tosetvalid = lappend_oid(tosetvalid, indrelid);
+				MemoryContextSwitchTo(old_context);
+				table_close(rel, ShareUpdateExclusiveLock);
+				continue;
+			}
+
+			heaprelid = rel->rd_lockInfo.lockRelId;
+
+			/*
+			 * Close the table but retain the lock, that should be extended to
+			 * session level in DefineIndexConcurrentInternal.
+			 */
+			table_close(rel, NoLock);
+			SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
+
+			/* Process each partition in a separate transaction */
+			DefineIndexConcurrentInternal(tabrelid, indrelid, indexInfo,
+										  heaplocktag, heaprelid);
+
+			PushActiveSnapshot(GetTransactionSnapshot());
+			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+		}
+
+		/* Set as valid all partitioned indexes, including the parent */
+		foreach(lc, tosetvalid)
+		{
+			Oid			indrelid = lfirst_oid(lc);
+			Relation	indrel = try_index_open(indrelid, ShareUpdateExclusiveLock);
+
+			if (!indrel)
+				continue;
+			index_set_state_flags(indrelid, INDEX_CREATE_SET_READY);
+			CommandCounterIncrement();
+			index_set_state_flags(indrelid, INDEX_CREATE_SET_VALID);
+			index_close(indrel, ShareUpdateExclusiveLock);
+		}
+
+		MemoryContextDelete(cic_context);
+		pgstat_progress_end_command();
+		PopActiveSnapshot();
+		return address;
+	}
+}
+
+
+static void
+DefineIndexConcurrentInternal(Oid tableId, Oid indexRelationId, IndexInfo *indexInfo,
+							  LOCKTAG heaplocktag, LockRelId heaprelid)
+{
+	TransactionId limitXmin;
+	Snapshot	snapshot;
+
+	/* Is index safe for others to ignore?  See set_indexsafe_procflags() */
+	bool		safe_index = indexInfo->ii_Expressions == NIL &&
+		indexInfo->ii_Predicate == NIL;
+
 	/*
 	 * For a concurrent build, it's important to make the catalog entries
 	 * visible to other transactions before we start to build the index. That
@@ -1795,10 +1917,6 @@ DefineIndex(Oid tableId,
 	 * Last thing to do is release the session-level lock on the parent table.
 	 */
 	UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
-
-	pgstat_progress_end_command();
-
-	return address;
 }
 
 
diff --git a/src/test/isolation/expected/partitioned-cic.out b/src/test/isolation/expected/partitioned-cic.out
new file mode 100644
index 0000000000..b66acc6f6a
--- /dev/null
+++ b/src/test/isolation/expected/partitioned-cic.out
@@ -0,0 +1,135 @@
+Parsed test spec with 3 sessions
+
+starting permutation: lock_p1 cic insert drop2 commit chk_content
+step lock_p1: lock cictab_part_1 in row exclusive mode;
+step cic: CREATE INDEX CONCURRENTLY ON cictab(i); <waiting ...>
+step insert: insert into cictab values (1, 1), (11, 1);
+step drop2: DROP TABLE cictab_part_2;
+step commit: COMMIT;
+step cic: <... completed>
+step chk_content: 
+  set enable_seqscan to off;
+  explain (costs off) select * from cictab where i > 0;
+  select * from cictab where i > 0;
+
+QUERY PLAN                                                  
+------------------------------------------------------------
+Index Scan using cictab_part_1_i_idx on cictab_part_1 cictab
+  Index Cond: (i > 0)                                       
+(2 rows)
+
+i|j
+-+-
+1|0
+1|1
+(2 rows)
+
+
+starting permutation: lock_p2 cic insert drop1 commit chk_content
+step lock_p2: lock cictab_part_2 in row exclusive mode;
+step cic: CREATE INDEX CONCURRENTLY ON cictab(i); <waiting ...>
+step insert: insert into cictab values (1, 1), (11, 1);
+step drop1: DROP TABLE cictab_part_1;
+step commit: COMMIT;
+step cic: <... completed>
+step chk_content: 
+  set enable_seqscan to off;
+  explain (costs off) select * from cictab where i > 0;
+  select * from cictab where i > 0;
+
+QUERY PLAN                                                  
+------------------------------------------------------------
+Index Scan using cictab_part_2_i_idx on cictab_part_2 cictab
+  Index Cond: (i > 0)                                       
+(2 rows)
+
+ i|j
+--+-
+11|0
+11|1
+(2 rows)
+
+
+starting permutation: lock_p1 cic insert detach2 commit chk_content chk_content_part2
+step lock_p1: lock cictab_part_1 in row exclusive mode;
+step cic: CREATE INDEX CONCURRENTLY ON cictab(i); <waiting ...>
+step insert: insert into cictab values (1, 1), (11, 1);
+step detach2: ALTER TABLE cictab DETACH PARTITION cictab_part_2;
+step commit: COMMIT;
+step cic: <... completed>
+step chk_content: 
+  set enable_seqscan to off;
+  explain (costs off) select * from cictab where i > 0;
+  select * from cictab where i > 0;
+
+QUERY PLAN                                                  
+------------------------------------------------------------
+Index Scan using cictab_part_1_i_idx on cictab_part_1 cictab
+  Index Cond: (i > 0)                                       
+(2 rows)
+
+i|j
+-+-
+1|0
+1|1
+(2 rows)
+
+step chk_content_part2: 
+  set enable_seqscan to off;
+  explain (costs off) select * from cictab_part_2 where i > 0;
+  select * from cictab_part_2 where i > 0;
+
+QUERY PLAN                                           
+-----------------------------------------------------
+Index Scan using cictab_part_2_i_idx on cictab_part_2
+  Index Cond: (i > 0)                                
+(2 rows)
+
+ i|j
+--+-
+11|0
+11|1
+(2 rows)
+
+
+starting permutation: lock_p2 cic insert detach1 commit chk_content chk_content_part1
+step lock_p2: lock cictab_part_2 in row exclusive mode;
+step cic: CREATE INDEX CONCURRENTLY ON cictab(i); <waiting ...>
+step insert: insert into cictab values (1, 1), (11, 1);
+step detach1: ALTER TABLE cictab DETACH PARTITION cictab_part_1;
+step commit: COMMIT;
+step cic: <... completed>
+step chk_content: 
+  set enable_seqscan to off;
+  explain (costs off) select * from cictab where i > 0;
+  select * from cictab where i > 0;
+
+QUERY PLAN                                                  
+------------------------------------------------------------
+Index Scan using cictab_part_2_i_idx on cictab_part_2 cictab
+  Index Cond: (i > 0)                                       
+(2 rows)
+
+ i|j
+--+-
+11|0
+11|1
+(2 rows)
+
+step chk_content_part1: 
+  set enable_seqscan to off;
+  explain (costs off) select * from cictab_part_1 where i > 0;
+  select * from cictab_part_1 where i > 0;
+
+QUERY PLAN                                           
+-----------------------------------------------------
+Index Scan using cictab_part_1_i_idx on cictab_part_1
+  Index Cond: (i > 0)                                
+(2 rows)
+
+i|j
+-+-
+1|0
+1|1
+(2 rows)
+
diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule
index 0342eb39e4..57b7948687 100644
--- a/src/test/isolation/isolation_schedule
+++ b/src/test/isolation/isolation_schedule
@@ -114,3 +114,4 @@ test: serializable-parallel-2
 test: serializable-parallel-3
 test: matview-write-skew
 test: lock-nowait
+test: partitioned-cic
diff --git a/src/test/isolation/specs/partitioned-cic.spec b/src/test/isolation/specs/partitioned-cic.spec
new file mode 100644
index 0000000000..95f0bb2b47
--- /dev/null
+++ b/src/test/isolation/specs/partitioned-cic.spec
@@ -0,0 +1,57 @@
+# Test the ability to drop/detach partitions while CREATE INDEX CONCURRENTLY is running.
+# To achieve this, start a transaction that will pause CIC in progress by
+# locking a partition in row exclusive mode, giving us a chance to drop/detach another partition.
+# Dropping/detaching is tested for each partition to test two scenarios:
+# when the partition has already been indexed and when it's yet to be indexed.
+
+setup {
+  create table cictab(i int, j int) partition by range(i);
+  create table cictab_part_1 partition of cictab for values from (0) to (10);
+  create table cictab_part_2 partition of cictab for values from (10) to (20);
+
+  insert into cictab values (1, 0), (11, 0);
+}
+
+teardown {
+    drop table if exists cictab_part_1;
+    drop table if exists cictab_part_2;
+    drop table cictab;
+}
+
+session s1
+setup {BEGIN;}
+step lock_p1 { lock cictab_part_1 in row exclusive mode; }
+step lock_p2 { lock cictab_part_2 in row exclusive mode; }
+step commit { COMMIT; }
+
+session s2
+step cic { CREATE INDEX CONCURRENTLY ON cictab(i); }
+step chk_content {
+  set enable_seqscan to off;
+  explain (costs off) select * from cictab where i > 0;
+  select * from cictab where i > 0;
+}
+
+step chk_content_part1 {
+  set enable_seqscan to off;
+  explain (costs off) select * from cictab_part_1 where i > 0;
+  select * from cictab_part_1 where i > 0;
+}
+
+step chk_content_part2 {
+  set enable_seqscan to off;
+  explain (costs off) select * from cictab_part_2 where i > 0;
+  select * from cictab_part_2 where i > 0;
+}
+
+session s3
+step detach1 { ALTER TABLE cictab DETACH PARTITION cictab_part_1; }
+step detach2 { ALTER TABLE cictab DETACH PARTITION cictab_part_2; }
+step drop1 { DROP TABLE cictab_part_1; }
+step drop2 { DROP TABLE cictab_part_2; }
+step insert { insert into cictab values (1, 1), (11, 1); }
+
+permutation lock_p1 cic insert drop2 commit chk_content
+permutation lock_p2 cic insert drop1 commit chk_content
+permutation lock_p1 cic insert detach2 commit chk_content chk_content_part2
+permutation lock_p2 cic insert detach1 commit chk_content chk_content_part1
diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out
index f25723da92..44a6cf39df 100644
--- a/src/test/regress/expected/indexing.out
+++ b/src/test/regress/expected/indexing.out
@@ -50,11 +50,130 @@ select relname, relkind, relhassubclass, inhparent::regclass
 (8 rows)
 
 drop table idxpart;
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
-ERROR:  cannot create index on partitioned table "idxpart" concurrently
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+create table idxpart3 partition of idxpart for values from (30) to (40) partition by range(a);
+create table idxpart31 partition of idxpart3 default;
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart2 (a); -- leaf
+create index concurrently on idxpart (a); -- partitioned
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+ERROR:  could not create unique index "idxpart2_a_idx1"
+DETAIL:  Key (a)=(10) is duplicated.
+\d idxpart
+        Partitioned table "public.idxpart"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition key: RANGE (a)
+Indexes:
+    "idxpart_a_idx" btree (a)
+    "idxpart_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 3 (Use \d+ to list them.)
+
+\d idxpart1
+        Partitioned table "public.idxpart1"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart1_a_idx" btree (a)
+    "idxpart1_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart11
+       Partitioned table "public.idxpart11"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart1 FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart11_a_idx" btree (a)
+    "idxpart11_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart111
+       Partitioned table "public.idxpart111"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart11 DEFAULT
+Partition key: RANGE (a)
+Indexes:
+    "idxpart111_a_idx" btree (a)
+    "idxpart111_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart1111
+      Partitioned table "public.idxpart1111"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart111 DEFAULT
+Partition key: RANGE (a)
+Indexes:
+    "idxpart1111_a_idx" btree (a)
+    "idxpart1111_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 0
+
+\d idxpart2
+              Table "public.idxpart2"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (10) TO (20)
+Indexes:
+    "idxpart2_a_idx" btree (a)
+    "idxpart2_a_idx1" UNIQUE, btree (a) INVALID
+
+\d idxpart3
+        Partitioned table "public.idxpart3"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (30) TO (40)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart3_a_idx" btree (a)
+    "idxpart3_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart31
+             Table "public.idxpart31"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart3 DEFAULT
+Indexes:
+    "idxpart31_a_idx" btree (a)
+    "idxpart31_a_idx1" UNIQUE, btree (a) INVALID
+
 drop table idxpart;
 -- Verify bugfix with query on indexed partitioned table with no partitions
 -- https://postgr.es/m/20180124162006.pmapfiznhgngwtjf@alvherre.pgsql
diff --git a/src/test/regress/sql/indexing.sql b/src/test/regress/sql/indexing.sql
index 5f1f4b80c9..38a730d877 100644
--- a/src/test/regress/sql/indexing.sql
+++ b/src/test/regress/sql/indexing.sql
@@ -29,10 +29,30 @@ select relname, relkind, relhassubclass, inhparent::regclass
 	where relname like 'idxpart%' order by relname;
 drop table idxpart;
 
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+create table idxpart3 partition of idxpart for values from (30) to (40) partition by range(a);
+create table idxpart31 partition of idxpart3 default;
+
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart2 (a); -- leaf
+create index concurrently on idxpart (a); -- partitioned
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+\d idxpart
+\d idxpart1
+\d idxpart11
+\d idxpart111
+\d idxpart1111
+\d idxpart2
+\d idxpart3
+\d idxpart31
 drop table idxpart;
 
 -- Verify bugfix with query on indexed partitioned table with no partitions
-- 
2.43.0

#38Michael Paquier
michael@paquier.xyz
In reply to: Ilya Gladyshev (#37)
Re: CREATE INDEX CONCURRENTLY on partitioned index

On Fri, Jul 12, 2024 at 11:17:25PM +0100, Ilya Gladyshev wrote:

Sure, created a separate thread [1]. Please disregard the second patch in
this thread. Duplicating the last version of the relevant patch here to
avoid any confusion.

[1] /messages/by-id/b72f2d89-820a-4fa2-9058-b155cf646f4f@gmail.com

Thanks, will check that.
--
Michael

#39Sergey Sargsyan
sergey.sargsyan.2001@gmail.com
In reply to: Michael Paquier (#38)
Re: CREATE INDEX CONCURRENTLY on partitioned index

I noticed that development on the concurrent index creation for partitioned
tables feature seemed to stall a few months ago. The patch looked solid,
and there didn’t seem to be any issues with it. Has there been any further
progress? This feature would be invaluable, given the prevalence of
partitioned tables in modern databases.

Thanks for any updates!

On Fri, 25 Oct 2024 at 9:14 PM, Michael Paquier <michael@paquier.xyz> wrote:

Show quoted text

On Fri, Jul 12, 2024 at 11:17:25PM +0100, Ilya Gladyshev wrote:

Sure, created a separate thread [1]. Please disregard the second patch in
this thread. Duplicating the last version of the relevant patch here to
avoid any confusion.

[1]

/messages/by-id/b72f2d89-820a-4fa2-9058-b155cf646f4f@gmail.com

Thanks, will check that.
--
Michael

#40Alexander Pyhalov
a.pyhalov@postgrespro.ru
In reply to: Ilya Gladyshev (#37)
1 attachment(s)
Re: CREATE INDEX CONCURRENTLY on partitioned index

Ilya Gladyshev писал(а) 2024-07-13 01:17:

On 12.07.2024 01:01, Michael Paquier wrote:

Please let's move this point to its own thread and deal with it with
an independent patch. Hiding that in a thread that's already quite
long is not a good idea. This needs proper review, and a separate
thread with a good subject to describe the problem will attract a
better audience to deal with the problem you are seeing.

I was not paying much attention, until you've mentioned that this was
an issue with HEAD.
--
Michael

Sure, created a separate thread [1]. Please disregard the second patch
in this thread. Duplicating the last version of the relevant patch
here to avoid any confusion.

[1]
/messages/by-id/b72f2d89-820a-4fa2-9058-b155cf646f4f@gmail.com

Hi. We've found an issue with this patch. CIC can pop the last active
snapshot when operating on a partitioned table without leaves. This seems
to be similar to the issue fixed by

commit c426f7c2b36a5efd9bcef2a2dfcc559f7879cd84
Author: Michael Paquier <michael@paquier.xyz>
Date: Thu Dec 7 08:31:02 2023 +0900

Fix assertion failure with REINDEX and event triggers

Attaching rebased patch with the fix.
--
Best regards,
Alexander Pyhalov,
Postgres Professional

Attachments:

v6-0001-Allow-CREATE-INDEX-CONCURRENTLY-on-partitioned-ta.patchtext/x-diff; name=v6-0001-Allow-CREATE-INDEX-CONCURRENTLY-on-partitioned-ta.patchDownload
From a29654340a50f29cb31a665899bf4635fee295b8 Mon Sep 17 00:00:00 2001
From: Ilya Gladyshev <ilya.v.gladyshev@gmail.com>
Date: Tue, 1 Jul 2025 08:59:02 +0300
Subject: [PATCH] Allow CREATE INDEX CONCURRENTLY on partitioned table

---
 doc/src/sgml/ddl.sgml                         |  10 +-
 doc/src/sgml/ref/create_index.sgml            |  14 +-
 src/backend/commands/indexcmds.c              | 231 +++++++++++++-----
 .../isolation/expected/partitioned-cic.out    | 135 ++++++++++
 src/test/isolation/isolation_schedule         |   1 +
 src/test/isolation/specs/partitioned-cic.spec |  57 +++++
 src/test/regress/expected/indexing.out        | 132 +++++++++-
 src/test/regress/sql/indexing.sql             |  42 +++-
 8 files changed, 543 insertions(+), 79 deletions(-)
 create mode 100644 src/test/isolation/expected/partitioned-cic.out
 create mode 100644 src/test/isolation/specs/partitioned-cic.spec

diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml
index 65bc070d2e5..85c53f93cd5 100644
--- a/doc/src/sgml/ddl.sgml
+++ b/doc/src/sgml/ddl.sgml
@@ -4418,14 +4418,12 @@ ALTER TABLE measurement ATTACH PARTITION measurement_y2008m02
      As mentioned earlier, it is possible to create indexes on partitioned
      tables so that they are applied automatically to the entire hierarchy.
      This can be very convenient as not only will all existing partitions be
-     indexed, but any future partitions will be as well.  However, one
-     limitation when creating new indexes on partitioned tables is that it
-     is not possible to use the <literal>CONCURRENTLY</literal>
-     qualifier, which could lead to long lock times.  To avoid this, you can
-     use <command>CREATE INDEX ON ONLY</command> the partitioned table, which
+     indexed, but any future partitions will be as well. For more control over
+     locking of the partitions you can use <command>CREATE INDEX ON ONLY</command>
+     on the partitioned table, which
      creates the new index marked as invalid, preventing automatic application
      to existing partitions.  Instead, indexes can then be created individually
-     on each partition using <literal>CONCURRENTLY</literal> and
+     on each partition and
      <firstterm>attached</firstterm> to the partitioned index on the parent
      using <command>ALTER INDEX ... ATTACH PARTITION</command>.  Once indexes for
      all the partitions are attached to the parent index, the parent index will
diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml
index b9c679c41e8..253e62e7348 100644
--- a/doc/src/sgml/ref/create_index.sgml
+++ b/doc/src/sgml/ref/create_index.sgml
@@ -651,7 +651,10 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] <replaceable class=
    <para>
     If a problem arises while scanning the table, such as a deadlock or a
     uniqueness violation in a unique index, the <command>CREATE INDEX</command>
-    command will fail but leave behind an <quote>invalid</quote> index. This index
+    command will fail but leave behind an <quote>invalid</quote> index.
+    If this happens while building an index concurrently on a partitioned
+    table, the command can also leave behind <quote>valid</quote> or
+    <quote>invalid</quote> indexes on table partitions.  The invalid index
     will be ignored for querying purposes because it might be incomplete;
     however it will still consume update overhead. The <application>psql</application>
     <command>\d</command> command will report such an index as <literal>INVALID</literal>:
@@ -698,15 +701,6 @@ Indexes:
     cannot.
    </para>
 
-   <para>
-    Concurrent builds for indexes on partitioned tables are currently not
-    supported.  However, you may concurrently build the index on each
-    partition individually and then finally create the partitioned index
-    non-concurrently in order to reduce the time where writes to the
-    partitioned table will be locked out.  In this case, building the
-    partitioned index is a metadata only operation.
-   </para>
-
   </refsect2>
  </refsect1>
 
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 6f753ab6d7a..c39b3b18ea8 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -98,6 +98,11 @@ static char *ChooseIndexName(const char *tabname, Oid namespaceId,
 							 bool primary, bool isconstraint);
 static char *ChooseIndexNameAddition(const List *colnames);
 static List *ChooseIndexColumnNames(const List *indexElems);
+static void DefineIndexConcurrentInternal(Oid relationId,
+										  Oid indexRelationId,
+										  IndexInfo *indexInfo,
+										  LOCKTAG heaplocktag,
+										  LockRelId heaprelid);
 static void ReindexIndex(const ReindexStmt *stmt, const ReindexParams *params,
 						 bool isTopLevel);
 static void RangeVarCallbackForReindexIndex(const RangeVar *relation,
@@ -573,7 +578,6 @@ DefineIndex(Oid tableId,
 	amoptions_function amoptions;
 	bool		exclusion;
 	bool		partitioned;
-	bool		safe_index;
 	Datum		reloptions;
 	int16	   *coloptions;
 	IndexInfo  *indexInfo;
@@ -581,12 +585,10 @@ DefineIndex(Oid tableId,
 	bits16		constr_flags;
 	int			numberOfAttributes;
 	int			numberOfKeyAttributes;
-	TransactionId limitXmin;
 	ObjectAddress address;
 	LockRelId	heaprelid;
 	LOCKTAG		heaplocktag;
 	LOCKMODE	lockmode;
-	Snapshot	snapshot;
 	Oid			root_save_userid;
 	int			root_save_sec_context;
 	int			root_save_nestlevel;
@@ -724,20 +726,6 @@ DefineIndex(Oid tableId,
 	 * partition.
 	 */
 	partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE;
-	if (partitioned)
-	{
-		/*
-		 * Note: we check 'stmt->concurrent' rather than 'concurrent', so that
-		 * the error is thrown also for temporary tables.  Seems better to be
-		 * consistent, even though we could do it on temporary table because
-		 * we're not actually doing it concurrently.
-		 */
-		if (stmt->concurrent)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("cannot create index on partitioned table \"%s\" concurrently",
-							RelationGetRelationName(rel))));
-	}
 
 	/*
 	 * Don't try to CREATE INDEX on temp tables of other backends.
@@ -1166,10 +1154,6 @@ DefineIndex(Oid tableId,
 		}
 	}
 
-	/* Is index safe for others to ignore?  See set_indexsafe_procflags() */
-	safe_index = indexInfo->ii_Expressions == NIL &&
-		indexInfo->ii_Predicate == NIL;
-
 	/*
 	 * Report index creation if appropriate (delay this till after most of the
 	 * error checks)
@@ -1234,6 +1218,11 @@ DefineIndex(Oid tableId,
 		if (pd->nparts != 0)
 			flags |= INDEX_CREATE_INVALID;
 	}
+	else if (concurrent && OidIsValid(parentIndexId))
+	{
+		/* If concurrent, initially build index partitions as "invalid" */
+		flags |= INDEX_CREATE_INVALID;
+	}
 
 	if (stmt->deferrable)
 		constr_flags |= INDEX_CONSTR_CREATE_DEFERRABLE;
@@ -1551,21 +1540,7 @@ DefineIndex(Oid tableId,
 			 */
 			if (invalidate_parent)
 			{
-				Relation	pg_index = table_open(IndexRelationId, RowExclusiveLock);
-				HeapTuple	tup,
-							newtup;
-
-				tup = SearchSysCache1(INDEXRELID,
-									  ObjectIdGetDatum(indexRelationId));
-				if (!HeapTupleIsValid(tup))
-					elog(ERROR, "cache lookup failed for index %u",
-						 indexRelationId);
-				newtup = heap_copytuple(tup);
-				((Form_pg_index) GETSTRUCT(newtup))->indisvalid = false;
-				CatalogTupleUpdate(pg_index, &tup->t_self, newtup);
-				ReleaseSysCache(tup);
-				table_close(pg_index, RowExclusiveLock);
-				heap_freetuple(newtup);
+				index_set_state_flags(indexRelationId, INDEX_DROP_CLEAR_VALID);
 
 				/*
 				 * CCI here to make this update visible, in case this recurses
@@ -1577,37 +1552,49 @@ DefineIndex(Oid tableId,
 
 		/*
 		 * Indexes on partitioned tables are not themselves built, so we're
-		 * done here.
+		 * done here in the non-concurrent case.
 		 */
-		AtEOXact_GUC(false, root_save_nestlevel);
-		SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
-		table_close(rel, NoLock);
-		if (!OidIsValid(parentIndexId))
-			pgstat_progress_end_command();
-		else
+		if (!concurrent)
 		{
-			/* Update progress for an intermediate partitioned index itself */
-			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
-		}
+			AtEOXact_GUC(false, root_save_nestlevel);
+			SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
+			table_close(rel, NoLock);
 
-		return address;
+			if (!OidIsValid(parentIndexId))
+				pgstat_progress_end_command();
+			else
+			{
+				/*
+				 * Update progress for an intermediate partitioned index
+				 * itself
+				 */
+				pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+			}
+
+			return address;
+		}
 	}
 
 	AtEOXact_GUC(false, root_save_nestlevel);
 	SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
 
-	if (!concurrent)
+	/*
+	 * All done in the non-concurrent case, and when building catalog entries
+	 * of partitions for CIC.
+	 */
+	if (!concurrent || OidIsValid(parentIndexId))
 	{
-		/* Close the heap and we're done, in the non-concurrent case */
 		table_close(rel, NoLock);
 
 		/*
 		 * If this is the top-level index, the command is done overall;
-		 * otherwise, increment progress to report one child index is done.
+		 * otherwise (when being called recursively), increment progress to
+		 * report that one child index is done.  Except in the concurrent
+		 * (catalog-only) case, which is handled later.
 		 */
 		if (!OidIsValid(parentIndexId))
 			pgstat_progress_end_command();
-		else
+		else if (!concurrent)
 			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
 
 		return address;
@@ -1618,6 +1605,144 @@ DefineIndex(Oid tableId,
 	SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
 	table_close(rel, NoLock);
 
+	if (!partitioned)
+	{
+		/* CREATE INDEX CONCURRENTLY on a nonpartitioned table */
+		DefineIndexConcurrentInternal(tableId, indexRelationId,
+									  indexInfo, heaplocktag, heaprelid);
+		pgstat_progress_end_command();
+		return address;
+	}
+	else
+	{
+		/*
+		 * For CIC on a partitioned table, finish by building indexes on
+		 * partitions
+		 */
+
+		ListCell   *lc;
+		List	   *childs;
+		List	   *tosetvalid = NIL;
+		MemoryContext cic_context,
+					old_context;
+		bool	pushed_snapshot = false;
+
+		/* Create special memory context for cross-transaction storage */
+		cic_context = AllocSetContextCreate(PortalContext,
+											"Create index concurrently",
+											ALLOCSET_DEFAULT_SIZES);
+
+		old_context = MemoryContextSwitchTo(cic_context);
+		childs = find_all_inheritors(indexRelationId, ShareUpdateExclusiveLock, NULL);
+		MemoryContextSwitchTo(old_context);
+
+		foreach(lc, childs)
+		{
+			Oid			indrelid = lfirst_oid(lc);
+			Oid			tabrelid;
+			char		relkind;
+
+			/*
+			 * Partition could have been dropped, since we looked it up. In
+			 * this case consider it done and go to the next one.
+			 */
+			tabrelid = IndexGetRelation(indrelid, true);
+			if (!tabrelid)
+			{
+				pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+				continue;
+			}
+			rel = try_table_open(tabrelid, ShareUpdateExclusiveLock);
+			if (!rel)
+			{
+				pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+				continue;
+			}
+
+			/*
+			 * Pre-existing partitions which were ATTACHED were already
+			 * counted in the progress report.
+			 */
+			if (get_index_isvalid(indrelid))
+			{
+				table_close(rel, ShareUpdateExclusiveLock);
+				continue;
+			}
+
+			/*
+			 * Partitioned indexes are counted in the progress report, but
+			 * don't need to be further processed.
+			 */
+			relkind = get_rel_relkind(indrelid);
+			if (!RELKIND_HAS_STORAGE(relkind))
+			{
+				/* The toplevel index doesn't count towards "partitions done" */
+				if (indrelid != indexRelationId)
+					pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+
+				/*
+				 * Build up a list of all the intermediate partitioned tables
+				 * which will later need to be set valid.
+				 */
+				old_context = MemoryContextSwitchTo(cic_context);
+				tosetvalid = lappend_oid(tosetvalid, indrelid);
+				MemoryContextSwitchTo(old_context);
+				table_close(rel, ShareUpdateExclusiveLock);
+				continue;
+			}
+
+			heaprelid = rel->rd_lockInfo.lockRelId;
+
+			/*
+			 * Close the table but retain the lock, that should be extended to
+			 * session level in DefineIndexConcurrentInternal.
+			 */
+			table_close(rel, NoLock);
+			SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
+
+			/* Process each partition in a separate transaction */
+			DefineIndexConcurrentInternal(tabrelid, indrelid, indexInfo,
+										  heaplocktag, heaprelid);
+
+			PushActiveSnapshot(GetTransactionSnapshot());
+			pushed_snapshot = true;
+			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+		}
+
+		/* Set as valid all partitioned indexes, including the parent */
+		foreach(lc, tosetvalid)
+		{
+			Oid			indrelid = lfirst_oid(lc);
+			Relation	indrel = try_index_open(indrelid, ShareUpdateExclusiveLock);
+
+			if (!indrel)
+				continue;
+			index_set_state_flags(indrelid, INDEX_CREATE_SET_READY);
+			CommandCounterIncrement();
+			index_set_state_flags(indrelid, INDEX_CREATE_SET_VALID);
+			index_close(indrel, ShareUpdateExclusiveLock);
+		}
+
+		MemoryContextDelete(cic_context);
+		pgstat_progress_end_command();
+		if (pushed_snapshot)
+			PopActiveSnapshot();
+		return address;
+	}
+}
+
+
+static void
+DefineIndexConcurrentInternal(Oid tableId, Oid indexRelationId, IndexInfo *indexInfo,
+							  LOCKTAG heaplocktag, LockRelId heaprelid)
+{
+	TransactionId limitXmin;
+	Snapshot	snapshot;
+
+	/* Is index safe for others to ignore?  See set_indexsafe_procflags() */
+	bool		safe_index = indexInfo->ii_Expressions == NIL &&
+		indexInfo->ii_Predicate == NIL;
+
 	/*
 	 * For a concurrent build, it's important to make the catalog entries
 	 * visible to other transactions before we start to build the index. That
@@ -1821,10 +1946,6 @@ DefineIndex(Oid tableId,
 	 * Last thing to do is release the session-level lock on the parent table.
 	 */
 	UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
-
-	pgstat_progress_end_command();
-
-	return address;
 }
 
 
diff --git a/src/test/isolation/expected/partitioned-cic.out b/src/test/isolation/expected/partitioned-cic.out
new file mode 100644
index 00000000000..b66acc6f6a2
--- /dev/null
+++ b/src/test/isolation/expected/partitioned-cic.out
@@ -0,0 +1,135 @@
+Parsed test spec with 3 sessions
+
+starting permutation: lock_p1 cic insert drop2 commit chk_content
+step lock_p1: lock cictab_part_1 in row exclusive mode;
+step cic: CREATE INDEX CONCURRENTLY ON cictab(i); <waiting ...>
+step insert: insert into cictab values (1, 1), (11, 1);
+step drop2: DROP TABLE cictab_part_2;
+step commit: COMMIT;
+step cic: <... completed>
+step chk_content: 
+  set enable_seqscan to off;
+  explain (costs off) select * from cictab where i > 0;
+  select * from cictab where i > 0;
+
+QUERY PLAN                                                  
+------------------------------------------------------------
+Index Scan using cictab_part_1_i_idx on cictab_part_1 cictab
+  Index Cond: (i > 0)                                       
+(2 rows)
+
+i|j
+-+-
+1|0
+1|1
+(2 rows)
+
+
+starting permutation: lock_p2 cic insert drop1 commit chk_content
+step lock_p2: lock cictab_part_2 in row exclusive mode;
+step cic: CREATE INDEX CONCURRENTLY ON cictab(i); <waiting ...>
+step insert: insert into cictab values (1, 1), (11, 1);
+step drop1: DROP TABLE cictab_part_1;
+step commit: COMMIT;
+step cic: <... completed>
+step chk_content: 
+  set enable_seqscan to off;
+  explain (costs off) select * from cictab where i > 0;
+  select * from cictab where i > 0;
+
+QUERY PLAN                                                  
+------------------------------------------------------------
+Index Scan using cictab_part_2_i_idx on cictab_part_2 cictab
+  Index Cond: (i > 0)                                       
+(2 rows)
+
+ i|j
+--+-
+11|0
+11|1
+(2 rows)
+
+
+starting permutation: lock_p1 cic insert detach2 commit chk_content chk_content_part2
+step lock_p1: lock cictab_part_1 in row exclusive mode;
+step cic: CREATE INDEX CONCURRENTLY ON cictab(i); <waiting ...>
+step insert: insert into cictab values (1, 1), (11, 1);
+step detach2: ALTER TABLE cictab DETACH PARTITION cictab_part_2;
+step commit: COMMIT;
+step cic: <... completed>
+step chk_content: 
+  set enable_seqscan to off;
+  explain (costs off) select * from cictab where i > 0;
+  select * from cictab where i > 0;
+
+QUERY PLAN                                                  
+------------------------------------------------------------
+Index Scan using cictab_part_1_i_idx on cictab_part_1 cictab
+  Index Cond: (i > 0)                                       
+(2 rows)
+
+i|j
+-+-
+1|0
+1|1
+(2 rows)
+
+step chk_content_part2: 
+  set enable_seqscan to off;
+  explain (costs off) select * from cictab_part_2 where i > 0;
+  select * from cictab_part_2 where i > 0;
+
+QUERY PLAN                                           
+-----------------------------------------------------
+Index Scan using cictab_part_2_i_idx on cictab_part_2
+  Index Cond: (i > 0)                                
+(2 rows)
+
+ i|j
+--+-
+11|0
+11|1
+(2 rows)
+
+
+starting permutation: lock_p2 cic insert detach1 commit chk_content chk_content_part1
+step lock_p2: lock cictab_part_2 in row exclusive mode;
+step cic: CREATE INDEX CONCURRENTLY ON cictab(i); <waiting ...>
+step insert: insert into cictab values (1, 1), (11, 1);
+step detach1: ALTER TABLE cictab DETACH PARTITION cictab_part_1;
+step commit: COMMIT;
+step cic: <... completed>
+step chk_content: 
+  set enable_seqscan to off;
+  explain (costs off) select * from cictab where i > 0;
+  select * from cictab where i > 0;
+
+QUERY PLAN                                                  
+------------------------------------------------------------
+Index Scan using cictab_part_2_i_idx on cictab_part_2 cictab
+  Index Cond: (i > 0)                                       
+(2 rows)
+
+ i|j
+--+-
+11|0
+11|1
+(2 rows)
+
+step chk_content_part1: 
+  set enable_seqscan to off;
+  explain (costs off) select * from cictab_part_1 where i > 0;
+  select * from cictab_part_1 where i > 0;
+
+QUERY PLAN                                           
+-----------------------------------------------------
+Index Scan using cictab_part_1_i_idx on cictab_part_1
+  Index Cond: (i > 0)                                
+(2 rows)
+
+i|j
+-+-
+1|0
+1|1
+(2 rows)
+
diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule
index e3c669a29c7..270efefdd71 100644
--- a/src/test/isolation/isolation_schedule
+++ b/src/test/isolation/isolation_schedule
@@ -116,3 +116,4 @@ test: serializable-parallel-2
 test: serializable-parallel-3
 test: matview-write-skew
 test: lock-nowait
+test: partitioned-cic
diff --git a/src/test/isolation/specs/partitioned-cic.spec b/src/test/isolation/specs/partitioned-cic.spec
new file mode 100644
index 00000000000..95f0bb2b47a
--- /dev/null
+++ b/src/test/isolation/specs/partitioned-cic.spec
@@ -0,0 +1,57 @@
+# Test the ability to drop/detach partitions while CREATE INDEX CONCURRENTLY is running.
+# To achieve this, start a transaction that will pause CIC in progress by
+# locking a partition in row exclusive mode, giving us a chance to drop/detach another partition.
+# Dropping/detaching is tested for each partition to test two scenarios:
+# when the partition has already been indexed and when it's yet to be indexed.
+
+setup {
+  create table cictab(i int, j int) partition by range(i);
+  create table cictab_part_1 partition of cictab for values from (0) to (10);
+  create table cictab_part_2 partition of cictab for values from (10) to (20);
+
+  insert into cictab values (1, 0), (11, 0);
+}
+
+teardown {
+    drop table if exists cictab_part_1;
+    drop table if exists cictab_part_2;
+    drop table cictab;
+}
+
+session s1
+setup {BEGIN;}
+step lock_p1 { lock cictab_part_1 in row exclusive mode; }
+step lock_p2 { lock cictab_part_2 in row exclusive mode; }
+step commit { COMMIT; }
+
+session s2
+step cic { CREATE INDEX CONCURRENTLY ON cictab(i); }
+step chk_content {
+  set enable_seqscan to off;
+  explain (costs off) select * from cictab where i > 0;
+  select * from cictab where i > 0;
+}
+
+step chk_content_part1 {
+  set enable_seqscan to off;
+  explain (costs off) select * from cictab_part_1 where i > 0;
+  select * from cictab_part_1 where i > 0;
+}
+
+step chk_content_part2 {
+  set enable_seqscan to off;
+  explain (costs off) select * from cictab_part_2 where i > 0;
+  select * from cictab_part_2 where i > 0;
+}
+
+session s3
+step detach1 { ALTER TABLE cictab DETACH PARTITION cictab_part_1; }
+step detach2 { ALTER TABLE cictab DETACH PARTITION cictab_part_2; }
+step drop1 { DROP TABLE cictab_part_1; }
+step drop2 { DROP TABLE cictab_part_2; }
+step insert { insert into cictab values (1, 1), (11, 1); }
+
+permutation lock_p1 cic insert drop2 commit chk_content
+permutation lock_p2 cic insert drop1 commit chk_content
+permutation lock_p1 cic insert detach2 commit chk_content chk_content_part2
+permutation lock_p2 cic insert detach1 commit chk_content chk_content_part1
diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out
index bcf1db11d73..ff448e0f988 100644
--- a/src/test/regress/expected/indexing.out
+++ b/src/test/regress/expected/indexing.out
@@ -50,12 +50,136 @@ select relname, relkind, relhassubclass, inhparent::regclass
 (8 rows)
 
 drop table idxpart;
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
-ERROR:  cannot create index on partitioned table "idxpart" concurrently
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+create table idxpart3 partition of idxpart for values from (30) to (40) partition by range(a);
+create table idxpart31 partition of idxpart3 default;
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart2 (a); -- leaf
+create index concurrently on idxpart (a); -- partitioned
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+ERROR:  could not create unique index "idxpart2_a_idx1"
+DETAIL:  Key (a)=(10) is duplicated.
+\d idxpart
+        Partitioned table "public.idxpart"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition key: RANGE (a)
+Indexes:
+    "idxpart_a_idx" btree (a)
+Number of partitions: 3 (Use \d+ to list them.)
+
+\d idxpart1
+        Partitioned table "public.idxpart1"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart1_a_idx" btree (a)
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart11
+       Partitioned table "public.idxpart11"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart1 FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart11_a_idx" btree (a)
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart111
+       Partitioned table "public.idxpart111"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart11 DEFAULT
+Partition key: RANGE (a)
+Indexes:
+    "idxpart111_a_idx" btree (a)
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart1111
+      Partitioned table "public.idxpart1111"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart111 DEFAULT
+Partition key: RANGE (a)
+Indexes:
+    "idxpart1111_a_idx" btree (a)
+Number of partitions: 0
+
+\d idxpart2
+              Table "public.idxpart2"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (10) TO (20)
+Indexes:
+    "idxpart2_a_idx" btree (a)
+
+\d idxpart3
+        Partitioned table "public.idxpart3"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (30) TO (40)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart3_a_idx" btree (a)
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart31
+             Table "public.idxpart31"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart3 DEFAULT
+Indexes:
+    "idxpart31_a_idx" btree (a)
+
+drop table idxpart;
+-- CIC should preserve active snapshot
+create function access_snapshot() returns event_trigger language plpgsql as $$
+begin
+perform 1;
+end
+$$;
+create table idxpart (col1 int) partition by range (col1);
+create event trigger check_trigger on ddl_command_end when tag in ('CREATE INDEX')
+execute procedure access_snapshot();
+create index concurrently on idxpart (col1);
+drop event trigger check_trigger;
 drop table idxpart;
+drop function access_snapshot();
 -- Verify bugfix with query on indexed partitioned table with no partitions
 -- https://postgr.es/m/20180124162006.pmapfiznhgngwtjf@alvherre.pgsql
 CREATE TABLE idxpart (col1 INT) PARTITION BY RANGE (col1);
diff --git a/src/test/regress/sql/indexing.sql b/src/test/regress/sql/indexing.sql
index b5cb01c2d70..b6b76424e76 100644
--- a/src/test/regress/sql/indexing.sql
+++ b/src/test/regress/sql/indexing.sql
@@ -29,11 +29,45 @@ select relname, relkind, relhassubclass, inhparent::regclass
 	where relname like 'idxpart%' order by relname;
 drop table idxpart;
 
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
-drop table idxpart;
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+create table idxpart3 partition of idxpart for values from (30) to (40) partition by range(a);
+create table idxpart31 partition of idxpart3 default;
+
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart2 (a); -- leaf
+create index concurrently on idxpart (a); -- partitioned
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+\d idxpart
+\d idxpart1
+\d idxpart11
+\d idxpart111
+\d idxpart1111
+\d idxpart2
+\d idxpart3
+\d idxpart31
+drop table idxpart;
+-- CIC should preserve active snapshot
+create function access_snapshot() returns event_trigger language plpgsql as $$
+begin
+perform 1;
+end
+$$;
+create table idxpart (col1 int) partition by range (col1);
+create event trigger check_trigger on ddl_command_end when tag in ('CREATE INDEX')
+execute procedure access_snapshot();
+create index concurrently on idxpart (col1);
+drop event trigger check_trigger;
+drop table idxpart;
+drop function access_snapshot();
+
 
 -- Verify bugfix with query on indexed partitioned table with no partitions
 -- https://postgr.es/m/20180124162006.pmapfiznhgngwtjf@alvherre.pgsql
-- 
2.43.0