From b48837a934ef1381d26ef62bbaf66f5e52b0cd6d Mon Sep 17 00:00:00 2001
From: Ilya Gladyshev <ilya.v.gladyshev@gmail.com>
Date: Tue, 31 Jan 2023 19:13:07 +0400
Subject: [PATCH 1/3] fix CREATE INDEX progress report with nested partitions

The progress reporting was added in v12 (ab0dfc961) but the original
patch didn't seem to consider the possibility of nested partitioning.

When called recursively, DefineIndex() would clobber both the total and
the completed partition counts, and it was possible to end up with the
DONE counter greater than the TOTAL counter.

This clarifies/re-defines that the progress reporting counts both direct
and indirect children, as well as intermediate partitioned tables:

- The TOTAL counter is set once at the start of the command.
- For indexes which are newly-built, the recursively-called
DefineIndex() increments the DONE counter.
- For pre-existing indexes which are ATTACHed rather than built,
DefineIndex() increments the DONE counter once per attached index; if the
attached index is itself partitioned, its own partitions are not counted.

Author: Ilya Gladyshev, Justin Pryzby
Reviewed-By: Tomas Vondra, Dean Rasheed, Alvaro Herrera, Matthias van de Meent, Tom Lane
Discussion: https://www.postgresql.org/message-id/flat/a15f904a70924ffa4ca25c3c744cff31e0e6e143.camel%40gmail.com
---
 doc/src/sgml/monitoring.sgml                  | 10 +++-
 src/backend/bootstrap/bootparse.y             |  2 +
 src/backend/commands/indexcmds.c              | 60 +++++++++++++++++--
 src/backend/commands/tablecmds.c              |  4 +-
 src/backend/tcop/utility.c                    |  6 +-
 src/backend/utils/activity/backend_progress.c | 28 +++++++++
 src/include/commands/defrem.h                 |  1 +
 src/include/utils/backend_progress.h          |  1 +
 8 files changed, 103 insertions(+), 9 deletions(-)

diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 7ab4424bf13..cf88dca53f3 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -6929,7 +6929,10 @@ FROM pg_stat_get_backend_idset() AS backendid;
       </para>
       <para>
        When creating an index on a partitioned table, this column is set to
-       the total number of partitions on which the index is to be created.
+       the total number of partitions on which the index is to be created or attached.
+       In the case of intermediate partitioned tables, this includes both
+       direct and indirect partitions, and includes the intermediate
+       partitioned tables themselves.
        This field is <literal>0</literal> during a <literal>REINDEX</literal>.
       </para></entry>
      </row>
@@ -6940,7 +6943,10 @@ FROM pg_stat_get_backend_idset() AS backendid;
       </para>
       <para>
        When creating an index on a partitioned table, this column is set to
-       the number of partitions on which the index has been created.
+       the number of partitions on which the index has been created or attached.
+       In the case of intermediate partitioned tables, this includes both
+       direct and indirect partitions, and includes the intermediate
+       partitioned tables themselves.
        This field is <literal>0</literal> during a <literal>REINDEX</literal>.
       </para></entry>
      </row>
diff --git a/src/backend/bootstrap/bootparse.y b/src/backend/bootstrap/bootparse.y
index 86804bb598e..81a1b7bfec3 100644
--- a/src/backend/bootstrap/bootparse.y
+++ b/src/backend/bootstrap/bootparse.y
@@ -306,6 +306,7 @@ Boot_DeclareIndexStmt:
 								$4,
 								InvalidOid,
 								InvalidOid,
+								-1,
 								false,
 								false,
 								false,
@@ -358,6 +359,7 @@ Boot_DeclareUniqueIndexStmt:
 								$5,
 								InvalidOid,
 								InvalidOid,
+								-1,
 								false,
 								false,
 								false,
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index ff48f44c66f..edb65d4cacb 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -521,6 +521,7 @@ WaitForOlderSnapshots(TransactionId limitXmin, bool progress)
  *		case the caller had better have checked it earlier.
  * 'skip_build': make the catalog entries but don't create the index files
  * 'quiet': suppress the NOTICE chatter ordinarily provided for constraints.
+ * 'total_parts': number of direct and indirect partitions, or -1 if unknown
  *
  * Returns the object address of the created index.
  */
@@ -530,6 +531,7 @@ DefineIndex(Oid relationId,
 			Oid indexRelationId,
 			Oid parentIndexId,
 			Oid parentConstraintId,
+			int total_parts,
 			bool is_alter_table,
 			bool check_rights,
 			bool check_not_in_use,
@@ -1225,8 +1227,30 @@ DefineIndex(Oid relationId,
 			Relation	parentIndex;
 			TupleDesc	parentDesc;
 
-			pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_TOTAL,
-										 nparts);
+			/*
+			 * Set the total number of partitions at the start of the command;
+			 * but don't update it when being called recursively.
+			 */
+			if (!OidIsValid(parentIndexId))
+			{
+				/*
+				 * When called by ProcessUtilitySlow(), the number of
+				 * partitions is passed in as an optimization.  Otherwise,
+				 * count them here in the same way.  Subtract one since
+				 * find_all_inheritors() includes the rel itself.
+				 */
+				if (total_parts < 0)
+				{
+					List	   *childs = find_all_inheritors(relationId,
+															 NoLock, NULL);
+
+					total_parts = list_length(childs) - 1;
+					list_free(childs);
+				}
+
+				pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_TOTAL,
+											 total_parts);
+			}
 
 			/* Make a local copy of partdesc->oids[], just for safety */
 			memcpy(part_oids, partdesc->oids, sizeof(Oid) * nparts);
@@ -1432,14 +1456,23 @@ DefineIndex(Oid relationId,
 								InvalidOid, /* no predefined OID */
 								indexRelationId,	/* this is our child */
 								createdConstraintId,
+								-1,
 								is_alter_table, check_rights, check_not_in_use,
 								skip_build, quiet);
 					SetUserIdAndSecContext(child_save_userid,
 										   child_save_sec_context);
 				}
+				else
+				{
+					/*
+					 * If a pre-existing index was ATTACHed, the progress
+					 * report is updated here.  A partitioned index is
+					 * likewise counted when attached, but not its partitions,
+					 * since that's expensive, and ATTACH is fast anyway.
+					 */
+					pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+				}
 
-				pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
-											 i + 1);
 				free_attrmap(attmap);
 			}
 
@@ -1479,6 +1512,16 @@ DefineIndex(Oid relationId,
 		table_close(rel, NoLock);
 		if (!OidIsValid(parentIndexId))
 			pgstat_progress_end_command();
+		else
+		{
+			/*
+			 * Update progress for a partitioned index itself; the
+			 * recursively-called function will have updated the counter for
+			 * its child indexes.
+			 */
+			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
+		}
+
 		return address;
 	}
 
@@ -1490,9 +1533,16 @@ DefineIndex(Oid relationId,
 		/* Close the heap and we're done, in the non-concurrent case */
 		table_close(rel, NoLock);
 
-		/* If this is the top-level index, we're done. */
+		/*
+		 * If this is the top-level index, the command is done. When called
+		 * recursively for child tables, the done partition counter is
+		 * incremented now, rather than in the caller, to provide fine-grained
+		 * progress reporting in the case of intermediate partitioning.
+		 */
 		if (!OidIsValid(parentIndexId))
 			pgstat_progress_end_command();
+		else
+			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
 
 		return address;
 	}
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 3e2c5f797cd..cc74d703f91 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -1216,6 +1216,7 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
 						InvalidOid,
 						RelationGetRelid(idxRel),
 						constraintOid,
+						-1,
 						false, false, false, false, false);
 
 			index_close(idxRel, AccessShareLock);
@@ -8640,6 +8641,7 @@ ATExecAddIndex(AlteredTableInfo *tab, Relation rel,
 						  InvalidOid,	/* no predefined OID */
 						  InvalidOid,	/* no parent index */
 						  InvalidOid,	/* no parent constraint */
+						  -1,			/* total_parts unknown */
 						  true, /* is_alter_table */
 						  check_rights,
 						  false,	/* check_not_in_use - we did it already */
@@ -18105,7 +18107,7 @@ AttachPartitionEnsureIndexes(Relation rel, Relation attachrel)
 										   &conOid);
 			DefineIndex(RelationGetRelid(attachrel), stmt, InvalidOid,
 						RelationGetRelid(idxRel),
-						conOid,
+						conOid, -1,
 						true, false, false, false, false);
 		}
 
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c
index eada7353639..2dc92c72cea 100644
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -1465,6 +1465,7 @@ ProcessUtilitySlow(ParseState *pstate,
 					Oid			relid;
 					LOCKMODE	lockmode;
 					bool		is_alter_table;
+					int			nparts = 0;
 
 					if (stmt->concurrent)
 						PreventInTransactionBlock(isTopLevel,
@@ -1503,9 +1504,11 @@ ProcessUtilitySlow(ParseState *pstate,
 						List	   *inheritors = NIL;
 
 						inheritors = find_all_inheritors(relid, lockmode, NULL);
+						nparts = list_length(inheritors) - 1;
 						foreach(lc, inheritors)
 						{
-							char		relkind = get_rel_relkind(lfirst_oid(lc));
+							Oid			partrelid = lfirst_oid(lc);
+							char		relkind = get_rel_relkind(partrelid);
 
 							if (relkind != RELKIND_RELATION &&
 								relkind != RELKIND_MATVIEW &&
@@ -1548,6 +1551,7 @@ ProcessUtilitySlow(ParseState *pstate,
 									InvalidOid, /* no predefined OID */
 									InvalidOid, /* no parent index */
 									InvalidOid, /* no parent constraint */
+									nparts,
 									is_alter_table,
 									true,	/* check_rights */
 									true,	/* check_not_in_use */
diff --git a/src/backend/utils/activity/backend_progress.c b/src/backend/utils/activity/backend_progress.c
index d96af812b19..2a9994b98fd 100644
--- a/src/backend/utils/activity/backend_progress.c
+++ b/src/backend/utils/activity/backend_progress.c
@@ -58,6 +58,34 @@ pgstat_progress_update_param(int index, int64 val)
 	PGSTAT_END_WRITE_ACTIVITY(beentry);
 }
 
+/*-----------
+ * pgstat_progress_incr_param() -
+ *
+ * Add incr to index'th member of st_progress_param[] of the current backend.
+ *-----------
+ */
+void
+pgstat_progress_incr_param(int index, int64 incr)
+{
+	volatile PgBackendStatus *beentry = MyBEEntry;
+	int64		val;
+
+	Assert(index >= 0 && index < PGSTAT_NUM_PROGRESS_PARAM);
+
+	if (!beentry || !pgstat_track_activities)
+		return;
+
+	/*
+	 * Because no other process should write to this backend's own status, the
+	 * current value can be read from shared memory without a retry loop; the
+	 * sum is then stored with pgstat_progress_update_param().
+	 */
+	val = beentry->st_progress_param[index];
+	val += incr;
+
+	pgstat_progress_update_param(index, val);
+}
+
 /*-----------
  * pgstat_progress_update_multi_param() -
  *
diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h
index 4f7f87fc62c..478203ed4c4 100644
--- a/src/include/commands/defrem.h
+++ b/src/include/commands/defrem.h
@@ -29,6 +29,7 @@ extern ObjectAddress DefineIndex(Oid relationId,
 								 Oid indexRelationId,
 								 Oid parentIndexId,
 								 Oid parentConstraintId,
+								 int total_parts,
 								 bool is_alter_table,
 								 bool check_rights,
 								 bool check_not_in_use,
diff --git a/src/include/utils/backend_progress.h b/src/include/utils/backend_progress.h
index 005e5d75ab6..a84752ade99 100644
--- a/src/include/utils/backend_progress.h
+++ b/src/include/utils/backend_progress.h
@@ -36,6 +36,7 @@ typedef enum ProgressCommandType
 extern void pgstat_progress_start_command(ProgressCommandType cmdtype,
 										  Oid relid);
 extern void pgstat_progress_update_param(int index, int64 val);
+extern void pgstat_progress_incr_param(int index, int64 incr);
 extern void pgstat_progress_update_multi_param(int nparam, const int *index,
 											   const int64 *val);
 extern void pgstat_progress_end_command(void);
-- 
2.34.1

