From 71427bf7cd9927af04513ba3fe99e481a8ba1f61 Mon Sep 17 00:00:00 2001
From: Ilya Gladyshev <ilya.v.gladyshev@gmail.com>
Date: Tue, 31 Jan 2023 19:13:07 +0400
Subject: [PATCH 1/3] fix CREATE INDEX progress report with nested partitions

The progress reporting was added in v12 (ab0dfc961) but the original
patch didn't seem to consider the possibility of nested partitioning.

When called recursively, DefineIndex() would clobber the number of
completed partitions, and it was possible to end up with the TOTAL
counter greater than the DONE counter.

This clarifies/re-defines that the progress reporting counts both direct
and indirect children, but doesn't count intermediate partitioned tables:

- The TOTAL counter is set once at the start of the command.
- For indexes which are newly-built, the recursively-called
DefineIndex() increments the DONE counter.
- For pre-existing indexes which are ATTACHed rather than built,
DefineIndex() increments the DONE counter, and if the attached index is
partitioned, the counter is incremented to account for each of its leaf
partitions.

Author: Ilya Gladyshev
Reviewed-By: Justin Pryzby, Tomas Vondra, Dean Rasheed, Alvaro Herrera, Matthias van de Meent
Discussion: https://www.postgresql.org/message-id/flat/a15f904a70924ffa4ca25c3c744cff31e0e6e143.camel%40gmail.com
---
 doc/src/sgml/monitoring.sgml                  | 10 ++-
 src/backend/commands/indexcmds.c              | 70 +++++++++++++++++--
 src/backend/utils/activity/backend_progress.c | 28 ++++++++
 src/include/utils/backend_progress.h          |  1 +
 4 files changed, 102 insertions(+), 7 deletions(-)

diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index dca50707ad4..945d64ed2fa 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -6896,7 +6896,10 @@ FROM pg_stat_get_backend_idset() AS backendid;
       </para>
       <para>
        When creating an index on a partitioned table, this column is set to
-       the total number of partitions on which the index is to be created.
+       the total number of partitions on which the index is to be created or attached.
+       In the case of intermediate partitioned tables, this includes both
+       direct and indirect partitions, but excludes the intermediate
+       partitioned tables themselves.
        This field is <literal>0</literal> during a <literal>REINDEX</literal>.
       </para></entry>
      </row>
@@ -6907,7 +6910,10 @@ FROM pg_stat_get_backend_idset() AS backendid;
       </para>
       <para>
        When creating an index on a partitioned table, this column is set to
-       the number of partitions on which the index has been created.
+       the number of partitions on which the index has been created or attached.
+       In the case of intermediate partitioned tables, this includes both
+       direct and indirect partitions, but excludes the intermediate
+       partitioned tables themselves.
        This field is <literal>0</literal> during a <literal>REINDEX</literal>.
       </para></entry>
      </row>
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 16ec0b114e6..147265ddcca 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -130,6 +130,30 @@ typedef struct ReindexErrorInfo
 	char		relkind;
 } ReindexErrorInfo;
 
+
+/*
+ * Count the number of direct and indirect leaf partitions.  Note that this
+ * also excludes foreign tables.
+ */
+static int
+count_leaf_partitions(Oid relid)
+{
+	int			nleaves = 0;
+	List	   *childs = find_all_inheritors(relid, NoLock, NULL);
+	ListCell   *lc;
+
+	foreach(lc, childs)
+	{
+		Oid			partrelid = lfirst_oid(lc);
+
+		if (RELKIND_HAS_STORAGE(get_rel_relkind(partrelid)))
+			nleaves++;
+	}
+
+	list_free(childs);
+	return nleaves;
+}
+
 /*
  * CheckIndexCompatible
  *		Determine whether an existing index definition is compatible with a
@@ -1219,8 +1243,18 @@ DefineIndex(Oid relationId,
 			Relation	parentIndex;
 			TupleDesc	parentDesc;
 
-			pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_TOTAL,
-										 nparts);
+			/*
+			 * Set the total number of partitions at the start of the command;
+			 * don't update it when being called recursively.
+			 */
+			if (!OidIsValid(parentIndexId))
+			{
+				int			total_parts;
+
+				total_parts = count_leaf_partitions(relationId);
+				pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_TOTAL,
+											 total_parts);
+			}
 
 			/* Make a local copy of partdesc->oids[], just for safety */
 			memcpy(part_oids, partdesc->oids, sizeof(Oid) * nparts);
@@ -1250,6 +1284,7 @@ DefineIndex(Oid relationId,
 			{
 				Oid			childRelid = part_oids[i];
 				Relation	childrel;
+				char		child_relkind;
 				Oid			child_save_userid;
 				int			child_save_sec_context;
 				int			child_save_nestlevel;
@@ -1259,6 +1294,7 @@ DefineIndex(Oid relationId,
 				bool		found = false;
 
 				childrel = table_open(childRelid, lockmode);
+				child_relkind = RelationGetForm(childrel)->relkind;
 
 				GetUserIdAndSecContext(&child_save_userid,
 									   &child_save_sec_context);
@@ -1431,9 +1467,27 @@ DefineIndex(Oid relationId,
 					SetUserIdAndSecContext(child_save_userid,
 										   child_save_sec_context);
 				}
+				else
+				{
+					int			attached_parts;
+
+					/*
+					 * Avoid the overhead of counting partitions when that
+					 * can't apply.
+					 */
+					attached_parts = RELKIND_HAS_PARTITIONS(child_relkind) ?
+						count_leaf_partitions(childRelid) : 1;
+
+					/*
+					 * If a pre-existing index was attached, the progress
+					 * report is updated here.  If the index was partitioned,
+					 * all the children that were counted towards
+					 * PROGRESS_CREATEIDX_PARTITIONS_TOTAL are counted as
+					 * done.
+					 */
+					pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, attached_parts);
+				}
 
-				pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
-											 i + 1);
 				free_attrmap(attmap);
 			}
 
@@ -1484,9 +1538,15 @@ DefineIndex(Oid relationId,
 		/* Close the heap and we're done, in the non-concurrent case */
 		table_close(rel, NoLock);
 
-		/* If this is the top-level index, we're done. */
+		/*
+		 * If this is the top-level index, we're done. When called recursively
+		 * for child tables, the done partition counter is incremented now,
+		 * rather than in the caller.
+		 */
 		if (!OidIsValid(parentIndexId))
 			pgstat_progress_end_command();
+		else
+			pgstat_progress_incr_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, 1);
 
 		return address;
 	}
diff --git a/src/backend/utils/activity/backend_progress.c b/src/backend/utils/activity/backend_progress.c
index d96af812b19..2a9994b98fd 100644
--- a/src/backend/utils/activity/backend_progress.c
+++ b/src/backend/utils/activity/backend_progress.c
@@ -58,6 +58,34 @@ pgstat_progress_update_param(int index, int64 val)
 	PGSTAT_END_WRITE_ACTIVITY(beentry);
 }
 
+/*-----------
+ * pgstat_progress_incr_param() -
+ *
+ * Increment index'th member in st_progress_param[] of the current backend.
+ *-----------
+ */
+void
+pgstat_progress_incr_param(int index, int64 incr)
+{
+	volatile PgBackendStatus *beentry = MyBEEntry;
+	int64		val;
+
+	Assert(index >= 0 && index < PGSTAT_NUM_PROGRESS_PARAM);
+
+	if (!beentry || !pgstat_track_activities)
+		return;
+
+	/*
+	 * Because no other process should write to this backend's own status, we
+	 * can read its value from shared memory without needing to loop to ensure
+	 * its consistency.
+	 */
+	val = beentry->st_progress_param[index];
+	val += incr;
+
+	pgstat_progress_update_param(index, val);
+}
+
 /*-----------
  * pgstat_progress_update_multi_param() -
  *
diff --git a/src/include/utils/backend_progress.h b/src/include/utils/backend_progress.h
index 005e5d75ab6..a84752ade99 100644
--- a/src/include/utils/backend_progress.h
+++ b/src/include/utils/backend_progress.h
@@ -36,6 +36,7 @@ typedef enum ProgressCommandType
 extern void pgstat_progress_start_command(ProgressCommandType cmdtype,
 										  Oid relid);
 extern void pgstat_progress_update_param(int index, int64 val);
+extern void pgstat_progress_incr_param(int index, int64 incr);
 extern void pgstat_progress_update_multi_param(int nparam, const int *index,
 											   const int64 *val);
 extern void pgstat_progress_end_command(void);
-- 
2.34.1

