From dcdf1c516d06afa75759d38e0903b8585b95ef72 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Mon, 13 Mar 2023 19:16:48 -0400
Subject: [PATCH v2 6/6] Simplify pg_dump's creation of parent-table links.

Instead of trying to optimize this by skipping creation of the
links for tables we don't plan to dump, just create them all in
bulk with a single scan over the pg_inherits data.  The previous
approach was more or less O(N^2) in the number of pg_inherits
entries, not to mention being way too complicated.

Discussion: https://postgr.es/m/1376149.1675268279@sss.pgh.pa.us
---
 src/bin/pg_dump/common.c  | 125 ++++++++++++++++----------------------
 src/bin/pg_dump/pg_dump.h |   5 +-
 2 files changed, 54 insertions(+), 76 deletions(-)

diff --git a/src/bin/pg_dump/common.c b/src/bin/pg_dump/common.c
index df48d0bb17..5d988986ed 100644
--- a/src/bin/pg_dump/common.c
+++ b/src/bin/pg_dump/common.c
@@ -83,8 +83,6 @@ static void flagInhTables(Archive *fout, TableInfo *tblinfo, int numTables,
 						  InhInfo *inhinfo, int numInherits);
 static void flagInhIndexes(Archive *fout, TableInfo *tblinfo, int numTables);
 static void flagInhAttrs(DumpOptions *dopt, TableInfo *tblinfo, int numTables);
-static void findParentsByOid(TableInfo *self,
-							 InhInfo *inhinfo, int numInherits);
 static int	strInArray(const char *pattern, char **arr, int arr_size);
 static IndxInfo *findIndexByOid(Oid oid);
 
@@ -288,45 +286,70 @@ static void
 flagInhTables(Archive *fout, TableInfo *tblinfo, int numTables,
 			  InhInfo *inhinfo, int numInherits)
 {
+	TableInfo  *child = NULL;
+	TableInfo  *parent = NULL;
 	int			i,
 				j;
 
-	for (i = 0; i < numTables; i++)
+	/*
+	 * Set up links from child tables to their parents.
+	 *
+	 * We used to attempt to skip this work for tables that are not to be
+	 * dumped; but the optimizable cases are rare in practice, and setting up
+	 * these links in bulk is cheaper than the old way.  (Note in particular
+	 * that it's very rare for a child to have more than one parent.)
+	 */
+	for (i = 0; i < numInherits; i++)
 	{
-		bool		find_parents = true;
-		bool		mark_parents = true;
-
-		/* Some kinds never have parents */
-		if (tblinfo[i].relkind == RELKIND_SEQUENCE ||
-			tblinfo[i].relkind == RELKIND_VIEW ||
-			tblinfo[i].relkind == RELKIND_MATVIEW)
-			continue;
-
 		/*
-		 * Normally, we don't bother computing anything for non-target tables.
-		 * However, we must find the parents of non-root partitioned tables in
-		 * any case, so that we can trace from leaf partitions up to the root
-		 * (in case a leaf is to be dumped but its parents are not).  We need
-		 * not mark such parents interesting for getTableAttrs, though.
+		 * Skip a hashtable lookup if it's same table as last time.  This is
+		 * unlikely for the child, but less so for the parent.  (Maybe we
+		 * should ask the backend for a sorted array to make it more likely?
+		 * Not clear the sorting effort would be repaid, though.)
 		 */
-		if (!tblinfo[i].dobj.dump)
+		if (child == NULL ||
+			child->dobj.catId.oid != inhinfo[i].inhrelid)
 		{
-			mark_parents = false;
+			child = findTableByOid(inhinfo[i].inhrelid);
 
-			if (!(tblinfo[i].relkind == RELKIND_PARTITIONED_TABLE &&
-				  tblinfo[i].ispartition))
-				find_parents = false;
+			/*
+			 * If we find no TableInfo, assume the pg_inherits entry is for a
+			 * partitioned index, which we don't need to track.
+			 */
+			if (child == NULL)
+				continue;
 		}
+		if (parent == NULL ||
+			parent->dobj.catId.oid != inhinfo[i].inhparent)
+		{
+			parent = findTableByOid(inhinfo[i].inhparent);
+			if (parent == NULL)
+				pg_fatal("failed sanity check, parent OID %u of table \"%s\" (OID %u) not found",
+						 inhinfo[i].inhparent,
+						 child->dobj.name,
+						 child->dobj.catId.oid);
+		}
+		/* Add this parent to the child's list of parents. */
+		if (child->numParents > 0)
+			child->parents = pg_realloc_array(child->parents,
+											  TableInfo *,
+											  child->numParents + 1);
+		else
+			child->parents = pg_malloc_array(TableInfo *, 1);
+		child->parents[child->numParents++] = parent;
+	}
 
-		/* If needed, find all the immediate parent tables. */
-		if (find_parents)
-			findParentsByOid(&tblinfo[i], inhinfo, numInherits);
-
+	/*
+	 * Now consider all child tables and mark parents interesting as needed.
+	 */
+	for (i = 0; i < numTables; i++)
+	{
 		/*
 		 * If needed, mark the parents as interesting for getTableAttrs and
-		 * getIndexes.
+		 * getIndexes.  We only need this for direct parents of dumpable
+		 * tables.
 		 */
-		if (mark_parents)
+		if (tblinfo[i].dobj.dump)
 		{
 			int			numParents = tblinfo[i].numParents;
 			TableInfo **parents = tblinfo[i].parents;
@@ -996,52 +1019,6 @@ findOwningExtension(CatalogId catalogId)
 }
 
 
-/*
- * findParentsByOid
- *	  find a table's parents in tblinfo[]
- */
-static void
-findParentsByOid(TableInfo *self,
-				 InhInfo *inhinfo, int numInherits)
-{
-	Oid			oid = self->dobj.catId.oid;
-	int			i,
-				j;
-	int			numParents;
-
-	numParents = 0;
-	for (i = 0; i < numInherits; i++)
-	{
-		if (inhinfo[i].inhrelid == oid)
-			numParents++;
-	}
-
-	self->numParents = numParents;
-
-	if (numParents > 0)
-	{
-		self->parents = pg_malloc_array(TableInfo *, numParents);
-		j = 0;
-		for (i = 0; i < numInherits; i++)
-		{
-			if (inhinfo[i].inhrelid == oid)
-			{
-				TableInfo  *parent;
-
-				parent = findTableByOid(inhinfo[i].inhparent);
-				if (parent == NULL)
-					pg_fatal("failed sanity check, parent OID %u of table \"%s\" (OID %u) not found",
-							 inhinfo[i].inhparent,
-							 self->dobj.name,
-							 oid);
-				self->parents[j++] = parent;
-			}
-		}
-	}
-	else
-		self->parents = NULL;
-}
-
 /*
  * parseOidArray
  *	  parse a string of numbers delimited by spaces into a character array
diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h
index ffa37265c8..283cd1a602 100644
--- a/src/bin/pg_dump/pg_dump.h
+++ b/src/bin/pg_dump/pg_dump.h
@@ -320,6 +320,9 @@ typedef struct _tableInfo
 	bool		ispartition;	/* is table a partition? */
 	bool		unsafe_partitions;	/* is it an unsafe partitioned table? */
 
+	int			numParents;		/* number of (immediate) parent tables */
+	struct _tableInfo **parents;	/* TableInfos of immediate parents */
+
 	/*
 	 * These fields are computed only if we decide the table is interesting
 	 * (it's either a table to dump, or a direct parent of a dumpable table).
@@ -351,8 +354,6 @@ typedef struct _tableInfo
 	/*
 	 * Stuff computed only for dumpable tables.
 	 */
-	int			numParents;		/* number of (immediate) parent tables */
-	struct _tableInfo **parents;	/* TableInfos of immediate parents */
 	int			numIndexes;		/* number of indexes */
 	struct _indxInfo *indexes;	/* indexes */
 	struct _tableDataInfo *dataObj; /* TableDataInfo, if dumping its data */
-- 
2.31.1

