From f2918393ff311ec1646b0082f1b0a0f35a88e77c Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas@2ndquadrant.com>
Date: Sat, 11 Dec 2021 22:38:08 +0100
Subject: [PATCH 2/4] Build inherited extended stats on partitioned tables

Commit 859b3003de disabled building of extended stats for inheritance
trees, to prevent updating the same catalog row twice. That resolved the
issue, but it also means there are no extended stats for partitioned
tables, because the (!inh) case has empty sample. This is a regression,
affecting queries that don't estimate the leaf relations directly.

But because partitioned tables are empty, we can invert the condition
and build statistics only for the case with inheritance, without losing
anything. And we can consider them when calculating estimates.

Report and patch by Justin Pryzby, minor fixes and cleanup by me.
Backpatch all the way back to PostgreSQL 10, where extended statistics
were introduced (same as 859b3003de).

Author: Justin Pryzby
Reported-by: Justin Pryzby
Backpatch-through: 10
Discussion: https://postgr.es/m/20210923212624.GI831%40telsasoft.com
---
 src/backend/commands/analyze.c          | 18 +++++++++++++++++-
 src/backend/statistics/dependencies.c   |  9 ++++++---
 src/backend/statistics/extended_stats.c |  9 ++++++---
 src/backend/utils/adt/selfuncs.c        | 20 ++++++++++++--------
 src/test/regress/expected/stats_ext.out | 19 +++++++++++++++++++
 src/test/regress/sql/stats_ext.sql      | 10 ++++++++++
 6 files changed, 70 insertions(+), 15 deletions(-)

diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index cd77907fc7..58a82e4929 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -549,6 +549,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 	{
 		MemoryContext col_context,
 					old_context;
+		bool		build_ext_stats;
 
 		pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE,
 									 PROGRESS_ANALYZE_PHASE_COMPUTE_STATS);
@@ -612,13 +613,28 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 							thisdata->attr_cnt, thisdata->vacattrstats);
 		}
 
+		/*
+		 * Should we build extended statistics for this relation?
+		 *
+		 * The extended statistics catalog does not include an inheritance
+		 * flag, so we can't store statistics built both with and without
+		 * data from child relations. We can store just one set of statistics
+		 * per relation. For plain relations that's fine, but for inheritance
+		 * trees we have to pick whether to store statistics for just the
+		 * one relation or the whole tree. For plain inheritance we store
+		 * the (!inh) version, mostly for backwards compatibility reasons.
+		 * For partitioned tables that's pointless (the non-leaf tables are
+		 * always empty), so we store stats representing the whole tree.
+		 */
+		build_ext_stats = (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) ? inh : (!inh);
+
 		/*
 		 * Build extended statistics (if there are any).
 		 *
 		 * For now we only build extended statistics on individual relations,
 		 * not for relations representing inheritance trees.
 		 */
-		if (!inh)
+		if (build_ext_stats)
 			BuildRelationExtStatistics(onerel, totalrows, numrows, rows,
 									   attr_cnt, vacattrstats);
 	}
diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c
index 88479eee8b..a22ca73807 100644
--- a/src/backend/statistics/dependencies.c
+++ b/src/backend/statistics/dependencies.c
@@ -1418,10 +1418,13 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
 	int			unique_exprs_cnt;
 
 	/*
-	 * When dealing with inheritence trees, ignore extended stats (which were
-	 * built without data from child rels, and thus do not represent them).
+	 * When dealing with regular inheritence trees, ignore extended stats
+	 * (which were built without data from child rels, and thus do not
+	 * represent them). For partitioned tables data from partitions are
+	 * in the stats (and there's no data in the non-leaf relations), so
+	 * in this case we do consider extended stats.
 	 */
-	if (rte->inh)
+	if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE)
 		return 1.0;
 
 	/* check if there's any stats that might be useful for us. */
diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
index 607b02ab8e..68be000f3d 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -1698,10 +1698,13 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 	RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
 
 	/*
-	 * When dealing with inheritence trees, ignore extended stats (which were
-	 * built without data from child rels, and thus do not represent them).
+	 * When dealing with regular inheritence trees, ignore extended stats
+	 * (which were built without data from child rels, and thus do not
+	 * represent them). For partitioned tables data from partitions are
+	 * in the stats (and there's no data in the non-leaf relations), so
+	 * in this case we do consider extended stats.
 	 */
-	if (rte->inh)
+	if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE)
 		return sel;
 
 	/* check if there's any stats that might be useful for us. */
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 0b12f5288e..b25337370b 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -3913,19 +3913,23 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
 	Oid			statOid = InvalidOid;
 	MVNDistinct *stats;
 	StatisticExtInfo *matched_info = NULL;
-	RangeTblEntry		*rte = planner_rt_fetch(rel->relid, root);
-
-	/*
-	 * When dealing with inheritence trees, ignore extended stats (which were
-	 * built without data from child rels, and thus do not represent them).
-	 */
-	if (rte->inh)
-		return false;
+	RangeTblEntry		*rte;
 
 	/* bail out immediately if the table has no extended statistics */
 	if (!rel->statlist)
 		return false;
 
+	/*
+	 * When dealing with regular inheritence trees, ignore extended stats
+	 * (which were built without data from child rels, and thus do not
+	 * represent them). For partitioned tables data from partitions are
+	 * in the stats (and there's no data in the non-leaf relations), so
+	 * in this case we do consider extended stats.
+	 */
+	rte = planner_rt_fetch(rel->relid, root);
+	if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE)
+		return false;
+
 	/* look for the ndistinct statistics object matching the most vars */
 	nmatches_vars = 0;			/* we require at least two matches */
 	nmatches_exprs = 0;
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index 5410f58f7f..a11fff655a 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -200,6 +200,25 @@ SELECT * FROM check_estimated_rows('SELECT * FROM stxdinh* GROUP BY 1,2');
 (1 row)
 
 DROP TABLE stxdinh, stxdinh1;
+-- Ensure inherited stats ARE applied to inherited query in partitioned table
+CREATE TABLE stxdinp(i int, a int, b int) PARTITION BY RANGE (i);
+CREATE TABLE stxdinp1 PARTITION OF stxdinp FOR VALUES FROM (1)TO(100);
+INSERT INTO stxdinp SELECT 1, a/100, a/100 FROM generate_series(1,999)a;
+CREATE STATISTICS stxdinp ON (a),(b) FROM stxdinp;
+VACUUM ANALYZE stxdinp; -- partitions are processed recursively
+SELECT 1 FROM pg_statistic_ext WHERE stxrelid='stxdinp'::regclass;
+ ?column? 
+----------
+        1
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinp GROUP BY 1,2');
+ estimated | actual 
+-----------+--------
+        10 |     10
+(1 row)
+
+DROP TABLE stxdinp;
 -- basic test for statistics on expressions
 CREATE TABLE ab1 (a INTEGER, b INTEGER, c TIMESTAMP, d TIMESTAMPTZ);
 -- expression stats may be built on a single expression column
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index a196d5e2f8..01383e8f5f 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -127,6 +127,16 @@ VACUUM ANALYZE stxdinh, stxdinh1;
 SELECT * FROM check_estimated_rows('SELECT * FROM stxdinh* GROUP BY 1,2');
 DROP TABLE stxdinh, stxdinh1;
 
+-- Ensure inherited stats ARE applied to inherited query in partitioned table
+CREATE TABLE stxdinp(i int, a int, b int) PARTITION BY RANGE (i);
+CREATE TABLE stxdinp1 PARTITION OF stxdinp FOR VALUES FROM (1)TO(100);
+INSERT INTO stxdinp SELECT 1, a/100, a/100 FROM generate_series(1,999)a;
+CREATE STATISTICS stxdinp ON (a),(b) FROM stxdinp;
+VACUUM ANALYZE stxdinp; -- partitions are processed recursively
+SELECT 1 FROM pg_statistic_ext WHERE stxrelid='stxdinp'::regclass;
+SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinp GROUP BY 1,2');
+DROP TABLE stxdinp;
+
 -- basic test for statistics on expressions
 CREATE TABLE ab1 (a INTEGER, b INTEGER, c TIMESTAMP, d TIMESTAMPTZ);
 
-- 
2.31.1

