From ad50749318d5eb989b222862d15bb9c7031ef00a Mon Sep 17 00:00:00 2001
From: Corey Huinker <corey.huinker@gmail.com>
Date: Thu, 23 Jan 2025 23:59:08 -0500
Subject: [PATCH v2 6/6] Add pg_ndistinct attnum checking to extended stats
 import.

---
 src/backend/statistics/extended_stats.c    | 92 +++++++++++++---------
 src/test/regress/expected/stats_import.out | 27 +++++++
 src/test/regress/sql/stats_import.sql      | 28 +++++++
 3 files changed, 108 insertions(+), 39 deletions(-)

diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
index b54b3aa533..64c3a7001c 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -2869,6 +2869,9 @@ extended_statistics_update(FunctionCallInfo fcinfo, int elevel)
 
 	bool		success = true;
 
+	Datum		exprdatum;
+	bool		isnull;
+	List	   *exprs = NIL;
 	int			numattnums = 0;
 	int			numexprs = 0;
 	int			numattrs = 0;
@@ -2920,6 +2923,37 @@ extended_statistics_update(FunctionCallInfo fcinfo, int elevel)
 
 	stxform = (Form_pg_statistic_ext) GETSTRUCT(tup);
 	expand_stxkind(tup, &enabled);
+	numattnums = stxform->stxkeys.dim1;
+
+	/* decode expression (if any) */
+	exprdatum = SysCacheGetAttr(STATEXTOID,
+								tup,
+								Anum_pg_statistic_ext_stxexprs,
+								&isnull);
+
+	if (!isnull)
+	{
+		char	   *s;
+
+		s = TextDatumGetCString(exprdatum);
+		exprs = (List *) stringToNode(s);
+		pfree(s);
+
+		/*
+			* Run the expressions through eval_const_expressions. This is not
+			* just an optimization, but is necessary, because the planner
+			* will be comparing them to similarly-processed qual clauses, and
+			* may fail to detect valid matches without this.  We must not use
+			* canonicalize_qual, however, since these aren't qual
+			* expressions.
+			*/
+		exprs = (List *) eval_const_expressions(NULL, (Node *) exprs);
+
+		/* May as well fix opfuncids too */
+		fix_opfuncids((Node *) exprs);
+	}
+	numexprs = list_length(exprs);
+	numattrs = numattnums + numexprs;
 
 	/* lock table */
 	stats_lock_check_privileges(stxform->stxrelid);
@@ -3004,42 +3038,6 @@ extended_statistics_update(FunctionCallInfo fcinfo, int elevel)
 	 */
 	if (has.mcv || has.expressions)
 	{
-		Datum		exprdatum;
-		bool		isnull;
-		List	   *exprs = NIL;
-
-		/* decode expression (if any) */
-		exprdatum = SysCacheGetAttr(STATEXTOID,
-									tup,
-									Anum_pg_statistic_ext_stxexprs,
-									&isnull);
-
-		if (!isnull)
-		{
-			char	   *s;
-
-			s = TextDatumGetCString(exprdatum);
-			exprs = (List *) stringToNode(s);
-			pfree(s);
-
-			/*
-			 * Run the expressions through eval_const_expressions. This is not
-			 * just an optimization, but is necessary, because the planner
-			 * will be comparing them to similarly-processed qual clauses, and
-			 * may fail to detect valid matches without this.  We must not use
-			 * canonicalize_qual, however, since these aren't qual
-			 * expressions.
-			 */
-			exprs = (List *) eval_const_expressions(NULL, (Node *) exprs);
-
-			/* May as well fix opfuncids too */
-			fix_opfuncids((Node *) exprs);
-		}
-
-		numattnums = stxform->stxkeys.dim1;
-		numexprs = list_length(exprs);
-		numattrs = numattnums + numexprs;
-
 		atttypids = palloc0(numattrs * sizeof(Oid));
 		atttypmods = palloc0(numattrs * sizeof(int32));
 		atttypcolls = palloc0(numattrs * sizeof(Oid));
@@ -3102,15 +3100,31 @@ extended_statistics_update(FunctionCallInfo fcinfo, int elevel)
 
 	if (has.ndistinct)
 	{
-		values[Anum_pg_statistic_ext_data_stxdndistinct - 1] = PG_GETARG_DATUM(NDISTINCT_ARG);
-		replaces[Anum_pg_statistic_ext_data_stxdndistinct - 1] = true;
+		Datum			ndistinct_datum = PG_GETARG_DATUM(NDISTINCT_ARG);
+		bytea		   *data = DatumGetByteaPP(ndistinct_datum);
+		MVNDistinct	   *ndistinct = statext_ndistinct_deserialize(data);
+
+		if (pg_ndistinct_validate_items(ndistinct, &stxform->stxkeys, numexprs, elevel))
+		{
+			values[Anum_pg_statistic_ext_data_stxdndistinct - 1] = ndistinct_datum;
+			replaces[Anum_pg_statistic_ext_data_stxdndistinct - 1] = true;
+		}
+		else
+		{
+			nulls[Anum_pg_statistic_ext_data_stxdndistinct - 1] = true;
+			success = false;
+		}
+
+		/* TODO: free ndist */
 	}
 	else
 		nulls[Anum_pg_statistic_ext_data_stxdndistinct - 1] = true;
 
 	if (has.dependencies)
 	{
-		values[Anum_pg_statistic_ext_data_stxddependencies - 1] = PG_GETARG_DATUM(DEPENDENCIES_ARG);
+		Datum	dependencies_datum = PG_GETARG_DATUM(DEPENDENCIES_ARG);
+
+		values[Anum_pg_statistic_ext_data_stxddependencies - 1] = dependencies_datum;
 		replaces[Anum_pg_statistic_ext_data_stxddependencies - 1] = true;
 	}
 	else
diff --git a/src/test/regress/expected/stats_import.out b/src/test/regress/expected/stats_import.out
index 9f3b45fc55..347c86a6c1 100644
--- a/src/test/regress/expected/stats_import.out
+++ b/src/test/regress/expected/stats_import.out
@@ -1805,6 +1805,33 @@ WHERE s.starelid = 'stats_import.is_odd'::regclass;
 ---------+------------+-------------+----------+-------------+----------+----------+----------+----------+----------+--------+--------+--------+--------+--------+----------+----------+----------+----------+----------+-------------+-------------+-------------+-------------+-------------+-----+-----+-----+-----+-----+-----------
 (0 rows)
 
+-- set n_distinct using at attnum (1) that is not in the statistics object
+SELECT
+    pg_catalog.pg_set_extended_stats(
+        statistics_schemaname => 'stats_import'::regnamespace,
+        statistics_name => 'test_stat_clone'::name,
+        inherited => false,
+        n_distinct => '{"2, 3": 4, "2, -1": 4, "2, -2": 4, "3, -1": 4, "3, -2": 4, "-1, -2": 3, "2, 3, -1": 4, "2, 3, -2": 4, "2, -1, -2": 4, "3, -1, -2": 4, "1, 3, -1, -2": 4}'::pg_ndistinct
+      );
+ERROR:  pg_ndistinct: invalid attnum for this statistics object: 1
+-- set n_distinct using at attnum that is 0
+SELECT
+    pg_catalog.pg_set_extended_stats(
+        statistics_schemaname => 'stats_import'::regnamespace,
+        statistics_name => 'test_stat_clone'::name,
+        inherited => false,
+        n_distinct => '{"2, 3": 4, "2, -1": 4, "2, 0": 4, "3, -1": 4, "3, -2": 4, "-1, -2": 3, "2, 3, -1": 4, "2, 3, -2": 4, "2, -1, -2": 4, "3, -1, -2": 4, "2, 3, -1, -2": 4}'::pg_ndistinct
+      );
+ERROR:  pg_ndistinct: invalid attnum for this statistics object: 0
+-- set n_distinct using at attnum that is outside the expression bounds(below -2)
+SELECT
+    pg_catalog.pg_set_extended_stats(
+        statistics_schemaname => 'stats_import'::regnamespace,
+        statistics_name => 'test_stat_clone'::name,
+        inherited => false,
+        n_distinct => '{"2, 3": 4, "2, -4": 4, "2, -2": 4, "3, -1": 4, "3, -2": 4, "-1, -2": 3, "2, 3, -1": 4, "2, 3, -2": 4, "2, -1, -2": 4, "3, -1, -2": 4, "1, 3, -1, -2": 4}'::pg_ndistinct
+      );
+ERROR:  pg_ndistinct: invalid attnum for this statistics object: -4
 SELECT
     pg_catalog.pg_set_extended_stats(
         statistics_schemaname => 'stats_import'::regnamespace,
diff --git a/src/test/regress/sql/stats_import.sql b/src/test/regress/sql/stats_import.sql
index 98aa934d12..3a7baf9c92 100644
--- a/src/test/regress/sql/stats_import.sql
+++ b/src/test/regress/sql/stats_import.sql
@@ -1387,6 +1387,34 @@ FROM pg_statistic s
 JOIN pg_attribute a ON a.attrelid = s.starelid AND a.attnum = s.staattnum
 WHERE s.starelid = 'stats_import.is_odd'::regclass;
 
+
+-- set n_distinct using at attnum (1) that is not in the statistics object
+SELECT
+    pg_catalog.pg_set_extended_stats(
+        statistics_schemaname => 'stats_import'::regnamespace,
+        statistics_name => 'test_stat_clone'::name,
+        inherited => false,
+        n_distinct => '{"2, 3": 4, "2, -1": 4, "2, -2": 4, "3, -1": 4, "3, -2": 4, "-1, -2": 3, "2, 3, -1": 4, "2, 3, -2": 4, "2, -1, -2": 4, "3, -1, -2": 4, "1, 3, -1, -2": 4}'::pg_ndistinct
+      );
+
+-- set n_distinct using at attnum that is 0
+SELECT
+    pg_catalog.pg_set_extended_stats(
+        statistics_schemaname => 'stats_import'::regnamespace,
+        statistics_name => 'test_stat_clone'::name,
+        inherited => false,
+        n_distinct => '{"2, 3": 4, "2, -1": 4, "2, 0": 4, "3, -1": 4, "3, -2": 4, "-1, -2": 3, "2, 3, -1": 4, "2, 3, -2": 4, "2, -1, -2": 4, "3, -1, -2": 4, "2, 3, -1, -2": 4}'::pg_ndistinct
+      );
+
+-- set n_distinct using at attnum that is outside the expression bounds(below -2)
+SELECT
+    pg_catalog.pg_set_extended_stats(
+        statistics_schemaname => 'stats_import'::regnamespace,
+        statistics_name => 'test_stat_clone'::name,
+        inherited => false,
+        n_distinct => '{"2, 3": 4, "2, -4": 4, "2, -2": 4, "3, -1": 4, "3, -2": 4, "-1, -2": 3, "2, 3, -1": 4, "2, 3, -2": 4, "2, -1, -2": 4, "3, -1, -2": 4, "1, 3, -1, -2": 4}'::pg_ndistinct
+      );
+
 SELECT
     pg_catalog.pg_set_extended_stats(
         statistics_schemaname => 'stats_import'::regnamespace,
-- 
2.48.1

