From 80310e5f7f5a983bdce44bf19f6015ca72ce6277 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Sun, 22 Nov 2020 19:33:22 -0600
Subject: [PATCH 6/7] Some cleanup

---
 src/backend/commands/statscmds.c        | 77 +++++++------------------
 src/backend/statistics/extended_stats.c | 51 ++++++++--------
 src/test/regress/expected/stats_ext.out |  2 +-
 3 files changed, 44 insertions(+), 86 deletions(-)

diff --git a/src/backend/commands/statscmds.c b/src/backend/commands/statscmds.c
index 035599469f..c2f63bc115 100644
--- a/src/backend/commands/statscmds.c
+++ b/src/backend/commands/statscmds.c
@@ -66,7 +66,6 @@ CreateStatistics(CreateStatsStmt *stmt)
 {
 	int16		attnums[STATS_MAX_DIMENSIONS];
 	int			nattnums = 0;
-	int			numcols = 0;
 	char	   *namestr;
 	NameData	stxname;
 	Oid			statoid;
@@ -94,7 +93,6 @@ CreateStatistics(CreateStatsStmt *stmt)
 	bool		build_mcv;
 	bool		build_expressions;
 	bool		build_expressions_only;
-	bool		requested_type = false;
 	int			i;
 	ListCell   *cell;
 	ListCell   *cell2;
@@ -191,12 +189,16 @@ CreateStatistics(CreateStatsStmt *stmt)
 				 errmsg("statistics object \"%s\" already exists", namestr)));
 	}
 
+	/* Make sure no more than STATS_MAX_DIMENSIONS columns are used */
+	if (list_length(stmt->exprs) >= STATS_MAX_DIMENSIONS)
+		ereport(ERROR,
+				(errcode(ERRCODE_TOO_MANY_COLUMNS),
+				 errmsg("cannot have more than %d columns in statistics",
+					 STATS_MAX_DIMENSIONS)));
+
 	/*
-	 * Currently, we only allow simple column references in the expression
-	 * list.  That will change someday, and again the grammar already supports
-	 * it so we have to enforce restrictions here.  For now, we can convert
-	 * the expression list to a simple array of attnums.  While at it, enforce
-	 * some constraints.
+	 * Convert the expression list to a simple array of attnums.  While at
+	 * it, and enforce some constraints.
 	 */
 	foreach(cell, stmt->exprs)
 	{
@@ -209,7 +211,7 @@ CreateStatistics(CreateStatsStmt *stmt)
 		if (!IsA(expr, StatsElem))
 			ereport(ERROR,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("only simple column references are allowed in CREATE STATISTICS")));
+					 errmsg("only simple column references are allowed in CREATE STATISTICS"))); // XXX
 		selem = (StatsElem *) expr;
 
 		if (selem->name)	/* column reference */
@@ -239,22 +241,13 @@ CreateStatistics(CreateStatsStmt *stmt)
 						 errmsg("column \"%s\" cannot be used in statistics because its type %s has no default btree operator class",
 								attname, format_type_be(attForm->atttypid))));
 
-			/* Make sure no more than STATS_MAX_DIMENSIONS columns are used */
-			if (numcols >= STATS_MAX_DIMENSIONS)
-				ereport(ERROR,
-						(errcode(ERRCODE_TOO_MANY_COLUMNS),
-						 errmsg("cannot have more than %d columns in statistics",
-								STATS_MAX_DIMENSIONS)));
-
 			attnums[nattnums] = attForm->attnum;
 			nattnums++;
-			numcols++;
 			ReleaseSysCache(atttuple);
 		}
 		else	/* expression */
 		{
 			Node	   *expr = selem->expr;
-			TypeCacheEntry *type;
 			Oid			atttype;
 
 			Assert(expr != NULL);
@@ -283,16 +276,6 @@ CreateStatistics(CreateStatsStmt *stmt)
 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 						 errmsg("expression cannot be used in statistics because its type %s has no default btree operator class",
 								format_type_be(atttype))));
-
-			/* Make sure no more than STATS_MAX_DIMENSIONS columns are used */
-			if (numcols >= STATS_MAX_DIMENSIONS)
-				ereport(ERROR,
-						(errcode(ERRCODE_TOO_MANY_COLUMNS),
-						 errmsg("cannot have more than %d columns in statistics",
-								STATS_MAX_DIMENSIONS)));
-
-			numcols++;
-
 			stxexprs = lappend(stxexprs, expr);
 		}
 	}
@@ -309,25 +292,13 @@ CreateStatistics(CreateStatsStmt *stmt)
 		char	   *type = strVal((Value *) lfirst(cell));
 
 		if (strcmp(type, "ndistinct") == 0)
-		{
 			build_ndistinct = true;
-			requested_type = true;
-		}
 		else if (strcmp(type, "dependencies") == 0)
-		{
 			build_dependencies = true;
-			requested_type = true;
-		}
 		else if (strcmp(type, "mcv") == 0)
-		{
 			build_mcv = true;
-			requested_type = true;
-		}
 		else if (strcmp(type, "expressions") == 0)
-		{
 			build_expressions = true;
-			requested_type = true;
-		}
 		else
 			ereport(ERROR,
 					(errcode(ERRCODE_SYNTAX_ERROR),
@@ -340,13 +311,13 @@ CreateStatistics(CreateStatsStmt *stmt)
 		(!build_ndistinct) && (!build_dependencies) && (!build_mcv);
 
 	/*
-	 * Check that with explicitly requested expression stats there really
-	 * are some expressions.
+	 * Unless building only expressions, check that at least two columns were
+	 * specified.  The upper bound was already checked.
 	 */
-	if (build_expressions && (list_length(stxexprs) == 0))
+	if (!build_expressions_only && (list_length(stmt->exprs) < 2))
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
-				 errmsg("extended expression statistics require at least one expression")));
+				 errmsg("multi-variate statistics require at least two columns")));
 
 	/*
 	 * When building only expression stats, all the elements have to be
@@ -359,24 +330,16 @@ CreateStatistics(CreateStatsStmt *stmt)
 	if (build_expressions_only && (nattnums > 0))
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
-				 errmsg("building only extended expression statistics on simple columns not allowed")));
+				 errmsg("building only expression statistics on simple columns not allowed")));
 
 	/*
-	 * Check that at least two columns were specified in the statement, or
-	 * one when only expression stats were requested. The upper bound was
-	 * already checked in the loop above.
-	 *
-	 * XXX The first check is probably pointless after the one checking for
-	 * expressions.
+	 * Check that with explicitly requested expression stats there really
+	 * are some expressions.
 	 */
-	if (build_expressions_only && (numcols == 0))
-		ereport(ERROR,
-				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
-				 errmsg("extended expression statistics require at least 1 column")));
-	else if (!build_expressions_only && (numcols < 2))
+	if (build_expressions && (list_length(stxexprs) == 0))
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
-				 errmsg("extended statistics require at least 2 columns")));
+				 errmsg("extended expression statistics require at least one expression")));
 
 	/*
 	 * Sort the attnums, which makes detecting duplicates somewhat easier, and
@@ -431,7 +394,7 @@ CreateStatistics(CreateStatsStmt *stmt)
 	 * If no statistic type was specified, build them all (but request
 	 * expression stats only when there actually are any expressions).
 	 */
-	if (!requested_type)
+	if (stmt->stat_types == NIL)
 	{
 		build_ndistinct = true;
 		build_dependencies = true;
diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
index aa95e0939a..3eac074b14 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -2574,11 +2574,11 @@ serialize_expr_stats(AnlExprData *exprdata, int nexprs)
 
 	for (exprno = 0; exprno < nexprs; exprno++)
 	{
-		int				i, k;
+		int				validx, k;
 		VacAttrStats   *stats = exprdata[exprno].vacattrstat;
 
 		Datum		values[Natts_pg_statistic];
-		bool		nulls[Natts_pg_statistic];
+		bool		nulls[Natts_pg_statistic] = {0};
 		HeapTuple	stup;
 
 		if (!stats->stats_valid)
@@ -2594,10 +2594,6 @@ serialize_expr_stats(AnlExprData *exprdata, int nexprs)
 		/*
 		 * Construct a new pg_statistic tuple
 		 */
-		for (i = 0; i < Natts_pg_statistic; ++i)
-		{
-			nulls[i] = false;
-		}
 
 		values[Anum_pg_statistic_starelid - 1] = ObjectIdGetDatum(InvalidOid);
 		values[Anum_pg_statistic_staattnum - 1] = Int16GetDatum(InvalidAttrNumber);
@@ -2605,23 +2601,21 @@ serialize_expr_stats(AnlExprData *exprdata, int nexprs)
 		values[Anum_pg_statistic_stanullfrac - 1] = Float4GetDatum(stats->stanullfrac);
 		values[Anum_pg_statistic_stawidth - 1] = Int32GetDatum(stats->stawidth);
 		values[Anum_pg_statistic_stadistinct - 1] = Float4GetDatum(stats->stadistinct);
-		i = Anum_pg_statistic_stakind1 - 1;
-		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
-		{
-			values[i++] = Int16GetDatum(stats->stakind[k]); /* stakindN */
-		}
-		i = Anum_pg_statistic_staop1 - 1;
+
+		validx = Anum_pg_statistic_stakind1 - 1;
 		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
-		{
-			values[i++] = ObjectIdGetDatum(stats->staop[k]);	/* staopN */
-		}
-		i = Anum_pg_statistic_stacoll1 - 1;
+			values[validx++] = Int16GetDatum(stats->stakind[k]); /* stakindN */
+
+		validx = Anum_pg_statistic_staop1 - 1;
 		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
-		{
-			values[i++] = ObjectIdGetDatum(stats->stacoll[k]);	/* stacollN */
-		}
-		i = Anum_pg_statistic_stanumbers1 - 1;
+			values[validx++] = ObjectIdGetDatum(stats->staop[k]);	/* staopN */
+
+		validx = Anum_pg_statistic_stacoll1 - 1;
 		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+			values[validx++] = ObjectIdGetDatum(stats->stacoll[k]);	/* stacollN */
+
+		validx = Anum_pg_statistic_stanumbers1 - 1;
+		for (k = 0; k < STATISTIC_NUM_SLOTS; k++, validx++)
 		{
 			int			nnum = stats->numnumbers[k];
 
@@ -2637,16 +2631,17 @@ serialize_expr_stats(AnlExprData *exprdata, int nexprs)
 				arry = construct_array(numdatums, nnum,
 									   FLOAT4OID,
 									   sizeof(float4), true, TYPALIGN_INT);
-				values[i++] = PointerGetDatum(arry);	/* stanumbersN */
+				values[validx] = PointerGetDatum(arry);	/* stanumbersN */
 			}
 			else
 			{
-				nulls[i] = true;
-				values[i++] = (Datum) 0;
+				nulls[validx] = true;
+				values[validx] = (Datum) 0;
 			}
 		}
-		i = Anum_pg_statistic_stavalues1 - 1;
-		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+
+		validx = Anum_pg_statistic_stavalues1 - 1;
+		for (k = 0; k < STATISTIC_NUM_SLOTS; k++, validx++)
 		{
 			if (stats->numvalues[k] > 0)
 			{
@@ -2658,12 +2653,12 @@ serialize_expr_stats(AnlExprData *exprdata, int nexprs)
 									   stats->statyplen[k],
 									   stats->statypbyval[k],
 									   stats->statypalign[k]);
-				values[i++] = PointerGetDatum(arry);	/* stavaluesN */
+				values[validx] = PointerGetDatum(arry);	/* stavaluesN */
 			}
 			else
 			{
-				nulls[i] = true;
-				values[i++] = (Datum) 0;
+				nulls[validx] = true;
+				values[validx] = (Datum) 0;
 			}
 		}
 
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index 39ff7dd146..fbfcbee330 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -167,7 +167,7 @@ CREATE TABLE ab1 (a INTEGER, b INTEGER, c TIMESTAMP, d TIMESTAMPTZ);
 CREATE STATISTICS ab1_exprstat_1 (expressions) ON (a+b) FROM ab1;
 -- we build all stats types by default, requiring at least two columns
 CREATE STATISTICS ab1_exprstat_2 ON (a+b) FROM ab1;
-ERROR:  extended statistics require at least 2 columns
+ERROR:  multi-variate statistics require at least two columns
 -- expression must be immutable, but date_trunc on timestamptz is not
 CREATE STATISTICS ab1_exprstat_3 (expressions) ON date_trunc('day', d) FROM ab1;
 ERROR:  functions in statistics expression must be marked IMMUTABLE
-- 
2.17.0

