From 159763cbf70b11675321fb56a00e5ea1a8e4592b Mon Sep 17 00:00:00 2001
From: Corey Huinker <corey.huinker@gmail.com>
Date: Wed, 13 Dec 2023 03:55:35 -0500
Subject: [PATCH v3 7/9] Add pg_import_ext_stats()

This is the extended statistics equivalent of pg_import_rel_stats().

The most likely application of this function is to quickly apply stats
to a newly upgraded database faster than could be accomplished by
vacuumdb --analyze-in-stages.

The function takes a best-effort approach, skipping statistics that are
expected but omitted, and skipping objects that are specified but do not
exist on the target system. The goal is to get better-than-empty
statistics into the STATISTICS object quickly, so that business
operations can resume sooner. The statistics generated will replace
existing rows in pg_statistic_ext_data for the same statistics
object, and this is done in an all-or-nothing basis rather than
attempting to modify existing rows.

The statistics applied are not locked in any way, and will be
overwritten by the next analyze, either explicit or via autovacuum.

The medium of exchange is jsonb, the format of which is specified in the
view pg_statistic_ext_export. Obviously this view does not exist in older
versions of the database, but the view definition can be extracted and
adapted to older versions.

This function also allows for tweaking of table statistics in-place,
allowing the user to simulate correlations, skew histograms, etc, to see
what those changes will evoke from the query planner.
---
 src/include/catalog/pg_proc.dat               |   5 +
 .../statistics/extended_stats_internal.h      |   8 +-
 src/backend/statistics/dependencies.c         | 111 +++++++
 src/backend/statistics/extended_stats.c       | 291 ++++++++++++++++++
 src/backend/statistics/mcv.c                  | 217 ++++++++++++-
 src/backend/statistics/mvdistinct.c           | 101 ++++++
 src/backend/statistics/statistics.c           |   8 +-
 .../regress/expected/stats_export_import.out  |  20 ++
 src/test/regress/sql/stats_export_import.sql  |  18 ++
 doc/src/sgml/func.sgml                        |  21 ++
 10 files changed, 794 insertions(+), 6 deletions(-)

diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 4d1e9bde1f..bd674e232e 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5660,6 +5660,11 @@
   proname => 'pg_import_rel_stats', provolatile => 'v', proisstrict => 'f',
   proparallel => 'u', prorettype => 'bool', proargtypes => 'oid int4 float4 int4 jsonb',
   prosrc => 'pg_import_rel_stats' },
+{ oid => '9162',
+  descr => 'statistics: import to extended stats object',
+  proname => 'pg_import_ext_stats', provolatile => 'v', proisstrict => 'f',
+  proparallel => 'u', prorettype => 'bool', proargtypes => 'oid int4 jsonb',
+  prosrc => 'pg_import_ext_stats' },
 { oid => '3150', descr => 'statistics: number of temporary files written',
   proname => 'pg_stat_get_db_temp_files', provolatile => 's',
   proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h
index 7b55eb8ffa..5fb4523525 100644
--- a/src/include/statistics/extended_stats_internal.h
+++ b/src/include/statistics/extended_stats_internal.h
@@ -68,17 +68,21 @@ typedef struct StatsBuildData
 	bool	  **nulls;
 } StatsBuildData;
 
-
 extern MVNDistinct *statext_ndistinct_build(double totalrows, StatsBuildData *data);
+extern MVNDistinct *statext_ndistinct_import(JsonbContainer *cont);
 extern bytea *statext_ndistinct_serialize(MVNDistinct *ndistinct);
 extern MVNDistinct *statext_ndistinct_deserialize(bytea *data);
 
 extern MVDependencies *statext_dependencies_build(StatsBuildData *data);
+extern MVDependencies *statext_dependencies_import(JsonbContainer *cont);
 extern bytea *statext_dependencies_serialize(MVDependencies *dependencies);
 extern MVDependencies *statext_dependencies_deserialize(bytea *data);
+extern bytea *import_dependencies(JsonbContainer *cont);
 
 extern MCVList *statext_mcv_build(StatsBuildData *data,
 								  double totalrows, int stattarget);
+extern MCVList *statext_mcv_import(JsonbContainer *cont,
+									VacAttrStats **stats, int natts);
 extern bytea *statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats);
 extern MCVList *statext_mcv_deserialize(bytea *data);
 
@@ -127,4 +131,6 @@ extern Selectivity mcv_clause_selectivity_or(PlannerInfo *root,
 											 Selectivity *overlap_basesel,
 											 Selectivity *totalsel);
 
+extern Datum pg_import_ext_stats(PG_FUNCTION_ARGS);
+
 #endif							/* EXTENDED_STATS_INTERNAL_H */
diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c
index edb2e5347d..ca8b20adab 100644
--- a/src/backend/statistics/dependencies.c
+++ b/src/backend/statistics/dependencies.c
@@ -27,7 +27,9 @@
 #include "parser/parsetree.h"
 #include "statistics/extended_stats_internal.h"
 #include "statistics/statistics.h"
+#include "utils/builtins.h"
 #include "utils/bytea.h"
+#include "utils/float.h"
 #include "utils/fmgroids.h"
 #include "utils/fmgrprotos.h"
 #include "utils/lsyscache.h"
@@ -1829,3 +1831,112 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
 
 	return s1;
 }
+
+/*
+ * statext_dependencies_import
+ *
+ * Like statext_dependencies_build, but import the data
+ * from a JSON object.
+ *
+ * import format:
+ * [
+ *   {
+ *     "attnums": [ intstr, ... ],
+ *     "degree": floatstr
+ *   }
+ * ]
+ *
+ */
+MVDependencies *
+statext_dependencies_import(JsonbContainer *cont)
+{
+	MVDependencies *dependencies = NULL;
+	int				ndeps;
+	int				i;
+
+
+	if (cont == NULL)
+		ndeps = 0;
+	else
+		ndeps = JsonContainerSize(cont);
+
+	if (ndeps == 0)
+		dependencies = (MVDependencies *) palloc0(sizeof(MVDependencies));
+	else
+		dependencies = (MVDependencies *) palloc0(offsetof(MVDependencies, deps)
+												   + (ndeps * sizeof(MVDependency *)));
+
+	dependencies->magic = STATS_DEPS_MAGIC;
+	dependencies->type = STATS_DEPS_TYPE_BASIC;
+	dependencies->ndeps = ndeps;
+
+	/* compute length of output */
+	for (i = 0; i < ndeps; i++)
+	{
+		JsonbValue	   *j;
+		JsonbContainer *elemobj,
+					   *attnumarr;
+		MVDependency   *d;
+		char		   *s;
+		int				a;
+		int				natts;
+
+		j = getIthJsonbValueFromContainer(cont, i);
+
+		if ((j == NULL)
+				|| (j->type != jbvBinary)
+				|| (!JsonContainerIsObject(j->val.binary.data)))
+			ereport(ERROR,
+			  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+			   errmsg("invalid statistics format, elements of stxdependencies "
+					  "must be objects.")));
+
+		elemobj = j->val.binary.data;
+		attnumarr = key_lookup_array(elemobj, "attnums");
+
+		if (attnumarr == NULL)
+			ereport(ERROR,
+			  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+			   errmsg("invalid statistics format, elements of stxdependencies "
+					  "must contain an element called attnums which is an array.")));
+
+		natts = JsonContainerSize(attnumarr);
+		d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
+									 + (natts * sizeof(AttrNumber)));
+		dependencies->deps[i] = d;
+
+		d->nattributes = natts;
+
+		s = key_lookup_cstring(elemobj, "degree");
+		if (s != NULL)
+		{
+			d->degree = float8in_internal(s, NULL, "double", s, NULL);
+			pfree(s);
+		}
+		else
+			d->degree = 0;
+
+		for (a = 0; a < natts; a++)
+		{
+			JsonbValue *aj;
+			char	   *str;
+
+			aj = getIthJsonbValueFromContainer(attnumarr, a);
+
+			if ((aj == NULL) || (aj->type != jbvString))
+				ereport(ERROR,
+				  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				   errmsg("invalid statistics format, elements of attnums "
+						  "must be string representations of integers.")));
+
+			str = JsonbStringValueToCString(aj);
+			d->attributes[a] = pg_strtoint16(str);
+			pfree(str);
+			pfree(aj);
+		}
+
+		pfree(j);
+	}
+
+	return dependencies;
+}
diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
index 718826ecf1..69072485fa 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -19,6 +19,7 @@
 #include "access/detoast.h"
 #include "access/genam.h"
 #include "access/htup_details.h"
+#include "access/relation.h"
 #include "access/table.h"
 #include "catalog/indexing.h"
 #include "catalog/pg_collation.h"
@@ -495,6 +496,38 @@ create_stat_ext_entry(HeapTuple htup)
 	return entry;
 }
 
+/*
+ * Return a list (of StatExtEntry) of statistics objects for the given relation.
+ */
+/* TODO needed????
+static StatExtEntry *
+fetch_statentry(Relation pg_statext, Oid stxid)
+{
+	SysScanDesc scan;
+	ScanKeyData skey;
+	HeapTuple	htup;
+	StatExtEntry *entry = NULL;
+
+	/-*
+	 * Prepare to scan pg_statistic_ext for entries having given oid.
+	 *-/
+	ScanKeyInit(&skey,
+				Anum_pg_statistic_ext_oid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(stxid));
+
+	scan = systable_beginscan(pg_statext, StatisticExtRelidIndexId, true,
+							  NULL, 1, &skey);
+
+	if (HeapTupleIsValid(htup = systable_getnext(scan)))
+		entry = create_stat_ext_entry(htup);
+
+	systable_endscan(scan);
+
+	return entry;
+}
+*/
+
 /*
  * Return a list (of StatExtEntry) of statistics objects for the given relation.
  */
@@ -2421,6 +2454,8 @@ serialize_expr_stats(AnlExprData *exprdata, int nexprs)
 								  false,
 								  typOid,
 								  CurrentMemoryContext);
+
+		heap_freetuple(stup);
 	}
 
 	table_close(sd, RowExclusiveLock);
@@ -2646,3 +2681,259 @@ make_build_data(Relation rel, StatExtEntry *stat, int numrows, HeapTuple *rows,
 
 	return result;
 }
+
+/*
+ * Generate VacAttrStats for a single pg_statistic_ext
+ */
+static VacAttrStats **
+examine_ext_stat_types(StatExtEntry *stxentry, Relation rel)
+{
+	TupleDesc		tupdesc = RelationGetDescr(rel);
+	Bitmapset	   *columns = stxentry->columns;
+	List		   *exprs = stxentry->exprs;
+	int				natts = bms_num_members(columns) + list_length(exprs);
+	int				i = 0;
+	int				m = -1;
+
+	VacAttrStats  **stats;
+	ListCell   *lc;
+
+	stats = (VacAttrStats **) palloc(natts * sizeof(VacAttrStats *));
+
+	/* lookup VacAttrStats info for the requested columns (same attnum) */
+	while ((m = bms_next_member(columns, m)) >= 0)
+	{
+		Form_pg_attribute	attform = TupleDescAttr(tupdesc, m - 1);
+
+		stats[i] = examine_rel_attribute(attform, rel, NULL);
+
+		/* ext expr stats remove the tupattnum */
+		stats[i]->tupattnum = InvalidAttrNumber;
+		i++;
+	}
+
+	/* also add info for expressions */
+	foreach(lc, exprs)
+	{
+		Node	   *expr = (Node *) lfirst(lc);
+
+		stats[i] = examine_attribute(expr);
+		i++;
+	}
+
+	return stats;
+}
+
+/*
+ * Generate expressions from imported data.
+ */
+static Datum
+import_expressions(Relation rel, JsonbContainer *cont,
+					VacAttrStats **expr_stats, int nexprs)
+{
+	int			i;
+	int			nelems;
+	Oid			typOid;
+	Relation	sd;
+
+	ArrayBuildState *astate = NULL;
+
+	/* skip if no stats to import */
+	if (cont == NULL)
+		return (Datum) 0;
+
+	nelems = JsonContainerSize(cont);
+
+	if (nelems == 0)
+		return (Datum) 0;
+
+	sd = table_open(StatisticRelationId, RowExclusiveLock);
+
+	/* lookup OID of composite type for pg_statistic */
+	typOid = get_rel_type_id(StatisticRelationId);
+	if (!OidIsValid(typOid))
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("relation \"%s\" does not have a composite type",
+						"pg_statistic")));
+
+	/*
+	 * The number of elements should not exceed the number of columns in the
+	 * extended statistics object. The elements should follow the same order
+	 * as they do on disk: regular attributes first, followed by expressions.
+	 * TODO make this a warning
+	 */
+	if (nelems > nexprs)
+	{
+		nelems = nexprs;
+	}
+	nelems = Min(nelems, nexprs);
+
+	for (i = 0; i < nelems; i++)
+	{
+		Datum		values[Natts_pg_statistic] = { 0 };
+		bool		nulls[Natts_pg_statistic] = { false };
+		bool		replaces[Natts_pg_statistic] = { false };
+		HeapTuple	stup;
+
+		JsonbValue   *j = getIthJsonbValueFromContainer(cont, i);
+		VacAttrStats *stat = expr_stats[i];
+
+		JsonbContainer *exprobj;
+
+		if ((j == NULL)
+				|| (j->type != jbvBinary)
+				|| (!JsonContainerIsObject(j->val.binary.data)))
+			ereport(ERROR,
+			  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+			   errmsg("invalid statistics format, elements of stxdexprs "
+					  "must be objects.")));
+
+		exprobj = j->val.binary.data;
+
+		import_attribute(InvalidOid, stat, exprobj, false, values, nulls, replaces);
+
+		stup = heap_form_tuple(RelationGetDescr(sd), values, nulls);
+
+		astate = accumArrayResult(astate,
+								  heap_copy_tuple_as_datum(stup, RelationGetDescr(sd)),
+								  false,
+								  typOid,
+								  CurrentMemoryContext);
+		pfree(j);
+	}
+
+	table_close(sd, RowExclusiveLock);
+
+	return makeArrayResult(astate, CurrentMemoryContext);
+}
+
+/*
+ * import_pg_ext_stats
+ *
+ * Import stats for one aspect (inherited / regular) of an Extended Statistics
+ * object.
+ *
+ * The JSON container should look like this:
+ * {
+ *     "stxkinds": array of single characters (up to 3?),
+ *     "stxdndistinct": [ {ndistinct}, ... ],
+ *     "stxdndependencies": [ {dependency}, ... ]
+ *     "stxdmcv": [ {mcv}, ... ]
+ *     "stxdexprs" : [ {pg_statistic}, ... ]
+ * }
+ */
+static void
+import_pg_statistic_ext_data(StatExtEntry *stxentry, Relation rel,
+							 bool inh, JsonbContainer *cont,
+							 VacAttrStats **stats)
+{
+	int				ncols;
+	int				nexprs;
+	int				natts;
+	VacAttrStats  **expr_stats;
+
+	JsonbContainer *arraycont;
+	MCVList		   *mcvlist;
+	MVDependencies *dependencies;
+	MVNDistinct	   *ndistinct;
+	Datum			exprs;
+
+	/* skip if no stats to import */
+	if (cont == NULL)
+		return;
+
+	ncols = bms_num_members(stxentry->columns);
+	nexprs = list_length(stxentry->exprs);
+	natts = ncols + nexprs;
+	expr_stats = &stats[ncols];
+
+	arraycont = key_lookup_array(cont, "stxdndistinct");
+	ndistinct = statext_ndistinct_import(arraycont);
+
+	arraycont = key_lookup_array(cont, "stxdndependencies");
+	dependencies = statext_dependencies_import(arraycont);
+
+	arraycont = key_lookup_array(cont, "stxdmcv");
+	mcvlist = statext_mcv_import(arraycont, stats, natts);
+
+	arraycont = key_lookup_array(cont, "stxdexprs");
+	exprs = import_expressions(rel, arraycont, expr_stats, nexprs);
+
+	statext_store(stxentry->statOid, inh, ndistinct, dependencies, mcvlist,
+				  exprs, stats);
+}
+
+/*
+ * Import JSON-serialized stats to an Extended Statistics object.
+ */
+Datum
+pg_import_ext_stats(PG_FUNCTION_ARGS)
+{
+	Oid				stxid;
+	int32			stats_version_num;
+	Jsonb		   *jb;
+	JsonbContainer *cont;
+	Relation		rel;
+	HeapTuple		etup;
+	Relation		sd;
+	StatExtEntry   *stxentry;
+	VacAttrStats  **stats;
+
+	Form_pg_statistic_ext stxform;
+
+	if (PG_ARGISNULL(0))
+		ereport(ERROR,
+		  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+		   errmsg("extended statistics oid cannot be NULL")));
+	stxid = PG_GETARG_OID(0);
+
+	if (PG_ARGISNULL(1))
+		ereport(ERROR,
+		  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+		   errmsg("server_version_number cannot be NULL")));
+	stats_version_num = PG_GETARG_INT32(1);
+
+	if (stats_version_num < 100000)
+		ereport(ERROR,
+		  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+		   errmsg("invalid statistics version: %d is earlier than earliest supported version",
+				  stats_version_num)));
+
+	if (PG_ARGISNULL(2))
+		PG_RETURN_BOOL(false);
+
+	jb = PG_GETARG_JSONB_P(2);
+	if (!JB_ROOT_IS_OBJECT(jb))
+		ereport(ERROR,
+		  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+		   errmsg("extended_stats must be jsonb object at root")));
+
+	sd = table_open(StatisticRelationId, RowExclusiveLock);
+	etup = SearchSysCacheCopy1(STATEXTOID, ObjectIdGetDatum(stxid));
+	if (!HeapTupleIsValid(etup))
+		elog(ERROR, "pg_statistic_ext entry for oid %u vanished during statistics import",
+			 stxid);
+
+	stxform = (Form_pg_statistic_ext) GETSTRUCT(etup);
+
+	rel = relation_open(stxform->stxrelid, ShareUpdateExclusiveLock);
+
+	stxentry = create_stat_ext_entry(etup);
+
+	stats = examine_ext_stat_types(stxentry, rel);
+
+	cont = key_lookup_object(&jb->root, "regular");
+	import_pg_statistic_ext_data(stxentry, rel, false, cont, stats);
+
+	if (rel->rd_rel->relhassubclass)
+	{
+		cont = key_lookup_object(&jb->root, "inherited");
+		import_pg_statistic_ext_data(stxentry, rel, true, cont, stats);
+	}
+
+	relation_close(rel, NoLock);
+	table_close(sd, RowExclusiveLock);
+
+	PG_RETURN_BOOL(true);
+}
diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c
index 03b9f04bb5..f1de17847a 100644
--- a/src/backend/statistics/mcv.c
+++ b/src/backend/statistics/mcv.c
@@ -29,6 +29,7 @@
 #include "utils/array.h"
 #include "utils/builtins.h"
 #include "utils/bytea.h"
+#include "utils/float.h"
 #include "utils/fmgroids.h"
 #include "utils/fmgrprotos.h"
 #include "utils/lsyscache.h"
@@ -679,7 +680,6 @@ statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats)
 			/* skip NULL values - we don't need to deduplicate those */
 			if (mcvlist->items[i].isnull[dim])
 				continue;
-
 			/* append the value at the end */
 			values[dim][counts[dim]] = mcvlist->items[i].values[dim];
 			counts[dim] += 1;
@@ -2177,3 +2177,218 @@ mcv_clause_selectivity_or(PlannerInfo *root, StatisticExtInfo *stat,
 
 	return s;
 }
+
+/*
+ * statext_mcv_import
+ *      like statext_mcv_build, but import from JSON.
+ *
+ * import format:
+ * [
+ *   {
+ *     "index": intstr,
+ *     "values": '{value, ...}',
+ *     "nulls": '{t,f,...}',
+ *     "frequency": floatstr,
+ *     "base_frequency": floatstr
+ *   }
+ * ]
+ *
+ */
+MCVList *
+statext_mcv_import(JsonbContainer *cont, VacAttrStats **stats, int ndims)
+{
+	int			nitems;
+	int			i;
+	MCVList	   *mcvlist;
+	Oid		    ioparams[STATS_MAX_DIMENSIONS];
+	FmgrInfo	finfos[STATS_MAX_DIMENSIONS];
+
+	if (cont != NULL)
+		nitems = JsonContainerSize(cont);
+	else
+		nitems = 0;
+
+	mcvlist = (MCVList *) palloc0(offsetof(MCVList, items) +
+								  (sizeof(MCVItem) * nitems));
+
+	mcvlist->magic = STATS_MCV_MAGIC;
+	mcvlist->type = STATS_MCV_TYPE_BASIC;
+	mcvlist->nitems = nitems;
+	mcvlist->ndimensions = ndims;
+
+	/* We will need these input functions $nitems times. */
+	for (i = 0; i < ndims; i++)
+	{
+		Oid		typid = stats[i]->attrtypid;
+		Oid		infunc;
+
+		mcvlist->types[i] = typid;
+		getTypeInputInfo(typid, &infunc, &ioparams[i]);
+		fmgr_info(infunc, &finfos[i]);
+	}
+
+	for (i = 0; i < nitems; i++)
+	{
+		JsonbValue	   *j;
+		JsonbContainer *itemobj,
+					   *valuesarr,
+					   *nullsarr;
+		int				numvalues,
+						numnulls;
+		int				k;
+		MCVItem		   *item = &mcvlist->items[i];
+		char		   *s;
+
+		item->values = (Datum *) palloc0(sizeof(Datum) * ndims);
+		item->isnull = (bool *) palloc0(sizeof(bool) * ndims);
+
+		j = getIthJsonbValueFromContainer(cont, i);
+
+		if ((j == NULL)
+				|| (j->type != jbvBinary)
+				|| (!JsonContainerIsObject(j->val.binary.data)))
+			ereport(ERROR,
+			  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+			   errmsg("invalid statistics format, elements of stxdmcv "
+					  "must be objects.")));
+
+		itemobj = j->val.binary.data;
+
+		s = key_lookup_cstring(itemobj, "frequency");
+		if (s != NULL)
+		{
+			item->frequency = float8in_internal(s, NULL, "double", s, NULL);
+			pfree(s);
+		}
+		else
+			item->frequency = 0.0;
+
+		s = key_lookup_cstring(itemobj, "base_frequency");
+		if (s != NULL)
+		{
+			item->base_frequency = float8in_internal(s, NULL, "double", s, NULL);
+			pfree(s);
+		}
+		else
+			item->base_frequency = 0.0;
+
+		/*
+		 * Import the nulls array first, because that tells us which elements
+		 * of the values array we can skip.
+		 */
+		nullsarr = key_lookup_array(itemobj, "nulls");
+
+		if (nullsarr == NULL)
+			ereport(ERROR,
+			  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+			   errmsg("invalid statistics format, elements of stxdmcv "
+					  "must contain an element called nulls which is an array.")));
+
+		numnulls = JsonContainerSize(nullsarr);
+
+		/* having more nulls than dimensions is concerning. */
+		if (numnulls > ndims)
+		{
+			ereport(WARNING,
+			  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+			   errmsg("statistics import has %d mcv dimensions, "
+					  "but the target expects %d. Skipping excess dimensions.",
+						numnulls, ndims)));
+			numnulls = ndims;
+		}
+
+		for (k = 0; k < numnulls; k++)
+		{
+			JsonbValue  *nj = getIthJsonbValueFromContainer(nullsarr, k);
+
+			if ((nj == NULL) || (nj->type != jbvBool))
+				ereport(ERROR,
+				  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				   errmsg("invalid statistics format, elements of nulls "
+						  "must be booleans.")));
+
+			item->isnull[k] = nj->val.boolean;
+
+			pfree(nj);
+		}
+
+		/* Any remaining slots are marked null */
+		for (k = numnulls; k < ndims; k++)
+			item->isnull[k] = true;
+
+		valuesarr = key_lookup_array(itemobj, "values");
+
+		if (valuesarr == NULL)
+			ereport(ERROR,
+			  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+			   errmsg("invalid statistics format, elements of stxdmcv "
+					  "must contain an element called values which is an array.")));
+
+		numvalues = JsonContainerSize(valuesarr);
+
+		/* having more values than dimensions is concerning. */
+		if (numvalues > ndims)
+		{
+			ereport(WARNING,
+			  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+			   errmsg("statistics import has %d mcv dimensions, "
+					  "but the target expects %d. Skipping excess dimensions.",
+						numvalues, ndims)));
+			numvalues = ndims;
+		}
+
+		for (k = 0; k < numvalues; k++)
+		{
+			JsonbValue *vj;
+			bool		import_error = true;
+
+			/* if the element was null flagged, don't bother */
+			if (item->isnull[k])
+			{
+				item->values[k] = (Datum) 0;
+				continue;
+			}
+
+			vj = getIthJsonbValueFromContainer(valuesarr, k);
+
+			if (vj != NULL)
+			{
+				if (vj->type == jbvString)
+				{
+					char   *str = JsonbStringValueToCString(vj);
+
+					item->values[k] = InputFunctionCall(&finfos[k],
+														str,
+														ioparams[k],
+														stats[k]->attrtypmod);
+
+					import_error = false;
+					pfree(str);
+				}
+				else if (vj->type == jbvNull)
+				{
+					item->values[k] = (Datum) 0;
+					item->isnull[k] = true; /* mark just in case */
+					import_error = false;
+				}
+				pfree(vj);
+			}
+
+			if (import_error)
+				ereport(ERROR,
+				  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				   errmsg("invalid statistics format, elements of values "
+						  "must be strings or null.")));
+
+		}
+
+		/* Any remaining slots are marked null */
+		for (k = numvalues; k < ndims; k++)
+		{
+			item->values[k] = (Datum) 0;
+			item->isnull[k] = true;
+		}
+	}
+
+	return mcvlist;
+}
diff --git a/src/backend/statistics/mvdistinct.c b/src/backend/statistics/mvdistinct.c
index 6d25c14644..e870ae02a4 100644
--- a/src/backend/statistics/mvdistinct.c
+++ b/src/backend/statistics/mvdistinct.c
@@ -31,6 +31,8 @@
 #include "lib/stringinfo.h"
 #include "statistics/extended_stats_internal.h"
 #include "statistics/statistics.h"
+#include "utils/builtins.h"
+#include "utils/float.h"
 #include "utils/fmgrprotos.h"
 #include "utils/lsyscache.h"
 #include "utils/syscache.h"
@@ -698,3 +700,102 @@ generate_combinations(CombinationGenerator *state)
 
 	pfree(current);
 }
+
+
+/*
+ * statext_ndistinct_import
+ *
+ * Like statext_ndistinct_build, but import the data
+ * from a JSON container.
+ *
+ * import format:
+ * [
+ *   {
+ *     "attnums": [ intstr, ... ],
+ *     "ndistinct": floatstr
+ *   }
+ * ]
+ *
+ */
+MVNDistinct *
+statext_ndistinct_import(JsonbContainer *cont)
+{
+	MVNDistinct	   *result;
+	int				nitems;
+	int				i;
+
+	if (cont == NULL)
+		return NULL;
+
+	nitems = JsonContainerSize(cont);
+
+	if (nitems == 0)
+		return NULL;
+
+	result = palloc(offsetof(MVNDistinct, items) +
+					(nitems * sizeof(MVNDistinctItem)));
+	result->magic = STATS_NDISTINCT_MAGIC;
+	result->type = STATS_NDISTINCT_TYPE_BASIC;
+	result->nitems = nitems;
+
+	for (i = 0; i < nitems; i++)
+	{
+		JsonbValue	   *j;
+		JsonbContainer *elemobj,
+					   *attnumarr;
+		int				a;
+		int				natts;
+		char		   *s;
+
+		MVNDistinctItem *item = &result->items[i];
+
+		j = getIthJsonbValueFromContainer(cont, i);
+
+		if ((j == NULL)
+				|| (j->type != jbvBinary)
+				|| (!JsonContainerIsObject(j->val.binary.data)))
+			ereport(ERROR,
+			  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+			   errmsg("invalid statistics format, elements of stxdndistinct "
+					  "must be objects.")));
+
+		elemobj = j->val.binary.data;
+
+		s = key_lookup_cstring(elemobj, "ndistinct");
+		item->ndistinct = float8in_internal(s, NULL, "double", s, NULL);
+		pfree(s);
+
+		attnumarr = key_lookup_array(elemobj, "attnums");
+
+		if (attnumarr == NULL)
+			ereport(ERROR,
+			  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+			   errmsg("invalid statistics format, elements of stxdndistinct "
+					  "must contain an element called attnums which is an array.")));
+
+		natts = JsonContainerSize(attnumarr);
+		item->nattributes = natts;
+		item->attributes = palloc(sizeof(AttrNumber) * natts);
+
+		for (a = 0; a < natts; a++)
+		{
+			JsonbValue *aj;
+
+			aj = getIthJsonbValueFromContainer(attnumarr, a);
+
+			if ((aj == NULL) || (aj->type != jbvString))
+				ereport(ERROR,
+				  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				   errmsg("invalid statistics format, elements of attnums "
+						  "must be string representations of integers.")));
+			s = JsonbStringValueToCString(aj);
+			item->attributes[a] = pg_strtoint16(s);
+			pfree(s);
+			pfree(aj);
+		}
+
+		pfree(j);
+	}
+
+	return result;
+}
diff --git a/src/backend/statistics/statistics.c b/src/backend/statistics/statistics.c
index 968ccfaaf2..e54f1c9162 100644
--- a/src/backend/statistics/statistics.c
+++ b/src/backend/statistics/statistics.c
@@ -364,7 +364,7 @@ void import_stanumbers(const VacAttrStats *stat, JsonbContainer *cont,
 		if (numnumbers > STATISTIC_NUM_SLOTS)
 			ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-				errmsg("invalid format: number of stanumbers %d is greater than available slots %d",
+				errmsg("invalid format: number of stanumbers arrays (%d) is greater than available slots %d",
 						numnumbers, STATISTIC_NUM_SLOTS)));
 
 		fmgr_info(F_ARRAY_IN, &finfo);
@@ -474,8 +474,8 @@ void import_stavalues(const VacAttrStats *stat, JsonbContainer *cont,
 						typoid = typentry->typelem;
 				}
 				valvalues[k] = FunctionCall3(&finfo, CStringGetDatum(s),
-								 ObjectIdGetDatum(typoid),
-								 Int32GetDatum(typmod));
+											 ObjectIdGetDatum(typoid),
+											 Int32GetDatum(typmod));
 				valreplaces[k] = true;
 				pfree(s);
 				pfree(j);
@@ -664,7 +664,7 @@ examine_rel_attribute(Form_pg_attribute attr, Relation onerel, Node *index_expr)
 	if (!HeapTupleIsValid(typtuple))
 		elog(ERROR, "cache lookup failed for type %u", stats->attrtypid);
 	stats->attrtype = (Form_pg_type) GETSTRUCT(typtuple);
-	stats->anl_context = CurrentMemoryContext;
+	stats->anl_context = CurrentMemoryContext;
 	stats->tupattnum = attr->attnum;
 
 	/*
diff --git a/src/test/regress/expected/stats_export_import.out b/src/test/regress/expected/stats_export_import.out
index 2490472198..7c14f292e4 100644
--- a/src/test/regress/expected/stats_export_import.out
+++ b/src/test/regress/expected/stats_export_import.out
@@ -19,6 +19,7 @@ SELECT 3, 'tre', (3, 3.3, 'TRE', '2003-03-03', NULL)::stats_import_complex_type,
 UNION ALL
 SELECT 4, 'four', NULL, NULL;
 CREATE INDEX is_odd ON stats_import_test(((comp).a % 2 = 1));
+CREATE STATISTICS evens_test ON name, ((comp).a % 2 = 0) FROM stats_import_test;
 ANALYZE stats_import_test;
 -- capture snapshot of source stats
 CREATE TABLE stats_export AS
@@ -57,6 +58,8 @@ WHERE oid = 'stats_import_test'::regclass;
 CREATE TABLE stats_import_clone ( LIKE stats_import_test );
 -- create an index just like is_odd
 CREATE INDEX is_odd2 ON stats_import_clone(((comp).a % 2 = 0));
+-- create a statistics object like evens_test
+CREATE STATISTICS evens_clone ON name, ((comp).a % 2 = 0) FROM stats_import_clone;
 -- copy table stats to clone table
 SELECT pg_import_rel_stats(c.oid, e.server_version_num,
                             e.n_tuples, e.n_pages, e.stats)
@@ -131,6 +134,23 @@ WHERE s.starelid = 'is_odd2'::regclass;
 -----------+------------+-------------+----------+-------------+----------+----------+----------+----------+----------+--------+--------+--------+--------+--------+----------+----------+----------+----------+----------+-------------+-------------+-------------+-------------+-------------+-----+-----+-----+-----+-----
 (0 rows)
 
+-- copy extended stats to clone table
+SELECT pg_import_ext_stats(
+        (
+            SELECT e.oid as ext_clone_oid
+            FROM pg_statistic_ext AS e
+            WHERE e.stxname = 'evens_clone'
+        ),
+        e.server_version_num, e.stats)
+FROM pg_catalog.pg_statistic_ext_export AS e
+WHERE e.schemaname = 'public'
+AND e.tablename = 'stats_import_test'
+AND e.ext_stats_name = 'evens_test';
+ pg_import_ext_stats 
+---------------------
+ t
+(1 row)
+
 DROP TABLE stats_export;
 DROP TABLE stats_import_clone;
 DROP TABLE stats_import_test;
diff --git a/src/test/regress/sql/stats_export_import.sql b/src/test/regress/sql/stats_export_import.sql
index e97b9d1064..3bc8e3d3a3 100644
--- a/src/test/regress/sql/stats_export_import.sql
+++ b/src/test/regress/sql/stats_export_import.sql
@@ -23,6 +23,8 @@ SELECT 4, 'four', NULL, NULL;
 
 CREATE INDEX is_odd ON stats_import_test(((comp).a % 2 = 1));
 
+CREATE STATISTICS evens_test ON name, ((comp).a % 2 = 0) FROM stats_import_test;
+
 ANALYZE stats_import_test;
 
 -- capture snapshot of source stats
@@ -53,6 +55,9 @@ CREATE TABLE stats_import_clone ( LIKE stats_import_test );
 -- create an index just like is_odd
 CREATE INDEX is_odd2 ON stats_import_clone(((comp).a % 2 = 0));
 
+-- create a statistics object like evens_test
+CREATE STATISTICS evens_clone ON name, ((comp).a % 2 = 0) FROM stats_import_clone;
+
 -- copy table stats to clone table
 SELECT pg_import_rel_stats(c.oid, e.server_version_num,
                             e.n_tuples, e.n_pages, e.stats)
@@ -113,6 +118,19 @@ SELECT staattnum, stainherit, stanullfrac, stawidth, stadistinct,
 FROM pg_statistic AS s
 WHERE s.starelid = 'is_odd2'::regclass;
 
+-- copy extended stats to clone table
+SELECT pg_import_ext_stats(
+        (
+            SELECT e.oid as ext_clone_oid
+            FROM pg_statistic_ext AS e
+            WHERE e.stxname = 'evens_clone'
+        ),
+        e.server_version_num, e.stats)
+FROM pg_catalog.pg_statistic_ext_export AS e
+WHERE e.schemaname = 'public'
+AND e.tablename = 'stats_import_test'
+AND e.ext_stats_name = 'evens_test';
+
 DROP TABLE stats_export;
 DROP TABLE stats_import_clone;
 DROP TABLE stats_import_test;
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index ae3d1073e3..d9029fd29d 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -28191,6 +28191,26 @@ postgres=# SELECT '0/0'::pg_lsn + pd.segment_number * ps.setting::int + :offset
        </para></entry>
       </row>
+      <row>
+       <entry role="func_table_entry"><para role="func_signature">
+        <indexterm>
+         <primary>pg_import_ext_stats</primary>
+        </indexterm>
+        <function>pg_import_ext_stats</function> ( <parameter>extended statistics object</parameter> <type>oid</type>, <parameter>server_version_num</parameter> <type>integer</type>, <parameter>extended_stats</parameter> <type>jsonb</type> )
+        <returnvalue>boolean</returnvalue>
+       </para>
+       <para>
+        The <structname>pg_statistic_ext_data</structname> rows for the
+        <structfield>oid</structfield> matching the
+        <parameter>extended statistics object</parameter> are transactionally
+        replaced with the values found in <parameter>extended_stats</parameter>.
+        The purpose of this function is to apply statistics values in an upgrade
+        situation that are "good enough" for system operation until they are
+        replaced by the next auto-analyze. This function could be used by
+        <program>pg_upgrade</program> and <program>pg_restore</program> to
+        convey the statistics from the old system version into the new one.
+       </para></entry>
+      </row>
      </tbody>
     </tgroup>
    </table>
 
-- 
2.43.0

