From 9590e6d911ce5c200e372df4be259c0385842b88 Mon Sep 17 00:00:00 2001
From: Corey Huinker <corey.huinker@gmail.com>
Date: Sat, 7 Jun 2025 23:17:03 -0400
Subject: [PATCH v6 1/5] Refactor output format of pg_ndistinct and add working
 input function.

The existing format of pg_ndistinct uses a single-object JSON structure
where each key is itself a comma-separated list of attnums. While this
is a very compact format, it's confusing to read and is difficult to
manipulate values within the object. This wasn't a concern until
statistics import functions were introduced, enabling users to inject
hypothetical statistics into an object to observe their effect on the
query planner.

The new format is an array of objects, each object must have the keys
"attributes", which must contain an array of attnums, and "ndistinct",
which must be an integer. This is a quirk because the underlying
internal storage is a double, but the value stored was always an
integer.

The change in format is adequately described from the changes to
src/test/regress/expected/stats_ext.out so description here is
redundant.
---
 src/backend/statistics/mvdistinct.c     | 463 +++++++++++++++++++++++-
 src/test/regress/expected/stats_ext.out |  56 ++-
 src/test/regress/sql/stats_ext.sql      |  12 +
 3 files changed, 503 insertions(+), 28 deletions(-)

diff --git a/src/backend/statistics/mvdistinct.c b/src/backend/statistics/mvdistinct.c
index 7e7a63405c8..003dc3a74ab 100644
--- a/src/backend/statistics/mvdistinct.c
+++ b/src/backend/statistics/mvdistinct.c
@@ -27,9 +27,15 @@
 
 #include "catalog/pg_statistic_ext.h"
 #include "catalog/pg_statistic_ext_data.h"
+#include "common/int.h"
+#include "common/jsonapi.h"
 #include "lib/stringinfo.h"
+#include "mb/pg_wchar.h"
+#include "nodes/miscnodes.h"
+#include "nodes/pg_list.h"
 #include "statistics/extended_stats_internal.h"
 #include "statistics/statistics.h"
+#include "utils/builtins.h"
 #include "utils/fmgrprotos.h"
 #include "utils/syscache.h"
 #include "utils/typcache.h"
@@ -328,28 +334,453 @@ statext_ndistinct_deserialize(bytea *data)
 	return ndistinct;
 }
 
+typedef enum
+{
+	NDIST_EXPECT_START = 0,
+	NDIST_EXPECT_ITEM,
+	NDIST_EXPECT_KEY,
+	NDIST_EXPECT_ATTNUM_LIST,
+	NDIST_EXPECT_ATTNUM,
+	NDIST_EXPECT_NDISTINCT,
+	NDIST_EXPECT_COMPLETE
+}			ndistinctSemanticState;
+
+typedef struct
+{
+	const char *str;
+	ndistinctSemanticState state;
+
+	List	   *distinct_items; /* Accumulated complete MVNDistinctItems */
+	Node	   *escontext;
+
+	bool		found_attributes;	/* Item has an attributes key */
+	bool		found_ndistinct;	/* Item has ndistinct key */
+	List	   *attnum_list;	/* Accumulated attributes attnums */
+	int64		ndistinct;
+}			ndistinctParseState;
+
+/*
+ * Invoked at the start of each MVNDistinctItem.
+ *
+ * The entire JSON document shoul be one array of MVNDistinctItem objects.
+ *
+ * If we're anywhere else in the document, it's an error.
+ */
+static JsonParseErrorType
+ndistinct_object_start(void *state)
+{
+	ndistinctParseState *parse = state;
+
+	if (parse->state != NDIST_EXPECT_ITEM)
+	{
+		ereturn(parse->escontext, (Datum) 0,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+				 errdetail("Expected Item object")));
+		return JSON_SEM_ACTION_FAILED;
+	}
+
+	/* Now we expect to see attributes/ndistinct keys */
+	parse->state = NDIST_EXPECT_KEY;
+	return JSON_SUCCESS;
+}
+
+/*
+ * Routine to allow qsorting of AttNumbers
+ */
+static int
+attnum_compare(const void *aptr, const void *bptr)
+{
+	AttrNumber	a = *(const AttrNumber *) aptr;
+	AttrNumber	b = *(const AttrNumber *) bptr;
+
+	return pg_cmp_s16(a, b);
+}
+
+
+/*
+ * Invoked at the end of an object.
+ *
+ * Check to ensure that it was a complete MVNDistinctItem
+ *
+ */
+static JsonParseErrorType
+ndistinct_object_end(void *state)
+{
+	ndistinctParseState *parse = state;
+
+	int			natts = 0;
+	AttrNumber *attrsort;
+
+	MVNDistinctItem *item;
+
+	if (!parse->found_attributes)
+	{
+		ereturn(parse->escontext, (Datum) 0,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+				 errdetail("Item must contain \"attributes\" key")));
+		return JSON_SEM_ACTION_FAILED;
+	}
+
+	if (!parse->found_ndistinct)
+	{
+		ereturn(parse->escontext, (Datum) 0,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+				 errdetail("Item must contain \"ndistinct\" key")));
+		return JSON_SEM_ACTION_FAILED;
+	}
+
+	if (parse->attnum_list == NIL)
+	{
+		ereturn(parse->escontext, (Datum) 0,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+				 errdetail("The \"attributes\" key must be an non-empty array")));
+		return JSON_SEM_ACTION_FAILED;
+	}
+
+	/*
+	 * We need at least 2 attnums for a ndistinct item, anything less is
+	 * malformed.
+	 */
+	natts = parse->attnum_list->length;
+	if (natts < 2)
+	{
+		ereturn(parse->escontext, (Datum) 0,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+				 errdetail("The attributes key must contain an array of at least two attnums")));
+
+		return JSON_SEM_ACTION_FAILED;
+	}
+	attrsort = palloc0(natts * sizeof(AttrNumber));
+
+	/* Create the MVNDistinctItem */
+	item = palloc(sizeof(MVNDistinctItem));
+	item->nattributes = natts;
+	item->attributes = palloc0(natts * sizeof(AttrNumber));
+	item->ndistinct = (double) parse->ndistinct;
+
+	/* fill out both attnum list and sortable list */
+	for (int i = 0; i < natts; i++)
+	{
+		attrsort[i] = (AttrNumber) parse->attnum_list->elements[i].int_value;
+		item->attributes[i] = attrsort[i];
+	}
+
+	/* Check attrsort for uniqueness */
+	qsort(attrsort, natts, sizeof(AttrNumber), attnum_compare);
+	for (int i = 1; i < natts; i++)
+		if (attrsort[i] == attrsort[i - 1])
+		{
+			ereturn(parse->escontext, (Datum) 0,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+					 errdetail("attnum list duplicate value found: %d", attrsort[i])));
+
+			return JSON_SEM_ACTION_FAILED;
+		}
+	pfree(attrsort);
+
+	parse->distinct_items = lappend(parse->distinct_items, (void *) item);
+
+	/* reset item state vars */
+	list_free(parse->attnum_list);
+	parse->attnum_list = NIL;
+	parse->ndistinct = 0;
+	parse->found_attributes = false;
+	parse->found_ndistinct = false;
+
+	/* Now we are looking for the next MVNDistinctItem */
+	parse->state = NDIST_EXPECT_ITEM;
+	return JSON_SUCCESS;
+}
+
+
+/*
+ * ndsitinct input format has two types of arrays, the outer MVNDistinctItem
+ * array, and the attnum list array within each MVNDistinctItem.
+ */
+static JsonParseErrorType
+ndistinct_array_start(void *state)
+{
+	ndistinctParseState *parse = state;
+
+	switch (parse->state)
+	{
+		case NDIST_EXPECT_ATTNUM_LIST:
+			parse->state = NDIST_EXPECT_ATTNUM;
+			break;
+
+		case NDIST_EXPECT_START:
+			parse->state = NDIST_EXPECT_ITEM;
+			break;
+
+		default:
+			ereturn(parse->escontext, (Datum) 0,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+					 errdetail("Array found in unexpected place")));
+			return JSON_SEM_ACTION_FAILED;
+	}
+
+	return JSON_SUCCESS;
+}
+
+
+static JsonParseErrorType
+ndistinct_array_end(void *state)
+{
+	ndistinctParseState *parse = state;
+
+	/* The attnum list is complete, look for more MVNDistinctItem keys */
+	if (parse->state == NDIST_EXPECT_ATTNUM)
+	{
+		parse->state = NDIST_EXPECT_KEY;
+		return JSON_SUCCESS;
+	}
+
+	if (parse->state == NDIST_EXPECT_ITEM)
+	{
+		parse->state = NDIST_EXPECT_COMPLETE;
+		return JSON_SUCCESS;
+	}
+
+	ereturn(parse->escontext, (Datum) 0,
+			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+			 errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+			 errdetail("Array found in unexpected place")));
+	return JSON_SEM_ACTION_FAILED;
+}
+
+
+/*
+ * The valid keys for the MVNDistinctItem object are:
+ *   - attributes
+ *   - ndistinct
+ */
+static JsonParseErrorType
+ndistinct_object_field_start(void *state, char *fname, bool isnull)
+{
+	ndistinctParseState *parse = state;
+
+	const char *attributes = "attributes";
+	const char *ndistinct = "ndistinct";
+
+	if (strcmp(fname, attributes) == 0)
+	{
+		parse->found_attributes = true;
+		parse->state = NDIST_EXPECT_ATTNUM_LIST;
+		return JSON_SUCCESS;
+	}
+
+	if (strcmp(fname, ndistinct) == 0)
+	{
+		parse->found_ndistinct = true;
+		parse->state = NDIST_EXPECT_NDISTINCT;
+		return JSON_SUCCESS;
+	}
+
+	ereturn(parse->escontext, (Datum) 0,
+			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+			 errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+			 errdetail("Only allowed keys are \%s\" and \%s\".", attributes, ndistinct)));
+	return JSON_SEM_ACTION_FAILED;
+}
+
+/*
+ *
+ */
+static JsonParseErrorType
+ndistinct_array_element_start(void *state, bool isnull)
+{
+	ndistinctParseState *parse = state;
+
+	if (parse->state == NDIST_EXPECT_ATTNUM)
+	{
+		if (isnull)
+		{
+			ereturn(parse->escontext, (Datum) 0,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+					 errdetail("Attnum list elements cannot be null.")));
+
+			return JSON_SEM_ACTION_FAILED;
+		}
+		return JSON_SUCCESS;
+	}
+
+	if (parse->state == NDIST_EXPECT_ITEM)
+	{
+		if (isnull)
+		{
+			ereturn(parse->escontext, (Datum) 0,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+					 errdetail("Item list elements cannot be null.")));
+
+			return JSON_SEM_ACTION_FAILED;
+		}
+
+		return JSON_SUCCESS;
+	}
+
+	ereturn(parse->escontext, (Datum) 0,
+			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+			 errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+			 errdetail("Unexpected array element.")));
+
+	return JSON_SEM_ACTION_FAILED;
+}
+
+/*
+ * Handle scalar events from the ndistinct input parser.
+ *
+ */
+static JsonParseErrorType
+ndistinct_scalar(void *state, char *token, JsonTokenType tokentype)
+{
+	ndistinctParseState *parse = state;
+
+	if (parse->state == NDIST_EXPECT_ATTNUM)
+	{
+		AttrNumber	attnum = pg_strtoint16_safe(token, parse->escontext);
+
+		if (SOFT_ERROR_OCCURRED(parse->escontext))
+			return JSON_SEM_ACTION_FAILED;
+
+		parse->attnum_list = lappend_int(parse->attnum_list, (int) attnum);
+		return JSON_SUCCESS;
+	}
+
+	if (parse->state == NDIST_EXPECT_NDISTINCT)
+	{
+		/*
+		 * While the structure dictates that ndistinct in a double precision
+		 * floating point, in practice it has always been an integer, and it
+		 * is output as such. Therefore, we follow usage precendent over the
+		 * actual storage structure, and read it in as an integer.
+		 */
+		parse->ndistinct = pg_strtoint64_safe(token, parse->escontext);
+
+		if (SOFT_ERROR_OCCURRED(parse->escontext))
+			return JSON_SEM_ACTION_FAILED;
+
+		parse->state = NDIST_EXPECT_KEY;
+		return JSON_SUCCESS;
+	}
+
+	ereturn(parse->escontext, (Datum) 0,
+			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+			 errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+			 errdetail("Unexpected scalar.")));
+
+	return JSON_SEM_ACTION_FAILED;
+}
+
 /*
  * pg_ndistinct_in
  *		input routine for type pg_ndistinct
  *
- * pg_ndistinct is real enough to be a table column, but it has no
- * operations of its own, and disallows input (just like pg_node_tree).
+ * example input:
+ *     [{"attributes": [6, -1], "ndistinct": 14},
+ *      {"attributes": [6, -2], "ndistinct": 9143},
+ *      {"attributes": [-1,-2], "ndistinct": 13454},
+ *      {"attributes": [6, -1, -2], "ndistinct": 14549}]
  */
 Datum
 pg_ndistinct_in(PG_FUNCTION_ARGS)
 {
-	ereport(ERROR,
-			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-			 errmsg("cannot accept a value of type %s", "pg_ndistinct")));
+	char	   *str = PG_GETARG_CSTRING(0);
 
-	PG_RETURN_VOID();			/* keep compiler quiet */
+	ndistinctParseState parse_state;
+	JsonParseErrorType result;
+	JsonLexContext *lex;
+	JsonSemAction sem_action;
+
+	/* initialize semantic state */
+	parse_state.str = str;
+	parse_state.state = NDIST_EXPECT_START;
+	parse_state.distinct_items = NIL;
+	parse_state.escontext = fcinfo->context;
+	parse_state.found_attributes = false;
+	parse_state.found_ndistinct = false;
+	parse_state.attnum_list = NIL;
+	parse_state.ndistinct = 0;
+
+	/* set callbacks */
+	sem_action.semstate = (void *) &parse_state;
+	sem_action.object_start = ndistinct_object_start;
+	sem_action.object_end = ndistinct_object_end;
+	sem_action.array_start = ndistinct_array_start;
+	sem_action.array_end = ndistinct_array_end;
+	sem_action.object_field_start = ndistinct_object_field_start;
+	sem_action.object_field_end = NULL;
+	sem_action.array_element_start = ndistinct_array_element_start;
+	sem_action.array_element_end = NULL;
+	sem_action.scalar = ndistinct_scalar;
+
+	lex = makeJsonLexContextCstringLen(NULL, str, strlen(str),
+									   PG_UTF8, true);
+	result = pg_parse_json(lex, &sem_action);
+	freeJsonLexContext(lex);
+
+	if (result == JSON_SUCCESS)
+	{
+		MVNDistinct *ndistinct;
+		int			nitems = parse_state.distinct_items->length;
+		bytea	   *bytes;
+
+		ndistinct = palloc(offsetof(MVNDistinct, items) +
+						   nitems * sizeof(MVNDistinctItem));
+
+		ndistinct->magic = STATS_NDISTINCT_MAGIC;
+		ndistinct->type = STATS_NDISTINCT_TYPE_BASIC;
+		ndistinct->nitems = nitems;
+
+		for (int i = 0; i < nitems; i++)
+		{
+			MVNDistinctItem *item = parse_state.distinct_items->elements[i].ptr_value;
+
+			ndistinct->items[i].ndistinct = item->ndistinct;
+			ndistinct->items[i].nattributes = item->nattributes;
+			ndistinct->items[i].attributes = item->attributes;
+
+			/*
+			 * free the MVNDistinctItem, but not the attributes we're still
+			 * using
+			 */
+			pfree(item);
+		}
+		bytes = statext_ndistinct_serialize(ndistinct);
+
+		list_free(parse_state.distinct_items);
+		for (int i = 0; i < nitems; i++)
+			pfree(ndistinct->items[i].attributes);
+		pfree(ndistinct);
+
+		PG_RETURN_BYTEA_P(bytes);
+	}
+	else if (result == JSON_SEM_ACTION_FAILED)
+		PG_RETURN_NULL();		/* escontext already set */
+
+	/* Anything else is a generic JSON parse error */
+	ereturn(parse_state.escontext, (Datum) 0,
+			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+			 errmsg("malformed pg_ndistinct: \"%s\"", str),
+			 errdetail("Must be valid JSON.")));
+	PG_RETURN_NULL();
 }
 
 /*
  * pg_ndistinct
  *		output routine for type pg_ndistinct
  *
- * Produces a human-readable representation of the value.
+ * Produces a human-readable representation of the value, in the format:
+ *   [{"attributes": [attnum,. ..], "ndistinct": int}, ...]
+ *
  */
 Datum
 pg_ndistinct_out(PG_FUNCTION_ARGS)
@@ -360,26 +791,26 @@ pg_ndistinct_out(PG_FUNCTION_ARGS)
 	StringInfoData str;
 
 	initStringInfo(&str);
-	appendStringInfoChar(&str, '{');
+	appendStringInfoChar(&str, '[');
 
 	for (i = 0; i < ndist->nitems; i++)
 	{
-		int			j;
 		MVNDistinctItem item = ndist->items[i];
 
 		if (i > 0)
 			appendStringInfoString(&str, ", ");
 
-		for (j = 0; j < item.nattributes; j++)
-		{
-			AttrNumber	attnum = item.attributes[j];
+		Assert(item.nattributes > 0);	/* TODO: elog? */
 
-			appendStringInfo(&str, "%s%d", (j == 0) ? "\"" : ", ", attnum);
-		}
-		appendStringInfo(&str, "\": %d", (int) item.ndistinct);
+		appendStringInfo(&str, "{\"attributes\": [%d", item.attributes[0]);
+
+		for (int j = 1; j < item.nattributes; j++)
+			appendStringInfo(&str, ", %d", item.attributes[j]);
+
+		appendStringInfo(&str, "], \"ndistinct\": %d}", (int) item.ndistinct);
 	}
 
-	appendStringInfoChar(&str, '}');
+	appendStringInfoChar(&str, ']');
 
 	PG_RETURN_CSTRING(str.data);
 }
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index a1f83b58b23..3f07e3799e4 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -473,9 +473,9 @@ SELECT s.stxkind, d.stxdndistinct
   FROM pg_statistic_ext s, pg_statistic_ext_data d
  WHERE s.stxrelid = 'ndistinct'::regclass
    AND d.stxoid = s.oid;
- stxkind |                    stxdndistinct                    
----------+-----------------------------------------------------
- {d,f,m} | {"3, 4": 11, "3, 6": 11, "4, 6": 11, "3, 4, 6": 11}
+ stxkind |                                                                              stxdndistinct                                                                              
+---------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ {d,f,m} | [{"attributes": [3, 4], "ndistinct": 11}, {"attributes": [3, 6], "ndistinct": 11}, {"attributes": [4, 6], "ndistinct": 11}, {"attributes": [3, 4, 6], "ndistinct": 11}]
 (1 row)
 
 -- minor improvement, make sure the ctid does not break the matching
@@ -555,9 +555,9 @@ SELECT s.stxkind, d.stxdndistinct
   FROM pg_statistic_ext s, pg_statistic_ext_data d
  WHERE s.stxrelid = 'ndistinct'::regclass
    AND d.stxoid = s.oid;
- stxkind |                      stxdndistinct                       
----------+----------------------------------------------------------
- {d,f,m} | {"3, 4": 221, "3, 6": 247, "4, 6": 323, "3, 4, 6": 1000}
+ stxkind |                                                                                stxdndistinct                                                                                 
+---------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ {d,f,m} | [{"attributes": [3, 4], "ndistinct": 221}, {"attributes": [3, 6], "ndistinct": 247}, {"attributes": [4, 6], "ndistinct": 323}, {"attributes": [3, 4, 6], "ndistinct": 1000}]
 (1 row)
 
 -- correct estimates
@@ -704,9 +704,9 @@ SELECT s.stxkind, d.stxdndistinct
   FROM pg_statistic_ext s, pg_statistic_ext_data d
  WHERE s.stxrelid = 'ndistinct'::regclass
    AND d.stxoid = s.oid;
- stxkind |                           stxdndistinct                           
----------+-------------------------------------------------------------------
- {d,e}   | {"-1, -2": 221, "-1, -3": 247, "-2, -3": 323, "-1, -2, -3": 1000}
+ stxkind |                                                                                     stxdndistinct                                                                                     
+---------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ {d,e}   | [{"attributes": [-1, -2], "ndistinct": 221}, {"attributes": [-1, -3], "ndistinct": 247}, {"attributes": [-2, -3], "ndistinct": 323}, {"attributes": [-1, -2, -3], "ndistinct": 1000}]
 (1 row)
 
 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)');
@@ -753,9 +753,9 @@ SELECT s.stxkind, d.stxdndistinct
   FROM pg_statistic_ext s, pg_statistic_ext_data d
  WHERE s.stxrelid = 'ndistinct'::regclass
    AND d.stxoid = s.oid;
- stxkind |                        stxdndistinct                        
----------+-------------------------------------------------------------
- {d,e}   | {"3, 4": 221, "3, -1": 247, "4, -1": 323, "3, 4, -1": 1000}
+ stxkind |                                                                                  stxdndistinct                                                                                  
+---------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ {d,e}   | [{"attributes": [3, 4], "ndistinct": 221}, {"attributes": [3, -1], "ndistinct": 247}, {"attributes": [4, -1], "ndistinct": 323}, {"attributes": [3, 4, -1], "ndistinct": 1000}]
 (1 row)
 
 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
@@ -3544,4 +3544,36 @@ SELECT FROM sb_1 LEFT JOIN sb_2
 
 RESET enable_nestloop;
 RESET enable_mergejoin;
+-- Test input function of pg_ndistinct.
+SELECT '[{"attributes" : [2,3], "ndistinct" : 4},
+         {"attributes" : [2,-1], "ndistinct" : 4},
+         {"attributes" : [2,3,-1], "ndistinct" : 4},
+         {"attributes" : [1,3,-1,-2], "ndistinct" : 4}]'::pg_ndistinct;
+                                                                                 pg_ndistinct                                                                                  
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ [{"attributes": [2, 3], "ndistinct": 4}, {"attributes": [2, -1], "ndistinct": 4}, {"attributes": [2, 3, -1], "ndistinct": 4}, {"attributes": [1, 3, -1, -2], "ndistinct": 4}]
+(1 row)
+
+-- error, cannot duplicate attribute
+SELECT '[{"attributes" : [2,3], "ndistinct" : 4},
+         {"attributes" : [2,-1], "ndistinct" : 4},
+         {"attributes" : [2,3,2], "ndistinct" : 4},
+         {"attributes" : [1,3,-1,-2], "ndistinct" : 4}]'::pg_ndistinct;
+ERROR:  malformed pg_ndistinct: "[{"attributes" : [2,3], "ndistinct" : 4},
+         {"attributes" : [2,-1], "ndistinct" : 4},
+         {"attributes" : [2,3,2], "ndistinct" : 4},
+         {"attributes" : [1,3,-1,-2], "ndistinct" : 4}]"
+LINE 1: SELECT '[{"attributes" : [2,3], "ndistinct" : 4},
+               ^
+DETAIL:  attnum list duplicate value found: 2
+-- Test input function of pg_dependencies.
+SELECT '[{"attributes" : [2,3], "dependency" : 4, "degree": 1.0000},
+         {"attributes" : [2,-1], "dependency" : 4, "degree": 0.0000},
+         {"attributes" : [2,3,-1], "dependency" : 4, "degree": 0.5000},
+         {"attributes" : [1,3,-1,-2], "dependency" : 4, "degree": 1.0000}]'::pg_dependencies;
+                                                                                                                          pg_dependencies                                                                                                                          
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ [{"attributes": [2, 3], "dependency": 4, "degree": 1.000000}, {"attributes": [2, -1], "dependency": 4, "degree": 0.000000}, {"attributes": [2, 3, -1], "dependency": 4, "degree": 0.500000}, {"attributes": [1, 3, -1, -2], "dependency": 4, "degree": 1.000000}]
+(1 row)
+
 DROP TABLE sb_1, sb_2 CASCADE;
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index 823c7db9dab..b3d7977f1ae 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -1811,4 +1811,16 @@ SELECT FROM sb_1 LEFT JOIN sb_2
 RESET enable_nestloop;
 RESET enable_mergejoin;
 
+-- Test input function of pg_ndistinct.
+SELECT '[{"attributes" : [2,3], "ndistinct" : 4},
+         {"attributes" : [2,-1], "ndistinct" : 4},
+         {"attributes" : [2,3,-1], "ndistinct" : 4},
+         {"attributes" : [1,3,-1,-2], "ndistinct" : 4}]'::pg_ndistinct;
+
+-- error, cannot duplicate attribute
+SELECT '[{"attributes" : [2,3], "ndistinct" : 4},
+         {"attributes" : [2,-1], "ndistinct" : 4},
+         {"attributes" : [2,3,2], "ndistinct" : 4},
+         {"attributes" : [1,3,-1,-2], "ndistinct" : 4}]'::pg_ndistinct;
+
 DROP TABLE sb_1, sb_2 CASCADE;

base-commit: 2d756ebbe857e3d395d18350bf232300ebd23981
-- 
2.51.0

