From 768c3cc2e47a8a85098af4aad38c58156c00b374 Mon Sep 17 00:00:00 2001
From: Nikita Glukhov <n.gluhov@postgrespro.ru>
Date: Fri, 11 Mar 2022 01:06:50 +0300
Subject: [PATCH 6/6] Add jsonb statistics

---
 src/backend/catalog/system_functions.sql  |   36 +
 src/backend/catalog/system_views.sql      |   56 +
 src/backend/utils/adt/Makefile            |    2 +
 src/backend/utils/adt/jsonb_selfuncs.c    | 1582 ++++++++++++++++++++
 src/backend/utils/adt/jsonb_typanalyze.c  | 1627 +++++++++++++++++++++
 src/backend/utils/adt/jsonpath_exec.c     |    2 +-
 src/include/catalog/pg_operator.dat       |   17 +-
 src/include/catalog/pg_proc.dat           |   11 +
 src/include/catalog/pg_statistic.h        |    2 +
 src/include/catalog/pg_type.dat           |    2 +-
 src/include/utils/json_selfuncs.h         |  113 ++
 src/test/regress/expected/jsonb_stats.out |  713 +++++++++
 src/test/regress/expected/rules.out       |   32 +
 src/test/regress/parallel_schedule        |    2 +-
 src/test/regress/sql/jsonb_stats.sql      |  249 ++++
 15 files changed, 4435 insertions(+), 11 deletions(-)
 create mode 100644 src/backend/utils/adt/jsonb_selfuncs.c
 create mode 100644 src/backend/utils/adt/jsonb_typanalyze.c
 create mode 100644 src/include/utils/json_selfuncs.h
 create mode 100644 src/test/regress/expected/jsonb_stats.out
 create mode 100644 src/test/regress/sql/jsonb_stats.sql

diff --git a/src/backend/catalog/system_functions.sql b/src/backend/catalog/system_functions.sql
index 81bac6f5812..0b9f68e88ff 100644
--- a/src/backend/catalog/system_functions.sql
+++ b/src/backend/catalog/system_functions.sql
@@ -594,6 +594,42 @@ LANGUAGE internal
 STRICT IMMUTABLE PARALLEL SAFE
 AS 'unicode_is_normalized';
 
+-- XXX is this function immutable / parallel safe?
+-- XXX do we actually need to cast to text and then to jsonb?
+CREATE FUNCTION pg_json_path_stats(tab regclass, path_index integer) RETURNS text
+AS $$
+	SELECT jsonb_pretty((
+		CASE
+			WHEN stakind1 = 8 THEN stavalues1
+			WHEN stakind2 = 8 THEN stavalues2
+			WHEN stakind3 = 8 THEN stavalues3
+			WHEN stakind4 = 8 THEN stavalues4
+			WHEN stakind5 = 8 THEN stavalues5
+		END::text::jsonb[])[$2])
+	FROM pg_statistic
+	WHERE starelid = $1
+$$ LANGUAGE 'sql';
+
+-- XXX is this function immutable / parallel safe?
+-- XXX do we actually need to cast to text and then to jsonb?
+CREATE FUNCTION pg_json_path_stats(tab regclass, path text) RETURNS text
+AS $$
+	SELECT jsonb_pretty(pathstats)
+	FROM (
+		SELECT unnest(
+			CASE
+				WHEN stakind1 = 8 THEN stavalues1
+				WHEN stakind2 = 8 THEN stavalues2
+				WHEN stakind3 = 8 THEN stavalues3
+				WHEN stakind4 = 8 THEN stavalues4
+				WHEN stakind5 = 8 THEN stavalues5
+			END::text::jsonb[]) pathstats
+		FROM pg_statistic
+		WHERE starelid = $1
+	) paths
+	WHERE pathstats->>'path' = $2
+$$ LANGUAGE 'sql';
+
 --
 -- The default permissions for functions mean that anyone can execute them.
 -- A number of functions shouldn't be executable by just anyone, but rather
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 40b7bca5a96..9d32eeb4a3a 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -364,6 +364,62 @@ CREATE VIEW pg_stats_ext_exprs WITH (security_barrier) AS
 -- unprivileged users may read pg_statistic_ext but not pg_statistic_ext_data
 REVOKE ALL ON pg_statistic_ext_data FROM public;
 
+-- XXX This probably needs to do the same checks as pg_stats, i.e.
+--    WHERE NOT attisdropped
+--    AND has_column_privilege(c.oid, a.attnum, 'select')
+--    AND (c.relrowsecurity = false OR NOT row_security_active(c.oid));
+CREATE VIEW pg_stats_json AS
+	SELECT
+		nspname AS schemaname,
+		relname AS tablename,
+		attname AS attname,
+
+		path->>'path' AS json_path,
+
+		stainherit AS inherited,
+
+		(path->'json'->>'nullfrac')::float4 AS null_frac,
+		(path->'json'->>'width')::float4 AS avg_width,
+		(path->'json'->>'distinct')::float4 AS n_distinct,
+
+		ARRAY(SELECT val FROM jsonb_array_elements(
+				path->'json'->'mcv'->'values') val)::anyarray
+			AS most_common_vals,
+
+		ARRAY(SELECT num::text::float4 FROM jsonb_array_elements(
+				path->'json'->'mcv'->'numbers') num)
+			AS most_common_freqs,
+
+		ARRAY(SELECT val FROM jsonb_array_elements(
+				path->'json'->'histogram'->'values') val)
+			AS histogram_bounds,
+
+		ARRAY(SELECT val::text::int FROM jsonb_array_elements(
+				path->'array_length'->'mcv'->'values') val)
+			AS most_common_array_lengths,
+
+		ARRAY(SELECT num::text::float4 FROM jsonb_array_elements(
+				path->'array_length'->'mcv'->'numbers') num)
+			AS most_common_array_length_freqs,
+
+		(path->'json'->>'correlation')::float4 AS correlation
+
+	FROM
+		pg_statistic s JOIN pg_class c ON (c.oid = s.starelid)
+		JOIN pg_attribute a ON (c.oid = attrelid AND attnum = s.staattnum)
+		LEFT JOIN pg_namespace n ON (n.oid = c.relnamespace),
+		LATERAL (
+			SELECT unnest((CASE
+					WHEN stakind1 = 8 THEN stavalues1
+					WHEN stakind2 = 8 THEN stavalues2
+					WHEN stakind3 = 8 THEN stavalues3
+					WHEN stakind4 = 8 THEN stavalues4
+					WHEN stakind5 = 8 THEN stavalues5
+				END ::text::jsonb[])[2:]) AS path
+		) paths;
+
+-- no need to revoke any privileges, we've already revoked access to pg_statistic
+
 CREATE VIEW pg_publication_tables AS
     SELECT
         P.pubname AS pubname,
diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile
index 41b486bceff..5e359ccf4fb 100644
--- a/src/backend/utils/adt/Makefile
+++ b/src/backend/utils/adt/Makefile
@@ -50,6 +50,8 @@ OBJS = \
 	jsonb.o \
 	jsonb_gin.o \
 	jsonb_op.o \
+	jsonb_selfuncs.o \
+	jsonb_typanalyze.o \
 	jsonb_util.o \
 	jsonfuncs.o \
 	jsonbsubs.o \
diff --git a/src/backend/utils/adt/jsonb_selfuncs.c b/src/backend/utils/adt/jsonb_selfuncs.c
new file mode 100644
index 00000000000..f5520f88a1d
--- /dev/null
+++ b/src/backend/utils/adt/jsonb_selfuncs.c
@@ -0,0 +1,1582 @@
+/*-------------------------------------------------------------------------
+ *
+ * jsonb_selfuncs.c
+ *	  Functions for selectivity estimation of jsonb operators
+ *
+ * Copyright (c) 2016-2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  src/backend/utils/adt/jsonb_selfuncs.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <math.h>
+
+#include "fmgr.h"
+#include "access/heapam.h"
+#include "access/htup_details.h"
+#include "catalog/pg_operator.h"
+#include "catalog/pg_statistic.h"
+#include "catalog/pg_type.h"
+#include "common/string.h"
+#include "nodes/primnodes.h"
+#include "utils/builtins.h"
+#include "utils/json.h"
+#include "utils/jsonb.h"
+#include "utils/json_selfuncs.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+#include "utils/selfuncs.h"
+
+#define DEFAULT_JSON_CONTAINS_SEL	0.001
+
+/*
+ * jsonGetField
+ *		Given a JSONB document and a key, extract the JSONB value for the key.
+ */
+static inline Datum
+jsonGetField(Datum obj, const char *field)
+{
+	Jsonb 	   *jb = DatumGetJsonbP(obj);
+	JsonbValue *jbv = findJsonbValueFromContainerLen(&jb->root, JB_FOBJECT,
+													 field, strlen(field));
+	return jbv ? JsonbPGetDatum(JsonbValueToJsonb(jbv)) : PointerGetDatum(NULL);
+}
+
+/*
+ * jsonGetFloat4
+ *		Given a JSONB value, interpret it as a float4 value.
+ *
+ * This expects the JSONB value to be a numeric, because that's how we store
+ * floats in JSONB, and we cast it to float4.
+ */
+static inline float4
+jsonGetFloat4(Datum jsonb, float4 default_val)
+{
+	Jsonb	   *jb;
+	JsonbValue	jv;
+
+	if (!DatumGetPointer(jsonb))
+		return default_val;
+
+	jb = DatumGetJsonbP(jsonb);
+
+	if (!JsonbExtractScalar(&jb->root, &jv) || jv.type != jbvNumeric)
+		return default_val;
+
+	return DatumGetFloat4(DirectFunctionCall1(numeric_float4,
+											  NumericGetDatum(jv.val.numeric)));
+}
+
+/*
+ * jsonStatsInit
+ *		Given a pg_statistic tuple, expand STATISTIC_KIND_JSON into JsonStats.
+ */
+bool
+jsonStatsInit(JsonStats data, const VariableStatData *vardata)
+{
+	Jsonb	   *jb;
+	JsonbValue	prefix;
+
+	if (!vardata->statsTuple)
+		return false;
+
+	data->statsTuple = vardata->statsTuple;
+	memset(&data->attslot, 0, sizeof(data->attslot));
+
+	/* Were there just NULL values in the column? No JSON stats, but still useful. */
+	if (((Form_pg_statistic) GETSTRUCT(data->statsTuple))->stanullfrac >= 1.0)
+	{
+		data->nullfrac = 1.0;
+		return true;
+	}
+
+	/* Do we have the JSON stats built in the pg_statistic? */
+	if (!get_attstatsslot(&data->attslot, data->statsTuple,
+						  STATISTIC_KIND_JSON, InvalidOid,
+						  ATTSTATSSLOT_NUMBERS | ATTSTATSSLOT_VALUES))
+		return false;
+
+	/*
+	 * Valid JSON stats should have at least 2 elements in values:
+	 *  0th - root path prefix
+	 *  1st - root path stats
+	 */
+	if (data->attslot.nvalues < 2)
+	{
+		free_attstatsslot(&data->attslot);
+		return false;
+	}
+
+	/* XXX If the ACL check was not OK, would we even get here? */
+	data->acl_ok = vardata->acl_ok;
+	data->rel = vardata->rel;
+	data->nullfrac =
+		data->attslot.nnumbers > 0 ? data->attslot.numbers[0] : 0.0;
+	data->pathdatums = data->attslot.values + 1;
+	data->npaths = data->attslot.nvalues - 1;
+
+	/* Extract root path prefix */
+	jb = DatumGetJsonbP(data->attslot.values[0]);
+	if (!JsonbExtractScalar(&jb->root, &prefix) || prefix.type != jbvString)
+	{
+		free_attstatsslot(&data->attslot);
+		return false;
+	}
+
+	data->prefix = prefix.val.string.val;
+	data->prefixlen = prefix.val.string.len;
+
+	/* Create path cache, initialize only the two fields that act as flags */
+	data->paths = palloc(sizeof(*data->paths) * data->npaths);
+
+	for (int i = 0; i < data->npaths; i++)
+	{
+		data->paths[i].data = NULL;
+		data->paths[i].path = NULL;
+	}
+
+	return true;
+}
+
+/*
+ * jsonStatsRelease
+ *		Release resources (statistics slot) associated with the JsonStats value.
+ */
+void
+jsonStatsRelease(JsonStats data)
+{
+	free_attstatsslot(&data->attslot);
+}
+
+/*
+ * jsonPathStatsAllocSpecialStats
+ *		Allocate a copy of JsonPathStats for accessing special (length etc.)
+ *		stats for a given JSON path.
+ */
+static JsonPathStats
+jsonPathStatsAllocSpecialStats(JsonPathStats pstats, JsonPathStatsType type)
+{
+	JsonPathStats stats;
+
+	if (!pstats)
+		return NULL;
+
+	/* copy and replace stats type */
+	stats = palloc(sizeof(*stats));
+	*stats = *pstats;
+	stats->type = type;
+
+	return stats;
+}
+
+/*
+ * jsonPathStatsGetArrayLengthStats
+ *		Extract statistics of array lengths for the path.
+ */
+JsonPathStats
+jsonPathStatsGetArrayLengthStats(JsonPathStats pstats)
+{
+	/*
+	 * The array length statistics is relevant only for values that are arrays.
+	 * So if we observed no such values, we know there can't be such
+	 * statistics and so we simply return NULL.
+	 */
+	if (jsonPathStatsGetTypeFreq(pstats, jbvArray, 0.0) <= 0.0)
+		return NULL;
+
+	return jsonPathStatsAllocSpecialStats(pstats, JsonPathStatsArrayLength);
+}
+
+/*
+ * jsonPathStatsGetObjectLengthStats
+ *		Extract statistics of object length for the path.
+ */
+JsonPathStats
+jsonPathStatsGetObjectLengthStats(JsonPathStats pstats)
+{
+	/*
+	 * The object length statistics is relevant only for values that are objects.
+	 * So if we observed no such values, we know there can't be such
+	 * statistics and so we simply return NULL.
+	 */
+	if (jsonPathStatsGetTypeFreq(pstats, jbvObject, 0.0) <= 0.0)
+		return NULL;
+
+	return jsonPathStatsAllocSpecialStats(pstats, JsonPathStatsObjectLength);
+}
+
+/*
+ * jsonPathStatsGetPath
+ *		Try to use cached path name or extract it from per-path stats datum.
+ *
+ * Returns true on success, false on error.
+ */
+static inline bool
+jsonPathStatsGetPath(JsonPathStats stats, Datum pathdatum,
+					 const char **path, int *pathlen)
+{
+	*path = stats->path;
+
+	if (*path)
+		/* use cached path name */
+		*pathlen = stats->pathlen;
+	else
+	{
+		Jsonb	   *jsonb = DatumGetJsonbP(pathdatum);
+		JsonbValue	pathkey;
+		JsonbValue *pathval;
+
+		/* extract path from the statistics represented as jsonb document */
+		JsonValueInitStringWithLen(&pathkey, "path", 4);
+		pathval = findJsonbValueFromContainer(&jsonb->root, JB_FOBJECT, &pathkey);
+
+		if (!pathval || pathval->type != jbvString)
+			return false;	/* XXX invalid stats data, maybe throw error */
+
+		/* cache extracted path name */
+		*path = stats->path = pathval->val.string.val;
+		*pathlen = stats->pathlen = pathval->val.string.len;
+	}
+
+	return true;
+}
+
+/* Context for bsearch()ing paths */
+typedef struct JsonPathStatsSearchContext
+{
+	JsonStats	stats;
+	const char *path;
+	int			pathlen;
+} JsonPathStatsSearchContext;
+
+/*
+ * jsonPathStatsCompare
+ *		Compare two JsonPathStats structs, so that we can sort them.
+ *
+ * We do this so that we can search for stats for a given path simply by
+ * bsearch().
+ *
+ * XXX We never build two structs for the same path, so we know the paths
+ * are different - one may be a prefix of the other, but then we sort the
+ * strings by length.
+ */
+static int
+jsonPathStatsCompare(const void *pv1, const void *pv2)
+{
+	JsonPathStatsSearchContext const *cxt = pv1;
+	Datum const *pathdatum = (Datum const *) pv2;
+	int			index = pathdatum - cxt->stats->pathdatums;
+	JsonPathStats stats = &cxt->stats->paths[index];
+	const char *path;
+	int			pathlen;
+	int			res;
+
+	if (!jsonPathStatsGetPath(stats, *pathdatum, &path, &pathlen))
+		return 1;	/* XXX invalid stats data */
+
+	/* compare the shared part first, then compare by length */
+	res = strncmp(cxt->path, path, Min(cxt->pathlen, pathlen));
+
+	return res ? res : cxt->pathlen - pathlen;
+}
+
+/*
+ * jsonStatsFindPath
+ *		Find stats for a given path.
+ *
+ * The stats are sorted by path, so we can simply do bsearch().
+ * This is low-level function and jsdata->prefix is not considered, the caller
+ * should handle it by itself.
+ */
+static JsonPathStats
+jsonStatsFindPath(JsonStats jsdata, const char *path, int pathlen)
+{
+	JsonPathStatsSearchContext cxt;
+	JsonPathStats stats;
+	Datum	   *pdatum;
+	int			index;
+
+	cxt.stats = jsdata;
+	cxt.path = path;
+	cxt.pathlen = pathlen;
+
+	pdatum = bsearch(&cxt, jsdata->pathdatums, jsdata->npaths,
+					 sizeof(*jsdata->pathdatums), jsonPathStatsCompare);
+
+	if (!pdatum)
+		return NULL;
+
+	index = pdatum - jsdata->pathdatums;
+	stats = &jsdata->paths[index];
+
+	Assert(stats->path);
+	Assert(stats->pathlen == pathlen);
+
+	/* Init all fields if needed (stats->data == NULL means uninitialized) */
+	if (!stats->data)
+	{
+		stats->data = jsdata;
+		stats->datum = pdatum;
+		stats->type = JsonPathStatsValues;
+	}
+
+	return stats;
+}
+
+/*
+ * jsonStatsGetPathByStr
+ *		Find stats for a given path string considering jsdata->prefix.
+ */
+JsonPathStats
+jsonStatsGetPathByStr(JsonStats jsdata, const char *subpath, int subpathlen)
+{
+	JsonPathStats stats;
+	char	   *path;
+	int			pathlen;
+
+	if (jsdata->nullfrac >= 1.0)
+		return NULL;
+
+	pathlen = jsdata->prefixlen + subpathlen - 1;
+	path = palloc(pathlen);
+
+	memcpy(path, jsdata->prefix, jsdata->prefixlen);
+	memcpy(&path[jsdata->prefixlen], &subpath[1], subpathlen - 1);
+
+	stats = jsonStatsFindPath(jsdata, path, pathlen);
+
+	if (!stats)
+		pfree(path);
+
+	return stats;
+}
+
+/*
+ * jsonStatsGetRootPath
+ *		Find JSON stats for root prefix path.
+ */
+static JsonPathStats
+jsonStatsGetRootPath(JsonStats jsdata)
+{
+	if (jsdata->nullfrac >= 1.0)
+		return NULL;
+
+	return jsonStatsFindPath(jsdata, jsdata->prefix, jsdata->prefixlen);
+}
+
+#define jsonStatsGetRootArrayPath(jsdata) \
+		jsonStatsGetPathByStr(jsdata, JSON_PATH_ROOT_ARRAY, JSON_PATH_ROOT_ARRAY_LEN)
+
+/*
+ * jsonPathAppendEntry
+ *		Append entry (represented as simple string) to a path.
+ *
+ * NULL entry is treated as wildcard array accessor "[*]".
+ */
+void
+jsonPathAppendEntry(StringInfo path, const char *entry)
+{
+	if (entry)
+	{
+		appendStringInfoCharMacro(path, '.');
+		escape_json(path, entry);
+	}
+	else
+		appendStringInfoString(path, "[*]");
+}
+
+/*
+ * jsonPathAppendEntryWithLen
+ *		Append string (represented as string + length) to a path.
+ */
+static void
+jsonPathAppendEntryWithLen(StringInfo path, const char *entry, int len)
+{
+	char *tmpentry = pnstrdup(entry, len);
+	jsonPathAppendEntry(path, tmpentry);
+	pfree(tmpentry);
+}
+
+/*
+ * jsonPathStatsGetSubpath
+ *		Find JSON path stats for object key or array elements (if 'key' = NULL).
+ */
+JsonPathStats
+jsonPathStatsGetSubpath(JsonPathStats pstats, const char *key)
+{
+	JsonPathStats spstats;
+	StringInfoData str;
+
+	initStringInfo(&str);
+	appendBinaryStringInfo(&str, pstats->path, pstats->pathlen);
+	jsonPathAppendEntry(&str, key);
+
+	spstats = jsonStatsFindPath(pstats->data, str.data, str.len);
+	if (!spstats)
+		pfree(str.data);
+
+	return spstats;
+}
+
+/*
+ * jsonPathStatsGetArrayIndexSelectivity
+ *		Given stats for a path, determine selectivity for an array index.
+ */
+Selectivity
+jsonPathStatsGetArrayIndexSelectivity(JsonPathStats pstats, int index)
+{
+	JsonPathStats lenstats = jsonPathStatsGetArrayLengthStats(pstats);
+	JsonbValue	tmpjbv;
+	Jsonb	   *jb;
+
+	/*
+	 * If we have no array length stats, assume all documents match.
+	 *
+	 * XXX Shouldn't this use a default smaller than 1.0? What do the selfuncs
+	 * for regular arrays use?
+	 */
+	if (!lenstats)
+		return 1.0;
+
+	jb = JsonbValueToJsonb(JsonValueInitInteger(&tmpjbv, index));
+
+	/* calculate fraction of elements smaller than the index */
+	return jsonSelectivity(lenstats, JsonbPGetDatum(jb), JsonbGtOperator);
+}
+
+/*
+ * jsonStatsGetPath
+ *		Find JSON statistics for a given path.
+ *
+ * 'path' is an array of text datums of length 'pathlen' (can be zero).
+ */
+static JsonPathStats
+jsonStatsGetPath(JsonStats jsdata, Datum *path, int pathlen,
+				 bool try_arrays_indexes, float4 *nullfrac)
+{
+	JsonPathStats pstats = jsonStatsGetRootPath(jsdata);
+	Selectivity	sel = 1.0;
+
+	for (int i = 0; pstats && i < pathlen; i++)
+	{
+		char	   *key = TextDatumGetCString(path[i]);
+		char	   *tail;
+		int			index;
+
+		if (!try_arrays_indexes)
+		{
+			/* Find object key stats */
+			pstats = jsonPathStatsGetSubpath(pstats, key);
+			pfree(key);
+			continue;
+		}
+
+		/* Try to interpret path entry as integer array index */
+		errno = 0;
+		index = strtoint(key, &tail, 10);
+
+		if (tail == key || *tail != '\0' || errno != 0)
+		{
+			/* Find object key stats */
+			pstats = jsonPathStatsGetSubpath(pstats, key);
+		}
+		else
+		{
+			/* Find array index stats */
+			/* FIXME consider object key "index" also */
+			JsonPathStats arrstats = jsonPathStatsGetSubpath(pstats, NULL);
+
+			if (arrstats)
+			{
+				float4		arrfreq = jsonPathStatsGetFreq(pstats, 0.0);
+
+				sel *= jsonPathStatsGetArrayIndexSelectivity(pstats, index);
+
+				if (arrfreq > 0.0)
+					sel /= arrfreq;
+			}
+
+			pstats = arrstats;
+		}
+
+		pfree(key);
+	}
+
+	*nullfrac = 1.0 - sel;
+
+	return pstats;
+}
+
+/*
+ * jsonPathStatsGetNextSubpathStats
+ *		Iterate all collected subpaths of a given path.
+ *
+ * This function can be useful for estimation of selectivity of jsonpath
+ * '.*' and  '.**' operators.
+ *
+ * The next found subpath is written into *pkeystats, which should be set to
+ * NULL before the first call.
+ *
+ * If keysOnly is true, emit only top-level object-key subpaths.
+ *
+ * Returns false on the end of iteration and true otherwise.
+ */
+bool
+jsonPathStatsGetNextSubpathStats(JsonPathStats stats, JsonPathStats *pkeystats,
+								 bool keysOnly)
+{
+	JsonPathStats keystats = *pkeystats;
+	/* compute next index */
+	int			index =
+		(keystats ? keystats->datum : stats->datum) - stats->data->pathdatums + 1;
+
+	if (stats->type != JsonPathStatsValues)
+		return false;	/* length stats do not have subpaths */
+
+	for (; index < stats->data->npaths; index++)
+	{
+		Datum	   *pathdatum = &stats->data->pathdatums[index];
+		const char *path;
+		int			pathlen;
+
+		keystats = &stats->data->paths[index];
+
+		if (!jsonPathStatsGetPath(keystats, *pathdatum, &path, &pathlen))
+			break;	/* invalid path stats */
+
+		/* Break, if subpath does not start from a desired prefix */
+		if (pathlen <= stats->pathlen ||
+			memcmp(path, stats->path, stats->pathlen))
+			break;
+
+		if (keysOnly)
+		{
+			const char *c = &path[stats->pathlen];
+
+			if (*c == '[')
+			{
+				Assert(c[1] == '*' && c[2] == ']');
+
+#if 0	/* TODO add separate flag for requesting top-level array accessors */
+				/* skip if it is not last key in the path */
+				if (pathlen > stats->pathlen + 3)
+#endif
+					continue;	/* skip array accessors */
+			}
+			else if (*c == '.')
+			{
+				/* find end of '."key"' */
+				const char *pathend = path + pathlen - 1;
+
+				if (++c >= pathend || *c != '"')
+					break;		/* invalid path */
+
+				while (++c <= pathend && *c != '"')
+					if (*c == '\\')	/* handle escaped chars */
+						c++;
+
+				if (c > pathend)
+					break;		/* invalid path */
+
+				/* skip if it is not last key in the path */
+				if (c < pathend)
+					continue;
+			}
+			else
+				continue;	/* invalid path */
+		}
+
+		/* Init path stats if needed */
+		if (!keystats->data)
+		{
+			keystats->data = stats->data;
+			keystats->datum = pathdatum;
+			keystats->type = JsonPathStatsValues;
+		}
+
+		*pkeystats = keystats;
+
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * jsonStatsConvertArray
+ *		Convert a JSONB array into an array of some regular data type.
+ *
+ * The "type" identifies what elements are in the input JSONB array, while
+ * typid determines the target type.
+ */
+static Datum
+jsonStatsConvertArray(Datum jsonbValueArray, JsonStatType type, Oid typid,
+					  float4 multiplier)
+{
+	Datum	   *values;
+	Jsonb	   *jbvals;
+	JsonbValue	jbv;
+	JsonbIterator *it;
+	JsonbIteratorToken r;
+	int			nvalues;
+	int			i;
+	int16		typlen;
+	bool		typbyval;
+	char		typalign;
+
+	if (!DatumGetPointer(jsonbValueArray))
+		return PointerGetDatum(NULL);
+
+	jbvals = DatumGetJsonbP(jsonbValueArray);
+
+	nvalues = JsonContainerSize(&jbvals->root);
+
+	values = palloc(sizeof(Datum) * nvalues);
+
+	for (i = 0, it = JsonbIteratorInit(&jbvals->root);
+		(r = JsonbIteratorNext(&it, &jbv, true)) != WJB_DONE;)
+	{
+		if (r == WJB_ELEM)
+		{
+			Datum value;
+
+			switch (type)
+			{
+				case JsonStatJsonb:
+				case JsonStatJsonbWithoutSubpaths:
+					value = JsonbPGetDatum(JsonbValueToJsonb(&jbv));
+					break;
+
+				case JsonStatText:
+				case JsonStatString:
+					Assert(jbv.type == jbvString);
+					value = PointerGetDatum(
+								cstring_to_text_with_len(jbv.val.string.val,
+														 jbv.val.string.len));
+					break;
+
+				case JsonStatNumeric:
+					Assert(jbv.type == jbvNumeric);
+					value = NumericGetDatum(jbv.val.numeric);
+					break;
+
+				case JsonStatFloat4:
+					Assert(jbv.type == jbvNumeric);
+					value = DirectFunctionCall1(numeric_float4,
+												NumericGetDatum(jbv.val.numeric));
+					value = Float4GetDatum(DatumGetFloat4(value) * multiplier);
+					break;
+
+				default:
+					elog(ERROR, "invalid json stat type %d", type);
+					value = (Datum) 0;
+					break;
+			}
+
+			Assert(i < nvalues);
+			values[i++] = value;
+		}
+	}
+
+	Assert(i == nvalues);
+
+	get_typlenbyvalalign(typid, &typlen, &typbyval, &typalign);
+
+	return PointerGetDatum(
+		construct_array(values, nvalues, typid, typlen, typbyval, typalign));
+}
+
+/*
+ * jsonPathStatsExtractData
+ *		Extract pg_statistics values from statistics for a single path.
+ *
+ * Extract ordinary MCV, Histogram, Correlation slots for a requested stats
+ * type. If requested stats for JSONB, include also transformed JSON slot for
+ * a path and possibly for its subpaths.
+ */
+static bool
+jsonPathStatsExtractData(JsonPathStats pstats, JsonStatType stattype,
+						 float4 nullfrac, StatsData *statdata)
+{
+	Datum		data;
+	Datum		nullf;
+	Datum		dist;
+	Datum		width;
+	Datum		mcv;
+	Datum		hst;
+	Datum		corr;
+	Oid			type;
+	Oid			eqop;
+	Oid			ltop;
+	const char *key;
+	StatsSlot  *slot = statdata->slots;
+
+	nullfrac = 1.0 - (1.0 - pstats->data->nullfrac) * (1.0 - nullfrac);
+
+	/*
+	 * Depending on requested statistics type, select:
+	 *	- stavalues data type
+	 *	- corresponding eq/lt operators
+	 *	- JSONB field, containing stats slots for this statistics type
+	 */
+	switch (stattype)
+	{
+		case JsonStatJsonb:
+		case JsonStatJsonbWithoutSubpaths:
+			key = pstats->type == JsonPathStatsArrayLength ? "array_length" :
+				  pstats->type == JsonPathStatsObjectLength ? "object_length" :
+				  "json";
+			type = JSONBOID;
+			eqop = JsonbEqOperator;
+			ltop = JsonbLtOperator;
+			break;
+		case JsonStatText:
+			key = "text";
+			type = TEXTOID;
+			eqop = TextEqualOperator;
+			ltop = TextLessOperator;
+			break;
+		case JsonStatString:
+			key = "string";
+			type = TEXTOID;
+			eqop = TextEqualOperator;
+			ltop = TextLessOperator;
+			break;
+		case JsonStatNumeric:
+			key = "numeric";
+			type = NUMERICOID;
+			eqop = NumericEqOperator;
+			ltop = NumericLtOperator;
+			break;
+		case JsonStatFloat4:	/* special internal stats type */
+		default:
+			elog(ERROR, "invalid json statistic type %d", stattype);
+			break;
+	}
+
+	/* Extract object containing slots */
+	data = jsonGetField(*pstats->datum, key);
+
+	if (!DatumGetPointer(data))
+		return false;
+
+	nullf = jsonGetField(data, "nullfrac");
+	dist = jsonGetField(data, "distinct");
+	width = jsonGetField(data, "width");
+	mcv = jsonGetField(data, "mcv");
+	hst = jsonGetField(data, "histogram");
+	corr = jsonGetField(data, "correlation");
+
+	statdata->nullfrac = jsonGetFloat4(nullf, 0);
+	statdata->distinct = jsonGetFloat4(dist, 0);
+	statdata->width = (int32) jsonGetFloat4(width, 0);
+
+	statdata->nullfrac += (1.0 - statdata->nullfrac) * nullfrac;
+
+	/* Include MCV slot if exists */
+	if (DatumGetPointer(mcv))
+	{
+		slot->kind = STATISTIC_KIND_MCV;
+		slot->opid = eqop;
+		slot->numbers = jsonStatsConvertArray(jsonGetField(mcv, "numbers"),
+											  JsonStatFloat4, FLOAT4OID,
+											  1.0 - nullfrac);
+		slot->values  = jsonStatsConvertArray(jsonGetField(mcv, "values"),
+											  stattype, type, 0);
+		slot++;
+	}
+
+	/* Include Histogram slot if exists */
+	if (DatumGetPointer(hst))
+	{
+		slot->kind = STATISTIC_KIND_HISTOGRAM;
+		slot->opid = ltop;
+		slot->numbers = jsonStatsConvertArray(jsonGetField(hst, "numbers"),
+											  JsonStatFloat4, FLOAT4OID, 1.0);
+		slot->values  = jsonStatsConvertArray(jsonGetField(hst, "values"),
+											  stattype, type, 0);
+		slot++;
+	}
+
+	/* Include Correlation slot if exists */
+	if (DatumGetPointer(corr))
+	{
+		Datum		correlation = Float4GetDatum(jsonGetFloat4(corr, 0));
+
+		slot->kind = STATISTIC_KIND_CORRELATION;
+		slot->opid = ltop;
+		slot->numbers = PointerGetDatum(construct_array(&correlation, 1,
+														FLOAT4OID, 4, true,
+														'i'));
+		slot++;
+	}
+
+	/* Include JSON statistics for a given path and possibly for its subpaths */
+	if ((stattype == JsonStatJsonb ||
+		 stattype == JsonStatJsonbWithoutSubpaths) &&
+		jsonAnalyzeBuildSubPathsData(pstats->data->pathdatums,
+									 pstats->data->npaths,
+									 pstats->datum - pstats->data->pathdatums,
+									 pstats->path,
+									 pstats->pathlen,
+									 stattype == JsonStatJsonb,
+									 nullfrac,
+									 &slot->values,
+									 &slot->numbers))
+	{
+		slot->kind = STATISTIC_KIND_JSON;
+		slot++;
+	}
+
+	return true;
+}
+
+static float4
+jsonPathStatsGetFloat(JsonPathStats pstats, const char *key, float4 defaultval)
+{
+	if (!pstats)
+		return defaultval;
+
+	return jsonGetFloat4(jsonGetField(*pstats->datum, key), defaultval);
+}
+
+float4
+jsonPathStatsGetFreq(JsonPathStats pstats, float4 defaultfreq)
+{
+	return jsonPathStatsGetFloat(pstats, "freq", defaultfreq);
+}
+
+float4
+jsonPathStatsGetAvgArraySize(JsonPathStats pstats)
+{
+	return jsonPathStatsGetFloat(pstats, "avg_array_length", 1.0);
+}
+
+/*
+ * jsonPathStatsGetTypeFreq
+ *		Get frequency of different JSON object types for a given path.
+ *
+ * JSON documents don't have any particular schema, and the same path may point
+ * to values with different types in multiple documents. Consider for example
+ * two documents {"a" : "b"} and {"a" : 100} which have both a string and int
+ * for the same path. So we track the frequency of different JSON types for
+ * each path, so that we can consider this later.
+ */
+float4
+jsonPathStatsGetTypeFreq(JsonPathStats pstats, JsonbValueType type,
+						 float4 defaultfreq)
+{
+	const char *key;
+
+	if (!pstats)
+		return defaultfreq;
+
+	/*
+	 * When dealing with (object/array) length stats, we only really care about
+	 * objects and arrays.
+	 *
+	 * Lengths are always numeric, so simply return 0 if requested frequency
+	 * of non-numeric values.
+	 */
+	if (pstats->type == JsonPathStatsArrayLength)
+	{
+		if (type != jbvNumeric)
+			return 0.0;
+
+		return jsonPathStatsGetFloat(pstats, "freq_array", defaultfreq);
+	}
+
+	if (pstats->type == JsonPathStatsObjectLength)
+	{
+		if (type != jbvNumeric)
+			return 0.0;
+
+		return jsonPathStatsGetFloat(pstats, "freq_object", defaultfreq);
+	}
+
+	/* Which JSON type are we interested in? Pick the right freq_type key. */
+	switch (type)
+	{
+		case jbvNull:
+			key = "freq_null";
+			break;
+		case jbvString:
+			key = "freq_string";
+			break;
+		case jbvNumeric:
+			key = "freq_numeric";
+			break;
+		case jbvBool:
+			key = "freq_boolean";
+			break;
+		case jbvObject:
+			key = "freq_object";
+			break;
+		case jbvArray:
+			key = "freq_array";
+			break;
+		default:
+			elog(ERROR, "Invalid jsonb value type: %d", type);
+			break;
+	}
+
+	return jsonPathStatsGetFloat(pstats, key, defaultfreq);
+}
+
+/*
+ * jsonPathStatsFormTuple
+ *		Form a pg_statistic tuple representing JSON statistics.
+ *
+ * XXX Maybe it's a bit expensive to first build StatsData and then transform it
+ * again while building the tuple. Could it be done in a single step? Would it be
+ * more efficient? Not sure how expensive it actually is, though.
+ */
+static HeapTuple
+jsonPathStatsFormTuple(JsonPathStats pstats, JsonStatType type, float4 nullfrac)
+{
+	StatsData	statdata;
+
+	if (!pstats || !pstats->datum)
+		return NULL;
+
+	/*
+	 * If it is the ordinary root path stats, there is no need to transform
+	 * the tuple, it can be simply copied.
+	 */
+	if (pstats->datum == &pstats->data->pathdatums[0] &&
+		pstats->type == JsonPathStatsValues)
+		return heap_copytuple(pstats->data->statsTuple);
+
+	MemSet(&statdata, 0, sizeof(statdata));
+
+	if (!jsonPathStatsExtractData(pstats, type, nullfrac, &statdata))
+		return NULL;
+
+	return stats_form_tuple(&statdata);
+}
+
+/*
+ * jsonStatsGetPathTuple
+ *		Extract JSON statistics for a text[] path and form pg_statistics tuple.
+ */
+static HeapTuple
+jsonStatsGetPathTuple(JsonStats jsdata, JsonStatType type,
+					  Datum *path, int pathlen, bool try_arrays_indexes)
+{
+	float4			nullfrac;
+	JsonPathStats	pstats = jsonStatsGetPath(jsdata, path, pathlen,
+											  try_arrays_indexes, &nullfrac);
+
+	return jsonPathStatsFormTuple(pstats, type, nullfrac);
+}
+
+/*
+ * jsonStatsGetArrayIndexStatsTuple
+ *		Extract JSON statistics for an array index and form a pg_statistic tuple.
+ */
+static HeapTuple
+jsonStatsGetArrayIndexStatsTuple(JsonStats jsdata, JsonStatType type, int32 index)
+{
+	/* Extract statistics for root array elements */
+	JsonPathStats arrstats = jsonStatsGetRootArrayPath(jsdata);
+	JsonPathStats rootstats;
+	Selectivity	index_sel;
+
+	if (!arrstats)
+		return NULL;
+
+	/* Compute relative selectivity of 'EXISTS($[index])' */
+	rootstats = jsonStatsGetRootPath(jsdata);
+	index_sel = jsonPathStatsGetArrayIndexSelectivity(rootstats, index);
+	index_sel /= jsonPathStatsGetFreq(arrstats, 0.0);
+
+	/* Form pg_statistics tuple, taking into account array index selectivity */
+	return jsonPathStatsFormTuple(arrstats, type, 1.0 - index_sel);
+}
+
+/*
+ * jsonStatsGetPathFreq
+ *		Return frequency of a path (fraction of documents containing it).
+ *
+ * The path frequency is scaled by (1 - nullfrac) reported for the path;
+ * 0.0 is used as the default when no stats exist for the path.  The result
+ * is clamped to a valid probability.
+ */
+static float4
+jsonStatsGetPathFreq(JsonStats jsdata, Datum *path, int pathlen,
+					 bool try_array_indexes)
+{
+	float4		nullfrac;
+	JsonPathStats pstats = jsonStatsGetPath(jsdata, path, pathlen,
+											try_array_indexes, &nullfrac);
+	float4		freq = (1.0 - nullfrac) * jsonPathStatsGetFreq(pstats, 0.0);
+
+	CLAMP_PROBABILITY(freq);
+	return freq;
+}
+
+/*
+ * jsonbStatsVarOpConst
+ *		Prepare optimizer statistics for a given operator, from JSON stats.
+ *
+ * This handles only OpExpr expressions, with variable and a constant. We get
+ * the constant as is, and the variable is represented by statistics fetched
+ * by get_restriction_variable().
+ *
+ * opid    - OID of the operator (input parameter)
+ * resdata - pointer to calculated statistics for result of operator
+ * vardata - statistics for the restriction variable
+ * cnst    - constant from the operator expression
+ *
+ * Returns true when useful optimizer statistics have been calculated.
+ */
+static bool
+jsonbStatsVarOpConst(Oid opid, VariableStatData *resdata,
+					 const VariableStatData *vardata, Const *cnst)
+{
+	JsonStatData jsdata;
+	JsonStatType statype = JsonStatJsonb;
+
+	if (!jsonStatsInit(&jsdata, vardata))
+		return false;
+
+	switch (opid)
+	{
+		/* jsonb ->> 'key' : same as -> but stats are for text values */
+		case JsonbObjectFieldTextOperator:
+			statype = JsonStatText;
+			/* FALLTHROUGH */
+		case JsonbObjectFieldOperator:
+		{
+			if (cnst->consttype != TEXTOID)
+			{
+				jsonStatsRelease(&jsdata);
+				return false;
+			}
+
+			/* Single-element path; no array-index interpretation. */
+			resdata->statsTuple = jsonStatsGetPathTuple(&jsdata, statype,
+														&cnst->constvalue, 1,
+														false);
+			break;
+		}
+
+		/* jsonb ->> N : same as -> N but stats are for text values */
+		case JsonbArrayElementTextOperator:
+			statype = JsonStatText;
+			/* FALLTHROUGH */
+		case JsonbArrayElementOperator:
+		{
+			if (cnst->consttype != INT4OID)
+			{
+				jsonStatsRelease(&jsdata);
+				return false;
+			}
+
+			resdata->statsTuple =
+				jsonStatsGetArrayIndexStatsTuple(&jsdata, statype,
+												 DatumGetInt32(cnst->constvalue));
+			break;
+		}
+
+		/* jsonb #>> path : same as #> but stats are for text values */
+		case JsonbExtractPathTextOperator:
+			statype = JsonStatText;
+			/* FALLTHROUGH */
+		case JsonbExtractPathOperator:
+		{
+			Datum	   *path;
+			bool	   *nulls;
+			int			pathlen;
+			bool		have_nulls = false;
+
+			if (cnst->consttype != TEXTARRAYOID)
+			{
+				jsonStatsRelease(&jsdata);
+				return false;
+			}
+
+			deconstruct_array(DatumGetArrayTypeP(cnst->constvalue), TEXTOID,
+							  -1, false, 'i', &path, &nulls, &pathlen);
+
+			/* A NULL path element means the result is NULL; skip stats. */
+			for (int i = 0; i < pathlen; i++)
+			{
+				if (nulls[i])
+				{
+					have_nulls = true;
+					break;
+				}
+			}
+
+			if (!have_nulls)
+				resdata->statsTuple = jsonStatsGetPathTuple(&jsdata, statype,
+															path, pathlen,
+															true);
+
+			pfree(path);
+			pfree(nulls);
+			break;
+		}
+
+		default:
+			jsonStatsRelease(&jsdata);
+			return false;
+	}
+
+	/*
+	 * Even if no stats were found for the path, report an all-NULL tuple so
+	 * the caller knows the estimate is "no matching documents".
+	 */
+	if (!resdata->statsTuple)
+		resdata->statsTuple = stats_form_tuple(NULL);	/* form all-NULL tuple */
+
+	resdata->acl_ok = vardata->acl_ok;
+	resdata->freefunc = heap_freetuple;
+	Assert(resdata->rel == vardata->rel);
+	Assert(resdata->atttype ==
+		(statype == JsonStatJsonb ? JSONBOID :
+		 statype == JsonStatText ? TEXTOID :
+		 /* statype == JsonStatFreq */ BOOLOID));
+
+	jsonStatsRelease(&jsdata);
+	return true;
+}
+
+/*
+ * jsonb_stats
+ *		Statistics estimation procedure for JSONB data type.
+ *
+ * This only supports OpExpr expressions, with (Var op Const) shape.
+ *
+ * Var really can be a chain of OpExprs with derived statistics
+ * (jsonb_column -> 'key1' -> key2'), because get_restriction_variable()
+ * already handles this case.
+ *
+ * On success, fills *resdata (arg 3) with derived statistics for the
+ * result of the operator; otherwise resdata is left untouched.
+ */
+Datum
+jsonb_stats(PG_FUNCTION_ARGS)
+{
+	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+	OpExpr	   *opexpr = (OpExpr *) PG_GETARG_POINTER(1);
+	int			varRelid = PG_GETARG_INT32(2);
+	VariableStatData *resdata	= (VariableStatData *) PG_GETARG_POINTER(3);
+	VariableStatData vardata;
+	Node	   *constexpr;
+	bool		varonleft;
+
+	/* should only be called for OpExpr expressions */
+	Assert(IsA(opexpr, OpExpr));
+
+	/* Is the expression simple enough? (Var op Const) or similar? */
+	if (!get_restriction_variable(root, opexpr->args, varRelid,
+								  &vardata, &constexpr, &varonleft))
+		PG_RETURN_VOID();
+
+	/* XXX Could we also get varonleft=false in useful cases? */
+	if (IsA(constexpr, Const) && varonleft)
+		jsonbStatsVarOpConst(opexpr->opno, resdata, &vardata,
+							 (Const *) constexpr);
+
+	ReleaseVariableStats(vardata);
+
+	PG_RETURN_VOID();
+}
+
+/*
+ * jsonSelectivity
+ *		Use JSON statistics to estimate selectivity for (in)equalities.
+ *
+ * The statistics is represented as (arrays of) JSON values etc. so we
+ * need to pass the right operators to the functions.
+ *
+ * 'scalar' is a jsonb datum compared against the path stats using the
+ * given jsonb comparison operator.  Returns 0.0 when no stats exist.
+ */
+Selectivity
+jsonSelectivity(JsonPathStats stats, Datum scalar, Oid operator)
+{
+	VariableStatData vardata;
+	Selectivity sel;
+
+	if (!stats)
+		return 0.0;
+
+	/*
+	 * Build a fake VariableStatData over a transient stats tuple; only the
+	 * fields inspected by var_eq_const()/scalarineqsel() are initialized.
+	 */
+	vardata.atttype = JSONBOID;
+	vardata.atttypmod = -1;
+	vardata.isunique = false;
+	vardata.rel = stats->data->rel;
+	vardata.var = NULL;
+	vardata.vartype = JSONBOID;
+	vardata.acl_ok = stats->data->acl_ok;
+	vardata.statsTuple = jsonPathStatsFormTuple(stats,
+												JsonStatJsonbWithoutSubpaths, 0.0);
+
+	if (operator == JsonbEqOperator)
+		sel = var_eq_const(&vardata, operator, InvalidOid, scalar, false, true, false);
+	else
+		sel = scalarineqsel(NULL, operator,
+							/* is it greater or greater-or-equal? */
+							operator == JsonbGtOperator ||
+							operator == JsonbGeOperator,
+							/* does it include equality (<= or >=)? */
+							operator == JsonbLeOperator ||
+							operator == JsonbGeOperator,
+							InvalidOid,
+							&vardata, scalar, JSONBOID);
+
+	if (vardata.statsTuple)
+		heap_freetuple(vardata.statsTuple);
+
+	return sel;
+}
+
+/*
+ * jsonAccumulateSubPathSelectivity
+ *		Transform absolute subpath selectivity into relative and accumulate it
+ *		into parent path simply by multiplication of relative selectivities.
+ *
+ * 'subpath_abs_sel' is the absolute selectivity of the subpath;
+ * 'path_freq' is the absolute frequency of the parent path (divisor);
+ * 'path_relative_sel' is the parent's accumulated relative selectivity;
+ * 'array_path_stats', if given, means the parent is an array accessor and
+ * the selectivity is adjusted for the average array length.
+ */
+static void
+jsonAccumulateSubPathSelectivity(Selectivity subpath_abs_sel,
+								 Selectivity path_freq,
+								 Selectivity *path_relative_sel,
+								 JsonPathStats array_path_stats)
+{
+	Selectivity sel = subpath_abs_sel / path_freq;	/* relative selectivity */
+
+	/* XXX Try to take into account array length */
+	if (array_path_stats)
+		sel = 1.0 - pow(1.0 - sel,
+						jsonPathStatsGetAvgArraySize(array_path_stats));
+
+	/* Accumulate selectivity of subpath into parent path */
+	*path_relative_sel *= sel;
+}
+
+/*
+ * jsonSelectivityContains
+ *		Estimate selectivity for containment operator on JSON.
+ *
+ * Iterate through query jsonb elements, build paths to its leaf elements,
+ * calculate selectivities of 'path == scalar' in leaves, multiply relative
+ * selectivities of subpaths at each path level, propagate computed
+ * selectivities to the root.
+ */
+static Selectivity
+jsonSelectivityContains(JsonStats stats, Jsonb *jb)
+{
+	JsonbValue		v;
+	JsonbIterator  *it;
+	JsonbIteratorToken r;
+	StringInfoData	pathstr;	/* path string */
+	struct Path					/* path stack entry */
+	{
+		struct Path *parent;	/* parent entry */
+		int			len;		/* associated length of pathstr */
+		Selectivity	freq;		/* absolute frequency of path */
+		Selectivity	sel;		/* relative selectivity of subpaths */
+		JsonPathStats stats;	/* statistics for the path */
+		bool		is_array_accesor;	/* is it '[*]' ? */
+	}			root,			/* root path entry */
+			   *path = &root;	/* path entry stack */
+	Selectivity	sel;			/* resulting selectivity */
+	Selectivity	scalarSel;		/* selectivity of 'jsonb == scalar' */
+
+	/* Initialize root path string */
+	initStringInfo(&pathstr);
+	appendBinaryStringInfo(&pathstr, stats->prefix, stats->prefixlen);
+
+	/* Initialize root path entry */
+	root.parent = NULL;
+	root.len = pathstr.len;
+	root.stats = jsonStatsFindPath(stats, pathstr.data, pathstr.len);
+	root.freq = jsonPathStatsGetFreq(root.stats, 0.0);
+	root.sel = 1.0;
+	/*
+	 * NOTE(review): if the prefix can end with ']' then root.is_array_accesor
+	 * is true and the accumulation below dereferences root.parent (NULL) —
+	 * confirm that cannot happen.
+	 */
+	root.is_array_accesor = pathstr.data[pathstr.len - 1] == ']';
+
+	/* Return 0, if NULL fraction is 1. */
+	if (root.freq <= 0.0)
+		return 0.0;
+
+	/*
+	 * Selectivity of query 'jsonb @> scalar' consists of selectivities of
+	 * 'jsonb == scalar' and 'jsonb[*] == scalar'.  Selectivity of
+	 * 'jsonb[*] == scalar' will be computed in root.sel, but for
+	 * 'jsonb == scalar' we need additional computation.
+	 */
+	if (JsonContainerIsScalar(&jb->root))
+		scalarSel = jsonSelectivity(root.stats, JsonbPGetDatum(jb),
+									JsonbEqOperator);
+	else
+		scalarSel = 0.0;
+
+	it = JsonbIteratorInit(&jb->root);
+
+	/*
+	 * Note: the early returns below leave the iterator and path stack
+	 * allocated; presumably fine since everything lives in a short-lived
+	 * memory context.
+	 */
+	while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
+	{
+		switch (r)
+		{
+			case WJB_BEGIN_OBJECT:
+			{
+				struct Path *p;
+				Selectivity freq =
+					jsonPathStatsGetTypeFreq(path->stats, jbvObject, 0.0);
+
+				/* If there are no objects, selectivity is 0. */
+				if (freq <= 0.0)
+					return 0.0;
+
+				/*
+				 * Push path entry for object keys, actual key names are
+				 * appended later in WJB_KEY case.
+				 */
+				p = palloc(sizeof(*p));
+				p->len = pathstr.len;
+				p->parent = path;
+				p->stats = NULL;
+				p->freq = freq;
+				p->sel = 1.0;
+				p->is_array_accesor = false;
+				path = p;
+				break;
+			}
+
+			case WJB_BEGIN_ARRAY:
+			{
+				struct Path *p;
+				JsonPathStats pstats;
+				Selectivity freq;
+
+				/*
+				 * First, find stats for the parent path if needed, it will be
+				 * used in jsonAccumulateSubPathSelectivity().
+				 */
+				if (!path->stats)
+					path->stats = jsonStatsFindPath(stats, pathstr.data,
+													pathstr.len);
+
+				/* Append path string entry for array elements, get stats. */
+				jsonPathAppendEntry(&pathstr, NULL);
+				pstats = jsonStatsFindPath(stats, pathstr.data, pathstr.len);
+				freq = jsonPathStatsGetFreq(pstats, 0.0);
+
+				/* If there are no arrays, return 0 or scalar selectivity */
+				if (freq <= 0.0)
+					return scalarSel;
+
+				/* Push path entry for array elements. */
+				p = palloc(sizeof(*p));
+				p->len = pathstr.len;
+				p->parent = path;
+				p->stats = pstats;
+				p->freq = freq;
+				p->sel = 1.0;
+				p->is_array_accesor = true;
+				path = p;
+				break;
+			}
+
+			case WJB_END_OBJECT:
+			case WJB_END_ARRAY:
+			{
+				struct Path *p = path;
+				/* Absolute selectivity of the path with its all subpaths */
+				Selectivity abs_sel = p->sel * p->freq;
+
+				/* Pop last path entry */
+				path = path->parent;
+				pfree(p);
+				pathstr.len = path->len;
+				pathstr.data[pathstr.len] = '\0';
+
+				/* Accumulate selectivity into parent path */
+				jsonAccumulateSubPathSelectivity(abs_sel, path->freq,
+												 &path->sel,
+												 path->is_array_accesor ?
+												 path->parent->stats : NULL);
+				break;
+			}
+
+			case WJB_KEY:
+			{
+				/* Remove previous key in the path string */
+				pathstr.len = path->parent->len;
+				pathstr.data[pathstr.len] = '\0';
+
+				/* Append current key to path string */
+				jsonPathAppendEntryWithLen(&pathstr, v.val.string.val,
+										   v.val.string.len);
+				path->len = pathstr.len;
+				break;
+			}
+
+			case WJB_VALUE:
+			case WJB_ELEM:
+			{
+				/*
+				 * Extract statistics for a path.  Array elements share the
+				 * same statistics that was extracted in WJB_BEGIN_ARRAY.
+				 */
+				JsonPathStats pstats = r == WJB_ELEM ? path->stats :
+					jsonStatsFindPath(stats, pathstr.data, pathstr.len);
+				Selectivity abs_sel;	/* Absolute selectivity of 'path == scalar' */
+
+				if (pstats)
+				{
+					/* Make scalar jsonb datum and compute selectivity */
+					Datum		scalar = JsonbPGetDatum(JsonbValueToJsonb(&v));
+
+					abs_sel = jsonSelectivity(pstats, scalar, JsonbEqOperator);
+				}
+				else
+					abs_sel = 0.0;
+
+				/* Accumulate selectivity into parent path */
+				jsonAccumulateSubPathSelectivity(abs_sel, path->freq,
+												 &path->sel,
+												 path->is_array_accesor ?
+												 path->parent->stats : NULL);
+				break;
+			}
+
+			default:
+				break;
+		}
+	}
+
+	/* Compute absolute selectivity for root, including raw scalar case. */
+	sel = root.sel * root.freq + scalarSel;
+	CLAMP_PROBABILITY(sel);
+	return sel;
+}
+
+/*
+ * jsonSelectivityExists
+ *		Estimate selectivity for JSON "exists" operator.
+ *
+ * 'jsonb ? key' matches a document when 'key' is a top-level object key,
+ * when the document itself is the scalar string 'key', or when the document
+ * is an array containing the string 'key'.  The three cases are estimated
+ * separately and summed (then clamped).
+ */
+static Selectivity
+jsonSelectivityExists(JsonStats stats, Datum key)
+{
+	JsonPathStats rootstats;
+	JsonPathStats arrstats;
+	JsonbValue	jbvkey;
+	Datum		jbkey;
+	Selectivity keysel;
+	Selectivity scalarsel;
+	Selectivity arraysel;
+	Selectivity sel;
+
+	/* Wrap the text key into a jsonb string for comparisons below. */
+	JsonValueInitStringWithLen(&jbvkey,
+							   VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
+
+	jbkey = JsonbPGetDatum(JsonbValueToJsonb(&jbvkey));
+
+	/* Frequency of documents with 'key' as a top-level object key. */
+	keysel = jsonStatsGetPathFreq(stats, &key, 1, false);
+
+	/* Selectivity of the document itself being equal to the key string. */
+	rootstats = jsonStatsGetRootPath(stats);
+	scalarsel = jsonSelectivity(rootstats, jbkey, JsonbEqOperator);
+
+	/* Selectivity of a root array element being equal to the key string. */
+	arrstats = jsonStatsGetRootArrayPath(stats);
+	arraysel = jsonSelectivity(arrstats, jbkey, JsonbEqOperator);
+	arraysel = 1.0 - pow(1.0 - arraysel,
+						 jsonPathStatsGetAvgArraySize(rootstats));
+
+	sel = keysel + scalarsel + arraysel;
+	CLAMP_PROBABILITY(sel);
+	return sel;
+}
+
+/*
+ * jsonb_sel_internal
+ *		Dispatch selectivity estimation for the supported jsonb operators.
+ *
+ * Falls back to DEFAULT_JSON_CONTAINS_SEL for unsupported operators or
+ * unexpected constant types / operand order.
+ */
+static Selectivity
+jsonb_sel_internal(JsonStats stats, Oid operator, Const *cnst, bool varonleft)
+{
+	switch (operator)
+	{
+		case JsonbExistsOperator:
+			if (!varonleft || cnst->consttype != TEXTOID)
+				break;
+
+			return jsonSelectivityExists(stats, cnst->constvalue);
+
+		case JsonbExistsAnyOperator:
+		case JsonbExistsAllOperator:
+		{
+			Datum	   *keys;
+			bool	   *nulls;
+			Selectivity	freq = 1.0;
+			int			nkeys;
+			int			i;
+			bool		all = operator == JsonbExistsAllOperator;
+
+			if (!varonleft || cnst->consttype != TEXTARRAYOID)
+				break;
+
+			deconstruct_array(DatumGetArrayTypeP(cnst->constvalue), TEXTOID,
+							  -1, false, 'i', &keys, &nulls, &nkeys);
+
+			/*
+			 * Assuming independence of the keys: for ?& multiply the
+			 * per-key frequencies; for ?| multiply the complements and
+			 * invert at the end ("at least one exists").
+			 */
+			for (i = 0; i < nkeys; i++)
+				if (!nulls[i])
+				{
+					Selectivity pathfreq = jsonSelectivityExists(stats,
+																 keys[i]);
+					freq *= all ? pathfreq : (1.0 - pathfreq);
+				}
+
+			pfree(keys);
+			pfree(nulls);
+
+			if (!all)
+				freq = 1.0 - freq;
+
+			return freq;
+		}
+
+		case JsonbContainedOperator:
+			/* 'const @> var' — the constant is on the left side */
+			if (varonleft || cnst->consttype != JSONBOID)
+				break;
+
+			return jsonSelectivityContains(stats,
+										   DatumGetJsonbP(cnst->constvalue));
+
+		case JsonbContainsOperator:
+			if (!varonleft || cnst->consttype != JSONBOID)
+				break;
+
+			return jsonSelectivityContains(stats,
+										   DatumGetJsonbP(cnst->constvalue));
+
+		default:
+			break;
+	}
+
+	return DEFAULT_JSON_CONTAINS_SEL;
+}
+
+/*
+ * jsonb_sel
+ *		The main procedure estimating selectivity for all JSONB operators.
+ *
+ * Uses JSON statistics when the expression has (Var op Const) shape and
+ * stats are available; otherwise returns DEFAULT_JSON_CONTAINS_SEL.
+ * A NULL constant yields selectivity 0 (jsonb operators are strict).
+ */
+Datum
+jsonb_sel(PG_FUNCTION_ARGS)
+{
+	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+	Oid			operator = PG_GETARG_OID(1);
+	List	   *args = (List *) PG_GETARG_POINTER(2);
+	int			varRelid = PG_GETARG_INT32(3);
+	double		sel = DEFAULT_JSON_CONTAINS_SEL;
+	Node	   *other;
+	bool		varonleft;
+	VariableStatData vardata;
+
+	if (get_restriction_variable(root, args, varRelid,
+								  &vardata, &other, &varonleft))
+	{
+		if (IsA(other, Const))
+		{
+			Const	   *cnst = (Const *) other;
+
+			if (cnst->constisnull)
+				sel = 0.0;
+			else
+			{
+				JsonStatData stats;
+
+				if (jsonStatsInit(&stats, &vardata))
+				{
+					sel = jsonb_sel_internal(&stats, operator, cnst, varonleft);
+					jsonStatsRelease(&stats);
+				}
+			}
+		}
+
+		ReleaseVariableStats(vardata);
+	}
+
+	PG_RETURN_FLOAT8((float8) sel);
+}
diff --git a/src/backend/utils/adt/jsonb_typanalyze.c b/src/backend/utils/adt/jsonb_typanalyze.c
new file mode 100644
index 00000000000..7882db23a87
--- /dev/null
+++ b/src/backend/utils/adt/jsonb_typanalyze.c
@@ -0,0 +1,1627 @@
+/*-------------------------------------------------------------------------
+ *
+ * jsonb_typanalyze.c
+ *	  Functions for gathering statistics from jsonb columns
+ *
+ * Copyright (c) 2016-2022, PostgreSQL Global Development Group
+ *
+ * Functions in this module are used to analyze contents of JSONB columns
+ * and build optimizer statistics. In principle we extract paths from all
+ * sampled documents and calculate the usual statistics (MCV, histogram)
+ * for each path - in principle each path is treated as a column.
+ *
+ * Because we're not enforcing any JSON schema, the documents may differ
+ * a lot - the documents may contain large number of different keys, the
+ * types of values may be entirely different, etc. This makes it more
+ * challenging than building stats for regular columns. For example not
+ * only do we need to decide which values to keep in the MCV, but also
+ * which paths to keep (in case the documents are so variable we can't
+ * keep all paths).
+ *
+ * The statistics is stored in pg_statistic, in a slot with a new stakind
+ * value (STATISTIC_KIND_JSON). The statistics is serialized as an array
+ * of JSONB values, each element storing statistics for one path.
+ *
+ * For each path, we store the following keys:
+ *
+ * - path         - path this stats is for, serialized as jsonpath
+ * - freq         - frequency of documents containing this path
+ * - json         - the regular per-column stats (MCV, histogram, ...)
+ * - freq_null    - frequency of JSON null values
+ * - freq_array   - frequency of JSON array values
+ * - freq_object  - frequency of JSON object values
+ * - freq_string  - frequency of JSON string values
+ * - freq_numeric - frequency of JSON numeric values
+ *
+ * This is stored in the stavalues array.
+ *
+ * The first element of stavalues is a path prefix.  It is used for avoiding
+ * path transformations when the derived statistics for the chains of ->
+ * operators is computed.
+ *
+ * The per-column stats (stored in the "json" key) have additional internal
+ * structure, to allow storing multiple stakind types (histogram, mcv). See
+ * jsonAnalyzeMakeScalarStats for details.
+ *
+ *
+ * XXX It's a bit weird the "regular" stats are stored in the "json" key,
+ * while the JSON stats (frequencies of different JSON types) are right
+ * at the top level.
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/utils/adt/jsonb_typanalyze.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "fmgr.h"
+#include "access/hash.h"
+#include "access/detoast.h"
+#include "catalog/pg_attribute.h"
+#include "catalog/pg_operator.h"
+#include "catalog/pg_type.h"
+#include "commands/vacuum.h"
+#include "utils/builtins.h"
+#include "utils/guc.h"
+#include "utils/hsearch.h"
+#include "utils/json.h"
+#include "utils/jsonb.h"
+#include "utils/json_selfuncs.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+
+typedef struct JsonPathEntry JsonPathEntry;
+
+/*
+ * Element of a path in the JSON document (i.e. not jsonpath). Elements
+ * are linked together to build longer paths.
+ *
+ * 'entry' may not be zero-terminated when it points at JSONB keys, so
+ * 'len' is necessary.  'len' is also used for faster entry comparison, to
+ * distinguish array entries ('len' == -1).
+ */
+typedef struct JsonPathEntry
+{
+	JsonPathEntry  *parent;		/* link to the parent path element */
+	const char	   *entry;		/* element of the path as a string */
+	int				len;		/* length of entry string (may be 0 or -1) */
+	uint32			hash;		/* hash of the whole path (with parent) */
+	char	   *pathstr;		/* full path string */
+	int			depth;			/* nesting level, i.e. path length */
+} JsonPathEntry;
+
+/* array accessors are stored with len == -1 */
+#define JsonPathEntryIsArray(entry) ((entry)->len == -1)
+
+/*
+ * An array containing a dynamic number of values extracted from JSON documents.
+ * All values should have the same data type:
+ *		jsonb   - ordinary path stats, values of different JSON types
+ *		int32   - array/object length stats
+ *		text    - separate stats for strings
+ *		numeric - separate stats for numbers
+ */
+typedef struct JsonValues
+{
+	Datum	   *buf;			/* array of accumulated datums */
+	int			count;			/* number of datums in buf */
+	int			allocated;		/* allocated capacity of buf (in datums) */
+} JsonValues;
+
+/*
+ * Scalar statistics built for an array of values, extracted from a JSON
+ * document (for one particular path).
+ */
+typedef struct JsonScalarStats
+{
+	JsonValues		values;		/* accumulated values for this path */
+	VacAttrStats	stats;		/* standard ANALYZE stats built from values */
+} JsonScalarStats;
+
+/*
+ * Statistics calculated for a set of values.
+ *
+ *
+ * XXX This seems rather complicated and needs simplification. We're not
+ * really using all the various JsonScalarStats bits, there's a lot of
+ * duplication (e.g. each JsonScalarStats contains its own array, which
+ * has a copy of data from the one in "jsons").
+ */
+typedef struct JsonValueStats
+{
+	JsonScalarStats	jsons;		/* stats for all JSON types together */
+
+#ifdef JSON_ANALYZE_SCALARS		/* XXX */
+	JsonScalarStats	strings;	/* stats for JSON strings */
+	JsonScalarStats	numerics;	/* stats for JSON numerics */
+#endif
+
+	JsonScalarStats	arrlens;	/* stats of array lengths */
+	JsonScalarStats	objlens;	/* stats of object lengths */
+
+	int				nnulls;		/* number of JSON null values */
+	int				ntrue;		/* number of JSON true values */
+	int				nfalse;		/* number of JSON false values */
+	int				nobjects;	/* number of JSON objects */
+	int				narrays;	/* number of JSON arrays */
+	int				nstrings;	/* number of JSON strings */
+	int				nnumerics;	/* number of JSON numerics */
+
+	int64			narrelems;	/* total number of array elements
+								 * (for avg. array length) */
+} JsonValueStats;
+
+/*
+ * Set of sample-document numbers, stored either as a sorted list of ints
+ * (compact, used while few documents match) or as a plain bitmap.
+ */
+typedef struct JsonPathDocBitmap
+{
+	bool		is_list;		/* list representation still in use? */
+	int			size;			/* number of documents in list/bitmap */
+	int			allocated;		/* list: capacity in entries;
+								 * bitmap: size in bytes */
+	union
+	{
+		int32	   *list;
+		uint8	   *bitmap;
+	}			data;
+} JsonPathDocBitmap;
+
+/* JSON path and list of documents containing it */
+/* NB: 'path' must stay the first member (entries are cast from hash table) */
+typedef struct JsonPathAnlDocs
+{
+	JsonPathEntry path;
+	JsonPathDocBitmap bitmap;
+} JsonPathAnlDocs;
+
+/* Main structure for analyzed JSON path  */
+/* NB: 'path' must stay the first member (entries are cast from hash table) */
+typedef struct JsonPathAnlStats
+{
+	JsonPathEntry path;
+	double		freq;		/* frequency of the path */
+	JsonValueStats vstats;	/* collected values and raw computed stats */
+	Jsonb	   *stats;		/* stats converted into jsonb form */
+} JsonPathAnlStats;
+
+/* Some parent path stats counters that are used for frequency calculations */
+typedef struct JsonPathParentStats
+{
+	double		freq;		/* frequency of the parent path */
+	int			count;		/* number of values seen for the parent */
+	int			narrays;	/* number of arrays seen for the parent */
+} JsonPathParentStats;
+
+/* various bits needed while analyzing JSON */
+typedef struct JsonAnalyzeContext
+{
+	VacAttrStats		   *stats;			/* output stats for the column */
+	MemoryContext			mcxt;			/* long-lived context for paths */
+	AnalyzeAttrFetchFunc	fetchfunc;		/* row-fetch callback */
+	HTAB				   *pathshash;		/* hash of all collected paths */
+	JsonPathAnlStats	   *root;			/* stats entry for the root path */
+	double					totalrows;		/* estimated total rows in table */
+	double					total_width;	/* accumulated value width */
+	int						samplerows;		/* number of sampled rows */
+	int						current_rownum;	/* row currently being processed */
+	int						target;			/* stats target (initial size of
+											 * value arrays) */
+	int						null_cnt;		/* number of NULL columns seen */
+	int						analyzed_cnt;	/* number of analyzed documents */
+	int						maxdepth;		/* deepest path seen so far */
+	bool					scalarsOnly;	/* collect only scalar values? */
+	bool					single_pass;	/* single-pass analysis mode? */
+} JsonAnalyzeContext;
+
+/*
+ * JsonPathEntryMatch
+ *		Determine when two JSON paths (list of JsonPathEntry) match.
+ *
+ * Returns int instead of bool, because it is an implementation of
+ * HashCompareFunc: zero means "match" (like memcmp), nonzero otherwise.
+ *
+ * Parents are compared by pointer, which works because all entries are
+ * interned in the paths hash table.
+ */
+static int
+JsonPathEntryMatch(const void *key1, const void *key2, Size keysize)
+{
+	const JsonPathEntry *path1 = key1;
+	const JsonPathEntry *path2 = key2;
+
+	/* array entries (len == -1) and empty keys compare by len alone */
+	return path1->parent != path2->parent ||
+		   path1->len != path2->len ||
+		   (path1->len > 0 &&
+			strncmp(path1->entry, path2->entry, path1->len));
+}
+
+/*
+ * JsonPathEntryHash
+ *		Calculate hash of the path entry.
+ *
+ * Parent hash should be already calculated.  The parent hash is rotated by
+ * one bit and combined with the hash of this entry's name; array entries
+ * (len == -1) contribute only the rotation.
+ */
+static uint32
+JsonPathEntryHash(const void *key, Size keysize)
+{
+	const JsonPathEntry	   *path = key;
+	uint32					hash = path->parent ? path->parent->hash : 0;
+
+	/* rotate left by 1 so sibling order/depth affects the hash */
+	hash = (hash << 1) | (hash >> 31);
+	hash ^= path->len < 0 ? 0 :
+		DatumGetUInt32(hash_any((const unsigned char *) path->entry, path->len));
+
+	return hash;
+}
+
+/*
+ * jsonStatsBitmapInit
+ *		Reset a document bitmap to an empty, compact list-form state.
+ */
+static void
+jsonStatsBitmapInit(JsonPathDocBitmap *bitmap)
+{
+	bitmap->size = 0;
+	bitmap->allocated = 0;
+	bitmap->data.list = NULL;
+
+	/* start out in the compact list representation */
+	bitmap->is_list = true;
+}
+
+/*
+ * jsonStatsBitmapAdd
+ *		Record that sample document 'doc' contains the path.
+ *
+ * Documents are added in increasing order, so duplicates are detected by
+ * checking the last list element.  The list form is abandoned for a plain
+ * bitmap once the list would use more memory than the bitmap
+ * (size * sizeof(int32) > samplerows / 8 bits).
+ */
+static void
+jsonStatsBitmapAdd(JsonAnalyzeContext *cxt, JsonPathDocBitmap *bitmap, int doc)
+{
+	/* Use more compact list representation if not too many bits set */
+	if (bitmap->is_list)
+	{
+		int		   *list = bitmap->data.list;
+
+#if 1	/* Enable list representation */
+		/* docs arrive in order, so a duplicate can only be the last one */
+		if (bitmap->size > 0 && list[bitmap->size - 1] == doc)
+			return;
+
+		/* stay a list while list memory < bitmap memory */
+		if (bitmap->size < cxt->samplerows / sizeof(list[0]) / 8)
+		{
+			if (bitmap->size >= bitmap->allocated)
+			{
+				/* grow (or create) the list in the long-lived context */
+				MemoryContext oldcxt = MemoryContextSwitchTo(cxt->mcxt);
+
+				if (bitmap->allocated)
+				{
+					bitmap->allocated *= 2;
+					list = repalloc(list, sizeof(list[0]) * bitmap->allocated);
+				}
+				else
+				{
+					bitmap->allocated = 8;
+					list = palloc(sizeof(list[0]) * bitmap->allocated);
+				}
+
+				bitmap->data.list = list;
+
+				MemoryContextSwitchTo(oldcxt);
+			}
+
+			list[bitmap->size++] = doc;
+			return;
+		}
+#endif
+		/* convert list to bitmap */
+		bitmap->allocated = (cxt->samplerows + 7) / 8;
+		bitmap->data.bitmap = MemoryContextAllocZero(cxt->mcxt, bitmap->allocated);
+		bitmap->is_list = false;
+
+		/* replay the accumulated list into the fresh bitmap */
+		if (list)
+		{
+			for (int i = 0; i < bitmap->size; i++)
+			{
+				int			d = list[i];
+
+				bitmap->data.bitmap[d / 8] |= (1 << (d % 8));
+			}
+
+			pfree(list);
+		}
+	}
+
+	/* set bit in bitmap; out-of-range docs are silently ignored */
+	if (doc < cxt->samplerows &&
+		!(bitmap->data.bitmap[doc / 8] & (1 << (doc % 8))))
+	{
+		bitmap->data.bitmap[doc / 8] |= (1 << (doc % 8));
+		bitmap->size++;
+	}
+}
+
+/*
+ * jsonStatsBitmapNext
+ *		Iterate over the set bits of a (bitmap-form) document bitmap.
+ *
+ * *pbit holds the previously returned bit, or a negative value to start
+ * from the beginning.  On success, stores the next set bit into *pbit and
+ * returns true; returns false when no more bits are set.
+ */
+static bool
+jsonStatsBitmapNext(JsonPathDocBitmap *bitmap, int *pbit)
+{
+	uint8	   *bmp = bitmap->data.bitmap;
+	uint8	   *pb;
+	uint8	   *pb_end = &bmp[bitmap->allocated];
+	int			bit = *pbit;
+
+	/* only valid after the list has been converted to a bitmap */
+	Assert(!bitmap->is_list);
+
+	if (bit < 0)
+	{
+		/* start scanning from the very first bit */
+		pb = bmp;
+		bit = 0;
+	}
+	else
+	{
+		/* resume right after the previously returned bit */
+		++bit;
+		pb = &bmp[bit / 8];
+		bit %= 8;
+	}
+
+	for (; pb < pb_end; pb++, bit = 0)
+	{
+		uint8		b;
+
+		/* Skip zero bytes */
+		if (!bit)
+		{
+			while (!*pb)
+			{
+				if (++pb >= pb_end)
+					return false;
+			}
+		}
+
+		b = *pb;
+
+		/* Skip zero bits */
+		while (bit < 8 && !(b & (1 << bit)))
+			bit++;
+
+		if (bit >= 8)
+			continue;	/* Non-zero bit not found, go to next byte */
+
+		/* Output next non-zero bit */
+		*pbit = (pb - bmp) * 8 + bit;
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * jsonStatsAnlInit
+ *		Reset the per-path analysis state (frequency, jsonb stats, counters).
+ */
+static void
+jsonStatsAnlInit(JsonPathAnlStats *stats)
+{
+	stats->freq = 0.0;
+	stats->stats = NULL;
+
+	/* zero all the collected-value counters for this path entry */
+	memset(&stats->vstats, 0, sizeof(stats->vstats));
+}
+
+/*
+ * jsonAnalyzeAddPath
+ *		Add an entry for a JSON path to the working list of statistics.
+ *
+ * Returns a pointer to the JsonPathEntry embedded at the start of the hash
+ * entry (a JsonPathAnlStats in single-pass mode, a JsonPathAnlDocs
+ * otherwise), which might have already existed if the path was in an
+ * earlier document.  The entry can then be populated or updated.
+ */
+static inline JsonPathEntry *
+jsonAnalyzeAddPath(JsonAnalyzeContext *ctx, JsonPathEntry *parent,
+				   const char *entry, int len)
+{
+	JsonPathEntry path;
+	JsonPathEntry *stats;
+	bool		found;
+
+	/* Init path entry */
+	path.parent = parent;
+	path.entry = entry;
+	path.len = len;
+	path.hash = JsonPathEntryHash(&path, 0);
+
+	/* See if we already saw this path earlier. */
+	stats = hash_search_with_hash_value(ctx->pathshash, &path, path.hash,
+										HASH_ENTER, &found);
+
+	/*
+	 * Nope, it's the first time we see this path, so initialize all the
+	 * fields (path string, counters, ...).
+	 */
+	if (!found)
+	{
+		JsonPathEntry *parent = stats->parent;
+		const char *ppath = parent->pathstr;
+		StringInfoData si;
+		MemoryContext oldcxt;
+
+		oldcxt = MemoryContextSwitchTo(ctx->mcxt);
+
+		/* NULL entries are treated as wildcard array accessors "[*]" */
+		if (stats->entry)
+			/* Copy path entry name into the right MemoryContext */
+			stats->entry = pnstrdup(stats->entry, stats->len);
+
+		MemoryContextSwitchTo(oldcxt);
+
+		/* Initialize full path string */
+		initStringInfo(&si);
+		appendStringInfoString(&si, ppath);
+		jsonPathAppendEntry(&si, stats->entry);
+
+		/* keep the permanent copy of the path string in ctx->mcxt */
+		MemoryContextSwitchTo(ctx->mcxt);
+		stats->pathstr = pstrdup(si.data);
+		MemoryContextSwitchTo(oldcxt);
+
+		pfree(si.data);
+
+		/* the payload after the JsonPathEntry depends on the analysis mode */
+		if (ctx->single_pass)
+			jsonStatsAnlInit((JsonPathAnlStats *) stats);
+		else
+			jsonStatsBitmapInit(&((JsonPathAnlDocs *) stats)->bitmap);
+
+		stats->depth = parent->depth + 1;
+
+		/* update maximal depth */
+		if (ctx->maxdepth < stats->depth)
+			ctx->maxdepth = stats->depth;
+	}
+
+	return stats;
+}
+
+/*
+ * JsonValuesAppend
+ *		Add a JSON value to the dynamic array (enlarge it if needed).
+ *
+ * 'initialSize' is the capacity used for the first allocation; the array
+ * then doubles on each subsequent growth.
+ *
+ * XXX This is likely one of the problems - the documents may be pretty
+ * large, with a lot of different values for each path. At that point
+ * it's problematic to keep all of that in memory at once. So maybe we
+ * need to introduce some sort of compaction (e.g. we could try
+ * deduplicating the values), limit on size of the array or something.
+ */
+static inline void
+JsonValuesAppend(JsonValues *values, Datum value, int initialSize)
+{
+	if (values->count >= values->allocated)
+	{
+		if (values->allocated)
+		{
+			values->allocated = values->allocated * 2;
+			values->buf = repalloc(values->buf,
+									sizeof(values->buf[0]) * values->allocated);
+		}
+		else
+		{
+			values->allocated = initialSize;
+			values->buf = palloc(sizeof(values->buf[0]) * values->allocated);
+		}
+	}
+
+	values->buf[values->count++] = value;
+}
+
+/*
+ * jsonAnalyzeJsonValue
+ *		Process a value extracted from the document (for a given path).
+ *
+ * The value is appended (as jsonb) to the shared per-path stats array and
+ * the type-specific counters are updated; for containers we also collect
+ * the object/array length stats.  In single-pass mode the jsonb copy is
+ * made in the analyze context so it survives per-row resets.
+ */
+static inline void
+jsonAnalyzeJsonValue(JsonAnalyzeContext *ctx, JsonValueStats *vstats,
+					 JsonbValue *jv)
+{
+	JsonbValue *jbv;
+	JsonbValue	jbvtmp;
+	Jsonb	   *jb;
+	Datum		value;
+	MemoryContext oldcxt = NULL;
+
+	/* XXX if analyzing only scalar values, make containers empty */
+	if (ctx->scalarsOnly && jv->type == jbvBinary)
+	{
+		if (JsonContainerIsObject(jv->val.binary.data))
+			jbv = JsonValueInitObject(&jbvtmp, 0, 0);
+		else
+		{
+			Assert(JsonContainerIsArray(jv->val.binary.data));
+			jbv = JsonValueInitArray(&jbvtmp, 0, 0, false);
+		}
+	}
+	else
+		jbv = jv;
+
+	jb = JsonbValueToJsonb(jbv);
+
+	if (ctx->single_pass)
+	{
+		/* copy into the long-lived analyze context */
+		oldcxt = MemoryContextSwitchTo(ctx->stats->anl_context);
+		jb = memcpy(palloc(VARSIZE(jb)), jb, VARSIZE(jb));
+	}
+
+	/* always add it to the "global" JSON stats, shared by all types */
+	JsonValuesAppend(&vstats->jsons.values,
+					 JsonbPGetDatum(jb),
+					 ctx->target);
+
+	/* also update the type-specific counters */
+	switch (jv->type)
+	{
+		case jbvNull:
+			vstats->nnulls++;
+			break;
+
+		case jbvBool:
+			if (jv->val.boolean)
+				vstats->ntrue++;
+			else
+				vstats->nfalse++;
+			break;
+
+		case jbvString:
+			vstats->nstrings++;
+#ifdef JSON_ANALYZE_SCALARS
+			value = PointerGetDatum(
+						cstring_to_text_with_len(jv->val.string.val,
+												 jv->val.string.len));
+			JsonValuesAppend(&vstats->strings.values, value, ctx->target);
+#endif
+			break;
+
+		case jbvNumeric:
+			vstats->nnumerics++;
+#ifdef JSON_ANALYZE_SCALARS
+			value = PointerGetDatum(jv->val.numeric);
+			JsonValuesAppend(&vstats->numerics.values, value, ctx->target);
+#endif
+			break;
+
+		case jbvBinary:
+			if (JsonContainerIsObject(jv->val.binary.data))
+			{
+				uint32		size = JsonContainerSize(jv->val.binary.data);
+
+				/* Int32GetDatum, not DatumGetInt32: we convert *to* Datum */
+				value = Int32GetDatum((int32) size);
+				vstats->nobjects++;
+				JsonValuesAppend(&vstats->objlens.values, value, ctx->target);
+			}
+			else if (JsonContainerIsArray(jv->val.binary.data))
+			{
+				uint32		size = JsonContainerSize(jv->val.binary.data);
+
+				/* Int32GetDatum, not DatumGetInt32: we convert *to* Datum */
+				value = Int32GetDatum((int32) size);
+				vstats->narrays++;
+				JsonValuesAppend(&vstats->arrlens.values, value, ctx->target);
+				vstats->narrelems += size;
+			}
+			break;
+
+		default:
+			elog(ERROR, "invalid scalar json value type %d", jv->type);
+			break;
+	}
+
+	if (ctx->single_pass)
+		MemoryContextSwitchTo(oldcxt);
+}
+
+/*
+ * jsonAnalyzeCollectPaths
+ *		Parse the JSON document and collect all paths and their values.
+ *
+ * If "param" (a bool smuggled through the void pointer) is true, values are
+ * accumulated into each path's stats while walking the document; otherwise
+ * we only record, in a per-path document bitmap, which rows contain the
+ * path, so that the values can be collected later one path at a time.
+ */
+static void
+jsonAnalyzeCollectPaths(JsonAnalyzeContext *ctx, Jsonb *jb, void *param)
+{
+	JsonbValue			jv;
+	JsonbIterator	   *it;
+	JsonbIteratorToken	tok;
+	JsonPathEntry	   *stats = &ctx->root->path;
+	int					doc = ctx->current_rownum;
+	bool				collect_values = (bool)(intptr_t) param;
+	bool				scalar = false;
+
+	/*
+	 * Count the root value itself.  A raw-scalar root is skipped here, as
+	 * the iterator emits it as WJB_ELEM and it gets counted below.
+	 */
+	if (collect_values && !JB_ROOT_IS_SCALAR(jb))
+		jsonAnalyzeJsonValue(ctx, &((JsonPathAnlStats *) stats)->vstats,
+							 JsonValueInitBinary(&jv, jb));
+
+	it = JsonbIteratorInit(&jb->root);
+
+	while ((tok = JsonbIteratorNext(&it, &jv, true)) != WJB_DONE)
+	{
+		switch (tok)
+		{
+			case WJB_BEGIN_OBJECT:
+				/*
+				 * Read next token to see if the object is empty or not.
+				 * If not, make stats for the first key.  Subsequent WJB_KEYs
+				 * and WJB_END_OBJECT will expect that stats will be pointing
+				 * to the key of current object.
+				 */
+				tok = JsonbIteratorNext(&it, &jv, true);
+
+				if (tok == WJB_END_OBJECT)
+					/* Empty object, simply skip stats initialization. */
+					break;
+
+				if (tok != WJB_KEY)
+					elog(ERROR, "unexpected jsonb iterator token: %d", tok);
+
+				stats = jsonAnalyzeAddPath(ctx, stats,
+										   jv.val.string.val,
+										   jv.val.string.len);
+				break;
+
+			case WJB_BEGIN_ARRAY:
+				/* Make stats for non-scalar array and use it for all elements */
+				if (!(scalar = jv.val.array.rawScalar))
+					stats = jsonAnalyzeAddPath(ctx, stats, NULL, -1);
+				break;
+
+			case WJB_END_ARRAY:
+				if (scalar)
+					break;
+				/* FALLTHROUGH */
+			case WJB_END_OBJECT:
+				/* Reset to parent stats */
+				stats = stats->parent;
+				break;
+
+			case WJB_KEY:
+				/*
+				 * Stats should point to the previous key of current object,
+				 * use its parent path as a base path.
+				 */
+				stats = jsonAnalyzeAddPath(ctx, stats->parent,
+										   jv.val.string.val,
+										   jv.val.string.len);
+				break;
+
+			case WJB_VALUE:
+			case WJB_ELEM:
+				if (collect_values)
+					jsonAnalyzeJsonValue(ctx,
+										 &((JsonPathAnlStats *) stats)->vstats,
+										 &jv);
+				else if (stats != &ctx->root->path)
+					jsonStatsBitmapAdd(ctx,
+									   &((JsonPathAnlDocs *) stats)->bitmap,
+									   doc);
+
+				/*
+				 * Manually recurse into container by creating child iterator.
+				 * We use skipNested=true to give jsonAnalyzeJsonValue()
+				 * ability to access jbvBinary containers.
+				 */
+				if (jv.type == jbvBinary)
+				{
+					JsonbIterator *it2 = JsonbIteratorInit(jv.val.binary.data);
+
+					it2->parent = it;
+					it = it2;
+				}
+				break;
+
+			default:
+				break;
+		}
+	}
+}
+
+/*
+ * jsonAnalyzeCollectSubpath
+ *		Recursively extract trailing part of a path and collect its values.
+ *
+ * 'entries' holds the path entries in root-to-leaf order; 'start_entry' is
+ * the index of the next entry to resolve inside 'jbv'.  Array entries
+ * recurse into every element, object entries descend into the matching key.
+ */
+static void
+jsonAnalyzeCollectSubpath(JsonAnalyzeContext *ctx, JsonPathAnlStats *pstats,
+						  JsonbValue *jbv, JsonPathEntry **entries,
+						  int start_entry)
+{
+	JsonbValue	scalar;
+	int			i;
+
+	for (i = start_entry; i < pstats->path.depth; i++)
+	{
+		JsonPathEntry  *entry = entries[i];
+		JsonbContainer *jbc = jbv->val.binary.data;
+		JsonbValueType	type = jbv->type;
+
+		/* free the value looked up in the previous loop iteration */
+		if (i > start_entry)
+			pfree(jbv);
+
+		if (type != jbvBinary)
+			return;
+
+		if (JsonPathEntryIsArray(entry))
+		{
+			JsonbIterator	   *it;
+			JsonbIteratorToken	r;
+			JsonbValue			elem;
+
+			if (!JsonContainerIsArray(jbc) || JsonContainerIsScalar(jbc))
+				return;
+
+			/* recurse into each element of the array */
+			it = JsonbIteratorInit(jbc);
+
+			while ((r = JsonbIteratorNext(&it, &elem, true)) != WJB_DONE)
+			{
+				if (r == WJB_ELEM)
+					jsonAnalyzeCollectSubpath(ctx, pstats, &elem, entries, i + 1);
+			}
+
+			return;
+		}
+		else
+		{
+			if (!JsonContainerIsObject(jbc))
+				return;
+
+			jbv = findJsonbValueFromContainerLen(jbc, JB_FOBJECT,
+												 entry->entry, entry->len);
+
+			if (!jbv)
+				return;
+		}
+	}
+
+	/*
+	 * If no path entries were consumed here, the value may still be a raw
+	 * scalar wrapped in a binary container; unwrap it before collecting.
+	 */
+	if (i == start_entry &&
+		jbv->type == jbvBinary &&
+		JsonbExtractScalar(jbv->val.binary.data, &scalar))
+		jbv = &scalar;
+
+	jsonAnalyzeJsonValue(ctx, &pstats->vstats, jbv);
+
+	if (i > start_entry)
+		pfree(jbv);
+}
+
+/*
+ * jsonAnalyzeCollectPath
+ *		Extract a single path from JSON documents and collect its values.
+ *
+ * 'param' is the JsonPathAnlStats of the path to collect.
+ */
+static void
+jsonAnalyzeCollectPath(JsonAnalyzeContext *ctx, Jsonb *jb, void *param)
+{
+	JsonPathAnlStats *pstats = (JsonPathAnlStats *) param;
+	JsonbValue	jbvtmp;
+	JsonbValue *jbv = JsonValueInitBinary(&jbvtmp, jb);
+	JsonPathEntry *path;
+	JsonPathEntry **entries;
+	int			i;
+
+	entries = palloc(sizeof(*entries) * pstats->path.depth);
+
+	/*
+	 * Build the entry array in direct (root-to-leaf) order by walking the
+	 * parent pointers from the leaf entry.
+	 */
+	for (path = &pstats->path, i = pstats->path.depth - 1;
+		 path->parent && i >= 0;
+		 path = path->parent, i--)
+		entries[i] = path;
+
+	jsonAnalyzeCollectSubpath(ctx, pstats, jbv, entries, 0);
+
+	pfree(entries);
+}
+
+/*
+ * jsonAnalyzePathFetch
+ *		Fetch function for compute_stats(): the values were accumulated into
+ *		stats->exprvals beforehand, so simply return the rownum-th one.
+ *		NULLs are never stored here; nullfrac is corrected separately.
+ */
+static Datum
+jsonAnalyzePathFetch(VacAttrStatsP stats, int rownum, bool *isnull)
+{
+	*isnull = false;
+	return stats->exprvals[rownum];
+}
+
+/*
+ * jsonAnalyzePathValues
+ *		Calculate per-column statistics for values for a single path.
+ *
+ * We have already accumulated all the values for the path, so we simply
+ * call the typanalyze function for the proper data type, and then
+ * compute_stats (which points to compute_scalar_stats or so).
+ */
+static void
+jsonAnalyzePathValues(JsonAnalyzeContext *ctx, JsonScalarStats *sstats,
+					  Oid typid, double freq, bool use_anl_context)
+{
+	JsonValues			   *values = &sstats->values;
+	VacAttrStats		   *stats = &sstats->stats;
+	FormData_pg_attribute	attr;
+	FormData_pg_type		type;
+	int						i;
+
+	/* nothing to do if no values were accumulated for this path */
+	if (!sstats->values.count)
+		return;
+
+	get_typlenbyvalalign(typid, &type.typlen, &type.typbyval, &type.typalign);
+
+	attr.attstattarget = ctx->target;
+
+	stats->attr = &attr;
+	stats->attrtypid = typid;
+	stats->attrtypmod = -1;
+	stats->attrtype = &type;
+	stats->anl_context = use_anl_context ? ctx->stats->anl_context : CurrentMemoryContext;
+
+	stats->exprvals = values->buf;
+
+	/*
+	 * The fields describing the stats->stavalues[n] element types default to
+	 * the type of the data being analyzed, but the type-specific typanalyze
+	 * function can change them if it wants to store something else.
+	 */
+	for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
+	{
+		stats->statypid[i] = stats->attrtypid;
+		stats->statyplen[i] = stats->attrtype->typlen;
+		stats->statypbyval[i] = stats->attrtype->typbyval;
+		stats->statypalign[i] = stats->attrtype->typalign;
+	}
+
+	std_typanalyze(stats);
+
+	/* scale the sample count up to an estimated total number of values */
+	stats->compute_stats(stats, jsonAnalyzePathFetch,
+						 values->count,
+						 ctx->totalrows / ctx->samplerows * values->count);
+
+	/*
+	 * We've only kept the non-null values, so compute_stats will always
+	 * leave this as 1.0. But we have enough info to calculate the correct
+	 * value.
+	 */
+	stats->stanullfrac = (float4)(1.0 - freq);
+
+	/*
+	 * Similarly, we need to correct the MCV frequencies, because those are
+	 * also calculated only from the non-null values. All we need to do is
+	 * simply multiply that with the non-NULL frequency.
+	 */
+	for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
+	{
+		if (stats->stakind[i] == STATISTIC_KIND_MCV)
+		{
+			int j;
+			for (j = 0; j < stats->numnumbers[i]; j++)
+				stats->stanumbers[i][j] *= freq;
+		}
+	}
+}
+
+/*
+ * jsonAnalyzeMakeScalarStats
+ *		Serialize scalar stats into a JSON representation.
+ *
+ * We simply produce a JSON document with a list of predefined keys:
+ *
+ * - nullfrac
+ * - distinct
+ * - width
+ * - correlation
+ * - mcv or histogram
+ *
+ * For the mcv / histogram, we store a nested values / numbers.
+ */
+static JsonbValue *
+jsonAnalyzeMakeScalarStats(JsonbParseState **ps, const char *name,
+							const VacAttrStats *stats)
+{
+	JsonbValue	val;
+	int			i;
+	int			j;
+
+	pushJsonbKey(ps, &val, name);
+
+	pushJsonbValue(ps, WJB_BEGIN_OBJECT, NULL);
+
+	pushJsonbKeyValueFloat(ps, &val, "nullfrac", stats->stanullfrac);
+	pushJsonbKeyValueFloat(ps, &val, "distinct", stats->stadistinct);
+	pushJsonbKeyValueInteger(ps, &val, "width", stats->stawidth);
+
+	for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
+	{
+		/* slots are filled from the start, so the first empty one ends it */
+		if (!stats->stakind[i])
+			break;
+
+		switch (stats->stakind[i])
+		{
+			case STATISTIC_KIND_MCV:
+				pushJsonbKey(ps, &val, "mcv");
+				break;
+
+			case STATISTIC_KIND_HISTOGRAM:
+				pushJsonbKey(ps, &val, "histogram");
+				break;
+
+			case STATISTIC_KIND_CORRELATION:
+				/* correlation is a plain value, not a values/numbers object */
+				pushJsonbKeyValueFloat(ps, &val, "correlation",
+									   stats->stanumbers[i][0]);
+				continue;
+
+			default:
+				elog(ERROR, "unexpected stakind %d", stats->stakind[i]);
+				break;
+		}
+
+		pushJsonbValue(ps, WJB_BEGIN_OBJECT, NULL);
+
+		if (stats->numvalues[i] > 0)
+		{
+			pushJsonbKey(ps, &val, "values");
+			pushJsonbValue(ps, WJB_BEGIN_ARRAY, NULL);
+			for (j = 0; j < stats->numvalues[i]; j++)
+			{
+				Datum v = stats->stavalues[i][j];
+				if (stats->attrtypid == JSONBOID)
+					pushJsonbElemBinary(ps, &val, DatumGetJsonbP(v));
+				else if (stats->attrtypid == TEXTOID)
+					pushJsonbElemText(ps, &val, DatumGetTextP(v));
+				else if (stats->attrtypid == NUMERICOID)
+					pushJsonbElemNumeric(ps, &val, DatumGetNumeric(v));
+				else if (stats->attrtypid == INT4OID)
+					pushJsonbElemInteger(ps, &val, DatumGetInt32(v));
+				else
+					elog(ERROR, "unexpected stat value type %d",
+						 stats->attrtypid);
+			}
+			pushJsonbValue(ps, WJB_END_ARRAY, NULL);
+		}
+
+		if (stats->numnumbers[i] > 0)
+		{
+			pushJsonbKey(ps, &val, "numbers");
+			pushJsonbValue(ps, WJB_BEGIN_ARRAY, NULL);
+			for (j = 0; j < stats->numnumbers[i]; j++)
+				pushJsonbElemFloat(ps, &val, stats->stanumbers[i][j]);
+			pushJsonbValue(ps, WJB_END_ARRAY, NULL);
+		}
+
+		pushJsonbValue(ps, WJB_END_OBJECT, NULL);
+	}
+
+	return pushJsonbValue(ps, WJB_END_OBJECT, NULL);
+}
+
+/*
+ * pushJsonbKeyValueFloatNonZero
+ *		Push a key/float pair, omitting the key entirely when the value is
+ *		zero, to keep the serialized stats compact.
+ */
+static void
+pushJsonbKeyValueFloatNonZero(JsonbParseState **ps, JsonbValue *jbv,
+							  const char *field, double val)
+{
+	if (val != 0.0)
+		pushJsonbKeyValueFloat(ps, jbv, field, val);
+}
+
+/*
+ * jsonAnalyzeBuildPathStats
+ *		Serialize statistics for a particular json path.
+ *
+ * This includes both the per-column stats (stored in "json" key) and the
+ * JSON specific stats (like frequencies of different object types).
+ */
+static Jsonb *
+jsonAnalyzeBuildPathStats(JsonPathAnlStats *pstats)
+{
+	const JsonValueStats *vstats = &pstats->vstats;
+	float4				freq = pstats->freq;
+	bool				fullstats = true;	/* pstats->path.parent != NULL */
+	JsonbValue			val;
+	JsonbValue		   *jbv;
+	JsonbParseState	   *ps = NULL;
+
+	pushJsonbValue(&ps, WJB_BEGIN_OBJECT, NULL);
+
+	pushJsonbKeyValueString(&ps, &val, "path", pstats->path.pathstr);
+
+	pushJsonbKeyValueFloat(&ps, &val, "freq", freq);
+
+	/* per-type frequencies, relative to the total number of values seen */
+	pushJsonbKeyValueFloatNonZero(&ps, &val, "freq_null",
+								  freq * vstats->nnulls /
+								  vstats->jsons.values.count);
+
+	pushJsonbKeyValueFloatNonZero(&ps, &val, "freq_boolean",
+								  freq * (vstats->nfalse + vstats->ntrue) /
+								  vstats->jsons.values.count);
+
+	pushJsonbKeyValueFloatNonZero(&ps, &val, "freq_string",
+								  freq * vstats->nstrings /
+								  vstats->jsons.values.count);
+
+	pushJsonbKeyValueFloatNonZero(&ps, &val, "freq_numeric",
+								  freq * vstats->nnumerics /
+								  vstats->jsons.values.count);
+
+	pushJsonbKeyValueFloatNonZero(&ps, &val, "freq_array",
+								  freq * vstats->narrays /
+								  vstats->jsons.values.count);
+
+	pushJsonbKeyValueFloatNonZero(&ps, &val, "freq_object",
+								  freq * vstats->nobjects /
+								  vstats->jsons.values.count);
+
+	/*
+	 * We keep array length stats here for queries like jsonpath '$.size() > 5'.
+	 * Object lengths stats can be useful for other query languages.
+	 */
+	if (vstats->arrlens.values.count)
+		jsonAnalyzeMakeScalarStats(&ps, "array_length", &vstats->arrlens.stats);
+
+	if (vstats->objlens.values.count)
+		jsonAnalyzeMakeScalarStats(&ps, "object_length", &vstats->objlens.stats);
+
+	if (vstats->narrays)
+		pushJsonbKeyValueFloat(&ps, &val, "avg_array_length",
+							   (float4) vstats->narrelems / vstats->narrays);
+
+	if (fullstats)
+	{
+#ifdef JSON_ANALYZE_SCALARS
+		jsonAnalyzeMakeScalarStats(&ps, "string", &vstats->strings.stats);
+		jsonAnalyzeMakeScalarStats(&ps, "numeric", &vstats->numerics.stats);
+#endif
+		jsonAnalyzeMakeScalarStats(&ps, "json", &vstats->jsons.stats);
+	}
+
+	jbv = pushJsonbValue(&ps, WJB_END_OBJECT, NULL);
+
+	return JsonbValueToJsonb(jbv);
+}
+
+/*
+ * jsonAnalyzeCalcPathFreq
+ *		Calculate path frequency, i.e. how many documents contain this path.
+ */
+static void
+jsonAnalyzeCalcPathFreq(JsonAnalyzeContext *ctx, JsonPathAnlStats *pstats,
+						JsonPathParentStats *parent)
+{
+	if (pstats->path.parent)
+	{
+		/*
+		 * For array-element paths use the number of arrays seen in the
+		 * parent; for object keys use the number of values of this path.
+		 */
+		int			count = JsonPathEntryIsArray(&pstats->path)	?
+			parent->narrays : pstats->vstats.jsons.values.count;
+
+		pstats->freq = parent->freq * count / parent->count;
+
+		CLAMP_PROBABILITY(pstats->freq);
+	}
+	else
+		/* the root path: fraction of sampled rows that were analyzed */
+		pstats->freq = (double) ctx->analyzed_cnt / ctx->samplerows;
+}
+
+/*
+ * jsonAnalyzePath
+ *		Build statistics for values accumulated for this path.
+ *
+ * We're done with accumulating values for this path, so calculate the
+ * statistics for the various arrays.
+ *
+ * XXX I wonder if we could introduce some simple heuristic on which
+ * paths to keep, similarly to what we do for MCV lists. For example a
+ * path that occurred just once is not very interesting, so we could
+ * decide to ignore it and not build the stats. Although that won't
+ * save much, because there'll be very few values accumulated.
+ */
+static Jsonb *
+jsonAnalyzePath(JsonAnalyzeContext *ctx, JsonPathAnlStats *pstats,
+				JsonPathParentStats *parent_stats)
+{
+	JsonValueStats	   *vstats = &pstats->vstats;
+	Jsonb			   *stats;
+
+	jsonAnalyzeCalcPathFreq(ctx, pstats, parent_stats);
+
+	/* values combining all object types */
+	jsonAnalyzePathValues(ctx, &vstats->jsons, JSONBOID, pstats->freq,
+						  /* store root stats in analyze context */
+						  !parent_stats);
+
+	/*
+	 * Lengths and array lengths.  We divide counts by the total number of json
+	 * values to compute correct nullfrac (i.e. not all jsons have lengths).
+	 */
+	jsonAnalyzePathValues(ctx, &vstats->arrlens, INT4OID,
+						  pstats->freq * vstats->arrlens.values.count /
+						  vstats->jsons.values.count, false);
+	jsonAnalyzePathValues(ctx, &vstats->objlens, INT4OID,
+						  pstats->freq * vstats->objlens.values.count /
+						  vstats->jsons.values.count, false);
+
+#ifdef JSON_ANALYZE_SCALARS
+	/* stats for values of string/numeric types only */
+	jsonAnalyzePathValues(ctx, &vstats->strings, TEXTOID, pstats->freq, false);
+	jsonAnalyzePathValues(ctx, &vstats->numerics, NUMERICOID, pstats->freq, false);
+#endif
+
+	/* Build jsonb with path stats */
+	stats = jsonAnalyzeBuildPathStats(pstats);
+
+	/* Copy stats to non-temporary context */
+	return memcpy(MemoryContextAlloc(ctx->stats->anl_context, VARSIZE(stats)),
+				  stats, VARSIZE(stats));
+}
+
+/*
+ * JsonPathStatsCompare
+ *		Compare two path stats (by path string).
+ *
+ * We store the stats sorted by path string, and this is the comparator
+ * (used by pg_qsort() in jsonAnalyzeSortPaths()).
+ */
+static int
+JsonPathStatsCompare(const void *pv1, const void *pv2)
+{
+	return strcmp((*((const JsonPathEntry **) pv1))->pathstr,
+				  (*((const JsonPathEntry **) pv2))->pathstr);
+}
+
+/*
+ * jsonAnalyzeSortPaths
+ *		Reads all stats stored in the hash table and sorts them.
+ */
+static JsonPathEntry **
+jsonAnalyzeSortPaths(JsonAnalyzeContext *ctx, int *p_npaths)
+{
+	HASH_SEQ_STATUS	hseq;
+	JsonPathEntry *path;
+	JsonPathEntry **paths;
+	int			npaths;
+
+	/* the root path is not stored in the hash table, hence the extra slot */
+	npaths = hash_get_num_entries(ctx->pathshash) + 1;
+	paths = MemoryContextAlloc(ctx->mcxt, sizeof(*paths) * npaths);
+
+	paths[0] = &ctx->root->path;
+
+	hash_seq_init(&hseq, ctx->pathshash);
+
+	for (int i = 1; (path = hash_seq_search(&hseq)) != NULL; i++)
+		paths[i] = path;
+
+	/* sort by path string, so that a path sorts before its subpaths */
+	pg_qsort(paths, npaths, sizeof(*paths), JsonPathStatsCompare);
+
+	*p_npaths = npaths;
+	return paths;
+}
+
+/*
+ * jsonAnalyzeBuildPathStatsArray
+ *		Build jsonb datum array for path stats, that will be used as stavalues.
+ *
+ * The first element is a path prefix.
+ */
+static Datum *
+jsonAnalyzeBuildPathStatsArray(Jsonb **pstats, int npaths, int *nvals,
+							   const char *prefix, int prefixlen)
+{
+	Datum	   *values = palloc(sizeof(Datum) * (npaths + 1));
+	JsonbValue *jbvprefix = palloc(sizeof(JsonbValue));
+	int			i;
+
+	/* copy the prefix string, as JsonbValue stores it by reference */
+	JsonValueInitStringWithLen(jbvprefix,
+							   memcpy(palloc(prefixlen), prefix, prefixlen),
+							   prefixlen);
+
+	/* slot 0 holds the path prefix, the per-path stats follow */
+	values[0] = JsonbPGetDatum(JsonbValueToJsonb(jbvprefix));
+
+	for (i = 0; i < npaths; i++)
+		values[i + 1] = JsonbPGetDatum(pstats[i]);
+
+	*nvals = npaths + 1;
+
+	return values;
+}
+
+/*
+ * jsonAnalyzeMakeStats
+ *		Build stavalues jsonb array for the root path prefix.
+ *
+ * The array is built in the analyze context, so it can be referenced from
+ * the pg_statistic slot arrays.
+ */
+static Datum *
+jsonAnalyzeMakeStats(JsonAnalyzeContext *ctx, Jsonb **paths,
+					 int npaths, int *numvalues)
+{
+	Datum	   *values;
+	MemoryContext oldcxt = MemoryContextSwitchTo(ctx->stats->anl_context);
+
+	values = jsonAnalyzeBuildPathStatsArray(paths, npaths, numvalues,
+											JSON_PATH_ROOT, JSON_PATH_ROOT_LEN);
+
+	MemoryContextSwitchTo(oldcxt);
+
+	return values;
+}
+
+/*
+ * jsonAnalyzeBuildSubPathsData
+ *		Build statvalues and stanumbers arrays for the subset of paths starting
+ *		from a given prefix.
+ *
+ * pathsDatums[index] should point to the desired path.
+ */
+bool
+jsonAnalyzeBuildSubPathsData(Datum *pathsDatums, int npaths, int index,
+							 const char	*path, int pathlen,
+							 bool includeSubpaths, float4 nullfrac,
+							 Datum *pvals, Datum *pnums)
+{
+	Jsonb	  **pvalues = palloc(sizeof(*pvalues) * npaths);
+	Datum	   *values;
+	Datum		numbers[1];
+	JsonbValue	pathkey;
+	int			nsubpaths = 0;
+	int			nvalues;
+	int			i;
+
+	JsonValueInitStringWithLen(&pathkey, "path", 4);
+
+	/*
+	 * Paths are stored sorted by path string, so all paths sharing the given
+	 * prefix are contiguous and we can stop at the first non-match.
+	 */
+	for (i = index; i < npaths; i++)
+	{
+		/* Extract path name */
+		Jsonb	   *jb = DatumGetJsonbP(pathsDatums[i]);
+		JsonbValue *jbv = findJsonbValueFromContainer(&jb->root, JB_FOBJECT,
+													  &pathkey);
+
+		/* Check if path name starts with a given prefix */
+		if (!jbv || jbv->type != jbvString ||
+			jbv->val.string.len < pathlen ||
+			memcmp(jbv->val.string.val, path, pathlen))
+			break;
+
+		pfree(jbv);
+
+		/* Collect matching path */
+		pvalues[nsubpaths] = jb;
+
+		nsubpaths++;
+
+		/*
+		 * The path should go before its subpaths, so if subpaths are not
+		 * needed the loop is broken after the first matching path.
+		 */
+		if (!includeSubpaths)
+			break;
+	}
+
+	if (!nsubpaths)
+	{
+		pfree(pvalues);
+		return false;
+	}
+
+	/* Construct new array from the selected paths */
+	values = jsonAnalyzeBuildPathStatsArray(pvalues, nsubpaths, &nvalues,
+											path, pathlen);
+	*pvals = PointerGetDatum(construct_array(values, nvalues, JSONBOID, -1,
+											 false, 'i'));
+
+	pfree(pvalues);
+	pfree(values);
+
+	numbers[0] = Float4GetDatum(nullfrac);
+	*pnums = PointerGetDatum(construct_array(numbers, 1, FLOAT4OID, 4,
+											 true /*FLOAT4PASSBYVAL*/, 'i'));
+
+	return true;
+}
+
+/*
+ * jsonAnalyzeInit
+ *		Initialize the analyze context so that we can start adding paths.
+ */
+static void
+jsonAnalyzeInit(JsonAnalyzeContext *ctx, VacAttrStats *stats,
+				AnalyzeAttrFetchFunc fetchfunc,
+				int samplerows, double totalrows, bool single_pass)
+{
+	HASHCTL	hash_ctl;
+
+	memset(ctx, 0, sizeof(*ctx));
+
+	ctx->stats = stats;
+	ctx->fetchfunc = fetchfunc;
+	ctx->mcxt = CurrentMemoryContext;
+	ctx->samplerows = samplerows;
+	ctx->totalrows = totalrows;
+	ctx->target = stats->attr->attstattarget;
+	ctx->scalarsOnly = false;
+	ctx->single_pass = single_pass;
+
+	MemSet(&hash_ctl, 0, sizeof(hash_ctl));
+	hash_ctl.keysize = sizeof(JsonPathEntry);
+	/* single-pass entries carry full value stats, multi-pass only bitmaps */
+	hash_ctl.entrysize = ctx->single_pass ? sizeof(JsonPathAnlStats) : sizeof(JsonPathAnlDocs);
+	hash_ctl.hash = JsonPathEntryHash;
+	hash_ctl.match = JsonPathEntryMatch;
+	hash_ctl.hcxt = ctx->mcxt;
+
+	ctx->pathshash = hash_create("JSON analyze path table", 100, &hash_ctl,
+					HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);
+
+	/* the root path is kept separately, not in the hash table */
+	ctx->root = MemoryContextAllocZero(ctx->mcxt, sizeof(JsonPathAnlStats));
+	ctx->root->path.pathstr = JSON_PATH_ROOT;
+}
+
+/*
+ * jsonAnalyzePass
+ *		One analysis pass over the JSON column.
+ *
+ * Performs one analysis pass on the JSON documents, and passes them to the
+ * custom analyzefunc.  If 'bitmap' is given, only the rows it identifies
+ * are processed; otherwise all sample rows are.
+ */
+static void
+jsonAnalyzePass(JsonAnalyzeContext *ctx,
+				void (*analyzefunc)(JsonAnalyzeContext *, Jsonb *, void *),
+				void *analyzearg,
+				JsonPathDocBitmap *bitmap)
+{
+	MemoryContext	tmpcxt = AllocSetContextCreate(CurrentMemoryContext,
+												"Json Analyze Pass Context",
+												ALLOCSET_DEFAULT_MINSIZE,
+												ALLOCSET_DEFAULT_INITSIZE,
+												ALLOCSET_DEFAULT_MAXSIZE);
+	MemoryContext	oldcxt = MemoryContextSwitchTo(tmpcxt);
+	int			row_num = -1;
+
+	ctx->null_cnt = 0;
+	ctx->analyzed_cnt = 0;
+	ctx->total_width = 0;
+
+	/* Loop over the jsonbs. */
+	for (int i = 0; i < (bitmap ? bitmap->size : ctx->samplerows); i++)
+	{
+		Datum		value;
+		Jsonb	   *jb;
+		Size		width;
+		bool		isnull;
+
+		vacuum_delay_point();
+
+		/* map the loop index to a row number, through the bitmap if any */
+		if (bitmap)
+		{
+			if (bitmap->is_list)
+				row_num = bitmap->data.list[i];
+			else if (!jsonStatsBitmapNext(bitmap, &row_num))
+				break;
+		}
+		else
+			row_num = i;
+
+		value = ctx->fetchfunc(ctx->stats, row_num, &isnull);
+
+		if (isnull)
+		{
+			/* json is null, just count that */
+			ctx->null_cnt++;
+			continue;
+		}
+
+		width = toast_raw_datum_size(value);
+
+		ctx->total_width += VARSIZE_ANY(DatumGetPointer(value)); /* FIXME raw width? */
+
+		/* Skip too-large values. */
+#define JSON_WIDTH_THRESHOLD (100 * 1024)
+
+		if (width > JSON_WIDTH_THRESHOLD)
+			continue;
+
+		ctx->analyzed_cnt++;
+
+		jb = DatumGetJsonbP(value);
+
+		/*
+		 * In multi-pass mode, run the analyze function outside tmpcxt so its
+		 * allocations survive the per-row reset below.  (In single-pass mode
+		 * the function is expected to switch contexts itself.)
+		 */
+		if (!ctx->single_pass)
+			MemoryContextSwitchTo(oldcxt);
+
+		ctx->current_rownum = row_num;
+		analyzefunc(ctx, jb, analyzearg);
+
+		if (!ctx->single_pass)
+			oldcxt = MemoryContextSwitchTo(tmpcxt);
+
+		MemoryContextReset(tmpcxt);
+	}
+
+	MemoryContextSwitchTo(oldcxt);
+}
+
+/*
+ * compute_json_stats() -- compute statistics for a json column
+ */
+static void
+compute_json_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
+				   int samplerows, double totalrows)
+{
+	JsonAnalyzeContext	ctx;
+	JsonPathEntry **paths;
+	Jsonb	  **pstats;
+	int			npaths;
+	int			root_analyzed_cnt;
+	int			root_null_cnt;
+	double		root_total_width;
+
+	jsonAnalyzeInit(&ctx, stats, fetchfunc, samplerows, totalrows,
+					false /* FIXME make GUC or simply remove */);
+
+	/*
+	 * Collect and analyze JSON path values in single or multiple passes.
+	 * Single-pass collection is faster but consumes much more memory than
+	 * collecting and analyzing one path per pass.
+	 */
+	if (ctx.single_pass)
+	{
+		/* Collect all values of all paths */
+		jsonAnalyzePass(&ctx, jsonAnalyzeCollectPaths, (void *)(intptr_t) true, NULL);
+
+		root_analyzed_cnt = ctx.analyzed_cnt;
+		root_null_cnt = ctx.null_cnt;
+		root_total_width = ctx.total_width;
+
+		/*
+		 * Now that we're done with processing the documents, we sort the paths
+		 * we extracted and calculate stats for each of them.
+		 *
+		 * XXX I wonder if we could do this in two phases, to maybe not collect
+		 * (or even accumulate) values for paths that are not interesting.
+		 */
+		paths = jsonAnalyzeSortPaths(&ctx, &npaths);
+		pstats = palloc(sizeof(*pstats) * npaths);
+
+		for (int i = 0; i < npaths; i++)
+		{
+			JsonPathAnlStats *astats = (JsonPathAnlStats *) paths[i];
+			JsonPathAnlStats *parent = (JsonPathAnlStats *) paths[i]->parent;
+			JsonPathParentStats parent_stats;
+
+			/* parents sort before children, so parent stats are ready here */
+			if (parent)
+			{
+				parent_stats.freq = parent->freq;
+				parent_stats.count = parent->vstats.jsons.values.count;
+				parent_stats.narrays = parent->vstats.narrays;
+			}
+
+			pstats[i] = jsonAnalyzePath(&ctx, astats,
+										parent ? &parent_stats : NULL);
+		}
+	}
+	else
+	{
+		MemoryContext	oldcxt;
+		MemoryContext	tmpcxt = AllocSetContextCreate(CurrentMemoryContext,
+													"Json Analyze Tmp Context",
+													ALLOCSET_DEFAULT_MINSIZE,
+													ALLOCSET_DEFAULT_INITSIZE,
+													ALLOCSET_DEFAULT_MAXSIZE);
+		JsonPathParentStats *stack;
+
+		elog(DEBUG1, "analyzing %s attribute \"%s\"",
+			stats->attrtypid == JSONBOID ? "jsonb" : "json",
+			NameStr(stats->attr->attname));
+
+		elog(DEBUG1, "collecting json paths");
+
+		oldcxt = MemoryContextSwitchTo(tmpcxt);
+
+		/* Collect all paths first without accumulating any Values, sort them */
+		jsonAnalyzePass(&ctx, jsonAnalyzeCollectPaths, (void *)(intptr_t) false, NULL);
+		paths = jsonAnalyzeSortPaths(&ctx, &npaths);
+		pstats = MemoryContextAlloc(oldcxt, sizeof(*pstats) * npaths);
+		stack = MemoryContextAlloc(oldcxt, sizeof(*stack) * (ctx.maxdepth + 1));
+
+		root_analyzed_cnt = ctx.analyzed_cnt;
+		root_null_cnt = ctx.null_cnt;
+		root_total_width = ctx.total_width;
+
+		/*
+		 * Next, process each path independently to save memory (we don't want
+		 * to accumulate all values for all paths, with a lot of duplicities).
+		 */
+		MemoryContextReset(tmpcxt);
+
+		for (int i = 0; i < npaths; i++)
+		{
+			JsonPathEntry *path = paths[i];
+			JsonPathAnlStats astats_tmp;
+			JsonPathAnlStats *astats;
+
+			/* paths[0] is the root, which already has its stats struct */
+			if (!i)
+				astats = ctx.root;
+			else
+			{
+				astats = &astats_tmp;
+				jsonStatsAnlInit(astats);
+				astats->path = *path;
+			}
+
+			elog(DEBUG1, "analyzing json path (%d/%d) %s",
+				 i + 1, npaths, path->pathstr);
+
+			jsonAnalyzePass(&ctx, jsonAnalyzeCollectPath, astats,
+							/* root has no bitmap */
+							i > 0 ? &((JsonPathAnlDocs *) path)->bitmap : NULL);
+
+			pstats[i] = jsonAnalyzePath(&ctx, astats,
+										path->depth ? &stack[path->depth - 1] : NULL);
+
+			/* Save parent stats in the stack */
+			stack[path->depth].freq = astats->freq;
+			stack[path->depth].count = astats->vstats.jsons.values.count;
+			stack[path->depth].narrays = astats->vstats.narrays;
+
+			MemoryContextReset(tmpcxt);
+		}
+
+		MemoryContextSwitchTo(oldcxt);
+
+		MemoryContextDelete(tmpcxt);
+	}
+
+	/* We can only compute real stats if we found some non-null values. */
+	if (root_null_cnt >= samplerows)
+	{
+		/* We found only nulls; assume the column is entirely null */
+		stats->stats_valid = true;
+		stats->stanullfrac = 1.0;
+		stats->stawidth = 0;		/* "unknown" */
+		stats->stadistinct = 0.0;	/* "unknown" */
+	}
+	else if (!root_analyzed_cnt)
+	{
+		int	nonnull_cnt = samplerows - root_null_cnt;
+
+		/* We found some non-null values, but they were all too wide */
+		stats->stats_valid = true;
+		/* Do the simple null-frac and width stats */
+		stats->stanullfrac = (double) root_null_cnt / (double) samplerows;
+		stats->stawidth = root_total_width / (double) nonnull_cnt;
+		/* Assume all too-wide values are distinct, so it's a unique column */
+		stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
+	}
+	else
+	{
+		VacAttrStats   *jsstats = &ctx.root->vstats.jsons.stats;
+		int				i;
+		int				empty_slot = -1;
+
+		stats->stats_valid = true;
+
+		stats->stanullfrac	= jsstats->stanullfrac;
+		stats->stawidth		= jsstats->stawidth;
+		stats->stadistinct	= jsstats->stadistinct;
+
+		/*
+		 * We need to store the statistics into the statistics slots. We
+		 * simply store the regular stats in the first slots, and then we put
+		 * the JSON stats into the first empty slot.
+		 */
+		for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
+		{
+			/* once we hit an empty slot, we're done */
+			if (!jsstats->staop[i])
+			{
+				empty_slot = i;		/* remember the empty slot */
+				break;
+			}
+
+			stats->stakind[i] 		= jsstats->stakind[i];
+			stats->staop[i] 		= jsstats->staop[i];
+			stats->stanumbers[i] 	= jsstats->stanumbers[i];
+			stats->stavalues[i] 	= jsstats->stavalues[i];
+			stats->statypid[i] 		= jsstats->statypid[i];
+			stats->statyplen[i] 	= jsstats->statyplen[i];
+			stats->statypbyval[i] 	= jsstats->statypbyval[i];
+			stats->statypalign[i] 	= jsstats->statypalign[i];
+			stats->numnumbers[i] 	= jsstats->numnumbers[i];
+			stats->numvalues[i] 	= jsstats->numvalues[i];
+		}
+
+		Assert((empty_slot >= 0) && (empty_slot < STATISTIC_NUM_SLOTS));
+
+		stats->stakind[empty_slot] = STATISTIC_KIND_JSON;
+		stats->staop[empty_slot] = InvalidOid;
+		stats->numnumbers[empty_slot] = 1;
+		stats->stanumbers[empty_slot] = MemoryContextAlloc(stats->anl_context,
+														   sizeof(float4));
+		stats->stanumbers[empty_slot][0] = 0.0; /* nullfrac */
+		stats->stavalues[empty_slot] =
+			jsonAnalyzeMakeStats(&ctx, pstats, npaths,
+								 &stats->numvalues[empty_slot]);
+
+		/* We are storing jsonb values */
+		stats->statypid[empty_slot] = JSONBOID;
+		get_typlenbyvalalign(stats->statypid[empty_slot],
+							 &stats->statyplen[empty_slot],
+							 &stats->statypbyval[empty_slot],
+							 &stats->statypalign[empty_slot]);
+	}
+}
+
+/*
+ * jsonb_typanalyze -- typanalyze function for the jsonb type
+ */
+Datum
+jsonb_typanalyze(PG_FUNCTION_ARGS)
+{
+	VacAttrStats *stats = (VacAttrStats *) PG_GETARG_POINTER(0);
+	Form_pg_attribute attr = stats->attr;
+
+	/* If the attstattarget column is negative, use the default value */
+	/* NB: it is okay to scribble on stats->attr since it's a copy */
+	if (attr->attstattarget < 0)
+		attr->attstattarget = default_statistics_target;
+
+	stats->compute_stats = compute_json_stats;
+	/* see comment about the choice of minrows in commands/analyze.c */
+	stats->minrows = 300 * attr->attstattarget;
+
+	PG_RETURN_BOOL(true);
+}
diff --git a/src/backend/utils/adt/jsonpath_exec.c b/src/backend/utils/adt/jsonpath_exec.c
index eff3734b6ab..94ae6e1f385 100644
--- a/src/backend/utils/adt/jsonpath_exec.c
+++ b/src/backend/utils/adt/jsonpath_exec.c
@@ -1723,7 +1723,7 @@ executeLikeRegex(JsonPathItem *jsp, JsonbValue *str, JsonbValue *rarg,
 		cxt->cflags = jspConvertRegexFlags(jsp->content.like_regex.flags);
 	}
 
-	if (RE_compile_and_execute(cxt->regex, str->val.string.val,
+	if (RE_compile_and_execute(cxt->regex, unconstify(char *, str->val.string.val),
 							   str->val.string.len,
 							   cxt->cflags, DEFAULT_COLLATION_OID, 0, NULL))
 		return jpbTrue;
diff --git a/src/include/catalog/pg_operator.dat b/src/include/catalog/pg_operator.dat
index 8e0e65ad275..9805bb15038 100644
--- a/src/include/catalog/pg_operator.dat
+++ b/src/include/catalog/pg_operator.dat
@@ -3175,7 +3175,7 @@
 { oid => '3211', oid_symbol => 'JsonbObjectFieldOperator',
   descr => 'get jsonb object field',
   oprname => '->', oprleft => 'jsonb', oprright => 'text', oprresult => 'jsonb',
-  oprcode => 'jsonb_object_field' },
+  oprcode => 'jsonb_object_field', oprstat => 'jsonb_stats' },
 { oid => '3477', oid_symbol => 'JsonbObjectFieldTextOperator',
   descr => 'get jsonb object field as text',
   oprname => '->>', oprleft => 'jsonb', oprright => 'text', oprresult => 'text',
@@ -3183,7 +3183,7 @@
 { oid => '3212', oid_symbol => 'JsonbArrayElementOperator',
   descr => 'get jsonb array element',
   oprname => '->', oprleft => 'jsonb', oprright => 'int4', oprresult => 'jsonb',
-  oprcode => 'jsonb_array_element' },
+  oprcode => 'jsonb_array_element', oprstat => 'jsonb_stats' },
 { oid => '3481', oid_symbol => 'JsonbArrayElementTextOperator',
   descr => 'get jsonb array element as text',
   oprname => '->>', oprleft => 'jsonb', oprright => 'int4', oprresult => 'text',
@@ -3191,7 +3191,8 @@
 { oid => '3213', oid_symbol => 'JsonbExtractPathOperator',
   descr => 'get value from jsonb with path elements',
   oprname => '#>', oprleft => 'jsonb', oprright => '_text',
-  oprresult => 'jsonb', oprcode => 'jsonb_extract_path' },
+  oprresult => 'jsonb', oprcode => 'jsonb_extract_path',
+  oprstat => 'jsonb_stats' },
 { oid => '3206', oid_symbol => 'JsonbExtractPathTextOperator',
   descr => 'get value from jsonb as text with path elements',
   oprname => '#>>', oprleft => 'jsonb', oprright => '_text',
@@ -3229,23 +3230,23 @@
 { oid => '3246', oid_symbol => 'JsonbContainsOperator', descr => 'contains',
   oprname => '@>', oprleft => 'jsonb', oprright => 'jsonb', oprresult => 'bool',
   oprcom => '<@(jsonb,jsonb)', oprcode => 'jsonb_contains',
-  oprrest => 'matchingsel', oprjoin => 'matchingjoinsel' },
+  oprrest => 'jsonb_sel', oprjoin => 'matchingjoinsel' },
 { oid => '3247', oid_symbol => 'JsonbExistsOperator', descr => 'key exists',
   oprname => '?', oprleft => 'jsonb', oprright => 'text', oprresult => 'bool',
-  oprcode => 'jsonb_exists', oprrest => 'matchingsel',
+  oprcode => 'jsonb_exists', oprrest => 'jsonb_sel',
   oprjoin => 'matchingjoinsel' },
 { oid => '3248', oid_symbol => 'JsonbExistsAnyOperator', descr => 'any key exists',
   oprname => '?|', oprleft => 'jsonb', oprright => '_text', oprresult => 'bool',
-  oprcode => 'jsonb_exists_any', oprrest => 'matchingsel',
+  oprcode => 'jsonb_exists_any', oprrest => 'jsonb_sel',
   oprjoin => 'matchingjoinsel' },
 { oid => '3249', oid_symbol => 'JsonbExistsAllOperator', descr => 'all keys exist',
   oprname => '?&', oprleft => 'jsonb', oprright => '_text', oprresult => 'bool',
-  oprcode => 'jsonb_exists_all', oprrest => 'matchingsel',
+  oprcode => 'jsonb_exists_all', oprrest => 'jsonb_sel',
   oprjoin => 'matchingjoinsel' },
 { oid => '3250', oid_symbol => 'JsonbContainedOperator', descr => 'is contained by',
   oprname => '<@', oprleft => 'jsonb', oprright => 'jsonb', oprresult => 'bool',
   oprcom => '@>(jsonb,jsonb)', oprcode => 'jsonb_contained',
-  oprrest => 'matchingsel', oprjoin => 'matchingjoinsel' },
+  oprrest => 'jsonb_sel', oprjoin => 'matchingjoinsel' },
 { oid => '3284', descr => 'concatenate',
   oprname => '||', oprleft => 'jsonb', oprright => 'jsonb',
   oprresult => 'jsonb', oprcode => 'jsonb_concat' },
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index d8e8715ed1c..5c8d65aa0d1 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -11747,4 +11747,15 @@
   prorettype => 'bytea', proargtypes => 'pg_brin_minmax_multi_summary',
   prosrc => 'brin_minmax_multi_summary_send' },
 
+# jsonb statistics
+{ oid => '8526', descr => 'jsonb typanalyze',
+  proname => 'jsonb_typanalyze', provolatile => 's', prorettype => 'bool',
+  proargtypes => 'internal', prosrc => 'jsonb_typanalyze' },
+{ oid => '8527', descr => 'jsonb selectivity estimation',
+  proname => 'jsonb_sel', provolatile => 's', prorettype => 'float8',
+  proargtypes => 'internal oid internal int4', prosrc => 'jsonb_sel' },
+{ oid => '8528', descr => 'jsonb statistics estimation',
+  proname => 'jsonb_stats', provolatile => 's', prorettype => 'void',
+  proargtypes => 'internal internal int4 internal', prosrc => 'jsonb_stats' },
+
 ]
diff --git a/src/include/catalog/pg_statistic.h b/src/include/catalog/pg_statistic.h
index cdf74481398..c4f53ebd1ba 100644
--- a/src/include/catalog/pg_statistic.h
+++ b/src/include/catalog/pg_statistic.h
@@ -277,6 +277,8 @@ DECLARE_FOREIGN_KEY((starelid, staattnum), pg_attribute, (attrelid, attnum));
  */
 #define STATISTIC_KIND_BOUNDS_HISTOGRAM  7
 
+#define STATISTIC_KIND_JSON 8
+
 #endif							/* EXPOSE_TO_CLIENT_CODE */
 
 #endif							/* PG_STATISTIC_H */
diff --git a/src/include/catalog/pg_type.dat b/src/include/catalog/pg_type.dat
index df458794635..b867db42f45 100644
--- a/src/include/catalog/pg_type.dat
+++ b/src/include/catalog/pg_type.dat
@@ -445,7 +445,7 @@
   typname => 'jsonb', typlen => '-1', typbyval => 'f', typcategory => 'U',
   typsubscript => 'jsonb_subscript_handler', typinput => 'jsonb_in',
   typoutput => 'jsonb_out', typreceive => 'jsonb_recv', typsend => 'jsonb_send',
-  typalign => 'i', typstorage => 'x' },
+  typanalyze => 'jsonb_typanalyze', typalign => 'i', typstorage => 'x' },
 { oid => '4072', array_type_oid => '4073', descr => 'JSON path',
   typname => 'jsonpath', typlen => '-1', typbyval => 'f', typcategory => 'U',
   typinput => 'jsonpath_in', typoutput => 'jsonpath_out',
diff --git a/src/include/utils/json_selfuncs.h b/src/include/utils/json_selfuncs.h
new file mode 100644
index 00000000000..9a36567ae65
--- /dev/null
+++ b/src/include/utils/json_selfuncs.h
@@ -0,0 +1,113 @@
+/*-------------------------------------------------------------------------
+ *
+ * json_selfuncs.h
+ *	  JSON cost estimation functions.
+ *
+ *
+ * Portions Copyright (c) 2016-2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *    src/include/utils/json_selfuncs.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef JSON_SELFUNCS_H
+#define JSON_SELFUNCS_H 1
+
+#include "postgres.h"
+#include "access/htup.h"
+#include "utils/jsonb.h"
+#include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
+
+#define JSON_PATH_ROOT "$"
+#define JSON_PATH_ROOT_LEN 1
+
+#define JSON_PATH_ROOT_ARRAY "$[*]"
+#define JSON_PATH_ROOT_ARRAY_LEN 4
+
+typedef enum
+{
+	JsonPathStatsValues,
+	JsonPathStatsArrayLength,
+	JsonPathStatsObjectLength
+} JsonPathStatsType;
+
+typedef struct JsonStatData JsonStatData, *JsonStats;
+
+/* Per-path JSON stats */
+typedef struct JsonPathStatsData
+{
+	JsonStats	data;			/* pointer to per-column control structure */
+	Datum	   *datum;			/* pointer to JSONB datum with stats data */
+	const char *path;			/* path string, points directly to JSONB data */
+	int			pathlen;		/* path length */
+	JsonPathStatsType type;		/* type of stats (values, lengths etc.) */
+} JsonPathStatsData, *JsonPathStats;
+
+/* Per-column JSON stats */
+struct JsonStatData
+{
+	HeapTuple	statsTuple;		/* original pg_statistic tuple */
+	AttStatsSlot attslot;		/* data extracted from STATISTIC_KIND_JSON
+								 * slot of statsTuple */
+	RelOptInfo *rel;			/* Relation, or NULL if not identifiable */
+	Datum	   *pathdatums;		/* path JSONB datums */
+	JsonPathStatsData *paths;	/* cached paths */
+	int			npaths;			/* number of paths */
+	float4		nullfrac;		/* NULL fraction */
+	const char *prefix;			/* global path prefix which needs to be used
+								 * for searching in pathdatums */
+	int			prefixlen;		/* path prefix length */
+	bool		acl_ok;			/* ACL check is Ok */
+};
+
+typedef enum JsonStatType
+{
+	JsonStatJsonb,
+	JsonStatJsonbWithoutSubpaths,
+	JsonStatText,
+	JsonStatFloat4,
+	JsonStatString,
+	JsonStatNumeric,
+	JsonStatFreq,
+} JsonStatType;
+
+extern bool jsonStatsInit(JsonStats stats, const VariableStatData *vardata);
+extern void jsonStatsRelease(JsonStats data);
+
+extern JsonPathStats jsonStatsGetPathByStr(JsonStats stats,
+										   const char *path, int pathlen);
+
+extern JsonPathStats jsonPathStatsGetSubpath(JsonPathStats stats,
+											 const char *subpath);
+
+extern bool jsonPathStatsGetNextSubpathStats(JsonPathStats stats,
+											 JsonPathStats *keystats,
+											 bool keysOnly);
+
+extern JsonPathStats jsonPathStatsGetArrayLengthStats(JsonPathStats pstats);
+extern JsonPathStats jsonPathStatsGetObjectLengthStats(JsonPathStats pstats);
+
+extern float4 jsonPathStatsGetFreq(JsonPathStats pstats, float4 defaultfreq);
+
+extern float4 jsonPathStatsGetTypeFreq(JsonPathStats pstats,
+									JsonbValueType type, float4 defaultfreq);
+
+extern float4 jsonPathStatsGetAvgArraySize(JsonPathStats pstats);
+
+extern Selectivity jsonPathStatsGetArrayIndexSelectivity(JsonPathStats pstats,
+														 int index);
+
+extern Selectivity jsonSelectivity(JsonPathStats stats, Datum scalar, Oid oper);
+
+extern void jsonPathAppendEntry(StringInfo path, const char *entry);
+
+extern bool jsonAnalyzeBuildSubPathsData(Datum *pathsDatums,
+										 int npaths, int index,
+										 const char	*path, int pathlen,
+										 bool includeSubpaths, float4 nullfrac,
+										 Datum *pvals, Datum *pnums);
+
+#endif /* JSON_SELFUNCS_H */
diff --git a/src/test/regress/expected/jsonb_stats.out b/src/test/regress/expected/jsonb_stats.out
new file mode 100644
index 00000000000..c7b1e644099
--- /dev/null
+++ b/src/test/regress/expected/jsonb_stats.out
@@ -0,0 +1,713 @@
+CREATE OR REPLACE FUNCTION explain_jsonb(sql_query text)
+RETURNS TABLE(explain_line json) AS
+$$
+BEGIN
+	RETURN QUERY EXECUTE 'EXPLAIN (ANALYZE, FORMAT json) ' || sql_query;
+END;
+$$ LANGUAGE plpgsql;
+CREATE OR REPLACE FUNCTION get_plan_and_actual_rows(sql_query text)
+RETURNS TABLE(plan integer, actual integer) AS
+$$
+	SELECT
+		(plan->>'Plan Rows')::integer plan,
+		(plan->>'Actual Rows')::integer actual
+	FROM (
+		SELECT explain_jsonb(sql_query) #> '{0,Plan,Plans,0}'
+	) p(plan)
+$$ LANGUAGE sql;
+CREATE OR REPLACE FUNCTION check_estimate(sql_query text, accuracy real)
+RETURNS boolean AS
+$$
+	SELECT plan BETWEEN actual / (1 + accuracy) AND (actual + 1) * (1 + accuracy)
+	FROM (SELECT * FROM get_plan_and_actual_rows(sql_query)) x
+$$ LANGUAGE sql;
+CREATE OR REPLACE FUNCTION check_estimate2(sql_query text, accuracy real)
+RETURNS TABLE(min integer, max integer) AS
+$$
+	SELECT (actual * (1 - accuracy))::integer, ((actual + 1) * (1 + accuracy))::integer
+	FROM (SELECT * FROM get_plan_and_actual_rows(sql_query)) x
+$$ LANGUAGE sql;
+CREATE TABLE jsonb_stats_test(js jsonb);
+INSERT INTO jsonb_stats_test SELECT NULL FROM generate_series(1, 1000);
+INSERT INTO jsonb_stats_test SELECT 'null' FROM generate_series(1, 200);
+INSERT INTO jsonb_stats_test SELECT 'true' FROM generate_series(1, 300);
+INSERT INTO jsonb_stats_test SELECT 'false' FROM generate_series(1, 500);
+INSERT INTO jsonb_stats_test SELECT '12345' FROM generate_series(1, 100);
+INSERT INTO jsonb_stats_test SELECT (1000 * (i % 10))::text::jsonb FROM generate_series(1, 400) i;
+INSERT INTO jsonb_stats_test SELECT i::text::jsonb FROM generate_series(1, 500) i;
+INSERT INTO jsonb_stats_test SELECT '"foo"' FROM generate_series(1, 100);
+INSERT INTO jsonb_stats_test SELECT format('"bar%s"', i % 10)::jsonb FROM generate_series(1, 400) i;
+INSERT INTO jsonb_stats_test SELECT format('"baz%s"', i)::jsonb FROM generate_series(1, 500) i;
+INSERT INTO jsonb_stats_test SELECT '{}' FROM generate_series(1, 100);
+INSERT INTO jsonb_stats_test SELECT jsonb_build_object('foo', 'bar') FROM generate_series(1, 100);
+INSERT INTO jsonb_stats_test SELECT jsonb_build_object('foo', 'baz' || (i % 10)) FROM generate_series(1, 300) i;
+INSERT INTO jsonb_stats_test SELECT jsonb_build_object('foo', i % 10) FROM generate_series(1, 200) i;
+INSERT INTO jsonb_stats_test SELECT jsonb_build_object('"foo \"bar"', i % 10) FROM generate_series(1, 200) i;
+INSERT INTO jsonb_stats_test SELECT '[]' FROM generate_series(1, 100);
+INSERT INTO jsonb_stats_test SELECT '["foo"]' FROM generate_series(1, 200);
+INSERT INTO jsonb_stats_test SELECT '[12345]' FROM generate_series(1, 300);
+INSERT INTO jsonb_stats_test SELECT '[["foo"]]' FROM generate_series(1, 200);
+INSERT INTO jsonb_stats_test SELECT '[{"key": "foo"}]' FROM generate_series(1, 200);
+INSERT INTO jsonb_stats_test SELECT '[null, "foo"]' FROM generate_series(1, 200);
+INSERT INTO jsonb_stats_test SELECT '[null, 12345]' FROM generate_series(1, 300);
+INSERT INTO jsonb_stats_test SELECT '[null, ["foo"]]' FROM generate_series(1, 200);
+INSERT INTO jsonb_stats_test SELECT '[null, {"key": "foo"}]' FROM generate_series(1, 200);
+-- Build random variable-length integer arrays
+SELECT setseed(0.0);
+ setseed 
+---------
+ 
+(1 row)
+
+INSERT INTO jsonb_stats_test
+SELECT jsonb_build_object('array',
+	jsonb_build_array())
+FROM generate_series(1, 1000);
+INSERT INTO jsonb_stats_test
+SELECT jsonb_build_object('array',
+	jsonb_build_array(
+		floor(random() * 10)::int))
+FROM generate_series(1, 4000);
+INSERT INTO jsonb_stats_test
+SELECT jsonb_build_object('array',
+	jsonb_build_array(
+		floor(random() * 10)::int,
+		floor(random() * 10)::int))
+FROM generate_series(1, 3000);
+INSERT INTO jsonb_stats_test
+SELECT jsonb_build_object('array',
+	jsonb_build_array(
+		floor(random() * 10)::int,
+		floor(random() * 10)::int,
+		floor(random() * 10)::int))
+FROM generate_series(1, 2000);
+ANALYZE jsonb_stats_test;
+CREATE OR REPLACE FUNCTION check_jsonb_stats_test_estimate(sql_condition text, accuracy real)
+RETURNS boolean AS
+$$
+	SELECT check_estimate('SELECT count(*) FROM jsonb_stats_test WHERE ' || sql_condition, accuracy)
+$$ LANGUAGE sql;
+DROP FUNCTION IF EXISTS check_jsonb_stats_test_estimate2(text, real);
+NOTICE:  function check_jsonb_stats_test_estimate2(text,pg_catalog.float4) does not exist, skipping
+CREATE OR REPLACE FUNCTION check_jsonb_stats_test_estimate2(sql_condition text, accuracy real)
+RETURNS TABLE(plan integer, actual integer) AS
+$$
+	SELECT get_plan_and_actual_rows('SELECT count(*) FROM jsonb_stats_test WHERE ' || sql_condition)
+$$ LANGUAGE sql;
+-- Check NULL estimate
+SELECT check_jsonb_stats_test_estimate($$js IS NULL$$, 0.03);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js -> 'bad_key' IS NULL$$, 0.01);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js #> '{bad_key}' IS NULL$$, 0.01);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js -> 1000000 IS NULL$$, 0.01);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js #> '{1000000}' IS NULL$$, 0.01);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js -> 'bad_key1' -> 'bad_key2' IS NULL$$, 0.01);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js #> '{bad_key1,bad_key2}' IS NULL$$, 0.01);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js -> 'bad_key1' -> 1 IS NULL$$, 0.01);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js #> '{bad_key1,1}' IS NULL$$, 0.01);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js -> 1000000 -> 'foo' IS NULL$$, 0.01);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js #> '{1000000,foo}' IS NULL$$, 0.01);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js -> 'bad_key' = '123'$$, 0.01);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js -> 1000000 = '123'$$, 0.01);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+-- Check null eq estimate
+SELECT check_jsonb_stats_test_estimate($$js =  'null'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js @> 'null'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+-- Check boolean eq estimate
+SELECT check_jsonb_stats_test_estimate($$js =  'true'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js @> 'true'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js =  'false'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js @> 'false'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+-- Check numeric eq estimate
+SELECT check_jsonb_stats_test_estimate($$js = '12345'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js#>'{}' = '12345'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js = '3000'$$, 0.3);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js = '1234'$$, 1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js @> '6000'$$, 0.2);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+-- Check numeric range estimate
+SELECT check_jsonb_stats_test_estimate($$js < '0'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js < '100'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js < '1000'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js < '3456'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js < '10000'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js < '100000'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js > '100' AND js < '600'$$, 0.5);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js > '6800' AND js < '12000'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+-- Check string eq estimate
+SELECT check_jsonb_stats_test_estimate($$js = '"foo"'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js = '"bar7"'$$, 0.2);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js = '"baz1234"'$$, 10);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js @> '"bar4"'$$, 0.3);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+-- Check string range estimate
+SELECT check_jsonb_stats_test_estimate($$js > '"foo"'$$, 0.01);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js > '"bar"'$$, 0.01);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js > '"baz"'$$, 0.01);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+-- Check object eq estimate
+SELECT check_jsonb_stats_test_estimate($$js = '{}'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js > '{}'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+-- Check object key eq estimate
+SELECT check_jsonb_stats_test_estimate($$js -> 'foo' = '"bar"'$$, 0.2);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js -> 'foo' = '"baz"'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js -> 'foo' = '"baz5"'$$, 0.3);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js #> '{foo}' = '"bar"'$$, 0.2);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+-- Check object key range estimate
+SELECT check_jsonb_stats_test_estimate($$js -> 'foo' >= '"baz2"'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js -> 'foo' <  '"baz9"'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js -> 'foo' >= '"baz2"' AND js -> 'foo' < '"baz9"'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+-- Check array eq estimate
+SELECT check_jsonb_stats_test_estimate($$js = '[]'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js >= '[]' AND js < '{}'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+-- Check variable-length array element eq estimate
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' -> 0 = '1'$$, 0.2);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' -> 1 = '6'$$, 0.2);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' -> 2 = '8'$$, 0.2);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' -> 3 = '1'$$, 0.2);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+-- Check variable-length array element range estimate
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' -> 0 < '7'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' -> 1 < '7'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' -> 2 < '7'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' -> 3 < '7'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+-- Check variable-length array containment estimate
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' @> '[]'$$, 0.2);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' @> '[1]'$$, 0.2);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' @> '[100]'$$, 0.2);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' @> '[1, 2]'$$, 1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' @> '[1, 100]'$$, 1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' @> '[1, 2, 100]'$$, 1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' @> '[1, 2, 3]'$$, 5);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' @> '1'$$, 0.3);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' @> '100'$$, 10);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' -> 0 @> '1'$$, 0.3);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' -> 1 @> '1'$$, 0.3);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' -> 2 @> '1'$$, 0.3);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' -> 3 @> '1'$$, 0.3);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' -> 0 @> '[1]'$$, 0.3);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js @> '{"array": []}'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js @> '{"array": [1]}'$$, 0.3);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js @> '{"array": [100]}'$$, 0.3);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js @> '{"array": [1, 2]}'$$, 1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js @> '{"array": [1, 100]}'$$, 1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js @> '{"array": [1, 2, 100]}'$$, 1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js @> '{"array": [1, 2, 3]}'$$, 3);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+-- check misc containment
+SELECT check_jsonb_stats_test_estimate($$js @> '"foo"'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js @> '12345'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js @> '[]'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js @> '[12345]'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js @> '["foo"]'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js @> '[["foo", "bar"]]'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js @> '[["foo"]]'$$, 0.2);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js @> '[{"key": "foo"}]'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js @> '[null]'$$, 0.3);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+-- Check object key null estimate
+SELECT check_jsonb_stats_test_estimate($$js -> 'foo' IS NULL$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js -> 'foo' IS NOT NULL$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js -> '"foo \"bar"' IS NOT NULL$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js -> 'bad_key' IS NULL$$, 0.01);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js -> 'bad_key' IS NOT NULL$$, 0.01);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+-- Check object key existence
+SELECT check_jsonb_stats_test_estimate($$js ? 'bad_key'$$, 10);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js ? 'foo'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js ? 'array'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js ?| '{foo,bad_key}'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js ?| '{foo,array}'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js ?& '{foo,bad_key}'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
+SELECT check_jsonb_stats_test_estimate($$js ?& '{foo,bar}'$$, 0.1);
+ check_jsonb_stats_test_estimate 
+---------------------------------
+ t
+(1 row)
+
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index ac468568a1a..97b6f002c45 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -2504,6 +2504,38 @@ pg_stats_ext_exprs| SELECT cn.nspname AS schemaname,
      LEFT JOIN pg_namespace sn ON ((sn.oid = s.stxnamespace)))
      JOIN LATERAL ( SELECT unnest(pg_get_statisticsobjdef_expressions(s.oid)) AS expr,
             unnest(sd.stxdexpr) AS a) stat ON ((stat.expr IS NOT NULL)));
+pg_stats_json| SELECT n.nspname AS schemaname,
+    c.relname AS tablename,
+    a.attname,
+    (paths.path ->> 'path'::text) AS json_path,
+    s.stainherit AS inherited,
+    (((paths.path -> 'json'::text) ->> 'nullfrac'::text))::real AS null_frac,
+    (((paths.path -> 'json'::text) ->> 'width'::text))::real AS avg_width,
+    (((paths.path -> 'json'::text) ->> 'distinct'::text))::real AS n_distinct,
+    ARRAY( SELECT val.value AS val
+           FROM jsonb_array_elements((((paths.path -> 'json'::text) -> 'mcv'::text) -> 'values'::text)) val(value)) AS most_common_vals,
+    ARRAY( SELECT ((num.value)::text)::real AS num
+           FROM jsonb_array_elements((((paths.path -> 'json'::text) -> 'mcv'::text) -> 'numbers'::text)) num(value)) AS most_common_freqs,
+    ARRAY( SELECT val.value AS val
+           FROM jsonb_array_elements((((paths.path -> 'json'::text) -> 'histogram'::text) -> 'values'::text)) val(value)) AS histogram_bounds,
+    ARRAY( SELECT ((val.value)::text)::integer AS val
+           FROM jsonb_array_elements((((paths.path -> 'array_length'::text) -> 'mcv'::text) -> 'values'::text)) val(value)) AS most_common_array_lengths,
+    ARRAY( SELECT ((num.value)::text)::real AS num
+           FROM jsonb_array_elements((((paths.path -> 'array_length'::text) -> 'mcv'::text) -> 'numbers'::text)) num(value)) AS most_common_array_length_freqs,
+    (((paths.path -> 'json'::text) ->> 'correlation'::text))::real AS correlation
+   FROM (((pg_statistic s
+     JOIN pg_class c ON ((c.oid = s.starelid)))
+     JOIN pg_attribute a ON (((c.oid = a.attrelid) AND (a.attnum = s.staattnum))))
+     LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))),
+    LATERAL ( SELECT unnest((((
+                CASE
+                    WHEN (s.stakind1 = 8) THEN s.stavalues1
+                    WHEN (s.stakind2 = 8) THEN s.stavalues2
+                    WHEN (s.stakind3 = 8) THEN s.stavalues3
+                    WHEN (s.stakind4 = 8) THEN s.stavalues4
+                    WHEN (s.stakind5 = 8) THEN s.stavalues5
+                    ELSE NULL::anyarray
+                END)::text)::jsonb[])[2:]) AS path) paths;
 pg_tables| SELECT n.nspname AS schemaname,
     c.relname AS tablename,
     pg_get_userbyid(c.relowner) AS tableowner,
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 6d8f524ae9e..7117f220ed7 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -111,7 +111,7 @@ test: select_views portals_p2 foreign_key cluster dependency guc bitmapops combo
 # ----------
 # Another group of parallel tests (JSON related)
 # ----------
-test: json jsonb json_encoding jsonpath jsonpath_encoding jsonb_jsonpath
+test: json jsonb json_encoding jsonpath jsonpath_encoding jsonb_jsonpath jsonb_stats
 
 # ----------
 # Another group of parallel tests
diff --git a/src/test/regress/sql/jsonb_stats.sql b/src/test/regress/sql/jsonb_stats.sql
new file mode 100644
index 00000000000..fac71d09ded
--- /dev/null
+++ b/src/test/regress/sql/jsonb_stats.sql
@@ -0,0 +1,249 @@
+-- explain_jsonb(sql_query) -> SETOF json
+--
+-- Run EXPLAIN (ANALYZE, FORMAT json) on the given query text and return the
+-- resulting JSON document, one output row per EXPLAIN result row.
+-- The query is executed dynamically, so callers must pass trusted,
+-- test-local SQL only.
+CREATE OR REPLACE FUNCTION explain_jsonb(sql_query text)
+RETURNS TABLE(explain_line json) AS
+$$
+BEGIN
+	RETURN QUERY EXECUTE 'EXPLAIN (ANALYZE, FORMAT json) ' || sql_query;
+END;
+$$ LANGUAGE plpgsql;
+
+-- get_plan_and_actual_rows(sql_query) -> (plan integer, actual integer)
+--
+-- Extract the planner's estimated row count ("Plan Rows") and the executor's
+-- actual row count ("Actual Rows") from EXPLAIN ANALYZE JSON output.
+-- The path '{0,Plan,Plans,0}' descends to the first child of the top plan
+-- node -- presumably the scan node underneath the Aggregate produced by the
+-- "SELECT count(*) ..." wrapper queries used below; confirm against the
+-- EXPLAIN output shape if the wrapper query form changes.
+CREATE OR REPLACE FUNCTION get_plan_and_actual_rows(sql_query text)
+RETURNS TABLE(plan integer, actual integer) AS
+$$
+	SELECT
+		(plan->>'Plan Rows')::integer plan,
+		(plan->>'Actual Rows')::integer actual
+	FROM (
+		SELECT explain_jsonb(sql_query) #> '{0,Plan,Plans,0}'
+	) p(plan)
+$$ LANGUAGE sql;
+
+-- check_estimate(sql_query, accuracy) -> boolean
+--
+-- True when the planner's row estimate falls within a relative tolerance of
+-- the actual row count.  The bounds are deliberately asymmetric:
+--   lower: actual / (1 + accuracy)
+--   upper: (actual + 1) * (1 + accuracy)
+-- The "+ 1" on the upper bound keeps the interval non-degenerate when the
+-- actual row count is 0 (the planner never estimates fewer than 1 row).
+CREATE OR REPLACE FUNCTION check_estimate(sql_query text, accuracy real)
+RETURNS boolean AS
+$$
+	SELECT plan BETWEEN actual / (1 + accuracy) AND (actual + 1) * (1 + accuracy)
+	FROM (SELECT * FROM get_plan_and_actual_rows(sql_query)) x
+$$ LANGUAGE sql;
+
+-- check_estimate2(sql_query, accuracy) -> (min integer, max integer)
+--
+-- Debugging companion to check_estimate(): instead of a boolean, return the
+-- [min, max] interval the estimate would be checked against.
+-- NOTE(review): the lower bound here is actual * (1 - accuracy), while
+-- check_estimate() uses actual / (1 + accuracy) -- the two intervals differ
+-- slightly; confirm whether that divergence is intentional.
+CREATE OR REPLACE FUNCTION check_estimate2(sql_query text, accuracy real)
+RETURNS TABLE(min integer, max integer) AS
+$$
+	SELECT (actual * (1 - accuracy))::integer, ((actual + 1) * (1 + accuracy))::integer
+	FROM (SELECT * FROM get_plan_and_actual_rows(sql_query)) x
+$$ LANGUAGE sql;
+
+-- Test fixture: a single-column jsonb table populated with a controlled mix
+-- of value kinds (SQL NULLs, jsonb scalars, objects, arrays) so that the new
+-- jsonb path statistics have something meaningful to summarize.
+CREATE TABLE jsonb_stats_test(js jsonb);
+
+-- SQL NULLs (distinct from the jsonb 'null' scalar inserted next).
+INSERT INTO jsonb_stats_test SELECT NULL FROM generate_series(1, 1000);
+
+-- jsonb scalar literals: null / true / false.
+INSERT INTO jsonb_stats_test SELECT 'null' FROM generate_series(1, 200);
+INSERT INTO jsonb_stats_test SELECT 'true' FROM generate_series(1, 300);
+INSERT INTO jsonb_stats_test SELECT 'false' FROM generate_series(1, 500);
+
+-- Numbers: one frequent value, ten mid-frequency values, 500 distinct values.
+INSERT INTO jsonb_stats_test SELECT '12345' FROM generate_series(1, 100);
+INSERT INTO jsonb_stats_test SELECT (1000 * (i % 10))::text::jsonb FROM generate_series(1, 400) i;
+INSERT INTO jsonb_stats_test SELECT i::text::jsonb FROM generate_series(1, 500) i;
+
+-- Strings, with the same frequent / mid-frequency / distinct pattern.
+INSERT INTO jsonb_stats_test SELECT '"foo"' FROM generate_series(1, 100);
+INSERT INTO jsonb_stats_test SELECT format('"bar%s"', i % 10)::jsonb FROM generate_series(1, 400) i;
+INSERT INTO jsonb_stats_test SELECT format('"baz%s"', i)::jsonb FROM generate_series(1, 500) i;
+
+-- Objects, including a key containing quotes/backslashes to exercise
+-- path-key escaping in the statistics code.
+INSERT INTO jsonb_stats_test SELECT '{}' FROM generate_series(1, 100);
+INSERT INTO jsonb_stats_test SELECT jsonb_build_object('foo', 'bar') FROM generate_series(1, 100);
+INSERT INTO jsonb_stats_test SELECT jsonb_build_object('foo', 'baz' || (i % 10)) FROM generate_series(1, 300) i;
+INSERT INTO jsonb_stats_test SELECT jsonb_build_object('foo', i % 10) FROM generate_series(1, 200) i;
+INSERT INTO jsonb_stats_test SELECT jsonb_build_object('"foo \"bar"', i % 10) FROM generate_series(1, 200) i;
+
+-- Arrays of varying shapes, with and without a leading null element.
+INSERT INTO jsonb_stats_test SELECT '[]' FROM generate_series(1, 100);
+INSERT INTO jsonb_stats_test SELECT '["foo"]' FROM generate_series(1, 200);
+INSERT INTO jsonb_stats_test SELECT '[12345]' FROM generate_series(1, 300);
+INSERT INTO jsonb_stats_test SELECT '[["foo"]]' FROM generate_series(1, 200);
+INSERT INTO jsonb_stats_test SELECT '[{"key": "foo"}]' FROM generate_series(1, 200);
+INSERT INTO jsonb_stats_test SELECT '[null, "foo"]' FROM generate_series(1, 200);
+INSERT INTO jsonb_stats_test SELECT '[null, 12345]' FROM generate_series(1, 300);
+INSERT INTO jsonb_stats_test SELECT '[null, ["foo"]]' FROM generate_series(1, 200);
+INSERT INTO jsonb_stats_test SELECT '[null, {"key": "foo"}]' FROM generate_series(1, 200);
+
+-- Build random variable-length integer arrays
+-- (seed the PRNG so the generated data -- and thus the statistics and the
+-- expected estimates below -- are reproducible across runs).
+SELECT setseed(0.0);
+
+INSERT INTO jsonb_stats_test
+SELECT jsonb_build_object('array',
+	jsonb_build_array())
+FROM generate_series(1, 1000);
+
+INSERT INTO jsonb_stats_test
+SELECT jsonb_build_object('array',
+	jsonb_build_array(
+		floor(random() * 10)::int))
+FROM generate_series(1, 4000);
+
+INSERT INTO jsonb_stats_test
+SELECT jsonb_build_object('array',
+	jsonb_build_array(
+		floor(random() * 10)::int,
+		floor(random() * 10)::int))
+FROM generate_series(1, 3000);
+
+INSERT INTO jsonb_stats_test
+SELECT jsonb_build_object('array',
+	jsonb_build_array(
+		floor(random() * 10)::int,
+		floor(random() * 10)::int,
+		floor(random() * 10)::int))
+FROM generate_series(1, 2000);
+
+
+-- Collect the jsonb path statistics that all the estimates below rely on.
+ANALYZE jsonb_stats_test;
+
+-- check_jsonb_stats_test_estimate(sql_condition, accuracy) -> boolean
+--
+-- Convenience wrapper: check the row estimate for a WHERE condition against
+-- the fixture table, within the given relative tolerance (see check_estimate).
+CREATE OR REPLACE FUNCTION check_jsonb_stats_test_estimate(sql_condition text, accuracy real)
+RETURNS boolean AS
+$$
+	SELECT check_estimate('SELECT count(*) FROM jsonb_stats_test WHERE ' || sql_condition, accuracy)
+$$ LANGUAGE sql;
+
+-- Drop first: CREATE OR REPLACE cannot change a function's return type, so a
+-- leftover definition from an earlier version must be removed explicitly.
+DROP FUNCTION IF EXISTS check_jsonb_stats_test_estimate2(text, real);
+
+-- check_jsonb_stats_test_estimate2(sql_condition, accuracy) -> (plan, actual)
+--
+-- Debugging wrapper returning the raw (plan, actual) row counts.
+-- NOTE(review): despite the name, this calls get_plan_and_actual_rows() and
+-- ignores its accuracy argument; it does NOT call check_estimate2().
+CREATE OR REPLACE FUNCTION check_jsonb_stats_test_estimate2(sql_condition text, accuracy real)
+RETURNS TABLE(plan integer, actual integer) AS
+$$
+	SELECT get_plan_and_actual_rows('SELECT count(*) FROM jsonb_stats_test WHERE ' || sql_condition)
+$$ LANGUAGE sql;
+
+-- Check NULL estimate
+-- (missing keys / out-of-range subscripts yield SQL NULL, so these exercise
+-- the "path not present" selectivity; each statement should return true)
+SELECT check_jsonb_stats_test_estimate($$js IS NULL$$, 0.03);
+SELECT check_jsonb_stats_test_estimate($$js -> 'bad_key' IS NULL$$, 0.01);
+SELECT check_jsonb_stats_test_estimate($$js #> '{bad_key}' IS NULL$$, 0.01);
+SELECT check_jsonb_stats_test_estimate($$js -> 1000000 IS NULL$$, 0.01);
+SELECT check_jsonb_stats_test_estimate($$js #> '{1000000}' IS NULL$$, 0.01);
+SELECT check_jsonb_stats_test_estimate($$js -> 'bad_key1' -> 'bad_key2' IS NULL$$, 0.01);
+SELECT check_jsonb_stats_test_estimate($$js #> '{bad_key1,bad_key2}' IS NULL$$, 0.01);
+SELECT check_jsonb_stats_test_estimate($$js -> 'bad_key1' -> 1 IS NULL$$, 0.01);
+SELECT check_jsonb_stats_test_estimate($$js #> '{bad_key1,1}' IS NULL$$, 0.01);
+SELECT check_jsonb_stats_test_estimate($$js -> 1000000 -> 'foo' IS NULL$$, 0.01);
+SELECT check_jsonb_stats_test_estimate($$js #> '{1000000,foo}' IS NULL$$, 0.01);
+
+-- Comparisons against a missing path should be estimated as ~0 rows.
+SELECT check_jsonb_stats_test_estimate($$js -> 'bad_key' = '123'$$, 0.01);
+SELECT check_jsonb_stats_test_estimate($$js -> 1000000 = '123'$$, 0.01);
+
+-- Check null eq estimate (the jsonb 'null' scalar, not SQL NULL)
+SELECT check_jsonb_stats_test_estimate($$js =  'null'$$, 0.1);
+SELECT check_jsonb_stats_test_estimate($$js @> 'null'$$, 0.1);
+
+-- Check boolean eq estimate (= and @> are equivalent for scalar jsonb)
+SELECT check_jsonb_stats_test_estimate($$js =  'true'$$, 0.1);
+SELECT check_jsonb_stats_test_estimate($$js @> 'true'$$, 0.1);
+SELECT check_jsonb_stats_test_estimate($$js =  'false'$$, 0.1);
+SELECT check_jsonb_stats_test_estimate($$js @> 'false'$$, 0.1);
+
+-- Check numeric eq estimate
+-- (12345 is an MCV; 3000 is mid-frequency; 1234 is a rare distinct value,
+-- hence the progressively looser tolerances)
+SELECT check_jsonb_stats_test_estimate($$js = '12345'$$, 0.1);
+SELECT check_jsonb_stats_test_estimate($$js#>'{}' = '12345'$$, 0.1);
+SELECT check_jsonb_stats_test_estimate($$js = '3000'$$, 0.3);
+SELECT check_jsonb_stats_test_estimate($$js = '1234'$$, 1);
+SELECT check_jsonb_stats_test_estimate($$js @> '6000'$$, 0.2);
+
+-- Check numeric range estimate (histogram-based)
+SELECT check_jsonb_stats_test_estimate($$js < '0'$$, 0.1);
+SELECT check_jsonb_stats_test_estimate($$js < '100'$$, 0.1);
+SELECT check_jsonb_stats_test_estimate($$js < '1000'$$, 0.1);
+SELECT check_jsonb_stats_test_estimate($$js < '3456'$$, 0.1);
+SELECT check_jsonb_stats_test_estimate($$js < '10000'$$, 0.1);
+SELECT check_jsonb_stats_test_estimate($$js < '100000'$$, 0.1);
+SELECT check_jsonb_stats_test_estimate($$js > '100' AND js < '600'$$, 0.5);
+SELECT check_jsonb_stats_test_estimate($$js > '6800' AND js < '12000'$$, 0.1);
+
+-- Check string eq estimate
+-- ("foo" is an MCV, "bar7" mid-frequency, "baz1234" a rare distinct value --
+-- hence the very loose 10x tolerance on the last one)
+SELECT check_jsonb_stats_test_estimate($$js = '"foo"'$$, 0.1);
+SELECT check_jsonb_stats_test_estimate($$js = '"bar7"'$$, 0.2);
+SELECT check_jsonb_stats_test_estimate($$js = '"baz1234"'$$, 10);
+SELECT check_jsonb_stats_test_estimate($$js @> '"bar4"'$$, 0.3);
+
+-- Check string range estimate (strings sort after booleans/numbers in jsonb)
+SELECT check_jsonb_stats_test_estimate($$js > '"foo"'$$, 0.01);
+SELECT check_jsonb_stats_test_estimate($$js > '"bar"'$$, 0.01);
+SELECT check_jsonb_stats_test_estimate($$js > '"baz"'$$, 0.01);
+
+-- Check object eq estimate
+SELECT check_jsonb_stats_test_estimate($$js = '{}'$$, 0.1);
+SELECT check_jsonb_stats_test_estimate($$js > '{}'$$, 0.1);
+
+-- Check object key eq estimate (per-path stats for the 'foo' key)
+SELECT check_jsonb_stats_test_estimate($$js -> 'foo' = '"bar"'$$, 0.2);
+SELECT check_jsonb_stats_test_estimate($$js -> 'foo' = '"baz"'$$, 0.1);
+SELECT check_jsonb_stats_test_estimate($$js -> 'foo' = '"baz5"'$$, 0.3);
+SELECT check_jsonb_stats_test_estimate($$js #> '{foo}' = '"bar"'$$, 0.2);
+
+-- Check object key range estimate
+SELECT check_jsonb_stats_test_estimate($$js -> 'foo' >= '"baz2"'$$, 0.1);
+SELECT check_jsonb_stats_test_estimate($$js -> 'foo' <  '"baz9"'$$, 0.1);
+SELECT check_jsonb_stats_test_estimate($$js -> 'foo' >= '"baz2"' AND js -> 'foo' < '"baz9"'$$, 0.1);
+
+-- Check array eq estimate
+-- (arrays sort between strings and objects in the jsonb btree ordering,
+-- so '>= [] AND < {}' selects exactly the array values)
+SELECT check_jsonb_stats_test_estimate($$js = '[]'$$, 0.1);
+SELECT check_jsonb_stats_test_estimate($$js >= '[]' AND js < '{}'$$, 0.1);
+
+-- Check variable-length array element eq estimate
+-- (positions 2 and 3 exist only in the longer random arrays)
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' -> 0 = '1'$$, 0.2);
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' -> 1 = '6'$$, 0.2);
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' -> 2 = '8'$$, 0.2);
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' -> 3 = '1'$$, 0.2);
+
+-- Check variable-length array element range estimate
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' -> 0 < '7'$$, 0.1);
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' -> 1 < '7'$$, 0.1);
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' -> 2 < '7'$$, 0.1);
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' -> 3 < '7'$$, 0.1);
+
+-- Check variable-length array containment estimate
+-- (multi-element containment multiplies per-element selectivities, so the
+-- tolerances grow with the number of required elements)
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' @> '[]'$$, 0.2);
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' @> '[1]'$$, 0.2);
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' @> '[100]'$$, 0.2);
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' @> '[1, 2]'$$, 1);
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' @> '[1, 100]'$$, 1);
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' @> '[1, 2, 100]'$$, 1);
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' @> '[1, 2, 3]'$$, 5);
+
+-- Scalar RHS of @> matches arrays containing that scalar.
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' @> '1'$$, 0.3);
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' @> '100'$$, 10);
+
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' -> 0 @> '1'$$, 0.3);
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' -> 1 @> '1'$$, 0.3);
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' -> 2 @> '1'$$, 0.3);
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' -> 3 @> '1'$$, 0.3);
+SELECT check_jsonb_stats_test_estimate($$js -> 'array' -> 0 @> '[1]'$$, 0.3);
+
+-- Same containment checks expressed at the top level via an object RHS.
+SELECT check_jsonb_stats_test_estimate($$js @> '{"array": []}'$$, 0.1);
+SELECT check_jsonb_stats_test_estimate($$js @> '{"array": [1]}'$$, 0.3);
+SELECT check_jsonb_stats_test_estimate($$js @> '{"array": [100]}'$$, 0.3);
+SELECT check_jsonb_stats_test_estimate($$js @> '{"array": [1, 2]}'$$, 1);
+SELECT check_jsonb_stats_test_estimate($$js @> '{"array": [1, 100]}'$$, 1);
+SELECT check_jsonb_stats_test_estimate($$js @> '{"array": [1, 2, 100]}'$$, 1);
+SELECT check_jsonb_stats_test_estimate($$js @> '{"array": [1, 2, 3]}'$$, 3);
+
+-- check misc containment
+-- (top-level @> against scalars, arrays, nested arrays and objects)
+SELECT check_jsonb_stats_test_estimate($$js @> '"foo"'$$, 0.1);
+SELECT check_jsonb_stats_test_estimate($$js @> '12345'$$, 0.1);
+SELECT check_jsonb_stats_test_estimate($$js @> '[]'$$, 0.1);
+SELECT check_jsonb_stats_test_estimate($$js @> '[12345]'$$, 0.1);
+SELECT check_jsonb_stats_test_estimate($$js @> '["foo"]'$$, 0.1);
+SELECT check_jsonb_stats_test_estimate($$js @> '[["foo", "bar"]]'$$, 0.1);
+SELECT check_jsonb_stats_test_estimate($$js @> '[["foo"]]'$$, 0.2);
+SELECT check_jsonb_stats_test_estimate($$js @> '[{"key": "foo"}]'$$, 0.1);
+SELECT check_jsonb_stats_test_estimate($$js @> '[null]'$$, 0.3);
+
+-- Check object key null estimate
+-- (includes the escaped '"foo \"bar"' key to exercise key quoting)
+SELECT check_jsonb_stats_test_estimate($$js -> 'foo' IS NULL$$, 0.1);
+SELECT check_jsonb_stats_test_estimate($$js -> 'foo' IS NOT NULL$$, 0.1);
+SELECT check_jsonb_stats_test_estimate($$js -> '"foo \"bar"' IS NOT NULL$$, 0.1);
+SELECT check_jsonb_stats_test_estimate($$js -> 'bad_key' IS NULL$$, 0.01);
+SELECT check_jsonb_stats_test_estimate($$js -> 'bad_key' IS NOT NULL$$, 0.01);
+
+-- Check object key existence (?, ?| any-of, ?& all-of)
+SELECT check_jsonb_stats_test_estimate($$js ? 'bad_key'$$, 10);
+SELECT check_jsonb_stats_test_estimate($$js ? 'foo'$$, 0.1);
+SELECT check_jsonb_stats_test_estimate($$js ? 'array'$$, 0.1);
+
+SELECT check_jsonb_stats_test_estimate($$js ?| '{foo,bad_key}'$$, 0.1);
+SELECT check_jsonb_stats_test_estimate($$js ?| '{foo,array}'$$, 0.1);
+SELECT check_jsonb_stats_test_estimate($$js ?& '{foo,bad_key}'$$, 0.1);
+SELECT check_jsonb_stats_test_estimate($$js ?& '{foo,bar}'$$, 0.1);
-- 
2.25.1

