From 595633e0689c91b5fe27f4076e84093f66603e95 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas@2ndquadrant.com>
Date: Fri, 13 Nov 2020 02:37:06 +0100
Subject: [PATCH] Support for extended statistics on expressions

---
 src/backend/commands/statscmds.c              | 213 +++--
 src/backend/nodes/copyfuncs.c                 |  14 +
 src/backend/nodes/equalfuncs.c                |  13 +
 src/backend/nodes/outfuncs.c                  |  12 +
 src/backend/optimizer/util/plancat.c          |  40 +
 src/backend/parser/gram.y                     |  31 +-
 src/backend/parser/parse_agg.c                |  10 +
 src/backend/parser/parse_expr.c               |   6 +
 src/backend/parser/parse_func.c               |   3 +
 src/backend/parser/parse_utilcmd.c            |  89 ++-
 src/backend/statistics/dependencies.c         | 343 +++++++-
 src/backend/statistics/extended_stats.c       | 756 +++++++++++++++++-
 src/backend/statistics/mcv.c                  | 289 ++++++-
 src/backend/statistics/mvdistinct.c           |  86 +-
 src/backend/tcop/utility.c                    |  17 +-
 src/backend/utils/adt/ruleutils.c             | 204 +++--
 src/backend/utils/adt/selfuncs.c              | 262 ++++--
 src/bin/psql/describe.c                       |   7 +-
 src/include/catalog/pg_proc.dat               |   4 +
 src/include/catalog/pg_statistic_ext.h        |   3 +
 src/include/nodes/nodes.h                     |   1 +
 src/include/nodes/parsenodes.h                |  16 +
 src/include/nodes/pathnodes.h                 |   1 +
 src/include/parser/parse_node.h               |   1 +
 src/include/parser/parse_utilcmd.h            |   2 +
 .../statistics/extended_stats_internal.h      |  23 +-
 src/include/statistics/statistics.h           |   1 +
 src/test/regress/expected/stats_ext.out       | 459 ++++++++++-
 src/test/regress/sql/stats_ext.sql            | 214 ++++-
 29 files changed, 2839 insertions(+), 281 deletions(-)

diff --git a/src/backend/commands/statscmds.c b/src/backend/commands/statscmds.c
index 3057d89d50..2d51f8a57b 100644
--- a/src/backend/commands/statscmds.c
+++ b/src/backend/commands/statscmds.c
@@ -29,6 +29,8 @@
 #include "commands/comment.h"
 #include "commands/defrem.h"
 #include "miscadmin.h"
+#include "nodes/nodeFuncs.h"
+#include "optimizer/optimizer.h"
 #include "statistics/statistics.h"
 #include "utils/builtins.h"
 #include "utils/fmgroids.h"
@@ -42,6 +44,7 @@
 static char *ChooseExtendedStatisticName(const char *name1, const char *name2,
 										 const char *label, Oid namespaceid);
 static char *ChooseExtendedStatisticNameAddition(List *exprs);
+static bool CheckMutability(Expr *expr);
 
 
 /* qsort comparator for the attnums in CreateStatistics */
@@ -62,6 +65,7 @@ ObjectAddress
 CreateStatistics(CreateStatsStmt *stmt)
 {
 	int16		attnums[STATS_MAX_DIMENSIONS];
+	int			nattnums = 0;
 	int			numcols = 0;
 	char	   *namestr;
 	NameData	stxname;
@@ -74,6 +78,8 @@ CreateStatistics(CreateStatsStmt *stmt)
 	Datum		datavalues[Natts_pg_statistic_ext_data];
 	bool		datanulls[Natts_pg_statistic_ext_data];
 	int2vector *stxkeys;
+	List	   *stxexprs = NIL;
+	Datum		exprsDatum;
 	Relation	statrel;
 	Relation	datarel;
 	Relation	rel = NULL;
@@ -192,56 +198,95 @@ CreateStatistics(CreateStatsStmt *stmt)
 	foreach(cell, stmt->exprs)
 	{
 		Node	   *expr = (Node *) lfirst(cell);
-		ColumnRef  *cref;
-		char	   *attname;
+		StatsElem  *selem;
 		HeapTuple	atttuple;
 		Form_pg_attribute attForm;
 		TypeCacheEntry *type;
 
-		if (!IsA(expr, ColumnRef))
+		if (!IsA(expr, StatsElem))
 			ereport(ERROR,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 					 errmsg("only simple column references are allowed in CREATE STATISTICS")));
-		cref = (ColumnRef *) expr;
+		selem = (StatsElem *) expr;
 
-		if (list_length(cref->fields) != 1)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("only simple column references are allowed in CREATE STATISTICS")));
-		attname = strVal((Value *) linitial(cref->fields));
-
-		atttuple = SearchSysCacheAttName(relid, attname);
-		if (!HeapTupleIsValid(atttuple))
-			ereport(ERROR,
-					(errcode(ERRCODE_UNDEFINED_COLUMN),
-					 errmsg("column \"%s\" does not exist",
-							attname)));
-		attForm = (Form_pg_attribute) GETSTRUCT(atttuple);
-
-		/* Disallow use of system attributes in extended stats */
-		if (attForm->attnum <= 0)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("statistics creation on system columns is not supported")));
-
-		/* Disallow data types without a less-than operator */
-		type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR);
-		if (type->lt_opr == InvalidOid)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("column \"%s\" cannot be used in statistics because its type %s has no default btree operator class",
-							attname, format_type_be(attForm->atttypid))));
-
-		/* Make sure no more than STATS_MAX_DIMENSIONS columns are used */
-		if (numcols >= STATS_MAX_DIMENSIONS)
-			ereport(ERROR,
-					(errcode(ERRCODE_TOO_MANY_COLUMNS),
-					 errmsg("cannot have more than %d columns in statistics",
-							STATS_MAX_DIMENSIONS)));
-
-		attnums[numcols] = attForm->attnum;
-		numcols++;
-		ReleaseSysCache(atttuple);
+		if (selem->name)	/* column reference */
+		{
+			char	   *attname;
+			attname = selem->name;
+
+			atttuple = SearchSysCacheAttName(relid, attname);
+			if (!HeapTupleIsValid(atttuple))
+				ereport(ERROR,
+						(errcode(ERRCODE_UNDEFINED_COLUMN),
+						 errmsg("column \"%s\" does not exist",
+								attname)));
+			attForm = (Form_pg_attribute) GETSTRUCT(atttuple);
+
+			/* Disallow use of system attributes in extended stats */
+			if (attForm->attnum <= 0)
+				ereport(ERROR,
+						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						 errmsg("statistics creation on system columns is not supported")));
+
+			/* Disallow data types without a less-than operator */
+			type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR);
+			if (type->lt_opr == InvalidOid)
+				ereport(ERROR,
+						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						 errmsg("column \"%s\" cannot be used in statistics because its type %s has no default btree operator class",
+								attname, format_type_be(attForm->atttypid))));
+
+			/* Make sure no more than STATS_MAX_DIMENSIONS columns are used */
+			if (numcols >= STATS_MAX_DIMENSIONS)
+				ereport(ERROR,
+						(errcode(ERRCODE_TOO_MANY_COLUMNS),
+						 errmsg("cannot have more than %d columns in statistics",
+								STATS_MAX_DIMENSIONS)));
+
+			attnums[nattnums] = attForm->attnum;
+			nattnums++;
+			numcols++;
+			ReleaseSysCache(atttuple);
+		}
+		else	/* expression */
+		{
+			Node	   *expr = selem->expr;
+			TypeCacheEntry *type;
+			Oid			atttype;
+
+			Assert(expr != NULL);
+
+			/*
+			 * An expression using mutable functions is probably wrong,
+			 * since if you aren't going to get the same result for the
+			 * same data every time, it's not clear what the index entries
+			 * mean at all.
+			 */
+			if (CheckMutability((Expr *) expr))
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+						 errmsg("functions in statistics expression must be marked IMMUTABLE")));
+
+			/* Disallow data types without a less-than operator */
+			atttype = exprType(expr);
+			type = lookup_type_cache(atttype, TYPECACHE_LT_OPR);
+			if (type->lt_opr == InvalidOid)
+				ereport(ERROR,
+						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						 errmsg("expression cannot be used in statistics because its type %s has no default btree operator class",
+								format_type_be(atttype))));
+
+			/* Make sure no more than STATS_MAX_DIMENSIONS columns are used */
+			if (numcols >= STATS_MAX_DIMENSIONS)
+				ereport(ERROR,
+						(errcode(ERRCODE_TOO_MANY_COLUMNS),
+						 errmsg("cannot have more than %d columns in statistics",
+								STATS_MAX_DIMENSIONS)));
+
+			numcols++;
+
+			stxexprs = lappend(stxexprs, expr);
+		}
 	}
 
 	/*
@@ -258,13 +303,13 @@ CreateStatistics(CreateStatsStmt *stmt)
 	 * it does not hurt (it does not affect the efficiency, unlike for
 	 * indexes, for example).
 	 */
-	qsort(attnums, numcols, sizeof(int16), compare_int16);
+	qsort(attnums, nattnums, sizeof(int16), compare_int16);
 
 	/*
 	 * Check for duplicates in the list of columns. The attnums are sorted so
 	 * just check consecutive elements.
 	 */
-	for (i = 1; i < numcols; i++)
+	for (i = 1; i < nattnums; i++)
 	{
 		if (attnums[i] == attnums[i - 1])
 			ereport(ERROR,
@@ -273,7 +318,7 @@ CreateStatistics(CreateStatsStmt *stmt)
 	}
 
 	/* Form an int2vector representation of the sorted column list */
-	stxkeys = buildint2vector(attnums, numcols);
+	stxkeys = buildint2vector(attnums, nattnums);
 
 	/*
 	 * Parse the statistics kinds.
@@ -325,6 +370,18 @@ CreateStatistics(CreateStatsStmt *stmt)
 	Assert(ntypes > 0 && ntypes <= lengthof(types));
 	stxkind = construct_array(types, ntypes, CHAROID, 1, true, TYPALIGN_CHAR);
 
+	/* convert the expressions (if any) to a text datum */
+	if (stxexprs != NIL)
+	{
+		char	   *exprsString;
+
+		exprsString = nodeToString(stxexprs);
+		exprsDatum = CStringGetTextDatum(exprsString);
+		pfree(exprsString);
+	}
+	else
+		exprsDatum = (Datum) 0;
+
 	statrel = table_open(StatisticExtRelationId, RowExclusiveLock);
 
 	/*
@@ -344,6 +401,10 @@ CreateStatistics(CreateStatsStmt *stmt)
 	values[Anum_pg_statistic_ext_stxkeys - 1] = PointerGetDatum(stxkeys);
 	values[Anum_pg_statistic_ext_stxkind - 1] = PointerGetDatum(stxkind);
 
+	values[Anum_pg_statistic_ext_stxexprs - 1] = exprsDatum;
+	if (exprsDatum == (Datum) 0)
+		nulls[Anum_pg_statistic_ext_stxexprs - 1] = true;
+
 	/* insert it into pg_statistic_ext */
 	htup = heap_form_tuple(statrel->rd_att, values, nulls);
 	CatalogTupleInsert(statrel, htup);
@@ -389,12 +450,40 @@ CreateStatistics(CreateStatsStmt *stmt)
 	 */
 	ObjectAddressSet(myself, StatisticExtRelationId, statoid);
 
-	for (i = 0; i < numcols; i++)
+	for (i = 0; i < nattnums; i++)
 	{
 		ObjectAddressSubSet(parentobject, RelationRelationId, relid, attnums[i]);
 		recordDependencyOn(&myself, &parentobject, DEPENDENCY_AUTO);
 	}
 
+	/*
+	 * If there are no simply-referenced columns, give the statistics an
+	 * auto dependency on the whole table.  In most cases, this will
+	 * be redundant, but it might not be if the statistics expressions
+	 * contain no Vars.
+	 *
+	 * XXX This is copied from index_create, not sure if it's applicable
+	 * to extended statistics too.
+	 */
+	if (nattnums)
+	{
+		ObjectAddressSet(parentobject, RelationRelationId, relid);
+		recordDependencyOn(&myself, &parentobject, DEPENDENCY_AUTO);
+	}
+
+	/*
+	 * Store dependencies on anything mentioned in statistics expressions,
+	 * just like we do e.g. in index_create.
+	 */
+	if (stxexprs)
+	{
+		recordDependencyOnSingleRelExpr(&myself,
+										(Node *) stxexprs,
+										relid,
+										DEPENDENCY_NORMAL,
+										DEPENDENCY_AUTO, false, true);
+	}
+
 	/*
 	 * Also add dependencies on namespace and owner.  These are required
 	 * because the stats object might have a different namespace and/or owner
@@ -724,14 +813,14 @@ ChooseExtendedStatisticNameAddition(List *exprs)
 	buf[0] = '\0';
 	foreach(lc, exprs)
 	{
-		ColumnRef  *cref = (ColumnRef *) lfirst(lc);
+		StatsElem  *selem = (StatsElem *) lfirst(lc);
 		const char *name;
 
 		/* It should be one of these, but just skip if it happens not to be */
-		if (!IsA(cref, ColumnRef))
+		if (!IsA(selem, StatsElem))
 			continue;
 
-		name = strVal((Value *) linitial(cref->fields));
+		name = selem->name;
 
 		if (buflen > 0)
 			buf[buflen++] = '_';	/* insert _ between names */
@@ -747,3 +836,29 @@ ChooseExtendedStatisticNameAddition(List *exprs)
 	}
 	return pstrdup(buf);
 }
+
+/*
+ * CheckMutability
+ *		Test whether given expression is mutable
+ */
+static bool
+CheckMutability(Expr *expr)
+{
+	/*
+	 * First run the expression through the planner.  This has a couple of
+	 * important consequences.  First, function default arguments will get
+	 * inserted, which may affect volatility (consider "default now()").
+	 * Second, inline-able functions will get inlined, which may allow us to
+	 * conclude that the function is really less volatile than it's marked. As
+	 * an example, polymorphic functions must be marked with the most volatile
+	 * behavior that they have for any input type, but once we inline the
+	 * function we may be able to conclude that it's not so volatile for the
+	 * particular input type we're dealing with.
+	 *
+	 * We assume here that expression_planner() won't scribble on its input.
+	 */
+	expr = expression_planner(expr);
+
+	/* Now we can search for non-immutable functions */
+	return contain_mutable_functions((Node *) expr);
+}
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 5a591d0a75..0e44aaad59 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -2922,6 +2922,17 @@ _copyIndexElem(const IndexElem *from)
 	return newnode;
 }
 
+static StatsElem *
+_copyStatsElem(const StatsElem *from)
+{
+	StatsElem  *newnode = makeNode(StatsElem);
+
+	COPY_STRING_FIELD(name);
+	COPY_NODE_FIELD(expr);
+
+	return newnode;
+}
+
 static ColumnDef *
 _copyColumnDef(const ColumnDef *from)
 {
@@ -5615,6 +5626,9 @@ copyObjectImpl(const void *from)
 		case T_IndexElem:
 			retval = _copyIndexElem(from);
 			break;
+		case T_StatsElem:
+			retval = _copyStatsElem(from);
+			break;
 		case T_ColumnDef:
 			retval = _copyColumnDef(from);
 			break;
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index e2895a8985..692dd7ca17 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -2577,6 +2577,16 @@ _equalIndexElem(const IndexElem *a, const IndexElem *b)
 	return true;
 }
 
+
+static bool
+_equalStatsElem(const StatsElem *a, const StatsElem *b)
+{
+	COMPARE_STRING_FIELD(name);
+	COMPARE_NODE_FIELD(expr);
+
+	return true;
+}
+
 static bool
 _equalColumnDef(const ColumnDef *a, const ColumnDef *b)
 {
@@ -3670,6 +3680,9 @@ equal(const void *a, const void *b)
 		case T_IndexElem:
 			retval = _equalIndexElem(a, b);
 			break;
+		case T_StatsElem:
+			retval = _equalStatsElem(a, b);
+			break;
 		case T_ColumnDef:
 			retval = _equalColumnDef(a, b);
 			break;
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 4504b1503b..d05a866a24 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -2900,6 +2900,15 @@ _outIndexElem(StringInfo str, const IndexElem *node)
 	WRITE_ENUM_FIELD(nulls_ordering, SortByNulls);
 }
 
+static void
+_outStatsElem(StringInfo str, const StatsElem *node)
+{
+	WRITE_NODE_TYPE("STATSELEM");
+
+	WRITE_STRING_FIELD(name);
+	WRITE_NODE_FIELD(expr);
+}
+
 static void
 _outQuery(StringInfo str, const Query *node)
 {
@@ -4210,6 +4219,9 @@ outNode(StringInfo str, const void *obj)
 			case T_IndexElem:
 				_outIndexElem(str, obj);
 				break;
+			case T_StatsElem:
+				_outStatsElem(str, obj);
+				break;
 			case T_Query:
 				_outQuery(str, obj);
 				break;
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 52c01eb86b..026954d31e 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -35,6 +35,7 @@
 #include "foreign/fdwapi.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
 #include "nodes/supportnodes.h"
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
@@ -1315,6 +1316,7 @@ get_relation_statistics(RelOptInfo *rel, Relation relation)
 		HeapTuple	dtup;
 		Bitmapset  *keys = NULL;
 		int			i;
+		List	   *exprs = NIL;
 
 		htup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statOid));
 		if (!HeapTupleIsValid(htup))
@@ -1333,6 +1335,41 @@ get_relation_statistics(RelOptInfo *rel, Relation relation)
 		for (i = 0; i < staForm->stxkeys.dim1; i++)
 			keys = bms_add_member(keys, staForm->stxkeys.values[i]);
 
+		/*
+		 * preprocess expression (if any)
+		 *
+		 * FIXME we probably need to cache the result somewhere
+		 */
+		{
+			bool		isnull;
+			Datum		datum;
+
+			/* decode expression (if any) */
+			datum = SysCacheGetAttr(STATEXTOID, htup,
+									Anum_pg_statistic_ext_stxexprs, &isnull);
+
+			if (!isnull)
+			{
+				char *exprsString;
+
+				exprsString = TextDatumGetCString(datum);
+				exprs = (List *) stringToNode(exprsString);
+				pfree(exprsString);
+
+				/*
+				 * Run the expressions through eval_const_expressions. This is not just an
+				 * optimization, but is necessary, because the planner will be comparing
+				 * them to similarly-processed qual clauses, and may fail to detect valid
+				 * matches without this.  We must not use canonicalize_qual, however,
+				 * since these aren't qual expressions.
+				 */
+				exprs = (List *) eval_const_expressions(NULL, (Node *) exprs);
+
+				/* May as well fix opfuncids too */
+				fix_opfuncids((Node *) exprs);
+			}
+		}
+
 		/* add one StatisticExtInfo for each kind built */
 		if (statext_is_kind_built(dtup, STATS_EXT_NDISTINCT))
 		{
@@ -1342,6 +1379,7 @@ get_relation_statistics(RelOptInfo *rel, Relation relation)
 			info->rel = rel;
 			info->kind = STATS_EXT_NDISTINCT;
 			info->keys = bms_copy(keys);
+			info->exprs = exprs;
 
 			stainfos = lappend(stainfos, info);
 		}
@@ -1354,6 +1392,7 @@ get_relation_statistics(RelOptInfo *rel, Relation relation)
 			info->rel = rel;
 			info->kind = STATS_EXT_DEPENDENCIES;
 			info->keys = bms_copy(keys);
+			info->exprs = exprs;
 
 			stainfos = lappend(stainfos, info);
 		}
@@ -1366,6 +1405,7 @@ get_relation_statistics(RelOptInfo *rel, Relation relation)
 			info->rel = rel;
 			info->kind = STATS_EXT_MCV;
 			info->keys = bms_copy(keys);
+			info->exprs = exprs;
 
 			stainfos = lappend(stainfos, info);
 		}
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 2cb377d034..40d85881ed 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -233,6 +233,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 	WindowDef			*windef;
 	JoinExpr			*jexpr;
 	IndexElem			*ielem;
+	StatsElem			*selem;
 	Alias				*alias;
 	RangeVar			*range;
 	IntoClause			*into;
@@ -396,7 +397,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 				old_aggr_definition old_aggr_list
 				oper_argtypes RuleActionList RuleActionMulti
 				opt_column_list columnList opt_name_list
-				sort_clause opt_sort_clause sortby_list index_params
+				sort_clause opt_sort_clause sortby_list index_params stats_params
 				opt_include opt_c_include index_including_params
 				name_list role_list from_clause from_list opt_array_bounds
 				qualified_name_list any_name any_name_list type_name_list
@@ -502,6 +503,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type <list>	func_alias_clause
 %type <sortby>	sortby
 %type <ielem>	index_elem index_elem_options
+%type <selem>	stats_param
 %type <node>	table_ref
 %type <jexpr>	joined_table
 %type <range>	relation_expr
@@ -4007,7 +4009,7 @@ ExistingIndex:   USING INDEX name					{ $$ = $3; }
 
 CreateStatsStmt:
 			CREATE STATISTICS any_name
-			opt_name_list ON expr_list FROM from_list
+			opt_name_list ON stats_params FROM from_list
 				{
 					CreateStatsStmt *n = makeNode(CreateStatsStmt);
 					n->defnames = $3;
@@ -4019,7 +4021,7 @@ CreateStatsStmt:
 					$$ = (Node *)n;
 				}
 			| CREATE STATISTICS IF_P NOT EXISTS any_name
-			opt_name_list ON expr_list FROM from_list
+			opt_name_list ON stats_params FROM from_list
 				{
 					CreateStatsStmt *n = makeNode(CreateStatsStmt);
 					n->defnames = $6;
@@ -4032,6 +4034,29 @@ CreateStatsStmt:
 				}
 			;
 
+stats_params:	stats_param							{ $$ = list_make1($1); }
+			| stats_params ',' stats_param			{ $$ = lappend($1, $3); }
+		;
+
+stats_param:	ColId
+				{
+					$$ = makeNode(StatsElem);
+					$$->name = $1;
+					$$->expr = NULL;
+				}
+			| func_expr_windowless
+				{
+					$$ = makeNode(StatsElem);
+					$$->name = NULL;
+					$$->expr = $1;
+				}
+			| '(' a_expr ')'
+				{
+					$$ = makeNode(StatsElem);
+					$$->name = NULL;
+					$$->expr = $2;
+				}
+		;
 
 /*****************************************************************************
  *
diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c
index 783f3fe8f2..12b9e855d5 100644
--- a/src/backend/parser/parse_agg.c
+++ b/src/backend/parser/parse_agg.c
@@ -484,6 +484,13 @@ check_agglevels_and_constraints(ParseState *pstate, Node *expr)
 			else
 				err = _("grouping operations are not allowed in index predicates");
 
+			break;
+		case EXPR_KIND_STATS_EXPRESSION:
+			if (isAgg)
+				err = _("aggregate functions are not allowed in statistics expressions");
+			else
+				err = _("grouping operations are not allowed in statistics expressions");
+
 			break;
 		case EXPR_KIND_ALTER_COL_TRANSFORM:
 			if (isAgg)
@@ -906,6 +913,9 @@ transformWindowFuncCall(ParseState *pstate, WindowFunc *wfunc,
 		case EXPR_KIND_INDEX_EXPRESSION:
 			err = _("window functions are not allowed in index expressions");
 			break;
+		case EXPR_KIND_STATS_EXPRESSION:
+			err = _("window functions are not allowed in stats expressions");
+			break;
 		case EXPR_KIND_INDEX_PREDICATE:
 			err = _("window functions are not allowed in index predicates");
 			break;
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index f5165863d7..56803256a4 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -564,6 +564,7 @@ transformColumnRef(ParseState *pstate, ColumnRef *cref)
 		case EXPR_KIND_FUNCTION_DEFAULT:
 		case EXPR_KIND_INDEX_EXPRESSION:
 		case EXPR_KIND_INDEX_PREDICATE:
+		case EXPR_KIND_STATS_EXPRESSION:
 		case EXPR_KIND_ALTER_COL_TRANSFORM:
 		case EXPR_KIND_EXECUTE_PARAMETER:
 		case EXPR_KIND_TRIGGER_WHEN:
@@ -1914,6 +1915,9 @@ transformSubLink(ParseState *pstate, SubLink *sublink)
 		case EXPR_KIND_INDEX_PREDICATE:
 			err = _("cannot use subquery in index predicate");
 			break;
+		case EXPR_KIND_STATS_EXPRESSION:
+			err = _("cannot use subquery in statistics expression");
+			break;
 		case EXPR_KIND_ALTER_COL_TRANSFORM:
 			err = _("cannot use subquery in transform expression");
 			break;
@@ -3526,6 +3530,8 @@ ParseExprKindName(ParseExprKind exprKind)
 			return "index expression";
 		case EXPR_KIND_INDEX_PREDICATE:
 			return "index predicate";
+		case EXPR_KIND_STATS_EXPRESSION:
+			return "statistics expression";
 		case EXPR_KIND_ALTER_COL_TRANSFORM:
 			return "USING";
 		case EXPR_KIND_EXECUTE_PARAMETER:
diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c
index 8b4e3ca5e1..6730c5a3c3 100644
--- a/src/backend/parser/parse_func.c
+++ b/src/backend/parser/parse_func.c
@@ -2501,6 +2501,9 @@ check_srf_call_placement(ParseState *pstate, Node *last_srf, int location)
 		case EXPR_KIND_INDEX_PREDICATE:
 			err = _("set-returning functions are not allowed in index predicates");
 			break;
+		case EXPR_KIND_STATS_EXPRESSION:
+			err = _("set-returning functions are not allowed in stats expressions");
+			break;
 		case EXPR_KIND_ALTER_COL_TRANSFORM:
 			err = _("set-returning functions are not allowed in transform expressions");
 			break;
diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c
index 254c0f65c2..a9640a96ea 100644
--- a/src/backend/parser/parse_utilcmd.c
+++ b/src/backend/parser/parse_utilcmd.c
@@ -1881,14 +1881,15 @@ generateClonedExtStatsStmt(RangeVar *heapRel, Oid heapRelid,
 	/* Determine which columns the statistics are on */
 	for (i = 0; i < statsrec->stxkeys.dim1; i++)
 	{
-		ColumnRef  *cref = makeNode(ColumnRef);
+		StatsElem  *selem = makeNode(StatsElem);
 		AttrNumber	attnum = statsrec->stxkeys.values[i];
 
-		cref->fields = list_make1(makeString(get_attname(heapRelid,
-														 attnum, false)));
-		cref->location = -1;
+		selem->name = get_attname(heapRelid, attnum, false);
+		selem->expr = NULL;
 
-		def_names = lappend(def_names, cref);
+		/* FIXME handle expressions properly */
+
+		def_names = lappend(def_names, selem);
 	}
 
 	/* finally, build the output node */
@@ -2823,6 +2824,84 @@ transformIndexStmt(Oid relid, IndexStmt *stmt, const char *queryString)
 	return stmt;
 }
 
+/*
+ * transformStatsStmt - parse analysis for CREATE STATISTICS
+ *
+ * To avoid race conditions, it's important that this function rely only on
+ * the passed-in relid (and not on stmt->relation) to determine the target
+ * relation.
+ */
+CreateStatsStmt *
+transformStatsStmt(Oid relid, CreateStatsStmt *stmt, const char *queryString)
+{
+	ParseState *pstate;
+	ParseNamespaceItem *nsitem;
+	ListCell   *l;
+	Relation	rel;
+
+	/* Nothing to do if statement already transformed. */
+	if (stmt->transformed)
+		return stmt;
+
+	/*
+	 * We must not scribble on the passed-in CreateStatsStmt, so copy it.  (This is
+	 * overkill, but easy.)
+	 */
+	stmt = copyObject(stmt);
+
+	/* Set up pstate */
+	pstate = make_parsestate(NULL);
+	pstate->p_sourcetext = queryString;
+
+	/*
+	 * Put the parent table into the rtable so that the expressions can refer
+	 * to its fields without qualification.  Caller is responsible for locking
+	 * relation, but we still need to open it.
+	 */
+	rel = relation_open(relid, NoLock);
+	nsitem = addRangeTableEntryForRelation(pstate, rel,
+										   AccessShareLock,
+										   NULL, false, true);
+
+	/* no to join list, yes to namespaces */
+	addNSItemToQuery(pstate, nsitem, false, true, true);
+
+	/* take care of any expressions */
+	foreach(l, stmt->exprs)
+	{
+		StatsElem  *selem = (StatsElem *) lfirst(l);
+
+		if (selem->expr)
+		{
+			/* Now do parse transformation of the expression */
+			selem->expr = transformExpr(pstate, selem->expr,
+										EXPR_KIND_STATS_EXPRESSION);
+
+			/* We have to fix its collations too */
+			assign_expr_collations(pstate, selem->expr);
+		}
+	}
+
+	/*
+	 * Check that only the base rel is mentioned.  (This should be dead code
+	 * now that add_missing_from is history.)
+	 */
+	if (list_length(pstate->p_rtable) != 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("statistics expressions and predicates can refer only to the table being indexed")));
+
+	free_parsestate(pstate);
+
+	/* Close relation */
+	table_close(rel, NoLock);
+
+	/* Mark statement as successfully transformed */
+	stmt->transformed = true;
+
+	return stmt;
+}
+
 
 /*
  * transformRuleStmt -
diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c
index d950b4eabe..fd7668220a 100644
--- a/src/backend/statistics/dependencies.c
+++ b/src/backend/statistics/dependencies.c
@@ -70,15 +70,22 @@ static void generate_dependencies(DependencyGenerator state);
 static DependencyGenerator DependencyGenerator_init(int n, int k);
 static void DependencyGenerator_free(DependencyGenerator state);
 static AttrNumber *DependencyGenerator_next(DependencyGenerator state);
-static double dependency_degree(int numrows, HeapTuple *rows, int k,
-								AttrNumber *dependency, VacAttrStats **stats, Bitmapset *attrs);
-static bool dependency_is_fully_matched(MVDependency *dependency,
-										Bitmapset *attnums);
+static double dependency_degree(int numrows, HeapTuple *rows,
+								Datum *exprvals, bool *exprnulls, Oid *exprtypes,
+								int nexprs, int k,
+								AttrNumber *dependency, VacAttrStats **stats,
+								Bitmapset *attrs);
+static bool dependency_is_fully_matched(StatisticExtInfo *info,
+										MVDependency *dependency,
+										Bitmapset *attnums,
+										List *exprs);
 static bool dependency_is_compatible_clause(Node *clause, Index relid,
 											AttrNumber *attnum);
-static MVDependency *find_strongest_dependency(MVDependencies **dependencies,
-											   int ndependencies,
-											   Bitmapset *attnums);
+static bool dependency_is_compatible_expression(Node *clause, Index relid,
+												List *statlist, Node **expr);
+static MVDependency *find_strongest_dependency(MVDependencies **dependencies, StatisticExtInfo **infos,
+						  int ndependencies, Bitmapset *attnums,
+						  List *exprs, StatisticExtInfo **info);
 static Selectivity clauselist_apply_dependencies(PlannerInfo *root, List *clauses,
 												 int varRelid, JoinType jointype,
 												 SpecialJoinInfo *sjinfo,
@@ -219,8 +226,8 @@ DependencyGenerator_next(DependencyGenerator state)
  * the last one.
  */
 static double
-dependency_degree(int numrows, HeapTuple *rows, int k, AttrNumber *dependency,
-				  VacAttrStats **stats, Bitmapset *attrs)
+dependency_degree(int numrows, HeapTuple *rows, Datum *exprvals, bool *exprnulls, Oid *exprtypes,
+				  int nexprs, int k, AttrNumber *dependency, VacAttrStats **stats, Bitmapset *attrs)
 {
 	int			i,
 				nitems;
@@ -288,9 +295,11 @@ dependency_degree(int numrows, HeapTuple *rows, int k, AttrNumber *dependency,
 	 * XXX This relies on all stats entries pointing to the same tuple
 	 * descriptor.  For now that assumption holds, but it might change in the
 	 * future for example if we support statistics on multiple tables.
+	 *
+	 * FIXME pass proper exprtypes
 	 */
-	items = build_sorted_items(numrows, &nitems, rows, stats[0]->tupDesc,
-							   mss, k, attnums_dep);
+	items = build_sorted_items(numrows, &nitems, rows, exprvals, exprnulls, exprtypes,
+							   nexprs, stats[0]->tupDesc, mss, k, attnums_dep);
 
 	/*
 	 * Walk through the sorted array, split it into rows according to the
@@ -360,7 +369,10 @@ dependency_degree(int numrows, HeapTuple *rows, int k, AttrNumber *dependency,
  *	   (c) -> b
  */
 MVDependencies *
-statext_dependencies_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
+statext_dependencies_build(int numrows, HeapTuple *rows,
+						   Datum *exprvals, bool *exprnulls,
+						   Oid *exprtypes, Oid *exprcollations,
+						   Bitmapset *attrs, List *exprs,
 						   VacAttrStats **stats)
 {
 	int			i,
@@ -371,6 +383,15 @@ statext_dependencies_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
 	/* result */
 	MVDependencies *dependencies = NULL;
 
+	/*
+	 * Copy the bitmapset and add fake attnums representing expressions,
+	 * starting above MaxHeapAttributeNumber.
+	 */
+	attrs = bms_copy(attrs);
+
+	for (i = 1; i <= list_length(exprs); i++)
+		attrs = bms_add_member(attrs, MaxHeapAttributeNumber + i);
+
 	/*
 	 * Transform the bms into an array, to make accessing i-th member easier.
 	 */
@@ -398,7 +419,9 @@ statext_dependencies_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
 			MVDependency *d;
 
 			/* compute how valid the dependency seems */
-			degree = dependency_degree(numrows, rows, k, dependency, stats, attrs);
+			degree = dependency_degree(numrows, rows, exprvals, exprnulls, exprtypes,
+									   list_length(exprs), k, dependency,
+									   stats, attrs);
 
 			/*
 			 * if the dependency seems entirely invalid, don't store it
@@ -441,6 +464,8 @@ statext_dependencies_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
 		DependencyGenerator_free(DependencyGenerator);
 	}
 
+	pfree(attrs);
+
 	return dependencies;
 }
 
@@ -600,9 +625,43 @@ statext_dependencies_deserialize(bytea *data)
  *		attributes (assuming the clauses are suitable equality clauses)
  */
 static bool
-dependency_is_fully_matched(MVDependency *dependency, Bitmapset *attnums)
+dependency_is_fully_matched(StatisticExtInfo *info, MVDependency *dependency, Bitmapset *attnums, List *exprs)
 {
 	int			j;
+	bool		result = true;	/* match by default */
+
+	/*
+	 * XXX copy attnums, so that we can add attnums for expressions. But
+	 * only when there are expressions.
+	 */
+	if (exprs)
+	{
+		ListCell   *lc;
+
+		attnums = bms_copy(attnums);
+
+		/* loop over expressions, find a matching expression in statistics */
+		foreach (lc, exprs)
+		{
+			ListCell   *lc2;
+			Node	   *clause_expr = (Node *) lfirst(lc);
+			int			offset = 0;
+
+			foreach (lc2, info->exprs)
+			{
+				Node	   *stat_expr = (Node *) lfirst(lc2);
+				offset++;
+
+				/* found a match */
+				if (equal(stat_expr, clause_expr))
+				{
+					attnums = bms_add_member(attnums,
+											 bms_num_members(info->keys) + MaxHeapAttributeNumber + offset);
+					break;
+				}
+			}
+		}
+	}
 
 	/*
 	 * Check that the dependency actually is fully covered by clauses. We have
@@ -613,10 +672,16 @@ dependency_is_fully_matched(MVDependency *dependency, Bitmapset *attnums)
 		int			attnum = dependency->attributes[j];
 
 		if (!bms_is_member(attnum, attnums))
-			return false;
+		{
+			result = false;
+			break;
+		}
 	}
 
-	return true;
+	if (exprs)
+		bms_free(attnums);
+
+	return result;
 }
 
 /*
@@ -927,8 +992,9 @@ dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
  * (see the comment in dependencies_clauselist_selectivity).
  */
 static MVDependency *
-find_strongest_dependency(MVDependencies **dependencies, int ndependencies,
-						  Bitmapset *attnums)
+find_strongest_dependency(MVDependencies **dependencies, StatisticExtInfo **infos,
+						  int ndependencies, Bitmapset *attnums,
+						  List *exprs, StatisticExtInfo **info)
 {
 	int			i,
 				j;
@@ -936,6 +1002,7 @@ find_strongest_dependency(MVDependencies **dependencies, int ndependencies,
 
 	/* number of attnums in clauses */
 	int			nattnums = bms_num_members(attnums);
+	int			nexprs = list_length(exprs);
 
 	/*
 	 * Iterate over the MVDependency items and find the strongest one from the
@@ -952,7 +1019,7 @@ find_strongest_dependency(MVDependencies **dependencies, int ndependencies,
 			 * Skip dependencies referencing more attributes than available
 			 * clauses, as those can't be fully matched.
 			 */
-			if (dependency->nattributes > nattnums)
+			if (dependency->nattributes > nattnums + nexprs)
 				continue;
 
 			if (strongest)
@@ -968,12 +1035,15 @@ find_strongest_dependency(MVDependencies **dependencies, int ndependencies,
 			}
 
 			/*
-			 * this dependency is stronger, but we must still check that it's
+			 * This dependency is stronger, but we must still check that it's
 			 * fully matched to these attnums. We perform this check last as
 			 * it's slightly more expensive than the previous checks.
 			 */
-			if (dependency_is_fully_matched(dependency, attnums))
+			if (dependency_is_fully_matched(infos[i], dependency, attnums, exprs))
+			{
 				strongest = dependency; /* save new best match */
+				*info = infos[i];
+			}
 		}
 	}
 
@@ -1042,7 +1112,10 @@ clauselist_apply_dependencies(PlannerInfo *root, List *clauses,
 		{
 			AttrNumber	attnum = dependencies[i]->attributes[j];
 
-			attnums = bms_add_member(attnums, attnum);
+			if (attnum < MaxHeapAttributeNumber)
+				attnums = bms_add_member(attnums, attnum);
+			else
+				elog(ERROR, "FIXME clauselist_apply_dependencies");
 		}
 	}
 
@@ -1157,6 +1230,131 @@ clauselist_apply_dependencies(PlannerInfo *root, List *clauses,
 	return s1;
 }
 
+/*
+ * Similar to dependency_is_compatible_clause, but don't enforce that the
+ * expression is a simple Var. OTOH we check that there's at least one
+ * statistics matching the expression.
+ */
+static bool
+dependency_is_compatible_expression(Node *clause, Index relid, List *statlist, Node **expr)
+{
+	List	   *vars;
+	ListCell   *lc, *lc2;
+
+	RestrictInfo *rinfo = (RestrictInfo *) clause;
+	Node		   *clause_expr;
+
+	if (!IsA(rinfo, RestrictInfo))
+		return false;
+
+	/* Pseudoconstants are not interesting (they couldn't contain a Var) */
+	if (rinfo->pseudoconstant)
+		return false;
+
+	/* Clauses referencing multiple, or no, varnos are incompatible */
+	if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
+		return false;
+
+	if (is_opclause(rinfo->clause))
+	{
+		/* If it's an opclause, check for Var = Const or Const = Var. */
+		OpExpr	   *expr = (OpExpr *) rinfo->clause;
+
+		/* Only expressions with two arguments are candidates. */
+		if (list_length(expr->args) != 2)
+			return false;
+
+		/* Make sure non-selected argument is a pseudoconstant. */
+		if (is_pseudo_constant_clause(lsecond(expr->args)))
+			clause_expr = linitial(expr->args);
+		else if (is_pseudo_constant_clause(linitial(expr->args)))
+			clause_expr = lsecond(expr->args);
+		else
+			return false;
+
+		/*
+		 * If it's not an "=" operator, just ignore the clause, as it's not
+		 * compatible with functional dependencies.
+		 *
+		 * This uses the function for estimating selectivity, not the operator
+		 * directly (a bit awkward, but well ...).
+		 *
+		 * XXX this is pretty dubious; probably it'd be better to check btree
+		 * or hash opclass membership, so as not to be fooled by custom
+		 * selectivity functions, and to be more consistent with decisions
+		 * elsewhere in the planner.
+		 */
+		if (get_oprrest(expr->opno) != F_EQSEL)
+			return false;
+
+		/* OK to proceed with checking "var" */
+	}
+	else if (is_notclause(rinfo->clause))
+	{
+		/*
+		 * "NOT x" can be interpreted as "x = false", so get the argument and
+		 * proceed with seeing if it's a suitable Var.
+		 */
+		clause_expr = (Node *) get_notclausearg(rinfo->clause);
+	}
+	else
+	{
+		/*
+		 * A boolean expression "x" can be interpreted as "x = true", so
+		 * proceed with seeing if it's a suitable Var.
+		 */
+		clause_expr = (Node *) rinfo->clause;
+	}
+
+	/*
+	 * We may ignore any RelabelType node above the operand.  (There won't be
+	 * more than one, since eval_const_expressions has been applied already.)
+	 */
+	if (IsA(clause_expr, RelabelType))
+		clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
+
+	vars = pull_var_clause(clause_expr, 0);
+
+	foreach (lc, vars)
+	{
+		Var *var = (Var *) lfirst(lc);
+
+		/* Ensure Var is from the correct relation */
+		if (var->varno != relid)
+			return false;
+
+		/* We also better ensure the Var is from the current level */
+		if (var->varlevelsup != 0)
+			return false;
+
+		/* Also ignore system attributes (we don't allow stats on those) */
+		if (!AttrNumberIsForUserDefinedAttr(var->varattno))
+			return false;
+	}
+
+	foreach (lc, statlist)
+	{
+		StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
+
+		/* ignore stats without dependencies */
+		if (info->kind != STATS_EXT_DEPENDENCIES)
+			continue;
+
+		foreach (lc2, info->exprs)
+		{
+			Node *stat_expr = (Node *) lfirst(lc2);
+
+			if (equal(clause_expr, stat_expr))
+			{
+				*expr = stat_expr;
+				return true;
+			}
+		}
+	}
+
+	return false;
+}
+
 /*
  * dependencies_clauselist_selectivity
  *		Return the estimated selectivity of (a subset of) the given clauses
@@ -1197,13 +1395,16 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
 	ListCell   *l;
 	Bitmapset  *clauses_attnums = NULL;
 	AttrNumber *list_attnums;
+	Node	  **list_exprs;		/* expressions matched to any statistic */
 	int			listidx;
 	MVDependencies **func_dependencies;
+	StatisticExtInfo **func_info;
 	int			nfunc_dependencies;
 	int			total_ndeps;
 	MVDependency **dependencies;
 	int			ndependencies;
 	int			i;
+	int			matching_clauses;
 
 	/* check if there's any stats that might be useful for us. */
 	if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
@@ -1212,6 +1413,8 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
 	list_attnums = (AttrNumber *) palloc(sizeof(AttrNumber) *
 										 list_length(clauses));
 
+	list_exprs = (Node **) palloc(sizeof(Node *) * list_length(clauses));
+
 	/*
 	 * Pre-process the clauses list to extract the attnums seen in each item.
 	 * We need to determine if there's any clauses which will be useful for
@@ -1224,32 +1427,48 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
 	 * statistics (we treat them as incompatible).
 	 */
 	listidx = 0;
+	matching_clauses = 0;
 	foreach(l, clauses)
 	{
 		Node	   *clause = (Node *) lfirst(l);
 		AttrNumber	attnum;
+		Node	   *expr = NULL;
+
+		/* ignore clause by default */
+		list_attnums[listidx] = InvalidAttrNumber;
+		list_exprs[listidx] = NULL;
 
-		if (!bms_is_member(listidx, *estimatedclauses) &&
-			dependency_is_compatible_clause(clause, rel->relid, &attnum))
+		if (!bms_is_member(listidx, *estimatedclauses))
 		{
-			list_attnums[listidx] = attnum;
-			clauses_attnums = bms_add_member(clauses_attnums, attnum);
+			if (dependency_is_compatible_clause(clause, rel->relid, &attnum))
+			{
+				list_attnums[listidx] = attnum;
+				clauses_attnums = bms_add_member(clauses_attnums, attnum);
+				matching_clauses++;
+			}
+			else if (dependency_is_compatible_expression(clause, rel->relid,
+														 rel->statlist,
+														 &expr))
+			{
+				Assert(expr != NULL);
+				list_exprs[listidx] = expr;
+				matching_clauses++;
+			}
 		}
-		else
-			list_attnums[listidx] = InvalidAttrNumber;
 
 		listidx++;
 	}
 
 	/*
-	 * If there's not at least two distinct attnums then reject the whole list
-	 * of clauses. We must return 1.0 so the calling function's selectivity is
-	 * unaffected.
+	 * If there's not at least two distinct attnums and expressions, then
+	 * reject the whole list of clauses. We must return 1.0 so the calling
+	 * function's selectivity is unaffected.
 	 */
-	if (bms_membership(clauses_attnums) != BMS_MULTIPLE)
+	if (matching_clauses < 2)
 	{
 		bms_free(clauses_attnums);
 		pfree(list_attnums);
+		pfree(list_exprs);
 		return 1.0;
 	}
 
@@ -1266,6 +1485,8 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
 	 */
 	func_dependencies = (MVDependencies **) palloc(sizeof(MVDependencies *) *
 												   list_length(rel->statlist));
+	func_info = (StatisticExtInfo **) palloc(sizeof(StatisticExtInfo *) *
+												   list_length(rel->statlist));
 	nfunc_dependencies = 0;
 	total_ndeps = 0;
 
@@ -1273,22 +1494,48 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
 	{
 		StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(l);
 		Bitmapset  *matched;
-		BMS_Membership membership;
+		int			nmatched;
+		int			nexprs;
 
 		/* skip statistics that are not of the correct type */
 		if (stat->kind != STATS_EXT_DEPENDENCIES)
 			continue;
 
+		/* count matching simple clauses */
 		matched = bms_intersect(clauses_attnums, stat->keys);
-		membership = bms_membership(matched);
+		nmatched = bms_num_members(matched);
 		bms_free(matched);
 
-		/* skip objects matching fewer than two attributes from clauses */
-		if (membership != BMS_MULTIPLE)
+		/* count matching expressions */
+		nexprs = 0;
+		for (i = 0; i < list_length(clauses); i++)
+		{
+			ListCell   *lc;
+
+			/* not a complex expression */
+			if (!list_exprs[i])
+				continue;
+
+			foreach (lc, stat->exprs)
+			{
+				Node *stat_expr = (Node *) lfirst(lc);
+
+				/* try to match it */
+				if (equal(stat_expr, list_exprs[i]))
+					nexprs++;
+			}
+		}
+
+		/*
+		 * Skip objects matching fewer than two attributes/expressions
+		 * from clauses.
+		 */
+		if (nmatched + nexprs < 2)
 			continue;
 
 		func_dependencies[nfunc_dependencies]
 			= statext_dependencies_load(stat->statOid);
+		func_info[nfunc_dependencies] = stat;
 
 		total_ndeps += func_dependencies[nfunc_dependencies]->ndeps;
 		nfunc_dependencies++;
@@ -1300,6 +1547,7 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
 		pfree(func_dependencies);
 		bms_free(clauses_attnums);
 		pfree(list_attnums);
+		pfree(list_exprs);
 		return 1.0;
 	}
 
@@ -1313,21 +1561,41 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
 
 	while (true)
 	{
+		StatisticExtInfo *info = NULL;
 		MVDependency *dependency;
 		AttrNumber	attnum;
+		List	   *exprs = NIL;
+
+		for (i = 0; i < list_length(clauses); i++)
+		{
+			if (!list_exprs[i])
+				continue;
+
+			exprs = lappend(exprs, list_exprs[i]);
+		}
 
 		/* the widest/strongest dependency, fully matched by clauses */
 		dependency = find_strongest_dependency(func_dependencies,
+											   func_info,
 											   nfunc_dependencies,
-											   clauses_attnums);
+											   clauses_attnums,
+											   exprs,
+											   &info);
 		if (!dependency)
 			break;
 
+		Assert(info);
+
 		dependencies[ndependencies++] = dependency;
 
 		/* Ignore dependencies using this implied attribute in later loops */
 		attnum = dependency->attributes[dependency->nattributes - 1];
 		clauses_attnums = bms_del_member(clauses_attnums, attnum);
+
+		/* FIXME Stop considering the expressions. This just stops after
+		 * the first dependency in order not to overrun the array, which
+		 * is wrong though. */
+		break;
 	}
 
 	/*
@@ -1345,6 +1613,7 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
 
 	pfree(dependencies);
 	pfree(func_dependencies);
+	pfree(func_info);
 	bms_free(clauses_attnums);
 	pfree(list_attnums);
 
diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
index 36326927c6..bad76f1225 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -24,6 +24,7 @@
 #include "catalog/pg_collation.h"
 #include "catalog/pg_statistic_ext.h"
 #include "catalog/pg_statistic_ext_data.h"
+#include "executor/executor.h"
 #include "commands/progress.h"
 #include "miscadmin.h"
 #include "nodes/nodeFuncs.h"
@@ -66,11 +67,12 @@ typedef struct StatExtEntry
 	Bitmapset  *columns;		/* attribute numbers covered by the object */
 	List	   *types;			/* 'char' list of enabled statistic kinds */
 	int			stattarget;		/* statistics target (-1 for default) */
+	List	   *exprs;			/* expressions */
 } StatExtEntry;
 
 
 static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid);
-static VacAttrStats **lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
+static VacAttrStats **lookup_var_attr_stats(Relation rel, Bitmapset *attrs, List *exprs,
 											int nvacatts, VacAttrStats **vacatts);
 static void statext_store(Oid relid,
 						  MVNDistinct *ndistinct, MVDependencies *dependencies,
@@ -131,11 +133,20 @@ BuildRelationExtStatistics(Relation onerel, double totalrows,
 		ListCell   *lc2;
 		int			stattarget;
 
+		/* evaluated expressions */
+		Datum	   *exprvals = NULL;
+		bool	   *exprnulls = NULL;
+		Oid		   *exprtypes = NULL;
+		Oid		   *exprcollations = NULL;
+
 		/*
 		 * Check if we can build these stats based on the column analyzed. If
 		 * not, report this fact (except in autovacuum) and move on.
+		 *
+		 * FIXME This is confusing - we have 'stats' list, but it's shadowed
+		 * by another 'stats' variable here.
 		 */
-		stats = lookup_var_attr_stats(onerel, stat->columns,
+		stats = lookup_var_attr_stats(onerel, stat->columns, stat->exprs,
 									  natts, vacattrstats);
 		if (!stats)
 		{
@@ -151,8 +162,8 @@ BuildRelationExtStatistics(Relation onerel, double totalrows,
 		}
 
 		/* check allowed number of dimensions */
-		Assert(bms_num_members(stat->columns) >= 2 &&
-			   bms_num_members(stat->columns) <= STATS_MAX_DIMENSIONS);
+		Assert(bms_num_members(stat->columns) + list_length(stat->exprs) >= 2 &&
+			   bms_num_members(stat->columns) + list_length(stat->exprs) <= STATS_MAX_DIMENSIONS);
 
 		/* compute statistics target for this statistics */
 		stattarget = statext_compute_stattarget(stat->stattarget,
@@ -167,6 +178,87 @@ BuildRelationExtStatistics(Relation onerel, double totalrows,
 		if (stattarget == 0)
 			continue;
 
+		if (stat->exprs)
+		{
+			int			i;
+			int			idx;
+			TupleTableSlot *slot;
+			EState	   *estate;
+			ExprContext *econtext;
+			List	   *exprstates = NIL;
+
+			/*
+			 * Need an EState for evaluation of index expressions and
+			 * partial-index predicates.  Create it in the per-index context to be
+			 * sure it gets cleaned up at the bottom of the loop.
+			 */
+			estate = CreateExecutorState();
+			econtext = GetPerTupleExprContext(estate);
+			/* Need a slot to hold the current heap tuple, too */
+			slot = MakeSingleTupleTableSlot(RelationGetDescr(onerel),
+											&TTSOpsHeapTuple);
+
+			/* Arrange for econtext's scan tuple to be the tuple under test */
+			econtext->ecxt_scantuple = slot;
+
+			/* Compute and save index expression values */
+			exprvals = (Datum *) palloc(numrows * list_length(stat->exprs) * sizeof(Datum));
+			exprnulls = (bool *) palloc(numrows * list_length(stat->exprs) * sizeof(bool));
+			exprtypes = (Oid *) palloc(list_length(stat->exprs) * sizeof(Oid));
+			exprcollations = (Oid *) palloc(list_length(stat->exprs) * sizeof(Oid));
+
+			idx = 0;
+			foreach (lc2, stat->exprs)
+			{
+				Node *expr = (Node *) lfirst(lc2);
+				exprtypes[idx] = exprType(expr);
+				exprcollations[idx] = exprCollation(expr);
+				idx++;
+			}
+
+			/* Set up expression evaluation state */
+			exprstates = ExecPrepareExprList(stat->exprs, estate);
+
+			idx = 0;
+			for (i = 0; i < numrows; i++)
+			{
+				/*
+				 * Reset the per-tuple context each time, to reclaim any cruft
+				 * left behind by evaluating the predicate or index expressions.
+				 */
+				ResetExprContext(econtext);
+
+				/* Set up for predicate or expression evaluation */
+				ExecStoreHeapTuple(rows[i], slot, false);
+
+				foreach (lc2, exprstates)
+				{
+					Datum	datum;
+					bool	isnull;
+					ExprState *exprstate = (ExprState *) lfirst(lc2);
+
+					datum = ExecEvalExprSwitchContext(exprstate,
+											   GetPerTupleExprContext(estate),
+											   &isnull);
+					if (isnull)
+					{
+						exprvals[idx] = (Datum) 0;
+						exprnulls[idx] = true;
+					}
+					else
+					{
+						exprvals[idx] = (Datum) datum;
+						exprnulls[idx] = false;
+					}
+
+					idx++;
+				}
+			}
+
+			ExecDropSingleTupleTableSlot(slot);
+			FreeExecutorState(estate);
+		}
+
 		/* compute statistic of each requested type */
 		foreach(lc2, stat->types)
 		{
@@ -174,13 +266,22 @@ BuildRelationExtStatistics(Relation onerel, double totalrows,
 
 			if (t == STATS_EXT_NDISTINCT)
 				ndistinct = statext_ndistinct_build(totalrows, numrows, rows,
-													stat->columns, stats);
+													exprvals, exprnulls,
+													exprtypes, exprcollations,
+													stat->columns, stat->exprs,
+													stats);
 			else if (t == STATS_EXT_DEPENDENCIES)
 				dependencies = statext_dependencies_build(numrows, rows,
-														  stat->columns, stats);
+														  exprvals, exprnulls,
+														  exprtypes, exprcollations,
+														  stat->columns,
+														  stat->exprs, stats);
 			else if (t == STATS_EXT_MCV)
-				mcv = statext_mcv_build(numrows, rows, stat->columns, stats,
-										totalrows, stattarget);
+				mcv = statext_mcv_build(numrows, rows,
+										exprvals, exprnulls,
+										exprtypes, exprcollations,
+										stat->columns, stat->exprs,
+										stats, totalrows, stattarget);
 		}
 
 		/* store the statistics in the catalog */
@@ -241,7 +342,7 @@ ComputeExtStatisticsRows(Relation onerel,
 		 * analyzed. If not, ignore it (don't report anything, we'll do that
 		 * during the actual build BuildRelationExtStatistics).
 		 */
-		stats = lookup_var_attr_stats(onerel, stat->columns,
+		stats = lookup_var_attr_stats(onerel, stat->columns, stat->exprs,
 									  natts, vacattrstats);
 
 		if (!stats)
@@ -388,6 +489,7 @@ fetch_statentries_for_relation(Relation pg_statext, Oid relid)
 		ArrayType  *arr;
 		char	   *enabled;
 		Form_pg_statistic_ext staForm;
+		List	   *exprs = NIL;
 
 		entry = palloc0(sizeof(StatExtEntry));
 		staForm = (Form_pg_statistic_ext) GETSTRUCT(htup);
@@ -419,6 +521,34 @@ fetch_statentries_for_relation(Relation pg_statext, Oid relid)
 			entry->types = lappend_int(entry->types, (int) enabled[i]);
 		}
 
+		/* decode expression (if any) */
+		datum = SysCacheGetAttr(STATEXTOID, htup,
+								Anum_pg_statistic_ext_stxexprs, &isnull);
+
+		if (!isnull)
+		{
+			char *exprsString;
+
+			exprsString = TextDatumGetCString(datum);
+			exprs = (List *) stringToNode(exprsString);
+
+			pfree(exprsString);
+
+			/*
+			 * Run the expressions through eval_const_expressions. This is not just an
+			 * optimization, but is necessary, because the planner will be comparing
+			 * them to similarly-processed qual clauses, and may fail to detect valid
+			 * matches without this.  We must not use canonicalize_qual, however,
+			 * since these aren't qual expressions.
+			 */
+			exprs = (List *) eval_const_expressions(NULL, (Node *) exprs);
+
+			/* May as well fix opfuncids too */
+			fix_opfuncids((Node *) exprs);
+		}
+
+		entry->exprs = exprs;
+
 		result = lappend(result, entry);
 	}
 
@@ -427,6 +557,89 @@ fetch_statentries_for_relation(Relation pg_statext, Oid relid)
 	return result;
 }
 
+
+/*
+ * examine_attribute -- pre-analysis of a single column
+ *
+ * Determine whether the column is analyzable; if so, create and initialize
+ * a VacAttrStats struct for it.  If not, return NULL.
+ *
+ * If index_expr isn't NULL, then we're trying to analyze an expression index,
+ * and index_expr is the expression tree representing the column's data.
+ */
+static VacAttrStats *
+examine_attribute(Node *expr)
+{
+	HeapTuple	typtuple;
+	VacAttrStats *stats;
+	int			i;
+	bool		ok;
+
+	/*
+	 * Create the VacAttrStats struct.  Note that we only have a copy of the
+	 * fixed fields of the pg_attribute tuple.
+	 */
+	stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats));
+
+	/* fake the attribute */
+	stats->attr = (Form_pg_attribute) palloc0(ATTRIBUTE_FIXED_PART_SIZE);
+	stats->attr->attstattarget = -1;
+
+	/*
+	 * When analyzing an expression index, believe the expression tree's type
+	 * not the column datatype --- the latter might be the opckeytype storage
+	 * type of the opclass, which is not interesting for our purposes.  (Note:
+	 * if we did anything with non-expression index columns, we'd need to
+	 * figure out where to get the correct type info from, but for now that's
+	 * not a problem.)	It's not clear whether anyone will care about the
+	 * typmod, but we store that too just in case.
+	 */
+	stats->attrtypid = exprType(expr);
+	stats->attrtypmod = exprTypmod(expr);
+	stats->attrcollid = exprCollation(expr);
+
+	typtuple = SearchSysCacheCopy1(TYPEOID,
+								   ObjectIdGetDatum(stats->attrtypid));
+	if (!HeapTupleIsValid(typtuple))
+		elog(ERROR, "cache lookup failed for type %u", stats->attrtypid);
+	stats->attrtype = (Form_pg_type) GETSTRUCT(typtuple);
+	// stats->anl_context = anl_context;
+	stats->tupattnum = InvalidAttrNumber;
+
+	/*
+	 * The fields describing the stats->stavalues[n] element types default to
+	 * the type of the data being analyzed, but the type-specific typanalyze
+	 * function can change them if it wants to store something else.
+	 */
+	for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
+	{
+		stats->statypid[i] = stats->attrtypid;
+		stats->statyplen[i] = stats->attrtype->typlen;
+		stats->statypbyval[i] = stats->attrtype->typbyval;
+		stats->statypalign[i] = stats->attrtype->typalign;
+	}
+
+	/*
+	 * Call the type-specific typanalyze function.  If none is specified, use
+	 * std_typanalyze().
+	 */
+	if (OidIsValid(stats->attrtype->typanalyze))
+		ok = DatumGetBool(OidFunctionCall1(stats->attrtype->typanalyze,
+										   PointerGetDatum(stats)));
+	else
+		ok = std_typanalyze(stats);
+
+	if (!ok || stats->compute_stats == NULL || stats->minrows <= 0)
+	{
+		heap_freetuple(typtuple);
+		pfree(stats->attr);
+		pfree(stats);
+		return NULL;
+	}
+
+	return stats;
+}
+
 /*
  * Using 'vacatts' of size 'nvacatts' as input data, return a newly built
  * VacAttrStats array which includes only the items corresponding to
@@ -435,15 +648,18 @@ fetch_statentries_for_relation(Relation pg_statext, Oid relid)
  * to the caller that the stats should not be built.
  */
 static VacAttrStats **
-lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
+lookup_var_attr_stats(Relation rel, Bitmapset *attrs, List *exprs,
 					  int nvacatts, VacAttrStats **vacatts)
 {
 	int			i = 0;
 	int			x = -1;
+	int			natts;
 	VacAttrStats **stats;
+	ListCell   *lc;
+
+	natts = bms_num_members(attrs) + list_length(exprs);
 
-	stats = (VacAttrStats **)
-		palloc(bms_num_members(attrs) * sizeof(VacAttrStats *));
+	stats = (VacAttrStats **) palloc(natts * sizeof(VacAttrStats *));
 
 	/* lookup VacAttrStats info for the requested columns (same attnum) */
 	while ((x = bms_next_member(attrs, x)) >= 0)
@@ -480,6 +696,24 @@ lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
 		i++;
 	}
 
+	/* also add info for expressions */
+	foreach (lc, exprs)
+	{
+		Node *expr = (Node *) lfirst(lc);
+
+		stats[i] = examine_attribute(expr);
+
+		/*
+		 * FIXME We need tuple descriptor later, and we just grab it from
+		 * stats[0]->tupDesc (see e.g. statext_mcv_build). But as coded
+		 * examine_attribute does not set that, so just grab it from the
+		 * first vacatts element.
+		 */
+		stats[i]->tupDesc = vacatts[0]->tupDesc;
+
+		i++;
+	}
+
 	return stats;
 }
 
@@ -741,8 +975,10 @@ build_attnums_array(Bitmapset *attrs, int *numattrs)
  * can simply pfree the return value to release all of it.
  */
 SortItem *
-build_sorted_items(int numrows, int *nitems, HeapTuple *rows, TupleDesc tdesc,
-				   MultiSortSupport mss, int numattrs, AttrNumber *attnums)
+build_sorted_items(int numrows, int *nitems, HeapTuple *rows,
+				   Datum *exprvals, bool *exprnulls, Oid *exprtypes, int nexprs,
+				   TupleDesc tdesc, MultiSortSupport mss,
+				   int numattrs, AttrNumber *attnums)
 {
 	int			i,
 				j,
@@ -789,8 +1025,23 @@ build_sorted_items(int numrows, int *nitems, HeapTuple *rows, TupleDesc tdesc,
 		{
 			Datum		value;
 			bool		isnull;
+			int			attlen;
+
+			if (attnums[j] <= MaxHeapAttributeNumber)
+			{
+				value = heap_getattr(rows[i], attnums[j], tdesc, &isnull);
+				attlen = TupleDescAttr(tdesc, attnums[j] - 1)->attlen;
+			}
+			else
+			{
+				int	expridx = (attnums[j] - MaxHeapAttributeNumber - 1);
+				int	idx = i * nexprs + expridx;
 
-			value = heap_getattr(rows[i], attnums[j], tdesc, &isnull);
+				value = exprvals[idx];
+				isnull = exprnulls[idx];
+
+				attlen = get_typlen(exprtypes[expridx]);
+			}
 
 			/*
 			 * If this is a varlena value, check if it's too wide and if yes
@@ -801,8 +1052,7 @@ build_sorted_items(int numrows, int *nitems, HeapTuple *rows, TupleDesc tdesc,
 			 * on the assumption that those are small (below WIDTH_THRESHOLD)
 			 * and will be discarded at the end of analyze.
 			 */
-			if ((!isnull) &&
-				(TupleDescAttr(tdesc, attnums[j] - 1)->attlen == -1))
+			if ((!isnull) && (attlen == -1))
 			{
 				if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
 				{
@@ -881,7 +1131,8 @@ has_stats_of_kind(List *stats, char requiredkind)
  */
 StatisticExtInfo *
 choose_best_statistics(List *stats, char requiredkind,
-					   Bitmapset **clause_attnums, int nclauses)
+					   Bitmapset **clause_attnums, Node **clause_exprs,
+					   int nclauses)
 {
 	ListCell   *lc;
 	StatisticExtInfo *best_match = NULL;
@@ -894,6 +1145,7 @@ choose_best_statistics(List *stats, char requiredkind,
 		StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
 		Bitmapset  *matched = NULL;
 		int			num_matched;
+		int			num_matched_exprs;
 		int			numkeys;
 
 		/* skip statistics that are not of the correct type */
@@ -920,6 +1172,32 @@ choose_best_statistics(List *stats, char requiredkind,
 		num_matched = bms_num_members(matched);
 		bms_free(matched);
 
+		/*
+		 * Collect expressions in remaining (unestimated) expressions, covered
+		 * by an expression in this statistic object.
+		 */
+		num_matched_exprs = 0;
+		for (i = 0; i < nclauses; i++)
+		{
+			ListCell *lc2;
+
+			/* ignore incompatible/estimated expressions */
+			if (!clause_exprs[i])
+				continue;
+
+			/* ignore expressions that are not covered by this object */
+			foreach(lc2, info->exprs)
+			{
+				Node   *stat_expr = (Node *) lfirst(lc2);
+
+				if (equal(clause_exprs[i], stat_expr))
+				{
+					num_matched_exprs++;
+					break;
+				}
+			}
+		}
+
 		/*
 		 * save the actual number of keys in the stats so that we can choose
 		 * the narrowest stats with the most matching keys.
@@ -931,11 +1209,12 @@ choose_best_statistics(List *stats, char requiredkind,
 		 * when it matches the same number of attributes but these stats have
 		 * fewer keys than any previous match.
 		 */
-		if (num_matched > best_num_matched ||
-			(num_matched == best_num_matched && numkeys < best_match_keys))
+		if (num_matched + num_matched_exprs > best_num_matched ||
+			((num_matched + num_matched_exprs) == best_num_matched &&
+			 numkeys < best_match_keys))
 		{
 			best_match = info;
-			best_num_matched = num_matched;
+			best_num_matched = num_matched + num_matched_exprs;
 			best_match_keys = numkeys;
 		}
 	}
@@ -994,7 +1273,7 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
 			return false;
 
 		/* Check if the expression has the right shape (one Var, one Const) */
-		if (!examine_clause_args(expr->args, &var, NULL, NULL))
+		if (!examine_opclause_expression(expr, &var, NULL, NULL))
 			return false;
 
 		/*
@@ -1121,6 +1400,8 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
 			/*
 			 * Had we found incompatible clause in the arguments, treat the
 			 * whole clause as incompatible.
+			 *
+			 * XXX This fails for expressions, at the moment.
 			 */
 			if (!statext_is_compatible_clause_internal(root,
 													   (Node *) lfirst(lc),
@@ -1150,6 +1431,152 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
 	return false;
 }
 
+
+
+/*
+ * statext_extract_expression_internal
+ *		FIXME
+ *
+ */
+static Node *
+statext_extract_expression_internal(PlannerInfo *root, Node *clause, Index relid)
+{
+	/* Look inside any binary-compatible relabeling (as in examine_variable) */
+	if (IsA(clause, RelabelType))
+		clause = (Node *) ((RelabelType *) clause)->arg;
+
+	/* plain Var references (boolean Vars or recursive checks) */
+	if (IsA(clause, Var))
+	{
+		Var		   *var = (Var *) clause;
+
+		/* Ensure var is from the correct relation */
+		if (var->varno != relid)
+			return NULL;
+
+		/* we also better ensure the Var is from the current level */
+		if (var->varlevelsup > 0)
+			return NULL;
+
+		/* Also skip system attributes (we don't allow stats on those). */
+		if (!AttrNumberIsForUserDefinedAttr(var->varattno))
+			return NULL;
+
+		// *attnums = bms_add_member(*attnums, var->varattno);
+		return clause;
+	}
+
+	/* (Var op Const) or (Const op Var) */
+	if (is_opclause(clause))
+	{
+		RangeTblEntry *rte = root->simple_rte_array[relid];
+		OpExpr	   *expr = (OpExpr *) clause;
+		Node	   *expr2 = NULL;
+
+		/* Only expressions with two arguments are considered compatible. */
+		if (list_length(expr->args) != 2)
+			return NULL;
+
+		/* Check if the expression has the right shape (one Expr, one Const) */
+		if (!examine_opclause_expression2(expr, &expr2, NULL, NULL))
+			return false;
+
+		/*
+		 * If it's not one of the supported operators ("=", "<", ">", etc.),
+		 * just ignore the clause, as it's not compatible with MCV lists.
+		 *
+		 * This uses the function for estimating selectivity, not the operator
+		 * directly (a bit awkward, but well ...).
+		 */
+		switch (get_oprrest(expr->opno))
+		{
+			case F_EQSEL:
+			case F_NEQSEL:
+			case F_SCALARLTSEL:
+			case F_SCALARLESEL:
+			case F_SCALARGTSEL:
+			case F_SCALARGESEL:
+				/* supported, will continue with inspection of the Var */
+				break;
+
+			default:
+				/* other estimators are considered unknown/unsupported */
+				return NULL;
+		}
+
+		/*
+		 * If there are any securityQuals on the RTE from security barrier
+		 * views or RLS policies, then the user may not have access to all the
+		 * table's data, and we must check that the operator is leak-proof.
+		 *
+		 * If the operator is leaky, then we must ignore this clause for the
+		 * purposes of estimating with MCV lists, otherwise the operator might
+		 * reveal values from the MCV list that the user doesn't have
+		 * permission to see.
+		 */
+		if (rte->securityQuals != NIL &&
+			!get_func_leakproof(get_opcode(expr->opno)))
+			return NULL;
+
+		return expr2;
+	}
+
+	/* AND/OR/NOT clause */
+	if (is_andclause(clause) ||
+		is_orclause(clause) ||
+		is_notclause(clause))
+	{
+		/*
+		 * AND/OR/NOT-clauses are supported if all sub-clauses are supported
+		 *
+		 * Perhaps we could improve this by handling mixed cases, when some of
+		 * the clauses are supported and some are not. Selectivity for the
+		 * supported subclauses would be computed using extended statistics,
+		 * and the remaining clauses would be estimated using the traditional
+		 * algorithm (product of selectivities).
+		 *
+		 * It however seems overly complex, and in a way we already do that
+		 * because if we reject the whole clause as unsupported here, it will
+		 * be eventually passed to clauselist_selectivity() which does exactly
+		 * this (split into supported/unsupported clauses etc).
+		 */
+		BoolExpr   *expr = (BoolExpr *) clause;
+		ListCell   *lc;
+
+		foreach(lc, expr->args)
+		{
+			/*
+			 * Had we found incompatible clause in the arguments, treat the
+			 * whole clause as incompatible.
+			 */
+			if (!statext_extract_expression_internal(root,
+													 (Node *) lfirst(lc),
+													 relid))
+				return NULL;
+		}
+
+		return clause;
+	}
+
+	/* Var IS NULL */
+	if (IsA(clause, NullTest))
+	{
+		NullTest   *nt = (NullTest *) clause;
+
+		/*
+		 * Only simple (Var IS NULL) expressions supported for now. Maybe we
+		 * could use examine_variable to fix this?
+		 */
+		if (!IsA(nt->arg, Var))
+			return NULL;
+
+		return statext_extract_expression_internal(root, (Node *) (nt->arg),
+												   relid);
+	}
+
+	return NULL;
+}
+
 /*
  * statext_is_compatible_clause
  *		Determines if the clause is compatible with MCV lists.
@@ -1224,6 +1651,51 @@ statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid,
 	return true;
 }
 
+/*
+ * statext_extract_expression
+ *		Determines if the clause is compatible with MCV lists.
+ *
+ * Currently, we only support three types of clauses:
+ *
+ * (a) OpExprs of the form (Var op Const), or (Const op Var), where the op
+ * is one of ("=", "<", ">", ">=", "<=")
+ *
+ * (b) (Var IS [NOT] NULL)
+ *
+ * (c) combinations using AND/OR/NOT
+ *
+ * In the future, the range of supported clauses may be expanded to more
+ * complex cases, for example (Var op Var).
+ */
+static Node *
+statext_extract_expression(PlannerInfo *root, Node *clause, Index relid)
+{
+	RestrictInfo *rinfo = (RestrictInfo *) clause;
+	Node		 *expr;
+
+	if (!IsA(rinfo, RestrictInfo))
+		return false;
+
+	/* Pseudoconstants are not really interesting here. */
+	if (rinfo->pseudoconstant)
+		return false;
+
+	/* clauses referencing multiple varnos are incompatible */
+	if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
+		return false;
+
+	/* Check the clause and determine what attributes it references. */
+	expr = statext_extract_expression_internal(root, (Node *) rinfo->clause, relid);
+
+	if (!expr)
+		return NULL;
+
+	/* FIXME do the same ACL check as in statext_is_compatible_clause */
+
+	/* If we reach here, the clause is OK */
+	return expr;
+}
+
 /*
  * statext_mcv_clauselist_selectivity
  *		Estimate clauses using the best multi-column statistics.
@@ -1285,7 +1757,8 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 								   RelOptInfo *rel, Bitmapset **estimatedclauses)
 {
 	ListCell   *l;
-	Bitmapset **list_attnums;
+	Bitmapset **list_attnums;	/* attnums extracted from the clause */
+	Node	  **list_exprs;		/* expressions matched to any statistic */
 	int			listidx;
 	Selectivity sel = 1.0;
 
@@ -1296,6 +1769,8 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 	list_attnums = (Bitmapset **) palloc(sizeof(Bitmapset *) *
 										 list_length(clauses));
 
+	list_exprs = (Node **) palloc(sizeof(Node *) * list_length(clauses));
+
 	/*
 	 * Pre-process the clauses list to extract the attnums seen in each item.
 	 * We need to determine if there's any clauses which will be useful for
@@ -1313,11 +1788,66 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 		Node	   *clause = (Node *) lfirst(l);
 		Bitmapset  *attnums = NULL;
 
+		/* the clause is considered incompatible by default */
+		list_attnums[listidx] = NULL;
+
+		/* and it's also not covered exactly by the statistic */
+		list_exprs[listidx] = NULL;
+
+		/*
+		 * First see if the clause is simple enough to be covered directly
+		 * by the attributes. If not, see if there's at least one statistic
+		 * object using the expression as-is.
+		 */
 		if (!bms_is_member(listidx, *estimatedclauses) &&
 			statext_is_compatible_clause(root, clause, rel->relid, &attnums))
+		{
+			/* simple expression, covered through attnum(s) */
 			list_attnums[listidx] = attnums;
+		}
 		else
-			list_attnums[listidx] = NULL;
+		{
+			ListCell *lc;
+			Node	 *expr;
+
+			expr = statext_extract_expression(root, clause, rel->relid);
+
+			/* complex expression, search for statistic */
+			foreach(lc, rel->statlist)
+			{
+				ListCell		   *lc2;
+				StatisticExtInfo   *info = (StatisticExtInfo *) lfirst(lc);
+
+				/* have we already matched the expression to a statistic? */
+				Assert(!list_exprs[listidx]);
+
+				/* no expressions in the statistic */
+				if (!info->exprs)
+					continue;
+
+				/*
+				 * Walk the expressions, see if all expressions extracted from
+				 * the clause are covered by the extended statistic object.
+				 */
+				foreach (lc2, info->exprs)
+				{
+					Node   *stat_expr = (Node *) lfirst(lc2);
+
+					if (equal(expr, stat_expr))
+					{
+						list_exprs[listidx] = expr;
+						break;
+					}
+
+					if (list_exprs[listidx])
+						break;
+				}
+
+				/* stop looking for another statistic */
+				if (list_exprs[listidx])
+					break;
+			}
+		}
 
 		listidx++;
 	}
@@ -1336,7 +1866,8 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 
 		/* find the best suited statistics object for these attnums */
 		stat = choose_best_statistics(rel->statlist, STATS_EXT_MCV,
-									  list_attnums, list_length(clauses));
+									  list_attnums, list_exprs,
+									  list_length(clauses));
 
 		/*
 		 * if no (additional) matching stats could be found then we've nothing
@@ -1356,17 +1887,40 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 		{
 			/*
 			 * If the clause is compatible with the selected statistics, mark
-			 * it as estimated and add it to the list to estimate.
+			 * it as estimated and add it to the list to estimate. It may be
+			 * either a simple clause, or an expression.
 			 */
 			if (list_attnums[listidx] != NULL &&
 				bms_is_subset(list_attnums[listidx], stat->keys))
 			{
+				/* simple clause */
 				stat_clauses = lappend(stat_clauses, (Node *) lfirst(l));
 				*estimatedclauses = bms_add_member(*estimatedclauses, listidx);
 
 				bms_free(list_attnums[listidx]);
 				list_attnums[listidx] = NULL;
 			}
+			else
+			{
+				/* expression */
+				ListCell *lc;
+
+				foreach (lc, stat->exprs)
+				{
+					Node   *stat_expr = (Node *) lfirst(lc);
+
+					if (equal(list_exprs[listidx], stat_expr))
+					{
+						stat_clauses = lappend(stat_clauses, (Node *) lfirst(l));
+						*estimatedclauses = bms_add_member(*estimatedclauses, listidx);
+
+						// bms_free(list_attnums[listidx]);
+						list_exprs[listidx] = NULL;
+
+						break;
+					}
+				}
+			}
 
 			listidx++;
 		}
@@ -1506,3 +2060,153 @@ examine_clause_args(List *args, Var **varp, Const **cstp, bool *varonleftp)
 
 	return true;
 }
+
+bool
+examine_clause_args2(List *args, Node **exprp, Const **cstp, bool *expronleftp)
+{
+	Node	   *expr;
+	Const	   *cst;
+	bool		expronleft;
+	Node	   *leftop,
+			   *rightop;
+
+	/* enforced by statext_is_compatible_clause_internal */
+	Assert(list_length(args) == 2);
+
+	leftop = linitial(args);
+	rightop = lsecond(args);
+
+	/* strip RelabelType from either side of the expression */
+	if (IsA(leftop, RelabelType))
+		leftop = (Node *) ((RelabelType *) leftop)->arg;
+
+	if (IsA(rightop, RelabelType))
+		rightop = (Node *) ((RelabelType *) rightop)->arg;
+
+	if (IsA(rightop, Const))
+	{
+		expr = (Node *) leftop;
+		cst = (Const *) rightop;
+		expronleft = true;
+	}
+	else if (IsA(leftop, Const))
+	{
+		expr = (Node *) rightop;
+		cst = (Const *) leftop;
+		expronleft = false;
+	}
+	else
+		return false;
+
+	/* return pointers to the extracted parts if requested */
+	if (exprp)
+		*exprp = expr;
+
+	if (cstp)
+		*cstp = cst;
+
+	if (expronleftp)
+		*expronleftp = expronleft;
+
+	return true;
+}
+
+bool
+examine_opclause_expression(OpExpr *expr, Var **varp, Const **cstp, bool *varonleftp)
+{
+	Var		   *var;
+	Const	   *cst;
+	bool		varonleft;
+	Node	   *leftop,
+			   *rightop;
+
+	/* enforced by statext_is_compatible_clause_internal */
+	Assert(list_length(expr->args) == 2);
+
+	leftop = linitial(expr->args);
+	rightop = lsecond(expr->args);
+
+	/* strip RelabelType from either side of the expression */
+	if (IsA(leftop, RelabelType))
+		leftop = (Node *) ((RelabelType *) leftop)->arg;
+
+	if (IsA(rightop, RelabelType))
+		rightop = (Node *) ((RelabelType *) rightop)->arg;
+
+	if (IsA(leftop, Var) && IsA(rightop, Const))
+	{
+		var = (Var *) leftop;
+		cst = (Const *) rightop;
+		varonleft = true;
+	}
+	else if (IsA(leftop, Const) && IsA(rightop, Var))
+	{
+		var = (Var *) rightop;
+		cst = (Const *) leftop;
+		varonleft = false;
+	}
+	else
+		return false;
+
+	/* return pointers to the extracted parts if requested */
+	if (varp)
+		*varp = var;
+
+	if (cstp)
+		*cstp = cst;
+
+	if (varonleftp)
+		*varonleftp = varonleft;
+
+	return true;
+}
+
+bool
+examine_opclause_expression2(OpExpr *expr, Node **exprp, Const **cstp, bool *expronleftp)
+{
+	Node	   *expr2;
+	Const	   *cst;
+	bool		expronleft;
+	Node	   *leftop,
+			   *rightop;
+
+	/* enforced by statext_is_compatible_clause_internal */
+	Assert(list_length(expr->args) == 2);
+
+	leftop = linitial(expr->args);
+	rightop = lsecond(expr->args);
+
+	/* strip RelabelType from either side of the expression */
+	if (IsA(leftop, RelabelType))
+		leftop = (Node *) ((RelabelType *) leftop)->arg;
+
+	if (IsA(rightop, RelabelType))
+		rightop = (Node *) ((RelabelType *) rightop)->arg;
+
+	if (IsA(rightop, Const))
+	{
+		expr2 = (Node *) leftop;
+		cst = (Const *) rightop;
+		expronleft = true;
+	}
+	else if (IsA(leftop, Const))
+	{
+		expr2 = (Node *) rightop;
+		cst = (Const *) leftop;
+		expronleft = false;
+	}
+	else
+		return false;
+
+	/* return pointers to the extracted parts if requested */
+	if (exprp)
+		*exprp = expr2;
+
+	if (cstp)
+		*cstp = cst;
+
+	if (expronleftp)
+		*expronleftp = expronleft;
+
+	return true;
+}
diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c
index 6a262f1543..319d821fd5 100644
--- a/src/backend/statistics/mcv.c
+++ b/src/backend/statistics/mcv.c
@@ -73,7 +73,9 @@
 	 ((ndims) * sizeof(DimensionInfo)) + \
 	 ((nitems) * ITEM_SIZE(ndims)))
 
-static MultiSortSupport build_mss(VacAttrStats **stats, int numattrs);
+static MultiSortSupport build_mss(VacAttrStats **stats, int numattrs,
+								  Oid *exprtypes, Oid *exprcollations,
+								  int nexprs);
 
 static SortItem *build_distinct_groups(int numrows, SortItem *items,
 									   MultiSortSupport mss, int *ndistinct);
@@ -180,7 +182,10 @@ get_mincount_for_mcv_list(int samplerows, double totalrows)
  *
  */
 MCVList *
-statext_mcv_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
+statext_mcv_build(int numrows, HeapTuple *rows,
+				  Datum *exprvals, bool *exprnulls,
+				  Oid *exprtypes, Oid *exprcollations,
+				  Bitmapset *attrs, List *exprs,
 				  VacAttrStats **stats, double totalrows, int stattarget)
 {
 	int			i,
@@ -194,14 +199,24 @@ statext_mcv_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
 	MCVList    *mcvlist = NULL;
 	MultiSortSupport mss;
 
-	attnums = build_attnums_array(attrs, &numattrs);
-
 	/* comparator for all the columns */
-	mss = build_mss(stats, numattrs);
+	mss = build_mss(stats, bms_num_members(attrs), exprtypes, exprcollations, list_length(exprs));
+
+	/*
+	 * Copy the bitmapset and add fake attnums representing expressions,
+	 * starting above MaxHeapAttributeNumber.
+	 */
+	attrs = bms_copy(attrs);
+
+	for (i = 1; i <= list_length(exprs); i++)
+		attrs = bms_add_member(attrs, MaxHeapAttributeNumber + i);
+
+	attnums = build_attnums_array(attrs, &numattrs);
 
 	/* sort the rows */
-	items = build_sorted_items(numrows, &nitems, rows, stats[0]->tupDesc,
-							   mss, numattrs, attnums);
+	items = build_sorted_items(numrows, &nitems, rows, exprvals, exprnulls,
+							   exprtypes, list_length(exprs),
+							   stats[0]->tupDesc, mss, numattrs, attnums);
 
 	if (!items)
 		return NULL;
@@ -337,6 +352,7 @@ statext_mcv_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
 
 	pfree(items);
 	pfree(groups);
+	pfree(attrs);
 
 	return mcvlist;
 }
@@ -346,12 +362,12 @@ statext_mcv_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
  *	build MultiSortSupport for the attributes passed in attrs
  */
 static MultiSortSupport
-build_mss(VacAttrStats **stats, int numattrs)
+build_mss(VacAttrStats **stats, int numattrs, Oid *exprtypes, Oid *exprcollations, int nexprs)
 {
 	int			i;
 
 	/* Sort by multiple columns (using array of SortSupport) */
-	MultiSortSupport mss = multi_sort_init(numattrs);
+	MultiSortSupport mss = multi_sort_init(numattrs + nexprs);
 
 	/* prepare the sort functions for all the attributes */
 	for (i = 0; i < numattrs; i++)
@@ -367,6 +383,20 @@ build_mss(VacAttrStats **stats, int numattrs)
 		multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
 	}
 
+	/* prepare the sort functions for all the expressions */
+	for (i = 0; i < nexprs; i++)
+	{
+		TypeCacheEntry *type;
+
+		type = lookup_type_cache(exprtypes[i], TYPECACHE_LT_OPR);
+		if (type->lt_opr == InvalidOid) /* shouldn't happen */
+			elog(ERROR, "cache lookup failed for ordering operator for type %u",
+				 exprtypes[i]);
+
+		multi_sort_add_dimension(mss, numattrs + i, type->lt_opr,
+								 exprcollations[i]);
+	}
+
 	return mss;
 }
 
@@ -1543,7 +1573,8 @@ pg_mcv_list_send(PG_FUNCTION_ARGS)
  */
 static bool *
 mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
-					 Bitmapset *keys, MCVList *mcvlist, bool is_or)
+					 Bitmapset *keys, List *exprs,
+					 MCVList *mcvlist, bool is_or)
 {
 	int			i;
 	ListCell   *l;
@@ -1583,8 +1614,10 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
 
 			/* valid only after examine_clause_args returns true */
 			Var		   *var;
+			Node	   *clause_expr;
 			Const	   *cst;
 			bool		varonleft;
+			bool		expronleft;
 
 			fmgr_info(get_opcode(expr->opno), &opproc);
 
@@ -1653,6 +1686,89 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
 					matches[i] = RESULT_MERGE(matches[i], is_or, match);
 				}
 			}
+			/* extract the expr and const from the expression */
+			else if (examine_clause_args2(expr->args, &clause_expr, &cst, &expronleft))
+			{
+				ListCell   *lc;
+				int			idx;
+				Oid			collid = exprCollation(clause_expr);
+
+				/* match the attribute to a dimension of the statistic */
+				idx = bms_num_members(keys);
+
+				foreach(lc, exprs)
+				{
+					Node *stat_expr = (Node *) lfirst(lc);
+
+					if (equal(clause_expr, stat_expr))
+						break;
+
+					idx++;
+				}
+
+				/* index should be valid */
+				Assert((idx >= 0) &&
+					   (idx < bms_num_members(keys) + list_length(exprs)));
+
+				/*
+				 * Walk through the MCV items and evaluate the current clause.
+				 * We can skip items that were already ruled out, and
+				 * terminate if there are no remaining MCV items that might
+				 * possibly match.
+				 */
+				for (i = 0; i < mcvlist->nitems; i++)
+				{
+					bool		match = true;
+					MCVItem    *item = &mcvlist->items[i];
+
+					/*
+					 * When the MCV item or the Const value is NULL we can
+					 * treat this as a mismatch. We must not call the operator
+					 * because of strictness.
+					 */
+					if (item->isnull[idx] || cst->constisnull)
+					{
+						matches[i] = RESULT_MERGE(matches[i], is_or, false);
+						continue;
+					}
+
+					/*
+					 * Skip MCV items that can't change result in the bitmap.
+					 * Once the value gets false for AND-lists, or true for
+					 * OR-lists, we don't need to look at more clauses.
+					 */
+					if (RESULT_IS_FINAL(matches[i], is_or))
+						continue;
+
+					/*
+					 * First check whether the constant is below the lower
+					 * boundary (in that case we can skip the bucket, because
+					 * there's no overlap).
+					 *
+					 * We don't store collations used to build the statistics,
+					 * but we can use the collation for the attribute itself,
+					 * as stored in varcollid. We do reset the statistics
+					 * after a type change (including collation change), so
+					 * this is OK. We may need to relax this after allowing
+					 * extended statistics on expressions.
+					 */
+					if (expronleft)
+						match = DatumGetBool(FunctionCall2Coll(&opproc,
+															   collid,
+															   item->values[idx],
+															   cst->constvalue));
+					else
+						match = DatumGetBool(FunctionCall2Coll(&opproc,
+															   collid,
+															   cst->constvalue,
+															   item->values[idx]));
+
+					/* update the match bitmap with the result */
+					matches[i] = RESULT_MERGE(matches[i], is_or, match);
+				}
+			}
+			else
+				elog(ERROR, "incompatible clause");
 		}
 		else if (IsA(clause, ScalarArrayOpExpr))
 		{
@@ -1661,8 +1777,10 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
 
 			/* valid only after examine_clause_args returns true */
 			Var		   *var;
+			Node	   *clause_expr;
 			Const	   *cst;
 			bool		varonleft;
+			bool		expronleft;
 
 			fmgr_info(get_opcode(expr->opno), &opproc);
 
@@ -1760,14 +1878,154 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
 					matches[i] = RESULT_MERGE(matches[i], is_or, match);
 				}
 			}
+			/* extract the expr and const from the expression */
+			else if (examine_clause_args2(expr->args, &clause_expr, &cst, &expronleft))
+			{
+				ListCell   *lc;
+				int			idx;
+
+				ArrayType  *arrayval;
+				int16		elmlen;
+				bool		elmbyval;
+				char		elmalign;
+				int			num_elems;
+				Datum	   *elem_values;
+				bool	   *elem_nulls;
+
+				/* ScalarArrayOpExpr has the Var always on the left */
+				Assert(varonleft);
+
+				if (!cst->constisnull)
+				{
+					arrayval = DatumGetArrayTypeP(cst->constvalue);
+					get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
+										 &elmlen, &elmbyval, &elmalign);
+					deconstruct_array(arrayval,
+									  ARR_ELEMTYPE(arrayval),
+									  elmlen, elmbyval, elmalign,
+									  &elem_values, &elem_nulls, &num_elems);
+				}
+
+				/* match the attribute to a dimension of the statistic */
+				idx = bms_num_members(keys);
+
+				foreach(lc, exprs)
+				{
+					Node *stat_expr = (Node *) lfirst(lc);
+
+					if (equal(clause_expr, stat_expr))
+						break;
+
+					idx++;
+				}
+
+				/* index should be valid */
+				Assert((idx >= 0) &&
+					   (idx < bms_num_members(keys) + list_length(exprs)));
+
+				/*
+				 * Walk through the MCV items and evaluate the current clause.
+				 * We can skip items that were already ruled out, and
+				 * terminate if there are no remaining MCV items that might
+				 * possibly match.
+				 */
+				for (i = 0; i < mcvlist->nitems; i++)
+				{
+					int			j;
+					bool		match = (expr->useOr ? false : true);
+					MCVItem    *item = &mcvlist->items[i];
+
+					/*
+					 * When the MCV item or the Const value is NULL we can
+					 * treat this as a mismatch. We must not call the operator
+					 * because of strictness.
+					 */
+					if (item->isnull[idx] || cst->constisnull)
+					{
+						matches[i] = RESULT_MERGE(matches[i], is_or, false);
+						continue;
+					}
+
+					/*
+					 * Skip MCV items that can't change result in the bitmap.
+					 * Once the value gets false for AND-lists, or true for
+					 * OR-lists, we don't need to look at more clauses.
+					 */
+					if (RESULT_IS_FINAL(matches[i], is_or))
+						continue;
+
+					for (j = 0; j < num_elems; j++)
+					{
+						Datum		elem_value = elem_values[j];
+						bool		elem_isnull = elem_nulls[j];
+						bool		elem_match;
+
+						/* NULL values always evaluate as not matching. */
+						if (elem_isnull)
+						{
+							match = RESULT_MERGE(match, expr->useOr, false);
+							continue;
+						}
+
+						/*
+						 * Stop evaluating the array elements once we reach
+						 * match value that can't change - ALL() is the same
+						 * as AND-list, ANY() is the same as OR-list.
+						 */
+						if (RESULT_IS_FINAL(match, expr->useOr))
+							break;
+
+						elem_match = DatumGetBool(FunctionCall2Coll(&opproc,
+																	var->varcollid,
+																	item->values[idx],
+																	elem_value));
+
+						match = RESULT_MERGE(match, expr->useOr, elem_match);
+					}
+
+					/* update the match bitmap with the result */
+					matches[i] = RESULT_MERGE(matches[i], is_or, match);
+				}
+			}
+			else
+				elog(ERROR, "incompatible clause");
 		}
 		else if (IsA(clause, NullTest))
 		{
 			NullTest   *expr = (NullTest *) clause;
-			Var		   *var = (Var *) (expr->arg);
+			Node	   *clause_expr = (Node *) (expr->arg);
 
 			/* match the attribute to a dimension of the statistic */
-			int			idx = bms_member_index(keys, var->varattno);
+			int			idx = -1;
+
+			if (IsA(clause_expr, Var))
+			{
+				/* simple Var, so just lookup using varattno */
+				Var *var = (Var *) clause_expr;
+
+				idx = bms_member_index(keys, var->varattno);
+			}
+			else
+			{
+				ListCell *lc;
+
+				/* expressions are after the simple columns */
+				idx = bms_num_members(keys);
+
+				/* expression - lookup in stats expressions */
+				foreach(lc, exprs)
+				{
+					Node *stat_expr = (Node *) lfirst(lc);
+
+					if (equal(clause_expr, stat_expr))
+						break;
+
+					idx++;
+				}
+			}
+
+			/* index should be valid */
+			Assert((idx >= 0) && (idx < bms_num_members(keys) + list_length(exprs)));
 
 			/*
 			 * Walk through the MCV items and evaluate the current clause. We
@@ -1810,7 +2068,7 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
 			Assert(list_length(bool_clauses) >= 2);
 
 			/* build the match bitmap for the OR-clauses */
-			bool_matches = mcv_get_match_bitmap(root, bool_clauses, keys,
+			bool_matches = mcv_get_match_bitmap(root, bool_clauses, keys, exprs,
 												mcvlist, is_orclause(clause));
 
 			/*
@@ -1838,7 +2096,7 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
 			Assert(list_length(not_args) == 1);
 
 			/* build the match bitmap for the NOT-clause */
-			not_matches = mcv_get_match_bitmap(root, not_args, keys,
+			not_matches = mcv_get_match_bitmap(root, not_args, keys, exprs,
 											   mcvlist, false);
 
 			/*
@@ -1917,7 +2175,8 @@ mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat,
 	mcv = statext_mcv_load(stat->statOid);
 
 	/* build a match bitmap for the clauses */
-	matches = mcv_get_match_bitmap(root, clauses, stat->keys, mcv, false);
+	matches = mcv_get_match_bitmap(root, clauses, stat->keys, stat->exprs,
+								   mcv, false);
 
 	/* sum frequencies for all the matching MCV items */
 	*basesel = 0.0;
diff --git a/src/backend/statistics/mvdistinct.c b/src/backend/statistics/mvdistinct.c
index 4b86f0ab2d..8a9a60cdea 100644
--- a/src/backend/statistics/mvdistinct.c
+++ b/src/backend/statistics/mvdistinct.c
@@ -37,8 +37,12 @@
 #include "utils/typcache.h"
 
 static double ndistinct_for_combination(double totalrows, int numrows,
-										HeapTuple *rows, VacAttrStats **stats,
-										int k, int *combination);
+										HeapTuple *rows, Datum *exprvals,
+										bool *exprnulls, Oid *exprtypes,
+										Oid *exprcollations,
+										int nattrs, int nexprs,
+										VacAttrStats **stats, int k,
+										int *combination);
 static double estimate_ndistinct(double totalrows, int numrows, int d, int f1);
 static int	n_choose_k(int n, int k);
 static int	num_combinations(int n);
@@ -84,13 +88,26 @@ static void generate_combinations(CombinationGenerator *state);
  */
 MVNDistinct *
 statext_ndistinct_build(double totalrows, int numrows, HeapTuple *rows,
-						Bitmapset *attrs, VacAttrStats **stats)
+						Datum *exprvals, bool *exprnulls,
+						Oid *exprtypes, Oid *exprcollations,
+						Bitmapset *attrs, List *exprs,
+						VacAttrStats **stats)
 {
 	MVNDistinct *result;
+	int			i;
 	int			k;
 	int			itemcnt;
 	int			numattrs = bms_num_members(attrs);
-	int			numcombs = num_combinations(numattrs);
+	int			numcombs = num_combinations(numattrs + list_length(exprs));
+
+	/*
+	 * Copy the bitmapset and add fake attnums representing expressions,
+	 * starting above MaxHeapAttributeNumber.
+	 */
+	attrs = bms_copy(attrs);
+
+	for (i = 1; i <= list_length(exprs); i++)
+		attrs = bms_add_member(attrs, MaxHeapAttributeNumber + i);
 
 	result = palloc(offsetof(MVNDistinct, items) +
 					numcombs * sizeof(MVNDistinctItem));
@@ -99,13 +116,13 @@ statext_ndistinct_build(double totalrows, int numrows, HeapTuple *rows,
 	result->nitems = numcombs;
 
 	itemcnt = 0;
-	for (k = 2; k <= numattrs; k++)
+	for (k = 2; k <= bms_num_members(attrs); k++)
 	{
 		int		   *combination;
 		CombinationGenerator *generator;
 
 		/* generate combinations of K out of N elements */
-		generator = generator_init(numattrs, k);
+		generator = generator_init(bms_num_members(attrs), k);
 
 		while ((combination = generator_next(generator)))
 		{
@@ -114,10 +131,19 @@ statext_ndistinct_build(double totalrows, int numrows, HeapTuple *rows,
 
 			item->attrs = NULL;
 			for (j = 0; j < k; j++)
-				item->attrs = bms_add_member(item->attrs,
-											 stats[combination[j]]->attr->attnum);
+			{
+				if (combination[j] < numattrs)
+					item->attrs = bms_add_member(item->attrs,
+												 stats[combination[j]]->attr->attnum);
+				else
+					item->attrs = bms_add_member(item->attrs, MaxHeapAttributeNumber + combination[j] + 1);
+			}
+
 			item->ndistinct =
 				ndistinct_for_combination(totalrows, numrows, rows,
+										  exprvals, exprnulls,
+										  exprtypes, exprcollations,
+										  numattrs, list_length(exprs),
 										  stats, k, combination);
 
 			itemcnt++;
@@ -428,6 +454,9 @@ pg_ndistinct_send(PG_FUNCTION_ARGS)
  */
 static double
 ndistinct_for_combination(double totalrows, int numrows, HeapTuple *rows,
+						  Datum *exprvals, bool *exprnulls,
+						  Oid *exprtypes, Oid *exprcollations,
+						  int nattrs, int nexprs,
 						  VacAttrStats **stats, int k, int *combination)
 {
 	int			i,
@@ -467,25 +496,48 @@ ndistinct_for_combination(double totalrows, int numrows, HeapTuple *rows,
 	 */
 	for (i = 0; i < k; i++)
 	{
-		VacAttrStats *colstat = stats[combination[i]];
+		Oid				typid;
 		TypeCacheEntry *type;
+		AttrNumber		attnum = InvalidAttrNumber;
+		TupleDesc		tdesc = NULL;
+		Oid				collid = InvalidOid;
+
+		if (combination[i] < nattrs)
+		{
+			VacAttrStats *colstat = stats[combination[i]];
+			typid = colstat->attrtypid;
+			attnum = colstat->attr->attnum;
+			collid = colstat->attrcollid;
+			tdesc = colstat->tupDesc;
+		}
+		else
+		{
+			typid = exprtypes[combination[i] - nattrs];
+			collid = exprcollations[combination[i] - nattrs];
+		}
 
-		type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
+		type = lookup_type_cache(typid, TYPECACHE_LT_OPR);
 		if (type->lt_opr == InvalidOid) /* shouldn't happen */
 			elog(ERROR, "cache lookup failed for ordering operator for type %u",
-				 colstat->attrtypid);
+				 typid);
 
 		/* prepare the sort function for this dimension */
-		multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
+		multi_sort_add_dimension(mss, i, type->lt_opr, collid);
 
 		/* accumulate all the data for this dimension into the arrays */
 		for (j = 0; j < numrows; j++)
 		{
-			items[j].values[i] =
-				heap_getattr(rows[j],
-							 colstat->attr->attnum,
-							 colstat->tupDesc,
-							 &items[j].isnull[i]);
+			if (combination[i] < nattrs)
+				items[j].values[i] =
+					heap_getattr(rows[j],
+								 attnum,
+								 tdesc,
+								 &items[j].isnull[i]);
+			else
+			{
+				items[j].values[i] = exprvals[j * nexprs + combination[i]];
+				items[j].isnull[i] = exprnulls[j * nexprs + combination[i]];
+			}
 		}
 	}
 
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c
index f398027fa6..28481e2cd1 100644
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -1835,7 +1835,22 @@ ProcessUtilitySlow(ParseState *pstate,
 				break;
 
 			case T_CreateStatsStmt:
-				address = CreateStatistics((CreateStatsStmt *) parsetree);
+				{
+					Oid			relid;
+					CreateStatsStmt *stmt = (CreateStatsStmt *) parsetree;
+					RangeVar   *rel = (RangeVar *) linitial(stmt->relations);
+
+					/*
+					 * XXX RangeVarCallbackOwnsRelation not needed needed here,
+					 * to keep the same behavior as before.
+					 */
+					relid = RangeVarGetRelid(rel, ShareLock, false);
+
+					/* Run parse analysis ... */
+					stmt = transformStatsStmt(relid, stmt, queryString);
+
+					address = CreateStatistics(stmt);
+				}
 				break;
 
 			case T_AlterStatsStmt:
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index c2c6df2a4f..f791a2d546 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -337,7 +337,8 @@ static char *pg_get_indexdef_worker(Oid indexrelid, int colno,
 									bool attrsOnly, bool keysOnly,
 									bool showTblSpc, bool inherits,
 									int prettyFlags, bool missing_ok);
-static char *pg_get_statisticsobj_worker(Oid statextid, bool missing_ok);
+static char *pg_get_statisticsobj_worker(Oid statextid, bool columns_only,
+										 bool missing_ok);
 static char *pg_get_partkeydef_worker(Oid relid, int prettyFlags,
 									  bool attrsOnly, bool missing_ok);
 static char *pg_get_constraintdef_worker(Oid constraintId, bool fullCommand,
@@ -1508,7 +1509,21 @@ pg_get_statisticsobjdef(PG_FUNCTION_ARGS)
 	Oid			statextid = PG_GETARG_OID(0);
 	char	   *res;
 
-	res = pg_get_statisticsobj_worker(statextid, true);
+	res = pg_get_statisticsobj_worker(statextid, false, true);
+
+	if (res == NULL)
+		PG_RETURN_NULL();
+
+	PG_RETURN_TEXT_P(string_to_text(res));
+}
+
+Datum
+pg_get_statisticsobjdef_columns(PG_FUNCTION_ARGS)
+{
+	Oid			statextid = PG_GETARG_OID(0);
+	char	   *res;
+
+	res = pg_get_statisticsobj_worker(statextid, true, true);
 
 	if (res == NULL)
 		PG_RETURN_NULL();
@@ -1520,7 +1535,7 @@ pg_get_statisticsobjdef(PG_FUNCTION_ARGS)
  * Internal workhorse to decompile an extended statistics object.
  */
 static char *
-pg_get_statisticsobj_worker(Oid statextid, bool missing_ok)
+pg_get_statisticsobj_worker(Oid statextid, bool columns_only, bool missing_ok)
 {
 	Form_pg_statistic_ext statextrec;
 	HeapTuple	statexttup;
@@ -1535,6 +1550,9 @@ pg_get_statisticsobj_worker(Oid statextid, bool missing_ok)
 	bool		dependencies_enabled;
 	bool		mcv_enabled;
 	int			i;
+	List	   *context;
+	ListCell   *lc;
+	List	   *exprs = NIL;
 
 	statexttup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statextid));
 
@@ -1549,71 +1567,75 @@ pg_get_statisticsobj_worker(Oid statextid, bool missing_ok)
 
 	initStringInfo(&buf);
 
-	nsp = get_namespace_name(statextrec->stxnamespace);
-	appendStringInfo(&buf, "CREATE STATISTICS %s",
-					 quote_qualified_identifier(nsp,
-												NameStr(statextrec->stxname)));
-
-	/*
-	 * Decode the stxkind column so that we know which stats types to print.
-	 */
-	datum = SysCacheGetAttr(STATEXTOID, statexttup,
-							Anum_pg_statistic_ext_stxkind, &isnull);
-	Assert(!isnull);
-	arr = DatumGetArrayTypeP(datum);
-	if (ARR_NDIM(arr) != 1 ||
-		ARR_HASNULL(arr) ||
-		ARR_ELEMTYPE(arr) != CHAROID)
-		elog(ERROR, "stxkind is not a 1-D char array");
-	enabled = (char *) ARR_DATA_PTR(arr);
-
-	ndistinct_enabled = false;
-	dependencies_enabled = false;
-	mcv_enabled = false;
-
-	for (i = 0; i < ARR_DIMS(arr)[0]; i++)
+	if (!columns_only)
 	{
-		if (enabled[i] == STATS_EXT_NDISTINCT)
-			ndistinct_enabled = true;
-		if (enabled[i] == STATS_EXT_DEPENDENCIES)
-			dependencies_enabled = true;
-		if (enabled[i] == STATS_EXT_MCV)
-			mcv_enabled = true;
-	}
+		nsp = get_namespace_name(statextrec->stxnamespace);
+		appendStringInfo(&buf, "CREATE STATISTICS %s",
+						 quote_qualified_identifier(nsp,
+													NameStr(statextrec->stxname)));
 
-	/*
-	 * If any option is disabled, then we'll need to append the types clause
-	 * to show which options are enabled.  We omit the types clause on purpose
-	 * when all options are enabled, so a pg_dump/pg_restore will create all
-	 * statistics types on a newer postgres version, if the statistics had all
-	 * options enabled on the original version.
-	 */
-	if (!ndistinct_enabled || !dependencies_enabled || !mcv_enabled)
-	{
-		bool		gotone = false;
+		/*
+		 * Decode the stxkind column so that we know which stats types to print.
+		 */
+		datum = SysCacheGetAttr(STATEXTOID, statexttup,
+								Anum_pg_statistic_ext_stxkind, &isnull);
+		Assert(!isnull);
+		arr = DatumGetArrayTypeP(datum);
+		if (ARR_NDIM(arr) != 1 ||
+			ARR_HASNULL(arr) ||
+			ARR_ELEMTYPE(arr) != CHAROID)
+			elog(ERROR, "stxkind is not a 1-D char array");
+		enabled = (char *) ARR_DATA_PTR(arr);
 
-		appendStringInfoString(&buf, " (");
+		ndistinct_enabled = false;
+		dependencies_enabled = false;
+		mcv_enabled = false;
 
-		if (ndistinct_enabled)
+		for (i = 0; i < ARR_DIMS(arr)[0]; i++)
 		{
-			appendStringInfoString(&buf, "ndistinct");
-			gotone = true;
+			if (enabled[i] == STATS_EXT_NDISTINCT)
+				ndistinct_enabled = true;
+			if (enabled[i] == STATS_EXT_DEPENDENCIES)
+				dependencies_enabled = true;
+			if (enabled[i] == STATS_EXT_MCV)
+				mcv_enabled = true;
 		}
 
-		if (dependencies_enabled)
+		/*
+		 * If any option is disabled, then we'll need to append the types clause
+		 * to show which options are enabled.  We omit the types clause on purpose
+		 * when all options are enabled, so a pg_dump/pg_restore will create all
+		 * statistics types on a newer postgres version, if the statistics had all
+		 * options enabled on the original version.
+		 */
+		if (!ndistinct_enabled || !dependencies_enabled || !mcv_enabled)
 		{
-			appendStringInfo(&buf, "%sdependencies", gotone ? ", " : "");
-			gotone = true;
-		}
+			bool		gotone = false;
 
-		if (mcv_enabled)
-			appendStringInfo(&buf, "%smcv", gotone ? ", " : "");
+			appendStringInfoString(&buf, " (");
 
-		appendStringInfoChar(&buf, ')');
-	}
+			if (ndistinct_enabled)
+			{
+				appendStringInfoString(&buf, "ndistinct");
+				gotone = true;
+			}
+
+			if (dependencies_enabled)
+			{
+				appendStringInfo(&buf, "%sdependencies", gotone ? ", " : "");
+				gotone = true;
+			}
 
-	appendStringInfoString(&buf, " ON ");
+			if (mcv_enabled)
+				appendStringInfo(&buf, "%smcv", gotone ? ", " : "");
 
+			appendStringInfoChar(&buf, ')');
+		}
+
+		appendStringInfoString(&buf, " ON ");
+	}
+
+	/* decode simple column references */
 	for (colno = 0; colno < statextrec->stxkeys.dim1; colno++)
 	{
 		AttrNumber	attnum = statextrec->stxkeys.values[colno];
@@ -1627,8 +1649,74 @@ pg_get_statisticsobj_worker(Oid statextid, bool missing_ok)
 		appendStringInfoString(&buf, quote_identifier(attname));
 	}
 
-	appendStringInfo(&buf, " FROM %s",
-					 generate_relation_name(statextrec->stxrelid, NIL));
+	/*
+	 * Get the statistics expressions, if any.  (NOTE: we do not use the
+	 * relcache versions of the expressions and predicate, because we want
+	 * to display non-const-folded expressions.)
+	 */
+	if (!heap_attisnull(statexttup, Anum_pg_statistic_ext_stxexprs, NULL))
+	{
+		Datum		exprsDatum;
+		bool		isnull;
+		char	   *exprsString;
+
+		exprsDatum = SysCacheGetAttr(STATEXTOID, statexttup,
+									 Anum_pg_statistic_ext_stxexprs, &isnull);
+		Assert(!isnull);
+		exprsString = TextDatumGetCString(exprsDatum);
+		exprs = (List *) stringToNode(exprsString);
+		pfree(exprsString);
+
+		/*
+		 * Run the expressions through eval_const_expressions. This is not just an
+		 * optimization, but is necessary, because the planner will be comparing
+		 * them to similarly-processed qual clauses, and may fail to detect valid
+		 * matches without this.  We must not use canonicalize_qual, however,
+		 * since these aren't qual expressions.
+		 *
+		 * XXX Not sure if this is really needed, it's not in pg_get_indexdef. In
+		 * fact the comment above suggests we don't want const-folding here.
+		 */
+		// exprs = (List *) eval_const_expressions(NULL, (Node *) exprs);
+
+		/*
+		 * May as well fix opfuncids too
+		 *
+		 * XXX Same here. Is this something we want/need?
+		 */
+		// fix_opfuncids((Node *) exprs);
+
+	}
+	else
+		exprs = NIL;
+
+	context = deparse_context_for(get_relation_name(statextrec->stxrelid),
+								  statextrec->stxrelid);
+
+	foreach (lc, exprs)
+	{
+		Node	   *expr = (Node *) lfirst(lc);
+		char	   *str;
+		int			prettyFlags = PRETTYFLAG_INDENT;
+
+		str = deparse_expression_pretty(expr, context, false, false,
+										prettyFlags, 0);
+
+		if (colno > 0)
+			appendStringInfoString(&buf, ", ");
+
+		/* Need parens if it's not a bare function call */
+		if (looks_like_function(expr))
+			appendStringInfoString(&buf, str);
+		else
+			appendStringInfo(&buf, "(%s)", str);
+
+		colno++;
+	}
+
+	if (!columns_only)
+		appendStringInfo(&buf, " FROM %s",
+						 generate_relation_name(statextrec->stxrelid, NIL));
 
 	ReleaseSysCache(statexttup);
 
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index bec357fcef..e9be02fbac 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -3243,6 +3243,15 @@ typedef struct
 	double		ndistinct;		/* # distinct values */
 } GroupVarInfo;
 
+
+typedef struct
+{
+	Node	   *expr;			/* expression */
+	RelOptInfo *rel;			/* relation it belongs to */
+	List	   *varinfos;		/* info for variables in this expression */
+} GroupExprInfo;
+
+
 static List *
 add_unique_group_var(PlannerInfo *root, List *varinfos,
 					 Node *var, VariableStatData *vardata)
@@ -3291,6 +3300,47 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
 	return varinfos;
 }
 
+static List *
+add_unique_group_expr(PlannerInfo *root, List *exprinfos,
+					 Node *expr, List *vars)
+{
+	GroupExprInfo *exprinfo;
+	ListCell   *lc;
+
+	foreach(lc, exprinfos)
+	{
+		exprinfo = (GroupExprInfo *) lfirst(lc);
+
+		/* Drop exact duplicates */
+		if (equal(expr, exprinfo->expr))
+			return exprinfos;
+	}
+
+	exprinfo = (GroupExprInfo *) palloc(sizeof(GroupExprInfo));
+
+	exprinfo->expr = expr;
+	exprinfo->varinfos = NIL;
+
+	foreach (lc, vars)
+	{
+		VariableStatData vardata;
+		Node *var = (Node *) lfirst(lc);
+
+		examine_variable(root, var, 0, &vardata);
+
+		exprinfo->varinfos = add_unique_group_var(root, exprinfo->varinfos, var, &vardata);
+
+		exprinfo->rel = vardata.rel;
+
+		ReleaseVariableStats(vardata);
+	}
+
+	exprinfos = lappend(exprinfos, exprinfo);
+
+	return exprinfos;
+}
+
+
 /*
  * estimate_num_groups		- Estimate number of groups in a grouped query
  *
@@ -3361,6 +3411,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 					List **pgset)
 {
 	List	   *varinfos = NIL;
+	List	   *exprinfos = NIL;
 	double		srf_multiplier = 1.0;
 	double		numdistinct;
 	ListCell   *l;
@@ -3398,6 +3449,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		double		this_srf_multiplier;
 		VariableStatData vardata;
 		List	   *varshere;
+		Relids		varnos;
 		ListCell   *l2;
 
 		/* is expression in this grouping set? */
@@ -3465,6 +3517,18 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 			continue;
 		}
 
+		/*
+		 * Are all the variables from the same relation? If yes, search for
+		 * an extended statistic matching this expression exactly.
+		 */
+		varnos = pull_varnos((Node *) varshere);
+		if (bms_membership(varnos) == BMS_SINGLETON)
+		{
+			exprinfos = add_unique_group_expr(root, exprinfos,
+											  groupexpr, varshere);
+			// elog(WARNING, "exprinfos: %d vars %d", list_length(exprinfos), list_length(varshere));
+		}
+
 		/*
 		 * Else add variables to varinfos list
 		 */
@@ -3506,32 +3570,32 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 	 */
 	do
 	{
-		GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
-		RelOptInfo *rel = varinfo1->rel;
+		GroupExprInfo *exprinfo1 = (GroupExprInfo *) linitial(exprinfos);
+		RelOptInfo *rel = exprinfo1->rel;
 		double		reldistinct = 1;
 		double		relmaxndistinct = reldistinct;
 		int			relvarcount = 0;
-		List	   *newvarinfos = NIL;
-		List	   *relvarinfos = NIL;
+		List	   *newexprinfos = NIL;
+		List	   *relexprinfos = NIL;
 
 		/*
 		 * Split the list of varinfos in two - one for the current rel, one
 		 * for remaining Vars on other rels.
 		 */
-		relvarinfos = lappend(relvarinfos, varinfo1);
-		for_each_from(l, varinfos, 1)
+		relexprinfos = lappend(relexprinfos, exprinfo1);
+		for_each_from(l, exprinfos, 1)
 		{
-			GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
+			GroupExprInfo *exprinfo2 = (GroupExprInfo *) lfirst(l);
 
-			if (varinfo2->rel == varinfo1->rel)
+			if (exprinfo2->rel == exprinfo1->rel)
 			{
 				/* varinfos on current rel */
-				relvarinfos = lappend(relvarinfos, varinfo2);
+				relexprinfos = lappend(relexprinfos, exprinfo2);
 			}
 			else
 			{
-				/* not time to process varinfo2 yet */
-				newvarinfos = lappend(newvarinfos, varinfo2);
+				/* not time to process exprinfo2 yet */
+				newexprinfos = lappend(newexprinfos, exprinfo2);
 			}
 		}
 
@@ -3547,11 +3611,11 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		 * apply.  We apply a fudge factor below, but only if we multiplied
 		 * more than one such values.
 		 */
-		while (relvarinfos)
+		while (relexprinfos)
 		{
 			double		mvndistinct;
 
-			if (estimate_multivariate_ndistinct(root, rel, &relvarinfos,
+			if (estimate_multivariate_ndistinct(root, rel, &relexprinfos,
 												&mvndistinct))
 			{
 				reldistinct *= mvndistinct;
@@ -3561,18 +3625,27 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 			}
 			else
 			{
-				foreach(l, relvarinfos)
+				// elog(WARNING, "relexprinfos %d", list_length(relexprinfos));
+				foreach(l, relexprinfos)
 				{
-					GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
+					ListCell *lc;
+					GroupExprInfo *exprinfo2 = (GroupExprInfo *) lfirst(l);
+
+					// elog(WARNING, "exprinfo2->varinfos %d", list_length(exprinfo2->varinfos));
 
-					reldistinct *= varinfo2->ndistinct;
-					if (relmaxndistinct < varinfo2->ndistinct)
-						relmaxndistinct = varinfo2->ndistinct;
-					relvarcount++;
+					foreach (lc, exprinfo2->varinfos)
+					{
+						GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(lc);
+
+						reldistinct *= varinfo2->ndistinct;
+						if (relmaxndistinct < varinfo2->ndistinct)
+							relmaxndistinct = varinfo2->ndistinct;
+						relvarcount++;
+					}
 				}
 
 				/* we're done with this relation */
-				relvarinfos = NIL;
+				relexprinfos = NIL;
 			}
 		}
 
@@ -3658,8 +3731,8 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 			numdistinct *= reldistinct;
 		}
 
-		varinfos = newvarinfos;
-	} while (varinfos != NIL);
+		exprinfos = newexprinfos;
+	} while (exprinfos != NIL);
 
 	/* Now we can account for the effects of any SRFs */
 	numdistinct *= srf_multiplier;
@@ -3875,53 +3948,84 @@ estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs,
  */
 static bool
 estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
-								List **varinfos, double *ndistinct)
+								List **exprinfos, double *ndistinct)
 {
 	ListCell   *lc;
-	Bitmapset  *attnums = NULL;
-	int			nmatches;
+	int			nmatches_vars;
+	int			nmatches_exprs;
 	Oid			statOid = InvalidOid;
 	MVNDistinct *stats;
-	Bitmapset  *matched = NULL;
+	StatisticExtInfo *matched_info = NULL;
 
 	/* bail out immediately if the table has no extended statistics */
 	if (!rel->statlist)
 		return false;
 
-	/* Determine the attnums we're looking for */
-	foreach(lc, *varinfos)
-	{
-		GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
-		AttrNumber	attnum;
-
-		Assert(varinfo->rel == rel);
-
-		if (!IsA(varinfo->var, Var))
-			continue;
-
-		attnum = ((Var *) varinfo->var)->varattno;
-
-		if (!AttrNumberIsForUserDefinedAttr(attnum))
-			continue;
-
-		attnums = bms_add_member(attnums, attnum);
-	}
+	// elog(WARNING, "A");
 
 	/* look for the ndistinct statistics matching the most vars */
-	nmatches = 1;				/* we require at least two matches */
+	nmatches_vars = 0;				/* we require at least two matches */
+	nmatches_exprs = 0;
 	foreach(lc, rel->statlist)
 	{
+		ListCell	*lc2;
 		StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
-		Bitmapset  *shared;
-		int			nshared;
+		int			nshared_vars = 0;
+		int			nshared_exprs = 0;
 
 		/* skip statistics of other kinds */
 		if (info->kind != STATS_EXT_NDISTINCT)
 			continue;
 
-		/* compute attnums shared by the vars and the statistics object */
-		shared = bms_intersect(info->keys, attnums);
-		nshared = bms_num_members(shared);
+		/*
+		 * Determine how many expressions (and variables in non-matched
+		 * expressions) match.
+		 */
+		foreach(lc2, *exprinfos)
+		{
+			ListCell *lc3;
+			GroupExprInfo *exprinfo = (GroupExprInfo *) lfirst(lc2);
+			AttrNumber	attnum;
+
+			Assert(exprinfo->rel == rel);
+
+			// elog(WARNING, "B");
+
+			/* simple Var, search in statistics keys directly */
+			if (IsA(exprinfo->expr, Var))
+			{
+				attnum = ((Var *) exprinfo->expr)->varattno;
+
+				if (!AttrNumberIsForUserDefinedAttr(attnum))
+					continue;
+
+				if (bms_is_member(attnum, info->keys))
+					nshared_vars++;
+
+				continue;
+			}
+
+			// elog(WARNING, "C");
+
+			/* expression - see if it's in the statistics */
+			foreach (lc3, info->exprs)
+			{
+				Node *expr = (Node *) lfirst(lc3);
+
+				if (equal(exprinfo->expr, expr))
+				{
+					nshared_exprs++;
+					// elog(WARNING, "list_length(exprinfo->varinfos) = %d", list_length(exprinfo->varinfos));
+					nshared_vars += list_length(exprinfo->varinfos);
+					break;
+				}
+			}
+		}
+
+		// elog(WARNING, "D nshared_vars %d nshared_exprs %d", nshared_vars, nshared_exprs);
+
+		if (nshared_vars + nshared_exprs < 2)
+			continue;
 
 		/*
 		 * Does this statistics object match more columns than the currently
@@ -3930,18 +4034,22 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
 		 * XXX This should break ties using name of the object, or something
 		 * like that, to make the outcome stable.
 		 */
-		if (nshared > nmatches)
+		if ((nshared_vars > nmatches_vars) ||
+			((nshared_vars == nmatches_vars) && (nshared_exprs > nmatches_exprs)))
 		{
+			// elog(WARNING, "oid %d", info->statOid);
 			statOid = info->statOid;
-			nmatches = nshared;
-			matched = shared;
+			nmatches_vars = nshared_vars;
+			nmatches_exprs = nshared_exprs;
+			matched_info = info;
 		}
 	}
 
 	/* No match? */
 	if (statOid == InvalidOid)
 		return false;
-	Assert(nmatches > 1 && matched != NULL);
+
+	Assert(nmatches_vars + nmatches_exprs > 1);
 
 	stats = statext_ndistinct_load(statOid);
 
@@ -3954,6 +4062,43 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
 		int			i;
 		List	   *newlist = NIL;
 		MVNDistinctItem *item = NULL;
+		ListCell   *lc2;
+		Bitmapset  *matched = NULL;
+
+		/* see what actually matched */
+		foreach (lc2, *exprinfos)
+		{
+			ListCell   *lc3;
+			int			idx;
+			bool		found = false;
+
+			GroupExprInfo *exprinfo = (GroupExprInfo *) lfirst(lc2);
+
+			/* expression - see if it's in the statistics */
+			idx = 0;
+			foreach (lc3, matched_info->exprs)
+			{
+				Node *expr = (Node *) lfirst(lc3);
+
+				idx++;
+
+				// elog(WARNING, "expr A: %s", nodeToString(exprinfo->expr));
+				// elog(WARNING, "expr B: %s", nodeToString(expr));
+
+				if (equal(exprinfo->expr, expr))
+				{
+					// elog(WARNING, "adding %d", MaxHeapAttributeNumber + idx);
+					matched = bms_add_member(matched, MaxHeapAttributeNumber + idx);
+					found = true;
+					break;
+				}
+			}
+
+			if (!found)
+			{
+				/* FIXME try matching the Vars from exprinfo->varinfos */
+			}
+		}
 
 		/* Find the specific item that exactly matches the combination */
 		for (i = 0; i < stats->nitems; i++)
@@ -3963,6 +4108,7 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
 			if (bms_subset_compare(tmpitem->attrs, matched) == BMS_EQUAL)
 			{
 				item = tmpitem;
+				// elog(WARNING, "found item with %f", item->ndistinct);
 				break;
 			}
 		}
@@ -3972,9 +4118,10 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
 			elog(ERROR, "corrupt MVNDistinct entry");
 
 		/* Form the output varinfo list, keeping only unmatched ones */
-		foreach(lc, *varinfos)
+		/*
+		foreach(lc, *exprinfos)
 		{
-			GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
+			GroupExprInfo *exprinfo = (GroupExprInfo *) lfirst(lc);
 			AttrNumber	attnum;
 
 			if (!IsA(varinfo->var, Var))
@@ -3991,8 +4138,9 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
 			if (!bms_is_member(attnum, matched))
 				newlist = lappend(newlist, varinfo);
 		}
+		*/
 
-		*varinfos = newlist;
+		*exprinfos = newlist;
 		*ndistinct = item->ndistinct;
 		return true;
 	}
diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c
index 07d640021c..cffcc99e91 100644
--- a/src/bin/psql/describe.c
+++ b/src/bin/psql/describe.c
@@ -2676,15 +2676,14 @@ describeOneTableDetails(const char *schemaname,
 		/* print any extended statistics */
 		if (pset.sversion >= 100000)
 		{
+			/* FIXME this needs to be version-dependent, because older
+			 * versions don't have pg_get_statisticsobjdef_columns */
 			printfPQExpBuffer(&buf,
 							  "SELECT oid, "
 							  "stxrelid::pg_catalog.regclass, "
 							  "stxnamespace::pg_catalog.regnamespace AS nsp, "
 							  "stxname,\n"
-							  "  (SELECT pg_catalog.string_agg(pg_catalog.quote_ident(attname),', ')\n"
-							  "   FROM pg_catalog.unnest(stxkeys) s(attnum)\n"
-							  "   JOIN pg_catalog.pg_attribute a ON (stxrelid = a.attrelid AND\n"
-							  "        a.attnum = s.attnum AND NOT attisdropped)) AS columns,\n"
+							  "pg_get_statisticsobjdef_columns(oid) AS columns,\n"
 							  "  'd' = any(stxkind) AS ndist_enabled,\n"
 							  "  'f' = any(stxkind) AS deps_enabled,\n"
 							  "  'm' = any(stxkind) AS mcv_enabled,\n");
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index c01da4bf01..71cfe599fb 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -3655,6 +3655,10 @@
   proname => 'pg_get_statisticsobjdef', provolatile => 's',
   prorettype => 'text', proargtypes => 'oid',
   prosrc => 'pg_get_statisticsobjdef' },
+{ oid => '8887', descr => 'extended statistics columns',
+  proname => 'pg_get_statisticsobjdef_columns', provolatile => 's',
+  prorettype => 'text', proargtypes => 'oid',
+  prosrc => 'pg_get_statisticsobjdef_columns' },
 { oid => '3352', descr => 'partition key description',
   proname => 'pg_get_partkeydef', provolatile => 's', prorettype => 'text',
   proargtypes => 'oid', prosrc => 'pg_get_partkeydef' },
diff --git a/src/include/catalog/pg_statistic_ext.h b/src/include/catalog/pg_statistic_ext.h
index 61d402c600..a2247c448c 100644
--- a/src/include/catalog/pg_statistic_ext.h
+++ b/src/include/catalog/pg_statistic_ext.h
@@ -52,6 +52,9 @@ CATALOG(pg_statistic_ext,3381,StatisticExtRelationId)
 #ifdef CATALOG_VARLEN
 	char		stxkind[1] BKI_FORCE_NOT_NULL;	/* statistics kinds requested
 												 * to build */
+	pg_node_tree stxexprs;		/* expression trees for stats attributes that
+								 * are not simple column references; one for
+								 * each zero entry in stxkeys[] */
 #endif
 
 } FormData_pg_statistic_ext;
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 7ddd8c011b..48b3689a31 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -451,6 +451,7 @@ typedef enum NodeTag
 	T_TypeName,
 	T_ColumnDef,
 	T_IndexElem,
+	T_StatsElem,
 	T_Constraint,
 	T_DefElem,
 	T_RangeTblEntry,
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index a2dcdef3fa..ec16529804 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -2812,8 +2812,24 @@ typedef struct CreateStatsStmt
 	List	   *relations;		/* rels to build stats on (list of RangeVar) */
 	char	   *stxcomment;		/* comment to apply to stats, or NULL */
 	bool		if_not_exists;	/* do nothing if stats name already exists */
+	bool		transformed;	/* true when transformIndexStmt is finished */
 } CreateStatsStmt;
 
+/*
+ * StatsElem - statistics parameters (used in CREATE STATISTICS)
+ *
+ * For a plain attribute, 'name' is the name of the referenced table column
+ * and 'expr' is NULL.  For an expression, 'name' is NULL and 'expr' is the
+ * expression tree.
+ */
+typedef struct StatsElem
+{
+	NodeTag		type;
+	char	   *name;			/* name of attribute to index, or NULL */
+	Node	   *expr;			/* expression to index, or NULL */
+} StatsElem;
+
+
 /* ----------------------
  *		Alter Statistics Statement
  * ----------------------
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 8f62d61702..f768925a1a 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -911,6 +911,7 @@ typedef struct StatisticExtInfo
 	RelOptInfo *rel;			/* back-link to statistic's table */
 	char		kind;			/* statistic kind of this entry */
 	Bitmapset  *keys;			/* attnums of the columns covered */
+	List	   *exprs;			/* expressions */
 } StatisticExtInfo;
 
 /*
diff --git a/src/include/parser/parse_node.h b/src/include/parser/parse_node.h
index d25819aa28..82e5190964 100644
--- a/src/include/parser/parse_node.h
+++ b/src/include/parser/parse_node.h
@@ -69,6 +69,7 @@ typedef enum ParseExprKind
 	EXPR_KIND_FUNCTION_DEFAULT, /* default parameter value for function */
 	EXPR_KIND_INDEX_EXPRESSION, /* index expression */
 	EXPR_KIND_INDEX_PREDICATE,	/* index predicate */
+	EXPR_KIND_STATS_EXPRESSION, /* extended statistics expression */
 	EXPR_KIND_ALTER_COL_TRANSFORM,	/* transform expr in ALTER COLUMN TYPE */
 	EXPR_KIND_EXECUTE_PARAMETER,	/* parameter value in EXECUTE */
 	EXPR_KIND_TRIGGER_WHEN,		/* WHEN condition in CREATE TRIGGER */
diff --git a/src/include/parser/parse_utilcmd.h b/src/include/parser/parse_utilcmd.h
index bc3d66ed88..e50e2e77fe 100644
--- a/src/include/parser/parse_utilcmd.h
+++ b/src/include/parser/parse_utilcmd.h
@@ -26,6 +26,8 @@ extern AlterTableStmt *transformAlterTableStmt(Oid relid, AlterTableStmt *stmt,
 											   List **afterStmts);
 extern IndexStmt *transformIndexStmt(Oid relid, IndexStmt *stmt,
 									 const char *queryString);
+extern CreateStatsStmt *transformStatsStmt(Oid relid, CreateStatsStmt *stmt,
+									 const char *queryString);
 extern void transformRuleStmt(RuleStmt *stmt, const char *queryString,
 							  List **actions, Node **whereClause);
 extern List *transformCreateSchemaStmt(CreateSchemaStmt *stmt);
diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h
index 61e69696cf..e2bcb2097f 100644
--- a/src/include/statistics/extended_stats_internal.h
+++ b/src/include/statistics/extended_stats_internal.h
@@ -59,17 +59,26 @@ typedef struct SortItem
 
 extern MVNDistinct *statext_ndistinct_build(double totalrows,
 											int numrows, HeapTuple *rows,
-											Bitmapset *attrs, VacAttrStats **stats);
+											Datum *exprvals, bool *exprnulls,
+											Oid *exprtypes, Oid *exrcollations,
+											Bitmapset *attrs, List *exprs,
+											VacAttrStats **stats);
 extern bytea *statext_ndistinct_serialize(MVNDistinct *ndistinct);
 extern MVNDistinct *statext_ndistinct_deserialize(bytea *data);
 
 extern MVDependencies *statext_dependencies_build(int numrows, HeapTuple *rows,
-												  Bitmapset *attrs, VacAttrStats **stats);
+												  Datum *exprvals, bool *exprnulls,
+												  Oid *exprtypes, Oid *exprcollations,
+												  Bitmapset *attrs, List *exprs,
+												  VacAttrStats **stats);
 extern bytea *statext_dependencies_serialize(MVDependencies *dependencies);
 extern MVDependencies *statext_dependencies_deserialize(bytea *data);
 
 extern MCVList *statext_mcv_build(int numrows, HeapTuple *rows,
-								  Bitmapset *attrs, VacAttrStats **stats,
+								  Datum *exprvals, bool *exprnulls,
+								  Oid *exprtypes, Oid *exprcollations,
+								  Bitmapset *attrs, List *exprs,
+								  VacAttrStats **stats,
 								  double totalrows, int stattarget);
 extern bytea *statext_mcv_serialize(MCVList *mcv, VacAttrStats **stats);
 extern MCVList *statext_mcv_deserialize(bytea *data);
@@ -93,11 +102,19 @@ extern void *bsearch_arg(const void *key, const void *base,
 extern AttrNumber *build_attnums_array(Bitmapset *attrs, int *numattrs);
 
 extern SortItem *build_sorted_items(int numrows, int *nitems, HeapTuple *rows,
+									Datum *exprvals, bool *exprnulls,
+									Oid *exprtypes, int nexprs,
 									TupleDesc tdesc, MultiSortSupport mss,
 									int numattrs, AttrNumber *attnums);
 
 extern bool examine_clause_args(List *args, Var **varp,
 								Const **cstp, bool *varonleftp);
+extern bool examine_clause_args2(List *args, Node **exprp,
+								 Const **cstp, bool *expronleftp);
+extern bool examine_opclause_expression(OpExpr *expr, Var **varp, Const **cstp,
+										bool *varonleftp);
+extern bool examine_opclause_expression2(OpExpr *expr, Node **exprp, Const **cstp,
+										 bool *expronleftp);
 
 extern Selectivity mcv_clauselist_selectivity(PlannerInfo *root,
 											  StatisticExtInfo *stat,
diff --git a/src/include/statistics/statistics.h b/src/include/statistics/statistics.h
index 50fce4935f..70784c08ea 100644
--- a/src/include/statistics/statistics.h
+++ b/src/include/statistics/statistics.h
@@ -120,6 +120,7 @@ extern Selectivity statext_clauselist_selectivity(PlannerInfo *root,
 extern bool has_stats_of_kind(List *stats, char requiredkind);
 extern StatisticExtInfo *choose_best_statistics(List *stats, char requiredkind,
 												Bitmapset **clause_attnums,
+												Node **clause_exprs,
 												int nclauses);
 
 #endif							/* STATISTICS_H */
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index 4c3edd213f..00f16c6e5c 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -43,12 +43,17 @@ CREATE STATISTICS tst ON a, b FROM pg_class;
 ERROR:  column "a" does not exist
 CREATE STATISTICS tst ON relname, relname, relnatts FROM pg_class;
 ERROR:  duplicate column name in statistics definition
-CREATE STATISTICS tst ON relnatts + relpages FROM pg_class;
-ERROR:  only simple column references are allowed in CREATE STATISTICS
-CREATE STATISTICS tst ON (relpages, reltuples) FROM pg_class;
-ERROR:  only simple column references are allowed in CREATE STATISTICS
 CREATE STATISTICS tst (unrecognized) ON relname, relnatts FROM pg_class;
 ERROR:  unrecognized statistics kind "unrecognized"
+-- incorrect expressions
+CREATE STATISTICS tst ON relnatts + relpages FROM pg_class; -- missing parentheses
+ERROR:  syntax error at or near "+"
+LINE 1: CREATE STATISTICS tst ON relnatts + relpages FROM pg_class;
+                                          ^
+CREATE STATISTICS tst ON (relpages, reltuples) FROM pg_class; -- tuple expression
+ERROR:  syntax error at or near ","
+LINE 1: CREATE STATISTICS tst ON (relpages, reltuples) FROM pg_class...
+                                          ^
 -- Ensure stats are dropped sanely, and test IF NOT EXISTS while at it
 CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
 CREATE STATISTICS IF NOT EXISTS ab1_a_b_stats ON a, b FROM ab1;
@@ -425,6 +430,40 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE
          1 |      1
 (1 row)
 
+-- a => b, a => c, b => c
+TRUNCATE functional_dependencies;
+DROP STATISTICS func_deps_stat;
+-- now do the same thing, but with expressions
+INSERT INTO functional_dependencies (a, b, c, filler1)
+     SELECT i, i, i, i FROM generate_series(1,5000) s(i);
+ANALYZE functional_dependencies;
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 23) = 1 AND mod(b::int, 29) = 1');
+ estimated | actual 
+-----------+--------
+         8 |      8
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 23) = 1 AND mod(b::int, 29) = 1 AND mod(c, 31) = 1');
+ estimated | actual 
+-----------+--------
+         1 |      1
+(1 row)
+
+-- create statistics
+CREATE STATISTICS func_deps_stat (dependencies) ON (mod(a,23)), (mod(b::int, 29)), (mod(c, 31)) FROM functional_dependencies;
+ANALYZE functional_dependencies;
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 23) = 1 AND mod(b::int, 29) = 1');
+ estimated | actual 
+-----------+--------
+         8 |      8
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 23) = 1 AND mod(b::int, 29) = 1 AND mod(c, 31) = 1');
+ estimated | actual 
+-----------+--------
+         1 |      1
+(1 row)
+
 -- a => b, a => c, b => c
 TRUNCATE functional_dependencies;
 DROP STATISTICS func_deps_stat;
@@ -894,6 +933,39 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b =
          1 |      1
 (1 row)
 
+TRUNCATE mcv_lists;
+DROP STATISTICS mcv_lists_stats;
+-- random data (no MCV list), but with expression
+INSERT INTO mcv_lists (a, b, c, filler1)
+     SELECT i, i, i, i FROM generate_series(1,5000) s(i);
+ANALYZE mcv_lists;
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,37) = 1 AND mod(b::int,41) = 1');
+ estimated | actual 
+-----------+--------
+         3 |      4
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,37) = 1 AND mod(b::int,41) = 1 AND mod(c,47) = 1');
+ estimated | actual 
+-----------+--------
+         1 |      1
+(1 row)
+
+-- create statistics
+CREATE STATISTICS mcv_lists_stats (mcv) ON (mod(a,37)), (mod(b::int,41)), (mod(c,47)) FROM mcv_lists;
+ANALYZE mcv_lists;
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,37) = 1 AND mod(b::int,41) = 1');
+ estimated | actual 
+-----------+--------
+         3 |      4
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,37) = 1 AND mod(b::int,41) = 1 AND mod(c,47) = 1');
+ estimated | actual 
+-----------+--------
+         1 |      1
+(1 row)
+
 -- 100 distinct combinations, all in the MCV list
 TRUNCATE mcv_lists;
 DROP STATISTICS mcv_lists_stats;
@@ -1206,6 +1278,286 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b =
         50 |     50
 (1 row)
 
+-- 100 distinct combinations, all in the MCV list, but with expressions
+TRUNCATE mcv_lists;
+DROP STATISTICS mcv_lists_stats;
+INSERT INTO mcv_lists (a, b, c, filler1)
+     SELECT i, i, i, i FROM generate_series(1,5000) s(i);
+ANALYZE mcv_lists;
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 AND mod(b::int,50) = 1');
+ estimated | actual 
+-----------+--------
+         1 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = mod(a,100) AND 1 = mod(b::int,50)');
+ estimated | actual 
+-----------+--------
+         1 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < 1 AND mod(b::int,50) < 1');
+ estimated | actual 
+-----------+--------
+         1 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > mod(a,100) AND 1 > mod(b::int,50)');
+ estimated | actual 
+-----------+--------
+         1 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= 0 AND mod(b::int,50) <= 0');
+ estimated | actual 
+-----------+--------
+         1 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 0 >= mod(a,100) AND 0 >= mod(b::int,50)');
+ estimated | actual 
+-----------+--------
+         1 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 AND mod(b::int,50) = 1 AND mod(c,25) = 1');
+ estimated | actual 
+-----------+--------
+         1 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < 5 AND mod(b::int,50) < 1 AND mod(c,25) < 5');
+ estimated | actual 
+-----------+--------
+         1 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < 5 AND 1 > mod(b::int,50) AND 5 > mod(c,25)');
+ estimated | actual 
+-----------+--------
+         1 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= 4 AND mod(b::int,50) <= 0 AND mod(c,25) <= 4');
+ estimated | actual 
+-----------+--------
+         1 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 4 >= mod(a,100) AND 0 >= mod(b::int,50) AND 4 >= mod(c,25)');
+ estimated | actual 
+-----------+--------
+         1 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 OR mod(b::int,50) = 1 OR mod(c,25) = 1');
+ estimated | actual 
+-----------+--------
+       343 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) IN (1, 2, 51, 52) AND mod(b::int,50) IN ( 1, 2)');
+ estimated | actual 
+-----------+--------
+       343 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) IN (1, 2, 51, 52, NULL) AND mod(b::int,50) IN ( 1, 2, NULL)');
+ estimated | actual 
+-----------+--------
+         8 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = ANY (ARRAY[1, 2, 51, 52]) AND mod(b::int,50) = ANY (ARRAY[1, 2])');
+ estimated | actual 
+-----------+--------
+         8 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = ANY (ARRAY[NULL, 1, 2, 51, 52]) AND mod(b::int,50) = ANY (ARRAY[1, 2, NULL])');
+ estimated | actual 
+-----------+--------
+         8 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= ANY (ARRAY[1, 2, 3]) AND mod(b::int,50) IN (1, 2, 3)');
+ estimated | actual 
+-----------+--------
+         8 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= ANY (ARRAY[1, NULL, 2, 3]) AND mod(b::int,50) IN (1, 2, NULL, 3)');
+ estimated | actual 
+-----------+--------
+        26 |    150
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(c,25) > ANY (ARRAY[1, 2, 3])');
+ estimated | actual 
+-----------+--------
+        26 |    150
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(c,25) > ANY (ARRAY[1, 2, 3, NULL])');
+ estimated | actual 
+-----------+--------
+        10 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(b::int,50) IN (1, 2, 3) AND mod(c,25) > ANY (ARRAY[1, 2, 3])');
+ estimated | actual 
+-----------+--------
+        10 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(b::int,50) IN (1, 2, NULL, 3) AND mod(c,25) > ANY (ARRAY[1, 2, NULL, 3])');
+ estimated | actual 
+-----------+--------
+         1 |    100
+(1 row)
+
+-- create statistics
+CREATE STATISTICS mcv_lists_stats (mcv) ON (mod(a,100)), (mod(b::int,50)), (mod(c,25)) FROM mcv_lists;
+ANALYZE mcv_lists;
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 AND mod(b::int,50) = 1');
+ estimated | actual 
+-----------+--------
+        50 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = mod(a,100) AND 1 = mod(b::int,50)');
+ estimated | actual 
+-----------+--------
+        50 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < 1 AND mod(b::int,50) < 1');
+ estimated | actual 
+-----------+--------
+        50 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > mod(a,100) AND 1 > mod(b::int,50)');
+ estimated | actual 
+-----------+--------
+        50 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= 0 AND mod(b::int,50) <= 0');
+ estimated | actual 
+-----------+--------
+        50 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 0 >= mod(a,100) AND 0 >= mod(b::int,50)');
+ estimated | actual 
+-----------+--------
+        50 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 AND mod(b::int,50) = 1 AND mod(c,25) = 1');
+ estimated | actual 
+-----------+--------
+         1 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < 5 AND mod(b::int,50) < 1 AND mod(c,25) < 5');
+ estimated | actual 
+-----------+--------
+        50 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < 5 AND 1 > mod(b::int,50) AND 5 > mod(c,25)');
+ estimated | actual 
+-----------+--------
+        50 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= 4 AND mod(b::int,50) <= 0 AND mod(c,25) <= 4');
+ estimated | actual 
+-----------+--------
+        50 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 4 >= mod(a,100) AND 0 >= mod(b::int,50) AND 4 >= mod(c,25)');
+ estimated | actual 
+-----------+--------
+        50 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 OR mod(b::int,50) = 1 OR mod(c,25) = 1');
+ estimated | actual 
+-----------+--------
+       200 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) IN (1, 2, 51, 52) AND mod(b::int,50) IN ( 1, 2)');
+ estimated | actual 
+-----------+--------
+       200 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) IN (1, 2, 51, 52, NULL) AND mod(b::int,50) IN ( 1, 2, NULL)');
+ estimated | actual 
+-----------+--------
+       200 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = ANY (ARRAY[1, 2, 51, 52]) AND mod(b::int,50) = ANY (ARRAY[1, 2])');
+ estimated | actual 
+-----------+--------
+       200 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = ANY (ARRAY[NULL, 1, 2, 51, 52]) AND mod(b::int,50) = ANY (ARRAY[1, 2, NULL])');
+ estimated | actual 
+-----------+--------
+       200 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= ANY (ARRAY[1, 2, 3]) AND mod(b::int,50) IN (1, 2, 3)');
+ estimated | actual 
+-----------+--------
+       150 |    150
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= ANY (ARRAY[1, NULL, 2, 3]) AND mod(b::int,50) IN (1, 2, NULL, 3)');
+ estimated | actual 
+-----------+--------
+       150 |    150
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(c,25) > ANY (ARRAY[1, 2, 3])');
+ estimated | actual 
+-----------+--------
+       100 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(c,25) > ANY (ARRAY[1, 2, 3, NULL])');
+ estimated | actual 
+-----------+--------
+       100 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(b::int,50) IN (''1'', ''2'', ''3'') AND mod(c,25) > ANY (ARRAY[1, 2, 3])');
+ estimated | actual 
+-----------+--------
+       100 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(b::int,50) IN (''1'', ''2'', NULL, ''3'') AND mod(c,25) > ANY (ARRAY[1, 2, NULL, 3])');
+ estimated | actual 
+-----------+--------
+       100 |    100
+(1 row)
+
+-- we can't use the statistic for OR clauses that are not fully covered (missing 'd' attribute)
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 OR mod(b::int,50) = 1 OR mod(c,25) = 1 OR d IS NOT NULL');
+ estimated | actual 
+-----------+--------
+       343 |    200
+(1 row)
+
 -- 100 distinct combinations with NULL values, all in the MCV list
 TRUNCATE mcv_lists;
 DROP STATISTICS mcv_lists_stats;
@@ -1535,6 +1887,105 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 AN
 (1 row)
 
 DROP TABLE mcv_lists_multi;
+-- statistics on integer expressions
+CREATE TABLE expr_stats (a int, b int, c int);
+INSERT INTO expr_stats SELECT mod(i,10), mod(i,10), mod(i,10) FROM generate_series(1,10000) s(i);
+ANALYZE expr_stats;
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (2*a) = 0 AND (3*b) = 0');
+ estimated | actual 
+-----------+--------
+         1 |   1000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (a+b) = 0 AND (a-b) = 0');
+ estimated | actual 
+-----------+--------
+         1 |   1000
+(1 row)
+
+CREATE STATISTICS expr_stats_1 (mcv) ON (a+b), (a-b), (2*a), (3*b) FROM expr_stats;
+ANALYZE expr_stats;
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (2*a) = 0 AND (3*b) = 0');
+ estimated | actual 
+-----------+--------
+      1000 |   1000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (a+b) = 0 AND (a-b) = 0');
+ estimated | actual 
+-----------+--------
+      1000 |   1000
+(1 row)
+
+-- FIXME add dependency tracking for expressions, to automatically drop after DROP TABLE
+-- (not it fails, when there are no simple column references)
+DROP STATISTICS expr_stats_1;
+DROP TABLE expr_stats;
+-- statistics on a mix columns and expressions
+CREATE TABLE expr_stats (a int, b int, c int);
+INSERT INTO expr_stats SELECT mod(i,10), mod(i,10), mod(i,10) FROM generate_series(1,10000) s(i);
+ANALYZE expr_stats;
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (2*a) = 0 AND (3*b) = 0');
+ estimated | actual 
+-----------+--------
+         1 |   1000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 3 AND b = 3 AND (a-b) = 0');
+ estimated | actual 
+-----------+--------
+         1 |   1000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND b = 1 AND (a-b) = 0');
+ estimated | actual 
+-----------+--------
+         1 |      0
+(1 row)
+
+CREATE STATISTICS expr_stats_1 (mcv) ON a, b, (2*a), (3*b), (a+b), (a-b) FROM expr_stats;
+ANALYZE expr_stats;
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (2*a) = 0 AND (3*b) = 0');
+ estimated | actual 
+-----------+--------
+      1000 |   1000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 3 AND b = 3 AND (a-b) = 0');
+ estimated | actual 
+-----------+--------
+      1000 |   1000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND b = 1 AND (a-b) = 0');
+ estimated | actual 
+-----------+--------
+         1 |      0
+(1 row)
+
+DROP TABLE expr_stats;
+-- statistics on expressions with different data types
+CREATE TABLE expr_stats (a int, b name, c text);
+INSERT INTO expr_stats SELECT mod(i,10), md5(mod(i,10)::text), md5(mod(i,10)::text) FROM generate_series(1,10000) s(i);
+ANALYZE expr_stats;
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (b || c) <= ''z'' AND (c || b) >= ''0''');
+ estimated | actual 
+-----------+--------
+       111 |   1000
+(1 row)
+
+CREATE STATISTICS expr_stats_1 (mcv) ON a, b, (b || c), (c || b) FROM expr_stats;
+ANALYZE expr_stats;
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (b || c) <= ''z'' AND (c || b) >= ''0''');
+ estimated | actual 
+-----------+--------
+      1000 |   1000
+(1 row)
+
+-- FIXME add dependency tracking for expressions, to automatically drop after DROP TABLE
+-- (not it fails, when there are no simple column references)
+DROP STATISTICS expr_stats_1;
+DROP TABLE expr_stats;
 -- Permission tests. Users should not be able to see specific data values in
 -- the extended statistics, if they lack permission to see those values in
 -- the underlying table.
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index 9781e590a3..4ec16b1723 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -34,9 +34,10 @@ CREATE STATISTICS tst FROM sometab;
 CREATE STATISTICS tst ON a, b FROM nonexistent;
 CREATE STATISTICS tst ON a, b FROM pg_class;
 CREATE STATISTICS tst ON relname, relname, relnatts FROM pg_class;
-CREATE STATISTICS tst ON relnatts + relpages FROM pg_class;
-CREATE STATISTICS tst ON (relpages, reltuples) FROM pg_class;
 CREATE STATISTICS tst (unrecognized) ON relname, relnatts FROM pg_class;
+-- incorrect expressions
+CREATE STATISTICS tst ON relnatts + relpages FROM pg_class; -- missing parentheses
+CREATE STATISTICS tst ON (relpages, reltuples) FROM pg_class; -- tuple expression
 
 -- Ensure stats are dropped sanely, and test IF NOT EXISTS while at it
 CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
@@ -270,6 +271,29 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE
 TRUNCATE functional_dependencies;
 DROP STATISTICS func_deps_stat;
 
+-- now do the same thing, but with expressions
+INSERT INTO functional_dependencies (a, b, c, filler1)
+     SELECT i, i, i, i FROM generate_series(1,5000) s(i);
+
+ANALYZE functional_dependencies;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 23) = 1 AND mod(b::int, 29) = 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 23) = 1 AND mod(b::int, 29) = 1 AND mod(c, 31) = 1');
+
+-- create statistics
+CREATE STATISTICS func_deps_stat (dependencies) ON (mod(a,23)), (mod(b::int, 29)), (mod(c, 31)) FROM functional_dependencies;
+
+ANALYZE functional_dependencies;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 23) = 1 AND mod(b::int, 29) = 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 23) = 1 AND mod(b::int, 29) = 1 AND mod(c, 31) = 1');
+
+-- a => b, a => c, b => c
+TRUNCATE functional_dependencies;
+DROP STATISTICS func_deps_stat;
+
 INSERT INTO functional_dependencies (a, b, c, filler1)
      SELECT mod(i,100), mod(i,50), mod(i,25), i FROM generate_series(1,5000) s(i);
 
@@ -477,6 +501,28 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b =
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'' AND c = 1');
 
+TRUNCATE mcv_lists;
+DROP STATISTICS mcv_lists_stats;
+
+-- random data (no MCV list), but with expression
+INSERT INTO mcv_lists (a, b, c, filler1)
+     SELECT i, i, i, i FROM generate_series(1,5000) s(i);
+
+ANALYZE mcv_lists;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,37) = 1 AND mod(b::int,41) = 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,37) = 1 AND mod(b::int,41) = 1 AND mod(c,47) = 1');
+
+-- create statistics
+CREATE STATISTICS mcv_lists_stats (mcv) ON (mod(a,37)), (mod(b::int,41)), (mod(c,47)) FROM mcv_lists;
+
+ANALYZE mcv_lists;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,37) = 1 AND mod(b::int,41) = 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,37) = 1 AND mod(b::int,41) = 1 AND mod(c,47) = 1');
+
 -- 100 distinct combinations, all in the MCV list
 TRUNCATE mcv_lists;
 DROP STATISTICS mcv_lists_stats;
@@ -601,6 +647,113 @@ ANALYZE mcv_lists;
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1''');
 
+
+-- 100 distinct combinations, all in the MCV list, but with expressions
+TRUNCATE mcv_lists;
+DROP STATISTICS mcv_lists_stats;
+
+INSERT INTO mcv_lists (a, b, c, filler1)
+     SELECT i, i, i, i FROM generate_series(1,5000) s(i);
+
+ANALYZE mcv_lists;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 AND mod(b::int,50) = 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = mod(a,100) AND 1 = mod(b::int,50)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < 1 AND mod(b::int,50) < 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > mod(a,100) AND 1 > mod(b::int,50)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= 0 AND mod(b::int,50) <= 0');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 0 >= mod(a,100) AND 0 >= mod(b::int,50)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 AND mod(b::int,50) = 1 AND mod(c,25) = 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < 5 AND mod(b::int,50) < 1 AND mod(c,25) < 5');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < 5 AND 1 > mod(b::int,50) AND 5 > mod(c,25)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= 4 AND mod(b::int,50) <= 0 AND mod(c,25) <= 4');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 4 >= mod(a,100) AND 0 >= mod(b::int,50) AND 4 >= mod(c,25)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 OR mod(b::int,50) = 1 OR mod(c,25) = 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) IN (1, 2, 51, 52) AND mod(b::int,50) IN ( 1, 2)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) IN (1, 2, 51, 52, NULL) AND mod(b::int,50) IN ( 1, 2, NULL)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = ANY (ARRAY[1, 2, 51, 52]) AND mod(b::int,50) = ANY (ARRAY[1, 2])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = ANY (ARRAY[NULL, 1, 2, 51, 52]) AND mod(b::int,50) = ANY (ARRAY[1, 2, NULL])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= ANY (ARRAY[1, 2, 3]) AND mod(b::int,50) IN (1, 2, 3)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= ANY (ARRAY[1, NULL, 2, 3]) AND mod(b::int,50) IN (1, 2, NULL, 3)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(c,25) > ANY (ARRAY[1, 2, 3])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(c,25) > ANY (ARRAY[1, 2, 3, NULL])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(b::int,50) IN (1, 2, 3) AND mod(c,25) > ANY (ARRAY[1, 2, 3])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(b::int,50) IN (1, 2, NULL, 3) AND mod(c,25) > ANY (ARRAY[1, 2, NULL, 3])');
+
+-- create statistics
+CREATE STATISTICS mcv_lists_stats (mcv) ON (mod(a,100)), (mod(b::int,50)), (mod(c,25)) FROM mcv_lists;
+
+ANALYZE mcv_lists;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 AND mod(b::int,50) = 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = mod(a,100) AND 1 = mod(b::int,50)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < 1 AND mod(b::int,50) < 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > mod(a,100) AND 1 > mod(b::int,50)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= 0 AND mod(b::int,50) <= 0');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 0 >= mod(a,100) AND 0 >= mod(b::int,50)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 AND mod(b::int,50) = 1 AND mod(c,25) = 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < 5 AND mod(b::int,50) < 1 AND mod(c,25) < 5');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < 5 AND 1 > mod(b::int,50) AND 5 > mod(c,25)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= 4 AND mod(b::int,50) <= 0 AND mod(c,25) <= 4');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 4 >= mod(a,100) AND 0 >= mod(b::int,50) AND 4 >= mod(c,25)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 OR mod(b::int,50) = 1 OR mod(c,25) = 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) IN (1, 2, 51, 52) AND mod(b::int,50) IN ( 1, 2)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) IN (1, 2, 51, 52, NULL) AND mod(b::int,50) IN ( 1, 2, NULL)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = ANY (ARRAY[1, 2, 51, 52]) AND mod(b::int,50) = ANY (ARRAY[1, 2])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = ANY (ARRAY[NULL, 1, 2, 51, 52]) AND mod(b::int,50) = ANY (ARRAY[1, 2, NULL])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= ANY (ARRAY[1, 2, 3]) AND mod(b::int,50) IN (1, 2, 3)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= ANY (ARRAY[1, NULL, 2, 3]) AND mod(b::int,50) IN (1, 2, NULL, 3)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(c,25) > ANY (ARRAY[1, 2, 3])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(c,25) > ANY (ARRAY[1, 2, 3, NULL])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(b::int,50) IN (''1'', ''2'', ''3'') AND mod(c,25) > ANY (ARRAY[1, 2, 3])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(b::int,50) IN (''1'', ''2'', NULL, ''3'') AND mod(c,25) > ANY (ARRAY[1, 2, NULL, 3])');
+
+-- we can't use the statistic for OR clauses that are not fully covered (missing 'd' attribute)
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 OR mod(b::int,50) = 1 OR mod(c,25) = 1 OR d IS NOT NULL');
+
+
 -- 100 distinct combinations with NULL values, all in the MCV list
 TRUNCATE mcv_lists;
 DROP STATISTICS mcv_lists_stats;
@@ -813,6 +966,63 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 AN
 
 DROP TABLE mcv_lists_multi;
 
+
+-- statistics on integer expressions
+CREATE TABLE expr_stats (a int, b int, c int);
+INSERT INTO expr_stats SELECT mod(i,10), mod(i,10), mod(i,10) FROM generate_series(1,10000) s(i);
+ANALYZE expr_stats;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (2*a) = 0 AND (3*b) = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (a+b) = 0 AND (a-b) = 0');
+
+CREATE STATISTICS expr_stats_1 (mcv) ON (a+b), (a-b), (2*a), (3*b) FROM expr_stats;
+ANALYZE expr_stats;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (2*a) = 0 AND (3*b) = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (a+b) = 0 AND (a-b) = 0');
+
+-- FIXME add dependency tracking for expressions, to automatically drop after DROP TABLE
+-- (not it fails, when there are no simple column references)
+DROP STATISTICS expr_stats_1;
+DROP TABLE expr_stats;
+
+-- statistics on a mix columns and expressions
+CREATE TABLE expr_stats (a int, b int, c int);
+INSERT INTO expr_stats SELECT mod(i,10), mod(i,10), mod(i,10) FROM generate_series(1,10000) s(i);
+ANALYZE expr_stats;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (2*a) = 0 AND (3*b) = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 3 AND b = 3 AND (a-b) = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND b = 1 AND (a-b) = 0');
+
+CREATE STATISTICS expr_stats_1 (mcv) ON a, b, (2*a), (3*b), (a+b), (a-b) FROM expr_stats;
+ANALYZE expr_stats;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (2*a) = 0 AND (3*b) = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 3 AND b = 3 AND (a-b) = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND b = 1 AND (a-b) = 0');
+
+DROP TABLE expr_stats;
+
+-- statistics on expressions with different data types
+CREATE TABLE expr_stats (a int, b name, c text);
+INSERT INTO expr_stats SELECT mod(i,10), md5(mod(i,10)::text), md5(mod(i,10)::text) FROM generate_series(1,10000) s(i);
+ANALYZE expr_stats;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (b || c) <= ''z'' AND (c || b) >= ''0''');
+
+CREATE STATISTICS expr_stats_1 (mcv) ON a, b, (b || c), (c || b) FROM expr_stats;
+ANALYZE expr_stats;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (b || c) <= ''z'' AND (c || b) >= ''0''');
+
+-- FIXME add dependency tracking for expressions, to automatically drop after DROP TABLE
+-- (not it fails, when there are no simple column references)
+DROP STATISTICS expr_stats_1;
+
+DROP TABLE expr_stats;
+
+
 -- Permission tests. Users should not be able to see specific data values in
 -- the extended statistics, if they lack permission to see those values in
 -- the underlying table.
-- 
2.26.2

