From a37237e5f508a5de972c622357d91df84eede80f Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas@2ndquadrant.com>
Date: Fri, 13 Nov 2020 02:37:06 +0100
Subject: [PATCH 3/7] Extended statistics on expressions

Allow defining extended statistics on expressions, not just simple
column references. With this commit, it's possible to do things like

  CREATE TABLE t (a int);
  CREATE STATISTICS s ON mod(a,10), mod(a,20) FROM t;

and the collected statistics will be useful for estimating queries
using those expressions in various places, like

  SELECT * FROM t WHERE mod(a,10) = 0 AND mod(a,20) = 0;

or

  SELECT mod(a,10), mod(a,20) FROM t GROUP BY 1, 2;

The commit also adds a new statistics type "expressions" which builds
the usual per-column statistics for each expression, allowing better
estimates even for queries with just a single expression, which are
not affected by multi-column statistics. This achieves the same goal
as creating expression indexes, without index maintenance overhead.
---
 doc/src/sgml/ref/create_statistics.sgml       |   99 +-
 src/backend/catalog/Makefile                  |    8 +-
 src/backend/commands/statscmds.c              |  356 +++-
 src/backend/nodes/copyfuncs.c                 |   14 +
 src/backend/nodes/equalfuncs.c                |   13 +
 src/backend/nodes/outfuncs.c                  |   12 +
 src/backend/optimizer/util/plancat.c          |   53 +
 src/backend/parser/gram.y                     |   31 +-
 src/backend/parser/parse_agg.c                |   10 +
 src/backend/parser/parse_expr.c               |    6 +
 src/backend/parser/parse_func.c               |    3 +
 src/backend/parser/parse_utilcmd.c            |  120 +-
 src/backend/statistics/dependencies.c         |  366 +++-
 src/backend/statistics/extended_stats.c       | 1477 ++++++++++++++++-
 src/backend/statistics/mcv.c                  |  291 +++-
 src/backend/statistics/mvdistinct.c           |   99 +-
 src/backend/tcop/utility.c                    |   17 +-
 src/backend/utils/adt/ruleutils.c             |  225 ++-
 src/backend/utils/adt/selfuncs.c              |  407 ++++-
 src/bin/psql/describe.c                       |   22 +-
 src/include/catalog/pg_proc.dat               |    4 +
 src/include/catalog/pg_statistic_ext.h        |    4 +
 src/include/catalog/pg_statistic_ext_data.h   |    1 +
 src/include/nodes/nodes.h                     |    1 +
 src/include/nodes/parsenodes.h                |   16 +
 src/include/nodes/pathnodes.h                 |    1 +
 src/include/parser/parse_node.h               |    1 +
 src/include/parser/parse_utilcmd.h            |    2 +
 .../statistics/extended_stats_internal.h      |   40 +-
 src/include/statistics/statistics.h           |    2 +
 src/test/regress/expected/stats_ext.out       |  659 +++++++-
 src/test/regress/sql/stats_ext.sql            |  304 +++-
 32 files changed, 4328 insertions(+), 336 deletions(-)
diff --git a/doc/src/sgml/ref/create_statistics.sgml b/doc/src/sgml/ref/create_statistics.sgml
index 4363be50c3..f4a75b3c8e 100644
--- a/doc/src/sgml/ref/create_statistics.sgml
+++ b/doc/src/sgml/ref/create_statistics.sgml
@@ -23,7 +23,7 @@ PostgreSQL documentation
 <synopsis>
 CREATE STATISTICS [ IF NOT EXISTS ] <replaceable class="parameter">statistics_name</replaceable>
     [ ( <replaceable class="parameter">statistics_kind</replaceable> [, ... ] ) ]
-    ON <replaceable class="parameter">column_name</replaceable>, <replaceable class="parameter">column_name</replaceable> [, ...]
+    ON { <replaceable class="parameter">column_name</replaceable> | ( <replaceable class="parameter">expression</replaceable> ) } [, ...]
     FROM <replaceable class="parameter">table_name</replaceable>
 </synopsis>
 
@@ -81,12 +81,15 @@ CREATE STATISTICS [ IF NOT EXISTS ] <replaceable class="parameter">statistics_na
      <para>
       A statistics kind to be computed in this statistics object.
       Currently supported kinds are
+      <literal>expressions</literal>, which enables expression statistics,
       <literal>ndistinct</literal>, which enables n-distinct statistics,
       <literal>dependencies</literal>, which enables functional
       dependency statistics, and <literal>mcv</literal> which enables
       most-common values lists.
       If this clause is omitted, all supported statistics kinds are
-      included in the statistics object.
+      included in the statistics object. Expression statistics are included
+      only when the statistics definition includes complex expressions and
+      not just simple column references.
       For more information, see <xref linkend="planner-stats-extended"/>
       and <xref linkend="multivariate-statistics-examples"/>.
      </para>
@@ -104,6 +107,17 @@ CREATE STATISTICS [ IF NOT EXISTS ] <replaceable class="parameter">statistics_na
     </listitem>
    </varlistentry>
 
+   <varlistentry>
+    <term><replaceable class="parameter">expression</replaceable></term>
+    <listitem>
+     <para>
+      The expression to be covered by the computed statistics. In this case
+      only a single expression is required, in which case only the expression
+      statistics kind is allowed. The order of expressions is insignificant.
+     </para>
+    </listitem>
+   </varlistentry>
+
    <varlistentry>
     <term><replaceable class="parameter">table_name</replaceable></term>
     <listitem>
@@ -125,6 +139,31 @@ CREATE STATISTICS [ IF NOT EXISTS ] <replaceable class="parameter">statistics_na
    reading it.  Once created, however, the ownership of the statistics
    object is independent of the underlying table(s).
   </para>
+
+  <para>
+   Creating expressions statistics is allowed only when there actually are
+   any expression. Expression statistics are per-expression and are very
+   similar to creating index on the expression, except that it eliminates
+   the index maintenance overhead.
+  </para>
+
+  <para>
+   The expression can refer only to columns of the underlying table, but
+   it can use all columns, not just the ones the statistics is defined
+   on.  In fact, the statistics may be defined only on expressions.
+   Presently, subqueries and aggregate expressions are also forbidden
+   in the expressions.
+  </para>
+
+  <para>
+   All functions and operators used in an statistics definition must be
+   <quote>immutable</quote>, that is, their results must depend only on
+   their arguments and never on any outside influence (such as
+   the contents of another table or the current time).  This restriction
+   ensures that the behavior of the statistics is well-defined.  To use a
+   user-defined function in a statistics expression, remember to mark
+   the function immutable when you create it.
+  </para>
  </refsect1>
 
  <refsect1 id="sql-createstatistics-examples">
@@ -196,6 +235,62 @@ EXPLAIN ANALYZE SELECT * FROM t2 WHERE (a = 1) AND (b = 2);
    in the table, allowing it to generate better estimates in both cases.
   </para>
 
+  <para>
+   Create table <structname>t3</structname> with a single timestamp column,
+   and run a query using an expression on that column. 
+   knowledge of a value in the first column is sufficient for determining the
+   value in the other column. Then functional dependency statistics are built
+   on those columns:
+
+<programlisting>
+CREATE TABLE t3 (
+    a   timestamp
+);
+
+INSERT INTO t3 SELECT i FROM generate_series('2020-01-01'::timestamp,
+                                             '2020-12-31'::timestamp,
+                                             '1 minute'::interval) s(i);
+
+ANALYZE t3;
+
+-- the number of matching rows will be drastically underestimated:
+EXPLAIN ANALYZE SELECT * FROM t3
+  WHERE date_trunc('month', a) = '2020-01-01'::timestamp;
+
+EXPLAIN ANALYZE SELECT * FROM t3
+  WHERE date_trunc('day', a) BETWEEN '2020-01-01'::timestamp
+                                 AND '2020-06-30'::timestamp;
+
+EXPLAIN ANALYZE SELECT date_trunc('month', a), date_trunc('day', a)
+   FROM t3 GROUP BY 1, 2;
+
+CREATE STATISTICS s3 (expressions, ndistinct) ON date_trunc('month', a), date_trunc('day', a) FROM t3;
+
+ANALYZE t1;
+
+-- now the row count estimates are more accurate:
+EXPLAIN ANALYZE SELECT * FROM t3
+  WHERE date_trunc('month', a) = '2020-01-01'::timestamp;
+
+EXPLAIN ANALYZE SELECT * FROM t3
+  WHERE date_trunc('day', a) BETWEEN '2020-01-01'::timestamp
+                                 AND '2020-06-30'::timestamp;
+
+EXPLAIN ANALYZE SELECT date_trunc('month', a), date_trunc('day', a)
+   FROM t3 GROUP BY 1, 2;
+</programlisting>
+
+   Without expression and ndistinct statistics, the planner would assume
+   that the two <literal>WHERE</literal> and <literal>GROUP BY</literal>
+   conditions are independent, and would multiply their selectivities
+   together to arrive at a much-too-small row count estimate in the first
+   two queries, and a much-too-high group count estimate in the aggregate
+   query. This is further exacerbated by the lack of accurate statistics
+   for the expressions, forcing the planner to use default selectivities.
+   With such statistics, the planner recognizes that the conditions are
+   correlated and arrives at much more accurate estimates.
+  </para>
+
  </refsect1>
 
  <refsect1>
diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile
index 2519771210..203dfb2911 100644
--- a/src/backend/catalog/Makefile
+++ b/src/backend/catalog/Makefile
@@ -49,15 +49,15 @@ include $(top_srcdir)/src/backend/common.mk
 
 # Note: the order of this list determines the order in which the catalog
 # header files are assembled into postgres.bki.  BKI_BOOTSTRAP catalogs
-# must appear first, and there are reputedly other, undocumented ordering
-# dependencies.
+# must appear first, and pg_statistic before pg_statistic_ext_data, and
+# are are reputedly other, undocumented ordering dependencies.
 CATALOG_HEADERS := \
 	pg_proc.h pg_type.h pg_attribute.h pg_class.h \
 	pg_attrdef.h pg_constraint.h pg_inherits.h pg_index.h pg_operator.h \
 	pg_opfamily.h pg_opclass.h pg_am.h pg_amop.h pg_amproc.h \
 	pg_language.h pg_largeobject_metadata.h pg_largeobject.h pg_aggregate.h \
-	pg_statistic_ext.h pg_statistic_ext_data.h \
-	pg_statistic.h pg_rewrite.h pg_trigger.h pg_event_trigger.h pg_description.h \
+	pg_statistic.h pg_statistic_ext.h pg_statistic_ext_data.h \
+	pg_rewrite.h pg_trigger.h pg_event_trigger.h pg_description.h \
 	pg_cast.h pg_enum.h pg_namespace.h pg_conversion.h pg_depend.h \
 	pg_database.h pg_db_role_setting.h pg_tablespace.h \
 	pg_authid.h pg_auth_members.h pg_shdepend.h pg_shdescription.h \
diff --git a/src/backend/commands/statscmds.c b/src/backend/commands/statscmds.c
index 3057d89d50..035599469f 100644
--- a/src/backend/commands/statscmds.c
+++ b/src/backend/commands/statscmds.c
@@ -29,6 +29,8 @@
 #include "commands/comment.h"
 #include "commands/defrem.h"
 #include "miscadmin.h"
+#include "nodes/nodeFuncs.h"
+#include "optimizer/optimizer.h"
 #include "statistics/statistics.h"
 #include "utils/builtins.h"
 #include "utils/fmgroids.h"
@@ -42,6 +44,7 @@
 static char *ChooseExtendedStatisticName(const char *name1, const char *name2,
 										 const char *label, Oid namespaceid);
 static char *ChooseExtendedStatisticNameAddition(List *exprs);
+static bool CheckMutability(Expr *expr);
 
 
 /* qsort comparator for the attnums in CreateStatistics */
@@ -62,6 +65,7 @@ ObjectAddress
 CreateStatistics(CreateStatsStmt *stmt)
 {
 	int16		attnums[STATS_MAX_DIMENSIONS];
+	int			nattnums = 0;
 	int			numcols = 0;
 	char	   *namestr;
 	NameData	stxname;
@@ -74,21 +78,26 @@ CreateStatistics(CreateStatsStmt *stmt)
 	Datum		datavalues[Natts_pg_statistic_ext_data];
 	bool		datanulls[Natts_pg_statistic_ext_data];
 	int2vector *stxkeys;
+	List	   *stxexprs = NIL;
+	Datum		exprsDatum;
 	Relation	statrel;
 	Relation	datarel;
 	Relation	rel = NULL;
 	Oid			relid;
 	ObjectAddress parentobject,
 				myself;
-	Datum		types[3];		/* one for each possible type of statistic */
+	Datum		types[4];		/* one for each possible type of statistic */
 	int			ntypes;
 	ArrayType  *stxkind;
 	bool		build_ndistinct;
 	bool		build_dependencies;
 	bool		build_mcv;
+	bool		build_expressions;
+	bool		build_expressions_only;
 	bool		requested_type = false;
 	int			i;
 	ListCell   *cell;
+	ListCell   *cell2;
 
 	Assert(IsA(stmt, CreateStatsStmt));
 
@@ -192,63 +201,179 @@ CreateStatistics(CreateStatsStmt *stmt)
 	foreach(cell, stmt->exprs)
 	{
 		Node	   *expr = (Node *) lfirst(cell);
-		ColumnRef  *cref;
-		char	   *attname;
+		StatsElem  *selem;
 		HeapTuple	atttuple;
 		Form_pg_attribute attForm;
 		TypeCacheEntry *type;
 
-		if (!IsA(expr, ColumnRef))
+		if (!IsA(expr, StatsElem))
 			ereport(ERROR,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 					 errmsg("only simple column references are allowed in CREATE STATISTICS")));
-		cref = (ColumnRef *) expr;
+		selem = (StatsElem *) expr;
 
-		if (list_length(cref->fields) != 1)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("only simple column references are allowed in CREATE STATISTICS")));
-		attname = strVal((Value *) linitial(cref->fields));
+		if (selem->name)	/* column reference */
+		{
+			char	   *attname;
+			attname = selem->name;
+
+			atttuple = SearchSysCacheAttName(relid, attname);
+			if (!HeapTupleIsValid(atttuple))
+				ereport(ERROR,
+						(errcode(ERRCODE_UNDEFINED_COLUMN),
+						 errmsg("column \"%s\" does not exist",
+								attname)));
+			attForm = (Form_pg_attribute) GETSTRUCT(atttuple);
+
+			/* Disallow use of system attributes in extended stats */
+			if (attForm->attnum <= 0)
+				ereport(ERROR,
+						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						 errmsg("statistics creation on system columns is not supported")));
+
+			/* Disallow data types without a less-than operator */
+			type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR);
+			if (type->lt_opr == InvalidOid)
+				ereport(ERROR,
+						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						 errmsg("column \"%s\" cannot be used in statistics because its type %s has no default btree operator class",
+								attname, format_type_be(attForm->atttypid))));
+
+			/* Make sure no more than STATS_MAX_DIMENSIONS columns are used */
+			if (numcols >= STATS_MAX_DIMENSIONS)
+				ereport(ERROR,
+						(errcode(ERRCODE_TOO_MANY_COLUMNS),
+						 errmsg("cannot have more than %d columns in statistics",
+								STATS_MAX_DIMENSIONS)));
+
+			attnums[nattnums] = attForm->attnum;
+			nattnums++;
+			numcols++;
+			ReleaseSysCache(atttuple);
+		}
+		else	/* expression */
+		{
+			Node	   *expr = selem->expr;
+			TypeCacheEntry *type;
+			Oid			atttype;
+
+			Assert(expr != NULL);
+
+			/*
+			 * An expression using mutable functions is probably wrong,
+			 * since if you aren't going to get the same result for the
+			 * same data every time, it's not clear what the index entries
+			 * mean at all.
+			 */
+			if (CheckMutability((Expr *) expr))
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+						 errmsg("functions in statistics expression must be marked IMMUTABLE")));
+
+			/*
+			 * Disallow data types without a less-than operator
+			 *
+			 * XXX Maybe allow this, but only for EXPRESSIONS stats and
+			 * prevent building e.g. MCV etc.
+			 */
+			atttype = exprType(expr);
+			type = lookup_type_cache(atttype, TYPECACHE_LT_OPR);
+			if (type->lt_opr == InvalidOid)
+				ereport(ERROR,
+						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						 errmsg("expression cannot be used in statistics because its type %s has no default btree operator class",
+								format_type_be(atttype))));
+
+			/* Make sure no more than STATS_MAX_DIMENSIONS columns are used */
+			if (numcols >= STATS_MAX_DIMENSIONS)
+				ereport(ERROR,
+						(errcode(ERRCODE_TOO_MANY_COLUMNS),
+						 errmsg("cannot have more than %d columns in statistics",
+								STATS_MAX_DIMENSIONS)));
+
+			numcols++;
+
+			stxexprs = lappend(stxexprs, expr);
+		}
+	}
 
-		atttuple = SearchSysCacheAttName(relid, attname);
-		if (!HeapTupleIsValid(atttuple))
-			ereport(ERROR,
-					(errcode(ERRCODE_UNDEFINED_COLUMN),
-					 errmsg("column \"%s\" does not exist",
-							attname)));
-		attForm = (Form_pg_attribute) GETSTRUCT(atttuple);
+	/*
+	 * Parse the statistics kinds.
+	 */
+	build_ndistinct = false;
+	build_dependencies = false;
+	build_mcv = false;
+	build_expressions = false;
+	foreach(cell, stmt->stat_types)
+	{
+		char	   *type = strVal((Value *) lfirst(cell));
 
-		/* Disallow use of system attributes in extended stats */
-		if (attForm->attnum <= 0)
+		if (strcmp(type, "ndistinct") == 0)
+		{
+			build_ndistinct = true;
+			requested_type = true;
+		}
+		else if (strcmp(type, "dependencies") == 0)
+		{
+			build_dependencies = true;
+			requested_type = true;
+		}
+		else if (strcmp(type, "mcv") == 0)
+		{
+			build_mcv = true;
+			requested_type = true;
+		}
+		else if (strcmp(type, "expressions") == 0)
+		{
+			build_expressions = true;
+			requested_type = true;
+		}
+		else
 			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("statistics creation on system columns is not supported")));
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("unrecognized statistics kind \"%s\"",
+							type)));
+	}
 
-		/* Disallow data types without a less-than operator */
-		type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR);
-		if (type->lt_opr == InvalidOid)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("column \"%s\" cannot be used in statistics because its type %s has no default btree operator class",
-							attname, format_type_be(attForm->atttypid))));
+	/* Are we building only the expression statistics? */
+	build_expressions_only = build_expressions &&
+		(!build_ndistinct) && (!build_dependencies) && (!build_mcv);
 
-		/* Make sure no more than STATS_MAX_DIMENSIONS columns are used */
-		if (numcols >= STATS_MAX_DIMENSIONS)
-			ereport(ERROR,
-					(errcode(ERRCODE_TOO_MANY_COLUMNS),
-					 errmsg("cannot have more than %d columns in statistics",
-							STATS_MAX_DIMENSIONS)));
+	/*
+	 * Check that with explicitly requested expression stats there really
+	 * are some expressions.
+	 */
+	if (build_expressions && (list_length(stxexprs) == 0))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("extended expression statistics require at least one expression")));
 
-		attnums[numcols] = attForm->attnum;
-		numcols++;
-		ReleaseSysCache(atttuple);
-	}
+	/*
+	 * When building only expression stats, all the elements have to be
+	 * expressions. It's pointless to build those stats for regular
+	 * columns, as we already have that in pg_statistic.
+	 *
+	 * XXX This is probably easy to evade by doing "dummy" expression on
+	 * the column, but meh.
+	 */
+	if (build_expressions_only && (nattnums > 0))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("building only extended expression statistics on simple columns not allowed")));
 
 	/*
-	 * Check that at least two columns were specified in the statement. The
-	 * upper bound was already checked in the loop above.
+	 * Check that at least two columns were specified in the statement, or
+	 * one when only expression stats were requested. The upper bound was
+	 * already checked in the loop above.
+	 *
+	 * XXX The first check is probably pointless after the one checking for
+	 * expressions.
 	 */
-	if (numcols < 2)
+	if (build_expressions_only && (numcols == 0))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("extended expression statistics require at least 1 column")));
+	else if (!build_expressions_only && (numcols < 2))
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
 				 errmsg("extended statistics require at least 2 columns")));
@@ -258,13 +383,13 @@ CreateStatistics(CreateStatsStmt *stmt)
 	 * it does not hurt (it does not affect the efficiency, unlike for
 	 * indexes, for example).
 	 */
-	qsort(attnums, numcols, sizeof(int16), compare_int16);
+	qsort(attnums, nattnums, sizeof(int16), compare_int16);
 
 	/*
 	 * Check for duplicates in the list of columns. The attnums are sorted so
 	 * just check consecutive elements.
 	 */
-	for (i = 1; i < numcols; i++)
+	for (i = 1; i < nattnums; i++)
 	{
 		if (attnums[i] == attnums[i - 1])
 			ereport(ERROR,
@@ -272,46 +397,46 @@ CreateStatistics(CreateStatsStmt *stmt)
 					 errmsg("duplicate column name in statistics definition")));
 	}
 
-	/* Form an int2vector representation of the sorted column list */
-	stxkeys = buildint2vector(attnums, numcols);
-
 	/*
-	 * Parse the statistics kinds.
+	 * Check for duplicate expressions. We do two loops, counting the
+	 * occurrences of each expression. This is O(N^2) but we only allow
+	 * small number of expressions and it's not executed often.
 	 */
-	build_ndistinct = false;
-	build_dependencies = false;
-	build_mcv = false;
-	foreach(cell, stmt->stat_types)
+	foreach (cell, stxexprs)
 	{
-		char	   *type = strVal((Value *) lfirst(cell));
+		Node   *expr1 = (Node *) lfirst(cell);
+		int		cnt = 0;
 
-		if (strcmp(type, "ndistinct") == 0)
+		foreach (cell2, stxexprs)
 		{
-			build_ndistinct = true;
-			requested_type = true;
-		}
-		else if (strcmp(type, "dependencies") == 0)
-		{
-			build_dependencies = true;
-			requested_type = true;
-		}
-		else if (strcmp(type, "mcv") == 0)
-		{
-			build_mcv = true;
-			requested_type = true;
+			Node   *expr2 = (Node *) lfirst(cell2);
+
+			if (equal(expr1, expr2))
+				cnt += 1;
 		}
-		else
+
+		/* every expression should find at least itself */
+		Assert(cnt >= 1);
+
+		if (cnt > 1)
 			ereport(ERROR,
-					(errcode(ERRCODE_SYNTAX_ERROR),
-					 errmsg("unrecognized statistics kind \"%s\"",
-							type)));
+					(errcode(ERRCODE_DUPLICATE_COLUMN),
+					 errmsg("duplicate expression in statistics definition")));
 	}
-	/* If no statistic type was specified, build them all. */
+
+	/* Form an int2vector representation of the sorted column list */
+	stxkeys = buildint2vector(attnums, nattnums);
+
+	/*
+	 * If no statistic type was specified, build them all (but request
+	 * expression stats only when there actually are any expressions).
+	 */
 	if (!requested_type)
 	{
 		build_ndistinct = true;
 		build_dependencies = true;
 		build_mcv = true;
+		build_expressions = (list_length(stxexprs) != 0);
 	}
 
 	/* construct the char array of enabled statistic types */
@@ -322,9 +447,23 @@ CreateStatistics(CreateStatsStmt *stmt)
 		types[ntypes++] = CharGetDatum(STATS_EXT_DEPENDENCIES);
 	if (build_mcv)
 		types[ntypes++] = CharGetDatum(STATS_EXT_MCV);
+	if (build_expressions)
+		types[ntypes++] = CharGetDatum(STATS_EXT_EXPRESSIONS);
 	Assert(ntypes > 0 && ntypes <= lengthof(types));
 	stxkind = construct_array(types, ntypes, CHAROID, 1, true, TYPALIGN_CHAR);
 
+	/* convert the expressions (if any) to a text datum */
+	if (stxexprs != NIL)
+	{
+		char	   *exprsString;
+
+		exprsString = nodeToString(stxexprs);
+		exprsDatum = CStringGetTextDatum(exprsString);
+		pfree(exprsString);
+	}
+	else
+		exprsDatum = (Datum) 0;
+
 	statrel = table_open(StatisticExtRelationId, RowExclusiveLock);
 
 	/*
@@ -344,6 +483,10 @@ CreateStatistics(CreateStatsStmt *stmt)
 	values[Anum_pg_statistic_ext_stxkeys - 1] = PointerGetDatum(stxkeys);
 	values[Anum_pg_statistic_ext_stxkind - 1] = PointerGetDatum(stxkind);
 
+	values[Anum_pg_statistic_ext_stxexprs - 1] = exprsDatum;
+	if (exprsDatum == (Datum) 0)
+		nulls[Anum_pg_statistic_ext_stxexprs - 1] = true;
+
 	/* insert it into pg_statistic_ext */
 	htup = heap_form_tuple(statrel->rd_att, values, nulls);
 	CatalogTupleInsert(statrel, htup);
@@ -366,6 +509,7 @@ CreateStatistics(CreateStatsStmt *stmt)
 	datanulls[Anum_pg_statistic_ext_data_stxdndistinct - 1] = true;
 	datanulls[Anum_pg_statistic_ext_data_stxddependencies - 1] = true;
 	datanulls[Anum_pg_statistic_ext_data_stxdmcv - 1] = true;
+	datanulls[Anum_pg_statistic_ext_data_stxdexpr - 1] = true;
 
 	/* insert it into pg_statistic_ext_data */
 	htup = heap_form_tuple(datarel->rd_att, datavalues, datanulls);
@@ -389,12 +533,39 @@ CreateStatistics(CreateStatsStmt *stmt)
 	 */
 	ObjectAddressSet(myself, StatisticExtRelationId, statoid);
 
-	for (i = 0; i < numcols; i++)
+	/* add dependencies for plain column references */
+	for (i = 0; i < nattnums; i++)
 	{
 		ObjectAddressSubSet(parentobject, RelationRelationId, relid, attnums[i]);
 		recordDependencyOn(&myself, &parentobject, DEPENDENCY_AUTO);
 	}
 
+	/*
+	 * If there are no simply-referenced columns, give the statistics an
+	 * auto dependency on the whole table.  In most cases, this will
+	 * be redundant, but it might not be if the statistics expressions
+	 * contain no Vars (which might seem strange but possible).
+	 *
+	 * XXX This is copied from index_create, not sure if it's applicable
+	 * to extended statistics too.
+	 */
+	if (!nattnums)
+	{
+		ObjectAddressSet(parentobject, RelationRelationId, relid);
+		recordDependencyOn(&myself, &parentobject, DEPENDENCY_AUTO);
+	}
+
+	/*
+	 * Store dependencies on anything mentioned in statistics expressions,
+	 * just like we do for index expressions.
+	 */
+	if (stxexprs)
+		recordDependencyOnSingleRelExpr(&myself,
+										(Node *) stxexprs,
+										relid,
+										DEPENDENCY_NORMAL,
+										DEPENDENCY_AUTO, false, true);
+
 	/*
 	 * Also add dependencies on namespace and owner.  These are required
 	 * because the stats object might have a different namespace and/or owner
@@ -638,6 +809,7 @@ UpdateStatisticsForTypeChange(Oid statsOid, Oid relationOid, int attnum,
 
 	replaces[Anum_pg_statistic_ext_data_stxdmcv - 1] = true;
 	nulls[Anum_pg_statistic_ext_data_stxdmcv - 1] = true;
+	nulls[Anum_pg_statistic_ext_data_stxdexpr - 1] = true;
 
 	rel = table_open(StatisticExtDataRelationId, RowExclusiveLock);
 
@@ -724,18 +896,26 @@ ChooseExtendedStatisticNameAddition(List *exprs)
 	buf[0] = '\0';
 	foreach(lc, exprs)
 	{
-		ColumnRef  *cref = (ColumnRef *) lfirst(lc);
+		StatsElem  *selem = (StatsElem *) lfirst(lc);
 		const char *name;
 
 		/* It should be one of these, but just skip if it happens not to be */
-		if (!IsA(cref, ColumnRef))
+		if (!IsA(selem, StatsElem))
 			continue;
 
-		name = strVal((Value *) linitial(cref->fields));
+		name = selem->name;
 
 		if (buflen > 0)
 			buf[buflen++] = '_';	/* insert _ between names */
 
+		/*
+		 * FIXME use 'expr' for expressions, which have empty column names.
+		 * For indexes this is handled in ChooseIndexColumnNames, but we
+		 * have no such function for stats.
+		 */
+		if (!name)
+			name = "expr";
+
 		/*
 		 * At this point we have buflen <= NAMEDATALEN.  name should be less
 		 * than NAMEDATALEN already, but use strlcpy for paranoia.
@@ -747,3 +927,31 @@ ChooseExtendedStatisticNameAddition(List *exprs)
 	}
 	return pstrdup(buf);
 }
+
+/*
+ * CheckMutability
+ *		Test whether given expression is mutable
+ *
+ * FIXME copied from indexcmds.c, maybe use some shared function?
+ */
+static bool
+CheckMutability(Expr *expr)
+{
+	/*
+	 * First run the expression through the planner.  This has a couple of
+	 * important consequences.  First, function default arguments will get
+	 * inserted, which may affect volatility (consider "default now()").
+	 * Second, inline-able functions will get inlined, which may allow us to
+	 * conclude that the function is really less volatile than it's marked. As
+	 * an example, polymorphic functions must be marked with the most volatile
+	 * behavior that they have for any input type, but once we inline the
+	 * function we may be able to conclude that it's not so volatile for the
+	 * particular input type we're dealing with.
+	 *
+	 * We assume here that expression_planner() won't scribble on its input.
+	 */
+	expr = expression_planner(expr);
+
+	/* Now we can search for non-immutable functions */
+	return contain_mutable_functions((Node *) expr);
+}
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 5a591d0a75..0e44aaad59 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -2922,6 +2922,17 @@ _copyIndexElem(const IndexElem *from)
 	return newnode;
 }
 
+static StatsElem *
+_copyStatsElem(const StatsElem *from)
+{
+	StatsElem  *newnode = makeNode(StatsElem);
+
+	COPY_STRING_FIELD(name);
+	COPY_NODE_FIELD(expr);
+
+	return newnode;
+}
+
 static ColumnDef *
 _copyColumnDef(const ColumnDef *from)
 {
@@ -5615,6 +5626,9 @@ copyObjectImpl(const void *from)
 		case T_IndexElem:
 			retval = _copyIndexElem(from);
 			break;
+		case T_StatsElem:
+			retval = _copyStatsElem(from);
+			break;
 		case T_ColumnDef:
 			retval = _copyColumnDef(from);
 			break;
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index e2895a8985..692dd7ca17 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -2577,6 +2577,16 @@ _equalIndexElem(const IndexElem *a, const IndexElem *b)
 	return true;
 }
 
+
+static bool
+_equalStatsElem(const StatsElem *a, const StatsElem *b)
+{
+	COMPARE_STRING_FIELD(name);
+	COMPARE_NODE_FIELD(expr);
+
+	return true;
+}
+
 static bool
 _equalColumnDef(const ColumnDef *a, const ColumnDef *b)
 {
@@ -3670,6 +3680,9 @@ equal(const void *a, const void *b)
 		case T_IndexElem:
 			retval = _equalIndexElem(a, b);
 			break;
+		case T_StatsElem:
+			retval = _equalStatsElem(a, b);
+			break;
 		case T_ColumnDef:
 			retval = _equalColumnDef(a, b);
 			break;
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index f26498cea2..e818c2febc 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -2900,6 +2900,15 @@ _outIndexElem(StringInfo str, const IndexElem *node)
 	WRITE_ENUM_FIELD(nulls_ordering, SortByNulls);
 }
 
+static void
+_outStatsElem(StringInfo str, const StatsElem *node)
+{
+	WRITE_NODE_TYPE("STATSELEM");
+
+	WRITE_STRING_FIELD(name);
+	WRITE_NODE_FIELD(expr);
+}
+
 static void
 _outQuery(StringInfo str, const Query *node)
 {
@@ -4206,6 +4215,9 @@ outNode(StringInfo str, const void *obj)
 			case T_IndexElem:
 				_outIndexElem(str, obj);
 				break;
+			case T_StatsElem:
+				_outStatsElem(str, obj);
+				break;
 			case T_Query:
 				_outQuery(str, obj);
 				break;
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 52c01eb86b..5db02813e3 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -35,6 +35,7 @@
 #include "foreign/fdwapi.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
 #include "nodes/supportnodes.h"
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
@@ -1315,6 +1316,7 @@ get_relation_statistics(RelOptInfo *rel, Relation relation)
 		HeapTuple	dtup;
 		Bitmapset  *keys = NULL;
 		int			i;
+		List	   *exprs = NIL;
 
 		htup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statOid));
 		if (!HeapTupleIsValid(htup))
@@ -1333,6 +1335,41 @@ get_relation_statistics(RelOptInfo *rel, Relation relation)
 		for (i = 0; i < staForm->stxkeys.dim1; i++)
 			keys = bms_add_member(keys, staForm->stxkeys.values[i]);
 
+		/*
+		 * preprocess expression (if any)
+		 *
+		 * FIXME Should we cache the result somewhere?
+		 */
+		{
+			bool		isnull;
+			Datum		datum;
+
+			/* decode expression (if any) */
+			datum = SysCacheGetAttr(STATEXTOID, htup,
+									Anum_pg_statistic_ext_stxexprs, &isnull);
+
+			if (!isnull)
+			{
+				char *exprsString;
+
+				exprsString = TextDatumGetCString(datum);
+				exprs = (List *) stringToNode(exprsString);
+				pfree(exprsString);
+
+				/*
+				 * Run the expressions through eval_const_expressions. This is not just an
+				 * optimization, but is necessary, because the planner will be comparing
+				 * them to similarly-processed qual clauses, and may fail to detect valid
+				 * matches without this.  We must not use canonicalize_qual, however,
+				 * since these aren't qual expressions.
+				 */
+				exprs = (List *) eval_const_expressions(NULL, (Node *) exprs);
+
+				/* May as well fix opfuncids too */
+				fix_opfuncids((Node *) exprs);
+			}
+		}
+
 		/* add one StatisticExtInfo for each kind built */
 		if (statext_is_kind_built(dtup, STATS_EXT_NDISTINCT))
 		{
@@ -1342,6 +1379,7 @@ get_relation_statistics(RelOptInfo *rel, Relation relation)
 			info->rel = rel;
 			info->kind = STATS_EXT_NDISTINCT;
 			info->keys = bms_copy(keys);
+			info->exprs = exprs;
 
 			stainfos = lappend(stainfos, info);
 		}
@@ -1354,6 +1392,7 @@ get_relation_statistics(RelOptInfo *rel, Relation relation)
 			info->rel = rel;
 			info->kind = STATS_EXT_DEPENDENCIES;
 			info->keys = bms_copy(keys);
+			info->exprs = exprs;
 
 			stainfos = lappend(stainfos, info);
 		}
@@ -1366,6 +1405,20 @@ get_relation_statistics(RelOptInfo *rel, Relation relation)
 			info->rel = rel;
 			info->kind = STATS_EXT_MCV;
 			info->keys = bms_copy(keys);
+			info->exprs = exprs;
+
+			stainfos = lappend(stainfos, info);
+		}
+
+		if (statext_is_kind_built(dtup, STATS_EXT_EXPRESSIONS))
+		{
+			StatisticExtInfo *info = makeNode(StatisticExtInfo);
+
+			info->statOid = statOid;
+			info->rel = rel;
+			info->kind = STATS_EXT_EXPRESSIONS;
+			info->keys = bms_copy(keys);
+			info->exprs = exprs;
 
 			stainfos = lappend(stainfos, info);
 		}
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index efc9c99754..ff34261049 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -233,6 +233,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 	WindowDef			*windef;
 	JoinExpr			*jexpr;
 	IndexElem			*ielem;
+	StatsElem			*selem;
 	Alias				*alias;
 	RangeVar			*range;
 	IntoClause			*into;
@@ -396,7 +397,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 				old_aggr_definition old_aggr_list
 				oper_argtypes RuleActionList RuleActionMulti
 				opt_column_list columnList opt_name_list
-				sort_clause opt_sort_clause sortby_list index_params
+				sort_clause opt_sort_clause sortby_list index_params stats_params
 				opt_include opt_c_include index_including_params
 				name_list role_list from_clause from_list opt_array_bounds
 				qualified_name_list any_name any_name_list type_name_list
@@ -502,6 +503,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type <list>	func_alias_clause
 %type <sortby>	sortby
 %type <ielem>	index_elem index_elem_options
+%type <selem>	stats_param
 %type <node>	table_ref
 %type <jexpr>	joined_table
 %type <range>	relation_expr
@@ -4007,7 +4009,7 @@ ExistingIndex:   USING INDEX name					{ $$ = $3; }
 
 CreateStatsStmt:
 			CREATE STATISTICS any_name
-			opt_name_list ON expr_list FROM from_list
+			opt_name_list ON stats_params FROM from_list
 				{
 					CreateStatsStmt *n = makeNode(CreateStatsStmt);
 					n->defnames = $3;
@@ -4019,7 +4021,7 @@ CreateStatsStmt:
 					$$ = (Node *)n;
 				}
 			| CREATE STATISTICS IF_P NOT EXISTS any_name
-			opt_name_list ON expr_list FROM from_list
+			opt_name_list ON stats_params FROM from_list
 				{
 					CreateStatsStmt *n = makeNode(CreateStatsStmt);
 					n->defnames = $6;
@@ -4032,6 +4034,29 @@ CreateStatsStmt:
 				}
 			;
 
+stats_params:	stats_param							{ $$ = list_make1($1); }
+			| stats_params ',' stats_param			{ $$ = lappend($1, $3); }
+		;
+
+stats_param:	ColId
+				{
+					$$ = makeNode(StatsElem);
+					$$->name = $1;
+					$$->expr = NULL;
+				}
+			| func_expr_windowless
+				{
+					$$ = makeNode(StatsElem);
+					$$->name = NULL;
+					$$->expr = $1;
+				}
+			| '(' a_expr ')'
+				{
+					$$ = makeNode(StatsElem);
+					$$->name = NULL;
+					$$->expr = $2;
+				}
+		;
 
 /*****************************************************************************
  *
diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c
index 783f3fe8f2..12b9e855d5 100644
--- a/src/backend/parser/parse_agg.c
+++ b/src/backend/parser/parse_agg.c
@@ -484,6 +484,13 @@ check_agglevels_and_constraints(ParseState *pstate, Node *expr)
 			else
 				err = _("grouping operations are not allowed in index predicates");
 
+			break;
+		case EXPR_KIND_STATS_EXPRESSION:
+			if (isAgg)
+				err = _("aggregate functions are not allowed in statistics expressions");
+			else
+				err = _("grouping operations are not allowed in statistics expressions");
+
 			break;
 		case EXPR_KIND_ALTER_COL_TRANSFORM:
 			if (isAgg)
@@ -906,6 +913,9 @@ transformWindowFuncCall(ParseState *pstate, WindowFunc *wfunc,
 		case EXPR_KIND_INDEX_EXPRESSION:
 			err = _("window functions are not allowed in index expressions");
 			break;
+		case EXPR_KIND_STATS_EXPRESSION:
+			err = _("window functions are not allowed in stats expressions");
+			break;
 		case EXPR_KIND_INDEX_PREDICATE:
 			err = _("window functions are not allowed in index predicates");
 			break;
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 36002f059d..57ba583f74 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -560,6 +560,7 @@ transformColumnRef(ParseState *pstate, ColumnRef *cref)
 		case EXPR_KIND_FUNCTION_DEFAULT:
 		case EXPR_KIND_INDEX_EXPRESSION:
 		case EXPR_KIND_INDEX_PREDICATE:
+		case EXPR_KIND_STATS_EXPRESSION:
 		case EXPR_KIND_ALTER_COL_TRANSFORM:
 		case EXPR_KIND_EXECUTE_PARAMETER:
 		case EXPR_KIND_TRIGGER_WHEN:
@@ -1865,6 +1866,9 @@ transformSubLink(ParseState *pstate, SubLink *sublink)
 		case EXPR_KIND_INDEX_PREDICATE:
 			err = _("cannot use subquery in index predicate");
 			break;
+		case EXPR_KIND_STATS_EXPRESSION:
+			err = _("cannot use subquery in statistics expression");
+			break;
 		case EXPR_KIND_ALTER_COL_TRANSFORM:
 			err = _("cannot use subquery in transform expression");
 			break;
@@ -3472,6 +3476,8 @@ ParseExprKindName(ParseExprKind exprKind)
 			return "index expression";
 		case EXPR_KIND_INDEX_PREDICATE:
 			return "index predicate";
+		case EXPR_KIND_STATS_EXPRESSION:
+			return "statistics expression";
 		case EXPR_KIND_ALTER_COL_TRANSFORM:
 			return "USING";
 		case EXPR_KIND_EXECUTE_PARAMETER:
diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c
index 8b4e3ca5e1..6730c5a3c3 100644
--- a/src/backend/parser/parse_func.c
+++ b/src/backend/parser/parse_func.c
@@ -2501,6 +2501,9 @@ check_srf_call_placement(ParseState *pstate, Node *last_srf, int location)
 		case EXPR_KIND_INDEX_PREDICATE:
 			err = _("set-returning functions are not allowed in index predicates");
 			break;
+		case EXPR_KIND_STATS_EXPRESSION:
+			err = _("set-returning functions are not allowed in stats expressions");
+			break;
 		case EXPR_KIND_ALTER_COL_TRANSFORM:
 			err = _("set-returning functions are not allowed in transform expressions");
 			break;
diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c
index c709abad2b..aea2d5e0d5 100644
--- a/src/backend/parser/parse_utilcmd.c
+++ b/src/backend/parser/parse_utilcmd.c
@@ -1890,6 +1890,8 @@ generateClonedExtStatsStmt(RangeVar *heapRel, Oid heapRelid,
 			stat_types = lappend(stat_types, makeString("dependencies"));
 		else if (enabled[i] == STATS_EXT_MCV)
 			stat_types = lappend(stat_types, makeString("mcv"));
+		else if (enabled[i] == STATS_EXT_EXPRESSIONS)
+			stat_types = lappend(stat_types, makeString("expressions"));
 		else
 			elog(ERROR, "unrecognized statistics kind %c", enabled[i]);
 	}
@@ -1897,14 +1899,43 @@ generateClonedExtStatsStmt(RangeVar *heapRel, Oid heapRelid,
 	/* Determine which columns the statistics are on */
 	for (i = 0; i < statsrec->stxkeys.dim1; i++)
 	{
-		ColumnRef  *cref = makeNode(ColumnRef);
+		StatsElem  *selem = makeNode(StatsElem);
 		AttrNumber	attnum = statsrec->stxkeys.values[i];
 
-		cref->fields = list_make1(makeString(get_attname(heapRelid,
-														 attnum, false)));
-		cref->location = -1;
+		selem->name = get_attname(heapRelid, attnum, false);
+		selem->expr = NULL;
 
-		def_names = lappend(def_names, cref);
+		def_names = lappend(def_names, selem);
+	}
+
+	/*
+	 * Now handle expressions, if there are any.  The order does not
+	 * matter for extended stats, so we simply append them after
+	 * simple column references.
+     */
+	datum = SysCacheGetAttr(STATEXTOID, ht_stats,
+							Anum_pg_statistic_ext_stxexprs, &isnull);
+
+	if (!isnull)
+	{
+		ListCell   *lc;
+		List	   *exprs = NIL;
+		char	   *exprsString;
+
+		exprsString = TextDatumGetCString(datum);
+		exprs = (List *) stringToNode(exprsString);
+
+		foreach(lc, exprs)
+		{
+			StatsElem  *selem = makeNode(StatsElem);
+
+			selem->name = NULL;
+			selem->expr = (Node *) lfirst(lc);
+
+			def_names = lappend(def_names, selem);
+		}
+
+		pfree(exprsString);
 	}
 
 	/* finally, build the output node */
@@ -1915,6 +1946,7 @@ generateClonedExtStatsStmt(RangeVar *heapRel, Oid heapRelid,
 	stats->relations = list_make1(heapRel);
 	stats->stxcomment = NULL;
 	stats->if_not_exists = false;
+	stats->transformed = true;	/* don't need transformStatsStmt */
 
 	/* Clean up */
 	ReleaseSysCache(ht_stats);
@@ -2839,6 +2871,84 @@ transformIndexStmt(Oid relid, IndexStmt *stmt, const char *queryString)
 	return stmt;
 }
 
+/*
+ * transformStatsStmt - parse analysis for CREATE STATISTICS
+ *
+ * To avoid race conditions, it's important that this function rely only on
+ * the passed-in relid (and not on stmt->relation) to determine the target
+ * relation.
+ */
+CreateStatsStmt *
+transformStatsStmt(Oid relid, CreateStatsStmt *stmt, const char *queryString)
+{
+	ParseState *pstate;
+	ParseNamespaceItem *nsitem;
+	ListCell   *l;
+	Relation	rel;
+
+	/* Nothing to do if statement already transformed. */
+	if (stmt->transformed)
+		return stmt;
+
+	/*
+	 * We must not scribble on the passed-in CreateStatsStmt, so copy it.  (This is
+	 * overkill, but easy.)
+	 */
+	stmt = copyObject(stmt);
+
+	/* Set up pstate */
+	pstate = make_parsestate(NULL);
+	pstate->p_sourcetext = queryString;
+
+	/*
+	 * Put the parent table into the rtable so that the expressions can refer
+	 * to its fields without qualification.  Caller is responsible for locking
+	 * relation, but we still need to open it.
+	 */
+	rel = relation_open(relid, NoLock);
+	nsitem = addRangeTableEntryForRelation(pstate, rel,
+										   AccessShareLock,
+										   NULL, false, true);
+
+	/* no to join list, yes to namespaces */
+	addNSItemToQuery(pstate, nsitem, false, true, true);
+
+	/* take care of any expressions */
+	foreach(l, stmt->exprs)
+	{
+		StatsElem  *selem = (StatsElem *) lfirst(l);
+
+		if (selem->expr)
+		{
+			/* Now do parse transformation of the expression */
+			selem->expr = transformExpr(pstate, selem->expr,
+										EXPR_KIND_STATS_EXPRESSION);
+
+			/* We have to fix its collations too */
+			assign_expr_collations(pstate, selem->expr);
+		}
+	}
+
+	/*
+	 * Check that only the base rel is mentioned.  (This should be dead code
+	 * now that add_missing_from is history.)
+	 */
+	if (list_length(pstate->p_rtable) != 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("statistics expressions and predicates can refer only to the table being indexed")));
+
+	free_parsestate(pstate);
+
+	/* Close relation */
+	table_close(rel, NoLock);
+
+	/* Mark statement as successfully transformed */
+	stmt->transformed = true;
+
+	return stmt;
+}
+
 
 /*
  * transformRuleStmt -
diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c
index d950b4eabe..1d634922f0 100644
--- a/src/backend/statistics/dependencies.c
+++ b/src/backend/statistics/dependencies.c
@@ -70,15 +70,18 @@ static void generate_dependencies(DependencyGenerator state);
 static DependencyGenerator DependencyGenerator_init(int n, int k);
 static void DependencyGenerator_free(DependencyGenerator state);
 static AttrNumber *DependencyGenerator_next(DependencyGenerator state);
-static double dependency_degree(int numrows, HeapTuple *rows, int k,
-								AttrNumber *dependency, VacAttrStats **stats, Bitmapset *attrs);
+static double dependency_degree(int numrows, HeapTuple *rows,
+								ExprInfo *exprs, int k,
+								AttrNumber *dependency, VacAttrStats **stats,
+								Bitmapset *attrs);
 static bool dependency_is_fully_matched(MVDependency *dependency,
 										Bitmapset *attnums);
 static bool dependency_is_compatible_clause(Node *clause, Index relid,
 											AttrNumber *attnum);
+static bool dependency_is_compatible_expression(Node *clause, Index relid,
+												List *statlist, Node **expr);
 static MVDependency *find_strongest_dependency(MVDependencies **dependencies,
-											   int ndependencies,
-											   Bitmapset *attnums);
+						  int ndependencies, Bitmapset *attnums);
 static Selectivity clauselist_apply_dependencies(PlannerInfo *root, List *clauses,
 												 int varRelid, JoinType jointype,
 												 SpecialJoinInfo *sjinfo,
@@ -219,8 +222,9 @@ DependencyGenerator_next(DependencyGenerator state)
  * the last one.
  */
 static double
-dependency_degree(int numrows, HeapTuple *rows, int k, AttrNumber *dependency,
-				  VacAttrStats **stats, Bitmapset *attrs)
+dependency_degree(int numrows, HeapTuple *rows, ExprInfo *exprs, int k,
+				  AttrNumber *dependency, VacAttrStats **stats,
+				  Bitmapset *attrs)
 {
 	int			i,
 				nitems;
@@ -289,8 +293,8 @@ dependency_degree(int numrows, HeapTuple *rows, int k, AttrNumber *dependency,
 	 * descriptor.  For now that assumption holds, but it might change in the
 	 * future for example if we support statistics on multiple tables.
 	 */
-	items = build_sorted_items(numrows, &nitems, rows, stats[0]->tupDesc,
-							   mss, k, attnums_dep);
+	items = build_sorted_items(numrows, &nitems, rows, exprs,
+							   stats[0]->tupDesc, mss, k, attnums_dep);
 
 	/*
 	 * Walk through the sorted array, split it into rows according to the
@@ -360,7 +364,8 @@ dependency_degree(int numrows, HeapTuple *rows, int k, AttrNumber *dependency,
  *	   (c) -> b
  */
 MVDependencies *
-statext_dependencies_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
+statext_dependencies_build(int numrows, HeapTuple *rows,
+						   ExprInfo *exprs, Bitmapset *attrs,
 						   VacAttrStats **stats)
 {
 	int			i,
@@ -371,6 +376,9 @@ statext_dependencies_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
 	/* result */
 	MVDependencies *dependencies = NULL;
 
+	/* treat expressions as special attributes with high attnums */
+	attrs = add_expressions_to_attributes(attrs, exprs->nexprs);
+
 	/*
 	 * Transform the bms into an array, to make accessing i-th member easier.
 	 */
@@ -398,7 +406,8 @@ statext_dependencies_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
 			MVDependency *d;
 
 			/* compute how valid the dependency seems */
-			degree = dependency_degree(numrows, rows, k, dependency, stats, attrs);
+			degree = dependency_degree(numrows, rows, exprs, k, dependency,
+									   stats, attrs);
 
 			/*
 			 * if the dependency seems entirely invalid, don't store it
@@ -441,6 +450,8 @@ statext_dependencies_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
 		DependencyGenerator_free(DependencyGenerator);
 	}
 
+	pfree(attrs);
+
 	return dependencies;
 }
 
@@ -603,6 +614,7 @@ static bool
 dependency_is_fully_matched(MVDependency *dependency, Bitmapset *attnums)
 {
 	int			j;
+	bool		result = true;	/* match by default */
 
 	/*
 	 * Check that the dependency actually is fully covered by clauses. We have
@@ -613,10 +625,13 @@ dependency_is_fully_matched(MVDependency *dependency, Bitmapset *attnums)
 		int			attnum = dependency->attributes[j];
 
 		if (!bms_is_member(attnum, attnums))
-			return false;
+		{
+			result = false;
+			break;
+		}
 	}
 
-	return true;
+	return result;
 }
 
 /*
@@ -927,8 +942,8 @@ dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
  * (see the comment in dependencies_clauselist_selectivity).
  */
 static MVDependency *
-find_strongest_dependency(MVDependencies **dependencies, int ndependencies,
-						  Bitmapset *attnums)
+find_strongest_dependency(MVDependencies **dependencies,
+						  int ndependencies, Bitmapset *attnums)
 {
 	int			i,
 				j;
@@ -1157,6 +1172,131 @@ clauselist_apply_dependencies(PlannerInfo *root, List *clauses,
 	return s1;
 }
 
+/*
+ * Similar to dependency_is_compatible_clause, but don't enforce that the
+ * expression is a simple Var. OTOH we check that there's at least one
+ * statistics matching the expression.
+ */
+static bool
+dependency_is_compatible_expression(Node *clause, Index relid, List *statlist, Node **expr)
+{
+	List	   *vars;
+	ListCell   *lc, *lc2;
+
+	RestrictInfo *rinfo = (RestrictInfo *) clause;
+	Node		   *clause_expr;
+
+	if (!IsA(rinfo, RestrictInfo))
+		return false;
+
+	/* Pseudoconstants are not interesting (they couldn't contain a Var) */
+	if (rinfo->pseudoconstant)
+		return false;
+
+	/* Clauses referencing multiple, or no, varnos are incompatible */
+	if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
+		return false;
+
+	if (is_opclause(rinfo->clause))
+	{
+		/* If it's an opclause, check for Var = Const or Const = Var. */
+		OpExpr	   *expr = (OpExpr *) rinfo->clause;
+
+		/* Only expressions with two arguments are candidates. */
+		if (list_length(expr->args) != 2)
+			return false;
+
+		/* Make sure non-selected argument is a pseudoconstant. */
+		if (is_pseudo_constant_clause(lsecond(expr->args)))
+			clause_expr = linitial(expr->args);
+		else if (is_pseudo_constant_clause(linitial(expr->args)))
+			clause_expr = lsecond(expr->args);
+		else
+			return false;
+
+		/*
+		 * If it's not an "=" operator, just ignore the clause, as it's not
+		 * compatible with functional dependencies.
+		 *
+		 * This uses the function for estimating selectivity, not the operator
+		 * directly (a bit awkward, but well ...).
+		 *
+		 * XXX this is pretty dubious; probably it'd be better to check btree
+		 * or hash opclass membership, so as not to be fooled by custom
+		 * selectivity functions, and to be more consistent with decisions
+		 * elsewhere in the planner.
+		 */
+		if (get_oprrest(expr->opno) != F_EQSEL)
+			return false;
+
+		/* OK to proceed with checking "var" */
+	}
+	else if (is_notclause(rinfo->clause))
+	{
+		/*
+		 * "NOT x" can be interpreted as "x = false", so get the argument and
+		 * proceed with seeing if it's a suitable Var.
+		 */
+		clause_expr = (Node *) get_notclausearg(rinfo->clause);
+	}
+	else
+	{
+		/*
+		 * A boolean expression "x" can be interpreted as "x = true", so
+		 * proceed with seeing if it's a suitable Var.
+		 */
+		clause_expr = (Node *) rinfo->clause;
+	}
+
+	/*
+	 * We may ignore any RelabelType node above the operand.  (There won't be
+	 * more than one, since eval_const_expressions has been applied already.)
+	 */
+	if (IsA(clause_expr, RelabelType))
+		clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
+
+	vars = pull_var_clause(clause_expr, 0);
+
+	foreach (lc, vars)
+	{
+		Var *var = (Var *) lfirst(lc);
+
+		/* Ensure Var is from the correct relation */
+		if (var->varno != relid)
+			return false;
+
+		/* We also better ensure the Var is from the current level */
+		if (var->varlevelsup != 0)
+			return false;
+
+		/* Also ignore system attributes (we don't allow stats on those) */
+		if (!AttrNumberIsForUserDefinedAttr(var->varattno))
+			return false;
+	}
+
+	foreach (lc, statlist)
+	{
+		StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
+
+		/* ignore stats without dependencies */
+		if (info->kind != STATS_EXT_DEPENDENCIES)
+			continue;
+
+		foreach (lc2, info->exprs)
+		{
+			Node *stat_expr = (Node *) lfirst(lc2);
+
+			if (equal(clause_expr, stat_expr))
+			{
+				*expr = stat_expr;
+				return true;
+			}
+		}
+	}
+
+	return false;
+}
+
 /*
  * dependencies_clauselist_selectivity
  *		Return the estimated selectivity of (a subset of) the given clauses
@@ -1205,6 +1345,10 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
 	int			ndependencies;
 	int			i;
 
+	/* unique expressions */
+	Node	  **unique_exprs;
+	int			unique_exprs_cnt;
+
 	/* check if there's any stats that might be useful for us. */
 	if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
 		return 1.0;
@@ -1212,6 +1356,10 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
 	list_attnums = (AttrNumber *) palloc(sizeof(AttrNumber) *
 										 list_length(clauses));
 
+	/* unique expressions */
+	unique_exprs = (Node **) palloc(sizeof(Node *) * list_length(clauses));
+	unique_exprs_cnt = 0;
+
 	/*
 	 * Pre-process the clauses list to extract the attnums seen in each item.
 	 * We need to determine if there's any clauses which will be useful for
@@ -1222,29 +1370,70 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
 	 *
 	 * We also skip clauses that we already estimated using different types of
 	 * statistics (we treat them as incompatible).
+	 *
+	 * For expressions, we generate attnums higher than MaxHeapAttributeNumber
+	 * so that we can work with attnums only.
 	 */
 	listidx = 0;
 	foreach(l, clauses)
 	{
 		Node	   *clause = (Node *) lfirst(l);
 		AttrNumber	attnum;
+		Node	   *expr = NULL;
+
+		/* ignore clause by default */
+		list_attnums[listidx] = InvalidAttrNumber;
 
-		if (!bms_is_member(listidx, *estimatedclauses) &&
-			dependency_is_compatible_clause(clause, rel->relid, &attnum))
+		if (!bms_is_member(listidx, *estimatedclauses))
 		{
-			list_attnums[listidx] = attnum;
-			clauses_attnums = bms_add_member(clauses_attnums, attnum);
+			if (dependency_is_compatible_clause(clause, rel->relid, &attnum))
+			{
+				list_attnums[listidx] = attnum;
+				clauses_attnums = bms_add_member(clauses_attnums, attnum);
+			}
+			else if (dependency_is_compatible_expression(clause, rel->relid,
+														 rel->statlist,
+														 &expr))
+			{
+				/* special attnum assigned to this expression */
+				attnum = InvalidAttrNumber;
+
+				Assert(expr != NULL);
+
+				/* build list of unique expressions, for re-mapping later */
+				for (i = 0; i < unique_exprs_cnt; i++)
+				{
+					if (equal(unique_exprs[i], expr))
+					{
+						attnum = (i + 1);
+						break;
+					}
+				}
+
+				/* not found in the list, so add it */
+				if (attnum == InvalidAttrNumber)
+				{
+					attnum = EXPRESSION_ATTNUM(unique_exprs_cnt);
+					unique_exprs[unique_exprs_cnt++] = expr;
+
+					/* shouldn't have seen this attnum yet */
+					Assert(!bms_is_member(attnum, clauses_attnums));
+				}
+
+				/* we may add the attnum repeatedly to clauses_attnums */
+				clauses_attnums = bms_add_member(clauses_attnums, attnum);
+
+				list_attnums[listidx] = attnum;
+			}
 		}
-		else
-			list_attnums[listidx] = InvalidAttrNumber;
 
 		listidx++;
 	}
 
 	/*
-	 * If there's not at least two distinct attnums then reject the whole list
-	 * of clauses. We must return 1.0 so the calling function's selectivity is
-	 * unaffected.
+	 * If there's not at least two distinct attnums and expressions, then
+	 * reject the whole list of clauses. We must return 1.0 so the calling
+	 * function's selectivity is unaffected.
 	 */
 	if (bms_membership(clauses_attnums) != BMS_MULTIPLE)
 	{
@@ -1273,25 +1462,138 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
 	{
 		StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(l);
 		Bitmapset  *matched;
-		BMS_Membership membership;
+		int			nmatched;
+		int			nexprs;
+		MVDependencies *deps;
 
 		/* skip statistics that are not of the correct type */
 		if (stat->kind != STATS_EXT_DEPENDENCIES)
 			continue;
 
+		/* count matching simple clauses */
 		matched = bms_intersect(clauses_attnums, stat->keys);
-		membership = bms_membership(matched);
+		nmatched = bms_num_members(matched);
 		bms_free(matched);
 
-		/* skip objects matching fewer than two attributes from clauses */
-		if (membership != BMS_MULTIPLE)
+		/* count matching expressions */
+		nexprs = 0;
+		for (i = 0; i < unique_exprs_cnt; i++)
+		{
+			ListCell   *lc;
+
+			foreach (lc, stat->exprs)
+			{
+				Node *stat_expr = (Node *) lfirst(lc);
+
+				/* try to match it */
+				if (equal(stat_expr, unique_exprs[i]))
+					nexprs++;
+			}
+		}
+
+		/*
+		 * Skip objects matching fewer than two attributes/expressions
+		 * from clauses.
+		 */
+		if (nmatched + nexprs < 2)
 			continue;
 
-		func_dependencies[nfunc_dependencies]
-			= statext_dependencies_load(stat->statOid);
+		deps = statext_dependencies_load(stat->statOid);
 
-		total_ndeps += func_dependencies[nfunc_dependencies]->ndeps;
-		nfunc_dependencies++;
+		/*
+		 * The expressions may be represented by different attnums in the
+		 * stats, we need to remap them to be consistent with the clauses.
+		 * That will make the later steps (e.g. picking the strongest item
+		 * and so on) much simpler.
+		 *
+		 * When we're at it, we can also remove dependencies referencing
+		 * missing clauses (i.e. expressions that are not in the clauses).
+		 *
+		 * XXX We might also skip clauses referencing missing attnums, not
+		 * just expressions.
+		 */
+		if (stat->exprs)
+		{
+			int			ndeps = 0;
+
+			for (i = 0; i < deps->ndeps; i++)
+			{
+				bool			skip = false;
+				MVDependency   *dep = deps->deps[i];
+				int				j;
+
+				for (j = 0; j < dep->nattributes; j++)
+				{
+					int			idx;
+					Node	   *expr;
+					int			k;
+					AttrNumber	unique_attnum = InvalidAttrNumber;
+
+					/* regular attribute, no need to remap */
+					if (dep->attributes[j] <= MaxHeapAttributeNumber)
+						continue;
+
+					/* index of the expression */
+					idx = EXPRESSION_INDEX(dep->attributes[j]);
+
+					/* make sure the expression index is valid */
+					Assert((idx >= 0) && (idx < list_length(stat->exprs)));
+
+					expr = (Node *) list_nth(stat->exprs, idx);
+
+					/* try to find the expression in the unique list */
+					for (k = 0; k < unique_exprs_cnt; k++)
+					{
+						/*
+						 * found a matching unique expression, use the attnum
+						 * (derived from index of the unique expression)
+						 */
+						if (equal(unique_exprs[k], expr))
+						{
+							unique_attnum = EXPRESSION_ATTNUM(k);
+							break;
+						}
+					}
+
+					/*
+					 * Not found a matching expression, so we can simply
+					 * skip this dependency, because there's no chance it
+					 * will be fully covered.
+					 */
+					if (unique_attnum == InvalidAttrNumber)
+					{
+						skip = true;
+						break;
+					}
+
+					/* otherwise remap it to the new attnum */
+					dep->attributes[j] = unique_attnum;
+				}
+
+				/* if found a matching, */
+				if (!skip)
+				{
+					/* maybe we've skipped something earlier, so move it */
+					if (ndeps != i)
+						deps->deps[ndeps] = deps->deps[i];
+
+					ndeps++;
+				}
+			}
+
+			deps->ndeps = ndeps;
+		}
+
+		/*
+		 * It's possible we've removed all dependencies, in which case we
+		 * don't bother adding it to the list.
+		 */
+		if (deps->ndeps > 0)
+		{
+			func_dependencies[nfunc_dependencies] = deps;
+			total_ndeps += deps->ndeps;
+			nfunc_dependencies++;
+		}
 	}
 
 	/* if no matching stats could be found then we've nothing to do */
@@ -1300,6 +1602,7 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
 		pfree(func_dependencies);
 		bms_free(clauses_attnums);
 		pfree(list_attnums);
+		pfree(unique_exprs);
 		return 1.0;
 	}
 
@@ -1347,6 +1650,7 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
 	pfree(func_dependencies);
 	bms_free(clauses_attnums);
 	pfree(list_attnums);
+	pfree(unique_exprs);
 
 	return s1;
 }
diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
index 36326927c6..21e3f66b7e 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -24,6 +24,7 @@
 #include "catalog/pg_collation.h"
 #include "catalog/pg_statistic_ext.h"
 #include "catalog/pg_statistic_ext_data.h"
+#include "executor/executor.h"
 #include "commands/progress.h"
 #include "miscadmin.h"
 #include "nodes/nodeFuncs.h"
@@ -35,6 +36,7 @@
 #include "statistics/statistics.h"
 #include "utils/acl.h"
 #include "utils/array.h"
+#include "utils/attoptcache.h"
 #include "utils/builtins.h"
 #include "utils/fmgroids.h"
 #include "utils/lsyscache.h"
@@ -42,6 +44,7 @@
 #include "utils/rel.h"
 #include "utils/selfuncs.h"
 #include "utils/syscache.h"
+#include "utils/typcache.h"
 
 /*
  * To avoid consuming too much memory during analysis and/or too much space
@@ -66,18 +69,35 @@ typedef struct StatExtEntry
 	Bitmapset  *columns;		/* attribute numbers covered by the object */
 	List	   *types;			/* 'char' list of enabled statistic kinds */
 	int			stattarget;		/* statistics target (-1 for default) */
+	List	   *exprs;			/* expressions */
 } StatExtEntry;
 
 
 static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid);
-static VacAttrStats **lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
+static VacAttrStats **lookup_var_attr_stats(Relation rel, Bitmapset *attrs, List *exprs,
 											int nvacatts, VacAttrStats **vacatts);
 static void statext_store(Oid relid,
 						  MVNDistinct *ndistinct, MVDependencies *dependencies,
-						  MCVList *mcv, VacAttrStats **stats);
+						  MCVList *mcv, Datum exprs, VacAttrStats **stats);
 static int	statext_compute_stattarget(int stattarget,
 									   int natts, VacAttrStats **stats);
 
+typedef struct AnlExprData
+{
+	Node		   *expr;			/* expression to analyze */
+	VacAttrStats   *vacattrstat;	/* index attrs to analyze */
+} AnlExprData;
+
+static void compute_expr_stats(Relation onerel, double totalrows,
+					AnlExprData *exprdata, int nexprs,
+					HeapTuple *rows, int numrows);
+static Datum serialize_expr_stats(AnlExprData *exprdata, int nexprs);
+static Datum expr_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
+static AnlExprData *build_expr_data(List *exprs);
+static VacAttrStats *examine_expression(Node *expr);
+static ExprInfo *evaluate_expressions(Relation rel, List *exprs,
+									  int numrows, HeapTuple *rows);
+
 /*
  * Compute requested extended stats, using the rows sampled for the plain
  * (single-column) stats.
@@ -127,15 +147,21 @@ BuildRelationExtStatistics(Relation onerel, double totalrows,
 		MVNDistinct *ndistinct = NULL;
 		MVDependencies *dependencies = NULL;
 		MCVList    *mcv = NULL;
+		Datum		exprstats = (Datum) 0;
 		VacAttrStats **stats;
 		ListCell   *lc2;
 		int			stattarget;
+		ExprInfo   *exprs;
+		int			min_attrs;
 
 		/*
 		 * Check if we can build these stats based on the column analyzed. If
 		 * not, report this fact (except in autovacuum) and move on.
+		 *
+		 * FIXME This is confusing - we have 'stats' list, but it's shadowed
+		 * by another 'stats' variable here.
 		 */
-		stats = lookup_var_attr_stats(onerel, stat->columns,
+		stats = lookup_var_attr_stats(onerel, stat->columns, stat->exprs,
 									  natts, vacattrstats);
 		if (!stats)
 		{
@@ -150,9 +176,28 @@ BuildRelationExtStatistics(Relation onerel, double totalrows,
 			continue;
 		}
 
+		/* determine the minimum required number of attributes/expressions */
+		min_attrs = 1;
+		foreach(lc2, stat->types)
+		{
+			char	t = (char) lfirst_int(lc2);
+
+			switch (t)
+			{
+				/* expressions only need a single item */
+				case STATS_EXT_EXPRESSIONS:
+					break;
+
+				/* all other statistics kinds require at least two */
+				default:
+					min_attrs = 2;
+					break;
+			}
+		}
+
 		/* check allowed number of dimensions */
-		Assert(bms_num_members(stat->columns) >= 2 &&
-			   bms_num_members(stat->columns) <= STATS_MAX_DIMENSIONS);
+		Assert(bms_num_members(stat->columns) + list_length(stat->exprs) >= min_attrs &&
+			   bms_num_members(stat->columns) + list_length(stat->exprs) <= STATS_MAX_DIMENSIONS);
 
 		/* compute statistics target for this statistics */
 		stattarget = statext_compute_stattarget(stat->stattarget,
@@ -167,6 +212,9 @@ BuildRelationExtStatistics(Relation onerel, double totalrows,
 		if (stattarget == 0)
 			continue;
 
+		/* evaluate expressions (if the statistics has any) */
+		exprs = evaluate_expressions(onerel, stat->exprs, numrows, rows);
+
 		/* compute statistic of each requested type */
 		foreach(lc2, stat->types)
 		{
@@ -174,21 +222,43 @@ BuildRelationExtStatistics(Relation onerel, double totalrows,
 
 			if (t == STATS_EXT_NDISTINCT)
 				ndistinct = statext_ndistinct_build(totalrows, numrows, rows,
-													stat->columns, stats);
+													exprs, stat->columns,
+													stats);
 			else if (t == STATS_EXT_DEPENDENCIES)
 				dependencies = statext_dependencies_build(numrows, rows,
-														  stat->columns, stats);
+														  exprs, stat->columns,
+														  stats);
 			else if (t == STATS_EXT_MCV)
-				mcv = statext_mcv_build(numrows, rows, stat->columns, stats,
-										totalrows, stattarget);
+				mcv = statext_mcv_build(numrows, rows, exprs, stat->columns,
+										stats, totalrows, stattarget);
+			else if (t == STATS_EXT_EXPRESSIONS)
+			{
+				AnlExprData *exprdata;
+				int			nexprs;
+
+				/* should not happen, thanks to checks when defining stats */
+				if (!stat->exprs)
+					elog(ERROR, "requested expression stats, but there are no expressions");
+
+				exprdata = build_expr_data(stat->exprs);
+				nexprs = list_length(stat->exprs);
+
+				compute_expr_stats(onerel, totalrows,
+								   exprdata, nexprs,
+								   rows, numrows);
+
+				exprstats = serialize_expr_stats(exprdata, nexprs);
+			}
 		}
 
 		/* store the statistics in the catalog */
-		statext_store(stat->statOid, ndistinct, dependencies, mcv, stats);
+		statext_store(stat->statOid, ndistinct, dependencies, mcv, exprstats, stats);
 
 		/* for reporting progress */
 		pgstat_progress_update_param(PROGRESS_ANALYZE_EXT_STATS_COMPUTED,
 									 ++ext_cnt);
+
+		pfree(exprs);
 	}
 
 	table_close(pg_stext, RowExclusiveLock);
@@ -241,7 +311,7 @@ ComputeExtStatisticsRows(Relation onerel,
 		 * analyzed. If not, ignore it (don't report anything, we'll do that
 		 * during the actual build BuildRelationExtStatistics).
 		 */
-		stats = lookup_var_attr_stats(onerel, stat->columns,
+		stats = lookup_var_attr_stats(onerel, stat->columns, stat->exprs,
 									  natts, vacattrstats);
 
 		if (!stats)
@@ -349,6 +419,10 @@ statext_is_kind_built(HeapTuple htup, char type)
 			attnum = Anum_pg_statistic_ext_data_stxdmcv;
 			break;
 
+		case STATS_EXT_EXPRESSIONS:
+			attnum = Anum_pg_statistic_ext_data_stxdexpr;
+			break;
+
 		default:
 			elog(ERROR, "unexpected statistics type requested: %d", type);
 	}
@@ -388,6 +462,7 @@ fetch_statentries_for_relation(Relation pg_statext, Oid relid)
 		ArrayType  *arr;
 		char	   *enabled;
 		Form_pg_statistic_ext staForm;
+		List	   *exprs = NIL;
 
 		entry = palloc0(sizeof(StatExtEntry));
 		staForm = (Form_pg_statistic_ext) GETSTRUCT(htup);
@@ -415,10 +490,39 @@ fetch_statentries_for_relation(Relation pg_statext, Oid relid)
 		{
 			Assert((enabled[i] == STATS_EXT_NDISTINCT) ||
 				   (enabled[i] == STATS_EXT_DEPENDENCIES) ||
-				   (enabled[i] == STATS_EXT_MCV));
+				   (enabled[i] == STATS_EXT_MCV) ||
+				   (enabled[i] == STATS_EXT_EXPRESSIONS));
 			entry->types = lappend_int(entry->types, (int) enabled[i]);
 		}
 
+		/* decode expression (if any) */
+		datum = SysCacheGetAttr(STATEXTOID, htup,
+								Anum_pg_statistic_ext_stxexprs, &isnull);
+
+		if (!isnull)
+		{
+			char *exprsString;
+
+			exprsString = TextDatumGetCString(datum);
+			exprs = (List *) stringToNode(exprsString);
+
+			pfree(exprsString);
+
+			/*
+			 * Run the expressions through eval_const_expressions. This is not just an
+			 * optimization, but is necessary, because the planner will be comparing
+			 * them to similarly-processed qual clauses, and may fail to detect valid
+			 * matches without this.  We must not use canonicalize_qual, however,
+			 * since these aren't qual expressions.
+			 */
+			exprs = (List *) eval_const_expressions(NULL, (Node *) exprs);
+
+			/* May as well fix opfuncids too */
+			fix_opfuncids((Node *) exprs);
+		}
+
+		entry->exprs = exprs;
+
 		result = lappend(result, entry);
 	}
 
@@ -427,6 +531,86 @@ fetch_statentries_for_relation(Relation pg_statext, Oid relid)
 	return result;
 }
 
+
+/*
+ * examine_attribute -- pre-analysis of a single column
+ *
+ * Determine whether the column is analyzable; if so, create and initialize
+ * a VacAttrStats struct for it.  If not, return NULL.
+ */
+static VacAttrStats *
+examine_attribute(Node *expr)
+{
+	HeapTuple	typtuple;
+	VacAttrStats *stats;
+	int			i;
+	bool		ok;
+
+	/*
+	 * Create the VacAttrStats struct.  Note that we only have a copy of the
+	 * fixed fields of the pg_attribute tuple.
+	 */
+	stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats));
+
+	/* fake the attribute */
+	stats->attr = (Form_pg_attribute) palloc0(ATTRIBUTE_FIXED_PART_SIZE);
+	stats->attr->attstattarget = -1;
+
+	/*
+	 * When analyzing an expression index, believe the expression tree's type
+	 * not the column datatype --- the latter might be the opckeytype storage
+	 * type of the opclass, which is not interesting for our purposes.  (Note:
+	 * if we did anything with non-expression index columns, we'd need to
+	 * figure out where to get the correct type info from, but for now that's
+	 * not a problem.)	It's not clear whether anyone will care about the
+	 * typmod, but we store that too just in case.
+	 */
+	stats->attrtypid = exprType(expr);
+	stats->attrtypmod = exprTypmod(expr);
+	stats->attrcollid = exprCollation(expr);
+
+	typtuple = SearchSysCacheCopy1(TYPEOID,
+								   ObjectIdGetDatum(stats->attrtypid));
+	if (!HeapTupleIsValid(typtuple))
+		elog(ERROR, "cache lookup failed for type %u", stats->attrtypid);
+	stats->attrtype = (Form_pg_type) GETSTRUCT(typtuple);
+	// stats->anl_context = anl_context;
+	stats->tupattnum = InvalidAttrNumber;
+
+	/*
+	 * The fields describing the stats->stavalues[n] element types default to
+	 * the type of the data being analyzed, but the type-specific typanalyze
+	 * function can change them if it wants to store something else.
+	 */
+	for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
+	{
+		stats->statypid[i] = stats->attrtypid;
+		stats->statyplen[i] = stats->attrtype->typlen;
+		stats->statypbyval[i] = stats->attrtype->typbyval;
+		stats->statypalign[i] = stats->attrtype->typalign;
+	}
+
+	/*
+	 * Call the type-specific typanalyze function.  If none is specified, use
+	 * std_typanalyze().
+	 */
+	if (OidIsValid(stats->attrtype->typanalyze))
+		ok = DatumGetBool(OidFunctionCall1(stats->attrtype->typanalyze,
+										   PointerGetDatum(stats)));
+	else
+		ok = std_typanalyze(stats);
+
+	if (!ok || stats->compute_stats == NULL || stats->minrows <= 0)
+	{
+		heap_freetuple(typtuple);
+		pfree(stats->attr);
+		pfree(stats);
+		return NULL;
+	}
+
+	return stats;
+}
+
 /*
  * Using 'vacatts' of size 'nvacatts' as input data, return a newly built
  * VacAttrStats array which includes only the items corresponding to
@@ -435,15 +619,18 @@ fetch_statentries_for_relation(Relation pg_statext, Oid relid)
  * to the caller that the stats should not be built.
  */
 static VacAttrStats **
-lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
+lookup_var_attr_stats(Relation rel, Bitmapset *attrs, List *exprs,
 					  int nvacatts, VacAttrStats **vacatts)
 {
 	int			i = 0;
 	int			x = -1;
+	int			natts;
 	VacAttrStats **stats;
+	ListCell   *lc;
 
-	stats = (VacAttrStats **)
-		palloc(bms_num_members(attrs) * sizeof(VacAttrStats *));
+	natts = bms_num_members(attrs) + list_length(exprs);
+
+	stats = (VacAttrStats **) palloc(natts * sizeof(VacAttrStats *));
 
 	/* lookup VacAttrStats info for the requested columns (same attnum) */
 	while ((x = bms_next_member(attrs, x)) >= 0)
@@ -480,6 +667,24 @@ lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
 		i++;
 	}
 
+	/* also add info for expressions */
+	foreach (lc, exprs)
+	{
+		Node *expr = (Node *) lfirst(lc);
+
+		stats[i] = examine_attribute(expr);
+
+		/*
+		 * FIXME We need tuple descriptor later, and we just grab it from
+		 * stats[0]->tupDesc (see e.g. statext_mcv_build). But as coded
+		 * examine_attribute does not set that, so just grab it from the
+		 * first vacatts element.
+		 */
+		stats[i]->tupDesc = vacatts[0]->tupDesc;
+
+		i++;
+	}
+
 	return stats;
 }
 
@@ -491,7 +696,7 @@ lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
 static void
 statext_store(Oid statOid,
 			  MVNDistinct *ndistinct, MVDependencies *dependencies,
-			  MCVList *mcv, VacAttrStats **stats)
+			  MCVList *mcv, Datum exprs, VacAttrStats **stats)
 {
 	Relation	pg_stextdata;
 	HeapTuple	stup,
@@ -532,11 +737,17 @@ statext_store(Oid statOid,
 		nulls[Anum_pg_statistic_ext_data_stxdmcv - 1] = (data == NULL);
 		values[Anum_pg_statistic_ext_data_stxdmcv - 1] = PointerGetDatum(data);
 	}
+	if (exprs != (Datum) 0)
+	{
+		nulls[Anum_pg_statistic_ext_data_stxdexpr - 1] = false;
+		values[Anum_pg_statistic_ext_data_stxdexpr - 1] = exprs;
+	}
 
 	/* always replace the value (either by bytea or NULL) */
 	replaces[Anum_pg_statistic_ext_data_stxdndistinct - 1] = true;
 	replaces[Anum_pg_statistic_ext_data_stxddependencies - 1] = true;
 	replaces[Anum_pg_statistic_ext_data_stxdmcv - 1] = true;
+	replaces[Anum_pg_statistic_ext_data_stxdexpr - 1] = true;
 
 	/* there should already be a pg_statistic_ext_data tuple */
 	oldtup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(statOid));
@@ -741,8 +952,9 @@ build_attnums_array(Bitmapset *attrs, int *numattrs)
  * can simply pfree the return value to release all of it.
  */
 SortItem *
-build_sorted_items(int numrows, int *nitems, HeapTuple *rows, TupleDesc tdesc,
-				   MultiSortSupport mss, int numattrs, AttrNumber *attnums)
+build_sorted_items(int numrows, int *nitems, HeapTuple *rows, ExprInfo *exprs,
+				   TupleDesc tdesc, MultiSortSupport mss,
+				   int numattrs, AttrNumber *attnums)
 {
 	int			i,
 				j,
@@ -789,8 +1001,24 @@ build_sorted_items(int numrows, int *nitems, HeapTuple *rows, TupleDesc tdesc,
 		{
 			Datum		value;
 			bool		isnull;
+			int			attlen;
+
+			if (attnums[j] <= MaxHeapAttributeNumber)
+			{
+				value = heap_getattr(rows[i], attnums[j], tdesc, &isnull);
+				attlen = TupleDescAttr(tdesc, attnums[j] - 1)->attlen;
+			}
+			else
+			{
+				int	idx = EXPRESSION_INDEX(attnums[j]);
+
+				Assert((idx >= 0) && (idx < exprs->nexprs));
+
+				value = exprs->values[idx][i];
+				isnull = exprs->nulls[idx][i];
 
-			value = heap_getattr(rows[i], attnums[j], tdesc, &isnull);
+				attlen = get_typlen(exprs->types[idx]);
+			}
 
 			/*
 			 * If this is a varlena value, check if it's too wide and if yes
@@ -801,8 +1029,7 @@ build_sorted_items(int numrows, int *nitems, HeapTuple *rows, TupleDesc tdesc,
 			 * on the assumption that those are small (below WIDTH_THRESHOLD)
 			 * and will be discarded at the end of analyze.
 			 */
-			if ((!isnull) &&
-				(TupleDescAttr(tdesc, attnums[j] - 1)->attlen == -1))
+			if ((!isnull) && (attlen == -1))
 			{
 				if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
 				{
@@ -881,7 +1108,8 @@ has_stats_of_kind(List *stats, char requiredkind)
  */
 StatisticExtInfo *
 choose_best_statistics(List *stats, char requiredkind,
-					   Bitmapset **clause_attnums, int nclauses)
+					   Bitmapset **clause_attnums, List **clause_exprs,
+					   int nclauses)
 {
 	ListCell   *lc;
 	StatisticExtInfo *best_match = NULL;
@@ -894,6 +1122,7 @@ choose_best_statistics(List *stats, char requiredkind,
 		StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
 		Bitmapset  *matched = NULL;
 		int			num_matched;
+		int			num_matched_exprs;
 		int			numkeys;
 
 		/* skip statistics that are not of the correct type */
@@ -920,6 +1149,38 @@ choose_best_statistics(List *stats, char requiredkind,
 		num_matched = bms_num_members(matched);
 		bms_free(matched);
 
+		/*
+		 * Collect expressions in remaining (unestimated) expressions, covered
+		 * by an expression in this statistic object.
+		 */
+		num_matched_exprs = 0;
+		for (i = 0; i < nclauses; i++)
+		{
+			ListCell *lc3;
+
+			/* ignore incompatible/estimated expressions */
+			if (!clause_exprs[i])
+				continue;
+
+			/* ignore expressions that are not covered by this object */
+			foreach (lc3, clause_exprs[i])
+			{
+				ListCell   *lc2;
+				Node	   *expr = (Node *) lfirst(lc3);
+
+				foreach(lc2, info->exprs)
+				{
+					Node   *stat_expr = (Node *) lfirst(lc2);
+
+					if (equal(expr, stat_expr))
+					{
+						num_matched_exprs++;
+						break;
+					}
+				}
+			}
+		}
+
 		/*
 		 * save the actual number of keys in the stats so that we can choose
 		 * the narrowest stats with the most matching keys.
@@ -931,11 +1192,12 @@ choose_best_statistics(List *stats, char requiredkind,
 		 * when it matches the same number of attributes but these stats have
 		 * fewer keys than any previous match.
 		 */
-		if (num_matched > best_num_matched ||
-			(num_matched == best_num_matched && numkeys < best_match_keys))
+		if (num_matched + num_matched_exprs > best_num_matched ||
+			((num_matched + num_matched_exprs) == best_num_matched &&
+			 numkeys < best_match_keys))
 		{
 			best_match = info;
-			best_num_matched = num_matched;
+			best_num_matched = num_matched + num_matched_exprs;
 			best_match_keys = numkeys;
 		}
 	}
@@ -994,7 +1256,7 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
 			return false;
 
 		/* Check if the expression has the right shape (one Var, one Const) */
-		if (!examine_clause_args(expr->args, &var, NULL, NULL))
+		if (!examine_opclause_expression(expr, &var, NULL, NULL))
 			return false;
 
 		/*
@@ -1150,6 +1412,187 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
 	return false;
 }
 
+/*
+ * statext_extract_expression_internal
+ *		FIXME
+ *
+ */
+static List *
+statext_extract_expression_internal(PlannerInfo *root, Node *clause, Index relid)
+{
+	/* Look inside any binary-compatible relabeling (as in examine_variable) */
+	if (IsA(clause, RelabelType))
+		clause = (Node *) ((RelabelType *) clause)->arg;
+
+	/* plain Var references (boolean Vars or recursive checks) */
+	if (IsA(clause, Var))
+	{
+		Var		   *var = (Var *) clause;
+
+		/* Ensure var is from the correct relation */
+		if (var->varno != relid)
+			return NIL;
+
+		/* we also better ensure the Var is from the current level */
+		if (var->varlevelsup > 0)
+			return NIL;
+
+		/* Also skip system attributes (we don't allow stats on those). */
+		if (!AttrNumberIsForUserDefinedAttr(var->varattno))
+			return NIL;
+
+		return list_make1(clause);
+	}
+
+	/* (Var op Const) or (Const op Var) */
+	if (is_opclause(clause))
+	{
+		RangeTblEntry *rte = root->simple_rte_array[relid];
+		OpExpr	   *expr = (OpExpr *) clause;
+		Node	   *expr2 = NULL;
+
+		/* Only expressions with two arguments are considered compatible. */
+		if (list_length(expr->args) != 2)
+			return NIL;
+
+		/* Check if the expression has the right shape (one Expr, one Const) */
+		if (!examine_opclause_expression2(expr, &expr2, NULL, NULL))
+			return NIL;
+
+		/*
+		 * If it's not one of the supported operators ("=", "<", ">", etc.),
+		 * just ignore the clause, as it's not compatible with MCV lists.
+		 *
+		 * This uses the function for estimating selectivity, not the operator
+		 * directly (a bit awkward, but well ...).
+		 */
+		switch (get_oprrest(expr->opno))
+		{
+			case F_EQSEL:
+			case F_NEQSEL:
+			case F_SCALARLTSEL:
+			case F_SCALARLESEL:
+			case F_SCALARGTSEL:
+			case F_SCALARGESEL:
+				/* supported, will continue with inspection of the Var */
+				break;
+
+			default:
+				/* other estimators are considered unknown/unsupported */
+				return NIL;
+		}
+
+		/*
+		 * If there are any securityQuals on the RTE from security barrier
+		 * views or RLS policies, then the user may not have access to all the
+		 * table's data, and we must check that the operator is leak-proof.
+		 *
+		 * If the operator is leaky, then we must ignore this clause for the
+		 * purposes of estimating with MCV lists, otherwise the operator might
+		 * reveal values from the MCV list that the user doesn't have
+		 * permission to see.
+		 */
+		if (rte->securityQuals != NIL &&
+			!get_func_leakproof(get_opcode(expr->opno)))
+			return NIL;
+
+		return list_make1(expr2);
+	}
+
+	if (IsA(clause, ScalarArrayOpExpr))
+	{
+		RangeTblEntry *rte = root->simple_rte_array[relid];
+		ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
+		Node	   *expr2 = NULL;
+
+		/* Only expressions with two arguments are considered compatible. */
+		if (list_length(expr->args) != 2)
+			return NIL;
+
+		/* Check if the expression has the right shape (one Expr, one Const) */
+		if (!examine_clause_args2(expr->args, &expr2, NULL, NULL))
+			return NIL;
+
+		/*
+		 * If there are any securityQuals on the RTE from security barrier
+		 * views or RLS policies, then the user may not have access to all the
+		 * table's data, and we must check that the operator is leak-proof.
+		 *
+		 * If the operator is leaky, then we must ignore this clause for the
+		 * purposes of estimating with MCV lists, otherwise the operator might
+		 * reveal values from the MCV list that the user doesn't have
+		 * permission to see.
+		 */
+		if (rte->securityQuals != NIL &&
+			!get_func_leakproof(get_opcode(expr->opno)))
+			return NIL;
+
+		return list_make1(expr2);
+	}
+
+	/* AND/OR/NOT clause */
+	if (is_andclause(clause) ||
+		is_orclause(clause) ||
+		is_notclause(clause))
+	{
+		/*
+		 * AND/OR/NOT-clauses are supported if all sub-clauses are supported
+		 *
+		 * Perhaps we could improve this by handling mixed cases, when some of
+		 * the clauses are supported and some are not. Selectivity for the
+		 * supported subclauses would be computed using extended statistics,
+		 * and the remaining clauses would be estimated using the traditional
+		 * algorithm (product of selectivities).
+		 *
+		 * It however seems overly complex, and in a way we already do that
+		 * because if we reject the whole clause as unsupported here, it will
+		 * be eventually passed to clauselist_selectivity() which does exactly
+		 * this (split into supported/unsupported clauses etc).
+		 */
+		BoolExpr   *expr = (BoolExpr *) clause;
+		ListCell   *lc;
+		List	   *exprs = NIL;
+
+		foreach(lc, expr->args)
+		{
+			List *tmp;
+
+			/*
+			 * Had we found incompatible clause in the arguments, treat the
+			 * whole clause as incompatible.
+			 */
+			tmp = statext_extract_expression_internal(root,
+													  (Node *) lfirst(lc),
+													  relid);
+
+			if (!tmp)
+				return NIL;
+
+			exprs = list_concat(exprs, tmp);
+		}
+
+		return exprs;
+	}
+
+	/* Var IS NULL */
+	if (IsA(clause, NullTest))
+	{
+		NullTest   *nt = (NullTest *) clause;
+
+		/*
+		 * Only simple (Var IS NULL) expressions supported for now. Maybe we
+		 * could use examine_variable to fix this?
+		 */
+		if (!IsA(nt->arg, Var))
+			return NIL;
+
+		return statext_extract_expression_internal(root, (Node *) (nt->arg),
+												   relid);
+	}
+
+	return NIL;
+}
+
 /*
  * statext_is_compatible_clause
  *		Determines if the clause is compatible with MCV lists.
@@ -1163,6 +1606,8 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
  *
  * (c) combinations using AND/OR/NOT
  *
+ * (d) ScalarArrayOpExprs of the form (Var op ANY (array)) or (Var op ALL (array))
+ *
  * In the future, the range of supported clauses may be expanded to more
  * complex cases, for example (Var op Var).
  */
@@ -1225,15 +1670,62 @@ statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid,
 }
 
 /*
- * statext_mcv_clauselist_selectivity
- *		Estimate clauses using the best multi-column statistics.
+ * statext_extract_expression
+ *		Determines if the clause is compatible with extended statistics.
  *
- * Applies available extended (multi-column) statistics on a table. There may
- * be multiple applicable statistics (with respect to the clauses), in which
- * case we use greedy approach. In each round we select the best statistic on
- * a table (measured by the number of attributes extracted from the clauses
- * and covered by it), and compute the selectivity for the supplied clauses.
- * We repeat this process with the remaining clauses (if any), until none of
+ * Currently, we only support three types of clauses:
+ *
+ * (a) OpExprs of the form (Var op Const), or (Const op Var), where the op
+ * is one of ("=", "<", ">", ">=", "<=")
+ *
+ * (b) (Var IS [NOT] NULL)
+ *
+ * (c) combinations using AND/OR/NOT
+ *
+ * (d) ScalarArrayOpExprs of the form (Var op ANY (array)) or (Var op ALL (array))
+ *
+ * In the future, the range of supported clauses may be expanded to more
+ * complex cases, for example (Var op Var).
+ */
+static List *
+statext_extract_expression(PlannerInfo *root, Node *clause, Index relid)
+{
+	RestrictInfo *rinfo = (RestrictInfo *) clause;
+	List		 *exprs;
+
+	if (!IsA(rinfo, RestrictInfo))
+		return NIL;
+
+	/* Pseudoconstants are not really interesting here. */
+	if (rinfo->pseudoconstant)
+		return NIL;
+
+	/* clauses referencing multiple varnos are incompatible */
+	if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
+		return NIL;
+
+	/* Check the clause and determine what attributes it references. */
+	exprs = statext_extract_expression_internal(root, (Node *) rinfo->clause, relid);
+
+	if (!exprs)
+		return NIL;
+
+	/* FIXME do the same ACL check as in statext_is_compatible_clause */
+
+	/* If we reach here, the clause is OK */
+	return exprs;
+}
+
+/*
+ * statext_mcv_clauselist_selectivity
+ *		Estimate clauses using the best multi-column statistics.
+ *
+ * Applies available extended (multi-column) statistics on a table. There may
+ * be multiple applicable statistics (with respect to the clauses), in which
+ * case we use greedy approach. In each round we select the best statistic on
+ * a table (measured by the number of attributes extracted from the clauses
+ * and covered by it), and compute the selectivity for the supplied clauses.
+ * We repeat this process with the remaining clauses (if any), until none of
  * the available statistics can be used.
  *
  * One of the main challenges with using MCV lists is how to extrapolate the
@@ -1285,7 +1777,8 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 								   RelOptInfo *rel, Bitmapset **estimatedclauses)
 {
 	ListCell   *l;
-	Bitmapset **list_attnums;
+	Bitmapset **list_attnums;	/* attnums extracted from the clause */
+	List	  **list_exprs;		/* expressions matched to any statistic */
 	int			listidx;
 	Selectivity sel = 1.0;
 
@@ -1296,6 +1789,9 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 	list_attnums = (Bitmapset **) palloc(sizeof(Bitmapset *) *
 										 list_length(clauses));
 
+	/* expressions extracted from complex expressions */
+	list_exprs = (List **) palloc(sizeof(Node *) * list_length(clauses));
+
 	/*
 	 * Pre-process the clauses list to extract the attnums seen in each item.
 	 * We need to determine if there's any clauses which will be useful for
@@ -1313,11 +1809,100 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 		Node	   *clause = (Node *) lfirst(l);
 		Bitmapset  *attnums = NULL;
 
+		/* the clause is considered incompatible by default */
+		list_attnums[listidx] = NULL;
+
+		/* and it's also not covered exactly by the statistic */
+		list_exprs[listidx] = NULL;
+
+		/*
+		 * First see if the clause is simple enough to be covered directly
+		 * by the attributes. If not, see if there's at least one statistic
+		 * object using the expression as-is.
+		 */
 		if (!bms_is_member(listidx, *estimatedclauses) &&
 			statext_is_compatible_clause(root, clause, rel->relid, &attnums))
+		{
+			/* simple expression, covered through attnum(s) */
 			list_attnums[listidx] = attnums;
+		}
 		else
-			list_attnums[listidx] = NULL;
+		{
+			ListCell   *lc;
+			List	 *exprs;
+
+			/*
+			 * XXX This is kinda dubious, because we extract the smallest
+			 * clauses - e.g. from (Var op Const) we extract Var. But maybe
+			 * the statistics covers larger expressions, so maybe this will
+			 * skip that. For example give ((a+b) + (c+d)) it's not clear
+			 * if we should extract the whole clause or some smaller parts.
+			 * OTOH we need (Expr op Const) so maybe we only care about the
+			 * clause as a whole?
+			 */
+			exprs = statext_extract_expression(root, clause, rel->relid);
+
+			/* complex expression, search for statistic covering all parts */
+			foreach(lc, rel->statlist)
+			{
+				ListCell		   *le;
+				StatisticExtInfo   *info = (StatisticExtInfo *) lfirst(lc);
+
+				/*
+				 * Assume all parts are covered by this statistics, we'll
+				 * stop if we found part that is not covered.
+				 */
+				bool covered = true;
+
+				/* have we already matched the expression to a statistic? */
+				Assert(!list_exprs[listidx]);
+
+				/* no expressions in the statistic */
+				if (!info->exprs)
+					continue;
+
+				foreach(le, exprs)
+				{
+					ListCell   *lc2;
+					Node	   *expr = (Node *) lfirst(le);
+					bool		found = false;
+
+					/*
+					 * Walk the expressions, see if all expressions extracted from
+					 * the clause are covered by the extended statistic object.
+					 */
+					foreach (lc2, info->exprs)
+					{
+						Node   *stat_expr = (Node *) lfirst(lc2);
+
+						if (equal(expr, stat_expr))
+						{
+							found = true;
+							break;
+						}
+					}
+
+					/* found expression not covered by the statistics, stop */
+					if (!found)
+					{
+						covered = false;
+						break;
+					}
+				}
+
+				/*
+				 * OK, we found a statistics covering this clause, stop looking
+				 * for another one
+				 */
+				if (covered)
+				{
+					/* XXX should this add the original expression instead? */
+					list_exprs[listidx] = exprs;
+					break;
+				}
+
+			}
+		}
 
 		listidx++;
 	}
@@ -1336,7 +1921,8 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 
 		/* find the best suited statistics object for these attnums */
 		stat = choose_best_statistics(rel->statlist, STATS_EXT_MCV,
-									  list_attnums, list_length(clauses));
+									  list_attnums, list_exprs,
+									  list_length(clauses));
 
 		/*
 		 * if no (additional) matching stats could be found then we've nothing
@@ -1356,17 +1942,58 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 		{
 			/*
 			 * If the clause is compatible with the selected statistics, mark
-			 * it as estimated and add it to the list to estimate.
+			 * it as estimated and add it to the list to estimate. It may be
+			 * either a simple clause, or an expression.
 			 */
 			if (list_attnums[listidx] != NULL &&
 				bms_is_subset(list_attnums[listidx], stat->keys))
 			{
+				/* simple clause */
 				stat_clauses = lappend(stat_clauses, (Node *) lfirst(l));
 				*estimatedclauses = bms_add_member(*estimatedclauses, listidx);
 
 				bms_free(list_attnums[listidx]);
 				list_attnums[listidx] = NULL;
 			}
+			else if (list_exprs[listidx] != NIL)
+			{
+				/* are all parts of the expression covered by the statistic? */
+				ListCell   *lc;
+				int			ncovered = 0;
+
+				foreach (lc, list_exprs[listidx])
+				{
+					ListCell   *lc2;
+					Node	   *expr = (Node *) lfirst(lc);
+					bool		found = false;
+
+					foreach (lc2, stat->exprs)
+					{
+						Node   *stat_expr = (Node *) lfirst(lc2);
+
+						if (equal(expr, stat_expr))
+						{
+							found = true;
+							break;
+						}
+					}
+
+					/* count it as covered and continue to the next expression */
+					if (found)
+						ncovered++;
+				}
+
+				/* all parts of thi expression are covered by this statistics */
+				if (ncovered == list_length(list_exprs[listidx]))
+				{
+					stat_clauses = lappend(stat_clauses, (Node *) lfirst(l));
+					*estimatedclauses = bms_add_member(*estimatedclauses, listidx);
+
+					// bms_free(list_attnums[listidx]);
+					list_exprs[listidx] = NULL;
+				}
+
+			}
 
 			listidx++;
 		}
@@ -1506,3 +2133,777 @@ examine_clause_args(List *args, Var **varp, Const **cstp, bool *varonleftp)
 
 	return true;
 }
+
+bool
+examine_clause_args2(List *args, Node **exprp, Const **cstp, bool *expronleftp)
+{
+	Node	   *expr;
+	Const	   *cst;
+	bool		expronleft;
+	Node	   *leftop,
+			   *rightop;
+
+	/* enforced by statext_is_compatible_clause_internal */
+	Assert(list_length(args) == 2);
+
+	leftop = linitial(args);
+	rightop = lsecond(args);
+
+	/* strip RelabelType from either side of the expression */
+	if (IsA(leftop, RelabelType))
+		leftop = (Node *) ((RelabelType *) leftop)->arg;
+
+	if (IsA(rightop, RelabelType))
+		rightop = (Node *) ((RelabelType *) rightop)->arg;
+
+	if (IsA(rightop, Const))
+	{
+		expr = (Node *) leftop;
+		cst = (Const *) rightop;
+		expronleft = true;
+	}
+	else if (IsA(leftop, Const))
+	{
+		expr = (Node *) rightop;
+		cst = (Const *) leftop;
+		expronleft = false;
+	}
+	else
+		return false;
+
+	/* return pointers to the extracted parts if requested */
+	if (exprp)
+		*exprp = expr;
+
+	if (cstp)
+		*cstp = cst;
+
+	if (expronleftp)
+		*expronleftp = expronleft;
+
+	return true;
+}
+
+bool
+examine_opclause_expression(OpExpr *expr, Var **varp, Const **cstp, bool *varonleftp)
+{
+	Var		   *var;
+	Const	   *cst;
+	bool		varonleft;
+	Node	   *leftop,
+			   *rightop;
+
+	/* enforced by statext_is_compatible_clause_internal */
+	Assert(list_length(expr->args) == 2);
+
+	leftop = linitial(expr->args);
+	rightop = lsecond(expr->args);
+
+	/* strip RelabelType from either side of the expression */
+	if (IsA(leftop, RelabelType))
+		leftop = (Node *) ((RelabelType *) leftop)->arg;
+
+	if (IsA(rightop, RelabelType))
+		rightop = (Node *) ((RelabelType *) rightop)->arg;
+
+	if (IsA(leftop, Var) && IsA(rightop, Const))
+	{
+		var = (Var *) leftop;
+		cst = (Const *) rightop;
+		varonleft = true;
+	}
+	else if (IsA(leftop, Const) && IsA(rightop, Var))
+	{
+		var = (Var *) rightop;
+		cst = (Const *) leftop;
+		varonleft = false;
+	}
+	else
+		return false;
+
+	/* return pointers to the extracted parts if requested */
+	if (varp)
+		*varp = var;
+
+	if (cstp)
+		*cstp = cst;
+
+	if (varonleftp)
+		*varonleftp = varonleft;
+
+	return true;
+}
+
+bool
+examine_opclause_expression2(OpExpr *expr, Node **exprp, Const **cstp, bool *expronleftp)
+{
+	Node	   *expr2;
+	Const	   *cst;
+	bool		expronleft;
+	Node	   *leftop,
+			   *rightop;
+
+	/* enforced by statext_is_compatible_clause_internal */
+	Assert(list_length(expr->args) == 2);
+
+	leftop = linitial(expr->args);
+	rightop = lsecond(expr->args);
+
+	/* strip RelabelType from either side of the expression */
+	if (IsA(leftop, RelabelType))
+		leftop = (Node *) ((RelabelType *) leftop)->arg;
+
+	if (IsA(rightop, RelabelType))
+		rightop = (Node *) ((RelabelType *) rightop)->arg;
+
+	if (IsA(rightop, Const))
+	{
+		expr2 = (Node *) leftop;
+		cst = (Const *) rightop;
+		expronleft = true;
+	}
+	else if (IsA(leftop, Const))
+	{
+		expr2 = (Node *) rightop;
+		cst = (Const *) leftop;
+		expronleft = false;
+	}
+	else
+		return false;
+
+	/* return pointers to the extracted parts if requested */
+	if (exprp)
+		*exprp = expr2;
+
+	if (cstp)
+		*cstp = cst;
+
+	if (expronleftp)
+		*expronleftp = expronleft;
+
+	return true;
+}
+
+
+/*
+ * Compute statistics about expressions of a relation.
+ */
+static void
+compute_expr_stats(Relation onerel, double totalrows,
+				   AnlExprData *exprdata, int nexprs,
+				   HeapTuple *rows, int numrows)
+{
+	MemoryContext expr_context,
+				old_context;
+	int			ind,
+				i;
+
+	expr_context = AllocSetContextCreate(CurrentMemoryContext,
+										 "Analyze Expression",
+										 ALLOCSET_DEFAULT_SIZES);
+	old_context = MemoryContextSwitchTo(expr_context);
+
+	for (ind = 0; ind < nexprs; ind++)
+	{
+		AnlExprData *thisdata = &exprdata[ind];
+		Node        *expr = thisdata->expr;
+		TupleTableSlot *slot;
+		EState	   *estate;
+		ExprContext *econtext;
+		Datum	   *exprvals;
+		bool	   *exprnulls;
+		ExprState  *exprstate;
+		int			tcnt;
+
+		/*
+		 * Need an EState for evaluation of expressions.  Create it in
+		 * the per-expression context to be sure it gets cleaned up at
+		 * the bottom of the loop.
+		 */
+		estate = CreateExecutorState();
+		econtext = GetPerTupleExprContext(estate);
+
+		/* Set up expression evaluation state */
+		exprstate = ExecPrepareExpr((Expr *) expr, estate);
+
+		/* Need a slot to hold the current heap tuple, too */
+		slot = MakeSingleTupleTableSlot(RelationGetDescr(onerel),
+										&TTSOpsHeapTuple);
+
+		/* Arrange for econtext's scan tuple to be the tuple under test */
+		econtext->ecxt_scantuple = slot;
+
+		/* Compute and save index expression values */
+		exprvals = (Datum *) palloc(numrows * sizeof(Datum));
+		exprnulls = (bool *) palloc(numrows * sizeof(bool));
+
+		tcnt = 0;
+		for (i = 0; i < numrows; i++)
+		{
+			Datum	datum;
+			bool	isnull;
+
+			/*
+			 * Reset the per-tuple context each time, to reclaim any cruft
+			 * left behind by evaluating the predicate or index expressions.
+			 */
+			ResetExprContext(econtext);
+
+			/* Set up for predicate or expression evaluation */
+			ExecStoreHeapTuple(rows[i], slot, false);
+
+			/*
+			 * FIXME this probably leaks memory. Maybe we should use
+			 * ExecEvalExprSwitchContext but then we need to copy the
+			 * result somewhere else.
+			 */
+			datum = ExecEvalExpr(exprstate,
+								 GetPerTupleExprContext(estate),
+								 &isnull);
+			if (isnull)
+			{
+				exprvals[tcnt] = (Datum) 0;
+				exprnulls[tcnt] = true;
+			}
+			else
+			{
+				exprvals[tcnt] = (Datum) datum;
+				exprnulls[tcnt] = false;
+			}
+
+			tcnt++;
+		}
+
+		/*
+		 * Now we can compute the statistics for the expression columns.
+		 */
+		if (tcnt > 0)
+		{
+			// MemoryContextSwitchTo(col_context);
+			VacAttrStats *stats = thisdata->vacattrstat;
+			AttributeOpts *aopt =
+				get_attribute_options(stats->attr->attrelid,
+									  stats->attr->attnum);
+
+			stats->exprvals = exprvals;
+			stats->exprnulls = exprnulls;
+			stats->rowstride = 1;
+			stats->compute_stats(stats,
+								 expr_fetch_func,
+								 tcnt,
+								 tcnt);
+
+			/*
+			 * If the n_distinct option is specified, it overrides the
+			 * above computation.
+			 */
+			if (aopt != NULL && aopt->n_distinct != 0.0)
+				stats->stadistinct = aopt->n_distinct;
+
+			// MemoryContextResetAndDeleteChildren(col_context);
+		}
+
+		/* And clean up */
+		// MemoryContextSwitchTo(expr_context);
+
+		ExecDropSingleTupleTableSlot(slot);
+		FreeExecutorState(estate);
+		// MemoryContextResetAndDeleteChildren(expr_context);
+	}
+
+	MemoryContextSwitchTo(old_context);
+	MemoryContextDelete(expr_context);
+}
+
+
+/*
+ * Fetch function for analyzing index expressions.
+ *
+ * We have not bothered to construct index tuples, instead the data is
+ * just in Datum arrays.
+ */
+static Datum
+expr_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
+{
+	int			i;
+
+	/* exprvals and exprnulls are already offset for proper column */
+	i = rownum * stats->rowstride;
+	*isNull = stats->exprnulls[i];
+	return stats->exprvals[i];
+}
+
+/*
+ * Build analyze data for a list of expressions. As this is not tied
+ * directly to a relation (table or index), we have to fake some of
+ * the data.
+ */
+static AnlExprData *
+build_expr_data(List *exprs)
+{
+	int				idx;
+	int				nexprs = list_length(exprs);
+	AnlExprData	   *exprdata;
+	ListCell	   *lc;
+
+	exprdata = (AnlExprData *) palloc0(nexprs * sizeof(AnlExprData));
+
+	idx = 0;
+	foreach (lc, exprs)
+	{
+		Node		   *expr = (Node *) lfirst(lc);
+		AnlExprData	   *thisdata = &exprdata[idx];
+
+		thisdata->expr = expr;
+		thisdata->vacattrstat = (VacAttrStats *) palloc(sizeof(VacAttrStats));
+
+		thisdata->vacattrstat = examine_expression(expr);
+		idx++;
+	}
+
+	return exprdata;
+}
+
+/*
+ * examine_expression -- pre-analysis of a single column
+ *
+ * Determine whether the column is analyzable; if so, create and initialize
+ * a VacAttrStats struct for it.  If not, return NULL.
+ */
+static VacAttrStats *
+examine_expression(Node *expr)
+{
+	HeapTuple	typtuple;
+	VacAttrStats *stats;
+	int			i;
+	bool		ok;
+
+	Assert(expr != NULL);
+
+	/*
+	 * Create the VacAttrStats struct.
+	 */
+	stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats));
+
+	/*
+	 * When analyzing an expression, believe the expression tree's type.
+	 */
+	stats->attrtypid = exprType(expr);
+	stats->attrtypmod = exprTypmod(expr);
+
+	/*
+	 * XXX Do we need to do anything special about the collation, similar
+	 * to what examine_attribute does for expression indexes?
+	 */
+	stats->attrcollid = exprCollation(expr);
+
+	/*
+	 * We don't have any pg_attribute for expressions, so let's fake
+	 * something reasonable into attstattarget, which is the only thing
+	 * std_typanalyze needs.
+	 */
+	stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_FIXED_PART_SIZE);
+
+	/*
+	 * FIXME we should probably get the target from the extended stats
+	 * object, or something like that.
+	 */
+	stats->attr->attstattarget = default_statistics_target;
+
+	/* initialize some basic fields */
+	stats->attr->attrelid = InvalidOid;
+	stats->attr->attnum = InvalidAttrNumber;
+	stats->attr->atttypid = stats->attrtypid;
+
+	typtuple = SearchSysCacheCopy1(TYPEOID,
+								   ObjectIdGetDatum(stats->attrtypid));
+	if (!HeapTupleIsValid(typtuple))
+		elog(ERROR, "cache lookup failed for type %u", stats->attrtypid);
+	stats->attrtype = (Form_pg_type) GETSTRUCT(typtuple);
+	stats->anl_context = CurrentMemoryContext;	/* XXX should be using something else? */
+	stats->tupattnum = InvalidAttrNumber;
+
+	/*
+	 * The fields describing the stats->stavalues[n] element types default to
+	 * the type of the data being analyzed, but the type-specific typanalyze
+	 * function can change them if it wants to store something else.
+	 */
+	for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
+	{
+		stats->statypid[i] = stats->attrtypid;
+		stats->statyplen[i] = stats->attrtype->typlen;
+		stats->statypbyval[i] = stats->attrtype->typbyval;
+		stats->statypalign[i] = stats->attrtype->typalign;
+	}
+
+	/*
+	 * Call the type-specific typanalyze function.  If none is specified, use
+	 * std_typanalyze().
+	 */
+	if (OidIsValid(stats->attrtype->typanalyze))
+		ok = DatumGetBool(OidFunctionCall1(stats->attrtype->typanalyze,
+										   PointerGetDatum(stats)));
+	else
+		ok = std_typanalyze(stats);
+
+	if (!ok || stats->compute_stats == NULL || stats->minrows <= 0)
+	{
+		heap_freetuple(typtuple);
+		pfree(stats);
+		return NULL;
+	}
+
+	return stats;
+}
+
+/* form an array of pg_statistic rows (per update_attstats) */
+static Datum
+serialize_expr_stats(AnlExprData *exprdata, int nexprs)
+{
+	int			exprno;
+	Oid			typOid;
+	Relation	sd;
+
+	ArrayBuildState *astate = NULL;
+
+	sd = table_open(StatisticRelationId, RowExclusiveLock);
+
+	/* lookup OID of composite type for pg_statistic */
+	typOid = get_rel_type_id(StatisticRelationId);
+	if (!OidIsValid(typOid))
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("relation \"pg_statistic\" does not have a composite type")));
+
+	for (exprno = 0; exprno < nexprs; exprno++)
+	{
+		int				i, k;
+		VacAttrStats   *stats = exprdata[exprno].vacattrstat;
+
+		Datum		values[Natts_pg_statistic];
+		bool		nulls[Natts_pg_statistic];
+		HeapTuple	stup;
+
+		if (!stats->stats_valid)
+		{
+			astate = accumArrayResult(astate,
+									  (Datum) 0,
+									  true,
+									  typOid,
+									  CurrentMemoryContext);
+			continue;
+		}
+
+		/*
+		 * Construct a new pg_statistic tuple
+		 */
+		for (i = 0; i < Natts_pg_statistic; ++i)
+		{
+			nulls[i] = false;
+		}
+
+		values[Anum_pg_statistic_starelid - 1] = ObjectIdGetDatum(InvalidOid);
+		values[Anum_pg_statistic_staattnum - 1] = Int16GetDatum(InvalidAttrNumber);
+		values[Anum_pg_statistic_stainherit - 1] = BoolGetDatum(false);
+		values[Anum_pg_statistic_stanullfrac - 1] = Float4GetDatum(stats->stanullfrac);
+		values[Anum_pg_statistic_stawidth - 1] = Int32GetDatum(stats->stawidth);
+		values[Anum_pg_statistic_stadistinct - 1] = Float4GetDatum(stats->stadistinct);
+		i = Anum_pg_statistic_stakind1 - 1;
+		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+		{
+			values[i++] = Int16GetDatum(stats->stakind[k]); /* stakindN */
+		}
+		i = Anum_pg_statistic_staop1 - 1;
+		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+		{
+			values[i++] = ObjectIdGetDatum(stats->staop[k]);	/* staopN */
+		}
+		i = Anum_pg_statistic_stacoll1 - 1;
+		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+		{
+			values[i++] = ObjectIdGetDatum(stats->stacoll[k]);	/* stacollN */
+		}
+		i = Anum_pg_statistic_stanumbers1 - 1;
+		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+		{
+			int			nnum = stats->numnumbers[k];
+
+			if (nnum > 0)
+			{
+				int			n;
+				Datum	   *numdatums = (Datum *) palloc(nnum * sizeof(Datum));
+				ArrayType  *arry;
+
+				for (n = 0; n < nnum; n++)
+					numdatums[n] = Float4GetDatum(stats->stanumbers[k][n]);
+				/* XXX knows more than it should about type float4: */
+				arry = construct_array(numdatums, nnum,
+									   FLOAT4OID,
+									   sizeof(float4), true, TYPALIGN_INT);
+				values[i++] = PointerGetDatum(arry);	/* stanumbersN */
+			}
+			else
+			{
+				nulls[i] = true;
+				values[i++] = (Datum) 0;
+			}
+		}
+		i = Anum_pg_statistic_stavalues1 - 1;
+		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+		{
+			if (stats->numvalues[k] > 0)
+			{
+				ArrayType  *arry;
+
+				arry = construct_array(stats->stavalues[k],
+									   stats->numvalues[k],
+									   stats->statypid[k],
+									   stats->statyplen[k],
+									   stats->statypbyval[k],
+									   stats->statypalign[k]);
+				values[i++] = PointerGetDatum(arry);	/* stavaluesN */
+			}
+			else
+			{
+				nulls[i] = true;
+				values[i++] = (Datum) 0;
+			}
+		}
+
+		stup = heap_form_tuple(RelationGetDescr(sd), values, nulls);
+
+		astate = accumArrayResult(astate,
+								  heap_copy_tuple_as_datum(stup, RelationGetDescr(sd)),
+								  false,
+								  typOid,
+								  CurrentMemoryContext);
+	}
+
+	table_close(sd, RowExclusiveLock);
+
+	return makeArrayResult(astate, CurrentMemoryContext);
+}
+
+
+/*
+ * Loads pg_statistic record from expression statistics for expression
+ * identified by the supplied index.
+ */
+HeapTuple
+statext_expressions_load(Oid stxoid, int idx)
+{
+	bool		isnull;
+	Datum		value;
+	HeapTuple	htup;
+	ExpandedArrayHeader *eah;
+	HeapTupleHeader td;
+	HeapTupleData tmptup;
+	HeapTuple	tup;
+
+	htup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(stxoid));
+	if (!HeapTupleIsValid(htup))
+		elog(ERROR, "cache lookup failed for statistics object %u", stxoid);
+
+	value = SysCacheGetAttr(STATEXTDATASTXOID, htup,
+							Anum_pg_statistic_ext_data_stxdexpr, &isnull);
+	if (isnull)
+		elog(ERROR,
+			 "requested statistic kind \"%c\" is not yet built for statistics object %u",
+			 STATS_EXT_DEPENDENCIES, stxoid);
+
+	eah = DatumGetExpandedArray(value);
+
+	deconstruct_expanded_array(eah);
+
+	td = DatumGetHeapTupleHeader(eah->dvalues[idx]);
+
+	/* Build a temporary HeapTuple control structure */
+	tmptup.t_len = HeapTupleHeaderGetDatumLength(td);
+	tmptup.t_data = td;
+
+	tup = heap_copytuple(&tmptup);
+
+	ReleaseSysCache(htup);
+
+	return tup;
+}
+
+/*
+ * Evaluate the expressions, so that we can use the results to build
+ * all the requested statistics types. This matters especially for
+ * expensive expressions, of course.
+ */
+static ExprInfo *
+evaluate_expressions(Relation rel, List *exprs, int numrows, HeapTuple *rows)
+{
+	/* evaluated expressions */
+	ExprInfo   *result;
+	char	   *ptr;
+	Size		len;
+
+	int			i;
+	int			idx;
+	TupleTableSlot *slot;
+	EState	   *estate;
+	ExprContext *econtext;
+	List	   *exprstates = NIL;
+	int			nexprs = list_length(exprs);
+	ListCell   *lc;
+
+	/* allocate everything as a single chunk, so we can free it easily */
+	len = MAXALIGN(sizeof(ExprInfo));
+	len += MAXALIGN(sizeof(Oid) * nexprs);	/* types */
+	len += MAXALIGN(sizeof(Oid) * nexprs);	/* collations */
+
+	/* values */
+	len += MAXALIGN(sizeof(Datum *) * nexprs);
+	len += nexprs * MAXALIGN(sizeof(Datum) * numrows);
+
+	/* values */
+	len += MAXALIGN(sizeof(bool *) * nexprs);
+	len += nexprs * MAXALIGN(sizeof(bool) * numrows);
+
+	ptr = palloc(len);
+
+	/* set the pointers */
+	result = (ExprInfo *) ptr;
+	ptr += sizeof(ExprInfo);
+
+	/* types */
+	result->types = (Oid *) ptr;
+	ptr += MAXALIGN(sizeof(Oid) * nexprs);
+
+	/* collations */
+	result->collations = (Oid *) ptr;
+	ptr += MAXALIGN(sizeof(Oid) * nexprs);
+
+	/* values */
+	result->values = (Datum **) ptr;
+	ptr += MAXALIGN(sizeof(Datum *) * nexprs);
+
+	/* nulls */
+	result->nulls = (bool **) ptr;
+	ptr += MAXALIGN(sizeof(bool *) * nexprs);
+
+	for (i = 0; i < nexprs; i++)
+	{
+		result->values[i] = (Datum *) ptr;
+		ptr += MAXALIGN(sizeof(Datum) * numrows);
+
+		result->nulls[i] = (bool *) ptr;
+		ptr += MAXALIGN(sizeof(bool) * numrows);
+	}
+
+	Assert((ptr - (char *) result) == len);
+
+	result->nexprs = list_length(exprs);
+
+	idx = 0;
+	foreach (lc, exprs)
+	{
+		Node *expr = (Node *) lfirst(lc);
+
+		result->types[idx] = exprType(expr);
+		result->collations[idx] = exprCollation(expr);
+
+		idx++;
+	}
+
+	/*
+	 * Need an EState for evaluation of index expressions and
+	 * partial-index predicates.  Create it in the per-index context to be
+	 * sure it gets cleaned up at the bottom of the loop.
+	 */
+	estate = CreateExecutorState();
+	econtext = GetPerTupleExprContext(estate);
+
+	/* Need a slot to hold the current heap tuple, too */
+	slot = MakeSingleTupleTableSlot(RelationGetDescr(rel),
+									&TTSOpsHeapTuple);
+
+	/* Arrange for econtext's scan tuple to be the tuple under test */
+	econtext->ecxt_scantuple = slot;
+
+	/* Set up expression evaluation state */
+	exprstates = ExecPrepareExprList(exprs, estate);
+
+	for (i = 0; i < numrows; i++)
+	{
+		/*
+		 * Reset the per-tuple context each time, to reclaim any cruft
+		 * left behind by evaluating the predicate or index expressions.
+		 */
+		ResetExprContext(econtext);
+
+		/* Set up for predicate or expression evaluation */
+		ExecStoreHeapTuple(rows[i], slot, false);
+
+		idx = 0;
+		foreach (lc, exprstates)
+		{
+			Datum	datum;
+			bool	isnull;
+			ExprState *exprstate = (ExprState *) lfirst(lc);
+
+			/*
+			 * FIXME this probably leaks memory. Maybe we should use
+			 * ExecEvalExprSwitchContext but then we need to copy the
+			 * result somewhere else.
+			 */
+			datum = ExecEvalExpr(exprstate,
+								 GetPerTupleExprContext(estate),
+								 &isnull);
+			if (isnull)
+			{
+				result->values[idx][i] = (Datum) 0;
+				result->nulls[idx][i] = true;
+			}
+			else
+			{
+				result->values[idx][i] = (Datum) datum;
+				result->nulls[idx][i] = false;
+			}
+
+			idx++;
+		}
+	}
+
+	ExecDropSingleTupleTableSlot(slot);
+	FreeExecutorState(estate);
+
+	return result;
+}
+
+/*
+ * add_expressions_to_attributes
+ *		add expressions as attributes with high attnums
+ *
+ * Treat the expressions as attributes with attnums above the regular
+ * attnum range. This will allow us to handle everything in the same
+ * way, and identify exressions in the dependencies.
+ *
+ * XXX This always creates a copy of the bitmap. We might optimize this
+ * by only creating the copy with (nexprs > 0) but then we'd have to track
+ * this in order to free it (if we want to). Does not seem worth it.
+ */
+Bitmapset *
+add_expressions_to_attributes(Bitmapset *attrs, int nexprs)
+{
+	int			i;
+
+	/*
+	 * Copy the bitmapset and add fake attnums representing expressions,
+	 * starting above MaxHeapAttributeNumber.
+	 */
+	attrs = bms_copy(attrs);
+
+	/* start with (MaxHeapAttributeNumber + 1) */
+	for (i = 0; i < nexprs; i++)
+	{
+		Assert(EXPRESSION_ATTNUM(i) > MaxHeapAttributeNumber);
+
+		attrs = bms_add_member(attrs, EXPRESSION_ATTNUM(i));
+	}
+
+	return attrs;
+}
diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c
index 6a262f1543..f0a9cf44db 100644
--- a/src/backend/statistics/mcv.c
+++ b/src/backend/statistics/mcv.c
@@ -73,7 +73,8 @@
 	 ((ndims) * sizeof(DimensionInfo)) + \
 	 ((nitems) * ITEM_SIZE(ndims)))
 
-static MultiSortSupport build_mss(VacAttrStats **stats, int numattrs);
+static MultiSortSupport build_mss(VacAttrStats **stats, int numattrs,
+								  ExprInfo *exprs);
 
 static SortItem *build_distinct_groups(int numrows, SortItem *items,
 									   MultiSortSupport mss, int *ndistinct);
@@ -180,8 +181,9 @@ get_mincount_for_mcv_list(int samplerows, double totalrows)
  *
  */
 MCVList *
-statext_mcv_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
-				  VacAttrStats **stats, double totalrows, int stattarget)
+statext_mcv_build(int numrows, HeapTuple *rows, ExprInfo *exprs,
+				  Bitmapset *attrs, VacAttrStats **stats,
+				  double totalrows, int stattarget)
 {
 	int			i,
 				numattrs,
@@ -194,14 +196,23 @@ statext_mcv_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
 	MCVList    *mcvlist = NULL;
 	MultiSortSupport mss;
 
-	attnums = build_attnums_array(attrs, &numattrs);
-
 	/* comparator for all the columns */
-	mss = build_mss(stats, numattrs);
+	mss = build_mss(stats, bms_num_members(attrs), exprs);
+
+	/*
+	 * treat expressions as special attributes with high attnums
+	 *
+	 * XXX We do this after build_mss, because that expects the bitmapset
+	 * to only contain simple attributes (with a matching VacAttrStats)
+	 */
+	attrs = add_expressions_to_attributes(attrs, exprs->nexprs);
+
+	/* now build the array, with the special expression attnums */
+	attnums = build_attnums_array(attrs, &numattrs);
 
 	/* sort the rows */
-	items = build_sorted_items(numrows, &nitems, rows, stats[0]->tupDesc,
-							   mss, numattrs, attnums);
+	items = build_sorted_items(numrows, &nitems, rows, exprs,
+							   stats[0]->tupDesc, mss, numattrs, attnums);
 
 	if (!items)
 		return NULL;
@@ -337,6 +348,7 @@ statext_mcv_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
 
 	pfree(items);
 	pfree(groups);
+	pfree(attrs);
 
 	return mcvlist;
 }
@@ -346,12 +358,12 @@ statext_mcv_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
  *	build MultiSortSupport for the attributes passed in attrs
  */
 static MultiSortSupport
-build_mss(VacAttrStats **stats, int numattrs)
+build_mss(VacAttrStats **stats, int numattrs, ExprInfo *exprs)
 {
 	int			i;
 
 	/* Sort by multiple columns (using array of SortSupport) */
-	MultiSortSupport mss = multi_sort_init(numattrs);
+	MultiSortSupport mss = multi_sort_init(numattrs + exprs->nexprs);
 
 	/* prepare the sort functions for all the attributes */
 	for (i = 0; i < numattrs; i++)
@@ -367,6 +379,20 @@ build_mss(VacAttrStats **stats, int numattrs)
 		multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
 	}
 
+	/* prepare the sort functions for all the expressions */
+	for (i = 0; i < exprs->nexprs; i++)
+	{
+		TypeCacheEntry *type;
+
+		type = lookup_type_cache(exprs->types[i], TYPECACHE_LT_OPR);
+		if (type->lt_opr == InvalidOid) /* shouldn't happen */
+			elog(ERROR, "cache lookup failed for ordering operator for type %u",
+				 exprs->types[i]);
+
+		multi_sort_add_dimension(mss, numattrs + i, type->lt_opr,
+								 exprs->collations[i]);
+	}
+
 	return mss;
 }
 
@@ -1540,10 +1566,14 @@ pg_mcv_list_send(PG_FUNCTION_ARGS)
  * the size to ~1/8. It would also allow us to combine bitmaps simply using
  * & and |, which should be faster than min/max. The bitmaps are fairly
  * small, though (thanks to the cap on the MCV list size).
+ *
+ * XXX There's a lot of code duplication between branches for simple columns
+ * and complex expressions. We should refactor it somehow.
  */
 static bool *
 mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
-					 Bitmapset *keys, MCVList *mcvlist, bool is_or)
+					 Bitmapset *keys, List *exprs,
+					 MCVList *mcvlist, bool is_or)
 {
 	int			i;
 	ListCell   *l;
@@ -1583,8 +1613,10 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
 
 			/* valid only after examine_clause_args returns true */
 			Var		   *var;
+			Node	   *clause_expr;
 			Const	   *cst;
 			bool		varonleft;
+			bool		expronleft;
 
 			fmgr_info(get_opcode(expr->opno), &opproc);
 
@@ -1653,6 +1685,89 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
 					matches[i] = RESULT_MERGE(matches[i], is_or, match);
 				}
 			}
+			/* extract the expr and const from the expression */
+			else if (examine_clause_args2(expr->args, &clause_expr, &cst, &expronleft))
+			{
+				ListCell   *lc;
+				int			idx;
+				Oid			collid = exprCollation(clause_expr);
+
+				/* match the attribute to a dimension of the statistic */
+				idx = bms_num_members(keys);
+
+				foreach(lc, exprs)
+				{
+					Node *stat_expr = (Node *) lfirst(lc);
+
+					if (equal(clause_expr, stat_expr))
+						break;
+
+					idx++;
+				}
+
+				/* index should be valid */
+				Assert((idx >= 0) &&
+					   (idx < bms_num_members(keys) + list_length(exprs)));
+
+				/*
+				 * Walk through the MCV items and evaluate the current clause.
+				 * We can skip items that were already ruled out, and
+				 * terminate if there are no remaining MCV items that might
+				 * possibly match.
+				 */
+				for (i = 0; i < mcvlist->nitems; i++)
+				{
+					bool		match = true;
+					MCVItem    *item = &mcvlist->items[i];
+
+					/*
+					 * When the MCV item or the Const value is NULL we can
+					 * treat this as a mismatch. We must not call the operator
+					 * because of strictness.
+					 */
+					if (item->isnull[idx] || cst->constisnull)
+					{
+						matches[i] = RESULT_MERGE(matches[i], is_or, false);
+						continue;
+					}
+
+					/*
+					 * Skip MCV items that can't change result in the bitmap.
+					 * Once the value gets false for AND-lists, or true for
+					 * OR-lists, we don't need to look at more clauses.
+					 */
+					if (RESULT_IS_FINAL(matches[i], is_or))
+						continue;
+
+					/*
+					 * First check whether the constant is below the lower
+					 * boundary (in that case we can skip the bucket, because
+					 * there's no overlap).
+					 *
+					 * We don't store collations used to build the statistics,
+					 * but we can use the collation for the attribute itself,
+					 * as stored in varcollid. We do reset the statistics
+					 * after a type change (including collation change), so
+					 * this is OK. We may need to relax this after allowing
+					 * extended statistics on expressions.
+					 */
+					if (expronleft)
+						match = DatumGetBool(FunctionCall2Coll(&opproc,
+															   collid,
+															   item->values[idx],
+															   cst->constvalue));
+					else
+						match = DatumGetBool(FunctionCall2Coll(&opproc,
+															   collid,
+															   cst->constvalue,
+															   item->values[idx]));
+
+					/* update the match bitmap with the result */
+					matches[i] = RESULT_MERGE(matches[i], is_or, match);
+				}
+			}
+			else
+				elog(ERROR, "incompatible clause");
 		}
 		else if (IsA(clause, ScalarArrayOpExpr))
 		{
@@ -1661,8 +1776,10 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
 
 			/* valid only after examine_clause_args returns true */
 			Var		   *var;
+			Node	   *clause_expr;
 			Const	   *cst;
 			bool		varonleft;
+			bool		expronleft;
 
 			fmgr_info(get_opcode(expr->opno), &opproc);
 
@@ -1760,14 +1877,155 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
 					matches[i] = RESULT_MERGE(matches[i], is_or, match);
 				}
 			}
+			/* extract the expr and const from the expression */
+			else if (examine_clause_args2(expr->args, &clause_expr, &cst, &expronleft))
+			{
+				ListCell   *lc;
+				int			idx;
+
+				ArrayType  *arrayval;
+				int16		elmlen;
+				bool		elmbyval;
+				char		elmalign;
+				int			num_elems;
+				Datum	   *elem_values;
+				bool	   *elem_nulls;
+				Oid			collid = exprCollation(clause_expr);
+
+				/* ScalarArrayOpExpr has the Var always on the left */
+				Assert(expronleft);
+
+				if (!cst->constisnull)
+				{
+					arrayval = DatumGetArrayTypeP(cst->constvalue);
+					get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
+										 &elmlen, &elmbyval, &elmalign);
+					deconstruct_array(arrayval,
+									  ARR_ELEMTYPE(arrayval),
+									  elmlen, elmbyval, elmalign,
+									  &elem_values, &elem_nulls, &num_elems);
+				}
+
+				/* match the attribute to a dimension of the statistic */
+				idx = bms_num_members(keys);
+
+				foreach(lc, exprs)
+				{
+					Node *stat_expr = (Node *) lfirst(lc);
+
+					if (equal(clause_expr, stat_expr))
+						break;
+
+					idx++;
+				}
+
+				/* index should be valid */
+				Assert((idx >= 0) &&
+					   (idx < bms_num_members(keys) + list_length(exprs)));
+
+				/*
+				 * Walk through the MCV items and evaluate the current clause.
+				 * We can skip items that were already ruled out, and
+				 * terminate if there are no remaining MCV items that might
+				 * possibly match.
+				 */
+				for (i = 0; i < mcvlist->nitems; i++)
+				{
+					int			j;
+					bool		match = (expr->useOr ? false : true);
+					MCVItem    *item = &mcvlist->items[i];
+
+					/*
+					 * When the MCV item or the Const value is NULL we can
+					 * treat this as a mismatch. We must not call the operator
+					 * because of strictness.
+					 */
+					if (item->isnull[idx] || cst->constisnull)
+					{
+						matches[i] = RESULT_MERGE(matches[i], is_or, false);
+						continue;
+					}
+
+					/*
+					 * Skip MCV items that can't change result in the bitmap.
+					 * Once the value gets false for AND-lists, or true for
+					 * OR-lists, we don't need to look at more clauses.
+					 */
+					if (RESULT_IS_FINAL(matches[i], is_or))
+						continue;
+
+					for (j = 0; j < num_elems; j++)
+					{
+						Datum		elem_value = elem_values[j];
+						bool		elem_isnull = elem_nulls[j];
+						bool		elem_match;
+
+						/* NULL values always evaluate as not matching. */
+						if (elem_isnull)
+						{
+							match = RESULT_MERGE(match, expr->useOr, false);
+							continue;
+						}
+
+						/*
+						 * Stop evaluating the array elements once we reach
+						 * match value that can't change - ALL() is the same
+						 * as AND-list, ANY() is the same as OR-list.
+						 */
+						if (RESULT_IS_FINAL(match, expr->useOr))
+							break;
+
+						elem_match = DatumGetBool(FunctionCall2Coll(&opproc,
+																	collid,
+																	item->values[idx],
+																	elem_value));
+
+						match = RESULT_MERGE(match, expr->useOr, elem_match);
+					}
+
+					/* update the match bitmap with the result */
+					matches[i] = RESULT_MERGE(matches[i], is_or, match);
+				}
+			}
+			else
+				elog(ERROR, "incompatible clause");
 		}
 		else if (IsA(clause, NullTest))
 		{
 			NullTest   *expr = (NullTest *) clause;
-			Var		   *var = (Var *) (expr->arg);
+			Node	   *clause_expr = (Node *) (expr->arg);
 
 			/* match the attribute to a dimension of the statistic */
-			int			idx = bms_member_index(keys, var->varattno);
+			int			idx = -1;
+
+			if (IsA(clause_expr, Var))
+			{
+				/* simple Var, so just lookup using varattno */
+				Var *var = (Var *) clause_expr;
+
+				idx = bms_member_index(keys, var->varattno);
+			}
+			else
+			{
+				ListCell *lc;
+
+				/* expressions are after the simple columns */
+				idx = bms_num_members(keys);
+
+				/* expression - lookup in stats expressions */
+				foreach(lc, exprs)
+				{
+					Node *stat_expr = (Node *) lfirst(lc);
+
+					if (equal(clause_expr, stat_expr))
+						break;
+
+					idx++;
+				}
+			}
+
+			/* index should be valid */
+			Assert((idx >= 0) && (idx < bms_num_members(keys) + list_length(exprs)));
 
 			/*
 			 * Walk through the MCV items and evaluate the current clause. We
@@ -1810,7 +2068,7 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
 			Assert(list_length(bool_clauses) >= 2);
 
 			/* build the match bitmap for the OR-clauses */
-			bool_matches = mcv_get_match_bitmap(root, bool_clauses, keys,
+			bool_matches = mcv_get_match_bitmap(root, bool_clauses, keys, exprs,
 												mcvlist, is_orclause(clause));
 
 			/*
@@ -1838,7 +2096,7 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
 			Assert(list_length(not_args) == 1);
 
 			/* build the match bitmap for the NOT-clause */
-			not_matches = mcv_get_match_bitmap(root, not_args, keys,
+			not_matches = mcv_get_match_bitmap(root, not_args, keys, exprs,
 											   mcvlist, false);
 
 			/*
@@ -1917,7 +2175,8 @@ mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat,
 	mcv = statext_mcv_load(stat->statOid);
 
 	/* build a match bitmap for the clauses */
-	matches = mcv_get_match_bitmap(root, clauses, stat->keys, mcv, false);
+	matches = mcv_get_match_bitmap(root, clauses, stat->keys, stat->exprs,
+								   mcv, false);
 
 	/* sum frequencies for all the matching MCV items */
 	*basesel = 0.0;
diff --git a/src/backend/statistics/mvdistinct.c b/src/backend/statistics/mvdistinct.c
index 4b86f0ab2d..552d755ab4 100644
--- a/src/backend/statistics/mvdistinct.c
+++ b/src/backend/statistics/mvdistinct.c
@@ -37,7 +37,8 @@
 #include "utils/typcache.h"
 
 static double ndistinct_for_combination(double totalrows, int numrows,
-										HeapTuple *rows, VacAttrStats **stats,
+										HeapTuple *rows, ExprInfo *exprs,
+										int nattrs, VacAttrStats **stats,
 										int k, int *combination);
 static double estimate_ndistinct(double totalrows, int numrows, int d, int f1);
 static int	n_choose_k(int n, int k);
@@ -81,16 +82,21 @@ static void generate_combinations(CombinationGenerator *state);
  *
  * This computes the ndistinct estimate using the same estimator used
  * in analyze.c and then computes the coefficient.
+ *
+ * To handle expressions easily, we treat them as special attributes with
+ * attnums above MaxHeapAttributeNumber, and we assume the expressions are
+ * placed after all simple attributes.
  */
 MVNDistinct *
 statext_ndistinct_build(double totalrows, int numrows, HeapTuple *rows,
-						Bitmapset *attrs, VacAttrStats **stats)
+						ExprInfo *exprs, Bitmapset *attrs,
+						VacAttrStats **stats)
 {
 	MVNDistinct *result;
 	int			k;
 	int			itemcnt;
 	int			numattrs = bms_num_members(attrs);
-	int			numcombs = num_combinations(numattrs);
+	int			numcombs = num_combinations(numattrs + exprs->nexprs);
 
 	result = palloc(offsetof(MVNDistinct, items) +
 					numcombs * sizeof(MVNDistinctItem));
@@ -98,14 +104,20 @@ statext_ndistinct_build(double totalrows, int numrows, HeapTuple *rows,
 	result->type = STATS_NDISTINCT_TYPE_BASIC;
 	result->nitems = numcombs;
 
+	/* treat expressions as special attributes with high attnums */
+	attrs = add_expressions_to_attributes(attrs, exprs->nexprs);
+
+	/* make sure there were no clashes */
+	Assert(bms_num_members(attrs) == numattrs + exprs->nexprs);
+
 	itemcnt = 0;
-	for (k = 2; k <= numattrs; k++)
+	for (k = 2; k <= bms_num_members(attrs); k++)
 	{
 		int		   *combination;
 		CombinationGenerator *generator;
 
 		/* generate combinations of K out of N elements */
-		generator = generator_init(numattrs, k);
+		generator = generator_init(bms_num_members(attrs), k);
 
 		while ((combination = generator_next(generator)))
 		{
@@ -114,10 +126,32 @@ statext_ndistinct_build(double totalrows, int numrows, HeapTuple *rows,
 
 			item->attrs = NULL;
 			for (j = 0; j < k; j++)
-				item->attrs = bms_add_member(item->attrs,
-											 stats[combination[j]]->attr->attnum);
+			{
+				AttrNumber attnum = InvalidAttrNumber;
+
+				/*
+				 * The simple attributes are before expressions, so have
+				 * indexes below numattrs.
+				 * */
+				if (combination[j] < numattrs)
+					attnum = stats[combination[j]]->attr->attnum;
+				else
+				{
+					/* make sure the expression index is valid */
+					Assert((combination[j] - numattrs) >= 0);
+					Assert((combination[j] - numattrs) < exprs->nexprs);
+
+					attnum = EXPRESSION_ATTNUM(combination[j] - numattrs);
+				}
+
+				Assert(attnum != InvalidAttrNumber);
+
+				item->attrs = bms_add_member(item->attrs, attnum);
+			}
+
 			item->ndistinct =
 				ndistinct_for_combination(totalrows, numrows, rows,
+										  exprs, numattrs,
 										  stats, k, combination);
 
 			itemcnt++;
@@ -428,6 +462,7 @@ pg_ndistinct_send(PG_FUNCTION_ARGS)
  */
 static double
 ndistinct_for_combination(double totalrows, int numrows, HeapTuple *rows,
+						  ExprInfo *exprs, int nattrs,
 						  VacAttrStats **stats, int k, int *combination)
 {
 	int			i,
@@ -467,25 +502,57 @@ ndistinct_for_combination(double totalrows, int numrows, HeapTuple *rows,
 	 */
 	for (i = 0; i < k; i++)
 	{
-		VacAttrStats *colstat = stats[combination[i]];
+		Oid				typid;
 		TypeCacheEntry *type;
+		AttrNumber		attnum = InvalidAttrNumber;
+		TupleDesc		tdesc = NULL;
+		Oid				collid = InvalidOid;
+
+		if (combination[i] < nattrs)
+		{
+			VacAttrStats *colstat = stats[combination[i]];
+			typid = colstat->attrtypid;
+			attnum = colstat->attr->attnum;
+			collid = colstat->attrcollid;
+			tdesc = colstat->tupDesc;
+		}
+		else
+		{
+			typid = exprs->types[combination[i] - nattrs];
+			collid = exprs->collations[combination[i] - nattrs];
+		}
 
-		type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
+		type = lookup_type_cache(typid, TYPECACHE_LT_OPR);
 		if (type->lt_opr == InvalidOid) /* shouldn't happen */
 			elog(ERROR, "cache lookup failed for ordering operator for type %u",
-				 colstat->attrtypid);
+				 typid);
 
 		/* prepare the sort function for this dimension */
-		multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
+		multi_sort_add_dimension(mss, i, type->lt_opr, collid);
 
 		/* accumulate all the data for this dimension into the arrays */
 		for (j = 0; j < numrows; j++)
 		{
-			items[j].values[i] =
-				heap_getattr(rows[j],
-							 colstat->attr->attnum,
-							 colstat->tupDesc,
-							 &items[j].isnull[i]);
+			/*
+			 * The first nattrs indexes identify simple attributes, higher
+			 * indexes are expressions.
+			 */
+			if (combination[i] < nattrs)
+				items[j].values[i] =
+					heap_getattr(rows[j],
+								 attnum,
+								 tdesc,
+								 &items[j].isnull[i]);
+			else
+			{
+				int idx = (combination[i] - nattrs);
+
+				/* make sure the expression index is valid */
+				Assert((idx >= 0) && (idx < exprs->nexprs));
+
+				items[j].values[i] = exprs->values[idx][j];
+				items[j].isnull[i] = exprs->nulls[idx][j];
+			}
 		}
 	}
 
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c
index 81ac9b1cb2..f3815c332a 100644
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -1833,7 +1833,22 @@ ProcessUtilitySlow(ParseState *pstate,
 				break;
 
 			case T_CreateStatsStmt:
-				address = CreateStatistics((CreateStatsStmt *) parsetree);
+				{
+					Oid			relid;
+					CreateStatsStmt *stmt = (CreateStatsStmt *) parsetree;
+					RangeVar   *rel = (RangeVar *) linitial(stmt->relations);
+
+					/*
+					 * XXX RangeVarCallbackOwnsRelation not needed needed here,
+					 * to keep the same behavior as before.
+					 */
+					relid = RangeVarGetRelid(rel, ShareLock, false);
+
+					/* Run parse analysis ... */
+					stmt = transformStatsStmt(relid, stmt, queryString);
+
+					address = CreateStatistics(stmt);
+				}
 				break;
 
 			case T_AlterStatsStmt:
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index c2c6df2a4f..a7cf88b0e8 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -337,7 +337,8 @@ static char *pg_get_indexdef_worker(Oid indexrelid, int colno,
 									bool attrsOnly, bool keysOnly,
 									bool showTblSpc, bool inherits,
 									int prettyFlags, bool missing_ok);
-static char *pg_get_statisticsobj_worker(Oid statextid, bool missing_ok);
+static char *pg_get_statisticsobj_worker(Oid statextid, bool columns_only,
+										 bool missing_ok);
 static char *pg_get_partkeydef_worker(Oid relid, int prettyFlags,
 									  bool attrsOnly, bool missing_ok);
 static char *pg_get_constraintdef_worker(Oid constraintId, bool fullCommand,
@@ -1508,7 +1509,26 @@ pg_get_statisticsobjdef(PG_FUNCTION_ARGS)
 	Oid			statextid = PG_GETARG_OID(0);
 	char	   *res;
 
-	res = pg_get_statisticsobj_worker(statextid, true);
+	res = pg_get_statisticsobj_worker(statextid, false, true);
+
+	if (res == NULL)
+		PG_RETURN_NULL();
+
+	PG_RETURN_TEXT_P(string_to_text(res));
+}
+
+
+/*
+ * pg_get_statisticsobjdef_columns
+ *		Get columns and expressions for an extended statistics object
+ */
+Datum
+pg_get_statisticsobjdef_columns(PG_FUNCTION_ARGS)
+{
+	Oid			statextid = PG_GETARG_OID(0);
+	char	   *res;
+
+	res = pg_get_statisticsobj_worker(statextid, true, true);
 
 	if (res == NULL)
 		PG_RETURN_NULL();
@@ -1520,7 +1540,7 @@ pg_get_statisticsobjdef(PG_FUNCTION_ARGS)
  * Internal workhorse to decompile an extended statistics object.
  */
 static char *
-pg_get_statisticsobj_worker(Oid statextid, bool missing_ok)
+pg_get_statisticsobj_worker(Oid statextid, bool columns_only, bool missing_ok)
 {
 	Form_pg_statistic_ext statextrec;
 	HeapTuple	statexttup;
@@ -1534,7 +1554,12 @@ pg_get_statisticsobj_worker(Oid statextid, bool missing_ok)
 	bool		ndistinct_enabled;
 	bool		dependencies_enabled;
 	bool		mcv_enabled;
+	bool		exprs_enabled;
 	int			i;
+	List	   *context;
+	ListCell   *lc;
+	List	   *exprs = NIL;
+	bool		has_exprs;
 
 	statexttup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statextid));
 
@@ -1545,75 +1570,91 @@ pg_get_statisticsobj_worker(Oid statextid, bool missing_ok)
 		elog(ERROR, "cache lookup failed for statistics object %u", statextid);
 	}
 
+	/* has the statistics expressions? */
+	has_exprs = !heap_attisnull(statexttup, Anum_pg_statistic_ext_stxexprs, NULL);
+
 	statextrec = (Form_pg_statistic_ext) GETSTRUCT(statexttup);
 
 	initStringInfo(&buf);
 
-	nsp = get_namespace_name(statextrec->stxnamespace);
-	appendStringInfo(&buf, "CREATE STATISTICS %s",
-					 quote_qualified_identifier(nsp,
-												NameStr(statextrec->stxname)));
+	if (!columns_only)
+	{
+		nsp = get_namespace_name(statextrec->stxnamespace);
+		appendStringInfo(&buf, "CREATE STATISTICS %s",
+						 quote_qualified_identifier(nsp,
+													NameStr(statextrec->stxname)));
 
-	/*
-	 * Decode the stxkind column so that we know which stats types to print.
-	 */
-	datum = SysCacheGetAttr(STATEXTOID, statexttup,
-							Anum_pg_statistic_ext_stxkind, &isnull);
-	Assert(!isnull);
-	arr = DatumGetArrayTypeP(datum);
-	if (ARR_NDIM(arr) != 1 ||
-		ARR_HASNULL(arr) ||
-		ARR_ELEMTYPE(arr) != CHAROID)
-		elog(ERROR, "stxkind is not a 1-D char array");
-	enabled = (char *) ARR_DATA_PTR(arr);
+		/*
+		 * Decode the stxkind column so that we know which stats types to print.
+		 */
+		datum = SysCacheGetAttr(STATEXTOID, statexttup,
+								Anum_pg_statistic_ext_stxkind, &isnull);
+		Assert(!isnull);
+		arr = DatumGetArrayTypeP(datum);
+		if (ARR_NDIM(arr) != 1 ||
+			ARR_HASNULL(arr) ||
+			ARR_ELEMTYPE(arr) != CHAROID)
+			elog(ERROR, "stxkind is not a 1-D char array");
+		enabled = (char *) ARR_DATA_PTR(arr);
+
+		ndistinct_enabled = false;
+		dependencies_enabled = false;
+		mcv_enabled = false;
+		exprs_enabled = false;
+
+		for (i = 0; i < ARR_DIMS(arr)[0]; i++)
+		{
+			if (enabled[i] == STATS_EXT_NDISTINCT)
+				ndistinct_enabled = true;
+			if (enabled[i] == STATS_EXT_DEPENDENCIES)
+				dependencies_enabled = true;
+			if (enabled[i] == STATS_EXT_MCV)
+				mcv_enabled = true;
+			if (enabled[i] == STATS_EXT_EXPRESSIONS)
+				exprs_enabled = true;
+		}
 
-	ndistinct_enabled = false;
-	dependencies_enabled = false;
-	mcv_enabled = false;
+		/*
+		 * If any option is disabled, then we'll need to append the types clause
+		 * to show which options are enabled.  We omit the types clause on purpose
+		 * when all options are enabled, so a pg_dump/pg_restore will create all
+		 * statistics types on a newer postgres version, if the statistics had all
+		 * options enabled on the original version.
+		 */
+		if (!ndistinct_enabled || !dependencies_enabled || !mcv_enabled || (!exprs_enabled && has_exprs))
+		{
+			bool		gotone = false;
 
-	for (i = 0; i < ARR_DIMS(arr)[0]; i++)
-	{
-		if (enabled[i] == STATS_EXT_NDISTINCT)
-			ndistinct_enabled = true;
-		if (enabled[i] == STATS_EXT_DEPENDENCIES)
-			dependencies_enabled = true;
-		if (enabled[i] == STATS_EXT_MCV)
-			mcv_enabled = true;
-	}
+			appendStringInfoString(&buf, " (");
 
-	/*
-	 * If any option is disabled, then we'll need to append the types clause
-	 * to show which options are enabled.  We omit the types clause on purpose
-	 * when all options are enabled, so a pg_dump/pg_restore will create all
-	 * statistics types on a newer postgres version, if the statistics had all
-	 * options enabled on the original version.
-	 */
-	if (!ndistinct_enabled || !dependencies_enabled || !mcv_enabled)
-	{
-		bool		gotone = false;
+			if (ndistinct_enabled)
+			{
+				appendStringInfoString(&buf, "ndistinct");
+				gotone = true;
+			}
 
-		appendStringInfoString(&buf, " (");
+			if (dependencies_enabled)
+			{
+				appendStringInfo(&buf, "%sdependencies", gotone ? ", " : "");
+				gotone = true;
+			}
 
-		if (ndistinct_enabled)
-		{
-			appendStringInfoString(&buf, "ndistinct");
-			gotone = true;
-		}
+			if (mcv_enabled)
+			{
+				appendStringInfo(&buf, "%smcv", gotone ? ", " : "");
+				gotone = true;
+			}
 
-		if (dependencies_enabled)
-		{
-			appendStringInfo(&buf, "%sdependencies", gotone ? ", " : "");
-			gotone = true;
-		}
+			if (exprs_enabled)
+				appendStringInfo(&buf, "%sexpressions", gotone ? ", " : "");
 
-		if (mcv_enabled)
-			appendStringInfo(&buf, "%smcv", gotone ? ", " : "");
+			appendStringInfoChar(&buf, ')');
+		}
 
-		appendStringInfoChar(&buf, ')');
+		appendStringInfoString(&buf, " ON ");
 	}
 
-	appendStringInfoString(&buf, " ON ");
-
+	/* decode simple column references */
 	for (colno = 0; colno < statextrec->stxkeys.dim1; colno++)
 	{
 		AttrNumber	attnum = statextrec->stxkeys.values[colno];
@@ -1627,8 +1668,74 @@ pg_get_statisticsobj_worker(Oid statextid, bool missing_ok)
 		appendStringInfoString(&buf, quote_identifier(attname));
 	}
 
-	appendStringInfo(&buf, " FROM %s",
-					 generate_relation_name(statextrec->stxrelid, NIL));
+	/*
+	 * Get the statistics expressions, if any.  (NOTE: we do not use the
+	 * relcache versions of the expressions and predicate, because we want
+	 * to display non-const-folded expressions.)
+	 */
+	if (has_exprs)
+	{
+		Datum		exprsDatum;
+		bool		isnull;
+		char	   *exprsString;
+
+		exprsDatum = SysCacheGetAttr(STATEXTOID, statexttup,
+									 Anum_pg_statistic_ext_stxexprs, &isnull);
+		Assert(!isnull);
+		exprsString = TextDatumGetCString(exprsDatum);
+		exprs = (List *) stringToNode(exprsString);
+		pfree(exprsString);
+
+		/*
+		 * Run the expressions through eval_const_expressions. This is not just an
+		 * optimization, but is necessary, because the planner will be comparing
+		 * them to similarly-processed qual clauses, and may fail to detect valid
+		 * matches without this.  We must not use canonicalize_qual, however,
+		 * since these aren't qual expressions.
+		 *
+		 * XXX Not sure if this is really needed, it's not in pg_get_indexdef. In
+		 * fact the comment above suggests we don't want const-folding here.
+		 */
+		// exprs = (List *) eval_const_expressions(NULL, (Node *) exprs);
+
+		/*
+		 * May as well fix opfuncids too
+		 *
+		 * XXX Same here. Is this something we want/need?
+		 */
+		// fix_opfuncids((Node *) exprs);
+
+	}
+	else
+		exprs = NIL;
+
+	context = deparse_context_for(get_relation_name(statextrec->stxrelid),
+								  statextrec->stxrelid);
+
+	foreach (lc, exprs)
+	{
+		Node	   *expr = (Node *) lfirst(lc);
+		char	   *str;
+		int			prettyFlags = PRETTYFLAG_INDENT;
+
+		str = deparse_expression_pretty(expr, context, false, false,
+										prettyFlags, 0);
+
+		if (colno > 0)
+			appendStringInfoString(&buf, ", ");
+
+		/* Need parens if it's not a bare function call */
+		if (looks_like_function(expr))
+			appendStringInfoString(&buf, str);
+		else
+			appendStringInfo(&buf, "(%s)", str);
+
+		colno++;
+	}
+
+	if (!columns_only)
+		appendStringInfo(&buf, " FROM %s",
+						 generate_relation_name(statextrec->stxrelid, NIL));
 
 	ReleaseSysCache(statexttup);
 
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index bec357fcef..bf15f515e7 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -3291,6 +3291,88 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
 	return varinfos;
 }
 
+/*
+ * Helper routine for estimate_num_groups: add an item to a list of
+ * GrouExprInfos, but only if it's not known equal to any of the existing
+ * entries.
+ */
+typedef struct
+{
+	Node	   *expr;			/* expression */
+	RelOptInfo *rel;			/* relation it belongs to */
+	List	   *varinfos;		/* info for variables in this expression */
+} GroupExprInfo;
+
+static List *
+add_unique_group_expr(PlannerInfo *root, List *exprinfos,
+					 Node *expr, List *vars)
+{
+	GroupExprInfo *exprinfo;
+	ListCell   *lc;
+	Bitmapset  *varnos;
+	Index		varno;
+
+	foreach(lc, exprinfos)
+	{
+		exprinfo = (GroupExprInfo *) lfirst(lc);
+
+		/* Drop exact duplicates */
+		if (equal(expr, exprinfo->expr))
+			return exprinfos;
+	}
+
+	exprinfo = (GroupExprInfo *) palloc(sizeof(GroupExprInfo));
+
+	varnos = pull_varnos(expr);
+
+	/*
+	 * Expressions with vars from multiple relations should never get
+	 * here, as we split them to vars.
+	 */
+	Assert(bms_num_members(varnos) == 1);
+
+	varno = bms_singleton_member(varnos);
+
+	exprinfo->expr = expr;
+	exprinfo->varinfos = NIL;
+	exprinfo->rel = root->simple_rel_array[varno];
+
+	Assert(exprinfo->rel);
+
+	/* Track vars for this expression. */
+	foreach (lc, vars)
+	{
+		VariableStatData vardata;
+		Node *var = (Node *) lfirst(lc);
+
+		/* can we get no vardata for the variable? */
+		examine_variable(root, var, 0, &vardata);
+
+		exprinfo->varinfos
+			= add_unique_group_var(root, exprinfo->varinfos, var, &vardata);
+
+		ReleaseVariableStats(vardata);
+	}
+
+	/* without a list of variables, use the expression itself */
+	if (vars == NIL)
+	{
+		VariableStatData vardata;
+
+		/* can we get no vardata for the variable? */
+		examine_variable(root, expr, 0, &vardata);
+
+		exprinfo->varinfos
+			= add_unique_group_var(root, exprinfo->varinfos,
+								   expr, &vardata);
+
+		ReleaseVariableStats(vardata);
+	}
+
+	return lappend(exprinfos, exprinfo);
+}
+
+
 /*
  * estimate_num_groups		- Estimate number of groups in a grouped query
  *
@@ -3360,7 +3442,7 @@ double
 estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 					List **pgset)
 {
-	List	   *varinfos = NIL;
+	List	   *exprinfos = NIL;
 	double		srf_multiplier = 1.0;
 	double		numdistinct;
 	ListCell   *l;
@@ -3398,6 +3480,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		double		this_srf_multiplier;
 		VariableStatData vardata;
 		List	   *varshere;
+		Relids		varnos;
 		ListCell   *l2;
 
 		/* is expression in this grouping set? */
@@ -3434,8 +3517,9 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		examine_variable(root, groupexpr, 0, &vardata);
 		if (HeapTupleIsValid(vardata.statsTuple) || vardata.isunique)
 		{
-			varinfos = add_unique_group_var(root, varinfos,
-											groupexpr, &vardata);
+			exprinfos = add_unique_group_expr(root, exprinfos,
+											  groupexpr, NIL);
+
 			ReleaseVariableStats(vardata);
 			continue;
 		}
@@ -3465,6 +3549,19 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 			continue;
 		}
 
+		/*
+		 * Are all the variables from the same relation? If yes, search for
+		 * an extended statistic matching this expression exactly.
+		 */
+		varnos = pull_varnos((Node *) varshere);
+		if (bms_membership(varnos) == BMS_SINGLETON)
+		{
+			exprinfos = add_unique_group_expr(root, exprinfos,
+											  groupexpr,
+											  varshere);
+			continue;
+		}
+
 		/*
 		 * Else add variables to varinfos list
 		 */
@@ -3472,9 +3569,8 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		{
 			Node	   *var = (Node *) lfirst(l2);
 
-			examine_variable(root, var, 0, &vardata);
-			varinfos = add_unique_group_var(root, varinfos, var, &vardata);
-			ReleaseVariableStats(vardata);
+			exprinfos = add_unique_group_expr(root, exprinfos,
+											  var, NIL);
 		}
 	}
 
@@ -3482,7 +3578,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 	 * If now no Vars, we must have an all-constant or all-boolean GROUP BY
 	 * list.
 	 */
-	if (varinfos == NIL)
+	if (exprinfos == NIL)
 	{
 		/* Apply SRF multiplier as we would do in the long path */
 		numdistinct *= srf_multiplier;
@@ -3506,32 +3602,32 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 	 */
 	do
 	{
-		GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
-		RelOptInfo *rel = varinfo1->rel;
+		GroupExprInfo *exprinfo1 = (GroupExprInfo *) linitial(exprinfos);
+		RelOptInfo *rel = exprinfo1->rel;
 		double		reldistinct = 1;
 		double		relmaxndistinct = reldistinct;
 		int			relvarcount = 0;
-		List	   *newvarinfos = NIL;
-		List	   *relvarinfos = NIL;
+		List	   *newexprinfos = NIL;
+		List	   *relexprinfos = NIL;
 
 		/*
 		 * Split the list of varinfos in two - one for the current rel, one
 		 * for remaining Vars on other rels.
 		 */
-		relvarinfos = lappend(relvarinfos, varinfo1);
-		for_each_from(l, varinfos, 1)
+		relexprinfos = lappend(relexprinfos, exprinfo1);
+		for_each_from(l, exprinfos, 1)
 		{
-			GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
+			GroupExprInfo *exprinfo2 = (GroupExprInfo *) lfirst(l);
 
-			if (varinfo2->rel == varinfo1->rel)
+			if (exprinfo2->rel == exprinfo1->rel)
 			{
 				/* varinfos on current rel */
-				relvarinfos = lappend(relvarinfos, varinfo2);
+				relexprinfos = lappend(relexprinfos, exprinfo2);
 			}
 			else
 			{
-				/* not time to process varinfo2 yet */
-				newvarinfos = lappend(newvarinfos, varinfo2);
+				/* not time to process exprinfo2 yet */
+				newexprinfos = lappend(newexprinfos, exprinfo2);
 			}
 		}
 
@@ -3547,11 +3643,11 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		 * apply.  We apply a fudge factor below, but only if we multiplied
 		 * more than one such values.
 		 */
-		while (relvarinfos)
+		while (relexprinfos)
 		{
 			double		mvndistinct;
 
-			if (estimate_multivariate_ndistinct(root, rel, &relvarinfos,
+			if (estimate_multivariate_ndistinct(root, rel, &relexprinfos,
 												&mvndistinct))
 			{
 				reldistinct *= mvndistinct;
@@ -3561,18 +3657,24 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 			}
 			else
 			{
-				foreach(l, relvarinfos)
+				foreach(l, relexprinfos)
 				{
-					GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
+					ListCell *lc;
+					GroupExprInfo *exprinfo2 = (GroupExprInfo *) lfirst(l);
+
+					foreach (lc, exprinfo2->varinfos)
+					{
+						GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(lc);
 
-					reldistinct *= varinfo2->ndistinct;
-					if (relmaxndistinct < varinfo2->ndistinct)
-						relmaxndistinct = varinfo2->ndistinct;
-					relvarcount++;
+						reldistinct *= varinfo2->ndistinct;
+						if (relmaxndistinct < varinfo2->ndistinct)
+							relmaxndistinct = varinfo2->ndistinct;
+						relvarcount++;
+					}
 				}
 
 				/* we're done with this relation */
-				relvarinfos = NIL;
+				relexprinfos = NIL;
 			}
 		}
 
@@ -3658,8 +3760,8 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 			numdistinct *= reldistinct;
 		}
 
-		varinfos = newvarinfos;
-	} while (varinfos != NIL);
+		exprinfos = newexprinfos;
+	} while (exprinfos != NIL);
 
 	/* Now we can account for the effects of any SRFs */
 	numdistinct *= srf_multiplier;
@@ -3875,53 +3977,75 @@ estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs,
  */
 static bool
 estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
-								List **varinfos, double *ndistinct)
+								List **exprinfos, double *ndistinct)
 {
 	ListCell   *lc;
-	Bitmapset  *attnums = NULL;
-	int			nmatches;
+	int			nmatches_vars;
+	int			nmatches_exprs;
 	Oid			statOid = InvalidOid;
 	MVNDistinct *stats;
-	Bitmapset  *matched = NULL;
+	StatisticExtInfo *matched_info = NULL;
 
 	/* bail out immediately if the table has no extended statistics */
 	if (!rel->statlist)
 		return false;
 
-	/* Determine the attnums we're looking for */
-	foreach(lc, *varinfos)
-	{
-		GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
-		AttrNumber	attnum;
-
-		Assert(varinfo->rel == rel);
-
-		if (!IsA(varinfo->var, Var))
-			continue;
-
-		attnum = ((Var *) varinfo->var)->varattno;
-
-		if (!AttrNumberIsForUserDefinedAttr(attnum))
-			continue;
-
-		attnums = bms_add_member(attnums, attnum);
-	}
-
 	/* look for the ndistinct statistics matching the most vars */
-	nmatches = 1;				/* we require at least two matches */
+	nmatches_vars = 0;				/* we require at least two matches */
+	nmatches_exprs = 0;
 	foreach(lc, rel->statlist)
 	{
+		ListCell	*lc2;
 		StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
-		Bitmapset  *shared;
-		int			nshared;
+		int			nshared_vars = 0;
+		int			nshared_exprs = 0;
 
 		/* skip statistics of other kinds */
 		if (info->kind != STATS_EXT_NDISTINCT)
 			continue;
 
-		/* compute attnums shared by the vars and the statistics object */
-		shared = bms_intersect(info->keys, attnums);
-		nshared = bms_num_members(shared);
+		/*
+		 * Determine how many expressions (and variables in non-matched
+		 * expressions) match.
+		 */
+		foreach(lc2, *exprinfos)
+		{
+			ListCell *lc3;
+			GroupExprInfo *exprinfo = (GroupExprInfo *) lfirst(lc2);
+			AttrNumber	attnum;
+
+			Assert(exprinfo->rel == rel);
+
+			/* simple Var, search in statistics keys directly */
+			if (IsA(exprinfo->expr, Var))
+			{
+				attnum = ((Var *) exprinfo->expr)->varattno;
+
+				if (!AttrNumberIsForUserDefinedAttr(attnum))
+					continue;
+
+				if (bms_is_member(attnum, info->keys))
+					nshared_vars++;
+
+				continue;
+			}
+
+			/* expression - see if it's in the statistics */
+			foreach (lc3, info->exprs)
+			{
+				Node *expr = (Node *) lfirst(lc3);
+
+				if (equal(exprinfo->expr, expr))
+				{
+					nshared_exprs++;
+					nshared_vars += list_length(exprinfo->varinfos);
+					break;
+				}
+			}
+		}
+
+		if (nshared_vars + nshared_exprs < 2)
+			continue;
 
 		/*
 		 * Does this statistics object match more columns than the currently
@@ -3930,18 +4054,21 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
 		 * XXX This should break ties using name of the object, or something
 		 * like that, to make the outcome stable.
 		 */
-		if (nshared > nmatches)
+		if ((nshared_vars > nmatches_vars) ||
+			((nshared_vars == nmatches_vars) && (nshared_exprs > nmatches_exprs)))
 		{
 			statOid = info->statOid;
-			nmatches = nshared;
-			matched = shared;
+			nmatches_vars = nshared_vars;
+			nmatches_exprs = nshared_exprs;
+			matched_info = info;
 		}
 	}
 
 	/* No match? */
 	if (statOid == InvalidOid)
 		return false;
-	Assert(nmatches > 1 && matched != NULL);
+
+	Assert(nmatches_vars + nmatches_exprs > 1);
 
 	stats = statext_ndistinct_load(statOid);
 
@@ -3954,6 +4081,56 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
 		int			i;
 		List	   *newlist = NIL;
 		MVNDistinctItem *item = NULL;
+		ListCell   *lc2;
+		Bitmapset  *matched = NULL;
+
+		/* see what actually matched */
+		foreach (lc2, *exprinfos)
+		{
+			ListCell   *lc3;
+			int			idx;
+			bool		found = false;
+
+			GroupExprInfo *exprinfo = (GroupExprInfo *) lfirst(lc2);
+
+			/* expression - see if it's in the statistics */
+			idx = 0;
+			foreach (lc3, matched_info->exprs)
+			{
+				Node *expr = (Node *) lfirst(lc3);
+
+				idx++;
+
+				if (equal(exprinfo->expr, expr))
+				{
+					matched = bms_add_member(matched, MaxHeapAttributeNumber + idx);
+					found = true;
+					break;
+				}
+			}
+
+			if (found)
+				continue;
+
+			foreach (lc3, exprinfo->varinfos)
+			{
+				GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc2);
+
+				/* simple Var, search in statistics keys directly */
+				if (IsA(varinfo->var, Var))
+				{
+					AttrNumber	attnum = ((Var *) varinfo->var)->varattno;
+
+					if (!AttrNumberIsForUserDefinedAttr(attnum))
+						continue;
+
+					if (!bms_is_member(attnum, matched_info->keys))
+						continue;
+
+					matched = bms_add_member(matched, attnum);
+				}
+			}
+		}
 
 		/* Find the specific item that exactly matches the combination */
 		for (i = 0; i < stats->nitems; i++)
@@ -3971,28 +4148,49 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
 		if (!item)
 			elog(ERROR, "corrupt MVNDistinct entry");
 
-		/* Form the output varinfo list, keeping only unmatched ones */
-		foreach(lc, *varinfos)
+		/* Form the output exprinfo list, keeping only unmatched ones */
+		foreach(lc, *exprinfos)
 		{
-			GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
+			GroupExprInfo *exprinfo = (GroupExprInfo *) lfirst(lc);
 			AttrNumber	attnum;
+			ListCell   *lc3;
+			bool		found = false;
+
+			foreach (lc3, matched_info->exprs)
+			{
+				Node *expr = (Node *) lfirst(lc3);
+
+				if (equal(exprinfo->expr, expr))
+				{
+					found = true;
+					break;
+				}
+			}
+
+			/* the whole expression was matched, so skip it */
+			if (found)
+				continue;
 
-			if (!IsA(varinfo->var, Var))
+			if (!IsA(exprinfo->expr, Var))
 			{
-				newlist = lappend(newlist, varinfo);
+				/*
+				 * FIXME Probably should remove varinfos that match the
+				 * selected MVNDistinct item.
+				 */
+				newlist = lappend(newlist, exprinfo);
 				continue;
 			}
 
-			attnum = ((Var *) varinfo->var)->varattno;
+			attnum = ((Var *) exprinfo->expr)->varattno;
 
 			if (!AttrNumberIsForUserDefinedAttr(attnum))
 				continue;
 
 			if (!bms_is_member(attnum, matched))
-				newlist = lappend(newlist, varinfo);
+				newlist = lappend(newlist, exprinfo);
 		}
 
-		*varinfos = newlist;
+		*exprinfos = newlist;
 		*ndistinct = item->ndistinct;
 		return true;
 	}
@@ -4688,6 +4886,13 @@ get_join_variables(PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo,
 		*join_is_reversed = false;
 }
 
+/* statext_expressions_load copies the tuple, so just pfree it. */
+static void
+ReleaseDummy(HeapTuple tuple)
+{
+	pfree(tuple);
+}
+
 /*
  * examine_variable
  *		Try to look up statistical data about an expression.
@@ -4828,6 +5033,7 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
 		 * operator we are estimating for.  FIXME later.
 		 */
 		ListCell   *ilist;
+		ListCell   *slist;
 
 		foreach(ilist, onerel->indexlist)
 		{
@@ -4984,6 +5190,67 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
 			if (vardata->statsTuple)
 				break;
 		}
+
+		/*
+		 * Search extended statistics for one with a matching expression.
+		 * There might be multiple ones, so just grab the first one. In
+		 * the future, we might consider 
+		 */
+		foreach(slist, onerel->statlist)
+		{
+			StatisticExtInfo *info = (StatisticExtInfo *) lfirst(slist);
+			ListCell   *expr_item;
+			int			pos;
+
+			/*
+			 * Stop once we've found statistics for the expression (either
+			 * from extended stats, or for an index in the preceding loop).
+			 */
+			if (vardata->statsTuple)
+				break;
+
+			/* skip stats without per-expression stats */
+			if (info->kind != STATS_EXT_EXPRESSIONS)
+				continue;
+
+			pos = 0;
+			foreach (expr_item, info->exprs)
+			{
+				Node *expr = (Node *) lfirst(expr_item);
+
+				Assert(expr);
+
+				/* strip RelabelType before comparing it */
+				if (expr && IsA(expr, RelabelType))
+					expr = (Node *) ((RelabelType *) expr)->arg;
+
+				/* found a match, see if we can extract pg_statistic row */
+				if (equal(node, expr))
+				{
+					HeapTuple t = statext_expressions_load(info->statOid, pos);
+
+					vardata->statsTuple = t;
+
+					/*
+					 * FIXME not sure if we should cache the tuple somewhere?
+					 * It's stored in a cached tuple in the "data" catalog,
+					 * and we just create a new copy every time.
+					 */
+					vardata->freefunc = ReleaseDummy;
+
+					/*
+					 * FIXME Hack to make statistic_proc_security_check happy,
+					 * so that this does not get rejected. Probably needs more
+					 * thought, just a hack.
+					 */
+					vardata->acl_ok = true;
+
+					break;
+				}
+
+				pos++;
+			}
+		}
 	}
 }
 
diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c
index 07d640021c..b6b75be29e 100644
--- a/src/bin/psql/describe.c
+++ b/src/bin/psql/describe.c
@@ -2676,18 +2676,20 @@ describeOneTableDetails(const char *schemaname,
 		/* print any extended statistics */
 		if (pset.sversion >= 100000)
 		{
+			/*
+			 * FIXME this needs to be version-dependent, because older
+			 * versions don't have pg_get_statisticsobjdef_columns.
+			 */
 			printfPQExpBuffer(&buf,
 							  "SELECT oid, "
 							  "stxrelid::pg_catalog.regclass, "
 							  "stxnamespace::pg_catalog.regnamespace AS nsp, "
 							  "stxname,\n"
-							  "  (SELECT pg_catalog.string_agg(pg_catalog.quote_ident(attname),', ')\n"
-							  "   FROM pg_catalog.unnest(stxkeys) s(attnum)\n"
-							  "   JOIN pg_catalog.pg_attribute a ON (stxrelid = a.attrelid AND\n"
-							  "        a.attnum = s.attnum AND NOT attisdropped)) AS columns,\n"
+							  "pg_get_statisticsobjdef_columns(oid) AS columns,\n"
 							  "  'd' = any(stxkind) AS ndist_enabled,\n"
 							  "  'f' = any(stxkind) AS deps_enabled,\n"
-							  "  'm' = any(stxkind) AS mcv_enabled,\n");
+							  "  'm' = any(stxkind) AS mcv_enabled,\n"
+							  "  'e' = any(stxkind) AS expressions_enabled,\n");
 
 			if (pset.sversion >= 130000)
 				appendPQExpBufferStr(&buf, "  stxstattarget\n");
@@ -2735,6 +2737,12 @@ describeOneTableDetails(const char *schemaname,
 					if (strcmp(PQgetvalue(result, i, 7), "t") == 0)
 					{
 						appendPQExpBuffer(&buf, "%smcv", gotone ? ", " : "");
+						gotone = true;
+					}
+
+					if (strcmp(PQgetvalue(result, i, 8), "t") == 0)
+					{
+						appendPQExpBuffer(&buf, "%sexpressions", gotone ? ", " : "");
 					}
 
 					appendPQExpBuffer(&buf, ") ON %s FROM %s",
@@ -2742,9 +2750,9 @@ describeOneTableDetails(const char *schemaname,
 									  PQgetvalue(result, i, 1));
 
 					/* Show the stats target if it's not default */
-					if (strcmp(PQgetvalue(result, i, 8), "-1") != 0)
+					if (strcmp(PQgetvalue(result, i, 9), "-1") != 0)
 						appendPQExpBuffer(&buf, "; STATISTICS %s",
-										  PQgetvalue(result, i, 8));
+										  PQgetvalue(result, i, 9));
 
 					printTableAddFooter(&cont, buf.data);
 				}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 33dacfd340..016cbaffdc 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -3655,6 +3655,10 @@
   proname => 'pg_get_statisticsobjdef', provolatile => 's',
   prorettype => 'text', proargtypes => 'oid',
   prosrc => 'pg_get_statisticsobjdef' },
+{ oid => '8887', descr => 'extended statistics columns',
+  proname => 'pg_get_statisticsobjdef_columns', provolatile => 's',
+  prorettype => 'text', proargtypes => 'oid',
+  prosrc => 'pg_get_statisticsobjdef_columns' },
 { oid => '3352', descr => 'partition key description',
   proname => 'pg_get_partkeydef', provolatile => 's', prorettype => 'text',
   proargtypes => 'oid', prosrc => 'pg_get_partkeydef' },
diff --git a/src/include/catalog/pg_statistic_ext.h b/src/include/catalog/pg_statistic_ext.h
index 61d402c600..c182f5684c 100644
--- a/src/include/catalog/pg_statistic_ext.h
+++ b/src/include/catalog/pg_statistic_ext.h
@@ -52,6 +52,9 @@ CATALOG(pg_statistic_ext,3381,StatisticExtRelationId)
 #ifdef CATALOG_VARLEN
 	char		stxkind[1] BKI_FORCE_NOT_NULL;	/* statistics kinds requested
 												 * to build */
+	pg_node_tree stxexprs;		/* A list of expression trees for stats
+								 * attributes that are not simple column
+								 * references. */
 #endif
 
 } FormData_pg_statistic_ext;
@@ -77,6 +80,7 @@ DECLARE_INDEX(pg_statistic_ext_relid_index, 3379, on pg_statistic_ext using btre
 #define STATS_EXT_NDISTINCT			'd'
 #define STATS_EXT_DEPENDENCIES		'f'
 #define STATS_EXT_MCV				'm'
+#define STATS_EXT_EXPRESSIONS		'e'
 
 #endif							/* EXPOSE_TO_CLIENT_CODE */
 
diff --git a/src/include/catalog/pg_statistic_ext_data.h b/src/include/catalog/pg_statistic_ext_data.h
index c9515df117..4794fcd2dd 100644
--- a/src/include/catalog/pg_statistic_ext_data.h
+++ b/src/include/catalog/pg_statistic_ext_data.h
@@ -37,6 +37,7 @@ CATALOG(pg_statistic_ext_data,3429,StatisticExtDataRelationId)
 	pg_ndistinct stxdndistinct; /* ndistinct coefficients (serialized) */
 	pg_dependencies stxddependencies;	/* dependencies (serialized) */
 	pg_mcv_list stxdmcv;		/* MCV (serialized) */
+	pg_statistic stxdexpr[1];		/* stats for expressions */
 
 #endif
 
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 7ddd8c011b..48b3689a31 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -451,6 +451,7 @@ typedef enum NodeTag
 	T_TypeName,
 	T_ColumnDef,
 	T_IndexElem,
+	T_StatsElem,
 	T_Constraint,
 	T_DefElem,
 	T_RangeTblEntry,
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index d1f9ef29ca..3d484b2cab 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -2811,8 +2811,24 @@ typedef struct CreateStatsStmt
 	List	   *relations;		/* rels to build stats on (list of RangeVar) */
 	char	   *stxcomment;		/* comment to apply to stats, or NULL */
 	bool		if_not_exists;	/* do nothing if stats name already exists */
+	bool		transformed;	/* true when transformStatsStmt is finished */
 } CreateStatsStmt;
 
+/*
+ * StatsElem - statistics parameters (used in CREATE STATISTICS)
+ *
+ * For a plain attribute, 'name' is the name of the referenced table column
+ * and 'expr' is NULL.  For an expression, 'name' is NULL and 'expr' is the
+ * expression tree.
+ */
+typedef struct StatsElem
+{
+	NodeTag		type;
+	char	   *name;			/* name of attribute to index, or NULL */
+	Node	   *expr;			/* expression to index, or NULL */
+} StatsElem;
+
+
 /* ----------------------
  *		Alter Statistics Statement
  * ----------------------
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 8f62d61702..f768925a1a 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -911,6 +911,7 @@ typedef struct StatisticExtInfo
 	RelOptInfo *rel;			/* back-link to statistic's table */
 	char		kind;			/* statistic kind of this entry */
 	Bitmapset  *keys;			/* attnums of the columns covered */
+	List	   *exprs;			/* expressions */
 } StatisticExtInfo;
 
 /*
diff --git a/src/include/parser/parse_node.h b/src/include/parser/parse_node.h
index d25819aa28..82e5190964 100644
--- a/src/include/parser/parse_node.h
+++ b/src/include/parser/parse_node.h
@@ -69,6 +69,7 @@ typedef enum ParseExprKind
 	EXPR_KIND_FUNCTION_DEFAULT, /* default parameter value for function */
 	EXPR_KIND_INDEX_EXPRESSION, /* index expression */
 	EXPR_KIND_INDEX_PREDICATE,	/* index predicate */
+	EXPR_KIND_STATS_EXPRESSION, /* extended statistics expression */
 	EXPR_KIND_ALTER_COL_TRANSFORM,	/* transform expr in ALTER COLUMN TYPE */
 	EXPR_KIND_EXECUTE_PARAMETER,	/* parameter value in EXECUTE */
 	EXPR_KIND_TRIGGER_WHEN,		/* WHEN condition in CREATE TRIGGER */
diff --git a/src/include/parser/parse_utilcmd.h b/src/include/parser/parse_utilcmd.h
index bc3d66ed88..c864801628 100644
--- a/src/include/parser/parse_utilcmd.h
+++ b/src/include/parser/parse_utilcmd.h
@@ -26,6 +26,8 @@ extern AlterTableStmt *transformAlterTableStmt(Oid relid, AlterTableStmt *stmt,
 											   List **afterStmts);
 extern IndexStmt *transformIndexStmt(Oid relid, IndexStmt *stmt,
 									 const char *queryString);
+extern CreateStatsStmt *transformStatsStmt(Oid relid, CreateStatsStmt *stmt,
+										   const char *queryString);
 extern void transformRuleStmt(RuleStmt *stmt, const char *queryString,
 							  List **actions, Node **whereClause);
 extern List *transformCreateSchemaStmt(CreateSchemaStmt *stmt);
diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h
index 61e69696cf..82151812d0 100644
--- a/src/include/statistics/extended_stats_internal.h
+++ b/src/include/statistics/extended_stats_internal.h
@@ -57,19 +57,35 @@ typedef struct SortItem
 	int			count;
 } SortItem;
 
+/*
+ * Used to pass pre-computed information about expressions the stats
+ * object is defined on.
+ */
+typedef struct ExprInfo
+{
+	int			nexprs;			/* number of expressions */
+	Oid		   *collations;		/* collation for each expression */
+	Oid		   *types;			/* type of each expression */
+	Datum	  **values;			/* values for each expression */
+	bool	  **nulls;			/* nulls for each expression */
+} ExprInfo;
+
 extern MVNDistinct *statext_ndistinct_build(double totalrows,
 											int numrows, HeapTuple *rows,
-											Bitmapset *attrs, VacAttrStats **stats);
+											ExprInfo *exprs, Bitmapset *attrs,
+											VacAttrStats **stats);
 extern bytea *statext_ndistinct_serialize(MVNDistinct *ndistinct);
 extern MVNDistinct *statext_ndistinct_deserialize(bytea *data);
 
 extern MVDependencies *statext_dependencies_build(int numrows, HeapTuple *rows,
-												  Bitmapset *attrs, VacAttrStats **stats);
+												  ExprInfo *exprs, Bitmapset *attrs,
+												  VacAttrStats **stats);
 extern bytea *statext_dependencies_serialize(MVDependencies *dependencies);
 extern MVDependencies *statext_dependencies_deserialize(bytea *data);
 
 extern MCVList *statext_mcv_build(int numrows, HeapTuple *rows,
-								  Bitmapset *attrs, VacAttrStats **stats,
+								  ExprInfo *exprs, Bitmapset *attrs,
+								  VacAttrStats **stats,
 								  double totalrows, int stattarget);
 extern bytea *statext_mcv_serialize(MCVList *mcv, VacAttrStats **stats);
 extern MCVList *statext_mcv_deserialize(bytea *data);
@@ -93,11 +109,18 @@ extern void *bsearch_arg(const void *key, const void *base,
 extern AttrNumber *build_attnums_array(Bitmapset *attrs, int *numattrs);
 
 extern SortItem *build_sorted_items(int numrows, int *nitems, HeapTuple *rows,
-									TupleDesc tdesc, MultiSortSupport mss,
+									ExprInfo *exprs, TupleDesc tdesc,
+									MultiSortSupport mss,
 									int numattrs, AttrNumber *attnums);
 
 extern bool examine_clause_args(List *args, Var **varp,
 								Const **cstp, bool *varonleftp);
+extern bool examine_clause_args2(List *args, Node **exprp,
+								 Const **cstp, bool *expronleftp);
+extern bool examine_opclause_expression(OpExpr *expr, Var **varp, Const **cstp,
+										bool *varonleftp);
+extern bool examine_opclause_expression2(OpExpr *expr, Node **exprp, Const **cstp,
+										 bool *expronleftp);
 
 extern Selectivity mcv_clauselist_selectivity(PlannerInfo *root,
 											  StatisticExtInfo *stat,
@@ -109,4 +132,13 @@ extern Selectivity mcv_clauselist_selectivity(PlannerInfo *root,
 											  Selectivity *basesel,
 											  Selectivity *totalsel);
 
+extern Bitmapset *add_expressions_to_attributes(Bitmapset *attrs, int nexprs);
+
+/* translate 0-based expression index to attnum and back */
+#define	EXPRESSION_ATTNUM(index)	\
+	(MaxHeapAttributeNumber + (index) + 1)
+
+#define	EXPRESSION_INDEX(attnum)	\
+	((attnum) - MaxHeapAttributeNumber - 1)
+
 #endif							/* EXTENDED_STATS_INTERNAL_H */
diff --git a/src/include/statistics/statistics.h b/src/include/statistics/statistics.h
index 50fce4935f..d7d52c437b 100644
--- a/src/include/statistics/statistics.h
+++ b/src/include/statistics/statistics.h
@@ -120,6 +120,8 @@ extern Selectivity statext_clauselist_selectivity(PlannerInfo *root,
 extern bool has_stats_of_kind(List *stats, char requiredkind);
 extern StatisticExtInfo *choose_best_statistics(List *stats, char requiredkind,
 												Bitmapset **clause_attnums,
+												List **clause_exprs,
 												int nclauses);
+extern HeapTuple statext_expressions_load(Oid stxoid, int idx);
 
 #endif							/* STATISTICS_H */
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index 4c3edd213f..39ff7dd146 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -43,12 +43,25 @@ CREATE STATISTICS tst ON a, b FROM pg_class;
 ERROR:  column "a" does not exist
 CREATE STATISTICS tst ON relname, relname, relnatts FROM pg_class;
 ERROR:  duplicate column name in statistics definition
-CREATE STATISTICS tst ON relnatts + relpages FROM pg_class;
-ERROR:  only simple column references are allowed in CREATE STATISTICS
-CREATE STATISTICS tst ON (relpages, reltuples) FROM pg_class;
-ERROR:  only simple column references are allowed in CREATE STATISTICS
+CREATE STATISTICS tst ON relname, relname, relnatts, relname, relname, relnatts, relname, relname, relnatts FROM pg_class;
+ERROR:  cannot have more than 8 columns in statistics
+CREATE STATISTICS tst ON relname, relname, relnatts, relname, relname, (relname || 'x'), (relnatts + 1), (relname || 'x'), (relname || 'x'), (relnatts + 1) FROM pg_class;
+ERROR:  cannot have more than 8 columns in statistics
+CREATE STATISTICS tst ON (relname || 'x'), (relname || 'x'), (relnatts + 1), (relname || 'x'), (relname || 'x'), (relnatts + 1), (relname || 'x'), (relname || 'x'), (relnatts + 1) FROM pg_class;
+ERROR:  cannot have more than 8 columns in statistics
+CREATE STATISTICS tst ON (relname || 'x'), (relname || 'x'), relnatts FROM pg_class;
+ERROR:  duplicate expression in statistics definition
 CREATE STATISTICS tst (unrecognized) ON relname, relnatts FROM pg_class;
 ERROR:  unrecognized statistics kind "unrecognized"
+-- incorrect expressions
+CREATE STATISTICS tst ON relnatts + relpages FROM pg_class; -- missing parentheses
+ERROR:  syntax error at or near "+"
+LINE 1: CREATE STATISTICS tst ON relnatts + relpages FROM pg_class;
+                                          ^
+CREATE STATISTICS tst ON (relpages, reltuples) FROM pg_class; -- tuple expression
+ERROR:  syntax error at or near ","
+LINE 1: CREATE STATISTICS tst ON (relpages, reltuples) FROM pg_class...
+                                          ^
 -- Ensure stats are dropped sanely, and test IF NOT EXISTS while at it
 CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
 CREATE STATISTICS IF NOT EXISTS ab1_a_b_stats ON a, b FROM ab1;
@@ -148,6 +161,27 @@ CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1;
 ANALYZE ab1;
 DROP TABLE ab1 CASCADE;
 NOTICE:  drop cascades to table ab1c
+-- basic test for statistics on expressions
+CREATE TABLE ab1 (a INTEGER, b INTEGER, c TIMESTAMP, d TIMESTAMPTZ);
+-- expression stats may be built on a single column
+CREATE STATISTICS ab1_exprstat_1 (expressions) ON (a+b) FROM ab1;
+-- we build all stats types by default, requiring at least two columns
+CREATE STATISTICS ab1_exprstat_2 ON (a+b) FROM ab1;
+ERROR:  extended statistics require at least 2 columns
+-- expression must be immutable, but date_trunc on timestamptz is not
+CREATE STATISTICS ab1_exprstat_3 (expressions) ON date_trunc('day', d) FROM ab1;
+ERROR:  functions in statistics expression must be marked IMMUTABLE
+-- but on timestamp it should work fine
+CREATE STATISTICS ab1_exprstat_4 (expressions) ON (a+b), (a-b), date_trunc('day', c) FROM ab1;
+-- insert some data and run analyze, to test that these cases build properly
+INSERT INTO ab1
+SELECT
+    generate_series(1,10),
+    generate_series(1,10),
+    generate_series('2020-10-01'::timestamp, '2020-10-10'::timestamp, interval '1 day'),
+    generate_series('2020-10-01'::timestamptz, '2020-10-10'::timestamptz, interval '1 day');
+ANALYZE ab1;
+DROP TABLE ab1;
 -- Verify supported object types for extended statistics
 CREATE schema tststats;
 CREATE TABLE tststats.t (a int, b int, c text);
@@ -425,6 +459,40 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE
          1 |      1
 (1 row)
 
+-- a => b, a => c, b => c
+TRUNCATE functional_dependencies;
+DROP STATISTICS func_deps_stat;
+-- now do the same thing, but with expressions
+INSERT INTO functional_dependencies (a, b, c, filler1)
+     SELECT i, i, i, i FROM generate_series(1,5000) s(i);
+ANALYZE functional_dependencies;
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 11) = 1 AND mod(b::int, 13) = 1');
+ estimated | actual 
+-----------+--------
+         1 |     35
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 11) = 1 AND mod(b::int, 13) = 1 AND mod(c, 7) = 1');
+ estimated | actual 
+-----------+--------
+         1 |      5
+(1 row)
+
+-- create statistics
+CREATE STATISTICS func_deps_stat (expressions, dependencies) ON (mod(a,11)), (mod(b::int, 13)), (mod(c, 7)) FROM functional_dependencies;
+ANALYZE functional_dependencies;
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 11) = 1 AND mod(b::int, 13) = 1');
+ estimated | actual 
+-----------+--------
+        35 |     35
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 11) = 1 AND mod(b::int, 13) = 1 AND mod(c, 7) = 1');
+ estimated | actual 
+-----------+--------
+         5 |      5
+(1 row)
+
 -- a => b, a => c, b => c
 TRUNCATE functional_dependencies;
 DROP STATISTICS func_deps_stat;
@@ -894,6 +962,39 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b =
          1 |      1
 (1 row)
 
+TRUNCATE mcv_lists;
+DROP STATISTICS mcv_lists_stats;
+-- random data (no MCV list), but with expression
+INSERT INTO mcv_lists (a, b, c, filler1)
+     SELECT i, i, i, i FROM generate_series(1,5000) s(i);
+ANALYZE mcv_lists;
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,37) = 1 AND mod(b::int,41) = 1');
+ estimated | actual 
+-----------+--------
+         1 |      4
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,37) = 1 AND mod(b::int,41) = 1 AND mod(c,47) = 1');
+ estimated | actual 
+-----------+--------
+         1 |      1
+(1 row)
+
+-- create statistics
+CREATE STATISTICS mcv_lists_stats (expressions, mcv) ON (mod(a,37)), (mod(b::int,41)), (mod(c,47)) FROM mcv_lists;
+ANALYZE mcv_lists;
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,37) = 1 AND mod(b::int,41) = 1');
+ estimated | actual 
+-----------+--------
+         3 |      4
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,37) = 1 AND mod(b::int,41) = 1 AND mod(c,47) = 1');
+ estimated | actual 
+-----------+--------
+         1 |      1
+(1 row)
+
 -- 100 distinct combinations, all in the MCV list
 TRUNCATE mcv_lists;
 DROP STATISTICS mcv_lists_stats;
@@ -1113,6 +1214,12 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = '
        200 |    200
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
+ estimated | actual 
+-----------+--------
+       343 |    200
+(1 row)
+
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')');
  estimated | actual 
 -----------+--------
@@ -1206,6 +1313,454 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b =
         50 |     50
 (1 row)
 
+-- 100 distinct combinations, all in the MCV list, but with expressions
+TRUNCATE mcv_lists;
+DROP STATISTICS mcv_lists_stats;
+INSERT INTO mcv_lists (a, b, c, filler1)
+     SELECT i, i, i, i FROM generate_series(1,5000) s(i);
+ANALYZE mcv_lists;
+-- without any stats on the expressions, we have to use default selectivities, which
+-- is why the estimates here are different from the pre-computed case above
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 AND mod(b::int,50) = 1');
+ estimated | actual 
+-----------+--------
+         1 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = mod(a,100) AND 1 = mod(b::int,50)');
+ estimated | actual 
+-----------+--------
+         1 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < 1 AND mod(b::int,50) < 1');
+ estimated | actual 
+-----------+--------
+       556 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > mod(a,100) AND 1 > mod(b::int,50)');
+ estimated | actual 
+-----------+--------
+       556 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= 0 AND mod(b::int,50) <= 0');
+ estimated | actual 
+-----------+--------
+       556 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 0 >= mod(a,100) AND 0 >= mod(b::int,50)');
+ estimated | actual 
+-----------+--------
+       556 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 AND mod(b::int,50) = 1 AND mod(c,25) = 1');
+ estimated | actual 
+-----------+--------
+         1 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < 5 AND mod(b::int,50) < 1 AND mod(c,25) < 5');
+ estimated | actual 
+-----------+--------
+       185 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < 5 AND 1 > mod(b::int,50) AND 5 > mod(c,25)');
+ estimated | actual 
+-----------+--------
+       185 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= 4 AND mod(b::int,50) <= 0 AND mod(c,25) <= 4');
+ estimated | actual 
+-----------+--------
+       185 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 4 >= mod(a,100) AND 0 >= mod(b::int,50) AND 4 >= mod(c,25)');
+ estimated | actual 
+-----------+--------
+       185 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 OR mod(b::int,50) = 1 OR mod(c,25) = 1');
+ estimated | actual 
+-----------+--------
+        75 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 OR mod(b::int,50) = 1 OR mod(c,25) = 1 OR d IS NOT NULL');
+ estimated | actual 
+-----------+--------
+        75 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) IN (1, 2, 51, 52) AND mod(b::int,50) IN ( 1, 2)');
+ estimated | actual 
+-----------+--------
+         1 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) IN (1, 2, 51, 52, NULL) AND mod(b::int,50) IN ( 1, 2, NULL)');
+ estimated | actual 
+-----------+--------
+         1 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = ANY (ARRAY[1, 2, 51, 52]) AND mod(b::int,50) = ANY (ARRAY[1, 2])');
+ estimated | actual 
+-----------+--------
+         1 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = ANY (ARRAY[NULL, 1, 2, 51, 52]) AND mod(b::int,50) = ANY (ARRAY[1, 2, NULL])');
+ estimated | actual 
+-----------+--------
+         1 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= ANY (ARRAY[1, 2, 3]) AND mod(b::int,50) IN (1, 2, 3)');
+ estimated | actual 
+-----------+--------
+        53 |    150
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= ANY (ARRAY[1, NULL, 2, 3]) AND mod(b::int,50) IN (1, 2, NULL, 3)');
+ estimated | actual 
+-----------+--------
+        53 |    150
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(c,25) > ANY (ARRAY[1, 2, 3])');
+ estimated | actual 
+-----------+--------
+       391 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(c,25) > ANY (ARRAY[1, 2, 3, NULL])');
+ estimated | actual 
+-----------+--------
+       391 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(b::int,50) IN (1, 2, 3) AND mod(c,25) > ANY (ARRAY[1, 2, 3])');
+ estimated | actual 
+-----------+--------
+         6 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(b::int,50) IN (1, 2, NULL, 3) AND mod(c,25) > ANY (ARRAY[1, 2, NULL, 3])');
+ estimated | actual 
+-----------+--------
+         6 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 OR mod(b::int,50) = 1 OR mod(c,25) = 1 OR d IS NOT NULL');
+ estimated | actual 
+-----------+--------
+        75 |    200
+(1 row)
+
+-- create statistics with expressions only
+CREATE STATISTICS mcv_lists_stats (expressions) ON (mod(a,100)), (mod(b::int,50)), (mod(c,25)) FROM mcv_lists;
+ANALYZE mcv_lists;
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 AND mod(b::int,50) = 1');
+ estimated | actual 
+-----------+--------
+         1 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = mod(a,100) AND 1 = mod(b::int,50)');
+ estimated | actual 
+-----------+--------
+         1 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < 1 AND mod(b::int,50) < 1');
+ estimated | actual 
+-----------+--------
+         1 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > mod(a,100) AND 1 > mod(b::int,50)');
+ estimated | actual 
+-----------+--------
+         1 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= 0 AND mod(b::int,50) <= 0');
+ estimated | actual 
+-----------+--------
+         1 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 0 >= mod(a,100) AND 0 >= mod(b::int,50)');
+ estimated | actual 
+-----------+--------
+         1 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 AND mod(b::int,50) = 1 AND mod(c,25) = 1');
+ estimated | actual 
+-----------+--------
+         1 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < 5 AND mod(b::int,50) < 1 AND mod(c,25) < 5');
+ estimated | actual 
+-----------+--------
+         1 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < 5 AND 1 > mod(b::int,50) AND 5 > mod(c,25)');
+ estimated | actual 
+-----------+--------
+         1 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= 4 AND mod(b::int,50) <= 0 AND mod(c,25) <= 4');
+ estimated | actual 
+-----------+--------
+         1 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 4 >= mod(a,100) AND 0 >= mod(b::int,50) AND 4 >= mod(c,25)');
+ estimated | actual 
+-----------+--------
+         1 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 OR mod(b::int,50) = 1 OR mod(c,25) = 1');
+ estimated | actual 
+-----------+--------
+       343 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 OR mod(b::int,50) = 1 OR mod(c,25) = 1 OR d IS NOT NULL');
+ estimated | actual 
+-----------+--------
+       343 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) IN (1, 2, 51, 52) AND mod(b::int,50) IN ( 1, 2)');
+ estimated | actual 
+-----------+--------
+         8 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) IN (1, 2, 51, 52, NULL) AND mod(b::int,50) IN ( 1, 2, NULL)');
+ estimated | actual 
+-----------+--------
+         8 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = ANY (ARRAY[1, 2, 51, 52]) AND mod(b::int,50) = ANY (ARRAY[1, 2])');
+ estimated | actual 
+-----------+--------
+         8 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = ANY (ARRAY[NULL, 1, 2, 51, 52]) AND mod(b::int,50) = ANY (ARRAY[1, 2, NULL])');
+ estimated | actual 
+-----------+--------
+         8 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= ANY (ARRAY[1, 2, 3]) AND mod(b::int,50) IN (1, 2, 3)');
+ estimated | actual 
+-----------+--------
+        26 |    150
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= ANY (ARRAY[1, NULL, 2, 3]) AND mod(b::int,50) IN (1, 2, NULL, 3)');
+ estimated | actual 
+-----------+--------
+        26 |    150
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(c,25) > ANY (ARRAY[1, 2, 3])');
+ estimated | actual 
+-----------+--------
+        10 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(c,25) > ANY (ARRAY[1, 2, 3, NULL])');
+ estimated | actual 
+-----------+--------
+        10 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(b::int,50) IN (1, 2, 3) AND mod(c,25) > ANY (ARRAY[1, 2, 3])');
+ estimated | actual 
+-----------+--------
+         1 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(b::int,50) IN (1, 2, NULL, 3) AND mod(c,25) > ANY (ARRAY[1, 2, NULL, 3])');
+ estimated | actual 
+-----------+--------
+         1 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 OR mod(b::int,50) = 1 OR mod(c,25) = 1 OR d IS NOT NULL');
+ estimated | actual 
+-----------+--------
+       343 |    200
+(1 row)
+
+DROP STATISTICS mcv_lists_stats;
+-- create statistics with both MCV and expressions
+CREATE STATISTICS mcv_lists_stats (expressions, mcv) ON (mod(a,100)), (mod(b::int,50)), (mod(c,25)) FROM mcv_lists;
+ANALYZE mcv_lists;
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 AND mod(b::int,50) = 1');
+ estimated | actual 
+-----------+--------
+        50 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = mod(a,100) AND 1 = mod(b::int,50)');
+ estimated | actual 
+-----------+--------
+        50 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < 1 AND mod(b::int,50) < 1');
+ estimated | actual 
+-----------+--------
+        50 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > mod(a,100) AND 1 > mod(b::int,50)');
+ estimated | actual 
+-----------+--------
+        50 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= 0 AND mod(b::int,50) <= 0');
+ estimated | actual 
+-----------+--------
+        50 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 0 >= mod(a,100) AND 0 >= mod(b::int,50)');
+ estimated | actual 
+-----------+--------
+        50 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 AND mod(b::int,50) = 1 AND mod(c,25) = 1');
+ estimated | actual 
+-----------+--------
+        50 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < 5 AND mod(b::int,50) < 1 AND mod(c,25) < 5');
+ estimated | actual 
+-----------+--------
+        50 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < 5 AND 1 > mod(b::int,50) AND 5 > mod(c,25)');
+ estimated | actual 
+-----------+--------
+        50 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= 4 AND mod(b::int,50) <= 0 AND mod(c,25) <= 4');
+ estimated | actual 
+-----------+--------
+        50 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 4 >= mod(a,100) AND 0 >= mod(b::int,50) AND 4 >= mod(c,25)');
+ estimated | actual 
+-----------+--------
+        50 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 OR mod(b::int,50) = 1 OR mod(c,25) = 1');
+ estimated | actual 
+-----------+--------
+       200 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 OR mod(b::int,50) = 1 OR mod(c,25) = 1 OR d IS NOT NULL');
+ estimated | actual 
+-----------+--------
+       343 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) IN (1, 2, 51, 52) AND mod(b::int,50) IN ( 1, 2)');
+ estimated | actual 
+-----------+--------
+       200 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) IN (1, 2, 51, 52, NULL) AND mod(b::int,50) IN ( 1, 2, NULL)');
+ estimated | actual 
+-----------+--------
+       200 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = ANY (ARRAY[1, 2, 51, 52]) AND mod(b::int,50) = ANY (ARRAY[1, 2])');
+ estimated | actual 
+-----------+--------
+       200 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = ANY (ARRAY[NULL, 1, 2, 51, 52]) AND mod(b::int,50) = ANY (ARRAY[1, 2, NULL])');
+ estimated | actual 
+-----------+--------
+       200 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= ANY (ARRAY[1, 2, 3]) AND mod(b::int,50) IN (1, 2, 3)');
+ estimated | actual 
+-----------+--------
+       150 |    150
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= ANY (ARRAY[1, NULL, 2, 3]) AND mod(b::int,50) IN (1, 2, NULL, 3)');
+ estimated | actual 
+-----------+--------
+       150 |    150
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(c,25) > ANY (ARRAY[1, 2, 3])');
+ estimated | actual 
+-----------+--------
+       100 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(c,25) > ANY (ARRAY[1, 2, 3, NULL])');
+ estimated | actual 
+-----------+--------
+       100 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(b::int,50) IN (1, 2, 3) AND mod(c,25) > ANY (ARRAY[1, 2, 3])');
+ estimated | actual 
+-----------+--------
+       100 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(b::int,50) IN (1, 2, NULL, 3) AND mod(c,25) > ANY (ARRAY[1, 2, NULL, 3])');
+ estimated | actual 
+-----------+--------
+       100 |    100
+(1 row)
+
+-- we can't use the statistic for OR clauses that are not fully covered (missing 'd' attribute)
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 OR mod(b::int,50) = 1 OR mod(c,25) = 1 OR d IS NOT NULL');
+ estimated | actual 
+-----------+--------
+       343 |    200
+(1 row)
+
 -- 100 distinct combinations with NULL values, all in the MCV list
 TRUNCATE mcv_lists;
 DROP STATISTICS mcv_lists_stats;
@@ -1535,6 +2090,102 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 AN
 (1 row)
 
 DROP TABLE mcv_lists_multi;
+-- statistics on integer expressions
+CREATE TABLE expr_stats (a int, b int, c int);
+INSERT INTO expr_stats SELECT mod(i,10), mod(i,10), mod(i,10) FROM generate_series(1,10000) s(i);
+ANALYZE expr_stats;
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (2*a) = 0 AND (3*b) = 0');
+ estimated | actual 
+-----------+--------
+         1 |   1000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (a+b) = 0 AND (a-b) = 0');
+ estimated | actual 
+-----------+--------
+         1 |   1000
+(1 row)
+
+CREATE STATISTICS expr_stats_1 (mcv) ON (a+b), (a-b), (2*a), (3*b) FROM expr_stats;
+ANALYZE expr_stats;
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (2*a) = 0 AND (3*b) = 0');
+ estimated | actual 
+-----------+--------
+      1000 |   1000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (a+b) = 0 AND (a-b) = 0');
+ estimated | actual 
+-----------+--------
+      1000 |   1000
+(1 row)
+
+-- FIXME add dependency tracking for expressions, to automatically drop after DROP TABLE
+-- (not it fails, when there are no simple column references)
+DROP STATISTICS expr_stats_1;
+DROP TABLE expr_stats;
+-- statistics on a mix columns and expressions
+CREATE TABLE expr_stats (a int, b int, c int);
+INSERT INTO expr_stats SELECT mod(i,10), mod(i,10), mod(i,10) FROM generate_series(1,10000) s(i);
+ANALYZE expr_stats;
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (2*a) = 0 AND (3*b) = 0');
+ estimated | actual 
+-----------+--------
+         1 |   1000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 3 AND b = 3 AND (a-b) = 0');
+ estimated | actual 
+-----------+--------
+         1 |   1000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND b = 1 AND (a-b) = 0');
+ estimated | actual 
+-----------+--------
+         1 |      0
+(1 row)
+
+CREATE STATISTICS expr_stats_1 (mcv) ON a, b, (2*a), (3*b), (a+b), (a-b) FROM expr_stats;
+ANALYZE expr_stats;
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (2*a) = 0 AND (3*b) = 0');
+ estimated | actual 
+-----------+--------
+      1000 |   1000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 3 AND b = 3 AND (a-b) = 0');
+ estimated | actual 
+-----------+--------
+      1000 |   1000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND b = 1 AND (a-b) = 0');
+ estimated | actual 
+-----------+--------
+         1 |      0
+(1 row)
+
+DROP TABLE expr_stats;
+-- statistics on expressions with different data types
+CREATE TABLE expr_stats (a int, b name, c text);
+INSERT INTO expr_stats SELECT mod(i,10), md5(mod(i,10)::text), md5(mod(i,10)::text) FROM generate_series(1,10000) s(i);
+ANALYZE expr_stats;
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (b || c) <= ''z'' AND (c || b) >= ''0''');
+ estimated | actual 
+-----------+--------
+       111 |   1000
+(1 row)
+
+CREATE STATISTICS expr_stats_1 (mcv) ON a, b, (b || c), (c || b) FROM expr_stats;
+ANALYZE expr_stats;
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (b || c) <= ''z'' AND (c || b) >= ''0''');
+ estimated | actual 
+-----------+--------
+      1000 |   1000
+(1 row)
+
+DROP TABLE expr_stats;
 -- Permission tests. Users should not be able to see specific data values in
 -- the extended statistics, if they lack permission to see those values in
 -- the underlying table.
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index 9781e590a3..882ee025b8 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -34,9 +34,14 @@ CREATE STATISTICS tst FROM sometab;
 CREATE STATISTICS tst ON a, b FROM nonexistent;
 CREATE STATISTICS tst ON a, b FROM pg_class;
 CREATE STATISTICS tst ON relname, relname, relnatts FROM pg_class;
-CREATE STATISTICS tst ON relnatts + relpages FROM pg_class;
-CREATE STATISTICS tst ON (relpages, reltuples) FROM pg_class;
+CREATE STATISTICS tst ON relname, relname, relnatts, relname, relname, relnatts, relname, relname, relnatts FROM pg_class;
+CREATE STATISTICS tst ON relname, relname, relnatts, relname, relname, (relname || 'x'), (relnatts + 1), (relname || 'x'), (relname || 'x'), (relnatts + 1) FROM pg_class;
+CREATE STATISTICS tst ON (relname || 'x'), (relname || 'x'), (relnatts + 1), (relname || 'x'), (relname || 'x'), (relnatts + 1), (relname || 'x'), (relname || 'x'), (relnatts + 1) FROM pg_class;
+CREATE STATISTICS tst ON (relname || 'x'), (relname || 'x'), relnatts FROM pg_class;
 CREATE STATISTICS tst (unrecognized) ON relname, relnatts FROM pg_class;
+-- incorrect expressions
+CREATE STATISTICS tst ON relnatts + relpages FROM pg_class; -- missing parentheses
+CREATE STATISTICS tst ON (relpages, reltuples) FROM pg_class; -- tuple expression
 
 -- Ensure stats are dropped sanely, and test IF NOT EXISTS while at it
 CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
@@ -95,6 +100,31 @@ CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1;
 ANALYZE ab1;
 DROP TABLE ab1 CASCADE;
 
+-- basic test for statistics on expressions
+CREATE TABLE ab1 (a INTEGER, b INTEGER, c TIMESTAMP, d TIMESTAMPTZ);
+
+-- expression stats may be built on a single column
+CREATE STATISTICS ab1_exprstat_1 (expressions) ON (a+b) FROM ab1;
+
+-- we build all stats types by default, requiring at least two columns
+CREATE STATISTICS ab1_exprstat_2 ON (a+b) FROM ab1;
+
+-- expression must be immutable, but date_trunc on timestamptz is not
+CREATE STATISTICS ab1_exprstat_3 (expressions) ON date_trunc('day', d) FROM ab1;
+
+-- but on timestamp it should work fine
+CREATE STATISTICS ab1_exprstat_4 (expressions) ON (a+b), (a-b), date_trunc('day', c) FROM ab1;
+
+-- insert some data and run analyze, to test that these cases build properly
+INSERT INTO ab1
+SELECT
+    generate_series(1,10),
+    generate_series(1,10),
+    generate_series('2020-10-01'::timestamp, '2020-10-10'::timestamp, interval '1 day'),
+    generate_series('2020-10-01'::timestamptz, '2020-10-10'::timestamptz, interval '1 day');
+ANALYZE ab1;
+DROP TABLE ab1;
+
 -- Verify supported object types for extended statistics
 CREATE schema tststats;
 
@@ -270,6 +300,29 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE
 TRUNCATE functional_dependencies;
 DROP STATISTICS func_deps_stat;
 
+-- now do the same thing, but with expressions
+INSERT INTO functional_dependencies (a, b, c, filler1)
+     SELECT i, i, i, i FROM generate_series(1,5000) s(i);
+
+ANALYZE functional_dependencies;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 11) = 1 AND mod(b::int, 13) = 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 11) = 1 AND mod(b::int, 13) = 1 AND mod(c, 7) = 1');
+
+-- create statistics
+CREATE STATISTICS func_deps_stat (expressions, dependencies) ON (mod(a,11)), (mod(b::int, 13)), (mod(c, 7)) FROM functional_dependencies;
+
+ANALYZE functional_dependencies;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 11) = 1 AND mod(b::int, 13) = 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 11) = 1 AND mod(b::int, 13) = 1 AND mod(c, 7) = 1');
+
+-- a => b, a => c, b => c
+TRUNCATE functional_dependencies;
+DROP STATISTICS func_deps_stat;
+
 INSERT INTO functional_dependencies (a, b, c, filler1)
      SELECT mod(i,100), mod(i,50), mod(i,25), i FROM generate_series(1,5000) s(i);
 
@@ -477,6 +530,28 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b =
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'' AND c = 1');
 
+TRUNCATE mcv_lists;
+DROP STATISTICS mcv_lists_stats;
+
+-- random data (no MCV list), but with expression
+INSERT INTO mcv_lists (a, b, c, filler1)
+     SELECT i, i, i, i FROM generate_series(1,5000) s(i);
+
+ANALYZE mcv_lists;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,37) = 1 AND mod(b::int,41) = 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,37) = 1 AND mod(b::int,41) = 1 AND mod(c,47) = 1');
+
+-- create statistics
+CREATE STATISTICS mcv_lists_stats (expressions, mcv) ON (mod(a,37)), (mod(b::int,41)), (mod(c,47)) FROM mcv_lists;
+
+ANALYZE mcv_lists;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,37) = 1 AND mod(b::int,41) = 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,37) = 1 AND mod(b::int,41) = 1 AND mod(c,47) = 1');
+
 -- 100 distinct combinations, all in the MCV list
 TRUNCATE mcv_lists;
 DROP STATISTICS mcv_lists_stats;
@@ -561,6 +636,8 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 4 >= a AND ''0
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1');
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
+
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')');
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52, NULL) AND b IN ( ''1'', ''2'', NULL)');
@@ -601,6 +678,176 @@ ANALYZE mcv_lists;
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1''');
 
+
+-- 100 distinct combinations, all in the MCV list, but with expressions
+TRUNCATE mcv_lists;
+DROP STATISTICS mcv_lists_stats;
+
+INSERT INTO mcv_lists (a, b, c, filler1)
+     SELECT i, i, i, i FROM generate_series(1,5000) s(i);
+
+ANALYZE mcv_lists;
+
+-- without any stats on the expressions, we have to use default selectivities, which
+-- is why the estimates here are different from the pre-computed case above
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 AND mod(b::int,50) = 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = mod(a,100) AND 1 = mod(b::int,50)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < 1 AND mod(b::int,50) < 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > mod(a,100) AND 1 > mod(b::int,50)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= 0 AND mod(b::int,50) <= 0');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 0 >= mod(a,100) AND 0 >= mod(b::int,50)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 AND mod(b::int,50) = 1 AND mod(c,25) = 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < 5 AND mod(b::int,50) < 1 AND mod(c,25) < 5');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < 5 AND 1 > mod(b::int,50) AND 5 > mod(c,25)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= 4 AND mod(b::int,50) <= 0 AND mod(c,25) <= 4');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 4 >= mod(a,100) AND 0 >= mod(b::int,50) AND 4 >= mod(c,25)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 OR mod(b::int,50) = 1 OR mod(c,25) = 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 OR mod(b::int,50) = 1 OR mod(c,25) = 1 OR d IS NOT NULL');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) IN (1, 2, 51, 52) AND mod(b::int,50) IN ( 1, 2)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) IN (1, 2, 51, 52, NULL) AND mod(b::int,50) IN ( 1, 2, NULL)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = ANY (ARRAY[1, 2, 51, 52]) AND mod(b::int,50) = ANY (ARRAY[1, 2])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = ANY (ARRAY[NULL, 1, 2, 51, 52]) AND mod(b::int,50) = ANY (ARRAY[1, 2, NULL])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= ANY (ARRAY[1, 2, 3]) AND mod(b::int,50) IN (1, 2, 3)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= ANY (ARRAY[1, NULL, 2, 3]) AND mod(b::int,50) IN (1, 2, NULL, 3)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(c,25) > ANY (ARRAY[1, 2, 3])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(c,25) > ANY (ARRAY[1, 2, 3, NULL])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(b::int,50) IN (1, 2, 3) AND mod(c,25) > ANY (ARRAY[1, 2, 3])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(b::int,50) IN (1, 2, NULL, 3) AND mod(c,25) > ANY (ARRAY[1, 2, NULL, 3])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 OR mod(b::int,50) = 1 OR mod(c,25) = 1 OR d IS NOT NULL');
+
+-- create statistics with expressions only
+CREATE STATISTICS mcv_lists_stats (expressions) ON (mod(a,100)), (mod(b::int,50)), (mod(c,25)) FROM mcv_lists;
+
+ANALYZE mcv_lists;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 AND mod(b::int,50) = 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = mod(a,100) AND 1 = mod(b::int,50)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < 1 AND mod(b::int,50) < 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > mod(a,100) AND 1 > mod(b::int,50)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= 0 AND mod(b::int,50) <= 0');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 0 >= mod(a,100) AND 0 >= mod(b::int,50)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 AND mod(b::int,50) = 1 AND mod(c,25) = 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < 5 AND mod(b::int,50) < 1 AND mod(c,25) < 5');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < 5 AND 1 > mod(b::int,50) AND 5 > mod(c,25)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= 4 AND mod(b::int,50) <= 0 AND mod(c,25) <= 4');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 4 >= mod(a,100) AND 0 >= mod(b::int,50) AND 4 >= mod(c,25)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 OR mod(b::int,50) = 1 OR mod(c,25) = 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 OR mod(b::int,50) = 1 OR mod(c,25) = 1 OR d IS NOT NULL');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) IN (1, 2, 51, 52) AND mod(b::int,50) IN ( 1, 2)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) IN (1, 2, 51, 52, NULL) AND mod(b::int,50) IN ( 1, 2, NULL)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = ANY (ARRAY[1, 2, 51, 52]) AND mod(b::int,50) = ANY (ARRAY[1, 2])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = ANY (ARRAY[NULL, 1, 2, 51, 52]) AND mod(b::int,50) = ANY (ARRAY[1, 2, NULL])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= ANY (ARRAY[1, 2, 3]) AND mod(b::int,50) IN (1, 2, 3)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= ANY (ARRAY[1, NULL, 2, 3]) AND mod(b::int,50) IN (1, 2, NULL, 3)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(c,25) > ANY (ARRAY[1, 2, 3])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(c,25) > ANY (ARRAY[1, 2, 3, NULL])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(b::int,50) IN (1, 2, 3) AND mod(c,25) > ANY (ARRAY[1, 2, 3])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(b::int,50) IN (1, 2, NULL, 3) AND mod(c,25) > ANY (ARRAY[1, 2, NULL, 3])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 OR mod(b::int,50) = 1 OR mod(c,25) = 1 OR d IS NOT NULL');
+
+DROP STATISTICS mcv_lists_stats;
+
+-- create statistics with both MCV and expressions
+CREATE STATISTICS mcv_lists_stats (expressions, mcv) ON (mod(a,100)), (mod(b::int,50)), (mod(c,25)) FROM mcv_lists;
+
+ANALYZE mcv_lists;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 AND mod(b::int,50) = 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = mod(a,100) AND 1 = mod(b::int,50)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < 1 AND mod(b::int,50) < 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > mod(a,100) AND 1 > mod(b::int,50)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= 0 AND mod(b::int,50) <= 0');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 0 >= mod(a,100) AND 0 >= mod(b::int,50)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 AND mod(b::int,50) = 1 AND mod(c,25) = 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < 5 AND mod(b::int,50) < 1 AND mod(c,25) < 5');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < 5 AND 1 > mod(b::int,50) AND 5 > mod(c,25)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= 4 AND mod(b::int,50) <= 0 AND mod(c,25) <= 4');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 4 >= mod(a,100) AND 0 >= mod(b::int,50) AND 4 >= mod(c,25)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 OR mod(b::int,50) = 1 OR mod(c,25) = 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 OR mod(b::int,50) = 1 OR mod(c,25) = 1 OR d IS NOT NULL');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) IN (1, 2, 51, 52) AND mod(b::int,50) IN ( 1, 2)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) IN (1, 2, 51, 52, NULL) AND mod(b::int,50) IN ( 1, 2, NULL)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = ANY (ARRAY[1, 2, 51, 52]) AND mod(b::int,50) = ANY (ARRAY[1, 2])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = ANY (ARRAY[NULL, 1, 2, 51, 52]) AND mod(b::int,50) = ANY (ARRAY[1, 2, NULL])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= ANY (ARRAY[1, 2, 3]) AND mod(b::int,50) IN (1, 2, 3)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) <= ANY (ARRAY[1, NULL, 2, 3]) AND mod(b::int,50) IN (1, 2, NULL, 3)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(c,25) > ANY (ARRAY[1, 2, 3])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(c,25) > ANY (ARRAY[1, 2, 3, NULL])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(b::int,50) IN (1, 2, 3) AND mod(c,25) > ANY (ARRAY[1, 2, 3])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) < ALL (ARRAY[4, 5]) AND mod(b::int,50) IN (1, 2, NULL, 3) AND mod(c,25) > ANY (ARRAY[1, 2, NULL, 3])');
+
+-- we can't use the statistic for OR clauses that are not fully covered (missing 'd' attribute)
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,100) = 1 OR mod(b::int,50) = 1 OR mod(c,25) = 1 OR d IS NOT NULL');
+
 -- 100 distinct combinations with NULL values, all in the MCV list
 TRUNCATE mcv_lists;
 DROP STATISTICS mcv_lists_stats;
@@ -813,6 +1060,59 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 AN
 
 DROP TABLE mcv_lists_multi;
 
+
+-- statistics on integer expressions
+CREATE TABLE expr_stats (a int, b int, c int);
+INSERT INTO expr_stats SELECT mod(i,10), mod(i,10), mod(i,10) FROM generate_series(1,10000) s(i);
+ANALYZE expr_stats;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (2*a) = 0 AND (3*b) = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (a+b) = 0 AND (a-b) = 0');
+
+CREATE STATISTICS expr_stats_1 (mcv) ON (a+b), (a-b), (2*a), (3*b) FROM expr_stats;
+ANALYZE expr_stats;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (2*a) = 0 AND (3*b) = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (a+b) = 0 AND (a-b) = 0');
+
+-- FIXME add dependency tracking for expressions, to automatically drop after DROP TABLE
+-- (not it fails, when there are no simple column references)
+DROP STATISTICS expr_stats_1;
+DROP TABLE expr_stats;
+
+-- statistics on a mix columns and expressions
+CREATE TABLE expr_stats (a int, b int, c int);
+INSERT INTO expr_stats SELECT mod(i,10), mod(i,10), mod(i,10) FROM generate_series(1,10000) s(i);
+ANALYZE expr_stats;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (2*a) = 0 AND (3*b) = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 3 AND b = 3 AND (a-b) = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND b = 1 AND (a-b) = 0');
+
+CREATE STATISTICS expr_stats_1 (mcv) ON a, b, (2*a), (3*b), (a+b), (a-b) FROM expr_stats;
+ANALYZE expr_stats;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (2*a) = 0 AND (3*b) = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 3 AND b = 3 AND (a-b) = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND b = 1 AND (a-b) = 0');
+
+DROP TABLE expr_stats;
+
+-- statistics on expressions with different data types
+CREATE TABLE expr_stats (a int, b name, c text);
+INSERT INTO expr_stats SELECT mod(i,10), md5(mod(i,10)::text), md5(mod(i,10)::text) FROM generate_series(1,10000) s(i);
+ANALYZE expr_stats;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (b || c) <= ''z'' AND (c || b) >= ''0''');
+
+CREATE STATISTICS expr_stats_1 (mcv) ON a, b, (b || c), (c || b) FROM expr_stats;
+ANALYZE expr_stats;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (b || c) <= ''z'' AND (c || b) >= ''0''');
+
+DROP TABLE expr_stats;
+
+
 -- Permission tests. Users should not be able to see specific data values in
 -- the extended statistics, if they lack permission to see those values in
 -- the underlying table.
-- 
2.17.0