>From 47a48180be115db2fa29ac659f4e4f259e01600d Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas@pgaddict.com>
Date: Fri, 16 Jan 2015 22:33:41 +0100
Subject: [PATCH 2/5] clause reduction using functional dependencies

During planning, use functional dependencies to decide
which clauses to skip during cardinality estimation.
Initial and rather simplistic implementation.

This only works with regular WHERE clauses, not clauses
used for joining.

Note: The clause_is_mv_compatible() needs to identify the
relation (so that we can fetch the list of multivariate stats
by OID). planner_rt_fetch() seems like the appropriate way to
get the relation OID, but apparently it only works with simple
vars. Maybe examine_variable() would make this work with more
complex vars too?

Includes regression tests analyzing functional dependencies
(part of ANALYZE) on several datasets (no dependencies, no
transitive dependencies, ...).

Checks that a query with conditions on two columns, where one (B)
is functionally dependent on the other one (A), correctly ignores
the clause on (B) and chooses bitmap index scan instead of plain
index scan (which is what happens otherwise, thanks to assumption
of independence).

Note: Functional dependencies only work with equality clauses,
      no inequalities etc.
---
 src/backend/commands/analyze.c                |   1 +
 src/backend/commands/tablecmds.c              |   9 +-
 src/backend/optimizer/path/clausesel.c        | 650 +++++++++++++++++++++++++-
 src/backend/utils/mvstats/common.c            |   5 +-
 src/include/catalog/pg_proc.h                 |   4 +-
 src/include/utils/mvstats.h                   |  23 +-
 src/test/regress/expected/mv_dependencies.out | 175 +++++++
 src/test/regress/parallel_schedule            |   3 +
 src/test/regress/serial_schedule              |   1 +
 src/test/regress/sql/mv_dependencies.sql      | 153 ++++++
 10 files changed, 1013 insertions(+), 11 deletions(-)
 create mode 100644 src/test/regress/expected/mv_dependencies.out
 create mode 100644 src/test/regress/sql/mv_dependencies.sql

diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index f82fcf5..e247f84 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -115,6 +115,7 @@ static void update_attstats(Oid relid, bool inh,
 static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
 static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
 
+
 /*
  *	analyze_rel() -- analyze one relation
  */
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index a321755..965d342 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -420,6 +420,7 @@ static void ATExecEnableRowSecurity(Relation rel);
 static void ATExecDisableRowSecurity(Relation rel);
 static void ATExecAddStatistics(AlteredTableInfo *tab, Relation rel,
 								StatisticsDef *def, LOCKMODE lockmode);
+
 static void copy_relation_data(SMgrRelation rel, SMgrRelation dst,
 				   ForkNumber forkNum, char relpersistence);
 static const char *storage_name(char c);
@@ -11900,7 +11901,7 @@ static void ATExecAddStatistics(AlteredTableInfo *tab, Relation rel,
 	Relation	mvstatrel;
 
 	/* by default build everything */
-	bool 	build_dependencies = true;
+	bool 	build_dependencies = false;
 
 	Assert(IsA(def, StatisticsDef));
 
@@ -11962,6 +11963,12 @@ static void ATExecAddStatistics(AlteredTableInfo *tab, Relation rel,
 							opt->defname)));
 	}
 
+	/* check that at least some statistics were requested */
+	if (! build_dependencies)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("no statistics type (dependencies) was requested")));
+
 	/* sort the attnums and build int2vector */
 	qsort(attnums, numcols, sizeof(int16), compare_int16);
 	stakeys = buildint2vector(attnums, numcols);
diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c
index dcac1c1..e742827 100644
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -24,6 +24,14 @@
 #include "utils/lsyscache.h"
 #include "utils/selfuncs.h"
 
+#include "utils/mvstats.h"
+#include "catalog/pg_collation.h"
+#include "utils/typcache.h"
+
+#include "parser/parsetree.h"
+
+
+#include <stdio.h>
 
 /*
  * Data structure for accumulating info about possible range-query
@@ -43,6 +51,16 @@ static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
 			   bool varonleft, bool isLTsel, Selectivity s2);
 
 
+static bool clause_is_mv_compatible(PlannerInfo *root, Node *clause, Oid varRelid,
+							 Oid *relid, AttrNumber *attnum, SpecialJoinInfo *sjinfo);
+
+static Bitmapset  *collect_mv_attnums(PlannerInfo *root, List *clauses,
+									  Oid varRelid, Oid *relid, SpecialJoinInfo *sjinfo);
+
+static List *clauselist_apply_dependencies(PlannerInfo *root, List *clauses,
+								Oid varRelid, int nmvstats, MVStats mvstats,
+								SpecialJoinInfo *sjinfo);
+
 /****************************************************************************
  *		ROUTINES TO COMPUTE SELECTIVITIES
  ****************************************************************************/
@@ -61,7 +79,7 @@ static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
  * subclauses.  However, that's only right if the subclauses have independent
  * probabilities, and in reality they are often NOT independent.  So,
  * we want to be smarter where we can.
-
+ *
  * Currently, the only extra smarts we have is to recognize "range queries",
  * such as "x > 34 AND x < 42".  Clauses are recognized as possible range
  * query components if they are restriction opclauses whose operators have
@@ -88,6 +106,76 @@ static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
  *
  * Of course this is all very dependent on the behavior of
  * scalarltsel/scalargtsel; perhaps some day we can generalize the approach.
+ *
+ *
+ * Multivariate statististics
+ * --------------------------
+ * This also uses multivariate stats to estimate combinations of conditions,
+ * in a way attempting to minimize the overhead when there are no suitable
+ * multivariate stats.
+ *
+ * The following checks are performed (in this order), and the optimizer
+ * falls back to regular stats on the first 'false'.
+ *
+ * NOTE: This explains how this works with all the patches applied, not
+ *       just the functional dependencies.
+ *
+ * (1) check that at least two columns are referenced from conditions
+ *     compatible with multivariate stats
+ *
+ *     If there are no conditions that might be handled by multivariate
+ *     stats, or if the conditions reference just a single column, it
+ *     makes no sense to use multivariate stats.
+ *
+ *     What conditions are compatible with multivariate stats is decided
+ *     by clause_is_mv_compatible(). At this moment, only simple conditions
+ *     of the form "column operator constant" (for simple comparison
+ *     operators), and IS NULL / IS NOT NULL are considered compatible
+ *     with multivariate statistics.
+ *
+ * (2) reduce the clauses using functional dependencies
+ *
+ *     This simply attempts to 'reduce' the clauses by applying functional
+ *     dependencies. For example if there are two clauses:
+ *
+ *         WHERE (a = 1) AND (b = 2)
+ *
+ *     and we know that 'a' determines the value of 'b', we may remove
+ *     the second condition (b = 2) when computing the selectivity.
+ *     This is of course tricky - see mvstats/dependencies.c for details.
+ *
+ *     After the reduction, step (1) is to be repeated.
+ *
+ * (3) check if there are multivariate stats built on the columns
+ *
+ *     If there are no multivariate statistics, we have to fall back to
+ *     the regular stats. We might perform checks (1) and (2) in reverse
+ *     order, i.e. first check if there are multivariate statistics and
+ *     then collect the attributes only if needed. The assumption is
+ *     that checking the clauses is cheaper than querying the catalog,
+ *     so this check is performed first.
+ *
+ * (4) choose the stats matching the most columns (at least two)
+ *
+ *     If there are multiple instances of multivariate statistics (e.g.
+ *     built on different sets of columns), we choose the stats covering
+ *     the most columns from step (1). It may happen that all available
+ *     stats match just a single column - for example with conditions
+ *
+ *         WHERE a = 1 AND b = 2
+ *
+ *     and statistics built on (a,c) and (b,c). In such case just fall
+ *     back to the regular stats because it makes no sense to use the
+ *     multivariate statistics.
+ *
+ *     This selection criteria (the most columns) is certainly very
+ *     simple and definitely not optimal - it's simple to come up with
+ *     examples where other approaches work better. More about this
+ *     at choose_mv_statistics().
+ *
+ * (5) use the multivariate stats to estimate matching clauses
+ *
+ * (6) estimate the remaining clauses using the regular statistics
  */
 Selectivity
 clauselist_selectivity(PlannerInfo *root,
@@ -100,6 +188,14 @@ clauselist_selectivity(PlannerInfo *root,
 	RangeQueryClause *rqlist = NULL;
 	ListCell   *l;
 
+	/* processing mv stats */
+	Oid			relid = InvalidOid;
+	int			nmvstats = 0;
+	MVStats		mvstats = NULL;
+
+	/* attributes in mv-compatible clauses */
+	Bitmapset  *mvattnums = NULL;
+
 	/*
 	 * If there's exactly one clause, then no use in trying to match up pairs,
 	 * so just go directly to clause_selectivity().
@@ -108,6 +204,28 @@ clauselist_selectivity(PlannerInfo *root,
 		return clause_selectivity(root, (Node *) linitial(clauses),
 								  varRelid, jointype, sjinfo);
 
+	/* collect attributes referenced by mv-compatible clauses */
+	mvattnums = collect_mv_attnums(root, clauses, varRelid, &relid, sjinfo);
+
+	/*
+	 * If there are mv-compatible clauses, referencing at least two
+	 * different columns (otherwise it makes no sense to use mv stats),
+	 * try to reduce the clauses using functional dependencies, and
+	 * recollect the attributes from the reduced list.
+	 *
+	 * We don't need to select a single statistics for this - we can
+	 * apply all the functional dependencies we have.
+	 */
+	if (bms_num_members(mvattnums) >= 2)
+	{
+		/* fetch info from the catalog (not the serialized stats yet) */
+		mvstats = list_mv_stats(relid, &nmvstats, true);
+
+		/* reduce clauses by applying functional dependencies rules */
+		clauses = clauselist_apply_dependencies(root, clauses, varRelid,
+												nmvstats, mvstats, sjinfo);
+	}
+
 	/*
 	 * Initial scan over clauses.  Anything that doesn't look like a potential
 	 * rangequery clause gets multiplied into s1 and forgotten. Anything that
@@ -782,3 +900,533 @@ clause_selectivity(PlannerInfo *root,
 
 	return s1;
 }
+
+/*
+ * Collect attributes from mv-compatible clauses.
+ */
+static Bitmapset *
+collect_mv_attnums(PlannerInfo *root, List *clauses, Oid varRelid,
+				   Oid *relid, SpecialJoinInfo *sjinfo)
+{
+	Bitmapset  *attnums = NULL;
+	ListCell   *l;
+
+	/*
+	 * Walk through the clauses and identify the ones we can estimate
+	 * using multivariate stats, and remember the relid/columns. We'll
+	 * then cross-check if we have suitable stats, and only if needed
+	 * we'll split the clauses into multivariate and regular lists.
+	 *
+	 * For now we're only interested in RestrictInfo nodes with nested
+	 * OpExpr, using either a range or equality.
+	 */
+	foreach (l, clauses)
+	{
+		AttrNumber attnum;
+		Node	   *clause = (Node *) lfirst(l);
+
+		/* ignore the result for now - we only need the info */
+		if (clause_is_mv_compatible(root, clause, varRelid, relid, &attnum, sjinfo))
+			attnums = bms_add_member(attnums, attnum);
+	}
+
+	/*
+	 * If there are not at least two attributes referenced by the clause(s),
+	 * we can throw everything out (as we'll revert to simple stats).
+	 */
+	if (bms_num_members(attnums) <= 1)
+	{
+		if (attnums != NULL)
+			pfree(attnums);
+		attnums = NULL;
+		*relid = InvalidOid;
+	}
+
+	return attnums;
+}
+
+/*
+ * Determines whether the clause is compatible with multivariate stats,
+ * and if it is, returns some additional information - varno (index
+ * into simple_rte_array) and a bitmap of attributes. This is then
+ * used to fetch related multivariate statistics.
+ *
+ * At this moment we only support basic conditions of the form
+ *
+ *     variable OP constant
+ *
+ * where OP is one of [=,<,<=,>=,>] (which is however determined by
+ * looking at the associated function for estimating selectivity, just
+ * like with the single-dimensional case).
+ *
+ * TODO Support 'OR clauses' - shouldn't be all that difficult to
+ *      evaluate them using multivariate stats.
+ */
+static bool
+clause_is_mv_compatible(PlannerInfo *root, Node *clause, Oid varRelid,
+						Oid *relid, AttrNumber *attnum, SpecialJoinInfo *sjinfo)
+{
+
+	if (IsA(clause, RestrictInfo))
+	{
+		RestrictInfo *rinfo = (RestrictInfo *) clause;
+
+		/* Pseudoconstants are not really interesting here. */
+		if (rinfo->pseudoconstant)
+			return false;
+
+		/* no support for OR clauses at this point */
+		if (rinfo->orclause)
+			return false;
+
+		/* get the actual clause from the RestrictInfo (it's not an OR clause) */
+		clause = (Node*)rinfo->clause;
+
+		/* only simple opclauses are compatible with multivariate stats */
+		if (! is_opclause(clause))
+			return false;
+
+		/* we don't support join conditions at this moment */
+		if (treat_as_join_clause(clause, rinfo, varRelid, sjinfo))
+			return false;
+
+		/* is it 'variable op constant' ? */
+		if (list_length(((OpExpr *) clause)->args) == 2)
+		{
+			OpExpr	   *expr = (OpExpr *) clause;
+			bool		varonleft = true;
+			bool		ok;
+
+			ok = (bms_membership(rinfo->clause_relids) == BMS_SINGLETON) &&
+				(is_pseudo_constant_clause_relids(lsecond(expr->args),
+												rinfo->right_relids) ||
+				(varonleft = false,
+				is_pseudo_constant_clause_relids(linitial(expr->args),
+												rinfo->left_relids)));
+
+			if (ok)
+			{
+				RangeTblEntry * rte;
+				Var * var = (varonleft) ? linitial(expr->args) : lsecond(expr->args);
+
+				/*
+				 * Simple variables only - otherwise the planner_rt_fetch seems to fail
+				 * (return NULL).
+				 *
+				 * TODO Maybe use examine_variable() would fix that?
+				 */
+				if (! (IsA(var, Var) && (varRelid == 0 || varRelid == var->varno)))
+					return false;
+
+				/*
+				 * Only consider this variable if (varRelid == 0) or when the varno
+				 * matches varRelid (see explanation at clause_selectivity).
+				 *
+				 * FIXME I suspect this may not be really necessary. The (varRelid == 0)
+				 *       part seems to be enforced by treat_as_join_clause().
+				 */
+				if (! ((varRelid == 0) || (varRelid == var->varno)))
+					return false;
+
+				/* Also skip special varno values, and system attributes ... */
+				if ((IS_SPECIAL_VARNO(var->varno)) || (! AttrNumberIsForUserDefinedAttr(var->varattno)))
+					return false;
+
+				/* Lookup info about the base relation (we need to pass the OID out) */
+				rte = planner_rt_fetch(var->varno, root);
+				*relid = rte->relid;
+
+				/*
+				 * If it's not a "<" or ">" or "=" operator, just ignore the
+				 * clause. Otherwise note the relid and attnum for the variable.
+				 * This uses the function for estimating selectivity, ont the
+				 * operator directly (a bit awkward, but well ...).
+				 */
+				switch (get_oprrest(expr->opno))
+					{
+						case F_EQSEL:
+							*attnum = var->varattno;
+							return true;
+					}
+			}
+		}
+	}
+
+	return false;
+
+}
+
+/*
+ * Performs reduction of clauses using functional dependencies, i.e.
+ * removes clauses that are considered redundant. It simply walks
+ * through dependencies, and checks whether the dependency 'matches'
+ * the clauses, i.e. if there's a clause matching the condition. If yes,
+ * all clauses matching the implied part of the dependency are removed
+ * from the list.
+ *
+ * This simply looks at attnums references by the clauses, not at the
+ * type of the operator (equality, inequality, ...). This may not be the
+ * right way to do - it certainly works best for equalities, which is
+ * naturally consistent with functional dependencies (implications).
+ * It's not clear that other operators are handled sensibly - for
+ * example for inequalities, like
+ *
+ *     WHERE (A >= 10) AND (B <= 20)
+ *
+ * and a trivial case where [A == B], resulting in symmetric pair of
+ * rules [A => B], [B => A], it's rather clear we can't remove either of
+ * those clauses.
+ *
+ * That only highlights that functional dependencies are most suitable
+ * for label-like data, where using non-equality operators is very rare.
+ * Using the common city/zipcode example, clauses like
+ *
+ *     (zipcode <= 12345)
+ *
+ * or
+ *
+ *     (cityname >= 'Washington')
+ *
+ * are rare. So restricting the reduction to equality should not harm
+ * the usefulness / applicability.
+ *
+ * The other assumption is that this assumes 'compatible' clauses. For
+ * example by using mismatching zip code and city name, this is unable
+ * to identify the discrepancy and eliminates one of the clauses. The
+ * usual approach (multiplying both selectivities) thus produces a more
+ * accurate estimate, although mostly by luck - the multiplication
+ * comes from assumption of statistical independence of the two
+ * conditions (which is not not valid in this case), but moves the
+ * estimate in the right direction (towards 0%).
+ *
+ * This might be somewhat improved by cross-checking the selectivities
+ * against MCV and/or histogram.
+ *
+ * The implementation needs to be careful about cyclic rules, i.e. rules
+ * like [A => B] and [B => A] at the same time. This must not reduce
+ * clauses on both attributes at the same time.
+ *
+ * Technically we might consider selectivities here too, somehow. E.g.
+ * when (A => B) and (B => A), we might use the clauses with minimum
+ * selectivity.
+ *
+ * TODO Consider restricting the reduction to equality clauses. Or maybe
+ *      use equality classes somehow?
+ *
+ * TODO Merge this docs to dependencies.c, as it's saying mostly the
+ *      same things as the comments there.
+ */
+static List *
+clauselist_apply_dependencies(PlannerInfo *root, List *clauses, Oid varRelid,
+					int nmvstats, MVStats mvstats, SpecialJoinInfo *sjinfo)
+{
+	int i;
+	ListCell *lc;
+	List * reduced_clauses = NIL;
+	Oid	relid;
+
+	/*
+	 * preallocate space for all clauses, including non-mv-compatible,
+	 * so that we don't need to reallocate the arrays repeatedly
+	 *
+	 * XXX This assumes each clause references exactly one Var, so the
+	 *     arrays are sized accordingly - for functional dependencies
+	 *     this is safe, because it only works with Var=Const.
+	 */
+	bool	   *reduced;
+	AttrNumber *mvattnums;
+	Node	  **mvclauses;
+	int			nmvclauses = 0;	/* number clauses in the arrays */
+
+	/*
+	 * matrix of (natts x natts), 1 means x=>y
+	 *
+	 * This serves two purposes - first, it merges dependencies from all
+	 * the statistics, second it makes generating all the transitive
+	 * dependencies easier.
+	 *
+	 * We need to build this only for attributes from the dependencies,
+	 * not for all attributes in the table.
+	 *
+	 * We can't do that only for attributes from the clauses, because we
+	 * want to build transitive dependencies (including those going
+	 * through attributes not listed in the stats).
+	 *
+	 * This only works for A=>B dependencies, not sure how to do that
+	 * for complex dependencies.
+	 */
+	bool       *deps_matrix;
+	int			deps_natts;	/* size of the matric */
+
+	/* mapping attnum <=> matrix index */
+	int		   *deps_idx_to_attnum;
+	int		   *deps_attnum_to_idx;
+
+	/* attnums in dependencies and clauses (and intersection) */
+	Bitmapset  *deps_attnums   = NULL;
+	Bitmapset  *clause_attnums = NULL;
+	Bitmapset  *intersect_attnums = NULL;
+
+	int			attnum, attidx, attnum_max;
+
+	bool		has_deps_built = false;
+
+	/* see if there's at least one statistics with dependencies */
+	for (i = 0; i < nmvstats; i++)
+	{
+		if (mvstats[i].deps_built)
+		{
+			has_deps_built = true;
+			break;
+		}
+	}
+
+	/* no dependencies available - return the original clauses */
+	if (! has_deps_built)
+		return clauses;
+
+	mvclauses = (Node**)palloc0(list_length(clauses) * sizeof(Node*));
+	mvattnums = (AttrNumber*)palloc0(list_length(clauses) * sizeof(AttrNumber));
+
+	/*
+	 * Walk through the clauses - clauses that are not mv-compatible copy
+	 * directly into the result list, and mv-compatible ones store into
+	 * an array of clauses (and remember the attnumb in another array).
+	 */
+	foreach (lc, clauses)
+	{
+		AttrNumber attnum;
+		Node	   *clause = (Node *) lfirst(lc);
+		if (! clause_is_mv_compatible(root, clause, varRelid, &relid, &attnum, sjinfo))
+			reduced_clauses = lappend(reduced_clauses, clause);
+		else
+		{
+			mvclauses[nmvclauses] = clause;
+			mvattnums[nmvclauses] = attnum;
+			nmvclauses++;
+
+			clause_attnums = bms_add_member(clause_attnums, attnum);
+		}
+	}
+
+	/*
+	 * we need at least two clauses referencing two different attributes
+	 * referencing to do the reduction
+	 */
+	if ((nmvclauses < 2) || (bms_num_members(clause_attnums) < 2))
+	{
+		pfree(mvattnums);
+		pfree(mvclauses);
+
+		bms_free(clause_attnums);
+		list_free(reduced_clauses);
+
+		return clauses;
+	}
+
+	reduced = (bool*)palloc0(list_length(clauses) * sizeof(bool));
+
+	/* build the dependency matrix */
+	attnum_max = -1;
+	for (i = 0; i < nmvstats; i++)
+	{
+		int j;
+		int2vector *stakeys = mvstats[i].stakeys;
+
+		/* skip stats without functional dependencies built */
+		if (! mvstats[i].deps_built)
+			continue;
+
+		for (j = 0; j < stakeys->dim1; j++)
+		{
+			int attnum = stakeys->values[j];
+			deps_attnums = bms_add_member(deps_attnums, attnum);
+
+			/* keep the max attnum in the dependencies */
+			attnum_max = (attnum > attnum_max) ? attnum : attnum_max;
+		}
+	}
+
+	/*
+	 * We need at least two matching attributes in the clauses and
+	 * dependencies, otherwise we can't reduce anything.
+	 */
+	intersect_attnums = bms_intersect(clause_attnums, deps_attnums);
+	if (bms_num_members(intersect_attnums) < 2)
+	{
+		pfree(mvattnums);
+		pfree(mvclauses);
+
+		bms_free(clause_attnums);
+		bms_free(deps_attnums);
+		bms_free(intersect_attnums);
+
+		list_free(reduced_clauses);
+
+		return clauses;
+	}
+
+	/* allocate the matrix and mappings */
+	deps_natts  = bms_num_members(deps_attnums);
+	deps_matrix = (bool*)palloc0(deps_natts * deps_natts * sizeof(int));
+	deps_idx_to_attnum = (int*)palloc0(deps_natts * sizeof(int));
+	deps_attnum_to_idx = (int*)palloc0((attnum_max+1) * sizeof(int));
+
+	/* build the (attnum => attidx) and (attidx => attnum) mappings */
+	attidx = 0;
+	attnum = -1;
+
+	while (true)
+	{
+		attnum = bms_next_member(deps_attnums, attnum);
+		if (attnum == -2)
+			break;
+
+		deps_idx_to_attnum[attidx] = attnum;
+		deps_attnum_to_idx[attnum] = attidx;
+
+		attidx += 1;
+	}
+
+	/* do we have all the attributes mapped? */
+	Assert(attidx == deps_natts);
+
+	/* walk through all the mvstats, build the adjacency matrix */
+	for (i = 0; i < nmvstats; i++)
+	{
+		int j;
+		MVDependencies dependencies = NULL;
+
+		/* skip stats without functional dependencies built */
+		if (! mvstats[i].deps_built)
+			continue;
+
+		/* fetch dependencies */
+		dependencies = deserialize_mv_dependencies(fetch_mv_dependencies(mvstats[i].mvoid));
+		if (dependencies == NULL)
+			continue;
+
+		/* set deps_matrix[a,b] to 'true' if 'a=>b' */
+		for (j = 0; j < dependencies->ndeps; j++)
+		{
+			int aidx = deps_attnum_to_idx[dependencies->deps[j]->a];
+			int bidx = deps_attnum_to_idx[dependencies->deps[j]->b];
+
+			/* a=> b */
+			deps_matrix[aidx * deps_natts + bidx] = true;
+		}
+	}
+
+	/*
+	 * Multiply the matrix N-times (N = size of the matrix), so that we
+	 * get all the transitive dependencies. That makes the next step
+	 * much easier and faster.
+	 *
+	 * This is essentially an adjacency matrix from graph theory, and
+	 * by multiplying it we get transitive edges. We don't really care
+	 * about the exact number (number of paths between vertices) though,
+	 * so we can do the multiplication in-place (we don't care whether
+	 * we found the dependency in this round or in the previous one).
+	 *
+	 * Track how many new dependencies were added, and stop when 0, but
+	 * we can't multiply more than N-times (longest path in the graph).
+	 */
+	for (i = 0; i < deps_natts; i++)
+	{
+		int k, l, m;
+		int nchanges = 0;
+
+		/* k => l */
+		for (k = 0; k < deps_natts; k++)
+		{
+			for (l = 0; l < deps_natts; l++)
+			{
+				/* we already have this dependency */
+				if (deps_matrix[k * deps_natts + l])
+					continue;
+
+				/* we don't really care about the exact value, just 0/1 */
+				for (m = 0; m < deps_natts; m++)
+				{
+					if (deps_matrix[k * deps_natts + m] * deps_matrix[m * deps_natts + l])
+					{
+						deps_matrix[k * deps_natts + l] = true;
+						nchanges += 1;
+						break;
+					}
+				}
+			}
+		}
+
+		/* no transitive dependency added here, so terminate */
+		if (nchanges == 0)
+			break;
+	}
+
+	/*
+	 * Walk through the clauses, and see which other clauses we may
+	 * reduce. The matrix contains all transitive dependencies, which
+	 * makes this very fast.
+	 *
+	 * We have to be careful not to reduce the clause using itself, or
+	 * reducing all clauses forming a cycle (so we have to skip already
+	 * eliminated clauses).
+	 *
+	 * I'm not sure whether this guarantees finding the best solution,
+	 * i.e. reducing the most clauses, but it probably does (thanks to
+	 * having all the transitive dependencies).
+	 */
+	for (i = 0; i < nmvclauses; i++)
+	{
+		int j;
+
+		/* not covered by dependencies */
+		if (! bms_is_member(mvattnums[i], deps_attnums))
+			continue;
+
+		/* this clause was already reduced, so let's skip it */
+		if (reduced[i])
+			continue;
+
+		/* walk the potentially 'implied' clauses */
+		for (j = 0; j < nmvclauses; j++)
+		{
+			int aidx, bidx;
+
+			/* not covered by dependencies */
+			if (! bms_is_member(mvattnums[j], deps_attnums))
+				continue;
+
+			aidx = deps_attnum_to_idx[mvattnums[i]];
+			bidx = deps_attnum_to_idx[mvattnums[j]];
+
+			/* can't reduce the clause by itself, or if already reduced */
+			if ((i == j) || reduced[j])
+				continue;
+
+			/* mark the clause as reduced (if aidx => bidx) */
+			reduced[j] = deps_matrix[aidx * deps_natts + bidx];
+		}
+	}
+
+	/* now walk through the clauses, and keep only those not reduced */
+	for (i = 0; i < nmvclauses; i++)
+	{
+		if (! reduced[i])
+			reduced_clauses = lappend(reduced_clauses, mvclauses[i]);
+	}
+
+	pfree(reduced);
+	pfree(mvclauses);
+	pfree(mvattnums);
+
+	pfree(deps_matrix);
+	pfree(deps_idx_to_attnum);
+	pfree(deps_attnum_to_idx);
+
+	bms_free(deps_attnums);
+	bms_free(clause_attnums);
+	bms_free(intersect_attnums);
+
+	return reduced_clauses;
+}
diff --git a/src/backend/utils/mvstats/common.c b/src/backend/utils/mvstats/common.c
index 8efc5ba..d44b95a 100644
--- a/src/backend/utils/mvstats/common.c
+++ b/src/backend/utils/mvstats/common.c
@@ -57,7 +57,8 @@ build_mv_stats(Relation onerel, int numrows, HeapTuple *rows,
 		/*
 		 * Analyze functional dependencies of columns.
 		 */
-		deps = build_mv_dependencies(numrows, rows, attrs, stats);
+		if (mvstats->deps_enabled)
+			deps = build_mv_dependencies(numrows, rows, attrs, stats);
 
 		/* store the histogram / MCV list in the catalog */
 		update_mv_stats(mvstats[i].mvoid, deps);
@@ -154,6 +155,7 @@ list_mv_stats(Oid relid, int *nstats, bool built_only)
 
 		result[*nstats].mvoid = HeapTupleGetOid(htup);
 		result[*nstats].stakeys = buildint2vector(stats->stakeys.values, stats->stakeys.dim1);
+		result[*nstats].deps_enabled = stats->deps_enabled;
 		result[*nstats].deps_built = stats->deps_built;
 		*nstats += 1;
 	}
@@ -260,6 +262,7 @@ compare_scalars_partition(const void *a, const void *b, void *arg)
 	return ApplySortComparator(da, false, db, false, ssup);
 }
 
+
 /* initialize multi-dimensional sort */
 MultiSortSupport
 multi_sort_init(int ndims)
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index f728d88..2916f11 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -2712,9 +2712,9 @@ DESCR("current user privilege on any column by rel name");
 DATA(insert OID = 3029 (  has_any_column_privilege	   PGNSP PGUID 12 10 0 0 0 f f f f t f s 2 0 16 "26 25" _null_ _null_ _null_ _null_ has_any_column_privilege_id _null_ _null_ _null_ ));
 DESCR("current user privilege on any column by rel oid");
 
-DATA(insert OID = 3284 (  pg_mv_stats_dependencies_info     PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 25 "17" _null_ _null_ _null_ _null_ pg_mv_stats_dependencies_info _null_ _null_ _null_ ));
+DATA(insert OID = 3377 (  pg_mv_stats_dependencies_info     PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 25 "17" _null_ _null_ _null_ _null_ pg_mv_stats_dependencies_info _null_ _null_ _null_ ));
 DESCR("multivariate stats: functional dependencies info");
-DATA(insert OID = 3285 (  pg_mv_stats_dependencies_show     PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 25 "17" _null_ _null_ _null_ _null_ pg_mv_stats_dependencies_show _null_ _null_ _null_ ));
+DATA(insert OID = 3378 (  pg_mv_stats_dependencies_show     PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 25 "17" _null_ _null_ _null_ _null_ pg_mv_stats_dependencies_show _null_ _null_ _null_ ));
 DESCR("multivariate stats: functional dependencies show");
 
 DATA(insert OID = 1928 (  pg_stat_get_numscans			PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_numscans _null_ _null_ _null_ ));
diff --git a/src/include/utils/mvstats.h b/src/include/utils/mvstats.h
index 5c8643d..ec6764b 100644
--- a/src/include/utils/mvstats.h
+++ b/src/include/utils/mvstats.h
@@ -18,24 +18,34 @@
 
 /*
  * Basic info about the stats, used when choosing what to use
- *
- * TODO Add info about what statistics is available (histogram, MCV,
- *      hashed MCV, functional dependencies).
  */
 typedef struct MVStatsData {
 	Oid			mvoid;		/* OID of the stats in pg_mv_statistic */
 	int2vector *stakeys;	/* attnums for columns in the stats */
+
+	/* statistics requested in ALTER TABLE ... ADD STATISTICS */
+	bool		deps_enabled;	/* analyze functional dependencies */
+
+	/* available statistics (computed by ANALYZE) */
 	bool		deps_built;	/* functional dependencies available */
 } MVStatsData;
 
 typedef struct MVStatsData *MVStats;
 
+/*
+ * Degree of how much MCV item / histogram bucket matches a clause.
+ * This is then considered when computing the selectivity.
+ */
+#define MVSTATS_MATCH_NONE		0		/* no match at all */
+#define MVSTATS_MATCH_PARTIAL	1		/* partial match */
+#define MVSTATS_MATCH_FULL		2		/* full match */
 
 #define MVSTATS_MAX_DIMENSIONS	8		/* max number of attributes */
 
-/* An associative rule, tracking [a => b] dependency.
- *
- * TODO Make this work with multiple columns on both sides.
+
+/*
+ * Functional dependencies, tracking column-level relationships (values
+ * in one column determine values in another one).
  */
 typedef struct MVDependencyData {
 	int16	a;
@@ -61,6 +71,7 @@ typedef MVDependenciesData* MVDependencies;
  *      stats specified using flags (or something like that).
  */
 MVStats list_mv_stats(Oid relid, int *nstats, bool built_only);
+bytea * fetch_mv_rules(Oid mvoid);
 
 bytea * fetch_mv_dependencies(Oid mvoid);
 
diff --git a/src/test/regress/expected/mv_dependencies.out b/src/test/regress/expected/mv_dependencies.out
new file mode 100644
index 0000000..159d317
--- /dev/null
+++ b/src/test/regress/expected/mv_dependencies.out
@@ -0,0 +1,175 @@
+-- data type passed by value
+CREATE TABLE functional_dependencies (
+    a INT,
+    b INT,
+    c INT
+);
+-- unknown column
+ALTER TABLE functional_dependencies ADD STATISTICS (dependencies) ON (unknown_column);
+ERROR:  column "unknown_column" referenced in statistics does not exist
+-- single column
+ALTER TABLE functional_dependencies ADD STATISTICS (dependencies) ON (a);
+ERROR:  multivariate stats require 2 or more columns
+-- single column, duplicated
+ALTER TABLE functional_dependencies ADD STATISTICS (dependencies) ON (a, a);
+ERROR:  duplicate column name in statistics definition
+-- two columns, one duplicated
+ALTER TABLE functional_dependencies ADD STATISTICS (dependencies) ON (a, a, b);
+ERROR:  duplicate column name in statistics definition
+-- unknown option
+ALTER TABLE functional_dependencies ADD STATISTICS (unknown_option) ON (a, b, c);
+ERROR:  unrecognized STATISTICS option "unknown_option"
+-- correct command
+ALTER TABLE functional_dependencies ADD STATISTICS (dependencies) ON (a, b, c);
+-- random data (no functional dependencies)
+INSERT INTO functional_dependencies
+     SELECT mod(i, 111), mod(i, 123), mod(i, 23) FROM generate_series(1,10000) s(i);
+ANALYZE functional_dependencies;
+SELECT deps_enabled, deps_built, pg_mv_stats_dependencies_show(stadeps)
+  FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+ deps_enabled | deps_built | pg_mv_stats_dependencies_show 
+--------------+------------+-------------------------------
+ t            | f          | 
+(1 row)
+
+TRUNCATE functional_dependencies;
+-- a => b, a => c, b => c
+INSERT INTO functional_dependencies
+     SELECT i/10, i/100, i/200 FROM generate_series(1,10000) s(i);
+ANALYZE functional_dependencies;
+SELECT deps_enabled, deps_built, pg_mv_stats_dependencies_show(stadeps)
+  FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+ deps_enabled | deps_built | pg_mv_stats_dependencies_show 
+--------------+------------+-------------------------------
+ t            | t          | 1 => 2, 1 => 3, 2 => 3
+(1 row)
+
+TRUNCATE functional_dependencies;
+-- a => b, a => c
+INSERT INTO functional_dependencies
+     SELECT i/10, i/150, i/200 FROM generate_series(1,10000) s(i);
+ANALYZE functional_dependencies;
+SELECT deps_enabled, deps_built, pg_mv_stats_dependencies_show(stadeps)
+  FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+ deps_enabled | deps_built | pg_mv_stats_dependencies_show 
+--------------+------------+-------------------------------
+ t            | t          | 1 => 2, 1 => 3
+(1 row)
+
+TRUNCATE functional_dependencies;
+-- check explain (expect bitmap index scan, not plain index scan)
+INSERT INTO functional_dependencies
+     SELECT i/10000, i/20000, i/40000 FROM generate_series(1,1000000) s(i);
+CREATE INDEX fdeps_idx ON functional_dependencies (a, b);
+ANALYZE functional_dependencies;
+SELECT deps_enabled, deps_built, pg_mv_stats_dependencies_show(stadeps)
+  FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+ deps_enabled | deps_built | pg_mv_stats_dependencies_show 
+--------------+------------+-------------------------------
+ t            | t          | 1 => 2, 1 => 3, 2 => 3
+(1 row)
+
+EXPLAIN (COSTS off)
+ SELECT * FROM functional_dependencies WHERE a = 10 AND b = 5;
+                 QUERY PLAN                  
+---------------------------------------------
+ Bitmap Heap Scan on functional_dependencies
+   Recheck Cond: ((a = 10) AND (b = 5))
+   ->  Bitmap Index Scan on fdeps_idx
+         Index Cond: ((a = 10) AND (b = 5))
+(4 rows)
+
+DELETE FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+DROP TABLE functional_dependencies;
+-- varlena type (text)
+CREATE TABLE functional_dependencies (
+    a TEXT,
+    b TEXT,
+    c TEXT
+);
+ALTER TABLE functional_dependencies ADD STATISTICS (dependencies) ON (a, b, c);
+-- random data (no functional dependencies)
+INSERT INTO functional_dependencies
+     SELECT mod(i, 111), mod(i, 123), mod(i, 23) FROM generate_series(1,10000) s(i);
+ANALYZE functional_dependencies;
+SELECT deps_enabled, deps_built, pg_mv_stats_dependencies_show(stadeps)
+  FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+ deps_enabled | deps_built | pg_mv_stats_dependencies_show 
+--------------+------------+-------------------------------
+ t            | f          | 
+(1 row)
+
+TRUNCATE functional_dependencies;
+-- a => b, a => c, b => c
+INSERT INTO functional_dependencies
+     SELECT i/10, i/100, i/200 FROM generate_series(1,10000) s(i);
+ANALYZE functional_dependencies;
+SELECT deps_enabled, deps_built, pg_mv_stats_dependencies_show(stadeps)
+  FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+ deps_enabled | deps_built | pg_mv_stats_dependencies_show 
+--------------+------------+-------------------------------
+ t            | t          | 1 => 2, 1 => 3, 2 => 3
+(1 row)
+
+TRUNCATE functional_dependencies;
+-- a => b, a => c
+INSERT INTO functional_dependencies
+     SELECT i/10, i/150, i/200 FROM generate_series(1,10000) s(i);
+ANALYZE functional_dependencies;
+SELECT deps_enabled, deps_built, pg_mv_stats_dependencies_show(stadeps)
+  FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+ deps_enabled | deps_built | pg_mv_stats_dependencies_show 
+--------------+------------+-------------------------------
+ t            | t          | 1 => 2, 1 => 3
+(1 row)
+
+TRUNCATE functional_dependencies;
+-- check explain (expect bitmap index scan, not plain index scan)
+INSERT INTO functional_dependencies
+     SELECT i/10000, i/20000, i/40000 FROM generate_series(1,1000000) s(i);
+CREATE INDEX fdeps_idx ON functional_dependencies (a, b);
+ANALYZE functional_dependencies;
+SELECT deps_enabled, deps_built, pg_mv_stats_dependencies_show(stadeps)
+  FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+ deps_enabled | deps_built | pg_mv_stats_dependencies_show 
+--------------+------------+-------------------------------
+ t            | t          | 1 => 2, 1 => 3, 2 => 3
+(1 row)
+
+EXPLAIN (COSTS off)
+ SELECT * FROM functional_dependencies WHERE a = '10' AND b = '5';
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Bitmap Heap Scan on functional_dependencies
+   Recheck Cond: ((a = '10'::text) AND (b = '5'::text))
+   ->  Bitmap Index Scan on fdeps_idx
+         Index Cond: ((a = '10'::text) AND (b = '5'::text))
+(4 rows)
+
+DELETE FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+DROP TABLE functional_dependencies;
+-- NULL values (mix of int and text columns)
+CREATE TABLE functional_dependencies (
+    a INT,
+    b TEXT,
+    c INT,
+    d TEXT
+);
+ALTER TABLE functional_dependencies ADD STATISTICS (dependencies) ON (a, b, c, d);
+INSERT INTO functional_dependencies
+     SELECT
+         mod(i, 100),
+         (CASE WHEN mod(i, 200) = 0 THEN NULL ELSE mod(i,200) END),
+         mod(i, 400),
+         (CASE WHEN mod(i, 300) = 0 THEN NULL ELSE mod(i,600) END)
+     FROM generate_series(1,10000) s(i);
+ANALYZE functional_dependencies;
+SELECT deps_enabled, deps_built, pg_mv_stats_dependencies_show(stadeps)
+  FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+ deps_enabled | deps_built |     pg_mv_stats_dependencies_show      
+--------------+------------+----------------------------------------
+ t            | t          | 2 => 1, 3 => 1, 3 => 2, 4 => 1, 4 => 2
+(1 row)
+
+DELETE FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+DROP TABLE functional_dependencies;
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 6d3b865..00c6ddf 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -109,3 +109,6 @@ test: event_trigger
 
 # run stats by itself because its delay may be insufficient under heavy load
 test: stats
+
+# run tests of multivariate stats
+test: mv_dependencies
diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule
index 8326894..b818be9 100644
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -153,3 +153,4 @@ test: with
 test: xml
 test: event_trigger
 test: stats
+test: mv_dependencies
diff --git a/src/test/regress/sql/mv_dependencies.sql b/src/test/regress/sql/mv_dependencies.sql
new file mode 100644
index 0000000..f95dbf5
--- /dev/null
+++ b/src/test/regress/sql/mv_dependencies.sql
@@ -0,0 +1,153 @@
+-- data type passed by value
+CREATE TABLE functional_dependencies (
+    a INT,
+    b INT,
+    c INT
+);
+
+-- unknown column
+ALTER TABLE functional_dependencies ADD STATISTICS (dependencies) ON (unknown_column);
+
+-- single column
+ALTER TABLE functional_dependencies ADD STATISTICS (dependencies) ON (a);
+
+-- single column, duplicated
+ALTER TABLE functional_dependencies ADD STATISTICS (dependencies) ON (a, a);
+
+-- two columns, one duplicated
+ALTER TABLE functional_dependencies ADD STATISTICS (dependencies) ON (a, a, b);
+
+-- unknown option
+ALTER TABLE functional_dependencies ADD STATISTICS (unknown_option) ON (a, b, c);
+
+-- correct command
+ALTER TABLE functional_dependencies ADD STATISTICS (dependencies) ON (a, b, c);
+
+-- random data (no functional dependencies)
+INSERT INTO functional_dependencies
+     SELECT mod(i, 111), mod(i, 123), mod(i, 23) FROM generate_series(1,10000) s(i);
+
+ANALYZE functional_dependencies;
+
+SELECT deps_enabled, deps_built, pg_mv_stats_dependencies_show(stadeps)
+  FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+
+TRUNCATE functional_dependencies;
+
+-- a => b, a => c, b => c
+INSERT INTO functional_dependencies
+     SELECT i/10, i/100, i/200 FROM generate_series(1,10000) s(i);
+
+ANALYZE functional_dependencies;
+
+SELECT deps_enabled, deps_built, pg_mv_stats_dependencies_show(stadeps)
+  FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+
+TRUNCATE functional_dependencies;
+
+-- a => b, a => c
+INSERT INTO functional_dependencies
+     SELECT i/10, i/150, i/200 FROM generate_series(1,10000) s(i);
+ANALYZE functional_dependencies;
+
+SELECT deps_enabled, deps_built, pg_mv_stats_dependencies_show(stadeps)
+  FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+
+TRUNCATE functional_dependencies;
+
+-- check explain (expect bitmap index scan, not plain index scan)
+INSERT INTO functional_dependencies
+     SELECT i/10000, i/20000, i/40000 FROM generate_series(1,1000000) s(i);
+CREATE INDEX fdeps_idx ON functional_dependencies (a, b);
+ANALYZE functional_dependencies;
+
+SELECT deps_enabled, deps_built, pg_mv_stats_dependencies_show(stadeps)
+  FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+
+EXPLAIN (COSTS off)
+ SELECT * FROM functional_dependencies WHERE a = 10 AND b = 5;
+
+DELETE FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+DROP TABLE functional_dependencies;
+
+-- varlena type (text)
+CREATE TABLE functional_dependencies (
+    a TEXT,
+    b TEXT,
+    c TEXT
+);
+
+ALTER TABLE functional_dependencies ADD STATISTICS (dependencies) ON (a, b, c);
+
+-- random data (no functional dependencies)
+INSERT INTO functional_dependencies
+     SELECT mod(i, 111), mod(i, 123), mod(i, 23) FROM generate_series(1,10000) s(i);
+
+ANALYZE functional_dependencies;
+
+SELECT deps_enabled, deps_built, pg_mv_stats_dependencies_show(stadeps)
+  FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+
+TRUNCATE functional_dependencies;
+
+-- a => b, a => c, b => c
+INSERT INTO functional_dependencies
+     SELECT i/10, i/100, i/200 FROM generate_series(1,10000) s(i);
+
+ANALYZE functional_dependencies;
+
+SELECT deps_enabled, deps_built, pg_mv_stats_dependencies_show(stadeps)
+  FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+
+TRUNCATE functional_dependencies;
+
+-- a => b, a => c
+INSERT INTO functional_dependencies
+     SELECT i/10, i/150, i/200 FROM generate_series(1,10000) s(i);
+ANALYZE functional_dependencies;
+
+SELECT deps_enabled, deps_built, pg_mv_stats_dependencies_show(stadeps)
+  FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+
+TRUNCATE functional_dependencies;
+
+-- check explain (expect bitmap index scan, not plain index scan)
+INSERT INTO functional_dependencies
+     SELECT i/10000, i/20000, i/40000 FROM generate_series(1,1000000) s(i);
+CREATE INDEX fdeps_idx ON functional_dependencies (a, b);
+ANALYZE functional_dependencies;
+
+SELECT deps_enabled, deps_built, pg_mv_stats_dependencies_show(stadeps)
+  FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+
+EXPLAIN (COSTS off)
+ SELECT * FROM functional_dependencies WHERE a = '10' AND b = '5';
+
+DELETE FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+DROP TABLE functional_dependencies;
+
+-- NULL values (mix of int and text columns)
+CREATE TABLE functional_dependencies (
+    a INT,
+    b TEXT,
+    c INT,
+    d TEXT
+);
+
+ALTER TABLE functional_dependencies ADD STATISTICS (dependencies) ON (a, b, c, d);
+
+INSERT INTO functional_dependencies
+     SELECT
+         mod(i, 100),
+         (CASE WHEN mod(i, 200) = 0 THEN NULL ELSE mod(i,200) END),
+         mod(i, 400),
+         (CASE WHEN mod(i, 300) = 0 THEN NULL ELSE mod(i,600) END)
+     FROM generate_series(1,10000) s(i);
+
+ANALYZE functional_dependencies;
+
+SELECT deps_enabled, deps_built, pg_mv_stats_dependencies_show(stadeps)
+  FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+
+DELETE FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+DROP TABLE functional_dependencies;
-- 
2.0.5