>From 039046f31843f2747a4fef4ed49b830b492ee459 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas@pgaddict.com>
Date: Mon, 6 Apr 2015 19:42:18 +0200
Subject: [PATCH 3/7] clause reduction using functional dependencies

During planning, use functional dependencies to decide which
clauses to skip during cardinality estimation. Initial and
rather simplistic implementation.

This only works with regular WHERE clauses, not clauses used
for join clauses.

Note: The clause_is_mv_compatible() needs to identify the
relation (so that we can fetch the list of multivariate stats
by OID). planner_rt_fetch() seems like the appropriate way to
get the relation OID, but apparently it only works with simple
vars. Maybe examine_variable() would make this work with more
complex vars too?

Includes regression tests analyzing functional dependencies
(part of ANALYZE) on several datasets (no dependencies, no
transitive dependencies, ...).

Checks that a query with conditions on two columns, where one (B)
is functionally dependent on the other one (A), correctly ignores
the clause on (B) and chooses bitmap index scan instead of plain
index scan (which is what happens otherwise, thanks to assumption
of independence).

Note: Functional dependencies only work with equality clauses,
no inequalities etc.
---
 src/backend/optimizer/path/clausesel.c        | 912 +++++++++++++++++++++++++-
 src/backend/utils/mvstats/common.c            |   5 +-
 src/backend/utils/mvstats/dependencies.c      |  24 +
 src/include/utils/mvstats.h                   |  16 +-
 src/test/regress/expected/mv_dependencies.out | 172 +++++
 src/test/regress/parallel_schedule            |   3 +
 src/test/regress/serial_schedule              |   1 +
 src/test/regress/sql/mv_dependencies.sql      | 150 +++++
 8 files changed, 1278 insertions(+), 5 deletions(-)
 create mode 100644 src/test/regress/expected/mv_dependencies.out
 create mode 100644 src/test/regress/sql/mv_dependencies.sql

diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c
index 6ce2726..c7f17e3 100644
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -14,14 +14,19 @@
  */
 #include "postgres.h"
 
+#include "access/sysattr.h"
+#include "catalog/pg_operator.h"
 #include "nodes/makefuncs.h"
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/plancat.h"
+#include "optimizer/var.h"
 #include "utils/fmgroids.h"
 #include "utils/lsyscache.h"
+#include "utils/mvstats.h"
 #include "utils/selfuncs.h"
+#include "utils/typcache.h"
 
 
 /*
@@ -41,6 +46,44 @@ typedef struct RangeQueryClause
 static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
 			   bool varonleft, bool isLTsel, Selectivity s2);
 
+#define		MV_CLAUSE_TYPE_FDEP		0x01
+
+static bool clause_is_mv_compatible(PlannerInfo *root, Node *clause, Oid varRelid,
+							 Index *relid, AttrNumber *attnum, SpecialJoinInfo *sjinfo);
+
+static Bitmapset  *collect_mv_attnums(PlannerInfo *root, List *clauses,
+									  Oid varRelid, Index *relid, SpecialJoinInfo *sjinfo);
+
+static List *clauselist_apply_dependencies(PlannerInfo *root, List *clauses,
+								Oid varRelid, List *stats,
+								SpecialJoinInfo *sjinfo);
+
+static bool has_stats(List *stats, int type);
+
+static List * find_stats(PlannerInfo *root, List *clauses,
+						 Oid varRelid, Index *relid);
+ 
+static Bitmapset* fdeps_collect_attnums(List *stats);
+
+static int	*make_idx_to_attnum_mapping(Bitmapset *attnums);
+static int	*make_attnum_to_idx_mapping(Bitmapset *attnums);
+
+static bool	*build_adjacency_matrix(List *stats, Bitmapset *attnums,
+								int *idx_to_attnum, int *attnum_to_idx);
+
+static void	multiply_adjacency_matrix(bool *matrix, int natts);
+
+static List* fdeps_reduce_clauses(List *clauses,
+								  Bitmapset *attnums, bool *matrix,
+								  int *idx_to_attnum, int *attnum_to_idx,
+								  Index relid);
+
+static Bitmapset *fdeps_filter_clauses(PlannerInfo *root,
+					 List *clauses, Bitmapset *deps_attnums,
+					 List **reduced_clauses, List **deps_clauses,
+					 Oid varRelid, Index *relid, SpecialJoinInfo *sjinfo);
+
+static Bitmapset * get_varattnos(Node * node, Index relid);
 
 /****************************************************************************
  *		ROUTINES TO COMPUTE SELECTIVITIES
@@ -60,7 +103,7 @@ static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
  * subclauses.  However, that's only right if the subclauses have independent
  * probabilities, and in reality they are often NOT independent.  So,
  * we want to be smarter where we can.
-
+ *
  * Currently, the only extra smarts we have is to recognize "range queries",
  * such as "x > 34 AND x < 42".  Clauses are recognized as possible range
  * query components if they are restriction opclauses whose operators have
@@ -87,6 +130,88 @@ static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
  *
  * Of course this is all very dependent on the behavior of
  * scalarltsel/scalargtsel; perhaps some day we can generalize the approach.
+ *
+ *
+ * Multivariate statististics
+ * --------------------------
+ * This also uses multivariate stats to estimate combinations of
+ * conditions, in a way (a) maximizing the estimate accuracy by using
+ * as many stats as possible, and (b) minimizing the overhead,
+ * especially when there are no suitable multivariate stats (so if you
+ * are not using multivariate stats, there's no additional overhead).
+ *
+ * The following checks are performed (in this order), and the optimizer
+ * falls back to regular stats on the first 'false'.
+ *
+ * NOTE: This explains how this works with all the patches applied, not
+ *       just the functional dependencies.
+ *
+ * (0) check if there are multivariate stats on the relation
+ *
+ *     If no, just skip all the following steps (directly to the
+ *     original code).
+ *
+ * (1) check how many attributes are there in conditions compatible
+ *     with functional dependencies
+ *
+ *     Only simple equality clauses are considered compatible with
+ *     functional dependencies (and that's unlikely to change, because
+ *     that's the only case when functional dependencies are useful).
+ *
+ *     If there are no conditions that might be handled by multivariate
+ *     stats, or if the conditions reference just a single column, it
+ *     makes no sense to use functional dependencies, so skip to (4).
+ *
+ * (2) reduce the clauses using functional dependencies
+ *
+ *     This simply attempts to 'reduce' the clauses by applying functional
+ *     dependencies. For example if there are two clauses:
+ *
+ *         WHERE (a = 1) AND (b = 2)
+ *
+ *     and we know that 'a' determines the value of 'b', we may remove
+ *     the second condition (b = 2) when computing the selectivity.
+ *     This is of course tricky - see mvstats/dependencies.c for details.
+ *
+ *     After the reduction, step (1) is to be repeated.
+ *
+ * (3) check how many attributes are there in conditions compatible
+ *     with MCV lists and histograms
+ *
+ *     What conditions are compatible with multivariate stats is decided
+ *     by clause_is_mv_compatible(). At this moment, only conditions
+ *     of the form "column operator constant" (for simple comparison
+ *     operators), IS [NOT] NULL and some AND/OR clauses are considered
+ *     compatible with multivariate statistics.
+ *
+ *     Again, see clause_is_mv_compatible() for details.
+ *
+ * (4) check how many attributes are there in conditions compatible
+ *     with MCV lists and histograms
+ *
+ *     If there are no conditions that might be handled by MCV lists
+ *     or histograms, or if the conditions reference just a single
+ *     column, it makes no sense to continue, so just skip to (7).
+ *
+ * (5) choose the stats matching the most columns
+ *
+ *     If there are multiple instances of multivariate statistics (e.g.
+ *     built on different sets of columns), we choose the stats covering
+ *     the most columns from step (1). It may happen that all available
+ *     stats match just a single column - for example with conditions
+ *
+ *         WHERE a = 1 AND b = 2
+ *
+ *     and statistics built on (a,c) and (b,c). In such case just fall
+ *     back to the regular stats because it makes no sense to use the
+ *     multivariate statistics.
+ *
+ *     For more details about how exactly we choose the stats, see
+ *     choose_mv_statistics().
+ *
+ * (6) use the multivariate stats to estimate matching clauses
+ *
+ * (7) estimate the remaining clauses using the regular statistics
  */
 Selectivity
 clauselist_selectivity(PlannerInfo *root,
@@ -99,6 +224,16 @@ clauselist_selectivity(PlannerInfo *root,
 	RangeQueryClause *rqlist = NULL;
 	ListCell   *l;
 
+	/* processing mv stats */
+	Oid			relid = InvalidOid;
+
+	/* attributes in mv-compatible clauses */
+	Bitmapset  *mvattnums = NULL;
+	List	   *stats = NIL;
+
+	/* use clauses (not conditions), because those are always non-empty */
+	stats = find_stats(root, clauses, varRelid, &relid);
+
 	/*
 	 * If there's exactly one clause, then no use in trying to match up pairs,
 	 * so just go directly to clause_selectivity().
@@ -108,6 +243,31 @@ clauselist_selectivity(PlannerInfo *root,
 								  varRelid, jointype, sjinfo);
 
 	/*
+	 * Check that there are some stats with functional dependencies
+	 * built (by walking the stats list). We're going to find that
+	 * anyway when trying to apply the functional dependencies, but
+	 * this is probably a tad faster.
+	 */
+	if (has_stats(stats, MV_CLAUSE_TYPE_FDEP))
+	{
+		/* collect attributes referenced by mv-compatible clauses */
+		mvattnums = collect_mv_attnums(root, clauses, varRelid, &relid, sjinfo);
+
+		/*
+		 * If there are mv-compatible clauses, referencing at least two
+		 * different columns (otherwise it makes no sense to use mv stats),
+		 * try to reduce the clauses using functional dependencies, and
+		 * recollect the attributes from the reduced list.
+		 *
+		 * We don't need to select a single statistics for this - we can
+		 * apply all the functional dependencies we have.
+		 */
+		if (bms_num_members(mvattnums) >= 2)
+			clauses = clauselist_apply_dependencies(root, clauses, varRelid,
+													stats, sjinfo);
+	}
+
+	/*
 	 * Initial scan over clauses.  Anything that doesn't look like a potential
 	 * rangequery clause gets multiplied into s1 and forgotten. Anything that
 	 * does gets inserted into an rqlist entry.
@@ -763,3 +923,753 @@ clause_selectivity(PlannerInfo *root,
 
 	return s1;
 }
+
+/*
+ * Collect attributes from mv-compatible clauses.
+ */
+static Bitmapset *
+collect_mv_attnums(PlannerInfo *root, List *clauses, Oid varRelid,
+				   Index *relid, SpecialJoinInfo *sjinfo)
+{
+	Bitmapset  *attnums = NULL;
+	ListCell   *l;
+
+	/*
+	 * Walk through the clauses and identify the ones we can estimate
+	 * using multivariate stats, and remember the relid/columns. We'll
+	 * then cross-check if we have suitable stats, and only if needed
+	 * we'll split the clauses into multivariate and regular lists.
+	 *
+	 * For now we're only interested in RestrictInfo nodes with nested
+	 * OpExpr, using either a range or equality.
+	 */
+	foreach (l, clauses)
+	{
+		AttrNumber attnum;
+		Node	   *clause = (Node *) lfirst(l);
+
+		/* ignore the result for now - we only need the info */
+		if (clause_is_mv_compatible(root, clause, varRelid, relid, &attnum, sjinfo))
+			attnums = bms_add_member(attnums, attnum);
+	}
+
+	/*
+	 * If there are not at least two attributes referenced by the clause(s),
+	 * we can throw everything out (as we'll revert to simple stats).
+	 */
+	if (bms_num_members(attnums) <= 1)
+	{
+		if (attnums != NULL)
+			pfree(attnums);
+		attnums = NULL;
+		*relid = InvalidOid;
+	}
+
+	return attnums;
+}
+
+/*
+ * Determines whether the clause is compatible with multivariate stats,
+ * and if it is, returns some additional information - varno (index
+ * into simple_rte_array) and a bitmap of attributes. This is then
+ * used to fetch related multivariate statistics.
+ *
+ * At this moment we only support basic conditions of the form
+ *
+ *     variable OP constant
+ *
+ * where OP is one of [=,<,<=,>=,>] (which is however determined by
+ * looking at the associated function for estimating selectivity, just
+ * like with the single-dimensional case).
+ *
+ * TODO Support 'OR clauses' - shouldn't be all that difficult to
+ *      evaluate them using multivariate stats.
+ */
+static bool
+clause_is_mv_compatible(PlannerInfo *root, Node *clause, Oid varRelid,
+						Index *relid, AttrNumber *attnum, SpecialJoinInfo *sjinfo)
+{
+
+	if (IsA(clause, RestrictInfo))
+	{
+		RestrictInfo *rinfo = (RestrictInfo *) clause;
+
+		/* Pseudoconstants are not really interesting here. */
+		if (rinfo->pseudoconstant)
+			return false;
+
+		/* no support for OR clauses at this point */
+		if (rinfo->orclause)
+			return false;
+
+		/* get the actual clause from the RestrictInfo (it's not an OR clause) */
+		clause = (Node*)rinfo->clause;
+
+		/* only simple opclauses are compatible with multivariate stats */
+		if (! is_opclause(clause))
+			return false;
+
+		/* we don't support join conditions at this moment */
+		if (treat_as_join_clause(clause, rinfo, varRelid, sjinfo))
+			return false;
+
+		/* is it 'variable op constant' ? */
+		if (list_length(((OpExpr *) clause)->args) == 2)
+		{
+			OpExpr	   *expr = (OpExpr *) clause;
+			bool		varonleft = true;
+			bool		ok;
+
+			ok = (bms_membership(rinfo->clause_relids) == BMS_SINGLETON) &&
+				(is_pseudo_constant_clause_relids(lsecond(expr->args),
+												rinfo->right_relids) ||
+				(varonleft = false,
+				is_pseudo_constant_clause_relids(linitial(expr->args),
+												rinfo->left_relids)));
+
+			if (ok)
+			{
+				Var * var = (varonleft) ? linitial(expr->args) : lsecond(expr->args);
+
+				/*
+				 * Simple variables only - otherwise the planner_rt_fetch seems to fail
+				 * (return NULL).
+				 *
+				 * TODO Maybe use examine_variable() would fix that?
+				 */
+				if (! (IsA(var, Var) && (varRelid == 0 || varRelid == var->varno)))
+					return false;
+
+				/*
+				 * Only consider this variable if (varRelid == 0) or when the varno
+				 * matches varRelid (see explanation at clause_selectivity).
+				 *
+				 * FIXME I suspect this may not be really necessary. The (varRelid == 0)
+				 *       part seems to be enforced by treat_as_join_clause().
+				 */
+				if (! ((varRelid == 0) || (varRelid == var->varno)))
+					return false;
+
+				/* Also skip special varno values, and system attributes ... */
+				if ((IS_SPECIAL_VARNO(var->varno)) || (! AttrNumberIsForUserDefinedAttr(var->varattno)))
+					return false;
+
+				*relid = var->varno;
+
+				/*
+				 * If it's not a "<" or ">" or "=" operator, just ignore the
+				 * clause. Otherwise note the relid and attnum for the variable.
+				 * This uses the function for estimating selectivity, ont the
+				 * operator directly (a bit awkward, but well ...).
+				 */
+				switch (get_oprrest(expr->opno))
+					{
+						case F_EQSEL:
+							*attnum = var->varattno;
+							return true;
+					}
+			}
+		}
+	}
+
+	return false;
+
+}
+
+/*
+ * Performs reduction of clauses using functional dependencies, i.e.
+ * removes clauses that are considered redundant. It simply walks
+ * through dependencies, and checks whether the dependency 'matches'
+ * the clauses, i.e. if there's a clause matching the condition. If yes,
+ * all clauses matching the implied part of the dependency are removed
+ * from the list.
+ *
+ * This simply looks at attnums references by the clauses, not at the
+ * type of the operator (equality, inequality, ...). This may not be the
+ * right way to do - it certainly works best for equalities, which is
+ * naturally consistent with functional dependencies (implications).
+ * It's not clear that other operators are handled sensibly - for
+ * example for inequalities, like
+ *
+ *     WHERE (A >= 10) AND (B <= 20)
+ *
+ * and a trivial case where [A == B], resulting in symmetric pair of
+ * rules [A => B], [B => A], it's rather clear we can't remove either of
+ * those clauses.
+ *
+ * That only highlights that functional dependencies are most suitable
+ * for label-like data, where using non-equality operators is very rare.
+ * Using the common city/zipcode example, clauses like
+ *
+ *     (zipcode <= 12345)
+ *
+ * or
+ *
+ *     (cityname >= 'Washington')
+ *
+ * are rare. So restricting the reduction to equality should not harm
+ * the usefulness / applicability.
+ *
+ * The other assumption is that this assumes 'compatible' clauses. For
+ * example by using mismatching zip code and city name, this is unable
+ * to identify the discrepancy and eliminates one of the clauses. The
+ * usual approach (multiplying both selectivities) thus produces a more
+ * accurate estimate, although mostly by luck - the multiplication
+ * comes from assumption of statistical independence of the two
+ * conditions (which is not not valid in this case), but moves the
+ * estimate in the right direction (towards 0%).
+ *
+ * This might be somewhat improved by cross-checking the selectivities
+ * against MCV and/or histogram.
+ *
+ * The implementation needs to be careful about cyclic rules, i.e. rules
+ * like [A => B] and [B => A] at the same time. This must not reduce
+ * clauses on both attributes at the same time.
+ *
+ * Technically we might consider selectivities here too, somehow. E.g.
+ * when (A => B) and (B => A), we might use the clauses with minimum
+ * selectivity.
+ *
+ * TODO Consider restricting the reduction to equality clauses. Or maybe
+ *      use equality classes somehow?
+ *
+ * TODO Merge this docs to dependencies.c, as it's saying mostly the
+ *      same things as the comments there.
+ *
+ * TODO Currently this is applied only to the top-level clauses, but
+ *      maybe we could apply it to lists at subtrees too, e.g. to the
+ *      two AND-clauses in
+ *
+ *          (x=1 AND y=2) OR (z=3 AND q=10)
+ *
+ */
+static List *
+clauselist_apply_dependencies(PlannerInfo *root, List *clauses,
+							  Oid varRelid, List *stats,
+							  SpecialJoinInfo *sjinfo)
+{
+	List	   *reduced_clauses = NIL;
+	Index		relid;
+
+	/*
+	 * matrix of (natts x natts), 1 means x=>y
+	 *
+	 * This serves two purposes - first, it merges dependencies from all
+	 * the statistics, second it makes generating all the transitive
+	 * dependencies easier.
+	 *
+	 * We need to build this only for attributes from the dependencies,
+	 * not for all attributes in the table.
+	 *
+	 * We can't do that only for attributes from the clauses, because we
+	 * want to build transitive dependencies (including those going
+	 * through attributes not listed in the stats).
+	 *
+	 * This only works for A=>B dependencies, not sure how to do that
+	 * for complex dependencies.
+	 */
+	bool       *deps_matrix;
+	int			deps_natts;	/* size of the matric */
+
+	/* mapping attnum <=> matrix index */
+	int		   *deps_idx_to_attnum;
+	int		   *deps_attnum_to_idx;
+
+	/* attnums in dependencies and clauses (and intersection) */
+	List	   *deps_clauses   = NIL;
+	Bitmapset  *deps_attnums   = NULL;
+	Bitmapset  *clause_attnums = NULL;
+	Bitmapset  *intersect_attnums = NULL;
+
+	/*
+	 * Is there at least one statistics with functional dependencies?
+	 * If not, return the original clauses right away.
+	 *
+	 * XXX Isn't this pointless, thanks to exactly the same check in
+	 *     clauselist_selectivity()? Can we trigger the condition here?
+	 */
+	if (! has_stats(stats, MV_CLAUSE_TYPE_FDEP))
+		return clauses;
+
+	/*
+	 * Build the dependency matrix, i.e. attribute adjacency matrix,
+	 * where 1 means (a=>b). Once we have the adjacency matrix, we'll
+	 * multiply it by itself, to get transitive dependencies.
+	 *
+	 * Note: This is pretty much transitive closure from graph theory.
+	 *
+	 * First, let's see what attributes are covered by functional
+	 * dependencies (sides of the adjacency matrix), and also a maximum
+	 * attribute (size of mapping to simple integer indexes);
+	 */
+	deps_attnums = fdeps_collect_attnums(stats);
+
+	/*
+	 * Walk through the clauses - clauses that are (one of)
+	 *
+	 * (a) not mv-compatible
+	 * (b) are using more than a single attnum
+	 * (c) using attnum not covered by functional depencencies
+	 *
+	 * may be copied directly to the result. The interesting clauses are
+	 * kept in 'deps_clauses' and will be processed later.
+	 */
+	clause_attnums = fdeps_filter_clauses(root, clauses, deps_attnums,
+										  &reduced_clauses, &deps_clauses,
+										  varRelid, &relid, sjinfo);
+
+	/*
+	 * we need at least two clauses referencing two different attributes
+	 * referencing to do the reduction
+	 */
+	if ((list_length(deps_clauses) < 2) || (bms_num_members(clause_attnums) < 2))
+	{
+		bms_free(clause_attnums);
+		list_free(reduced_clauses);
+		list_free(deps_clauses);
+
+		return clauses;
+	}
+
+
+	/*
+	 * We need at least two matching attributes in the clauses and
+	 * dependencies, otherwise we can't really reduce anything.
+	 */
+	intersect_attnums = bms_intersect(clause_attnums, deps_attnums);
+	if (bms_num_members(intersect_attnums) < 2)
+	{
+		bms_free(clause_attnums);
+		bms_free(deps_attnums);
+		bms_free(intersect_attnums);
+
+		list_free(deps_clauses);
+		list_free(reduced_clauses);
+
+		return clauses;
+	}
+
+	/*
+	 * Build mapping between matrix indexes and attnums, and then the
+	 * adjacency matrix itself.
+	 */
+	deps_idx_to_attnum = make_idx_to_attnum_mapping(deps_attnums);
+	deps_attnum_to_idx = make_attnum_to_idx_mapping(deps_attnums);
+
+	/* build the adjacency matrix */
+	deps_matrix = build_adjacency_matrix(stats, deps_attnums,
+										 deps_idx_to_attnum,
+										 deps_attnum_to_idx);
+
+	deps_natts = bms_num_members(deps_attnums);
+
+	/*
+	 * Multiply the matrix N-times (N = size of the matrix), so that we
+	 * get all the transitive dependencies. That makes the next step
+	 * much easier and faster.
+	 *
+	 * This is essentially an adjacency matrix from graph theory, and
+	 * by multiplying it we get transitive edges. We don't really care
+	 * about the exact number (number of paths between vertices) though,
+	 * so we can do the multiplication in-place (we don't care whether
+	 * we found the dependency in this round or in the previous one).
+	 *
+	 * Track how many new dependencies were added, and stop when 0, but
+	 * we can't multiply more than N-times (longest path in the graph).
+	 */
+	multiply_adjacency_matrix(deps_matrix, deps_natts);
+
+	/*
+	 * Walk through the clauses, and see which other clauses we may
+	 * reduce. The matrix contains all transitive dependencies, which
+	 * makes this very fast.
+	 *
+	 * We have to be careful not to reduce the clause using itself, or
+	 * reducing all clauses forming a cycle (so we have to skip already
+	 * eliminated clauses).
+	 *
+	 * I'm not sure whether this guarantees finding the best solution,
+	 * i.e. reducing the most clauses, but it probably does (thanks to
+	 * having all the transitive dependencies).
+	 */
+	deps_clauses = fdeps_reduce_clauses(deps_clauses,
+										deps_attnums, deps_matrix,
+										deps_idx_to_attnum,
+										deps_attnum_to_idx, relid);
+
+	/* join the two lists of clauses */
+	reduced_clauses = list_union(reduced_clauses, deps_clauses);
+
+	pfree(deps_matrix);
+	pfree(deps_idx_to_attnum);
+	pfree(deps_attnum_to_idx);
+
+	bms_free(deps_attnums);
+	bms_free(clause_attnums);
+	bms_free(intersect_attnums);
+
+	return reduced_clauses;
+}
+
+static bool
+has_stats(List *stats, int type)
+{
+	ListCell   *s;
+
+	foreach (s, stats)
+	{
+		MVStatisticInfo	*stat = (MVStatisticInfo *)lfirst(s);
+
+		if ((type & MV_CLAUSE_TYPE_FDEP) && stat->deps_built)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Determing relid (either from varRelid or from clauses) and then
+ * lookup stats using the relid.
+ */
+static List *
+find_stats(PlannerInfo *root, List *clauses, Oid varRelid, Index *relid)
+{
+	/* unknown relid by default */
+	*relid = InvalidOid;
+
+	/*
+	 * First we need to find the relid (index info simple_rel_array).
+	 * If varRelid is not 0, we already have it, otherwise we have to
+	 * look it up from the clauses.
+	 */
+	if (varRelid != 0)
+		*relid = varRelid;
+	else
+	{
+		Relids	relids = pull_varnos((Node*)clauses);
+
+		/*
+		 * We only expect 0 or 1 members in the bitmapset. If there are
+		 * no vars, we'll get empty bitmapset, otherwise we'll get the
+		 * relid as the single member.
+		 *
+		 * FIXME For some reason we can get 2 relids here (e.g. \d in
+		 *       psql does that).
+		 */
+		if (bms_num_members(relids) == 1)
+			*relid = bms_singleton_member(relids);
+
+		bms_free(relids);
+	}
+
+	/*
+	 * if we found the relid, we can get the stats from simple_rel_array
+	 *
+	 * This only gets stats that are already built, because that's how
+	 * we load it into RelOptInfo (see get_relation_info), but we don't
+	 * detoast the whole stats yet. That'll be done later, after we
+	 * decide which stats to use.
+	 */
+	if (*relid != InvalidOid)
+		return root->simple_rel_array[*relid]->mvstatlist;
+
+	return NIL;
+}
+
+static Bitmapset*
+fdeps_collect_attnums(List *stats)
+{
+	ListCell *lc;
+	Bitmapset *attnums = NULL;
+
+	foreach (lc, stats)
+	{
+		int j;
+		MVStatisticInfo *info = (MVStatisticInfo *)lfirst(lc);
+
+		int2vector *stakeys = info->stakeys;
+
+		/* skip stats without functional dependencies built */
+		if (! info->deps_built)
+			continue;
+
+		for (j = 0; j < stakeys->dim1; j++)
+			attnums = bms_add_member(attnums, stakeys->values[j]);
+	}
+
+	return attnums;
+}
+
+
+static int*
+make_idx_to_attnum_mapping(Bitmapset *attnums)
+{
+	int		attidx = 0;
+	int		attnum = -1;
+
+	int	   *mapping = (int*)palloc0(bms_num_members(attnums) * sizeof(int));
+
+	while ((attnum = bms_next_member(attnums, attnum)) >= 0)
+		mapping[attidx++] = attnum;
+
+	Assert(attidx == bms_num_members(attnums));
+
+	return mapping;
+}
+
+static int*
+make_attnum_to_idx_mapping(Bitmapset *attnums)
+{
+	int		attidx = 0;
+	int		attnum = -1;
+	int		maxattnum = -1;
+	int	   *mapping;
+
+	while ((attnum = bms_next_member(attnums, attnum)) >= 0)
+		maxattnum = attnum;
+
+	mapping = (int*)palloc0((maxattnum+1) * sizeof(int));
+
+	attnum = -1;
+	while ((attnum = bms_next_member(attnums, attnum)) >= 0)
+		mapping[attnum] = attidx++;
+
+	Assert(attidx == bms_num_members(attnums));
+
+	return mapping;
+}
+
+static bool*
+build_adjacency_matrix(List *stats, Bitmapset *attnums,
+					   int *idx_to_attnum, int *attnum_to_idx)
+{
+	ListCell *lc;
+	int		natts  = bms_num_members(attnums);
+	bool   *matrix = (bool*)palloc0(natts * natts * sizeof(bool));
+
+	foreach (lc, stats)
+	{
+		int j;
+		MVStatisticInfo *stat = (MVStatisticInfo *)lfirst(lc);
+		MVDependencies dependencies = NULL;
+
+		/* skip stats without functional dependencies built */
+		if (! stat->deps_built)
+			continue;
+
+		/* fetch and deserialize dependencies */
+		dependencies = load_mv_dependencies(stat->mvoid);
+		if (dependencies == NULL)
+		{
+			elog(WARNING, "failed to deserialize func deps %d", stat->mvoid);
+			continue;
+		}
+
+		/* set matrix[a,b] to 'true' if 'a=>b' */
+		for (j = 0; j < dependencies->ndeps; j++)
+		{
+			int aidx = attnum_to_idx[dependencies->deps[j]->a];
+			int bidx = attnum_to_idx[dependencies->deps[j]->b];
+
+			/* a=> b */
+			matrix[aidx * natts + bidx] = true;
+		}
+	}
+
+	return matrix;
+}
+
+static void
+multiply_adjacency_matrix(bool *matrix, int natts)
+{
+	int i;
+
+	for (i = 0; i < natts; i++)
+	{
+		int k, l, m;
+		int nchanges = 0;
+
+		/* k => l */
+		for (k = 0; k < natts; k++)
+		{
+			for (l = 0; l < natts; l++)
+			{
+				/* we already have this dependency */
+				if (matrix[k * natts + l])
+					continue;
+
+				/* we don't really care about the exact value, just 0/1 */
+				for (m = 0; m < natts; m++)
+				{
+					if (matrix[k * natts + m] * matrix[m * natts + l])
+					{
+						matrix[k * natts + l] = true;
+						nchanges += 1;
+						break;
+					}
+				}
+			}
+		}
+
+		/* no transitive dependency added here, so terminate */
+		if (nchanges == 0)
+			break;
+	}
+}
+
+static List*
+fdeps_reduce_clauses(List *clauses, Bitmapset *attnums, bool *matrix,
+					int *idx_to_attnum, int *attnum_to_idx, Index relid)
+{
+	int i;
+	ListCell *lc;
+	List   *reduced_clauses = NIL;
+
+	int			nmvclauses;	/* size of the arrays */
+	bool	   *reduced;
+	AttrNumber *mvattnums;
+	Node	  **mvclauses;
+
+	int			natts = bms_num_members(attnums);
+
+	/*
+	 * Preallocate space for all clauses (the list only containst
+	 * compatible clauses at this point). This makes it somewhat easier
+	 * to access the stats / attnums randomly.
+	 *
+	 * XXX This assumes each clause references exactly one Var, so the
+	 *     arrays are sized accordingly - for functional dependencies
+	 *     this is safe, because it only works with Var=Const.
+	 */
+	mvclauses = (Node**)palloc0(list_length(clauses) * sizeof(Node*));
+	mvattnums = (AttrNumber*)palloc0(list_length(clauses) * sizeof(AttrNumber));
+	reduced = (bool*)palloc0(list_length(clauses) * sizeof(bool));
+
+	/* fill the arrays */
+	nmvclauses = 0;
+	foreach (lc, clauses)
+	{
+		Node * clause = (Node*)lfirst(lc);
+		Bitmapset * attnums = get_varattnos(clause, relid);
+
+		mvclauses[nmvclauses] = clause;
+		mvattnums[nmvclauses] = bms_singleton_member(attnums);
+		nmvclauses++;
+	}
+
+	Assert(nmvclauses == list_length(clauses));
+
+	/* now try to reduce the clauses (using the dependencies) */
+	for (i = 0; i < nmvclauses; i++)
+	{
+		int j;
+
+		/* not covered by dependencies */
+		if (! bms_is_member(mvattnums[i], attnums))
+			continue;
+
+		/* this clause was already reduced, so let's skip it */
+		if (reduced[i])
+			continue;
+
+		/* walk the potentially 'implied' clauses */
+		for (j = 0; j < nmvclauses; j++)
+		{
+			int aidx, bidx;
+
+			/* not covered by dependencies */
+			if (! bms_is_member(mvattnums[j], attnums))
+				continue;
+
+			aidx = attnum_to_idx[mvattnums[i]];
+			bidx = attnum_to_idx[mvattnums[j]];
+
+			/* can't reduce the clause by itself, or if already reduced */
+			if ((i == j) || reduced[j])
+				continue;
+
+			/* mark the clause as reduced (if aidx => bidx) */
+			reduced[j] = matrix[aidx * natts + bidx];
+		}
+	}
+
+	/* now walk through the clauses, and keep only those not reduced */
+	for (i = 0; i < nmvclauses; i++)
+		if (! reduced[i])
+			reduced_clauses = lappend(reduced_clauses, mvclauses[i]);
+
+	pfree(reduced);
+	pfree(mvclauses);
+	pfree(mvattnums);
+
+	return reduced_clauses;
+}
+
+
+static Bitmapset *
+fdeps_filter_clauses(PlannerInfo *root,
+					 List *clauses, Bitmapset *deps_attnums,
+					 List **reduced_clauses, List **deps_clauses,
+					 Oid varRelid, Index *relid, SpecialJoinInfo *sjinfo)
+{
+	ListCell *lc;
+	Bitmapset *clause_attnums = NULL;
+
+	foreach (lc, clauses)
+	{
+		AttrNumber	attnum;
+		Node	   *clause = (Node *) lfirst(lc);
+
+		if (! clause_is_mv_compatible(root, clause, varRelid, relid,
+									  &attnum, sjinfo))
+
+			/* clause incompatible with functional dependencies */
+			*reduced_clauses = lappend(*reduced_clauses, clause);
+
+		else if (! bms_is_member(attnum, deps_attnums))
+
+			/* clause not covered by the dependencies */
+			*reduced_clauses = lappend(*reduced_clauses, clause);
+
+		else
+		{
+			*deps_clauses   = lappend(*deps_clauses, clause);
+			clause_attnums = bms_add_member(clause_attnums, attnum);
+		}
+	}
+
+	return clause_attnums;
+}
+
+/*
+ * Pull varattnos from the clauses, similarly to pull_varattnos() but:
+ *
+ * (a) only get attributes for a particular relation (relid)
+ * (b) ignore system attributes (we can't build stats on them anyway)
+ *
+ * This makes it possible to directly compare the result with attnum
+ * values from pg_attribute etc.
+ */
+static Bitmapset *
+get_varattnos(Node * node, Index relid)
+{
+	int			k;
+	Bitmapset  *varattnos = NULL;
+	Bitmapset  *result = NULL;
+
+	/* get the varattnos */
+	pull_varattnos(node, relid, &varattnos);
+
+	k = -1;
+	while ((k = bms_next_member(varattnos, k)) >= 0)
+	{
+		if (k + FirstLowInvalidHeapAttributeNumber > 0)
+			result
+				= bms_add_member(result,
+								 k + FirstLowInvalidHeapAttributeNumber);
+	}
+
+	bms_free(varattnos);
+
+	return result;
+}
diff --git a/src/backend/utils/mvstats/common.c b/src/backend/utils/mvstats/common.c
index a755c49..bd200bc 100644
--- a/src/backend/utils/mvstats/common.c
+++ b/src/backend/utils/mvstats/common.c
@@ -84,7 +84,8 @@ build_mv_stats(Relation onerel, int numrows, HeapTuple *rows,
 		/*
 		 * Analyze functional dependencies of columns.
 		 */
-		deps = build_mv_dependencies(numrows, rows, attrs, stats);
+		if (stat->deps_enabled)
+			deps = build_mv_dependencies(numrows, rows, attrs, stats);
 
 		/* store the histogram / MCV list in the catalog */
 		update_mv_stats(stat->mvoid, deps, attrs);
@@ -163,6 +164,7 @@ list_mv_stats(Oid relid)
 
 		info->mvoid = HeapTupleGetOid(htup);
 		info->stakeys = buildint2vector(stats->stakeys.values, stats->stakeys.dim1);
+		info->deps_enabled = stats->deps_enabled;
 		info->deps_built = stats->deps_built;
 
 		result = lappend(result, info);
@@ -274,6 +276,7 @@ compare_scalars_partition(const void *a, const void *b, void *arg)
 	return ApplySortComparator(da, false, db, false, ssup);
 }
 
+
 /* initialize multi-dimensional sort */
 MultiSortSupport
 multi_sort_init(int ndims)
diff --git a/src/backend/utils/mvstats/dependencies.c b/src/backend/utils/mvstats/dependencies.c
index 84b6561..0a08d12 100644
--- a/src/backend/utils/mvstats/dependencies.c
+++ b/src/backend/utils/mvstats/dependencies.c
@@ -636,3 +636,27 @@ pg_mv_stats_dependencies_show(PG_FUNCTION_ARGS)
 
 	PG_RETURN_TEXT_P(cstring_to_text(result));
 }
+
+MVDependencies
+load_mv_dependencies(Oid mvoid)
+{
+	bool		isnull = false;
+	Datum		deps;
+
+	/* Prepare to scan pg_mv_statistic for entries having indrelid = this rel. */
+	HeapTuple	htup = SearchSysCache1(MVSTATOID, ObjectIdGetDatum(mvoid));
+
+#ifdef USE_ASSERT_CHECKING
+	Form_pg_mv_statistic	mvstat = (Form_pg_mv_statistic) GETSTRUCT(htup);
+	Assert(mvstat->deps_enabled && mvstat->deps_built);
+#endif
+
+	deps = SysCacheGetAttr(MVSTATOID, htup,
+						   Anum_pg_mv_statistic_stadeps, &isnull);
+
+	Assert(!isnull);
+
+	ReleaseSysCache(htup);
+
+	return deserialize_mv_dependencies(DatumGetByteaP(deps));
+}
diff --git a/src/include/utils/mvstats.h b/src/include/utils/mvstats.h
index 411cd16..02a7dda 100644
--- a/src/include/utils/mvstats.h
+++ b/src/include/utils/mvstats.h
@@ -16,12 +16,20 @@
 
 #include "commands/vacuum.h"
 
+/*
+ * Degree of how much MCV item / histogram bucket matches a clause.
+ * This is then considered when computing the selectivity.
+ */
+#define MVSTATS_MATCH_NONE		0		/* no match at all */
+#define MVSTATS_MATCH_PARTIAL	1		/* partial match */
+#define MVSTATS_MATCH_FULL		2		/* full match */
 
 #define MVSTATS_MAX_DIMENSIONS	8		/* max number of attributes */
 
-/* An associative rule, tracking [a => b] dependency.
- *
- * TODO Make this work with multiple columns on both sides.
+
+/*
+ * Functional dependencies, tracking column-level relationships (values
+ * in one column determine values in another one).
  */
 typedef struct MVDependencyData {
 	int16	a;
@@ -47,6 +55,8 @@ typedef MVDependenciesData* MVDependencies;
  *      stats specified using flags (or something like that).
  */
 
+MVDependencies load_mv_dependencies(Oid mvoid);
+
 bytea * serialize_mv_dependencies(MVDependencies dependencies);
 
 /* deserialization of stats (serialization is private to analyze) */
diff --git a/src/test/regress/expected/mv_dependencies.out b/src/test/regress/expected/mv_dependencies.out
new file mode 100644
index 0000000..e759997
--- /dev/null
+++ b/src/test/regress/expected/mv_dependencies.out
@@ -0,0 +1,172 @@
+-- data type passed by value
+CREATE TABLE functional_dependencies (
+    a INT,
+    b INT,
+    c INT
+);
+-- unknown column
+CREATE STATISTICS s1 ON functional_dependencies (unknown_column) WITH (dependencies);
+ERROR:  column "unknown_column" referenced in statistics does not exist
+-- single column
+CREATE STATISTICS s1 ON functional_dependencies (a) WITH (dependencies);
+ERROR:  multivariate stats require 2 or more columns
+-- single column, duplicated
+CREATE STATISTICS s1 ON functional_dependencies (a,a) WITH (dependencies);
+ERROR:  duplicate column name in statistics definition
+-- two columns, one duplicated
+CREATE STATISTICS s1 ON functional_dependencies (a, a, b) WITH (dependencies);
+ERROR:  duplicate column name in statistics definition
+-- unknown option
+CREATE STATISTICS s1 ON functional_dependencies (a, b, c) WITH (unknown_option);
+ERROR:  unrecognized STATISTICS option "unknown_option"
+-- correct command
+CREATE STATISTICS s1 ON functional_dependencies (a, b, c) WITH (dependencies);
+-- random data (no functional dependencies)
+INSERT INTO functional_dependencies
+     SELECT mod(i, 111), mod(i, 123), mod(i, 23) FROM generate_series(1,10000) s(i);
+ANALYZE functional_dependencies;
+SELECT deps_enabled, deps_built, pg_mv_stats_dependencies_show(stadeps)
+  FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+ deps_enabled | deps_built | pg_mv_stats_dependencies_show 
+--------------+------------+-------------------------------
+ t            | f          | 
+(1 row)
+
+TRUNCATE functional_dependencies;
+-- a => b, a => c, b => c
+INSERT INTO functional_dependencies
+     SELECT i/10, i/100, i/200 FROM generate_series(1,10000) s(i);
+ANALYZE functional_dependencies;
+SELECT deps_enabled, deps_built, pg_mv_stats_dependencies_show(stadeps)
+  FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+ deps_enabled | deps_built | pg_mv_stats_dependencies_show 
+--------------+------------+-------------------------------
+ t            | t          | 1 => 2, 1 => 3, 2 => 3
+(1 row)
+
+TRUNCATE functional_dependencies;
+-- a => b, a => c
+INSERT INTO functional_dependencies
+     SELECT i/10, i/150, i/200 FROM generate_series(1,10000) s(i);
+ANALYZE functional_dependencies;
+SELECT deps_enabled, deps_built, pg_mv_stats_dependencies_show(stadeps)
+  FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+ deps_enabled | deps_built | pg_mv_stats_dependencies_show 
+--------------+------------+-------------------------------
+ t            | t          | 1 => 2, 1 => 3
+(1 row)
+
+TRUNCATE functional_dependencies;
+-- check explain (expect bitmap index scan, not plain index scan)
+INSERT INTO functional_dependencies
+     SELECT i/10000, i/20000, i/40000 FROM generate_series(1,1000000) s(i);
+CREATE INDEX fdeps_idx ON functional_dependencies (a, b);
+ANALYZE functional_dependencies;
+SELECT deps_enabled, deps_built, pg_mv_stats_dependencies_show(stadeps)
+  FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+ deps_enabled | deps_built | pg_mv_stats_dependencies_show 
+--------------+------------+-------------------------------
+ t            | t          | 1 => 2, 1 => 3, 2 => 3
+(1 row)
+
+EXPLAIN (COSTS off)
+ SELECT * FROM functional_dependencies WHERE a = 10 AND b = 5;
+                 QUERY PLAN                  
+---------------------------------------------
+ Bitmap Heap Scan on functional_dependencies
+   Recheck Cond: ((a = 10) AND (b = 5))
+   ->  Bitmap Index Scan on fdeps_idx
+         Index Cond: ((a = 10) AND (b = 5))
+(4 rows)
+
+DROP TABLE functional_dependencies;
+-- varlena type (text)
+CREATE TABLE functional_dependencies (
+    a TEXT,
+    b TEXT,
+    c TEXT
+);
+CREATE STATISTICS s2 ON functional_dependencies (a, b, c) WITH (dependencies);
+-- random data (no functional dependencies)
+INSERT INTO functional_dependencies
+     SELECT mod(i, 111), mod(i, 123), mod(i, 23) FROM generate_series(1,10000) s(i);
+ANALYZE functional_dependencies;
+SELECT deps_enabled, deps_built, pg_mv_stats_dependencies_show(stadeps)
+  FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+ deps_enabled | deps_built | pg_mv_stats_dependencies_show 
+--------------+------------+-------------------------------
+ t            | f          | 
+(1 row)
+
+TRUNCATE functional_dependencies;
+-- a => b, a => c, b => c
+INSERT INTO functional_dependencies
+     SELECT i/10, i/100, i/200 FROM generate_series(1,10000) s(i);
+ANALYZE functional_dependencies;
+SELECT deps_enabled, deps_built, pg_mv_stats_dependencies_show(stadeps)
+  FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+ deps_enabled | deps_built | pg_mv_stats_dependencies_show 
+--------------+------------+-------------------------------
+ t            | t          | 1 => 2, 1 => 3, 2 => 3
+(1 row)
+
+TRUNCATE functional_dependencies;
+-- a => b, a => c
+INSERT INTO functional_dependencies
+     SELECT i/10, i/150, i/200 FROM generate_series(1,10000) s(i);
+ANALYZE functional_dependencies;
+SELECT deps_enabled, deps_built, pg_mv_stats_dependencies_show(stadeps)
+  FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+ deps_enabled | deps_built | pg_mv_stats_dependencies_show 
+--------------+------------+-------------------------------
+ t            | t          | 1 => 2, 1 => 3
+(1 row)
+
+TRUNCATE functional_dependencies;
+-- check explain (expect bitmap index scan, not plain index scan)
+INSERT INTO functional_dependencies
+     SELECT i/10000, i/20000, i/40000 FROM generate_series(1,1000000) s(i);
+CREATE INDEX fdeps_idx ON functional_dependencies (a, b);
+ANALYZE functional_dependencies;
+SELECT deps_enabled, deps_built, pg_mv_stats_dependencies_show(stadeps)
+  FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+ deps_enabled | deps_built | pg_mv_stats_dependencies_show 
+--------------+------------+-------------------------------
+ t            | t          | 1 => 2, 1 => 3, 2 => 3
+(1 row)
+
+EXPLAIN (COSTS off)
+ SELECT * FROM functional_dependencies WHERE a = '10' AND b = '5';
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Bitmap Heap Scan on functional_dependencies
+   Recheck Cond: ((a = '10'::text) AND (b = '5'::text))
+   ->  Bitmap Index Scan on fdeps_idx
+         Index Cond: ((a = '10'::text) AND (b = '5'::text))
+(4 rows)
+
+DROP TABLE functional_dependencies;
+-- NULL values (mix of int and text columns)
+CREATE TABLE functional_dependencies (
+    a INT,
+    b TEXT,
+    c INT,
+    d TEXT
+);
+CREATE STATISTICS s3 ON functional_dependencies (a, b, c, d) WITH (dependencies);
+INSERT INTO functional_dependencies
+     SELECT
+         mod(i, 100),
+         (CASE WHEN mod(i, 200) = 0 THEN NULL ELSE mod(i,200) END),
+         mod(i, 400),
+         (CASE WHEN mod(i, 300) = 0 THEN NULL ELSE mod(i,600) END)
+     FROM generate_series(1,10000) s(i);
+ANALYZE functional_dependencies;
+SELECT deps_enabled, deps_built, pg_mv_stats_dependencies_show(stadeps)
+  FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+ deps_enabled | deps_built |     pg_mv_stats_dependencies_show      
+--------------+------------+----------------------------------------
+ t            | t          | 2 => 1, 3 => 1, 3 => 2, 4 => 1, 4 => 2
+(1 row)
+
+DROP TABLE functional_dependencies;
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index b1bc7c7..81484f1 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -110,3 +110,6 @@ test: event_trigger
 
 # run stats by itself because its delay may be insufficient under heavy load
 test: stats
+
+# run tests of multivariate stats
+test: mv_dependencies
diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule
index ade9ef1..14ea574 100644
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -161,3 +161,4 @@ test: with
 test: xml
 test: event_trigger
 test: stats
+test: mv_dependencies
diff --git a/src/test/regress/sql/mv_dependencies.sql b/src/test/regress/sql/mv_dependencies.sql
new file mode 100644
index 0000000..48dea4d
--- /dev/null
+++ b/src/test/regress/sql/mv_dependencies.sql
@@ -0,0 +1,150 @@
+-- data type passed by value
+CREATE TABLE functional_dependencies (
+    a INT,
+    b INT,
+    c INT
+);
+
+-- unknown column
+CREATE STATISTICS s1 ON functional_dependencies (unknown_column) WITH (dependencies);
+
+-- single column
+CREATE STATISTICS s1 ON functional_dependencies (a) WITH (dependencies);
+
+-- single column, duplicated
+CREATE STATISTICS s1 ON functional_dependencies (a,a) WITH (dependencies);
+
+-- two columns, one duplicated
+CREATE STATISTICS s1 ON functional_dependencies (a, a, b) WITH (dependencies);
+
+-- unknown option
+CREATE STATISTICS s1 ON functional_dependencies (a, b, c) WITH (unknown_option);
+
+-- correct command
+CREATE STATISTICS s1 ON functional_dependencies (a, b, c) WITH (dependencies);
+
+-- random data (no functional dependencies)
+INSERT INTO functional_dependencies
+     SELECT mod(i, 111), mod(i, 123), mod(i, 23) FROM generate_series(1,10000) s(i);
+
+ANALYZE functional_dependencies;
+
+SELECT deps_enabled, deps_built, pg_mv_stats_dependencies_show(stadeps)
+  FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+
+TRUNCATE functional_dependencies;
+
+-- a => b, a => c, b => c
+INSERT INTO functional_dependencies
+     SELECT i/10, i/100, i/200 FROM generate_series(1,10000) s(i);
+
+ANALYZE functional_dependencies;
+
+SELECT deps_enabled, deps_built, pg_mv_stats_dependencies_show(stadeps)
+  FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+
+TRUNCATE functional_dependencies;
+
+-- a => b, a => c
+INSERT INTO functional_dependencies
+     SELECT i/10, i/150, i/200 FROM generate_series(1,10000) s(i);
+ANALYZE functional_dependencies;
+
+SELECT deps_enabled, deps_built, pg_mv_stats_dependencies_show(stadeps)
+  FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+
+TRUNCATE functional_dependencies;
+
+-- check explain (expect bitmap index scan, not plain index scan)
+INSERT INTO functional_dependencies
+     SELECT i/10000, i/20000, i/40000 FROM generate_series(1,1000000) s(i);
+CREATE INDEX fdeps_idx ON functional_dependencies (a, b);
+ANALYZE functional_dependencies;
+
+SELECT deps_enabled, deps_built, pg_mv_stats_dependencies_show(stadeps)
+  FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+
+EXPLAIN (COSTS off)
+ SELECT * FROM functional_dependencies WHERE a = 10 AND b = 5;
+
+DROP TABLE functional_dependencies;
+
+-- varlena type (text)
+CREATE TABLE functional_dependencies (
+    a TEXT,
+    b TEXT,
+    c TEXT
+);
+
+CREATE STATISTICS s2 ON functional_dependencies (a, b, c) WITH (dependencies);
+
+-- random data (no functional dependencies)
+INSERT INTO functional_dependencies
+     SELECT mod(i, 111), mod(i, 123), mod(i, 23) FROM generate_series(1,10000) s(i);
+
+ANALYZE functional_dependencies;
+
+SELECT deps_enabled, deps_built, pg_mv_stats_dependencies_show(stadeps)
+  FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+
+TRUNCATE functional_dependencies;
+
+-- a => b, a => c, b => c
+INSERT INTO functional_dependencies
+     SELECT i/10, i/100, i/200 FROM generate_series(1,10000) s(i);
+
+ANALYZE functional_dependencies;
+
+SELECT deps_enabled, deps_built, pg_mv_stats_dependencies_show(stadeps)
+  FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+
+TRUNCATE functional_dependencies;
+
+-- a => b, a => c
+INSERT INTO functional_dependencies
+     SELECT i/10, i/150, i/200 FROM generate_series(1,10000) s(i);
+ANALYZE functional_dependencies;
+
+SELECT deps_enabled, deps_built, pg_mv_stats_dependencies_show(stadeps)
+  FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+
+TRUNCATE functional_dependencies;
+
+-- check explain (expect bitmap index scan, not plain index scan)
+INSERT INTO functional_dependencies
+     SELECT i/10000, i/20000, i/40000 FROM generate_series(1,1000000) s(i);
+CREATE INDEX fdeps_idx ON functional_dependencies (a, b);
+ANALYZE functional_dependencies;
+
+SELECT deps_enabled, deps_built, pg_mv_stats_dependencies_show(stadeps)
+  FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+
+EXPLAIN (COSTS off)
+ SELECT * FROM functional_dependencies WHERE a = '10' AND b = '5';
+
+DROP TABLE functional_dependencies;
+
+-- NULL values (mix of int and text columns)
+CREATE TABLE functional_dependencies (
+    a INT,
+    b TEXT,
+    c INT,
+    d TEXT
+);
+
+CREATE STATISTICS s3 ON functional_dependencies (a, b, c, d) WITH (dependencies);
+
+INSERT INTO functional_dependencies
+     SELECT
+         mod(i, 100),
+         (CASE WHEN mod(i, 200) = 0 THEN NULL ELSE mod(i,200) END),
+         mod(i, 400),
+         (CASE WHEN mod(i, 300) = 0 THEN NULL ELSE mod(i,600) END)
+     FROM generate_series(1,10000) s(i);
+
+ANALYZE functional_dependencies;
+
+SELECT deps_enabled, deps_built, pg_mv_stats_dependencies_show(stadeps)
+  FROM pg_mv_statistic WHERE starelid = 'functional_dependencies'::regclass;
+
+DROP TABLE functional_dependencies;
-- 
2.1.0