>From db24cc534985ce97b238ea539b4216d8e33397a5 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas@pgaddict.com>
Date: Fri, 6 Feb 2015 01:42:38 +0100
Subject: [PATCH 5/5] multi-statistics estimation

The general idea is that a probability (which
is what selectivity is) can be split into a product of
conditional probabilities like this:

    P(A & B & C) = P(A & B) * P(C|A & B)

If we assume that C and B are independent, the last part
may be simplified like this

    P(A & B & C) = P(A & B) * P(C|A)

we only need probabilities on [A,B] and [C,A] to compute
the original probability.

The implementation works in the other direction, though.
We know what probability P(A & B & C) we need to compute,
and also what statistics are available.

So we search for a combinations of statistics, covering
the clauses in an optimal way (most clauses covered, most
dependencies exploited).

There are two possible approaches - exhaustive and greedy.
The exhaustive one walks through all permutations of
stats using dynamic programming, so it's guaranteed to
find the optimal solution, but it soon gets very slow as
it's roughly O(N!). The dynamic programming may improve
that a bit, but it's still far too expensive for large
numbers of statistics (on a single table).

The greedy algorithm is very simple - in every step choose
the best solution. That may not guarantee the best solution
globally (but maybe it does?), but it only needs N steps
to find the solution, so it's very fast (processing the
selected stats is usually way more expensive).

There's a GUC for selecting the search algorithm

    mvstat_search = {'greedy', 'exhaustive'}

The default value is 'greedy' as that's much safer (with
respect to runtime). See choose_mv_statistics().

Once we have found a sequence of statistics, we apply
them to the clauses using the conditional probabilities.
We process the selected stats one by one, and for each
we select the estimated clauses and conditions. See
clauselist_selectivity() for more details.

Limitations
-----------

It's still true that each clause at a given level has to
be covered by a single MV statistics. So with this query

    WHERE (clause1) AND (clause2) AND (clause3 OR clause4)

each parenthesized clause has to be covered by a single
multivariate statistics.

Clauses not covered by a single statistics at this level
will be passed to clause_selectivity() but this will treat
them as a collection of simpler clauses (connected by AND
or OR), and the clauses from the previous level will be
used as conditions.

So using the same example, the last clause will be passed
to clause_selectivity() with 'clause1' and 'clause2' as
conditions, and it will be processed using multivariate
stats if possible.

The other limitation is that all the expressions have to
be mv-compatible, i.e. there can't be a mix of expressions.
Fixing this should be relatively simple - just split the
list into two parts (mv-compatible/incompatible), as at
the top level.
---
 src/backend/optimizer/path/clausesel.c | 1533 ++++++++++++++++++++++++++++++--
 src/backend/optimizer/path/costsize.c  |   23 +-
 src/backend/optimizer/util/orclauses.c |    4 +-
 src/backend/utils/adt/selfuncs.c       |   17 +-
 src/backend/utils/misc/guc.c           |   20 +
 src/include/optimizer/cost.h           |    6 +-
 src/include/utils/mvstats.h            |    8 +
 7 files changed, 1513 insertions(+), 98 deletions(-)

diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c
index ea4d588..98ad802 100644
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -30,7 +30,7 @@
 #include "utils/typcache.h"
 
 #include "parser/parsetree.h"
-
+#include "miscadmin.h"
 
 #include <stdio.h>
 
@@ -63,23 +63,25 @@ static Bitmapset  *collect_mv_attnums(PlannerInfo *root, List *clauses,
 									  Oid varRelid, Oid *relid, SpecialJoinInfo *sjinfo,
 									  int type);
 
+static Bitmapset *clause_mv_get_attnums(PlannerInfo *root, Node *clause);
+
 static List *clauselist_apply_dependencies(PlannerInfo *root, List *clauses,
 								Oid varRelid, int nmvstats, MVStats mvstats,
 								SpecialJoinInfo *sjinfo);
 
-static int choose_mv_statistics(int nmvstats, MVStats mvstats,
-								Bitmapset *attnums);
 static List *clauselist_mv_split(PlannerInfo *root, SpecialJoinInfo *sjinfo,
 								 List *clauses, Oid varRelid,
 								 List **mvclauses, MVStats mvstats, int types);
 
 static Selectivity clauselist_mv_selectivity(PlannerInfo *root,
-						List *clauses, MVStats mvstats);
+						MVStats mvstats, List *clauses, List *conditions);
 static Selectivity clauselist_mv_selectivity_mcvlist(PlannerInfo *root,
-									List *clauses, MVStats mvstats,
+									MVStats mvstats,
+									List *clauses, List *conditions,
 									bool *fullmatch, Selectivity *lowsel);
 static Selectivity clauselist_mv_selectivity_histogram(PlannerInfo *root,
-									List *clauses, MVStats mvstats);
+									MVStats mvstats,
+									List *clauses, List *conditions);
 
 static int update_match_bitmap_mcvlist(PlannerInfo *root, List *clauses,
 									int2vector *stakeys, MCVList mcvlist,
@@ -92,6 +94,31 @@ static int update_match_bitmap_histogram(PlannerInfo *root, List *clauses,
 									int nmatches, char * matches,
 									bool is_or);
 
+/*
+ * Describes a combination of multiple statistics to cover attributes
+ * referenced by the clauses. The array 'stats' (with nstats elements)
+ * lists attributes (in the order as they are applied), and number of
+ * clause attributes covered by this solution.
+ *
+ * choose_mv_statistics_exhaustive() uses this to track both the current
+ * and the best solutions, while walking through the state of possible
+ * combination.
+ */
+typedef struct mv_solution_t {
+	int		nclauses;		/* number of clauses covered */
+	int		nconditions;	/* number of conditions covered */
+	int		nstats;			/* number of stats applied */
+	int	   *stats;			/* stats (in the apply order) */
+} mv_solution_t;
+
+static mv_solution_t *choose_mv_statistics(PlannerInfo *root,
+								int nmvstats, MVStats mvstats,
+								List *clauses, List *conditions,
+								Oid varRelid,
+								SpecialJoinInfo *sjinfo, int type);
+
+int mvstat_search_type = MVSTAT_SEARCH_GREEDY;
+
 /* used for merging bitmaps - AND (min), OR (max) */
 #define MAX(x, y) (((x) > (y)) ? (x) : (y))
 #define MIN(x, y) (((x) < (y)) ? (x) : (y))
@@ -220,7 +247,8 @@ clauselist_selectivity(PlannerInfo *root,
 					   List *clauses,
 					   int varRelid,
 					   JoinType jointype,
-					   SpecialJoinInfo *sjinfo)
+					   SpecialJoinInfo *sjinfo,
+					   List *conditions)
 {
 	Selectivity s1 = 1.0;
 	RangeQueryClause *rqlist = NULL;
@@ -234,13 +262,8 @@ clauselist_selectivity(PlannerInfo *root,
 	/* attributes in mv-compatible clauses */
 	Bitmapset  *mvattnums = NULL;
 
-	/*
-	 * If there's exactly one clause, then no use in trying to match up
-	 * pairs, so just go directly to clause_selectivity().
-	 */
-	if (list_length(clauses) == 1)
-		return clause_selectivity(root, (Node *) linitial(clauses),
-								  varRelid, jointype, sjinfo);
+	/* local conditions, accumulated and passed to clauses in this list */
+	List	*conditions_local = list_copy(conditions);
 
 	/*
 	 * Collect attributes referenced by mv-compatible clauses (looking
@@ -288,30 +311,185 @@ clauselist_selectivity(PlannerInfo *root,
 		/* see choose_mv_statistics() for details */
 		if (nmvstats > 0)
 		{
-			int idx = choose_mv_statistics(nmvstats, mvstats, mvattnums);
+			mv_solution_t * solution
+				= choose_mv_statistics(root, nmvstats, mvstats,
+									   clauses, conditions,
+									   varRelid, sjinfo,
+									   (MV_CLAUSE_TYPE_MCV | MV_CLAUSE_TYPE_HIST));
 
-			if (idx >= 0)	/* we have a matching stats */
+			/*
+			 * FIXME This probaly leaks memory a bit - the lists of clauses
+			 *       should be freed properly.
+			 */
+
+			/* we have a good solution stats */
+			if (solution != NULL)
 			{
-				MVStats mvstat = &mvstats[idx];
+				int i, j, k;
+
+				for (i = 0; i < solution->nstats; i++)
+				{
+					/* clauses compatible with multi-variate stats */
+					List	*mvclauses = NIL;
+					List	*mvclauses_new = NIL;
+					List	*mvclauses_conditions = NIL;
+					Bitmapset	*stat_attnums = NULL;
+
+					MVStats	mvstat = &mvstats[solution->stats[i]];
+
+					/* build attnum bitmapset for this statistics */
+					for (k = 0; k < mvstat->stakeys->dim1; k++)
+						stat_attnums = bms_add_member(stat_attnums,
+													  mvstat->stakeys->values[k]);
+
+					/*
+					 * Append the compatible conditions (passed from above)
+					 * to mvclauses_conditions.
+					 */
+					foreach (l, conditions)
+					{
+						Node *c = (Node*)lfirst(l);
+						Bitmapset *tmp = clause_mv_get_attnums(root, c);
 
-				/* clauses compatible with multi-variate stats */
-				List	*mvclauses = NIL;
+						if (bms_is_subset(tmp, stat_attnums))
+							mvclauses_conditions
+								= lappend(mvclauses_conditions, c);
+
+						bms_free(tmp);
+					}
+
+					/* split the clauselist into regular and mv-clauses
+					 *
+					 * We keep the list of clauses (we don't remove the
+					 * clauses yet, because we want to use the clauses
+					 * as conditions of other clauses).
+					 *
+					 * FIXME Do this only once, i.e. filter the clauses
+					 *       once (selecting clauses covered by at least
+					 *       one statistics) and then convert them into
+					 *       smaller per-statistics lists of conditions
+					 *       and estimated clauses.
+					 */
+					clauselist_mv_split(root, sjinfo, clauses,
+											varRelid, &mvclauses, mvstat,
+											(MV_CLAUSE_TYPE_MCV | MV_CLAUSE_TYPE_HIST));
+
+					/*
+					 * We've chosen the statistics to match the clauses, so
+					 * each statistics from the solution should have at least
+					 * one new clause (not covered by the previous stats).
+					 */
+					Assert(mvclauses != NIL);
+
+					/*
+					 * Mvclauses now contains only clauses compatible
+					 * with the currently selected stats, but we have to
+					 * split that into conditions (already matched by
+					 * the previous stats), and the new clauses we need
+					 * to estimate using this stats.
+					 */
+					foreach (l, mvclauses)
+					{
+						bool covered = false;
+						Node  *clause = (Node *) lfirst(l);
+						Bitmapset *clause_attnums = clause_mv_get_attnums(root, clause);
+
+						/*
+						 * If already covered by previous stats, add it to
+						 * conditions.
+						 *
+						 * TODO Maybe this could be relaxed a bit? Because
+						 *      with complex and/or clauses, this might
+						 *      mean no statistics actually covers such
+						 *      complex clause.
+						 */
+						for (j = 0; j < i; j++)
+						{
+							int k;
+							Bitmapset  *stat_attnums = NULL;
+							MVStats		prev_stat = &mvstats[solution->stats[j]];
+
+							for (k = 0; k < prev_stat->stakeys->dim1; k++)
+								stat_attnums = bms_add_member(stat_attnums,
+															  prev_stat->stakeys->values[k]);
+
+							covered = bms_is_subset(clause_attnums, stat_attnums);
+
+							bms_free(stat_attnums);
+
+							if (covered)
+								break;
+						}
+
+						if (covered)
+							mvclauses_conditions
+								= lappend(mvclauses_conditions, clause);
+						else
+							mvclauses_new
+								= lappend(mvclauses_new, clause);
+					}
+
+					/*
+					 * We need at least one new clause (not just conditions).
+					 */
+					Assert(mvclauses_new != NIL);
+
+					/* compute the multivariate stats */
+					s1 *= clauselist_mv_selectivity(root, mvstat,
+													mvclauses_new,
+													mvclauses_conditions);
+				}
+
+				/*
+				 * And now finally remove all the mv-compatible clauses.
+				 *
+				 * This only repeats the same split as above, but this
+				 * time we actually use the result list (and feed it to
+				 * the next call).
+				 */
+				for (i = 0; i < solution->nstats; i++)
+				{
+					/* clauses compatible with multi-variate stats */
+					List	*mvclauses = NIL;
 
-				/* split the clauselist into regular and mv-clauses */
-				clauses = clauselist_mv_split(root, sjinfo, clauses,
-										varRelid, &mvclauses, mvstat,
-										(MV_CLAUSE_TYPE_MCV | MV_CLAUSE_TYPE_HIST));
+					MVStats	mvstat = &mvstats[solution->stats[i]];
 
-				/* we've chosen the histogram to match the clauses */
-				Assert(mvclauses != NIL);
+					/* split the list into regular and mv-clauses */
+					clauses = clauselist_mv_split(root, sjinfo, clauses,
+											varRelid, &mvclauses, mvstat,
+											(MV_CLAUSE_TYPE_MCV | MV_CLAUSE_TYPE_HIST));
 
-				/* compute the multivariate stats */
-				s1 *= clauselist_mv_selectivity(root, mvclauses, mvstat);
+					/*
+					 * Add the clauses to the conditions (to be passed
+					 * to regular clauses), irrespectedly whether it
+					 * will be used as a condition or a clause here.
+					 *
+					 * We only keep the remaining conditions in the
+					 * clauses (we keep what clauselist_mv_split returns)
+					 * so we add each MV condition exactly once.
+					 */
+					foreach (l, mvclauses)
+						conditions_local = lappend(conditions_local,
+												   (Node*)lfirst(l));
+				}
 			}
 		}
 	}
 
 	/*
+	 * If there's exactly one clause, then no use in trying to match up
+	 * pairs, so just go directly to clause_selectivity().
+	 */
+	if (list_length(clauses) == 1)
+	{
+		Selectivity s = clause_selectivity(root, (Node *) linitial(clauses),
+								  varRelid, jointype, sjinfo,
+								  conditions_local);
+		list_free(conditions_local);
+		return s;
+	}
+
+	/*
 	 * Initial scan over clauses.  Anything that doesn't look like a potential
 	 * rangequery clause gets multiplied into s1 and forgotten. Anything that
 	 * does gets inserted into an rqlist entry.
@@ -323,7 +501,8 @@ clauselist_selectivity(PlannerInfo *root,
 		Selectivity s2;
 
 		/* Always compute the selectivity using clause_selectivity */
-		s2 = clause_selectivity(root, clause, varRelid, jointype, sjinfo);
+		s2 = clause_selectivity(root, clause, varRelid, jointype, sjinfo,
+								conditions_local);
 
 		/*
 		 * Check for being passed a RestrictInfo.
@@ -478,6 +657,9 @@ clauselist_selectivity(PlannerInfo *root,
 		rqlist = rqnext;
 	}
 
+	/* free the local conditions */
+	list_free(conditions_local);
+
 	return s1;
 }
 
@@ -688,7 +870,8 @@ clause_selectivity(PlannerInfo *root,
 				   Node *clause,
 				   int varRelid,
 				   JoinType jointype,
-				   SpecialJoinInfo *sjinfo)
+				   SpecialJoinInfo *sjinfo,
+				   List *conditions)
 {
 	Selectivity s1 = 0.5;		/* default for any unhandled clause type */
 	RestrictInfo *rinfo = NULL;
@@ -818,7 +1001,8 @@ clause_selectivity(PlannerInfo *root,
 								  (Node *) get_notclausearg((Expr *) clause),
 									  varRelid,
 									  jointype,
-									  sjinfo);
+									  sjinfo,
+									  conditions);
 	}
 	else if (and_clause(clause))
 	{
@@ -827,7 +1011,8 @@ clause_selectivity(PlannerInfo *root,
 									((BoolExpr *) clause)->args,
 									varRelid,
 									jointype,
-									sjinfo);
+									sjinfo,
+									conditions);
 	}
 	else if (or_clause(clause))
 	{
@@ -839,6 +1024,13 @@ clause_selectivity(PlannerInfo *root,
 		 */
 		ListCell   *arg;
 
+		/* TODO Split the clause list into mv-compatible part, pretty
+		 *      much just like in clauselist_selectivity(), and call
+		 *      clauselist_mv_selectivity(). It has to be taught about
+		 *      OR-semantics (right now it assumes AND) or maybe just
+		 *      create a fake OR clause here, and pass it in.
+		 */
+
 		s1 = 0.0;
 		foreach(arg, ((BoolExpr *) clause)->args)
 		{
@@ -846,7 +1038,8 @@ clause_selectivity(PlannerInfo *root,
 												(Node *) lfirst(arg),
 												varRelid,
 												jointype,
-												sjinfo);
+												sjinfo,
+												conditions);
 
 			s1 = s1 + s2 - s1 * s2;
 		}
@@ -958,7 +1151,8 @@ clause_selectivity(PlannerInfo *root,
 								(Node *) ((RelabelType *) clause)->arg,
 								varRelid,
 								jointype,
-								sjinfo);
+								sjinfo,
+								conditions);
 	}
 	else if (IsA(clause, CoerceToDomain))
 	{
@@ -967,7 +1161,8 @@ clause_selectivity(PlannerInfo *root,
 								(Node *) ((CoerceToDomain *) clause)->arg,
 								varRelid,
 								jointype,
-								sjinfo);
+								sjinfo,
+								conditions);
 	}
 
 	/* Cache the result if possible */
@@ -1149,9 +1344,67 @@ clause_selectivity(PlannerInfo *root,
  *      that from the most selective clauses first, because that'll
  *      eliminate the buckets/items sooner (so we'll be able to skip
  *      them without inspection, which is more expensive).
+ *
+ * TODO All this is based on the assumption that the statistics represent
+ *      the necessary dependencies, i.e. that if two colunms are not in
+ *      the same statistics, there's no dependency. If that's not the
+ *      case, we may get misestimates, just like before. For example
+ *      assume we have a table with three columns [a,b,c] with exactly
+ *      the same values, and statistics on [a,b] and [b,c]. So somthing
+ *      like this:
+ *
+ *          CREATE TABLE test AS SELECT i, i, i
+                                  FROM generate_series(1,1000);
+ *
+ *          ALTER TABLE test ADD STATISTICS (mcv) ON (a,b);
+ *          ALTER TABLE test ADD STATISTICS (mcv) ON (b,c);
+ *
+ *          ANALYZE test;
+ *
+ *          EXPLAIN ANALYZE SELECT * FROM test
+ *                    WHERE (a < 10) AND (b < 20) AND (c < 10);
+ *
+ *      The problem here is that the only shared column between the two
+ *      statistics is 'b' so the probability will be computed like this
+ *
+ *          P[(a < 10) & (b < 20) & (c < 10)]
+ *             = P[(a < 10) & (b < 20)] * P[(c < 10) | (a < 10) & (b < 20)]
+ *             = P[(a < 10) & (b < 20)] * P[(c < 10) | (b < 20)]
+ *
+ *      or like this
+ *
+ *          P[(a < 10) & (b < 20) & (c < 10)]
+ *             = P[(b < 20) & (c < 10)] * P[(a < 10) | (b < 20) & (c < 10)]
+ *             = P[(b < 20) & (c < 10)] * P[(a < 10) | (b < 20)]
+ *
+ *      In both cases the conditional probabilities will be evaluated as
+ *      0.5, because they lack the other column (which would make it 1.0).
+ *
+ *      Theoretically it might be possible to transfer the dependency,
+ *      e.g. by building bitmap for [a,b] and then combine it with [b,c]
+ *      by doing something like this:
+ *
+ *          1) build bitmap on [a,b] using [(a<10) & (b < 20)]
+ *          2) for each element in [b,c] check the bitmap
+ *
+ *      But that's certainly nontrivial - for example the statistics may
+ *      be different (MCV list vs. histogram) and/or the items may not
+ *      match (e.g. MCV items or histogram buckets will be built
+ *      differently). Also, for one value of 'b' there might be multiple
+ *      MCV items (because of the other column values) with different
+ *      bitmap values (some will match, some won't) - so it's not exactly
+ *      bitmap but a partial match.
+ *
+ *      Maybe a hash table with number of matches and mismatches (or
+ *      maybe sums of frequencies) would work? The step (2) would then
+ *      lookup the values and use that to weight the item somehow.
+ * 
+ *      Currently the only solution is to build statistics on all three
+ *      columns.
  */
 static Selectivity
-clauselist_mv_selectivity(PlannerInfo *root, List *clauses, MVStats mvstats)
+clauselist_mv_selectivity(PlannerInfo *root, MVStats mvstats,
+						  List *clauses, List *conditions)
 {
 	bool fullmatch = false;
 	Selectivity s1 = 0.0, s2 = 0.0;
@@ -1169,7 +1422,8 @@ clauselist_mv_selectivity(PlannerInfo *root, List *clauses, MVStats mvstats)
 	 */
 
 	/* Evaluate the MCV first. */
-	s1 = clauselist_mv_selectivity_mcvlist(root, clauses, mvstats,
+	s1 = clauselist_mv_selectivity_mcvlist(root, mvstats,
+										   clauses, conditions,
 										   &fullmatch, &mcv_low);
 
 	/*
@@ -1182,7 +1436,8 @@ clauselist_mv_selectivity(PlannerInfo *root, List *clauses, MVStats mvstats)
 	/* FIXME if (fullmatch) without matching MCV item, use the mcv_low
 	 *       selectivity as upper bound */
 
-	s2 = clauselist_mv_selectivity_histogram(root, clauses, mvstats);
+	s2 = clauselist_mv_selectivity_histogram(root, mvstats,
+											 clauses, conditions);
 
 	/* TODO clamp to <= 1.0 (or more strictly, when possible) */
 	return s1 + s2;
@@ -1232,6 +1487,665 @@ collect_mv_attnums(PlannerInfo *root, List *clauses, Oid varRelid,
 }
 
 /*
+ * Selects the best combination of multivariate statistics, where
+ * 'best' means:
+ *
+ * (a) covering the most attributes (referenced by clauses)
+ * (b) using the least number of multivariate stats
+ *
+ * There may be other optimality criteria, not considered in the initial
+ * implementation (more on that 'weaknesses' section).
+ *
+ * This is pretty much equal to splitting the probability of clauses
+ * (aka selectivity) into a sequence of conditional probabilities, like
+ * this
+ *
+ *    P(A,B,C,D) = P(A,B) * P(C|A,B) * P(D|A,B,C)
+ *
+ * and removing the attributes not referenced by the existing stats,
+ * under the assumption that there's no dependency (otherwise the DBA
+ * would create the stats).
+ *
+ *
+ * Algorithm
+ * ---------
+ * The algorithm is a recursive implementation of backtracking, with
+ * maximum 'depth' equal to the number of multi-variate statistics
+ * available on the table.
+ *
+ * It explores all the possible permutations of the stats.
+ * 
+ * Whenever it considers adding the next statistics, the clauses it
+ * matches are divided into 'conditions' (clauses already matched by at
+ * least one previous statistics) and clauses that are estimated.
+ *
+ * Then several checks are performed:
+ *
+ *  (a) The statistics covers at least 2 columns, referenced in the
+ *      estimated clauses (otherwise multi-variate stats are useless).
+ *
+ *  (b) The statistics covers at least 1 new column, i.e. column not
+ *      refefenced by the already used stats (and the new column has
+ *      to be referenced by the clauses, of couse). Otherwise the
+ *      statistics would not add any new information.
+ *
+ * There are some other sanity checks (e.g. that the stats must not be
+ * used twice etc.).
+ *
+ * Finally the new solution is compared to the currently best one, and
+ * if it's considered better, it's used instead.
+ *
+ *
+ * Weaknesses
+ * ----------
+ * The current implemetation uses a somewhat simple optimality criteria,
+ * suffering by the following weaknesses.
+ *
+ * (a) There may be multiple solutions with the same number of covered
+ *     attributes and number of statistics (e.g. the same solution but
+ *     with statistics in a different order). It's unclear which solution
+ *     is the best one - in a sense all of them are equal.
+ *
+ * TODO It might be possible to compute estimate for each of those
+ *      solutions, and then combine them to get the final estimate
+ *      (e.g. by using average or median).
+ *
+ * (b) Does not consider that some types of stats are a better match for
+ *     some types of clauses (e.g. MCV list is a good match for equality
+ *     than a histogram).
+ *
+ *     XXX Maybe MCV is almost always better / more accurate?
+ *
+ *     But maybe this is pointless - generally, each column is either
+ *     a label (it's not important whether because of the data type or
+ *     how it's used), or a value with ordering that makes sense. So
+ *     either a MCV list is more appropriate (labels) or a histogram
+ *     (values with orderings).
+ *
+ *     Now sure what to do with statistics on columns mixing columns of
+ *     both types - maybe it'd be beeter to invent a new type of stats
+ *     combining MCV list and histogram (keeping a small histogram for
+ *     each MCV item, and a separate histogram for values not on the
+ *     MCV list). But that's not implemented at this moment.
+ *
+ * (c) Does not consider that some solutions may better exploit the
+ *     dependencies. For example with clauses on columns [A,B,C,D] and
+ *     statistics on [A,B,C] and [C,D] cover all the columns just like
+ *     [A,B,C] and [B,C,D], but the latter probably exploits additional
+ *     dependencies thanks to having 'B' in both stats (thus allowing
+ *     using it as a condition for the second stats). Of course, if
+ *     B and [C,D] are independent, this is untrue - but if we have that
+ *     statistics created, it's a sign that the DBA/developer believes
+ *     there's a dependency.
+ *
+ * (d) Does not consider the order of clauses, which may be significant.
+ *     For example, when there's a mix of simple and complex clauses,
+ *     i.e. something like
+ *
+ *       (a=2) AND (b=3 OR (c=3 AND d=4)) AND (c=3)
+ *
+ *     It may be better to evaluate the simple clauses first, and then
+ *     use them as conditions for the complex clause.
+ *
+ *     We can for example count number of different attributes
+ *     referenced in the clause, and use that as a metric of complexity
+ *     (lower number -> simpler). Maybe use ratio (#vars/#atts) or
+ *     (#clauses/#atts) as secondary metrics? Also the general complexity
+ *     of the clause (levels of nesting etc.) might be useful.
+ *
+ *     Hopefully most clauses will be reasonably simple, though.
+ *
+ *     Update: On second thought, I believe the order of clauses is
+ *     determined by choosing the order of statistics, and therefore
+ *     optimized by the current algorithm.
+ *
+ * TODO Consider adding a counter of attributes covered by previous
+ *      stats (possibly tracking the number of how many stats reference
+ *      it too), and use this 'dependency_count' when selecting the best
+ *      solution (not sure how). Similarly to (a) it might be possible
+ *      to build estimate for each solution (different criteria) and then
+ *      combine them somehow.
+ *
+ * TODO The current implementation repeatedly walks through the previous
+ *      stats, just to compute the number of covered attributes over and
+ *      over. With non-trivial number of statistics this might be an
+ *      issue, so maybe we should keep track of 'covered' attributes by
+ *      each step, so that we can get rid of this. We'll need this
+ *      information anyway (when splitting clauses into condition and
+ *      the estimated part).
+ *
+ * TODO This needs to consider the conditions passed from the preceding
+ *      and upper clauses (in complex cases), but only as conditions
+ *      and not as estimated clauses. So it needs to somehow affect the
+ *      score (the more conditions we use the better).
+ *
+ * TODO The algorithm should probably count number of Vars (not just
+ *      attnums) when computing the 'score' of each solution. Computing
+ *      the ratio of (num of all vars) / (num of condition vars) as a
+ *      measure of how well the solution uses conditions might be
+ *      useful.
+ *
+ * TODO This might be much easier if we kept Bitmapset of attributes
+ *      covered by the stats up to that step.
+ *
+ * FIXME When comparing the solutions, we currently use this condition:
+ *
+ *       ((current->nstats > (*best)->nstats))
+ *
+ *       i.e. we're choosing solution with more stats, because with
+ *       clauses
+ *
+ *           (a = 1) AND (b = 1) AND (c = 1) AND (d = 1)
+ *
+ *       and stats on [a,b], [b,c], [c,d] we want to choose the solution
+ *       with all three stats, and not just [a,b], [c,d]. Otherwise we'd
+ *       fail to exploit one of the dependencies.
+ *
+ *       This is however a workaround for another issue - we're not
+ *       tracking number of 'dependencies' covered by the solution, only
+ *       number of clauses, and that's the same for both solutions.
+ *       ([a,b], [c,d]) and ([a,b], [b,c], [c,d]) both cover all 4 clauses.
+ *
+ *       Once a suitable metric is added, we want to choose the solution
+ *       with less stats, assuming it covers the same number of clauses
+ *       and exploits the same number of dependencies.
+ */
+static void
+choose_mv_statistics_exhaustive(PlannerInfo *root, int step,
+					int nmvstats, MVStats mvstats, Bitmapset ** stats_attnums,
+					int nclauses, Node ** clauses, Bitmapset ** clauses_attnums,
+					int nconditions, Node ** conditions, Bitmapset ** conditions_attnums,
+					bool *cover_map, bool *condition_map, int *ruled_out,
+					mv_solution_t *current, mv_solution_t **best)
+{
+	int i, j;
+
+	Assert(best != NULL);
+	Assert((step == 0 && current == NULL) || (step > 0 && current != NULL));
+
+	CHECK_FOR_INTERRUPTS();
+
+	if (current == NULL)
+	{
+		current = (mv_solution_t*)palloc0(sizeof(mv_solution_t));
+		current->stats = (int*)palloc0(sizeof(int)*nmvstats);
+		current->nstats = 0;
+		current->nclauses = 0;
+		current->nconditions = 0;
+	}
+
+	/*
+	 * Now try to apply each statistics, matching at least two attributes,
+	 * unless it's already used in one of the previous steps.
+	 */
+	for (i = 0; i < nmvstats; i++)
+	{
+		int c;
+
+		int ncovered_clauses = 0;		/* number of covered clauses */
+		int ncovered_conditions = 0;	/* number of covered conditions */
+		int nattnums = 0;		/* number of covered attributes */
+
+		Bitmapset  *all_attnums = NULL;
+		Bitmapset  *new_attnums = NULL;
+
+		/* skip statistics that were already used or eliminated */
+		if (ruled_out[i] != -1)
+			continue;
+
+		/*
+		 * See if we have clauses covered by this statistics, but not
+		 * yet covered by any of the preceding onces.
+		 */
+		for (c = 0; c < nclauses; c++)
+		{
+			bool covered = false;
+			Bitmapset *clause_attnums = clauses_attnums[c];
+			Bitmapset *tmp = NULL;
+
+			/*
+			 * If this clause is not covered by this stats, we can't
+			 * use the stats to estimate that at all.
+			 */
+			if (! cover_map[i * nclauses + c])
+				continue;
+
+			/*
+			 * Now we know we'll use this clause - either as a condition
+			 * or as a new clause (the estimated one). So let's add the
+			 * attributes to the attnums from all the clauses usable with
+			 * this statistics.
+			 */
+			tmp = bms_union(all_attnums, clause_attnums);
+
+			/* free the old bitmap */
+			bms_free(all_attnums);
+			all_attnums = tmp;
+
+			/* let's see if it's covered by any of the previous stats */
+			for (j = 0; j < step; j++)
+			{
+				/* already covered by the previous stats */
+				if (cover_map[current->stats[j] * nclauses + c])
+					covered = true;
+
+				if (covered)
+					break;
+			}
+
+			/* if already covered, continue with the next clause */
+			if (covered)
+			{
+				ncovered_conditions += 1;
+				continue;
+			}
+
+			/*
+			 * OK, this clause is covered by this statistics (and not by
+			 * any of the previous ones)
+			 */
+			ncovered_clauses += 1;
+
+			/* add the attnums into attnums from 'new clauses' */
+			// new_attnums = bms_union(new_attnums, clause_attnums);
+		}
+
+		/* can't have more new clauses than original clauses */
+		Assert(nclauses >= ncovered_clauses);
+		Assert(ncovered_clauses >= 0);	/* mostly paranoia */
+
+		nattnums = bms_num_members(all_attnums);
+
+		/* free all the bitmapsets - we don't need them anymore */
+		bms_free(all_attnums);
+		bms_free(new_attnums);
+
+		all_attnums = NULL;
+		new_attnums = NULL;
+
+		/*
+		 * See if we have clauses covered by this statistics, but not
+		 * yet covered by any of the preceding onces.
+		 */
+		for (c = 0; c < nconditions; c++)
+		{
+			Bitmapset *clause_attnums = conditions_attnums[c];
+			Bitmapset *tmp = NULL;
+
+			/*
+			 * If this clause is not covered by this stats, we can't
+			 * use the stats to estimate that at all.
+			 */
+			if (! condition_map[i * nconditions + c])
+				continue;
+
+			/* count this as a condition */
+			ncovered_conditions += 1;
+
+			/*
+			 * Now we know we'll use this clause - either as a condition
+			 * or as a new clause (the estimated one). So let's add the
+			 * attributes to the attnums from all the clauses usable with
+			 * this statistics.
+			 */
+			tmp = bms_union(all_attnums, clause_attnums);
+
+			/* free the old bitmap */
+			bms_free(all_attnums);
+			all_attnums = tmp;
+		}
+
+		/*
+		 * Let's mark the statistics as 'ruled out' - either we'll use
+		 * it (and proceed to the next step), or it's incompatible.
+		 */
+		ruled_out[i] = step;
+
+		/*
+		 * There are no clauses usable with this statistics (not already
+		 * covered by aome of the previous stats).
+		 *
+		 * Similarly, if the clauses only use a single attribute, we
+		 * can't really use that.
+		 */
+		if ((ncovered_clauses == 0) || (nattnums < 2))
+			continue;
+
+		/*
+		 * TODO Not sure if it's possible to add a clause referencing
+		 *      only attributes already covered by previous stats?
+		 *      Introducing only some new dependency, not a new
+		 *      attribute. Couldn't come up with an example, though.
+		 *      Might be worth adding some assert.
+		 */
+
+		/*
+		 * got a suitable statistics - let's update the current solution,
+		 * maybe use it as the best solution
+		 */
+		current->nclauses += ncovered_clauses;
+		current->nconditions += ncovered_conditions;
+		current->nstats += 1;
+		current->stats[step] = i;
+
+		/*
+		 * We can never cover more clauses, or use more stats that we
+		 * actually have at the beginning.
+		 */
+		Assert(nclauses >= current->nclauses);
+		Assert(nmvstats >= current->nstats);
+		Assert(step < nmvstats);
+
+		/* we can't get more conditions that clauses and conditions combined
+		 *
+		 * FIXME This assert does not work because we count the conditions
+		 *       repeatedly (once for each statistics covering it).
+		 */
+		/* Assert((nconditions + nclauses) >= current->nconditions); */
+
+		if (*best == NULL)
+		{
+			*best = (mv_solution_t*)palloc0(sizeof(mv_solution_t));
+			(*best)->stats = (int*)palloc0(sizeof(int)*nmvstats);
+			(*best)->nstats = 0;
+			(*best)->nclauses = 0;
+			(*best)->nconditions = 0;
+		}
+
+		/* see if it's better than the current 'best' solution */
+		if ((current->nclauses > (*best)->nclauses) ||
+			((current->nclauses == (*best)->nclauses) &&
+			((current->nstats > (*best)->nstats))))
+		{
+			(*best)->nstats = current->nstats;
+			(*best)->nclauses = current->nclauses;
+			(*best)->nconditions = current->nconditions;
+			memcpy((*best)->stats, current->stats, nmvstats * sizeof(int));
+		}
+
+		/*
+		 * The recursion only makes sense if we haven't covered all the
+		 * attributes (then adding stats is not really possible).
+		 */
+		if ((step + 1) < nmvstats)
+			choose_mv_statistics_exhaustive(root, step+1,
+									nmvstats, mvstats, stats_attnums,
+									nclauses, clauses, clauses_attnums,
+									nconditions, conditions, conditions_attnums,
+									cover_map, condition_map, ruled_out,
+									current, best);
+
+		/* reset the last step */
+		current->nclauses -= ncovered_clauses;
+		current->nconditions -= ncovered_conditions;
+		current->nstats -= 1;
+		current->stats[step] = 0;
+
+		/* mark the statistics as usable again */
+		ruled_out[i] = -1;
+
+		Assert(current->nclauses >= 0);
+		Assert(current->nstats >= 0);
+	}
+
+	/* reset all statistics as 'incompatible' in this step */
+	for (i = 0; i < nmvstats; i++)
+		if (ruled_out[i] == step)
+			ruled_out[i] = -1;
+
+}
+
+/*
+ * Greedy search for a multivariate solution - a sequence of statistics
+ * covering the clauses. This chooses the "best" statistics at each step,
+ * so the resulting solution may not be the best solution globally, but
+ * this produces the solution in only N steps (where N is the number of
+ * statistics), while the exhaustive approach may have to walk through
+ * ~N! combinations (although some of those are terminated early).
+ *
+ * TODO There are probably other metrics we might use - e.g. using
+ *      number of columns (num_cond_columns / num_cov_columns), which
+ *      might work better with a mix of simple and complex clauses.
+ *
+ * TODO Also the choice at the very first step should be handled
+ *      in a special way, because there will be 0 conditions at that
+ *      moment, so there needs to be some other criteria - e.g. using
+ *      the simplest (or most complex?) clause might be a good idea.
+ *
+ * TODO We might also select multiple stats using different criteria,
+ *      and branch the search. This is however tricky, because if we
+ *      choose k statistics at each step, we get k^N branches to
+ *      walk through (with N steps). That's not really good with
+ *      large number of stats (yet better than exhaustive search).
+ */
+static void
+choose_mv_statistics_greedy(PlannerInfo *root, int step,
+					int nmvstats, MVStats mvstats, Bitmapset ** stats_attnums,
+					int nclauses, Node ** clauses, Bitmapset ** clauses_attnums,
+					int nconditions, Node ** conditions, Bitmapset ** conditions_attnums,
+					bool *cover_map, bool *condition_map, int *ruled_out,
+					mv_solution_t *current, mv_solution_t **best)
+{
+	int i, j;
+	int best_stat = -1;
+	double gain, max_gain = -1.0;
+
+	/*
+	 * Bitmap tracking which clauses are already covered (by the previous
+	 * statistics) and may thus serve only as a condition in this step.
+	 */
+	bool *covered_clauses = (bool*)palloc0(nclauses);
+
+	/*
+	 * Number of clauses and columns covered by each statistics - this
+	 * includes both conditions and clauses covered by the statistics for
+	 * the first time. The number of columns may count some columns
+	 * repeatedly - if a column is shared by multiple clauses, it will
+	 * be counted once for each clause (covered by the statistics).
+	 * So with two clauses [(a=1 OR b=2),(a<2 OR c>1)] the column "a"
+	 * will be counted twice (if both clauses are covered).
+	 *
+	 * The values for reduded statistics (that can't be applied) are
+	 * not computed, because that'd be pointless.
+	 */
+	int	*num_cov_clauses	= (int*)palloc0(sizeof(int) * nmvstats);
+	int	*num_cov_columns	= (int*)palloc0(sizeof(int) * nmvstats);
+
+	/*
+	 * Same as above, but this only includes clauses that are already
+	 * covered by the previous stats (and the current one).
+	 */
+	int	*num_cond_clauses	= (int*)palloc0(sizeof(int) * nmvstats);
+	int	*num_cond_columns	= (int*)palloc0(sizeof(int) * nmvstats);
+
+	/*
+	 * Number of attributes for each clause.
+	 *
+	 * TODO Might be computed in choose_mv_statistics() and then passed
+	 *      here, but then the function would not have the same signature
+	 *      as _exhaustive().
+	 */
+	int *attnum_counts = (int*)palloc0(sizeof(int) * nclauses);
+	int *attnum_cond_counts = (int*)palloc0(sizeof(int) * nconditions);
+
+	CHECK_FOR_INTERRUPTS();
+
+	Assert(best != NULL);
+	Assert((step == 0 && current == NULL) || (step > 0 && current != NULL));
+
+	/* compute attributes (columns) for each clause */
+	for (i = 0; i < nclauses; i++)
+		attnum_counts[i] = bms_num_members(clauses_attnums[i]);
+
+	/* compute attributes (columns) for each condition */
+	for (i = 0; i < nconditions; i++)
+		attnum_cond_counts[i] = bms_num_members(conditions_attnums[i]);
+
+	/* see which clauses are already covered at this point (by previous stats) */
+	for (i = 0; i < step; i++)
+		for (j = 0; j < nclauses; j++)
+			covered_clauses[j] |= (cover_map[current->stats[i] * nclauses + j]);
+
+	/* which remaining statistics covers most clauses / uses most conditions? */
+	for (i = 0; i < nmvstats; i++)
+	{
+		Bitmapset *attnums_covered = NULL;
+		Bitmapset *attnums_conditions = NULL;
+
+		/* skip stats that are already ruled out (either used or inapplicable) */
+		if (ruled_out[i] != -1)
+			continue;
+
+		/* count covered clauses and conditions (for the statistics) */
+		for (j = 0; j < nclauses; j++)
+		{
+			if (cover_map[i * nclauses + j])
+			{
+				Bitmapset *new = bms_union(attnums_covered, clauses_attnums[j]);
+
+				/* get rid of the old bitmap and keep the unified result */
+				bms_free(attnums_covered);
+				attnums_covered = new;
+
+				num_cov_clauses[i] += 1;
+				num_cov_columns[i] += attnum_counts[j];
+
+				/* is the clause already covered (i.e. a condition)? */
+				if (covered_clauses[j])
+				{
+					num_cond_clauses[i] += 1;
+					num_cond_columns[i] += attnum_counts[j];
+					new = bms_union(attnums_conditions,
+												  clauses_attnums[j]);
+
+					bms_free(attnums_conditions);
+					attnums_conditions = new;
+				}
+			}
+		}
+
+		/* if all covered clauses are covered by prev stats (thus conditions) */
+		if (num_cov_clauses[i] == num_cond_clauses[i])
+			ruled_out[i] = step;
+
+		/* same if there are no new attributes */
+		else if (bms_num_members(attnums_conditions) == bms_num_members(attnums_covered))
+			ruled_out[i] = step;
+
+		bms_free(attnums_covered);
+		bms_free(attnums_conditions);
+
+		/* if the statistics is inapplicable, try the next one */
+		if (ruled_out[i] != -1)
+			continue;
+
+		/* now let's walk through conditions and count the covered */
+		for (j = 0; j < nconditions; j++)
+		{
+			if (condition_map[i * nconditions + j])
+			{
+				num_cond_clauses[i] += 1;
+				num_cond_columns[i] += attnum_cond_counts[j];
+			}
+		}
+
+		/* otherwise see if this improves the interesting metrics */
+		gain = num_cond_columns[i] / (double)num_cov_columns[i];
+
+		if (gain > max_gain)
+		{
+			max_gain = gain;
+			best_stat = i;
+		}
+	}
+
+	/*
+	 * Have we found a suitable statistics? Add it to the solution and
+	 * try next step.
+	 */
+	if (best_stat != -1)
+	{
+		/* mark the statistics, so that we skip it in next steps */
+		ruled_out[best_stat] = step;
+
+		/* allocate current solution if necessary */
+		if (current == NULL)
+		{
+			current = (mv_solution_t*)palloc0(sizeof(mv_solution_t));
+			current->stats = (int*)palloc0(sizeof(int)*nmvstats);
+			current->nstats = 0;
+			current->nclauses = 0;
+			current->nconditions = 0;
+		}
+
+		current->nclauses += num_cov_clauses[best_stat];
+		current->nconditions += num_cond_clauses[best_stat];
+		current->stats[step] = best_stat;
+		current->nstats++;
+
+		if (*best == NULL)
+		{
+			(*best) = (mv_solution_t*)palloc0(sizeof(mv_solution_t));
+			(*best)->nstats = current->nstats;
+			(*best)->nclauses = current->nclauses;
+			(*best)->nconditions = current->nconditions;
+
+			(*best)->stats = (int*)palloc0(sizeof(int)*nmvstats);
+			memcpy((*best)->stats, current->stats, nmvstats * sizeof(int));
+		}
+		else
+		{
+			/* see if this is a better solution */
+			double current_gain = (double)current->nconditions / current->nclauses;
+			double best_gain    = (double)(*best)->nconditions / (*best)->nclauses;
+
+			if ((current_gain > best_gain) ||
+				((current_gain == best_gain) && (current->nstats < (*best)->nstats)))
+			{
+				(*best)->nstats = current->nstats;
+				(*best)->nclauses = current->nclauses;
+				(*best)->nconditions = current->nconditions;
+				memcpy((*best)->stats, current->stats, nmvstats * sizeof(int));
+			}
+		}
+
+		/*
+		 * The recursion only makes sense if we haven't covered all the
+		 * attributes (then adding stats is not really possible).
+		*/
+		if ((step + 1) < nmvstats)
+			choose_mv_statistics_greedy(root, step+1,
+									nmvstats, mvstats, stats_attnums,
+									nclauses, clauses, clauses_attnums,
+									nconditions, conditions, conditions_attnums,
+									cover_map, condition_map, ruled_out,
+									current, best);
+
+		/* reset the last step */
+		current->nclauses -= num_cov_clauses[best_stat];
+		current->nconditions -= num_cond_clauses[best_stat];
+		current->nstats -= 1;
+		current->stats[step] = 0;
+
+		/* mark the statistics as usable again */
+		ruled_out[best_stat] = -1;
+	}
+
+	/* reset all statistics eliminated in this step */
+	for (i = 0; i < nmvstats; i++)
+		if (ruled_out[i] == step)
+			ruled_out[i] = -1;
+
+	/* free everything allocated in this step */
+	pfree(covered_clauses);
+	pfree(attnum_counts);
+	pfree(num_cov_clauses);
+	pfree(num_cov_columns);
+	pfree(num_cond_clauses);
+	pfree(num_cond_columns);
+}
+
+/*
  * We're looking for statistics matching at least 2 attributes,
  * referenced in the clauses compatible with multivariate statistics.
  * The current selection criteria is very simple - we choose the
@@ -1299,48 +2213,386 @@ collect_mv_attnums(PlannerInfo *root, List *clauses, Oid varRelid,
  * TODO This will probably have to consider compatibility of clauses,
  *      because 'dependencies' will probably work only with equality
  *      clauses.
+ *
+ * TODO Another way to make the optimization problems smaller might
+ *      be splitting the statistics into several disjoint subsets, i.e.
+ *      if we can split the graph of statistics (after the elimination)
+ *      into multiple components (so that stats in different components
+ *      share no attributes), we can do the optimization for each
+ *      component separately.
+ *
+ * TODO Another possible optimization might be removing redundant
+ *      statistics - if statistics S1 covers S2 (covers S2 attributes
+ *      and possibly some more), we can probably remove S2. What
+ *      actually matters are attributes from covered clauses (not all
+ *      the original attributes). This might however prefer larger,
+ *      and thus less accurate, statistics.
+ *
+ * TODO If we could compute what is a "perfect solution" maybe we could
+ *      terminate the search after reaching ~90% of it? Say, if we knew
+ *      that we can cover 10 clauses and reuse 8 dependencies, maybe
+ *      covering 9 clauses and 7 dependencies would be OK?
  */
-static int
-choose_mv_statistics(int nmvstats, MVStats mvstats, Bitmapset *attnums)
+static mv_solution_t *
+choose_mv_statistics(PlannerInfo *root, int nmvstats, MVStats mvstats,
+					 List *clauses, List *conditions,
+					 Oid varRelid, SpecialJoinInfo *sjinfo, int type)
 {
 	int i, j;
+	mv_solution_t *best = NULL;
+	ListCell *l;
+
+	/* pass only stats matching at least two attributes (from clauses) */
+	MVStats	mvstats_filtered = (MVStats)palloc0(nmvstats * sizeof(MVStatsData));
+	int		nmvstats_filtered;
+	bool	repeat = true;
+	bool   *clause_cover_map = NULL,
+		   *condition_cover_map = NULL;
+	int	   *ruled_out = NULL;
+
+	/* build bitmapsets for all stats and clauses */
+	Bitmapset **stats_attnums;
+	Bitmapset **clauses_attnums;
+	Bitmapset **conditions_attnums;
+
+	int nclauses, nconditions;
+	Node ** clauses_array;
+	Node ** conditions_array;
 
-	int choice = -1;
-	int current_matches = 1;					/* goal #1: maximize */
-	int current_dims = (MVSTATS_MAX_DIMENSIONS+1);	/* goal #2: minimize */
+	/* copy lists, so that we can free them during elimination easily */
+	clauses = list_copy(clauses);
+	conditions = list_copy(conditions);
 
 	/*
-	 * Walk through the statistics (simple array with nmvstats elements)
-	 * and for each one count the referenced attributes (encoded in
-	 * the 'attnums' bitmap).
+	 * Reduce the optimization problem size as much as possible.
+	 *
+	 * Eliminate clauses and conditions not covered by any statistics,
+	 * or statistics not matching at least two attributes (one of them
+	 * has to be in a regular clause).
+	 *
+	 * It's possible that removing a statistics in one iteration
+	 * eliminates clause in the next one, so we'll repeat this until we
+	 * eliminate no clauses/stats in that iteration.
+	 *
+	 * This can only happen after eliminating a statistics - clauses are
+	 * eliminated first, so statistics always reflect that.
 	 */
+	while (repeat)
+	{
+		/* pass only mv-compatible clauses covered by at least one statistics */
+		List *compatible_clauses = NIL;
+		List *compatible_conditions = NIL;
+
+		Bitmapset *compatible_attnums = NULL;
+		Bitmapset *condition_attnums  = NULL;
+		Bitmapset *all_attnums = NULL;
+
+		/*
+		 * Clauses
+		 *
+		 * Walk through clauses and keep only those covered by at least
+		 * one of the statistics we still have. Also, collect bitmap of
+		 * attributes so that we can make sure we add at least one new.
+		 */
+		foreach (l, clauses)
+		{
+			Node *clause = (Node*)lfirst(l);
+			Bitmapset *clause_attnums = NULL;
+			Oid relid;
+
+			/*
+			 * The clause has to be mv-compatible (suitable operators etc.).
+			 */
+			if (! clause_is_mv_compatible(root, clause, varRelid,
+							 &relid, &clause_attnums, sjinfo, type))
+				continue;
+
+			/* is there a statistics covering this clause? */
+			for (i = 0; i < nmvstats; i++)
+			{
+				int k, matches = 0;
+				for (k = 0; k < mvstats[i].stakeys->dim1; k++)
+				{
+					if (bms_is_member(mvstats[i].stakeys->values[k],
+									  clause_attnums))
+						matches += 1;
+				}
+
+				/*
+				 * The clause is compatible if all attributes it references
+				 * are covered by the statistics.
+				 */
+				if (bms_num_members(clause_attnums) == matches)
+				{
+					compatible_attnums = bms_union(compatible_attnums,
+												   clause_attnums);
+					compatible_clauses = lappend(compatible_clauses,
+												 clause);
+					break;
+				}
+			}
+
+			bms_free(clause_attnums);
+		}
+
+		/* we can't have more compatible clauses that source clauses */
+		Assert(list_length(clauses) >= list_length(compatible_clauses));
+
+		/* work with only compatible clauses from now */
+		list_free(clauses);
+		clauses = compatible_clauses;
+
+		/*
+		 * Conditions
+		 *
+		 * Walk through clauses and keep only those covered by at least
+		 * one of the statistics we still have. Also, collect bitmap of
+		 * attributes so that we can make sure we add at least one new.
+		 */
+
+		/* next, generate bitmap of attnums from all mv_compatible conditions */
+		foreach (l, conditions)
+		{
+			Node *clause = (Node*)lfirst(l);
+			Bitmapset *clause_attnums = NULL;
+			Oid relid;
+
+			/*
+			 * The clause has to be mv-compatible (suitable operators etc.).
+			 */
+			if (! clause_is_mv_compatible(root, clause, varRelid,
+							 &relid, &clause_attnums, sjinfo, type))
+				continue;
+
+			/* is there a statistics covering this clause? */
+			for (i = 0; i < nmvstats; i++)
+			{
+				int k, matches = 0;
+				for (k = 0; k < mvstats[i].stakeys->dim1; k++)
+				{
+					if (bms_is_member(mvstats[i].stakeys->values[k],
+									  clause_attnums))
+						matches += 1;
+				}
+
+				if (bms_num_members(clause_attnums) == matches)
+				{
+					condition_attnums = bms_union(condition_attnums,
+												  clause_attnums);
+					compatible_conditions = lappend(compatible_conditions,
+													clause);
+					break;
+				}
+			}
+
+			bms_free(clause_attnums);
+		}
+
+		/* we can't have more compatible conditions than source conditions */
+		Assert(list_length(conditions) >= list_length(compatible_conditions));
+
+		/* keep only compatible clauses */
+		list_free(conditions);
+		conditions = compatible_conditions;
+
+		/* get a union of attnums (from conditions and clauses) */
+		all_attnums = bms_union(compatible_attnums, condition_attnums);
+
+		/*
+		 * Statisitics
+		 *
+		 * Walk through statistics and only keep those covering at least
+		 * one new attribute (excluding conditions) and at two attributes
+		 * in both clauses and conditions.
+		 */
+		nmvstats_filtered = 0;
+
+		for (i = 0; i < nmvstats; i++)
+		{
+			int k;
+			int matches_new = 0,
+				matches_all = 0;
+
+			for (k = 0; k < mvstats[i].stakeys->dim1; k++)
+			{
+				/* attribute covered by new clause(s) */
+				if (bms_is_member(mvstats[i].stakeys->values[k],
+								  compatible_attnums))
+					matches_new += 1;
+
+				/* attribute covered by clause(s) or ondition(s) */
+				if (bms_is_member(mvstats[i].stakeys->values[k],
+								  all_attnums))
+					matches_all += 1;
+			}
+
+			/* check we have enough attributes for this statistics */
+			if ((matches_new >= 1) && (matches_all >= 2))
+			{
+				mvstats_filtered[nmvstats_filtered] = mvstats[i];
+				nmvstats_filtered += 1;
+			}
+		}
+
+		/* we can't have more useful stats than we had originally */
+		Assert(nmvstats >= nmvstats_filtered);
+
+		/* if we've eliminated a statistics, trigger another round */
+		repeat = (nmvstats > nmvstats_filtered);
+
+		/*
+		 * work only with filtered statistics from now
+		 *
+		 * FIXME This rewrites the input 'mvstats' array, which is not
+		 *       exactly pretty as it's an unexpected side-effect (the
+		 *       caller may use the stats for something else). But the
+		 *       solution contains indexes into this 'reduced' array so
+		 *       we can't stop doing that easily.
+		 *
+		 *       Another issue is that we only modify the local 'mvstats'
+		 *       value, so the caller will still see the original number
+		 *       of stats (and thus maybe duplicate entries).
+		 *
+		 *       We should make a copy of the array, and only mess with
+		 *       that copy (and map the indexes to the original ones at
+		 *       the end, when returning the solution to the user). Or
+		 *       simply work with OIDs.
+		 */
+		if (nmvstats_filtered < nmvstats)
+		{
+			nmvstats = nmvstats_filtered;
+			memcpy(mvstats, mvstats_filtered, sizeof(MVStatsData)*nmvstats);
+			nmvstats_filtered = 0;
+		}
+	}
+
+	/* only do the optimization if we have clauses/statistics */
+	if ((nmvstats == 0) || (list_length(clauses) == 0))
+		return NULL;
+
+	stats_attnums
+		= (Bitmapset **)palloc0(nmvstats * sizeof(Bitmapset *));
+
+	/*
+	 * TODO We should sort the stats to make the order deterministic,
+	 *      otherwise we may get different estimates on different
+	 *      executions - if there are multiple "equally good" solutions,
+	 *      we'll keep the first solution we see.
+	 *
+	 *      Sorting by OID probably is not the right solution though,
+	 *      because we'd like it to be somehow reproducible,
+	 *      irrespectedly of the order of ADD STATISTICS commands.
+	 *      So maybe statkeys?
+	 */
+
 	for (i = 0; i < nmvstats; i++)
 	{
-		/* columns matching this statistics */
-		int matches = 0;
+		for (j = 0; j < mvstats[i].stakeys->dim1; j++)
+			stats_attnums[i] = bms_add_member(stats_attnums[i],
+										mvstats[i].stakeys->values[j]);
+	}
 
-		int2vector * attrs = mvstats[i].stakeys;
-		int	numattrs = mvstats[i].stakeys->dim1;
+	/* collect clauses an bitmap of attnums */
+	nclauses = 0;
+	clauses_attnums = (Bitmapset **)palloc0(list_length(clauses)
+											* sizeof(Bitmapset *));
+	clauses_array = (Node **)palloc0(list_length(clauses)
+									 * sizeof(Node *));
 
-		/* count columns covered by the histogram */
-		for (j = 0; j < numattrs; j++)
-			if (bms_is_member(attrs->values[j], attnums))
-				matches++;
+	foreach (l, clauses)
+	{
+		Oid relid;
+		Bitmapset * attnums = NULL;
 
 		/*
-		 * Use this statistics when it improves the number of matches or
-		 * when it matches the same number of attributes but is smaller.
+		 * The clause has to be mv-compatible (suitable operators etc.).
 		 */
-		if ((matches > current_matches) ||
-			((matches == current_matches) && (current_dims > numattrs)))
+		if (! clause_is_mv_compatible(root, (Node *)lfirst(l), varRelid,
+						 &relid, &attnums, sjinfo, type))
+			elog(ERROR, "should not get non-mv-compatible cluase");
+
+		clauses_attnums[nclauses] = attnums;
+		clauses_array[nclauses] = (Node *)lfirst(l);
+		nclauses += 1;
+	}
+
+	/* collect conditions and bitmap of attnums */
+	nconditions = 0;
+	conditions_attnums = (Bitmapset **)palloc0(list_length(conditions)
+												* sizeof(Bitmapset *));
+	conditions_array = (Node **)palloc0(list_length(conditions)
+													* sizeof(Node *));
+
+	foreach (l, conditions)
+	{
+		Oid relid;
+		Bitmapset * attnums = NULL;
+
+		/* conditions are mv-compatible (thanks to the reduction) */
+		if (! clause_is_mv_compatible(root, (Node *)lfirst(l), varRelid,
+						 &relid, &attnums, sjinfo, type))
+			elog(ERROR, "should not get non-mv-compatible cluase");
+
+		conditions_attnums[nconditions] = attnums;
+		conditions_array[nconditions] = (Node *)lfirst(l);
+		nconditions += 1;
+	}
+
+	/*
+	 * Build bitmaps with info about which clauses/conditions are
+	 * covered by each statistics (so that we don't need to call the
+	 * bms_is_subset over and over again).
+	 */
+	clause_cover_map	= (bool*)palloc0(nclauses * nmvstats);
+	condition_cover_map	= (bool*)palloc0(nconditions * nmvstats);
+	ruled_out 			= (int*)palloc0(nmvstats * sizeof(int));
+
+	for (i = 0; i < nmvstats; i++)
+	{
+		ruled_out[i] = -1;	/* not ruled out by default */
+		for (j = 0; j < nclauses; j++)
+		{
+			clause_cover_map[i * nclauses + j]
+				= bms_is_subset(clauses_attnums[j],
+								stats_attnums[i]);
+		}
+
+		for (j = 0; j < nconditions; j++)
 		{
-			choice = i;
-			current_matches = matches;
-			current_dims = numattrs;
+			condition_cover_map[i * nconditions + j]
+				= bms_is_subset(conditions_attnums[j],
+								stats_attnums[i]);
 		}
 	}
 
-	return choice;
+	/* do the optimization itself */
+	if (mvstat_search_type == MVSTAT_SEARCH_EXHAUSTIVE)
+		choose_mv_statistics_exhaustive(root, 0,
+									   nmvstats, mvstats, stats_attnums,
+									   nclauses, clauses_array, clauses_attnums,
+									   nconditions, conditions_array, conditions_attnums,
+									   clause_cover_map, condition_cover_map,
+									   ruled_out, NULL, &best);
+	else
+		choose_mv_statistics_greedy(root, 0,
+									   nmvstats, mvstats, stats_attnums,
+									   nclauses, clauses_array, clauses_attnums,
+									   nconditions, conditions_array, conditions_attnums,
+									   clause_cover_map, condition_cover_map,
+									   ruled_out, NULL, &best);
+
+	/* maybe we should leave the cleanup up to the memory context */
+	pfree(mvstats_filtered);
+	pfree(stats_attnums);
+	pfree(clauses_attnums);
+	pfree(clauses_array);
+	pfree(conditions_attnums);
+	pfree(conditions_array);
+	pfree(clause_cover_map);
+	pfree(condition_cover_map);
+	pfree(ruled_out);
+
+	return best;
 }
 
 
@@ -1624,6 +2876,51 @@ clause_is_mv_compatible(PlannerInfo *root, Node *clause, Oid varRelid,
 	return false;
 }
 
+
+static Bitmapset *
+clause_mv_get_attnums(PlannerInfo *root, Node *clause)
+{
+	Bitmapset * attnums = NULL;
+
+	/* Extract clause from restrict info, if needed. */
+	if (IsA(clause, RestrictInfo))
+		clause = (Node*)((RestrictInfo*)clause)->clause;
+
+	/*
+	 * Only simple opclauses and IS NULL tests are compatible with
+	 * multivariate stats at this point.
+	 */
+	if ((is_opclause(clause))
+		&& (list_length(((OpExpr *) clause)->args) == 2))
+	{
+		OpExpr	   *expr = (OpExpr *) clause;
+
+		if (IsA(linitial(expr->args), Var))
+			attnums = bms_add_member(attnums,
+							((Var*)linitial(expr->args))->varattno);
+		else
+			attnums = bms_add_member(attnums,
+							((Var*)lsecond(expr->args))->varattno);
+	}
+	else if (IsA(clause, NullTest)
+			 && IsA(((NullTest*)clause)->arg, Var))
+	{
+		attnums = bms_add_member(attnums,
+							((Var*)((NullTest*)clause)->arg)->varattno);
+	}
+	else if (or_clause(clause) || and_clause(clause))
+	{
+		ListCell *l;
+		foreach (l, ((BoolExpr*)clause)->args)
+		{
+			attnums = bms_join(attnums,
+						clause_mv_get_attnums(root, (Node*)lfirst(l)));
+		}
+	}
+
+	return attnums;
+}
+
 /*
  * Performs reduction of clauses using functional dependencies, i.e.
  * removes clauses that are considered redundant. It simply walks
@@ -2049,20 +3346,24 @@ clauselist_apply_dependencies(PlannerInfo *root, List *clauses, Oid varRelid,
  *      as the clauses are processed (and skip items that are 'match').
  */
 static Selectivity
-clauselist_mv_selectivity_mcvlist(PlannerInfo *root, List *clauses,
-								  MVStats mvstats, bool *fullmatch,
-								  Selectivity *lowsel)
+clauselist_mv_selectivity_mcvlist(PlannerInfo *root, MVStats mvstats,
+								  List *clauses, List *conditions,
+								  bool *fullmatch, Selectivity *lowsel)
 {
 	int i;
 	Selectivity s = 0.0;
+	Selectivity t = 0.0;
 	MCVList mcvlist = NULL;
+
 	int	nmatches = 0;
+	int	nconditions = 0;
 
 	/* match/mismatch bitmap for each MCV item */
 	char * matches = NULL;
+	char * condition_matches = NULL;
 
 	Assert(clauses != NIL);
-	Assert(list_length(clauses) >= 2);
+	Assert(list_length(clauses) >= 1);
 
 	/* there's no MCV list built yet */
 	if (! mvstats->mcv_built)
@@ -2073,13 +3374,34 @@ clauselist_mv_selectivity_mcvlist(PlannerInfo *root, List *clauses,
 	Assert(mcvlist != NULL);
 	Assert(mcvlist->nitems > 0);
 
-	/* by default all the MCV items match the clauses fully */
-	matches = palloc0(sizeof(char) * mcvlist->nitems);
-	memset(matches, MVSTATS_MATCH_FULL, sizeof(char)*mcvlist->nitems);
-
 	/* number of matching MCV items */
 	nmatches = mcvlist->nitems;
+	nconditions = mcvlist->nitems;
+
+	/* conditions */
+	condition_matches = palloc0(sizeof(char) * nconditions);
+	memset(condition_matches, MVSTATS_MATCH_FULL, sizeof(char)*nconditions);
 
+	/* by default all the MCV items match the clauses fully */
+	matches = palloc0(sizeof(char) * nmatches);
+	memset(matches, MVSTATS_MATCH_FULL, sizeof(char)*nmatches);
+
+	/*
+	 * build the match bitmap for the conditions
+	 */
+	if (conditions != NIL)
+		nconditions = update_match_bitmap_mcvlist(root, conditions,
+									   mvstats->stakeys, mcvlist,
+									   nconditions, condition_matches,
+									   lowsel, fullmatch, false);
+
+	/*
+	 * build the match bitmap for the estimated clauses
+	 *
+	 * TODO This evaluates the clauses for all MCV items, even those
+	 *      ruled out by the conditions. The final result should be the
+	 *      same, but it might be faster.
+	 */
 	nmatches = update_match_bitmap_mcvlist(root, clauses,
 										   mvstats->stakeys, mcvlist,
 										   nmatches, matches,
@@ -2088,14 +3410,25 @@ clauselist_mv_selectivity_mcvlist(PlannerInfo *root, List *clauses,
 	/* sum frequencies for all the matching MCV items */
 	for (i = 0; i < mcvlist->nitems; i++)
 	{
+		/* skit MCV items not matching the conditions */
+		if (condition_matches[i] == MVSTATS_MATCH_NONE)
+			continue;
+
 		if (matches[i] != MVSTATS_MATCH_NONE)
 			s += mcvlist->items[i]->frequency;
+
+		t += mcvlist->items[i]->frequency;
 	}
 
 	pfree(matches);
+	pfree(condition_matches);
 	pfree(mcvlist);
 
-	return s;
+	/* no condition matches */
+	if (t == 0.0)
+		return (Selectivity)0.0;
+
+	return (s / t);
 }
 
 /*
@@ -2490,13 +3823,17 @@ update_match_bitmap_mcvlist(PlannerInfo *root, List *clauses,
  *      this is not uncommon, but for histograms it's not that clear.
  */
 static Selectivity
-clauselist_mv_selectivity_histogram(PlannerInfo *root, List *clauses,
-									MVStats mvstats)
+clauselist_mv_selectivity_histogram(PlannerInfo *root, MVStats mvstats,
+									List *clauses, List *conditions)
 {
 	int i;
 	Selectivity s = 0.0;
+	Selectivity t = 0.0;
 	int		nmatches = 0;
+	int		nconditions = 0;
 	char   *matches = NULL;
+	char   *condition_matches = NULL;
+
 	MVHistogram mvhist = NULL;
 
 	/* there's no histogram */
@@ -2508,18 +3845,34 @@ clauselist_mv_selectivity_histogram(PlannerInfo *root, List *clauses,
 
 	Assert (mvhist != NULL);
 	Assert (clauses != NIL);
-	Assert (list_length(clauses) >= 2);
+	Assert (list_length(clauses) >= 1);
+
+	nmatches = mvhist->nbuckets;
+	nconditions = mvhist->nbuckets;
 
 	/*
 	 * Bitmap of bucket matches (mismatch, partial, full). by default
 	 * all buckets fully match (and we'll eliminate them).
 	 */
-	matches = palloc0(sizeof(char) * mvhist->nbuckets);
-	memset(matches,  MVSTATS_MATCH_FULL, sizeof(char)*mvhist->nbuckets);
+	matches = palloc0(sizeof(char) * nmatches);
+	memset(matches,  MVSTATS_MATCH_FULL, sizeof(char)*nmatches);
 
-	nmatches = mvhist->nbuckets;
+	condition_matches = palloc0(sizeof(char)*nconditions);
+	memset(condition_matches, MVSTATS_MATCH_FULL, sizeof(char)*nconditions);
+
+	/* build the match bitmap for the conditions */
+	if (conditions != NIL)
+		update_match_bitmap_histogram(root, conditions,
+								  mvstats->stakeys, mvhist,
+								  nconditions, condition_matches, false);
 
-	/* build the match bitmap */
+	/*
+	 * build the match bitmap for the estimated clauses
+	 *
+	 * TODO This evaluates the clauses for all buckets, even those
+	 *      ruled out by the conditions. The final result should be
+	 *      the same, but it might be faster.
+	 */
 	update_match_bitmap_histogram(root, clauses,
 								  mvstats->stakeys, mvhist,
 								  nmatches, matches, false);
@@ -2527,17 +3880,37 @@ clauselist_mv_selectivity_histogram(PlannerInfo *root, List *clauses,
 	/* now, walk through the buckets and sum the selectivities */
 	for (i = 0; i < mvhist->nbuckets; i++)
 	{
+		float coeff = 1.0;
+
+		/* skip buckets not matching the conditions */
+		if (condition_matches[i] == MVSTATS_MATCH_NONE)
+			continue;
+		else if (condition_matches[i] == MVSTATS_MATCH_PARTIAL)
+			coeff = 0.5;
+
+		t += coeff * mvhist->buckets[i]->ntuples;
+
 		if (matches[i] == MVSTATS_MATCH_FULL)
-			s += mvhist->buckets[i]->ntuples;
+			s += coeff * mvhist->buckets[i]->ntuples;
 		else if (matches[i] == MVSTATS_MATCH_PARTIAL)
-			s += 0.5 * mvhist->buckets[i]->ntuples;
+			/*
+			 * TODO If both conditions and clauses match partially, this
+			 *      will use 0.25 match - not sure if that's the right
+			 *      thing solution, but seems about right.
+			 */
+			s += coeff * 0.5 * mvhist->buckets[i]->ntuples;
 	}
 
 	/* release the allocated bitmap and deserialized histogram */
 	pfree(matches);
+	pfree(condition_matches);
 	pfree(mvhist);
 
-	return s;
+	/* no condition matches */
+	if (t == 0.0)
+		return (Selectivity)0.0;
+
+	return (s / t);
 }
 
 /*
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 1a0d358..71beb2e 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -3280,7 +3280,8 @@ compute_semi_anti_join_factors(PlannerInfo *root,
 									joinquals,
 									0,
 									jointype,
-									sjinfo);
+									sjinfo,
+									NIL);
 
 	/*
 	 * Also get the normal inner-join selectivity of the join clauses.
@@ -3303,7 +3304,8 @@ compute_semi_anti_join_factors(PlannerInfo *root,
 									joinquals,
 									0,
 									JOIN_INNER,
-									&norm_sjinfo);
+									&norm_sjinfo,
+									NIL);
 
 	/* Avoid leaking a lot of ListCells */
 	if (jointype == JOIN_ANTI)
@@ -3470,7 +3472,7 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals)
 		Node	   *qual = (Node *) lfirst(l);
 
 		/* Note that clause_selectivity will be able to cache its result */
-		selec *= clause_selectivity(root, qual, 0, JOIN_INNER, &sjinfo);
+		selec *= clause_selectivity(root, qual, 0, JOIN_INNER, &sjinfo, NIL);
 	}
 
 	/* Apply it to the input relation sizes */
@@ -3506,7 +3508,8 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel)
 							   rel->baserestrictinfo,
 							   0,
 							   JOIN_INNER,
-							   NULL);
+							   NULL,
+							   NIL);
 
 	rel->rows = clamp_row_est(nrows);
 
@@ -3543,7 +3546,8 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel,
 							   allclauses,
 							   rel->relid,		/* do not use 0! */
 							   JOIN_INNER,
-							   NULL);
+							   NULL,
+							   NIL);
 	nrows = clamp_row_est(nrows);
 	/* For safety, make sure result is not more than the base estimate */
 	if (nrows > rel->rows)
@@ -3681,12 +3685,14 @@ calc_joinrel_size_estimate(PlannerInfo *root,
 										joinquals,
 										0,
 										jointype,
-										sjinfo);
+										sjinfo,
+										NIL);
 		pselec = clauselist_selectivity(root,
 										pushedquals,
 										0,
 										jointype,
-										sjinfo);
+										sjinfo,
+										NIL);
 
 		/* Avoid leaking a lot of ListCells */
 		list_free(joinquals);
@@ -3698,7 +3704,8 @@ calc_joinrel_size_estimate(PlannerInfo *root,
 										restrictlist,
 										0,
 										jointype,
-										sjinfo);
+										sjinfo,
+										NIL);
 		pselec = 0.0;			/* not used, keep compiler quiet */
 	}
 
diff --git a/src/backend/optimizer/util/orclauses.c b/src/backend/optimizer/util/orclauses.c
index f0acc14..e41508b 100644
--- a/src/backend/optimizer/util/orclauses.c
+++ b/src/backend/optimizer/util/orclauses.c
@@ -280,7 +280,7 @@ consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel,
 	 * saving work later.)
 	 */
 	or_selec = clause_selectivity(root, (Node *) or_rinfo,
-								  0, JOIN_INNER, NULL);
+								  0, JOIN_INNER, NULL, NIL);
 
 	/*
 	 * The clause is only worth adding to the query if it rejects a useful
@@ -342,7 +342,7 @@ consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel,
 
 		/* Compute inner-join size */
 		orig_selec = clause_selectivity(root, (Node *) join_or_rinfo,
-										0, JOIN_INNER, &sjinfo);
+										0, JOIN_INNER, &sjinfo, NIL);
 
 		/* And hack cached selectivity so join size remains the same */
 		join_or_rinfo->norm_selec = orig_selec / or_selec;
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 4dd3f9f..326dd36 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -1580,13 +1580,15 @@ booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg,
 			case IS_NOT_FALSE:
 				selec = (double) clause_selectivity(root, arg,
 													varRelid,
-													jointype, sjinfo);
+													jointype, sjinfo,
+													NIL);
 				break;
 			case IS_FALSE:
 			case IS_NOT_TRUE:
 				selec = 1.0 - (double) clause_selectivity(root, arg,
 														  varRelid,
-														  jointype, sjinfo);
+														  jointype, sjinfo,
+														  NIL);
 				break;
 			default:
 				elog(ERROR, "unrecognized booltesttype: %d",
@@ -6196,7 +6198,8 @@ genericcostestimate(PlannerInfo *root,
 	indexSelectivity = clauselist_selectivity(root, selectivityQuals,
 											  index->rel->relid,
 											  JOIN_INNER,
-											  NULL);
+											  NULL,
+											  NIL);
 
 	/*
 	 * If caller didn't give us an estimate, estimate the number of index
@@ -6521,7 +6524,8 @@ btcostestimate(PG_FUNCTION_ARGS)
 		btreeSelectivity = clauselist_selectivity(root, selectivityQuals,
 												  index->rel->relid,
 												  JOIN_INNER,
-												  NULL);
+												  NULL,
+												  NIL);
 		numIndexTuples = btreeSelectivity * index->rel->tuples;
 
 		/*
@@ -7264,7 +7268,8 @@ gincostestimate(PG_FUNCTION_ARGS)
 	*indexSelectivity = clauselist_selectivity(root, selectivityQuals,
 											   index->rel->relid,
 											   JOIN_INNER,
-											   NULL);
+											   NULL,
+											   NIL);
 
 	/* fetch estimated page cost for tablespace containing index */
 	get_tablespace_page_costs(index->reltablespace,
@@ -7496,7 +7501,7 @@ brincostestimate(PG_FUNCTION_ARGS)
 	*indexSelectivity =
 		clauselist_selectivity(root, indexQuals,
 							   path->indexinfo->rel->relid,
-							   JOIN_INNER, NULL);
+							   JOIN_INNER, NULL, NIL);
 	*indexCorrelation = 1;
 
 	/*
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index b8a0f9f..5cd2583 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -75,6 +75,7 @@
 #include "utils/bytea.h"
 #include "utils/guc_tables.h"
 #include "utils/memutils.h"
+#include "utils/mvstats.h"
 #include "utils/pg_locale.h"
 #include "utils/plancache.h"
 #include "utils/portal.h"
@@ -396,6 +397,15 @@ static const struct config_enum_entry row_security_options[] = {
 };
 
 /*
+ * Search algorithm for multivariate stats.
+ */
+static const struct config_enum_entry mvstat_search_options[] = {
+	{"greedy", MVSTAT_SEARCH_GREEDY, false},
+	{"exhaustive", MVSTAT_SEARCH_EXHAUSTIVE, false},
+	{NULL, 0, false}
+};
+
+/*
  * Options for enum values stored in other modules
  */
 extern const struct config_enum_entry wal_level_options[];
@@ -3651,6 +3661,16 @@ static struct config_enum ConfigureNamesEnum[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"mvstat_search", PGC_USERSET, QUERY_TUNING_OTHER,
+			gettext_noop("Sets the algorithm used for combining multivariate stats."),
+			NULL
+		},
+		&mvstat_search_type,
+		MVSTAT_SEARCH_GREEDY, mvstat_search_options,
+		NULL, NULL, NULL
+	},
+
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, 0, NULL, NULL, NULL, NULL
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index 9c2000b..7a3835b 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -182,11 +182,13 @@ extern Selectivity clauselist_selectivity(PlannerInfo *root,
 					   List *clauses,
 					   int varRelid,
 					   JoinType jointype,
-					   SpecialJoinInfo *sjinfo);
+					   SpecialJoinInfo *sjinfo,
+					   List *conditions);
 extern Selectivity clause_selectivity(PlannerInfo *root,
 				   Node *clause,
 				   int varRelid,
 				   JoinType jointype,
-				   SpecialJoinInfo *sjinfo);
+				   SpecialJoinInfo *sjinfo,
+				   List *conditions);
 
 #endif   /* COST_H */
diff --git a/src/include/utils/mvstats.h b/src/include/utils/mvstats.h
index 673e546..194bbf8 100644
--- a/src/include/utils/mvstats.h
+++ b/src/include/utils/mvstats.h
@@ -16,6 +16,14 @@
 
 #include "commands/vacuum.h"
 
+typedef enum MVStatSearchType
+{
+	MVSTAT_SEARCH_EXHAUSTIVE,		/* exhaustive search */
+	MVSTAT_SEARCH_GREEDY			/* greedy search */
+}	MVStatSearchType;
+
+extern int mvstat_search_type;
+
 /*
  * Basic info about the stats, used when choosing what to use
  */
-- 
2.0.5