>From 1ce724f8813c5e680be3b845a6a8d2d3cf8f3560 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas@pgaddict.com>
Date: Mon, 6 Apr 2015 16:52:15 +0200
Subject: [PATCH 4/7] multivariate MCV lists

- extends the pg_mv_statistic catalog (add 'mcv' fields)
- building the MCV lists during ANALYZE
- simple estimation while planning the queries

Includes regression tests, mostly equal to regression tests for
functional dependencies.
---
 src/backend/catalog/system_views.sql   |    4 +-
 src/backend/commands/statscmds.c       |   45 +-
 src/backend/nodes/outfuncs.c           |    2 +
 src/backend/optimizer/path/clausesel.c | 1079 ++++++++++++++++++++++++++--
 src/backend/optimizer/util/plancat.c   |    4 +-
 src/backend/utils/mvstats/Makefile     |    2 +-
 src/backend/utils/mvstats/common.c     |  104 ++-
 src/backend/utils/mvstats/common.h     |   11 +-
 src/backend/utils/mvstats/mcv.c        | 1237 ++++++++++++++++++++++++++++++++
 src/bin/psql/describe.c                |   25 +-
 src/include/catalog/pg_mv_statistic.h  |   19 +-
 src/include/catalog/pg_proc.h          |    4 +
 src/include/nodes/relation.h           |    2 +
 src/include/utils/mvstats.h            |   69 +-
 src/test/regress/expected/mv_mcv.out   |  207 ++++++
 src/test/regress/expected/rules.out    |    4 +-
 src/test/regress/parallel_schedule     |    2 +-
 src/test/regress/serial_schedule       |    1 +
 src/test/regress/sql/mv_mcv.sql        |  178 +++++
 19 files changed, 2898 insertions(+), 101 deletions(-)
 create mode 100644 src/backend/utils/mvstats/mcv.c
 create mode 100644 src/test/regress/expected/mv_mcv.out
 create mode 100644 src/test/regress/sql/mv_mcv.sql

diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index e3f3387..6482aa7 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -165,7 +165,9 @@ CREATE VIEW pg_mv_stats AS
         S.staname AS staname,
         S.stakeys AS attnums,
         length(S.stadeps) as depsbytes,
-        pg_mv_stats_dependencies_info(S.stadeps) as depsinfo
+        pg_mv_stats_dependencies_info(S.stadeps) as depsinfo,
+        length(S.stamcv) AS mcvbytes,
+        pg_mv_stats_mcvlist_info(S.stamcv) AS mcvinfo
     FROM (pg_mv_statistic S JOIN pg_class C ON (C.oid = S.starelid))
         LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace);
 
diff --git a/src/backend/commands/statscmds.c b/src/backend/commands/statscmds.c
index 3790082..f730253 100644
--- a/src/backend/commands/statscmds.c
+++ b/src/backend/commands/statscmds.c
@@ -134,7 +134,13 @@ CreateStatistics(CreateStatsStmt *stmt)
 	ObjectAddress parentobject, childobject;
 
 	/* by default build nothing */
-	bool 	build_dependencies = false;
+	bool 	build_dependencies = false,
+			build_mcv = false;
+
+	int32 	max_mcv_items = -1;
+
+	/* options required because of other options */
+	bool	require_mcv = false;
 
 	Assert(IsA(stmt, CreateStatsStmt));
 
@@ -191,6 +197,29 @@ CreateStatistics(CreateStatsStmt *stmt)
 
 		if (strcmp(opt->defname, "dependencies") == 0)
 			build_dependencies = defGetBoolean(opt);
+		else if (strcmp(opt->defname, "mcv") == 0)
+			build_mcv = defGetBoolean(opt);
+		else if (strcmp(opt->defname, "max_mcv_items") == 0)
+		{
+			max_mcv_items = defGetInt32(opt);
+
+			/* this option requires 'mcv' to be enabled */
+			require_mcv = true;
+
+			/* sanity check */
+			if (max_mcv_items < MVSTAT_MCVLIST_MIN_ITEMS)
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("max number of MCV items must be at least %d",
+								MVSTAT_MCVLIST_MIN_ITEMS)));
+
+			else if (max_mcv_items > MVSTAT_MCVLIST_MAX_ITEMS)
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("max number of MCV items is %d",
+								MVSTAT_MCVLIST_MAX_ITEMS)));
+
+		}
 		else
 			ereport(ERROR,
 					(errcode(ERRCODE_SYNTAX_ERROR),
@@ -199,10 +228,16 @@ CreateStatistics(CreateStatsStmt *stmt)
 	}
 
 	/* check that at least some statistics were requested */
-	if (! build_dependencies)
+	if (! (build_dependencies || build_mcv))
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("no statistics type (dependencies, mcv) was requested")));
+
+	/* now do some checking of the options */
+	if (require_mcv && (! build_mcv))
 		ereport(ERROR,
 				(errcode(ERRCODE_SYNTAX_ERROR),
-				 errmsg("no statistics type (dependencies) was requested")));
+				 errmsg("option 'mcv' is required by other options(s)")));
 
 	/* sort the attnums and build int2vector */
 	qsort(attnums, numcols, sizeof(int16), compare_int16);
@@ -223,8 +258,12 @@ CreateStatistics(CreateStatsStmt *stmt)
 	values[Anum_pg_mv_statistic_stakeys -1] = PointerGetDatum(stakeys);
 
 	values[Anum_pg_mv_statistic_deps_enabled -1] = BoolGetDatum(build_dependencies);
+	values[Anum_pg_mv_statistic_mcv_enabled  -1] = BoolGetDatum(build_mcv);
+
+	values[Anum_pg_mv_statistic_mcv_max_items    -1] = Int32GetDatum(max_mcv_items);
 
 	nulls[Anum_pg_mv_statistic_stadeps  -1] = true;
+	nulls[Anum_pg_mv_statistic_stamcv   -1] = true;
 
 	/* insert the tuple into pg_mv_statistic */
 	mvstatrel = heap_open(MvStatisticRelationId, RowExclusiveLock);
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index cae21d0..0f58199 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -1948,9 +1948,11 @@ _outMVStatisticInfo(StringInfo str, const MVStatisticInfo *node)
 
 	/* enabled statistics */
 	WRITE_BOOL_FIELD(deps_enabled);
+	WRITE_BOOL_FIELD(mcv_enabled);
 
 	/* built/available statistics */
 	WRITE_BOOL_FIELD(deps_built);
+	WRITE_BOOL_FIELD(mcv_built);
 }
 
 static void
diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c
index c7f17e3..f122045 100644
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -15,6 +15,7 @@
 #include "postgres.h"
 
 #include "access/sysattr.h"
+#include "catalog/pg_collation.h"
 #include "catalog/pg_operator.h"
 #include "nodes/makefuncs.h"
 #include "optimizer/clauses.h"
@@ -47,17 +48,38 @@ static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
 			   bool varonleft, bool isLTsel, Selectivity s2);
 
 #define		MV_CLAUSE_TYPE_FDEP		0x01
+#define		MV_CLAUSE_TYPE_MCV		0x02
 
 static bool clause_is_mv_compatible(PlannerInfo *root, Node *clause, Oid varRelid,
-							 Index *relid, AttrNumber *attnum, SpecialJoinInfo *sjinfo);
+							 Index *relid, Bitmapset **attnums, SpecialJoinInfo *sjinfo,
+							 int type);
 
 static Bitmapset  *collect_mv_attnums(PlannerInfo *root, List *clauses,
-									  Oid varRelid, Index *relid, SpecialJoinInfo *sjinfo);
+									  Oid varRelid, Index *relid, SpecialJoinInfo *sjinfo,
+									  int type);
 
 static List *clauselist_apply_dependencies(PlannerInfo *root, List *clauses,
 								Oid varRelid, List *stats,
 								SpecialJoinInfo *sjinfo);
 
+static MVStatisticInfo *choose_mv_statistics(List *mvstats, Bitmapset *attnums);
+
+static List *clauselist_mv_split(PlannerInfo *root, SpecialJoinInfo *sjinfo,
+								 List *clauses, Oid varRelid,
+								 List **mvclauses, MVStatisticInfo *mvstats, int types);
+
+static Selectivity clauselist_mv_selectivity(PlannerInfo *root,
+						List *clauses, MVStatisticInfo *mvstats);
+static Selectivity clauselist_mv_selectivity_mcvlist(PlannerInfo *root,
+									List *clauses, MVStatisticInfo *mvstats,
+									bool *fullmatch, Selectivity *lowsel);
+
+static int update_match_bitmap_mcvlist(PlannerInfo *root, List *clauses,
+									int2vector *stakeys, MCVList mcvlist,
+									int nmatches, char * matches,
+									Selectivity *lowsel, bool *fullmatch,
+									bool is_or);
+
 static bool has_stats(List *stats, int type);
 
 static List * find_stats(PlannerInfo *root, List *clauses,
@@ -85,6 +107,13 @@ static Bitmapset *fdeps_filter_clauses(PlannerInfo *root,
 
 static Bitmapset * get_varattnos(Node * node, Index relid);
 
+/* used for merging bitmaps - AND (min), OR (max) */
+#define MAX(x, y) (((x) > (y)) ? (x) : (y))
+#define MIN(x, y) (((x) < (y)) ? (x) : (y))
+
+#define UPDATE_RESULT(m,r,isor)	\
+	(m) = (isor) ? (MAX(m,r)) : (MIN(m,r))
+
 /****************************************************************************
  *		ROUTINES TO COMPUTE SELECTIVITIES
  ****************************************************************************/
@@ -250,8 +279,12 @@ clauselist_selectivity(PlannerInfo *root,
 	 */
 	if (has_stats(stats, MV_CLAUSE_TYPE_FDEP))
 	{
-		/* collect attributes referenced by mv-compatible clauses */
-		mvattnums = collect_mv_attnums(root, clauses, varRelid, &relid, sjinfo);
+		/*
+		 * Collect attributes referenced by mv-compatible clauses (looking
+		 * for clauses compatible with functional dependencies for now).
+		 */
+		mvattnums = collect_mv_attnums(root, clauses, varRelid, &relid, sjinfo,
+									   MV_CLAUSE_TYPE_FDEP);
 
 		/*
 		 * If there are mv-compatible clauses, referencing at least two
@@ -268,6 +301,48 @@ clauselist_selectivity(PlannerInfo *root,
 	}
 
 	/*
+	 * Check that there are statistics with MCV list. If not, we don't
+	 * need to waste time with the optimization.
+	 */
+	if (has_stats(stats, MV_CLAUSE_TYPE_MCV))
+	{
+		/*
+		 * Recollect attributes from mv-compatible clauses (maybe we've
+		 * removed so many clauses we have a single mv-compatible attnum).
+		 * From now on we're only interested in MCV-compatible clauses.
+		 */
+		mvattnums = collect_mv_attnums(root, clauses, varRelid, &relid, sjinfo,
+									   MV_CLAUSE_TYPE_MCV);
+
+		/*
+		 * If there still are at least two columns, we'll try to select
+		 * a suitable multivariate stats.
+		 */
+		if (bms_num_members(mvattnums) >= 2)
+		{
+			/* see choose_mv_statistics() for details */
+			MVStatisticInfo *mvstat = choose_mv_statistics(stats, mvattnums);
+
+			if (mvstat != NULL)	/* we have a matching stats */
+			{
+				/* clauses compatible with multi-variate stats */
+				List	*mvclauses = NIL;
+
+				/* split the clauselist into regular and mv-clauses */
+				clauses = clauselist_mv_split(root, sjinfo, clauses,
+										varRelid, &mvclauses, mvstat,
+										MV_CLAUSE_TYPE_MCV);
+
+				/* we've chosen the histogram to match the clauses */
+				Assert(mvclauses != NIL);
+
+				/* compute the multivariate stats */
+				s1 *= clauselist_mv_selectivity(root, mvclauses, mvstat);
+			}
+		}
+	}
+
+	/*
 	 * Initial scan over clauses.  Anything that doesn't look like a potential
 	 * rangequery clause gets multiplied into s1 and forgotten. Anything that
 	 * does gets inserted into an rqlist entry.
@@ -924,12 +999,129 @@ clause_selectivity(PlannerInfo *root,
 	return s1;
 }
 
+
+/*
+ * Estimate selectivity for the list of MV-compatible clauses, using
+ * using a MV statistics (combining a histogram and MCV list).
+ *
+ * This simply passes the estimation to the MCV list and then to the
+ * histogram, if available.
+ *
+ * TODO Clamp the selectivity by min of the per-clause selectivities
+ *      (i.e. the selectivity of the most restrictive clause), because
+ *      that's the maximum we can ever get from ANDed list of clauses.
+ *      This may probably prevent issues with hitting too many buckets
+ *      and low precision histograms.
+ *
+ * TODO We may support some additional conditions, most importantly
+ *      those matching multiple columns (e.g. "a = b" or "a < b").
+ *      Ultimately we could track multi-table histograms for join
+ *      cardinality estimation.
+ *
+ * TODO Further thoughts on processing equality clauses: Maybe it'd be
+ *      better to look for stats (with MCV) covered by the equality
+ *      clauses, because then we have a chance to find an exact match
+ *      in the MCV list, which is pretty much the best we can do. We may
+ *      also look at the least frequent MCV item, and use it as a upper
+ *      boundary for the selectivity (had there been a more frequent
+ *      item, it'd be in the MCV list).
+ *
+ * TODO There are several options for 'sanity clamping' the estimates.
+ *
+ *      First, if we have selectivities for each condition, then
+ *
+ *          P(A,B) <= MIN(P(A), P(B))
+ *
+ *      Because additional conditions (connected by AND) can only lower
+ *      the probability.
+ *
+ *      So we can do some basic sanity checks using the single-variate
+ *      stats (the ones we have right now).
+ *
+ *      Second, when we have multivariate stats with a MCV list, then
+ *
+ *      (a) if we have a full equality condition (one equality condition
+ *          on each column) and we found a match in the MCV list, this is
+ *          the selectivity (and it's supposed to be exact)
+ *
+ *      (b) if we have a full equality condition and we haven't found a
+ *          match in the MCV list, then the selectivity is below the
+ *          lowest selectivity in the MCV list
+ *
+ *      (c) if we have a equality condition (not full), we can still
+ *          search the MCV for matches and use the sum of probabilities
+ *          as a lower boundary for the histogram (if there are no
+ *          matches in the MCV list, then we have no boundary)
+ *
+ *      Third, if there are multiple (combinations of) multivariate
+ *      stats for a set of clauses, we may compute all of them and then
+ *      somehow aggregate them - e.g. by choosing the minimum, median or
+ *      average. The stats are susceptible to overestimation (because
+ *      we take 50% of the bucket for partial matches). Some stats may
+ *      give better estimates than others, but it's very difficult to
+ *      say that in advance which one is the best (it depends on the
+ *      number of buckets, number of additional columns not referenced
+ *      in the clauses, type of condition etc.).
+ *
+ *      So we may compute them all and then choose a sane aggregation
+ *      (minimum seems like a good approach). Of course, this may result
+ *      in longer / more expensive estimation (CPU-wise), but it may be
+ *      worth it.
+ *
+ *      It's possible to add a GUC choosing whether to do a 'simple'
+ *      (using a single stats expected to give the best estimate) and
+ *      'complex' (combining the multiple estimates).
+ *
+ *          multivariate_estimates = (simple|full)
+ *
+ *      Also, this might be enabled at a table level, by something like
+ *
+ *          ALTER TABLE ... SET STATISTICS (simple|full)
+ *
+ *      Which would make it possible to use this only for the tables
+ *      where the simple approach does not work.
+ *
+ *      Also, there are ways to optimize this algorithmically. E.g. we
+ *      may try to get an estimate from a matching MCV list first, and
+ *      if we happen to get a "full equality match" we may stop computing
+ *      the estimates from other stats (for this condition) because
+ *      that's probably the best estimate we can really get.
+ *
+ * TODO When applying the clauses to the histogram/MCV list, we can do
+ *      that from the most selective clauses first, because that'll
+ *      eliminate the buckets/items sooner (so we'll be able to skip
+ *      them without inspection, which is more expensive). But this
+ *      requires really knowing the per-clause selectivities in advance,
+ *      and that's not what we do now.
+ */
+static Selectivity
+clauselist_mv_selectivity(PlannerInfo *root, List *clauses, MVStatisticInfo *mvstats)
+{
+	bool fullmatch = false;
+
+	/*
+	 * Lowest frequency in the MCV list (may be used as an upper bound
+	 * for full equality conditions that did not match any MCV item).
+	 */
+	Selectivity mcv_low = 0.0;
+
+	/* TODO Evaluate simple 1D selectivities, use the smallest one as
+	 *      an upper bound, product as lower bound, and sort the
+	 *      clauses in ascending order by selectivity (to optimize the
+	 *      MCV/histogram evaluation).
+	 */
+
+	/* Evaluate the MCV selectivity */
+	return clauselist_mv_selectivity_mcvlist(root, clauses, mvstats,
+										   &fullmatch, &mcv_low);
+}
+
 /*
  * Collect attributes from mv-compatible clauses.
  */
 static Bitmapset *
 collect_mv_attnums(PlannerInfo *root, List *clauses, Oid varRelid,
-				   Index *relid, SpecialJoinInfo *sjinfo)
+				   Index *relid, SpecialJoinInfo *sjinfo, int types)
 {
 	Bitmapset  *attnums = NULL;
 	ListCell   *l;
@@ -945,12 +1137,11 @@ collect_mv_attnums(PlannerInfo *root, List *clauses, Oid varRelid,
 	 */
 	foreach (l, clauses)
 	{
-		AttrNumber attnum;
 		Node	   *clause = (Node *) lfirst(l);
 
-		/* ignore the result for now - we only need the info */
-		if (clause_is_mv_compatible(root, clause, varRelid, relid, &attnum, sjinfo))
-			attnums = bms_add_member(attnums, attnum);
+		/* ignore the result here - we only need the attnums */
+		clause_is_mv_compatible(root, clause, varRelid, relid, &attnums,
+								sjinfo, types);
 	}
 
 	/*
@@ -969,6 +1160,188 @@ collect_mv_attnums(PlannerInfo *root, List *clauses, Oid varRelid,
 }
 
 /*
+ * We're looking for statistics matching at least 2 attributes,
+ * referenced in the clauses compatible with multivariate statistics.
+ * The current selection criteria is very simple - we choose the
+ * statistics referencing the most attributes.
+ *
+ * If there are multiple statistics referencing the same number of
+ * columns (from the clauses), the one with less source columns
+ * (as listed in the ADD STATISTICS when creating the statistics) wins.
+ * Other wise the first one wins.
+ *
+ * This is a very simple criteria, and has several weaknesses:
+ *
+ * (a) does not consider the accuracy of the statistics
+ *
+ *     If there are two histograms built on the same set of columns,
+ *     but one has 100 buckets and the other one has 1000 buckets (thus
+ *     likely providing better estimates), this is not currently
+ *     considered.
+ *
+ * (b) does not consider the type of statistics
+ *
+ *     If there are three statistics - one containing just a MCV list,
+ *     another one with just a histogram and a third one with both,
+ *     this is not considered.
+ *
+ * (c) does not consider the number of clauses
+ *
+ *     As explained, only the number of referenced attributes counts,
+ *     so if there are multiple clauses on a single attribute, this
+ *     still counts as a single attribute.
+ *
+ * (d) does not consider type of condition
+ *
+ *     Some clauses may work better with some statistics - for example
+ *     equality clauses probably work better with MCV lists than with
+ *     histograms. But IS [NOT] NULL conditions may often work better
+ *     with histograms (thanks to NULL-buckets).
+ *
+ * So for example with five WHERE conditions
+ *
+ *     WHERE (a = 1) AND (b = 1) AND (c = 1) AND (d = 1) AND (e = 1)
+ *
+ * and statistics on (a,b), (a,b,e) and (a,b,c,d), the last one will be
+ * selected as it references the most columns.
+ *
+ * Once we have selected the multivariate statistics, we split the list
+ * of clauses into two parts - conditions that are compatible with the
+ * selected stats, and conditions are estimated using simple statistics.
+ *
+ * From the example above, conditions
+ *
+ *     (a = 1) AND (b = 1) AND (c = 1) AND (d = 1)
+ *
+ * will be estimated using the multivariate statistics (a,b,c,d) while
+ * the last condition (e = 1) will get estimated using the regular ones.
+ *
+ * There are various alternative selection criteria (e.g. counting
+ * conditions instead of just referenced attributes), but eventually
+ * the best option should be to combine multiple statistics. But that's
+ * much harder to do correctly.
+ *
+ * TODO Select multiple statistics and combine them when computing
+ *      the estimate.
+ *
+ * TODO This will probably have to consider compatibility of clauses,
+ *      because 'dependencies' will probably work only with equality
+ *      clauses.
+ */
+static MVStatisticInfo *
+choose_mv_statistics(List *stats, Bitmapset *attnums)
+{
+	int i;
+	ListCell   *lc;
+
+	MVStatisticInfo *choice = NULL;
+
+	int current_matches = 1;						/* goal #1: maximize */
+	int current_dims = (MVSTATS_MAX_DIMENSIONS+1);	/* goal #2: minimize */
+
+	/*
+	 * Walk through the statistics (simple array with nmvstats elements)
+	 * and for each one count the referenced attributes (encoded in
+	 * the 'attnums' bitmap).
+	 */
+	foreach (lc, stats)
+	{
+		MVStatisticInfo *info = (MVStatisticInfo *)lfirst(lc);
+
+		/* columns matching this statistics */
+		int matches = 0;
+
+		int2vector * attrs = info->stakeys;
+		int	numattrs = attrs->dim1;
+
+		/* skip dependencies-only stats */
+		if (! info->mcv_built)
+			continue;
+
+		/* count columns covered by the histogram */
+		for (i = 0; i < numattrs; i++)
+			if (bms_is_member(attrs->values[i], attnums))
+				matches++;
+
+		/*
+		 * Use this statistics when it improves the number of matches or
+		 * when it matches the same number of attributes but is smaller.
+		 */
+		if ((matches > current_matches) ||
+			((matches == current_matches) && (current_dims > numattrs)))
+		{
+			choice = info;
+			current_matches = matches;
+			current_dims = numattrs;
+		}
+	}
+
+	return choice;
+}
+
+
+/*
+ * This splits the clauses list into two parts - one containing clauses
+ * that will be evaluated using the chosen statistics, and the remaining
+ * clauses (either non-mvcompatible, or not related to the histogram).
+ */
+static List *
+clauselist_mv_split(PlannerInfo *root, SpecialJoinInfo *sjinfo,
+					List *clauses, Oid varRelid, List **mvclauses,
+					MVStatisticInfo *mvstats, int types)
+{
+	int i;
+	ListCell *l;
+	List	 *non_mvclauses = NIL;
+
+	/* FIXME is there a better way to get info on int2vector? */
+	int2vector * attrs = mvstats->stakeys;
+	int	numattrs = mvstats->stakeys->dim1;
+
+	Bitmapset *mvattnums = NULL;
+
+	/* build bitmap of attributes covered by the stats, so we can
+	 * do bms_is_subset later */
+	for (i = 0; i < numattrs; i++)
+		mvattnums = bms_add_member(mvattnums, attrs->values[i]);
+
+	/* erase the list of mv-compatible clauses */
+	*mvclauses = NIL;
+
+	foreach (l, clauses)
+	{
+		bool		match = false;	/* by default not mv-compatible */
+		Bitmapset	*attnums = NULL;
+		Node	   *clause = (Node *) lfirst(l);
+
+		if (clause_is_mv_compatible(root, clause, varRelid, NULL,
+									&attnums, sjinfo, types))
+		{
+			/* are all the attributes part of the selected stats? */
+			if (bms_is_subset(attnums, mvattnums))
+				match = true;
+		}
+
+		/*
+		 * The clause matches the selected stats, so put it to the list
+		 * of mv-compatible clauses. Otherwise, keep it in the list of
+		 * 'regular' clauses (that may be selected later).
+		 */
+		if (match)
+			*mvclauses = lappend(*mvclauses, clause);
+		else
+			non_mvclauses = lappend(non_mvclauses, clause);
+	}
+
+	/*
+	 * Perform regular estimation using the clauses incompatible
+	 * with the chosen histogram (or MV stats in general).
+	 */
+	return non_mvclauses;
+
+}
+
+/*
  * Determines whether the clause is compatible with multivariate stats,
  * and if it is, returns some additional information - varno (index
  * into simple_rte_array) and a bitmap of attributes. This is then
@@ -987,8 +1360,12 @@ collect_mv_attnums(PlannerInfo *root, List *clauses, Oid varRelid,
  */
 static bool
 clause_is_mv_compatible(PlannerInfo *root, Node *clause, Oid varRelid,
-						Index *relid, AttrNumber *attnum, SpecialJoinInfo *sjinfo)
+						Index *relid, Bitmapset **attnums, SpecialJoinInfo *sjinfo,
+						int types)
 {
+	Relids clause_relids;
+	Relids left_relids;
+	Relids right_relids;
 
 	if (IsA(clause, RestrictInfo))
 	{
@@ -998,82 +1375,176 @@ clause_is_mv_compatible(PlannerInfo *root, Node *clause, Oid varRelid,
 		if (rinfo->pseudoconstant)
 			return false;
 
-		/* no support for OR clauses at this point */
-		if (rinfo->orclause)
-			return false;
-
 		/* get the actual clause from the RestrictInfo (it's not an OR clause) */
 		clause = (Node*)rinfo->clause;
 
-		/* only simple opclauses are compatible with multivariate stats */
-		if (! is_opclause(clause))
-			return false;
-
 		/* we don't support join conditions at this moment */
 		if (treat_as_join_clause(clause, rinfo, varRelid, sjinfo))
 			return false;
 
+		clause_relids = rinfo->clause_relids;
+		left_relids = rinfo->left_relids;
+		right_relids = rinfo->right_relids;
+	}
+	else if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
+	{
+		left_relids = pull_varnos(get_leftop((Expr*)clause));
+		right_relids = pull_varnos(get_rightop((Expr*)clause));
+
+		clause_relids = bms_union(left_relids,
+								  right_relids);
+	}
+	else
+	{
+		/* Not a binary opclause, so mark left/right relid sets as empty */
+		left_relids = NULL;
+		right_relids = NULL;
+		/* and get the total relid set the hard way */
+		clause_relids = pull_varnos((Node *) clause);
+	}
+
+	/*
+	 * Only simple opclauses and IS NULL tests are compatible with
+	 * multivariate stats at this point.
+	 */
+	if ((is_opclause(clause))
+		&& (list_length(((OpExpr *) clause)->args) == 2))
+	{
+		OpExpr	   *expr = (OpExpr *) clause;
+		bool		varonleft = true;
+		bool		ok;
+
 		/* is it 'variable op constant' ? */
-		if (list_length(((OpExpr *) clause)->args) == 2)
+
+		ok = (bms_membership(clause_relids) == BMS_SINGLETON) &&
+			(is_pseudo_constant_clause_relids(lsecond(expr->args),
+											  right_relids) ||
+			(varonleft = false,
+			is_pseudo_constant_clause_relids(linitial(expr->args),
+											 left_relids)));
+
+		if (ok)
 		{
-			OpExpr	   *expr = (OpExpr *) clause;
-			bool		varonleft = true;
-			bool		ok;
+			Var * var = (varonleft) ? linitial(expr->args) : lsecond(expr->args);
 
-			ok = (bms_membership(rinfo->clause_relids) == BMS_SINGLETON) &&
-				(is_pseudo_constant_clause_relids(lsecond(expr->args),
-												rinfo->right_relids) ||
-				(varonleft = false,
-				is_pseudo_constant_clause_relids(linitial(expr->args),
-												rinfo->left_relids)));
+			/*
+			 * Simple variables only - otherwise the planner_rt_fetch seems to fail
+			 * (return NULL).
+			 *
+			 * TODO Maybe use examine_variable() would fix that?
+			 */
+			if (! (IsA(var, Var) && (varRelid == 0 || varRelid == var->varno)))
+				return false;
 
-			if (ok)
-			{
-				Var * var = (varonleft) ? linitial(expr->args) : lsecond(expr->args);
+			/*
+			 * Only consider this variable if (varRelid == 0) or when the varno
+			 * matches varRelid (see explanation at clause_selectivity).
+			 *
+			 * FIXME I suspect this may not be really necessary. The (varRelid == 0)
+			 *       part seems to be enforced by treat_as_join_clause().
+			 */
+			if (! ((varRelid == 0) || (varRelid == var->varno)))
+				return false;
 
-				/*
-				 * Simple variables only - otherwise the planner_rt_fetch seems to fail
-				 * (return NULL).
-				 *
-				 * TODO Maybe use examine_variable() would fix that?
-				 */
-				if (! (IsA(var, Var) && (varRelid == 0 || varRelid == var->varno)))
-					return false;
+			/* Also skip special varno values, and system attributes ... */
+			if ((IS_SPECIAL_VARNO(var->varno)) || (! AttrNumberIsForUserDefinedAttr(var->varattno)))
+				return false;
 
-				/*
-				 * Only consider this variable if (varRelid == 0) or when the varno
-				 * matches varRelid (see explanation at clause_selectivity).
-				 *
-				 * FIXME I suspect this may not be really necessary. The (varRelid == 0)
-				 *       part seems to be enforced by treat_as_join_clause().
-				 */
-				if (! ((varRelid == 0) || (varRelid == var->varno)))
-					return false;
+			/* Lookup info about the base relation (we need to pass the OID out) */
+			if (relid != NULL)
+				*relid = var->varno;
+
+			/*
+			 * If it's not a "<" or ">" or "=" operator, just ignore the
+			 * clause. Otherwise note the relid and attnum for the variable.
+			 * This uses the function for estimating selectivity, ont the
+			 * operator directly (a bit awkward, but well ...).
+			 */
+			switch (get_oprrest(expr->opno))
+				{
+					case F_SCALARLTSEL:
+					case F_SCALARGTSEL:
+						/* not compatible with functional dependencies */
+						if (types & MV_CLAUSE_TYPE_MCV)
+						{
+							*attnums = bms_add_member(*attnums, var->varattno);
+							return (types & MV_CLAUSE_TYPE_MCV);
+						}
+						return false;
+
+					case F_EQSEL:
+						*attnums = bms_add_member(*attnums, var->varattno);
+						return true;
+				}
+		}
+	}
+	else if (IsA(clause, NullTest)
+			 && IsA(((NullTest*)clause)->arg, Var))
+	{
+		Var * var = (Var*)((NullTest*)clause)->arg;
+
+		/*
+		 * Simple variables only - otherwise the planner_rt_fetch seems to fail
+		 * (return NULL).
+		 *
+		 * TODO Maybe use examine_variable() would fix that?
+		 */
+		if (! (IsA(var, Var) && (varRelid == 0 || varRelid == var->varno)))
+			return false;
+
+		/*
+		 * Only consider this variable if (varRelid == 0) or when the varno
+		 * matches varRelid (see explanation at clause_selectivity).
+		 *
+		 * FIXME I suspect this may not be really necessary. The (varRelid == 0)
+		 *       part seems to be enforced by treat_as_join_clause().
+		 */
+		if (! ((varRelid == 0) || (varRelid == var->varno)))
+			return false;
 
-				/* Also skip special varno values, and system attributes ... */
-				if ((IS_SPECIAL_VARNO(var->varno)) || (! AttrNumberIsForUserDefinedAttr(var->varattno)))
-					return false;
+		/* Also skip special varno values, and system attributes ... */
+		if ((IS_SPECIAL_VARNO(var->varno)) || (! AttrNumberIsForUserDefinedAttr(var->varattno)))
+			return false;
 
+		/* Lookup info about the base relation (we need to pass the OID out) */
+		if (relid != NULL)
 				*relid = var->varno;
 
-				/*
-				 * If it's not a "<" or ">" or "=" operator, just ignore the
-				 * clause. Otherwise note the relid and attnum for the variable.
-				 * This uses the function for estimating selectivity, ont the
-				 * operator directly (a bit awkward, but well ...).
-				 */
-				switch (get_oprrest(expr->opno))
-					{
-						case F_EQSEL:
-							*attnum = var->varattno;
-							return true;
-					}
-			}
+		*attnums = bms_add_member(*attnums, var->varattno);
+
+		return true;
+	}
+	else if (or_clause(clause) || and_clause(clause))
+	{
+		/*
+		 * AND/OR-clauses are supported if all sub-clauses are supported
+		 *
+		 * TODO We might support mixed case, where some of the clauses
+		 *      are supported and some are not, and treat all supported
+		 *      subclauses as a single clause, compute it's selectivity
+		 *      using mv stats, and compute the total selectivity using
+		 *      the current algorithm.
+		 *
+		 * TODO For RestrictInfo above an OR-clause, we might use the
+		 *      orclause with nested RestrictInfo - we won't have to
+		 *      call pull_varnos() for each clause, saving time. 
+		 */
+		Bitmapset *tmp = NULL;
+		ListCell *l;
+		foreach (l, ((BoolExpr*)clause)->args)
+		{
+			if (! clause_is_mv_compatible(root, (Node*)lfirst(l),
+						varRelid, relid, &tmp, sjinfo, types))
+				return false;
 		}
+
+		/* add the attnums from the OR-clause to the set of attnums */
+		*attnums = bms_join(*attnums, tmp);
+
+		return true;
 	}
 
 	return false;
-
 }
 
 /*
@@ -1322,6 +1793,9 @@ has_stats(List *stats, int type)
 
 		if ((type & MV_CLAUSE_TYPE_FDEP) && stat->deps_built)
 			return true;
+
+		if ((type & MV_CLAUSE_TYPE_MCV) && stat->mcv_built)
+			return true;
 	}
 
 	return false;
@@ -1617,25 +2091,39 @@ fdeps_filter_clauses(PlannerInfo *root,
 
 	foreach (lc, clauses)
 	{
-		AttrNumber	attnum;
+		Bitmapset *attnums = NULL;
 		Node	   *clause = (Node *) lfirst(lc);
 
-		if (! clause_is_mv_compatible(root, clause, varRelid, relid,
-									  &attnum, sjinfo))
+		if (! clause_is_mv_compatible(root, clause, varRelid, relid, &attnums,
+									  sjinfo, MV_CLAUSE_TYPE_FDEP))
 
 			/* clause incompatible with functional dependencies */
 			*reduced_clauses = lappend(*reduced_clauses, clause);
 
-		else if (! bms_is_member(attnum, deps_attnums))
+		else if (bms_num_members(attnums) > 1)
+
+			/*
+			 * clause referencing multiple attributes (strange, should
+			 * this be handled by clause_is_mv_compatible directly)
+			 */
+			*reduced_clauses = lappend(*reduced_clauses, clause);
+
+		else if (! bms_is_member(bms_singleton_member(attnums), deps_attnums))
 
 			/* clause not covered by the dependencies */
 			*reduced_clauses = lappend(*reduced_clauses, clause);
 
 		else
 		{
+			/* ok, clause compatible with existing dependencies */
+			Assert(bms_num_members(attnums) == 1);
+
 			*deps_clauses   = lappend(*deps_clauses, clause);
-			clause_attnums = bms_add_member(clause_attnums, attnum);
+			clause_attnums = bms_add_member(clause_attnums,
+										bms_singleton_member(attnums));
 		}
+
+		bms_free(attnums);
 	}
 
 	return clause_attnums;
@@ -1673,3 +2161,454 @@ get_varattnos(Node * node, Index relid)
 
 	return result;
 }
+
+/*
+ * Estimate selectivity of clauses using a MCV list.
+ *
+ * If there's no MCV list for the stats, the function returns 0.0.
+ *
+ * While computing the estimate, the function checks whether all the
+ * columns were matched with an equality condition. If that's the case,
+ * we can skip processing the histogram, as there can be no rows in
+ * it with the same values - all the rows matching the condition are
+ * represented by the MCV item. This can only happen with equality
+ * on all the attributes.
+ *
+ * The algorithm works like this:
+ *
+ *   1) mark all items as 'match'
+ *   2) walk through all the clauses
+ *   3) for a particular clause, walk through all the items
+ *   4) skip items that are already 'no match'
+ *   5) check clause for items that still match
+ *   6) sum frequencies for items to get selectivity
+ *
+ * The function also returns the frequency of the least frequent item
+ * on the MCV list, which may be useful for clamping estimate from the
+ * histogram (all items not present in the MCV list are less frequent).
+ * This however seems useful only for cases with conditions on all
+ * attributes.
+ *
+ * TODO This only handles AND-ed clauses, but it might work for OR-ed
+ *      lists too - it just needs to reverse the logic a bit. I.e. start
+ *      with 'no match' for all items, and mark the items as a match
+ *      as the clauses are processed (and skip items that are 'match').
+ */
+static Selectivity
+clauselist_mv_selectivity_mcvlist(PlannerInfo *root, List *clauses,
+								  MVStatisticInfo *mvstats, bool *fullmatch,
+								  Selectivity *lowsel)
+{
+	int i;
+	Selectivity s = 0.0;
+	Selectivity u = 0.0;
+
+	MCVList mcvlist = NULL;
+	int	nmatches = 0;
+
+	/* match/mismatch bitmap for each MCV item */
+	char * matches = NULL;
+
+	Assert(clauses != NIL);
+	Assert(list_length(clauses) >= 2);
+
+	/* there's no MCV list built yet */
+	if (! mvstats->mcv_built)
+		return 0.0;
+
+	mcvlist = load_mv_mcvlist(mvstats->mvoid);
+
+	Assert(mcvlist != NULL);
+	Assert(mcvlist->nitems > 0);
+
+	/* by default all the MCV items match the clauses fully */
+	matches = palloc0(sizeof(char) * mcvlist->nitems);
+	memset(matches, MVSTATS_MATCH_FULL, sizeof(char)*mcvlist->nitems);
+
+	/* number of matching MCV items */
+	nmatches = mcvlist->nitems;
+
+	nmatches = update_match_bitmap_mcvlist(root, clauses,
+										   mvstats->stakeys, mcvlist,
+										   nmatches, matches,
+										   lowsel, fullmatch, false);
+
+	/* sum frequencies for all the matching MCV items */
+	for (i = 0; i < mcvlist->nitems; i++)
+	{
+		/* used to 'scale' for MCV lists not covering all tuples */
+		u += mcvlist->items[i]->frequency;
+
+		if (matches[i] != MVSTATS_MATCH_NONE)
+			s += mcvlist->items[i]->frequency;
+	}
+
+	pfree(matches);
+	pfree(mcvlist);
+
+	return s*u;
+}
+
+/*
+ * Evaluate clauses using the MCV list, and update the match bitmap.
+ *
+ * The bitmap may be already partially set, so this is really a way to
+ * combine results of several clause lists - either when computing
+ * conditional probability P(A|B) or a combination of AND/OR clauses.
+ *
+ * TODO This works with 'bitmap' where each bit is represented as a char,
+ *      which is slightly wasteful. Instead, we could use a regular
+ *      bitmap, reducing the size to ~1/8. Another thing is merging the
+ *      bitmaps using & and |, which might be faster than min/max.
+ */
+static int
+update_match_bitmap_mcvlist(PlannerInfo *root, List *clauses,
+						   int2vector *stakeys, MCVList mcvlist,
+						   int nmatches, char * matches,
+						   Selectivity *lowsel, bool *fullmatch,
+						   bool is_or)
+{
+	int i;
+	ListCell * l;
+
+	Bitmapset *eqmatches = NULL;	/* attributes with equality matches */
+
+	/* The bitmap may be partially built. */
+	Assert(nmatches >= 0);
+	Assert(nmatches <= mcvlist->nitems);
+	Assert(clauses != NIL);
+	Assert(list_length(clauses) >= 1);
+	Assert(mcvlist != NULL);
+	Assert(mcvlist->nitems > 0);
+
+	/* No possible matches (only works for AND-ded clauses) */
+	if (((nmatches == 0) && (! is_or)) ||
+		((nmatches == mcvlist->nitems) && is_or))
+		return nmatches;
+
+	/* frequency of the lowest MCV item */
+	*lowsel = 1.0;
+
+	/*
+	 * Loop through the list of clauses, and for each of them evaluate
+	 * all the MCV items not yet eliminated by the preceding clauses.
+	 *
+	 * FIXME This would probably deserve a refactoring, I guess. Unify
+	 *       the two loops and put the checks inside, or something like
+	 *       that.
+	 */
+	foreach (l, clauses)
+	{
+		Node * clause = (Node*)lfirst(l);
+
+		/* if it's a RestrictInfo, then extract the clause */
+		if (IsA(clause, RestrictInfo))
+			clause = (Node*)((RestrictInfo*)clause)->clause;
+
+		/* if there are no remaining matches possible, we can stop */
+		if (((nmatches == 0) && (! is_or)) ||
+			((nmatches == mcvlist->nitems) && is_or))
+				break;
+
+		/* it's either OpClause, or NullTest */
+		if (is_opclause(clause))
+		{
+			OpExpr * expr = (OpExpr*)clause;
+			bool		varonleft = true;
+			bool		ok;
+
+			/* operator */
+			FmgrInfo		opproc;
+
+			/* get procedure computing operator selectivity */
+			RegProcedure	oprrest = get_oprrest(expr->opno);
+
+			fmgr_info(get_opcode(expr->opno), &opproc);
+
+			ok = (NumRelids(clause) == 1) &&
+				 (is_pseudo_constant_clause(lsecond(expr->args)) ||
+				 (varonleft = false,
+				  is_pseudo_constant_clause(linitial(expr->args))));
+
+			if (ok)
+			{
+
+				FmgrInfo	ltproc, gtproc;
+				Var * var = (varonleft) ? linitial(expr->args) : lsecond(expr->args);
+				Const * cst = (varonleft) ? lsecond(expr->args) : linitial(expr->args);
+				bool isgt = (! varonleft);
+
+				/*
+				 * TODO Fetch only when really needed (probably for equality only)
+				 * TODO Technically either lt/gt is sufficient.
+				 *
+				 * FIXME The code in analyze.c creates histograms only for types
+				 *       with enough ordering (by calling get_sort_group_operators).
+				 *       Is this the same assumption, i.e. are we certain that we
+				 *       get the ltproc/gtproc every time we ask? Or are there types
+				 *       where get_sort_group_operators returns ltopr and here we
+				 *       get nothing?
+				 */
+				TypeCacheEntry *typecache
+					= lookup_type_cache(var->vartype,
+										TYPECACHE_EQ_OPR | TYPECACHE_LT_OPR | TYPECACHE_GT_OPR);
+
+				/* FIXME proper matching attribute to dimension */
+				int idx = mv_get_index(var->varattno, stakeys);
+
+				fmgr_info(get_opcode(typecache->lt_opr), &ltproc);
+				fmgr_info(get_opcode(typecache->gt_opr), &gtproc);
+
+				/*
+				 * Walk through the MCV items and evaluate the current clause. We can
+				 * skip items that were already ruled out, and terminate if there are
+				 * no remaining MCV items that might possibly match.
+				 */
+				for (i = 0; i < mcvlist->nitems; i++)
+				{
+					bool mismatch = false;
+					MCVItem item = mcvlist->items[i];
+
+					/*
+					 * find the lowest selectivity in the MCV
+					 * FIXME Maybe not the best place do do this (in for all clauses).
+					 */
+					if (item->frequency < *lowsel)
+						*lowsel = item->frequency;
+
+					/*
+					 * If there are no more matches (AND) or no remaining unmatched
+					 * items (OR), we can stop processing this clause.
+					 */
+					if (((nmatches == 0) && (! is_or)) ||
+						((nmatches == mcvlist->nitems) && is_or))
+						break;
+
+					/*
+					 * For AND-lists, we can also mark NULL items as 'no match' (and
+					 * then skip them). For OR-lists this is not possible.
+					 */
+					if ((! is_or) && item->isnull[idx])
+						matches[i] = MVSTATS_MATCH_NONE;
+
+					/* skip MCV items that were already ruled out */
+					if ((! is_or) && (matches[i] == MVSTATS_MATCH_NONE))
+						continue;
+					else if (is_or && (matches[i] == MVSTATS_MATCH_FULL))
+						continue;
+
+					/* TODO consider bsearch here (list is sorted by values)
+					 * TODO handle other operators too (LT, GT)
+					 * TODO identify "full match" when the clauses fully
+					 *      match the whole MCV list (so that checking the
+					 *      histogram is not needed)
+					 */
+					if (oprrest == F_EQSEL)
+					{
+						/*
+						 * We don't care about isgt in equality, because it does not
+						 * matter whether it's (var = const) or (const = var).
+						 */
+						bool match = DatumGetBool(FunctionCall2Coll(&opproc,
+															 DEFAULT_COLLATION_OID,
+															 cst->constvalue,
+															 item->values[idx]));
+
+						if (match)
+							eqmatches = bms_add_member(eqmatches, idx);
+
+						mismatch = (! match);
+					}
+					else if (oprrest == F_SCALARLTSEL)	/* column < constant */
+					{
+
+						if (! isgt)	/* (var < const) */
+						{
+							/*
+							 * First check whether the constant is below the lower boundary (in that
+							 * case we can skip the bucket, because there's no overlap).
+							 */
+							mismatch = DatumGetBool(FunctionCall2Coll(&opproc,
+																 DEFAULT_COLLATION_OID,
+																 cst->constvalue,
+																 item->values[idx]));
+
+						} /* (get_oprrest(expr->opno) == F_SCALARLTSEL) */
+						else	/* (const < var) */
+						{
+							/*
+							 * First check whether the constant is above the upper boundary (in that
+							 * case we can skip the bucket, because there's no overlap).
+							 */
+							mismatch = DatumGetBool(FunctionCall2Coll(&opproc,
+																 DEFAULT_COLLATION_OID,
+																 item->values[idx],
+																 cst->constvalue));
+						}
+					}
+					else if (oprrest == F_SCALARGTSEL)	/* column > constant */
+					{
+
+						if (! isgt)	/* (var > const) */
+						{
+							/*
+							 * First check whether the constant is above the upper boundary (in that
+							 * case we can skip the bucket, because there's no overlap).
+							 */
+							mismatch = DatumGetBool(FunctionCall2Coll(&opproc,
+																 DEFAULT_COLLATION_OID,
+																 cst->constvalue,
+																 item->values[idx]));
+						}
+						else /* (const > var) */
+						{
+							/*
+							 * First check whether the constant is below the lower boundary (in
+							 * that case we can skip the bucket, because there's no overlap).
+							 */
+							mismatch = DatumGetBool(FunctionCall2Coll(&opproc,
+																 DEFAULT_COLLATION_OID,
+																 item->values[idx],
+																 cst->constvalue));
+						}
+
+					} /* (get_oprrest(expr->opno) == F_SCALARGTSEL) */
+
+					/* XXX The conditions on matches[i] are not needed, as we
+					 *     skip MCV items that can't become true/false, depending
+					 *     on the current flag. See beginning of the loop over
+					 *     MCV items.
+					 */
+
+					if ((is_or) && (matches[i] == MVSTATS_MATCH_NONE) && (! mismatch))
+					{
+						/* OR - was MATCH_NONE, but will be MATCH_FULL */
+						matches[i] = MVSTATS_MATCH_FULL;
+						++nmatches;
+						continue;
+					}
+					else if ((! is_or) && (matches[i] == MVSTATS_MATCH_FULL) && mismatch)
+					{
+						/* AND - was MATC_FULL, but will be MATCH_NONE */
+						matches[i] = MVSTATS_MATCH_NONE;
+						--nmatches;
+						continue;
+					}
+
+				}
+			}
+		}
+		else if (IsA(clause, NullTest))
+		{
+			NullTest * expr = (NullTest*)clause;
+			Var * var = (Var*)(expr->arg);
+
+			/* FIXME proper matching attribute to dimension */
+			int idx = mv_get_index(var->varattno, stakeys);
+
+			/*
+			 * Walk through the MCV items and evaluate the current clause. We can
+			 * skip items that were already ruled out, and terminate if there are
+			 * no remaining MCV items that might possibly match.
+			 */
+			for (i = 0; i < mcvlist->nitems; i++)
+			{
+				MCVItem item = mcvlist->items[i];
+
+				/*
+				 * find the lowest selectivity in the MCV
+				 * FIXME Maybe not the best place do do this (in for all clauses).
+				 */
+				if (item->frequency < *lowsel)
+					*lowsel = item->frequency;
+
+				/* if there are no more matches, we can stop processing this clause */
+				if (nmatches == 0)
+					break;
+
+				/* skip MCV items that were already ruled out */
+				if (matches[i] == MVSTATS_MATCH_NONE)
+					continue;
+
+				/* if the clause mismatches the MCV item, set it as MATCH_NONE */
+				if (((expr->nulltesttype == IS_NULL) && (! mcvlist->items[i]->isnull[idx])) ||
+					((expr->nulltesttype == IS_NOT_NULL) && (mcvlist->items[i]->isnull[idx])))
+				{
+						matches[i] = MVSTATS_MATCH_NONE;
+						--nmatches;
+				}
+			}
+		}
+		else if (or_clause(clause) || and_clause(clause))
+		{
+			/* AND/OR clause, with all clauses compatible with the selected MV stat */
+
+			int			i;
+			BoolExpr   *orclause  = ((BoolExpr*)clause);
+			List	   *orclauses = orclause->args;
+
+			/* match/mismatch bitmap for each MCV item */
+			int	or_nmatches = 0;
+			char * or_matches = NULL;
+
+			Assert(orclauses != NIL);
+			Assert(list_length(orclauses) >= 2);
+
+			/* number of matching MCV items */
+			or_nmatches = mcvlist->nitems;
+
+			/* by default none of the MCV items matches the clauses */
+			or_matches = palloc0(sizeof(char) * or_nmatches);
+
+			if (or_clause(clause))
+			{
+				/* OR clauses assume nothing matches, initially */
+				memset(or_matches, MVSTATS_MATCH_NONE, sizeof(char)*or_nmatches);
+				or_nmatches = 0;
+			}
+			else
+			{
+				/* AND clauses assume nothing matches, initially */
+				memset(or_matches, MVSTATS_MATCH_FULL, sizeof(char)*or_nmatches);
+			}
+
+			/* build the match bitmap for the OR-clauses */
+			or_nmatches = update_match_bitmap_mcvlist(root, orclauses,
+									   stakeys, mcvlist,
+									   or_nmatches, or_matches,
+									   lowsel, fullmatch, or_clause(clause));
+
+			/* merge the bitmap into the existing one*/
+			for (i = 0; i < mcvlist->nitems; i++)
+			{
+				/*
+				 * To AND-merge the bitmaps, a MIN() semantics is used.
+				 * For OR-merge, use MAX().
+				 *
+				 * FIXME this does not decrease the number of matches
+				 */
+				UPDATE_RESULT(matches[i], or_matches[i], is_or);
+			}
+
+			pfree(or_matches);
+
+		}
+		else
+		{
+			elog(ERROR, "unknown clause type: %d", clause->type);
+		}
+	}
+
+	/*
+	 * If all the columns were matched by equality, it's a full match.
+	 * In this case there can be just a single MCV item, matching the
+	 * clause (if there were two, both would match the other one).
+	 */
+	*fullmatch = (bms_num_members(eqmatches) == mcvlist->ndimensions);
+
+	/* free the allocated pieces */
+	if (eqmatches)
+		pfree(eqmatches);
+
+	return nmatches;
+}
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 60fd57f..0da7ad9 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -410,7 +410,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
 			mvstat = (Form_pg_mv_statistic) GETSTRUCT(htup);
 
 			/* unavailable stats are not interesting for the planner */
-			if (mvstat->deps_built)
+			if (mvstat->deps_built || mvstat->mcv_built)
 			{
 				info = makeNode(MVStatisticInfo);
 
@@ -419,9 +419,11 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
 
 				/* enabled statistics */
 				info->deps_enabled = mvstat->deps_enabled;
+				info->mcv_enabled  = mvstat->mcv_enabled;
 
 				/* built/available statistics */
 				info->deps_built = mvstat->deps_built;
+				info->mcv_built  = mvstat->mcv_built;
 
 				/* stakeys */
 				adatum = SysCacheGetAttr(MVSTATOID, htup,
diff --git a/src/backend/utils/mvstats/Makefile b/src/backend/utils/mvstats/Makefile
index 099f1ed..f9bf10c 100644
--- a/src/backend/utils/mvstats/Makefile
+++ b/src/backend/utils/mvstats/Makefile
@@ -12,6 +12,6 @@ subdir = src/backend/utils/mvstats
 top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = common.o dependencies.o
+OBJS = common.o dependencies.o mcv.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/utils/mvstats/common.c b/src/backend/utils/mvstats/common.c
index bd200bc..d1da714 100644
--- a/src/backend/utils/mvstats/common.c
+++ b/src/backend/utils/mvstats/common.c
@@ -16,12 +16,14 @@
 
 #include "common.h"
 
+#include "utils/array.h"
+
 static VacAttrStats ** lookup_var_attr_stats(int2vector *attrs,
-									  int natts, VacAttrStats **vacattrstats);
+											 int natts,
+											 VacAttrStats **vacattrstats);
 
 static List* list_mv_stats(Oid relid);
 
-
 /*
  * Compute requested multivariate stats, using the rows sampled for the
  * plain (single-column) stats.
@@ -49,6 +51,8 @@ build_mv_stats(Relation onerel, int numrows, HeapTuple *rows,
 		int				j;
 		MVStatisticInfo *stat = (MVStatisticInfo *)lfirst(lc);
 		MVDependencies	deps  = NULL;
+		MCVList		mcvlist   = NULL;
+		int numrows_filtered  = 0;
 
 		VacAttrStats  **stats  = NULL;
 		int				numatts   = 0;
@@ -87,8 +91,12 @@ build_mv_stats(Relation onerel, int numrows, HeapTuple *rows,
 		if (stat->deps_enabled)
 			deps = build_mv_dependencies(numrows, rows, attrs, stats);
 
+		/* build the MCV list */
+		if (stat->mcv_enabled)
+			mcvlist = build_mv_mcvlist(numrows, rows, attrs, stats, &numrows_filtered);
+
 		/* store the histogram / MCV list in the catalog */
-		update_mv_stats(stat->mvoid, deps, attrs);
+		update_mv_stats(stat->mvoid, deps, mcvlist, attrs, stats);
 	}
 }
 
@@ -166,6 +174,8 @@ list_mv_stats(Oid relid)
 		info->stakeys = buildint2vector(stats->stakeys.values, stats->stakeys.dim1);
 		info->deps_enabled = stats->deps_enabled;
 		info->deps_built = stats->deps_built;
+		info->mcv_enabled = stats->mcv_enabled;
+		info->mcv_built = stats->mcv_built;
 
 		result = lappend(result, info);
 	}
@@ -180,8 +190,56 @@ list_mv_stats(Oid relid)
 	return result;
 }
 
+
+/*
+ * Find attnims of MV stats using the mvoid.
+ */
+int2vector*
+find_mv_attnums(Oid mvoid, Oid *relid)
+{
+	ArrayType  *arr;
+	Datum		adatum;
+	bool		isnull;
+	HeapTuple	htup;
+	int2vector *keys;
+
+	/* Prepare to scan pg_mv_statistic for entries having indrelid = this rel. */
+	htup = SearchSysCache1(MVSTATOID,
+							 ObjectIdGetDatum(mvoid));
+
+	/* XXX syscache contains OIDs of deleted stats (not invalidated) */
+	if (! HeapTupleIsValid(htup))
+		return NULL;
+
+	/* starelid */
+	adatum = SysCacheGetAttr(MVSTATOID, htup,
+							 Anum_pg_mv_statistic_starelid, &isnull);
+	Assert(!isnull);
+
+	*relid = DatumGetObjectId(adatum);
+
+	/* stakeys */
+	adatum = SysCacheGetAttr(MVSTATOID, htup,
+							 Anum_pg_mv_statistic_stakeys, &isnull);
+	Assert(!isnull);
+
+	arr = DatumGetArrayTypeP(adatum);
+
+	keys = buildint2vector((int16 *) ARR_DATA_PTR(arr),
+							ARR_DIMS(arr)[0]);
+	ReleaseSysCache(htup);
+
+	/* TODO maybe save the list into relcache, as in RelationGetIndexList
+	 *      (which was used as an inspiration of this one)?. */
+
+	return keys;
+}
+
+
 void
-update_mv_stats(Oid mvoid, MVDependencies dependencies, int2vector *attrs)
+update_mv_stats(Oid mvoid,
+				MVDependencies dependencies, MCVList mcvlist,
+				int2vector *attrs, VacAttrStats **stats)
 {
 	HeapTuple	stup,
 				oldtup;
@@ -206,18 +264,29 @@ update_mv_stats(Oid mvoid, MVDependencies dependencies, int2vector *attrs)
 			= PointerGetDatum(serialize_mv_dependencies(dependencies));
 	}
 
+	if (mcvlist != NULL)
+	{
+		bytea * data = serialize_mv_mcvlist(mcvlist, attrs, stats);
+		nulls[Anum_pg_mv_statistic_stamcv -1]    = (data == NULL);
+		values[Anum_pg_mv_statistic_stamcv  - 1] = PointerGetDatum(data);
+	}
+
 	/* always replace the value (either by bytea or NULL) */
 	replaces[Anum_pg_mv_statistic_stadeps -1] = true;
+	replaces[Anum_pg_mv_statistic_stamcv -1] = true;
 
 	/* always change the availability flags */
 	nulls[Anum_pg_mv_statistic_deps_built -1] = false;
+	nulls[Anum_pg_mv_statistic_mcv_built -1] = false;
 	nulls[Anum_pg_mv_statistic_stakeys-1]     = false;
 
 	/* use the new attnums, in case we removed some dropped ones */
 	replaces[Anum_pg_mv_statistic_deps_built-1] = true;
+	replaces[Anum_pg_mv_statistic_mcv_built  -1] = true;
 	replaces[Anum_pg_mv_statistic_stakeys -1]    = true;
 
 	values[Anum_pg_mv_statistic_deps_built-1] = BoolGetDatum(dependencies != NULL);
+	values[Anum_pg_mv_statistic_mcv_built  -1] = BoolGetDatum(mcvlist != NULL);
 	values[Anum_pg_mv_statistic_stakeys -1]    = PointerGetDatum(attrs);
 
 	/* Is there already a pg_mv_statistic tuple for this attribute? */
@@ -246,6 +315,21 @@ update_mv_stats(Oid mvoid, MVDependencies dependencies, int2vector *attrs)
 	heap_close(sd, RowExclusiveLock);
 }
 
+
+int
+mv_get_index(AttrNumber varattno, int2vector * stakeys)
+{
+	int i, idx = 0;
+	for (i = 0; i < stakeys->dim1; i++)
+	{
+		if (stakeys->values[i] < varattno)
+			idx += 1;
+		else
+			break;
+	}
+	return idx;
+}
+
 /* multi-variate stats comparator */
 
 /*
@@ -256,11 +340,15 @@ update_mv_stats(Oid mvoid, MVDependencies dependencies, int2vector *attrs)
 int
 compare_scalars_simple(const void *a, const void *b, void *arg)
 {
-	Datum		da = *(Datum*)a;
-	Datum		db = *(Datum*)b;
-	SortSupport ssup= (SortSupport) arg;
+	return compare_datums_simple(*(Datum*)a,
+								 *(Datum*)b,
+								 (SortSupport)arg);
+}
 
-	return ApplySortComparator(da, false, db, false, ssup);
+int
+compare_datums_simple(Datum a, Datum b, SortSupport ssup)
+{
+	return ApplySortComparator(a, false, b, false, ssup);
 }
 
 /*
diff --git a/src/backend/utils/mvstats/common.h b/src/backend/utils/mvstats/common.h
index 6d5465b..f4309f7 100644
--- a/src/backend/utils/mvstats/common.h
+++ b/src/backend/utils/mvstats/common.h
@@ -46,7 +46,15 @@ typedef struct
 	Datum		value;			/* a data value */
 	int			tupno;			/* position index for tuple it came from */
 } ScalarItem;
- 
+
+/* (de)serialization info */
+typedef struct DimensionInfo {
+	int		nvalues;	/* number of deduplicated values */
+	int		nbytes;		/* number of bytes (serialized) */
+	int		typlen;		/* pg_type.typlen */
+	bool	typbyval;	/* pg_type.typbyval */
+} DimensionInfo;
+
 /* multi-sort */
 typedef struct MultiSortSupportData {
 	int				ndims;		/* number of dimensions supported by the */
@@ -71,5 +79,6 @@ int multi_sort_compare_dim(int dim, const SortItem *a,
 						   const SortItem *b, MultiSortSupport mss);
 
 /* comparators, used when constructing multivariate stats */
+int compare_datums_simple(Datum a, Datum b, SortSupport ssup);
 int compare_scalars_simple(const void *a, const void *b, void *arg);
 int compare_scalars_partition(const void *a, const void *b, void *arg);
diff --git a/src/backend/utils/mvstats/mcv.c b/src/backend/utils/mvstats/mcv.c
new file mode 100644
index 0000000..670dbda
--- /dev/null
+++ b/src/backend/utils/mvstats/mcv.c
@@ -0,0 +1,1237 @@
+/*-------------------------------------------------------------------------
+ *
+ * mcv.c
+ *	  POSTGRES multivariate MCV lists
+ *
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/utils/mvstats/mcv.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "fmgr.h"
+#include "funcapi.h"
+
+#include "utils/lsyscache.h"
+
+#include "common.h"
+
+/*
+ * Multivariate MCVs (most-common values lists) are a straightforward
+ * extension of regular MCV list, tracking combinations of values for
+ * several attributes (columns), including NULL flags, and frequency
+ * of the combination.
+ *
+ * For columns with small number of distinct values, this works quite
+ * well and may represent the distribution very accurately. For columns
+ * with large number of distinct values (e.g. stored as FLOAT), this
+ * does not work that well. Especially if the distribution is mostly
+ * uniform, with no very common combinations.
+ *
+ * If we can represent the distribution as a MCV list, we can estimate
+ * some clauses (e.g. equality clauses) much accurately than using
+ * histograms for example.
+ *
+ * Another benefit of MCV lists (compared to histograms) is that they
+ * don't require sorting of the values, so that they work better for
+ * data types that either don't support sorting at all, or when the
+ * sorting does not really match the meaning. For example we know how to
+ * sort strings, but it's unlikely to make much sense for city names.
+ *
+ *
+ * Hashed MCV (not yet implemented)
+ * -------------------------------- 
+ * By restricting to MCV list and equality conditions, we may use hash
+ * values instead of the long varlena values. This significantly reduces
+ * the storage requirements, and we can still use it to estimate the
+ * equality conditions (assuming the collisions are rare enough).
+ *
+ * This however complicates matching the columns to available stats, as
+ * it requires matching clauses (not columns) to stats. And it may get
+ * quite complex - e.g. what if there are multiple clauses, each
+ * compatible with different stats subset?
+ *
+ *
+ * Selectivity estimation
+ * ----------------------
+ * The estimation, implemented in clauselist_mv_selectivity_mcvlist(),
+ * is quite simple in principle - walk through the MCV items and sum
+ * frequencies of all the items that match all the clauses.
+ *
+ * The current implementation uses MCV lists to estimates those types
+ * of clauses (think of WHERE conditions):
+ *
+ *  (a) equality clauses    WHERE (a = 1) AND (b = 2)
+ *  (b) inequality clauses  WHERE (a < 1) AND (b >= 2)
+ *  (c) NULL clauses        WHERE (a IS NULL) AND (b IS NOT NULL)
+ *  (d) OR clauses          WHERE (a < 1) OR (b >= 2)
+ *
+ * It's possible to add more clauses, for example:
+ *
+ *  (e) multi-var clauses   WHERE (a > b)
+ *
+ * and so on. These are tasks for the future, not yet implemented.
+ *
+ *
+ * Estimating equality clauses
+ * ---------------------------
+ * When computing selectivity estimate for equality clauses
+ *
+ *      (a = 1) AND (b = 2)
+ *
+ * we can do this estimate pretty exactly assuming that two conditions
+ * are met:
+ *
+ *      (1) there's an equality condition on each attribute
+ *
+ *      (2) we find a matching item in the MCV list
+ *
+ * In that case we know the MCV item represents all the tuples matching
+ * the clauses, and the selectivity estimate is complete. This is what
+ * we call 'full match'.
+ *
+ * When only (1) holds, but there's no matching MCV item, we don't know
+ * whether there are no such rows or just are not very frequent. We can
+ * however use the frequency of the least frequent MCV item as an upper
+ * bound for the selectivity.
+ *
+ * If the equality conditions match only a subset of the attributes
+ * the MCV list is built on (i.e. we can't get a full match - we may get
+ * multiple MCV items matching the clauses, but even if we get a single
+ * match there may be items that did not get into the MCV list. But in
+ * this case we can still use the frequency of the last MCV item to clam
+ * the 'additional' selectivity not accounted for by the matching items.
+ *
+ * If there's no histogram, because the MCV list approximates the
+ * distribution accurately (not because the histogram was disabled),
+ * it does not really matter whether there are equality conditions on
+ * all the columns - we can do pretty accurate estimation using the MCV.
+ *
+ * TODO For a combination of equality conditions (not full-match case)
+ *      we probably can clamp the selectivity by the minimum of
+ *      selectivities for each condition. For example if we know the
+ *      number of distinct values for each column, we can use 1/ndistinct
+ *      as a per-column estimate. Or rather 1/ndistinct + selectivity
+ *      derived from the MCV list.
+ *
+ *      If we know the estimate of number of combinations of the columns
+ *      (i.e. ndistinct(A,B)), we may estimate the average frequency of
+ *      items in the remaining 10% as [10% / ndistinct(A,B)].
+ *
+ *
+ * Bounding estimates
+ * ------------------
+ * In general the MCV lists may not provide estimates as accurate as
+ * for the full-match equality case, but may provide some useful
+ * lower/upper boundaries for the estimation error.
+ *
+ * With equality clauses we can do a few more tricks to narrow this
+ * error range (see the previous section and TODO), but with inequality
+ * clauses (or generally non-equality clauses), it's rather dificult.
+ * There's nothing like a 'full match' - we have to consider both the
+ * MCV items and the remaining part every time. We can't use the minimum
+ * selectivity of MCV items, as the clauses may match multiple items.
+ *
+ * For example with a MCV list on columns (A, B), covering 90% of the
+ * table (computed while building the MCV list), about ~10% of the table
+ * is not represented by the MCV list. So even if the conditions match
+ * all the remaining rows (not represented by the MCV items), we can't
+ * get selectivity higher than those 10%. We may use 1/2 the remaining
+ * selectivity as an estimate (minimizing average error).
+ *
+ * TODO Most of these ideas (error limiting) are not yet implemented.
+ *
+ *
+ * General TODO
+ * ------------
+ *
+ * FIXME Use max_mcv_items from ALTER TABLE ADD STATISTICS command.
+ *
+ * TODO Add support for clauses referencing multiple columns (a < b).
+ *
+ * TODO It's possible to build a special case of MCV list, storing not
+ *      the actual values but only 32/64-bit hash. This is only useful
+ *      for estimating equality clauses and for large varlena types,
+ *      which are very impractical for plain MCV list because of size.
+ *      But for those data types we really want just the equality
+ *      clauses, so it's actually a good solution.
+ *
+ * TODO Currently there's no logic to consider building only a MCV list
+ *      (and not building the histogram at all), except for doing this
+ *      decision manually in ADD STATISTICS.
+ */
+
+/*
+ * Each serialized item needs to store (in this order):
+ *
+ * - indexes              (ndim * sizeof(int32))
+ * - null flags           (ndim * sizeof(bool))
+ * - frequency            (sizeof(double))
+ *
+ * So in total:
+ *
+ *   ndim * (sizeof(int32) + sizeof(bool)) + sizeof(double)
+ */
+#define ITEM_SIZE(ndims)	\
+	(ndims * (sizeof(uint16) + sizeof(bool)) + sizeof(double))
+
+/* pointers into a flat serialized item of ITEM_SIZE(n) bytes */
+#define ITEM_INDEXES(item)			((uint16*)item)
+#define ITEM_NULLS(item,ndims)		((bool*)(ITEM_INDEXES(item) + ndims))
+#define ITEM_FREQUENCY(item,ndims)	((double*)(ITEM_NULLS(item,ndims) + ndims))
+
+/*
+ * Builds MCV list from sample rows, and removes rows represented by
+ * the MCV list from the sample (the number of remaining sample rows is
+ * returned by the numrows_filtered parameter).
+ *
+ * The method is quite simple - in short it does about these steps:
+ *
+ *       (1) sort the data (default collation, '<' for the data type)
+ *
+ *       (2) count distinct groups, decide how many to keep
+ *
+ *       (3) build the MCV list using the threshold determined in (2)
+ *
+ *       (4) remove rows represented by the MCV from the sample
+ *
+ * For more details, see the comments in the code.
+ *
+ * FIXME Single-dimensional MCV is sorted by frequency (descending). We
+ *       should do that too, because when walking through the list we
+ *       want to check the most frequent items first.
+ *
+ * TODO We're using Datum (8B), even for data types (e.g. int4 or
+ *      float4). Maybe we could save some space here, but the bytea
+ *      compression should handle it just fine.
+ *
+ * TODO This probably should not use the ndistinct directly (as computed
+ *      from the table, but rather estimate the number of distinct
+ *      values in the table), no?
+ */
+MCVList
+build_mv_mcvlist(int numrows, HeapTuple *rows, int2vector *attrs,
+					  VacAttrStats **stats, int *numrows_filtered)
+{
+	int i, j;
+	int numattrs = attrs->dim1;
+	int ndistinct = 0;
+	int mcv_threshold = 0;
+	int count = 0;
+	int nitems = 0;
+
+	MCVList	mcvlist = NULL;
+
+	/* Sort by multiple columns (using array of SortSupport) */
+	MultiSortSupport mss = multi_sort_init(numattrs);
+
+	/*
+	 * Preallocate space for all the items as a single chunk, and point
+	 * the items to the appropriate parts of the array.
+	 */
+	SortItem   *items  = (SortItem*)palloc0(numrows * sizeof(SortItem));
+	Datum	   *values = (Datum*)palloc0(sizeof(Datum) * numrows * numattrs);
+	bool	   *isnull = (bool*)palloc0(sizeof(bool) * numrows * numattrs);
+
+	/* keep all the rows by default (as if there was no MCV list) */
+	*numrows_filtered = numrows;
+
+	for (i = 0; i < numrows; i++)
+	{
+		items[i].values = &values[i * numattrs];
+		items[i].isnull = &isnull[i * numattrs];
+	}
+
+	/* load the values/null flags from sample rows */
+	for (j = 0; j < numrows; j++)
+		for (i = 0; i < numattrs; i++)
+			items[j].values[i] = heap_getattr(rows[j], attrs->values[i],
+								stats[i]->tupDesc, &items[j].isnull[i]);
+
+	/* prepare the sort functions for all the attributes */
+	for (i = 0; i < numattrs; i++)
+		multi_sort_add_dimension(mss, i, i, stats);
+
+	/* do the sort, using the multi-sort */
+	qsort_arg((void *) items, numrows, sizeof(SortItem),
+				multi_sort_compare, mss);
+
+	/*
+	 * Count the number of distinct groups - just walk through the
+	 * sorted list and count the number of key changes. We use this to
+	 * determine the threshold (125% of the average frequency).
+	 */
+	ndistinct = 1;
+	for (i = 1; i < numrows; i++)
+		if (multi_sort_compare(&items[i], &items[i-1], mss) != 0)
+			ndistinct += 1;
+
+	/*
+	 * Determine how many groups actually exceed the threshold, and then
+	 * walk the array again and collect them into an array. We'll always
+	 * require at least 4 rows per group.
+	 *
+	 * But if we can fit all the distinct values in the MCV list (i.e.
+	 * if there are less distinct groups than MVSTAT_MCVLIST_MAX_ITEMS),
+	 * we'll require only 2 rows per group.
+	 *
+	 * TODO For now the threshold is the same as in the single-column
+	 *      case (average + 25%), but maybe that's worth revisiting
+	 *      for the multivariate case.
+	 *
+	 * TODO We can do this only if we believe we got all the distinct
+	 *      values of the table.
+	 *
+	 * FIXME This should really reference mcv_max_items (from catalog)
+	 *       instead of the constant MVSTAT_MCVLIST_MAX_ITEMS.
+	 */
+	mcv_threshold = 1.25 * numrows / ndistinct;
+	mcv_threshold = (mcv_threshold < 4) ? 4  : mcv_threshold;
+
+	if (ndistinct <= MVSTAT_MCVLIST_MAX_ITEMS)
+		mcv_threshold = 2;
+
+	/*
+	 * Walk through the sorted data again, and see how many groups
+	 * reach the mcv_threshold (and become an item in the MCV list).
+	 */
+	count = 1;
+	for (i = 1; i <= numrows; i++)
+	{
+		/* last row or new group, so check if we exceed  mcv_threshold */
+		if ((i == numrows) || (multi_sort_compare(&items[i], &items[i-1], mss) != 0))
+		{
+			/* group hits the threshold, count the group as MCV item */
+			if (count >= mcv_threshold)
+				nitems += 1;
+
+			count = 1;
+		}
+		else	/* within group, so increase the number of items */
+			count += 1;
+	}
+
+	/* we know the number of MCV list items, so let's build the list */
+	if (nitems > 0)
+	{
+		/* allocate the MCV list structure, set parameters we know */
+		mcvlist = (MCVList)palloc0(sizeof(MCVListData));
+
+		mcvlist->magic = MVSTAT_MCV_MAGIC;
+		mcvlist->type = MVSTAT_MCV_TYPE_BASIC;
+		mcvlist->ndimensions = numattrs;
+		mcvlist->nitems = nitems;
+
+		/*
+		 * Preallocate Datum/isnull arrays (not as a single chunk, as
+		 * we'll pass this outside this method and thus it needs to be
+		 * easy to pfree() the data (and we wouldn't know where the
+		 * arrays start).
+		 *
+		 * TODO Maybe the reasoning that we can't allocate a single
+		 *      piece because we're passing it out is bogus? Who'd
+		 *      free a single item of the MCV list, anyway?
+		 *
+		 * TODO Maybe with a proper encoding (stuffing all the values
+		 *      into a list-level array, this will be untrue)?
+		 */
+		mcvlist->items = (MCVItem*)palloc0(sizeof(MCVItem)*nitems);
+
+		for (i = 0; i < nitems; i++)
+		{
+			mcvlist->items[i] = (MCVItem)palloc0(sizeof(MCVItemData));
+			mcvlist->items[i]->values = (Datum*)palloc0(sizeof(Datum)*numattrs);
+			mcvlist->items[i]->isnull = (bool*)palloc0(sizeof(bool)*numattrs);
+		}
+
+		/*
+		 * Repeat the same loop as above, but this time copy the data
+		 * into the MCV list (for items exceeding the threshold).
+		 *
+		 * TODO Maybe we could simply remember indexes of the last item
+		 *      in each group (from the previous loop)?
+		 */
+		count = 1;
+		nitems = 0;
+		for (i = 1; i <= numrows; i++)
+		{
+			/* last row or a new group */
+			if ((i == numrows) || (multi_sort_compare(&items[i], &items[i-1], mss) != 0))
+			{
+				/* count the MCV item if exceeding the threshold (and copy into the array) */
+				if (count >= mcv_threshold)
+				{
+					/* just pointer to the proper place in the list */
+					MCVItem item = mcvlist->items[nitems];
+
+					/* copy values from the _previous_ group (last item of) */
+					memcpy(item->values, items[(i-1)].values, sizeof(Datum) * numattrs);
+					memcpy(item->isnull, items[(i-1)].isnull, sizeof(bool)  * numattrs);
+
+
+					/* and finally the group frequency */
+					item->frequency = (double)count / numrows;
+
+					/* next item */
+					nitems += 1;
+				}
+
+				count = 1;
+			}
+			else	/* same group, just increase the number of items */
+				count += 1;
+		}
+
+		/* make sure the loops are consistent */
+		Assert(nitems == mcvlist->nitems);
+
+		/*
+		 * Remove the rows matching the MCV list (i.e. keep only rows
+		 * that are not represented by the MCV list).
+		 *
+		 * FIXME This implementation is rather naive, effectively O(N^2).
+		 *       As the MCV list grows, the check will take longer and
+		 *       longer. And as the number of sampled rows increases (by
+		 *       increasing statistics target), it will take longer and
+		 *       longer. One option is to sort the MCV items first and
+		 *       then perform a binary search.
+		 *
+		 *       A better option would be keeping the ID of the row in
+		 *       the sort item, and then just walk through the items and
+		 *       mark rows to remove (in a bitmap of the same size).
+		 *       There's not space for that in SortItem at this moment,
+		 *       but it's trivial to add 'private' pointer, or just
+		 *       using another structure with extra field (starting with
+		 *       SortItem, so that the comparators etc. still work).
+		 *
+		 *       Another option is to use the sorted array of items
+		 *       (because that's how we sorted the source data), and
+		 *       simply do a bsearch() into it. If we find a matching
+		 *       item, the row belongs to the MCV list.
+		 */
+		if (nitems == ndistinct) /* all rows are covered by MCV items */
+			*numrows_filtered = 0;
+		else /* (nitems < ndistinct) && (nitems > 0) */
+		{
+			int nfiltered = 0;
+			HeapTuple *rows_filtered = (HeapTuple*)palloc0(sizeof(HeapTuple) * numrows);
+
+			/* used for the searches */
+			SortItem item, mcvitem;;
+
+			item.values = (Datum*)palloc0(numattrs * sizeof(Datum));
+			item.isnull = (bool*)palloc0(numattrs * sizeof(bool));
+
+			/*
+			 * FIXME we don't need to allocate this, we can reference
+			 *       the MCV item directly ...
+			 */
+			mcvitem.values = (Datum*)palloc0(numattrs * sizeof(Datum));
+			mcvitem.isnull = (bool*)palloc0(numattrs * sizeof(bool));
+
+			/* walk through the tuples, compare the values to MCV items */
+			for (i = 0; i < numrows; i++)
+			{
+				bool	match = false;
+
+				/* collect the key values from the row */
+				for (j = 0; j < numattrs; j++)
+					item.values[j] = heap_getattr(rows[i], attrs->values[j],
+										stats[j]->tupDesc, &item.isnull[j]);
+
+				/* scan through the MCV list for matches */
+				for (j = 0; j < mcvlist->nitems; j++)
+				{
+					/*
+					 * TODO Create a SortItem/MCVItem comparator so that
+					 *      we don't need to do memcpy() like crazy.
+					 */
+					memcpy(mcvitem.values, mcvlist->items[j]->values,
+							numattrs * sizeof(Datum));
+					memcpy(mcvitem.isnull, mcvlist->items[j]->isnull,
+							numattrs * sizeof(bool));
+
+					if (multi_sort_compare(&item, &mcvitem, mss) == 0)
+					{
+						match = true;
+						break;
+					}
+				}
+
+				/* if no match in the MCV list, copy the row into the filtered ones */
+				if (! match)
+					memcpy(&rows_filtered[nfiltered++], &rows[i], sizeof(HeapTuple));
+			}
+
+			/* replace the rows and remember how many rows we kept */
+			memcpy(rows, rows_filtered, sizeof(HeapTuple) * nfiltered);
+			*numrows_filtered = nfiltered;
+
+			/* free all the data used here */
+			pfree(rows_filtered);
+			pfree(item.values);
+			pfree(item.isnull);
+			pfree(mcvitem.values);
+			pfree(mcvitem.isnull);
+		}
+	}
+
+	pfree(values);
+	pfree(items);
+	pfree(isnull);
+
+	return mcvlist;
+}
+
+
+/* fetch the MCV list (as a bytea) from the pg_mv_statistic catalog */
+MCVList
+load_mv_mcvlist(Oid mvoid)
+{
+	bool		isnull = false;
+	Datum		mcvlist;
+
+#ifdef USE_ASSERT_CHECKING
+	Form_pg_mv_statistic	mvstat;
+#endif
+
+	/* Prepare to scan pg_mv_statistic for entries having indrelid = this rel. */
+	HeapTuple	htup = SearchSysCache1(MVSTATOID, ObjectIdGetDatum(mvoid));
+
+	if (! HeapTupleIsValid(htup))
+		return NULL;
+
+#ifdef USE_ASSERT_CHECKING
+	mvstat = (Form_pg_mv_statistic) GETSTRUCT(htup);
+	Assert(mvstat->mcv_enabled && mvstat->mcv_built);
+#endif
+
+	mcvlist = SysCacheGetAttr(MVSTATOID, htup,
+						   Anum_pg_mv_statistic_stamcv, &isnull);
+
+	Assert(!isnull);
+
+	ReleaseSysCache(htup);
+
+	return deserialize_mv_mcvlist(DatumGetByteaP(mcvlist));
+}
+
+/* print some basic info about the MCV list
+ *
+ * TODO Add info about what part of the table this covers.
+ */
+Datum
+pg_mv_stats_mcvlist_info(PG_FUNCTION_ARGS)
+{
+	bytea	   *data = PG_GETARG_BYTEA_P(0);
+	char	   *result;
+
+	MCVList	mcvlist = deserialize_mv_mcvlist(data);
+
+	result = palloc0(128);
+	snprintf(result, 128, "nitems=%d", mcvlist->nitems);
+
+	pfree(mcvlist);
+
+	PG_RETURN_TEXT_P(cstring_to_text(result));
+}
+
+/* used to pass context into bsearch() */
+static SortSupport ssup_private = NULL;
+
+static int bsearch_comparator(const void * a, const void * b);
+
+/*
+ * Serialize MCV list into a bytea value. The basic algorithm is simple:
+ *
+ * (1) perform deduplication for each attribute (separately)
+ *     (a) collect all (non-NULL) attribute values from all MCV items
+ *     (b) sort the data (using 'lt' from VacAttrStats)
+ *     (c) remove duplicate values from the array
+ *
+ * (2) serialize the arrays into a bytea value
+ *
+ * (3) process all MCV list items
+ *     (a) replace values with indexes into the arrays
+ *
+ * Each attribute has to be processed separately, because we're mixing
+ * different datatypes, and we don't know what equality means for them.
+ * We're also mixing pass-by-value and pass-by-ref types, and so on.
+ *
+ * We'll use uint16 values for the indexes in step (3), as we don't
+ * allow more than 8k MCV items (see list max_mcv_items). We might
+ * increase this to 65k and still fit into uint16.
+ *
+ * We don't really expect the high compression as with histograms,
+ * because we're not doing any bucket splits etc. (which is the source
+ * of high redundancy there), but we need to do it anyway as we need
+ * to serialize varlena values etc. We might invent another way to
+ * serialize MCV lists, but let's keep it consistent.
+ *
+ * FIXME This probably leaks memory, or at least uses it inefficiently
+ *       (many small palloc() calls instead of a large one).
+ *
+ * TODO Consider using 16-bit values for the indexes in step (3).
+ *
+ * TODO Consider packing boolean flags (NULL) for each item into 'char'
+ *      or a longer type (instead of using an array of bool items).
+ */
+bytea *
+serialize_mv_mcvlist(MCVList mcvlist, int2vector *attrs,
+					 VacAttrStats **stats)
+{
+	int	i, j;
+	int	ndims = mcvlist->ndimensions;
+	int	itemsize = ITEM_SIZE(ndims);
+
+	Size	total_length = 0;
+
+	char   *item = palloc0(itemsize);
+
+	/* serialized items (indexes into arrays, etc.) */
+	bytea  *output;
+	char   *data = NULL;
+
+	/* values per dimension (and number of non-NULL values) */
+	Datum **values = (Datum**)palloc0(sizeof(Datum*) * ndims);
+	int	   *counts = (int*)palloc0(sizeof(int) * ndims);
+
+	/* info about dimensions (for deserialize) */
+	DimensionInfo * info
+				= (DimensionInfo *)palloc0(sizeof(DimensionInfo)*ndims);
+
+	/* sort support data */
+	SortSupport	ssup = (SortSupport)palloc0(sizeof(SortSupportData)*ndims);
+
+	/* collect and deduplicate values for each dimension */
+	for (i = 0; i < ndims; i++)
+	{
+		int count;
+		StdAnalyzeData *tmp = (StdAnalyzeData *)stats[i]->extra_data;
+
+		/* keep important info about the data type */
+		info[i].typlen   = stats[i]->attrtype->typlen;
+		info[i].typbyval = stats[i]->attrtype->typbyval;
+
+		/* allocate space for all values, including NULLs (won't use them) */
+		values[i] = (Datum*)palloc0(sizeof(Datum) * mcvlist->nitems);
+
+		for (j = 0; j < mcvlist->nitems; j++)
+		{
+			if (! mcvlist->items[j]->isnull[i])	/* skip NULL values */
+			{
+				values[i][counts[i]] = mcvlist->items[j]->values[i];
+				counts[i] += 1;
+			}
+		}
+
+		/* there are just NULL values in this dimension */
+		if (counts[i] == 0)
+			continue;
+
+		/* sort and deduplicate */
+		ssup[i].ssup_cxt = CurrentMemoryContext;
+		ssup[i].ssup_collation = DEFAULT_COLLATION_OID;
+		ssup[i].ssup_nulls_first = false;
+
+		PrepareSortSupportFromOrderingOp(tmp->ltopr, &ssup[i]);
+
+		qsort_arg(values[i], counts[i], sizeof(Datum),
+				  compare_scalars_simple, &ssup[i]);
+
+		/*
+		 * Walk through the array and eliminate duplicitate values, but
+		 * keep the ordering (so that we can do bsearch later). We know
+		 * there's at least 1 item, so we can skip the first element.
+		 */
+		count = 1;	/* number of deduplicated items */
+		for (j = 1; j < counts[i]; j++)
+		{
+			/* if it's different from the previous value, we need to keep it */
+			if (compare_datums_simple(values[i][j-1], values[i][j], &ssup[i]) != 0)
+			{
+				/* XXX: not needed if (count == j) */
+				values[i][count] = values[i][j];
+				count += 1;
+			}
+		}
+
+		/* do not exceed UINT16_MAX */
+		Assert(count <= UINT16_MAX);
+
+		/* keep info about the deduplicated count */
+		info[i].nvalues = count;
+
+		/* compute size of the serialized data */
+		if (info[i].typbyval || (info[i].typlen > 0))
+			/* by value pased by reference, but fixed length */
+			info[i].nbytes = info[i].nvalues * info[i].typlen;
+		else if (info[i].typlen == -1)
+			/* varlena, so just use VARSIZE_ANY */
+			for (j = 0; j < info[i].nvalues; j++)
+				info[i].nbytes += VARSIZE_ANY(values[i][j]);
+		else if (info[i].typlen == -2)
+			/* cstring, so simply strlen */
+			for (j = 0; j < info[i].nvalues; j++)
+				info[i].nbytes += strlen(DatumGetPointer(values[i][j]));
+		else
+			elog(ERROR, "unknown data type typbyval=%d typlen=%d",
+				info[i].typbyval, info[i].typlen);
+	}
+
+	/*
+	 * Now we finally know how much space we'll need for the serialized
+	 * MCV list, as it contains these fields:
+	 *
+	 * - length (4B) for varlena
+	 * - magic (4B)
+	 * - type (4B)
+	 * - ndimensions (4B)
+	 * - nitems (4B)
+	 * - info (ndim * sizeof(DimensionInfo)
+	 * - arrays of values for each dimension
+	 * - serialized items (nitems * itemsize)
+	 *
+	 * So the 'header' size is 20B + ndim * sizeof(DimensionInfo) and
+	 * then we'll place the data.
+	 */
+	total_length = (sizeof(int32) + offsetof(MCVListData, items)
+					+ ndims * sizeof(DimensionInfo)
+					+ mcvlist->nitems * itemsize);
+
+	for (i = 0; i < ndims; i++)
+		total_length += info[i].nbytes;
+
+	/* enforce arbitrary limit of 1MB */
+	if (total_length > 1024 * 1024)
+		elog(ERROR, "serialized MCV exceeds 1MB (%ld)", total_length);
+
+	/* allocate space for the serialized MCV list, set header fields */
+	output = (bytea*)palloc0(total_length);
+	SET_VARSIZE(output, total_length);
+
+	/* we'll use 'ptr' to keep track of the place to write data */
+	data = VARDATA(output);
+
+	memcpy(data, mcvlist, offsetof(MCVListData, items));
+	data += offsetof(MCVListData, items);
+
+	memcpy(data, info, sizeof(DimensionInfo) * ndims);
+	data += sizeof(DimensionInfo) * ndims;
+
+	/* value array for each dimension */
+	for (i = 0; i < ndims; i++)
+	{
+#ifdef USE_ASSERT_CHECKING
+		char *tmp = data;
+#endif
+		for (j = 0; j < info[i].nvalues; j++)
+		{
+			if (info[i].typbyval)
+			{
+				/* passed by value / Datum */
+				memcpy(data, &values[i][j], info[i].typlen);
+				data += info[i].typlen;
+			}
+			else if (info[i].typlen > 0)
+			{
+				/* pased by reference, but fixed length (name, tid, ...) */
+				memcpy(data, &values[i][j], info[i].typlen);
+				data += info[i].typlen;
+			}
+			else if (info[i].typlen == -1)
+			{
+				/* varlena */
+				memcpy(data, DatumGetPointer(values[i][j]),
+							VARSIZE_ANY(values[i][j]));
+				data += VARSIZE_ANY(values[i][j]);
+			}
+			else if (info[i].typlen == -2)
+			{
+				/* cstring (don't forget the \0 terminator!) */
+				memcpy(data, DatumGetPointer(values[i][j]),
+							strlen(DatumGetPointer(values[i][j])) + 1);
+				data += strlen(DatumGetPointer(values[i][j])) + 1;
+			}
+		}
+		Assert((data - tmp) == info[i].nbytes);
+	}
+
+	/* and finally, the MCV items */
+	for (i = 0; i < mcvlist->nitems; i++)
+	{
+		/* don't write beyond the allocated space */
+		Assert(data <= (char*)output + total_length - itemsize);
+
+		/* reset the values for each item */
+		memset(item, 0, itemsize);
+
+		for (j = 0; j < ndims; j++)
+		{
+			/* do the lookup only for non-NULL values */
+			if (! mcvlist->items[i]->isnull[j])
+			{
+				Datum * v = NULL;
+				ssup_private = &ssup[j];
+
+				v = (Datum*)bsearch(&mcvlist->items[i]->values[j],
+								values[j], info[j].nvalues, sizeof(Datum),
+								bsearch_comparator);
+
+				if (v == NULL)
+					elog(ERROR, "value for dim %d not found in array", j);
+
+				/* compute index within the array */
+				ITEM_INDEXES(item)[j] = (v - values[j]);
+
+				/* check the index is within expected bounds */
+				Assert(ITEM_INDEXES(item)[j] >= 0);
+				Assert(ITEM_INDEXES(item)[j] < info[j].nvalues);
+			}
+		}
+
+		/* copy NULL and frequency flags into the item */
+		memcpy(ITEM_NULLS(item, ndims),
+				mcvlist->items[i]->isnull, sizeof(bool) * ndims);
+		memcpy(ITEM_FREQUENCY(item, ndims),
+				&mcvlist->items[i]->frequency, sizeof(double));
+
+		/* copy the item into the array */
+		memcpy(data, item, itemsize);
+
+		data += itemsize;
+	}
+
+	/* at this point we expect to match the total_length exactly */
+	Assert((data - (char*)output) == total_length);
+
+	return output;
+}
+
+/*
+ * Inverse to serialize_mv_mcvlist() - see the comment there.
+ *
+ * We'll do full deserialization, because we don't really expect high
+ * duplication of values so the caching may not be as efficient as with
+ * histograms.
+ */
+MCVList deserialize_mv_mcvlist(bytea * data)
+{
+	int		i, j;
+	Size	expected_size;
+	MCVList mcvlist;
+	char   *tmp;
+
+	int		ndims, nitems, itemsize;
+	DimensionInfo *info = NULL;
+
+	uint16 *indexes = NULL;
+	Datum **values = NULL;
+
+	/* local allocation buffer (used only for deserialization) */
+	int		bufflen;
+	char   *buff;
+	char   *ptr;
+
+	/* buffer used for the result */
+	int		rbufflen;
+	char   *rbuff;
+	char   *rptr;
+
+	if (data == NULL)
+		return NULL;
+
+	if (VARSIZE_ANY_EXHDR(data) < offsetof(MCVListData,items))
+		elog(ERROR, "invalid MCV Size %ld (expected at least %ld)",
+			 VARSIZE_ANY_EXHDR(data), offsetof(MCVListData,items));
+
+	/* read the MCV list header */
+	mcvlist = (MCVList)palloc0(sizeof(MCVListData));
+
+	/* initialize pointer to the data part (skip the varlena header) */
+	tmp = VARDATA(data);
+
+	/* get the header and perform basic sanity checks */
+	memcpy(mcvlist, tmp, offsetof(MCVListData,items));
+	tmp += offsetof(MCVListData,items);
+
+	if (mcvlist->magic != MVSTAT_MCV_MAGIC)
+		elog(ERROR, "invalid MCV magic %d (expected %dd)",
+			 mcvlist->magic, MVSTAT_MCV_MAGIC);
+
+	if (mcvlist->type != MVSTAT_MCV_TYPE_BASIC)
+		elog(ERROR, "invalid MCV type %d (expected %dd)",
+			 mcvlist->type, MVSTAT_MCV_TYPE_BASIC);
+
+	nitems = mcvlist->nitems;
+	ndims = mcvlist->ndimensions;
+	itemsize = ITEM_SIZE(ndims);
+
+	Assert(nitems > 0);
+	Assert((ndims >= 2) && (ndims <= MVSTATS_MAX_DIMENSIONS));
+
+	/*
+	 * What size do we expect with those parameters (it's incomplete,
+	 * as we yet have to count the array sizes (from DimensionInfo
+	 * records).
+	 */
+	expected_size = offsetof(MCVListData,items) +
+					ndims * sizeof(DimensionInfo) +
+					(nitems * itemsize);
+
+	/* check that we have at least the DimensionInfo records */
+	if (VARSIZE_ANY_EXHDR(data) < expected_size)
+		elog(ERROR, "invalid MCV Size %ld (expected %ld)",
+			 VARSIZE_ANY_EXHDR(data), expected_size);
+
+	info = (DimensionInfo*)(tmp);
+	tmp += ndims * sizeof(DimensionInfo);
+
+	/* account for the value arrays */
+	for (i = 0; i < ndims; i++)
+		expected_size += info[i].nbytes;
+
+	if (VARSIZE_ANY_EXHDR(data) != expected_size)
+		elog(ERROR, "invalid MCV Size %ld (expected %ld)",
+			 VARSIZE_ANY_EXHDR(data), expected_size);
+
+	/* looks OK - not corrupted or something */
+
+	/*
+	 * We'll allocate one large chunk of memory for the intermediate
+	 * data, needed only for deserializing the MCV list, and we'll pack
+	 * use a local dense allocation to minimize the palloc overhead.
+	 *
+	 * Let's see how much space we'll actually need, and also include
+	 * space for the array with pointers.
+	 */
+	bufflen = sizeof(Datum*) * ndims;			/* space for pointers */
+
+	for (i = 0; i < ndims; i++)
+		/* for full-size byval types, we reuse the serialized value */
+		if (! (info[i].typbyval && info[i].typlen == sizeof(Datum)))
+			bufflen += (sizeof(Datum) * info[i].nvalues);
+
+	buff = palloc(bufflen);
+	ptr  = buff;
+
+	values = (Datum**)buff;
+	ptr += (sizeof(Datum*) * ndims);
+
+	/*
+	 * FIXME This uses pointers to the original data array (the types
+	 *       not passed by value), so when someone frees the memory,
+	 *       e.g. by doing something like this:
+	 *
+	 *           bytea * data = ... fetch the data from catalog ...
+	 *           MCVList mcvlist = deserialize_mcv_list(data);
+	 *           pfree(data);
+	 *
+	 *       then 'mcvlist' references the freed memory. This needs to
+	 *       copy the pieces.
+	 */
+	for (i = 0; i < ndims; i++)
+	{
+		if (info[i].typbyval)
+		{
+			/* passed by value / Datum - simply reuse the array */
+			if (info[i].typlen == sizeof(Datum))
+			{
+				values[i] = (Datum*)tmp;
+				tmp += info[i].nbytes;
+			}
+			else
+			{
+				values[i] = (Datum*)ptr;
+				ptr += (sizeof(Datum) * info[i].nvalues);
+
+				for (j = 0; j < info[i].nvalues; j++)
+				{
+					/* just point into the array */
+					memcpy(&values[i][j], tmp, info[i].typlen);
+					tmp += info[i].typlen;
+				}
+			}
+		}
+		else
+		{
+			/* all the varlena data need a chunk from the buffer */
+			values[i] = (Datum*)ptr;
+			ptr += (sizeof(Datum) * info[i].nvalues);
+
+			/* pased by reference, but fixed length (name, tid, ...) */
+			if (info[i].typlen > 0)
+			{
+				for (j = 0; j < info[i].nvalues; j++)
+				{
+					/* just point into the array */
+					values[i][j] = PointerGetDatum(tmp);
+					tmp += info[i].typlen;
+				}
+			}
+			else if (info[i].typlen == -1)
+			{
+				/* varlena */
+				for (j = 0; j < info[i].nvalues; j++)
+				{
+					/* just point into the array */
+					values[i][j] = PointerGetDatum(tmp);
+					tmp += VARSIZE_ANY(tmp);
+				}
+			}
+			else if (info[i].typlen == -2)
+			{
+				/* cstring */
+				for (j = 0; j < info[i].nvalues; j++)
+				{
+					/* just point into the array */
+					values[i][j] = PointerGetDatum(tmp);
+					tmp += (strlen(tmp) + 1); /* don't forget the \0 */
+				}
+			}
+		}
+	}
+
+	/* we should exhaust the buffer exactly */
+	Assert((ptr - buff) == bufflen);
+
+	/* allocate space for the MCV items in a single piece */
+	rbufflen = (sizeof(MCVItem) + sizeof(MCVItemData) +
+				sizeof(Datum)*ndims + sizeof(bool)*ndims) * nitems;
+
+	rbuff = palloc(rbufflen);
+	rptr  = rbuff;
+
+	mcvlist->items = (MCVItem*)rbuff;
+	rptr += (sizeof(MCVItem) * nitems);
+
+	for (i = 0; i < nitems; i++)
+	{
+		MCVItem item = (MCVItem)rptr;
+		rptr += (sizeof(MCVItemData));
+
+		item->values = (Datum*)rptr;
+		rptr += (sizeof(Datum)*ndims);
+
+		item->isnull = (bool*)rptr;
+		rptr += (sizeof(bool) *ndims);
+
+		/* just point to the right place */
+		indexes = ITEM_INDEXES(tmp);
+
+		memcpy(item->isnull, ITEM_NULLS(tmp, ndims), sizeof(bool) * ndims);
+		memcpy(&item->frequency, ITEM_FREQUENCY(tmp, ndims), sizeof(double));
+
+#ifdef ASSERT_CHECKING
+		for (j = 0; j < ndims; j++)
+			Assert(indexes[j] <= UINT16_MAX);
+#endif
+
+		/* translate the values */
+		for (j = 0; j < ndims; j++)
+			if (! item->isnull[j])
+				item->values[j] = values[j][indexes[j]];
+
+		mcvlist->items[i] = item;
+
+		tmp += ITEM_SIZE(ndims);
+
+		Assert(tmp <= (char*)data + VARSIZE_ANY(data));
+	}
+
+	/* check that we processed all the data */
+	Assert(tmp == (char*)data + VARSIZE_ANY(data));
+
+	/* release the temporary buffer */
+	pfree(buff);
+
+	return mcvlist;
+}
+
+/*
+ * We need to pass the SortSupport to the comparator, but bsearch()
+ * has no 'context' parameter, so we use a global variable (ugly).
+ */
+static int
+bsearch_comparator(const void * a, const void * b)
+{
+	Assert(ssup_private != NULL);
+	return compare_scalars_simple(a, b, (void*)ssup_private);
+}
+/*
+ * SRF with details about buckets of a histogram:
+ *
+ * - item ID (0...nitems)
+ * - values (string array)
+ * - nulls only (boolean array)
+ * - frequency (double precision)
+ *
+ * The input is the OID of the statistics, and there are no rows
+ * returned if the statistics contains no histogram.
+ */
+PG_FUNCTION_INFO_V1(pg_mv_mcv_items);
+
+Datum
+pg_mv_mcv_items(PG_FUNCTION_ARGS)
+{
+	FuncCallContext	   *funcctx;
+	int					call_cntr;
+	int					max_calls;
+	TupleDesc			tupdesc;
+	AttInMetadata	   *attinmeta;
+
+	/* stuff done only on the first call of the function */
+	if (SRF_IS_FIRSTCALL())
+	{
+		MemoryContext	oldcontext;
+		MCVList			mcvlist;
+
+		/* create a function context for cross-call persistence */
+		funcctx = SRF_FIRSTCALL_INIT();
+
+		/* switch to memory context appropriate for multiple function calls */
+		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+		mcvlist = load_mv_mcvlist(PG_GETARG_OID(0));
+
+		funcctx->user_fctx = mcvlist;
+
+		/* total number of tuples to be returned */
+		funcctx->max_calls = 0;
+		if (funcctx->user_fctx != NULL)
+			funcctx->max_calls = mcvlist->nitems;
+
+		/* Build a tuple descriptor for our result type */
+		if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("function returning record called in context "
+							"that cannot accept type record")));
+
+		/*
+		 * generate attribute metadata needed later to produce tuples
+		 * from raw C strings
+		 */
+		attinmeta = TupleDescGetAttInMetadata(tupdesc);
+		funcctx->attinmeta = attinmeta;
+
+		MemoryContextSwitchTo(oldcontext);
+	}
+
+	/* stuff done on every call of the function */
+	funcctx = SRF_PERCALL_SETUP();
+
+	call_cntr = funcctx->call_cntr;
+	max_calls = funcctx->max_calls;
+	attinmeta = funcctx->attinmeta;
+
+	if (call_cntr < max_calls)    /* do when there is more left to send */
+	{
+		char	  **values;
+		HeapTuple	tuple;
+		Datum		result;
+		int2vector *stakeys;
+		Oid			relid;
+
+		char *buff = palloc0(1024);
+		char *format;
+
+		int			i;
+
+		Oid		   *outfuncs;
+		FmgrInfo   *fmgrinfo;
+
+		MCVList		mcvlist;
+		MCVItem		item;
+
+		mcvlist = (MCVList)funcctx->user_fctx;
+
+		Assert(call_cntr < mcvlist->nitems);
+
+		item = mcvlist->items[call_cntr];
+
+		stakeys = find_mv_attnums(PG_GETARG_OID(0), &relid);
+
+		/*
+		 * Prepare a values array for building the returned tuple.
+		 * This should be an array of C strings which will
+		 * be processed later by the type input functions.
+		 */
+		values = (char **) palloc(4 * sizeof(char *));
+
+		values[0] = (char *) palloc(64 * sizeof(char));
+
+		/* arrays */
+		values[1] = (char *) palloc0(1024 * sizeof(char));
+		values[2] = (char *) palloc0(1024 * sizeof(char));
+
+		/* frequency */
+		values[3] = (char *) palloc(64 * sizeof(char));
+
+		outfuncs = (Oid*)palloc0(sizeof(Oid) * mcvlist->ndimensions);
+		fmgrinfo = (FmgrInfo*)palloc0(sizeof(FmgrInfo) * mcvlist->ndimensions);
+
+		for (i = 0; i < mcvlist->ndimensions; i++)
+		{
+			bool isvarlena;
+
+			getTypeOutputInfo(get_atttype(relid, stakeys->values[i]),
+							  &outfuncs[i], &isvarlena);
+
+			fmgr_info(outfuncs[i], &fmgrinfo[i]);
+		}
+
+		snprintf(values[0], 64, "%d", call_cntr);	/* item ID */
+
+		for (i = 0; i < mcvlist->ndimensions; i++)
+		{
+			Datum val, valout;
+
+			format = "%s, %s";
+			if (i == 0)
+				format = "{%s%s";
+			else if (i == mcvlist->ndimensions-1)
+				format = "%s, %s}";
+
+			val = item->values[i];
+			valout = FunctionCall1(&fmgrinfo[i], val);
+
+			snprintf(buff, 1024, format, values[1], DatumGetPointer(valout));
+			strncpy(values[1], buff, 1023);
+			buff[0] = '\0';
+
+			snprintf(buff, 1024, format, values[2], item->isnull[i] ? "t" : "f");
+			strncpy(values[2], buff, 1023);
+			buff[0] = '\0';
+		}
+
+		snprintf(values[3], 64, "%f", item->frequency);	/* frequency */
+
+		/* build a tuple */
+		tuple = BuildTupleFromCStrings(attinmeta, values);
+
+		/* make the tuple into a datum */
+		result = HeapTupleGetDatum(tuple);
+
+		/* clean up (this is not really necessary) */
+		pfree(values[0]);
+		pfree(values[1]);
+		pfree(values[2]);
+		pfree(values[3]);
+
+		pfree(values);
+
+		SRF_RETURN_NEXT(funcctx, result);
+	}
+	else    /* do when there is no more left */
+	{
+		SRF_RETURN_DONE(funcctx);
+	}
+}
diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c
index f6d60ad..cd0ed01 100644
--- a/src/bin/psql/describe.c
+++ b/src/bin/psql/describe.c
@@ -2109,8 +2109,9 @@ describeOneTableDetails(const char *schemaname,
 		{
 			printfPQExpBuffer(&buf,
 						   "SELECT oid, staname, stakeys,\n"
-						   "  deps_enabled,\n"
-						   "  deps_built,\n"
+						   "  deps_enabled, mcv_enabled,\n"
+						   "  deps_built, mcv_built,\n"
+						   "  mcv_max_items,\n"
 						   "  (SELECT string_agg(attname::text,', ')\n"
 						   "    FROM ((SELECT unnest(stakeys) AS attnum) s\n"
 						   "         JOIN pg_attribute a ON (starelid = a.attrelid and a.attnum = s.attnum))) AS attnums\n"
@@ -2128,6 +2129,8 @@ describeOneTableDetails(const char *schemaname,
 				printTableAddFooter(&cont, _("Statistics:"));
 				for (i = 0; i < tuples; i++)
 				{
+					bool first = true;
+
 					printfPQExpBuffer(&buf, "    ");
 
 					/* statistics name */
@@ -2135,10 +2138,22 @@ describeOneTableDetails(const char *schemaname,
 
 					/*  options */
 					if (!strcmp(PQgetvalue(result, i, 3), "t"))
-						appendPQExpBuffer(&buf, "(dependencies)");
+					{
+						appendPQExpBuffer(&buf, "(dependencies");
+						first = false;
+					}
+
+					if (!strcmp(PQgetvalue(result, i, 4), "t"))
+					{
+						if (! first)
+							appendPQExpBuffer(&buf, ", mcv");
+						else
+							appendPQExpBuffer(&buf, "(mcv");
+						first = false;
+					}
 
-					appendPQExpBuffer(&buf, " ON (%s)",
-							PQgetvalue(result, i, 7));
+					appendPQExpBuffer(&buf, ") ON (%s)",
+							PQgetvalue(result, i, 9));
 
 					printTableAddFooter(&cont, buf.data);
 				}
diff --git a/src/include/catalog/pg_mv_statistic.h b/src/include/catalog/pg_mv_statistic.h
index 8c33a92..7be6223 100644
--- a/src/include/catalog/pg_mv_statistic.h
+++ b/src/include/catalog/pg_mv_statistic.h
@@ -36,15 +36,21 @@ CATALOG(pg_mv_statistic,3381)
 
 	/* statistics requested to build */
 	bool		deps_enabled;		/* analyze dependencies? */
+	bool		mcv_enabled;		/* build MCV list? */
+
+	/* MCV size */
+	int32		mcv_max_items;		/* max MCV items */
 
 	/* statistics that are available (if requested) */
 	bool		deps_built;			/* dependencies were built */
+	bool		mcv_built;			/* MCV list was built */
 
 	/* variable-length fields start here, but we allow direct access to stakeys */
 	int2vector	stakeys;			/* array of column keys */
 
 #ifdef CATALOG_VARLEN
 	bytea		stadeps;			/* dependencies (serialized) */
+	bytea		stamcv;				/* MCV list (serialized) */
 #endif
 
 } FormData_pg_mv_statistic;
@@ -60,12 +66,17 @@ typedef FormData_pg_mv_statistic *Form_pg_mv_statistic;
  *		compiler constants for pg_attrdef
  * ----------------
  */
-#define Natts_pg_mv_statistic					6
+
+#define Natts_pg_mv_statistic					10
 #define Anum_pg_mv_statistic_starelid			1
 #define Anum_pg_mv_statistic_staname			2
 #define Anum_pg_mv_statistic_deps_enabled		3
-#define Anum_pg_mv_statistic_deps_built			4
-#define Anum_pg_mv_statistic_stakeys			5
-#define Anum_pg_mv_statistic_stadeps			6
+#define Anum_pg_mv_statistic_mcv_enabled		4
+#define Anum_pg_mv_statistic_mcv_max_items		5
+#define Anum_pg_mv_statistic_deps_built			6
+#define Anum_pg_mv_statistic_mcv_built			7
+#define Anum_pg_mv_statistic_stakeys			8
+#define Anum_pg_mv_statistic_stadeps			9
+#define Anum_pg_mv_statistic_stamcv				10
 
 #endif   /* PG_MV_STATISTIC_H */
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 85c638d..b16f2a9 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -2743,6 +2743,10 @@ DATA(insert OID = 3998 (  pg_mv_stats_dependencies_info     PGNSP PGUID 12 1 0 0
 DESCR("multivariate stats: functional dependencies info");
 DATA(insert OID = 3999 (  pg_mv_stats_dependencies_show     PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 25 "17" _null_ _null_ _null_ _null_ _null_ pg_mv_stats_dependencies_show _null_ _null_ _null_ ));
 DESCR("multivariate stats: functional dependencies show");
+DATA(insert OID = 3376 (  pg_mv_stats_mcvlist_info	PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 25 "17" _null_ _null_ _null_ _null_ _null_ pg_mv_stats_mcvlist_info _null_ _null_ _null_ ));
+DESCR("multi-variate statistics: MCV list info");
+DATA(insert OID = 3373 (  pg_mv_mcv_items PGNSP PGUID 12 1 1000 0 0 f f f f t t i s 1 0 2249 "26" "{26,23,1009,1000,701}" "{i,o,o,o,o}" "{oid,index,values,nulls,frequency}" _null_ _null_ pg_mv_mcv_items _null_ _null_ _null_ ));
+DESCR("details about MCV list items");
 
 DATA(insert OID = 1928 (  pg_stat_get_numscans			PGNSP PGUID 12 1 0 0 0 f f f f t f s r 1 0 20 "26" _null_ _null_ _null_ _null_ _null_ pg_stat_get_numscans _null_ _null_ _null_ ));
 DESCR("statistics: number of scans done for table/index");
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index baa0c88..7f2dc8a 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -592,9 +592,11 @@ typedef struct MVStatisticInfo
 
 	/* enabled statistics */
 	bool		deps_enabled;	/* functional dependencies enabled */
+	bool		mcv_enabled;	/* MCV list enabled */
 
 	/* built/available statistics */
 	bool		deps_built;		/* functional dependencies built */
+	bool		mcv_built;		/* MCV list built */
 
 	/* columns in the statistics (attnums) */
 	int2vector *stakeys;		/* attnums of the columns covered */
diff --git a/src/include/utils/mvstats.h b/src/include/utils/mvstats.h
index 02a7dda..b028192 100644
--- a/src/include/utils/mvstats.h
+++ b/src/include/utils/mvstats.h
@@ -50,30 +50,89 @@ typedef MVDependenciesData* MVDependencies;
 #define MVSTAT_DEPS_TYPE_BASIC	1			/* basic dependencies type */
 
 /*
+ * Multivariate MCV (most-common value) lists
+ *
+ * A straight-forward extension of MCV items - i.e. a list (array) of
+ * combinations of attribute values, together with a frequency and
+ * null flags.
+ */
+typedef struct MCVItemData {
+	double		frequency;	/* frequency of this combination */
+	bool	   *isnull;		/* lags of NULL values (up to 32 columns) */
+	Datum	   *values;		/* variable-length (ndimensions) */
+} MCVItemData;
+
+typedef MCVItemData *MCVItem;
+
+/* multivariate MCV list - essentally an array of MCV items */
+typedef struct MCVListData {
+	uint32		magic;			/* magic constant marker */
+	uint32		type;			/* type of MCV list (BASIC) */
+	uint32		ndimensions;	/* number of dimensions */
+	uint32		nitems;			/* number of MCV items in the array */
+	MCVItem	   *items;			/* array of MCV items */
+} MCVListData;
+
+typedef MCVListData *MCVList;
+
+/* used to flag stats serialized to bytea */
+#define MVSTAT_MCV_MAGIC		0xE1A651C2	/* marks serialized bytea */
+#define MVSTAT_MCV_TYPE_BASIC	1			/* basic MCV list type */
+
+/*
+ * Limits used for mcv_max_items option, i.e. we're always guaranteed
+ * to have space for at least MVSTAT_MCVLIST_MIN_ITEMS, and we cannot
+ * have more than MVSTAT_MCVLIST_MAX_ITEMS items.
+ *
+ * This is just a boundary for the 'max' threshold - the actual list
+ * may of course contain less items than MVSTAT_MCVLIST_MIN_ITEMS.
+ */
+#define MVSTAT_MCVLIST_MIN_ITEMS	128		/* min items in MCV list */
+#define MVSTAT_MCVLIST_MAX_ITEMS	8192	/* max items in MCV list */
+
+/*
  * TODO Maybe fetching the histogram/MCV list separately is inefficient?
  *      Consider adding a single `fetch_stats` method, fetching all
  *      stats specified using flags (or something like that).
  */
 
 MVDependencies load_mv_dependencies(Oid mvoid);
+MCVList        load_mv_mcvlist(Oid mvoid);
 
 bytea * serialize_mv_dependencies(MVDependencies dependencies);
+bytea * serialize_mv_mcvlist(MCVList mcvlist, int2vector *attrs,
+							 VacAttrStats **stats);
 
 /* deserialization of stats (serialization is private to analyze) */
 MVDependencies	deserialize_mv_dependencies(bytea * data);
+MCVList			deserialize_mv_mcvlist(bytea * data);
+
+/*
+ * Returns index of the attribute number within the vector (i.e. a
+ * dimension within the stats).
+ */
+int mv_get_index(AttrNumber varattno, int2vector * stakeys);
+
+int2vector* find_mv_attnums(Oid mvoid, Oid *relid);
 
 /* FIXME this probably belongs somewhere else (not to operations stats) */
 extern Datum pg_mv_stats_dependencies_info(PG_FUNCTION_ARGS);
 extern Datum pg_mv_stats_dependencies_show(PG_FUNCTION_ARGS);
+extern Datum pg_mv_stats_mcvlist_info(PG_FUNCTION_ARGS);
+extern Datum pg_mv_mcvlist_items(PG_FUNCTION_ARGS);
 
 MVDependencies
-build_mv_dependencies(int numrows, HeapTuple *rows,
-								  int2vector *attrs,
-								  VacAttrStats **stats);
+build_mv_dependencies(int numrows, HeapTuple *rows, int2vector *attrs,
+					  VacAttrStats **stats);
+
+MCVList
+build_mv_mcvlist(int numrows, HeapTuple *rows, int2vector *attrs,
+				 VacAttrStats **stats, int *numrows_filtered);
 
 void build_mv_stats(Relation onerel, int numrows, HeapTuple *rows,
-						   int natts, VacAttrStats **vacattrstats);
+					int natts, VacAttrStats **vacattrstats);
 
-void update_mv_stats(Oid relid, MVDependencies dependencies, int2vector *attrs);
+void update_mv_stats(Oid relid, MVDependencies dependencies, MCVList mcvlist,
+					 int2vector *attrs, VacAttrStats **stats);
 
 #endif
diff --git a/src/test/regress/expected/mv_mcv.out b/src/test/regress/expected/mv_mcv.out
new file mode 100644
index 0000000..4958390
--- /dev/null
+++ b/src/test/regress/expected/mv_mcv.out
@@ -0,0 +1,207 @@
+-- data type passed by value
+CREATE TABLE mcv_list (
+    a INT,
+    b INT,
+    c INT
+);
+-- unknown column
+CREATE STATISTICS s1 ON mcv_list (unknown_column) WITH (mcv);
+ERROR:  column "unknown_column" referenced in statistics does not exist
+-- single column
+CREATE STATISTICS s1 ON mcv_list (a) WITH (mcv);
+ERROR:  multivariate stats require 2 or more columns
+-- single column, duplicated
+CREATE STATISTICS s1 ON mcv_list (a, a) WITH (mcv);
+ERROR:  duplicate column name in statistics definition
+-- two columns, one duplicated
+CREATE STATISTICS s1 ON mcv_list (a, a, b) WITH (mcv);
+ERROR:  duplicate column name in statistics definition
+-- unknown option
+CREATE STATISTICS s1 ON mcv_list (a, b, c) WITH (unknown_option);
+ERROR:  unrecognized STATISTICS option "unknown_option"
+-- missing MCV statistics
+CREATE STATISTICS s1 ON mcv_list (a, b, c) WITH (dependencies, max_mcv_items 200);
+ERROR:  option 'mcv' is required by other options(s)
+-- invalid mcv_max_items value / too low
+CREATE STATISTICS s1 ON mcv_list (a, b, c) WITH (mcv, max_mcv_items 10);
+ERROR:  max number of MCV items must be at least 128
+-- invalid mcv_max_items value / too high
+CREATE STATISTICS s1 ON mcv_list (a, b, c) WITH (mcv, max_mcv_items 10000);
+ERROR:  max number of MCV items is 8192
+-- correct command
+CREATE STATISTICS s1 ON mcv_list (a, b, c) WITH (mcv);
+-- random data
+INSERT INTO mcv_list
+     SELECT mod(i, 111), mod(i, 123), mod(i, 23) FROM generate_series(1,10000) s(i);
+ANALYZE mcv_list;
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+ mcv_enabled | mcv_built | pg_mv_stats_mcvlist_info 
+-------------+-----------+--------------------------
+ t           | f         | 
+(1 row)
+
+TRUNCATE mcv_list;
+-- a => b, a => c, b => c
+INSERT INTO mcv_list
+     SELECT i/10, i/100, i/200 FROM generate_series(1,10000) s(i);
+ANALYZE mcv_list;
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+ mcv_enabled | mcv_built | pg_mv_stats_mcvlist_info 
+-------------+-----------+--------------------------
+ t           | t         | nitems=1000
+(1 row)
+
+TRUNCATE mcv_list;
+-- a => b, a => c
+INSERT INTO mcv_list
+     SELECT i/10, i/150, i/200 FROM generate_series(1,10000) s(i);
+ANALYZE mcv_list;
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+ mcv_enabled | mcv_built | pg_mv_stats_mcvlist_info 
+-------------+-----------+--------------------------
+ t           | t         | nitems=1000
+(1 row)
+
+TRUNCATE mcv_list;
+-- check explain (expect bitmap index scan, not plain index scan)
+INSERT INTO mcv_list
+     SELECT i/10000, i/20000, i/40000 FROM generate_series(1,1000000) s(i);
+CREATE INDEX mcv_idx ON mcv_list (a, b);
+ANALYZE mcv_list;
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+ mcv_enabled | mcv_built | pg_mv_stats_mcvlist_info 
+-------------+-----------+--------------------------
+ t           | t         | nitems=100
+(1 row)
+
+EXPLAIN (COSTS off)
+ SELECT * FROM mcv_list WHERE a = 10 AND b = 5;
+                 QUERY PLAN                 
+--------------------------------------------
+ Bitmap Heap Scan on mcv_list
+   Recheck Cond: ((a = 10) AND (b = 5))
+   ->  Bitmap Index Scan on mcv_idx
+         Index Cond: ((a = 10) AND (b = 5))
+(4 rows)
+
+DROP TABLE mcv_list;
+-- varlena type (text)
+CREATE TABLE mcv_list (
+    a TEXT,
+    b TEXT,
+    c TEXT
+);
+CREATE STATISTICS s2 ON mcv_list (a, b, c) WITH (mcv);
+-- random data
+INSERT INTO mcv_list
+     SELECT mod(i, 111), mod(i, 123), mod(i, 23) FROM generate_series(1,10000) s(i);
+ANALYZE mcv_list;
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+ mcv_enabled | mcv_built | pg_mv_stats_mcvlist_info 
+-------------+-----------+--------------------------
+ t           | f         | 
+(1 row)
+
+TRUNCATE mcv_list;
+-- a => b, a => c, b => c
+INSERT INTO mcv_list
+     SELECT i/10, i/100, i/200 FROM generate_series(1,10000) s(i);
+ANALYZE mcv_list;
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+ mcv_enabled | mcv_built | pg_mv_stats_mcvlist_info 
+-------------+-----------+--------------------------
+ t           | t         | nitems=1000
+(1 row)
+
+TRUNCATE mcv_list;
+-- a => b, a => c
+INSERT INTO mcv_list
+     SELECT i/10, i/150, i/200 FROM generate_series(1,10000) s(i);
+ANALYZE mcv_list;
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+ mcv_enabled | mcv_built | pg_mv_stats_mcvlist_info 
+-------------+-----------+--------------------------
+ t           | t         | nitems=1000
+(1 row)
+
+TRUNCATE mcv_list;
+-- check explain (expect bitmap index scan, not plain index scan)
+INSERT INTO mcv_list
+     SELECT i/10000, i/20000, i/40000 FROM generate_series(1,1000000) s(i);
+CREATE INDEX mcv_idx ON mcv_list (a, b);
+ANALYZE mcv_list;
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+ mcv_enabled | mcv_built | pg_mv_stats_mcvlist_info 
+-------------+-----------+--------------------------
+ t           | t         | nitems=100
+(1 row)
+
+EXPLAIN (COSTS off)
+ SELECT * FROM mcv_list WHERE a = '10' AND b = '5';
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Bitmap Heap Scan on mcv_list
+   Recheck Cond: ((a = '10'::text) AND (b = '5'::text))
+   ->  Bitmap Index Scan on mcv_idx
+         Index Cond: ((a = '10'::text) AND (b = '5'::text))
+(4 rows)
+
+TRUNCATE mcv_list;
+-- check explain (expect bitmap index scan, not plain index scan) with NULLs
+INSERT INTO mcv_list
+     SELECT
+       (CASE WHEN i/10000 = 0 THEN NULL ELSE i/10000 END),
+       (CASE WHEN i/20000 = 0 THEN NULL ELSE i/20000 END),
+       (CASE WHEN i/40000 = 0 THEN NULL ELSE i/40000 END)
+     FROM generate_series(1,1000000) s(i);
+ANALYZE mcv_list;
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+ mcv_enabled | mcv_built | pg_mv_stats_mcvlist_info 
+-------------+-----------+--------------------------
+ t           | t         | nitems=100
+(1 row)
+
+EXPLAIN (COSTS off)
+ SELECT * FROM mcv_list WHERE a IS NULL AND b IS NULL;
+                    QUERY PLAN                     
+---------------------------------------------------
+ Bitmap Heap Scan on mcv_list
+   Recheck Cond: ((a IS NULL) AND (b IS NULL))
+   ->  Bitmap Index Scan on mcv_idx
+         Index Cond: ((a IS NULL) AND (b IS NULL))
+(4 rows)
+
+DROP TABLE mcv_list;
+-- NULL values (mix of int and text columns)
+CREATE TABLE mcv_list (
+    a INT,
+    b TEXT,
+    c INT,
+    d TEXT
+);
+CREATE STATISTICS s3 ON mcv_list (a, b, c, d) WITH (mcv);
+INSERT INTO mcv_list
+     SELECT
+         mod(i, 100),
+         (CASE WHEN mod(i, 200) = 0 THEN NULL ELSE mod(i,200) END),
+         mod(i, 400),
+         (CASE WHEN mod(i, 300) = 0 THEN NULL ELSE mod(i,600) END)
+     FROM generate_series(1,10000) s(i);
+ANALYZE mcv_list;
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+ mcv_enabled | mcv_built | pg_mv_stats_mcvlist_info 
+-------------+-----------+--------------------------
+ t           | t         | nitems=1200
+(1 row)
+
+DROP TABLE mcv_list;
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 428b1e8..50715db 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1369,7 +1369,9 @@ pg_mv_stats| SELECT n.nspname AS schemaname,
     c.relname AS tablename,
     s.stakeys AS attnums,
     length(s.stadeps) AS depsbytes,
-    pg_mv_stats_dependencies_info(s.stadeps) AS depsinfo
+    pg_mv_stats_dependencies_info(s.stadeps) AS depsinfo,
+    length(s.stamcv) AS mcvbytes,
+    pg_mv_stats_mcvlist_info(s.stamcv) AS mcvinfo
    FROM ((pg_mv_statistic s
      JOIN pg_class c ON ((c.oid = s.starelid)))
      LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace)));
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 81484f1..838c12b 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -112,4 +112,4 @@ test: event_trigger
 test: stats
 
 # run tests of multivariate stats
-test: mv_dependencies
+test: mv_dependencies mv_mcv
diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule
index 14ea574..d97a0ec 100644
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -162,3 +162,4 @@ test: xml
 test: event_trigger
 test: stats
 test: mv_dependencies
+test: mv_mcv
diff --git a/src/test/regress/sql/mv_mcv.sql b/src/test/regress/sql/mv_mcv.sql
new file mode 100644
index 0000000..16d82cf
--- /dev/null
+++ b/src/test/regress/sql/mv_mcv.sql
@@ -0,0 +1,178 @@
+-- data type passed by value
+CREATE TABLE mcv_list (
+    a INT,
+    b INT,
+    c INT
+);
+
+-- unknown column
+CREATE STATISTICS s1 ON mcv_list (unknown_column) WITH (mcv);
+
+-- single column
+CREATE STATISTICS s1 ON mcv_list (a) WITH (mcv);
+
+-- single column, duplicated
+CREATE STATISTICS s1 ON mcv_list (a, a) WITH (mcv);
+
+-- two columns, one duplicated
+CREATE STATISTICS s1 ON mcv_list (a, a, b) WITH (mcv);
+
+-- unknown option
+CREATE STATISTICS s1 ON mcv_list (a, b, c) WITH (unknown_option);
+
+-- missing MCV statistics
+CREATE STATISTICS s1 ON mcv_list (a, b, c) WITH (dependencies, max_mcv_items 200);
+
+-- invalid mcv_max_items value / too low
+CREATE STATISTICS s1 ON mcv_list (a, b, c) WITH (mcv, max_mcv_items 10);
+
+-- invalid mcv_max_items value / too high
+CREATE STATISTICS s1 ON mcv_list (a, b, c) WITH (mcv, max_mcv_items 10000);
+
+-- correct command
+CREATE STATISTICS s1 ON mcv_list (a, b, c) WITH (mcv);
+
+-- random data
+INSERT INTO mcv_list
+     SELECT mod(i, 111), mod(i, 123), mod(i, 23) FROM generate_series(1,10000) s(i);
+
+ANALYZE mcv_list;
+
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+
+TRUNCATE mcv_list;
+
+-- a => b, a => c, b => c
+INSERT INTO mcv_list
+     SELECT i/10, i/100, i/200 FROM generate_series(1,10000) s(i);
+
+ANALYZE mcv_list;
+
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+
+TRUNCATE mcv_list;
+
+-- a => b, a => c
+INSERT INTO mcv_list
+     SELECT i/10, i/150, i/200 FROM generate_series(1,10000) s(i);
+
+ANALYZE mcv_list;
+
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+
+TRUNCATE mcv_list;
+
+-- check explain (expect bitmap index scan, not plain index scan)
+INSERT INTO mcv_list
+     SELECT i/10000, i/20000, i/40000 FROM generate_series(1,1000000) s(i);
+CREATE INDEX mcv_idx ON mcv_list (a, b);
+
+ANALYZE mcv_list;
+
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+
+EXPLAIN (COSTS off)
+ SELECT * FROM mcv_list WHERE a = 10 AND b = 5;
+
+DROP TABLE mcv_list;
+
+-- varlena type (text)
+CREATE TABLE mcv_list (
+    a TEXT,
+    b TEXT,
+    c TEXT
+);
+
+CREATE STATISTICS s2 ON mcv_list (a, b, c) WITH (mcv);
+
+-- random data
+INSERT INTO mcv_list
+     SELECT mod(i, 111), mod(i, 123), mod(i, 23) FROM generate_series(1,10000) s(i);
+
+ANALYZE mcv_list;
+
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+
+TRUNCATE mcv_list;
+
+-- a => b, a => c, b => c
+INSERT INTO mcv_list
+     SELECT i/10, i/100, i/200 FROM generate_series(1,10000) s(i);
+
+ANALYZE mcv_list;
+
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+
+TRUNCATE mcv_list;
+
+-- a => b, a => c
+INSERT INTO mcv_list
+     SELECT i/10, i/150, i/200 FROM generate_series(1,10000) s(i);
+ANALYZE mcv_list;
+
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+
+TRUNCATE mcv_list;
+
+-- check explain (expect bitmap index scan, not plain index scan)
+INSERT INTO mcv_list
+     SELECT i/10000, i/20000, i/40000 FROM generate_series(1,1000000) s(i);
+CREATE INDEX mcv_idx ON mcv_list (a, b);
+ANALYZE mcv_list;
+
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+
+EXPLAIN (COSTS off)
+ SELECT * FROM mcv_list WHERE a = '10' AND b = '5';
+
+TRUNCATE mcv_list;
+
+-- check explain (expect bitmap index scan, not plain index scan) with NULLs
+INSERT INTO mcv_list
+     SELECT
+       (CASE WHEN i/10000 = 0 THEN NULL ELSE i/10000 END),
+       (CASE WHEN i/20000 = 0 THEN NULL ELSE i/20000 END),
+       (CASE WHEN i/40000 = 0 THEN NULL ELSE i/40000 END)
+     FROM generate_series(1,1000000) s(i);
+ANALYZE mcv_list;
+
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+
+EXPLAIN (COSTS off)
+ SELECT * FROM mcv_list WHERE a IS NULL AND b IS NULL;
+
+DROP TABLE mcv_list;
+
+-- NULL values (mix of int and text columns)
+CREATE TABLE mcv_list (
+    a INT,
+    b TEXT,
+    c INT,
+    d TEXT
+);
+
+CREATE STATISTICS s3 ON mcv_list (a, b, c, d) WITH (mcv);
+
+INSERT INTO mcv_list
+     SELECT
+         mod(i, 100),
+         (CASE WHEN mod(i, 200) = 0 THEN NULL ELSE mod(i,200) END),
+         mod(i, 400),
+         (CASE WHEN mod(i, 300) = 0 THEN NULL ELSE mod(i,600) END)
+     FROM generate_series(1,10000) s(i);
+
+ANALYZE mcv_list;
+
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+
+DROP TABLE mcv_list;
-- 
2.1.0

