>From 19f5733988c75732a6fd4612c822fa99f05ed9ab Mon Sep 17 00:00:00 2001
From: Kyotaro Horiguchi <horiguchi.kyotaro@lab.ntt.co.jp>
Date: Thu, 6 Aug 2015 17:31:20 +0900
Subject: [PATCH 3/6] Make use of multivariate coefficient in estimation of
 clauselist.

This is rather simple, but it walks the whole clauselist to collect column
information.
---
 src/backend/optimizer/path/clausesel.c | 100 +++++++++++++++++++++++++++++++++
 1 file changed, 100 insertions(+)

diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c
index dcac1c1..b28c271 100644
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -14,8 +14,15 @@
  */
 #include "postgres.h"
 
+#include "access/genam.h"
+#include "access/heapam.h"
+#include "access/htup_details.h"
+#include "catalog/indexing.h"
+#include "catalog/pg_class.h"
 #include "catalog/pg_operator.h"
+#include "catalog/pg_mvcoefficient.h"
 #include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
@@ -43,6 +50,96 @@ static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
 			   bool varonleft, bool isLTsel, Selectivity s2);
 
 
+/*
+ * Tree walker: add the attno of each user-column Var to colsetlist[varno].
+ * NOTE(review): varlevelsup is not examined; presumably no upper-level Vars.
+ */
+static bool
+collect_collist_walker(Node *node, Bitmapset **colsetlist)
+{
+	Var		   *colref = (Var *) node;
+
+	if (node == NULL)
+		return false;
+	if (IsA(node, Var) && AttrNumberIsForUserDefinedAttr(colref->varattno))
+		colsetlist[colref->varno] = bms_add_member(colsetlist[colref->varno],
+												   colref->varattno);
+	return expression_tree_walker(node, collect_collist_walker, colsetlist);
+}
+
+/*
+ * Find multivariate distinctness coefficient for clauselist: the smallest
+ * catalog coefficient in (0, 1) whose columns are all referenced, on a single
+ * relation, by the same-EC clauses of the list; 1.0 when there is none.
+ */
+static double
+find_mv_coeffeicient(PlannerInfo *root, List *clauses)
+{
+	int			relid;
+	ListCell   *l;
+	Bitmapset **colsetlist;
+	double		mv_coef = 1.0;
+
+	/* Columns this clauselist refers to, one set per rel, indexed by varno */
+	colsetlist = palloc0(root->simple_rel_array_size * sizeof(Bitmapset *));
+
+	foreach(l, clauses)
+	{
+		RestrictInfo *rti = (RestrictInfo *) lfirst(l);
+
+		/* Test the node tag first: only RestrictInfos have EC fields */
+		if (IsA(rti, RestrictInfo) &&
+			rti->left_ec && rti->left_ec == rti->right_ec)
+			collect_collist_walker((Node *) rti->clause, colsetlist);
+	}
+
+	/* Find mv coefficient catalog entries matching each rel's column set */
+	for (relid = 1; relid < root->simple_rel_array_size; relid++)
+	{
+		RangeTblEntry *rte = root->simple_rte_array[relid];
+		Relation	mvcrel;
+		SysScanDesc sscan;
+		ScanKeyData skeys[1];
+		HeapTuple	tuple;
+
+		/* Need 2+ columns on an ordinary table; other rels have no mv stats */
+		if (bms_num_members(colsetlist[relid]) < 2 ||
+			rte->rtekind != RTE_RELATION || rte->relkind != RELKIND_RELATION)
+			continue;
+
+		ScanKeyInit(&skeys[0], Anum_pg_mvcoefficient_mvcreloid,
+					BTEqualStrategyNumber, F_OIDEQ,
+					ObjectIdGetDatum(rte->relid));
+		mvcrel = heap_open(MvCoefficientRelationId, AccessShareLock);
+		sscan = systable_beginscan(mvcrel, MvCoefficientIndexId, true,
+								   NULL, 1, skeys);
+		while (HeapTupleIsValid(tuple = systable_getnext(sscan)))
+		{
+			Form_pg_mvcoefficient mvc =
+				(Form_pg_mvcoefficient) GETSTRUCT(tuple);
+			Bitmapset  *mvccols = NULL;
+			int			i;
+
+			for (i = 0; i < mvc->mvcnattrs; i++)
+				mvccols = bms_add_member(mvccols, mvc->mvcattrs.values[i]);
+
+			/* Usable when all its columns are referenced; keep the smallest */
+			if (bms_is_subset(mvccols, colsetlist[relid]) &&
+				mvc->mvccoefficient > 0 && mvc->mvccoefficient < mv_coef)
+				mv_coef = mvc->mvccoefficient;
+			bms_free(mvccols);	/* scratch set, rebuilt for every row */
+		}
+		systable_endscan(sscan);
+		heap_close(mvcrel, AccessShareLock);
+	}
+
+	for (relid = 0; relid < root->simple_rel_array_size; relid++)
+		bms_free(colsetlist[relid]);	/* NULL entries are fine */
+	pfree(colsetlist);
+
+	return mv_coef;
+}
+
 /****************************************************************************
  *		ROUTINES TO COMPUTE SELECTIVITIES
  ****************************************************************************/
@@ -200,6 +297,9 @@ clauselist_selectivity(PlannerInfo *root,
 		s1 = s1 * s2;
 	}
 
+	/* Try multivariate distinctness correction for clauses */
+	s1 /= find_mv_coeffeicient(root, clauses);
+
 	/*
 	 * Now scan the rangequery pair list.
 	 */
-- 
1.8.3.1

