diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 68b5423..8b66dfb 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -101,8 +101,10 @@ #include #include +#include "access/brin.h" #include "access/gin.h" #include "access/htup_details.h" +#include "access/reloptions.h" #include "access/sysattr.h" #include "catalog/index.h" #include "catalog/pg_am.h" @@ -7717,11 +7719,20 @@ brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, List *indexOrderBys = path->indexorderbys; double numPages = index->pages; double numTuples = index->tuples; + RelOptInfo *baserel = index->rel; List *qinfos; Cost spc_seq_page_cost; Cost spc_random_page_cost; double qual_op_cost; double qual_arg_cost; + double qualSelectivity; + BlockNumber pagesPerRange; + double indexRanges; + double perfectRanges; + double probableRanges; + double selec; + Relation indexRel; + VariableStatData vardata; /* Do preliminary analysis of indexquals */ qinfos = deconstruct_indexquals(path); @@ -7745,14 +7756,151 @@ brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, */ *indexTotalCost = spc_random_page_cost * numPages * loop_count; - *indexSelectivity = - clauselist_selectivity(root, indexQuals, - path->indexinfo->rel->relid, - JOIN_INNER, NULL); - *indexCorrelation = 1; + /* + * Compute index correlation + * + * Because we can use all index quals equally when scanning, we can use + * the largest correlation (in absolute value) among columns used by the + * query. Start at zero, the worst possible case. If we cannot find + * any correlation statistics, we will keep use it as 0. + */ + *indexCorrelation = 0; + + { + RangeTblEntry *rte = planner_rt_fetch(index->rel->relid, root); + ListCell *cell; + + Assert(rte->rtekind == RTE_RELATION); + + foreach(cell, qinfos) + { + IndexQualInfo *qinfo = (IndexQualInfo *) lfirst(cell); + AttrNumber colnum = index->indexkeys[qinfo->indexcol]; + + if (colnum != 0) + { + /* Simple variable -- look to stats for the underlying table */ + if (get_relation_stats_hook && + (*get_relation_stats_hook) (root, rte, colnum, &vardata)) + { + /* + * The hook took control of acquiring a stats tuple. + * If it did supply a tuple, it'd better have supplied + * a freefunc. + */ + if (HeapTupleIsValid(vardata.statsTuple) && + !vardata.freefunc) + elog(ERROR, "no function provided to release variable stats with"); + } + else + { + vardata.statsTuple = SearchSysCache3(STATRELATTINH, + ObjectIdGetDatum(rte->relid), + Int16GetDatum(colnum), + /* XXX no inh */ + BoolGetDatum(false)); + vardata.freefunc = ReleaseSysCache; + } + } + else + { + /* Expression --- maybe there are stats for the index itself */ + if (get_index_stats_hook && + (*get_index_stats_hook) (root, index->indexoid, 1, &vardata)) + { + /* + * The hook took control of acquiring a stats tuple. If it did + * supply a tuple, it'd better have supplied a freefunc. + */ + if (HeapTupleIsValid(vardata.statsTuple) && + !vardata.freefunc) + elog(ERROR, "no function provided to release variable stats with"); + } + else + { + vardata.statsTuple = SearchSysCache3(STATRELATTINH, + ObjectIdGetDatum(index->indexoid), + Int16GetDatum(1), + BoolGetDatum(false)); + vardata.freefunc = ReleaseSysCache; + } + } + + if (HeapTupleIsValid(vardata.statsTuple)) + { + float4 *numbers; + int nnumbers; + + if (get_attstatsslot(vardata.statsTuple, InvalidOid, 0, + STATISTIC_KIND_CORRELATION, + InvalidOid, + NULL, + NULL, NULL, + &numbers, &nnumbers)) + { + double varCorrelation = 0.0; + + if (nnumbers > 0); + varCorrelation = Abs(numbers[0]); + + if (varCorrelation > *indexCorrelation) + *indexCorrelation = varCorrelation; + + free_attstatsslot(InvalidOid, NULL, 0, numbers, nnumbers); + } + } + + ReleaseVariableStats(vardata); + } + } + + qualSelectivity = clauselist_selectivity(root, indexQuals, + baserel->relid, + JOIN_INNER, NULL); + + indexRel = index_open(index->indexoid, AccessShareLock); + pagesPerRange = BrinGetPagesPerRange(indexRel); + index_close(indexRel, AccessShareLock); + + Assert(pagesPerRange > 0); + + /* work out the actual number of ranges in the index */ + indexRanges = Max(ceil((double) baserel->pages / pagesPerRange), 1.0); + + /* + * Now calculate the minimum possible ranges we could visit if all of the + * rows were in perfect order in the table's heap. + */ + perfectRanges = ceil(indexRanges * qualSelectivity); + + /* + * now estimate the probably number of ranges that we'll touch by using + * the indexCorrelation from the stats. Careful not to divide by zero. + * + * Here we're using the absolute value of the correlation from the least + * correlated column from the statistics. It may be questionable as to + * whether this is a good idea, but we have little else to go on for now. + * In any case, this is much better than assuming everything is correlated + * perfectly. + */ + if (*indexCorrelation < 1.0e-10) + probableRanges = indexRanges; + else + probableRanges = Min(perfectRanges / *indexCorrelation, indexRanges); + + /* we expect to visit this portion of the table */ + selec = probableRanges / indexRanges; + + CLAMP_PROBABILITY(selec); /* - * Add on index qual eval costs, much as in genericcostestimate. + * Set the selectivity to the portion of the table we expect to return. + * The re-check will probably filter more tuples away. + */ + *indexSelectivity = selec; + + /* + * Add index qual arg costs, much as in genericcostestimate. */ qual_arg_cost = other_operands_eval_cost(root, qinfos) + orderby_operands_eval_cost(root, path); @@ -7761,8 +7909,8 @@ brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, *indexStartupCost += qual_arg_cost; *indexTotalCost += qual_arg_cost; + /* also charge for the number of ranges we expect to visit */ + *indexTotalCost += probableRanges * cpu_index_tuple_cost; *indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost); *indexPages = index->pages; - - /* XXX what about pages_per_range? */ } diff --git a/src/test/regress/expected/brin.out b/src/test/regress/expected/brin.out index a40f87a..ca80f00 100644 --- a/src/test/regress/expected/brin.out +++ b/src/test/regress/expected/brin.out @@ -363,6 +363,8 @@ BEGIN END LOOP; END; $x$; +RESET enable_seqscan; +RESET enable_bitmapscan; INSERT INTO brintest SELECT repeat(stringu1, 42)::bytea, substr(stringu1, 1, 1)::"char", @@ -481,3 +483,27 @@ SELECT brin_summarize_range('brin_summarize_idx', -1); ERROR: block number out of range: -1 SELECT brin_summarize_range('brin_summarize_idx', 4294967296); ERROR: block number out of range: 4294967296 +-- test brin cost estimates behave sanely based on correlation of values +CREATE TABLE brin_test (a INT, b INT); +INSERT INTO brin_test SELECT x/100,x%100 FROM generate_series(1,10000) x(x); +CREATE INDEX brin_test_a_idx ON brin_test USING brin (a) WITH (pages_per_range = 2); +CREATE INDEX brin_test_b_idx ON brin_test USING brin (b) WITH (pages_per_range = 2); +VACUUM ANALYZE brin_test; +-- Ensure brin index is used when columns are perfectly correlated +EXPLAIN (COSTS OFF) SELECT * FROM brin_test WHERE a = 1; + QUERY PLAN +-------------------------------------------- + Bitmap Heap Scan on brin_test + Recheck Cond: (a = 1) + -> Bitmap Index Scan on brin_test_a_idx + Index Cond: (a = 1) +(4 rows) + +-- Ensure brin index is not used when values are not correlated +EXPLAIN (COSTS OFF) SELECT * FROM brin_test WHERE b = 1; + QUERY PLAN +----------------------- + Seq Scan on brin_test + Filter: (b = 1) +(2 rows) + diff --git a/src/test/regress/sql/brin.sql b/src/test/regress/sql/brin.sql index 521b22f..11f8fe9 100644 --- a/src/test/regress/sql/brin.sql +++ b/src/test/regress/sql/brin.sql @@ -370,6 +370,9 @@ BEGIN END; $x$; +RESET enable_seqscan; +RESET enable_bitmapscan; + INSERT INTO brintest SELECT repeat(stringu1, 42)::bytea, substr(stringu1, 1, 1)::"char", @@ -444,3 +447,16 @@ SELECT brin_summarize_range('brin_summarize_idx', 4294967295); -- invalid block number values SELECT brin_summarize_range('brin_summarize_idx', -1); SELECT brin_summarize_range('brin_summarize_idx', 4294967296); + + +-- test brin cost estimates behave sanely based on correlation of values +CREATE TABLE brin_test (a INT, b INT); +INSERT INTO brin_test SELECT x/100,x%100 FROM generate_series(1,10000) x(x); +CREATE INDEX brin_test_a_idx ON brin_test USING brin (a) WITH (pages_per_range = 2); +CREATE INDEX brin_test_b_idx ON brin_test USING brin (b) WITH (pages_per_range = 2); +VACUUM ANALYZE brin_test; + +-- Ensure brin index is used when columns are perfectly correlated +EXPLAIN (COSTS OFF) SELECT * FROM brin_test WHERE a = 1; +-- Ensure brin index is not used when values are not correlated +EXPLAIN (COSTS OFF) SELECT * FROM brin_test WHERE b = 1;