From e3bc921ad94c82c677886c2baef9a4876f6faeea Mon Sep 17 00:00:00 2001 From: Floris van Nee Date: Thu, 19 Mar 2020 10:27:47 +0100 Subject: [PATCH 5/5] Support skip scan for non-distinct scans Adds planner support to choose a skip scan for regular non-distinct queries like: SELECT * FROM t1 WHERE b=1 (with index on (a,b)) --- src/backend/optimizer/path/indxpath.c | 181 +++++++++++++++++++++++++- src/backend/optimizer/plan/planner.c | 2 +- src/backend/optimizer/util/pathnode.c | 4 +- src/backend/utils/adt/selfuncs.c | 153 ++++++++++++++++++++-- src/include/optimizer/pathnode.h | 3 +- 5 files changed, 327 insertions(+), 16 deletions(-) diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index ba2dd30a13..d4d3e1c7eb 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -192,6 +192,17 @@ static Expr *match_clause_to_ordering_op(IndexOptInfo *index, static bool ec_member_matches_indexcol(PlannerInfo *root, RelOptInfo *rel, EquivalenceClass *ec, EquivalenceMember *em, void *arg); +static List* add_possible_index_skip_paths(List* result, + PlannerInfo *root, + IndexOptInfo *index, + List *indexclauses, + List *indexorderbys, + List *indexorderbycols, + List *pathkeys, + ScanDirection indexscandir, + bool indexonly, + Relids required_outer, + double loop_count); /* @@ -820,6 +831,136 @@ get_index_paths(PlannerInfo *root, RelOptInfo *rel, } } +/* + * Find available index skip paths and add them to the path list + */ +static List* add_possible_index_skip_paths(List* result, + PlannerInfo *root, + IndexOptInfo *index, + List *indexclauses, + List *indexorderbys, + List *indexorderbycols, + List *pathkeys, + ScanDirection indexscandir, + bool indexonly, + Relids required_outer, + double loop_count) +{ + int indexcol; + bool eqQualHere; + bool eqQualPrev; + bool eqSoFar; + ListCell *lc; + + /* + * We need to find possible prefixes to use for the skip scan + * Any useful prefix is one just before an index clause, unless + * all clauses so far have been equal. + * For example, on an index (a,b,c), the qual b=1 would + * mean that an interesting skip prefix could be 1. + * For qual a=1 AND b=1, it is not interesting to skip with + * prefix 1, because the value of a is fixed already. + */ + indexcol = 0; + eqQualHere = false; + eqQualPrev = false; + eqSoFar = true; + foreach(lc, indexclauses) + { + IndexClause *iclause = lfirst_node(IndexClause, lc); + ListCell *lc2; + + if (indexcol != iclause->indexcol) + { + if (!eqQualHere || indexcol != iclause->indexcol - 1) + eqSoFar = false; + + /* Beginning of a new column's quals */ + if (!eqQualPrev && !eqSoFar) + { + /* We have a qual on current column, + * there is no equality qual on the previous column, + * not all of the previous quals are equality so far + * (last one is special case for the first column in the index). + * Optimal conditions to try an index skip path. + */ + IndexPath *ipath = create_index_path(root, index, + indexclauses, + indexorderbys, + indexorderbycols, + pathkeys, + indexscandir, + indexonly, + required_outer, + loop_count, + false, + iclause->indexcol); + result = lappend(result, ipath); + } + + eqQualPrev = eqQualHere; + eqQualHere = false; + indexcol++; + /* if the clause is not for this index col, increment until it is */ + while (indexcol != iclause->indexcol) + { + eqQualPrev = false; + eqSoFar = false; + indexcol++; + } + } + + /* Examine each indexqual associated with this index clause */ + foreach(lc2, iclause->indexquals) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2); + Expr *clause = rinfo->clause; + Oid clause_op = InvalidOid; + int op_strategy; + + if (IsA(clause, OpExpr)) + { + OpExpr *op = (OpExpr *) clause; + clause_op = op->opno; + } + else if (IsA(clause, RowCompareExpr)) + { + RowCompareExpr *rc = (RowCompareExpr *) clause; + clause_op = linitial_oid(rc->opnos); + } + else if (IsA(clause, ScalarArrayOpExpr)) + { + ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause; + clause_op = saop->opno; + } + else if (IsA(clause, NullTest)) + { + NullTest *nt = (NullTest *) clause; + + if (nt->nulltesttype == IS_NULL) + { + /* IS NULL is like = for selectivity purposes */ + eqQualHere = true; + } + } + else + elog(ERROR, "unsupported indexqual type: %d", + (int) nodeTag(clause)); + + /* check for equality operator */ + if (OidIsValid(clause_op)) + { + op_strategy = get_op_opfamily_strategy(clause_op, + index->opfamily[indexcol]); + Assert(op_strategy != 0); /* not a member of opfamily?? */ + if (op_strategy == BTEqualStrategyNumber) + eqQualHere = true; + } + } + } + return result; +} + /* * build_index_paths * Given an index and a set of index clauses for it, construct zero @@ -1055,9 +1196,25 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel, index_only_scan, outer_relids, loop_count, - false); + false, + 0); result = lappend(result, ipath); + if (can_skip) + { + result = add_possible_index_skip_paths(result, root, index, + index_clauses, + orderbyclauses, + orderbyclausecols, + useful_pathkeys, + index_is_ordered ? + ForwardScanDirection : + NoMovementScanDirection, + index_only_scan, + outer_relids, + loop_count); + } + /* Consider index skip scan as well */ if (root->query_uniquekeys != NULL && can_skip) { @@ -1104,7 +1261,8 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel, index_only_scan, outer_relids, loop_count, - true); + true, + 0); /* * if, after costing the path, we find that it's not worth using @@ -1137,9 +1295,23 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel, index_only_scan, outer_relids, loop_count, - false); + false, + 0); result = lappend(result, ipath); + if (can_skip) + { + result = add_possible_index_skip_paths(result, root, index, + index_clauses, + NIL, + NIL, + useful_pathkeys, + BackwardScanDirection, + index_only_scan, + outer_relids, + loop_count); + } + /* Consider index skip scan as well */ if (root->query_uniquekeys != NULL && can_skip) { @@ -1181,7 +1353,8 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel, index_only_scan, outer_relids, loop_count, - true); + true, + 0); /* * if, after costing the path, we find that it's not worth diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 91b1ca2634..affeee390c 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -6013,7 +6013,7 @@ plan_cluster_use_sort(Oid tableOid, Oid indexOid) indexScanPath = create_index_path(root, indexInfo, NIL, NIL, NIL, NIL, ForwardScanDirection, false, - NULL, 1.0, false); + NULL, 1.0, false, 0); return (seqScanAndSortPath.total_cost < indexScanPath->path.total_cost); } diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 16633fd672..2b1c64fb51 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1051,7 +1051,8 @@ create_index_path(PlannerInfo *root, bool indexonly, Relids required_outer, double loop_count, - bool partial_path) + bool partial_path, + int skip_prefix) { IndexPath *pathnode = makeNode(IndexPath); RelOptInfo *rel = index->rel; @@ -1071,6 +1072,7 @@ create_index_path(PlannerInfo *root, pathnode->indexorderbys = indexorderbys; pathnode->indexorderbycols = indexorderbycols; pathnode->indexscandir = indexscandir; + pathnode->indexskipprefix = skip_prefix; cost_index(pathnode, root, loop_count, partial_path); diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 10895fb287..45f4fc814b 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -210,7 +210,9 @@ static bool get_actual_variable_endpoint(Relation heapRel, MemoryContext outercontext, Datum *endpointDatum); static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids); - +static double estimate_num_groups_internal(PlannerInfo *root, List *groupExprs, + double input_rows, double rel_input_rows, + List **pgset, EstimationInfo *estinfo); /* * eqsel - Selectivity of "=" for any data types. @@ -3367,6 +3369,19 @@ add_unique_group_var(PlannerInfo *root, List *varinfos, double estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, List **pgset, EstimationInfo *estinfo) +{ + return estimate_num_groups_internal(root, groupExprs, input_rows, -1, pgset, estinfo); +} + +/* + * Same as estimate_num_groups, but with an extra argument to control + * the estimation used for the input rows of the relation. If + * rel_input_rows < 0, it uses the the original planner estimation for the + * individual rels, else if uses the estimation as provided to the function. + */ +static double +estimate_num_groups_internal(PlannerInfo *root, List *groupExprs, double input_rows, double rel_input_rows, + List **pgset, EstimationInfo *estinfo) { List *varinfos = NIL; double srf_multiplier = 1.0; @@ -3533,6 +3548,12 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, int relvarcount = 0; List *newvarinfos = NIL; List *relvarinfos = NIL; + double this_rel_input_rows; + + if (rel_input_rows < 0.0) + this_rel_input_rows = rel->rows; + else + this_rel_input_rows = rel_input_rows; /* * Split the list of varinfos in two - one for the current rel, one @@ -3638,7 +3659,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, * guarding against division by zero when reldistinct is zero. * Also skip this if we know that we are returning all rows. */ - if (reldistinct > 0 && rel->rows < rel->tuples) + if (reldistinct > 0 && this_rel_input_rows < rel->tuples) { /* * Given a table containing N rows with n distinct values in a @@ -3675,7 +3696,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, * works well even when n is small. */ reldistinct *= - (1 - pow((rel->tuples - rel->rows) / rel->tuples, + (1 - pow((rel->tuples - this_rel_input_rows) / rel->tuples, rel->tuples / reldistinct)); } reldistinct = clamp_row_est(reldistinct); @@ -6621,8 +6642,10 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, double numIndexTuples; Cost descentCost; List *indexBoundQuals; + List *prefixBoundQuals; int indexcol; bool eqQualHere; + bool stillEq; bool found_saop; bool found_is_null_op; double num_sa_scans; @@ -6646,9 +6669,11 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, * considered to act the same as it normally does. */ indexBoundQuals = NIL; + prefixBoundQuals = NIL; indexcol = 0; eqQualHere = false; found_saop = false; + stillEq = true; found_is_null_op = false; num_sa_scans = 1; foreach(lc, path->indexclauses) @@ -6660,11 +6685,18 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, { /* Beginning of a new column's quals */ if (!eqQualHere) - break; /* done if no '=' qual for indexcol */ + { + stillEq = false; + /* done if no '=' qual for indexcol and we're past the skip prefix */ + if (path->indexskipprefix <= indexcol) + break; + } eqQualHere = false; indexcol++; + while (indexcol != iclause->indexcol && path->indexskipprefix > indexcol) + indexcol++; if (indexcol != iclause->indexcol) - break; /* no quals at all for indexcol */ + break; /* no quals at all for indexcol */ } /* Examine each indexqual associated with this index clause */ @@ -6696,7 +6728,7 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, clause_op = saop->opno; found_saop = true; /* count number of SA scans induced by indexBoundQuals only */ - if (alength > 1) + if (alength > 1 && stillEq) num_sa_scans *= alength; } else if (IsA(clause, NullTest)) @@ -6724,7 +6756,14 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, eqQualHere = true; } - indexBoundQuals = lappend(indexBoundQuals, rinfo); + /* we keep two lists here, one with all quals up until the prefix + * and one with only the quals until the first inequality. + * we need the list with prefixes later + */ + if (stillEq) + indexBoundQuals = lappend(indexBoundQuals, rinfo); + if (path->indexskipprefix > 0) + prefixBoundQuals = lappend(prefixBoundQuals, rinfo); } } @@ -6750,7 +6789,10 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, * index-bound quals to produce a more accurate idea of the number of * rows covered by the bound conditions. */ - selectivityQuals = add_predicate_to_index_quals(index, indexBoundQuals); + if (path->indexskipprefix > 0) + selectivityQuals = add_predicate_to_index_quals(index, prefixBoundQuals); + else + selectivityQuals = add_predicate_to_index_quals(index, indexBoundQuals); btreeSelectivity = clauselist_selectivity(root, selectivityQuals, index->rel->relid, @@ -6760,7 +6802,7 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, /* * As in genericcostestimate(), we have to adjust for any - * ScalarArrayOpExpr quals included in indexBoundQuals, and then round + * ScalarArrayOpExpr quals included in prefixBoundQuals, and then round * to integer. */ numIndexTuples = rint(numIndexTuples / num_sa_scans); @@ -6806,6 +6848,99 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, costs.indexStartupCost += descentCost; costs.indexTotalCost += costs.num_sa_scans * descentCost; + /* + * Add extra costs for using an index skip scan. + * The index skip scan could have significantly lower cost until now, + * due to the different row estimation used (all the quals up to prefix, + * rather than all the quals up to the first non-equality operator). + * However, there are extra costs incurred for + * a) setting up the scan + * b) doing additional scans from root + * c) small extra cost per tuple comparison + * We add those here + */ + if (path->indexskipprefix > 0) + { + List *exprlist = NULL; + double numgroups_estimate; + int i = 0; + ListCell *indexpr_item = list_head(path->indexinfo->indexprs); + List *selectivityQuals; + Selectivity btreeSelectivity; + double estimatedIndexTuplesNoPrefix; + + /* some rather arbitrary extra cost for preprocessing structures needed for skip scan */ + costs.indexStartupCost += 200.0 * cpu_operator_cost; + costs.indexTotalCost += 200.0 * cpu_operator_cost; + + /* + * In order to reliably get a cost estimation for the number of scans we have to do from root, + * we need some estimation on the number of distinct prefixes that exist. Therefore, we need + * a different selectivity approximation (this time we do need to use the clauses until the first + * non-equality operator). Using that, we can estimate the number of groups + */ + for (i = 0; i < path->indexinfo->nkeycolumns && i < path->indexskipprefix; i++) + { + Expr *expr = NULL; + int attr = path->indexinfo->indexkeys[i]; + if(attr > 0) + { + TargetEntry *tentry = get_tle_by_resno(path->indexinfo->indextlist, i + 1); + Assert(tentry != NULL); + expr = tentry->expr; + } + else if (attr == 0) + { + /* Expression index */ + expr = lfirst(indexpr_item); + indexpr_item = lnext(path->indexinfo->indexprs, indexpr_item); + } + else /* attr < 0 */ + { + /* Index on system column is not supported */ + Assert(false); + } + + exprlist = lappend(exprlist, expr); + } + + selectivityQuals = add_predicate_to_index_quals(index, indexBoundQuals); + + btreeSelectivity = clauselist_selectivity(root, selectivityQuals, + index->rel->relid, + JOIN_INNER, + NULL); + estimatedIndexTuplesNoPrefix = btreeSelectivity * index->rel->tuples; + + /* + * As in genericcostestimate(), we have to adjust for any + * ScalarArrayOpExpr quals included in prefixBoundQuals, and then round + * to integer. + */ + estimatedIndexTuplesNoPrefix = rint(estimatedIndexTuplesNoPrefix / num_sa_scans); + + numgroups_estimate = estimate_num_groups_internal( + root, exprlist, estimatedIndexTuplesNoPrefix, + estimatedIndexTuplesNoPrefix, NULL, NULL); + + /* + * For each distinct prefix value we add descending cost as. + * This is similar to the startup cost calculation for regular scans. + * We can do at most 2 scans from root per distinct prefix, so multiply by 2. + * Also add some CPU processing cost per page that we need to process, plus + * some additional one-time cost for scanning the leaf page. This is a more + * expensive estimation than the per-page cpu cost for the regular index scan. + * This is intentional, because the index skip scan does more processing on + * the leaf page. + */ + if (index->tuples > 0) + descentCost = ceil(log(index->tuples) / log(2.0)) * cpu_operator_cost * 2; + else + descentCost = 0; + descentCost += (index->tree_height + 1) * 50.0 * cpu_operator_cost * 2 + 200 * cpu_operator_cost; + costs.indexTotalCost += costs.num_sa_scans * descentCost * numgroups_estimate; + } + /* * If we can get an estimate of the first column's ordering correlation C * from pg_statistic, estimate the index correlation as C for a diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 0343b2e1f6..da4c933166 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -49,7 +49,8 @@ extern IndexPath *create_index_path(PlannerInfo *root, bool indexonly, Relids required_outer, double loop_count, - bool partial_path); + bool partial_path, + int skip_prefix); extern BitmapHeapPath *create_bitmap_heap_path(PlannerInfo *root, RelOptInfo *rel, Path *bitmapqual, -- 2.29.2