From c5023315eaaf9e1b4e338ce488c92f2e85f56543 Mon Sep 17 00:00:00 2001 From: Dmitrii Dolgov <9erthalion6@gmail.com> Date: Mon, 8 Jun 2020 20:33:56 +0200 Subject: [PATCH 3/5] Extend UniqueKeys Prepares index skip scan implementation using UniqueKeys. Allows to specify what are the "requested" keys that should be unique, and add them to necessary Paths to make them useful later. Proposed by David Rowley, contains few bits out of previous version from Jesper Pedersen. --- src/backend/optimizer/path/pathkeys.c | 59 +++++++++++++++++++++++ src/backend/optimizer/path/uniquekeys.c | 63 +++++++++++++++++++++++++ src/backend/optimizer/plan/planner.c | 36 +++++++++++++- src/backend/optimizer/util/pathnode.c | 32 +++++++++---- src/include/nodes/pathnodes.h | 5 ++ src/include/optimizer/pathnode.h | 1 + src/include/optimizer/paths.h | 8 ++++ 7 files changed, 194 insertions(+), 10 deletions(-) diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 8b267de06f..ad4fe19872 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -29,6 +29,7 @@ #include "utils/lsyscache.h" +static bool pathkey_is_unique(PathKey *new_pathkey, List *pathkeys); static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys); static bool matches_boolean_partition_clause(RestrictInfo *rinfo, RelOptInfo *partrel, @@ -95,6 +96,29 @@ make_canonical_pathkey(PlannerInfo *root, return pk; } +/* + * pathkey_is_unique + * Checks if the new pathkey's equivalence class is the same as that of + * any existing member of the pathkey list. + */ +static bool +pathkey_is_unique(PathKey *new_pathkey, List *pathkeys) +{ + EquivalenceClass *new_ec = new_pathkey->pk_eclass; + ListCell *lc; + + /* If same EC already is already in the list, then not unique */ + foreach(lc, pathkeys) + { + PathKey *old_pathkey = (PathKey *) lfirst(lc); + + if (new_ec == old_pathkey->pk_eclass) + return false; + } + + return true; +} + /* * pathkey_is_redundant * Is a pathkey redundant with one already in the given list? @@ -1151,6 +1175,41 @@ make_pathkeys_for_sortclauses(PlannerInfo *root, return pathkeys; } +/* + * make_pathkeys_for_uniquekeyclauses + * Generate a pathkeys list to be used for uniquekey clauses + */ +List * +make_pathkeys_for_uniquekeys(PlannerInfo *root, + List *sortclauses, + List *tlist) +{ + List *pathkeys = NIL; + ListCell *l; + + foreach(l, sortclauses) + { + SortGroupClause *sortcl = (SortGroupClause *) lfirst(l); + Expr *sortkey; + PathKey *pathkey; + + sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist); + Assert(OidIsValid(sortcl->sortop)); + pathkey = make_pathkey_from_sortop(root, + sortkey, + root->nullable_baserels, + sortcl->sortop, + sortcl->nulls_first, + sortcl->tleSortGroupRef, + true); + + if (pathkey_is_unique(pathkey, pathkeys)) + pathkeys = lappend(pathkeys, pathkey); + } + + return pathkeys; +} + /**************************************************************************** * PATHKEYS AND MERGECLAUSES ****************************************************************************/ diff --git a/src/backend/optimizer/path/uniquekeys.c b/src/backend/optimizer/path/uniquekeys.c index ca40c40858..ab4b1d1939 100644 --- a/src/backend/optimizer/path/uniquekeys.c +++ b/src/backend/optimizer/path/uniquekeys.c @@ -1132,3 +1132,66 @@ add_combined_uniquekey(PlannerInfo *root, } return false; } + +List* +build_uniquekeys(PlannerInfo *root, List *sortclauses) +{ + List *result = NIL; + List *sortkeys; + ListCell *l; + List *exprs = NIL; + + sortkeys = make_pathkeys_for_uniquekeys(root, + sortclauses, + root->processed_tlist); + + /* Create a uniquekey and add it to the list */ + foreach(l, sortkeys) + { + PathKey *pathkey = (PathKey *) lfirst(l); + EquivalenceClass *ec = pathkey->pk_eclass; + EquivalenceMember *mem = (EquivalenceMember*) lfirst(list_head(ec->ec_members)); + if (EC_MUST_BE_REDUNDANT(ec)) + continue; + exprs = lappend(exprs, mem->em_expr); + } + + result = lappend(result, makeUniqueKey(exprs, false)); + + return result; +} + +bool +query_has_uniquekeys_for(PlannerInfo *root, List *pathuniquekeys, + bool allow_multinulls) +{ + ListCell *lc; + ListCell *lc2; + + /* root->query_uniquekeys are the requested DISTINCT clauses on query level + * pathuniquekeys are the unique keys on current path. + * All requested query_uniquekeys must be satisfied by the pathuniquekeys + */ + foreach(lc, root->query_uniquekeys) + { + UniqueKey *query_ukey = lfirst_node(UniqueKey, lc); + bool satisfied = false; + foreach(lc2, pathuniquekeys) + { + UniqueKey *ukey = lfirst_node(UniqueKey, lc2); + if (ukey->multi_nullvals && !allow_multinulls) + continue; + if (list_length(ukey->exprs) == 0 && + list_length(query_ukey->exprs) != 0) + continue; + if (list_is_subset(ukey->exprs, query_ukey->exprs)) + { + satisfied = true; + break; + } + } + if (!satisfied) + return false; + } + return true; +} diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 3cadd22e3e..ea2408c13f 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -3082,12 +3082,18 @@ standard_qp_callback(PlannerInfo *root, void *extra) */ if (qp_extra->groupClause && grouping_is_sortable(qp_extra->groupClause)) + { root->group_pathkeys = make_pathkeys_for_sortclauses(root, qp_extra->groupClause, tlist); + root->query_uniquekeys = build_uniquekeys(root, parse->distinctClause); + } else + { root->group_pathkeys = NIL; + root->query_uniquekeys = NIL; + } /* We consider only the first (bottom) window in pathkeys logic */ if (activeWindows != NIL) @@ -4497,13 +4503,19 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel, Path *path = (Path *) lfirst(lc); if (pathkeys_contained_in(needed_pathkeys, path->pathkeys)) - { add_path(distinct_rel, (Path *) create_upper_unique_path(root, distinct_rel, path, list_length(root->distinct_pathkeys), numDistinctRows)); - } + } + + foreach(lc, input_rel->unique_pathlist) + { + Path *path = (Path *) lfirst(lc); + + if (query_has_uniquekeys_for(root, needed_pathkeys, false)) + add_path(distinct_rel, path); } /* For explicit-sort case, always use the more rigorous clause */ @@ -7118,6 +7130,26 @@ apply_scanjoin_target_to_paths(PlannerInfo *root, } } + foreach(lc, rel->unique_pathlist) + { + Path *subpath = (Path *) lfirst(lc); + + /* Shouldn't have any parameterized paths anymore */ + Assert(subpath->param_info == NULL); + + if (tlist_same_exprs) + subpath->pathtarget->sortgrouprefs = + scanjoin_target->sortgrouprefs; + else + { + Path *newpath; + + newpath = (Path *) create_projection_path(root, rel, subpath, + scanjoin_target); + lfirst(lc) = newpath; + } + } + /* * Now, if final scan/join target contains SRFs, insert ProjectSetPath(s) * atop each existing path. (Note that this function doesn't look at the diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index e53d381e19..74e100e5a9 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -416,10 +416,10 @@ set_cheapest(RelOptInfo *parent_rel) * 'parent_rel' is the relation entry to which the path corresponds. * 'new_path' is a potential path for parent_rel. * - * Returns nothing, but modifies parent_rel->pathlist. + * Returns modified pathlist. */ -void -add_path(RelOptInfo *parent_rel, Path *new_path) +static List * +add_path_to(RelOptInfo *parent_rel, List *pathlist, Path *new_path) { bool accept_new = true; /* unless we find a superior old path */ int insert_at = 0; /* where to insert new item */ @@ -440,7 +440,7 @@ add_path(RelOptInfo *parent_rel, Path *new_path) * for more than one old path to be tossed out because new_path dominates * it. */ - foreach(p1, parent_rel->pathlist) + foreach(p1, pathlist) { Path *old_path = (Path *) lfirst(p1); bool remove_old = false; /* unless new proves superior */ @@ -584,8 +584,7 @@ add_path(RelOptInfo *parent_rel, Path *new_path) */ if (remove_old) { - parent_rel->pathlist = foreach_delete_current(parent_rel->pathlist, - p1); + pathlist = foreach_delete_current(pathlist, p1); /* * Delete the data pointed-to by the deleted cell, if possible @@ -612,8 +611,7 @@ add_path(RelOptInfo *parent_rel, Path *new_path) if (accept_new) { /* Accept the new path: insert it at proper place in pathlist */ - parent_rel->pathlist = - list_insert_nth(parent_rel->pathlist, insert_at, new_path); + pathlist = list_insert_nth(pathlist, insert_at, new_path); } else { @@ -621,6 +619,23 @@ add_path(RelOptInfo *parent_rel, Path *new_path) if (!IsA(new_path, IndexPath)) pfree(new_path); } + + return pathlist; +} + +void +add_path(RelOptInfo *parent_rel, Path *new_path) +{ + parent_rel->pathlist = add_path_to(parent_rel, + parent_rel->pathlist, new_path); +} + +void +add_unique_path(RelOptInfo *parent_rel, Path *new_path) +{ + parent_rel->unique_pathlist = add_path_to(parent_rel, + parent_rel->unique_pathlist, + new_path); } /* @@ -2661,6 +2676,7 @@ create_projection_path(PlannerInfo *root, pathnode->path.pathkeys = subpath->pathkeys; pathnode->subpath = subpath; + pathnode->path.uniquekeys = subpath->uniquekeys; /* * We might not need a separate Result node. If the input plan node type diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 0c758d10c5..3ae6b91576 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -293,6 +293,7 @@ struct PlannerInfo List *query_pathkeys; /* desired pathkeys for query_planner() */ + List *query_uniquekeys; /* unique keys required for the query */ List *group_pathkeys; /* groupClause pathkeys, if any */ List *window_pathkeys; /* pathkeys of bottom window, if any */ List *distinct_pathkeys; /* distinctClause pathkeys, if any */ @@ -695,6 +696,7 @@ typedef struct RelOptInfo List *pathlist; /* Path structures */ List *ppilist; /* ParamPathInfos used in pathlist */ List *partial_pathlist; /* partial Paths */ + List *unique_pathlist; /* unique Paths */ struct Path *cheapest_startup_path; struct Path *cheapest_total_path; struct Path *cheapest_unique_path; @@ -886,6 +888,7 @@ struct IndexOptInfo bool amsearchnulls; /* can AM search for NULL/NOT NULL entries? */ bool amhasgettuple; /* does AM have amgettuple interface? */ bool amhasgetbitmap; /* does AM have amgetbitmap interface? */ + bool amcanskip; /* can AM skip duplicate values? */ bool amcanparallel; /* does AM support parallel scan? */ bool amcanmarkpos; /* does AM support mark/restore? */ /* Rather than include amapi.h here, we declare amcostestimate like this */ @@ -1220,6 +1223,8 @@ typedef struct Path List *pathkeys; /* sort ordering of path's output */ /* pathkeys is a List of PathKey nodes; see above */ + + List *uniquekeys; /* the unique keys, or NIL if none */ } Path; /* Macro for extracting a path's parameterization relids; beware double eval */ diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index f704d39980..facb2dfe74 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -27,6 +27,7 @@ extern int compare_fractional_path_costs(Path *path1, Path *path2, double fraction); extern void set_cheapest(RelOptInfo *parent_rel); extern void add_path(RelOptInfo *parent_rel, Path *new_path); +extern void add_unique_path(RelOptInfo *parent_rel, Path *new_path); extern bool add_path_precheck(RelOptInfo *parent_rel, Cost startup_cost, Cost total_cost, List *pathkeys, Relids required_outer); diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 754dfcd549..e71e65264a 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -229,6 +229,9 @@ extern List *build_join_pathkeys(PlannerInfo *root, extern List *make_pathkeys_for_sortclauses(PlannerInfo *root, List *sortclauses, List *tlist); +extern List *make_pathkeys_for_uniquekeys(PlannerInfo *root, + List *sortclauses, + List *tlist); extern void initialize_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo); extern void update_mergeclause_eclasses(PlannerInfo *root, @@ -296,6 +299,11 @@ extern bool relation_has_uniquekeys_for(PlannerInfo *root, RelOptInfo *rel, List *exprs, bool allow_multinulls); +extern bool query_has_uniquekeys_for(PlannerInfo *root, + List *exprs, + bool allow_multinulls); extern bool relation_is_onerow(RelOptInfo *rel); +extern List *build_uniquekeys(PlannerInfo *root, List *sortclauses); + #endif /* PATHS_H */ -- 2.29.2