From 9e247a70a72c42a4ae2c69ad4b8483eb477eacc2 Mon Sep 17 00:00:00 2001 From: Dmitrii Dolgov <9erthalion6@gmail.com> Date: Mon, 8 Jun 2020 20:33:56 +0200 Subject: [PATCH 3/5] Extend UniqueKeys Prepares index skip scan implementation using UniqueKeys. Allows to specify what are the "requested" keys that should be unique, and add them to necessary Paths to make them useful later. Proposed by David Rowley, contains few bits out of previous version from Jesper Pedersen. --- src/backend/optimizer/path/pathkeys.c | 59 +++++++++++++++++++++++ src/backend/optimizer/path/uniquekeys.c | 63 +++++++++++++++++++++++++ src/backend/optimizer/plan/planner.c | 36 +++++++++++++- src/backend/optimizer/util/pathnode.c | 32 +++++++++---- src/include/nodes/pathnodes.h | 5 ++ src/include/optimizer/pathnode.h | 1 + src/include/optimizer/paths.h | 8 ++++ 7 files changed, 194 insertions(+), 10 deletions(-) diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 9022c77dac..9b7cdce350 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -29,6 +29,7 @@ #include "utils/lsyscache.h" +static bool pathkey_is_unique(PathKey *new_pathkey, List *pathkeys); static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys); static bool matches_boolean_partition_clause(RestrictInfo *rinfo, RelOptInfo *partrel, @@ -95,6 +96,29 @@ make_canonical_pathkey(PlannerInfo *root, return pk; } +/* + * pathkey_is_unique + * Checks if the new pathkey's equivalence class is the same as that of + * any existing member of the pathkey list. + */ +static bool +pathkey_is_unique(PathKey *new_pathkey, List *pathkeys) +{ + EquivalenceClass *new_ec = new_pathkey->pk_eclass; + ListCell *lc; + + /* If same EC already is already in the list, then not unique */ + foreach(lc, pathkeys) + { + PathKey *old_pathkey = (PathKey *) lfirst(lc); + + if (new_ec == old_pathkey->pk_eclass) + return false; + } + + return true; +} + /* * pathkey_is_redundant * Is a pathkey redundant with one already in the given list? @@ -1151,6 +1175,41 @@ make_pathkeys_for_sortclauses(PlannerInfo *root, return pathkeys; } +/* + * make_pathkeys_for_uniquekeyclauses + * Generate a pathkeys list to be used for uniquekey clauses + */ +List * +make_pathkeys_for_uniquekeys(PlannerInfo *root, + List *sortclauses, + List *tlist) +{ + List *pathkeys = NIL; + ListCell *l; + + foreach(l, sortclauses) + { + SortGroupClause *sortcl = (SortGroupClause *) lfirst(l); + Expr *sortkey; + PathKey *pathkey; + + sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist); + Assert(OidIsValid(sortcl->sortop)); + pathkey = make_pathkey_from_sortop(root, + sortkey, + root->nullable_baserels, + sortcl->sortop, + sortcl->nulls_first, + sortcl->tleSortGroupRef, + true); + + if (pathkey_is_unique(pathkey, pathkeys)) + pathkeys = lappend(pathkeys, pathkey); + } + + return pathkeys; +} + /**************************************************************************** * PATHKEYS AND MERGECLAUSES ****************************************************************************/ diff --git a/src/backend/optimizer/path/uniquekeys.c b/src/backend/optimizer/path/uniquekeys.c index ca40c40858..ab4b1d1939 100644 --- a/src/backend/optimizer/path/uniquekeys.c +++ b/src/backend/optimizer/path/uniquekeys.c @@ -1132,3 +1132,66 @@ add_combined_uniquekey(PlannerInfo *root, } return false; } + +List* +build_uniquekeys(PlannerInfo *root, List *sortclauses) +{ + List *result = NIL; + List *sortkeys; + ListCell *l; + List *exprs = NIL; + + sortkeys = make_pathkeys_for_uniquekeys(root, + sortclauses, + root->processed_tlist); + + /* Create a uniquekey and add it to the list */ + foreach(l, sortkeys) + { + PathKey *pathkey = (PathKey *) lfirst(l); + EquivalenceClass *ec = pathkey->pk_eclass; + EquivalenceMember *mem = (EquivalenceMember*) lfirst(list_head(ec->ec_members)); + if (EC_MUST_BE_REDUNDANT(ec)) + continue; + exprs = lappend(exprs, mem->em_expr); + } + + result = lappend(result, makeUniqueKey(exprs, false)); + + return result; +} + +bool +query_has_uniquekeys_for(PlannerInfo *root, List *pathuniquekeys, + bool allow_multinulls) +{ + ListCell *lc; + ListCell *lc2; + + /* root->query_uniquekeys are the requested DISTINCT clauses on query level + * pathuniquekeys are the unique keys on current path. + * All requested query_uniquekeys must be satisfied by the pathuniquekeys + */ + foreach(lc, root->query_uniquekeys) + { + UniqueKey *query_ukey = lfirst_node(UniqueKey, lc); + bool satisfied = false; + foreach(lc2, pathuniquekeys) + { + UniqueKey *ukey = lfirst_node(UniqueKey, lc2); + if (ukey->multi_nullvals && !allow_multinulls) + continue; + if (list_length(ukey->exprs) == 0 && + list_length(query_ukey->exprs) != 0) + continue; + if (list_is_subset(ukey->exprs, query_ukey->exprs)) + { + satisfied = true; + break; + } + } + if (!satisfied) + return false; + } + return true; +} diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index ff9f6df857..5ae2475400 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -3082,12 +3082,18 @@ standard_qp_callback(PlannerInfo *root, void *extra) */ if (qp_extra->groupClause && grouping_is_sortable(qp_extra->groupClause)) + { root->group_pathkeys = make_pathkeys_for_sortclauses(root, qp_extra->groupClause, tlist); + root->query_uniquekeys = build_uniquekeys(root, parse->distinctClause); + } else + { root->group_pathkeys = NIL; + root->query_uniquekeys = NIL; + } /* We consider only the first (bottom) window in pathkeys logic */ if (activeWindows != NIL) @@ -4497,13 +4503,19 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel, Path *path = (Path *) lfirst(lc); if (pathkeys_contained_in(needed_pathkeys, path->pathkeys)) - { add_path(distinct_rel, (Path *) create_upper_unique_path(root, distinct_rel, path, list_length(root->distinct_pathkeys), numDistinctRows)); - } + } + + foreach(lc, input_rel->unique_pathlist) + { + Path *path = (Path *) lfirst(lc); + + if (query_has_uniquekeys_for(root, needed_pathkeys, false)) + add_path(distinct_rel, path); } /* For explicit-sort case, always use the more rigorous clause */ @@ -7118,6 +7130,26 @@ apply_scanjoin_target_to_paths(PlannerInfo *root, } } + foreach(lc, rel->unique_pathlist) + { + Path *subpath = (Path *) lfirst(lc); + + /* Shouldn't have any parameterized paths anymore */ + Assert(subpath->param_info == NULL); + + if (tlist_same_exprs) + subpath->pathtarget->sortgrouprefs = + scanjoin_target->sortgrouprefs; + else + { + Path *newpath; + + newpath = (Path *) create_projection_path(root, rel, subpath, + scanjoin_target); + lfirst(lc) = newpath; + } + } + /* * Now, if final scan/join target contains SRFs, insert ProjectSetPath(s) * atop each existing path. (Note that this function doesn't look at the diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 5c32c96b71..abb77d867e 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -416,10 +416,10 @@ set_cheapest(RelOptInfo *parent_rel) * 'parent_rel' is the relation entry to which the path corresponds. * 'new_path' is a potential path for parent_rel. * - * Returns nothing, but modifies parent_rel->pathlist. + * Returns modified pathlist. */ -void -add_path(RelOptInfo *parent_rel, Path *new_path) +static List * +add_path_to(RelOptInfo *parent_rel, List *pathlist, Path *new_path) { bool accept_new = true; /* unless we find a superior old path */ int insert_at = 0; /* where to insert new item */ @@ -440,7 +440,7 @@ add_path(RelOptInfo *parent_rel, Path *new_path) * for more than one old path to be tossed out because new_path dominates * it. */ - foreach(p1, parent_rel->pathlist) + foreach(p1, pathlist) { Path *old_path = (Path *) lfirst(p1); bool remove_old = false; /* unless new proves superior */ @@ -584,8 +584,7 @@ add_path(RelOptInfo *parent_rel, Path *new_path) */ if (remove_old) { - parent_rel->pathlist = foreach_delete_current(parent_rel->pathlist, - p1); + pathlist = foreach_delete_current(pathlist, p1); /* * Delete the data pointed-to by the deleted cell, if possible @@ -612,8 +611,7 @@ add_path(RelOptInfo *parent_rel, Path *new_path) if (accept_new) { /* Accept the new path: insert it at proper place in pathlist */ - parent_rel->pathlist = - list_insert_nth(parent_rel->pathlist, insert_at, new_path); + pathlist = list_insert_nth(pathlist, insert_at, new_path); } else { @@ -621,6 +619,23 @@ add_path(RelOptInfo *parent_rel, Path *new_path) if (!IsA(new_path, IndexPath)) pfree(new_path); } + + return pathlist; +} + +void +add_path(RelOptInfo *parent_rel, Path *new_path) +{ + parent_rel->pathlist = add_path_to(parent_rel, + parent_rel->pathlist, new_path); +} + +void +add_unique_path(RelOptInfo *parent_rel, Path *new_path) +{ + parent_rel->unique_pathlist = add_path_to(parent_rel, + parent_rel->unique_pathlist, + new_path); } /* @@ -2662,6 +2677,7 @@ create_projection_path(PlannerInfo *root, pathnode->path.pathkeys = subpath->pathkeys; pathnode->subpath = subpath; + pathnode->path.uniquekeys = subpath->uniquekeys; /* * We might not need a separate Result node. If the input plan node type diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 57e80a47d2..1de5095e74 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -293,6 +293,7 @@ struct PlannerInfo List *query_pathkeys; /* desired pathkeys for query_planner() */ + List *query_uniquekeys; /* unique keys required for the query */ List *group_pathkeys; /* groupClause pathkeys, if any */ List *window_pathkeys; /* pathkeys of bottom window, if any */ List *distinct_pathkeys; /* distinctClause pathkeys, if any */ @@ -695,6 +696,7 @@ typedef struct RelOptInfo List *pathlist; /* Path structures */ List *ppilist; /* ParamPathInfos used in pathlist */ List *partial_pathlist; /* partial Paths */ + List *unique_pathlist; /* unique Paths */ struct Path *cheapest_startup_path; struct Path *cheapest_total_path; struct Path *cheapest_unique_path; @@ -886,6 +888,7 @@ struct IndexOptInfo bool amsearchnulls; /* can AM search for NULL/NOT NULL entries? */ bool amhasgettuple; /* does AM have amgettuple interface? */ bool amhasgetbitmap; /* does AM have amgetbitmap interface? */ + bool amcanskip; /* can AM skip duplicate values? */ bool amcanparallel; /* does AM support parallel scan? */ bool amcanmarkpos; /* does AM support mark/restore? */ /* Rather than include amapi.h here, we declare amcostestimate like this */ @@ -1220,6 +1223,8 @@ typedef struct Path List *pathkeys; /* sort ordering of path's output */ /* pathkeys is a List of PathKey nodes; see above */ + + List *uniquekeys; /* the unique keys, or NIL if none */ } Path; /* Macro for extracting a path's parameterization relids; beware double eval */ diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 620eeda2d6..bb6d730e93 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -27,6 +27,7 @@ extern int compare_fractional_path_costs(Path *path1, Path *path2, double fraction); extern void set_cheapest(RelOptInfo *parent_rel); extern void add_path(RelOptInfo *parent_rel, Path *new_path); +extern void add_unique_path(RelOptInfo *parent_rel, Path *new_path); extern bool add_path_precheck(RelOptInfo *parent_rel, Cost startup_cost, Cost total_cost, List *pathkeys, Relids required_outer); diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 28db3c59cd..16bb5e0eea 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -229,6 +229,9 @@ extern List *build_join_pathkeys(PlannerInfo *root, extern List *make_pathkeys_for_sortclauses(PlannerInfo *root, List *sortclauses, List *tlist); +extern List *make_pathkeys_for_uniquekeys(PlannerInfo *root, + List *sortclauses, + List *tlist); extern void initialize_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo); extern void update_mergeclause_eclasses(PlannerInfo *root, @@ -296,6 +299,11 @@ extern bool relation_has_uniquekeys_for(PlannerInfo *root, RelOptInfo *rel, List *exprs, bool allow_multinulls); +extern bool query_has_uniquekeys_for(PlannerInfo *root, + List *exprs, + bool allow_multinulls); extern bool relation_is_onerow(RelOptInfo *rel); +extern List *build_uniquekeys(PlannerInfo *root, List *sortclauses); + #endif /* PATHS_H */ -- 2.33.1