diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index a35c881..a3ef94c 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -21,6 +21,10 @@ #include "optimizer/cost.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" +#include "nodes/relation.h" +#include "optimizer/clauses.h" +#include "optimizer/plancat.h" +#include "optimizer/restrictinfo.h" /* Hook for plugins to get control in add_paths_to_joinrel() */ set_join_pathlist_hook_type set_join_pathlist_hook = NULL; @@ -37,6 +41,15 @@ static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel, static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra); + +static List *get_var_nodes_recurse(Expr *one_check_constr, List *var_nodes); +static List *get_replaced_clause_constr(PlannerInfo *root, OpExpr *one_joinclause, + List *restrictinfo, RelOptInfo *relinfo); +static void try_hashjoin_pushdown(PlannerInfo *root, RelOptInfo *joinrel, + Path *outer_path, Path *inner_path, + List *hashclauses, JoinType jointype, + JoinPathExtraData *extra); + static List *select_mergejoin_clauses(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, @@ -512,6 +525,384 @@ try_mergejoin_path(PlannerInfo *root, } } +static List * +get_var_nodes_recurse(Expr *one_check_constr, + List *var_nodes) +{ + if (IsA(one_check_constr, OpExpr)) + { + OpExpr *op = (OpExpr *) one_check_constr; + ListCell *l; + foreach(l, op->args) + { + var_nodes = get_var_nodes_recurse((Expr *) lfirst(l), var_nodes); + } + } + else + if (IsA(one_check_constr, Var)) + { + var_nodes = lappend(var_nodes, one_check_constr); + } + + return var_nodes; +} + +static List * +get_replaced_clause_constr(PlannerInfo *root, + OpExpr *one_joinclause, + List *restrictinfo, + RelOptInfo *relinfo) +{ + List *result = restrictinfo; + RangeTblEntry *childRTE = root->simple_rte_array[relinfo->relid]; + List *check_constr = + get_relation_constraints(root, childRTE->relid, relinfo, false); + ListCell *l_vn, *l_constr; + Var *var_args[2]; + int var_index; + + Assert(one_joinclause->opno == 96); + Assert(list_length(one_joinclause->args) == 2); + + for (var_index = 0; var_index < 2; var_index++) + { + var_args[var_index] = (Var *) list_nth(one_joinclause->args, var_index); + } + var_index = -1; + + if (list_length(check_constr) == 0) + { + ListCell *l_vn; + + foreach(l_vn, relinfo->reltargetlist) + { + Var *vn_temp = (Var *) lfirst(l_vn); + ListCell *l_app; + + foreach (l_app, root->append_rel_list) + { + AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l_app); + + if (appinfo->child_relid != vn_temp->varno) + continue; + + if (var_args[0]->varno == appinfo->parent_relid) + var_index = 0; + else + if (var_args[1]->varno == appinfo->parent_relid) + var_index = 1; + + if (var_index >= 0) + break; + } + + Assert(var_index >= 0); + + if (var_args[var_index]->varattno != vn_temp->varattno) + continue; + + var_args[var_index]->varno = vn_temp->varno; + var_args[var_index]->varattno = vn_temp->varattno; + break; + } + return restrictinfo; + } + + foreach (l_constr, check_constr) + { + List *var_nodes = + get_var_nodes_recurse((Expr *) lfirst(l_constr), NIL); + + foreach (l_vn, var_nodes) + { + Var *vn_temp = (Var *) lfirst(l_vn); + ListCell *l_app; + + foreach (l_app, root->append_rel_list) + { + AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l_app); + + if (appinfo->child_relid != vn_temp->varno) + continue; + + if (var_index < 0) + { + if (appinfo->parent_relid == var_args[0]->varno) + var_index = 1; + else + if (appinfo->parent_relid == var_args[1]->varno) + var_index = 0; + } + + if (var_args[1 - var_index]->varno == appinfo->parent_relid) + { + var_args[1 - var_index]->varno = vn_temp->varno; + var_args[1 - var_index]->varattno = vn_temp->varattno; + } + + vn_temp->varno = var_args[var_index]->varno; + vn_temp->varattno = var_args[var_index]->varattno; + } + } + } + + result = list_concat(result, make_restrictinfos_from_actual_clauses(root, check_constr)); + + return result; +} + +/* + Try to push down HashPath under AppendPath. +*/ +static void +try_hashjoin_pushdown(PlannerInfo *root, + RelOptInfo *joinrel, + Path *outer_path, + Path *inner_path, + List *hashclauses, + JoinType jointype, + JoinPathExtraData *extra) +{ + AppendPath *append_path; + Path *other_path; + ListCell *l; + + List *new_subpaths = NIL; + + Assert(outer_path != inner_path); + + if (IS_OUTER_JOIN(jointype)) + { + /* TODO : Not supported yet, but we must support this pattern... */ + elog(DEBUG1, "This join type is not supported... : %d", jointype); + return; + } + + if (!IsA(outer_path, AppendPath)) + { + /* Outer path must be AppendPath */ + elog(DEBUG1, "Outer path must be AppendPath."); + return; + } + + /* Check join clauses */ + foreach (l, hashclauses) + { + RestrictInfo *rinfo = lfirst(l); + OpExpr *opexpr; + ListCell *ll; + + if (!is_opclause(rinfo->clause)) + { + elog(DEBUG1, "This join clause is not supported... : %d", (int) rinfo->clause->type); + return; + } + + opexpr = (OpExpr *)(rinfo->clause); + if (opexpr->opno != 96) /* "96" means "=" for int4 */ + { + elog(DEBUG1, "This operator is not supported... : %d", (int) opexpr->opno); + return; + } + + foreach (ll, opexpr->args) + { + if (!IsA(lfirst(ll), Var)) + { + elog(DEBUG1, "This expression is not supported... : %d", (int) ((Expr *) lfirst(ll))->type); + return; + } + } + } + + append_path = (AppendPath *) outer_path; + other_path = inner_path; + + if (other_path->pathtype != T_SeqScan) + { + /* TODO : Not supported yet, but we must support this pattern... */ + elog(DEBUG1, "This pathtype is not supported yet... : %d", (int) other_path->pathtype); + return; + } + + foreach(l, append_path->subpaths) + { + Path *one_of_subpaths = (Path *) lfirst(l); + if (one_of_subpaths->pathtype != T_SeqScan) + { + elog(DEBUG1, "This pathtype is not supported yet... : %d", (int) one_of_subpaths->pathtype); + return; + } + } + + foreach(l, append_path->subpaths) + { + Path *one_of_subpaths = (Path *) lfirst(l); + Path *rewritten_opath; + RelOptInfo *rel_sp = one_of_subpaths->parent; + RelOptInfo *rel_op = other_path->parent; + RelOptInfo *rel_op_rw; + + Relids new_joinrelids; + Relids new_required_outer; + RelOptInfo *new_joinrel; + SpecialJoinInfo sjinfo_data; + List *new_restrictinfos; + List *new_restrictlist; + List *new_hashclauses = NIL; + ListCell *l_clause; + JoinCostWorkspace workspace; + + List *old_join_rel_list = root->join_rel_list; + List **old_join_rel_level = root->join_rel_level; + + /* Create new RelOptInfo for inner path's SeqScan */ + rel_op_rw = makeNode(RelOptInfo); + rel_op_rw->reloptkind = rel_op->reloptkind; + rel_op_rw->relids = bms_copy(rel_op->relids); + /* rows and width will be changed. Do set_baserel_estimates() later. */ + rel_op_rw->consider_startup = rel_op->consider_startup; + rel_op_rw->consider_param_startup = false; + rel_op_rw->reltargetlist = copyObject(rel_op->reltargetlist); + rel_op_rw->pathlist = NIL; + rel_op_rw->ppilist = NIL; + rel_op_rw->cheapest_startup_path = NULL; + rel_op_rw->cheapest_total_path = NULL; + rel_op_rw->cheapest_unique_path = NULL; + rel_op_rw->cheapest_parameterized_paths = NIL; + rel_op_rw->relid = rel_op->relid; + rel_op_rw->rtekind = rel_op->rtekind; + rel_op_rw->min_attr = rel_op->min_attr; + rel_op_rw->max_attr = rel_op->max_attr; + rel_op_rw->attr_needed = (Relids *) + palloc0((rel_op->max_attr - rel_op->min_attr + 1) * sizeof(Relids)); + rel_op_rw->attr_widths = (int32 *) + palloc0((rel_op->max_attr - rel_op->min_attr + 1) * sizeof(int32)); + { + int i; + for (i = 0; i < (rel_op->max_attr - rel_op->min_attr); i++) + { + rel_op_rw->attr_needed[i] = bms_copy(rel_op->attr_needed[i]); + rel_op_rw->attr_widths[i] = rel_op->attr_widths[i]; + } + } + rel_op_rw->lateral_vars = copyObject(rel_op->lateral_vars); + rel_op_rw->lateral_relids = bms_copy(rel_op->lateral_relids); + rel_op_rw->lateral_referencers = bms_copy(rel_op->lateral_referencers); + rel_op_rw->indexlist = copyObject(rel_op->indexlist); + rel_op_rw->pages = rel_op->pages; + rel_op_rw->tuples = rel_op->tuples; + rel_op_rw->allvisfrac = rel_op->allvisfrac; + rel_op_rw->subplan = NULL; + rel_op_rw->subroot = NULL; + rel_op_rw->subplan_params = NIL; + rel_op_rw->serverid = rel_op->serverid; + rel_op_rw->fdwroutine = NULL; + rel_op_rw->fdw_private = NULL; + /* Base restrict List/Cost will be set later. */ + rel_op_rw->joininfo = copyObject(rel_op->joininfo); + rel_op_rw->has_eclass_joins = rel_op->has_eclass_joins; + + /* Estimate rows, width, and costs for inner path's SeqScan */ + new_restrictinfos = copyObject(rel_op->baserestrictinfo); + foreach (l_clause, hashclauses) + { + OpExpr *new_clause = copyObject(((RestrictInfo *) lfirst(l_clause))->clause); + + /* Create filters of inner path from CHECK() constraints of outer path */ + new_restrictinfos = + get_replaced_clause_constr(root, new_clause, + new_restrictinfos, rel_sp); + new_hashclauses = lappend(new_hashclauses, + make_restrictinfo(new_clause, true, false, false, NULL, NULL, NULL)); + } + rel_op_rw->baserestrictinfo = new_restrictinfos; + set_baserel_size_estimates(root, rel_op_rw); + + sjinfo_data.type = T_SpecialJoinInfo; + sjinfo_data.min_lefthand = rel_sp->relids; + sjinfo_data.min_righthand = rel_op_rw->relids; + sjinfo_data.syn_lefthand = rel_sp->relids; + sjinfo_data.syn_righthand = rel_op_rw->relids; + sjinfo_data.jointype = JOIN_INNER; + /* we don't bother trying to make the remaining fields valid */ + sjinfo_data.lhs_strict = false; + sjinfo_data.delay_upper_joins = false; + sjinfo_data.semi_can_btree = false; + sjinfo_data.semi_can_hash = false; + sjinfo_data.semi_operators = NIL; + sjinfo_data.semi_rhs_exprs = NIL; + + /* Create NEW SeqScan path for inner path */ + rewritten_opath = create_seqscan_path(root, rel_op_rw, rel_op_rw->lateral_relids); + add_path(rel_op_rw, rewritten_opath); + + /* Create New HashPath between path under AppendPath and inner SeqScan path */ + new_joinrelids = bms_union(rel_sp->relids, rel_op_rw->relids); + + /* + * For avoidance of failing assertion in allpaths.c. + * We must keep root->join_rel_level[cur_level]->length == 1. + */ + root->join_rel_list = NIL; + root->join_rel_level = NULL; + + new_joinrel = build_join_rel(root, new_joinrelids, rel_sp, rel_op_rw, + &sjinfo_data, &new_restrictlist); + list_free(root->join_rel_list); + + root->join_rel_list = old_join_rel_list; + root->join_rel_level = old_join_rel_level; + + new_required_outer = calc_non_nestloop_required_outer(one_of_subpaths, rewritten_opath); + + initial_cost_hashjoin(root, &workspace, JOIN_INNER, new_hashclauses, + one_of_subpaths, rewritten_opath, &sjinfo_data, NULL); + if (add_path_precheck(joinrel, + workspace.startup_cost, workspace.total_cost, + NIL, new_required_outer)) + { + HashPath *new_hj_path; + + elog(DEBUG1, "add_path_precheck() returned TRUE."); + new_hj_path = create_hashjoin_path(root, + new_joinrel, + JOIN_INNER, + &workspace, + &sjinfo_data, + NULL, + one_of_subpaths, + rewritten_opath, + new_restrictlist, + new_required_outer, + new_hashclauses); + + add_path(new_joinrel, (Path *)new_hj_path); + /* new_hj_path can be clobbered above. */ + if (IsA((Node *)new_hj_path, HashPath)) + new_subpaths = lappend(new_subpaths, new_hj_path); + } + else + { + bms_free(new_required_outer); + } + } + + if (list_length(new_subpaths) > 0) + { + elog(DEBUG1, "Pushdown succeeded."); + add_path(joinrel, (Path *) + create_append_path(joinrel, new_subpaths, NULL)); + set_cheapest(joinrel); + } + else + { + elog(DEBUG1, "Pushdown failed."); + } + + return; +} + /* * try_hashjoin_path * Consider a hash join path; if it appears useful, push it into @@ -543,6 +934,10 @@ try_hashjoin_path(PlannerInfo *root, return; } + /* Try to push HashJoin down under Append */ + try_hashjoin_pushdown(root, joinrel, + outer_path, inner_path, hashclauses, jointype, extra); + /* * Independently of that, add parameterization needed for any * PlaceHolderVars that need to be computed at the join. diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 9442e5f..c137b09 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -54,9 +54,6 @@ get_relation_info_hook_type get_relation_info_hook = NULL; static bool infer_collation_opclass_match(InferenceElem *elem, Relation idxRel, List *idxExprs); static int32 get_rel_data_width(Relation rel, int32 *attr_widths); -static List *get_relation_constraints(PlannerInfo *root, - Oid relationObjectId, RelOptInfo *rel, - bool include_notnull); static List *build_index_tlist(PlannerInfo *root, IndexOptInfo *index, Relation heapRelation); @@ -1022,7 +1019,7 @@ get_relation_data_width(Oid relid, int32 *attr_widths) * run, and in many cases it won't be invoked at all, so there seems no * point in caching the data in RelOptInfo. */ -static List * +List * get_relation_constraints(PlannerInfo *root, Oid relationObjectId, RelOptInfo *rel, bool include_notnull) diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h index 11e7d4d..5246a6c 100644 --- a/src/include/optimizer/plancat.h +++ b/src/include/optimizer/plancat.h @@ -28,6 +28,10 @@ extern PGDLLIMPORT get_relation_info_hook_type get_relation_info_hook; extern void get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, RelOptInfo *rel); +extern List *get_relation_constraints(PlannerInfo *root, + Oid relationObjectId, RelOptInfo *rel, + bool include_notnull); + extern List *infer_arbiter_indexes(PlannerInfo *root); extern void estimate_rel_size(Relation rel, int32 *attr_widths,