diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index c176ff9..63402cd 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -1963,12 +1963,78 @@ _copyOnConflictExpr(const OnConflictExpr *from) /* **************************************************************** * relation.h copy functions * - * We don't support copying RelOptInfo, IndexOptInfo, or Path nodes. + * We don't support copying RelOptInfo, IndexOptInfo or Path node. * There are some subsidiary structs that are useful to copy, though. * **************************************************************** */ /* + * CopyPathFields + */ +static void +CopyPathFields(const Path *from, Path *newnode) +{ + COPY_SCALAR_FIELD(pathtype); + + /* + * We use COPY_SCALAR_FIELDS() for parent instead of COPY_NODE_FIELDS() + * because RelOptInfo contains Path which is made from, so + * jump into the infinite loop. + */ + COPY_SCALAR_FIELD(parent); + + COPY_SCALAR_FIELD(param_info); + + COPY_SCALAR_FIELD(rows); + COPY_SCALAR_FIELD(startup_cost); + COPY_SCALAR_FIELD(total_cost); + + COPY_NODE_FIELD(pathkeys); +} + +/* + * _copyPath + */ +static Path * +_copyPath(const Path *from) +{ + Path *newnode = makeNode(Path); + + CopyPathFields(from, newnode); + + return newnode; +} + +/* + * _copyIndexPath + * XXX Need to make copy function for IndexOptInfo, etc. + */ +static IndexPath * +_copyIndexPath(const IndexPath *from) +{ + IndexPath *newnode = makeNode(IndexPath); + + CopyPathFields(&from->path, &newnode->path); + + COPY_NODE_FIELD(indexinfo); + COPY_NODE_FIELD(indexclauses); + COPY_NODE_FIELD(indexquals); + COPY_NODE_FIELD(indexqualcols); + COPY_NODE_FIELD(indexorderbys); + COPY_NODE_FIELD(indexorderbycols); + COPY_SCALAR_FIELD(indexscandir); + COPY_SCALAR_FIELD(indextotalcost); + COPY_SCALAR_FIELD(indexselectivity); + + return newnode; +} + +/* + * XXX Need to make copy function for BitmapHeapPath + * and TidPath. + */ + +/* * _copyPathKey */ static PathKey * @@ -4506,6 +4572,12 @@ copyObject(const void *from) /* * RELATION NODES */ + case T_Path: + retval = _copyPath(from); + break; + case T_IndexPath: + retval = _copyIndexPath(from); + break; case T_PathKey: retval = _copyPathKey(from); break; diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index a35c881..dd5d38c 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -18,9 +18,22 @@ #include "executor/executor.h" #include "foreign/fdwapi.h" +#include "nodes/nodeFuncs.h" +#include "nodes/nodes.h" +#include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" +#include "optimizer/plancat.h" +#include "optimizer/restrictinfo.h" +#include "rewrite/rewriteManip.h" +#include "utils/lsyscache.h" + +typedef struct +{ + List *joininfo; + bool is_substituted; +} substitution_node_context; /* Hook for plugins to get control in add_paths_to_joinrel() */ set_join_pathlist_hook_type set_join_pathlist_hook = NULL; @@ -45,6 +58,11 @@ static List *select_mergejoin_clauses(PlannerInfo *root, JoinType jointype, bool *mergejoin_allowed); +static void try_join_pushdown(PlannerInfo *root, + RelOptInfo *joinrel, RelOptInfo *outer_rel, + RelOptInfo *inner_rel, + List *restrictlist); + /* * add_paths_to_joinrel @@ -82,6 +100,14 @@ add_paths_to_joinrel(PlannerInfo *root, bool mergejoin_allowed = true; ListCell *lc; + /* + * Try to push Join down under Append + */ + if (!IS_OUTER_JOIN(jointype)) + { + try_join_pushdown(root, joinrel, outerrel, innerrel, restrictlist); + } + extra.restrictlist = restrictlist; extra.mergeclause_list = NIL; extra.sjinfo = sjinfo; @@ -1474,3 +1500,468 @@ select_mergejoin_clauses(PlannerInfo *root, return result_list; } + +/* + * Try to substitute Var node according to join conditions. + * This process is from following steps. + * + * 1. Try to find whether Var node matches to left/right Var node of + * one join condition. + * 2. If found, replace Var node with the opposite expression node of + * the join condition. + * + * For example, let's assume that we have following expression and + * join condition. + * Expression : A.num % 4 = 1 + * Join condition : A.num = B.data + 2 + * In this case, we can get following expression. + * (B.data + 2) % 4 = 1 + */ +static Node * +substitute_node_with_join_cond(Node *node, substitution_node_context *context) +{ + /* Failed to substitute. Abort. */ + if (!context->is_substituted) + return (Node *) copyObject(node); + + if (node == NULL) + return NULL; + + if (IsA(node, Var)) + { + List *join_cond = context->joininfo; + ListCell *lc; + + Assert(list_length(join_cond) > 0); + + foreach (lc, join_cond) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + Expr *expr = rinfo->clause; + + /* + * Make sure whether OpExpr of Join clause means "=". + */ + if (!rinfo->can_join || + !IsA(expr, OpExpr) || + !op_hashjoinable(((OpExpr *) expr)->opno, + exprType(get_leftop(expr)))) + continue; + + if (equal(get_leftop(expr), node)) + { + /* + * This node is equal to LEFT node of join condition, + * thus will be replaced with RIGHT clause. + */ + return (Node *) copyObject(get_rightop(expr)); + } + else + if (equal(get_rightop(expr), node)) + { + /* + * This node is equal to RIGHT node of join condition, + * thus will be replaced with LEFT clause. + */ + return (Node *) copyObject(get_leftop(expr)); + } + } + + /* Unfortunately, substituting is failed. */ + context->is_substituted = false; + return (Node *) copyObject(node); + } + + return expression_tree_mutator(node, substitute_node_with_join_cond, context); +} + +/* + * Create RestrictInfo_List from CHECK() constraints. + * + * This function creates list of RestrictInfo from CHECK() constraints + * according to expression of join clause. + * + * For example, let's assume that we have following CHECK() constraints + * for table A and join clause between table A and B. + * CHECK of table A : 0 <= num AND num <= 100 + * JOIN CLAUSE : A.num = B.data + * In this conditions, we can get below by mathematical substituting. + * 0 <= B.data AND B.data <= 100 + * + * We can use this restrictions to reduce result rows. + * This means that we can make Sort faster by reducing rows in MergeJoin, + * and also means that we can make HashTable smaller in HashJoin to fit + * to smaller work_mem environments. + */ +static List * +create_rinfo_from_check_constr(PlannerInfo *root, List *joininfo, + RelOptInfo *outer_rel, bool *succeed) +{ + List *result = NIL; + RangeTblEntry *childRTE = root->simple_rte_array[outer_rel->relid]; + List *check_constr = + get_relation_constraints(root, childRTE->relid, + outer_rel, false); + ListCell *lc; + substitution_node_context context; + + if (list_length(check_constr) <= 0) + { + *succeed = true; + return NIL; + } + + context.joininfo = joininfo; + context.is_substituted = true; + + /* + * Try to convert CHECK() constraints to filter expressions. + */ + foreach(lc, check_constr) + { + Node *substituted = + expression_tree_mutator((Node *) lfirst(lc), + substitute_node_with_join_cond, + (void *) &context); + + if (!context.is_substituted) + { + *succeed = false; + list_free_deep(check_constr); + return NIL; + } + result = lappend(result, substituted); + } + + Assert(list_length(check_constr) == list_length(result)); + list_free_deep(check_constr); + + return make_restrictinfos_from_actual_clauses(root, result); +} + +/* + * Convert parent's join clauses to child's. + */ +static List * +convert_parent_joinclauses_to_child(PlannerInfo *root, List *join_clauses, + RelOptInfo *outer_rel) +{ + Index parent_relid = + find_childrel_appendrelinfo(root, outer_rel)->parent_relid; + List *clauses_parent = get_actual_clauses(join_clauses); + List *clauses_child = NIL; + ListCell *lc; + + foreach(lc, clauses_parent) + { + Node *one_clause_child = (Node *) copyObject(lfirst(lc)); + + ChangeVarNodes(one_clause_child, parent_relid, outer_rel->relid, 0); + clauses_child = lappend(clauses_child, one_clause_child); + } + + return make_restrictinfos_from_actual_clauses(root, clauses_child); +} + +static inline List * +extract_join_clauses(List *restrictlist, RelOptInfo *outer_prel, + RelOptInfo *inner_rel) +{ + List *result = NIL; + ListCell *lc; + + foreach (lc, restrictlist) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + if (clause_sides_match_join(rinfo, outer_prel, inner_rel)) + result = lappend(result, rinfo); + } + + return result; +} + +/* + * try_join_pushdown + * + * When outer-path of JOIN is AppendPath, we can rewrite path-tree with + * relocation of JoinPath across AppendPath, to generate equivalent + * results, like a diagram below. + * This adjustment gives us a few performance benefits when the relations + * scaned by sub-plan of Append-node have CHECK() constraints - typically, + * configured as partitioned table. + * + * In case of INNER JOIN with equivalent join condition, like A = B, we + * can exclude a part of inner rows that are obviously unreferenced, if + * outer side has CHECK() constraints that contains join keys. + * The CHECK() constraints ensures all the rows within outer relation + * satisfies the condition, in other words, any inner rows that does not + * satisfies the condition (with adjustment using equivalence of join keys) + * never match any outer rows. + * + * Once we can reduce number of inner rows, here are two beneficial scenario. + * 1. HashJoin may avoid split of hash-table even if preload of entire + * inner relation exceeds work_mem. + * 2. MergeJoin may be able to take smaller scale of Sort, because quick-sort + * is O(NlogN) scale problem. Reduction of rows to be sorted on both side + * reduces CPU cost more than liner. + * + * [BEFORE] + * JoinPath ... (parent.X = inner.Y) + * -> AppendPath on parent + * -> ScanPath on child_1 ... CHECK(hash(X) % 3 = 0) + * -> ScanPath on child_2 ... CHECK(hash(X) % 3 = 1) + * -> ScanPath on child_3 ... CHECK(hash(X) % 3 = 2) + * -> ScanPath on inner + * + * [AFTER] + * AppendPath + * -> JoinPath ... (child_1.X = inner.Y) + * -> ScanPath on child_1 ... CHECK(hash(X) % 3 = 0) + * -> ScanPath on inner ... filter (hash(Y) % 3 = 0) + * -> JoinPath ... (child_2.X = inner.Y) + * -> ScanPath on child_2 ... CHECK(hash(X) % 3 = 1) + * -> ScanPath on inner ... filter (hash(Y) % 3 = 1) + * -> JoinPath ... (child_3.X = inner.Y) + * -> ScanPath on child_3 ... CHECK(hash(X) % 3 = 2) + * -> ScanPath on inner ... filter (hash(Y) % 3 = 2) + * + * Point to be focused on is filter condition attached on child relation's + * scan. It is clause of CHECK() constraint, but X is replaced by Y using + * equivalence join condition. + */ +static void +try_join_pushdown(PlannerInfo *root, + RelOptInfo *joinrel, RelOptInfo *outer_rel, + RelOptInfo *inner_rel, + List *restrictlist) +{ + AppendPath *outer_path; + ListCell *lc; + List *joinclauses_parent; + List *alter_append_subpaths = NIL; + + Assert(outer_rel->cheapest_total_path != NULL); + + /* When specified outer path is not an AppendPath, nothing to do here. */ + if (!IsA(outer_rel->cheapest_total_path, AppendPath)) + { + elog(DEBUG1, "Outer path is not an AppendPath. Do nothing."); + return; + } + + outer_path = (AppendPath *) outer_rel->cheapest_total_path; + + if (outer_rel->rtekind != RTE_RELATION) + { + elog(DEBUG1, "Outer Relation is not for table scan. Give up."); + return; + } + + switch (inner_rel->cheapest_total_path->pathtype) + { + case T_SeqScan : + case T_SampleScan : + case T_IndexScan : + case T_IndexOnlyScan : + case T_BitmapHeapScan : + case T_TidScan : + /* Do nothing. No-op */ + break; + default : + { + elog(DEBUG1, "Type of Inner path is not supported yet. Give up."); + return; + } + } + + /* + * Extract join clauses to convert CHECK() constraints. + * We don't have to clobber this list to convert CHECK() constraints, + * so we need to do only once. + */ + joinclauses_parent = extract_join_clauses(restrictlist, outer_rel, inner_rel); + if (list_length(joinclauses_parent) <= 0) + { + elog(DEBUG1, "No join clauses specified. Give up."); + return; + } + + if (list_length(inner_rel->ppilist) > 0) + { + elog(DEBUG1, "ParamPathInfo is already set in inner_rel. Can't pushdown."); + return; + } + + /* + * Make new joinrel between each of outer path's sub-paths and inner path. + */ + foreach(lc, outer_path->subpaths) + { + RelOptInfo *orig_outer_rel = ((Path *) lfirst(lc))->parent; + RelOptInfo *alter_outer_rel; + Path *alter_path = NULL; + List *joinclauses_child; + List *restrictlist_by_check_constr; + bool is_valid; + List **join_rel_level; + + Assert(!IS_DUMMY_REL(orig_outer_rel)); + + /* + * Join clause points parent's relid, + * so we must change it to child's one. + */ + joinclauses_child = convert_parent_joinclauses_to_child(root, joinclauses_parent, + orig_outer_rel); + + /* + * Make RestrictInfo list from CHECK() constraints of outer table. + * "is_valid" indicates whether making RestrictInfo list succeeded or not. + */ + restrictlist_by_check_constr = + create_rinfo_from_check_constr(root, joinclauses_child, + orig_outer_rel, &is_valid); + + if (!is_valid) + { + elog(DEBUG1, "Join clause doesn't match with CHECK() constraint. " + "Can't pushdown."); + list_free_deep(alter_append_subpaths); + list_free(joinclauses_parent); + return; + } + + if (list_length(restrictlist_by_check_constr) > 0) + { + /* Prepare ParamPathInfo for RestrictInfos by CHECK constraints. */ + ParamPathInfo *newppi = makeNode(ParamPathInfo); + + newppi->ppi_req_outer = NULL; + newppi->ppi_rows = + get_parameterized_baserel_size(root, + inner_rel, + restrictlist_by_check_constr); + newppi->ppi_clauses = restrictlist_by_check_constr; + + /* Copy Path of inner relation, and specify newppi to it. */ + alter_path = copyObject(inner_rel->cheapest_total_path); + alter_path->param_info = newppi; + + /* Re-calculate costs of alter_path */ + switch (alter_path->pathtype) + { + case T_SeqScan : + cost_seqscan(alter_path, root, inner_rel, newppi); + break; + case T_SampleScan : + cost_samplescan(alter_path, root, inner_rel, newppi); + break; + case T_IndexScan : + case T_IndexOnlyScan : + { + IndexPath *ipath = (IndexPath *) alter_path; + + cost_index(ipath, root, 1.0); + } + break; + case T_BitmapHeapScan : + { + BitmapHeapPath *bpath = (BitmapHeapPath *) alter_path; + + cost_bitmap_heap_scan(&bpath->path, root, inner_rel, + newppi, bpath->bitmapqual, 1.0); + } + break; + case T_TidScan : + { + TidPath *tpath = (TidPath *) alter_path; + + cost_tidscan(&tpath->path, root, inner_rel, + tpath->tidquals, newppi); + } + break; + default: + break; + } + + /* + * Append this path to pathlist temporary. + * This path will be removed after returning from make_join_rel(). + */ + inner_rel->pathlist = lappend(inner_rel->pathlist, alter_path); + set_cheapest(inner_rel); + } + + /* + * NOTE: root->join_rel_level is used to track candidate of join + * relations for each level, then these relations are consolidated + * to one relation. + * (See the comment in standard_join_search) + * + * Even though we construct RelOptInfo of child relations of the + * Append node, these relations should not appear as candidate of + * relations join in the later stage. So, we once save the list + * during make_join_rel() for the child relations. + */ + join_rel_level = root->join_rel_level; + root->join_rel_level = NULL; + + /* + * Create new joinrel (as a sub-path of Append). + */ + alter_outer_rel = + make_join_rel(root, orig_outer_rel, inner_rel); + + /* restore the join_rel_level */ + root->join_rel_level = join_rel_level; + + Assert(alter_outer_rel != NULL); + + if (alter_path) + { + /* + * Remove (temporary added) alter_path from pathlist. + * The alter_path may be inner/outer path of JoinPath made + * by make_join_rel() above, thus we must not free alter_path itself. + */ + inner_rel->pathlist = list_delete_ptr(inner_rel->pathlist, alter_path); + set_cheapest(inner_rel); + } + + if (IS_DUMMY_REL(alter_outer_rel)) + { + pfree(alter_outer_rel); + continue; + } + + /* + * We must check if alter_outer_rel has one or more paths. + * add_path() sometime rejects to add new path to parent RelOptInfo. + */ + if (list_length(alter_outer_rel->pathlist) <= 0) + { + /* + * Sadly, No paths added. This means that pushdown is failed, + * thus clean up here. + */ + list_free_deep(alter_append_subpaths); + pfree(alter_outer_rel); + list_free(joinclauses_parent); + elog(DEBUG1, "Join pushdown failed."); + return; + } + + set_cheapest(alter_outer_rel); + Assert(alter_outer_rel->cheapest_total_path != NULL); + alter_append_subpaths = lappend(alter_append_subpaths, + alter_outer_rel->cheapest_total_path); + } + + /* Join Pushdown is succeeded. Add path to original joinrel. */ + add_path(joinrel, + (Path *) create_append_path(joinrel, alter_append_subpaths, NULL)); + + list_free(joinclauses_parent); + elog(DEBUG1, "Join pushdown succeeded."); +} diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 791b64e..4c18572 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -4230,8 +4230,14 @@ prepare_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys, /* * Ignore child members unless they match the rel being * sorted. + * + * If this is called from make_sort_from_pathkeys(), + * relids may be NULL. In this case, we must not ignore child + * members because inner/outer plan of pushed-down merge join is + * always child table. */ - if (em->em_is_child && + if (relids != NULL && + em->em_is_child && !bms_equal(em->em_relids, relids)) continue; @@ -4344,8 +4350,13 @@ find_ec_member_for_tle(EquivalenceClass *ec, /* * Ignore child members unless they match the rel being sorted. + * + * If this is called from make_sort_from_pathkeys(), relids may be NULL. + * In this case, we must not ignore child members because inner/outer + * plan of pushed-down merge join is always child table. */ - if (em->em_is_child && + if (relids != NULL && + em->em_is_child && !bms_equal(em->em_relids, relids)) continue; diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 9442e5f..c137b09 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -54,9 +54,6 @@ get_relation_info_hook_type get_relation_info_hook = NULL; static bool infer_collation_opclass_match(InferenceElem *elem, Relation idxRel, List *idxExprs); static int32 get_rel_data_width(Relation rel, int32 *attr_widths); -static List *get_relation_constraints(PlannerInfo *root, - Oid relationObjectId, RelOptInfo *rel, - bool include_notnull); static List *build_index_tlist(PlannerInfo *root, IndexOptInfo *index, Relation heapRelation); @@ -1022,7 +1019,7 @@ get_relation_data_width(Oid relid, int32 *attr_widths) * run, and in many cases it won't be invoked at all, so there seems no * point in caching the data in RelOptInfo. */ -static List * +List * get_relation_constraints(PlannerInfo *root, Oid relationObjectId, RelOptInfo *rel, bool include_notnull) diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 68a93a1..f60ef98 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -496,19 +496,24 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, { Relids relids = joinrel->relids; ListCell *vars; + int nth = 0; foreach(vars, input_rel->reltargetlist) { Var *var = (Var *) lfirst(vars); RelOptInfo *baserel; int ndx; + bool is_needed = false; /* * Ignore PlaceHolderVars in the input tlists; we'll make our own * decisions about whether to copy them. */ if (IsA(var, PlaceHolderVar)) + { + nth++; continue; + } /* * Otherwise, anything in a baserel or joinrel targetlist ought to be @@ -521,15 +526,84 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, /* Get the Var's original base rel */ baserel = find_base_rel(root, var->varno); + ndx = var->varattno - baserel->min_attr; + + /* + * We must handle case of join pushdown. + */ + if (input_rel->reloptkind == RELOPT_OTHER_MEMBER_REL) + { + /* Get the Var's PARENT base rel */ + Index parent_relid = + find_childrel_appendrelinfo(root, input_rel)->parent_relid; + RelOptInfo *parent_rel = find_base_rel(root, parent_relid); + Var *parent_var = + (Var *) list_nth(parent_rel->reltargetlist, nth); + int parent_ndx = parent_var->varattno - parent_rel->min_attr; + /* Relids have included parent_rel's instead of input_rel's. */ + Relids relids_tmp = + bms_del_members(bms_copy(relids), input_rel->relids); + + relids_tmp = bms_union(relids_tmp, parent_rel->relids); + + Assert(ndx == parent_ndx); + + is_needed = + (bms_nonempty_difference( + parent_rel->attr_needed[parent_ndx], + relids_tmp)); + + bms_free(relids_tmp); + } + else + { + Relids relids_tmp = + bms_del_members(bms_copy(relids), input_rel->relids); + int another_relid = -1; + + /* Try to detect Inner relation of pushed-down join. */ + if (bms_get_singleton_member(relids_tmp, &another_relid)) + { + RelOptInfo *another_rel = + find_base_rel(root, another_relid); + + if (another_rel->reloptkind == RELOPT_OTHER_MEMBER_REL) + { + /* This may be inner relation of pushed-down join. */ + Index parent_relid = + find_childrel_appendrelinfo(root, another_rel)->parent_relid; + RelOptInfo *parent_rel = find_base_rel(root, parent_relid); + + bms_free(relids_tmp); + relids_tmp = + bms_union(input_rel->relids, parent_rel->relids); + } + } + + if (!bms_is_subset(input_rel->relids, relids_tmp)) + { + /* Can't detect inner relation of pushed-down join */ + bms_free(relids_tmp); + relids_tmp = bms_copy(relids); + } + + is_needed = + (bms_nonempty_difference( + baserel->attr_needed[ndx], + relids_tmp)); + + bms_free(relids_tmp); + } /* Is it still needed above this joinrel? */ - ndx = var->varattno - baserel->min_attr; - if (bms_nonempty_difference(baserel->attr_needed[ndx], relids)) + if (is_needed) { /* Yup, add it to the output */ joinrel->reltargetlist = lappend(joinrel->reltargetlist, var); joinrel->width += baserel->attr_widths[ndx]; } + + nth++; } } diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h index 11e7d4d..f799a5b 100644 --- a/src/include/optimizer/plancat.h +++ b/src/include/optimizer/plancat.h @@ -28,6 +28,10 @@ extern PGDLLIMPORT get_relation_info_hook_type get_relation_info_hook; extern void get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, RelOptInfo *rel); +extern List *get_relation_constraints(PlannerInfo *root, + Oid relationObjectId, RelOptInfo *rel, + bool include_notnull); + extern List *infer_arbiter_indexes(PlannerInfo *root); extern void estimate_rel_size(Relation rel, int32 *attr_widths,