diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 9bf0098..88c8d98 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -3887,6 +3887,17 @@ afterTriggerInvokeEvents(AfterTriggerEventList *events, return all_fired; } +/* ---------- + * AfterTriggerQueueIsEmpty() + * + * True if there are no pending triggers in the queue. + * ---------- + */ +bool +AfterTriggerQueueIsEmpty(void) +{ + return (afterTriggers->query_depth == -1); +} /* ---------- * AfterTriggerBeginXact() diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index b7aff37..63dbc1b 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -32,6 +32,7 @@ static EquivalenceMember *add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids, Relids nullable_relids, bool is_child, Oid datatype); +static void update_rel_class_joins(PlannerInfo *root); static void generate_base_implied_equalities_const(PlannerInfo *root, EquivalenceClass *ec); static void generate_base_implied_equalities_no_const(PlannerInfo *root, @@ -725,7 +726,6 @@ void generate_base_implied_equalities(PlannerInfo *root) { ListCell *lc; - Index rti; foreach(lc, root->eq_classes) { @@ -752,6 +752,19 @@ generate_base_implied_equalities(PlannerInfo *root) * This is also a handy place to mark base rels (which should all exist by * now) with flags showing whether they have pending eclass joins. */ + update_rel_class_joins(root); +} + +/* + * update_rel_class_joins + * Process each relation in the PlannerInfo to update the + * has_eclass_joins flag + */ +static void +update_rel_class_joins(PlannerInfo *root) +{ + Index rti; + for (rti = 1; rti < root->simple_rel_array_size; rti++) { RelOptInfo *brel = root->simple_rel_array[rti]; @@ -764,6 +777,63 @@ generate_base_implied_equalities(PlannerInfo *root) } /* + * remove_rel_from_eclass + * Remove all eclass members that belong to relid and also any classes + * which have been left empty as a result of removing a member. + */ +void +remove_rel_from_eclass(PlannerInfo *root, int relid) +{ + ListCell *l, + *nextl, + *eqm, + *eqmnext; + + bool removedany = false; + + /* Strip all traces of this relation out of the eclasses */ + for (l = list_head(root->eq_classes); l != NULL; l = nextl) + { + EquivalenceClass *ec = (EquivalenceClass *) lfirst(l); + + nextl = lnext(l); + + for (eqm = list_head(ec->ec_members); eqm != NULL; eqm = eqmnext) + { + EquivalenceMember *em = (EquivalenceMember *) lfirst(eqm); + + eqmnext = lnext(eqm); + + if (IsA(em->em_expr, Var)) + { + Var *var = (Var *) em->em_expr; + + if (var->varno == relid) + { + list_delete_ptr(ec->ec_members, em); + removedany = true; + } + } + } + + /* + * If we've removed the last member from the EquivalenceClass then we'd + * better delete the entire entry. + */ + if (list_length(ec->ec_members) == 0) + list_delete_ptr(root->eq_classes, ec); + } + + /* + * If we removed any eclass members then this may have changed if a + * relation has an eclass join or not, we'd better force an update + * of this + */ + if (removedany) + update_rel_class_joins(root); +} + +/* * generate_base_implied_equalities when EC contains pseudoconstant(s) */ static void diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index 773f8a4..4e910a3 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -22,17 +22,33 @@ */ #include "postgres.h" +#include "commands/trigger.h" +#include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" +#include "nodes/relation.h" #include "optimizer/clauses.h" #include "optimizer/joininfo.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/planmain.h" +#include "optimizer/restrictinfo.h" #include "optimizer/tlist.h" #include "utils/lsyscache.h" /* local functions */ -static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo); +static bool leftjoin_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo); +static bool semiorantijoin_is_removable(PlannerInfo *root, + SpecialJoinInfo *sjinfo, List **leftrelcolumns, + RelOptInfo **leftrel); +void convert_semijoin_to_isnotnull_quals(PlannerInfo *root, RelOptInfo *rel, + List *columnlist); +void convert_antijoin_to_isnull_quals(PlannerInfo *root, RelOptInfo *rel, + List *columnlist); +static bool relation_has_foreign_key_for(PlannerInfo *root, RelOptInfo *rel, + RelOptInfo *referencedrel, List *referencing_vars, + List *index_vars, List *operator_list); +static bool expressions_match_foreign_key(ForeignKeyInfo *fk, List *fkvars, + List *indexvars, List *operators); static void remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids); static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved); @@ -53,8 +69,8 @@ remove_useless_joins(PlannerInfo *root, List *joinlist) ListCell *lc; /* - * We are only interested in relations that are left-joined to, so we can - * scan the join_info_list to find them easily. + * We are only interested in relations that are left, semi or anti-joined + * to, so we can scan the join_info_list to find them easily. */ restart: foreach(lc, root->join_info_list) @@ -63,14 +79,41 @@ restart: int innerrelid; int nremoved; - /* Skip if not removable */ - if (!join_is_removable(root, sjinfo)) - continue; + if (sjinfo->jointype == JOIN_LEFT) + { + /* Skip if not removable */ + if (!leftjoin_is_removable(root, sjinfo)) + continue; + } + else if (sjinfo->jointype == JOIN_SEMI) + { + List *columnlist; + RelOptInfo *rel; + + if (!semiorantijoin_is_removable(root, sjinfo, &columnlist, &rel)) + continue; + + Assert(columnlist != NIL); + convert_semijoin_to_isnotnull_quals(root, rel, columnlist); + } + else if (sjinfo->jointype == JOIN_ANTI) + { + List *columnlist; + RelOptInfo *rel; + + if (!semiorantijoin_is_removable(root, sjinfo, &columnlist, &rel)) + continue; + + Assert(columnlist != NIL); + convert_antijoin_to_isnull_quals(root, rel, columnlist); + } + else + continue; /* we don't support this join type */ /* - * Currently, join_is_removable can only succeed when the sjinfo's - * righthand is a single baserel. Remove that rel from the query and - * joinlist. + * Currently, all of the functions which test if join removals are + * possible can only succeed when the sjinfo's righthand is a single + * baserel. Remove that rel from the query and joinlist. */ innerrelid = bms_singleton_member(sjinfo->min_righthand); @@ -136,8 +179,8 @@ clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids, } /* - * join_is_removable - * Check whether we need not perform this special join at all, because + * leftjoin_is_removable + * Check whether we need not perform this left join at all, because * it will just duplicate its left input. * * This is true for a left join for which the join condition cannot match @@ -147,7 +190,7 @@ clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids, * above the join. */ static bool -join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) +leftjoin_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) { int innerrelid; RelOptInfo *innerrel; @@ -157,12 +200,13 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) ListCell *l; int attroff; + Assert(sjinfo->jointype == JOIN_LEFT); + /* - * Must be a non-delaying left join to a single baserel, else we aren't + * Must be a non-delaying join to a single baserel, else we aren't * going to be able to do anything with it. */ - if (sjinfo->jointype != JOIN_LEFT || - sjinfo->delay_upper_joins || + if (sjinfo->delay_upper_joins || bms_membership(sjinfo->min_righthand) != BMS_SINGLETON) return false; @@ -367,6 +411,557 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) return false; } +/* + * semiorantijoin_is_removable + * True if we can prove that the semi or anti join is redundant due to the + * existence of a foreign key constraint. + * + * Detecting if a SEMI or ANTI join may be removed is quite different to the + * detection code for left joins. For these we have no need to check if vars + * from the join are used in the query as the EXISTS and IN() syntax disallow + * this. In order to prove that a semi or anti join is redundant we must ensure + * that a foreign key exists on the left side of the join which references the + * table on the right side of the join. This means that we can only support a + * single table on either side of the join. We must also ensure that the join + * condition matches all the foreign key columns to each index column on the + * referenced table. If any columns are missing then we cannot be sure we'll + * get at most 1 record back, and if there are any extra conditions that are + * not in the foreign key then we cannot be sure that the join condition will + * produce at least 1 matching row. + * + * If we manage to find a foreign key which will allow the join to be removed + * then the caller may have to add NULL checking to the query in place of the + * join. For example if we determine that the join to the table b is not needed + * due to the existence of a foreign key on a.b_id referencing b.id in the + * following query: + * + * SELECT * FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id = b.id); + * + * Then the only possible records that could be returned from a are the ones + * WHERE b_id IS NULL. + * + * If this function returns True, then leftrelcolumns will be populated with + * the list of columns from the left relation which exist in the join + * condition, leftrel will be set to the RelOptInfo of the left hand relation. + */ +static bool +semiorantijoin_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo, + List **leftrelcolumns, RelOptInfo **leftrel) +{ + int innerrelid; + int outerrelid; + RelOptInfo *innerrel; + RelOptInfo *outerrel; + ListCell *lc; + List *referencing_vars; + List *index_vars; + List *operator_list; + + Assert(sjinfo->jointype == JOIN_SEMI || sjinfo->jointype == JOIN_ANTI); + + /* + * We mustn't allow semi or anti joins to be removed if there are any + * pending foreign key triggers in the queue. This could happen if we + * are planning a query that has been executed from within a volatile + * function and the query which called this volatile function has made some + * changes to a table referenced by a foreign key. The reason for this is + * that any updates to a table which is referenced by a foreign key + * constraint will only have the referencing tables updated after the + * command is complete, so there is a window of time where records may + * violate the foreign key constraint. + * + * Currently this code is quite naive, as we won't even attempt to remove + * the join if there's any pending foreign key triggers. It may be + * worthwhile to improve this to check if there's any pending triggers for + * the referencing relation in the join, but to keep it simple, this will + * do for now. + */ + if (!AfterTriggerQueueIsEmpty()) + return false; + + /* + * We'll start by checking that the left hand relation is a singleton + * and that it has at least 1 foreign key constraint. A lack of foreign + * key seems like a more likely possibility to allow us to exit early than + * checking the right hand rel has any indexes. + */ + if (sjinfo->delay_upper_joins || + bms_membership(sjinfo->min_lefthand) != BMS_SINGLETON) + return false; + + outerrelid = bms_singleton_member(sjinfo->min_lefthand); + outerrel = find_base_rel(root, outerrelid); + + /* + * There's no possibility to remove the join if the outer rel is not a + * baserel or the baserel has no foreign keys defined. + */ + if (outerrel->reloptkind != RELOPT_BASEREL || + outerrel->rtekind != RTE_RELATION || + outerrel->fklist == NIL) + return false; + + if (bms_membership(sjinfo->min_righthand) != BMS_SINGLETON) + return false; + + innerrelid = bms_singleton_member(sjinfo->min_righthand); + innerrel = find_base_rel(root, innerrelid); + + /* + * If the right hand relation is not a base rel then it can't possibly be + * referenced by a foreign key. The same goes if there's no unique indexes + * on the relation, however, to keep it simple here we'll make do with + * checking if there's any indexes, as if there's no indexes then there's + * certainly no unique indexes. + */ + if (innerrel->reloptkind != RELOPT_BASEREL || + innerrel->rtekind != RTE_RELATION || + innerrel->indexlist == NIL) + return false; + + referencing_vars = NIL; + index_vars = NIL; + operator_list = NIL; + + /* + * We now pre-process the join quals into lists that contain the vars from + * either side of the joins and also a list which contains the operators + * from the join conditions. At this stage we may still discover that the + * join cannot be removed if, for example we find a qual that does not + * reference both sides of the join. + * + * referencing_vars will contain a list of Vars from the left hand + * relation, these are the expressions that we'll check against the + * referencing side of the foreign key. + * + * index_vars will contain a list of Vars from the right hand relation, + * these are the expressions that we'll check on the referenced side of the + * foreign key. + * + * operator_list, this is list of operator Oids that we'll need to ensure + * are compatible with the operator specified in the foreign key. + */ + foreach(lc, sjinfo->join_quals) + { + OpExpr *opexpr = (OpExpr *) lfirst(lc); + Oid opno; + Node *left_expr; + Node *right_expr; + Relids left_varnos; + Relids right_varnos; + Relids all_varnos; + Oid opinputtype; + + /* Is it a binary opclause? */ + if (!IsA(opexpr, OpExpr) || + list_length(opexpr->args) != 2) + { + /* We only accept quals which reference both sides of the join. */ + return false; + } + + left_expr = linitial(opexpr->args); + + /* Punt if the left operand is anything apart from a Var */ + if (!IsA(left_expr, Var)) + return false; + + right_expr = lsecond(opexpr->args); + + /* Punt if the right operand is anything apart from a Var */ + if (!IsA(right_expr, Var)) + return false; + + opinputtype = exprType(left_expr); + opno = opexpr->opno; + + /* + * FIXME: it would be nice to fast path out if the + * operator couldn't possibly be used in a foreign + * key, but what to use to detect this? + */ + if (!op_mergejoinable(opno, opinputtype)) + return false; + + left_varnos = pull_varnos(left_expr); + right_varnos = pull_varnos(right_expr); + all_varnos = bms_union(left_varnos, right_varnos); + + /* + * Check if the clause matches both sides of the join. If only 1 side + * is matched then, since we're dealing with a SEMI or ANTI join then + * it must be from the inner side. So this qual could restrict the + * results, We must disallow this case as any additional quals that + * exist void the proof that the foreign key gives us that we'll match + * exactly 1 record on the referenced relation. + */ + if (!bms_overlap(all_varnos, sjinfo->syn_righthand) || + bms_is_subset(all_varnos, sjinfo->syn_righthand)) + return false; + + /* check rel membership of arguments */ + if (!bms_is_empty(right_varnos) && + bms_is_subset(right_varnos, sjinfo->syn_righthand) && + !bms_overlap(left_varnos, sjinfo->syn_righthand)) + { + /* typical case, right_expr is RHS variable */ + } + else if (!bms_is_empty(left_varnos) && + bms_is_subset(left_varnos, sjinfo->syn_righthand) && + !bms_overlap(right_varnos, sjinfo->syn_righthand)) + { + Node *tmp; + /* flipped case, left_expr is RHS variable */ + opno = get_commutator(opno); + if (!OidIsValid(opno)) + return false; + + /* swap the operands */ + tmp = left_expr; + left_expr = right_expr; + right_expr = tmp; + } + else + return false; + + /* so far so good, keep building lists */ + referencing_vars = lappend(referencing_vars, left_expr); + operator_list = lappend_oid(operator_list, opno); + index_vars = lappend(index_vars, right_expr); + } + + /* no suitable join condition items? Then we can't remove the join */ + if (referencing_vars == NIL) + return false; + + /* + * Now that we've built the join Var lists we can now check if there are + * any foreign keys that will support removing the join. + */ + if (relation_has_foreign_key_for(root, outerrel, innerrel, + referencing_vars, index_vars, operator_list)) + { + *leftrel = outerrel; + *leftrelcolumns = referencing_vars; + return true; + } + + return false; +} + +/* + * convert_semijoin_to_isnotnull_quals + * Adds any required "col IS NOT NULL" quals which are required to ensure + * that the query remains equivalent to what it was before the semi join + * was removed. + */ +void +convert_semijoin_to_isnotnull_quals(PlannerInfo *root, RelOptInfo *rel, List *columnlist) +{ + ListCell *l; + Bitmapset *handledcols = NULL; + Oid reloid; + + reloid = root->simple_rte_array[rel->relid]->relid; + + /* + * If a semi join has been successfully removed by the join removal code, + * then a foreign key must exist that proves the join to not be required. + * + * The semi join would have never allowed NULL values for any of the + * columns seen in the join condition, as these would have matched up to a + * record in the joined table. Now that we've proved the join to be + * redundant, we must maintain that behavior of not having NULLs by adding + * IS NOT NULL quals to the WHERE clause, although we may skip this if the + * column in question happens to have a NOT NULL constraint. + */ + foreach(l, columnlist) + { + Var *var = (Var *) lfirst(l); + + /* should be a var if it came from a foreign key */ + Assert(IsA(var, Var)); + Assert(var->varno == rel->relid); + + /* + * Skip this column if it's a duplicate of one we've previously + * handled. + */ + if (bms_is_member(var->varattno, handledcols)) + continue; + + /* mark this column as handled */ + handledcols = bms_add_member(handledcols, var->varattno); + + /* add the IS NOT NULL qual, but only if the column allows NULLs */ + if (!get_attnotnull(reloid, var->varattno)) + { + RestrictInfo *rinfo; + NullTest *ntest = makeNode(NullTest); + + ntest->nulltesttype = IS_NOT_NULL; + ntest->arg = (Expr *) var; + ntest->argisrow = false; + + rinfo = make_restrictinfo((Expr *)ntest, false, false, false, + NULL, NULL, NULL); + rel->baserestrictinfo = lappend(rel->baserestrictinfo, rinfo); + } + } +} + +/* + * convert_antijoin_to_isnull_quals + * Adds any required "col IS NULL" quals which are required to ensure + * that the query remains equivalent to what it was before the anti join + * was removed. + */ +void +convert_antijoin_to_isnull_quals(PlannerInfo *root, RelOptInfo *rel, List *columnlist) +{ + ListCell *l; + RestrictInfo *rinfo; + Expr *expr; + List *isnulltests = NIL; + Bitmapset *handledcols = NULL; + Oid reloid; + + reloid = root->simple_rte_array[rel->relid]->relid; + + /* + * If an anti join has been successfully removed by the join removal code, + * then a foreign key must exist that proves the join to not be required. + * + * The foreign key which proved this join redundant would ensure that each + * record in the referencing rel has a matching record in the referenced + * rel. Though this is not quite true when it comes to NULL valued columns, + * as these won't reference any record. So here, in order to make the query + * produce equivalent results as it would have done with the anti join, + * we'll just ensure that only these NULL valued columns can make their way + * into the final result set. There is also a special case here, if all of + * the columns in the foreign key happen to have a NOT NULL constraint then + * no records can match, so in this case we'll add a "WHERE false" in order + * to save the executer from wasting any time. + */ + foreach(l, columnlist) + { + Var *var = (Var *) lfirst(l); + + /* should be a var if it came from a foreign key */ + Assert(IsA(var, Var)); + + /* + * Skip this column if it's a duplicate of one we've previously + * handled. + */ + if (bms_is_member(var->varattno, handledcols)) + continue; + + /* mark this column as handled */ + handledcols = bms_add_member(handledcols, var->varattno); + + /* + * No point in adding a col IS NULL if the column + * has a NOT NULL constraint defined for it. + */ + if (!get_attnotnull(reloid, var->varattno)) + { + NullTest *ntest = makeNode(NullTest); + ntest->nulltesttype = IS_NULL; + ntest->arg = (Expr *) var; + ntest->argisrow = false; + + isnulltests = lappend(isnulltests, ntest); + } + } + + /* + * If we still have an empty list by the time we get to here then it would + * appear that each column has a NOT NULL constraint. In this case then + * it's not possible for the query to return any records, so we can simply + * add a "WHERE false" constant expression and tell the planner to check + * for gating quals. + */ + if (isnulltests == NIL) + { + expr = (Expr *) makeBoolConst(false, false); + rinfo = make_restrictinfo(expr, false, false, true, NULL, NULL, NULL); + + /* tell createplan.c to check for gating quals */ + root->hasPseudoConstantQuals = true; + } + else + { + /* + * Now we can build a RestrictInfo for the newly created IS NULL tests. + * If there's only 1 test expression then we can just make the + * RestrictInfo use that expression, if there's more than 1 we'll need + * to "OR" all of these together. + */ + if (list_length(isnulltests) == 1) + expr = (Expr *) linitial(isnulltests); + else + expr = make_orclause(isnulltests); + + rinfo = make_restrictinfo(expr, false, false, false, NULL, NULL, NULL); + } + + rel->baserestrictinfo = lappend(rel->baserestrictinfo, rinfo); +} + +/* + * relation_has_foreign_key_for + * Checks if rel has a foreign key which references referencedrel with the + * given list of expressions. + * + * For the match to succeed: + * referencing_vars must match the columns defined in the foreign key. + * index_vars must match the columns defined in the index for the foreign key. + */ +static bool +relation_has_foreign_key_for(PlannerInfo *root, RelOptInfo *rel, + RelOptInfo *referencedrel, List *referencing_vars, + List *index_vars, List *operator_list) +{ + ListCell *lc; + Oid refreloid; + + /* + * Look up the Oid of the referenced relation. We only want to look at + * foreign keys on the referencing relation which reference this relation. + */ + refreloid = root->simple_rte_array[referencedrel->relid]->relid; + + Assert(list_length(referencing_vars) > 0); + Assert(list_length(referencing_vars) == list_length(index_vars)); + Assert(list_length(referencing_vars) == list_length(operator_list)); + + /* + * Search through each foreign key on the referencing relation and try + * to find one which references the relation in the join condition. If we + * find one then we'll send the join conditions off to + * expressions_match_foreign_key() to see if they match the foreign key. + */ + foreach(lc, rel->fklist) + { + ForeignKeyInfo *fk = (ForeignKeyInfo *) lfirst(lc); + + if (fk->confrelid == refreloid) + { + if (expressions_match_foreign_key(fk, referencing_vars, + index_vars, operator_list)) + return true; + } + } + + return false; +} + +/* + * expressions_match_foreign_key + * True if the given fkvars, indexvars and operators will match + * exactly 1 record in the referenced relation of the foreign key. + * + * Note: This function expects fkvars and indexvars to only contain Var types. + * Expression indexes are not supported by foreign keys. + */ +static bool +expressions_match_foreign_key(ForeignKeyInfo *fk, List *fkvars, + List *indexvars, List *operators) +{ + ListCell *lc; + ListCell *lc2; + ListCell *lc3; + int col; + Bitmapset *allitems; + Bitmapset *matcheditems; + int lstidx; + + Assert(list_length(fkvars) == list_length(indexvars)); + Assert(list_length(fkvars) == list_length(operators)); + + /* + * Fast path out if there's not enough conditions to match each column in + * the foreign key. Note that we cannot check that the number of + * expressions are equal here since it would cause any expressions which + * are duplicated not to match. + */ + if (list_length(fkvars) < fk->conncols) + return false; + + /* + * We need to ensure that each foreign key column can be matched to a list + * item, and we need to ensure that each list item can be matched to a + * foreign key column. We do this by looping over each foreign key column + * and checking that we can find an item in the list which matches the + * current column, however this method does not allow us to ensure that no + * additional items exist in the list. We could solve that by performing + * another loop over each list item and check that it matches an foreign + * key column, but that's a bit wasteful. Instead we'll use 2 bitmapsets, + * one to store the 0 based index of each list item, and with the other + * we'll store each list index that we've managed to match. After we're + * done matching we'll just make sure that both bitmapsets are equal. + */ + allitems = NULL; + matcheditems = NULL; + + /* + * Build a bitmapset which contains each 1 based list index. It seems more + * efficient to do this in reverse so that we allocate enough memory for + * the bitmapset on first loop rather than reallocating each time we find + * we need a bit more space. + */ + for (lstidx = list_length(fkvars) - 1; lstidx >= 0; lstidx--) + allitems = bms_add_member(allitems, lstidx); + + for (col = 0; col < fk->conncols; col++) + { + bool matched = false; + + lstidx = 0; + + forthree(lc, fkvars, lc2, indexvars, lc3, operators) + { + Var *expr = (Var *) lfirst(lc); + Var *idxexpr = (Var *) lfirst(lc2); + Oid opr = lfirst_oid(lc3); + + Assert(IsA(expr, Var)); + Assert(IsA(idxexpr, Var)); + + /* Does this join qual match up to the current fkey column? */ + if (fk->conkey[col] == expr->varattno && + fk->confkey[col] == idxexpr->varattno && + equality_ops_are_compatible(opr, fk->conpfeqop[col])) + { + matched = true; + + /* mark this list item as matched */ + matcheditems = bms_add_member(matcheditems, lstidx); + + /* + * Don't break here as there may be duplicate expressions + * that we also need to match against. + */ + } + lstidx++; + } + + /* punt if there's no match. */ + if (!matched) + return false; + } + + /* + * Ensure that we managed to match every item in the list to a foreign key + * column. + */ + if (!bms_equal(allitems, matcheditems)) + return false; + + return true; /* matched */ +} + /* * Remove the target relid from the planner's data structures, having @@ -393,6 +988,9 @@ remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids) */ rel->reloptkind = RELOPT_DEADREL; + /* Strip out any eclass members that belong to this rel */ + remove_rel_from_eclass(root, relid); + /* * Remove references to the rel from other baserels' attr_needed arrays. */ diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index b2becfa..0b1c1a6 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -25,7 +25,9 @@ #include "access/transam.h" #include "access/xlog.h" #include "catalog/catalog.h" +#include "catalog/pg_constraint.h" #include "catalog/heap.h" +#include "catalog/pg_type.h" #include "foreign/fdwapi.h" #include "miscadmin.h" #include "nodes/makefuncs.h" @@ -38,6 +40,7 @@ #include "parser/parsetree.h" #include "rewrite/rewriteManip.h" #include "storage/bufmgr.h" +#include "utils/fmgroids.h" #include "utils/lsyscache.h" #include "utils/rel.h" #include "utils/snapmgr.h" @@ -89,6 +92,13 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, Relation relation; bool hasindex; List *indexinfos = NIL; + List *fkinfos = NIL; + Relation fkeyRel; + Relation fkeyRelIdx; + ScanKeyData fkeyScankey; + SysScanDesc fkeyScan; + HeapTuple tuple; + /* * We need not lock the relation since it was already locked, either by @@ -384,6 +394,111 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, heap_close(relation, NoLock); + ScanKeyInit(&fkeyScankey, + Anum_pg_constraint_conrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(relationObjectId)); + + fkeyRel = heap_open(ConstraintRelationId, AccessShareLock); + fkeyRelIdx = index_open(ConstraintRelidIndexId, AccessShareLock); + fkeyScan = systable_beginscan_ordered(fkeyRel, fkeyRelIdx, NULL, 1, &fkeyScankey); + + while ((tuple = systable_getnext_ordered(fkeyScan, ForwardScanDirection)) != NULL) + { + Form_pg_constraint con = (Form_pg_constraint) GETSTRUCT(tuple); + ForeignKeyInfo *fkinfo; + Datum adatum; + bool isNull; + ArrayType *arr; + int numkeys; + + /* Not a foreign key */ + if (con->contype != CONSTRAINT_FOREIGN) + continue; + + /* we're not interested unless the fk has been validated */ + if (!con->convalidated) + continue; + + fkinfo = (ForeignKeyInfo *) palloc(sizeof(ForeignKeyInfo)); + fkinfo->conindid = con->conindid; + fkinfo->confrelid = con->confrelid; + fkinfo->convalidated = con->convalidated; + fkinfo->conrelid = con->conrelid; + fkinfo->confupdtype = con->confupdtype; + fkinfo->confdeltype = con->confdeltype; + fkinfo->confmatchtype = con->confmatchtype; + + adatum = heap_getattr(tuple, Anum_pg_constraint_conkey, + RelationGetDescr(fkeyRel), &isNull); + + if (isNull) + elog(ERROR, "null conkey for constraint %u", + HeapTupleGetOid(tuple)); + + arr = DatumGetArrayTypeP(adatum); /* ensure not toasted */ + numkeys = ARR_DIMS(arr)[0]; + if (ARR_NDIM(arr) != 1 || + numkeys < 0 || + ARR_HASNULL(arr) || + ARR_ELEMTYPE(arr) != INT2OID) + elog(ERROR, "conkey is not a 1-D smallint array"); + + fkinfo->conkey = (int16 *) ARR_DATA_PTR(arr); + fkinfo->conncols = numkeys; + + adatum = heap_getattr(tuple, Anum_pg_constraint_confkey, + RelationGetDescr(fkeyRel), &isNull); + + if (isNull) + elog(ERROR, "null confkey for constraint %u", + HeapTupleGetOid(tuple)); + + arr = DatumGetArrayTypeP(adatum); /* ensure not toasted */ + numkeys = ARR_DIMS(arr)[0]; + + /* sanity check */ + if (numkeys != fkinfo->conncols) + elog(ERROR, "number of confkey elements does not equal conkey elements"); + + if (ARR_NDIM(arr) != 1 || + numkeys < 0 || + ARR_HASNULL(arr) || + ARR_ELEMTYPE(arr) != INT2OID) + elog(ERROR, "confkey is not a 1-D smallint array"); + + fkinfo->confkey = (int16 *) ARR_DATA_PTR(arr); + adatum = heap_getattr(tuple, Anum_pg_constraint_conpfeqop, + RelationGetDescr(fkeyRel), &isNull); + + if (isNull) + elog(ERROR, "null conpfeqop for constraint %u", + HeapTupleGetOid(tuple)); + + arr = DatumGetArrayTypeP(adatum); /* ensure not toasted */ + numkeys = ARR_DIMS(arr)[0]; + + /* sanity check */ + if (numkeys != fkinfo->conncols) + elog(ERROR, "number of conpfeqop elements does not equal conkey elements"); + + if (ARR_NDIM(arr) != 1 || + numkeys < 0 || + ARR_HASNULL(arr) || + ARR_ELEMTYPE(arr) != OIDOID) + elog(ERROR, "conpfeqop is not a 1-D smallint array"); + + fkinfo->conpfeqop = (Oid *) ARR_DATA_PTR(arr); + + fkinfos = lappend(fkinfos, fkinfo); + } + + rel->fklist = fkinfos; + systable_endscan_ordered(fkeyScan); + index_close(fkeyRelIdx, AccessShareLock); + heap_close(fkeyRel, AccessShareLock); + + /* * Allow a plugin to editorialize on the info we obtained from the * catalogs. Actions might include altering the assumed relation size, diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index c938c27..a0fb8eb 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -115,6 +115,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind) rel->lateral_relids = NULL; rel->lateral_referencers = NULL; rel->indexlist = NIL; + rel->fklist = NIL; rel->pages = 0; rel->tuples = 0; rel->allvisfrac = 0; @@ -377,6 +378,7 @@ build_join_rel(PlannerInfo *root, joinrel->lateral_relids = NULL; joinrel->lateral_referencers = NULL; joinrel->indexlist = NIL; + joinrel->fklist = NIL; joinrel->pages = 0; joinrel->tuples = 0; joinrel->allvisfrac = 0; diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index 552e498..aa81c7c 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -916,6 +916,33 @@ get_atttypetypmodcoll(Oid relid, AttrNumber attnum, ReleaseSysCache(tp); } +/* + * get_attnotnull + * + * Given the relation id and the attribute number, + * return the "attnotnull" field from the attribute relation. + */ +bool +get_attnotnull(Oid relid, AttrNumber attnum) +{ + HeapTuple tp; + + tp = SearchSysCache2(ATTNUM, + ObjectIdGetDatum(relid), + Int16GetDatum(attnum)); + if (HeapTupleIsValid(tp)) + { + Form_pg_attribute att_tup = (Form_pg_attribute) GETSTRUCT(tp); + bool result; + + result = att_tup->attnotnull; + ReleaseSysCache(tp); + return result; + } + else + return false; +} + /* ---------- COLLATION CACHE ---------- */ /* diff --git a/src/include/commands/trigger.h b/src/include/commands/trigger.h index d0b0356..34a75e4 100644 --- a/src/include/commands/trigger.h +++ b/src/include/commands/trigger.h @@ -181,6 +181,7 @@ extern void ExecBSTruncateTriggers(EState *estate, extern void ExecASTruncateTriggers(EState *estate, ResultRelInfo *relinfo); +extern bool AfterTriggerQueueIsEmpty(void); extern void AfterTriggerBeginXact(void); extern void AfterTriggerBeginQuery(void); extern void AfterTriggerEndQuery(EState *estate); diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index dacbe9c..f69df09 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -355,6 +355,8 @@ typedef struct PlannerInfo * lateral_referencers - relids of rels that reference this one laterally * indexlist - list of IndexOptInfo nodes for relation's indexes * (always NIL if it's not a table) + * fklist - list of ForeignKeyInfo's for relation's foreign key + * constraints. (always NIL if it's not a table) * pages - number of disk pages in relation (zero if not a table) * tuples - number of tuples in relation (not considering restrictions) * allvisfrac - fraction of disk pages that are marked all-visible @@ -448,6 +450,7 @@ typedef struct RelOptInfo Relids lateral_relids; /* minimum parameterization of rel */ Relids lateral_referencers; /* rels that reference me laterally */ List *indexlist; /* list of IndexOptInfo */ + List *fklist; /* list of ForeignKeyInfo */ BlockNumber pages; /* size estimates derived from pg_class */ double tuples; double allvisfrac; @@ -538,6 +541,51 @@ typedef struct IndexOptInfo bool amhasgetbitmap; /* does AM have amgetbitmap interface? */ } IndexOptInfo; +/* + * ForeignKeyInfo + * Used to store pg_constraint records for foreign key constraints for use + * by the planner. + * + * conindid - The index which supports the foreign key + * + * confrelid - The relation that is referenced by this foreign key + * + * convalidated - True if the foreign key has been validated. + * + * conrelid - The Oid of the relation that the foreign key belongs to + * + * confupdtype - ON UPDATE action for when the referenced table is updated + * + * confdeltype - ON DELETE action, controls what to do when a record is + * deleted from the referenced table. + * + * confmatchtype - foreign key match type, e.g MATCH FULL, MATCH PARTIAL + * + * conncols - Number of columns defined in the foreign key + * + * conkey - An array of conncols elements to store the varattno of the + * columns on the referencing side of the foreign key + * + * confkey - An array of conncols elements to store the varattno of the + * columns on the referenced side of the foreign key + * + * conpfeqop - An array of conncols elements to store the operators for + * PK = FK comparisons + */ +typedef struct ForeignKeyInfo +{ + Oid conindid; /* index supporting this constraint */ + Oid confrelid; /* relation referenced by foreign key */ + bool convalidated; /* constraint has been validated? */ + Oid conrelid; /* relation this constraint constrains */ + char confupdtype; /* foreign key's ON UPDATE action */ + char confdeltype; /* foreign key's ON DELETE action */ + char confmatchtype; /* foreign key's match type */ + int conncols; /* number of columns references */ + int16 *conkey; /* Columns of conrelid that the constraint applies to */ + int16 *confkey; /* columns of confrelid that foreign key references */ + Oid *conpfeqop; /* Operator list for comparing PK to FK */ +} ForeignKeyInfo; /* * EquivalenceClasses diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 9b22fda..00716c9 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -108,6 +108,7 @@ extern EquivalenceClass *get_eclass_for_sort_expr(PlannerInfo *root, Relids rel, bool create_it); extern void generate_base_implied_equalities(PlannerInfo *root); +extern void remove_rel_from_eclass(PlannerInfo *root, int relid); extern List *generate_join_implied_equalities(PlannerInfo *root, Relids join_relids, Relids outer_relids, diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index 07d24d4..910190d 100644 --- a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -68,6 +68,7 @@ extern Oid get_atttype(Oid relid, AttrNumber attnum); extern int32 get_atttypmod(Oid relid, AttrNumber attnum); extern void get_atttypetypmodcoll(Oid relid, AttrNumber attnum, Oid *typid, int32 *typmod, Oid *collid); +extern bool get_attnotnull(Oid relid, AttrNumber attnum); extern char *get_collation_name(Oid colloid); extern char *get_constraint_name(Oid conoid); extern Oid get_opclass_family(Oid opclass); diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 1cb1c51..d9252c1 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -3249,6 +3249,330 @@ select i8.* from int8_tbl i8 left join (select f1 from int4_tbl group by f1) i4 (1 row) rollback; +BEGIN; +-- Test join removals for semi and anti joins +CREATE TEMP TABLE b (id INT NOT NULL PRIMARY KEY, val INT); +CREATE TEMP TABLE a (id INT NOT NULL PRIMARY KEY, b_id INT REFERENCES b(id)); +-- should remove semi join to b +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE b_id IN(SELECT id FROM b); + QUERY PLAN +------------------------------ + Seq Scan on a + Filter: (b_id IS NOT NULL) +(2 rows) + +-- should remove semi join to b +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id = id); + QUERY PLAN +------------------------------ + Seq Scan on a + Filter: (b_id IS NOT NULL) +(2 rows) + +-- should remove anti join to b +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE NOT EXISTS(SELECT 1 FROM b WHERE a.b_id = id); + QUERY PLAN +-------------------------- + Seq Scan on a + Filter: (b_id IS NULL) +(2 rows) + +-- should not remove anti join as id > 100 will void +-- the foreign key's guarantee that 1 will exist. +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE NOT EXISTS(SELECT 1 FROM b WHERE a.b_id = id AND id > 100); + QUERY PLAN +----------------------------------------------- + Hash Anti Join + Hash Cond: (a.b_id = b.id) + -> Seq Scan on a + -> Hash + -> Bitmap Heap Scan on b + Recheck Cond: (id > 100) + -> Bitmap Index Scan on b_pkey + Index Cond: (id > 100) +(8 rows) + +-- should not remove anti join as val is not part of the foreign key. +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE NOT EXISTS(SELECT 1 FROM b WHERE a.b_id = id AND val = id); + QUERY PLAN +---------------------------------- + Hash Anti Join + Hash Cond: (a.b_id = b.id) + -> Seq Scan on a + -> Hash + -> Seq Scan on b + Filter: (id = val) +(6 rows) + +-- should remove semi join to b (swapped condition order) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE id = a.b_id); + QUERY PLAN +------------------------------ + Seq Scan on a + Filter: (b_id IS NOT NULL) +(2 rows) + +-- should not remove semi join (since not using equals) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE id >= a.b_id); + QUERY PLAN +----------------------------------------- + Nested Loop Semi Join + -> Seq Scan on a + -> Index Only Scan using b_pkey on b + Index Cond: (id >= a.b_id) +(4 rows) + +-- should not remove semi join +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE b_id+0 IN(SELECT id FROM b); + QUERY PLAN +------------------------------------ + Hash Semi Join + Hash Cond: ((a.b_id + 0) = b.id) + -> Seq Scan on a + -> Hash + -> Seq Scan on b +(5 rows) + +-- should not remove semi join +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE b_id IN(SELECT id+0 FROM b); + QUERY PLAN +------------------------------------ + Hash Semi Join + Hash Cond: (a.b_id = (b.id + 0)) + -> Seq Scan on a + -> Hash + -> Seq Scan on b +(5 rows) + +-- should not remove semi join (wrong column) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE id IN(SELECT id FROM b); + QUERY PLAN +---------------------------- + Hash Semi Join + Hash Cond: (a.id = b.id) + -> Seq Scan on a + -> Hash + -> Seq Scan on b +(5 rows) + +ROLLBACK; +BEGIN; +-- Semi join removal code with 2 column foreign keys +CREATE TEMP TABLE b (id1 INT NOT NULL, id2 INT NOT NULL, PRIMARY KEY(id1,id2)); +CREATE TEMP TABLE a (id INT NOT NULL PRIMARY KEY, b_id1 INT, b_id2 INT); +ALTER TABLE a ADD CONSTRAINT a_b_id1_b_id2_fkey FOREIGN KEY (b_id1,b_id2) REFERENCES b(id1,id2) MATCH SIMPLE; +-- should remove semi join to b +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id2 = id2); + QUERY PLAN +--------------------------------------------------------- + Seq Scan on a + Filter: ((b_id1 IS NOT NULL) AND (b_id2 IS NOT NULL)) +(2 rows) + +-- should remove anti join to b +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE NOT EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id2 = id2); + QUERY PLAN +------------------------------------------------ + Seq Scan on a + Filter: ((b_id1 IS NULL) OR (b_id2 IS NULL)) +(2 rows) + +-- should not remove semi join to b (extra condition) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id2 = id2 AND a.b_id2 >= id2); + QUERY PLAN +-------------------------------------------------------- + Hash Semi Join + Hash Cond: ((a.b_id1 = b.id1) AND (a.b_id2 = b.id2)) + Join Filter: (a.b_id2 >= b.id2) + -> Seq Scan on a + -> Hash + -> Seq Scan on b +(6 rows) + +-- should not remove semi join to b (wrong operator) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 > id1 AND a.b_id2 < id2); + QUERY PLAN +----------------------------------------------------------- + Nested Loop Semi Join + -> Seq Scan on a + -> Index Only Scan using b_pkey on b + Index Cond: ((id1 < a.b_id1) AND (id2 > a.b_id2)) +(4 rows) + +-- should not remove semi join (only checking id1) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1); + QUERY PLAN +--------------------------------- + Hash Join + Hash Cond: (a.b_id1 = b.id1) + -> Seq Scan on a + -> Hash + -> HashAggregate + Group Key: b.id1 + -> Seq Scan on b +(7 rows) + +-- should not remove semi join (only checking id2) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id2 = id2); + QUERY PLAN +--------------------------------- + Hash Join + Hash Cond: (a.b_id2 = b.id2) + -> Seq Scan on a + -> Hash + -> HashAggregate + Group Key: b.id2 + -> Seq Scan on b +(7 rows) + +-- should not remove semi join (checking wrong columns) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id2 = id1 AND a.b_id1 = id2); + QUERY PLAN +-------------------------------------------------------- + Hash Join + Hash Cond: ((a.b_id2 = b.id1) AND (a.b_id1 = b.id2)) + -> Seq Scan on a + -> Hash + -> Seq Scan on b +(5 rows) + +-- should not remove semi join (no check for id2) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id2 = id1 AND a.b_id1 = id1); + QUERY PLAN +----------------------------------------- + Nested Loop Semi Join + -> Seq Scan on a + Filter: (b_id2 = b_id1) + -> Index Only Scan using b_pkey on b + Index Cond: (id1 = a.b_id2) +(5 rows) + +-- should not remove semi join (no check for b_id2) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id1 = id2); + QUERY PLAN +----------------------------------- + Hash Join + Hash Cond: (a.b_id1 = b.id1) + -> Seq Scan on a + -> Hash + -> Seq Scan on b + Filter: (id1 = id2) +(6 rows) + +-- should not remove semi join (no check for b_id2) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id1 = id1); + QUERY PLAN +--------------------------------------- + Hash Join + Hash Cond: (a.b_id1 = b.id1) + -> Seq Scan on a + -> Hash + -> HashAggregate + Group Key: b.id1, b.id1 + -> Seq Scan on b +(7 rows) + +-- Check that the IS NULL and IS NOT NULL filters are not added +-- for columns which have a NOT NULL constraint. +ALTER TABLE a ALTER COLUMN b_id1 SET NOT NULL; +-- Should only filter on b_id2 IS NOT NULL +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id2 = id2); + QUERY PLAN +------------------------------- + Seq Scan on a + Filter: (b_id2 IS NOT NULL) +(2 rows) + +-- Should only filter on b_id2 IS NULL +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE NOT EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id2 = id2); + QUERY PLAN +--------------------------- + Seq Scan on a + Filter: (b_id2 IS NULL) +(2 rows) + +ALTER TABLE a ALTER COLUMN b_id2 SET NOT NULL; +-- No IS NOT NULL filters should be added. +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id2 = id2); + QUERY PLAN +--------------- + Seq Scan on a +(1 row) + +-- Since now neither b_id1 or b_id2 can be NULL this query can't +-- produce any records. Check that we get a One-Time Filter: false +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE NOT EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id2 = id2); + QUERY PLAN +-------------------------- + Result + One-Time Filter: false + -> Seq Scan on a +(3 rows) + +ROLLBACK; +BEGIN WORK; +-- In this test we want to ensure that ANTI JOIN removal does not +-- occur when there are pending foreign key triggers. +-- We test this by updating a relation which is referenced by a foreign key +-- and then executing another query which would normally allow the anti +-- join to be removed. If the anti join was removed then the table +-- records_violating_fkey would be empty, but here we'll ensure that +-- the record that we update ends up violating the foreign key. +CREATE TABLE j2 (id INT NOT NULL PRIMARY KEY); +CREATE TABLE j1 ( + id INT PRIMARY KEY, + j2_id INT NOT NULL REFERENCES j2 (id) MATCH FULL ON DELETE CASCADE ON UPDATE CASCADE +); +INSERT INTO j2 VALUES(10),(20); +INSERT INTO j1 VALUES(1,10),(2,20); +-- create a table to store records that 'violate' the fkey. +CREATE TABLE records_violating_fkey (j2_id INT NOT NULL); +CREATE OR REPLACE FUNCTION j1_update() RETURNS TRIGGER AS $$ +BEGIN + INSERT INTO records_violating_fkey SELECT j2_id FROM j1 WHERE NOT EXISTS(SELECT 1 FROM j2 WHERE j2_id = j2.id); + RETURN NEW; + END; +$$ LANGUAGE plpgsql; +CREATE TRIGGER j1_update_trigger BEFORE UPDATE ON j2 FOR EACH ROW EXECUTE PROCEDURE j1_update(); +-- This update statement will cause some foreign key triggers to be queued. +-- The trigger defined above will fire which will cause all records which +-- currently violate the foreign key to be inserted into the records_violating_fkey +-- table. The intended behaviour of this is that we'll see records violating the +-- foreign key, however if we incorrectly performed an ANTI JOIN removal, then +-- we wouldn't see this violation record, as we'd wrongly assume that the query +-- could not produce any records. +UPDATE j2 SET id = id + 1; +SELECT * FROM records_violating_fkey; + j2_id +------- + 10 +(1 row) + +ROLLBACK; create temp table parent (k int primary key, pd int); create temp table child (k int unique, cd int); insert into parent values (1, 10), (2, 20), (3, 30); diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index fa3e068..5ec5016 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -973,6 +973,175 @@ select i8.* from int8_tbl i8 left join (select f1 from int4_tbl group by f1) i4 rollback; +BEGIN; + +-- Test join removals for semi and anti joins +CREATE TEMP TABLE b (id INT NOT NULL PRIMARY KEY, val INT); +CREATE TEMP TABLE a (id INT NOT NULL PRIMARY KEY, b_id INT REFERENCES b(id)); + +-- should remove semi join to b +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE b_id IN(SELECT id FROM b); + +-- should remove semi join to b +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id = id); + +-- should remove anti join to b +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE NOT EXISTS(SELECT 1 FROM b WHERE a.b_id = id); + +-- should not remove anti join as id > 100 will void +-- the foreign key's guarantee that 1 will exist. +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE NOT EXISTS(SELECT 1 FROM b WHERE a.b_id = id AND id > 100); + +-- should not remove anti join as val is not part of the foreign key. +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE NOT EXISTS(SELECT 1 FROM b WHERE a.b_id = id AND val = id); + +-- should remove semi join to b (swapped condition order) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE id = a.b_id); + +-- should not remove semi join (since not using equals) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE id >= a.b_id); + +-- should not remove semi join +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE b_id+0 IN(SELECT id FROM b); + +-- should not remove semi join +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE b_id IN(SELECT id+0 FROM b); + +-- should not remove semi join (wrong column) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE id IN(SELECT id FROM b); + +ROLLBACK; + +BEGIN; + +-- Semi join removal code with 2 column foreign keys + +CREATE TEMP TABLE b (id1 INT NOT NULL, id2 INT NOT NULL, PRIMARY KEY(id1,id2)); +CREATE TEMP TABLE a (id INT NOT NULL PRIMARY KEY, b_id1 INT, b_id2 INT); + +ALTER TABLE a ADD CONSTRAINT a_b_id1_b_id2_fkey FOREIGN KEY (b_id1,b_id2) REFERENCES b(id1,id2) MATCH SIMPLE; + +-- should remove semi join to b +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id2 = id2); + +-- should remove anti join to b +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE NOT EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id2 = id2); + +-- should not remove semi join to b (extra condition) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id2 = id2 AND a.b_id2 >= id2); + +-- should not remove semi join to b (wrong operator) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 > id1 AND a.b_id2 < id2); + +-- should not remove semi join (only checking id1) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1); + +-- should not remove semi join (only checking id2) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id2 = id2); + +-- should not remove semi join (checking wrong columns) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id2 = id1 AND a.b_id1 = id2); + +-- should not remove semi join (no check for id2) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id2 = id1 AND a.b_id1 = id1); + +-- should not remove semi join (no check for b_id2) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id1 = id2); + +-- should not remove semi join (no check for b_id2) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id1 = id1); + + +-- Check that the IS NULL and IS NOT NULL filters are not added +-- for columns which have a NOT NULL constraint. +ALTER TABLE a ALTER COLUMN b_id1 SET NOT NULL; + +-- Should only filter on b_id2 IS NOT NULL +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id2 = id2); + +-- Should only filter on b_id2 IS NULL +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE NOT EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id2 = id2); + +ALTER TABLE a ALTER COLUMN b_id2 SET NOT NULL; + +-- No IS NOT NULL filters should be added. +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id2 = id2); + +-- Since now neither b_id1 or b_id2 can be NULL this query can't +-- produce any records. Check that we get a One-Time Filter: false +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE NOT EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id2 = id2); + +ROLLBACK; + +BEGIN WORK; + +-- In this test we want to ensure that ANTI JOIN removal does not +-- occur when there are pending foreign key triggers. +-- We test this by updating a relation which is referenced by a foreign key +-- and then executing another query which would normally allow the anti +-- join to be removed. If the anti join was removed then the table +-- records_violating_fkey would be empty, but here we'll ensure that +-- the record that we update ends up violating the foreign key. + +CREATE TABLE j2 (id INT NOT NULL PRIMARY KEY); +CREATE TABLE j1 ( + id INT PRIMARY KEY, + j2_id INT NOT NULL REFERENCES j2 (id) MATCH FULL ON DELETE CASCADE ON UPDATE CASCADE +); + +INSERT INTO j2 VALUES(10),(20); +INSERT INTO j1 VALUES(1,10),(2,20); + +-- create a table to store records that 'violate' the fkey. +CREATE TABLE records_violating_fkey (j2_id INT NOT NULL); + +CREATE OR REPLACE FUNCTION j1_update() RETURNS TRIGGER AS $$ +BEGIN + INSERT INTO records_violating_fkey SELECT j2_id FROM j1 WHERE NOT EXISTS(SELECT 1 FROM j2 WHERE j2_id = j2.id); + RETURN NEW; + END; +$$ LANGUAGE plpgsql; + +CREATE TRIGGER j1_update_trigger BEFORE UPDATE ON j2 FOR EACH ROW EXECUTE PROCEDURE j1_update(); + +-- This update statement will cause some foreign key triggers to be queued. +-- The trigger defined above will fire which will cause all records which +-- currently violate the foreign key to be inserted into the records_violating_fkey +-- table. The intended behaviour of this is that we'll see records violating the +-- foreign key, however if we incorrectly performed an ANTI JOIN removal, then +-- we wouldn't see this violation record, as we'd wrongly assume that the query +-- could not produce any records. + +UPDATE j2 SET id = id + 1; + +SELECT * FROM records_violating_fkey; + +ROLLBACK; + create temp table parent (k int primary key, pd int); create temp table child (k int unique, cd int); insert into parent values (1, 10), (2, 20), (3, 30);