diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 9bf0098..88c8d98 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -3887,6 +3887,17 @@ afterTriggerInvokeEvents(AfterTriggerEventList *events, return all_fired; } +/* ---------- + * AfterTriggerQueueIsEmpty() + * + * True if there are no pending triggers in the queue. + * ---------- + */ +bool +AfterTriggerQueueIsEmpty(void) +{ + return (afterTriggers->query_depth == -1); +} /* ---------- * AfterTriggerBeginXact() diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index b7aff37..63dbc1b 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -32,6 +32,7 @@ static EquivalenceMember *add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids, Relids nullable_relids, bool is_child, Oid datatype); +static void update_rel_class_joins(PlannerInfo *root); static void generate_base_implied_equalities_const(PlannerInfo *root, EquivalenceClass *ec); static void generate_base_implied_equalities_no_const(PlannerInfo *root, @@ -725,7 +726,6 @@ void generate_base_implied_equalities(PlannerInfo *root) { ListCell *lc; - Index rti; foreach(lc, root->eq_classes) { @@ -752,6 +752,19 @@ generate_base_implied_equalities(PlannerInfo *root) * This is also a handy place to mark base rels (which should all exist by * now) with flags showing whether they have pending eclass joins. */ + update_rel_class_joins(root); +} + +/* + * update_rel_class_joins + * Process each relation in the PlannerInfo to update the + * has_eclass_joins flag + */ +static void +update_rel_class_joins(PlannerInfo *root) +{ + Index rti; + for (rti = 1; rti < root->simple_rel_array_size; rti++) { RelOptInfo *brel = root->simple_rel_array[rti]; @@ -764,6 +777,63 @@ generate_base_implied_equalities(PlannerInfo *root) } /* + * remove_rel_from_eclass + * Remove all eclass members that belong to relid and also any classes + * which have been left empty as a result of removing a member. + */ +void +remove_rel_from_eclass(PlannerInfo *root, int relid) +{ + ListCell *l, + *nextl, + *eqm, + *eqmnext; + + bool removedany = false; + + /* Strip all traces of this relation out of the eclasses */ + for (l = list_head(root->eq_classes); l != NULL; l = nextl) + { + EquivalenceClass *ec = (EquivalenceClass *) lfirst(l); + + nextl = lnext(l); + + for (eqm = list_head(ec->ec_members); eqm != NULL; eqm = eqmnext) + { + EquivalenceMember *em = (EquivalenceMember *) lfirst(eqm); + + eqmnext = lnext(eqm); + + if (IsA(em->em_expr, Var)) + { + Var *var = (Var *) em->em_expr; + + if (var->varno == relid) + { + list_delete_ptr(ec->ec_members, em); + removedany = true; + } + } + } + + /* + * If we've removed the last member from the EquivalenceClass then we'd + * better delete the entire entry. + */ + if (list_length(ec->ec_members) == 0) + list_delete_ptr(root->eq_classes, ec); + } + + /* + * If we removed any eclass members then this may have changed if a + * relation has an eclass join or not, we'd better force an update + * of this + */ + if (removedany) + update_rel_class_joins(root); +} + +/* * generate_base_implied_equalities when EC contains pseudoconstant(s) */ static void diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index 773f8a4..569580f 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -22,17 +22,34 @@ */ #include "postgres.h" +#include "commands/trigger.h" +#include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" +#include "nodes/relation.h" #include "optimizer/clauses.h" #include "optimizer/joininfo.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/planmain.h" +#include "optimizer/restrictinfo.h" #include "optimizer/tlist.h" #include "utils/lsyscache.h" /* local functions */ -static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo); +static bool leftjoin_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo); +static bool semiorantijoin_is_removable(PlannerInfo *root, + SpecialJoinInfo *sjinfo, List **leftrelcolumns, + RelOptInfo **leftrel); +void convert_semijoin_to_isnotnull_quals(PlannerInfo *root, RelOptInfo *rel, + List *columnlist); +void convert_antijoin_to_isnull_quals(PlannerInfo *root, RelOptInfo *rel, + List *columnlist); +static bool relation_has_foreign_key_for(PlannerInfo *root, JoinType jointype, + RelOptInfo *rel, RelOptInfo *referencedrel, + List *referencing_vars, List *index_vars, + List *operator_list); +static bool expressions_match_foreign_key(ForeignKeyInfo *fk, List *fkvars, + List *indexvars, List *operators); static void remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids); static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved); @@ -53,8 +70,8 @@ remove_useless_joins(PlannerInfo *root, List *joinlist) ListCell *lc; /* - * We are only interested in relations that are left-joined to, so we can - * scan the join_info_list to find them easily. + * We are only interested in relations that are left, semi or anti-joined + * to, so we can scan the join_info_list to find them easily. */ restart: foreach(lc, root->join_info_list) @@ -63,9 +80,37 @@ restart: int innerrelid; int nremoved; - /* Skip if not removable */ - if (!join_is_removable(root, sjinfo)) - continue; + if (sjinfo->jointype == JOIN_LEFT) + { + /* Skip if not removable */ + if (!leftjoin_is_removable(root, sjinfo)) + continue; + } + else if (sjinfo->jointype == JOIN_SEMI) + { + List *columnlist; + RelOptInfo *rel; + + /* Skip if not removable */ + if (!semiorantijoin_is_removable(root, sjinfo, &columnlist, &rel)) + continue; + + Assert(columnlist != NIL); + convert_semijoin_to_isnotnull_quals(root, rel, columnlist); + } + else if (sjinfo->jointype == JOIN_ANTI) + { + List *columnlist; + RelOptInfo *rel; + + if (!semiorantijoin_is_removable(root, sjinfo, &columnlist, &rel)) + continue; + + Assert(columnlist != NIL); + convert_antijoin_to_isnull_quals(root, rel, columnlist); + } + else + continue; /* we don't support this join type */ /* * Currently, join_is_removable can only succeed when the sjinfo's @@ -136,7 +181,7 @@ clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids, } /* - * join_is_removable + * leftjoin_is_removable * Check whether we need not perform this special join at all, because * it will just duplicate its left input. * @@ -147,7 +192,7 @@ clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids, * above the join. */ static bool -join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) +leftjoin_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) { int innerrelid; RelOptInfo *innerrel; @@ -157,12 +202,13 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) ListCell *l; int attroff; + Assert(sjinfo->jointype == JOIN_LEFT); + /* - * Must be a non-delaying left join to a single baserel, else we aren't + * Must be a non-delaying join to a single baserel, else we aren't * going to be able to do anything with it. */ - if (sjinfo->jointype != JOIN_LEFT || - sjinfo->delay_upper_joins || + if (sjinfo->delay_upper_joins || bms_membership(sjinfo->min_righthand) != BMS_SINGLETON) return false; @@ -367,6 +413,503 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) return false; } +/* + * semiorantijoin_is_removable + * True if we can remove this semi or anti join. + * + * Detecting if a SEMI or ANTI join may be removed is quite different to the + * detection code for left joins. For these we have no need to check if vars + * from the join are used in the query as the EXISTS and IN() syntax disallow + * this. In order to prove that a semi or anti join is redundant we must ensure + * that a foreign key exists on the left side of the join which references the + * table on the right side of the join. This means that we can only support a + * single table on either side of the join. We must also ensure that the join + * condition matches all the foreign key columns to each index column on the + * referenced table. If any columns are missing then we cannot be sure we'll + * get at most 1 record back, and if there are any extra conditions that are + * not in the foreign key then we cannot be sure that the join condition will + * produce at least 1 matching row. + * + * If we manage to find a foreign key which will allow the join to be removed + * then the calling may have to add NULL checking to the query in place of the + * join. For example if we determine that the join to the table b is not needed + * due to the existence of a foreign key on a.b_id referencing b.id in the + * following query: + * + * SELECT * FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id = b.id); + * + * Then the only possible records that could be returned from a are the ones + * WHERE b_id IS NULL. + * + * If this function returns True, then leftrelcolumns will be populated with + * the list of columns from the left relation which exist in the join + * condition, leftrel will be set to the RelOptInfo of the left hand relation. + */ +static bool +semiorantijoin_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo, + List **leftrelcolumns, RelOptInfo **leftrel) +{ + int innerrelid; + int outerrelid; + RelOptInfo *innerrel; + RelOptInfo *outerrel; + ListCell *lc; + List *referencing_vars; + List *index_vars; + List *operator_list; + + Assert(sjinfo->jointype == JOIN_SEMI || sjinfo->jointype == JOIN_ANTI); + + /* + * We mustn't allow semi or anti joins to be removed if there are any + * pending foreign key triggers in the queue. This could happen if we + * are planning a query that has been executed from within a volatile + * function and the query which called this volatile function has made some + * changes to a table referenced by a foreign key. The reason for this is + * that any updates to a table which is referenced by a foreign key + * constraint will only have the referencing tables updated after the + * command is complete, so there is a window of time where records may + * violate the foreign key constraint. The following code intends to + * maintain correct results by disabling the join removal if there's a + * possibility that there are records which violate the foreign key, though + * this code is quite naive and we will simply just disallow removal of + * semi and anti joins if there's anything in the foreign key trigger + * queue. A more complete solution would be able to check if the relation + * in question has pending triggers, but this will do for now. + */ + if (!AfterTriggerQueueIsEmpty()) + return false; + + /* + * We'll start by checking that the left hand relation is a singleton + * and that it has at least 1 foreign key. A lack of foreign key seems + * like a more likely possibility to allow us to exit early than checking + * the right hand rel has any indexes. + */ + if (sjinfo->delay_upper_joins || + bms_membership(sjinfo->min_lefthand) != BMS_SINGLETON) + return false; + + outerrelid = bms_singleton_member(sjinfo->min_lefthand); + outerrel = find_base_rel(root, outerrelid); + + /* + * There's no possibility to remove the join if the outer rel is not a + * baserel or the baserel has no foreign keys defined. + */ + if (outerrel->reloptkind != RELOPT_BASEREL || + outerrel->rtekind != RTE_RELATION || + outerrel->fklist == NIL) + return false; + + if (bms_membership(sjinfo->min_righthand) != BMS_SINGLETON) + return false; + + innerrelid = bms_singleton_member(sjinfo->min_righthand); + innerrel = find_base_rel(root, innerrelid); + + /* + * If the right hand relation is not a base rel then it can't possibly be + * referenced by a foreign key. The same goes if there's no unique indexes + * on the relation, however, to keep it simple here we'll make do with + * checking if there's any indexes, as if there's no indexes then there's + * certainly no unique indexes. + */ + if (innerrel->reloptkind != RELOPT_BASEREL || + innerrel->rtekind != RTE_RELATION || + innerrel->indexlist == NIL) + return false; + + referencing_vars = NIL; + index_vars = NIL; + operator_list = NIL; + + /* + * Pre-process the join quals into lists that contain the vars from either + * side of the joins and also a list which contains the operators from the + * join conditions. At this stage we may still discover that the join + * cannot be removed if, for example we find a qual that does not reference + * both sides of the join. + * + * referencing_vars will contain a list of Vars from the left hand + * relation, these are the expressions that we'll check against the + * referencing side of the foreign key. + * + * index_vars will contain a list of Vars from the right hand relation, + * these are the expressions that we'll check on the referenced side of the + * foreign key. + * + * operator_list, this is list of operator Oids that we'll need to ensure + * are compatible with the operator specified in the foreign key. + */ + foreach(lc, sjinfo->join_quals) + { + OpExpr *opexpr = (OpExpr *) lfirst(lc); + Oid opno; + Node *left_expr; + Node *right_expr; + Relids left_varnos; + Relids right_varnos; + Relids all_varnos; + Oid opinputtype; + + /* Is it a binary opclause? */ + if (!IsA(opexpr, OpExpr) || + list_length(opexpr->args) != 2) + { + /* We only accept quals which reference both sides of the join. */ + return false; + } + + left_expr = linitial(opexpr->args); + + /* Punt if it's anything apart from a Var */ + if (!IsA(left_expr, Var)) + return false; + + right_expr = lsecond(opexpr->args); + + /* Punt if it's anything apart from a Var */ + if (!IsA(right_expr, Var)) + return false; + + opinputtype = exprType(left_expr); + opno = opexpr->opno; + + /* + * FIXME: it would be nice to fast path out if the + * operator couldn't possibly be used in a foreign + * key, but what to use to detect this? + */ + if (!op_mergejoinable(opno, opinputtype)) + return false; + + left_varnos = pull_varnos(left_expr); + right_varnos = pull_varnos(right_expr); + all_varnos = bms_union(left_varnos, right_varnos); + + /* + * Check if the clause matches both sides of the join. If only 1 side + * is matched then, since we're dealing with a SEMI or ANTI join then + * it must be from the inner side. So this qual could restrict the + * results so that we can't be sure the foreign key will cause us to + * match at least 1 record in the relation. In this case we must punt. + */ + if (!bms_overlap(all_varnos, sjinfo->syn_righthand) || + bms_is_subset(all_varnos, sjinfo->syn_righthand)) + return false; + + /* check rel membership of arguments */ + if (!bms_is_empty(right_varnos) && + bms_is_subset(right_varnos, sjinfo->syn_righthand) && + !bms_overlap(left_varnos, sjinfo->syn_righthand)) + { + /* typical case, right_expr is RHS variable */ + } + else if (!bms_is_empty(left_varnos) && + bms_is_subset(left_varnos, sjinfo->syn_righthand) && + !bms_overlap(right_varnos, sjinfo->syn_righthand)) + { + Node *tmp; + /* flipped case, left_expr is RHS variable */ + opno = get_commutator(opno); + if (!OidIsValid(opno)) + return false; + + /* swap the operands */ + tmp = left_expr; + left_expr = right_expr; + right_expr = tmp; + } + else + return false; + + /* so far so good, keep building lists */ + referencing_vars = lappend(referencing_vars, left_expr); + operator_list = lappend_oid(operator_list, opno); + index_vars = lappend(index_vars, right_expr); + } + + /* no suitable join condition items? Then we can't remove the join */ + if (referencing_vars == NIL) + return false; + + /* + * Now that we've built the join Var lists we can now check if there are + * any foreign keys that will support us removing this join. + */ + if (relation_has_foreign_key_for(root, sjinfo->jointype, outerrel, + innerrel, referencing_vars, index_vars, operator_list)) + { + *leftrel = outerrel; + *leftrelcolumns = referencing_vars; + return true; + } + + return false; +} + +/* + * convert_semijoin_to_isnotnull_quals + * Adds any required "col IS NOT NULL" quals which are required to ensure + * that the query remains equivalent to what it was before the semi join + * was removed. + */ +void +convert_semijoin_to_isnotnull_quals(PlannerInfo *root, RelOptInfo *rel, List *columnlist) +{ + ListCell *l; + + /* + * If a semi join has been successfully removed by the join removal code, + * then a foreign key must exist that proves the join to not be required. + * For us this means that we can be sure each column that is part of that + * foreign key, which has a non-null value mustn't be filtered out as there + * must be a record in the foreign relation which these records reference. + * This is not the case for columns which are part of the foreign key but + * have a NULL value. These records obviously aren't referencing any + * foreign tuple, so in order to get the query to produce the same the same + * result, we'll just filter these NULLs out. We do this by adding items to + * the WHERE clause, such as: + * "WHERE fkeycol1 IS NOT NULL AND fkeycol2 IS NOT NULL", though we needn't + * bother doing this if the column has a NOT NULL constraint. + */ + + foreach(l, columnlist) + { + Var *var = (Var *) lfirst(l); + RangeTblEntry *rte; + + /* should be a var if it came from a foreign key */ + Assert(IsA(var, Var)); + + rte = root->simple_rte_array[var->varno]; + + /* + * No point in adding a col IS NOT NULL if the column + * has a NOT NULL constraint defined for it. + */ + if (!get_attnotnull(rte->relid, var->varattno)) + { + RestrictInfo *rinfo; + NullTest *ntest = makeNode(NullTest); + + ntest->nulltesttype = IS_NOT_NULL; + ntest->arg = (Expr *) var; + ntest->argisrow = false; + + rinfo = make_restrictinfo((Expr *)ntest, false, false, false, + NULL, NULL, NULL); + rel->baserestrictinfo = lappend(rel->baserestrictinfo, rinfo); + } + } +} + +/* + * convert_antijoin_to_isnull_quals + * Adds any required "col IS NULL" quals which are required to ensure + * that the query remains equivalent to what it was before the anti join + * was removed. + */ +void +convert_antijoin_to_isnull_quals(PlannerInfo *root, RelOptInfo *rel, List *columnlist) +{ + ListCell *l; + RestrictInfo *rinfo; + Expr *expr; + List *isnulltests = NIL; + + /* + * If an anti join has been successfully removed by the join removal code, + * then a foreign key must exist that proves the join to not be required. + * For us this means that in order to make the query produce the same + * result as if the anti join had not been removed then we should only be + * allowing tuples where any of the foreign key columns has a NULL value to + * make it through. We do this simply by adding items to the WHERE clause + * of the query, such as "WHERE fkeycol1 IS NULL OR fkeycol2 IS NULL", + * though we can skip any columns that have a NOT NULL constraint. If all + * of the columns happen to have NOT NULL constraints defined, then it's + * not possible for the query to produce any records at all. In that case + * we add "WHERE false" to the WHERE clause. + */ + + foreach(l, columnlist) + { + Var *var = (Var *) lfirst(l); + RangeTblEntry *rte; + + /* should be a var if it came from a foreign key */ + Assert(IsA(var, Var)); + + rte = root->simple_rte_array[var->varno]; + + /* + * No point in adding a col IS NULL if the column + * has a NOT NULL constraint defined for it. + */ + if (!get_attnotnull(rte->relid, var->varattno)) + { + NullTest *ntest = makeNode(NullTest); + ntest->nulltesttype = IS_NULL; + ntest->arg = (Expr *) var; + ntest->argisrow = false; + + isnulltests = lappend(isnulltests, ntest); + } + } + + /* + * If we still have an empty list by the time we get to here then it would + * appear that each column has a NOT NULL constraint. In this case then + * it's not possible for the query to return any records, so we can simply + * add a "WHERE false" constant expression and tell the planner to check + * for gating quals. + */ + if (isnulltests == NIL) + { + expr = (Expr *) makeBoolConst(false, false); + rinfo = make_restrictinfo(expr, false, false, true, NULL, NULL, NULL); + + /* tell createplan.c to check for gating quals */ + root->hasPseudoConstantQuals = true; + } + else + { + /* + * Now we can build a RestrictInfo for the newly created IS NULL tests. + * If there's only 1 test expression then we can just make the + * RestrictInfo use that expression, if there's more than 1 we'll need + * to "OR" all of these together. + */ + if (list_length(isnulltests) == 1) + expr = (Expr *) linitial(isnulltests); + else + expr = make_orclause(isnulltests); + + rinfo = make_restrictinfo(expr, false, false, false, NULL, NULL, NULL); + } + + rel->baserestrictinfo = lappend(rel->baserestrictinfo, rinfo); +} + +/* + * relation_has_foreign_key_for + * Checks if rel has a foreign key which references referencedrel with the + * given list of expressions. + * + * For the match to succeed: + * referencing_vars must match the columns defined in the foreign key. + * index_vars must match the columns defined in the index for the foreign key. + */ +static bool +relation_has_foreign_key_for(PlannerInfo *root, JoinType jointype, + RelOptInfo *rel, RelOptInfo *referencedrel, + List *referencing_vars, List *index_vars, List *operator_list) +{ + ListCell *lc; + Oid refreloid; + + /* + * Look up the Oid of the referenced relation. We'll need to see if + * the referencing relation has a foreign key which references this + */ + refreloid = root->simple_rte_array[referencedrel->relid]->relid; + + Assert(list_length(referencing_vars) > 0); + Assert(list_length(referencing_vars) == list_length(index_vars)); + Assert(list_length(referencing_vars) == list_length(operator_list)); + + /* + * Search through each foreign key on the referencing relation and try + * to find one which references the referenced relation. If we find one + * then we'll see if the join condition matches the foreign key definition. + */ + foreach(lc, rel->fklist) + { + ForeignKeyInfo *fk = (ForeignKeyInfo *) lfirst(lc); + + if (fk->confrelid == refreloid) + { + if (expressions_match_foreign_key(fk, referencing_vars, + index_vars, operator_list)) + return true; + } + } + + return false; +} + +/* + * expressions_match_foreign_key + * True if the given fkvars, indexvars and operators will match + * exactly 1 record in the referenced relation of the foreign key. + * + * Note: This function expects fkvars and indexvars to only contain Var types. + * Expression indexes are not supported by foreign keys. + */ +static bool +expressions_match_foreign_key(ForeignKeyInfo *fk, List *fkvars, + List *indexvars, List *operators) +{ + ListCell *lc; + ListCell *lc2; + ListCell *lc3; + int col; + + Assert(list_length(fkvars) == list_length(indexvars)); + Assert(list_length(fkvars) == list_length(operators)); + + /* + * Fast path out if there's not enough conditions to match each column in + * the foreign key. Note that we cannot check that the number of + * expressions are equal here since it would cause any expressions which + * are duplicated not to match. + */ + if (list_length(fkvars) < fk->conncols) + return false; + + /* + * For each column defined in the foreign key we must ensure that we find + * a matching var in fkvars for the referencing side of the foreign key and + * also a matching indexvar on the referenced side of the foreign key. + */ + for (col = 0; col < fk->conncols; col++) + { + bool matched = false; + + forthree(lc, fkvars, lc2, indexvars, lc3, operators) + { + Var *expr = (Var *) lfirst(lc); + Var *idxexpr = (Var *) lfirst(lc2); + Oid opr = lfirst_oid(lc3); + + Assert(IsA(expr, Var)); + Assert(IsA(idxexpr, Var)); + + /* + * Search for a column in the foreign key which matches the current + * join condition expression. + */ + if (fk->conkey[col] == expr->varattno && + fk->confkey[col] == idxexpr->varattno && + equality_ops_are_compatible(opr, fk->conpfeqop[col])) + { + matched = true; + break; + } + } + + /* + * Did we find anything matching the fk col? If not then we'll + * return no match. + */ + if (!matched) + return false; + } + + return true; /* matched */ +} + /* * Remove the target relid from the planner's data structures, having @@ -393,6 +936,9 @@ remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids) */ rel->reloptkind = RELOPT_DEADREL; + /* Strip out any eclass members that belong to this rel */ + remove_rel_from_eclass(root, relid); + /* * Remove references to the rel from other baserels' attr_needed arrays. */ diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index b2becfa..0b1c1a6 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -25,7 +25,9 @@ #include "access/transam.h" #include "access/xlog.h" #include "catalog/catalog.h" +#include "catalog/pg_constraint.h" #include "catalog/heap.h" +#include "catalog/pg_type.h" #include "foreign/fdwapi.h" #include "miscadmin.h" #include "nodes/makefuncs.h" @@ -38,6 +40,7 @@ #include "parser/parsetree.h" #include "rewrite/rewriteManip.h" #include "storage/bufmgr.h" +#include "utils/fmgroids.h" #include "utils/lsyscache.h" #include "utils/rel.h" #include "utils/snapmgr.h" @@ -89,6 +92,13 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, Relation relation; bool hasindex; List *indexinfos = NIL; + List *fkinfos = NIL; + Relation fkeyRel; + Relation fkeyRelIdx; + ScanKeyData fkeyScankey; + SysScanDesc fkeyScan; + HeapTuple tuple; + /* * We need not lock the relation since it was already locked, either by @@ -384,6 +394,111 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, heap_close(relation, NoLock); + ScanKeyInit(&fkeyScankey, + Anum_pg_constraint_conrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(relationObjectId)); + + fkeyRel = heap_open(ConstraintRelationId, AccessShareLock); + fkeyRelIdx = index_open(ConstraintRelidIndexId, AccessShareLock); + fkeyScan = systable_beginscan_ordered(fkeyRel, fkeyRelIdx, NULL, 1, &fkeyScankey); + + while ((tuple = systable_getnext_ordered(fkeyScan, ForwardScanDirection)) != NULL) + { + Form_pg_constraint con = (Form_pg_constraint) GETSTRUCT(tuple); + ForeignKeyInfo *fkinfo; + Datum adatum; + bool isNull; + ArrayType *arr; + int numkeys; + + /* Not a foreign key */ + if (con->contype != CONSTRAINT_FOREIGN) + continue; + + /* we're not interested unless the fk has been validated */ + if (!con->convalidated) + continue; + + fkinfo = (ForeignKeyInfo *) palloc(sizeof(ForeignKeyInfo)); + fkinfo->conindid = con->conindid; + fkinfo->confrelid = con->confrelid; + fkinfo->convalidated = con->convalidated; + fkinfo->conrelid = con->conrelid; + fkinfo->confupdtype = con->confupdtype; + fkinfo->confdeltype = con->confdeltype; + fkinfo->confmatchtype = con->confmatchtype; + + adatum = heap_getattr(tuple, Anum_pg_constraint_conkey, + RelationGetDescr(fkeyRel), &isNull); + + if (isNull) + elog(ERROR, "null conkey for constraint %u", + HeapTupleGetOid(tuple)); + + arr = DatumGetArrayTypeP(adatum); /* ensure not toasted */ + numkeys = ARR_DIMS(arr)[0]; + if (ARR_NDIM(arr) != 1 || + numkeys < 0 || + ARR_HASNULL(arr) || + ARR_ELEMTYPE(arr) != INT2OID) + elog(ERROR, "conkey is not a 1-D smallint array"); + + fkinfo->conkey = (int16 *) ARR_DATA_PTR(arr); + fkinfo->conncols = numkeys; + + adatum = heap_getattr(tuple, Anum_pg_constraint_confkey, + RelationGetDescr(fkeyRel), &isNull); + + if (isNull) + elog(ERROR, "null confkey for constraint %u", + HeapTupleGetOid(tuple)); + + arr = DatumGetArrayTypeP(adatum); /* ensure not toasted */ + numkeys = ARR_DIMS(arr)[0]; + + /* sanity check */ + if (numkeys != fkinfo->conncols) + elog(ERROR, "number of confkey elements does not equal conkey elements"); + + if (ARR_NDIM(arr) != 1 || + numkeys < 0 || + ARR_HASNULL(arr) || + ARR_ELEMTYPE(arr) != INT2OID) + elog(ERROR, "confkey is not a 1-D smallint array"); + + fkinfo->confkey = (int16 *) ARR_DATA_PTR(arr); + adatum = heap_getattr(tuple, Anum_pg_constraint_conpfeqop, + RelationGetDescr(fkeyRel), &isNull); + + if (isNull) + elog(ERROR, "null conpfeqop for constraint %u", + HeapTupleGetOid(tuple)); + + arr = DatumGetArrayTypeP(adatum); /* ensure not toasted */ + numkeys = ARR_DIMS(arr)[0]; + + /* sanity check */ + if (numkeys != fkinfo->conncols) + elog(ERROR, "number of conpfeqop elements does not equal conkey elements"); + + if (ARR_NDIM(arr) != 1 || + numkeys < 0 || + ARR_HASNULL(arr) || + ARR_ELEMTYPE(arr) != OIDOID) + elog(ERROR, "conpfeqop is not a 1-D smallint array"); + + fkinfo->conpfeqop = (Oid *) ARR_DATA_PTR(arr); + + fkinfos = lappend(fkinfos, fkinfo); + } + + rel->fklist = fkinfos; + systable_endscan_ordered(fkeyScan); + index_close(fkeyRelIdx, AccessShareLock); + heap_close(fkeyRel, AccessShareLock); + + /* * Allow a plugin to editorialize on the info we obtained from the * catalogs. Actions might include altering the assumed relation size, diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index c938c27..a0fb8eb 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -115,6 +115,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind) rel->lateral_relids = NULL; rel->lateral_referencers = NULL; rel->indexlist = NIL; + rel->fklist = NIL; rel->pages = 0; rel->tuples = 0; rel->allvisfrac = 0; @@ -377,6 +378,7 @@ build_join_rel(PlannerInfo *root, joinrel->lateral_relids = NULL; joinrel->lateral_referencers = NULL; joinrel->indexlist = NIL; + joinrel->fklist = NIL; joinrel->pages = 0; joinrel->tuples = 0; joinrel->allvisfrac = 0; diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index 4b5ef99..1d581a8 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -972,6 +972,33 @@ get_atttypetypmodcoll(Oid relid, AttrNumber attnum, ReleaseSysCache(tp); } +/* + * get_attnotnull + * + * Given the relation id and the attribute number, + * return the "attnotnull" field from the attribute relation. + */ +bool +get_attnotnull(Oid relid, AttrNumber attnum) +{ + HeapTuple tp; + + tp = SearchSysCache2(ATTNUM, + ObjectIdGetDatum(relid), + Int16GetDatum(attnum)); + if (HeapTupleIsValid(tp)) + { + Form_pg_attribute att_tup = (Form_pg_attribute) GETSTRUCT(tp); + bool result; + + result = att_tup->attnotnull; + ReleaseSysCache(tp); + return result; + } + else + return false; +} + /* ---------- COLLATION CACHE ---------- */ /* diff --git a/src/include/commands/trigger.h b/src/include/commands/trigger.h index d0b0356..34a75e4 100644 --- a/src/include/commands/trigger.h +++ b/src/include/commands/trigger.h @@ -181,6 +181,7 @@ extern void ExecBSTruncateTriggers(EState *estate, extern void ExecASTruncateTriggers(EState *estate, ResultRelInfo *relinfo); +extern bool AfterTriggerQueueIsEmpty(void); extern void AfterTriggerBeginXact(void); extern void AfterTriggerBeginQuery(void); extern void AfterTriggerEndQuery(EState *estate); diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index dacbe9c..8cf5c28 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -355,6 +355,8 @@ typedef struct PlannerInfo * lateral_referencers - relids of rels that reference this one laterally * indexlist - list of IndexOptInfo nodes for relation's indexes * (always NIL if it's not a table) + * fklist - list of ForeignKeyInfo's for relation's foreign key + * constraints. (always NIL if it's not a table) * pages - number of disk pages in relation (zero if not a table) * tuples - number of tuples in relation (not considering restrictions) * allvisfrac - fraction of disk pages that are marked all-visible @@ -448,6 +450,7 @@ typedef struct RelOptInfo Relids lateral_relids; /* minimum parameterization of rel */ Relids lateral_referencers; /* rels that reference me laterally */ List *indexlist; /* list of IndexOptInfo */ + List *fklist; /* list of ForeignKeyInfo */ BlockNumber pages; /* size estimates derived from pg_class */ double tuples; double allvisfrac; @@ -538,6 +541,20 @@ typedef struct IndexOptInfo bool amhasgetbitmap; /* does AM have amgetbitmap interface? */ } IndexOptInfo; +typedef struct ForeignKeyInfo +{ + Oid conindid; /* index supporting this constraint */ + Oid confrelid; /* relation referenced by foreign key */ + bool convalidated; /* constraint has been validated? */ + Oid conrelid; /* relation this constraint constrains */ + char confupdtype; /* foreign key's ON UPDATE action */ + char confdeltype; /* foreign key's ON DELETE action */ + char confmatchtype; /* foreign key's match type */ + int conncols; /* number of columns references */ + int16 *conkey; /* Columns of conrelid that the constraint applies to */ + int16 *confkey; /* columns of confrelid that foreign key references */ + Oid *conpfeqop; /* Operator list for comparing PK to FK */ +} ForeignKeyInfo; /* * EquivalenceClasses diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 9b22fda..00716c9 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -108,6 +108,7 @@ extern EquivalenceClass *get_eclass_for_sort_expr(PlannerInfo *root, Relids rel, bool create_it); extern void generate_base_implied_equalities(PlannerInfo *root); +extern void remove_rel_from_eclass(PlannerInfo *root, int relid); extern List *generate_join_implied_equalities(PlannerInfo *root, Relids join_relids, Relids outer_relids, diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index f46460a..3ec200a 100644 --- a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -70,6 +70,7 @@ extern Oid get_atttype(Oid relid, AttrNumber attnum); extern int32 get_atttypmod(Oid relid, AttrNumber attnum); extern void get_atttypetypmodcoll(Oid relid, AttrNumber attnum, Oid *typid, int32 *typmod, Oid *collid); +extern bool get_attnotnull(Oid relid, AttrNumber attnum); extern char *get_collation_name(Oid colloid); extern char *get_constraint_name(Oid conoid); extern Oid get_opclass_family(Oid opclass); diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 1cb1c51..8530733 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -3249,6 +3249,317 @@ select i8.* from int8_tbl i8 left join (select f1 from int4_tbl group by f1) i4 (1 row) rollback; +BEGIN; +-- Test join removals for semi and anti joins +CREATE TEMP TABLE b (id INT NOT NULL PRIMARY KEY); +CREATE TEMP TABLE a (id INT NOT NULL PRIMARY KEY, b_id INT REFERENCES b(id)); +-- should remove semi join to b +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE b_id IN(SELECT id FROM b); + QUERY PLAN +------------------------------ + Seq Scan on a + Filter: (b_id IS NOT NULL) +(2 rows) + +-- should remove semi join to b +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id = id); + QUERY PLAN +------------------------------ + Seq Scan on a + Filter: (b_id IS NOT NULL) +(2 rows) + +-- should remove anti join to b +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE NOT EXISTS(SELECT 1 FROM b WHERE a.b_id = id); + QUERY PLAN +-------------------------- + Seq Scan on a + Filter: (b_id IS NULL) +(2 rows) + +-- should not remove anti join as id > 100 will void +-- the foreign key's guarantee that 1 will exist. +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE NOT EXISTS(SELECT 1 FROM b WHERE a.b_id = id AND id > 100); + QUERY PLAN +----------------------------------------------- + Hash Anti Join + Hash Cond: (a.b_id = b.id) + -> Seq Scan on a + -> Hash + -> Bitmap Heap Scan on b + Recheck Cond: (id > 100) + -> Bitmap Index Scan on b_pkey + Index Cond: (id > 100) +(8 rows) + +-- should remove semi join to b (swapped condition order) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE id = a.b_id); + QUERY PLAN +------------------------------ + Seq Scan on a + Filter: (b_id IS NOT NULL) +(2 rows) + +-- should not remove semi join (since not using equals) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE id >= a.b_id); + QUERY PLAN +----------------------------------------- + Nested Loop Semi Join + -> Seq Scan on a + -> Index Only Scan using b_pkey on b + Index Cond: (id >= a.b_id) +(4 rows) + +-- should not remove semi join +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE b_id+0 IN(SELECT id FROM b); + QUERY PLAN +------------------------------------ + Hash Semi Join + Hash Cond: ((a.b_id + 0) = b.id) + -> Seq Scan on a + -> Hash + -> Seq Scan on b +(5 rows) + +-- should not remove semi join +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE b_id IN(SELECT id+0 FROM b); + QUERY PLAN +------------------------------------ + Hash Semi Join + Hash Cond: (a.b_id = (b.id + 0)) + -> Seq Scan on a + -> Hash + -> Seq Scan on b +(5 rows) + +-- should not remove semi join (wrong column) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE id IN(SELECT id FROM b); + QUERY PLAN +---------------------------- + Hash Join + Hash Cond: (b.id = a.id) + -> Seq Scan on b + -> Hash + -> Seq Scan on a +(5 rows) + +ROLLBACK; +BEGIN; +-- Semi join removal code with 2 column foreign keys +CREATE TEMP TABLE b (id1 INT NOT NULL, id2 INT NOT NULL, PRIMARY KEY(id1,id2)); +CREATE TEMP TABLE a (id INT NOT NULL PRIMARY KEY, b_id1 INT, b_id2 INT); +ALTER TABLE a ADD CONSTRAINT a_b_id1_b_id2_fkey FOREIGN KEY (b_id1,b_id2) REFERENCES b(id1,id2) MATCH SIMPLE; +-- should remove semi join to b +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id2 = id2); + QUERY PLAN +--------------------------------------------------------- + Seq Scan on a + Filter: ((b_id1 IS NOT NULL) AND (b_id2 IS NOT NULL)) +(2 rows) + +-- should remove anti join to b +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE NOT EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id2 = id2); + QUERY PLAN +------------------------------------------------ + Seq Scan on a + Filter: ((b_id1 IS NULL) OR (b_id2 IS NULL)) +(2 rows) + +-- should not remove semi join to b (extra condition) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id2 = id2 AND a.b_id2 >= id2); + QUERY PLAN +-------------------------------------------------------- + Hash Semi Join + Hash Cond: ((a.b_id1 = b.id1) AND (a.b_id2 = b.id2)) + Join Filter: (a.b_id2 >= b.id2) + -> Seq Scan on a + -> Hash + -> Seq Scan on b +(6 rows) + +-- should not remove semi join to b (wrong operator) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 > id1 AND a.b_id2 < id2); + QUERY PLAN +----------------------------------------------------------- + Nested Loop Semi Join + -> Seq Scan on a + -> Index Only Scan using b_pkey on b + Index Cond: ((id1 < a.b_id1) AND (id2 > a.b_id2)) +(4 rows) + +-- should not remove semi join (only checking id1) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1); + QUERY PLAN +--------------------------------- + Hash Join + Hash Cond: (a.b_id1 = b.id1) + -> Seq Scan on a + -> Hash + -> HashAggregate + Group Key: b.id1 + -> Seq Scan on b +(7 rows) + +-- should not remove semi join (only checking id2) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id2 = id2); + QUERY PLAN +--------------------------------- + Hash Join + Hash Cond: (a.b_id2 = b.id2) + -> Seq Scan on a + -> Hash + -> HashAggregate + Group Key: b.id2 + -> Seq Scan on b +(7 rows) + +-- should not remove semi join (checking wrong columns) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id2 = id1 AND a.b_id1 = id2); + QUERY PLAN +-------------------------------------------------------- + Hash Join + Hash Cond: ((a.b_id2 = b.id1) AND (a.b_id1 = b.id2)) + -> Seq Scan on a + -> Hash + -> Seq Scan on b +(5 rows) + +-- should not remove semi join (no check for id2) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id2 = id1 AND a.b_id1 = id1); + QUERY PLAN +----------------------------------------- + Nested Loop Semi Join + -> Seq Scan on a + Filter: (b_id2 = b_id1) + -> Index Only Scan using b_pkey on b + Index Cond: (id1 = a.b_id2) +(5 rows) + +-- should not remove semi join (no check for b_id2) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id1 = id2); + QUERY PLAN +----------------------------------- + Hash Join + Hash Cond: (a.b_id1 = b.id1) + -> Seq Scan on a + -> Hash + -> Seq Scan on b + Filter: (id1 = id2) +(6 rows) + +-- should not remove semi join (no check for b_id2) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id1 = id1); + QUERY PLAN +--------------------------------------- + Hash Join + Hash Cond: (a.b_id1 = b.id1) + -> Seq Scan on a + -> Hash + -> HashAggregate + Group Key: b.id1, b.id1 + -> Seq Scan on b +(7 rows) + +-- Check that the IS NULL and IS NOT NULL filters are not added +-- for columns which have a NOT NULL constraint. +ALTER TABLE a ALTER COLUMN b_id1 SET NOT NULL; +-- Should only filter on b_id2 IS NOT NULL +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id2 = id2); + QUERY PLAN +------------------------------- + Seq Scan on a + Filter: (b_id2 IS NOT NULL) +(2 rows) + +-- Should only filter on b_id2 IS NULL +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE NOT EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id2 = id2); + QUERY PLAN +--------------------------- + Seq Scan on a + Filter: (b_id2 IS NULL) +(2 rows) + +ALTER TABLE a ALTER COLUMN b_id2 SET NOT NULL; +-- No IS NOT NULL filters should be added. +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id2 = id2); + QUERY PLAN +--------------- + Seq Scan on a +(1 row) + +-- Since now neither b_id1 or b_id2 can be NULL this query can't +-- produce any records. Check that we get a One-Time Filter: false +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE NOT EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id2 = id2); + QUERY PLAN +-------------------------- + Result + One-Time Filter: false + -> Seq Scan on a +(3 rows) + +ROLLBACK; +BEGIN WORK; +-- In this test we want to ensure that ANTI JOIN removal does not +-- occur when there are pending foreign key triggers. +-- We test this by updating a relation which is referenced by a foreign key +-- and then executing another query which would normally allow the anti +-- join to be removed. If the anti join was removed then the table +-- records_violating_fkey would be empty, but here we'll ensure that +-- the record that we update ends up violating the foreign key. +CREATE TABLE j2 (id INT NOT NULL PRIMARY KEY); +CREATE TABLE j1 ( + id INT PRIMARY KEY, + j2_id INT NOT NULL REFERENCES j2 (id) MATCH FULL ON DELETE CASCADE ON UPDATE CASCADE +); +INSERT INTO j2 VALUES(10),(20); +INSERT INTO j1 VALUES(1,10),(2,20); +-- create a table to store records that 'violate' the fkey. +CREATE TABLE records_violating_fkey (j2_id INT NOT NULL); +CREATE OR REPLACE FUNCTION j1_update() RETURNS TRIGGER AS $$ +BEGIN + INSERT INTO records_violating_fkey SELECT j2_id FROM j1 WHERE NOT EXISTS(SELECT 1 FROM j2 WHERE j2_id = j2.id); + RETURN NEW; + END; +$$ LANGUAGE plpgsql; +CREATE TRIGGER j1_update_trigger BEFORE UPDATE ON j2 FOR EACH ROW EXECUTE PROCEDURE j1_update(); +-- This update statement will cause some foreign key triggers to be queued. +-- The trigger defined above will fire which will cause all records which +-- currently violate the foreign key to be inserted into the records_violating_fkey +-- table. The intended behaviour of this is that we'll see records violating the +-- foreign key, however if we incorrectly performed an ANTI JOIN removal, then +-- we wouldn't see this violation record, as we'd wrongly assume that the query +-- could not produce any records. +UPDATE j2 SET id = id + 1; +SELECT * FROM records_violating_fkey; + j2_id +------- + 10 +(1 row) + +ROLLBACK; create temp table parent (k int primary key, pd int); create temp table child (k int unique, cd int); insert into parent values (1, 10), (2, 20), (3, 30); diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index fa3e068..66b02ea 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -973,6 +973,171 @@ select i8.* from int8_tbl i8 left join (select f1 from int4_tbl group by f1) i4 rollback; +BEGIN; + +-- Test join removals for semi and anti joins +CREATE TEMP TABLE b (id INT NOT NULL PRIMARY KEY); +CREATE TEMP TABLE a (id INT NOT NULL PRIMARY KEY, b_id INT REFERENCES b(id)); + +-- should remove semi join to b +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE b_id IN(SELECT id FROM b); + +-- should remove semi join to b +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id = id); + +-- should remove anti join to b +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE NOT EXISTS(SELECT 1 FROM b WHERE a.b_id = id); + +-- should not remove anti join as id > 100 will void +-- the foreign key's guarantee that 1 will exist. +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE NOT EXISTS(SELECT 1 FROM b WHERE a.b_id = id AND id > 100); + +-- should remove semi join to b (swapped condition order) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE id = a.b_id); + +-- should not remove semi join (since not using equals) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE id >= a.b_id); + +-- should not remove semi join +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE b_id+0 IN(SELECT id FROM b); + +-- should not remove semi join +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE b_id IN(SELECT id+0 FROM b); + +-- should not remove semi join (wrong column) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE id IN(SELECT id FROM b); + +ROLLBACK; + +BEGIN; + +-- Semi join removal code with 2 column foreign keys + +CREATE TEMP TABLE b (id1 INT NOT NULL, id2 INT NOT NULL, PRIMARY KEY(id1,id2)); +CREATE TEMP TABLE a (id INT NOT NULL PRIMARY KEY, b_id1 INT, b_id2 INT); + +ALTER TABLE a ADD CONSTRAINT a_b_id1_b_id2_fkey FOREIGN KEY (b_id1,b_id2) REFERENCES b(id1,id2) MATCH SIMPLE; + +-- should remove semi join to b +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id2 = id2); + +-- should remove anti join to b +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE NOT EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id2 = id2); + +-- should not remove semi join to b (extra condition) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id2 = id2 AND a.b_id2 >= id2); + +-- should not remove semi join to b (wrong operator) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 > id1 AND a.b_id2 < id2); + +-- should not remove semi join (only checking id1) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1); + +-- should not remove semi join (only checking id2) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id2 = id2); + +-- should not remove semi join (checking wrong columns) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id2 = id1 AND a.b_id1 = id2); + +-- should not remove semi join (no check for id2) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id2 = id1 AND a.b_id1 = id1); + +-- should not remove semi join (no check for b_id2) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id1 = id2); + +-- should not remove semi join (no check for b_id2) +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id1 = id1); + + +-- Check that the IS NULL and IS NOT NULL filters are not added +-- for columns which have a NOT NULL constraint. +ALTER TABLE a ALTER COLUMN b_id1 SET NOT NULL; + +-- Should only filter on b_id2 IS NOT NULL +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id2 = id2); + +-- Should only filter on b_id2 IS NULL +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE NOT EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id2 = id2); + +ALTER TABLE a ALTER COLUMN b_id2 SET NOT NULL; + +-- No IS NOT NULL filters should be added. +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id2 = id2); + +-- Since now neither b_id1 or b_id2 can be NULL this query can't +-- produce any records. Check that we get a One-Time Filter: false +EXPLAIN (COSTS OFF) +SELECT id FROM a WHERE NOT EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id2 = id2); + +ROLLBACK; + +BEGIN WORK; + +-- In this test we want to ensure that ANTI JOIN removal does not +-- occur when there are pending foreign key triggers. +-- We test this by updating a relation which is referenced by a foreign key +-- and then executing another query which would normally allow the anti +-- join to be removed. If the anti join was removed then the table +-- records_violating_fkey would be empty, but here we'll ensure that +-- the record that we update ends up violating the foreign key. + +CREATE TABLE j2 (id INT NOT NULL PRIMARY KEY); +CREATE TABLE j1 ( + id INT PRIMARY KEY, + j2_id INT NOT NULL REFERENCES j2 (id) MATCH FULL ON DELETE CASCADE ON UPDATE CASCADE +); + +INSERT INTO j2 VALUES(10),(20); +INSERT INTO j1 VALUES(1,10),(2,20); + +-- create a table to store records that 'violate' the fkey. +CREATE TABLE records_violating_fkey (j2_id INT NOT NULL); + +CREATE OR REPLACE FUNCTION j1_update() RETURNS TRIGGER AS $$ +BEGIN + INSERT INTO records_violating_fkey SELECT j2_id FROM j1 WHERE NOT EXISTS(SELECT 1 FROM j2 WHERE j2_id = j2.id); + RETURN NEW; + END; +$$ LANGUAGE plpgsql; + +CREATE TRIGGER j1_update_trigger BEFORE UPDATE ON j2 FOR EACH ROW EXECUTE PROCEDURE j1_update(); + +-- This update statement will cause some foreign key triggers to be queued. +-- The trigger defined above will fire which will cause all records which +-- currently violate the foreign key to be inserted into the records_violating_fkey +-- table. The intended behaviour of this is that we'll see records violating the +-- foreign key, however if we incorrectly performed an ANTI JOIN removal, then +-- we wouldn't see this violation record, as we'd wrongly assume that the query +-- could not produce any records. + +UPDATE j2 SET id = id + 1; + +SELECT * FROM records_violating_fkey; + +ROLLBACK; + create temp table parent (k int primary key, pd int); create temp table child (k int unique, cd int); insert into parent values (1, 10), (2, 20), (3, 30);