diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 68f8434..6d2e5c0 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -1916,6 +1916,15 @@ + relhasfkey + bool + + + True if the table has (or once had) a foreign key constraint + + + + relhasrules bool diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index c346eda..93433ec 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -797,6 +797,7 @@ InsertPgClassTuple(Relation pg_class_desc, values[Anum_pg_class_relchecks - 1] = Int16GetDatum(rd_rel->relchecks); values[Anum_pg_class_relhasoids - 1] = BoolGetDatum(rd_rel->relhasoids); values[Anum_pg_class_relhaspkey - 1] = BoolGetDatum(rd_rel->relhaspkey); + values[Anum_pg_class_relhasfkey - 1] = BoolGetDatum(rd_rel->relhasfkey); values[Anum_pg_class_relhasrules - 1] = BoolGetDatum(rd_rel->relhasrules); values[Anum_pg_class_relhastriggers - 1] = BoolGetDatum(rd_rel->relhastriggers); values[Anum_pg_class_relhassubclass - 1] = BoolGetDatum(rd_rel->relhassubclass); diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 7bc579b..60a5857 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -361,6 +361,7 @@ static void ATAddCheckConstraint(List **wqueue, LOCKMODE lockmode); static void ATAddForeignKeyConstraint(AlteredTableInfo *tab, Relation rel, Constraint *fkconstraint, LOCKMODE lockmode); +static void SetRelationHasForeignKey(Relation rel, bool hasfkey); static void ATExecDropConstraint(Relation rel, const char *constrName, DropBehavior behavior, bool recurse, bool recursing, @@ -6353,6 +6354,12 @@ ATAddForeignKeyConstraint(AlteredTableInfo *tab, Relation rel, constrOid, indexOid); /* + * Ensure that the relation is marked as having foreign key constraints in + * pg_class + */ + SetRelationHasForeignKey(rel, true); + + /* * Tell Phase 3 to check that the constraint is satisfied by existing * rows. We can skip this during table creation, when requested explicitly * by specifying NOT VALID in an ADD FOREIGN KEY command, and when we're @@ -6381,6 +6388,50 @@ ATAddForeignKeyConstraint(AlteredTableInfo *tab, Relation rel, } /* + * Update the relhasfkey column in the relation's pg_class tuple. + * + * Caller had better hold exclusive lock on the relation. + * + * An important side effect is that a SI update message will be sent out for + * the pg_class tuple, which will force other backends to rebuild their + * relcache entries for the rel. Also, this backend will rebuild its + * own relcache entry at the next CommandCounterIncrement. + */ +static void +SetRelationHasForeignKey(Relation rel, bool hasfkey) +{ + Relation relrel; + HeapTuple reltup; + Form_pg_class relStruct; + + relrel = heap_open(RelationRelationId, RowExclusiveLock); + reltup = SearchSysCacheCopy1(RELOID, + ObjectIdGetDatum(RelationGetRelid(rel))); + if (!HeapTupleIsValid(reltup)) + elog(ERROR, "cache lookup failed for relation %u", + RelationGetRelid(rel)); + relStruct = (Form_pg_class) GETSTRUCT(reltup); + + if (relStruct->relhasfkey != hasfkey) + { + relStruct->relhasfkey = hasfkey; + + simple_heap_update(relrel, &reltup->t_self, reltup); + + /* keep catalog indexes current */ + CatalogUpdateIndexes(relrel, reltup); + } + else + { + /* Skip the disk update, but force relcache inval anyway */ + CacheInvalidateRelcache(rel); + } + + heap_freetuple(reltup); + heap_close(relrel, RowExclusiveLock); +} + +/* * ALTER TABLE ALTER CONSTRAINT * * Update the attributes of a constraint. diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 9bf0098..88c8d98 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -3887,6 +3887,17 @@ afterTriggerInvokeEvents(AfterTriggerEventList *events, return all_fired; } +/* ---------- + * AfterTriggerQueueIsEmpty() + * + * True if there are no pending triggers in the queue. + * ---------- + */ +bool +AfterTriggerQueueIsEmpty(void) +{ + return (afterTriggers->query_depth == -1); +} /* ---------- * AfterTriggerBeginXact() diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index b7aff37..29d9eb3 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -32,6 +32,7 @@ static EquivalenceMember *add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids, Relids nullable_relids, bool is_child, Oid datatype); +static void update_rel_class_joins(PlannerInfo *root); static void generate_base_implied_equalities_const(PlannerInfo *root, EquivalenceClass *ec); static void generate_base_implied_equalities_no_const(PlannerInfo *root, @@ -49,8 +50,6 @@ static List *generate_join_implied_equalities_broken(PlannerInfo *root, Relids outer_relids, Relids nominal_inner_relids, AppendRelInfo *inner_appinfo); -static Oid select_equality_operator(EquivalenceClass *ec, - Oid lefttype, Oid righttype); static RestrictInfo *create_join_clause(PlannerInfo *root, EquivalenceClass *ec, Oid opno, EquivalenceMember *leftem, @@ -725,7 +724,6 @@ void generate_base_implied_equalities(PlannerInfo *root) { ListCell *lc; - Index rti; foreach(lc, root->eq_classes) { @@ -752,6 +750,19 @@ generate_base_implied_equalities(PlannerInfo *root) * This is also a handy place to mark base rels (which should all exist by * now) with flags showing whether they have pending eclass joins. */ + update_rel_class_joins(root); +} + +/* + * update_rel_class_joins + * Process each relation in the PlannerInfo to update the + * has_eclass_joins flag + */ +static void +update_rel_class_joins(PlannerInfo *root) +{ + Index rti; + for (rti = 1; rti < root->simple_rel_array_size; rti++) { RelOptInfo *brel = root->simple_rel_array[rti]; @@ -764,6 +775,63 @@ generate_base_implied_equalities(PlannerInfo *root) } /* + * remove_rel_from_eclass + * Remove all eclass members that belong to relid and also any classes + * which have been left empty as a result of removing a member. + */ +void +remove_rel_from_eclass(PlannerInfo *root, int relid) +{ + ListCell *l, + *nextl, + *eqm, + *eqmnext; + + bool removedany = false; + + /* Strip all traces of this relation out of the eclasses */ + for (l = list_head(root->eq_classes); l != NULL; l = nextl) + { + EquivalenceClass *ec = (EquivalenceClass *) lfirst(l); + + nextl = lnext(l); + + for (eqm = list_head(ec->ec_members); eqm != NULL; eqm = eqmnext) + { + EquivalenceMember *em = (EquivalenceMember *) lfirst(eqm); + + eqmnext = lnext(eqm); + + if (IsA(em->em_expr, Var)) + { + Var *var = (Var *) em->em_expr; + + if (var->varno == relid) + { + list_delete_ptr(ec->ec_members, em); + removedany = true; + } + } + } + + /* + * If we've removed the last member from the EquivalenceClass then we'd + * better delete the entire entry. + */ + if (list_length(ec->ec_members) == 0) + list_delete_ptr(root->eq_classes, ec); + } + + /* + * If we removed any eclass members then this may have changed if a + * relation has an eclass join or not, we'd better force an update + * of this + */ + if (removedany) + update_rel_class_joins(root); +} + +/* * generate_base_implied_equalities when EC contains pseudoconstant(s) */ static void @@ -1281,7 +1349,7 @@ generate_join_implied_equalities_broken(PlannerInfo *root, * * Returns InvalidOid if no operator can be found for this datatype combination */ -static Oid +Oid select_equality_operator(EquivalenceClass *ec, Oid lefttype, Oid righttype) { ListCell *lc; diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index 773f8a4..4cb5c98 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -22,23 +22,38 @@ */ #include "postgres.h" +#include "commands/trigger.h" +#include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" +#include "nodes/relation.h" #include "optimizer/clauses.h" #include "optimizer/joininfo.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/planmain.h" +#include "optimizer/restrictinfo.h" #include "optimizer/tlist.h" #include "utils/lsyscache.h" /* local functions */ -static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo); +static bool innerjoin_is_removable(PlannerInfo *root, List *joinlist, + RangeTblRef *removalrtr, RelOptInfo **removerrel, + List **columnlist); +static bool leftjoin_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo); +static bool relation_is_needed(PlannerInfo *root, Relids joinrelids, + RelOptInfo *rel); +static void convert_join_to_isnotnull_quals(PlannerInfo *root, RelOptInfo *rel, + List *columnlist); +static bool relation_has_foreign_key_for(PlannerInfo *root, RelOptInfo *rel, + RelOptInfo *referencedrel, List *referencing_vars, + List *index_vars, List *operator_list); +static bool expressions_match_foreign_key(ForeignKeyInfo *fk, List *fkvars, + List *indexvars, List *operators); static void remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids); static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved); static Oid distinct_col_search(int colno, List *colnos, List *opids); - /* * remove_useless_joins * Check for relations that don't actually need to be joined at all, @@ -51,21 +66,76 @@ List * remove_useless_joins(PlannerInfo *root, List *joinlist) { ListCell *lc; + int nremoved; - /* - * We are only interested in relations that are left-joined to, so we can - * scan the join_info_list to find them easily. - */ restart: + + /* start with trying to remove needless inner joins */ + foreach(lc, joinlist) + { + RangeTblRef *rtr = (RangeTblRef *) lfirst(lc); + RelOptInfo *rel; + RelOptInfo *removerrel; + List *columnlist; + + if (!IsA(rtr, RangeTblRef)) + continue; + + /* skip if the join can't be removed */ + if (!innerjoin_is_removable(root, joinlist, rtr, &removerrel, &columnlist)) + continue; + + rel = find_base_rel(root, rtr->rtindex); + + /* + * If any of the columns on the join condition are NULLable then since + * we've removed the join, there's now a possibility that null valued + * rows could make it into the results. To ensure this does not happen + * we'll add IS NOT NULL quals to the rel that allowed the join to be + * removed, though we need only do this if the columns are actually + * NULLable. + */ + convert_join_to_isnotnull_quals(root, removerrel, columnlist); + + remove_rel_from_query(root, rtr->rtindex, + bms_union(rel->relids, removerrel->relids)); + + /* We verify that exactly one reference gets removed from joinlist */ + nremoved = 0; + joinlist = remove_rel_from_joinlist(joinlist, rtr->rtindex, &nremoved); + if (nremoved != 1) + elog(ERROR, "failed to find relation %d in joinlist", rtr->rtindex); + + /* + * We can delete this RangeTblRef from the list too, since it's no + * longer of interest. + */ + joinlist = list_delete_ptr(joinlist, rtr); + + /* + * Restart the scan. This is necessary to ensure we find all removable + * joins independently of their ordering. (note that removal of + * attr_needed bits may make a join, inner or outer, appear removable + * that did not before). Also, since we just deleted the current list + * cell, we'd have to have some kluge to continue the list scan anyway. + */ + goto restart; + } + + /* now process special joins. Currently only left joins are supported */ foreach(lc, root->join_info_list) { SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc); int innerrelid; - int nremoved; - /* Skip if not removable */ - if (!join_is_removable(root, sjinfo)) - continue; + if (sjinfo->jointype == JOIN_LEFT) + { + /* Skip if not removable */ + if (!leftjoin_is_removable(root, sjinfo)) + continue; + } + else + continue; /* we don't support this join type */ /* * Currently, join_is_removable can only succeed when the sjinfo's @@ -91,12 +161,11 @@ restart: root->join_info_list = list_delete_ptr(root->join_info_list, sjinfo); /* - * Restart the scan. This is necessary to ensure we find all - * removable joins independently of ordering of the join_info_list - * (note that removal of attr_needed bits may make a join appear - * removable that did not before). Also, since we just deleted the - * current list cell, we'd have to have some kluge to continue the - * list scan anyway. + * Restart the scan. This is necessary to ensure we find all removable + * joins independently of their ordering. (note that removal of + * attr_needed bits may make a join, inner or outer, appear removable + * that did not before). Also, since we just deleted the current list + * cell, we'd have to have some kluge to continue the list scan anyway. */ goto restart; } @@ -136,8 +205,203 @@ clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids, } /* - * join_is_removable - * Check whether we need not perform this special join at all, because + * innerjoin_is_removable + * True if removalrtr's join can be removed due to the existence of + * a foreign key which proves that we'll get exactly 1 matching row + * from the join. + * + * We are able to prove the join is not required under the following + * conditions: + * 1. No Vars from the join are needed anywhere in the query. + * 2. We have another relation in the joinlist which references this relation + * with a condition which matches the join condition exactly. The exact + * match on the join condition means that the join will neither duplicate + * nor restrict the results on the left side of the join. + */ +static bool +innerjoin_is_removable(PlannerInfo *root, List *joinlist, + RangeTblRef *removalrtr, RelOptInfo **removerrel, + List **columnlist) +{ + ListCell *lc; + RelOptInfo *removalrel; + + removalrel = find_base_rel(root, removalrtr->rtindex); + + /* + * If removalrel has no indexes, then it mustn't have any unique indexes, + * so therefore won't support being referenced by a foreign key. + */ + if (removalrel->indexlist == NIL) + return false; + + /* + * Currently we disallow the removal if we find any baserestrictinfo quals + * on the relation. The reason for this is that these could filter out + * rows and make it so the foreign key cannot prove that we'll match + * exactly 1 row on the join condition. However, this check is currently + * probably a bit overly strict, as we should be able to allow quals that + * are present in the join condition. e.g: + * SELECT a.* FROM a INNER JOIN b ON a.x = b.x WHERE b.x = 1 + */ + if (removalrel->baserestrictinfo != NIL) + return false; + + /* + * Currently only eclass joins are supported, so if there are any non + * eclass join quals then we'll report the join is non-removable. + */ + if (removalrel->joininfo != NIL) + return false; + + /* + * We mustn't allow any joins to be removed if there are any pending + * foreign key triggers in the queue. This could happen if we are planning + * a query that has been executed from within a volatile function and the + * query which called this volatile function has made some changes to a + * table referenced by a foreign key. The reason for this is that any + * updates to a table which is referenced by a foreign key constraint will + * only have the referencing tables updated after the command is complete, + * so there is a window of time where records may violate the foreign key + * constraint. + * + * Currently this code is quite naive, as we won't even attempt to remove + * the join if there are *any* pending foreign key triggers, on any + * relation. It may be worthwhile to improve this to check if there's any + * pending triggers for the referencing relation in the join. + */ + if (!AfterTriggerQueueIsEmpty()) + return false; + + /* + * Now we'll search through each relation in the joinlist to see if we can + * find a relation which has a foreign key which references removalrel on + * the join condition. If we find a rel with a foreign key which matches + * the join condition exactly, then we can be sure that exactly 1 row will + * be matched on the join, if we also see that no Vars from the relation + * are needed, then we can report the join as removable. + */ + foreach (lc, joinlist) + { + RangeTblRef *rtr = (RangeTblRef *) lfirst(lc); + RelOptInfo *rel; + ListCell *lc2; + List *referencing_vars; + List *index_vars; + List *operator_list; + Relids joinrelids; + + /* we can't remove ourself, or anything other than RangeTblRefs */ + if (rtr == removalrtr || !IsA(rtr, RangeTblRef)) + continue; + + rel = find_base_rel(root, rtr->rtindex); + + /* a rel without foreign keys won't help us, so skip it */ + if (rel->fklist == NIL) + continue; + + /* + * If there's no join condition between the 2 rels, we can't use it to + * prove the join is redundant. + */ + if (!have_relevant_eclass_joinclause(root, rel, removalrel)) + continue; + + joinrelids = bms_union(rel->relids, removalrel->relids); + + if (relation_is_needed(root, joinrelids, removalrel)) + return false; + + referencing_vars = NIL; + index_vars = NIL; + operator_list = NIL; + + foreach(lc2, root->eq_classes) + { + EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc2); + + if (list_length(ec->ec_members) <= 1) + continue; + + if (bms_overlap(removalrel->relids, ec->ec_relids) && + bms_overlap(removalrel->relids, ec->ec_relids)) + { + ListCell *lc3; + Var *refvar = NULL; + Var *idxvar = NULL; + + /* + * Look at each member of the eclass and try to find a Var from + * each side of the join that we can append to the list of + * columns that should be checked against each foreign key. + * + * The following logic does not allow for join removals to take + * place for foreign keys that have duplicate columns on the + * referencing side of the foreign key, such as: + * (a,a) references (x,y) + * The use case for such a foreign key is likely small enough + * that we needn't bother making this code anymore complex to + * solve. If we find more than 1 var from any of the rels then + * we'll bail out. + */ + foreach (lc3, ec->ec_members) + { + EquivalenceMember *ecm = (EquivalenceMember *) lfirst(lc3); + + Var *var = (Var *) ecm->em_expr; + + if (!IsA(var, Var)) + continue; /* Ignore Consts */ + + if (var->varno == rtr->rtindex) + { + if (refvar != NULL) + return false; + refvar = var; + } + + else if (var->varno == removalrtr->rtindex) + { + if (idxvar != NULL) + return false; + idxvar = var; + } + } + + if (refvar != NULL && idxvar != NULL) + { + /* grab the correct equality operator for these two vars */ + Oid opno = select_equality_operator(ec, refvar->vartype, idxvar->vartype); + + if (!OidIsValid(opno)) + return false; + + referencing_vars = lappend(referencing_vars, refvar); + index_vars = lappend(index_vars, idxvar); + operator_list = lappend_oid(operator_list, opno); + } + } + } + + if (referencing_vars != NULL) + { + if (relation_has_foreign_key_for(root, rel, removalrel, + referencing_vars, index_vars, operator_list)) + { + *removerrel = rel; + *columnlist = referencing_vars; + return true; /* removalrel can be removed */ + } + } + } + + return false; /* can't remove join */ +} + +/* + * leftjoin_is_removable + * Check whether we need not perform this left join at all, because * it will just duplicate its left input. * * This is true for a left join for which the join condition cannot match @@ -147,7 +411,7 @@ clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids, * above the join. */ static bool -join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) +leftjoin_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) { int innerrelid; RelOptInfo *innerrel; @@ -155,14 +419,14 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) Relids joinrelids; List *clause_list = NIL; ListCell *l; - int attroff; + + Assert(sjinfo->jointype == JOIN_LEFT); /* - * Must be a non-delaying left join to a single baserel, else we aren't + * Must be a non-delaying join to a single baserel, else we aren't * going to be able to do anything with it. */ - if (sjinfo->jointype != JOIN_LEFT || - sjinfo->delay_upper_joins || + if (sjinfo->delay_upper_joins || bms_membership(sjinfo->min_righthand) != BMS_SINGLETON) return false; @@ -205,52 +469,9 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) /* Compute the relid set for the join we are considering */ joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); - /* - * We can't remove the join if any inner-rel attributes are used above the - * join. - * - * Note that this test only detects use of inner-rel attributes in higher - * join conditions and the target list. There might be such attributes in - * pushed-down conditions at this join, too. We check that case below. - * - * As a micro-optimization, it seems better to start with max_attr and - * count down rather than starting with min_attr and counting up, on the - * theory that the system attributes are somewhat less likely to be wanted - * and should be tested last. - */ - for (attroff = innerrel->max_attr - innerrel->min_attr; - attroff >= 0; - attroff--) - { - if (!bms_is_subset(innerrel->attr_needed[attroff], joinrelids)) - return false; - } - - /* - * Similarly check that the inner rel isn't needed by any PlaceHolderVars - * that will be used above the join. We only need to fail if such a PHV - * actually references some inner-rel attributes; but the correct check - * for that is relatively expensive, so we first check against ph_eval_at, - * which must mention the inner rel if the PHV uses any inner-rel attrs as - * non-lateral references. Note that if the PHV's syntactic scope is just - * the inner rel, we can't drop the rel even if the PHV is variable-free. - */ - foreach(l, root->placeholder_list) - { - PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l); - - if (bms_is_subset(phinfo->ph_needed, joinrelids)) - continue; /* PHV is not used above the join */ - if (bms_overlap(phinfo->ph_lateral, innerrel->relids)) - return false; /* it references innerrel laterally */ - if (!bms_overlap(phinfo->ph_eval_at, innerrel->relids)) - continue; /* it definitely doesn't reference innerrel */ - if (bms_is_subset(phinfo->ph_eval_at, innerrel->relids)) - return false; /* there isn't any other place to eval PHV */ - if (bms_overlap(pull_varnos((Node *) phinfo->ph_var->phexpr), - innerrel->relids)) - return false; /* it does reference innerrel */ - } + /* if the relation is referenced in the query then it cannot be removed */ + if (relation_is_needed(root, joinrelids, innerrel)) + return false; /* * Search for mergejoinable clauses that constrain the inner rel against @@ -367,6 +588,279 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) return false; } +/* + * relation_is_needed + * True if any of the Vars from this relation are required in the query + */ +static inline bool +relation_is_needed(PlannerInfo *root, Relids joinrelids, RelOptInfo *rel) +{ + int attroff; + ListCell *l; + + /* + * rel is referenced if any of it's attributes are used above the join. + * + * Note that this test only detects use of rel's attributes in higher + * join conditions and the target list. There might be such attributes in + * pushed-down conditions at this join, too. We check that case below. + * + * As a micro-optimization, it seems better to start with max_attr and + * count down rather than starting with min_attr and counting up, on the + * theory that the system attributes are somewhat less likely to be wanted + * and should be tested last. + */ + for (attroff = rel->max_attr - rel->min_attr; + attroff >= 0; + attroff--) + { + if (!bms_is_subset(rel->attr_needed[attroff], joinrelids)) + return true; + } + + /* + * Similarly check that rel isn't needed by any PlaceHolderVars that will + * be used above the join. We only need to fail if such a PHV actually + * references some of rel's attributes; but the correct check for that is + * relatively expensive, so we first check against ph_eval_at, which must + * mention rel if the PHV uses any of-rel's attrs as non-lateral + * references. Note that if the PHV's syntactic scope is just rel, we + * can't return true even if the PHV is variable-free. + */ + foreach(l, root->placeholder_list) + { + PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l); + + if (bms_is_subset(phinfo->ph_needed, joinrelids)) + continue; /* PHV is not used above the join */ + if (bms_overlap(phinfo->ph_lateral, rel->relids)) + return true; /* it references rel laterally */ + if (!bms_overlap(phinfo->ph_eval_at, rel->relids)) + continue; /* it definitely doesn't reference rel */ + if (bms_is_subset(phinfo->ph_eval_at, rel->relids)) + return true; /* there isn't any other place to eval PHV */ + if (bms_overlap(pull_varnos((Node *) phinfo->ph_var->phexpr), + rel->relids)) + return true; /* it does reference rel */ + } + + return false; /* it does not reference rel */ +} + +/* + * convert_join_to_isnotnull_quals + * Adds any required "col IS NOT NULL" quals which are required to ensure + * that the query remains equivalent to what it was before the join + * was removed. + */ +static void +convert_join_to_isnotnull_quals(PlannerInfo *root, RelOptInfo *rel, List *columnlist) +{ + ListCell *l; + Bitmapset *handledcols = NULL; + Oid reloid; + + reloid = root->simple_rte_array[rel->relid]->relid; + + /* + * If a join has been successfully removed by the join removal code, + * then a foreign key must exist that proves the join to not be required. + * + * The join would have never allowed NULL values for any of the columns + * seen in the join condition, as these would have matched up to a record + * in the joined table. Now that we've proved the join to be redundant, we + * must maintain that behavior of not having NULLs by adding IS NOT NULL + * quals to the WHERE clause, although we may skip this if the column in + * question happens to have a NOT NULL constraint. + */ + foreach(l, columnlist) + { + Var *var = (Var *) lfirst(l); + + /* should be a var if it came from a foreign key */ + Assert(IsA(var, Var)); + Assert(var->varno == rel->relid); + + /* + * Skip this column if it's a duplicate of one we've previously + * handled. + */ + if (bms_is_member(var->varattno, handledcols)) + continue; + + /* mark this column as handled */ + handledcols = bms_add_member(handledcols, var->varattno); + + /* add the IS NOT NULL qual, but only if the column allows NULLs */ + if (!get_attnotnull(reloid, var->varattno)) + { + RestrictInfo *rinfo; + NullTest *ntest = makeNode(NullTest); + + ntest->nulltesttype = IS_NOT_NULL; + ntest->arg = (Expr *) var; + ntest->argisrow = false; + + rinfo = make_restrictinfo((Expr *)ntest, false, false, false, + NULL, NULL, NULL); + rel->baserestrictinfo = lappend(rel->baserestrictinfo, rinfo); + } + } +} + +/* + * relation_has_foreign_key_for + * Checks if rel has a foreign key which references referencedrel with the + * given list of expressions. + * + * For the match to succeed: + * referencing_vars must match the columns defined in the foreign key. + * index_vars must match the columns defined in the index for the foreign key. + */ +static bool +relation_has_foreign_key_for(PlannerInfo *root, RelOptInfo *rel, + RelOptInfo *referencedrel, List *referencing_vars, + List *index_vars, List *operator_list) +{ + ListCell *lc; + Oid refreloid; + + /* + * Look up the Oid of the referenced relation. We only want to look at + * foreign keys on the referencing relation which reference this relation. + */ + refreloid = root->simple_rte_array[referencedrel->relid]->relid; + + Assert(list_length(referencing_vars) > 0); + Assert(list_length(referencing_vars) == list_length(index_vars)); + Assert(list_length(referencing_vars) == list_length(operator_list)); + + /* + * Search through each foreign key on the referencing relation and try + * to find one which references the relation in the join condition. If we + * find one then we'll send the join conditions off to + * expressions_match_foreign_key() to see if they match the foreign key. + */ + foreach(lc, rel->fklist) + { + ForeignKeyInfo *fk = (ForeignKeyInfo *) lfirst(lc); + + if (fk->confrelid == refreloid) + { + if (expressions_match_foreign_key(fk, referencing_vars, + index_vars, operator_list)) + return true; + } + } + + return false; +} + +/* + * expressions_match_foreign_key + * True if the given fkvars, indexvars and operators will match + * exactly 1 record in the referenced relation of the foreign key. + * + * Note: This function expects fkvars and indexvars to only contain Var types. + * Expression indexes are not supported by foreign keys. + */ +static bool +expressions_match_foreign_key(ForeignKeyInfo *fk, List *fkvars, + List *indexvars, List *operators) +{ + ListCell *lc; + ListCell *lc2; + ListCell *lc3; + int col; + Bitmapset *allitems; + Bitmapset *matcheditems; + int lstidx; + + Assert(list_length(fkvars) == list_length(indexvars)); + Assert(list_length(fkvars) == list_length(operators)); + + /* + * Fast path out if there's not enough conditions to match each column in + * the foreign key. Note that we cannot check that the number of + * expressions are equal here since it would cause any expressions which + * are duplicated not to match. + */ + if (list_length(fkvars) < fk->conncols) + return false; + + /* + * We need to ensure that each foreign key column can be matched to a list + * item, and we need to ensure that each list item can be matched to a + * foreign key column. We do this by looping over each foreign key column + * and checking that we can find an item in the list which matches the + * current column, however this method does not allow us to ensure that no + * additional items exist in the list. We could solve that by performing + * another loop over each list item and check that it matches an foreign + * key column, but that's a bit wasteful. Instead we'll use 2 bitmapsets, + * one to store the 0 based index of each list item, and with the other + * we'll store each list index that we've managed to match. After we're + * done matching we'll just make sure that both bitmapsets are equal. + */ + allitems = NULL; + matcheditems = NULL; + + /* + * Build a bitmapset which contains each 1 based list index. It seems more + * efficient to do this in reverse so that we allocate enough memory for + * the bitmapset on first loop rather than reallocating each time we find + * we need a bit more space. + */ + for (lstidx = list_length(fkvars) - 1; lstidx >= 0; lstidx--) + allitems = bms_add_member(allitems, lstidx); + + for (col = 0; col < fk->conncols; col++) + { + bool matched = false; + + lstidx = 0; + + forthree(lc, fkvars, lc2, indexvars, lc3, operators) + { + Var *expr = (Var *) lfirst(lc); + Var *idxexpr = (Var *) lfirst(lc2); + Oid opr = lfirst_oid(lc3); + + Assert(IsA(expr, Var)); + Assert(IsA(idxexpr, Var)); + + /* Does this join qual match up to the current fkey column? */ + if (fk->conkey[col] == expr->varattno && + fk->confkey[col] == idxexpr->varattno && + equality_ops_are_compatible(opr, fk->conpfeqop[col])) + { + matched = true; + + /* mark this list item as matched */ + matcheditems = bms_add_member(matcheditems, lstidx); + + /* + * Don't break here as there may be duplicate expressions + * that we also need to match against. + */ + } + lstidx++; + } + + /* punt if there's no match. */ + if (!matched) + return false; + } + + /* + * Ensure that we managed to match every item in the list to a foreign key + * column. + */ + if (!bms_equal(allitems, matcheditems)) + return false; + + return true; /* matched */ +} + /* * Remove the target relid from the planner's data structures, having @@ -393,6 +887,9 @@ remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids) */ rel->reloptkind = RELOPT_DEADREL; + /* Strip out any eclass members that belong to this rel */ + remove_rel_from_eclass(root, relid); + /* * Remove references to the rel from other baserels' attr_needed arrays. */ diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index b2becfa..8e98d4b 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -25,7 +25,9 @@ #include "access/transam.h" #include "access/xlog.h" #include "catalog/catalog.h" +#include "catalog/pg_constraint.h" #include "catalog/heap.h" +#include "catalog/pg_type.h" #include "foreign/fdwapi.h" #include "miscadmin.h" #include "nodes/makefuncs.h" @@ -38,6 +40,7 @@ #include "parser/parsetree.h" #include "rewrite/rewriteManip.h" #include "storage/bufmgr.h" +#include "utils/fmgroids.h" #include "utils/lsyscache.h" #include "utils/rel.h" #include "utils/snapmgr.h" @@ -384,6 +387,121 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, heap_close(relation, NoLock); + /* load the foreign key constraints, if there are any */ + if (relation->rd_rel->relhasfkey) + { + List *fkinfos = NIL; + Relation fkeyRel; + Relation fkeyRelIdx; + ScanKeyData fkeyScankey; + SysScanDesc fkeyScan; + HeapTuple tuple; + + ScanKeyInit(&fkeyScankey, + Anum_pg_constraint_conrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(relationObjectId)); + + fkeyRel = heap_open(ConstraintRelationId, AccessShareLock); + fkeyRelIdx = index_open(ConstraintRelidIndexId, AccessShareLock); + fkeyScan = systable_beginscan_ordered(fkeyRel, fkeyRelIdx, NULL, 1, &fkeyScankey); + + while ((tuple = systable_getnext_ordered(fkeyScan, ForwardScanDirection)) != NULL) + { + Form_pg_constraint con = (Form_pg_constraint) GETSTRUCT(tuple); + ForeignKeyInfo *fkinfo; + Datum adatum; + bool isNull; + ArrayType *arr; + int numkeys; + + /* Not a foreign key */ + if (con->contype != CONSTRAINT_FOREIGN) + continue; + + /* we're not interested unless the fk has been validated */ + if (!con->convalidated) + continue; + + fkinfo = (ForeignKeyInfo *) palloc(sizeof(ForeignKeyInfo)); + fkinfo->conindid = con->conindid; + fkinfo->confrelid = con->confrelid; + fkinfo->convalidated = con->convalidated; + fkinfo->conrelid = con->conrelid; + fkinfo->confupdtype = con->confupdtype; + fkinfo->confdeltype = con->confdeltype; + fkinfo->confmatchtype = con->confmatchtype; + + adatum = heap_getattr(tuple, Anum_pg_constraint_conkey, + RelationGetDescr(fkeyRel), &isNull); + + if (isNull) + elog(ERROR, "null conkey for constraint %u", + HeapTupleGetOid(tuple)); + + arr = DatumGetArrayTypeP(adatum); /* ensure not toasted */ + numkeys = ARR_DIMS(arr)[0]; + if (ARR_NDIM(arr) != 1 || + numkeys < 0 || + ARR_HASNULL(arr) || + ARR_ELEMTYPE(arr) != INT2OID) + elog(ERROR, "conkey is not a 1-D smallint array"); + + fkinfo->conkey = (int16 *) ARR_DATA_PTR(arr); + fkinfo->conncols = numkeys; + + adatum = heap_getattr(tuple, Anum_pg_constraint_confkey, + RelationGetDescr(fkeyRel), &isNull); + + if (isNull) + elog(ERROR, "null confkey for constraint %u", + HeapTupleGetOid(tuple)); + + arr = DatumGetArrayTypeP(adatum); /* ensure not toasted */ + numkeys = ARR_DIMS(arr)[0]; + + /* sanity check */ + if (numkeys != fkinfo->conncols) + elog(ERROR, "number of confkey elements does not equal conkey elements"); + + if (ARR_NDIM(arr) != 1 || + numkeys < 0 || + ARR_HASNULL(arr) || + ARR_ELEMTYPE(arr) != INT2OID) + elog(ERROR, "confkey is not a 1-D smallint array"); + + fkinfo->confkey = (int16 *) ARR_DATA_PTR(arr); + adatum = heap_getattr(tuple, Anum_pg_constraint_conpfeqop, + RelationGetDescr(fkeyRel), &isNull); + + if (isNull) + elog(ERROR, "null conpfeqop for constraint %u", + HeapTupleGetOid(tuple)); + + arr = DatumGetArrayTypeP(adatum); /* ensure not toasted */ + numkeys = ARR_DIMS(arr)[0]; + + /* sanity check */ + if (numkeys != fkinfo->conncols) + elog(ERROR, "number of conpfeqop elements does not equal conkey elements"); + + if (ARR_NDIM(arr) != 1 || + numkeys < 0 || + ARR_HASNULL(arr) || + ARR_ELEMTYPE(arr) != OIDOID) + elog(ERROR, "conpfeqop is not a 1-D smallint array"); + + fkinfo->conpfeqop = (Oid *) ARR_DATA_PTR(arr); + + fkinfos = lappend(fkinfos, fkinfo); + } + + rel->fklist = fkinfos; + systable_endscan_ordered(fkeyScan); + index_close(fkeyRelIdx, AccessShareLock); + heap_close(fkeyRel, AccessShareLock); + } + /* * Allow a plugin to editorialize on the info we obtained from the * catalogs. Actions might include altering the assumed relation size, diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index c938c27..a0fb8eb 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -115,6 +115,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind) rel->lateral_relids = NULL; rel->lateral_referencers = NULL; rel->indexlist = NIL; + rel->fklist = NIL; rel->pages = 0; rel->tuples = 0; rel->allvisfrac = 0; @@ -377,6 +378,7 @@ build_join_rel(PlannerInfo *root, joinrel->lateral_relids = NULL; joinrel->lateral_referencers = NULL; joinrel->indexlist = NIL; + joinrel->fklist = NIL; joinrel->pages = 0; joinrel->tuples = 0; joinrel->allvisfrac = 0; diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index 552e498..aa81c7c 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -916,6 +916,33 @@ get_atttypetypmodcoll(Oid relid, AttrNumber attnum, ReleaseSysCache(tp); } +/* + * get_attnotnull + * + * Given the relation id and the attribute number, + * return the "attnotnull" field from the attribute relation. + */ +bool +get_attnotnull(Oid relid, AttrNumber attnum) +{ + HeapTuple tp; + + tp = SearchSysCache2(ATTNUM, + ObjectIdGetDatum(relid), + Int16GetDatum(attnum)); + if (HeapTupleIsValid(tp)) + { + Form_pg_attribute att_tup = (Form_pg_attribute) GETSTRUCT(tp); + bool result; + + result = att_tup->attnotnull; + ReleaseSysCache(tp); + return result; + } + else + return false; +} + /* ---------- COLLATION CACHE ---------- */ /* diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h index f2fb317..55f2155 100644 --- a/src/include/catalog/pg_class.h +++ b/src/include/catalog/pg_class.h @@ -62,6 +62,7 @@ CATALOG(pg_class,1259) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83) BKI_SCHEMA_MACRO int16 relchecks; /* # of CHECK constraints for class */ bool relhasoids; /* T if we generate OIDs for rows of rel */ bool relhaspkey; /* has (or has had) PRIMARY KEY index */ + bool relhasfkey; /* has (or has had) a FOREIGN KEY constraint */ bool relhasrules; /* has (or has had) any rules */ bool relhastriggers; /* has (or has had) any TRIGGERs */ bool relhassubclass; /* has (or has had) derived classes */ @@ -94,7 +95,7 @@ typedef FormData_pg_class *Form_pg_class; * ---------------- */ -#define Natts_pg_class 29 +#define Natts_pg_class 30 #define Anum_pg_class_relname 1 #define Anum_pg_class_relnamespace 2 #define Anum_pg_class_reltype 3 @@ -115,15 +116,16 @@ typedef FormData_pg_class *Form_pg_class; #define Anum_pg_class_relchecks 18 #define Anum_pg_class_relhasoids 19 #define Anum_pg_class_relhaspkey 20 -#define Anum_pg_class_relhasrules 21 -#define Anum_pg_class_relhastriggers 22 -#define Anum_pg_class_relhassubclass 23 -#define Anum_pg_class_relispopulated 24 -#define Anum_pg_class_relreplident 25 -#define Anum_pg_class_relfrozenxid 26 -#define Anum_pg_class_relminmxid 27 -#define Anum_pg_class_relacl 28 -#define Anum_pg_class_reloptions 29 +#define Anum_pg_class_relhasfkey 21 +#define Anum_pg_class_relhasrules 22 +#define Anum_pg_class_relhastriggers 23 +#define Anum_pg_class_relhassubclass 24 +#define Anum_pg_class_relispopulated 25 +#define Anum_pg_class_relreplident 26 +#define Anum_pg_class_relfrozenxid 27 +#define Anum_pg_class_relminmxid 28 +#define Anum_pg_class_relacl 29 +#define Anum_pg_class_reloptions 30 /* ---------------- * initial contents of pg_class @@ -138,13 +140,13 @@ typedef FormData_pg_class *Form_pg_class; * Note: "3" in the relfrozenxid column stands for FirstNormalTransactionId; * similarly, "1" in relminmxid stands for FirstMultiXactId */ -DATA(insert OID = 1247 ( pg_type PGNSP 71 0 PGUID 0 0 0 0 0 0 0 f f p r 30 0 t f f f f t n 3 1 _null_ _null_ )); +DATA(insert OID = 1247 ( pg_type PGNSP 71 0 PGUID 0 0 0 0 0 0 0 f f p r 30 0 t f f f f f t n 3 1 _null_ _null_ )); DESCR(""); -DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 f f p r 21 0 f f f f f t n 3 1 _null_ _null_ )); +DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 f f p r 21 0 f f f f f f t n 3 1 _null_ _null_ )); DESCR(""); -DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 f f p r 27 0 t f f f f t n 3 1 _null_ _null_ )); +DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 f f p r 27 0 t f f f f f t n 3 1 _null_ _null_ )); DESCR(""); -DATA(insert OID = 1259 ( pg_class PGNSP 83 0 PGUID 0 0 0 0 0 0 0 f f p r 29 0 t f f f f t n 3 1 _null_ _null_ )); +DATA(insert OID = 1259 ( pg_class PGNSP 83 0 PGUID 0 0 0 0 0 0 0 f f p r 30 0 t f f f f f t n 3 1 _null_ _null_ )); DESCR(""); diff --git a/src/include/commands/trigger.h b/src/include/commands/trigger.h index d0b0356..34a75e4 100644 --- a/src/include/commands/trigger.h +++ b/src/include/commands/trigger.h @@ -181,6 +181,7 @@ extern void ExecBSTruncateTriggers(EState *estate, extern void ExecASTruncateTriggers(EState *estate, ResultRelInfo *relinfo); +extern bool AfterTriggerQueueIsEmpty(void); extern void AfterTriggerBeginXact(void); extern void AfterTriggerBeginQuery(void); extern void AfterTriggerEndQuery(EState *estate); diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index dacbe9c..f69df09 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -355,6 +355,8 @@ typedef struct PlannerInfo * lateral_referencers - relids of rels that reference this one laterally * indexlist - list of IndexOptInfo nodes for relation's indexes * (always NIL if it's not a table) + * fklist - list of ForeignKeyInfo's for relation's foreign key + * constraints. (always NIL if it's not a table) * pages - number of disk pages in relation (zero if not a table) * tuples - number of tuples in relation (not considering restrictions) * allvisfrac - fraction of disk pages that are marked all-visible @@ -448,6 +450,7 @@ typedef struct RelOptInfo Relids lateral_relids; /* minimum parameterization of rel */ Relids lateral_referencers; /* rels that reference me laterally */ List *indexlist; /* list of IndexOptInfo */ + List *fklist; /* list of ForeignKeyInfo */ BlockNumber pages; /* size estimates derived from pg_class */ double tuples; double allvisfrac; @@ -538,6 +541,51 @@ typedef struct IndexOptInfo bool amhasgetbitmap; /* does AM have amgetbitmap interface? */ } IndexOptInfo; +/* + * ForeignKeyInfo + * Used to store pg_constraint records for foreign key constraints for use + * by the planner. + * + * conindid - The index which supports the foreign key + * + * confrelid - The relation that is referenced by this foreign key + * + * convalidated - True if the foreign key has been validated. + * + * conrelid - The Oid of the relation that the foreign key belongs to + * + * confupdtype - ON UPDATE action for when the referenced table is updated + * + * confdeltype - ON DELETE action, controls what to do when a record is + * deleted from the referenced table. + * + * confmatchtype - foreign key match type, e.g MATCH FULL, MATCH PARTIAL + * + * conncols - Number of columns defined in the foreign key + * + * conkey - An array of conncols elements to store the varattno of the + * columns on the referencing side of the foreign key + * + * confkey - An array of conncols elements to store the varattno of the + * columns on the referenced side of the foreign key + * + * conpfeqop - An array of conncols elements to store the operators for + * PK = FK comparisons + */ +typedef struct ForeignKeyInfo +{ + Oid conindid; /* index supporting this constraint */ + Oid confrelid; /* relation referenced by foreign key */ + bool convalidated; /* constraint has been validated? */ + Oid conrelid; /* relation this constraint constrains */ + char confupdtype; /* foreign key's ON UPDATE action */ + char confdeltype; /* foreign key's ON DELETE action */ + char confmatchtype; /* foreign key's match type */ + int conncols; /* number of columns references */ + int16 *conkey; /* Columns of conrelid that the constraint applies to */ + int16 *confkey; /* columns of confrelid that foreign key references */ + Oid *conpfeqop; /* Operator list for comparing PK to FK */ +} ForeignKeyInfo; /* * EquivalenceClasses diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 9b22fda..b11ae78 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -108,10 +108,13 @@ extern EquivalenceClass *get_eclass_for_sort_expr(PlannerInfo *root, Relids rel, bool create_it); extern void generate_base_implied_equalities(PlannerInfo *root); +extern void remove_rel_from_eclass(PlannerInfo *root, int relid); extern List *generate_join_implied_equalities(PlannerInfo *root, Relids join_relids, Relids outer_relids, RelOptInfo *inner_rel); +extern Oid select_equality_operator(EquivalenceClass *ec, Oid lefttype, + Oid righttype); extern bool exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2); extern void add_child_rel_equivalences(PlannerInfo *root, AppendRelInfo *appinfo, diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index 07d24d4..910190d 100644 --- a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -68,6 +68,7 @@ extern Oid get_atttype(Oid relid, AttrNumber attnum); extern int32 get_atttypmod(Oid relid, AttrNumber attnum); extern void get_atttypetypmodcoll(Oid relid, AttrNumber attnum, Oid *typid, int32 *typmod, Oid *collid); +extern bool get_attnotnull(Oid relid, AttrNumber attnum); extern char *get_collation_name(Oid colloid); extern char *get_constraint_name(Oid conoid); extern Oid get_opclass_family(Oid opclass); diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 2501184..251d44f 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -3276,6 +3276,260 @@ select i8.* from int8_tbl i8 left join (select f1 from int4_tbl group by f1) i4 (1 row) rollback; +BEGIN; +-- Test join removals for inner joins +CREATE TEMP TABLE c (id INT NOT NULL PRIMARY KEY); +CREATE TEMP TABLE b (id INT NOT NULL PRIMARY KEY, c_id INT NOT NULL REFERENCES c(id), val INT); +CREATE TEMP TABLE a (id INT NOT NULL PRIMARY KEY, b_id INT REFERENCES b(id)); +-- this should remove inner join to b +EXPLAIN (COSTS OFF) +SELECT a.* FROM a INNER JOIN b ON a.b_id = b.id; + QUERY PLAN +------------------------------ + Seq Scan on a + Filter: (b_id IS NOT NULL) +(2 rows) + +-- this should remove inner join to b and c +EXPLAIN (COSTS OFF) +SELECT a.* FROM a INNER JOIN b ON a.b_id = b.id INNER JOIN c ON b.c_id = c.id; + QUERY PLAN +------------------------------ + Seq Scan on a + Filter: (b_id IS NOT NULL) +(2 rows) + +-- this should generate the same plan as above. +EXPLAIN (COSTS OFF) +SELECT a.* FROM c INNER JOIN b ON c.id = b.c_id INNER JOIN a ON a.b_id = b.id; + QUERY PLAN +------------------------------ + Seq Scan on a + Filter: (b_id IS NOT NULL) +(2 rows) + +-- inner join can't be removed due to b columns in the target list +EXPLAIN (COSTS OFF) +SELECT * FROM a INNER JOIN b ON a.b_id = b.id; + QUERY PLAN +------------------------------ + Hash Join + Hash Cond: (a.b_id = b.id) + -> Seq Scan on a + -> Hash + -> Seq Scan on b +(5 rows) + +-- this should not remove inner join to b due to quals restricting results from b +EXPLAIN (COSTS OFF) +SELECT a.* FROM a INNER JOIN b on a.b_id = b.id WHERE b.val = 10; + QUERY PLAN +---------------------------------- + Hash Join + Hash Cond: (a.b_id = b.id) + -> Seq Scan on a + -> Hash + -> Seq Scan on b + Filter: (val = 10) +(6 rows) + +-- this should remove joins to b and c. +EXPLAIN (COSTS OFF) +SELECT COUNT(*) FROM a INNER JOIN b ON a.b_id = b.id LEFT OUTER JOIN c ON a.id = c.id; + QUERY PLAN +------------------------------------ + Aggregate + -> Seq Scan on a + Filter: (b_id IS NOT NULL) +(3 rows) + +-- this should remove joins to b and c, however it b will only be removed on +-- 2nd attempt after c is removed by the left join removal code. +EXPLAIN (COSTS OFF) +SELECT COUNT(*) FROM a INNER JOIN b ON a.b_id = b.id LEFT OUTER JOIN c ON b.id = c.id; + QUERY PLAN +------------------------------------ + Aggregate + -> Seq Scan on a + Filter: (b_id IS NOT NULL) +(3 rows) + +-- this should not remove join to b +EXPLAIN (COSTS OFF) +SELECT a.* FROM a INNER JOIN b on a.b_id = b.id WHERE b.val = b.id; + QUERY PLAN +---------------------------------- + Hash Join + Hash Cond: (a.b_id = b.id) + -> Seq Scan on a + -> Hash + -> Seq Scan on b + Filter: (id = val) +(6 rows) + +-- this should not remove the join, no foreign key exists between a.id and b.id +EXPLAIN (COSTS OFF) +SELECT a.* FROM a INNER JOIN b ON a.id = b.id; + QUERY PLAN +---------------------------- + Hash Join + Hash Cond: (a.id = b.id) + -> Seq Scan on a + -> Hash + -> Seq Scan on b +(5 rows) + +ALTER TABLE a ALTER COLUMN b_id SET NOT NULL; +-- Ensure the join gets removed, but an IS NOT NULL qual is not added for b_id +EXPLAIN (COSTS OFF) +SELECT a.* FROM a INNER JOIN b ON a.b_id = b.id; + QUERY PLAN +--------------- + Seq Scan on a +(1 row) + +ROLLBACK; +BEGIN; +-- inner join removal code with 2 column foreign keys +CREATE TEMP TABLE b (id1 INT NOT NULL, id2 INT NOT NULL, PRIMARY KEY(id1,id2)); +CREATE TEMP TABLE a (id INT NOT NULL PRIMARY KEY, b_id1 INT, b_id2 INT); +ALTER TABLE a ADD CONSTRAINT a_b_id1_b_id2_fkey FOREIGN KEY (b_id1,b_id2) REFERENCES b(id1,id2) MATCH SIMPLE; +-- this should remove inner join to b +EXPLAIN (COSTS OFF) +SELECT a.id FROM a INNER JOIN b ON a.b_id1 = b.id1 AND a.b_id2 = b.id2; + QUERY PLAN +--------------------------------------------------------- + Seq Scan on a + Filter: ((b_id1 IS NOT NULL) AND (b_id2 IS NOT NULL)) +(2 rows) + +-- should not remove inner join to b (extra condition) +EXPLAIN (COSTS OFF) +SELECT a.id FROM a INNER JOIN b ON a.b_id1 = b.id1 AND a.b_id2 = b.id2 AND a.b_id2 >= b.id2; + QUERY PLAN +--------------------------------------------------------- + Merge Join + Merge Cond: ((b.id1 = a.b_id1) AND (b.id2 = a.b_id2)) + Join Filter: (a.b_id2 >= b.id2) + -> Index Only Scan using b_pkey on b + -> Sort + Sort Key: a.b_id1, a.b_id2 + -> Seq Scan on a +(7 rows) + +-- should not remove inner join to b (wrong operator) +EXPLAIN (COSTS OFF) +SELECT a.id FROM a INNER JOIN b ON a.b_id1 > b.id1 AND a.b_id2 < b.id2; + QUERY PLAN +----------------------------------------------------------- + Nested Loop + -> Seq Scan on a + -> Index Only Scan using b_pkey on b + Index Cond: ((id1 < a.b_id1) AND (id2 > a.b_id2)) +(4 rows) + +-- should not remove inner join (only checking id1) +EXPLAIN (COSTS OFF) +SELECT a.id FROM a INNER JOIN b ON a.b_id1 = b.id1; + QUERY PLAN +----------------------------------------- + Merge Join + Merge Cond: (b.id1 = a.b_id1) + -> Index Only Scan using b_pkey on b + -> Sort + Sort Key: a.b_id1 + -> Seq Scan on a +(6 rows) + +-- should not remove inner join (checking wrong columns) +EXPLAIN (COSTS OFF) +SELECT a.id FROM a INNER JOIN b ON a.b_id2 = b.id1 AND a.b_id1 = b.id2; + QUERY PLAN +--------------------------------------------------------- + Merge Join + Merge Cond: ((b.id1 = a.b_id2) AND (b.id2 = a.b_id1)) + -> Index Only Scan using b_pkey on b + -> Sort + Sort Key: a.b_id2, a.b_id1 + -> Seq Scan on a +(6 rows) + +-- should not remove inner join (no check for id2) +EXPLAIN (COSTS OFF) +SELECT a.id FROM a INNER JOIN b ON a.b_id2 = b.id1 AND a.b_id1 = b.id1; + QUERY PLAN +--------------------------------------- + Hash Join + Hash Cond: (b.id1 = a.b_id2) + -> Seq Scan on b + -> Hash + -> Seq Scan on a + Filter: (b_id2 = b_id1) +(6 rows) + +-- should not remove inner join (no check for b_id2) +EXPLAIN (COSTS OFF) +SELECT a.id FROM a INNER JOIN b ON a.b_id1 = b.id1 AND a.b_id1 = b.id2; + QUERY PLAN +----------------------------------- + Hash Join + Hash Cond: (a.b_id1 = b.id1) + -> Seq Scan on a + -> Hash + -> Seq Scan on b + Filter: (id1 = id2) +(6 rows) + +-- should not remove inner join (no check for b_id2) +EXPLAIN (COSTS OFF) +SELECT id FROM a INNER JOIN b ON a.b_id1 = b.id1 AND a.b_id1 = b.id1; + QUERY PLAN +----------------------------------------- + Merge Join + Merge Cond: (b.id1 = a.b_id1) + -> Index Only Scan using b_pkey on b + -> Sort + Sort Key: a.b_id1 + -> Seq Scan on a +(6 rows) + +ROLLBACK; +-- In this test we want to ensure that INNER JOIN removal does not +-- occur when there are pending foreign key triggers. +-- We test this by updating a relation which is referenced by a foreign key +-- and then executing another query which would normally allow the inner +-- join to be removed. +CREATE TABLE j2 (id INT NOT NULL PRIMARY KEY); +CREATE TABLE j1 ( + id INT PRIMARY KEY, + j2_id INT NOT NULL REFERENCES j2 (id) MATCH FULL ON DELETE CASCADE ON UPDATE CASCADE +); +INSERT INTO j2 VALUES(10),(20); +INSERT INTO j1 VALUES(1,10),(2,20); +-- create a table to store records that 'violate' the fkey. +CREATE TABLE results (j2_id INT NOT NULL); +CREATE OR REPLACE FUNCTION j1_update() RETURNS TRIGGER AS $$ +BEGIN + INSERT INTO results SELECT j2_id FROM j1 INNER JOIN j2 ON j1.j2_id = j2.id; + RETURN NEW; + END; +$$ LANGUAGE plpgsql; +CREATE TRIGGER j1_update_trigger BEFORE UPDATE ON j2 FOR EACH ROW EXECUTE PROCEDURE j1_update(); +UPDATE j2 SET id = id + 1; +-- results should only contain 3 records. If we blindly removed the join despite the +-- foreign key not having updated the referenced records yet, we'd get 4 rows in results. +SELECT * FROM results; + j2_id +------- + 10 + 20 + 20 +(3 rows) + +DROP TABLE j1; +DROP TABLE j2; +DROP TABLE results; +DROP FUNCTION j1_update(); create temp table parent (k int primary key, pd int); create temp table child (k int unique, cd int); insert into parent values (1, 10), (2, 20), (3, 30); diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 718e1d9..b94f12d 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -977,6 +977,140 @@ select i8.* from int8_tbl i8 left join (select f1 from int4_tbl group by f1) i4 rollback; +BEGIN; + +-- Test join removals for inner joins +CREATE TEMP TABLE c (id INT NOT NULL PRIMARY KEY); +CREATE TEMP TABLE b (id INT NOT NULL PRIMARY KEY, c_id INT NOT NULL REFERENCES c(id), val INT); +CREATE TEMP TABLE a (id INT NOT NULL PRIMARY KEY, b_id INT REFERENCES b(id)); + +-- this should remove inner join to b +EXPLAIN (COSTS OFF) +SELECT a.* FROM a INNER JOIN b ON a.b_id = b.id; + +-- this should remove inner join to b and c +EXPLAIN (COSTS OFF) +SELECT a.* FROM a INNER JOIN b ON a.b_id = b.id INNER JOIN c ON b.c_id = c.id; + +-- this should generate the same plan as above. +EXPLAIN (COSTS OFF) +SELECT a.* FROM c INNER JOIN b ON c.id = b.c_id INNER JOIN a ON a.b_id = b.id; + +-- inner join can't be removed due to b columns in the target list +EXPLAIN (COSTS OFF) +SELECT * FROM a INNER JOIN b ON a.b_id = b.id; + +-- this should not remove inner join to b due to quals restricting results from b +EXPLAIN (COSTS OFF) +SELECT a.* FROM a INNER JOIN b on a.b_id = b.id WHERE b.val = 10; + +-- this should remove joins to b and c. +EXPLAIN (COSTS OFF) +SELECT COUNT(*) FROM a INNER JOIN b ON a.b_id = b.id LEFT OUTER JOIN c ON a.id = c.id; + +-- this should remove joins to b and c, however it b will only be removed on +-- 2nd attempt after c is removed by the left join removal code. +EXPLAIN (COSTS OFF) +SELECT COUNT(*) FROM a INNER JOIN b ON a.b_id = b.id LEFT OUTER JOIN c ON b.id = c.id; + +-- this should not remove join to b +EXPLAIN (COSTS OFF) +SELECT a.* FROM a INNER JOIN b on a.b_id = b.id WHERE b.val = b.id; + +-- this should not remove the join, no foreign key exists between a.id and b.id +EXPLAIN (COSTS OFF) +SELECT a.* FROM a INNER JOIN b ON a.id = b.id; + +ALTER TABLE a ALTER COLUMN b_id SET NOT NULL; + +-- Ensure the join gets removed, but an IS NOT NULL qual is not added for b_id +EXPLAIN (COSTS OFF) +SELECT a.* FROM a INNER JOIN b ON a.b_id = b.id; + +ROLLBACK; + +BEGIN; + +-- inner join removal code with 2 column foreign keys + +CREATE TEMP TABLE b (id1 INT NOT NULL, id2 INT NOT NULL, PRIMARY KEY(id1,id2)); +CREATE TEMP TABLE a (id INT NOT NULL PRIMARY KEY, b_id1 INT, b_id2 INT); + +ALTER TABLE a ADD CONSTRAINT a_b_id1_b_id2_fkey FOREIGN KEY (b_id1,b_id2) REFERENCES b(id1,id2) MATCH SIMPLE; + +-- this should remove inner join to b +EXPLAIN (COSTS OFF) +SELECT a.id FROM a INNER JOIN b ON a.b_id1 = b.id1 AND a.b_id2 = b.id2; + +-- should not remove inner join to b (extra condition) +EXPLAIN (COSTS OFF) +SELECT a.id FROM a INNER JOIN b ON a.b_id1 = b.id1 AND a.b_id2 = b.id2 AND a.b_id2 >= b.id2; + +-- should not remove inner join to b (wrong operator) +EXPLAIN (COSTS OFF) +SELECT a.id FROM a INNER JOIN b ON a.b_id1 > b.id1 AND a.b_id2 < b.id2; + +-- should not remove inner join (only checking id1) +EXPLAIN (COSTS OFF) +SELECT a.id FROM a INNER JOIN b ON a.b_id1 = b.id1; + +-- should not remove inner join (checking wrong columns) +EXPLAIN (COSTS OFF) +SELECT a.id FROM a INNER JOIN b ON a.b_id2 = b.id1 AND a.b_id1 = b.id2; + +-- should not remove inner join (no check for id2) +EXPLAIN (COSTS OFF) +SELECT a.id FROM a INNER JOIN b ON a.b_id2 = b.id1 AND a.b_id1 = b.id1; + +-- should not remove inner join (no check for b_id2) +EXPLAIN (COSTS OFF) +SELECT a.id FROM a INNER JOIN b ON a.b_id1 = b.id1 AND a.b_id1 = b.id2; + +-- should not remove inner join (no check for b_id2) +EXPLAIN (COSTS OFF) +SELECT id FROM a INNER JOIN b ON a.b_id1 = b.id1 AND a.b_id1 = b.id1; + +ROLLBACK; + +-- In this test we want to ensure that INNER JOIN removal does not +-- occur when there are pending foreign key triggers. +-- We test this by updating a relation which is referenced by a foreign key +-- and then executing another query which would normally allow the inner +-- join to be removed. + +CREATE TABLE j2 (id INT NOT NULL PRIMARY KEY); +CREATE TABLE j1 ( + id INT PRIMARY KEY, + j2_id INT NOT NULL REFERENCES j2 (id) MATCH FULL ON DELETE CASCADE ON UPDATE CASCADE +); + +INSERT INTO j2 VALUES(10),(20); +INSERT INTO j1 VALUES(1,10),(2,20); + +-- create a table to store records that 'violate' the fkey. +CREATE TABLE results (j2_id INT NOT NULL); + +CREATE OR REPLACE FUNCTION j1_update() RETURNS TRIGGER AS $$ +BEGIN + INSERT INTO results SELECT j2_id FROM j1 INNER JOIN j2 ON j1.j2_id = j2.id; + RETURN NEW; + END; +$$ LANGUAGE plpgsql; + +CREATE TRIGGER j1_update_trigger BEFORE UPDATE ON j2 FOR EACH ROW EXECUTE PROCEDURE j1_update(); + +UPDATE j2 SET id = id + 1; + +-- results should only contain 3 records. If we blindly removed the join despite the +-- foreign key not having updated the referenced records yet, we'd get 4 rows in results. +SELECT * FROM results; + +DROP TABLE j1; +DROP TABLE j2; +DROP TABLE results; +DROP FUNCTION j1_update(); + + create temp table parent (k int primary key, pd int); create temp table child (k int unique, cd int); insert into parent values (1, 10), (2, 20), (3, 30);