diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index c7b4153..001781a 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -2138,6 +2138,16 @@ _outIndexOptInfo(StringInfo str, const IndexOptInfo *node) } static void +_outForeignKeyOptInfo(StringInfo str, const ForeignKeyOptInfo *node) +{ + WRITE_NODE_TYPE("FOREIGNKEYOPTINFO"); + + WRITE_OID_FIELD(conrelid); + WRITE_OID_FIELD(confrelid); + WRITE_INT_FIELD(nkeys); +} + +static void _outEquivalenceClass(StringInfo str, const EquivalenceClass *node) { /* @@ -3607,6 +3617,9 @@ outNode(StringInfo str, const void *obj) case T_IndexOptInfo: _outIndexOptInfo(str, obj); break; + case T_ForeignKeyOptInfo: + _outForeignKeyOptInfo(str, obj); + break; case T_EquivalenceClass: _outEquivalenceClass(str, obj); break; diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index e7f63f4..54d2222 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -3888,6 +3888,219 @@ get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, } /* + * find_matching_foreign_keys + * identifies foreign keys matched by joinquals (or eclasses) + * + * Searches foreign keys connecting the left and right side of the join, and + * returns those with all conditions matches by an eclass or a regular qual. + * + * The function may return multiple foreign keys for a single pair of tables + */ +static List * +find_matching_foreign_keys(PlannerInfo *root, List *joinquals, + JoinType jointype, SpecialJoinInfo *sjinfo, + List **remaining_joinquals) +{ + int j; + ListCell *lc; + List *keys = NIL; + + /* make a local copy of joinquals so that we can remove items from it */ + *remaining_joinquals = list_copy(joinquals); + + /* find a combination of foreign keys */ + while (true) + { + FKInfo *best = NULL; + List *remove = NIL; + + foreach(lc, root->foreign_keys) + { + FKInfo *info = (FKInfo *)lfirst(lc); + + /* if we've already added this key, skip it */ + if (list_member_ptr(keys, info)) + continue; + + /* + * We can only use foreign keys connecting the two sides of the join. + * + * XXX In Tom's design proposal, this works with inner_rel/outer_rel, + * but we don't have that here. So we use relids from sjinfo instead. + */ + if ((bms_is_member(info->src_relid, sjinfo->min_lefthand) && + bms_is_member(info->dst_relid, sjinfo->min_righthand)) || + (bms_is_member(info->dst_relid, sjinfo->min_lefthand) && + bms_is_member(info->src_relid, sjinfo->min_righthand))) + { + int i; + + /* FIXME this needs to consider the remaining quals. */ + info->nmatched = 0; + for (i = 0; i < info->nkeys; i++) + if ((info->eclass[i] != NULL) || (info->rinfos[i] != NULL)) + info->nmatched += 1; + + /* compare the current key to the best key so far */ + if ((info->nmatched == info->nkeys) && + ((best == NULL) || (info->nmatched > best->nmatched))) + best = info; + } + } + + /* if we found no foreign key, terminate the search */ + if (!best) + break; + + keys = lappend(keys, best); + + /* now remove the quals used to match the new key from joinquals */ + for (j = 0; j < best->nkeys; j++) + if (best->rinfos[j] != NULL) + { + ListCell *lc2; + foreach(lc2, best->rinfos[j]) + *remaining_joinquals + = list_delete_ptr(*remaining_joinquals, best->rinfos[j]); + } + + /* + * we also need to remove clauses implied by an eclass + * + * We can't simplye remove clauses using parent_ec, we need to + * actually check the clause matches the foreign key. + */ + foreach(lc, *remaining_joinquals) + { + int i; + RestrictInfo *rinfo = (RestrictInfo *)lfirst(lc); + + Assert(IsA(rinfo, RestrictInfo)); /* sanity check */ + + /* we can ignore clauses not generated from an eclass */ + if (! rinfo->parent_ec) + continue; + + for (i = 0; i < best->nkeys; i++) + { + ListCell *lc3; + int foundvarmask = 0; + + /* if not matched by the same eclass, skip */ + if (best->eclass[i] != rinfo->parent_ec) + continue; + + foreach(lc3, rinfo->parent_ec->ec_members) + { + EquivalenceMember *em = (EquivalenceMember *) lfirst(lc3); + Var *var = (Var *) em->em_expr; + + /* we can only use simple (Var = Var) eclasses for FKs */ + if (!IsA(var, Var)) + continue; + + /* check which side of the foreign key is satisfied */ + if (best->src_relid == var->varno && + best->conkeys[i] == var->varattno) + foundvarmask |= 1; + + else if (best->dst_relid == var->varno && + best->confkeys[i] == var->varattno) + foundvarmask |= 2; + + if (foundvarmask == 3) + remove = lappend(remove, rinfo); + } + } + } + + /* actually delete the EC-quals */ + foreach(lc, remove) + *remaining_joinquals + = list_delete_ptr(*remaining_joinquals, lfirst(lc)); + } + + return keys; +} + +/* + * clauselist_join_selectivity + * Estimate selectivity of join clauses either by using foreign key info + * or by using the regular clauselist_selectivity(). + * + * Since selectivity estimates for each joinqual are multiplied together, this + * can cause significant underestimates on the number of join tuples in cases + * where there's more than 1 clause in the join condition. To help ease the + * pain here we make use of foreign keys, and we assume that 1 row will match + * when *all* of the foreign key columns are present in the join condition, any + * additional clauses are estimated using clauselist_selectivity(). + * + * Note this ignores whether the FK is invalid or currently deferred; we don't + * rely on this assumption for correctness of the query, so it is a reasonable + * and safe assumption for planning purposes. + * + * XXX Currently this applies all fully-matched foreign keys, but maybe + * that's not the right thing to do - e.g. when there's a duplicate foreign + * key, we'll apply it twice (clearly wrong). Also, overlapping foreign keys + * are likely correlated, but multiplication assumes independence. + * + * XXX We might also apply even foreign keys that are matched only partially, + * by estimating the selectivity as (1/N)^(nmatch/nkeys) where nmatch is number + * of matched conditions and nkeys is the total number of conditions. + * + * FIXME Consired NULL values properly (although that'll be tricky due to + * correlated columns, which is likely for multi-column keys). + */ +static Selectivity +clauselist_join_selectivity(PlannerInfo *root, List *joinquals, + JoinType jointype, SpecialJoinInfo *sjinfo) +{ + ListCell *lc; + Selectivity sel = 1.0; + List *matches; + List *remaining = NIL; + + /* find all foreign keys matched by join quals (or eclasses) */ + matches = find_matching_foreign_keys(root, joinquals, jointype, sjinfo, &remaining); + + /* did we find any matches at all */ + foreach(lc, matches) + { + RelOptInfo *inner_rel, *outer_rel; + FKInfo *info = (FKInfo *) lfirst(lc); + double ntuples; + + /* + * We know the foreign key connects the relations on the inner and + * outer side of the join, but we don't know which is which, so we + * have to decide using min_righthand (in) and min_lefthand (out). + * + * XXX This feature is enforced in find_matching_foreign_keys(). + */ + if (bms_is_member(info->src_relid, sjinfo->min_righthand)) + { + inner_rel = find_base_rel(root, info->src_relid); + outer_rel = find_base_rel(root, info->dst_relid); + ntuples = outer_rel->tuples; + } + else + { + inner_rel = find_base_rel(root, info->dst_relid); + outer_rel = find_base_rel(root, info->src_relid); + ntuples = inner_rel->tuples; + } + + /* XXX this needs more thought I guess */ + if (jointype == JOIN_SEMI || jointype == JOIN_ANTI) + sel *= Min(outer_rel->tuples / inner_rel->tuples, 1.0); + else + sel *= 1.0 / Max(ntuples, 1.0); + } + + return sel * clauselist_selectivity(root, remaining, 0, jointype, sjinfo); +} + +/* * calc_joinrel_size_estimate * Workhorse for set_joinrel_size_estimates and * get_parameterized_joinrel_size. @@ -3933,11 +4146,11 @@ calc_joinrel_size_estimate(PlannerInfo *root, } /* Get the separate selectivities */ - jselec = clauselist_selectivity(root, - joinquals, - 0, - jointype, - sjinfo); + jselec = clauselist_join_selectivity(root, + joinquals, + jointype, + sjinfo); + pselec = clauselist_selectivity(root, pushedquals, 0, @@ -3950,11 +4163,10 @@ calc_joinrel_size_estimate(PlannerInfo *root, } else { - jselec = clauselist_selectivity(root, - restrictlist, - 0, - jointype, - sjinfo); + jselec = clauselist_join_selectivity(root, + restrictlist, + jointype, + sjinfo); pselec = 0.0; /* not used, keep compiler quiet */ } diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index edd95d8..0d824e2 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -170,6 +170,17 @@ query_planner(PlannerInfo *root, List *tlist, generate_base_implied_equalities(root); /* + * Fetch information about foreign keys potentially useful for cardinality + * estimation (i.e. only those with both sides referenced in the query). + */ + collect_foreign_keys(root); + + /* + * Match foreign keys to eclasses and quals (mark satisfied conditions). + */ + match_foreign_keys_to_quals(root); + + /* * We have completed merging equivalence sets, so it's now possible to * generate pathkeys in canonical form; so compute query_pathkeys and * other pathkeys fields in PlannerInfo. diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 6aa8192..2d2eaaf 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -41,6 +41,7 @@ #include "rewrite/rewriteManip.h" #include "storage/bufmgr.h" #include "utils/lsyscache.h" +#include "utils/syscache.h" #include "utils/rel.h" #include "utils/snapmgr.h" @@ -391,6 +392,17 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, rel->indexlist = indexinfos; + /* + * Load foreign key data. Note this is the definitional data from the + * catalog only and does not lock the referenced tables here. The + * precise definition of the FK is important and may affect the usage + * elsewhere in the planner, e.g. if the constraint is deferred or + * if the constraint is not valid then relying upon this in the executor + * may not be accurate, though might be considered a useful estimate for + * planning purposes. + */ + rel->fkeylist = RelationGetFKeyList(relation); + /* Grab foreign-table info using the relcache, while we have it */ if (relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE) { diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index ba185ae..2349937 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -81,6 +81,311 @@ setup_simple_rel_arrays(PlannerInfo *root) } /* + * collect_foreign_keys + * fetch info about foreign keys applicable to join estimation + * + * Builds a list of foreign keys potentially usable for join cardinality + * estimation. We only keep foreign keys where both tables are present in + * the query. We only care about foreign keys with multiple columns, so + * keys with a single column are simply skipped. + * + * When building the list, we also replace the OIDs with relids, so that + * we can match the keys to eclasses/quals later (which use relids). This + * also means that if a relation has multiple RTEs (e.g. in a self-join), + * we will create multiple copies of the foreign key, one for each RTE. + * + * FIXME Perhaps this should further optimize the case with a single base + * relation (when there are no joins). Sadly we can't use all_baserels here + * because that's computed in make_one_rel() and that's too late. So we'd + * have to walk through the rels just like now, and count the baserels on + * our own. Which we kinda do anyway already. + */ +void +collect_foreign_keys(PlannerInfo *root) +{ + List *reloids; + Index rti, rti1, rti2; + + /* + * Let's compile a list of OIDs of tables, so that we can optimize the + * case with many foreign keys a bit. + */ + reloids = NIL; + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *rel = root->simple_rel_array[rti]; + RangeTblEntry *rte = root->simple_rte_array[rti]; + + if (rel == NULL) /* empty slot for non-baserel RTEs */ + continue; + + Assert(rel->relid == rti); /* sanity check on array */ + + /* ignore RTEs that are "other rels" or not relations */ + if ((rel->reloptkind != RELOPT_BASEREL) || + (rel->rtekind != RTE_RELATION)) + continue; + + reloids = list_append_unique_oid(reloids, rte->relid); + } + + /* + * Identify foreign keys referencing pairs of base relations in query. + * + * The code is optimized for possibly large number of foreign keys, so + * it only walks through each fkeylist once. It skips foreign keys on + * a single column and foreign keys where the other relation is not in + * the query (detected using the OID list compiled above). + */ + for (rti1 = 1; rti1 < root->simple_rel_array_size; rti1++) + { + ListCell *lc; + RelOptInfo *rel1 = root->simple_rel_array[rti1]; + RangeTblEntry *rte1 = root->simple_rte_array[rti1]; + + /* there may be empty slots corresponding to non-baserel RTEs */ + if (rel1 == NULL) + continue; + + /* sanity check on array */ + Assert(rel1->relid == rti1); + + /* ignore rel types that can't have foreign keys */ + if ((rel1->reloptkind != RELOPT_BASEREL) || + (rel1->rtekind != RTE_RELATION)) + continue; + + /* sanity check */ + Assert(rte1 != NULL); + + /* we only check one direction here */ + foreach(lc, rel1->fkeylist) + { + ForeignKeyOptInfo *fkinfo = (ForeignKeyOptInfo *) lfirst(lc); + + /* + * Skip foreign keys on a single column, as those are not useful + * for join cardinality estimation (which is about improving + * estimates for multi-column keys). + */ + if (fkinfo->nkeys == 1) + continue; + + /* Check if the other side of the foreign key is in the query. */ + if (! list_member_oid(reloids, fkinfo->confrelid)) + continue; + + /* We need to build both (rti1 < rti2) and (rti2 < rti1) here. */ + for (rti2 = 1; rti2 < root->simple_rel_array_size; rti2++) + { + RelOptInfo *rel2 = root->simple_rel_array[rti2]; + RangeTblEntry *rte2 = root->simple_rte_array[rti2]; + + /* we never join a RTE to itself */ + if (rti1 == rti2) + continue; + + /* there may be empty slots corresponding to non-baserel RTEs */ + if (rel2 == NULL) + continue; + + /* sanity check on array */ + Assert(rel2->relid == rti2); + + /* ignore rel types that can't have foreign keys */ + if ((rel2->reloptkind != RELOPT_BASEREL) || + (rel2->rtekind != RTE_RELATION)) + continue; + + /* sanity check */ + Assert(rte2 != NULL); + + /* + * We only need to check confrelid, as conrelid is the current + * relation, so it's implicitly satisfied. + */ + if (fkinfo->confrelid == rte2->relid) + { + /* build a struct with relids instead of OIDs */ + FKInfo *info = makeNode(FKInfo); + + info->src_relid = rti1; + info->dst_relid = rti2; + + info->nkeys = fkinfo->nkeys; + + /* XXX Are pointers back to relcache OK, or do we need a copy? */ + info->conkeys = fkinfo->conkeys; + info->confkeys = fkinfo->confkeys; + info->conpfeqop = fkinfo->conpfeqop; + + info->eclass = (EquivalenceClass **)palloc0(info->nkeys * sizeof(EquivalenceClass*)); + info->rinfos = (List **)palloc0(info->nkeys * sizeof(List *)); + + root->foreign_keys = lappend(root->foreign_keys, info); + } + } + } + } +} + +/* + * match_foreign_keys_to_quals + * identify foreign key conditions satisfied by eclasses or regular quals + * + * For each foreign key we walk through all equivalence classes and try to + * match them to the foreign key conditions. Later when matching quals to + * keys we can count those conditions as matched. + * + * Then we try to do the same with foreign keys and regular join conditions + * (in RTE->joininfo). Currently this is a separate loop over foreign_keys, + * but it might as well be merged into the eclass one. + */ +void +match_foreign_keys_to_quals(PlannerInfo *root) +{ + ListCell *lc; + + /* if there are no eclasses or foreign keys, bail out immediately */ + if ((root->foreign_keys == NIL) || (root->eq_classes == NIL)) + return; + + /* walk through the foreign keys once, and check them against eclasses */ + foreach(lc, root->foreign_keys) + { + int i; + FKInfo *info = (FKInfo *) lfirst(lc); + + /* try to match FK conditions to an equivalence class */ + for (i = 0; i < info->nkeys; i++) + { + ListCell *lc2; + + foreach(lc2, root->eq_classes) + { + ListCell *lc3; + EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc2); + int foundvarmask = 0; + + foreach(lc3, ec->ec_members) + { + EquivalenceMember *em = (EquivalenceMember *) lfirst(lc3); + Var *var = (Var *) em->em_expr; + + /* we can only use simple (Var = Var) eclasses for FKs */ + if (!IsA(var, Var)) + continue; + + /* check which side of the foreign key is satisfied */ + if (info->src_relid == var->varno && + info->conkeys[i] == var->varattno) + foundvarmask |= 1; + + else if (info->dst_relid == var->varno && + info->confkeys[i] == var->varattno) + foundvarmask |= 2; + + /* + * Check if we've found both matches in the same equivalence + * class, we can mark it accordingly. + */ + if (foundvarmask == 3) + { + info->eclass[i] = ec; + break; + } + } + + /* + * If we have just matched this part of the FK, we're done (no + * need to try the other eclasses) + */ + if (info->eclass[i] != NULL) + break; + } + } + } + + /* now also match the fkeys to regular join conditions */ + foreach(lc, root->foreign_keys) + { + ListCell *lc2; + FKInfo *info = (FKInfo *) lfirst(lc); + + RelOptInfo *rel = root->simple_rel_array[info->src_relid]; + + foreach (lc2, rel->joininfo) + { + RestrictInfo *rinfo; + OpExpr *clause; + Var *leftvar; + Var *rightvar; + + rinfo = (RestrictInfo *) lfirst(lc2); + + Assert(IsA(rinfo, RestrictInfo)); /* sanity check */ + + /* + * XXX should we skip rinfo with parent_ec here (EC clauses are + * already matched in the preceding loop)? + */ + + clause = (OpExpr *) rinfo->clause; + + /* only OpExprs are useful for consideration */ + if (!IsA(clause, OpExpr)) + continue; + + leftvar = (Var *) get_leftop((Expr *) clause); + rightvar = (Var *) get_rightop((Expr *) clause); + + /* Foreign keys only support Vars, so ignore anything more complex */ + if (!IsA(leftvar, Var) || !IsA(rightvar, Var)) + continue; + + /* now try to match the vars to the current foreign key */ + if ((info->src_relid == leftvar->varno) && + (info->dst_relid == rightvar->varno)) + { + int i; + for (i = 0; i < info->nkeys; i++) + { + /* + * If the operator does not match the FK, there's no point + * in checking the operands. + */ + if (clause->opno != info->conpfeqop[i]) + continue; + + if ((info->conkeys[i] == leftvar->varattno) && + (info->confkeys[i] == rightvar->varattno)) + info->rinfos[i] = lappend(info->rinfos[i], rinfo); + } + } + else if ((info->src_relid == rightvar->varno) && + (info->dst_relid == leftvar->varno)) + { + int i; + for (i = 0; i < info->nkeys; i++) + { + /* + * If the operator does not match the FK, there's no point + * in checking the operands. + */ + if (clause->opno != info->conpfeqop[i]) + continue; + + if ((info->conkeys[i] == rightvar->varattno) && + (info->confkeys[i] == leftvar->varattno)) + info->rinfos[i] = lappend(info->rinfos[i], rinfo); + } + } + } + } +} + + +/* * build_simple_rel * Construct a new RelOptInfo for a base relation or 'other' relation. */ diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index c958758..37bcee2 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -2031,6 +2031,7 @@ RelationDestroyRelation(Relation relation, bool remember_tupdesc) FreeTupleDesc(relation->rd_att); } list_free(relation->rd_indexlist); + list_free(relation->rd_fkeylist); bms_free(relation->rd_indexattr); bms_free(relation->rd_keyattr); bms_free(relation->rd_idattr); @@ -3957,6 +3958,139 @@ RelationGetIndexList(Relation relation) } /* + * RelationGetFKeyList -- get a list of foreign key oids + * + * Use an index scan on pg_constraint to load in FK definitions, + * intended for use within the planner, not for enforcing FKs. + * + * Data is ordered by Oid, though this is not critical at this point + * since we do not lock the referenced relations. + */ +List * +RelationGetFKeyList(Relation relation) +{ + Relation conrel; + SysScanDesc conscan; + ScanKeyData skey; + HeapTuple htup; + List *result; + List *oldlist; + MemoryContext oldcxt; + + /* Quick exit if we already computed the list. */ + if (relation->rd_fkeylist) + return list_copy(relation->rd_fkeylist); + + /* Fast path if no FKs... if it doesn't have a trigger, it can't have a FK */ + if (!relation->rd_rel->relhastriggers) + return NIL; + /* + * We build the list we intend to return (in the caller's context) while + * doing the scan. After successfully completing the scan, we copy that + * list into the relcache entry. This avoids cache-context memory leakage + * if we get some sort of error partway through. + */ + result = NIL; + + /* Prepare to scan pg_constraint for entries having conrelid = this rel. */ + ScanKeyInit(&skey, + Anum_pg_constraint_conrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(relation))); + + conrel = heap_open(ConstraintRelationId, AccessShareLock); + conscan = systable_beginscan(conrel, ConstraintRelidIndexId, true, + NULL, 1, &skey); + + while (HeapTupleIsValid(htup = systable_getnext(conscan))) + { + Form_pg_constraint constraint = (Form_pg_constraint) GETSTRUCT(htup); + + ForeignKeyOptInfo *info; + Datum adatum; + bool isnull; + ArrayType *arr; + int numkeys; + int i; + + /* return only foreign keys */ + if (constraint->contype != CONSTRAINT_FOREIGN) + continue; + + /* lookup number of keys first, and allocate all the pieces at once */ + adatum = SysCacheGetAttr(CONSTROID, htup, + Anum_pg_constraint_conkey, &isnull); + Assert(!isnull); + + arr = DatumGetArrayTypeP(adatum); + numkeys = ARR_DIMS(arr)[0]; + + oldcxt = MemoryContextSwitchTo(CacheMemoryContext); + + info = makeNode(ForeignKeyOptInfo); + + info->nkeys = numkeys; + info->conrelid = constraint->conrelid; + info->confrelid = constraint->confrelid; + + info->conkeys = (int*)palloc(numkeys * sizeof(int)); + info->confkeys = (int*)palloc(numkeys * sizeof(int)); + info->conpfeqop = (Oid*)palloc(numkeys * sizeof(Oid)); + + MemoryContextSwitchTo(oldcxt); + + /* conkey */ + for (i = 0; i < numkeys; i++) + info->conkeys[i] = ((int16 *) ARR_DATA_PTR(arr))[i]; + + /* confkey */ + adatum = SysCacheGetAttr(CONSTROID, htup, + Anum_pg_constraint_confkey, &isnull); + Assert(!isnull); + + arr = DatumGetArrayTypeP(adatum); + Assert(numkeys == ARR_DIMS(arr)[0]); + + for (i = 0; i < numkeys; i++) + info->confkeys[i] = ((int16 *) ARR_DATA_PTR(arr))[i]; + + /* conpfeqop */ + adatum = SysCacheGetAttr(CONSTROID, htup, + Anum_pg_constraint_conpfeqop, &isnull); + Assert(!isnull); + + arr = DatumGetArrayTypeP(adatum); + Assert(numkeys == ARR_DIMS(arr)[0]); + + for (i = 0; i < numkeys; i++) + info->conpfeqop[i] = ((Oid *) ARR_DATA_PTR(arr))[i]; + + /* Add FK's node to the result list */ + result = lappend(result, info); + } + + systable_endscan(conscan); + + heap_close(conrel, AccessShareLock); + + /* Now save a copy of the completed list in the relcache entry. */ + oldcxt = MemoryContextSwitchTo(CacheMemoryContext); + oldlist = relation->rd_fkeylist; + relation->rd_fkeylist = list_copy(result); + MemoryContextSwitchTo(oldcxt); + + /* + * Don't leak the old list, if there is one + * + * FIXME This is insufficient, as it only frees the list, not the contents + * (so we end up with old ForeignKeyOptInto entries). + */ + list_free(oldlist); + + return result; +} + +/* * insert_ordered_oid * Insert a new Oid into a sorted list of Oids, preserving ordering * @@ -4920,6 +5054,7 @@ load_relcache_init_file(bool shared) rel->rd_indexattr = NULL; rel->rd_keyattr = NULL; rel->rd_idattr = NULL; + rel->rd_fkeylist = NIL; rel->rd_createSubid = InvalidSubTransactionId; rel->rd_newRelfilenodeSubid = InvalidSubTransactionId; rel->rd_amcache = NULL; diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index c4b9c14..1f71031 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -223,6 +223,8 @@ typedef enum NodeTag T_PlannerGlobal, T_RelOptInfo, T_IndexOptInfo, + T_ForeignKeyOptInfo, + T_FKInfo, T_ParamPathInfo, T_Path, T_IndexPath, diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index a4892cb..2b5fc2a 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -303,6 +303,10 @@ typedef struct PlannerInfo /* optional private data for join_search_hook, e.g., GEQO */ void *join_search_private; + + /* list of foreign keys potentially useful for join estimation */ + List *foreign_keys; + } PlannerInfo; @@ -516,6 +520,7 @@ typedef struct RelOptInfo List *lateral_vars; /* LATERAL Vars and PHVs referenced by rel */ Relids lateral_referencers; /* rels that reference me laterally */ List *indexlist; /* list of IndexOptInfo */ + List *fkeylist; /* list of ForeignKeyOptInfo */ BlockNumber pages; /* size estimates derived from pg_class */ double tuples; double allvisfrac; @@ -724,6 +729,49 @@ typedef struct EquivalenceMember } EquivalenceMember; /* + * ForeignKeyOptInfo + * Per-foreign-key information for planning/optimization + * + * Only includes columns from pg_constraint related to foreign keys. + * + * conkeys[], confkeys[] and conpfeqop[] each have nkeys entries. + */ +typedef struct ForeignKeyOptInfo +{ + NodeTag type; + + Oid conrelid; /* relation constrained by the foreign key */ + Oid confrelid; /* relation referenced by the foreign key */ + + int nkeys; /* number of columns in the foreign key */ + int *conkeys; /* attnums of columns in the constrained table */ + int *confkeys; /* attnums of columns in the referenced table */ + Oid *conpfeqop; /* OIDs of equality operators used by the FK */ + +} ForeignKeyOptInfo; + +/* only used in planner when matching foreign keys to conditions/eclasses */ +typedef struct FKInfo +{ + NodeTag type; + + Index src_relid; /* relid of the source relation (conrelid) */ + Index dst_relid; /* relid of the target relation (confrelid) */ + + int nkeys; /* number of columns in the foreign key */ + int nmatched; /* number of conditions matched */ + int *conkeys; /* attnums of columns in the constrained table */ + int *confkeys; /* attnums of columns in the referenced table */ + Oid *conpfeqop; /* OIDs of equality operators used by the FK */ + + /* used in costsize.c */ + EquivalenceClass **eclass; /* pointer to eclass matching the condition */ + List **rinfos; /* list of non-EC clauses matched to key */ + +} FKInfo; + + +/* * PathKeys * * The sort ordering of a path is represented by a list of PathKey nodes. diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 5de4c34..8cd5762 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -236,6 +236,8 @@ extern Path *reparameterize_path(PlannerInfo *root, Path *path, * prototypes for relnode.c */ extern void setup_simple_rel_arrays(PlannerInfo *root); +extern void collect_foreign_keys(PlannerInfo *root); +extern void match_foreign_keys_to_quals(PlannerInfo *root); extern RelOptInfo *build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind); extern RelOptInfo *find_base_rel(PlannerInfo *root, int relid); diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index f3b25e2..e0ac451 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -76,6 +76,8 @@ extern Expr *adjust_rowcompare_for_index(RowCompareExpr *clause, int indexcol, List **indexcolnos, bool *var_on_left_p); +extern bool has_matching_fkey(RelOptInfo *rel, RelOptInfo *frel, List *clauses, + bool reverse); /* * tidpath.h diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index fd858fd..d00a765 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -95,6 +95,9 @@ typedef struct RelationData Oid rd_oidindex; /* OID of unique index on OID, if any */ Oid rd_replidindex; /* OID of replica identity index, if any */ + /* data managed by RelationGetFKList: */ + List *rd_fkeylist; /* list of ForeignKeyOptInfo entries */ + /* data managed by RelationGetIndexAttrBitmap: */ Bitmapset *rd_indexattr; /* identifies columns used in indexes */ Bitmapset *rd_keyattr; /* cols that can be ref'd by foreign keys */ diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h index 1b48304..7f07c26 100644 --- a/src/include/utils/relcache.h +++ b/src/include/utils/relcache.h @@ -38,6 +38,7 @@ extern void RelationClose(Relation relation); * Routines to compute/retrieve additional cached information */ extern List *RelationGetIndexList(Relation relation); +extern List *RelationGetFKeyList(Relation relation); extern Oid RelationGetOidIndex(Relation relation); extern Oid RelationGetReplicaIndex(Relation relation); extern List *RelationGetIndexExpressions(Relation relation); diff --git a/src/test/regress/expected/rangefuncs.out b/src/test/regress/expected/rangefuncs.out index 00ef421..6b2ec7f 100644 --- a/src/test/regress/expected/rangefuncs.out +++ b/src/test/regress/expected/rangefuncs.out @@ -1,18 +1,18 @@ SELECT name, setting FROM pg_settings WHERE name LIKE 'enable%'; - name | setting -----------------------+--------- - enable_bitmapscan | on - enable_hashagg | on - enable_hashjoin | on - enable_indexonlyscan | on - enable_indexscan | on - enable_material | on - enable_mergejoin | on - enable_nestloop | on - enable_seqscan | on - enable_sort | on - enable_tidscan | on -(11 rows) + name | setting +-----------------------+--------- + enable_bitmapscan | on + enable_hashagg | on + enable_hashjoin | on + enable_indexonlyscan | on + enable_indexscan | on + enable_material | on + enable_mergejoin | on + enable_nestloop | on + enable_seqscan | on + enable_sort | on + enable_tidscan | on +(12 rows) CREATE TABLE foo2(fooid int, f2 int); INSERT INTO foo2 VALUES(1, 11);