diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index e1b49d5..8dbfeba 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -1907,6 +1907,16 @@ _outIndexOptInfo(StringInfo str, const IndexOptInfo *node) } static void +_outForeignKeyOptInfo(StringInfo str, const ForeignKeyOptInfo *node) +{ + WRITE_NODE_TYPE("FOREIGNKEYOPTINFO"); + + WRITE_OID_FIELD(conrelid); + WRITE_OID_FIELD(confrelid); + WRITE_INT_FIELD(nkeys); +} + +static void _outEquivalenceClass(StringInfo str, const EquivalenceClass *node) { /* @@ -3297,6 +3307,9 @@ _outNode(StringInfo str, const void *obj) case T_IndexOptInfo: _outIndexOptInfo(str, obj); break; + case T_ForeignKeyOptInfo: + _outForeignKeyOptInfo(str, obj); + break; case T_EquivalenceClass: _outEquivalenceClass(str, obj); break; diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index d107d76..0a57bc9 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -3732,6 +3732,322 @@ get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, } /* + * find_best_foreign_key_quals + * Analyzes joinquals to determine if any quals match foreign keys defined + * on 'fkrel' which reference 'foreignrel'. We return the number of keys + * of the best matching foreign key. We assume the best matching foreign + * key is the one with the most keys, not the most quals matching quals, + * as quals could be duplicated. We also set 'joinqualsbitmap' bits to + * mark which quals in 'joinquals' were matched. Zero is returned if no + * match is found. In this case 'joinqualsbitmap' is set to NULL. Partial + * foreign key matches are currently ignored. + */ +static int +find_best_foreign_key_quals(PlannerInfo *root, RelOptInfo *fkrel, + RelOptInfo *foreignrel, List *joinquals, + Bitmapset **joinqualsbitmap) +{ + Bitmapset *qualbestmatch; + Bitmapset *usefulquals; + ListCell *lc; + int quallstidx; + int bestmatchnkeys; + Oid frelid; + + /* fast path out when there's no foreign keys on fkrel */ + if (fkrel->fkeylist == NIL) + { + *joinqualsbitmap = NULL; + return 0; + } + + qualbestmatch = NULL; + quallstidx = -1; + usefulquals = NULL; + bestmatchnkeys = 0; + + /* + * First build a bitmapset with all possibly useful quals. This will save + * from having to do this for each foreign key later. The only quals that + * are useful to us are in the form "var op var", so here we'll ignore + * things like "function(var1, var2)". + */ + foreach(lc, joinquals) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + OpExpr *clause; + Var *leftvar, *rightvar; + + /* + * Increment this at the start of the loop to save doing it before each + * continue statement. + */ + quallstidx++; + + if (!IsA(rinfo, RestrictInfo)) + continue; + + if (!IsA(rinfo->clause, OpExpr)) + continue; + + clause = (OpExpr *) rinfo->clause; + + if (list_length(clause->args) != 2) + continue; + + leftvar = (Var *) get_leftop((Expr *) clause); + rightvar = (Var *) get_rightop((Expr *) clause); + + /* Foreign keys only support Vars, so ignore anything more complex */ + if (!IsA(leftvar, Var) || !IsA(rightvar, Var)) + continue; + + usefulquals = bms_add_member(usefulquals, quallstidx); + } + + frelid = root->simple_rte_array[foreignrel->relid]->relid; + + /* now check the matches for each foreign key defined on the fkrel */ + foreach(lc, fkrel->fkeylist) + { + ForeignKeyOptInfo *fkinfo = (ForeignKeyOptInfo *) lfirst(lc); + Bitmapset *qualmatches; + Bitmapset *fkmatches; + int i; + int nkeys; + + /* skip any foreign keys which don't reference frelid */ + if (fkinfo->confrelid != frelid) + continue; + + qualmatches = NULL; + fkmatches = NULL; + nkeys = fkinfo->nkeys; + + /* + * Loop over each column of the foreign key and build a bitmap index + * of each joinqual which matches. Note that we don't stop when we find + * the first match, as the expression could be duplicated in the + * joinquals, and we want to match as many as possible. + */ + for (i = 0; i < nkeys; i++) + { + ListCell *lc2; + + quallstidx = -1; + foreach(lc2, joinquals) + { + RestrictInfo *rinfo; + OpExpr *clause; + + quallstidx++; + + /* skip anything we didn't mark as useful above. */ + if (!bms_is_member(quallstidx, usefulquals)) + continue; + + /* + * Here we can safely assume that we have an OpExpr, in the + * from of "var op var" + */ + rinfo = (RestrictInfo *) lfirst(lc2); + clause = (OpExpr *) rinfo->clause; + + /* + * if the operator does not match then there's little point in + * checking the operands + */ + if (clause->opno != fkinfo->conpfeqop[i]) + continue; + + /* + * For RestrictInfos built from an eclass we must consider + * each member of the eclass, as rinfo may not be using the + * foreign key Vars. For efficient tracking of which Vars we've + * found, since we're only tracking 2 Vars, we use a bitmask. + * We can safely finish searching when both of the least + * significant bits are set. + */ + if (rinfo->parent_ec) + { + EquivalenceClass *ec = rinfo->parent_ec; + ListCell *lc3; + int foundvarmask = 0; + + foreach(lc3, ec->ec_members) + { + EquivalenceMember *em = (EquivalenceMember *) lfirst(lc3); + Var *var = (Var *) em->em_expr; + + if (!IsA(var, Var)) + continue; + + if (foreignrel->relid == var->varno && + fkinfo->confkeys[i] == var->varattno) + foundvarmask |= 1; + + else if (fkrel->relid == var->varno && + fkinfo->conkeys[i] == var->varattno) + foundvarmask |= 2; + + /* + * Check if we've found both matches. If found we add + * this qual to the matched list and mark this key as + * matched too. + */ + if (foundvarmask == 3) + { + qualmatches = bms_add_member(qualmatches, quallstidx); + fkmatches = bms_add_member(fkmatches, i); + break; + } + } + } + else + { + Var *leftvar = (Var *) get_leftop((Expr *) clause); + Var *rightvar = (Var *) get_rightop((Expr *) clause); + + /* + * In this non eclass RestrictInfo case we'll check if the + * left and right Vars match to this part of the foreign + * key. Remember that this could be written with the Vars + * in either order, so we test both permutations of the + * expression. + */ + if (foreignrel->relid == leftvar->varno && + fkrel->relid == rightvar->varno && + fkinfo->confkeys[i] == leftvar->varattno && + fkinfo->conkeys[i] == rightvar->varattno) + { + qualmatches = bms_add_member(qualmatches, quallstidx); + fkmatches = bms_add_member(fkmatches, i); + } + else if (foreignrel->relid == rightvar->varno && + fkrel->relid == leftvar->varno && + fkinfo->confkeys[i] == rightvar->varattno && + fkinfo->conkeys[i] == leftvar->varattno) + { + qualmatches = bms_add_member(qualmatches, quallstidx); + fkmatches = bms_add_member(fkmatches, i); + } + } + } + } + + /* did we get a complete match to the foreign key? */ + if (bms_num_members(fkmatches) == nkeys) + { + /* does this match match more keys than any previous match? */ + if (nkeys > bestmatchnkeys) + { + bms_free(qualbestmatch); + qualbestmatch = qualmatches; + bestmatchnkeys = nkeys; + } + } + + bms_free(fkmatches); + } + + *joinqualsbitmap = qualbestmatch; + return bestmatchnkeys; +} + +/* + * clauselist_join_selectivity + * Estimate selectivity of join clauses either by using foreign key info + * or by using the regular clauselist_selectivity(). + * + * Since selectivity estimates for each joinqual are multiplied together, this + * can cause significant underestimates on the number of join tuples in cases + * where there's more than 1 clause in the join condition. To help ease the + * pain here we make use of foreign keys, and we assume that 1 row will match + * when *all* of the foreign key columns are present in the join condition, any + * additional clauses are estimated using clauselist_selectivity(). + */ +static Selectivity +clauselist_join_selectivity(PlannerInfo *root, List *joinquals, + JoinType jointype, SpecialJoinInfo *sjinfo) +{ + int outerid; + int innerid; + Selectivity sel = 1.0; + Bitmapset *nonfkquals = NULL; + + innerid = -1; + while ((innerid = bms_next_member(sjinfo->min_righthand, innerid)) >= 0) + { + RelOptInfo *innerrel = find_base_rel(root, innerid); + + outerid = -1; + while ((outerid = bms_next_member(sjinfo->min_lefthand, outerid)) >= 0) + { + RelOptInfo *outerrel = find_base_rel(root, outerid); + Bitmapset *outer2inner; + Bitmapset *inner2outer; + int innermatches; + int outermatches; + + /* + * check which quals are matched by a foreign key referencing the + * innerrel. + */ + outermatches = find_best_foreign_key_quals(root, outerrel, + innerrel, joinquals, &outer2inner); + + /* do the same, but with relations swapped */ + innermatches = find_best_foreign_key_quals(root, innerrel, + outerrel, joinquals, &inner2outer); + + /* + * did we find any matches at all? If so we need to see which one is + * the best/longest match + */ + if (outermatches != 0 || innermatches != 0) + { + /* either could be zero, but not both. */ + if (outermatches < innermatches) + { + nonfkquals = bms_add_members(nonfkquals, inner2outer); + + if (jointype != JOIN_SEMI && jointype != JOIN_ANTI) + sel *= 1.0 / Max(outerrel->tuples, 1.0); + } + else + { + nonfkquals = bms_add_members(nonfkquals, outer2inner); + + if (jointype != JOIN_SEMI && jointype != JOIN_ANTI) + sel *= 1.0 / Max(innerrel->tuples, 1.0); + } + } + } + } + + /* + * If any non matched quals exist then we build a list of the non-matches + * and use clauselist_selectivity() to estimate the selectivity of these. + */ + if (bms_num_members(nonfkquals) < list_length(joinquals)) + { + ListCell *lc; + int lstidx = 0; + List *nonfkeyclauses = NIL; + + foreach (lc, joinquals) + { + if (!bms_is_member(lstidx, nonfkquals)) + nonfkeyclauses = lappend(nonfkeyclauses, lfirst(lc)); + lstidx++; + } + sel *= clauselist_selectivity(root, nonfkeyclauses, 0, jointype, sjinfo); + } + + return sel; +} + +/* * calc_joinrel_size_estimate * Workhorse for set_joinrel_size_estimates and * get_parameterized_joinrel_size. @@ -3777,11 +4093,11 @@ calc_joinrel_size_estimate(PlannerInfo *root, } /* Get the separate selectivities */ - jselec = clauselist_selectivity(root, - joinquals, - 0, - jointype, - sjinfo); + jselec = clauselist_join_selectivity(root, + joinquals, + jointype, + sjinfo); + pselec = clauselist_selectivity(root, pushedquals, 0, @@ -3794,11 +4110,10 @@ calc_joinrel_size_estimate(PlannerInfo *root, } else { - jselec = clauselist_selectivity(root, - restrictlist, - 0, - jointype, - sjinfo); + jselec = clauselist_join_selectivity(root, + restrictlist, + jointype, + sjinfo); pselec = 0.0; /* not used, keep compiler quiet */ } diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 9442e5f..dbe6038 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -27,6 +27,7 @@ #include "catalog/catalog.h" #include "catalog/dependency.h" #include "catalog/heap.h" +#include "catalog/pg_constraint.h" #include "foreign/fdwapi.h" #include "miscadmin.h" #include "nodes/makefuncs.h" @@ -40,6 +41,7 @@ #include "rewrite/rewriteManip.h" #include "storage/bufmgr.h" #include "utils/lsyscache.h" +#include "utils/syscache.h" #include "utils/rel.h" #include "utils/snapmgr.h" @@ -93,6 +95,9 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, Relation relation; bool hasindex; List *indexinfos = NIL; + List *fkinfos = NIL; + List *fkoidlist; + ListCell *l; /* * We need not lock the relation since it was already locked, either by @@ -140,7 +145,6 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, if (hasindex) { List *indexoidlist; - ListCell *l; LOCKMODE lmode; indexoidlist = RelationGetIndexList(relation); @@ -380,6 +384,77 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, } rel->indexlist = indexinfos; + + /* load foreign keys */ + fkoidlist = RelationGetFKeyList(relation); + + foreach(l, fkoidlist) + { + int i; + ArrayType *arr; + Datum adatum; + bool isnull; + int numkeys; + Oid fkoid = lfirst_oid(l); + + HeapTuple htup = SearchSysCache1(CONSTROID, ObjectIdGetDatum(fkoid)); + Form_pg_constraint constraint = (Form_pg_constraint) GETSTRUCT(htup); + + ForeignKeyOptInfo *info; + + Assert(constraint->contype == CONSTRAINT_FOREIGN); + + info = makeNode(ForeignKeyOptInfo); + + info->conrelid = constraint->conrelid; + info->confrelid = constraint->confrelid; + + /* conkey */ + adatum = SysCacheGetAttr(CONSTROID, htup, + Anum_pg_constraint_conkey, &isnull); + Assert(!isnull); + + arr = DatumGetArrayTypeP(adatum); + numkeys = ARR_DIMS(arr)[0]; + info->conkeys = (int*)palloc0(numkeys * sizeof(int)); + + for (i = 0; i < numkeys; i++) + info->conkeys[i] = ((int16 *) ARR_DATA_PTR(arr))[i]; + + /* confkey */ + adatum = SysCacheGetAttr(CONSTROID, htup, + Anum_pg_constraint_confkey, &isnull); + Assert(!isnull); + + arr = DatumGetArrayTypeP(adatum); + numkeys = ARR_DIMS(arr)[0]; + info->confkeys = (int*)palloc0(numkeys * sizeof(int)); + + for (i = 0; i < numkeys; i++) + info->confkeys[i] = ((int16 *) ARR_DATA_PTR(arr))[i]; + + /* conpfeqop */ + adatum = SysCacheGetAttr(CONSTROID, htup, + Anum_pg_constraint_conpfeqop, &isnull); + Assert(!isnull); + + arr = DatumGetArrayTypeP(adatum); + numkeys = ARR_DIMS(arr)[0]; + info->conpfeqop = (Oid*)palloc0(numkeys * sizeof(Oid)); + + for (i = 0; i < numkeys; i++) + info->conpfeqop[i] = ((Oid *) ARR_DATA_PTR(arr))[i]; + + info->nkeys = numkeys; + + ReleaseSysCache(htup); + + fkinfos = lcons(info, fkinfos); + } + + list_free(fkoidlist); + + rel->fkeylist = fkinfos; /* Grab foreign-table info using the relcache, while we have it */ if (relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE) diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 9c3d096..3ae9022 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -3923,6 +3923,73 @@ RelationGetIndexList(Relation relation) } /* + * RelationGetFKeyList -- get a list of foreign key oids + * + * TODO blah blah blah + */ +List * +RelationGetFKeyList(Relation relation) +{ + Relation conrel; + SysScanDesc conscan; + ScanKeyData skey; + HeapTuple htup; + List *result; + List *oldlist; + MemoryContext oldcxt; + + /* Quick exit if we already computed the list. */ + if (relation->rd_fkeyvalid) + return list_copy(relation->rd_fkeylist); + + /* + * We build the list we intend to return (in the caller's context) while + * doing the scan. After successfully completing the scan, we copy that + * list into the relcache entry. This avoids cache-context memory leakage + * if we get some sort of error partway through. + */ + result = NIL; + + /* Prepare to scan pg_constraint for entries having conrelid = this rel. */ + ScanKeyInit(&skey, + Anum_pg_constraint_conrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(relation))); + + conrel = heap_open(ConstraintRelationId, AccessShareLock); + conscan = systable_beginscan(conrel, ConstraintRelidIndexId, true, + NULL, 1, &skey); + + while (HeapTupleIsValid(htup = systable_getnext(conscan))) + { + Form_pg_constraint constraint = (Form_pg_constraint) GETSTRUCT(htup); + + /* return only foreign keys */ + if (constraint->contype != CONSTRAINT_FOREIGN) + continue; + + /* Add index's OID to result list in the proper order */ + result = insert_ordered_oid(result, HeapTupleGetOid(htup)); + } + + systable_endscan(conscan); + + heap_close(conrel, AccessShareLock); + + /* Now save a copy of the completed list in the relcache entry. */ + oldcxt = MemoryContextSwitchTo(CacheMemoryContext); + oldlist = relation->rd_fkeylist; + relation->rd_fkeylist = list_copy(result); + relation->rd_fkeyvalid = true; + MemoryContextSwitchTo(oldcxt); + + /* Don't leak the old list, if there is one */ + list_free(oldlist); + + return result; +} + +/* * insert_ordered_oid * Insert a new Oid into a sorted list of Oids, preserving ordering * @@ -4891,6 +4958,8 @@ load_relcache_init_file(bool shared) rel->rd_indexattr = NULL; rel->rd_keyattr = NULL; rel->rd_idattr = NULL; + rel->rd_fkeylist = NIL; + rel->rd_fkeyvalid = false; rel->rd_createSubid = InvalidSubTransactionId; rel->rd_newRelfilenodeSubid = InvalidSubTransactionId; rel->rd_amcache = NULL; diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 274480e..5c40b83 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -221,6 +221,7 @@ typedef enum NodeTag T_PlannerGlobal, T_RelOptInfo, T_IndexOptInfo, + T_ForeignKeyOptInfo, T_ParamPathInfo, T_Path, T_IndexPath, diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 79bed33..8ce35e7 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -472,6 +472,7 @@ typedef struct RelOptInfo Relids lateral_relids; /* minimum parameterization of rel */ Relids lateral_referencers; /* rels that reference me laterally */ List *indexlist; /* list of IndexOptInfo */ + List *fkeylist; /* list of ForeignKeyOptInfo */ BlockNumber pages; /* size estimates derived from pg_class */ double tuples; double allvisfrac; @@ -566,6 +567,20 @@ typedef struct IndexOptInfo bool amhasgetbitmap; /* does AM have amgetbitmap interface? */ } IndexOptInfo; +/* TODO add info*/ +typedef struct ForeignKeyOptInfo +{ + NodeTag type; + + Oid conrelid; + Oid confrelid; + + int nkeys; + int *conkeys; + int *confkeys; + Oid *conpfeqop; + +} ForeignKeyOptInfo; /* * EquivalenceClasses diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 87123a5..da1fd58 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -73,6 +73,8 @@ extern Expr *adjust_rowcompare_for_index(RowCompareExpr *clause, int indexcol, List **indexcolnos, bool *var_on_left_p); +extern bool has_matching_fkey(RelOptInfo *rel, RelOptInfo *frel, List *clauses, + bool reverse); /* * tidpath.h diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index 8a55a09..c56cf8a 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -79,6 +79,7 @@ typedef struct RelationData bool rd_isvalid; /* relcache entry is valid */ char rd_indexvalid; /* state of rd_indexlist: 0 = not valid, 1 = * valid, 2 = temporarily forced */ + bool rd_fkeyvalid; /* state of rd_fkeylist: 0 = not valid, 1 = valid */ /* * rd_createSubid is the ID of the highest subtransaction the rel has @@ -112,6 +113,9 @@ typedef struct RelationData Oid rd_oidindex; /* OID of unique index on OID, if any */ Oid rd_replidindex; /* OID of replica identity index, if any */ + /* data managed by RelationGetFKList: */ + List *rd_fkeylist; /* OIDs of foreign keys */ + /* data managed by RelationGetIndexAttrBitmap: */ Bitmapset *rd_indexattr; /* identifies columns used in indexes */ Bitmapset *rd_keyattr; /* cols that can be ref'd by foreign keys */ diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h index 6953281..8878478 100644 --- a/src/include/utils/relcache.h +++ b/src/include/utils/relcache.h @@ -38,6 +38,7 @@ extern void RelationClose(Relation relation); * Routines to compute/retrieve additional cached information */ extern List *RelationGetIndexList(Relation relation); +extern List *RelationGetFKeyList(Relation relation); extern Oid RelationGetOidIndex(Relation relation); extern Oid RelationGetReplicaIndex(Relation relation); extern List *RelationGetIndexExpressions(Relation relation);