doc/src/sgml/custom-scan.sgml | 43 ++++++++++++++++++ doc/src/sgml/fdwhandler.sgml | 51 +++++++++++++++++++++ src/backend/commands/explain.c | 15 +++++-- src/backend/executor/execScan.c | 4 ++ src/backend/executor/nodeCustom.c | 38 ++++++++++++---- src/backend/executor/nodeForeignscan.c | 34 +++++++++----- src/backend/foreign/foreign.c | 31 ++++++++++--- src/backend/nodes/bitmapset.c | 57 +++++++++++++++++++++++ src/backend/nodes/copyfuncs.c | 5 +++ src/backend/nodes/outfuncs.c | 5 +++ src/backend/optimizer/path/allpaths.c | 1 - src/backend/optimizer/path/joinpath.c | 13 ++++++ src/backend/optimizer/path/joinrels.c | 21 ++++++++- src/backend/optimizer/plan/createplan.c | 80 ++++++++++++++++++++++++++------- src/backend/optimizer/plan/setrefs.c | 64 ++++++++++++++++++++++++++ src/backend/optimizer/util/plancat.c | 7 ++- src/backend/optimizer/util/relnode.c | 22 ++++++++- src/backend/utils/adt/ruleutils.c | 4 ++ src/include/foreign/fdwapi.h | 12 +++++ src/include/nodes/bitmapset.h | 1 + src/include/nodes/plannodes.h | 24 +++++++--- src/include/nodes/relation.h | 2 + src/include/optimizer/pathnode.h | 3 +- src/include/optimizer/paths.h | 13 ++++++ src/include/optimizer/planmain.h | 1 + 25 files changed, 499 insertions(+), 52 deletions(-) diff --git a/doc/src/sgml/custom-scan.sgml b/doc/src/sgml/custom-scan.sgml index 8a4a3df..b1400ae 100644 --- a/doc/src/sgml/custom-scan.sgml +++ b/doc/src/sgml/custom-scan.sgml @@ -48,6 +48,27 @@ extern PGDLLIMPORT set_rel_pathlist_hook_type set_rel_pathlist_hook; + A custom scan provider will also be able to add paths by setting the + following hook, to replace built-in join paths with a custom scan that + behaves like a scan on a previously joined relation; the hook is called + after the core code has generated what it believes to be the complete + and correct set of access paths for the join. 
+ +typedef void (*set_join_pathlist_hook_type) (PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + List *restrictlist, + JoinType jointype, + SpecialJoinInfo *sjinfo, + SemiAntiJoinFactors *semifactors, + Relids param_source_rels, + Relids extra_lateral_rels); +extern PGDLLIMPORT set_join_pathlist_hook_type set_join_pathlist_hook; + + + + Although this hook function can be used to examine, modify, or remove paths generated by the core system, a custom scan provider will typically confine itself to generating CustomPath objects and adding @@ -124,7 +145,9 @@ typedef struct CustomScan Scan scan; uint32 flags; List *custom_exprs; + List *custom_ps_tlist; List *custom_private; + Bitmapset *custom_relids; const CustomScanMethods *methods; } CustomScan; @@ -141,10 +164,30 @@ typedef struct CustomScan is only used by the custom scan provider itself. Plan trees must be able to be duplicated using copyObject, so all the data stored within these two fields must consist of nodes that function can handle. + custom_relids is set by the backend, so the custom-scan provider + does not need to touch it; it tracks the underlying relations represented by this + custom-scan node. methods must point to a (usually statically allocated) object implementing the required custom scan methods, which are further detailed below. + + When a CustomScan replaces built-in join paths, + the custom-scan provider must set up two things. + The first is a zero in scan.scanrelid, which + would normally be a range-table index. This informs the backend that + this CustomScan node is not associated with a particular + table. The second is a valid list of TargetEntry nodes in + the custom_ps_tlist. A CustomScan node + looks to the backend like an ordinary scan, but on a relation which is + the result of a join of relations. 
This means we cannot construct a tuple + descriptor based on a table definition, thus the custom-scan provider must + supply the expected record type of the tuples. + The tuple descriptor of the scan slot is constructed based on the + custom_ps_tlist, and assigned at executor initialization. + It is also referenced by EXPLAIN to resolve the names of the underlying + columns and relations. + Custom Scan Callbacks diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml index c1daa4b..54ba45f 100644 --- a/doc/src/sgml/fdwhandler.sgml +++ b/doc/src/sgml/fdwhandler.sgml @@ -598,6 +598,57 @@ IsForeignRelUpdatable (Relation rel); + + FDW Routines for remote join + + +void +GetForeignJoinPaths(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + SpecialJoinInfo *sjinfo, + List *restrictlist); + + Create possible access paths for a join of two foreign tables or + joined relations; both of them need to be managed by the same + FDW driver. + This optional function is called during query planning. + + + This function allows an FDW driver to add a ForeignScan path + for the supplied joinrel. From the standpoint of the + query planner, it looks like a scan node is added for the join relation. + This means that a ForeignScan path added instead of the built-in + local join logic has to generate tuples as if it scanned a joined + and materialized relation. + + + Usually, we expect the FDW driver to issue a remote query that involves + a table join on the remote side, and then to fetch the joined result + on the local side. + Unlike a simple table scan, the slot descriptor of the joined + relations is determined on the fly, thus we cannot know its definition + from the system catalog. + So, the FDW driver is responsible for telling the query planner the expected + form of the joined relations. 
When a ForeignScan + replaces a join of relations, scanrelid of the generated plan + node shall be zero, to mark that this ForeignScan node is not + associated with a particular foreign table. + Also, the FDW needs to construct a pseudo-scan tlist (fdw_ps_tlist) + to indicate the expected tuple definition. + + + When scanrelid equals zero, the executor initializes the slot + for the scan according to fdw_ps_tlist, but excludes junk + entries. This list is also used to resolve the names of the original + relations and columns, so the FDW can chain in expression nodes which are + not actually run on the local side, like a join clause to be executed on + the remote side; however, their target entries will have + resjunk=true. + + + FDW Routines for <command>EXPLAIN</> diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index a951c55..8892dca 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -730,11 +730,17 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used) case T_ValuesScan: case T_CteScan: case T_WorkTableScan: - case T_ForeignScan: - case T_CustomScan: *rels_used = bms_add_member(*rels_used, ((Scan *) plan)->scanrelid); break; + case T_ForeignScan: + *rels_used = bms_add_members(*rels_used, + ((ForeignScan *) plan)->fdw_relids); + break; + case T_CustomScan: + *rels_used = bms_add_members(*rels_used, + ((CustomScan *) plan)->custom_relids); + break; case T_ModifyTable: *rels_used = bms_add_member(*rels_used, ((ModifyTable *) plan)->nominalRelation); @@ -1072,9 +1078,12 @@ ExplainNode(PlanState *planstate, List *ancestors, case T_ValuesScan: case T_CteScan: case T_WorkTableScan: + ExplainScanTarget((Scan *) plan, es); + break; case T_ForeignScan: case T_CustomScan: - ExplainScanTarget((Scan *) plan, es); + if (((Scan *) plan)->scanrelid > 0) + ExplainScanTarget((Scan *) plan, es); break; case T_IndexScan: { diff --git a/src/backend/executor/execScan.c b/src/backend/executor/execScan.c index 3f0d809..2f18a8a 100644 --- 
a/src/backend/executor/execScan.c +++ b/src/backend/executor/execScan.c @@ -251,6 +251,10 @@ ExecAssignScanProjectionInfo(ScanState *node) /* Vars in an index-only scan's tlist should be INDEX_VAR */ if (IsA(scan, IndexOnlyScan)) varno = INDEX_VAR; + /* Also foreign-/custom-scan on pseudo relation should be INDEX_VAR */ + else if (scan->scanrelid == 0 && + (IsA(scan, ForeignScan) || IsA(scan, CustomScan))) + varno = INDEX_VAR; else varno = scan->scanrelid; diff --git a/src/backend/executor/nodeCustom.c b/src/backend/executor/nodeCustom.c index b07932b..2344129 100644 --- a/src/backend/executor/nodeCustom.c +++ b/src/backend/executor/nodeCustom.c @@ -23,6 +23,7 @@ CustomScanState * ExecInitCustomScan(CustomScan *cscan, EState *estate, int eflags) { CustomScanState *css; + Index scan_relid = cscan->scan.scanrelid; Relation scan_rel; /* populate a CustomScanState according to the CustomScan */ @@ -48,12 +49,31 @@ ExecInitCustomScan(CustomScan *cscan, EState *estate, int eflags) ExecInitScanTupleSlot(estate, &css->ss); ExecInitResultTupleSlot(estate, &css->ss.ps); - /* initialize scan relation */ - scan_rel = ExecOpenScanRelation(estate, cscan->scan.scanrelid, eflags); - css->ss.ss_currentRelation = scan_rel; - css->ss.ss_currentScanDesc = NULL; /* set by provider */ - ExecAssignScanType(&css->ss, RelationGetDescr(scan_rel)); - + /* + * open the base relation and acquire appropriate lock on it, then + * get the scan type from the relation descriptor, if this custom + * scan is on actual relations. + * + * on the other hands, custom-scan may scan on a pseudo relation; + * that is usually a result-set of relations join by external + * computing resource, or others. It has to get the scan type from + * the pseudo-scan target-list that should be assigned by custom-scan + * provider. 
+ */ + if (scan_relid > 0) + { + scan_rel = ExecOpenScanRelation(estate, scan_relid, eflags); + css->ss.ss_currentRelation = scan_rel; + css->ss.ss_currentScanDesc = NULL; /* set by provider */ + ExecAssignScanType(&css->ss, RelationGetDescr(scan_rel)); + } + else + { + TupleDesc ps_tupdesc; + + ps_tupdesc = ExecCleanTypeFromTL(cscan->custom_ps_tlist, false); + ExecAssignScanType(&css->ss, ps_tupdesc); + } css->ss.ps.ps_TupFromTlist = false; /* @@ -89,11 +109,11 @@ ExecEndCustomScan(CustomScanState *node) /* Clean out the tuple table */ ExecClearTuple(node->ss.ps.ps_ResultTupleSlot); - if (node->ss.ss_ScanTupleSlot) - ExecClearTuple(node->ss.ss_ScanTupleSlot); + ExecClearTuple(node->ss.ss_ScanTupleSlot); /* Close the heap relation */ - ExecCloseScanRelation(node->ss.ss_currentRelation); + if (node->ss.ss_currentRelation) + ExecCloseScanRelation(node->ss.ss_currentRelation); } void diff --git a/src/backend/executor/nodeForeignscan.c b/src/backend/executor/nodeForeignscan.c index 7399053..542d176 100644 --- a/src/backend/executor/nodeForeignscan.c +++ b/src/backend/executor/nodeForeignscan.c @@ -102,6 +102,7 @@ ForeignScanState * ExecInitForeignScan(ForeignScan *node, EState *estate, int eflags) { ForeignScanState *scanstate; + Index scanrelid = node->scan.scanrelid; Relation currentRelation; FdwRoutine *fdwroutine; @@ -141,16 +142,28 @@ ExecInitForeignScan(ForeignScan *node, EState *estate, int eflags) ExecInitScanTupleSlot(estate, &scanstate->ss); /* - * open the base relation and acquire appropriate lock on it. + * open the base relation and acquire appropriate lock on it, then + * get the scan type from the relation descriptor, if this foreign + * scan is on actual foreign-table. + * + * on the other hands, foreign-scan may scan on a pseudo relation; + * that is usually a result-set of remote relations join. It has + * to get the scan type from the pseudo-scan target-list that should + * be assigned by FDW driver. 
*/ - currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags); - scanstate->ss.ss_currentRelation = currentRelation; + if (scanrelid > 0) + { + currentRelation = ExecOpenScanRelation(estate, scanrelid, eflags); + scanstate->ss.ss_currentRelation = currentRelation; + ExecAssignScanType(&scanstate->ss, RelationGetDescr(currentRelation)); + } + else + { + TupleDesc ps_tupdesc; - /* - * get the scan type from the relation descriptor. (XXX at some point we - * might want to let the FDW editorialize on the scan tupdesc.) - */ - ExecAssignScanType(&scanstate->ss, RelationGetDescr(currentRelation)); + ps_tupdesc = ExecCleanTypeFromTL(node->fdw_ps_tlist, false); + ExecAssignScanType(&scanstate->ss, ps_tupdesc); + } /* * Initialize result tuple type and projection info. @@ -161,7 +174,7 @@ ExecInitForeignScan(ForeignScan *node, EState *estate, int eflags) /* * Acquire function pointers from the FDW's handler, and init fdw_state. */ - fdwroutine = GetFdwRoutineForRelation(currentRelation, true); + fdwroutine = GetFdwRoutine(node->fdw_handler); scanstate->fdwroutine = fdwroutine; scanstate->fdw_state = NULL; @@ -193,7 +206,8 @@ ExecEndForeignScan(ForeignScanState *node) ExecClearTuple(node->ss.ss_ScanTupleSlot); /* close the relation. */ - ExecCloseScanRelation(node->ss.ss_currentRelation); + if (node->ss.ss_currentRelation) + ExecCloseScanRelation(node->ss.ss_currentRelation); } /* ---------------------------------------------------------------- diff --git a/src/backend/foreign/foreign.c b/src/backend/foreign/foreign.c index cbe8b78..1901749 100644 --- a/src/backend/foreign/foreign.c +++ b/src/backend/foreign/foreign.c @@ -304,11 +304,11 @@ GetFdwRoutine(Oid fdwhandler) /* - * GetFdwRoutineByRelId - look up the handler of the foreign-data wrapper - * for the given foreign table, and retrieve its FdwRoutine struct. 
+ * GetFdwHandlerByRelId - look up the handler of the foreign-data wrapper + * for the given foreign table */ -FdwRoutine * -GetFdwRoutineByRelId(Oid relid) +static Oid +GetFdwHandlerByRelId(Oid relid) { HeapTuple tp; Form_pg_foreign_data_wrapper fdwform; @@ -350,7 +350,18 @@ GetFdwRoutineByRelId(Oid relid) ReleaseSysCache(tp); - /* And finally, call the handler function. */ + return fdwhandler; +} + +/* + * GetFdwRoutineByRelId - look up the handler of the foreign-data wrapper + * for the given foreign table, and retrieve its FdwRoutine struct. + */ +FdwRoutine * +GetFdwRoutineByRelId(Oid relid) +{ + Oid fdwhandler = GetFdwHandlerByRelId(relid); + return GetFdwRoutine(fdwhandler); } @@ -398,6 +409,16 @@ GetFdwRoutineForRelation(Relation relation, bool makecopy) return relation->rd_fdwroutine; } +/* + * GetFdwHandlerForRelation + * + * returns OID of FDW handler which is associated with the given relation. + */ +Oid +GetFdwHandlerForRelation(Relation relation) +{ + return GetFdwHandlerByRelId(RelationGetRelid(relation)); +} /* * IsImportableForeignTable - filter table names for IMPORT FOREIGN SCHEMA diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c index a9c3b4b..4dc3286 100644 --- a/src/backend/nodes/bitmapset.c +++ b/src/backend/nodes/bitmapset.c @@ -301,6 +301,63 @@ bms_difference(const Bitmapset *a, const Bitmapset *b) } /* + * bms_shift_members - move all the bits by shift + */ +Bitmapset * +bms_shift_members(const Bitmapset *a, int shift) +{ + Bitmapset *b; + bitmapword h_word; + bitmapword l_word; + int nwords; + int w_shift; + int b_shift; + int i, j; + + /* fast path if result shall be NULL obviously */ + if (a == NULL || a->nwords * BITS_PER_BITMAPWORD + shift <= 0) + return NULL; + /* actually, not shift members */ + if (shift == 0) + return bms_copy(a); + + nwords = (a->nwords * BITS_PER_BITMAPWORD + shift + + BITS_PER_BITMAPWORD - 1) / BITS_PER_BITMAPWORD; + b = palloc(BITMAPSET_SIZE(nwords)); + b->nwords = nwords; + + if 
(shift > 0) + { + /* Left shift */ + w_shift = WORDNUM(shift); + b_shift = BITNUM(shift); + + for (i=0, j=-w_shift; i < b->nwords; i++, j++) + { + h_word = (j >= 0 && j < a->nwords ? a->words[j] : 0); + l_word = (j-1 >= 0 && j-1 < a->nwords ? a->words[j-1] : 0); + b->words[i] = ((h_word << b_shift) | + (l_word >> (BITS_PER_BITMAPWORD - b_shift))); + } + } + else + { + /* Right shift */ + w_shift = WORDNUM(-shift); + b_shift = BITNUM(-shift); + + for (i=0, j=-w_shift; i < b->nwords; i++, j++) + { + h_word = (j+1 >= 0 && j+1 < a->nwords ? a->words[j+1] : 0); + l_word = (j >= 0 && j < a->nwords ? a->words[j] : 0); + b->words[i] = ((h_word >> (BITS_PER_BITMAPWORD - b_shift)) | + (l_word << b_shift)); + } + } + return b; +} + +/* * bms_is_subset - is A a subset of B? */ bool diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 029761e..61379a7 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -592,8 +592,11 @@ _copyForeignScan(const ForeignScan *from) /* * copy remainder of node */ + COPY_SCALAR_FIELD(fdw_handler); COPY_NODE_FIELD(fdw_exprs); + COPY_NODE_FIELD(fdw_ps_tlist); COPY_NODE_FIELD(fdw_private); + COPY_BITMAPSET_FIELD(fdw_relids); COPY_SCALAR_FIELD(fsSystemCol); return newnode; @@ -617,7 +620,9 @@ _copyCustomScan(const CustomScan *from) */ COPY_SCALAR_FIELD(flags); COPY_NODE_FIELD(custom_exprs); + COPY_NODE_FIELD(custom_ps_tlist); COPY_NODE_FIELD(custom_private); + COPY_BITMAPSET_FIELD(custom_relids); /* * NOTE: The method field of CustomScan is required to be a pointer to a diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 385b289..a178132 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -558,8 +558,11 @@ _outForeignScan(StringInfo str, const ForeignScan *node) _outScanInfo(str, (const Scan *) node); + WRITE_OID_FIELD(fdw_handler); WRITE_NODE_FIELD(fdw_exprs); + WRITE_NODE_FIELD(fdw_ps_tlist); WRITE_NODE_FIELD(fdw_private); + 
WRITE_BITMAPSET_FIELD(fdw_relids); WRITE_BOOL_FIELD(fsSystemCol); } @@ -572,7 +575,9 @@ _outCustomScan(StringInfo str, const CustomScan *node) WRITE_UINT_FIELD(flags); WRITE_NODE_FIELD(custom_exprs); + WRITE_NODE_FIELD(custom_ps_tlist); WRITE_NODE_FIELD(custom_private); + WRITE_BITMAPSET_FIELD(custom_relids); appendStringInfoString(str, " :methods "); _outToken(str, node->methods->CustomName); if (node->methods->TextOutCustomScan) diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 58d78e6..14872ae 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -60,7 +60,6 @@ set_rel_pathlist_hook_type set_rel_pathlist_hook = NULL; /* Hook for plugins to replace standard_join_search() */ join_search_hook_type join_search_hook = NULL; - static void set_base_rel_sizes(PlannerInfo *root); static void set_base_rel_pathlists(PlannerInfo *root); static void set_rel_size(PlannerInfo *root, RelOptInfo *rel, diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 1da953f..61f1a78 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -17,10 +17,13 @@ #include #include "executor/executor.h" +#include "foreign/fdwapi.h" #include "optimizer/cost.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" +/* Hook for plugins to get control in add_paths_to_joinrel() */ +set_join_pathlist_hook_type set_join_pathlist_hook = NULL; #define PATH_PARAM_BY_REL(path, rel) \ ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids)) @@ -260,6 +263,16 @@ add_paths_to_joinrel(PlannerInfo *root, restrictlist, jointype, sjinfo, &semifactors, param_source_rels, extra_lateral_rels); + + /* + * 5. Consider paths added by custom-scan providers, or other extensions + * in addition to the built-in paths. 
+ */ + if (set_join_pathlist_hook) + set_join_pathlist_hook(root, joinrel, outerrel, innerrel, + restrictlist, jointype, + sjinfo, &semifactors, + param_source_rels, extra_lateral_rels); } /* diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index fe9fd57..b1c7bcb 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -14,6 +14,7 @@ */ #include "postgres.h" +#include "foreign/fdwapi.h" #include "optimizer/joininfo.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" @@ -582,6 +583,7 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) SpecialJoinInfo sjinfo_data; RelOptInfo *joinrel; List *restrictlist; + bool found; /* We should never try to join two overlapping sets of rels. */ Assert(!bms_overlap(rel1->relids, rel2->relids)); @@ -635,7 +637,7 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) * goes with this particular joining. */ joinrel = build_join_rel(root, joinrelids, rel1, rel2, sjinfo, - &restrictlist); + &restrictlist, &found); /* * If we've already proven this join is empty, we needn't consider any @@ -648,6 +650,23 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) } /* + * Prior to all the built-in join logics, consider paths that replaces + * an entire join sub-tree by foreign-scan path, both of inner/outer + * relations are managed by same FDW driver. + * We expect remote join path has usually cheaper cost than local join + * on top of two foreign-scan, so we consult FDW driver to add remote- + * join path first, to break off path consideration with local join + * logics. + */ + if (!found && + joinrel->fdwroutine && + joinrel->fdwroutine->GetForeignJoinPaths) + { + joinrel->fdwroutine->GetForeignJoinPaths(root, joinrel, rel1, rel2, + sjinfo, restrictlist); + } + + /* * Consider paths using each rel as both outer and inner. 
Depending on * the join type, a provably empty outer or inner rel might mean the join * is provably empty too; in which case throw away any previously computed diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index cb69c03..7f86fcb 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -44,7 +44,6 @@ #include "utils/lsyscache.h" -static Plan *create_plan_recurse(PlannerInfo *root, Path *best_path); static Plan *create_scan_plan(PlannerInfo *root, Path *best_path); static List *build_path_tlist(PlannerInfo *root, Path *path); static bool use_physical_tlist(PlannerInfo *root, RelOptInfo *rel); @@ -220,7 +219,7 @@ create_plan(PlannerInfo *root, Path *best_path) * create_plan_recurse * Recursive guts of create_plan(). */ -static Plan * +Plan * create_plan_recurse(PlannerInfo *root, Path *best_path) { Plan *plan; @@ -1961,16 +1960,26 @@ create_foreignscan_plan(PlannerInfo *root, ForeignPath *best_path, ForeignScan *scan_plan; RelOptInfo *rel = best_path->path.parent; Index scan_relid = rel->relid; - RangeTblEntry *rte; + Oid rel_oid = InvalidOid; Bitmapset *attrs_used = NULL; ListCell *lc; int i; - /* it should be a base rel... */ - Assert(scan_relid > 0); - Assert(rel->rtekind == RTE_RELATION); - rte = planner_rt_fetch(scan_relid, root); - Assert(rte->rtekind == RTE_RELATION); + /* + * Fetch the relation OID, if this foreign-scan node actually scans + * a particular real relation. Otherwise, InvalidOid is + * passed to the FDW driver. + */ + if (scan_relid > 0) + { + RangeTblEntry *rte; + + Assert(rel->rtekind == RTE_RELATION); + rte = planner_rt_fetch(scan_relid, root); + Assert(rte->rtekind == RTE_RELATION); + rel_oid = rte->relid; + } + Assert(rel->fdwroutine != NULL); /* * Sort clauses into best execution order. 
We do this first since the FDW @@ -1985,13 +1994,37 @@ create_foreignscan_plan(PlannerInfo *root, ForeignPath *best_path, * has selected some join clauses for remote use but also wants them * rechecked locally). */ - scan_plan = rel->fdwroutine->GetForeignPlan(root, rel, rte->relid, + scan_plan = rel->fdwroutine->GetForeignPlan(root, rel, rel_oid, best_path, tlist, scan_clauses); + /* + * Sanity check. Pseudo scan tuple-descriptor shall be constructed + * based on the fdw_ps_tlist, excluding resjunk=true, so we need to + * ensure all valid TLEs have to locate prior to junk ones. + */ + if (scan_plan->scan.scanrelid == 0) + { + bool found_resjunk = false; + + foreach (lc, scan_plan->fdw_ps_tlist) + { + TargetEntry *tle = lfirst(lc); + + if (tle->resjunk) + found_resjunk = true; + else if (found_resjunk) + elog(ERROR, "junk TLE should not apper prior to valid one"); + } + } + /* Set the relids that are represented by this foreign scan for Explain */ + scan_plan->fdw_relids = best_path->path.parent->relids; /* Copy cost data from Path to Plan; no need to make FDW do this */ copy_path_costsize(&scan_plan->scan.plan, &best_path->path); + /* Track FDW server-id; no need to make FDW do this */ + scan_plan->fdw_handler = rel->fdw_handler; + /* * Replace any outer-relation variables with nestloop params in the qual * and fdw_exprs expressions. We do this last so that the FDW doesn't @@ -2053,12 +2086,7 @@ create_customscan_plan(PlannerInfo *root, CustomPath *best_path, { CustomScan *cplan; RelOptInfo *rel = best_path->path.parent; - - /* - * Right now, all we can support is CustomScan node which is associated - * with a particular base relation to be scanned. - */ - Assert(rel && rel->reloptkind == RELOPT_BASEREL); + ListCell *lc; /* * Sort clauses into the best execution order, although custom-scan @@ -2078,6 +2106,28 @@ create_customscan_plan(PlannerInfo *root, CustomPath *best_path, Assert(IsA(cplan, CustomScan)); /* + * Sanity check. 
Pseudo scan tuple-descriptor shall be constructed + * based on the custom_ps_tlist, excluding resjunk=true, so we need + * to ensure all valid TLEs have to locate prior to junk ones. + */ + if (cplan->scan.scanrelid == 0) + { + bool found_resjunk = false; + + foreach (lc, cplan->custom_ps_tlist) + { + TargetEntry *tle = lfirst(lc); + + if (tle->resjunk) + found_resjunk = true; + else if (found_resjunk) + elog(ERROR, "junk TLE should not apper prior to valid one"); + } + } + /* Set the relids that are represented by this custom scan for Explain */ + cplan->custom_relids = best_path->path.parent->relids; + + /* * Copy cost data from Path to Plan; no need to make custom-plan providers * do this */ diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index ec828cd..2961f44 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -568,6 +568,38 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) { ForeignScan *splan = (ForeignScan *) plan; + if (rtoffset > 0) + splan->fdw_relids = + bms_shift_members(splan->fdw_relids, rtoffset); + + if (splan->scan.scanrelid == 0) + { + indexed_tlist *pscan_itlist = + build_tlist_index(splan->fdw_ps_tlist); + + splan->scan.plan.targetlist = (List *) + fix_upper_expr(root, + (Node *) splan->scan.plan.targetlist, + pscan_itlist, + INDEX_VAR, + rtoffset); + splan->scan.plan.qual = (List *) + fix_upper_expr(root, + (Node *) splan->scan.plan.qual, + pscan_itlist, + INDEX_VAR, + rtoffset); + splan->fdw_exprs = (List *) + fix_upper_expr(root, + (Node *) splan->fdw_exprs, + pscan_itlist, + INDEX_VAR, + rtoffset); + splan->fdw_ps_tlist = + fix_scan_list(root, splan->fdw_ps_tlist, rtoffset); + pfree(pscan_itlist); + break; + } splan->scan.scanrelid += rtoffset; splan->scan.plan.targetlist = fix_scan_list(root, splan->scan.plan.targetlist, rtoffset); @@ -582,6 +614,38 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) { CustomScan *splan = (CustomScan 
*) plan; + if (rtoffset > 0) + splan->custom_relids = + bms_shift_members(splan->custom_relids, rtoffset); + + if (splan->scan.scanrelid == 0) + { + indexed_tlist *pscan_itlist = + build_tlist_index(splan->custom_ps_tlist); + + splan->scan.plan.targetlist = (List *) + fix_upper_expr(root, + (Node *) splan->scan.plan.targetlist, + pscan_itlist, + INDEX_VAR, + rtoffset); + splan->scan.plan.qual = (List *) + fix_upper_expr(root, + (Node *) splan->scan.plan.qual, + pscan_itlist, + INDEX_VAR, + rtoffset); + splan->custom_exprs = (List *) + fix_upper_expr(root, + (Node *) splan->custom_exprs, + pscan_itlist, + INDEX_VAR, + rtoffset); + splan->custom_ps_tlist = + fix_scan_list(root, splan->custom_ps_tlist, rtoffset); + pfree(pscan_itlist); + break; + } splan->scan.scanrelid += rtoffset; splan->scan.plan.targetlist = fix_scan_list(root, splan->scan.plan.targetlist, rtoffset); diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 313a5c1..1c570c8 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -378,10 +378,15 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, /* Grab the fdwroutine info using the relcache, while we have it */ if (relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE) + { + rel->fdw_handler = GetFdwHandlerForRelation(relation); rel->fdwroutine = GetFdwRoutineForRelation(relation, true); + } else + { + rel->fdw_handler = InvalidOid; rel->fdwroutine = NULL; - + } heap_close(relation, NoLock); /* diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 8cfbea0..da2bd22 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -14,6 +14,7 @@ */ #include "postgres.h" +#include "foreign/fdwapi.h" #include "optimizer/cost.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" @@ -122,6 +123,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind) 
rel->subroot = NULL; rel->subplan_params = NIL; rel->fdwroutine = NULL; + rel->fdw_handler = InvalidOid; rel->fdw_private = NULL; rel->baserestrictinfo = NIL; rel->baserestrictcost.startup = 0; @@ -316,6 +318,8 @@ find_join_rel(PlannerInfo *root, Relids relids) * 'restrictlist_ptr': result variable. If not NULL, *restrictlist_ptr * receives the list of RestrictInfo nodes that apply to this * particular pair of joinable relations. + * 'found' : indicates whether RelOptInfo is actually constructed. + * true, if it was already built and on the cache. * * restrictlist_ptr makes the routine's API a little grotty, but it saves * duplicated calculation of the restrictlist... @@ -326,7 +330,8 @@ build_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, RelOptInfo *inner_rel, SpecialJoinInfo *sjinfo, - List **restrictlist_ptr) + List **restrictlist_ptr, + bool *found) { RelOptInfo *joinrel; List *restrictlist; @@ -347,8 +352,11 @@ build_join_rel(PlannerInfo *root, joinrel, outer_rel, inner_rel); + *found = true; return joinrel; } + /* not found on the cache */ + *found = false; /* * Nope, so make one. @@ -427,6 +435,18 @@ build_join_rel(PlannerInfo *root, sjinfo, restrictlist); /* + * Set FDW handler and routine if both outer and inner relation + * are managed by same FDW driver. + */ + if (OidIsValid(outer_rel->fdw_handler) && + OidIsValid(inner_rel->fdw_handler) && + outer_rel->fdw_handler == inner_rel->fdw_handler) + { + joinrel->fdw_handler = outer_rel->fdw_handler; + joinrel->fdwroutine = GetFdwRoutine(joinrel->fdw_handler); + } + + /* * Add the joinrel to the query's joinrel list, and store it into the * auxiliary hashtable if there is one. NB: GEQO requires us to append * the new joinrel to the end of the list! 
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 28e1acf..90e1107 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -3842,6 +3842,10 @@ set_deparse_planstate(deparse_namespace *dpns, PlanState *ps) /* index_tlist is set only if it's an IndexOnlyScan */ if (IsA(ps->plan, IndexOnlyScan)) dpns->index_tlist = ((IndexOnlyScan *) ps->plan)->indextlist; + else if (IsA(ps->plan, ForeignScan)) + dpns->index_tlist = ((ForeignScan *) ps->plan)->fdw_ps_tlist; + else if (IsA(ps->plan, CustomScan)) + dpns->index_tlist = ((CustomScan *) ps->plan)->custom_ps_tlist; else dpns->index_tlist = NIL; } diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h index 1d76841..d3a5261 100644 --- a/src/include/foreign/fdwapi.h +++ b/src/include/foreign/fdwapi.h @@ -82,6 +82,13 @@ typedef void (*EndForeignModify_function) (EState *estate, typedef int (*IsForeignRelUpdatable_function) (Relation rel); +typedef void (*GetForeignJoinPaths_function ) (PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + SpecialJoinInfo *sjinfo, + List *restrictlist); + typedef void (*ExplainForeignScan_function) (ForeignScanState *node, struct ExplainState *es); @@ -150,6 +157,10 @@ typedef struct FdwRoutine /* Support functions for IMPORT FOREIGN SCHEMA */ ImportForeignSchema_function ImportForeignSchema; + + /* Support functions for join push-down */ + GetForeignJoinPaths_function GetForeignJoinPaths; + } FdwRoutine; @@ -157,6 +168,7 @@ typedef struct FdwRoutine extern FdwRoutine *GetFdwRoutine(Oid fdwhandler); extern FdwRoutine *GetFdwRoutineByRelId(Oid relid); extern FdwRoutine *GetFdwRoutineForRelation(Relation relation, bool makecopy); +extern Oid GetFdwHandlerForRelation(Relation relation); extern bool IsImportableForeignTable(const char *tablename, ImportForeignSchemaStmt *stmt); diff --git a/src/include/nodes/bitmapset.h b/src/include/nodes/bitmapset.h index 
3a556ee..3ca9791 100644 --- a/src/include/nodes/bitmapset.h +++ b/src/include/nodes/bitmapset.h @@ -66,6 +66,7 @@ extern void bms_free(Bitmapset *a); extern Bitmapset *bms_union(const Bitmapset *a, const Bitmapset *b); extern Bitmapset *bms_intersect(const Bitmapset *a, const Bitmapset *b); extern Bitmapset *bms_difference(const Bitmapset *a, const Bitmapset *b); +extern Bitmapset *bms_shift_members(const Bitmapset *a, int shift); extern bool bms_is_subset(const Bitmapset *a, const Bitmapset *b); extern BMS_Comparison bms_subset_compare(const Bitmapset *a, const Bitmapset *b); extern bool bms_is_member(int x, const Bitmapset *a); diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 21cbfa8..b25330e 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -471,7 +471,13 @@ typedef struct WorkTableScan * fdw_exprs and fdw_private are both under the control of the foreign-data * wrapper, but fdw_exprs is presumed to contain expression trees and will * be post-processed accordingly by the planner; fdw_private won't be. - * Note that everything in both lists must be copiable by copyObject(). + * An optional fdw_ps_tlist is used to map a reference to an attribute of + * underlying relation(s) on a pair of INDEX_VAR and alternative varattno. + * It looks like a scan on pseudo relation that is usually result of + * relations join on remote data source, and FDW driver is responsible to + * set expected target list for this. If FDW returns records as foreign- + * table definition, just put NIL here. + * Note that everything in above lists must be copiable by copyObject(). * One way to store an arbitrary blob of bytes is to represent it as a bytea * Const. Usually, though, you'll be better off choosing a representation * that can be dumped usefully by nodeToString(). 
@@ -480,18 +486,23 @@ typedef struct WorkTableScan typedef struct ForeignScan { Scan scan; + Oid fdw_handler; /* OID of FDW handler */ List *fdw_exprs; /* expressions that FDW may evaluate */ + List *fdw_ps_tlist; /* optional pseudo-scan tlist for FDW */ List *fdw_private; /* private data for FDW */ + Bitmapset *fdw_relids; /* set of relid (index of range-tables) + * represented by this node */ bool fsSystemCol; /* true if any "system column" is needed */ } ForeignScan; /* ---------------- * CustomScan node * - * The comments for ForeignScan's fdw_exprs and fdw_private fields apply - * equally to custom_exprs and custom_private. Note that since Plan trees - * can be copied, custom scan providers *must* fit all plan data they need - * into those fields; embedding CustomScan in a larger struct will not work. + * The comments for ForeignScan's fdw_exprs, fdw_ps_tlist and fdw_private + * fields apply equally to custom_exprs, custom_ps_tlist and custom_private. + * Note that since Plan trees can be copied, custom scan providers *must* + * fit all plan data they need into those fields; embedding CustomScan in + * a larger struct will not work.
* ---------------- */ struct CustomScan; @@ -512,7 +523,10 @@ typedef struct CustomScan Scan scan; uint32 flags; /* mask of CUSTOMPATH_* flags, see relation.h */ List *custom_exprs; /* expressions that custom code may evaluate */ + List *custom_ps_tlist;/* optional pseudo-scan target list */ List *custom_private; /* private data for custom code */ + Bitmapset *custom_relids; /* set of relid (index of range-tables) + * represented by this node */ const CustomScanMethods *methods; } CustomScan; diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 334cf51..4eb89c6 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -366,6 +366,7 @@ typedef struct PlannerInfo * subroot - PlannerInfo for subquery (NULL if it's not a subquery) * subplan_params - list of PlannerParamItems to be passed to subquery * fdwroutine - function hooks for FDW, if foreign table (else NULL) + * fdw_handler - OID of FDW handler, if foreign table (else InvalidOid) * fdw_private - private state for FDW, if foreign table (else NULL) * * Note: for a subquery, tuples, subplan, subroot are not set immediately @@ -461,6 +462,7 @@ typedef struct RelOptInfo List *subplan_params; /* if subquery */ /* use "struct FdwRoutine" to avoid including fdwapi.h here */ struct FdwRoutine *fdwroutine; /* if foreign table */ + Oid fdw_handler; /* if foreign table */ void *fdw_private; /* if foreign table */ /* used by various scans and joins: */ diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 9923f0e..3053f0f 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -141,7 +141,8 @@ extern RelOptInfo *build_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, RelOptInfo *inner_rel, SpecialJoinInfo *sjinfo, - List **restrictlist_ptr); + List **restrictlist_ptr, + bool *found); extern RelOptInfo *build_empty_join_rel(PlannerInfo *root); extern AppendRelInfo *find_childrel_appendrelinfo(PlannerInfo *root, 
RelOptInfo *rel); diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 6cad92e..c42c69d 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -30,6 +30,19 @@ typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root, RangeTblEntry *rte); extern PGDLLIMPORT set_rel_pathlist_hook_type set_rel_pathlist_hook; +/* Hook for plugins to get control in add_paths_to_joinrel() */ +typedef void (*set_join_pathlist_hook_type) (PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + List *restrictlist, + JoinType jointype, + SpecialJoinInfo *sjinfo, + SemiAntiJoinFactors *semifactors, + Relids param_source_rels, + Relids extra_lateral_rels); +extern PGDLLIMPORT set_join_pathlist_hook_type set_join_pathlist_hook; + /* Hook for plugins to replace standard_join_search() */ typedef RelOptInfo *(*join_search_hook_type) (PlannerInfo *root, int levels_needed, diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index fa72918..0c8cbcd 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -41,6 +41,7 @@ extern Plan *optimize_minmax_aggregates(PlannerInfo *root, List *tlist, * prototypes for plan/createplan.c */ extern Plan *create_plan(PlannerInfo *root, Path *best_path); +extern Plan *create_plan_recurse(PlannerInfo *root, Path *best_path); extern SubqueryScan *make_subqueryscan(List *qptlist, List *qpqual, Index scanrelid, Plan *subplan); extern ForeignScan *make_foreignscan(List *qptlist, List *qpqual,