doc/src/sgml/custom-scan.sgml | 43 ++++++++++++++++++
doc/src/sgml/fdwhandler.sgml | 51 +++++++++++++++++++++
src/backend/commands/explain.c | 15 +++++--
src/backend/executor/execScan.c | 4 ++
src/backend/executor/nodeCustom.c | 38 ++++++++++++----
src/backend/executor/nodeForeignscan.c | 34 +++++++++-----
src/backend/foreign/foreign.c | 31 ++++++++++---
src/backend/nodes/bitmapset.c | 57 +++++++++++++++++++++++
src/backend/nodes/copyfuncs.c | 5 +++
src/backend/nodes/outfuncs.c | 5 +++
src/backend/optimizer/path/allpaths.c | 1 -
src/backend/optimizer/path/joinpath.c | 13 ++++++
src/backend/optimizer/path/joinrels.c | 21 ++++++++-
src/backend/optimizer/plan/createplan.c | 80 ++++++++++++++++++++++++++-------
src/backend/optimizer/plan/setrefs.c | 64 ++++++++++++++++++++++++++
src/backend/optimizer/util/plancat.c | 7 ++-
src/backend/optimizer/util/relnode.c | 22 ++++++++-
src/backend/utils/adt/ruleutils.c | 4 ++
src/include/foreign/fdwapi.h | 12 +++++
src/include/nodes/bitmapset.h | 1 +
src/include/nodes/plannodes.h | 24 +++++++---
src/include/nodes/relation.h | 2 +
src/include/optimizer/pathnode.h | 3 +-
src/include/optimizer/paths.h | 13 ++++++
src/include/optimizer/planmain.h | 1 +
25 files changed, 499 insertions(+), 52 deletions(-)
diff --git a/doc/src/sgml/custom-scan.sgml b/doc/src/sgml/custom-scan.sgml
index 8a4a3df..b1400ae 100644
--- a/doc/src/sgml/custom-scan.sgml
+++ b/doc/src/sgml/custom-scan.sgml
@@ -48,6 +48,27 @@ extern PGDLLIMPORT set_rel_pathlist_hook_type set_rel_pathlist_hook;
+ A custom scan provider can also add paths by setting the following
+ hook, replacing built-in join paths with a custom scan that behaves
+ like a scan on the already-joined relations. The hook is called
+ after the core code has generated what it believes to be the complete
+ and correct set of access paths for the join.
+
+typedef void (*set_join_pathlist_hook_type) (PlannerInfo *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *outerrel,
+ RelOptInfo *innerrel,
+ List *restrictlist,
+ JoinType jointype,
+ SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors,
+ Relids param_source_rels,
+ Relids extra_lateral_rels);
+extern PGDLLIMPORT set_join_pathlist_hook_type set_join_pathlist_hook;
+
+
+
+
Although this hook function can be used to examine, modify, or remove
paths generated by the core system, a custom scan provider will typically
confine itself to generating CustomPath> objects and adding
@@ -124,7 +145,9 @@ typedef struct CustomScan
Scan scan;
uint32 flags;
List *custom_exprs;
+ List *custom_ps_tlist;
List *custom_private;
+ List *custom_relids;
const CustomScanMethods *methods;
} CustomScan;
@@ -141,10 +164,30 @@ typedef struct CustomScan
is only used by the custom scan provider itself. Plan trees must be able
to be duplicated using copyObject>, so all the data stored
within these two fields must consist of nodes that function can handle.
+ custom_relids> is set by the backend to track the underlying
+ relations represented by this custom-scan node, so the custom-scan
+ provider does not need to touch it.
methods> must point to a (usually statically allocated)
object implementing the required custom scan methods, which are further
detailed below.
+
+ When a CustomScan> replaces built-in join paths, the
+ custom-scan provider must set up two things.
+ The first is a zero in scan.scanrelid>, which is
+ normally a range-table index. Zero tells the backend that
+ this CustomScan> node is not associated with a particular
+ table. The second is a valid list of TargetEntry> nodes in
+ custom_ps_tlist>. Such a CustomScan> node
+ looks to the backend like an ordinary scan, but on a relation that is
+ the result of a join. A tuple descriptor therefore cannot be built
+ from a table definition, so the custom-scan provider must
+ supply the expected record type of the tuples.
+ The tuple descriptor of the scan slot is constructed from
+ custom_ps_tlist> and assigned during executor initialization.
+ The list is also referenced by EXPLAIN> to resolve the names
+ of the underlying columns and relations.
+
Custom Scan Callbacks
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index c1daa4b..54ba45f 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -598,6 +598,57 @@ IsForeignRelUpdatable (Relation rel);
+
+ FDW Routines for remote join
+
+
+void
+GetForeignJoinPaths(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *outerrel,
+ RelOptInfo *innerrel,
+ SpecialJoinInfo *sjinfo,
+ List *restrictlist);
+
+ Create possible access paths for a join of two foreign tables or
+ joined relations; both of them must be managed by the same
+ FDW driver.
+ This optional function is called during query planning.
+
+
+ This function allows an FDW driver to add a ForeignScan> path
+ for the supplied joinrel>. From the standpoint of the
+ query planner, it looks like a scan node added for a join relation.
+ This means that a ForeignScan> path added in place of the built-in
+ local join logic has to generate tuples as if it scanned a joined
+ and materialized relation.
+
+
+ Usually, the FDW driver is expected to issue a remote query that
+ joins the tables on the remote side, and then to fetch the joined
+ result on the local side.
+ Unlike a simple table scan, the slot descriptor of the joined
+ relations is determined on the fly, so its definition cannot be
+ obtained from the system catalog.
+ The FDW driver is therefore responsible for telling the query planner
+ the expected form of the joined relations. When a ForeignScan>
+ replaces a join of relations, scanrelid> of the generated plan
+ node must be zero, to mark that this ForeignScan> node is not
+ associated with a particular foreign table.
+ The driver also needs to construct a pseudo scan tlist (fdw_ps_tlist>)
+ to indicate the expected tuple definition.
+
+
+ When scanrelid> is zero, the executor initializes the scan
+ slot according to fdw_ps_tlist>, excluding junk
+ entries. This list is also used to resolve the names of the original
+ relations and columns, so the FDW can also chain in expression nodes
+ that are not actually evaluated locally, such as a join clause executed
+ on the remote side; the target entries for those are marked with
+ resjunk=true>.
+
+
+
FDW Routines for EXPLAIN>
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index a951c55..8892dca 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -730,11 +730,17 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used)
case T_ValuesScan:
case T_CteScan:
case T_WorkTableScan:
- case T_ForeignScan:
- case T_CustomScan:
*rels_used = bms_add_member(*rels_used,
((Scan *) plan)->scanrelid);
break;
+ case T_ForeignScan:
+ *rels_used = bms_add_members(*rels_used,
+ ((ForeignScan *) plan)->fdw_relids);
+ break;
+ case T_CustomScan:
+ *rels_used = bms_add_members(*rels_used,
+ ((CustomScan *) plan)->custom_relids);
+ break;
case T_ModifyTable:
*rels_used = bms_add_member(*rels_used,
((ModifyTable *) plan)->nominalRelation);
@@ -1072,9 +1078,12 @@ ExplainNode(PlanState *planstate, List *ancestors,
case T_ValuesScan:
case T_CteScan:
case T_WorkTableScan:
+ ExplainScanTarget((Scan *) plan, es);
+ break;
case T_ForeignScan:
case T_CustomScan:
- ExplainScanTarget((Scan *) plan, es);
+ if (((Scan *) plan)->scanrelid > 0)
+ ExplainScanTarget((Scan *) plan, es);
break;
case T_IndexScan:
{
diff --git a/src/backend/executor/execScan.c b/src/backend/executor/execScan.c
index 3f0d809..2f18a8a 100644
--- a/src/backend/executor/execScan.c
+++ b/src/backend/executor/execScan.c
@@ -251,6 +251,10 @@ ExecAssignScanProjectionInfo(ScanState *node)
/* Vars in an index-only scan's tlist should be INDEX_VAR */
if (IsA(scan, IndexOnlyScan))
varno = INDEX_VAR;
+ /* Also foreign-/custom-scan on pseudo relation should be INDEX_VAR */
+ else if (scan->scanrelid == 0 &&
+ (IsA(scan, ForeignScan) || IsA(scan, CustomScan)))
+ varno = INDEX_VAR;
else
varno = scan->scanrelid;
diff --git a/src/backend/executor/nodeCustom.c b/src/backend/executor/nodeCustom.c
index b07932b..2344129 100644
--- a/src/backend/executor/nodeCustom.c
+++ b/src/backend/executor/nodeCustom.c
@@ -23,6 +23,7 @@ CustomScanState *
ExecInitCustomScan(CustomScan *cscan, EState *estate, int eflags)
{
CustomScanState *css;
+ Index scan_relid = cscan->scan.scanrelid;
Relation scan_rel;
/* populate a CustomScanState according to the CustomScan */
@@ -48,12 +49,31 @@ ExecInitCustomScan(CustomScan *cscan, EState *estate, int eflags)
ExecInitScanTupleSlot(estate, &css->ss);
ExecInitResultTupleSlot(estate, &css->ss.ps);
- /* initialize scan relation */
- scan_rel = ExecOpenScanRelation(estate, cscan->scan.scanrelid, eflags);
- css->ss.ss_currentRelation = scan_rel;
- css->ss.ss_currentScanDesc = NULL; /* set by provider */
- ExecAssignScanType(&css->ss, RelationGetDescr(scan_rel));
-
+ /*
+ * open the base relation and acquire appropriate lock on it, then
+ * get the scan type from the relation descriptor, if this custom
+ * scan is on actual relations.
+ *
+ * on the other hand, a custom scan may scan a pseudo relation,
+ * which is usually the result set of a join performed by an external
+ * computing resource or the like. It has to get the scan type from
+ * the pseudo-scan target list, which the custom-scan provider is
+ * expected to assign.
+ */
+ if (scan_relid > 0)
+ {
+ scan_rel = ExecOpenScanRelation(estate, scan_relid, eflags);
+ css->ss.ss_currentRelation = scan_rel;
+ css->ss.ss_currentScanDesc = NULL; /* set by provider */
+ ExecAssignScanType(&css->ss, RelationGetDescr(scan_rel));
+ }
+ else
+ {
+ TupleDesc ps_tupdesc;
+
+ ps_tupdesc = ExecCleanTypeFromTL(cscan->custom_ps_tlist, false);
+ ExecAssignScanType(&css->ss, ps_tupdesc);
+ }
css->ss.ps.ps_TupFromTlist = false;
/*
@@ -89,11 +109,11 @@ ExecEndCustomScan(CustomScanState *node)
/* Clean out the tuple table */
ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
- if (node->ss.ss_ScanTupleSlot)
- ExecClearTuple(node->ss.ss_ScanTupleSlot);
+ ExecClearTuple(node->ss.ss_ScanTupleSlot);
/* Close the heap relation */
- ExecCloseScanRelation(node->ss.ss_currentRelation);
+ if (node->ss.ss_currentRelation)
+ ExecCloseScanRelation(node->ss.ss_currentRelation);
}
void
diff --git a/src/backend/executor/nodeForeignscan.c b/src/backend/executor/nodeForeignscan.c
index 7399053..542d176 100644
--- a/src/backend/executor/nodeForeignscan.c
+++ b/src/backend/executor/nodeForeignscan.c
@@ -102,6 +102,7 @@ ForeignScanState *
ExecInitForeignScan(ForeignScan *node, EState *estate, int eflags)
{
ForeignScanState *scanstate;
+ Index scanrelid = node->scan.scanrelid;
Relation currentRelation;
FdwRoutine *fdwroutine;
@@ -141,16 +142,28 @@ ExecInitForeignScan(ForeignScan *node, EState *estate, int eflags)
ExecInitScanTupleSlot(estate, &scanstate->ss);
/*
- * open the base relation and acquire appropriate lock on it.
+ * open the base relation and acquire appropriate lock on it, then
+ * get the scan type from the relation descriptor, if this foreign
+ * scan is on actual foreign-table.
+ *
+ * on the other hand, a foreign scan may scan a pseudo relation,
+ * which is usually the result set of a remote join. It has
+ * to get the scan type from the pseudo-scan target list, which the
+ * FDW driver is expected to assign.
*/
- currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
- scanstate->ss.ss_currentRelation = currentRelation;
+ if (scanrelid > 0)
+ {
+ currentRelation = ExecOpenScanRelation(estate, scanrelid, eflags);
+ scanstate->ss.ss_currentRelation = currentRelation;
+ ExecAssignScanType(&scanstate->ss, RelationGetDescr(currentRelation));
+ }
+ else
+ {
+ TupleDesc ps_tupdesc;
- /*
- * get the scan type from the relation descriptor. (XXX at some point we
- * might want to let the FDW editorialize on the scan tupdesc.)
- */
- ExecAssignScanType(&scanstate->ss, RelationGetDescr(currentRelation));
+ ps_tupdesc = ExecCleanTypeFromTL(node->fdw_ps_tlist, false);
+ ExecAssignScanType(&scanstate->ss, ps_tupdesc);
+ }
/*
* Initialize result tuple type and projection info.
@@ -161,7 +174,7 @@ ExecInitForeignScan(ForeignScan *node, EState *estate, int eflags)
/*
* Acquire function pointers from the FDW's handler, and init fdw_state.
*/
- fdwroutine = GetFdwRoutineForRelation(currentRelation, true);
+ fdwroutine = GetFdwRoutine(node->fdw_handler);
scanstate->fdwroutine = fdwroutine;
scanstate->fdw_state = NULL;
@@ -193,7 +206,8 @@ ExecEndForeignScan(ForeignScanState *node)
ExecClearTuple(node->ss.ss_ScanTupleSlot);
/* close the relation. */
- ExecCloseScanRelation(node->ss.ss_currentRelation);
+ if (node->ss.ss_currentRelation)
+ ExecCloseScanRelation(node->ss.ss_currentRelation);
}
/* ----------------------------------------------------------------
diff --git a/src/backend/foreign/foreign.c b/src/backend/foreign/foreign.c
index cbe8b78..1901749 100644
--- a/src/backend/foreign/foreign.c
+++ b/src/backend/foreign/foreign.c
@@ -304,11 +304,11 @@ GetFdwRoutine(Oid fdwhandler)
/*
- * GetFdwRoutineByRelId - look up the handler of the foreign-data wrapper
- * for the given foreign table, and retrieve its FdwRoutine struct.
+ * GetFdwHandlerByRelId - look up the handler of the foreign-data wrapper
+ * for the given foreign table
*/
-FdwRoutine *
-GetFdwRoutineByRelId(Oid relid)
+static Oid
+GetFdwHandlerByRelId(Oid relid)
{
HeapTuple tp;
Form_pg_foreign_data_wrapper fdwform;
@@ -350,7 +350,18 @@ GetFdwRoutineByRelId(Oid relid)
ReleaseSysCache(tp);
- /* And finally, call the handler function. */
+ return fdwhandler;
+}
+
+/*
+ * GetFdwRoutineByRelId - look up the handler of the foreign-data wrapper
+ * for the given foreign table, and retrieve its FdwRoutine struct.
+ */
+FdwRoutine *
+GetFdwRoutineByRelId(Oid relid)
+{
+ Oid fdwhandler = GetFdwHandlerByRelId(relid);
+
return GetFdwRoutine(fdwhandler);
}
@@ -398,6 +409,16 @@ GetFdwRoutineForRelation(Relation relation, bool makecopy)
return relation->rd_fdwroutine;
}
+/*
+ * GetFdwHandlerForRelation
+ *
+ * returns OID of FDW handler which is associated with the given relation.
+ */
+Oid
+GetFdwHandlerForRelation(Relation relation)
+{
+ return GetFdwHandlerByRelId(RelationGetRelid(relation));	/* delegate to the by-OID lookup, keyed by this relation's OID */
+}
/*
* IsImportableForeignTable - filter table names for IMPORT FOREIGN SCHEMA
diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c
index a9c3b4b..4dc3286 100644
--- a/src/backend/nodes/bitmapset.c
+++ b/src/backend/nodes/bitmapset.c
@@ -301,6 +301,63 @@ bms_difference(const Bitmapset *a, const Bitmapset *b)
}
/*
+ * bms_shift_members - move all the bits by shift
+ */
+Bitmapset *
+bms_shift_members(const Bitmapset *a, int shift)
+{
+	Bitmapset  *b;
+	bitmapword	h_word;		/* source word supplying the higher-order bits */
+	bitmapword	l_word;		/* source word supplying the lower-order bits */
+	int			nwords;
+	int			w_shift;	/* whole words to move */
+	int			b_shift;	/* remaining bits to move within a word */
+	int			i, j;
+
+	/* fast path if result shall be NULL obviously */
+	if (a == NULL || a->nwords * BITS_PER_BITMAPWORD + shift <= 0)
+		return NULL;
+	/* nothing to move; return a plain copy */
+	if (shift == 0)
+		return bms_copy(a);
+
+	nwords = (a->nwords * BITS_PER_BITMAPWORD + shift +
+			  BITS_PER_BITMAPWORD - 1) / BITS_PER_BITMAPWORD;
+	b = palloc(BITMAPSET_SIZE(nwords));
+	b->nwords = nwords;
+	/* each member x of a becomes x + shift; members that would go negative are dropped */
+	if (shift > 0)
+	{
+		/* Left shift; no carry when b_shift is 0 (full-width shift is UB) */
+		w_shift = WORDNUM(shift);
+		b_shift = BITNUM(shift);
+
+		for (i = 0, j = -w_shift; i < b->nwords; i++, j++)
+		{
+			h_word = (j >= 0 && j < a->nwords ? a->words[j] : 0);
+			l_word = (j - 1 >= 0 && j - 1 < a->nwords ? a->words[j - 1] : 0);
+			b->words[i] = (h_word << b_shift) |
+				(b_shift ? l_word >> (BITS_PER_BITMAPWORD - b_shift) : 0);
+		}
+	}
+	else
+	{
+		/* Right shift: result word i is fed by source words j and j + 1 */
+		w_shift = WORDNUM(-shift);
+		b_shift = BITNUM(-shift);
+
+		for (i = 0, j = w_shift; i < b->nwords; i++, j++)
+		{
+			h_word = (j + 1 < a->nwords ? a->words[j + 1] : 0);
+			l_word = (j < a->nwords ? a->words[j] : 0);
+			b->words[i] = (l_word >> b_shift) |
+				(b_shift ? h_word << (BITS_PER_BITMAPWORD - b_shift) : 0);
+		}
+	}
+	return b;
+}
+
+/*
* bms_is_subset - is A a subset of B?
*/
bool
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 029761e..61379a7 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -592,8 +592,11 @@ _copyForeignScan(const ForeignScan *from)
/*
* copy remainder of node
*/
+ COPY_SCALAR_FIELD(fdw_handler);
COPY_NODE_FIELD(fdw_exprs);
+ COPY_NODE_FIELD(fdw_ps_tlist);
COPY_NODE_FIELD(fdw_private);
+ COPY_BITMAPSET_FIELD(fdw_relids);
COPY_SCALAR_FIELD(fsSystemCol);
return newnode;
@@ -617,7 +620,9 @@ _copyCustomScan(const CustomScan *from)
*/
COPY_SCALAR_FIELD(flags);
COPY_NODE_FIELD(custom_exprs);
+ COPY_NODE_FIELD(custom_ps_tlist);
COPY_NODE_FIELD(custom_private);
+ COPY_BITMAPSET_FIELD(custom_relids);
/*
* NOTE: The method field of CustomScan is required to be a pointer to a
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 385b289..a178132 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -558,8 +558,11 @@ _outForeignScan(StringInfo str, const ForeignScan *node)
_outScanInfo(str, (const Scan *) node);
+ WRITE_OID_FIELD(fdw_handler);
WRITE_NODE_FIELD(fdw_exprs);
+ WRITE_NODE_FIELD(fdw_ps_tlist);
WRITE_NODE_FIELD(fdw_private);
+ WRITE_BITMAPSET_FIELD(fdw_relids);
WRITE_BOOL_FIELD(fsSystemCol);
}
@@ -572,7 +575,9 @@ _outCustomScan(StringInfo str, const CustomScan *node)
WRITE_UINT_FIELD(flags);
WRITE_NODE_FIELD(custom_exprs);
+ WRITE_NODE_FIELD(custom_ps_tlist);
WRITE_NODE_FIELD(custom_private);
+ WRITE_BITMAPSET_FIELD(custom_relids);
appendStringInfoString(str, " :methods ");
_outToken(str, node->methods->CustomName);
if (node->methods->TextOutCustomScan)
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 58d78e6..14872ae 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -60,7 +60,6 @@ set_rel_pathlist_hook_type set_rel_pathlist_hook = NULL;
/* Hook for plugins to replace standard_join_search() */
join_search_hook_type join_search_hook = NULL;
-
static void set_base_rel_sizes(PlannerInfo *root);
static void set_base_rel_pathlists(PlannerInfo *root);
static void set_rel_size(PlannerInfo *root, RelOptInfo *rel,
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 1da953f..61f1a78 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -17,10 +17,13 @@
#include
#include "executor/executor.h"
+#include "foreign/fdwapi.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
+/* Hook for plugins to get control in add_paths_to_joinrel() */
+set_join_pathlist_hook_type set_join_pathlist_hook = NULL;
#define PATH_PARAM_BY_REL(path, rel) \
((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))
@@ -260,6 +263,16 @@ add_paths_to_joinrel(PlannerInfo *root,
restrictlist, jointype,
sjinfo, &semifactors,
param_source_rels, extra_lateral_rels);
+
+ /*
+ * 5. Consider paths added by custom-scan providers, or other extensions
+ * in addition to the built-in paths.
+ */
+ if (set_join_pathlist_hook)
+ set_join_pathlist_hook(root, joinrel, outerrel, innerrel,
+ restrictlist, jointype,
+ sjinfo, &semifactors,
+ param_source_rels, extra_lateral_rels);
}
/*
diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c
index fe9fd57..b1c7bcb 100644
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -14,6 +14,7 @@
*/
#include "postgres.h"
+#include "foreign/fdwapi.h"
#include "optimizer/joininfo.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
@@ -582,6 +583,7 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
SpecialJoinInfo sjinfo_data;
RelOptInfo *joinrel;
List *restrictlist;
+ bool found;
/* We should never try to join two overlapping sets of rels. */
Assert(!bms_overlap(rel1->relids, rel2->relids));
@@ -635,7 +637,7 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
* goes with this particular joining.
*/
joinrel = build_join_rel(root, joinrelids, rel1, rel2, sjinfo,
- &restrictlist);
+ &restrictlist, &found);
/*
* If we've already proven this join is empty, we needn't consider any
@@ -648,6 +650,23 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
}
/*
+ * Before any of the built-in join logic, consider paths that replace
+ * an entire join sub-tree with a foreign-scan path, when both the inner
+ * and outer relations are managed by the same FDW driver.
+ * A remote join path is usually expected to be cheaper than a local join
+ * on top of two foreign scans, so we consult the FDW driver to add the
+ * remote-join path first, so that it can cut short path consideration
+ * by the local join logic.
+ */
+ if (!found &&
+ joinrel->fdwroutine &&
+ joinrel->fdwroutine->GetForeignJoinPaths)
+ {
+ joinrel->fdwroutine->GetForeignJoinPaths(root, joinrel, rel1, rel2,
+ sjinfo, restrictlist);
+ }
+
+ /*
* Consider paths using each rel as both outer and inner. Depending on
* the join type, a provably empty outer or inner rel might mean the join
* is provably empty too; in which case throw away any previously computed
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index cb69c03..7f86fcb 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -44,7 +44,6 @@
#include "utils/lsyscache.h"
-static Plan *create_plan_recurse(PlannerInfo *root, Path *best_path);
static Plan *create_scan_plan(PlannerInfo *root, Path *best_path);
static List *build_path_tlist(PlannerInfo *root, Path *path);
static bool use_physical_tlist(PlannerInfo *root, RelOptInfo *rel);
@@ -220,7 +219,7 @@ create_plan(PlannerInfo *root, Path *best_path)
* create_plan_recurse
* Recursive guts of create_plan().
*/
-static Plan *
+Plan *
create_plan_recurse(PlannerInfo *root, Path *best_path)
{
Plan *plan;
@@ -1961,16 +1960,26 @@ create_foreignscan_plan(PlannerInfo *root, ForeignPath *best_path,
ForeignScan *scan_plan;
RelOptInfo *rel = best_path->path.parent;
Index scan_relid = rel->relid;
- RangeTblEntry *rte;
+ Oid rel_oid = InvalidOid;
Bitmapset *attrs_used = NULL;
ListCell *lc;
int i;
- /* it should be a base rel... */
- Assert(scan_relid > 0);
- Assert(rel->rtekind == RTE_RELATION);
- rte = planner_rt_fetch(scan_relid, root);
- Assert(rte->rtekind == RTE_RELATION);
+ /*
+ * Fetch the relation OID if this foreign-scan node actually scans
+ * a particular real relation. Otherwise, InvalidOid is passed
+ * to the FDW driver.
+ */
+ if (scan_relid > 0)
+ {
+ RangeTblEntry *rte;
+
+ Assert(rel->rtekind == RTE_RELATION);
+ rte = planner_rt_fetch(scan_relid, root);
+ Assert(rte->rtekind == RTE_RELATION);
+ rel_oid = rte->relid;
+ }
+ Assert(rel->fdwroutine != NULL);
/*
* Sort clauses into best execution order. We do this first since the FDW
@@ -1985,13 +1994,37 @@ create_foreignscan_plan(PlannerInfo *root, ForeignPath *best_path,
* has selected some join clauses for remote use but also wants them
* rechecked locally).
*/
- scan_plan = rel->fdwroutine->GetForeignPlan(root, rel, rte->relid,
+ scan_plan = rel->fdwroutine->GetForeignPlan(root, rel, rel_oid,
best_path,
tlist, scan_clauses);
+ /*
+ * Sanity check. The pseudo-scan tuple descriptor is constructed
+ * from fdw_ps_tlist, excluding resjunk=true entries, so we need to
+ * ensure that all valid TLEs are located before any junk ones.
+ */
+ if (scan_plan->scan.scanrelid == 0)
+ {
+ bool found_resjunk = false;
+
+ foreach (lc, scan_plan->fdw_ps_tlist)
+ {
+ TargetEntry *tle = lfirst(lc);
+
+ if (tle->resjunk)
+ found_resjunk = true;
+ else if (found_resjunk)
+ elog(ERROR, "junk TLE should not apper prior to valid one");
+ }
+ }
+ /* Set the relids that are represented by this foreign scan for Explain */
+ scan_plan->fdw_relids = best_path->path.parent->relids;
/* Copy cost data from Path to Plan; no need to make FDW do this */
copy_path_costsize(&scan_plan->scan.plan, &best_path->path);
+ /* Track the FDW handler OID; no need to make FDW do this */
+ scan_plan->fdw_handler = rel->fdw_handler;
+
/*
* Replace any outer-relation variables with nestloop params in the qual
* and fdw_exprs expressions. We do this last so that the FDW doesn't
@@ -2053,12 +2086,7 @@ create_customscan_plan(PlannerInfo *root, CustomPath *best_path,
{
CustomScan *cplan;
RelOptInfo *rel = best_path->path.parent;
-
- /*
- * Right now, all we can support is CustomScan node which is associated
- * with a particular base relation to be scanned.
- */
- Assert(rel && rel->reloptkind == RELOPT_BASEREL);
+ ListCell *lc;
/*
* Sort clauses into the best execution order, although custom-scan
@@ -2078,6 +2106,28 @@ create_customscan_plan(PlannerInfo *root, CustomPath *best_path,
Assert(IsA(cplan, CustomScan));
/*
+ * Sanity check. The pseudo-scan tuple descriptor is constructed
+ * from custom_ps_tlist, excluding resjunk=true entries, so we need
+ * to ensure that all valid TLEs are located before any junk ones.
+ */
+ if (cplan->scan.scanrelid == 0)
+ {
+ bool found_resjunk = false;
+
+ foreach (lc, cplan->custom_ps_tlist)
+ {
+ TargetEntry *tle = lfirst(lc);
+
+ if (tle->resjunk)
+ found_resjunk = true;
+ else if (found_resjunk)
+ elog(ERROR, "junk TLE should not apper prior to valid one");
+ }
+ }
+ /* Set the relids that are represented by this custom scan for Explain */
+ cplan->custom_relids = best_path->path.parent->relids;
+
+ /*
* Copy cost data from Path to Plan; no need to make custom-plan providers
* do this
*/
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index ec828cd..2961f44 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -568,6 +568,38 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
{
ForeignScan *splan = (ForeignScan *) plan;
+ if (rtoffset > 0)
+ splan->fdw_relids =
+ bms_shift_members(splan->fdw_relids, rtoffset);
+
+ if (splan->scan.scanrelid == 0)
+ {
+ indexed_tlist *pscan_itlist =
+ build_tlist_index(splan->fdw_ps_tlist);
+
+ splan->scan.plan.targetlist = (List *)
+ fix_upper_expr(root,
+ (Node *) splan->scan.plan.targetlist,
+ pscan_itlist,
+ INDEX_VAR,
+ rtoffset);
+ splan->scan.plan.qual = (List *)
+ fix_upper_expr(root,
+ (Node *) splan->scan.plan.qual,
+ pscan_itlist,
+ INDEX_VAR,
+ rtoffset);
+ splan->fdw_exprs = (List *)
+ fix_upper_expr(root,
+ (Node *) splan->fdw_exprs,
+ pscan_itlist,
+ INDEX_VAR,
+ rtoffset);
+ splan->fdw_ps_tlist =
+ fix_scan_list(root, splan->fdw_ps_tlist, rtoffset);
+ pfree(pscan_itlist);
+ break;
+ }
splan->scan.scanrelid += rtoffset;
splan->scan.plan.targetlist =
fix_scan_list(root, splan->scan.plan.targetlist, rtoffset);
@@ -582,6 +614,38 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
{
CustomScan *splan = (CustomScan *) plan;
+ if (rtoffset > 0)
+ splan->custom_relids =
+ bms_shift_members(splan->custom_relids, rtoffset);
+
+ if (splan->scan.scanrelid == 0)
+ {
+ indexed_tlist *pscan_itlist =
+ build_tlist_index(splan->custom_ps_tlist);
+
+ splan->scan.plan.targetlist = (List *)
+ fix_upper_expr(root,
+ (Node *) splan->scan.plan.targetlist,
+ pscan_itlist,
+ INDEX_VAR,
+ rtoffset);
+ splan->scan.plan.qual = (List *)
+ fix_upper_expr(root,
+ (Node *) splan->scan.plan.qual,
+ pscan_itlist,
+ INDEX_VAR,
+ rtoffset);
+ splan->custom_exprs = (List *)
+ fix_upper_expr(root,
+ (Node *) splan->custom_exprs,
+ pscan_itlist,
+ INDEX_VAR,
+ rtoffset);
+ splan->custom_ps_tlist =
+ fix_scan_list(root, splan->custom_ps_tlist, rtoffset);
+ pfree(pscan_itlist);
+ break;
+ }
splan->scan.scanrelid += rtoffset;
splan->scan.plan.targetlist =
fix_scan_list(root, splan->scan.plan.targetlist, rtoffset);
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 313a5c1..1c570c8 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -378,10 +378,15 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
/* Grab the fdwroutine info using the relcache, while we have it */
if (relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+ {
+ rel->fdw_handler = GetFdwHandlerForRelation(relation);
rel->fdwroutine = GetFdwRoutineForRelation(relation, true);
+ }
else
+ {
+ rel->fdw_handler = InvalidOid;
rel->fdwroutine = NULL;
-
+ }
heap_close(relation, NoLock);
/*
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index 8cfbea0..da2bd22 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -14,6 +14,7 @@
*/
#include "postgres.h"
+#include "foreign/fdwapi.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
@@ -122,6 +123,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind)
rel->subroot = NULL;
rel->subplan_params = NIL;
rel->fdwroutine = NULL;
+ rel->fdw_handler = InvalidOid;
rel->fdw_private = NULL;
rel->baserestrictinfo = NIL;
rel->baserestrictcost.startup = 0;
@@ -316,6 +318,8 @@ find_join_rel(PlannerInfo *root, Relids relids)
* 'restrictlist_ptr': result variable. If not NULL, *restrictlist_ptr
* receives the list of RestrictInfo nodes that apply to this
* particular pair of joinable relations.
+ * 'found': result variable. Set to true if the RelOptInfo had already
+ * been built and was found in the cache, false if it was newly built.
*
* restrictlist_ptr makes the routine's API a little grotty, but it saves
* duplicated calculation of the restrictlist...
@@ -326,7 +330,8 @@ build_join_rel(PlannerInfo *root,
RelOptInfo *outer_rel,
RelOptInfo *inner_rel,
SpecialJoinInfo *sjinfo,
- List **restrictlist_ptr)
+ List **restrictlist_ptr,
+ bool *found)
{
RelOptInfo *joinrel;
List *restrictlist;
@@ -347,8 +352,11 @@ build_join_rel(PlannerInfo *root,
joinrel,
outer_rel,
inner_rel);
+ *found = true;
return joinrel;
}
+ /* not found on the cache */
+ *found = false;
/*
* Nope, so make one.
@@ -427,6 +435,18 @@ build_join_rel(PlannerInfo *root,
sjinfo, restrictlist);
/*
+ * Set FDW handler and routine if both outer and inner relation
+ * are managed by same FDW driver.
+ */
+ if (OidIsValid(outer_rel->fdw_handler) &&
+ OidIsValid(inner_rel->fdw_handler) &&
+ outer_rel->fdw_handler == inner_rel->fdw_handler)
+ {
+ joinrel->fdw_handler = outer_rel->fdw_handler;
+ joinrel->fdwroutine = GetFdwRoutine(joinrel->fdw_handler);
+ }
+
+ /*
* Add the joinrel to the query's joinrel list, and store it into the
* auxiliary hashtable if there is one. NB: GEQO requires us to append
* the new joinrel to the end of the list!
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index 28e1acf..90e1107 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -3842,6 +3842,10 @@ set_deparse_planstate(deparse_namespace *dpns, PlanState *ps)
/* index_tlist is set only if it's an IndexOnlyScan */
if (IsA(ps->plan, IndexOnlyScan))
dpns->index_tlist = ((IndexOnlyScan *) ps->plan)->indextlist;
+ else if (IsA(ps->plan, ForeignScan))
+ dpns->index_tlist = ((ForeignScan *) ps->plan)->fdw_ps_tlist;
+ else if (IsA(ps->plan, CustomScan))
+ dpns->index_tlist = ((CustomScan *) ps->plan)->custom_ps_tlist;
else
dpns->index_tlist = NIL;
}
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h
index 1d76841..d3a5261 100644
--- a/src/include/foreign/fdwapi.h
+++ b/src/include/foreign/fdwapi.h
@@ -82,6 +82,13 @@ typedef void (*EndForeignModify_function) (EState *estate,
typedef int (*IsForeignRelUpdatable_function) (Relation rel);
+typedef void (*GetForeignJoinPaths_function ) (PlannerInfo *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *outerrel,
+ RelOptInfo *innerrel,
+ SpecialJoinInfo *sjinfo,
+ List *restrictlist);
+
typedef void (*ExplainForeignScan_function) (ForeignScanState *node,
struct ExplainState *es);
@@ -150,6 +157,10 @@ typedef struct FdwRoutine
/* Support functions for IMPORT FOREIGN SCHEMA */
ImportForeignSchema_function ImportForeignSchema;
+
+ /* Support functions for join push-down */
+ GetForeignJoinPaths_function GetForeignJoinPaths;
+
} FdwRoutine;
@@ -157,6 +168,7 @@ typedef struct FdwRoutine
extern FdwRoutine *GetFdwRoutine(Oid fdwhandler);
extern FdwRoutine *GetFdwRoutineByRelId(Oid relid);
extern FdwRoutine *GetFdwRoutineForRelation(Relation relation, bool makecopy);
+extern Oid GetFdwHandlerForRelation(Relation relation);
extern bool IsImportableForeignTable(const char *tablename,
ImportForeignSchemaStmt *stmt);
diff --git a/src/include/nodes/bitmapset.h b/src/include/nodes/bitmapset.h
index 3a556ee..3ca9791 100644
--- a/src/include/nodes/bitmapset.h
+++ b/src/include/nodes/bitmapset.h
@@ -66,6 +66,7 @@ extern void bms_free(Bitmapset *a);
extern Bitmapset *bms_union(const Bitmapset *a, const Bitmapset *b);
extern Bitmapset *bms_intersect(const Bitmapset *a, const Bitmapset *b);
extern Bitmapset *bms_difference(const Bitmapset *a, const Bitmapset *b);
+extern Bitmapset *bms_shift_members(const Bitmapset *a, int shift);
extern bool bms_is_subset(const Bitmapset *a, const Bitmapset *b);
extern BMS_Comparison bms_subset_compare(const Bitmapset *a, const Bitmapset *b);
extern bool bms_is_member(int x, const Bitmapset *a);
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 21cbfa8..b25330e 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -471,7 +471,13 @@ typedef struct WorkTableScan
* fdw_exprs and fdw_private are both under the control of the foreign-data
* wrapper, but fdw_exprs is presumed to contain expression trees and will
* be post-processed accordingly by the planner; fdw_private won't be.
- * Note that everything in both lists must be copiable by copyObject().
+ * An optional fdw_ps_tlist maps each reference to an attribute of the
+ * underlying relation(s) onto a pair of INDEX_VAR and an alternative
+ * varattno.  The node then looks like a scan on a pseudo relation, which is
+ * usually the result of joining relations on the remote data source; the FDW
+ * driver is responsible for setting the expected target list for it.  If the
+ * FDW returns records matching the foreign-table definition, put NIL here.
+ * Note that everything in the above lists must be copiable by copyObject().
* One way to store an arbitrary blob of bytes is to represent it as a bytea
* Const. Usually, though, you'll be better off choosing a representation
* that can be dumped usefully by nodeToString().
@@ -480,18 +486,23 @@ typedef struct WorkTableScan
typedef struct ForeignScan
{
Scan scan;
+ Oid fdw_handler; /* OID of the FDW's handler */
List *fdw_exprs; /* expressions that FDW may evaluate */
+ List *fdw_ps_tlist; /* optional pseudo-scan tlist for FDW */
List *fdw_private; /* private data for FDW */
+ Bitmapset *fdw_relids; /* set of relids (range-table indexes)
+ * represented by this node */
bool fsSystemCol; /* true if any "system column" is needed */
} ForeignScan;
/* ----------------
* CustomScan node
*
- * The comments for ForeignScan's fdw_exprs and fdw_private fields apply
- * equally to custom_exprs and custom_private. Note that since Plan trees
- * can be copied, custom scan providers *must* fit all plan data they need
- * into those fields; embedding CustomScan in a larger struct will not work.
+ * The comments for ForeignScan's fdw_exprs, fdw_ps_tlist and fdw_private
+ * fields apply equally to custom_exprs, custom_ps_tlist and custom_private.
+ * Note that since Plan trees can be copied, custom scan providers *must*
+ * fit all plan data they need into those fields; embedding CustomScan in
+ * a larger struct will not work.
* ----------------
*/
struct CustomScan;
@@ -512,7 +523,10 @@ typedef struct CustomScan
Scan scan;
uint32 flags; /* mask of CUSTOMPATH_* flags, see relation.h */
List *custom_exprs; /* expressions that custom code may evaluate */
+ List *custom_ps_tlist;/* optional pseudo-scan target list */
List *custom_private; /* private data for custom code */
+ Bitmapset *custom_relids; /* set of relid (index of range-tables)
+ * represented by this node */
const CustomScanMethods *methods;
} CustomScan;
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 334cf51..4eb89c6 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -366,6 +366,7 @@ typedef struct PlannerInfo
* subroot - PlannerInfo for subquery (NULL if it's not a subquery)
* subplan_params - list of PlannerParamItems to be passed to subquery
* fdwroutine - function hooks for FDW, if foreign table (else NULL)
+ * fdw_handler - OID of FDW handler, if foreign table (else InvalidOid)
* fdw_private - private state for FDW, if foreign table (else NULL)
*
* Note: for a subquery, tuples, subplan, subroot are not set immediately
@@ -461,6 +462,7 @@ typedef struct RelOptInfo
List *subplan_params; /* if subquery */
/* use "struct FdwRoutine" to avoid including fdwapi.h here */
struct FdwRoutine *fdwroutine; /* if foreign table */
+ Oid fdw_handler; /* if foreign table */
void *fdw_private; /* if foreign table */
/* used by various scans and joins: */
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 9923f0e..3053f0f 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -141,7 +141,8 @@ extern RelOptInfo *build_join_rel(PlannerInfo *root,
RelOptInfo *outer_rel,
RelOptInfo *inner_rel,
SpecialJoinInfo *sjinfo,
- List **restrictlist_ptr);
+ List **restrictlist_ptr,
+ bool *found);
extern RelOptInfo *build_empty_join_rel(PlannerInfo *root);
extern AppendRelInfo *find_childrel_appendrelinfo(PlannerInfo *root,
RelOptInfo *rel);
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 6cad92e..c42c69d 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -30,6 +30,19 @@ typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root,
RangeTblEntry *rte);
extern PGDLLIMPORT set_rel_pathlist_hook_type set_rel_pathlist_hook;
+/* Hook for plugins to get control in add_paths_to_joinrel() */
+typedef void (*set_join_pathlist_hook_type) (PlannerInfo *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *outerrel,
+ RelOptInfo *innerrel,
+ List *restrictlist,
+ JoinType jointype,
+ SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors,
+ Relids param_source_rels,
+ Relids extra_lateral_rels);
+extern PGDLLIMPORT set_join_pathlist_hook_type set_join_pathlist_hook;
+
/* Hook for plugins to replace standard_join_search() */
typedef RelOptInfo *(*join_search_hook_type) (PlannerInfo *root,
int levels_needed,
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index fa72918..0c8cbcd 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -41,6 +41,7 @@ extern Plan *optimize_minmax_aggregates(PlannerInfo *root, List *tlist,
* prototypes for plan/createplan.c
*/
extern Plan *create_plan(PlannerInfo *root, Path *best_path);
+extern Plan *create_plan_recurse(PlannerInfo *root, Path *best_path);
extern SubqueryScan *make_subqueryscan(List *qptlist, List *qpqual,
Index scanrelid, Plan *subplan);
extern ForeignScan *make_foreignscan(List *qptlist, List *qpqual,