Odd system-column handling in postgres_fdw join pushdown patch
Hi,
I noticed that the postgres_fdw join pushdown patch retrieves system
columns other than ctid (and oid) from the remote server as shown in the
example:
postgres=# explain verbose select foo.tableoid, foo.xmin, foo.cmin,
foo.xmax, foo.cmax, foo.* from foo, bar where foo.a = bar.a;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------
--------
Foreign Scan (cost=100.00..102.09 rows=2 width=28)
Output: foo.tableoid, foo.xmin, foo.cmin, foo.xmax, foo.cmax, foo.a,
foo.b
Relations: (public.foo) INNER JOIN (public.bar)
Remote SQL: SELECT r1.tableoid, r1.xmin, r1.cmin, r1.xmax, r1.cmax,
r1.a, r1.b FROM (public.foo r1 INNER JOIN public.bar r2 ON (TRUE)) WHERE
((r1.a =
r2.a))
(4 rows)
BUT: we don't make any effort to ensure that local and remote values
match, so system columns other than ctid and oid should not be retrieved
from the remote server. So, I'd like to propose: (1) when tableoids are
requested from the remote server, postgres_fdw sets valid values for
them locally, instead (core should support that?) and (2) when any of
xmins, xmaxs, cmins, and cmaxs are requested, postgres_fdw gives up
pushing down foreign joins. (We might be able to set appropriate values
for them locally the same way as for tableoids, but I'm not sure it's
worth complicating the code.) I think that would be probably OK,
because users wouldn't retrieve any such columns in practice.
Attached is a proposed patch for that.
Best regards,
Etsuro Fujita
Attachments:
postgres-fdw-join-pushdown-syscol-handling.patchapplication/x-patch; name=postgres-fdw-join-pushdown-syscol-handling.patchDownload
*** a/contrib/postgres_fdw/deparse.c
--- b/contrib/postgres_fdw/deparse.c
***************
*** 123,129 **** static void deparseTargetList(StringInfo buf,
Bitmapset *attrs_used,
bool qualify_col,
List **retrieved_attrs);
! static void deparseExplicitTargetList(List *tlist, List **retrieved_attrs,
deparse_expr_cxt *context);
static void deparseReturningList(StringInfo buf, PlannerInfo *root,
Index rtindex, Relation rel,
--- 123,131 ----
Bitmapset *attrs_used,
bool qualify_col,
List **retrieved_attrs);
! static void deparseExplicitTargetList(List *tlist,
! List **retrieved_attrs,
! List **tableoid_attrs,
deparse_expr_cxt *context);
static void deparseReturningList(StringInfo buf, PlannerInfo *root,
Index rtindex, Relation rel,
***************
*** 152,158 **** static void printRemoteParam(int paramindex, Oid paramtype, int32 paramtypmod,
deparse_expr_cxt *context);
static void printRemotePlaceholder(Oid paramtype, int32 paramtypmod,
deparse_expr_cxt *context);
! static void deparseSelectSql(List *tlist, List **retrieved_attrs,
deparse_expr_cxt *context);
static void deparseLockingClause(deparse_expr_cxt *context);
static void appendOrderByClause(List *pathkeys, deparse_expr_cxt *context);
--- 154,162 ----
deparse_expr_cxt *context);
static void printRemotePlaceholder(Oid paramtype, int32 paramtypmod,
deparse_expr_cxt *context);
! static void deparseSelectSql(List *tlist,
! List **retrieved_attrs,
! List **tableoid_attrs,
deparse_expr_cxt *context);
static void deparseLockingClause(deparse_expr_cxt *context);
static void appendOrderByClause(List *pathkeys, deparse_expr_cxt *context);
***************
*** 762,768 **** build_tlist_to_deparse(RelOptInfo *foreignrel)
extern void
deparseSelectStmtForRel(StringInfo buf, PlannerInfo *root, RelOptInfo *rel,
List *tlist, List *remote_conds, List *pathkeys,
! List **retrieved_attrs, List **params_list)
{
deparse_expr_cxt context;
--- 766,773 ----
extern void
deparseSelectStmtForRel(StringInfo buf, PlannerInfo *root, RelOptInfo *rel,
List *tlist, List *remote_conds, List *pathkeys,
! List **retrieved_attrs, List **tableoid_attrs,
! List **params_list)
{
deparse_expr_cxt context;
***************
*** 778,784 **** deparseSelectStmtForRel(StringInfo buf, PlannerInfo *root, RelOptInfo *rel,
context.params_list = params_list;
/* Construct SELECT clause and FROM clause */
! deparseSelectSql(tlist, retrieved_attrs, &context);
/*
* Construct WHERE clause
--- 783,789 ----
context.params_list = params_list;
/* Construct SELECT clause and FROM clause */
! deparseSelectSql(tlist, retrieved_attrs, tableoid_attrs, &context);
/*
* Construct WHERE clause
***************
*** 809,815 **** deparseSelectStmtForRel(StringInfo buf, PlannerInfo *root, RelOptInfo *rel,
* deparseSelectStmtForRel() for details.
*/
static void
! deparseSelectSql(List *tlist, List **retrieved_attrs, deparse_expr_cxt *context)
{
StringInfo buf = context->buf;
RelOptInfo *foreignrel = context->foreignrel;
--- 814,823 ----
* deparseSelectStmtForRel() for details.
*/
static void
! deparseSelectSql(List *tlist,
! List **retrieved_attrs,
! List **tableoid_attrs,
! deparse_expr_cxt *context)
{
StringInfo buf = context->buf;
RelOptInfo *foreignrel = context->foreignrel;
***************
*** 824,830 **** deparseSelectSql(List *tlist, List **retrieved_attrs, deparse_expr_cxt *context)
if (foreignrel->reloptkind == RELOPT_JOINREL)
{
/* For a join relation use the input tlist */
! deparseExplicitTargetList(tlist, retrieved_attrs, context);
}
else
{
--- 832,838 ----
if (foreignrel->reloptkind == RELOPT_JOINREL)
{
/* For a join relation use the input tlist */
! deparseExplicitTargetList(tlist, retrieved_attrs, tableoid_attrs, context);
}
else
{
***************
*** 1091,1130 **** get_jointype_name(JoinType jointype)
*
* tlist is list of TargetEntry's which in turn contain Var nodes.
*
! * retrieved_attrs is the list of continuously increasing integers starting
! * from 1. It has same number of entries as tlist.
*/
static void
! deparseExplicitTargetList(List *tlist, List **retrieved_attrs,
deparse_expr_cxt *context)
{
- ListCell *lc;
StringInfo buf = context->buf;
! int i = 0;
*retrieved_attrs = NIL;
foreach(lc, tlist)
{
TargetEntry *tle = (TargetEntry *) lfirst(lc);
Var *var;
- /* Extract expression if TargetEntry node */
Assert(IsA(tle, TargetEntry));
var = (Var *) tle->expr;
/* We expect only Var nodes here */
Assert(IsA(var, Var));
! if (i > 0)
! appendStringInfoString(buf, ", ");
! deparseVar(var, context);
! *retrieved_attrs = lappend_int(*retrieved_attrs, i + 1);
i++;
}
! if (i == 0)
appendStringInfoString(buf, "NULL");
}
--- 1099,1163 ----
*
* tlist is list of TargetEntry's which in turn contain Var nodes.
*
! * We create an integer List of the columns being retrieved, which is
! * returned to *retrieved_attrs, and an integer List of the tableoid
! * columns in tlist.
*/
static void
! deparseExplicitTargetList(List *tlist,
! List **retrieved_attrs,
! List **tableoid_attrs,
deparse_expr_cxt *context)
{
StringInfo buf = context->buf;
! ListCell *lc;
! bool first;
! int i;
*retrieved_attrs = NIL;
+ *tableoid_attrs = NIL;
+ i = 1;
+ first = true;
foreach(lc, tlist)
{
TargetEntry *tle = (TargetEntry *) lfirst(lc);
Var *var;
Assert(IsA(tle, TargetEntry));
+ /* Extract expression */
var = (Var *) tle->expr;
/* We expect only Var nodes here */
Assert(IsA(var, Var));
! if (var->varattno < 0 &&
! var->varattno != ObjectIdAttributeNumber &&
! var->varattno != SelfItemPointerAttributeNumber)
! {
! /*
! * varattno should be tableoid, since we don't consider pushing down
! * any foreign joins that retrieve any of xmin, xmax, cmin, and cmax.
! * See comments in postgresGetForeignRelSize.
! */
! Assert(var->varattno == TableOidAttributeNumber);
! *tableoid_attrs = lappend_int(*tableoid_attrs, i);
! }
! else
! {
! if (!first)
! appendStringInfoString(buf, ", ");
! first = false;
!
! deparseVar(var, context);
!
! *retrieved_attrs = lappend_int(*retrieved_attrs, i);
! }
i++;
}
! if (first)
appendStringInfoString(buf, "NULL");
}
*** a/contrib/postgres_fdw/expected/postgres_fdw.out
--- b/contrib/postgres_fdw/expected/postgres_fdw.out
***************
*** 1671,1676 **** SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM
--- 1671,1704 ----
1
(10 rows)
+ -- System columns, except oid and ctid, should not be retrieved from remote
+ EXPLAIN (COSTS false, VERBOSE)
+ SELECT t1.tableoid::regclass, t1.c1, t2.tableoid::regclass, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
+ QUERY PLAN
+ -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
+ Output: ((t1.tableoid)::regclass), t1.c1, ((t2.tableoid)::regclass), t2.c1, t1.c3
+ -> Foreign Scan
+ Output: (t1.tableoid)::regclass, t1.c1, (t2.tableoid)::regclass, t2.c1, t1.c3
+ Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
+ Remote SQL: SELECT r1."C 1", r1.c3, r2."C 1" FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) WHERE ((r1."C 1" = r2."C 1")) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST
+ (6 rows)
+
+ SELECT t1.tableoid::regclass, t1.c1, t2.tableoid::regclass, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
+ tableoid | c1 | tableoid | c1
+ ----------+-----+----------+-----
+ ft1 | 101 | ft2 | 101
+ ft1 | 102 | ft2 | 102
+ ft1 | 103 | ft2 | 103
+ ft1 | 104 | ft2 | 104
+ ft1 | 105 | ft2 | 105
+ ft1 | 106 | ft2 | 106
+ ft1 | 107 | ft2 | 107
+ ft1 | 108 | ft2 | 108
+ ft1 | 109 | ft2 | 109
+ ft1 | 110 | ft2 | 110
+ (10 rows)
+
-- create another user for permission, user mapping, effective user tests
CREATE USER view_owner;
-- grant privileges on ft4 and ft5 to view_owner
*** a/contrib/postgres_fdw/postgres_fdw.c
--- b/contrib/postgres_fdw/postgres_fdw.c
***************
*** 72,78 **** enum FdwScanPrivateIndex
* String describing join i.e. names of relations being joined and types
* of join, added when the scan is join
*/
! FdwScanPrivateRelations
};
/*
--- 72,81 ----
* String describing join i.e. names of relations being joined and types
* of join, added when the scan is join
*/
! FdwScanPrivateRelations,
!
! /* Integer list of attribute numbers of tableoids */
! FdwScanPrivateTableOidAttrs
};
/*
***************
*** 98,111 **** enum FdwModifyPrivateIndex
};
/*
* Execution state of a foreign scan using postgres_fdw.
*/
typedef struct PgFdwScanState
{
Relation rel; /* relcache entry for the foreign table. NULL
* for a foreign join scan. */
- TupleDesc tupdesc; /* tuple descriptor of scan */
AttInMetadata *attinmeta; /* attribute datatype conversion metadata */
/* extracted fdw_private data */
char *query; /* text of SELECT command */
--- 101,126 ----
};
/*
+ * Struct for extra information for making tuple for a foreign join
+ */
+ typedef struct TupleExtraData
+ {
+ TupleDesc tupdesc; /* tuple descriptor for the tuple */
+ List *fdw_scan_tlist; /* tlist describing contents of the tuple */
+ List *tableoid_attrs; /* indexes of tableoids in fdw_scan_tlist */
+ List *tableoid_vals; /* values of tableoids in fdw_scan_tlist */
+ List *rtables; /* EState's list of RangeTblEntry */
+ } TupleExtraData;
+
+ /*
* Execution state of a foreign scan using postgres_fdw.
*/
typedef struct PgFdwScanState
{
Relation rel; /* relcache entry for the foreign table. NULL
* for a foreign join scan. */
AttInMetadata *attinmeta; /* attribute datatype conversion metadata */
+ TupleExtraData *extra; /* info about result tup, if foreign join */
/* extracted fdw_private data */
char *query; /* text of SELECT command */
***************
*** 193,207 **** typedef struct PgFdwAnalyzeState
typedef struct ConversionLocation
{
Relation rel; /* foreign table's relcache entry. */
AttrNumber cur_attno; /* attribute number being processed, or 0 */
-
- /*
- * In case of foreign join push down, fdw_scan_tlist is used to identify
- * the Var node corresponding to the error location and
- * fsstate->ss.ps.state gives access to the RTEs of corresponding relation
- * to get the relation name and attribute name.
- */
- ForeignScanState *fsstate;
} ConversionLocation;
/* Callback argument for ec_member_matches_foreign */
--- 208,215 ----
typedef struct ConversionLocation
{
Relation rel; /* foreign table's relcache entry. */
+ TupleExtraData *extra; /* info about result tup, if foreign join */
AttrNumber cur_attno; /* attribute number being processed, or 0 */
} ConversionLocation;
/* Callback argument for ec_member_matches_foreign */
***************
*** 321,328 **** static HeapTuple make_tuple_from_result_row(PGresult *res,
int row,
Relation rel,
AttInMetadata *attinmeta,
List *retrieved_attrs,
- ForeignScanState *fsstate,
MemoryContext temp_context);
static void conversion_error_callback(void *arg);
static bool foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel,
--- 329,336 ----
int row,
Relation rel,
AttInMetadata *attinmeta,
+ TupleExtraData *extra,
List *retrieved_attrs,
MemoryContext temp_context);
static void conversion_error_callback(void *arg);
static bool foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel,
***************
*** 407,415 **** postgresGetForeignRelSize(PlannerInfo *root,
fpinfo = (PgFdwRelationInfo *) palloc0(sizeof(PgFdwRelationInfo));
baserel->fdw_private = (void *) fpinfo;
- /* Base foreign tables need to be push down always. */
- fpinfo->pushdown_safe = true;
-
/* Look up foreign-table catalog info. */
fpinfo->table = GetForeignTable(foreigntableid);
fpinfo->server = GetForeignServer(fpinfo->table->serverid);
--- 415,420 ----
***************
*** 492,497 **** postgresGetForeignRelSize(PlannerInfo *root,
--- 497,521 ----
}
/*
+ * The table is pushdown-safe unless any of xmin, xmax, cmin, and cmax
+ * are requested from the table. Note that we don't make any effort to
+ * ensure that local and remote values match. We don't care about ctid,
+ * oid, and tableoid; ctid and oid are retrieved from the remote server,
+ * and tableoid is filled with a valid value locally.
+ */
+ if (bms_is_member(MinTransactionIdAttributeNumber - FirstLowInvalidHeapAttributeNumber,
+ fpinfo->attrs_used) ||
+ bms_is_member(MaxTransactionIdAttributeNumber - FirstLowInvalidHeapAttributeNumber,
+ fpinfo->attrs_used) ||
+ bms_is_member(MinCommandIdAttributeNumber - FirstLowInvalidHeapAttributeNumber,
+ fpinfo->attrs_used) ||
+ bms_is_member(MaxCommandIdAttributeNumber - FirstLowInvalidHeapAttributeNumber,
+ fpinfo->attrs_used))
+ fpinfo->pushdown_safe = false;
+ else
+ fpinfo->pushdown_safe = true;
+
+ /*
* Compute the selectivity and cost of the local_conds, so we don't have
* to do it over again for each path. The best we can do for these
* conditions is to estimate selectivity on the basis of local statistics.
***************
*** 998,1003 **** postgresGetForeignPlan(PlannerInfo *root,
--- 1022,1028 ----
List *local_exprs = NIL;
List *params_list = NIL;
List *retrieved_attrs;
+ List *tableoid_attrs;
StringInfoData sql;
ListCell *lc;
List *fdw_scan_tlist = NIL;
***************
*** 1116,1122 **** postgresGetForeignPlan(PlannerInfo *root,
initStringInfo(&sql);
deparseSelectStmtForRel(&sql, root, foreignrel, fdw_scan_tlist,
remote_conds, best_path->path.pathkeys,
! &retrieved_attrs, ¶ms_list);
/*
* Build the fdw_private list that will be available to the executor.
--- 1141,1147 ----
initStringInfo(&sql);
deparseSelectStmtForRel(&sql, root, foreignrel, fdw_scan_tlist,
remote_conds, best_path->path.pathkeys,
! &retrieved_attrs, &tableoid_attrs, ¶ms_list);
/*
* Build the fdw_private list that will be available to the executor.
***************
*** 1127,1134 **** postgresGetForeignPlan(PlannerInfo *root,
--- 1152,1162 ----
makeInteger(fpinfo->fetch_size),
makeInteger(foreignrel->umid));
if (foreignrel->reloptkind == RELOPT_JOINREL)
+ {
fdw_private = lappend(fdw_private,
makeString(fpinfo->relation_name->data));
+ fdw_private = lappend(fdw_private, tableoid_attrs);
+ }
/*
* Create the ForeignScan node for the given relation.
***************
*** 1158,1163 **** postgresBeginForeignScan(ForeignScanState *node, int eflags)
--- 1186,1192 ----
EState *estate = node->ss.ps.state;
PgFdwScanState *fsstate;
UserMapping *user;
+ TupleDesc tupdesc;
int numParams;
int i;
ListCell *lc;
***************
*** 1241,1251 **** postgresBeginForeignScan(ForeignScanState *node, int eflags)
* into local representation and error reporting during that process.
*/
if (fsplan->scan.scanrelid > 0)
! fsstate->tupdesc = RelationGetDescr(fsstate->rel);
else
! fsstate->tupdesc = node->ss.ss_ScanTupleSlot->tts_tupleDescriptor;
! fsstate->attinmeta = TupleDescGetAttInMetadata(fsstate->tupdesc);
/* Prepare for output conversion of parameters used in remote query. */
numParams = list_length(fsplan->fdw_exprs);
--- 1270,1312 ----
* into local representation and error reporting during that process.
*/
if (fsplan->scan.scanrelid > 0)
! tupdesc = RelationGetDescr(fsstate->rel);
else
! tupdesc = node->ss.ss_ScanTupleSlot->tts_tupleDescriptor;
!
! fsstate->attinmeta = TupleDescGetAttInMetadata(tupdesc);
! /* Create extra info for making tuples, if foreign join */
! if (fsplan->scan.scanrelid > 0)
! fsstate->extra = NULL;
! else
! {
! TupleExtraData *extra;
!
! extra = (TupleExtraData *) palloc0(sizeof(TupleExtraData));
!
! extra->tupdesc = tupdesc;
! extra->fdw_scan_tlist = fsplan->fdw_scan_tlist;
! extra->tableoid_attrs = (List *) list_nth(fsplan->fdw_private,
! FdwScanPrivateTableOidAttrs);
! extra->tableoid_vals = NIL;
! foreach(lc, extra->tableoid_attrs)
! {
! TargetEntry *tle;
! Var *var;
! RangeTblEntry *rte;
!
! i = lfirst_int(lc);
! tle = (TargetEntry *) list_nth(fsplan->fdw_scan_tlist, i - 1);
! var = (Var *) tle->expr;
! rte = rt_fetch(var->varno, estate->es_range_table);
!
! extra->tableoid_vals = lappend_oid(extra->tableoid_vals, rte->relid);
! }
! extra->rtables = estate->es_range_table;
!
! fsstate->extra = extra;
! }
/* Prepare for output conversion of parameters used in remote query. */
numParams = list_length(fsplan->fdw_exprs);
***************
*** 2092,2097 **** estimate_path_cost_size(PlannerInfo *root,
--- 2153,2159 ----
/* Required only to be passed to deparseSelectStmtForRel */
List *retrieved_attrs;
+ List *tableoid_attrs;
/*
* param_join_conds might contain both clauses that are safe to send
***************
*** 2123,2129 **** estimate_path_cost_size(PlannerInfo *root,
appendStringInfoString(&sql, "EXPLAIN ");
deparseSelectStmtForRel(&sql, root, foreignrel, fdw_scan_tlist,
remote_conds, pathkeys, &retrieved_attrs,
! NULL);
/* Get the remote estimate */
conn = GetConnection(fpinfo->user, false);
--- 2185,2191 ----
appendStringInfoString(&sql, "EXPLAIN ");
deparseSelectStmtForRel(&sql, root, foreignrel, fdw_scan_tlist,
remote_conds, pathkeys, &retrieved_attrs,
! &tableoid_attrs, NULL);
/* Get the remote estimate */
conn = GetConnection(fpinfo->user, false);
***************
*** 2536,2543 **** fetch_more_data(ForeignScanState *node)
make_tuple_from_result_row(res, i,
fsstate->rel,
fsstate->attinmeta,
fsstate->retrieved_attrs,
- node,
fsstate->temp_cxt);
}
--- 2598,2605 ----
make_tuple_from_result_row(res, i,
fsstate->rel,
fsstate->attinmeta,
+ fsstate->extra,
fsstate->retrieved_attrs,
fsstate->temp_cxt);
}
***************
*** 2755,2762 **** store_returning_result(PgFdwModifyState *fmstate,
newtup = make_tuple_from_result_row(res, 0,
fmstate->rel,
fmstate->attinmeta,
- fmstate->retrieved_attrs,
NULL,
fmstate->temp_cxt);
/* tuple will be deleted when it is cleared from the slot */
ExecStoreTuple(newtup, slot, InvalidBuffer, true);
--- 2817,2824 ----
newtup = make_tuple_from_result_row(res, 0,
fmstate->rel,
fmstate->attinmeta,
NULL,
+ fmstate->retrieved_attrs,
fmstate->temp_cxt);
/* tuple will be deleted when it is cleared from the slot */
ExecStoreTuple(newtup, slot, InvalidBuffer, true);
***************
*** 3066,3073 **** analyze_row_processor(PGresult *res, int row, PgFdwAnalyzeState *astate)
astate->rows[pos] = make_tuple_from_result_row(res, row,
astate->rel,
astate->attinmeta,
- astate->retrieved_attrs,
NULL,
astate->temp_cxt);
MemoryContextSwitchTo(oldcontext);
--- 3128,3135 ----
astate->rows[pos] = make_tuple_from_result_row(res, row,
astate->rel,
astate->attinmeta,
NULL,
+ astate->retrieved_attrs,
astate->temp_cxt);
MemoryContextSwitchTo(oldcontext);
***************
*** 3736,3743 **** make_tuple_from_result_row(PGresult *res,
int row,
Relation rel,
AttInMetadata *attinmeta,
List *retrieved_attrs,
- ForeignScanState *fsstate,
MemoryContext temp_context)
{
HeapTuple tuple;
--- 3798,3805 ----
int row,
Relation rel,
AttInMetadata *attinmeta,
+ TupleExtraData *extra,
List *retrieved_attrs,
MemoryContext temp_context)
{
HeapTuple tuple;
***************
*** 3764,3774 **** make_tuple_from_result_row(PGresult *res,
tupdesc = RelationGetDescr(rel);
else
{
! PgFdwScanState *fdw_sstate;
!
! Assert(fsstate);
! fdw_sstate = (PgFdwScanState *) fsstate->fdw_state;
! tupdesc = fdw_sstate->tupdesc;
}
values = (Datum *) palloc0(tupdesc->natts * sizeof(Datum));
--- 3826,3833 ----
tupdesc = RelationGetDescr(rel);
else
{
! Assert(extra);
! tupdesc = extra->tupdesc;
}
values = (Datum *) palloc0(tupdesc->natts * sizeof(Datum));
***************
*** 3780,3787 **** make_tuple_from_result_row(PGresult *res,
* Set up and install callback to report where conversion error occurs.
*/
errpos.rel = rel;
errpos.cur_attno = 0;
- errpos.fsstate = fsstate;
errcallback.callback = conversion_error_callback;
errcallback.arg = (void *) &errpos;
errcallback.previous = error_context_stack;
--- 3839,3846 ----
* Set up and install callback to report where conversion error occurs.
*/
errpos.rel = rel;
+ errpos.extra = extra;
errpos.cur_attno = 0;
errcallback.callback = conversion_error_callback;
errcallback.arg = (void *) &errpos;
errcallback.previous = error_context_stack;
***************
*** 3841,3846 **** make_tuple_from_result_row(PGresult *res,
--- 3900,3920 ----
if (j > 0 && j != PQnfields(res))
elog(ERROR, "remote query result does not match the foreign table");
+ /* Set valid values for tableoids in the tuple, if foreign join */
+ if (extra)
+ {
+ ListCell *lc2;
+
+ forboth(lc, extra->tableoid_attrs, lc2, extra->tableoid_vals)
+ {
+ int i = lfirst_int(lc);
+ Oid tableoid = lfirst_oid(lc2);
+
+ nulls[i - 1] = false;
+ values[i - 1] = ObjectIdGetDatum(tableoid);
+ }
+ }
+
/*
* Build the result tuple in caller's memory context.
*/
***************
*** 3889,3909 **** conversion_error_callback(void *arg)
else
{
/* error occurred in a scan against a foreign join */
- ForeignScanState *fsstate = errpos->fsstate;
- ForeignScan *fsplan = (ForeignScan *) fsstate->ss.ps.plan;
- EState *estate = fsstate->ss.ps.state;
TargetEntry *tle;
Var *var;
RangeTblEntry *rte;
! Assert(IsA(fsplan, ForeignScan));
! tle = (TargetEntry *) list_nth(fsplan->fdw_scan_tlist,
errpos->cur_attno - 1);
Assert(IsA(tle, TargetEntry));
var = (Var *) tle->expr;
Assert(IsA(var, Var));
! rte = rt_fetch(var->varno, estate->es_range_table);
relname = get_rel_name(rte->relid);
attname = get_relid_attribute_name(rte->relid, var->varattno);
}
--- 3963,3980 ----
else
{
/* error occurred in a scan against a foreign join */
TargetEntry *tle;
Var *var;
RangeTblEntry *rte;
! Assert(errpos->cur_attno > 0);
! tle = (TargetEntry *) list_nth(errpos->extra->fdw_scan_tlist,
errpos->cur_attno - 1);
Assert(IsA(tle, TargetEntry));
var = (Var *) tle->expr;
Assert(IsA(var, Var));
! rte = rt_fetch(var->varno, errpos->extra->rtables);
relname = get_rel_name(rte->relid);
attname = get_relid_attribute_name(rte->relid, var->varattno);
}
*** a/contrib/postgres_fdw/postgres_fdw.h
--- b/contrib/postgres_fdw/postgres_fdw.h
***************
*** 143,149 **** extern List *build_tlist_to_deparse(RelOptInfo *foreign_rel);
extern void deparseSelectStmtForRel(StringInfo buf, PlannerInfo *root,
RelOptInfo *foreignrel, List *tlist,
List *remote_conds, List *pathkeys,
! List **retrieved_attrs, List **params_list);
/* in shippable.c */
extern bool is_builtin(Oid objectId);
--- 143,150 ----
extern void deparseSelectStmtForRel(StringInfo buf, PlannerInfo *root,
RelOptInfo *foreignrel, List *tlist,
List *remote_conds, List *pathkeys,
! List **retrieved_attrs, List **tableoid_attrs,
! List **params_list);
/* in shippable.c */
extern bool is_builtin(Oid objectId);
*** a/contrib/postgres_fdw/sql/postgres_fdw.sql
--- b/contrib/postgres_fdw/sql/postgres_fdw.sql
***************
*** 430,435 **** SELECT t1c1, avg(t1c1 + t2c1) FROM (SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2
--- 430,439 ----
EXPLAIN (COSTS false, VERBOSE)
SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM ft1 t2, ft2 t3 WHERE t2.c1 = t3.c1 AND t2.c2 = t1.c2) q ORDER BY t1."C 1" OFFSET 10 LIMIT 10;
SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM ft1 t2, ft2 t3 WHERE t2.c1 = t3.c1 AND t2.c2 = t1.c2) q ORDER BY t1."C 1" OFFSET 10 LIMIT 10;
+ -- System columns, except oid and ctid, should not be retrieved from remote
+ EXPLAIN (COSTS false, VERBOSE)
+ SELECT t1.tableoid::regclass, t1.c1, t2.tableoid::regclass, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
+ SELECT t1.tableoid::regclass, t1.c1, t2.tableoid::regclass, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
-- create another user for permission, user mapping, effective user tests
CREATE USER view_owner;
On Thu, Mar 17, 2016 at 4:30 PM, Etsuro Fujita <fujita.etsuro@lab.ntt.co.jp>
wrote:
Hi,
I noticed that the postgres_fdw join pushdown patch retrieves system
columns other than ctid (and oid) from the remote server as shown in the
example:postgres=# explain verbose select foo.tableoid, foo.xmin, foo.cmin,
foo.xmax, foo.cmax, foo.* from foo, bar where foo.a = bar.a;QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------
--------
Foreign Scan (cost=100.00..102.09 rows=2 width=28)
Output: foo.tableoid, foo.xmin, foo.cmin, foo.xmax, foo.cmax, foo.a,
foo.b
Relations: (public.foo) INNER JOIN (public.bar)
Remote SQL: SELECT r1.tableoid, r1.xmin, r1.cmin, r1.xmax, r1.cmax,
r1.a, r1.b FROM (public.foo r1 INNER JOIN public.bar r2 ON (TRUE)) WHERE
((r1.a =
r2.a))
(4 rows)
Thanks for catching the bug and producing a patch.
BUT: we don't make any effort to ensure that local and remote values
match, so system columns other than ctid and oid should not be retrieved
from the remote server. So, I'd like to propose: (1) when tableoids are
requested from the remote server, postgres_fdw sets valid values for
them locally, instead (core should support that?) and
If we are disabling join pushdown when the targetlist has other system
columns, shouldn't we treat tableoid in the same fashion. We should disable
join pushdown when tableoid is requested?
I agree that we might want to do this in core instead of FDW specific core.
That way we avoid each FDW implementing its own solution. Ultimately, all
that needs to be done to push OID of the foreign table in place of tableoid
column. The core code can do that. It already does that for the base
tables.
(2) when any of
xmins, xmaxs, cmins, and cmaxs are requested, postgres_fdw gives up
pushing down foreign joins. (We might be able to set appropriate values
for them locally the same way as for tableoids, but I'm not sure it's
worth complicating the code.) I think that would be probably OK,
because users wouldn't retrieve any such columns in practice.
In that patch you have set pushdown_safe to false for the base relation
fetching system columns. But pushdown_safe = false means that that relation
is not safe to push down. A base foreign relation is always safe to push
down, so should have pushdown_safe = true always. Instead, I suggest having
a separate boolean has_unshippable_syscols (or something with similar name)
in PgFdwRelationInfo, which is set to true in such case. In
foreign_join_ok, we return false (thus not pushing down the join), if any
of the joining relation has that attribute set. By default this member is
false.
Even for a base table those values are rather random, although they are not
fetched from the foreign server. Instead of not pushing the join down, we
should push the join down without fetching those attributes. While
constructing the query, don't include these system attributes in SELECT
clause and don't include corresponding positions in retrieved_attributes
list. That means those attributes won't be set while fetching the row from
the foreign server and will have garbage values in corresponding places. I
guess that would work.
--
Best Wishes,
Ashutosh Bapat
EnterpriseDB Corporation
The Postgres Database Company
On 2016/03/17 22:15, Ashutosh Bapat wrote:
On Thu, Mar 17, 2016 at 4:30 PM, Etsuro Fujita
<fujita.etsuro@lab.ntt.co.jp <mailto:fujita.etsuro@lab.ntt.co.jp>> wrote:
BUT: we don't make any effort to ensure that local and remote values
match, so system columns other than ctid and oid should not be retrieved
from the remote server. So, I'd like to propose: (1) when tableoids are
requested from the remote server, postgres_fdw sets valid values for
them locally, instead (core should support that?) and
If we are disabling join pushdown when the targetlist has other system
columns, shouldn't we treat tableoid in the same fashion. We should
disable join pushdown when tableoid is requested?
That seems a bit too restrictive to me.
I agree that we might want to do this in core instead of FDW specific
core. That way we avoid each FDW implementing its own solution.
Ultimately, all that needs to be done to push OID of the foreign table
in place of tableoid column. The core code can do that. It already does
that for the base tables.
OK, I'll try to modify the patch so that the core does that work.
(2) when any of
xmins, xmaxs, cmins, and cmaxs are requested, postgres_fdw gives up
pushing down foreign joins. (We might be able to set appropriate values
for them locally the same way as for tableoids, but I'm not sure it's
worth complicating the code.) I think that would be probably OK,
because users wouldn't retrieve any such columns in practice.
In that patch you have set pushdown_safe to false for the base relation
fetching system columns. But pushdown_safe = false means that that
relation is not safe to push down. A base foreign relation is always
safe to push down, so should have pushdown_safe = true always. Instead,
I suggest having a separate boolean has_unshippable_syscols (or
something with similar name) in PgFdwRelationInfo, which is set to true
in such case. In foreign_join_ok, we return false (thus not pushing down
the join), if any of the joining relation has that attribute set. By
default this member is false.
Maybe I'm missing something, but why do you consider that base foreign
tables need always be safe to push down? IIUC, the pushdown_safe flag
is used only for foreign_join_ok, so I think that a base foreign table
needs not necessarily be safe to push down.
Even for a base table those values are rather random, although they are
not fetched from the foreign server. Instead of not pushing the join
down, we should push the join down without fetching those attributes.
While constructing the query, don't include these system attributes in
SELECT clause and don't include corresponding positions in
retrieved_attributes list. That means those attributes won't be set
while fetching the row from the foreign server and will have garbage
values in corresponding places. I guess that would work.
That might be an idea, but as I said above, users wouldn't specify any
system columns other than tableoid and ctid (and oid) in their queries,
in practice, so I'm not sure it's worth complicating the code.
Best regards,
Etsuro Fujita
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
(2) when any of
xmins, xmaxs, cmins, and cmaxs are requested, postgres_fdw gives up
pushing down foreign joins. (We might be able to set appropriate
values
for them locally the same way as for tableoids, but I'm not sure it's
worth complicating the code.) I think that would be probably OK,
because users wouldn't retrieve any such columns in practice.In that patch you have set pushdown_safe to false for the base relation
fetching system columns. But pushdown_safe = false means that that
relation is not safe to push down. A base foreign relation is always
safe to push down, so should have pushdown_safe = true always. Instead,
I suggest having a separate boolean has_unshippable_syscols (or
something with similar name) in PgFdwRelationInfo, which is set to true
in such case. In foreign_join_ok, we return false (thus not pushing down
the join), if any of the joining relation has that attribute set. By
default this member is false.Maybe I'm missing something, but why do you consider that base foreign
tables need always be safe to push down? IIUC, the pushdown_safe flag is
used only for foreign_join_ok, so I think that a base foreign table needs
not necessarily be safe to push down.
A base foreign table "always" fetches data from the foreign server, so it
"has to be" always safe to push down. pushdown_safe flag is designated to
tell whether the relation corresponding to PgFdwRelationInfo where this
flag is set is safe to push down.Right now it's only used for joins but in
future it would be used for any push down of higher operations. It seems
very much possible after the upper pathification changes. We can not have a
query sent to the foreign server for a relation, when pushdown_safe is
false for that. Your patch does that for foreign base relation which try to
fetch system columns.
--
Best Wishes,
Ashutosh Bapat
EnterpriseDB Corporation
The Postgres Database Company
On Thu, Mar 17, 2016 at 7:00 AM, Etsuro Fujita
<fujita.etsuro@lab.ntt.co.jp> wrote:
BUT: we don't make any effort to ensure that local and remote values
match, so system columns other than ctid and oid should not be retrieved
from the remote server.
I agree.
So, I'd like to propose: (1) when tableoids are
requested from the remote server, postgres_fdw sets valid values for
them locally, instead (core should support that?)
Sure.
and (2) when any of
xmins, xmaxs, cmins, and cmaxs are requested, postgres_fdw gives up
pushing down foreign joins. (We might be able to set appropriate values
for them locally the same way as for tableoids, but I'm not sure it's
worth complicating the code.) I think that would be probably OK,
because users wouldn't retrieve any such columns in practice.
Now that seems like the wrong reaction. I mean, aren't these just
going to be 0 or something? Refusing to push the join down seems
strange.
--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 2016/03/19 4:51, Robert Haas wrote:
On Thu, Mar 17, 2016 at 7:00 AM, Etsuro Fujita
<fujita.etsuro@lab.ntt.co.jp> wrote:So, I'd like to propose: (1) when tableoids are
requested from the remote server, postgres_fdw sets valid values for
them locally, instead (core should support that?)
Sure.
and (2) when any of
xmins, xmaxs, cmins, and cmaxs are requested, postgres_fdw gives up
pushing down foreign joins. (We might be able to set appropriate values
for them locally the same way as for tableoids, but I'm not sure it's
worth complicating the code.) I think that would be probably OK,
because users wouldn't retrieve any such columns in practice.
Now that seems like the wrong reaction. I mean, aren't these just
going to be 0 or something? Refusing to push the join down seems
strange.
OK, I'll modify the patch so that the join is pushed down even if any of
xmins, xmaxs, cmins, and cmaxs are requested. Do you think which one
should set values for these as well as tableoids, postgres_fdw or core?
Best regards,
Etsuro Fujita
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Tue, Mar 22, 2016 at 8:03 AM, Etsuro Fujita <fujita.etsuro@lab.ntt.co.jp>
wrote:
On 2016/03/19 4:51, Robert Haas wrote:
On Thu, Mar 17, 2016 at 7:00 AM, Etsuro Fujita
<fujita.etsuro@lab.ntt.co.jp> wrote:So, I'd like to propose: (1) when tableoids are
requested from the remote server, postgres_fdw sets valid values for
them locally, instead (core should support that?)Sure.
and (2) when any of
xmins, xmaxs, cmins, and cmaxs are requested, postgres_fdw gives up
pushing down foreign joins. (We might be able to set appropriate values
for them locally the same way as for tableoids, but I'm not sure it's
worth complicating the code.) I think that would be probably OK,
because users wouldn't retrieve any such columns in practice.Now that seems like the wrong reaction. I mean, aren't these just
going to be 0 or something? Refusing to push the join down seems
strange.OK, I'll modify the patch so that the join is pushed down even if any of
xmins, xmaxs, cmins, and cmaxs are requested. Do you think which one
should set values for these as well as tableoids, postgres_fdw or core?
Earlier in this mail chain, I suggested that the core should take care of
storing the values for these columns. But instead, I think, core should
provide functions which can be used by FDWs, if they want, to return values
for those columns. Something like Datum
get_syscol_value(RelOptInfo/Relation, attno). The function will return
Datum 0 for most of the columns and table's OID for tableoid. My 0.02.
--
Best Wishes,
Ashutosh Bapat
EnterpriseDB Corporation
The Postgres Database Company
On 2016/03/22 14:54, Ashutosh Bapat wrote:
On Tue, Mar 22, 2016 at 8:03 AM, Etsuro Fujita
<fujita.etsuro@lab.ntt.co.jp <mailto:fujita.etsuro@lab.ntt.co.jp>> wrote:
OK, I'll modify the patch so that the join is pushed down even if
any of xmins, xmaxs, cmins, and cmaxs are requested. Do you think
which one should set values for these as well as tableoids,
postgres_fdw or core?
Earlier in this mail chain, I suggested that the core should take care
of storing the values for these columns. But instead, I think, core
should provide functions which can be used by FDWs, if they want, to
return values for those columns. Something like Datum
get_syscol_value(RelOptInfo/Relation, attno). The function will return
Datum 0 for most of the columns and table's OID for tableoid. My 0.02.
What I had in mind was (1) create_foreignscan_plan would create Lists
from the ForeignScan's fdw_scan_tlist: (a) indexes/OID values of
tableoids in fdw_scan_tlist, and (b) indexes of xids and cids in
fdw_scan_tlist, and then (2) ForeignNext would set the OID values for
the tableoid columns in the scan tuple, using the Lists (a), and
appropriate values (0 or something) for the xid and cid columns in the
scan tuple, using the List (b).
Best regards,
Etsuro Fujita
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Tue, Mar 22, 2016 at 5:05 PM, Etsuro Fujita <fujita.etsuro@lab.ntt.co.jp>
wrote:
On 2016/03/22 14:54, Ashutosh Bapat wrote:
On Tue, Mar 22, 2016 at 8:03 AM, Etsuro Fujita
<fujita.etsuro@lab.ntt.co.jp <mailto:fujita.etsuro@lab.ntt.co.jp>> wrote:
OK, I'll modify the patch so that the join is pushed down even if
any of xmins, xmaxs, cmins, and cmaxs are requested. Do you think
which one should set values for these as well as tableoids,
postgres_fdw or core?Earlier in this mail chain, I suggested that the core should take care
of storing the values for these columns. But instead, I think, core
should provide functions which can be used by FDWs, if they want, to
return values for those columns. Something like Datum
get_syscol_value(RelOptInfo/Relation, attno). The function will return
Datum 0 for most of the columns and table's OID for tableoid. My 0.02.What I had in mind was (1) create_foreignscan_plan would create Lists from
the ForeignScan's fdw_scan_tlist: (a) indexes/OID values of tableoids in
fdw_scan_tlist, and (b) indexes of xids and cids in fdw_scan_tlist, and
then (2) ForeignNext would set the OID values for the tableoid columns in
the scan tuple, using the Lists (a), and appropriate values (0 or
something) for the xid and cid columns in the scan tuple, using the List
(b).
Looks Ok to me, although, that way an FDW looses an ability to use its own
values for those columns, in case it wants to. For example, while using
postgres_fdw for sharding, it might help saving xmax, xmin, cmax, cmin from
the foreign server and use them while communicating with the foreign server.
Best regards,
Etsuro Fujita
--
Best Wishes,
Ashutosh Bapat
EnterpriseDB Corporation
The Postgres Database Company
On 2016/03/22 21:10, Ashutosh Bapat wrote:
On Tue, Mar 22, 2016 at 5:05 PM, Etsuro Fujita
<fujita.etsuro@lab.ntt.co.jp <mailto:fujita.etsuro@lab.ntt.co.jp>> wrote:
On 2016/03/22 14:54, Ashutosh Bapat wrote:
Earlier in this mail chain, I suggested that the core should
take care
of storing the values for these columns. But instead, I think, core
should provide functions which can be used by FDWs, if they want, to
return values for those columns. Something like Datum
get_syscol_value(RelOptInfo/Relation, attno). The function will
return
Datum 0 for most of the columns and table's OID for tableoid. My
0.02.
What I had in mind was (1) create_foreignscan_plan would create
Lists from the ForeignScan's fdw_scan_tlist: (a) indexes/OID values
of tableoids in fdw_scan_tlist, and (b) indexes of xids and cids in
fdw_scan_tlist, and then (2) ForeignNext would set the OID values
for the tableoid columns in the scan tuple, using the Lists (a), and
appropriate values (0 or something) for the xid and cid columns in
the scan tuple, using the List (b).
Looks Ok to me, although, that way an FDW looses an ability to use its
own values for those columns, in case it wants to. For example, while
using postgres_fdw for sharding, it might help saving xmax, xmin, cmax,
cmin from the foreign server and use them while communicating with the
foreign server.
Yeah, it might be the case.
On second thoughts, I changed my mind; I think it'd be better for the
FDW's to set values for tableoids, xids, and cids in the scan tuple.
The reason other than your suggestion is because expressions in
fdw_scan_tlist that contain such columns are not necessarily simple Vars
and because such expressions might be evaluated more efficiently by the
FDW than core. We assume in postgres_fdw that expressions in
fdw_scan_tlist are always simple Vars, though.
I'm not sure it's worth providing functions you suggested, because we
can't assume that columns in the scan tuple are always simple Var
columns, as I said above.
Best regards,
Etsuro Fujita
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Wed, Mar 23, 2016 at 8:20 AM, Etsuro Fujita <fujita.etsuro@lab.ntt.co.jp>
wrote:
On 2016/03/22 21:10, Ashutosh Bapat wrote:
On Tue, Mar 22, 2016 at 5:05 PM, Etsuro Fujita
<fujita.etsuro@lab.ntt.co.jp <mailto:fujita.etsuro@lab.ntt.co.jp>> wrote:
On 2016/03/22 14:54, Ashutosh Bapat wrote:
Earlier in this mail chain, I suggested that the core should
take care
of storing the values for these columns. But instead, I think,
core
should provide functions which can be used by FDWs, if they want,
to
return values for those columns. Something like Datum
get_syscol_value(RelOptInfo/Relation, attno). The function will
return
Datum 0 for most of the columns and table's OID for tableoid. My
0.02.What I had in mind was (1) create_foreignscan_plan would create
Lists from the ForeignScan's fdw_scan_tlist: (a) indexes/OID values
of tableoids in fdw_scan_tlist, and (b) indexes of xids and cids in
fdw_scan_tlist, and then (2) ForeignNext would set the OID values
for the tableoid columns in the scan tuple, using the Lists (a), and
appropriate values (0 or something) for the xid and cid columns in
the scan tuple, using the List (b).Looks Ok to me, although, that way an FDW looses an ability to use its
own values for those columns, in case it wants to. For example, while
using postgres_fdw for sharding, it might help saving xmax, xmin, cmax,
cmin from the foreign server and use them while communicating with the
foreign server.Yeah, it might be the case.
On second thoughts, I changed my mind; I think it'd be better for the
FDW's to set values for tableoids, xids, and cids in the scan tuple. The
reason other than your suggestion is because expressions in fdw_scan_tlist
that contain such columns are not necessarily simple Vars and because such
expressions might be evaluated more efficiently by the FDW than core. We
assume in postgres_fdw that expressions in fdw_scan_tlist are always simple
Vars, though.I'm not sure it's worth providing functions you suggested, because we
can't assume that columns in the scan tuple are always simple Var columns,
as I said above.
An FDW can choose not to use those functions, so I don't see a connection
between scan list having simple Vars and existence of those functions
(actually a single one). But having those function would minimize the code
that each FDW has to write, in case they want those functions. E.g. we have
to translate Var::varno to tableoid in case that's requested by pulling RTE
and then getting oid out from there. If that functionality is available in
the core, 1. the code is not duplicated 2. every FDW will get the same
tableoid. Similarly for the other columns.
--
Best Wishes,
Ashutosh Bapat
EnterpriseDB Corporation
The Postgres Database Company
On 2016/03/23 13:44, Ashutosh Bapat wrote:
An FDW can choose not to use those functions, so I don't see a
connection between scan list having simple Vars and existence of those
functions (actually a single one). But having those function would
minimize the code that each FDW has to write, in case they want those
functions. E.g. we have to translate Var::varno to tableoid in case
that's requested by pulling RTE and then getting oid out from there. If
that functionality is available in the core, 1. the code is not
duplicated 2. every FDW will get the same tableoid. Similarly for the
other columns.
OK. Then, I'd like to propose a function that would create interger
Lists of indexes of tableoids, xids and cids plus an OID List of these
tableoids, in a given fdw_scan_tlist, on the assumption that each
expression in the fdw_scan_tlist is a simple Var. I'd also like to
propose another function that would fill system columns using these
Lists when creating a scan tuple.
Best regards,
Etsuro Fujita
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Thu, Mar 24, 2016 at 9:31 AM, Etsuro Fujita <fujita.etsuro@lab.ntt.co.jp>
wrote:
On 2016/03/23 13:44, Ashutosh Bapat wrote:
An FDW can choose not to use those functions, so I don't see a
connection between scan list having simple Vars and existence of those
functions (actually a single one). But having those function would
minimize the code that each FDW has to write, in case they want those
functions. E.g. we have to translate Var::varno to tableoid in case
that's requested by pulling RTE and then getting oid out from there. If
that functionality is available in the core, 1. the code is not
duplicated 2. every FDW will get the same tableoid. Similarly for the
other columns.OK. Then, I'd like to propose a function that would create interger Lists
of indexes of tableoids, xids and cids plus
Ok,
an OID List of these tableoids,
I didn't get this.
in a given fdw_scan_tlist, on the assumption that each expression in the
fdw_scan_tlist is a simple Var.
I guess this is Ok. In fact, at least for now an expression involving any
of those columns is not pushable to the foreign server, as the expression
can not be evaluated there. So, if we come across such a case in further
pushdowns, we will need to have a different solution for pushing down such
target lists.
I'd also like to propose another function that would fill system columns
using these Lists when creating a scan tuple.Ok.
I had imagined that the code to extract the above lists and filling the
values in scan tuple will be in FDW. We only provide a function to supply
those values. But what you propose might actually be much practical.
--
Best Wishes,
Ashutosh Bapat
EnterpriseDB Corporation
The Postgres Database Company
A much simpler solution, that will work with postgres_fdw, might be to just
deparse these columns with whatever random values (except for tableoid)
they are expected to have in those places. Often these values can simply be
NULL or 0. For tableoid deparse it to 'oid value'::oid. Thus for a user
query
select t1.taleoid, t2.xmax, t1.c1, t2.c2 from t1 join t2 on (...) ... --
where t1 and t2 are foreign tables with same names on the foreign server.
the query sent to the foreign server would look like
select '15623'::oid, NULL, t1.c1, t2.c2 from t1 join t2 on (...) ... --
where '15623' is oid of t1 on local server.
This does spend more bandwidth than necessary and affect performance, here
is why the approach might be better,
1. It's not very common to request these system columns in a "join" query
involving foreign tables. Usually they will have user columns or ctid
(DMLs) but very rarely other system columns.
2. This allows expressions involving these system columns to be pushed
down, whenever we will start pushing them down in the targetlist.
3. The changes to the code are rather small. deparseColumnRef() will need
to produce the strings above instead of actual column names.
4. The approach will work with slight change, if and when, we need the
actual system column values from the foreign server. That time the above
function needs to deparse the column names instead of constant values.
Having to hardcode tableoid at the time of planning should be fine since
change in tableoid between planning and execution will trigger plan cache
invalidation. I haven't tried this though.
Sorry for bringing this solution late to the table.
On Thu, Mar 24, 2016 at 3:04 PM, Ashutosh Bapat <
ashutosh.bapat@enterprisedb.com> wrote:
On Thu, Mar 24, 2016 at 9:31 AM, Etsuro Fujita <
fujita.etsuro@lab.ntt.co.jp> wrote:On 2016/03/23 13:44, Ashutosh Bapat wrote:
An FDW can choose not to use those functions, so I don't see a
connection between scan list having simple Vars and existence of those
functions (actually a single one). But having those function would
minimize the code that each FDW has to write, in case they want those
functions. E.g. we have to translate Var::varno to tableoid in case
that's requested by pulling RTE and then getting oid out from there. If
that functionality is available in the core, 1. the code is not
duplicated 2. every FDW will get the same tableoid. Similarly for the
other columns.OK. Then, I'd like to propose a function that would create interger
Lists of indexes of tableoids, xids and cids plusOk,
an OID List of these tableoids,
I didn't get this.
in a given fdw_scan_tlist, on the assumption that each expression in the
fdw_scan_tlist is a simple Var.I guess this is Ok. In fact, at least for now an expression involving any
of those columns is not pushable to the foreign server, as the expression
can not be evaluated there. So, if we come across such a case in further
pushdowns, we will need to have a different solution for pushing down such
target lists.I'd also like to propose another function that would fill system columns
using these Lists when creating a scan tuple.Ok.
I had imagined that the code to extract the above lists and filling the
values in scan tuple will be in FDW. We only provide a function to supply
those values. But what you propose might actually be much practical.--
Best Wishes,
Ashutosh Bapat
EnterpriseDB Corporation
The Postgres Database Company
--
Best Wishes,
Ashutosh Bapat
EnterpriseDB Corporation
The Postgres Database Company
On 2016/03/25 13:37, Ashutosh Bapat wrote:
A much simpler solution, that will work with postgres_fdw, might be to
just deparse these columns with whatever random values (except for
tableoid) they are expected to have in those places. Often these values
can simply be NULL or 0. For tableoid deparse it to 'oid value'::oid.
Thus for a user queryselect t1.taleoid, t2.xmax, t1.c1, t2.c2 from t1 join t2 on (...) ... --
where t1 and t2 are foreign tables with same names on the foreign server.the query sent to the foreign server would look like
select '15623'::oid, NULL, t1.c1, t2.c2 from t1 join t2 on (...) ... --
where '15623' is oid of t1 on local server.This does spend more bandwidth than necessary and affect performance,
here is why the approach might be better,
1. It's not very common to request these system columns in a "join"
query involving foreign tables. Usually they will have user columns or
ctid (DMLs) but very rarely other system columns.
That may be true for now, but once we implement pair-wise join for two
distributedly-partitioned tables in which we can push down pair-wise
foreign joins, tableoid would be used in many cases, to identify child
tables for rows to come from.
2. This allows expressions involving these system columns to be pushed
down, whenever we will start pushing them down in the targetlist.3. The changes to the code are rather small. deparseColumnRef() will
need to produce the strings above instead of actual column names.4. The approach will work with slight change, if and when, we need the
actual system column values from the foreign server. That time the above
function needs to deparse the column names instead of constant values.
As you pointed out, spending more bandwidth than necessary seems a bit
inefficient.
The approach that we discussed would minimize the code for the FDW
author to write, by providing the support functions you proposed. I'll
post a patch for that early next week. (It would also minimize the
patch to push down UPDATE/DELETE on a foreign join, proposed in [1]/messages/by-id/56D57C4A.9000500@lab.ntt.co.jp,
which has the same issue as for handling system columns in a RETURNING
clause in such pushed-down UPDATE/DELETE. So I'd like to propose that
approach as a common functionality.)
Sorry for bringing this solution late to the table.
No problem.
Best regards,
Etsuro Fujita
[1]: /messages/by-id/56D57C4A.9000500@lab.ntt.co.jp
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 2016/03/25 17:16, Etsuro Fujita wrote:
The approach that we discussed would minimize the code for the FDW
author to write, by providing the support functions you proposed. I'll
post a patch for that early next week.
I added two helper functions: GetFdwScanTupleExtraData and
FillFdwScanTupleSysAttrs. The FDW author could use the former to get
info about system attributes other than ctids and oids in fdw_scan_tlist
during BeginForeignScan, and the latter to set values for these system
attributes during IterateForeignScan (InvalidTransactionId for
xmins/xmaxs, InvalidCommandId for cmins/cmaxs, and valid values for
tableoids). Attached is a proposed patch for that. I also slightly
simplified the changes to make_tuple_from_result_row and
conversion_error_callback made by the postgres_fdw join pushdown patch.
What do you think about that?
Best regards,
Etsuro Fujita
Attachments:
postgres-fdw-join-pushdown-syscol-handling-v2.patchapplication/x-download; name=postgres-fdw-join-pushdown-syscol-handling-v2.patchDownload
*** a/contrib/postgres_fdw/deparse.c
--- b/contrib/postgres_fdw/deparse.c
***************
*** 1091,1130 **** get_jointype_name(JoinType jointype)
*
* tlist is list of TargetEntry's which in turn contain Var nodes.
*
! * retrieved_attrs is the list of continuously increasing integers starting
! * from 1. It has same number of entries as tlist.
*/
static void
deparseExplicitTargetList(List *tlist, List **retrieved_attrs,
deparse_expr_cxt *context)
{
- ListCell *lc;
StringInfo buf = context->buf;
! int i = 0;
*retrieved_attrs = NIL;
foreach(lc, tlist)
{
TargetEntry *tle = (TargetEntry *) lfirst(lc);
Var *var;
- /* Extract expression if TargetEntry node */
Assert(IsA(tle, TargetEntry));
var = (Var *) tle->expr;
/* We expect only Var nodes here */
Assert(IsA(var, Var));
! if (i > 0)
! appendStringInfoString(buf, ", ");
! deparseVar(var, context);
! *retrieved_attrs = lappend_int(*retrieved_attrs, i + 1);
i++;
}
! if (i == 0)
appendStringInfoString(buf, "NULL");
}
--- 1091,1139 ----
*
* tlist is list of TargetEntry's which in turn contain Var nodes.
*
! * We create an integer List of the columns being retrieved, which is returned
! * to *retrieved_attrs.
*/
static void
deparseExplicitTargetList(List *tlist, List **retrieved_attrs,
deparse_expr_cxt *context)
{
StringInfo buf = context->buf;
! ListCell *lc;
! bool first;
! int i;
*retrieved_attrs = NIL;
+ i = 1;
+ first = true;
foreach(lc, tlist)
{
TargetEntry *tle = (TargetEntry *) lfirst(lc);
Var *var;
Assert(IsA(tle, TargetEntry));
var = (Var *) tle->expr;
/* We expect only Var nodes here */
Assert(IsA(var, Var));
! if (var->varattno >= 0 ||
! var->varattno == SelfItemPointerAttributeNumber ||
! var->varattno == ObjectIdAttributeNumber)
! {
! if (!first)
! appendStringInfoString(buf, ", ");
! first = false;
! deparseVar(var, context);
!
! *retrieved_attrs = lappend_int(*retrieved_attrs, i);
! }
i++;
}
! if (first)
appendStringInfoString(buf, "NULL");
}
*** a/contrib/postgres_fdw/expected/postgres_fdw.out
--- b/contrib/postgres_fdw/expected/postgres_fdw.out
***************
*** 1923,1928 **** SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM
--- 1923,1956 ----
1
(10 rows)
+ -- System columns, except ctid and oid, should not be retrieved from remote
+ EXPLAIN (COSTS false, VERBOSE)
+ SELECT t1.tableoid::regclass, t1.c1, t2.tableoid::regclass, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
+ QUERY PLAN
+ -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
+ Output: ((t1.tableoid)::regclass), t1.c1, ((t2.tableoid)::regclass), t2.c1, t1.c3
+ -> Foreign Scan
+ Output: (t1.tableoid)::regclass, t1.c1, (t2.tableoid)::regclass, t2.c1, t1.c3
+ Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
+ Remote SQL: SELECT r1."C 1", r1.c3, r2."C 1" FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) WHERE ((r1."C 1" = r2."C 1")) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST
+ (6 rows)
+
+ SELECT t1.tableoid::regclass, t1.c1, t2.tableoid::regclass, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
+ tableoid | c1 | tableoid | c1
+ ----------+-----+----------+-----
+ ft1 | 101 | ft2 | 101
+ ft1 | 102 | ft2 | 102
+ ft1 | 103 | ft2 | 103
+ ft1 | 104 | ft2 | 104
+ ft1 | 105 | ft2 | 105
+ ft1 | 106 | ft2 | 106
+ ft1 | 107 | ft2 | 107
+ ft1 | 108 | ft2 | 108
+ ft1 | 109 | ft2 | 109
+ ft1 | 110 | ft2 | 110
+ (10 rows)
+
-- create another user for permission, user mapping, effective user tests
CREATE USER view_owner;
-- grant privileges on ft4 and ft5 to view_owner
*** a/contrib/postgres_fdw/postgres_fdw.c
--- b/contrib/postgres_fdw/postgres_fdw.c
***************
*** 127,134 **** typedef struct PgFdwScanState
{
Relation rel; /* relcache entry for the foreign table. NULL
* for a foreign join scan. */
- TupleDesc tupdesc; /* tuple descriptor of scan */
AttInMetadata *attinmeta; /* attribute datatype conversion metadata */
/* extracted fdw_private data */
char *query; /* text of SELECT command */
--- 127,134 ----
{
Relation rel; /* relcache entry for the foreign table. NULL
* for a foreign join scan. */
AttInMetadata *attinmeta; /* attribute datatype conversion metadata */
+ FdwScanTupleExtraData *extra; /* info about result tup, if foreign join */
/* extracted fdw_private data */
char *query; /* text of SELECT command */
***************
*** 246,260 **** typedef struct PgFdwAnalyzeState
typedef struct ConversionLocation
{
Relation rel; /* foreign table's relcache entry. */
AttrNumber cur_attno; /* attribute number being processed, or 0 */
-
- /*
- * In case of foreign join push down, fdw_scan_tlist is used to identify
- * the Var node corresponding to the error location and
- * fsstate->ss.ps.state gives access to the RTEs of corresponding relation
- * to get the relation name and attribute name.
- */
- ForeignScanState *fsstate;
} ConversionLocation;
/* Callback argument for ec_member_matches_foreign */
--- 246,253 ----
typedef struct ConversionLocation
{
Relation rel; /* foreign table's relcache entry. */
+ FdwScanTupleExtraData *extra; /* info about result tup, if foreign join */
AttrNumber cur_attno; /* attribute number being processed, or 0 */
} ConversionLocation;
/* Callback argument for ec_member_matches_foreign */
***************
*** 395,402 **** static HeapTuple make_tuple_from_result_row(PGresult *res,
int row,
Relation rel,
AttInMetadata *attinmeta,
List *retrieved_attrs,
- ForeignScanState *fsstate,
MemoryContext temp_context);
static void conversion_error_callback(void *arg);
static bool foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel,
--- 388,395 ----
int row,
Relation rel,
AttInMetadata *attinmeta,
+ FdwScanTupleExtraData *extra,
List *retrieved_attrs,
MemoryContext temp_context);
static void conversion_error_callback(void *arg);
static bool foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel,
***************
*** 1235,1240 **** postgresBeginForeignScan(ForeignScanState *node, int eflags)
--- 1228,1234 ----
EState *estate = node->ss.ps.state;
PgFdwScanState *fsstate;
UserMapping *user;
+ TupleDesc tupdesc;
int numParams;
/*
***************
*** 1316,1326 **** postgresBeginForeignScan(ForeignScanState *node, int eflags)
* into local representation and error reporting during that process.
*/
if (fsplan->scan.scanrelid > 0)
! fsstate->tupdesc = RelationGetDescr(fsstate->rel);
else
! fsstate->tupdesc = node->ss.ss_ScanTupleSlot->tts_tupleDescriptor;
! fsstate->attinmeta = TupleDescGetAttInMetadata(fsstate->tupdesc);
/*
* Prepare for processing of parameters used in remote query, if any.
--- 1310,1327 ----
* into local representation and error reporting during that process.
*/
if (fsplan->scan.scanrelid > 0)
! tupdesc = RelationGetDescr(fsstate->rel);
else
! tupdesc = node->ss.ss_ScanTupleSlot->tts_tupleDescriptor;
! fsstate->attinmeta = TupleDescGetAttInMetadata(tupdesc);
!
! /* Create extra info for making scan tuples, if foreign join */
! if (fsplan->scan.scanrelid > 0)
! fsstate->extra = NULL;
! else
! fsstate->extra = GetFdwScanTupleExtraData(fsplan->fdw_scan_tlist,
! estate->es_range_table);
/*
* Prepare for processing of parameters used in remote query, if any.
***************
*** 2887,2894 **** fetch_more_data(ForeignScanState *node)
make_tuple_from_result_row(res, i,
fsstate->rel,
fsstate->attinmeta,
fsstate->retrieved_attrs,
- node,
fsstate->temp_cxt);
}
--- 2888,2895 ----
make_tuple_from_result_row(res, i,
fsstate->rel,
fsstate->attinmeta,
+ fsstate->extra,
fsstate->retrieved_attrs,
fsstate->temp_cxt);
}
***************
*** 3106,3113 **** store_returning_result(PgFdwModifyState *fmstate,
newtup = make_tuple_from_result_row(res, 0,
fmstate->rel,
fmstate->attinmeta,
- fmstate->retrieved_attrs,
NULL,
fmstate->temp_cxt);
/* tuple will be deleted when it is cleared from the slot */
ExecStoreTuple(newtup, slot, InvalidBuffer, true);
--- 3107,3114 ----
newtup = make_tuple_from_result_row(res, 0,
fmstate->rel,
fmstate->attinmeta,
NULL,
+ fmstate->retrieved_attrs,
fmstate->temp_cxt);
/* tuple will be deleted when it is cleared from the slot */
ExecStoreTuple(newtup, slot, InvalidBuffer, true);
***************
*** 3207,3214 **** get_returning_data(ForeignScanState *node)
dmstate->next_tuple,
dmstate->rel,
dmstate->attinmeta,
- dmstate->retrieved_attrs,
NULL,
dmstate->temp_cxt);
ExecStoreTuple(newtup, slot, InvalidBuffer, false);
}
--- 3208,3215 ----
dmstate->next_tuple,
dmstate->rel,
dmstate->attinmeta,
NULL,
+ dmstate->retrieved_attrs,
dmstate->temp_cxt);
ExecStoreTuple(newtup, slot, InvalidBuffer, false);
}
***************
*** 3609,3616 **** analyze_row_processor(PGresult *res, int row, PgFdwAnalyzeState *astate)
astate->rows[pos] = make_tuple_from_result_row(res, row,
astate->rel,
astate->attinmeta,
- astate->retrieved_attrs,
NULL,
astate->temp_cxt);
MemoryContextSwitchTo(oldcontext);
--- 3610,3617 ----
astate->rows[pos] = make_tuple_from_result_row(res, row,
astate->rel,
astate->attinmeta,
NULL,
+ astate->retrieved_attrs,
astate->temp_cxt);
MemoryContextSwitchTo(oldcontext);
***************
*** 4289,4296 **** make_tuple_from_result_row(PGresult *res,
int row,
Relation rel,
AttInMetadata *attinmeta,
List *retrieved_attrs,
- ForeignScanState *fsstate,
MemoryContext temp_context)
{
HeapTuple tuple;
--- 4290,4297 ----
int row,
Relation rel,
AttInMetadata *attinmeta,
+ FdwScanTupleExtraData *extra,
List *retrieved_attrs,
MemoryContext temp_context)
{
HeapTuple tuple;
***************
*** 4317,4327 **** make_tuple_from_result_row(PGresult *res,
tupdesc = RelationGetDescr(rel);
else
{
! PgFdwScanState *fdw_sstate;
!
! Assert(fsstate);
! fdw_sstate = (PgFdwScanState *) fsstate->fdw_state;
! tupdesc = fdw_sstate->tupdesc;
}
values = (Datum *) palloc0(tupdesc->natts * sizeof(Datum));
--- 4318,4325 ----
tupdesc = RelationGetDescr(rel);
else
{
! Assert(extra);
! tupdesc = attinmeta->tupdesc;
}
values = (Datum *) palloc0(tupdesc->natts * sizeof(Datum));
***************
*** 4333,4340 **** make_tuple_from_result_row(PGresult *res,
* Set up and install callback to report where conversion error occurs.
*/
errpos.rel = rel;
errpos.cur_attno = 0;
- errpos.fsstate = fsstate;
errcallback.callback = conversion_error_callback;
errcallback.arg = (void *) &errpos;
errcallback.previous = error_context_stack;
--- 4331,4338 ----
* Set up and install callback to report where conversion error occurs.
*/
errpos.rel = rel;
+ errpos.extra = extra;
errpos.cur_attno = 0;
errcallback.callback = conversion_error_callback;
errcallback.arg = (void *) &errpos;
errcallback.previous = error_context_stack;
***************
*** 4399,4404 **** make_tuple_from_result_row(PGresult *res,
--- 4397,4408 ----
*/
MemoryContextSwitchTo(oldcontext);
+ /*
+ * Set values for xmins/xmaxs, cmins/cmaxs, and tableoids, if foreign join.
+ */
+ if (extra && extra->has_sys_attrs)
+ FillFdwScanTupleSysAttrs(extra, values, nulls);
+
tuple = heap_form_tuple(tupdesc, values, nulls);
/*
***************
*** 4442,4462 **** conversion_error_callback(void *arg)
else
{
/* error occurred in a scan against a foreign join */
- ForeignScanState *fsstate = errpos->fsstate;
- ForeignScan *fsplan = (ForeignScan *) fsstate->ss.ps.plan;
- EState *estate = fsstate->ss.ps.state;
TargetEntry *tle;
Var *var;
RangeTblEntry *rte;
! Assert(IsA(fsplan, ForeignScan));
! tle = (TargetEntry *) list_nth(fsplan->fdw_scan_tlist,
errpos->cur_attno - 1);
Assert(IsA(tle, TargetEntry));
var = (Var *) tle->expr;
Assert(IsA(var, Var));
! rte = rt_fetch(var->varno, estate->es_range_table);
relname = get_rel_name(rte->relid);
attname = get_relid_attribute_name(rte->relid, var->varattno);
}
--- 4446,4463 ----
else
{
/* error occurred in a scan against a foreign join */
TargetEntry *tle;
Var *var;
RangeTblEntry *rte;
! Assert(errpos->cur_attno > 0);
! tle = (TargetEntry *) list_nth(errpos->extra->fdw_scan_tlist,
errpos->cur_attno - 1);
Assert(IsA(tle, TargetEntry));
var = (Var *) tle->expr;
Assert(IsA(var, Var));
! rte = rt_fetch(var->varno, errpos->extra->range_table);
relname = get_rel_name(rte->relid);
attname = get_relid_attribute_name(rte->relid, var->varattno);
}
*** a/contrib/postgres_fdw/sql/postgres_fdw.sql
--- b/contrib/postgres_fdw/sql/postgres_fdw.sql
***************
*** 466,471 **** SELECT t1c1, avg(t1c1 + t2c1) FROM (SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2
--- 466,475 ----
EXPLAIN (COSTS false, VERBOSE)
SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM ft1 t2, ft2 t3 WHERE t2.c1 = t3.c1 AND t2.c2 = t1.c2) q ORDER BY t1."C 1" OFFSET 10 LIMIT 10;
SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM ft1 t2, ft2 t3 WHERE t2.c1 = t3.c1 AND t2.c2 = t1.c2) q ORDER BY t1."C 1" OFFSET 10 LIMIT 10;
+ -- System columns, except ctid and oid, should not be retrieved from remote
+ EXPLAIN (COSTS false, VERBOSE)
+ SELECT t1.tableoid::regclass, t1.c1, t2.tableoid::regclass, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
+ SELECT t1.tableoid::regclass, t1.c1, t2.tableoid::regclass, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
-- create another user for permission, user mapping, effective user tests
CREATE USER view_owner;
*** a/src/backend/foreign/foreign.c
--- b/src/backend/foreign/foreign.c
***************
*** 14,19 ****
--- 14,20 ----
#include "access/htup_details.h"
#include "access/reloptions.h"
+ #include "access/sysattr.h"
#include "catalog/pg_foreign_data_wrapper.h"
#include "catalog/pg_foreign_server.h"
#include "catalog/pg_foreign_table.h"
***************
*** 22,27 ****
--- 23,29 ----
#include "foreign/foreign.h"
#include "lib/stringinfo.h"
#include "miscadmin.h"
+ #include "parser/parsetree.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
#include "utils/rel.h"
***************
*** 909,911 **** GetExistingLocalJoinPath(RelOptInfo *joinrel)
--- 911,1048 ----
}
return NULL;
}
+
+ /*
+ * Get extra information for making scan tuples for a foreign join
+ *
+ * Note: we assume that an expression in each TargetEntry in fdw_scan_tlist
+ * is a simple Var.
+ */
+ FdwScanTupleExtraData *
+ GetFdwScanTupleExtraData(List *fdw_scan_tlist, List *range_table)
+ {
+ FdwScanTupleExtraData *extra;
+ List *xmin_attrs = NIL;
+ List *cmin_attrs = NIL;
+ List *xmax_attrs = NIL;
+ List *cmax_attrs = NIL;
+ List *tableoid_attrs = NIL;
+ List *tableoid_values = NIL;
+ RangeTblEntry *rte;
+ ListCell *lc;
+ int i;
+
+ i = 1;
+ foreach(lc, fdw_scan_tlist)
+ {
+ TargetEntry *tle = (TargetEntry *) lfirst(lc);
+ Var *var = (Var *) tle->expr;
+
+ if (!IsA(var, Var))
+ elog(ERROR, "unexpected node type in fdw_scan_tlist target entry: %d",
+ (int) nodeTag(var));
+
+ if (var->varattno < 0)
+ {
+ switch (var->varattno)
+ {
+ case SelfItemPointerAttributeNumber:
+ case ObjectIdAttributeNumber:
+ break;
+ case MinTransactionIdAttributeNumber:
+ xmin_attrs = lappend_int(xmin_attrs, i);
+ break;
+ case MinCommandIdAttributeNumber:
+ cmin_attrs = lappend_int(cmin_attrs, i);
+ break;
+ case MaxTransactionIdAttributeNumber:
+ xmax_attrs = lappend_int(xmax_attrs, i);
+ break;
+ case MaxCommandIdAttributeNumber:
+ cmax_attrs = lappend_int(cmax_attrs, i);
+ break;
+ case TableOidAttributeNumber:
+ rte = rt_fetch(var->varno, range_table);
+ tableoid_attrs = lappend_int(tableoid_attrs, i);
+ tableoid_values = lappend_oid(tableoid_values, rte->relid);
+ break;
+ default:
+ elog(ERROR, "invalid attnum: %d", var->varattno);
+ break;
+ }
+ }
+
+ i++;
+ }
+
+ extra = (FdwScanTupleExtraData *) palloc0(sizeof(FdwScanTupleExtraData));
+ extra->fdw_scan_tlist = fdw_scan_tlist;
+ if (xmin_attrs || cmin_attrs || xmax_attrs || cmax_attrs || tableoid_attrs)
+ extra->has_sys_attrs = true;
+ else
+ extra->has_sys_attrs = false;
+ extra->xmin_attrs = xmin_attrs;
+ extra->cmin_attrs = cmin_attrs;
+ extra->xmax_attrs = xmax_attrs;
+ extra->cmax_attrs = cmax_attrs;
+ extra->tableoid_attrs = tableoid_attrs;
+ extra->tableoid_values = tableoid_values;
+ extra->range_table = range_table;
+
+ return extra;
+ }
+
+ /*
+ * Set values for xmins/xmaxs, cmins/cmaxs, and tableoids in a given tuple.
+ */
+ void
+ FillFdwScanTupleSysAttrs(FdwScanTupleExtraData *extra,
+ Datum *values, bool *nulls)
+ {
+ ListCell *lc;
+ ListCell *lc2;
+
+ Assert(extra);
+
+ /* Fill xmins/xmaxs with InvalidTransactionId */
+ foreach(lc, extra->xmin_attrs)
+ {
+ int i = lfirst_int(lc);
+
+ nulls[i - 1] = false;
+ values[i - 1] = TransactionIdGetDatum(InvalidTransactionId);
+ }
+ foreach(lc, extra->xmax_attrs)
+ {
+ int i = lfirst_int(lc);
+
+ nulls[i - 1] = false;
+ values[i - 1] = TransactionIdGetDatum(InvalidTransactionId);
+ }
+
+ /* Fill cmins/cmaxs with InvalidCommandId */
+ foreach(lc, extra->cmin_attrs)
+ {
+ int i = lfirst_int(lc);
+
+ nulls[i - 1] = false;
+ values[i - 1] = CommandIdGetDatum(InvalidCommandId);
+ }
+ foreach(lc, extra->cmax_attrs)
+ {
+ int i = lfirst_int(lc);
+
+ nulls[i - 1] = false;
+ values[i - 1] = CommandIdGetDatum(InvalidCommandId);
+ }
+
+ /* Fill tableoids with valid values */
+ forboth(lc, extra->tableoid_attrs, lc2, extra->tableoid_values)
+ {
+ int i = lfirst_int(lc);
+ Oid tableoid = lfirst_oid(lc2);
+
+ nulls[i - 1] = false;
+ values[i - 1] = ObjectIdGetDatum(tableoid);
+ }
+ }
*** a/src/include/foreign/fdwapi.h
--- b/src/include/foreign/fdwapi.h
***************
*** 224,229 **** typedef struct FdwRoutine
--- 224,246 ----
InitializeWorkerForeignScan_function InitializeWorkerForeignScan;
} FdwRoutine;
+ /*
+ * Struct for extra information for making scan tuples for a foreign join
+ */
+ typedef struct FdwScanTupleExtraData
+ {
+ List *fdw_scan_tlist; /* tlist describing the structure of tuple */
+ bool has_sys_attrs; /* does tuple contain any system attrs other
+ * than ctids and oids */
+ List *xmin_attrs; /* attribute numbers of xmins in tuple */
+ List *cmin_attrs; /* attribute numbers of cmins in tuple */
+ List *xmax_attrs; /* attribute numbers of xmaxs in tuple */
+ List *cmax_attrs; /* attribute numbers of cmaxs in tuple */
+ List *tableoid_attrs; /* attribute numbers of tableoids in tuple */
+ List *tableoid_values; /* OIDs of tableoids in tuple */
+ List *range_table; /* EState's list of RangeTblEntry */
+ } FdwScanTupleExtraData;
+
/* Functions in foreign/foreign.c */
extern FdwRoutine *GetFdwRoutine(Oid fdwhandler);
***************
*** 234,238 **** extern FdwRoutine *GetFdwRoutineForRelation(Relation relation, bool makecopy);
--- 251,259 ----
extern bool IsImportableForeignTable(const char *tablename,
ImportForeignSchemaStmt *stmt);
extern Path *GetExistingLocalJoinPath(RelOptInfo *joinrel);
+ extern FdwScanTupleExtraData *GetFdwScanTupleExtraData(List *fdw_scan_tlist,
+ List *range_table);
+ extern void FillFdwScanTupleSysAttrs(FdwScanTupleExtraData *extra,
+ Datum *values, bool *nulls);
#endif /* FDWAPI_H */
On 2016/03/29 15:37, Etsuro Fujita wrote:
I added two helper functions: GetFdwScanTupleExtraData and
FillFdwScanTupleSysAttrs. The FDW author could use the former to get
info about system attributes other than ctids and oids in fdw_scan_tlist
during BeginForeignScan, and the latter to set values for these system
attributes during IterateForeignScan (InvalidTransactionId for
xmins/xmaxs, InvalidCommandId for cmins/cmaxs, and valid values for
tableoids). Attached is a proposed patch for that. I also slightly
simplified the changes to make_tuple_from_result_row and
conversion_error_callback made by the postgres_fdw join pushdown patch.
What do you think about that?
I revised comments a little bit. Attached is an updated version of the
patch. I think this issue should be fixed in advance of the PostgreSQL
9.6beta1 release.
Best regards,
Etsuro Fujita
Attachments:
postgres-fdw-join-pushdown-syscol-handling-v3.patchtext/x-patch; name=postgres-fdw-join-pushdown-syscol-handling-v3.patchDownload
*** a/contrib/postgres_fdw/deparse.c
--- b/contrib/postgres_fdw/deparse.c
***************
*** 1091,1130 **** get_jointype_name(JoinType jointype)
*
* tlist is list of TargetEntry's which in turn contain Var nodes.
*
! * retrieved_attrs is the list of continuously increasing integers starting
! * from 1. It has same number of entries as tlist.
*/
static void
deparseExplicitTargetList(List *tlist, List **retrieved_attrs,
deparse_expr_cxt *context)
{
- ListCell *lc;
StringInfo buf = context->buf;
! int i = 0;
*retrieved_attrs = NIL;
foreach(lc, tlist)
{
TargetEntry *tle = (TargetEntry *) lfirst(lc);
Var *var;
- /* Extract expression if TargetEntry node */
Assert(IsA(tle, TargetEntry));
var = (Var *) tle->expr;
/* We expect only Var nodes here */
Assert(IsA(var, Var));
! if (i > 0)
! appendStringInfoString(buf, ", ");
! deparseVar(var, context);
! *retrieved_attrs = lappend_int(*retrieved_attrs, i + 1);
i++;
}
! if (i == 0)
appendStringInfoString(buf, "NULL");
}
--- 1091,1142 ----
*
* tlist is list of TargetEntry's which in turn contain Var nodes.
*
! * System columns other than ctid and oid are not retrieved from the remote
! * server, since we set values for such columns locally.
! *
! * We create an integer List of the columns being retrieved, which is returned
! * to *retrieved_attrs.
*/
static void
deparseExplicitTargetList(List *tlist, List **retrieved_attrs,
deparse_expr_cxt *context)
{
StringInfo buf = context->buf;
! ListCell *lc;
! bool first;
! int i;
*retrieved_attrs = NIL;
+ i = 1;
+ first = true;
foreach(lc, tlist)
{
TargetEntry *tle = (TargetEntry *) lfirst(lc);
Var *var;
Assert(IsA(tle, TargetEntry));
var = (Var *) tle->expr;
/* We expect only Var nodes here */
Assert(IsA(var, Var));
! if (var->varattno >= 0 ||
! var->varattno == SelfItemPointerAttributeNumber ||
! var->varattno == ObjectIdAttributeNumber)
! {
! if (!first)
! appendStringInfoString(buf, ", ");
! first = false;
! deparseVar(var, context);
!
! *retrieved_attrs = lappend_int(*retrieved_attrs, i);
! }
i++;
}
! if (first)
appendStringInfoString(buf, "NULL");
}
*** a/contrib/postgres_fdw/expected/postgres_fdw.out
--- b/contrib/postgres_fdw/expected/postgres_fdw.out
***************
*** 1923,1928 **** SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM
--- 1923,1956 ----
1
(10 rows)
+ -- System columns, except ctid and oid, should not be retrieved from remote
+ EXPLAIN (COSTS false, VERBOSE)
+ SELECT t1.tableoid::regclass, t1.c1, t2.tableoid::regclass, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
+ QUERY PLAN
+ -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
+ Output: ((t1.tableoid)::regclass), t1.c1, ((t2.tableoid)::regclass), t2.c1, t1.c3
+ -> Foreign Scan
+ Output: (t1.tableoid)::regclass, t1.c1, (t2.tableoid)::regclass, t2.c1, t1.c3
+ Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
+ Remote SQL: SELECT r1."C 1", r1.c3, r2."C 1" FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) WHERE ((r1."C 1" = r2."C 1")) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST
+ (6 rows)
+
+ SELECT t1.tableoid::regclass, t1.c1, t2.tableoid::regclass, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
+ tableoid | c1 | tableoid | c1
+ ----------+-----+----------+-----
+ ft1 | 101 | ft2 | 101
+ ft1 | 102 | ft2 | 102
+ ft1 | 103 | ft2 | 103
+ ft1 | 104 | ft2 | 104
+ ft1 | 105 | ft2 | 105
+ ft1 | 106 | ft2 | 106
+ ft1 | 107 | ft2 | 107
+ ft1 | 108 | ft2 | 108
+ ft1 | 109 | ft2 | 109
+ ft1 | 110 | ft2 | 110
+ (10 rows)
+
-- create another user for permission, user mapping, effective user tests
CREATE USER view_owner;
-- grant privileges on ft4 and ft5 to view_owner
*** a/contrib/postgres_fdw/postgres_fdw.c
--- b/contrib/postgres_fdw/postgres_fdw.c
***************
*** 127,134 **** typedef struct PgFdwScanState
{
Relation rel; /* relcache entry for the foreign table. NULL
* for a foreign join scan. */
- TupleDesc tupdesc; /* tuple descriptor of scan */
AttInMetadata *attinmeta; /* attribute datatype conversion metadata */
/* extracted fdw_private data */
char *query; /* text of SELECT command */
--- 127,134 ----
{
Relation rel; /* relcache entry for the foreign table. NULL
* for a foreign join scan. */
AttInMetadata *attinmeta; /* attribute datatype conversion metadata */
+ FdwScanTupleExtraData *extra; /* info about result tup, if foreign join */
/* extracted fdw_private data */
char *query; /* text of SELECT command */
***************
*** 246,260 **** typedef struct PgFdwAnalyzeState
typedef struct ConversionLocation
{
Relation rel; /* foreign table's relcache entry. */
AttrNumber cur_attno; /* attribute number being processed, or 0 */
-
- /*
- * In case of foreign join push down, fdw_scan_tlist is used to identify
- * the Var node corresponding to the error location and
- * fsstate->ss.ps.state gives access to the RTEs of corresponding relation
- * to get the relation name and attribute name.
- */
- ForeignScanState *fsstate;
} ConversionLocation;
/* Callback argument for ec_member_matches_foreign */
--- 246,253 ----
typedef struct ConversionLocation
{
Relation rel; /* foreign table's relcache entry. */
+ FdwScanTupleExtraData *extra; /* info about result tup, if foreign join */
AttrNumber cur_attno; /* attribute number being processed, or 0 */
} ConversionLocation;
/* Callback argument for ec_member_matches_foreign */
***************
*** 395,402 **** static HeapTuple make_tuple_from_result_row(PGresult *res,
int row,
Relation rel,
AttInMetadata *attinmeta,
List *retrieved_attrs,
- ForeignScanState *fsstate,
MemoryContext temp_context);
static void conversion_error_callback(void *arg);
static bool foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel,
--- 388,395 ----
int row,
Relation rel,
AttInMetadata *attinmeta,
+ FdwScanTupleExtraData *extra,
List *retrieved_attrs,
MemoryContext temp_context);
static void conversion_error_callback(void *arg);
static bool foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel,
***************
*** 1235,1240 **** postgresBeginForeignScan(ForeignScanState *node, int eflags)
--- 1228,1234 ----
EState *estate = node->ss.ps.state;
PgFdwScanState *fsstate;
UserMapping *user;
+ TupleDesc tupdesc;
int numParams;
/*
***************
*** 1316,1326 **** postgresBeginForeignScan(ForeignScanState *node, int eflags)
* into local representation and error reporting during that process.
*/
if (fsplan->scan.scanrelid > 0)
! fsstate->tupdesc = RelationGetDescr(fsstate->rel);
else
! fsstate->tupdesc = node->ss.ss_ScanTupleSlot->tts_tupleDescriptor;
! fsstate->attinmeta = TupleDescGetAttInMetadata(fsstate->tupdesc);
/*
* Prepare for processing of parameters used in remote query, if any.
--- 1310,1327 ----
* into local representation and error reporting during that process.
*/
if (fsplan->scan.scanrelid > 0)
! tupdesc = RelationGetDescr(fsstate->rel);
else
! tupdesc = node->ss.ss_ScanTupleSlot->tts_tupleDescriptor;
! fsstate->attinmeta = TupleDescGetAttInMetadata(tupdesc);
!
! /* Create extra info for making scan tuples, if foreign join */
! if (fsplan->scan.scanrelid > 0)
! fsstate->extra = NULL;
! else
! fsstate->extra = GetFdwScanTupleExtraData(fsplan->fdw_scan_tlist,
! estate->es_range_table);
/*
* Prepare for processing of parameters used in remote query, if any.
***************
*** 2887,2894 **** fetch_more_data(ForeignScanState *node)
make_tuple_from_result_row(res, i,
fsstate->rel,
fsstate->attinmeta,
fsstate->retrieved_attrs,
- node,
fsstate->temp_cxt);
}
--- 2888,2895 ----
make_tuple_from_result_row(res, i,
fsstate->rel,
fsstate->attinmeta,
+ fsstate->extra,
fsstate->retrieved_attrs,
fsstate->temp_cxt);
}
***************
*** 3106,3113 **** store_returning_result(PgFdwModifyState *fmstate,
newtup = make_tuple_from_result_row(res, 0,
fmstate->rel,
fmstate->attinmeta,
- fmstate->retrieved_attrs,
NULL,
fmstate->temp_cxt);
/* tuple will be deleted when it is cleared from the slot */
ExecStoreTuple(newtup, slot, InvalidBuffer, true);
--- 3107,3114 ----
newtup = make_tuple_from_result_row(res, 0,
fmstate->rel,
fmstate->attinmeta,
NULL,
+ fmstate->retrieved_attrs,
fmstate->temp_cxt);
/* tuple will be deleted when it is cleared from the slot */
ExecStoreTuple(newtup, slot, InvalidBuffer, true);
***************
*** 3207,3214 **** get_returning_data(ForeignScanState *node)
dmstate->next_tuple,
dmstate->rel,
dmstate->attinmeta,
- dmstate->retrieved_attrs,
NULL,
dmstate->temp_cxt);
ExecStoreTuple(newtup, slot, InvalidBuffer, false);
}
--- 3208,3215 ----
dmstate->next_tuple,
dmstate->rel,
dmstate->attinmeta,
NULL,
+ dmstate->retrieved_attrs,
dmstate->temp_cxt);
ExecStoreTuple(newtup, slot, InvalidBuffer, false);
}
***************
*** 3609,3616 **** analyze_row_processor(PGresult *res, int row, PgFdwAnalyzeState *astate)
astate->rows[pos] = make_tuple_from_result_row(res, row,
astate->rel,
astate->attinmeta,
- astate->retrieved_attrs,
NULL,
astate->temp_cxt);
MemoryContextSwitchTo(oldcontext);
--- 3610,3617 ----
astate->rows[pos] = make_tuple_from_result_row(res, row,
astate->rel,
astate->attinmeta,
NULL,
+ astate->retrieved_attrs,
astate->temp_cxt);
MemoryContextSwitchTo(oldcontext);
***************
*** 4289,4296 **** make_tuple_from_result_row(PGresult *res,
int row,
Relation rel,
AttInMetadata *attinmeta,
List *retrieved_attrs,
- ForeignScanState *fsstate,
MemoryContext temp_context)
{
HeapTuple tuple;
--- 4290,4297 ----
int row,
Relation rel,
AttInMetadata *attinmeta,
+ FdwScanTupleExtraData *extra,
List *retrieved_attrs,
MemoryContext temp_context)
{
HeapTuple tuple;
***************
*** 4317,4327 **** make_tuple_from_result_row(PGresult *res,
tupdesc = RelationGetDescr(rel);
else
{
! PgFdwScanState *fdw_sstate;
!
! Assert(fsstate);
! fdw_sstate = (PgFdwScanState *) fsstate->fdw_state;
! tupdesc = fdw_sstate->tupdesc;
}
values = (Datum *) palloc0(tupdesc->natts * sizeof(Datum));
--- 4318,4325 ----
tupdesc = RelationGetDescr(rel);
else
{
! Assert(extra);
! tupdesc = attinmeta->tupdesc;
}
values = (Datum *) palloc0(tupdesc->natts * sizeof(Datum));
***************
*** 4333,4340 **** make_tuple_from_result_row(PGresult *res,
* Set up and install callback to report where conversion error occurs.
*/
errpos.rel = rel;
errpos.cur_attno = 0;
- errpos.fsstate = fsstate;
errcallback.callback = conversion_error_callback;
errcallback.arg = (void *) &errpos;
errcallback.previous = error_context_stack;
--- 4331,4338 ----
* Set up and install callback to report where conversion error occurs.
*/
errpos.rel = rel;
+ errpos.extra = extra;
errpos.cur_attno = 0;
errcallback.callback = conversion_error_callback;
errcallback.arg = (void *) &errpos;
errcallback.previous = error_context_stack;
***************
*** 4399,4404 **** make_tuple_from_result_row(PGresult *res,
--- 4397,4408 ----
*/
MemoryContextSwitchTo(oldcontext);
+ /*
+ * Set values for xmins/xmaxs, cmins/cmaxs, and tableoids, if foreign join.
+ */
+ if (extra && extra->has_sys_attrs)
+ FillFdwScanTupleSysAttrs(extra, values, nulls);
+
tuple = heap_form_tuple(tupdesc, values, nulls);
/*
***************
*** 4442,4462 **** conversion_error_callback(void *arg)
else
{
/* error occurred in a scan against a foreign join */
- ForeignScanState *fsstate = errpos->fsstate;
- ForeignScan *fsplan = (ForeignScan *) fsstate->ss.ps.plan;
- EState *estate = fsstate->ss.ps.state;
TargetEntry *tle;
Var *var;
RangeTblEntry *rte;
! Assert(IsA(fsplan, ForeignScan));
! tle = (TargetEntry *) list_nth(fsplan->fdw_scan_tlist,
errpos->cur_attno - 1);
Assert(IsA(tle, TargetEntry));
var = (Var *) tle->expr;
Assert(IsA(var, Var));
! rte = rt_fetch(var->varno, estate->es_range_table);
relname = get_rel_name(rte->relid);
attname = get_relid_attribute_name(rte->relid, var->varattno);
}
--- 4446,4463 ----
else
{
/* error occurred in a scan against a foreign join */
TargetEntry *tle;
Var *var;
RangeTblEntry *rte;
! Assert(errpos->cur_attno > 0);
! tle = (TargetEntry *) list_nth(errpos->extra->fdw_scan_tlist,
errpos->cur_attno - 1);
Assert(IsA(tle, TargetEntry));
var = (Var *) tle->expr;
Assert(IsA(var, Var));
! rte = rt_fetch(var->varno, errpos->extra->range_table);
relname = get_rel_name(rte->relid);
attname = get_relid_attribute_name(rte->relid, var->varattno);
}
*** a/contrib/postgres_fdw/sql/postgres_fdw.sql
--- b/contrib/postgres_fdw/sql/postgres_fdw.sql
***************
*** 466,471 **** SELECT t1c1, avg(t1c1 + t2c1) FROM (SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2
--- 466,475 ----
EXPLAIN (COSTS false, VERBOSE)
SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM ft1 t2, ft2 t3 WHERE t2.c1 = t3.c1 AND t2.c2 = t1.c2) q ORDER BY t1."C 1" OFFSET 10 LIMIT 10;
SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM ft1 t2, ft2 t3 WHERE t2.c1 = t3.c1 AND t2.c2 = t1.c2) q ORDER BY t1."C 1" OFFSET 10 LIMIT 10;
+ -- System columns, except ctid and oid, should not be retrieved from remote
+ EXPLAIN (COSTS false, VERBOSE)
+ SELECT t1.tableoid::regclass, t1.c1, t2.tableoid::regclass, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
+ SELECT t1.tableoid::regclass, t1.c1, t2.tableoid::regclass, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
-- create another user for permission, user mapping, effective user tests
CREATE USER view_owner;
*** a/src/backend/foreign/foreign.c
--- b/src/backend/foreign/foreign.c
***************
*** 14,19 ****
--- 14,20 ----
#include "access/htup_details.h"
#include "access/reloptions.h"
+ #include "access/sysattr.h"
#include "catalog/pg_foreign_data_wrapper.h"
#include "catalog/pg_foreign_server.h"
#include "catalog/pg_foreign_table.h"
***************
*** 22,27 ****
--- 23,29 ----
#include "foreign/foreign.h"
#include "lib/stringinfo.h"
#include "miscadmin.h"
+ #include "parser/parsetree.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
#include "utils/rel.h"
***************
*** 909,911 **** GetExistingLocalJoinPath(RelOptInfo *joinrel)
--- 911,1048 ----
}
return NULL;
}
+
+ /*
+ * Get extra information for making scan tuples described by fdw_scan_tlist
+ *
+ * Note: we assume that an expression in each TargetEntry in fdw_scan_tlist
+ * is a simple Var.
+ */
+ FdwScanTupleExtraData *
+ GetFdwScanTupleExtraData(List *fdw_scan_tlist, List *range_table)
+ {
+ FdwScanTupleExtraData *extra;
+ List *xmin_attrs = NIL;
+ List *cmin_attrs = NIL;
+ List *xmax_attrs = NIL;
+ List *cmax_attrs = NIL;
+ List *tableoid_attrs = NIL;
+ List *tableoid_values = NIL;
+ RangeTblEntry *rte;
+ ListCell *lc;
+ int i;
+
+ i = 1;
+ foreach(lc, fdw_scan_tlist)
+ {
+ TargetEntry *tle = (TargetEntry *) lfirst(lc);
+ Var *var = (Var *) tle->expr;
+
+ if (!IsA(var, Var))
+ elog(ERROR, "unexpected node type in fdw_scan_tlist target entry: %d",
+ (int) nodeTag(var));
+
+ if (var->varattno < 0)
+ {
+ switch (var->varattno)
+ {
+ case SelfItemPointerAttributeNumber:
+ case ObjectIdAttributeNumber:
+ break;
+ case MinTransactionIdAttributeNumber:
+ xmin_attrs = lappend_int(xmin_attrs, i);
+ break;
+ case MinCommandIdAttributeNumber:
+ cmin_attrs = lappend_int(cmin_attrs, i);
+ break;
+ case MaxTransactionIdAttributeNumber:
+ xmax_attrs = lappend_int(xmax_attrs, i);
+ break;
+ case MaxCommandIdAttributeNumber:
+ cmax_attrs = lappend_int(cmax_attrs, i);
+ break;
+ case TableOidAttributeNumber:
+ rte = rt_fetch(var->varno, range_table);
+ tableoid_attrs = lappend_int(tableoid_attrs, i);
+ tableoid_values = lappend_oid(tableoid_values, rte->relid);
+ break;
+ default:
+ elog(ERROR, "invalid attnum: %d", var->varattno);
+ break;
+ }
+ }
+
+ i++;
+ }
+
+ extra = (FdwScanTupleExtraData *) palloc0(sizeof(FdwScanTupleExtraData));
+ extra->fdw_scan_tlist = fdw_scan_tlist;
+ if (xmin_attrs || cmin_attrs || xmax_attrs || cmax_attrs || tableoid_attrs)
+ extra->has_sys_attrs = true;
+ else
+ extra->has_sys_attrs = false;
+ extra->xmin_attrs = xmin_attrs;
+ extra->cmin_attrs = cmin_attrs;
+ extra->xmax_attrs = xmax_attrs;
+ extra->cmax_attrs = cmax_attrs;
+ extra->tableoid_attrs = tableoid_attrs;
+ extra->tableoid_values = tableoid_values;
+ extra->range_table = range_table;
+
+ return extra;
+ }
+
+ /*
+ * Set values for xmins/xmaxs, cmins/cmaxs, and tableoids in a given tuple.
+ */
+ void
+ FillFdwScanTupleSysAttrs(FdwScanTupleExtraData *extra,
+ Datum *values, bool *nulls)
+ {
+ ListCell *lc;
+ ListCell *lc2;
+
+ Assert(extra);
+
+ /* Fill xmins/xmaxs with InvalidTransactionId */
+ foreach(lc, extra->xmin_attrs)
+ {
+ int i = lfirst_int(lc);
+
+ nulls[i - 1] = false;
+ values[i - 1] = TransactionIdGetDatum(InvalidTransactionId);
+ }
+ foreach(lc, extra->xmax_attrs)
+ {
+ int i = lfirst_int(lc);
+
+ nulls[i - 1] = false;
+ values[i - 1] = TransactionIdGetDatum(InvalidTransactionId);
+ }
+
+ /* Fill cmins/cmaxs with InvalidCommandId */
+ foreach(lc, extra->cmin_attrs)
+ {
+ int i = lfirst_int(lc);
+
+ nulls[i - 1] = false;
+ values[i - 1] = CommandIdGetDatum(InvalidCommandId);
+ }
+ foreach(lc, extra->cmax_attrs)
+ {
+ int i = lfirst_int(lc);
+
+ nulls[i - 1] = false;
+ values[i - 1] = CommandIdGetDatum(InvalidCommandId);
+ }
+
+ /* Fill tableoids with valid values */
+ forboth(lc, extra->tableoid_attrs, lc2, extra->tableoid_values)
+ {
+ int i = lfirst_int(lc);
+ Oid tableoid = lfirst_oid(lc2);
+
+ nulls[i - 1] = false;
+ values[i - 1] = ObjectIdGetDatum(tableoid);
+ }
+ }
*** a/src/include/foreign/fdwapi.h
--- b/src/include/foreign/fdwapi.h
***************
*** 224,229 **** typedef struct FdwRoutine
--- 224,247 ----
InitializeWorkerForeignScan_function InitializeWorkerForeignScan;
} FdwRoutine;
+ /*
+ * Struct for extra information for making scan tuples described by
+ * fdw_scan_tlist
+ */
+ typedef struct FdwScanTupleExtraData
+ {
+ List *fdw_scan_tlist; /* tlist describing the structure of tuple */
+ bool has_sys_attrs; /* does tuple contain any system attrs other
+ * than ctids and oids */
+ List *xmin_attrs; /* attribute numbers of xmins in tuple */
+ List *cmin_attrs; /* attribute numbers of cmins in tuple */
+ List *xmax_attrs; /* attribute numbers of xmaxs in tuple */
+ List *cmax_attrs; /* attribute numbers of cmaxs in tuple */
+ List *tableoid_attrs; /* attribute numbers of tableoids in tuple */
+ List *tableoid_values; /* OIDs of tableoids in tuple */
+ List *range_table; /* EState's list of RangeTblEntry */
+ } FdwScanTupleExtraData;
+
/* Functions in foreign/foreign.c */
extern FdwRoutine *GetFdwRoutine(Oid fdwhandler);
***************
*** 234,238 **** extern FdwRoutine *GetFdwRoutineForRelation(Relation relation, bool makecopy);
--- 252,260 ----
extern bool IsImportableForeignTable(const char *tablename,
ImportForeignSchemaStmt *stmt);
extern Path *GetExistingLocalJoinPath(RelOptInfo *joinrel);
+ extern FdwScanTupleExtraData *GetFdwScanTupleExtraData(List *fdw_scan_tlist,
+ List *range_table);
+ extern void FillFdwScanTupleSysAttrs(FdwScanTupleExtraData *extra,
+ Datum *values, bool *nulls);
#endif /* FDWAPI_H */
With this patch, all instances of tableoid, cmin, cmax etc. will get a
non-NULL value irrespective of whether they appear on nullable side of the
join or not.
e.g. select t1.c1, t1.tableoid, t2.c1, t2.tableoid from ft4 t1 left join
ft5 t2 on (t1.c1 = t2.c1); run in contrib_regression gives output
c1 | tableoid | c1 | tableoid
-----+----------+----+----------
2 | 54282 | | 54285
4 | 54282 | | 54285
6 | 54282 | 6 | 54285
8 | 54282 | | 54285
10 | 54282 | | 54285
12 | 54282 | 12 | 54285
but the same query run on local tables select t1.c1, t1.tableoid, t2.c1,
t2.tableoid from "S 1"."T 3" t1 left join "S 1"."T 4" t2 on (t1.c1 =
t2.c1); gives output
c1 | tableoid | c1 | tableoid
-----+----------+----+----------
2 | 54258 | |
4 | 54258 | |
6 | 54258 | 6 | 54266
8 | 54258 | |
10 | 54258 | |
12 | 54258 | 12 | 54266
BTW, why do we want to set the column values with invalid values, and not
null? Wouldn't setting them NULL will be a better way?
On Tue, Apr 5, 2016 at 12:11 PM, Etsuro Fujita <fujita.etsuro@lab.ntt.co.jp>
wrote:
On 2016/03/29 15:37, Etsuro Fujita wrote:
I added two helper functions: GetFdwScanTupleExtraData and
FillFdwScanTupleSysAttrs. The FDW author could use the former to get
info about system attributes other than ctids and oids in fdw_scan_tlist
during BeginForeignScan, and the latter to set values for these system
attributes during IterateForeignScan (InvalidTransactionId for
xmins/xmaxs, InvalidCommandId for cmins/cmaxs, and valid values for
tableoids). Attached is a proposed patch for that. I also slightly
simplified the changes to make_tuple_from_result_row and
conversion_error_callback made by the postgres_fdw join pushdown patch.
What do you think about that?I revised comments a little bit. Attached is an updated version of the
patch. I think this issue should be fixed in advance of the PostgreSQL
9.6beta1 release.Best regards,
Etsuro Fujita
--
Best Wishes,
Ashutosh Bapat
EnterpriseDB Corporation
The Postgres Database Company
On Tue, Apr 05, 2016 at 03:41:00PM +0900, Etsuro Fujita wrote:
On 2016/03/29 15:37, Etsuro Fujita wrote:
I added two helper functions: GetFdwScanTupleExtraData and
FillFdwScanTupleSysAttrs. The FDW author could use the former to get
info about system attributes other than ctids and oids in fdw_scan_tlist
during BeginForeignScan, and the latter to set values for these system
attributes during IterateForeignScan (InvalidTransactionId for
xmins/xmaxs, InvalidCommandId for cmins/cmaxs, and valid values for
tableoids). Attached is a proposed patch for that. I also slightly
simplified the changes to make_tuple_from_result_row and
conversion_error_callback made by the postgres_fdw join pushdown patch.
What do you think about that?I revised comments a little bit. Attached is an updated version of the
patch. I think this issue should be fixed in advance of the PostgreSQL
9.6beta1 release.
Of the foreign table columns affected here, I bet only tableoid sees
non-negligible use. Even tableoid is not very prominent, so I would not have
thought of this as a beta1 blocker. What about this bug makes pre-beta1
resolution especially valuable?
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Wed, Apr 06, 2016 at 01:14:34AM -0400, Noah Misch wrote:
On Tue, Apr 05, 2016 at 03:41:00PM +0900, Etsuro Fujita wrote:
On 2016/03/29 15:37, Etsuro Fujita wrote:
I added two helper functions: GetFdwScanTupleExtraData and
FillFdwScanTupleSysAttrs. The FDW author could use the former to get
info about system attributes other than ctids and oids in fdw_scan_tlist
during BeginForeignScan, and the latter to set values for these system
attributes during IterateForeignScan (InvalidTransactionId for
xmins/xmaxs, InvalidCommandId for cmins/cmaxs, and valid values for
tableoids). Attached is a proposed patch for that. I also slightly
simplified the changes to make_tuple_from_result_row and
conversion_error_callback made by the postgres_fdw join pushdown patch.
What do you think about that?I revised comments a little bit. Attached is an updated version of the
patch. I think this issue should be fixed in advance of the PostgreSQL
9.6beta1 release.Of the foreign table columns affected here, I bet only tableoid sees
non-negligible use. Even tableoid is not very prominent, so I would not have
thought of this as a beta1 blocker. What about this bug makes pre-beta1
resolution especially valuable?
This will probably get resolved earlier if it enters the process now as a non
beta blocker, compared to entering the process later as a beta blocker. I'm
taking the liberty of doing that:
[This is a generic notification.]
The above-described topic is currently a PostgreSQL 9.6 open item. Robert,
since you committed the patch believed to have created it, you own this open
item. If that responsibility lies elsewhere, please let us know whose
responsibility it is to fix this. Since new open items may be discovered at
any time and I want to plan to have them all fixed well in advance of the ship
date, I will appreciate your efforts toward speedy resolution. Please
present, within 72 hours, a plan to fix the defect within seven days of this
message. Thanks.
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Tue, Apr 5, 2016 at 4:54 AM, Ashutosh Bapat
<ashutosh.bapat@enterprisedb.com> wrote:
With this patch, all instances of tableoid, cmin, cmax etc. will get a
non-NULL value irrespective of whether they appear on nullable side of the
join or not.e.g. select t1.c1, t1.tableoid, t2.c1, t2.tableoid from ft4 t1 left join ft5
t2 on (t1.c1 = t2.c1); run in contrib_regression gives output
c1 | tableoid | c1 | tableoid
-----+----------+----+----------
2 | 54282 | | 54285
4 | 54282 | | 54285
6 | 54282 | 6 | 54285
8 | 54282 | | 54285
10 | 54282 | | 54285
12 | 54282 | 12 | 54285but the same query run on local tables select t1.c1, t1.tableoid, t2.c1,
t2.tableoid from "S 1"."T 3" t1 left join "S 1"."T 4" t2 on (t1.c1 = t2.c1);
gives output
c1 | tableoid | c1 | tableoid
-----+----------+----+----------
2 | 54258 | |
4 | 54258 | |
6 | 54258 | 6 | 54266
8 | 54258 | |
10 | 54258 | |
12 | 54258 | 12 | 54266BTW, why do we want to set the column values with invalid values, and not
null? Wouldn't setting them NULL will be a better way?
I tend to favor zeroes rather than NULLs, because that's what we
typically use to represent an invalid value of those types, and I'm
not aware of any current case where those values are NULL.
Ashutosh's comment that "With this patch, all instances of tableoid,
cmin, cmax etc. will get a non-NULL value irrespective of whether they
appear on nullable side of the join or not." seems like something that
must be addressed before we can proceed here.
--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Wed, Apr 13, 2016 at 1:36 PM, Robert Haas <robertmhaas@gmail.com> wrote:
I tend to favor zeroes rather than NULLs, because that's what we
typically use to represent an invalid value of those types, and I'm
not aware of any current case where those values are NULL.
Actually, come to think of it, what we *really* need to do here is
make sure that the behavior in the join-pushdown case matches the
behavior in the join-not-pushed-down case.
CREATE EXTENSION postgres_fdw;
CREATE SERVER s1 FOREIGN DATA WRAPPER postgres_fdw;
CREATE USER MAPPING FOR public SERVER s1;
CREATE TABLE t1 (a integer, b text);
CREATE FOREIGN TABLE ft1 (a integer, b text) SERVER s1 OPTIONS
(table_name 't1');
INSERT INTO t1 VALUES (1, 'foo'), (2, 'bar'), (3, 'baz'), (4, 'quux');
Without join pushdown - this is what gets selected by default, sadly,
so the costing isn't working as hoped in this case:
rhaas=# select ft1.xmax, ft2.xmax, ft1.* from ft1, ft1 ft2 where ft1.a = ft2.a;
xmax | xmax | a | b
------------+------------+---+------
4294967295 | 4294967295 | 1 | foo
4294967295 | 4294967295 | 2 | bar
4294967295 | 4294967295 | 3 | baz
4294967295 | 4294967295 | 4 | quux
(4 rows)
With join pushdown, after disabling merge and hash joins:
rhaas=# select ft1.xmax, ft2.xmax, ft1.* from ft1, ft1 ft2 where ft1.a
= ft2.a;
xmax | xmax | a | b
------+------+---+------
0 | 0 | 1 | foo
0 | 0 | 2 | bar
0 | 0 | 3 | baz
0 | 0 | 4 | quux
(4 rows)
So, clearly that's not good. It should at least be consistent. But
more than that, the fact that postgres_fdw sets the xmax to 0xffffffff
is also pretty wacky. We might use such a value as a sentinel for
some data type, but for transaction IDs that's just some random normal
transaction ID, and it's NOT coming from t1. I haven't tracked down
where it *is* coming from yet, but can't imagine it's any place very
principled.
--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Wed, Apr 13, 2016 at 1:36 PM, Robert Haas <robertmhaas@gmail.com> wrote:
I tend to favor zeroes rather than NULLs, because that's what we
typically use to represent an invalid value of those types, and I'm
not aware of any current case where those values are NULL.
In fact, see heap_attisnull.
--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
Robert Haas <robertmhaas@gmail.com> writes:
On Wed, Apr 13, 2016 at 1:36 PM, Robert Haas <robertmhaas@gmail.com> wrote:
I tend to favor zeroes rather than NULLs, because that's what we
typically use to represent an invalid value of those types, and I'm
not aware of any current case where those values are NULL.
In fact, see heap_attisnull.
Right, a table's system columns cannot be null at the table-scan level.
(But they could go to null above an outer join.)
regards, tom lane
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Wed, Apr 13, 2016 at 2:11 PM, Robert Haas <robertmhaas@gmail.com> wrote:
So, clearly that's not good. It should at least be consistent. But
more than that, the fact that postgres_fdw sets the xmax to 0xffffffff
is also pretty wacky. We might use such a value as a sentinel for
some data type, but for transaction IDs that's just some random normal
transaction ID, and it's NOT coming from t1. I haven't tracked down
where it *is* coming from yet, but can't imagine it's any place very
principled.
And, yeah, it's not very principled.
rhaas=# select ft1.xmin, ft1.xmax, ft1.cmin from ft1;
xmin | xmax | cmin
------+------------+-------
96 | 4294967295 | 16392
96 | 4294967295 | 16392
96 | 4294967295 | 16392
96 | 4294967295 | 16392
(4 rows)
What's happening here is that heap_getattr() is being applied to a
HeapTupleHeaderData which contains DatumTupleFields. So 96 is
datum_len_, 4294967295 is the -1 recorded in datum_typmod, and 16392
is the compose type OID recorded in datum_typeid, which happens in
this case to be the OID of ft1. Isn't that special?
It's hard for me to view this as anything other than a bug in
postgres_fdw - which of course means that this open item boils down to
the complaint that the way system columns are handled by join pushdown
isn't bug-compatible with the existing behavior....
--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Wed, Apr 13, 2016 at 2:36 PM, Robert Haas <robertmhaas@gmail.com> wrote:
On Wed, Apr 13, 2016 at 2:11 PM, Robert Haas <robertmhaas@gmail.com> wrote:
So, clearly that's not good. It should at least be consistent. But
more than that, the fact that postgres_fdw sets the xmax to 0xffffffff
is also pretty wacky. We might use such a value as a sentinel for
some data type, but for transaction IDs that's just some random normal
transaction ID, and it's NOT coming from t1. I haven't tracked down
where it *is* coming from yet, but can't imagine it's any place very
principled.And, yeah, it's not very principled.
rhaas=# select ft1.xmin, ft1.xmax, ft1.cmin from ft1;
xmin | xmax | cmin
------+------------+-------
96 | 4294967295 | 16392
96 | 4294967295 | 16392
96 | 4294967295 | 16392
96 | 4294967295 | 16392
(4 rows)What's happening here is that heap_getattr() is being applied to a
HeapTupleHeaderData which contains DatumTupleFields. So 96 is
datum_len_, 4294967295 is the -1 recorded in datum_typmod, and 16392
is the compose type OID recorded in datum_typeid, which happens in
this case to be the OID of ft1. Isn't that special?It's hard for me to view this as anything other than a bug in
postgres_fdw - which of course means that this open item boils down to
the complaint that the way system columns are handled by join pushdown
isn't bug-compatible with the existing behavior....
OK, here's a patch. What I did is:
1. For a regular FDW scan, zero the xmin, xmax, and cid of the tuple
before returning it from postgres_fdw, so that we don't expose the
datum-tuple fields. I can't see any reason this isn't safe, but I
might be missing something.
2. When a join is pushed down, deparse system columns using something
like "CASE WHEN r1.* IS NOT NULL THEN 0 END", except for the table OID
column, which gets deparsed with the table OID in place of 0. This
delivers the correct behavior in the presence of outer joins.
Review appreciated.
--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
Attachments:
postgres-fdw-syscol-zap.patchapplication/x-patch; name=postgres-fdw-syscol-zap.patchDownload
diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index bdc410d..1c2f165 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -1571,13 +1571,38 @@ deparseColumnRef(StringInfo buf, int varno, int varattno, PlannerInfo *root,
{
RangeTblEntry *rte;
- /* varattno can be a whole-row reference, ctid or a regular table column */
if (varattno == SelfItemPointerAttributeNumber)
{
+ /* We support fetching the remote side's CTID. */
if (qualify_col)
ADD_REL_QUALIFIER(buf, varno);
appendStringInfoString(buf, "ctid");
}
+ else if (varattno < 0)
+ {
+ /*
+ * All other system attributes are fetched as 0, except for table OID,
+ * which is fetched as the local table OID. However, we must be
+ * careful; the table could be beneath an outer join, in which case
+ * it must go to NULL whenever the rest of the row does.
+ */
+ Oid fetchval = 0;
+
+ if (varattno == TableOidAttributeNumber)
+ {
+ rte = planner_rt_fetch(varno, root);
+ fetchval = rte->relid;
+ }
+
+ if (qualify_col)
+ {
+ appendStringInfoString(buf, "CASE WHEN ");
+ ADD_REL_QUALIFIER(buf, varno);
+ appendStringInfo(buf, "* IS NOT NULL THEN %u END", fetchval);
+ }
+ else
+ appendStringInfo(buf, "%u", fetchval);
+ }
else if (varattno == 0)
{
/* Whole row reference */
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index ee0220a..066cffb 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -4410,6 +4410,18 @@ make_tuple_from_result_row(PGresult *res,
if (ctid)
tuple->t_self = tuple->t_data->t_ctid = *ctid;
+ /*
+ * Stomp on the xmin, xmax, and cmin fields from the tuple created by
+ * heap_form_tuple. heap_form_tuple actually creates the tuple with
+ * DatumTupleFields, not HeapTupleFields, but the executor expects
+ * HeapTupleFields and will happily extract system columns on that
+ * assumption. If we don't do this then, for example, the tuple length
+ * ends up in the xmin field, which isn't what we want.
+ */
+ HeapTupleHeaderSetXmax(tuple->t_data, InvalidTransactionId);
+ HeapTupleHeaderSetXmin(tuple->t_data, InvalidTransactionId);
+ HeapTupleHeaderSetCmin(tuple->t_data, InvalidTransactionId);
+
/* Clean up */
MemoryContextReset(temp_context);
Robert Haas <robertmhaas@gmail.com> writes:
2. When a join is pushed down, deparse system columns using something
like "CASE WHEN r1.* IS NOT NULL THEN 0 END", except for the table OID
column, which gets deparsed with the table OID in place of 0. This
delivers the correct behavior in the presence of outer joins.
Um, why would that be necessary? Surely the correct things will happen
on the far end without that, if it's implementing the same join semantics
as the local query would have.
regards, tom lane
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Wed, Apr 13, 2016 at 5:33 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:
Robert Haas <robertmhaas@gmail.com> writes:
2. When a join is pushed down, deparse system columns using something
like "CASE WHEN r1.* IS NOT NULL THEN 0 END", except for the table OID
column, which gets deparsed with the table OID in place of 0. This
delivers the correct behavior in the presence of outer joins.Um, why would that be necessary? Surely the correct things will happen
on the far end without that, if it's implementing the same join semantics
as the local query would have.
I don't know exactly what you mean by "without that". Currently, the
situation is:
1. When postgres_fdw scans a baserel, the xmin, xmax, and cmin/cmax
fields reflect the reinterpreted contents of the datumtuple header
fields. Blech.
2. When postgres_fdw scans a joinrel (the join is pushed down), any
references to xmin/xmax/cmin/cmax reflect the values of those fields
on the remote sides.
#1 is obviously stupid, although maybe not that stupid since nobody
cared enough to fix it before now. #2 is arguably correct, but I
figure it's not a good idea to display the remote values of those
fields on the local system - local transaction IDs and remote
transaction IDs exist in two different XID spaces, and I think
shouldn't be conflated. So what I'm proposing to do is standardize on
this:
When postgres_fdw scans a baserel *or* a joinrel, any references to
xmin/xmax/cmin/cmax are read as 0.
There are alternatives. We could decide that the joinrel case (which,
BTW, is what the OP complained about) is right and the baserel case is
wrong, and change the baserel case to work like the joinrel case. I'm
not enamored of that, but it's not totally nuts. The one thing I'm
sure about is that the current baserel behavior is stupid.
--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 2016/04/14 4:57, Robert Haas wrote:
On Wed, Apr 13, 2016 at 2:36 PM, Robert Haas <robertmhaas@gmail.com> wrote:
On Wed, Apr 13, 2016 at 2:11 PM, Robert Haas <robertmhaas@gmail.com> wrote:
So, clearly that's not good. It should at least be consistent. But
more than that, the fact that postgres_fdw sets the xmax to 0xffffffff
is also pretty wacky. We might use such a value as a sentinel for
some data type, but for transaction IDs that's just some random normal
transaction ID, and it's NOT coming from t1. I haven't tracked down
where it *is* coming from yet, but can't imagine it's any place very
principled.And, yeah, it's not very principled.
rhaas=# select ft1.xmin, ft1.xmax, ft1.cmin from ft1;
xmin | xmax | cmin
------+------------+-------
96 | 4294967295 | 16392
96 | 4294967295 | 16392
96 | 4294967295 | 16392
96 | 4294967295 | 16392
(4 rows)What's happening here is that heap_getattr() is being applied to a
HeapTupleHeaderData which contains DatumTupleFields. So 96 is
datum_len_, 4294967295 is the -1 recorded in datum_typmod, and 16392
is the compose type OID recorded in datum_typeid, which happens in
this case to be the OID of ft1. Isn't that special?It's hard for me to view this as anything other than a bug in
postgres_fdw - which of course means that this open item boils down to
the complaint that the way system columns are handled by join pushdown
isn't bug-compatible with the existing behavior....
OK, here's a patch. What I did is:
Thank you for taking care of this.
1. For a regular FDW scan, zero the xmin, xmax, and cid of the tuple
before returning it from postgres_fdw, so that we don't expose the
datum-tuple fields. I can't see any reason this isn't safe, but I
might be missing something.
I'm not sure that is really safe.
2. When a join is pushed down, deparse system columns using something
like "CASE WHEN r1.* IS NOT NULL THEN 0 END", except for the table OID
column, which gets deparsed with the table OID in place of 0. This
delivers the correct behavior in the presence of outer joins.
I think that that would cause useless data transfer for such culumns.
Why not set values locally for such columns?
Best regards,
Etsuro Fujita
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 2016/04/14 12:04, Etsuro Fujita wrote:
On 2016/04/14 4:57, Robert Haas wrote:
2. When a join is pushed down, deparse system columns using something
like "CASE WHEN r1.* IS NOT NULL THEN 0 END", except for the table OID
column, which gets deparsed with the table OID in place of 0. This
delivers the correct behavior in the presence of outer joins.
I think that that would cause useless data transfer for such culumns.
Why not set values locally for such columns?
At least for the table OID.
Best regards,
Etsuro Fujita
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Wed, Apr 13, 2016 at 11:21 PM, Etsuro Fujita
<fujita.etsuro@lab.ntt.co.jp> wrote:
2. When a join is pushed down, deparse system columns using something
like "CASE WHEN r1.* IS NOT NULL THEN 0 END", except for the table OID
column, which gets deparsed with the table OID in place of 0. This
delivers the correct behavior in the presence of outer joins.I think that that would cause useless data transfer for such culumns.
Why not set values locally for such columns?
Because that doesn't work properly when there are outer joins
involved. I see no other way of doing that correctly that's anywhere
near as simple as this.
--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
Thanks Robert for the patch.
OK, here's a patch. What I did is:
1. For a regular FDW scan, zero the xmin, xmax, and cid of the tuple
before returning it from postgres_fdw, so that we don't expose the
datum-tuple fields. I can't see any reason this isn't safe, but I
might be missing something.2. When a join is pushed down, deparse system columns using something
like "CASE WHEN r1.* IS NOT NULL THEN 0 END", except for the table OID
column, which gets deparsed with the table OID in place of 0. This
delivers the correct behavior in the presence of outer joins.Review appreciated.
It looks good to me. It might be good to explain why we don't add CASE ..
END statement when qualify_col is false, i.e. "qualify_col being false
indicates that there is only one relation involved thus no join."
BTW, I noticed that we are deparsing whole-row reference as ROW(list of
columns from local definition of foreign table), which has the same problem
with outer joins. It won't be NULL when the rest of the row from that
relation is NULL in an outer join. It too needs to be encapsulated in CASE
WHEN .. END expression. PFA patch with that fix included and also some
testcases for system columns as well as whole-row references.
--
Best Wishes,
Ashutosh Bapat
EnterpriseDB Corporation
The Postgres Database Company
Attachments:
postgres-fdw-syscol-zap-ab.patchtext/x-diff; charset=US-ASCII; name=postgres-fdw-syscol-zap-ab.patchDownload
diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index bdc410d..35c27e7 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -1564,27 +1564,52 @@ deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs)
* If it has a column_name FDW option, use that instead of attribute name.
*
* If qualify_col is true, qualify column name with the alias of relation.
*/
static void
deparseColumnRef(StringInfo buf, int varno, int varattno, PlannerInfo *root,
bool qualify_col)
{
RangeTblEntry *rte;
- /* varattno can be a whole-row reference, ctid or a regular table column */
if (varattno == SelfItemPointerAttributeNumber)
{
+ /* We support fetching the remote side's CTID. */
if (qualify_col)
ADD_REL_QUALIFIER(buf, varno);
appendStringInfoString(buf, "ctid");
}
+ else if (varattno < 0)
+ {
+ /*
+ * All other system attributes are fetched as 0, except for table OID,
+ * which is fetched as the local table OID. However, we must be
+ * careful; the table could be beneath an outer join, in which case
+ * it must go to NULL whenever the rest of the row does.
+ */
+ Oid fetchval = 0;
+
+ if (varattno == TableOidAttributeNumber)
+ {
+ rte = planner_rt_fetch(varno, root);
+ fetchval = rte->relid;
+ }
+
+ if (qualify_col)
+ {
+ appendStringInfoString(buf, "CASE WHEN ");
+ ADD_REL_QUALIFIER(buf, varno);
+ appendStringInfo(buf, "* IS NOT NULL THEN %u END", fetchval);
+ }
+ else
+ appendStringInfo(buf, "%u", fetchval);
+ }
else if (varattno == 0)
{
/* Whole row reference */
Relation rel;
Bitmapset *attrs_used;
/* Required only to be passed down to deparseTargetList(). */
List *retrieved_attrs;
/* Get RangeTblEntry from array in PlannerInfo. */
@@ -1599,24 +1624,43 @@ deparseColumnRef(StringInfo buf, int varno, int varattno, PlannerInfo *root,
/*
* The local name of the foreign table can not be recognized by the
* foreign server and the table it references on foreign server might
* have different column ordering or different columns than those
* declared locally. Hence we have to deparse whole-row reference as
* ROW(columns referenced locally). Construct this by deparsing a
* "whole row" attribute.
*/
attrs_used = bms_add_member(NULL,
0 - FirstLowInvalidHeapAttributeNumber);
+
+ /*
+ * In case the whole-row reference is under an outer join then it has to
+ * go NULL whenver the rest of the row goes NULL. Deparsing a join query
+ * would always involve multiple relations, thus qualify_col would be
+ * true.
+ */
+ if (qualify_col)
+ {
+ appendStringInfoString(buf, "CASE WHEN ");
+ ADD_REL_QUALIFIER(buf, varno);
+ appendStringInfo(buf, "* IS NOT NULL THEN ");
+ }
+
appendStringInfoString(buf, "ROW(");
deparseTargetList(buf, root, varno, rel, false, attrs_used, qualify_col,
&retrieved_attrs);
appendStringInfoString(buf, ")");
+
+ /* Complete the CASE WHEN statement started above. */
+ if (qualify_col)
+ appendStringInfo(buf," END");
+
heap_close(rel, NoLock);
bms_free(attrs_used);
}
else
{
char *colname = NULL;
List *options;
ListCell *lc;
/* varno must not be any of OUTER_VAR, INNER_VAR and INDEX_VAR. */
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 50f1261..8eccc3f 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -1368,30 +1368,30 @@ SELECT t1.c1, t2.c1 FROM ft4 t1 FULL JOIN ft5 t2 ON (t1.c1 = t2.c1) WHERE (t1.c1
| 3
| 9
| 15
| 21
(10 rows)
-- join two tables with FOR UPDATE clause
-- tests whole-row reference for row marks
EXPLAIN (COSTS false, VERBOSE)
SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR UPDATE OF t1;
- QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ QUERY PLAN
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit
Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
-> LockRows
Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
-> Foreign Scan
Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
- Remote SQL: SELECT r1."C 1", r1.c3, ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8), r2."C 1", ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) WHERE ((r1."C 1" = r2."C 1")) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST FOR UPDATE OF r1
+ Remote SQL: SELECT r1."C 1", r1.c3, CASE WHEN r1.* IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, r2."C 1", CASE WHEN r2.* IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) WHERE ((r1."C 1" = r2."C 1")) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST FOR UPDATE OF r1
-> Merge Join
Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
Merge Cond: (t1.c1 = t2.c1)
-> Sort
Output: t1.c1, t1.c3, t1.*
Sort Key: t1.c1
-> Foreign Scan on public.ft1 t1
Output: t1.c1, t1.c3, t1.*
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR UPDATE
-> Sort
@@ -1412,30 +1412,30 @@ SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t
105 | 105
106 | 106
107 | 107
108 | 108
109 | 109
110 | 110
(10 rows)
EXPLAIN (COSTS false, VERBOSE)
SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR UPDATE;
- QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ QUERY PLAN
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit
Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
-> LockRows
Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
-> Foreign Scan
Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
- Remote SQL: SELECT r1."C 1", r1.c3, ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8), r2."C 1", ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) WHERE ((r1."C 1" = r2."C 1")) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST FOR UPDATE OF r1 FOR UPDATE OF r2
+ Remote SQL: SELECT r1."C 1", r1.c3, CASE WHEN r1.* IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, r2."C 1", CASE WHEN r2.* IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) WHERE ((r1."C 1" = r2."C 1")) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST FOR UPDATE OF r1 FOR UPDATE OF r2
-> Merge Join
Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
Merge Cond: (t1.c1 = t2.c1)
-> Sort
Output: t1.c1, t1.c3, t1.*
Sort Key: t1.c1
-> Foreign Scan on public.ft1 t1
Output: t1.c1, t1.c3, t1.*
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR UPDATE
-> Sort
@@ -1457,30 +1457,30 @@ SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t
106 | 106
107 | 107
108 | 108
109 | 109
110 | 110
(10 rows)
-- join two tables with FOR SHARE clause
EXPLAIN (COSTS false, VERBOSE)
SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE OF t1;
- QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ QUERY PLAN
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit
Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
-> LockRows
Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
-> Foreign Scan
Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
- Remote SQL: SELECT r1."C 1", r1.c3, ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8), r2."C 1", ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) WHERE ((r1."C 1" = r2."C 1")) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST FOR SHARE OF r1
+ Remote SQL: SELECT r1."C 1", r1.c3, CASE WHEN r1.* IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, r2."C 1", CASE WHEN r2.* IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) WHERE ((r1."C 1" = r2."C 1")) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST FOR SHARE OF r1
-> Merge Join
Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
Merge Cond: (t1.c1 = t2.c1)
-> Sort
Output: t1.c1, t1.c3, t1.*
Sort Key: t1.c1
-> Foreign Scan on public.ft1 t1
Output: t1.c1, t1.c3, t1.*
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR SHARE
-> Sort
@@ -1501,30 +1501,30 @@ SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t
105 | 105
106 | 106
107 | 107
108 | 108
109 | 109
110 | 110
(10 rows)
EXPLAIN (COSTS false, VERBOSE)
SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE;
- QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ QUERY PLAN
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit
Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
-> LockRows
Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
-> Foreign Scan
Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
- Remote SQL: SELECT r1."C 1", r1.c3, ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8), r2."C 1", ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) WHERE ((r1."C 1" = r2."C 1")) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST FOR SHARE OF r1 FOR SHARE OF r2
+ Remote SQL: SELECT r1."C 1", r1.c3, CASE WHEN r1.* IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, r2."C 1", CASE WHEN r2.* IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) WHERE ((r1."C 1" = r2."C 1")) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST FOR SHARE OF r1 FOR SHARE OF r2
-> Merge Join
Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
Merge Cond: (t1.c1 = t2.c1)
-> Sort
Output: t1.c1, t1.c3, t1.*
Sort Key: t1.c1
-> Foreign Scan on public.ft1 t1
Output: t1.c1, t1.c3, t1.*
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR SHARE
-> Sort
@@ -1580,30 +1580,209 @@ WITH t (c1_1, c1_3, c2_1) AS (SELECT t1.c1, t1.c3, t2.c1 FROM ft1 t1 JOIN ft2 t2
106 | 106
107 | 107
108 | 108
109 | 109
110 | 110
(10 rows)
-- ctid with whole-row reference
EXPLAIN (COSTS false, VERBOSE)
SELECT t1.ctid, t1, t2, t1.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
- QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ QUERY PLAN
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit
Output: t1.ctid, t1.*, t2.*, t1.c1, t1.c3
-> Foreign Scan
Output: t1.ctid, t1.*, t2.*, t1.c1, t1.c3
Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
- Remote SQL: SELECT r1.ctid, ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8), r1."C 1", r1.c3, ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) WHERE ((r1."C 1" = r2."C 1")) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST
+ Remote SQL: SELECT r1.ctid, CASE WHEN r1.* IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, r1."C 1", r1.c3, CASE WHEN r2.* IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) WHERE ((r1."C 1" = r2."C 1")) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST
(6 rows)
+-- system columns other than ctid whose values of foreign server are meaningless
+EXPLAIN (COSTS false, VERBOSE)
+SELECT t1.c1, t1.cmin, t1.xmin, t1.xmax, t1.tableoid::regclass, t2.cmax, t2.tableoid::regclass, t2.c1 FROM ft4 t1 JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 5;
+ QUERY PLAN
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
+ Output: t1.c1, t1.cmin, t1.xmin, t1.xmax, ((t1.tableoid)::regclass), t2.cmax, ((t2.tableoid)::regclass), t2.c1
+ -> Foreign Scan
+ Output: t1.c1, t1.cmin, t1.xmin, t1.xmax, (t1.tableoid)::regclass, t2.cmax, (t2.tableoid)::regclass, t2.c1
+ Relations: (public.ft4 t1) INNER JOIN (public.ft5 t2)
+ Remote SQL: SELECT r1.c1, CASE WHEN r1.* IS NOT NULL THEN 0 END, CASE WHEN r1.* IS NOT NULL THEN 0 END, CASE WHEN r1.* IS NOT NULL THEN 0 END, CASE WHEN r1.* IS NOT NULL THEN 16444 END, CASE WHEN r2.* IS NOT NULL THEN 0 END, CASE WHEN r2.* IS NOT NULL THEN 16447 END, r2.c1 FROM ("S 1"."T 3" r1 INNER JOIN "S 1"."T 4" r2 ON (TRUE)) WHERE ((r1.c1 = r2.c1)) ORDER BY r1.c1 ASC NULLS LAST
+(6 rows)
+
+SELECT t1.c1, t1.cmin, t1.xmin, t1.xmax, t1.tableoid::regclass, t2.cmax, t2.tableoid::regclass, t2.c1 FROM ft4 t1 JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 5;
+ c1 | cmin | xmin | xmax | tableoid | cmax | tableoid | c1
+----+------+------+------+----------+------+----------+----
+ 66 | 0 | 0 | 0 | ft4 | 0 | ft5 | 66
+ 72 | 0 | 0 | 0 | ft4 | 0 | ft5 | 72
+ 78 | 0 | 0 | 0 | ft4 | 0 | ft5 | 78
+ 84 | 0 | 0 | 0 | ft4 | 0 | ft5 | 84
+ 90 | 0 | 0 | 0 | ft4 | 0 | ft5 | 90
+(5 rows)
+
+-- system columns other than ctid whose values of foreign server are meaningless
+-- on nullable side of join
+EXPLAIN (COSTS false, VERBOSE)
+SELECT t1.c1, t1.cmin, t1.xmin, t1.xmax, t2.cmax, t2.c1 FROM ft4 t1 LEFT JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 5;
+ QUERY PLAN
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
+ Output: t1.c1, t1.cmin, t1.xmin, t1.xmax, t2.cmax, t2.c1
+ -> Foreign Scan
+ Output: t1.c1, t1.cmin, t1.xmin, t1.xmax, t2.cmax, t2.c1
+ Relations: (public.ft4 t1) LEFT JOIN (public.ft5 t2)
+ Remote SQL: SELECT r1.c1, CASE WHEN r1.* IS NOT NULL THEN 0 END, CASE WHEN r1.* IS NOT NULL THEN 0 END, CASE WHEN r1.* IS NOT NULL THEN 0 END, CASE WHEN r2.* IS NOT NULL THEN 0 END, r2.c1 FROM ("S 1"."T 3" r1 LEFT JOIN "S 1"."T 4" r2 ON (((r1.c1 = r2.c1)))) ORDER BY r1.c1 ASC NULLS LAST, r2.c1 ASC NULLS LAST
+(6 rows)
+
+SELECT t1.c1, t1.cmin, t1.xmin, t1.xmax, t2.cmax, t2.c1 FROM ft4 t1 LEFT JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 5;
+ c1 | cmin | xmin | xmax | cmax | c1
+----+------+------+------+------+----
+ 22 | 0 | 0 | 0 | |
+ 24 | 0 | 0 | 0 | 0 | 24
+ 26 | 0 | 0 | 0 | |
+ 28 | 0 | 0 | 0 | |
+ 30 | 0 | 0 | 0 | 0 | 30
+(5 rows)
+
+EXPLAIN (COSTS false, VERBOSE)
+SELECT t1.c1, t1.cmin, t1.xmin, t1.xmax, t2.cmax, t2.c1 FROM ft4 t1 FULL JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 45 LIMIT 10;
+ QUERY PLAN
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
+ Output: t1.c1, t1.cmin, t1.xmin, t1.xmax, t2.cmax, t2.c1
+ -> Foreign Scan
+ Output: t1.c1, t1.cmin, t1.xmin, t1.xmax, t2.cmax, t2.c1
+ Relations: (public.ft4 t1) FULL JOIN (public.ft5 t2)
+ Remote SQL: SELECT r1.c1, CASE WHEN r1.* IS NOT NULL THEN 0 END, CASE WHEN r1.* IS NOT NULL THEN 0 END, CASE WHEN r1.* IS NOT NULL THEN 0 END, CASE WHEN r2.* IS NOT NULL THEN 0 END, r2.c1 FROM ("S 1"."T 3" r1 FULL JOIN "S 1"."T 4" r2 ON (((r1.c1 = r2.c1)))) ORDER BY r1.c1 ASC NULLS LAST, r2.c1 ASC NULLS LAST
+(6 rows)
+
+SELECT t1.c1, t1.cmin, t1.xmin, t1.xmax, t2.cmax, t2.c1 FROM ft4 t1 FULL JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 45 LIMIT 10;
+ c1 | cmin | xmin | xmax | cmax | c1
+-----+------+------+------+------+----
+ 92 | 0 | 0 | 0 | |
+ 94 | 0 | 0 | 0 | |
+ 96 | 0 | 0 | 0 | 0 | 96
+ 98 | 0 | 0 | 0 | |
+ 100 | 0 | 0 | 0 | |
+ | | | | 0 | 3
+ | | | | 0 | 9
+ | | | | 0 | 15
+ | | | | 0 | 21
+ | | | | 0 | 27
+(10 rows)
+
+-- tableoid on nullable side of join
+EXPLAIN (COSTS false, VERBOSE)
+SELECT t1.c1, t1.tableoid::regclass, t2.tableoid::regclass, t2.c1 FROM ft4 t1 LEFT JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 5;
+ QUERY PLAN
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
+ Output: t1.c1, ((t1.tableoid)::regclass), ((t2.tableoid)::regclass), t2.c1
+ -> Foreign Scan
+ Output: t1.c1, (t1.tableoid)::regclass, (t2.tableoid)::regclass, t2.c1
+ Relations: (public.ft4 t1) LEFT JOIN (public.ft5 t2)
+ Remote SQL: SELECT r1.c1, CASE WHEN r1.* IS NOT NULL THEN 16444 END, CASE WHEN r2.* IS NOT NULL THEN 16447 END, r2.c1 FROM ("S 1"."T 3" r1 LEFT JOIN "S 1"."T 4" r2 ON (((r1.c1 = r2.c1)))) ORDER BY r1.c1 ASC NULLS LAST, r2.c1 ASC NULLS LAST
+(6 rows)
+
+SELECT t1.c1, t1.tableoid::regclass, t2.tableoid::regclass, t2.c1 FROM ft4 t1 LEFT JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 5;
+ c1 | tableoid | tableoid | c1
+----+----------+----------+----
+ 22 | ft4 | |
+ 24 | ft4 | ft5 | 24
+ 26 | ft4 | |
+ 28 | ft4 | |
+ 30 | ft4 | ft5 | 30
+(5 rows)
+
+EXPLAIN (COSTS false, VERBOSE)
+SELECT t1.c1, t1.tableoid::regclass, t2.tableoid::regclass, t2.c1 FROM ft4 t1 FULL JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 45 LIMIT 10;
+ QUERY PLAN
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
+ Output: t1.c1, ((t1.tableoid)::regclass), ((t2.tableoid)::regclass), t2.c1
+ -> Foreign Scan
+ Output: t1.c1, (t1.tableoid)::regclass, (t2.tableoid)::regclass, t2.c1
+ Relations: (public.ft4 t1) FULL JOIN (public.ft5 t2)
+ Remote SQL: SELECT r1.c1, CASE WHEN r1.* IS NOT NULL THEN 16444 END, CASE WHEN r2.* IS NOT NULL THEN 16447 END, r2.c1 FROM ("S 1"."T 3" r1 FULL JOIN "S 1"."T 4" r2 ON (((r1.c1 = r2.c1)))) ORDER BY r1.c1 ASC NULLS LAST, r2.c1 ASC NULLS LAST
+(6 rows)
+
+SELECT t1.c1, t1.tableoid::regclass, t2.tableoid::regclass, t2.c1 FROM ft4 t1 FULL JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 45 LIMIT 10;
+ c1 | tableoid | tableoid | c1
+-----+----------+----------+----
+ 92 | ft4 | |
+ 94 | ft4 | |
+ 96 | ft4 | ft5 | 96
+ 98 | ft4 | |
+ 100 | ft4 | |
+ | | ft5 | 3
+ | | ft5 | 9
+ | | ft5 | 15
+ | | ft5 | 21
+ | | ft5 | 27
+(10 rows)
+
+-- whole-row reference on nullable side of OUTER join
+EXPLAIN (COSTS false, VERBOSE)
+SELECT t1, t1.c1, t2, t2.c1 FROM ft4 t1 LEFT JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 10;
+ QUERY PLAN
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
+ Output: t1.*, t1.c1, t2.*, t2.c1
+ -> Foreign Scan
+ Output: t1.*, t1.c1, t2.*, t2.c1
+ Relations: (public.ft4 t1) LEFT JOIN (public.ft5 t2)
+ Remote SQL: SELECT CASE WHEN r1.* IS NOT NULL THEN ROW(r1.c1, r1.c2, r1.c3) END, r1.c1, CASE WHEN r2.* IS NOT NULL THEN ROW(r2.c1, r2.c2, r2.c3) END, r2.c1 FROM ("S 1"."T 3" r1 LEFT JOIN "S 1"."T 4" r2 ON (((r1.c1 = r2.c1)))) ORDER BY r1.c1 ASC NULLS LAST, r2.c1 ASC NULLS LAST
+(6 rows)
+
+SELECT t1, t1.c1, t2, t2.c1 FROM ft4 t1 LEFT JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 10;
+ t1 | c1 | t2 | c1
+----------------+----+----------------+----
+ (22,23,AAA022) | 22 | |
+ (24,25,AAA024) | 24 | (24,25,AAA024) | 24
+ (26,27,AAA026) | 26 | |
+ (28,29,AAA028) | 28 | |
+ (30,31,AAA030) | 30 | (30,31,AAA030) | 30
+ (32,33,AAA032) | 32 | |
+ (34,35,AAA034) | 34 | |
+ (36,37,AAA036) | 36 | (36,37,AAA036) | 36
+ (38,39,AAA038) | 38 | |
+ (40,41,AAA040) | 40 | |
+(10 rows)
+
+EXPLAIN (COSTS false, VERBOSE)
+SELECT t1, t1.c1, t2, t2.c1 FROM ft4 t1 FULL JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 45 LIMIT 10;
+ QUERY PLAN
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
+ Output: t1.*, t1.c1, t2.*, t2.c1
+ -> Foreign Scan
+ Output: t1.*, t1.c1, t2.*, t2.c1
+ Relations: (public.ft4 t1) FULL JOIN (public.ft5 t2)
+ Remote SQL: SELECT CASE WHEN r1.* IS NOT NULL THEN ROW(r1.c1, r1.c2, r1.c3) END, r1.c1, CASE WHEN r2.* IS NOT NULL THEN ROW(r2.c1, r2.c2, r2.c3) END, r2.c1 FROM ("S 1"."T 3" r1 FULL JOIN "S 1"."T 4" r2 ON (((r1.c1 = r2.c1)))) ORDER BY r1.c1 ASC NULLS LAST, r2.c1 ASC NULLS LAST
+(6 rows)
+
+SELECT t1, t1.c1, t2, t2.c1 FROM ft4 t1 FULL JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 45 LIMIT 10;
+ t1 | c1 | t2 | c1
+------------------+-----+----------------+----
+ (92,93,AAA092) | 92 | |
+ (94,95,AAA094) | 94 | |
+ (96,97,AAA096) | 96 | (96,97,AAA096) | 96
+ (98,99,AAA098) | 98 | |
+ (100,101,AAA100) | 100 | |
+ | | (3,4,AAA003) | 3
+ | | (9,10,AAA009) | 9
+ | | (15,16,AAA015) | 15
+ | | (21,22,AAA021) | 21
+ | | (27,28,AAA027) | 27
+(10 rows)
+
-- SEMI JOIN, not pushed down
EXPLAIN (COSTS false, VERBOSE)
SELECT t1.c1 FROM ft1 t1 WHERE EXISTS (SELECT 1 FROM ft2 t2 WHERE t1.c1 = t2.c1) ORDER BY t1.c1 OFFSET 100 LIMIT 10;
QUERY PLAN
---------------------------------------------------------------------------------------------
Limit
Output: t1.c1
-> Merge Semi Join
Output: t1.c1
Merge Cond: (t1.c1 = t2.c1)
@@ -2685,28 +2864,28 @@ UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING
977 | 407 | 00977_update7 | Thu Mar 19 00:00:00 1970 PST | Thu Mar 19 00:00:00 1970 | 7 | 7 | foo
987 | 407 | 00987_update7 | Sun Mar 29 00:00:00 1970 PST | Sun Mar 29 00:00:00 1970 | 7 | 7 | foo
997 | 407 | 00997_update7 | Wed Apr 08 00:00:00 1970 PST | Wed Apr 08 00:00:00 1970 | 7 | 7 | foo
1007 | 507 | 0000700007_update7 | | | | ft2 |
1017 | 507 | 0001700017_update7 | | | | ft2 |
(102 rows)
EXPLAIN (verbose, costs off)
UPDATE ft2 SET c2 = ft2.c2 + 500, c3 = ft2.c3 || '_update9', c7 = DEFAULT
FROM ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 9; -- can't be pushed down
- QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ QUERY PLAN
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Update on public.ft2
Remote SQL: UPDATE "S 1"."T 1" SET c2 = $2, c3 = $3, c7 = $4 WHERE ctid = $1
-> Foreign Scan
Output: ft2.c1, (ft2.c2 + 500), NULL::integer, (ft2.c3 || '_update9'::text), ft2.c4, ft2.c5, ft2.c6, 'ft2 '::character(10), ft2.c8, ft2.ctid, ft1.*
Relations: (public.ft2) INNER JOIN (public.ft1)
- Remote SQL: SELECT r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c8, r1.ctid, ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) WHERE ((r1.c2 = r2."C 1")) AND (((r2."C 1" % 10) = 9)) FOR UPDATE OF r1
+ Remote SQL: SELECT r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c8, r1.ctid, CASE WHEN r2.* IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) WHERE ((r1.c2 = r2."C 1")) AND (((r2."C 1" % 10) = 9)) FOR UPDATE OF r1
-> Hash Join
Output: ft2.c1, ft2.c2, ft2.c3, ft2.c4, ft2.c5, ft2.c6, ft2.c8, ft2.ctid, ft1.*
Hash Cond: (ft2.c2 = ft1.c1)
-> Foreign Scan on public.ft2
Output: ft2.c1, ft2.c2, ft2.c3, ft2.c4, ft2.c5, ft2.c6, ft2.c8, ft2.ctid
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c8, ctid FROM "S 1"."T 1" FOR UPDATE
-> Hash
Output: ft1.*, ft1.c1
-> Foreign Scan on public.ft1
Output: ft1.*, ft1.c1
@@ -2828,28 +3007,28 @@ DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4;
975 | Tue Mar 17 00:00:00 1970 PST
985 | Fri Mar 27 00:00:00 1970 PST
995 | Mon Apr 06 00:00:00 1970 PST
1005 |
1015 |
1105 |
(103 rows)
EXPLAIN (verbose, costs off)
DELETE FROM ft2 USING ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 2; -- can't be pushed down
- QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ QUERY PLAN
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Delete on public.ft2
Remote SQL: DELETE FROM "S 1"."T 1" WHERE ctid = $1
-> Foreign Scan
Output: ft2.ctid, ft1.*
Relations: (public.ft2) INNER JOIN (public.ft1)
- Remote SQL: SELECT r1.ctid, ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) WHERE ((r1.c2 = r2."C 1")) AND (((r2."C 1" % 10) = 2)) FOR UPDATE OF r1
+ Remote SQL: SELECT r1.ctid, CASE WHEN r2.* IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) WHERE ((r1.c2 = r2."C 1")) AND (((r2."C 1" % 10) = 2)) FOR UPDATE OF r1
-> Hash Join
Output: ft2.ctid, ft1.*
Hash Cond: (ft2.c2 = ft1.c1)
-> Foreign Scan on public.ft2
Output: ft2.ctid, ft2.c2
Remote SQL: SELECT c2, ctid FROM "S 1"."T 1" FOR UPDATE
-> Hash
Output: ft1.*, ft1.c1
-> Foreign Scan on public.ft1
Output: ft1.*, ft1.c1
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index ee0220a..066cffb 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -4403,20 +4403,32 @@ make_tuple_from_result_row(PGresult *res,
/*
* If we have a CTID to return, install it in both t_self and t_ctid.
* t_self is the normal place, but if the tuple is converted to a
* composite Datum, t_self will be lost; setting t_ctid allows CTID to be
* preserved during EvalPlanQual re-evaluations (see ROW_MARK_COPY code).
*/
if (ctid)
tuple->t_self = tuple->t_data->t_ctid = *ctid;
+ /*
+ * Stomp on the xmin, xmax, and cmin fields from the tuple created by
+ * heap_form_tuple. heap_form_tuple actually creates the tuple with
+ * DatumTupleFields, not HeapTupleFields, but the executor expects
+ * HeapTupleFields and will happily extract system columns on that
+ * assumption. If we don't do this then, for example, the tuple length
+ * ends up in the xmin field, which isn't what we want.
+ */
+ HeapTupleHeaderSetXmax(tuple->t_data, InvalidTransactionId);
+ HeapTupleHeaderSetXmin(tuple->t_data, InvalidTransactionId);
+ HeapTupleHeaderSetCmin(tuple->t_data, InvalidTransactionId);
+
/* Clean up */
MemoryContextReset(temp_context);
return tuple;
}
/*
* Callback function which is called when error occurs during column value
* conversion. Print names of column and relation.
*/
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index f420b23..6b240c4 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -419,20 +419,46 @@ SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t
EXPLAIN (COSTS false, VERBOSE)
SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE;
SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE;
-- join in CTE
EXPLAIN (COSTS false, VERBOSE)
WITH t (c1_1, c1_3, c2_1) AS (SELECT t1.c1, t1.c3, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1)) SELECT c1_1, c2_1 FROM t ORDER BY c1_3, c1_1 OFFSET 100 LIMIT 10;
WITH t (c1_1, c1_3, c2_1) AS (SELECT t1.c1, t1.c3, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1)) SELECT c1_1, c2_1 FROM t ORDER BY c1_3, c1_1 OFFSET 100 LIMIT 10;
-- ctid with whole-row reference
EXPLAIN (COSTS false, VERBOSE)
SELECT t1.ctid, t1, t2, t1.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
+-- system columns other than ctid whose values of foreign server are meaningless
+EXPLAIN (COSTS false, VERBOSE)
+SELECT t1.c1, t1.cmin, t1.xmin, t1.xmax, t1.tableoid::regclass, t2.cmax, t2.tableoid::regclass, t2.c1 FROM ft4 t1 JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 5;
+SELECT t1.c1, t1.cmin, t1.xmin, t1.xmax, t1.tableoid::regclass, t2.cmax, t2.tableoid::regclass, t2.c1 FROM ft4 t1 JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 5;
+-- system columns other than ctid whose values of foreign server are meaningless
+-- on nullable side of join
+EXPLAIN (COSTS false, VERBOSE)
+SELECT t1.c1, t1.cmin, t1.xmin, t1.xmax, t2.cmax, t2.c1 FROM ft4 t1 LEFT JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 5;
+SELECT t1.c1, t1.cmin, t1.xmin, t1.xmax, t2.cmax, t2.c1 FROM ft4 t1 LEFT JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 5;
+EXPLAIN (COSTS false, VERBOSE)
+SELECT t1.c1, t1.cmin, t1.xmin, t1.xmax, t2.cmax, t2.c1 FROM ft4 t1 FULL JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 45 LIMIT 10;
+SELECT t1.c1, t1.cmin, t1.xmin, t1.xmax, t2.cmax, t2.c1 FROM ft4 t1 FULL JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 45 LIMIT 10;
+-- tableoid on nullable side of join
+EXPLAIN (COSTS false, VERBOSE)
+SELECT t1.c1, t1.tableoid::regclass, t2.tableoid::regclass, t2.c1 FROM ft4 t1 LEFT JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 5;
+SELECT t1.c1, t1.tableoid::regclass, t2.tableoid::regclass, t2.c1 FROM ft4 t1 LEFT JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 5;
+EXPLAIN (COSTS false, VERBOSE)
+SELECT t1.c1, t1.tableoid::regclass, t2.tableoid::regclass, t2.c1 FROM ft4 t1 FULL JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 45 LIMIT 10;
+SELECT t1.c1, t1.tableoid::regclass, t2.tableoid::regclass, t2.c1 FROM ft4 t1 FULL JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 45 LIMIT 10;
+-- whole-row reference on nullable side of OUTER join
+EXPLAIN (COSTS false, VERBOSE)
+SELECT t1, t1.c1, t2, t2.c1 FROM ft4 t1 LEFT JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 10;
+SELECT t1, t1.c1, t2, t2.c1 FROM ft4 t1 LEFT JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 10;
+EXPLAIN (COSTS false, VERBOSE)
+SELECT t1, t1.c1, t2, t2.c1 FROM ft4 t1 FULL JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 45 LIMIT 10;
+SELECT t1, t1.c1, t2, t2.c1 FROM ft4 t1 FULL JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 45 LIMIT 10;
-- SEMI JOIN, not pushed down
EXPLAIN (COSTS false, VERBOSE)
SELECT t1.c1 FROM ft1 t1 WHERE EXISTS (SELECT 1 FROM ft2 t2 WHERE t1.c1 = t2.c1) ORDER BY t1.c1 OFFSET 100 LIMIT 10;
SELECT t1.c1 FROM ft1 t1 WHERE EXISTS (SELECT 1 FROM ft2 t2 WHERE t1.c1 = t2.c1) ORDER BY t1.c1 OFFSET 100 LIMIT 10;
-- ANTI JOIN, not pushed down
EXPLAIN (COSTS false, VERBOSE)
SELECT t1.c1 FROM ft1 t1 WHERE NOT EXISTS (SELECT 1 FROM ft2 t2 WHERE t1.c1 = t2.c2) ORDER BY t1.c1 OFFSET 100 LIMIT 10;
SELECT t1.c1 FROM ft1 t1 WHERE NOT EXISTS (SELECT 1 FROM ft2 t2 WHERE t1.c1 = t2.c2) ORDER BY t1.c1 OFFSET 100 LIMIT 10;
-- CROSS JOIN, not pushed down
EXPLAIN (COSTS false, VERBOSE)
On 2016/04/14 13:04, Robert Haas wrote:
On Wed, Apr 13, 2016 at 11:21 PM, Etsuro Fujita
<fujita.etsuro@lab.ntt.co.jp> wrote:2. When a join is pushed down, deparse system columns using something
like "CASE WHEN r1.* IS NOT NULL THEN 0 END", except for the table OID
column, which gets deparsed with the table OID in place of 0. This
delivers the correct behavior in the presence of outer joins.I think that that would cause useless data transfer for such culumns.
Why not set values locally for such columns?
Because that doesn't work properly when there are outer joins
involved.
Understood. It looks like I overlooked Ashutosh's mail about that.
Thanks, Robert and Ashutosh!
Best regards,
Etsuro Fujita
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Thu, Apr 14, 2016 at 7:49 AM, Ashutosh Bapat
<ashutosh.bapat@enterprisedb.com> wrote:
BTW, I noticed that we are deparsing whole-row reference as ROW(list of
columns from local definition of foreign table), which has the same problem
with outer joins. It won't be NULL when the rest of the row from that
relation is NULL in an outer join. It too needs to be encapsulated in CASE
WHEN .. END expression. PFA patch with that fix included and also some
testcases for system columns as well as whole-row references.
Good catch. But your test cases are no good because then we have OIDs
hardcoded in the expected output. That means 'make installcheck' will
fail, or if for any other reason the OID varies it will also fail.
Committed your version with those test cases.
--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Fri, Apr 15, 2016 at 9:39 PM, Robert Haas <robertmhaas@gmail.com> wrote:
On Thu, Apr 14, 2016 at 7:49 AM, Ashutosh Bapat
<ashutosh.bapat@enterprisedb.com> wrote:BTW, I noticed that we are deparsing whole-row reference as ROW(list of
columns from local definition of foreign table), which has the sameproblem
with outer joins. It won't be NULL when the rest of the row from that
relation is NULL in an outer join. It too needs to be encapsulated inCASE
WHEN .. END expression. PFA patch with that fix included and also some
testcases for system columns as well as whole-row references.Good catch. But your test cases are no good because then we have OIDs
hardcoded in the expected output. That means 'make installcheck' will
fail, or if for any other reason the OID varies it will also fail.
Committed your version with those test cases.
The testcases had tableoid::regclass which outputs the foreign table's
local name, which won't change across runs. Isn't that so?
--
Best Wishes,
Ashutosh Bapat
EnterpriseDB Corporation
The Postgres Database Company
On Fri, Apr 15, 2016 at 12:16 PM, Ashutosh Bapat
<ashutosh.bapat@enterprisedb.com> wrote:
The testcases had tableoid::regclass which outputs the foreign table's local
name, which won't change across runs. Isn't that so?
[rhaas pgsql]$ grep 16444 ~/Downloads/postgres-fdw-syscol-zap-ab.patch
+ Remote SQL: SELECT r1.c1, CASE WHEN r1.* IS NOT NULL THEN 0
END, CASE WHEN r1.* IS NOT NULL THEN 0 END, CASE WHEN r1.* IS NOT NULL
THEN 0 END, CASE WHEN r1.* IS NOT NULL THEN 16444 END, CASE WHEN r2.*
IS NOT NULL THEN 0 END, CASE WHEN r2.* IS NOT NULL THEN 16447 END,
r2.c1 FROM ("S 1"."T 3" r1 INNER JOIN "S 1"."T 4" r2 ON (TRUE)) WHERE
((r1.c1 = r2.c1)) ORDER BY r1.c1 ASC NULLS LAST
+ Remote SQL: SELECT r1.c1, CASE WHEN r1.* IS NOT NULL THEN
16444 END, CASE WHEN r2.* IS NOT NULL THEN 16447 END, r2.c1 FROM ("S
1"."T 3" r1 LEFT JOIN "S 1"."T 4" r2 ON (((r1.c1 = r2.c1)))) ORDER BY
r1.c1 ASC NULLS LAST, r2.c1 ASC NULLS LAST
+ Remote SQL: SELECT r1.c1, CASE WHEN r1.* IS NOT NULL THEN
16444 END, CASE WHEN r2.* IS NOT NULL THEN 16447 END, r2.c1 FROM ("S
1"."T 3" r1 FULL JOIN "S 1"."T 4" r2 ON (((r1.c1 = r2.c1)))) ORDER BY
r1.c1 ASC NULLS LAST, r2.c1 ASC NULLS LAST
Where do you think 16444 and 16447 are coming from here?
--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Fri, Apr 15, 2016 at 10:17 PM, Robert Haas <robertmhaas@gmail.com> wrote:
On Fri, Apr 15, 2016 at 12:16 PM, Ashutosh Bapat
<ashutosh.bapat@enterprisedb.com> wrote:The testcases had tableoid::regclass which outputs the foreign table's
local
name, which won't change across runs. Isn't that so?
[rhaas pgsql]$ grep 16444 ~/Downloads/postgres-fdw-syscol-zap-ab.patch + Remote SQL: SELECT r1.c1, CASE WHEN r1.* IS NOT NULL THEN 0 END, CASE WHEN r1.* IS NOT NULL THEN 0 END, CASE WHEN r1.* IS NOT NULL THEN 0 END, CASE WHEN r1.* IS NOT NULL THEN 16444 END, CASE WHEN r2.* IS NOT NULL THEN 0 END, CASE WHEN r2.* IS NOT NULL THEN 16447 END, r2.c1 FROM ("S 1"."T 3" r1 INNER JOIN "S 1"."T 4" r2 ON (TRUE)) WHERE ((r1.c1 = r2.c1)) ORDER BY r1.c1 ASC NULLS LAST + Remote SQL: SELECT r1.c1, CASE WHEN r1.* IS NOT NULL THEN 16444 END, CASE WHEN r2.* IS NOT NULL THEN 16447 END, r2.c1 FROM ("S 1"."T 3" r1 LEFT JOIN "S 1"."T 4" r2 ON (((r1.c1 = r2.c1)))) ORDER BY r1.c1 ASC NULLS LAST, r2.c1 ASC NULLS LAST + Remote SQL: SELECT r1.c1, CASE WHEN r1.* IS NOT NULL THEN 16444 END, CASE WHEN r2.* IS NOT NULL THEN 16447 END, r2.c1 FROM ("S 1"."T 3" r1 FULL JOIN "S 1"."T 4" r2 ON (((r1.c1 = r2.c1)))) ORDER BY r1.c1 ASC NULLS LAST, r2.c1 ASC NULLS LASTWhere do you think 16444 and 16447 are coming from here?
Ah! Sorry, it's the explain output.
--
Best Wishes,
Ashutosh Bapat
EnterpriseDB Corporation
The Postgres Database Company
On 2016/04/14 4:57, Robert Haas wrote:
1. For a regular FDW scan, zero the xmin, xmax, and cid of the tuple
before returning it from postgres_fdw, so that we don't expose the
datum-tuple fields. I can't see any reason this isn't safe, but I
might be missing something.
I noticed odd behavior of this in EvalPlanQual. Consider:
-- session 1
postgres=# create extension postgres_fdw;
CREATE EXTENSION
postgres=# create server fs foreign data wrapper postgres_fdw options
(dbname 'postgres');
CREATE SERVER
postgres=# create user mapping for public server fs;
CREATE USER MAPPING
postgres=# create table t1 (a int, b int);
CREATE TABLE
postgres=# create table t2 (a int, b int);
CREATE TABLE
postgres=# insert into t1 values (1, 1);
INSERT 0 1
postgres=# insert into t2 values (1, 1);
INSERT 0 1
postgres=# create foreign table ft1 (a int, b int) server fs options
(table_name 't1');
CREATE FOREIGN TABLE
postgres=# select xmin, xmax, cmin, * from ft1;
xmin | xmax | cmin | a | b
------+------+------+---+---
0 | 0 | 0 | 1 | 1
(1 row)
-- session 2
postgres=# begin;
BEGIN
postgres=# update t2 set a = a;
UPDATE 1
-- session 1
postgres=# select ft1.xmin, ft1.xmax, ft1.cmin, ft1.* from ft1, t2 for
update;
-- session 2
postgres=# commit;
COMMIT
-- session 1 (will show the following)
xmin | xmax | cmin | a | b
------+------------+-------+---+---
128 | 4294967295 | 16398 | 1 | 1
(1 row)
The values of xmin, xmax, and cmin are not 0! The reason for that is
that we don't zero these values in a test tuple stored by
EvalPlanQualFetchRowMarks for EvalPlanQual re-evaluations.
That cleanup applies to the file_fdw foreign table case as well, so I
think xmin, xmax, and cid in tuples from such tables should be set to 0,
too. And ISTM that it's better that the core (ie, ForeignNext) supports
doing that, rather than each FDW does that work. That would also
minimize the overhead because ForeignNext does that if needed. Please
find attached a patch.
Best regards,
Etsuro Fujita
Attachments:
postgres-fdw-syscol-cleanup.patchbinary/octet-stream; name=postgres-fdw-syscol-cleanup.patchDownload
*** a/contrib/postgres_fdw/postgres_fdw.c
--- b/contrib/postgres_fdw/postgres_fdw.c
***************
*** 4502,4519 **** make_tuple_from_result_row(PGresult *res,
if (ctid)
tuple->t_self = tuple->t_data->t_ctid = *ctid;
- /*
- * Stomp on the xmin, xmax, and cmin fields from the tuple created by
- * heap_form_tuple. heap_form_tuple actually creates the tuple with
- * DatumTupleFields, not HeapTupleFields, but the executor expects
- * HeapTupleFields and will happily extract system columns on that
- * assumption. If we don't do this then, for example, the tuple length
- * ends up in the xmin field, which isn't what we want.
- */
- HeapTupleHeaderSetXmax(tuple->t_data, InvalidTransactionId);
- HeapTupleHeaderSetXmin(tuple->t_data, InvalidTransactionId);
- HeapTupleHeaderSetCmin(tuple->t_data, InvalidTransactionId);
-
/* Clean up */
MemoryContextReset(temp_context);
--- 4502,4507 ----
*** a/src/backend/executor/execMain.c
--- b/src/backend/executor/execMain.c
***************
*** 2555,2560 **** EvalPlanQualFetchRowMarks(EPQState *epqstate)
--- 2555,2561 ----
Datum datum;
bool isNull;
HeapTupleData tuple;
+ HeapTuple copyTuple;
if (RowMarkRequiresRowShareLock(erm->markType))
elog(ERROR, "EvalPlanQual doesn't support locking rowmarks");
***************
*** 2585,2592 **** EvalPlanQualFetchRowMarks(EPQState *epqstate)
if (erm->markType == ROW_MARK_REFERENCE)
{
- HeapTuple copyTuple;
-
Assert(erm->relation != NULL);
/* fetch the tuple's ctid */
--- 2586,2591 ----
***************
*** 2664,2672 **** EvalPlanQualFetchRowMarks(EPQState *epqstate)
/* also copy t_ctid in case there's valid data there */
tuple.t_self = td->t_ctid;
! /* copy and store tuple */
! EvalPlanQualSetTuple(epqstate, erm->rti,
! heap_copytuple(&tuple));
}
}
}
--- 2663,2677 ----
/* also copy t_ctid in case there's valid data there */
tuple.t_self = td->t_ctid;
! /* copy tuple */
! copyTuple = heap_copytuple(&tuple);
! /* then zero xmin, xmax, and cid, as described above */
! HeapTupleHeaderSetXmin(copyTuple->t_data, InvalidTransactionId);
! HeapTupleHeaderSetXmax(copyTuple->t_data, InvalidTransactionId);
! HeapTupleHeaderSetCmin(copyTuple->t_data, InvalidTransactionId);
!
! /* store tuple */
! EvalPlanQualSetTuple(epqstate, erm->rti, copyTuple);
}
}
}
*** a/src/backend/executor/nodeForeignscan.c
--- b/src/backend/executor/nodeForeignscan.c
***************
*** 22,27 ****
--- 22,28 ----
*/
#include "postgres.h"
+ #include "access/htup_details.h"
#include "executor/executor.h"
#include "executor/nodeForeignscan.h"
#include "foreign/fdwapi.h"
***************
*** 58,70 **** ForeignNext(ForeignScanState *node)
* If any system columns are requested, we have to force the tuple into
* physical-tuple form to avoid "cannot extract system attribute from
* virtual tuple" errors later. We also insert a valid value for
! * tableoid, which is the only actually-useful system column.
*/
if (plan->fsSystemCol && !TupIsNull(slot))
{
HeapTuple tup = ExecMaterializeSlot(slot);
tup->t_tableOid = RelationGetRelid(node->ss.ss_currentRelation);
}
return slot;
--- 59,76 ----
* If any system columns are requested, we have to force the tuple into
* physical-tuple form to avoid "cannot extract system attribute from
* virtual tuple" errors later. We also insert a valid value for
! * tableoid, which is the only actually-useful system column, and zero
! * the xmin, xmax, and cid of the tuple.
*/
if (plan->fsSystemCol && !TupIsNull(slot))
{
HeapTuple tup = ExecMaterializeSlot(slot);
tup->t_tableOid = RelationGetRelid(node->ss.ss_currentRelation);
+
+ HeapTupleHeaderSetXmin(tup->t_data, InvalidTransactionId);
+ HeapTupleHeaderSetXmax(tup->t_data, InvalidTransactionId);
+ HeapTupleHeaderSetCmin(tup->t_data, InvalidTransactionId);
}
return slot;
On Fri, Jul 1, 2016 at 3:10 AM, Etsuro Fujita
<fujita.etsuro@lab.ntt.co.jp> wrote:
On 2016/04/14 4:57, Robert Haas wrote:
1. For a regular FDW scan, zero the xmin, xmax, and cid of the tuple
before returning it from postgres_fdw, so that we don't expose the
datum-tuple fields. I can't see any reason this isn't safe, but I
might be missing something.I noticed odd behavior of this in EvalPlanQual. Consider:
-- session 1
postgres=# create extension postgres_fdw;
CREATE EXTENSION
postgres=# create server fs foreign data wrapper postgres_fdw options
(dbname 'postgres');
CREATE SERVER
postgres=# create user mapping for public server fs;
CREATE USER MAPPING
postgres=# create table t1 (a int, b int);
CREATE TABLE
postgres=# create table t2 (a int, b int);
CREATE TABLE
postgres=# insert into t1 values (1, 1);
INSERT 0 1
postgres=# insert into t2 values (1, 1);
INSERT 0 1
postgres=# create foreign table ft1 (a int, b int) server fs options
(table_name 't1');
CREATE FOREIGN TABLE
postgres=# select xmin, xmax, cmin, * from ft1;
xmin | xmax | cmin | a | b
------+------+------+---+---
0 | 0 | 0 | 1 | 1
(1 row)-- session 2
postgres=# begin;
BEGIN
postgres=# update t2 set a = a;
UPDATE 1-- session 1
postgres=# select ft1.xmin, ft1.xmax, ft1.cmin, ft1.* from ft1, t2 for
update;-- session 2
postgres=# commit;
COMMIT-- session 1 (will show the following)
xmin | xmax | cmin | a | b
------+------------+-------+---+---
128 | 4294967295 | 16398 | 1 | 1
(1 row)The values of xmin, xmax, and cmin are not 0! The reason for that is that
we don't zero these values in a test tuple stored by
EvalPlanQualFetchRowMarks for EvalPlanQual re-evaluations.That cleanup applies to the file_fdw foreign table case as well, so I think
xmin, xmax, and cid in tuples from such tables should be set to 0, too. And
ISTM that it's better that the core (ie, ForeignNext) supports doing that,
rather than each FDW does that work. That would also minimize the overhead
because ForeignNext does that if needed. Please find attached a patch.
If you want this to be considered, you'll need to rebase it and submit
it to the next CommitFest.
--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 2016/09/21 0:40, Robert Haas wrote:
On Fri, Jul 1, 2016 at 3:10 AM, Etsuro Fujita
<fujita.etsuro@lab.ntt.co.jp> wrote:On 2016/04/14 4:57, Robert Haas wrote:
1. For a regular FDW scan, zero the xmin, xmax, and cid of the tuple
before returning it from postgres_fdw, so that we don't expose the
datum-tuple fields. I can't see any reason this isn't safe, but I
might be missing something.
I noticed odd behavior of this in EvalPlanQual. Consider:
-- session 1
postgres=# create extension postgres_fdw;
CREATE EXTENSION
postgres=# create server fs foreign data wrapper postgres_fdw options
(dbname 'postgres');
CREATE SERVER
postgres=# create user mapping for public server fs;
CREATE USER MAPPING
postgres=# create table t1 (a int, b int);
CREATE TABLE
postgres=# create table t2 (a int, b int);
CREATE TABLE
postgres=# insert into t1 values (1, 1);
INSERT 0 1
postgres=# insert into t2 values (1, 1);
INSERT 0 1
postgres=# create foreign table ft1 (a int, b int) server fs options
(table_name 't1');
CREATE FOREIGN TABLE
postgres=# select xmin, xmax, cmin, * from ft1;
xmin | xmax | cmin | a | b
------+------+------+---+---
0 | 0 | 0 | 1 | 1
(1 row)-- session 2
postgres=# begin;
BEGIN
postgres=# update t2 set a = a;
UPDATE 1-- session 1
postgres=# select ft1.xmin, ft1.xmax, ft1.cmin, ft1.* from ft1, t2 for
update;-- session 2
postgres=# commit;
COMMIT-- session 1 (will show the following)
xmin | xmax | cmin | a | b
------+------------+-------+---+---
128 | 4294967295 | 16398 | 1 | 1
(1 row)The values of xmin, xmax, and cmin are not 0! The reason for that is that
we don't zero these values in a test tuple stored by
EvalPlanQualFetchRowMarks for EvalPlanQual re-evaluations.That cleanup applies to the file_fdw foreign table case as well, so I think
xmin, xmax, and cid in tuples from such tables should be set to 0, too. And
ISTM that it's better that the core (ie, ForeignNext) supports doing that,
rather than each FDW does that work. That would also minimize the overhead
because ForeignNext does that if needed. Please find attached a patch.
If you want this to be considered, you'll need to rebase it and submit
it to the next CommitFest.
Will do.
Best regards,
Etsuro Fujita
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers