EvalPlanQual behaves oddly for FDW queries involving system columns

Started by Etsuro Fujitaalmost 11 years ago51 messages
#1Etsuro Fujita
fujita.etsuro@lab.ntt.co.jp
1 attachment(s)

Here is an example using postgres_fdw.

[Terminal 1]
postgres=# create table t (a int, b int);
CREATE TABLE
postgres=# insert into t values (1, 1);
INSERT 0 1
postgres=# begin;
BEGIN
postgres=# update t set b = b * 2;
UPDATE 1

[Terminal 2]
postgres=# create foreign table ft (a int) server loopback options
(table_name 'lbt');
CREATE FOREIGN TABLE
postgres=# insert into ft values (1);
INSERT 0 1
postgres=# select tableoid, ctid, * from ft;
tableoid | ctid | a
----------+-------+---
25092 | (0,1) | 1
(1 row)

postgres=# select ft.tableoid, ft.ctid, ft.* from t, ft where t.a = ft.a
for update;

[Terminal 1]
postgres=# commit;
COMMIT

[Terminal 2]
postgres=# select ft.tableoid, ft.ctid, ft.* from t, ft where t.a = ft.a
for update;
tableoid | ctid | a
----------+----------------+---
0 | (4294967295,0) | 1
(1 row)

Note that tableoid and ctid have been changed!

I think the reason for that is because EvalPlanQualFetchRowMarks doesn't
properly set tableoid and ctid for foreign tables, IIUC. I think this
should be fixed. Please find attached a patch. The patch slightly
relates to [1]https://commitfest.postgresql.org/action/patch_view?id=1386, so if it is reasonable, I'll update [1]https://commitfest.postgresql.org/action/patch_view?id=1386 on top of this.

[1]: https://commitfest.postgresql.org/action/patch_view?id=1386

Best regards,
Etsuro Fujita

Attachments:

evalqualplan-v1.patchtext/x-patch; name=evalqualplan-v1.patchDownload
*** a/contrib/postgres_fdw/postgres_fdw.c
--- b/contrib/postgres_fdw/postgres_fdw.c
***************
*** 2947,2953 **** make_tuple_from_result_row(PGresult *res,
  	tuple = heap_form_tuple(tupdesc, values, nulls);
  
  	if (ctid)
! 		tuple->t_self = *ctid;
  
  	/* Clean up */
  	MemoryContextReset(temp_context);
--- 2947,2953 ----
  	tuple = heap_form_tuple(tupdesc, values, nulls);
  
  	if (ctid)
! 		tuple->t_self = tuple->t_data->t_ctid = *ctid;
  
  	/* Clean up */
  	MemoryContextReset(temp_context);
*** a/src/backend/executor/execMain.c
--- b/src/backend/executor/execMain.c
***************
*** 795,800 **** InitPlan(QueryDesc *queryDesc, int eflags)
--- 795,801 ----
  	{
  		PlanRowMark *rc = (PlanRowMark *) lfirst(l);
  		Oid			relid;
+ 		char		relkind;
  		Relation	relation;
  		ExecRowMark *erm;
  
***************
*** 817,826 **** InitPlan(QueryDesc *queryDesc, int eflags)
--- 818,833 ----
  				break;
  			case ROW_MARK_COPY:
  				/* there's no real table here ... */
+ 				relkind = rt_fetch(rc->rti, rangeTable)->relkind;
+ 				if (relkind == RELKIND_FOREIGN_TABLE)
+ 					relid = getrelid(rc->rti, rangeTable);
+ 				else
+ 					relid = InvalidOid;
  				relation = NULL;
  				break;
  			default:
  				elog(ERROR, "unrecognized markType: %d", rc->markType);
+ 				relid = InvalidOid;
  				relation = NULL;	/* keep compiler quiet */
  				break;
  		}
***************
*** 831,836 **** InitPlan(QueryDesc *queryDesc, int eflags)
--- 838,844 ----
  
  		erm = (ExecRowMark *) palloc(sizeof(ExecRowMark));
  		erm->relation = relation;
+ 		erm->relid = relid;
  		erm->rti = rc->rti;
  		erm->prti = rc->prti;
  		erm->rowmarkId = rc->rowmarkId;
***************
*** 2318,2325 **** EvalPlanQualFetchRowMarks(EPQState *epqstate)
  
  			/* build a temporary HeapTuple control structure */
  			tuple.t_len = HeapTupleHeaderGetDatumLength(td);
! 			ItemPointerSetInvalid(&(tuple.t_self));
! 			tuple.t_tableOid = InvalidOid;
  			tuple.t_data = td;
  
  			/* copy and store tuple */
--- 2326,2342 ----
  
  			/* build a temporary HeapTuple control structure */
  			tuple.t_len = HeapTupleHeaderGetDatumLength(td);
! 			/* if relid is valid, rel is a foreign table; set system columns */
! 			if (OidIsValid(erm->relid))
! 			{
! 				tuple.t_self = td->t_ctid;
! 				tuple.t_tableOid = erm->relid;
! 			}
! 			else
! 			{
! 				ItemPointerSetInvalid(&(tuple.t_self));
! 				tuple.t_tableOid = InvalidOid;
! 			}
  			tuple.t_data = td;
  
  			/* copy and store tuple */
*** a/src/backend/executor/nodeForeignscan.c
--- b/src/backend/executor/nodeForeignscan.c
***************
*** 22,27 ****
--- 22,28 ----
   */
  #include "postgres.h"
  
+ #include "access/htup_details.h"
  #include "executor/executor.h"
  #include "executor/nodeForeignscan.h"
  #include "foreign/fdwapi.h"
***************
*** 53,65 **** ForeignNext(ForeignScanState *node)
  	/*
  	 * If any system columns are requested, we have to force the tuple into
  	 * physical-tuple form to avoid "cannot extract system attribute from
! 	 * virtual tuple" errors later.  We also insert a valid value for
! 	 * tableoid, which is the only actually-useful system column.
  	 */
  	if (plan->fsSystemCol && !TupIsNull(slot))
  	{
  		HeapTuple	tup = ExecMaterializeSlot(slot);
  
  		tup->t_tableOid = RelationGetRelid(node->ss.ss_currentRelation);
  	}
  
--- 54,69 ----
  	/*
  	 * If any system columns are requested, we have to force the tuple into
  	 * physical-tuple form to avoid "cannot extract system attribute from
! 	 * virtual tuple" errors later.  We also insert a valid value for TID
! 	 * and tableoid, which are the only actually-useful system columns.
  	 */
  	if (plan->fsSystemCol && !TupIsNull(slot))
  	{
  		HeapTuple	tup = ExecMaterializeSlot(slot);
  
+ 		/* We assume that t_ctid is initialized with its own TID */
+ 		tup->t_self = tup->t_data->t_ctid;
+ 
  		tup->t_tableOid = RelationGetRelid(node->ss.ss_currentRelation);
  	}
  
*** a/src/include/nodes/execnodes.h
--- b/src/include/nodes/execnodes.h
***************
*** 425,430 **** typedef struct EState
--- 425,431 ----
  typedef struct ExecRowMark
  {
  	Relation	relation;		/* opened and suitably locked relation */
+ 	Oid			relid;			/* its relation Oid */
  	Index		rti;			/* its range table index */
  	Index		prti;			/* parent range table index, if child */
  	Index		rowmarkId;		/* unique identifier for resjunk columns */
#2Alvaro Herrera
alvherre@2ndquadrant.com
In reply to: Etsuro Fujita (#1)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

Etsuro Fujita wrote:

***************
*** 817,826 **** InitPlan(QueryDesc *queryDesc, int eflags)
--- 818,833 ----
break;
case ROW_MARK_COPY:
/* there's no real table here ... */
+ 				relkind = rt_fetch(rc->rti, rangeTable)->relkind;
+ 				if (relkind == RELKIND_FOREIGN_TABLE)
+ 					relid = getrelid(rc->rti, rangeTable);
+ 				else
+ 					relid = InvalidOid;
relation = NULL;
break;
default:
elog(ERROR, "unrecognized markType: %d", rc->markType);
+ 				relid = InvalidOid;
relation = NULL;	/* keep compiler quiet */
break;
}

[ ... ]

--- 2326,2342 ----

/* build a temporary HeapTuple control structure */
tuple.t_len = HeapTupleHeaderGetDatumLength(td);
! /* if relid is valid, rel is a foreign table; set system columns */
! if (OidIsValid(erm->relid))
! {
! tuple.t_self = td->t_ctid;
! tuple.t_tableOid = erm->relid;
! }
! else
! {
! ItemPointerSetInvalid(&(tuple.t_self));
! tuple.t_tableOid = InvalidOid;
! }
tuple.t_data = td;

/* copy and store tuple */

I find this arrangement confusing and unnecessary -- surely if you have
access to the ExecRowMark here, you could just obtain the relid with
RelationGetRelid instead of saving the OID beforehand? And if you have
the Relation, you could just consult the relkind at that point instead
of relying on the relid being set or not as a flag to indicate whether
the rel is foreign.

I didn't look at anything else in the patch so I can't comment more on
it, but the change to ExecRowMark caught my attention.

--
�lvaro Herrera http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#3Etsuro Fujita
fujita.etsuro@lab.ntt.co.jp
In reply to: Alvaro Herrera (#2)
1 attachment(s)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

On 2015/01/16 1:24, Alvaro Herrera wrote:

Etsuro Fujita wrote:

*** 817,826 **** InitPlan(QueryDesc *queryDesc, int eflags)
--- 818,833 ----
break;
case ROW_MARK_COPY:
/* there's no real table here ... */
+ 				relkind = rt_fetch(rc->rti, rangeTable)->relkind;
+ 				if (relkind == RELKIND_FOREIGN_TABLE)
+ 					relid = getrelid(rc->rti, rangeTable);
+ 				else
+ 					relid = InvalidOid;
relation = NULL;
break;
--- 2326,2342 ----

/* build a temporary HeapTuple control structure */
tuple.t_len = HeapTupleHeaderGetDatumLength(td);
! /* if relid is valid, rel is a foreign table; set system columns */
! if (OidIsValid(erm->relid))
! {
! tuple.t_self = td->t_ctid;
! tuple.t_tableOid = erm->relid;
! }
! else
! {
! ItemPointerSetInvalid(&(tuple.t_self));
! tuple.t_tableOid = InvalidOid;
! }
tuple.t_data = td;

I find this arrangement confusing and unnecessary -- surely if you have
access to the ExecRowMark here, you could just obtain the relid with
RelationGetRelid instead of saving the OID beforehand?

I don't think so because we don't have the Relation (ie, erm->relation =
NULL) here since InitPlan don't open/lock relations having markType =
ROW_MARK_COPY as shown above, which include foreign tables selected for
update/share. But I noticed we should open/lock such foreign tables as
well in InitPlan because I think ExecOpenScanRelation is assuming that,
IIUC.

And if you have
the Relation, you could just consult the relkind at that point instead
of relying on the relid being set or not as a flag to indicate whether
the rel is foreign.

Followed your revision. Attached is an updated version of the patch.

Thanks for the comment!

Best regards,
Etsuro Fujita

Attachments:

EvalPlanQual-v2.patchtext/x-diff; name=EvalPlanQual-v2.patchDownload
*** a/contrib/postgres_fdw/postgres_fdw.c
--- b/contrib/postgres_fdw/postgres_fdw.c
***************
*** 2947,2953 **** make_tuple_from_result_row(PGresult *res,
  	tuple = heap_form_tuple(tupdesc, values, nulls);
  
  	if (ctid)
! 		tuple->t_self = *ctid;
  
  	/* Clean up */
  	MemoryContextReset(temp_context);
--- 2947,2953 ----
  	tuple = heap_form_tuple(tupdesc, values, nulls);
  
  	if (ctid)
! 		tuple->t_self = tuple->t_data->t_ctid = *ctid;
  
  	/* Clean up */
  	MemoryContextReset(temp_context);
*** a/src/backend/executor/execMain.c
--- b/src/backend/executor/execMain.c
***************
*** 795,800 **** InitPlan(QueryDesc *queryDesc, int eflags)
--- 795,801 ----
  	{
  		PlanRowMark *rc = (PlanRowMark *) lfirst(l);
  		Oid			relid;
+ 		char		relkind;
  		Relation	relation;
  		ExecRowMark *erm;
  
***************
*** 817,823 **** InitPlan(QueryDesc *queryDesc, int eflags)
  				break;
  			case ROW_MARK_COPY:
  				/* there's no real table here ... */
! 				relation = NULL;
  				break;
  			default:
  				elog(ERROR, "unrecognized markType: %d", rc->markType);
--- 818,831 ----
  				break;
  			case ROW_MARK_COPY:
  				/* there's no real table here ... */
! 				relkind = rt_fetch(rc->rti, rangeTable)->relkind;
! 				if (relkind == RELKIND_FOREIGN_TABLE)
! 				{
! 					relid = getrelid(rc->rti, rangeTable);
! 					relation = heap_open(relid, AccessShareLock);
! 				}
! 				else
! 					relation = NULL;
  				break;
  			default:
  				elog(ERROR, "unrecognized markType: %d", rc->markType);
***************
*** 1115,1125 **** CheckValidRowMarkRel(Relation rel, RowMarkType markType)
  							  RelationGetRelationName(rel))));
  			break;
  		case RELKIND_FOREIGN_TABLE:
! 			/* Should not get here; planner should have used ROW_MARK_COPY */
! 			ereport(ERROR,
! 					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
! 					 errmsg("cannot lock rows in foreign table \"%s\"",
! 							RelationGetRelationName(rel))));
  			break;
  		default:
  			ereport(ERROR,
--- 1123,1134 ----
  							  RelationGetRelationName(rel))));
  			break;
  		case RELKIND_FOREIGN_TABLE:
! 			/* Allow opening/locking a foreign table only if ROW_MARK_COPY */
! 			if (markType != ROW_MARK_COPY)
! 				ereport(ERROR,
! 						(errcode(ERRCODE_WRONG_OBJECT_TYPE),
! 						 errmsg("cannot lock rows in foreign table \"%s\"",
! 								RelationGetRelationName(rel))));
  			break;
  		default:
  			ereport(ERROR,
***************
*** 1792,1798 **** ExecBuildAuxRowMark(ExecRowMark *erm, List *targetlist)
  	aerm->rowmark = erm;
  
  	/* Look up the resjunk columns associated with this rowmark */
! 	if (erm->relation)
  	{
  		Assert(erm->markType != ROW_MARK_COPY);
  
--- 1801,1808 ----
  	aerm->rowmark = erm;
  
  	/* Look up the resjunk columns associated with this rowmark */
! 	if (erm->relation &&
! 		erm->relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
  	{
  		Assert(erm->markType != ROW_MARK_COPY);
  
***************
*** 2256,2262 **** EvalPlanQualFetchRowMarks(EPQState *epqstate)
  		/* clear any leftover test tuple for this rel */
  		EvalPlanQualSetTuple(epqstate, erm->rti, NULL);
  
! 		if (erm->relation)
  		{
  			Buffer		buffer;
  
--- 2266,2273 ----
  		/* clear any leftover test tuple for this rel */
  		EvalPlanQualSetTuple(epqstate, erm->rti, NULL);
  
! 		if (erm->relation &&
! 			erm->relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
  		{
  			Buffer		buffer;
  
***************
*** 2318,2325 **** EvalPlanQualFetchRowMarks(EPQState *epqstate)
  
  			/* build a temporary HeapTuple control structure */
  			tuple.t_len = HeapTupleHeaderGetDatumLength(td);
! 			ItemPointerSetInvalid(&(tuple.t_self));
! 			tuple.t_tableOid = InvalidOid;
  			tuple.t_data = td;
  
  			/* copy and store tuple */
--- 2329,2346 ----
  
  			/* build a temporary HeapTuple control structure */
  			tuple.t_len = HeapTupleHeaderGetDatumLength(td);
! 			/* if relation is opened, the rel is foreign; set system columns */
! 			if (erm->relation)
! 			{
! 				Assert(erm->relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE);
! 				tuple.t_self = td->t_ctid;
! 				tuple.t_tableOid = RelationGetRelid(erm->relation);
! 			}
! 			else
! 			{
! 				ItemPointerSetInvalid(&(tuple.t_self));
! 				tuple.t_tableOid = InvalidOid;
! 			}
  			tuple.t_data = td;
  
  			/* copy and store tuple */
*** a/src/backend/executor/nodeForeignscan.c
--- b/src/backend/executor/nodeForeignscan.c
***************
*** 22,27 ****
--- 22,28 ----
   */
  #include "postgres.h"
  
+ #include "access/htup_details.h"
  #include "executor/executor.h"
  #include "executor/nodeForeignscan.h"
  #include "foreign/fdwapi.h"
***************
*** 53,65 **** ForeignNext(ForeignScanState *node)
  	/*
  	 * If any system columns are requested, we have to force the tuple into
  	 * physical-tuple form to avoid "cannot extract system attribute from
! 	 * virtual tuple" errors later.  We also insert a valid value for
! 	 * tableoid, which is the only actually-useful system column.
  	 */
  	if (plan->fsSystemCol && !TupIsNull(slot))
  	{
  		HeapTuple	tup = ExecMaterializeSlot(slot);
  
  		tup->t_tableOid = RelationGetRelid(node->ss.ss_currentRelation);
  	}
  
--- 54,69 ----
  	/*
  	 * If any system columns are requested, we have to force the tuple into
  	 * physical-tuple form to avoid "cannot extract system attribute from
! 	 * virtual tuple" errors later.  We also insert a valid value for TID
! 	 * and tableoid, which are the only actually-useful system columns.
  	 */
  	if (plan->fsSystemCol && !TupIsNull(slot))
  	{
  		HeapTuple	tup = ExecMaterializeSlot(slot);
  
+ 		/* We assume that t_ctid is initialized with its own TID */
+ 		tup->t_self = tup->t_data->t_ctid;
+ 
  		tup->t_tableOid = RelationGetRelid(node->ss.ss_currentRelation);
  	}
  
#4Etsuro Fujita
fujita.etsuro@lab.ntt.co.jp
In reply to: Etsuro Fujita (#3)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

On 2015/01/19 17:10, Etsuro Fujita wrote:

Attached is an updated version of the patch.

I'll add this to the next CF.

Best regards,
Etsuro Fujita

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#5Ashutosh Bapat
ashutosh.bapat@enterprisedb.com
In reply to: Etsuro Fujita (#1)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

Hi Fujita-san,
I am having some minor problems running this repro

On Thu, Jan 15, 2015 at 12:45 PM, Etsuro Fujita <fujita.etsuro@lab.ntt.co.jp

wrote:

Here is an example using postgres_fdw.

[Terminal 1]
postgres=# create table t (a int, b int);
CREATE TABLE
postgres=# insert into t values (1, 1);
INSERT 0 1
postgres=# begin;
BEGIN
postgres=# update t set b = b * 2;
UPDATE 1

[Terminal 2]
postgres=# create foreign table ft (a int) server loopback options
(table_name 'lbt');

There isn't any table "lbt" mentioned here. Do you mean "t" here?

CREATE FOREIGN TABLE
postgres=# insert into ft values (1);
INSERT 0 1
postgres=# select tableoid, ctid, * from ft;
tableoid | ctid | a
----------+-------+---
25092 | (0,1) | 1
(1 row)

Shouldn't we see two values here one inserted in 't' and one in "ft"

postgres=# select ft.tableoid, ft.ctid, ft.* from t, ft where t.a = ft.a
for update;

[Terminal 1]
postgres=# commit;
COMMIT

[Terminal 2]
postgres=# select ft.tableoid, ft.ctid, ft.* from t, ft where t.a = ft.a
for update;
tableoid | ctid | a
----------+----------------+---
0 | (4294967295,0) | 1
(1 row)

Instead of this result, I got following error
ERROR: could not serialize access due to concurrent update
CONTEXT: Remote SQL command: SELECT a, ctid FROM public.t FOR UPDATE

Am I missing something while reproducing the problem?

Note that tableoid and ctid have been changed!

I think the reason for that is because EvalPlanQualFetchRowMarks doesn't
properly set tableoid and ctid for foreign tables, IIUC. I think this
should be fixed. Please find attached a patch. The patch slightly
relates to [1], so if it is reasonable, I'll update [1] on top of this.

[1] https://commitfest.postgresql.org/action/patch_view?id=1386

Best regards,
Etsuro Fujita

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

--
Best Wishes,
Ashutosh Bapat
EnterpriseDB Corporation
The Postgres Database Company

#6Etsuro Fujita
fujita.etsuro@lab.ntt.co.jp
In reply to: Ashutosh Bapat (#5)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

Hi Ashutosh,

On 2015/02/03 16:44, Ashutosh Bapat wrote:

I am having some minor problems running this repro

[Terminal 2]
postgres=# create foreign table ft (a int) server loopback options
(table_name 'lbt');

There isn't any table "lbt" mentioned here. Do you mean "t" here?

Sorry, my explanation was not enough. "lbt" means a foreign table named
"lbt" defined on a foreign server named "loopback". It'd be defined eg,
in the following manner:

$ createdb efujita

efujita=# create table lbt (a int);
CREATE TABLE

postgres=# create server loopback foreign data wrapper postgres_fdw
options (dbname 'efujita');
CREATE SERVER
postgres=# create user mapping for current_user server loopback;
CREATE USER MAPPING
postgres=# create foreign table ft (a int) server loopback options
(table_name 'lbt');
CREATE FOREIGN TABLE

Thanks for the review!

Best regards,
Etsuro Fujita

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#7Ashutosh Bapat
ashutosh.bapat@enterprisedb.com
In reply to: Etsuro Fujita (#6)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

Hi Fujita-san,
I tried reproducing the issue with the steps summarised.
Here's my setup
postgres=# \d ft
Foreign table "public.ft"
Column | Type | Modifiers | FDW Options
--------+---------+-----------+-------------
a | integer | |
Server: loopback
FDW Options: (table_name 'lbt')

postgres=# \d lbt
Table "public.lbt"
Column | Type | Modifiers
--------+---------+-----------
a | integer |

The select (without for update) returns me two rows (one inserted in lbt
and one in ft), whereas in your description there is only one row. For me,
I am getting following error
postgres=# select ft.tableoid, ft.ctid, ft.* from lbt, ft where lbt.a = ft.a
for update;
ERROR: could not serialize access due to concurrent update
CONTEXT: Remote SQL command: SELECT a, ctid FROM public.lbt FOR UPDATE
postgres=#

after commit on terminal 1.

Am I missing something?
--
Best Wishes,
Ashutosh Bapat
EnterpriseDB Corporation
The Postgres Database Company

#8Etsuro Fujita
fujita.etsuro@lab.ntt.co.jp
In reply to: Ashutosh Bapat (#7)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

On 2015/03/09 16:02, Ashutosh Bapat wrote:

I tried reproducing the issue with the steps summarised.

Thanks for the review!

Here's my setup

Sorry, my explanation was not enough, but such was not my intention.
I'll re-summarize the steps below:

[Create a test environment]

$ createdb mydatabase
$ psql mydatabase
mydatabase=# create table mytable (a int);

$ psql postgres
postgres=# create extension postgres_fdw;
postgres=# create server loopback foreign data wrapper postgres_fdw
options (dbname 'mydatabase');
postgres=# create user mapping for current_user server loopback;
postgres=# create foreign table ftable (a int) server loopback options
(table_name 'mytable');
postgres=# insert into ftable values (1);
postgres=# create table ltable (a int, b int);
postgres=# insert into ltable values (1, 1);

[Run concurrent transactions]

In terminal1:
$ psql postgres
postgres=# begin;
BEGIN
postgres=# update ltable set b = b * 2;
UPDATE 1

In terminal2:
$ psql postgres
postgres=# select tableoid, ctid, * from ftable;
tableoid | ctid | a
----------+-------+---
16394 | (0,1) | 1
(1 row)

postgres=# select f.tableoid, f.ctid, f.* from ftable f, ltable l where
f.a = l.a for update;

In terminal1:
postgres=# commit;
COMMIT

In terminal2:(When committing the UPDATE query in terminal1, psql in
terminal2 will display the result for the SELECT-FOR-UPDATE query as
shown below.)
tableoid | ctid | a
----------+----------------+---
0 | (4294967295,0) | 1
(1 row)

Best regards,
Etsuro Fujita

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#9Ashutosh Bapat
ashutosh.bapat@enterprisedb.com
In reply to: Etsuro Fujita (#8)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

Now I can reproduce the problem.

Sanity
--------
Patch compiles cleanly and make check passes. The tests in file_fdw and
postgres_fdw contrib modules pass.

The patch works as expected in the test case reported.

I have only one doubt.
In EvalPlanQualFetchRowMarks(). tuple->t_self is assigned from td->t_ctid.
CTID or even t_self may be valid for foreign tables based on postgres_fdw
but may not be valid for other FDWs. So, this assignment might put some
garbage in t_self, rather we should set it to invalid as done prior to the
patch. I might have missed some previous thread, we decided to go this
route, so ignore the comment, in that case.

Apart from this, I do not have any comments here.

On Mon, Mar 9, 2015 at 4:05 PM, Etsuro Fujita <fujita.etsuro@lab.ntt.co.jp>
wrote:

On 2015/03/09 16:02, Ashutosh Bapat wrote:

I tried reproducing the issue with the steps summarised.

Thanks for the review!

Here's my setup

Sorry, my explanation was not enough, but such was not my intention. I'll
re-summarize the steps below:

[Create a test environment]

$ createdb mydatabase
$ psql mydatabase
mydatabase=# create table mytable (a int);

$ psql postgres
postgres=# create extension postgres_fdw;
postgres=# create server loopback foreign data wrapper postgres_fdw
options (dbname 'mydatabase');
postgres=# create user mapping for current_user server loopback;
postgres=# create foreign table ftable (a int) server loopback options
(table_name 'mytable');
postgres=# insert into ftable values (1);
postgres=# create table ltable (a int, b int);
postgres=# insert into ltable values (1, 1);

[Run concurrent transactions]

In terminal1:
$ psql postgres
postgres=# begin;
BEGIN
postgres=# update ltable set b = b * 2;
UPDATE 1

In terminal2:
$ psql postgres
postgres=# select tableoid, ctid, * from ftable;
tableoid | ctid | a
----------+-------+---
16394 | (0,1) | 1
(1 row)

postgres=# select f.tableoid, f.ctid, f.* from ftable f, ltable l where
f.a = l.a for update;

In terminal1:
postgres=# commit;
COMMIT

In terminal2:(When committing the UPDATE query in terminal1, psql in
terminal2 will display the result for the SELECT-FOR-UPDATE query as shown
below.)
tableoid | ctid | a
----------+----------------+---
0 | (4294967295,0) | 1
(1 row)

Best regards,
Etsuro Fujita

--
Best Wishes,
Ashutosh Bapat
EnterpriseDB Corporation
The Postgres Database Company

#10Etsuro Fujita
fujita.etsuro@lab.ntt.co.jp
In reply to: Ashutosh Bapat (#9)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

On 2015/03/11 17:37, Ashutosh Bapat wrote:

Now I can reproduce the problem.

Sanity
--------
Patch compiles cleanly and make check passes. The tests in file_fdw and
postgres_fdw contrib modules pass.

The patch works as expected in the test case reported.

Thanks for the testing!

I have only one doubt.
In EvalPlanQualFetchRowMarks(). tuple->t_self is assigned from
td->t_ctid. CTID or even t_self may be valid for foreign tables based on
postgres_fdw but may not be valid for other FDWs. So, this assignment
might put some garbage in t_self, rather we should set it to invalid as
done prior to the patch. I might have missed some previous thread, we
decided to go this route, so ignore the comment, in that case.

Good point. As the following code and comment I added to ForeignNext, I
think that FDW authors should initialize the tup->t_data->t_ctid of each
scan tuple with its own TID. If the authors do that, the t_self is
guaranteed to be assigned the right TID from the whole-row Var (ie,
td->t_ctid) in EvalPlanQualFetchRowMarks.

/* We assume that t_ctid is initialized with its own TID */
tup->t_self = tup->t_data->t_ctid;

IMHO, I'm not sure it's worth complicating the code as you mentioned.
(I don't know whether there are any discussions about this before.)

Note that file_fdw needs no treatment. In that case, in ForeignNext,
the tup->t_data->t_ctid of each scan tuple is initialized with (0,0) (if
necessary), and then the t_self will be correctly assigned (0,0) throguh
the whole-row Var in EvalPlanQualFetchRowMarks. So, no inconsistency!

Apart from this, I do not have any comments here.

Thanks again.

Best regards,
Etsuro Fujita

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#11Ashutosh Bapat
ashutosh.bapat@enterprisedb.com
In reply to: Etsuro Fujita (#10)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

On Wed, Mar 11, 2015 at 5:10 PM, Etsuro Fujita <fujita.etsuro@lab.ntt.co.jp>
wrote:

On 2015/03/11 17:37, Ashutosh Bapat wrote:

Now I can reproduce the problem.

Sanity
--------
Patch compiles cleanly and make check passes. The tests in file_fdw and
postgres_fdw contrib modules pass.

The patch works as expected in the test case reported.

Thanks for the testing!

I have only one doubt.

In EvalPlanQualFetchRowMarks(). tuple->t_self is assigned from
td->t_ctid. CTID or even t_self may be valid for foreign tables based on
postgres_fdw but may not be valid for other FDWs. So, this assignment
might put some garbage in t_self, rather we should set it to invalid as
done prior to the patch. I might have missed some previous thread, we
decided to go this route, so ignore the comment, in that case.

Good point. As the following code and comment I added to ForeignNext, I
think that FDW authors should initialize the tup->t_data->t_ctid of each
scan tuple with its own TID. If the authors do that, the t_self is
guaranteed to be assigned the right TID from the whole-row Var (ie,
td->t_ctid) in EvalPlanQualFetchRowMarks.

/* We assume that t_ctid is initialized with its own TID */
tup->t_self = tup->t_data->t_ctid;

IMHO, I'm not sure it's worth complicating the code as you mentioned. (I
don't know whether there are any discussions about this before.)

Note that file_fdw needs no treatment. In that case, in ForeignNext, the
tup->t_data->t_ctid of each scan tuple is initialized with (0,0) (if
necessary), and then the t_self will be correctly assigned (0,0) throguh
the whole-row Var in EvalPlanQualFetchRowMarks. So, no inconsistency!

I will leave this issue for the committer to judge. Changed the status to
"ready for committer".

Apart from this, I do not have any comments here.

Thanks again.

Best regards,
Etsuro Fujita

--
Best Wishes,
Ashutosh Bapat
EnterpriseDB Corporation
The Postgres Database Company

#12Tom Lane
tgl@sss.pgh.pa.us
In reply to: Ashutosh Bapat (#11)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

Ashutosh Bapat <ashutosh.bapat@enterprisedb.com> writes:

I will leave this issue for the committer to judge. Changed the status to
"ready for committer".

I don't like the execMain.c changes much at all. They look somewhat
like they're intended to allow foreign tables to adopt a different
locking strategy, but if so they belong in a different patch that
actually implements such a thing. The changes are not all consistent
either, eg this:

! if (erm->relation &&
! erm->relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE)

is not necessary if this Assert is accurate:

! if (erm->relation)
! {
! Assert(erm->relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE);

I don't see the need for the change in nodeForeignscan.c. If the FDW has
returned a physical tuple, it can fix that for itself, while if it has
returned a virtual tuple, the ctid will be garbage in any case.

regards, tom lane

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#13Etsuro Fujita
fujita.etsuro@lab.ntt.co.jp
In reply to: Tom Lane (#12)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

On 2015/03/12 13:35, Tom Lane wrote:

I don't like the execMain.c changes much at all. They look somewhat
like they're intended to allow foreign tables to adopt a different
locking strategy,

I didn't intend such a thing. My intention is, foreign tables have
markType = ROW_MARK_COPY as ever, but I might not have correctly
understood what you pointed out. Could you elaborate on that?

The changes are not all consistent
either, eg this:

! if (erm->relation &&
! erm->relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE)

is not necessary if this Assert is accurate:

! if (erm->relation)
! {
! Assert(erm->relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE);

I modified InitPlan so that relations get opened for foreign tables
as well as local tables. So, I think the change would be necessary.

I don't see the need for the change in nodeForeignscan.c. If the FDW has
returned a physical tuple, it can fix that for itself, while if it has
returned a virtual tuple, the ctid will be garbage in any case.

If you leave nodeForeignscan.c unchanged, file_fdw would cause the
problem that I pointed out upthread. Here is an example.

[Create a test environment]

postgres=# create foreign table foo (a int) server file_server options
(filename '/home/pgsql/foo.csv', format 'csv');
CREATE FOREIGN TABLE
postgres=# select tableoid, ctid, * from foo;
tableoid | ctid | a
----------+----------------+---
16459 | (4294967295,0) | 1
(1 row)

postgres=# create table tab (a int, b int);
CREATE TABLE
postgres=# insert into tab values (1, 1);
INSERT 0 1

[Run concurrent transactions]

In terminal1:
postgres=# begin;
BEGIN
postgres=# update tab set b = b * 2;
UPDATE 1

In terminal2:
postgres=# select foo.tableoid, foo.ctid, foo.* from foo, tab where
foo.a = tab.a for update;

In terminal1:
postgres=# commit;
COMMIT

In terminal2:(When committing the UPDATE query in terminal1, psql in
terminal2 will display the result for the SELECT-FOR-UPDATE query as
shown below.)
tableoid | ctid | a
----------+-------+---
16459 | (0,0) | 1
(1 row)

Note the value of the ctid has changed!

Rather than changing nodeForeignscan.c, it might be better to change
heap_form_tuple to set the t_ctid of a formed tuple to be invalid.

Thanks for the review!

Best regards,
Etsuro Fujita

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#14Tom Lane
tgl@sss.pgh.pa.us
In reply to: Etsuro Fujita (#13)
1 attachment(s)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

Etsuro Fujita <fujita.etsuro@lab.ntt.co.jp> writes:

On 2015/03/12 13:35, Tom Lane wrote:

I don't like the execMain.c changes much at all. They look somewhat
like they're intended to allow foreign tables to adopt a different
locking strategy,

I didn't intend such a thing. My intention is, foreign tables have
markType = ROW_MARK_COPY as ever, but I might not have correctly
understood what you pointed out. Could you elaborate on that?

I think the real fix as far as postgres_fdw is concerned is in fact
to let it adopt a different ROW_MARK strategy, since it has meaningful
ctid values. However, that is not a one-size-fits-all answer. The
fundamental problem I've got with this patch is that it's trying to
impose some one-size-fits-all assumptions on all FDWs about whether
ctids are meaningful; which is wrong, not to mention not backwards
compatible.

I don't see the need for the change in nodeForeignscan.c. If the FDW has
returned a physical tuple, it can fix that for itself, while if it has
returned a virtual tuple, the ctid will be garbage in any case.

If you leave nodeForeignscan.c unchanged, file_fdw would cause the
problem that I pointed out upthread. Here is an example.

But that's self-inflicted damage, because in fact ctid correctly shows
as invalid in this case in HEAD, without this patch.

The tableoid problem can be fixed much less invasively as per the attached
patch. I think that we should continue to assume that ctid is not
meaningful (and hence should read as (4294967295,0)) in FDWs that use
ROW_MARK_COPY, and press forward on fixing the locking issues for
postgres_fdw by letting it use ROW_MARK_REFERENCE or something close to
that. That would also cause ctid to read properly for rows from
postgres_fdw.

regards, tom lane

Attachments:

bad-tableoid-in-ROW_MARK_COPY.patchtext/x-diff; charset=us-ascii; name=bad-tableoid-in-ROW_MARK_COPY.patchDownload
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index 33b172b..5a1c3b3 100644
*** a/src/backend/executor/execMain.c
--- b/src/backend/executor/execMain.c
*************** EvalPlanQualFetchRowMarks(EPQState *epqs
*** 2438,2444 ****
  			/* build a temporary HeapTuple control structure */
  			tuple.t_len = HeapTupleHeaderGetDatumLength(td);
  			ItemPointerSetInvalid(&(tuple.t_self));
! 			tuple.t_tableOid = InvalidOid;
  			tuple.t_data = td;
  
  			/* copy and store tuple */
--- 2438,2445 ----
  			/* build a temporary HeapTuple control structure */
  			tuple.t_len = HeapTupleHeaderGetDatumLength(td);
  			ItemPointerSetInvalid(&(tuple.t_self));
! 			tuple.t_tableOid = getrelid(erm->rti,
! 										epqstate->estate->es_range_table);
  			tuple.t_data = td;
  
  			/* copy and store tuple */
#15Etsuro Fujita
fujita.etsuro@lab.ntt.co.jp
In reply to: Tom Lane (#14)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

On 2015/03/13 0:50, Tom Lane wrote:

The tableoid problem can be fixed much less invasively as per the attached
patch. I think that we should continue to assume that ctid is not
meaningful (and hence should read as (4294967295,0)) in FDWs that use
ROW_MARK_COPY, and press forward on fixing the locking issues for
postgres_fdw by letting it use ROW_MARK_REFERENCE or something close to
that. That would also cause ctid to read properly for rows from
postgres_fdw.

OK, thanks!

BTW, what do you think about opening/locking foreign tables selected for
update at InitPlan, which the original patch does? As I mentioned in
[1]: /messages/by-id/54BCBBF8.3020103@lab.ntt.co.jp
assuming that.

Best regards,
Etsuro Fujita

[1]: /messages/by-id/54BCBBF8.3020103@lab.ntt.co.jp

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#16Etsuro Fujita
fujita.etsuro@lab.ntt.co.jp
In reply to: Etsuro Fujita (#15)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

On 2015/03/13 11:46, Etsuro Fujita wrote:

BTW, what do you think about opening/locking foreign tables selected for
update at InitPlan, which the original patch does? As I mentioned in
[1], ISTM that ExecOpenScanRelation called from ExecInitForeignScan is
assuming that.

[1] /messages/by-id/54BCBBF8.3020103@lab.ntt.co.jp

Let me explain further. Here is the comment in ExecOpenScanRelation:

* Determine the lock type we need. First, scan to see if target
relation
* is a result relation. If not, check if it's a FOR UPDATE/FOR SHARE
* relation. In either of those cases, we got the lock already.

I think this is not true for foreign tables selected FOR UPDATE/SHARE,
which have markType = ROW_MARK_COPY, because such foreign tables don't
get opened/locked by InitPlan. Then such foreign tables don't get
locked by neither of InitPlan nor ExecOpenScanRelation. I think this is
a bug. To fix it, I think we should open/lock such foreign tables at
InitPlan as the original patch does.

Best regards,
Etsuro Fujita

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#17Tom Lane
tgl@sss.pgh.pa.us
In reply to: Etsuro Fujita (#16)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

Etsuro Fujita <fujita.etsuro@lab.ntt.co.jp> writes:

Let me explain further. Here is the comment in ExecOpenScanRelation:

* Determine the lock type we need. First, scan to see if target
relation
* is a result relation. If not, check if it's a FOR UPDATE/FOR SHARE
* relation. In either of those cases, we got the lock already.

I think this is not true for foreign tables selected FOR UPDATE/SHARE,
which have markType = ROW_MARK_COPY, because such foreign tables don't
get opened/locked by InitPlan. Then such foreign tables don't get
locked by neither of InitPlan nor ExecOpenScanRelation. I think this is
a bug.

You are right. I think it may not matter in practice, but if the executor
is taking its own locks here then it should not overlook ROW_MARK_COPY
cases.

To fix it, I think we should open/lock such foreign tables at
InitPlan as the original patch does.

I still don't like that; InitPlan is not doing something that would
require physical table access. The right thing is to fix
ExecOpenScanRelation's idea of whether InitPlan took a lock or not,
which I've now done.

regards, tom lane

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#18Etsuro Fujita
fujita.etsuro@lab.ntt.co.jp
In reply to: Tom Lane (#17)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

On 2015/03/25 4:56, Tom Lane wrote:

Etsuro Fujita <fujita.etsuro@lab.ntt.co.jp> writes:

Let me explain further. Here is the comment in ExecOpenScanRelation:

* Determine the lock type we need. First, scan to see if target
relation
* is a result relation. If not, check if it's a FOR UPDATE/FOR SHARE
* relation. In either of those cases, we got the lock already.

I think this is not true for foreign tables selected FOR UPDATE/SHARE,
which have markType = ROW_MARK_COPY, because such foreign tables don't
get opened/locked by InitPlan. Then such foreign tables don't get
locked by neither of InitPlan nor ExecOpenScanRelation. I think this is
a bug.

You are right. I think it may not matter in practice, but if the executor
is taking its own locks here then it should not overlook ROW_MARK_COPY
cases.

To fix it, I think we should open/lock such foreign tables at
InitPlan as the original patch does.

I still don't like that; InitPlan is not doing something that would
require physical table access. The right thing is to fix
ExecOpenScanRelation's idea of whether InitPlan took a lock or not,
which I've now done.

OK, thanks.

Best regards,
Etsuro Fujita

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#19Etsuro Fujita
fujita.etsuro@lab.ntt.co.jp
In reply to: Tom Lane (#14)
1 attachment(s)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

On 2015/03/13 0:50, Tom Lane wrote:

I think the real fix as far as postgres_fdw is concerned is in fact
to let it adopt a different ROW_MARK strategy, since it has meaningful
ctid values. However, that is not a one-size-fits-all answer.

The tableoid problem can be fixed much less invasively as per the attached
patch. I think that we should continue to assume that ctid is not
meaningful (and hence should read as (4294967295,0)) in FDWs that use
ROW_MARK_COPY, and press forward on fixing the locking issues for
postgres_fdw by letting it use ROW_MARK_REFERENCE or something close to
that. That would also cause ctid to read properly for rows from
postgres_fdw.

To support ROW_MARK_REFERENCE on (postgres_fdw) foreign tables, I'd like
to propose the following FDW APIs:

RowMarkType
GetForeignRowMarkType(Oid relid,
LockClauseStrength strength);

Decide which rowmark type to use for a foreign table (that has strength
= LCS_NONE), ie, ROW_MARK_REFERENCE or ROW_MARK_COPY. (For now, the
second argument takes LCS_NONE only, but is intended to be used for the
possible extension to the other cases.) This is called during
select_rowmark_type() in the planner.

void
BeginForeignFetch(EState *estate,
ExecRowMark *erm,
List *fdw_private,
int eflags);

Begin a remote fetch. This is called during InitPlan() in the executor.

HeapTuple
ExecForeignFetch(EState *estate,
ExecRowMark *erm,
ItemPointer tupleid);

Re-fetch the specified tuple. This is called during
EvalPlanQualFetchRowMarks() in the executor.

void
EndForeignFetch(EState *estate,
ExecRowMark *erm);

End a remote fetch. This is called during ExecEndPlan() in the executor.

And I'd also like to propose to add a table/server option,
row_mark_reference, to postgres_fdw. When a user sets the option to
true for eg a foreign table, ROW_MARK_REFERENCE will be used for the
table, not ROW_MARK_COPY.

Attached is a WIP patch, which contains no docs/regression tests.

It'd be appreciated if anyone could send back any comments earlier.

Best regards,
Etsuro Fujita

Attachments:

EvalPlanQual-v3.patchtext/x-patch; name=EvalPlanQual-v3.patchDownload
*** a/contrib/postgres_fdw/option.c
--- b/contrib/postgres_fdw/option.c
***************
*** 105,111 **** postgres_fdw_validator(PG_FUNCTION_ARGS)
  		 * Validate option value, when we can do so without any context.
  		 */
  		if (strcmp(def->defname, "use_remote_estimate") == 0 ||
! 			strcmp(def->defname, "updatable") == 0)
  		{
  			/* these accept only boolean values */
  			(void) defGetBoolean(def);
--- 105,112 ----
  		 * Validate option value, when we can do so without any context.
  		 */
  		if (strcmp(def->defname, "use_remote_estimate") == 0 ||
! 			strcmp(def->defname, "updatable") == 0 ||
! 			strcmp(def->defname, "row_mark_reference") == 0)
  		{
  			/* these accept only boolean values */
  			(void) defGetBoolean(def);
***************
*** 153,158 **** InitPgFdwOptions(void)
--- 154,162 ----
  		/* updatable is available on both server and table */
  		{"updatable", ForeignServerRelationId, false},
  		{"updatable", ForeignTableRelationId, false},
+ 		/* row_mark_reference is available on both server and table */
+ 		{"row_mark_reference", ForeignServerRelationId, false},
+ 		{"row_mark_reference", ForeignTableRelationId, false},
  		{NULL, InvalidOid, false}
  	};
  
*** a/contrib/postgres_fdw/postgres_fdw.c
--- b/contrib/postgres_fdw/postgres_fdw.c
***************
*** 124,129 **** enum FdwModifyPrivateIndex
--- 124,144 ----
  };
  
  /*
+  * Similarly, this enum describes what's kept in the fdw_private list for
+  * a PlanRowMark node referencing a postgres_fdw foreign table.  We store:
+  *
+  * 1) SELECT statement text to be sent to the remote server
+  * 2) Integer list of attribute numbers retrieved by SELECT
+  */
+ enum FdwFetchPrivateIndex
+ {
+ 	/* SQL statement to execute remotely (as a String node) */
+ 	FdwFetchPrivateSelectSql,
+ 	/* Integer list of attribute numbers retrieved by SELECT */
+ 	FdwFetchPrivateRetrievedAttrs
+ };
+ 
+ /*
   * Execution state of a foreign scan using postgres_fdw.
   */
  typedef struct PgFdwScanState
***************
*** 186,191 **** typedef struct PgFdwModifyState
--- 201,230 ----
  } PgFdwModifyState;
  
  /*
+  * Execution state of a foreign fetch operation.
+  */
+ typedef struct PgFdwFetchState
+ {
+ 	Relation	rel;			/* relcache entry for the foreign table */
+ 	AttInMetadata *attinmeta;	/* attribute datatype conversion metadata */
+ 
+ 	/* for remote query execution */
+ 	PGconn	   *conn;			/* connection for the fetch */
+ 	char	   *p_name;			/* name of prepared statement, if created */
+ 
+ 	/* extracted fdw_private data */
+ 	char	   *query;			/* text of SELECT command */
+ 	List	   *retrieved_attrs;	/* attr numbers retrieved by SELECT */
+ 
+ 	/* info about parameters for prepared statement */
+ 	int			p_nums;			/* number of parameters to transmit */
+ 	FmgrInfo   *p_flinfo;		/* output conversion functions for them */
+ 
+ 	/* working memory context */
+ 	MemoryContext temp_cxt;		/* context for per-tuple temporary data */
+ } PgFdwFetchState;
+ 
+ /*
   * Workspace for analyzing a foreign table.
   */
  typedef struct PgFdwAnalyzeState
***************
*** 276,281 **** static TupleTableSlot *postgresExecForeignDelete(EState *estate,
--- 315,330 ----
  static void postgresEndForeignModify(EState *estate,
  						 ResultRelInfo *resultRelInfo);
  static int	postgresIsForeignRelUpdatable(Relation rel);
+ static RowMarkType postgresGetForeignRowMarkType(Oid relid,
+ 												 LockClauseStrength strength);
+ static void postgresBeginForeignFetch(EState *estate,
+ 									  ExecRowMark *erm,
+ 									  List *fdw_private,
+ 									  int eflags);
+ static HeapTuple postgresExecForeignFetch(EState *estate,
+ 										  ExecRowMark *erm,
+ 										  ItemPointer tupleid);
+ static void postgresEndForeignFetch(EState *estate, ExecRowMark *erm);
  static void postgresExplainForeignScan(ForeignScanState *node,
  						   ExplainState *es);
  static void postgresExplainForeignModify(ModifyTableState *mtstate,
***************
*** 306,318 **** static void get_remote_estimate(const char *sql,
  static bool ec_member_matches_foreign(PlannerInfo *root, RelOptInfo *rel,
  						  EquivalenceClass *ec, EquivalenceMember *em,
  						  void *arg);
  static void create_cursor(ForeignScanState *node);
  static void fetch_more_data(ForeignScanState *node);
  static void close_cursor(PGconn *conn, unsigned int cursor_number);
! static void prepare_foreign_modify(PgFdwModifyState *fmstate);
! static const char **convert_prep_stmt_params(PgFdwModifyState *fmstate,
! 						 ItemPointer tupleid,
! 						 TupleTableSlot *slot);
  static void store_returning_result(PgFdwModifyState *fmstate,
  					   TupleTableSlot *slot, PGresult *res);
  static int postgresAcquireSampleRowsFunc(Relation relation, int elevel,
--- 355,373 ----
  static bool ec_member_matches_foreign(PlannerInfo *root, RelOptInfo *rel,
  						  EquivalenceClass *ec, EquivalenceMember *em,
  						  void *arg);
+ static List *create_foreign_fetch_info(PlannerInfo *root,
+ 									   RelOptInfo *baserel,
+ 									   RowMarkType markType);
  static void create_cursor(ForeignScanState *node);
  static void fetch_more_data(ForeignScanState *node);
  static void close_cursor(PGconn *conn, unsigned int cursor_number);
! static char *setup_prep_stmt(PGconn *conn, char *query);
! static const char **convert_prep_stmt_params(ItemPointer tupleid,
! 											 TupleTableSlot *slot,
! 											 int p_nums,
! 											 FmgrInfo *p_flinfo,
! 											 List *target_attrs,
! 											 MemoryContext temp_context);
  static void store_returning_result(PgFdwModifyState *fmstate,
  					   TupleTableSlot *slot, PGresult *res);
  static int postgresAcquireSampleRowsFunc(Relation relation, int elevel,
***************
*** 358,363 **** postgres_fdw_handler(PG_FUNCTION_ARGS)
--- 413,424 ----
  	routine->EndForeignModify = postgresEndForeignModify;
  	routine->IsForeignRelUpdatable = postgresIsForeignRelUpdatable;
  
+ 	/* Functions for EvalPlanQual rechecking */
+ 	routine->GetForeignRowMarkType = postgresGetForeignRowMarkType;
+ 	routine->BeginForeignFetch = postgresBeginForeignFetch;
+ 	routine->ExecForeignFetch = postgresExecForeignFetch;
+ 	routine->EndForeignFetch = postgresEndForeignFetch;
+ 
  	/* Support functions for EXPLAIN */
  	routine->ExplainForeignScan = postgresExplainForeignScan;
  	routine->ExplainForeignModify = postgresExplainForeignModify;
***************
*** 850,855 **** postgresGetForeignPlan(PlannerInfo *root,
--- 911,923 ----
  					appendStringInfoString(&sql, " FOR UPDATE");
  					break;
  			}
+ 
+ 			/*
+ 			 * Build the fdw_private list that will be available to
+ 			 * EvalPlanQual that re-fetches tuples from the foreign table.
+ 			 */
+ 			rc->fdw_private = create_foreign_fetch_info(root, baserel,
+ 														rc->markType);
  		}
  	}
  
***************
*** 1391,1400 **** postgresExecForeignInsert(EState *estate,
  
  	/* Set up the prepared statement on the remote server, if we didn't yet */
  	if (!fmstate->p_name)
! 		prepare_foreign_modify(fmstate);
  
  	/* Convert parameters needed by prepared statement to text form */
! 	p_values = convert_prep_stmt_params(fmstate, NULL, slot);
  
  	/*
  	 * Execute the prepared statement, and check for success.
--- 1459,1472 ----
  
  	/* Set up the prepared statement on the remote server, if we didn't yet */
  	if (!fmstate->p_name)
! 		fmstate->p_name = setup_prep_stmt(fmstate->conn, fmstate->query);
  
  	/* Convert parameters needed by prepared statement to text form */
! 	p_values = convert_prep_stmt_params(NULL, slot,
! 										fmstate->p_nums,
! 										fmstate->p_flinfo,
! 										fmstate->target_attrs,
! 										fmstate->temp_cxt);
  
  	/*
  	 * Execute the prepared statement, and check for success.
***************
*** 1451,1457 **** postgresExecForeignUpdate(EState *estate,
  
  	/* Set up the prepared statement on the remote server, if we didn't yet */
  	if (!fmstate->p_name)
! 		prepare_foreign_modify(fmstate);
  
  	/* Get the ctid that was passed up as a resjunk column */
  	datum = ExecGetJunkAttribute(planSlot,
--- 1523,1529 ----
  
  	/* Set up the prepared statement on the remote server, if we didn't yet */
  	if (!fmstate->p_name)
! 		fmstate->p_name = setup_prep_stmt(fmstate->conn, fmstate->query);
  
  	/* Get the ctid that was passed up as a resjunk column */
  	datum = ExecGetJunkAttribute(planSlot,
***************
*** 1462,1470 **** postgresExecForeignUpdate(EState *estate,
  		elog(ERROR, "ctid is NULL");
  
  	/* Convert parameters needed by prepared statement to text form */
! 	p_values = convert_prep_stmt_params(fmstate,
! 										(ItemPointer) DatumGetPointer(datum),
! 										slot);
  
  	/*
  	 * Execute the prepared statement, and check for success.
--- 1534,1545 ----
  		elog(ERROR, "ctid is NULL");
  
  	/* Convert parameters needed by prepared statement to text form */
! 	p_values = convert_prep_stmt_params((ItemPointer) DatumGetPointer(datum),
! 										slot,
! 										fmstate->p_nums,
! 										fmstate->p_flinfo,
! 										fmstate->target_attrs,
! 										fmstate->temp_cxt);
  
  	/*
  	 * Execute the prepared statement, and check for success.
***************
*** 1521,1527 **** postgresExecForeignDelete(EState *estate,
  
  	/* Set up the prepared statement on the remote server, if we didn't yet */
  	if (!fmstate->p_name)
! 		prepare_foreign_modify(fmstate);
  
  	/* Get the ctid that was passed up as a resjunk column */
  	datum = ExecGetJunkAttribute(planSlot,
--- 1596,1602 ----
  
  	/* Set up the prepared statement on the remote server, if we didn't yet */
  	if (!fmstate->p_name)
! 		fmstate->p_name = setup_prep_stmt(fmstate->conn, fmstate->query);
  
  	/* Get the ctid that was passed up as a resjunk column */
  	datum = ExecGetJunkAttribute(planSlot,
***************
*** 1532,1540 **** postgresExecForeignDelete(EState *estate,
  		elog(ERROR, "ctid is NULL");
  
  	/* Convert parameters needed by prepared statement to text form */
! 	p_values = convert_prep_stmt_params(fmstate,
! 										(ItemPointer) DatumGetPointer(datum),
! 										NULL);
  
  	/*
  	 * Execute the prepared statement, and check for success.
--- 1607,1618 ----
  		elog(ERROR, "ctid is NULL");
  
  	/* Convert parameters needed by prepared statement to text form */
! 	p_values = convert_prep_stmt_params((ItemPointer) DatumGetPointer(datum),
! 										NULL,
! 										fmstate->p_nums,
! 										fmstate->p_flinfo,
! 										fmstate->target_attrs,
! 										fmstate->temp_cxt);
  
  	/*
  	 * Execute the prepared statement, and check for success.
***************
*** 1656,1661 **** postgresIsForeignRelUpdatable(Relation rel)
--- 1734,1962 ----
  }
  
  /*
+  * postgresGetForeignRowMarkType
+  *		Get rowmark type that we use for a foreign table.
+  */
+ static RowMarkType
+ postgresGetForeignRowMarkType(Oid relid,
+ 							  LockClauseStrength strength)
+ {
+ 	bool		row_mark_reference;
+ 	ForeignTable *table;
+ 	ForeignServer *server;
+ 	ListCell   *lc;
+ 
+ 	Assert(strength == LCS_NONE);
+ 
+ 	/*
+ 	 * By default, we use ROW_MARK_COPY for all postgres_fdw foreign tables.
+ 	 * However, this can be overridden by a per-server setting, which in turn
+ 	 * can be overridden by a per-table setting.
+ 	 */
+ 	row_mark_reference = false;	/* false means ROW_MARK_COPY */
+ 
+ 	table = GetForeignTable(relid);
+ 	server = GetForeignServer(table->serverid);
+ 
+ 	foreach(lc, server->options)
+ 	{
+ 		DefElem    *def = (DefElem *) lfirst(lc);
+ 
+ 		if (strcmp(def->defname, "row_mark_reference") == 0)
+ 			row_mark_reference = defGetBoolean(def);
+ 	}
+ 	foreach(lc, table->options)
+ 	{
+ 		DefElem    *def = (DefElem *) lfirst(lc);
+ 
+ 		if (strcmp(def->defname, "row_mark_reference") == 0)
+ 			row_mark_reference = defGetBoolean(def);
+ 	}
+ 
+ 	return row_mark_reference ? ROW_MARK_REFERENCE : ROW_MARK_COPY;
+ }
+ 
+ /*
+  * postgresBeginForeignFetch
+  *		Begin a fetch operation on a foreign table
+  */
+ static void
+ postgresBeginForeignFetch(EState *estate,
+ 						  ExecRowMark *erm,
+ 						  List *fdw_private,
+ 						  int eflags)
+ {
+ 	PgFdwFetchState *ffstate;
+ 	Relation	rel = erm->relation;
+ 	RangeTblEntry *rte;
+ 	Oid			userid;
+ 	ForeignTable *table;
+ 	ForeignServer *server;
+ 	UserMapping *user;
+ 	Oid			typefnoid;
+ 	bool		isvarlena;
+ 
+ 	/*
+ 	 * Do nothing in EXPLAIN (no ANALYZE) case.  erm->fdw_state stays NULL.
+ 	 */
+ 	if (eflags & EXEC_FLAG_EXPLAIN_ONLY)
+ 		return;
+ 
+ 	/* Begin constructing PgFdwFetchState. */
+ 	ffstate = (PgFdwFetchState *) palloc0(sizeof(PgFdwFetchState));
+ 	ffstate->rel = rel;
+ 
+ 	/*
+ 	 * Identify which user to do the remote access as.  This should match what
+ 	 * ExecCheckRTEPerms() does.
+ 	 */
+ 	rte = rt_fetch(erm->rti, estate->es_range_table);
+ 	userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
+ 
+ 	/* Get info about foreign table. */
+ 	table = GetForeignTable(RelationGetRelid(rel));
+ 	server = GetForeignServer(table->serverid);
+ 	user = GetUserMapping(userid, server->serverid);
+ 
+ 	/* Open connection; report that we'll create a prepared statement. */
+ 	ffstate->conn = GetConnection(server, user, true);
+ 	ffstate->p_name = NULL;		/* prepared statement not made yet */
+ 
+ 	/* Deconstruct fdw_private data. */
+ 	ffstate->query = strVal(list_nth(fdw_private,
+ 									 FdwFetchPrivateSelectSql));
+ 	ffstate->retrieved_attrs = (List *) list_nth(fdw_private,
+ 											 FdwFetchPrivateRetrievedAttrs);
+ 
+ 	/* Create context for per-tuple temp workspace. */
+ 	ffstate->temp_cxt = AllocSetContextCreate(estate->es_query_cxt,
+ 											  "postgres_fdw temporary data",
+ 											  ALLOCSET_SMALL_MINSIZE,
+ 											  ALLOCSET_SMALL_INITSIZE,
+ 											  ALLOCSET_SMALL_MAXSIZE);
+ 
+ 	/* Prepare for input conversion of SELECT results. */
+ 	ffstate->attinmeta = TupleDescGetAttInMetadata(RelationGetDescr(rel));
+ 
+ 	/* Prepare for output conversion of parameters used in prepared stmt. */
+ 	ffstate->p_flinfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo));
+ 	ffstate->p_nums = 0;
+ 
+ 	/* Only one transmittable parameter will be ctid */
+ 	getTypeOutputInfo(TIDOID, &typefnoid, &isvarlena);
+ 	fmgr_info(typefnoid, &ffstate->p_flinfo[ffstate->p_nums]);
+ 	ffstate->p_nums++;
+ 
+ 	erm->fdw_state = ffstate;
+ }
+ 
+ /*
+  * postgresExecForeignFetch
+  *		Fetch the specified tuple from a foreign table
+  */
+ static HeapTuple
+ postgresExecForeignFetch(EState *estate,
+ 						 ExecRowMark *erm,
+ 						 ItemPointer tupleid)
+ {
+ 	PgFdwFetchState *ffstate = (PgFdwFetchState *) erm->fdw_state;
+ 	const char **p_values;
+ 	PGresult   *res;
+ 	HeapTuple	tuple;
+ 
+ 	/* Set up the prepared statement on the remote server, if we didn't yet */
+ 	if (!ffstate->p_name)
+ 		ffstate->p_name = setup_prep_stmt(ffstate->conn, ffstate->query);
+ 
+ 	/* Convert parameters needed by prepared statement to text form */
+ 	p_values = convert_prep_stmt_params(tupleid, NULL,
+ 										ffstate->p_nums,
+ 										ffstate->p_flinfo,
+ 										NIL,
+ 										ffstate->temp_cxt);
+ 
+ 	/*
+ 	 * Execute the prepared statement, and check for success.
+ 	 *
+ 	 * We don't use a PG_TRY block here, so be careful not to throw error
+ 	 * without releasing the PGresult.
+ 	 */
+ 	res = PQexecPrepared(ffstate->conn,
+ 						 ffstate->p_name,
+ 						 ffstate->p_nums,
+ 						 p_values,
+ 						 NULL,
+ 						 NULL,
+ 						 0);
+ 	if (PQresultStatus(res) != PGRES_TUPLES_OK)
+ 		pgfdw_report_error(ERROR, res, ffstate->conn, true, ffstate->query);
+ 
+ 	/* PGresult must be released before leaving this function. */
+ 	PG_TRY();
+ 	{
+ 		/* Create the tuple */
+ 		tuple = make_tuple_from_result_row(res, 0,
+ 										   ffstate->rel,
+ 										   ffstate->attinmeta,
+ 										   ffstate->retrieved_attrs,
+ 										   ffstate->temp_cxt);
+ 		tuple->t_self = *tupleid;
+ 		tuple->t_tableOid = erm->relid;
+ 
+ 		PQclear(res);
+ 		res = NULL;
+ 	}
+ 	PG_CATCH();
+ 	{
+ 		if (res)
+ 			PQclear(res);
+ 		PG_RE_THROW();
+ 	}
+ 	PG_END_TRY();
+ 
+ 	MemoryContextReset(ffstate->temp_cxt);
+ 
+ 	return tuple;
+ }
+ 
+ /*
+  * postgresEndForeignFetch
+  *		Finish a fetch operation on a foreign table
+  */
+ static void
+ postgresEndForeignFetch(EState *estate, ExecRowMark *erm)
+ {
+ 	PgFdwFetchState *ffstate = (PgFdwFetchState *) erm->fdw_state;
+ 
+ 	/* If ffstate is NULL, we are in EXPLAIN; nothing to do */
+ 	if (ffstate == NULL)
+ 		return;
+ 
+ 	/* If we created a prepared statement, destroy it */
+ 	if (ffstate->p_name)
+ 	{
+ 		char		sql[64];
+ 		PGresult   *res;
+ 
+ 		snprintf(sql, sizeof(sql), "DEALLOCATE %s", ffstate->p_name);
+ 
+ 		/*
+ 		 * We don't use a PG_TRY block here, so be careful not to throw error
+ 		 * without releasing the PGresult.
+ 		 */
+ 		res = PQexec(ffstate->conn, sql);
+ 		if (PQresultStatus(res) != PGRES_COMMAND_OK)
+ 			pgfdw_report_error(ERROR, res, ffstate->conn, true, sql);
+ 		PQclear(res);
+ 		ffstate->p_name = NULL;
+ 	}
+ 
+ 	/* Release remote connection */
+ 	ReleaseConnection(ffstate->conn);
+ 	ffstate->conn = NULL;
+ }
+ 
+ /*
   * postgresExplainForeignScan
   *		Produce extra output for EXPLAIN of a ForeignScan on a foreign table
   */
***************
*** 1918,1923 **** ec_member_matches_foreign(PlannerInfo *root, RelOptInfo *rel,
--- 2219,2258 ----
  }
  
  /*
+  * Create the FDW-private information for a PlanRowMark node.
+  */
+ static List *
+ create_foreign_fetch_info(PlannerInfo *root,
+ 							RelOptInfo *baserel,
+ 							RowMarkType markType)
+ {
+ 	StringInfoData sql;
+ 	List	   *retrieved_attrs;
+ 	Bitmapset  *attrs_used = NULL;
+ 
+ 	Assert(markType == ROW_MARK_REFERENCE || markType == ROW_MARK_COPY);
+ 
+ 	if (markType == ROW_MARK_COPY)
+ 		return NIL;
+ 
+ 	/*
+ 	 * Build the query string to be sent for execution.
+ 	 */
+ 	initStringInfo(&sql);
+ 	/* Add a whole-row var to attrs_used to retrieve all the columns. */
+ 	attrs_used = bms_add_member(attrs_used,
+ 								0 - FirstLowInvalidHeapAttributeNumber);
+ 	deparseSelectSql(&sql, root, baserel, attrs_used, &retrieved_attrs);
+ 	appendStringInfoString(&sql, " WHERE ctid = $1");
+ 
+ 	/*
+ 	 * Build the fdw_private list that will be available to the executor.
+ 	 * Items in the list must match enum FdwFetchPrivateIndex, above.
+ 	 */
+ 	return list_make2(makeString(sql.data), retrieved_attrs);
+ }
+ 
+ /*
   * Create cursor for node's query with current parameter values.
   */
  static void
***************
*** 2154,2164 **** close_cursor(PGconn *conn, unsigned int cursor_number)
  }
  
  /*
!  * prepare_foreign_modify
   *		Establish a prepared statement for execution of INSERT/UPDATE/DELETE
   */
! static void
! prepare_foreign_modify(PgFdwModifyState *fmstate)
  {
  	char		prep_name[NAMEDATALEN];
  	char	   *p_name;
--- 2489,2500 ----
  }
  
  /*
!  * setup_prep_stmt
   *		Establish a prepared statement for execution of INSERT/UPDATE/DELETE
+  *		or re-fetching tuples for EvalPlanQual rechecking
   */
! static char *
! setup_prep_stmt(PGconn *conn, char *query)
  {
  	char		prep_name[NAMEDATALEN];
  	char	   *p_name;
***************
*** 2166,2172 **** prepare_foreign_modify(PgFdwModifyState *fmstate)
  
  	/* Construct name we'll use for the prepared statement. */
  	snprintf(prep_name, sizeof(prep_name), "pgsql_fdw_prep_%u",
! 			 GetPrepStmtNumber(fmstate->conn));
  	p_name = pstrdup(prep_name);
  
  	/*
--- 2502,2508 ----
  
  	/* Construct name we'll use for the prepared statement. */
  	snprintf(prep_name, sizeof(prep_name), "pgsql_fdw_prep_%u",
! 			 GetPrepStmtNumber(conn));
  	p_name = pstrdup(prep_name);
  
  	/*
***************
*** 2179,2196 **** prepare_foreign_modify(PgFdwModifyState *fmstate)
  	 * We don't use a PG_TRY block here, so be careful not to throw error
  	 * without releasing the PGresult.
  	 */
! 	res = PQprepare(fmstate->conn,
! 					p_name,
! 					fmstate->query,
! 					0,
! 					NULL);
  
  	if (PQresultStatus(res) != PGRES_COMMAND_OK)
! 		pgfdw_report_error(ERROR, res, fmstate->conn, true, fmstate->query);
  	PQclear(res);
  
  	/* This action shows that the prepare has been done. */
! 	fmstate->p_name = p_name;
  }
  
  /*
--- 2515,2528 ----
  	 * We don't use a PG_TRY block here, so be careful not to throw error
  	 * without releasing the PGresult.
  	 */
! 	res = PQprepare(conn, p_name, query, 0, NULL);
  
  	if (PQresultStatus(res) != PGRES_COMMAND_OK)
! 		pgfdw_report_error(ERROR, res, conn, true, query);
  	PQclear(res);
  
  	/* This action shows that the prepare has been done. */
! 	return p_name;
  }
  
  /*
***************
*** 2203,2238 **** prepare_foreign_modify(PgFdwModifyState *fmstate)
   * Data is constructed in temp_cxt; caller should reset that after use.
   */
  static const char **
! convert_prep_stmt_params(PgFdwModifyState *fmstate,
! 						 ItemPointer tupleid,
! 						 TupleTableSlot *slot)
  {
  	const char **p_values;
  	int			pindex = 0;
  	MemoryContext oldcontext;
  
! 	oldcontext = MemoryContextSwitchTo(fmstate->temp_cxt);
  
! 	p_values = (const char **) palloc(sizeof(char *) * fmstate->p_nums);
  
  	/* 1st parameter should be ctid, if it's in use */
  	if (tupleid != NULL)
  	{
  		/* don't need set_transmission_modes for TID output */
! 		p_values[pindex] = OutputFunctionCall(&fmstate->p_flinfo[pindex],
  											  PointerGetDatum(tupleid));
  		pindex++;
  	}
  
  	/* get following parameters from slot */
! 	if (slot != NULL && fmstate->target_attrs != NIL)
  	{
  		int			nestlevel;
  		ListCell   *lc;
  
  		nestlevel = set_transmission_modes();
  
! 		foreach(lc, fmstate->target_attrs)
  		{
  			int			attnum = lfirst_int(lc);
  			Datum		value;
--- 2535,2573 ----
   * Data is constructed in temp_cxt; caller should reset that after use.
   */
  static const char **
! convert_prep_stmt_params(ItemPointer tupleid,
! 						 TupleTableSlot *slot,
! 						 int p_nums,
! 						 FmgrInfo *p_flinfo,
! 						 List *target_attrs,
! 						 MemoryContext temp_context)
  {
  	const char **p_values;
  	int			pindex = 0;
  	MemoryContext oldcontext;
  
! 	oldcontext = MemoryContextSwitchTo(temp_context);
  
! 	p_values = (const char **) palloc(sizeof(char *) * p_nums);
  
  	/* 1st parameter should be ctid, if it's in use */
  	if (tupleid != NULL)
  	{
  		/* don't need set_transmission_modes for TID output */
! 		p_values[pindex] = OutputFunctionCall(&p_flinfo[pindex],
  											  PointerGetDatum(tupleid));
  		pindex++;
  	}
  
  	/* get following parameters from slot */
! 	if (slot != NULL && target_attrs != NIL)
  	{
  		int			nestlevel;
  		ListCell   *lc;
  
  		nestlevel = set_transmission_modes();
  
! 		foreach(lc, target_attrs)
  		{
  			int			attnum = lfirst_int(lc);
  			Datum		value;
***************
*** 2242,2248 **** convert_prep_stmt_params(PgFdwModifyState *fmstate,
  			if (isnull)
  				p_values[pindex] = NULL;
  			else
! 				p_values[pindex] = OutputFunctionCall(&fmstate->p_flinfo[pindex],
  													  value);
  			pindex++;
  		}
--- 2577,2583 ----
  			if (isnull)
  				p_values[pindex] = NULL;
  			else
! 				p_values[pindex] = OutputFunctionCall(&p_flinfo[pindex],
  													  value);
  			pindex++;
  		}
***************
*** 2250,2256 **** convert_prep_stmt_params(PgFdwModifyState *fmstate,
  		reset_transmission_modes(nestlevel);
  	}
  
! 	Assert(pindex == fmstate->p_nums);
  
  	MemoryContextSwitchTo(oldcontext);
  
--- 2585,2591 ----
  		reset_transmission_modes(nestlevel);
  	}
  
! 	Assert(pindex == p_nums);
  
  	MemoryContextSwitchTo(oldcontext);
  
*** a/src/backend/executor/execMain.c
--- b/src/backend/executor/execMain.c
***************
*** 855,860 **** InitPlan(QueryDesc *queryDesc, int eflags)
--- 855,878 ----
  		erm->markType = rc->markType;
  		erm->waitPolicy = rc->waitPolicy;
  		ItemPointerSetInvalid(&(erm->curCtid));
+ 		erm->fdw_state = NULL;
+ 		if (relation && relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+ 		{
+ 			/*
+ 			 * Tell the FDW to init fdw_state.
+ 			 */
+ 			FdwRoutine *fdwroutine;
+ 
+ 			Assert(rc->markType == ROW_MARK_REFERENCE);
+ 
+ 			fdwroutine = GetFdwRoutineForRelation(relation, false);
+ 			if (fdwroutine->BeginForeignFetch != NULL)
+ 				fdwroutine->BeginForeignFetch(estate,
+ 											  erm,
+ 											  rc->fdw_private,
+ 											  eflags);
+ 		}
+ 
  		estate->es_rowMarks = lappend(estate->es_rowMarks, erm);
  	}
  
***************
*** 1098,1103 **** CheckValidResultRel(Relation resultRel, CmdType operation)
--- 1116,1123 ----
  static void
  CheckValidRowMarkRel(Relation rel, RowMarkType markType)
  {
+ 	FdwRoutine *fdwroutine;
+ 
  	switch (rel->rd_rel->relkind)
  	{
  		case RELKIND_RELATION:
***************
*** 1133,1143 **** CheckValidRowMarkRel(Relation rel, RowMarkType markType)
  							  RelationGetRelationName(rel))));
  			break;
  		case RELKIND_FOREIGN_TABLE:
! 			/* Should not get here; planner should have used ROW_MARK_COPY */
! 			ereport(ERROR,
! 					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
! 					 errmsg("cannot lock rows in foreign table \"%s\"",
! 							RelationGetRelationName(rel))));
  			break;
  		default:
  			ereport(ERROR,
--- 1153,1171 ----
  							  RelationGetRelationName(rel))));
  			break;
  		case RELKIND_FOREIGN_TABLE:
! 			/* Okay only if the FDW supports it */
! 			fdwroutine = GetFdwRoutineForRelation(rel, false);
! 			if (fdwroutine->ExecForeignFetch == NULL)
! 				ereport(ERROR,
! 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
! 						 errmsg("cannot lock rows in foreign table \"%s\"",
! 								RelationGetRelationName(rel))));
! 			/* Allow referencing the table, but not actual locking clauses */
! 			if (markType != ROW_MARK_REFERENCE)
! 				ereport(ERROR,
! 						(errcode(ERRCODE_WRONG_OBJECT_TYPE),
! 						 errmsg("cannot lock rows in foreign table \"%s\"",
! 								RelationGetRelationName(rel))));
  			break;
  		default:
  			ereport(ERROR,
***************
*** 1446,1452 **** ExecEndPlan(PlanState *planstate, EState *estate)
--- 1474,1494 ----
  		ExecRowMark *erm = (ExecRowMark *) lfirst(l);
  
  		if (erm->relation)
+ 		{
+ 			if (erm->relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+ 			{
+ 				/*
+ 				 * let the FDW shut down
+ 				 */
+ 				FdwRoutine *fdwroutine;
+ 
+ 				fdwroutine = GetFdwRoutineForRelation(erm->relation, false);
+ 				if (fdwroutine->EndForeignFetch != NULL)
+ 					fdwroutine->EndForeignFetch(estate, erm);
+ 			}
+ 
  			heap_close(erm->relation, NoLock);
+ 		}
  	}
  }
  
***************
*** 2401,2407 **** EvalPlanQualFetchRowMarks(EPQState *epqstate)
  
  		if (erm->markType == ROW_MARK_REFERENCE)
  		{
! 			Buffer		buffer;
  
  			Assert(erm->relation != NULL);
  
--- 2443,2449 ----
  
  		if (erm->markType == ROW_MARK_REFERENCE)
  		{
! 			HeapTuple	copyTuple;
  
  			Assert(erm->relation != NULL);
  
***************
*** 2414,2428 **** EvalPlanQualFetchRowMarks(EPQState *epqstate)
  				continue;
  			tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
  
! 			/* okay, fetch the tuple */
! 			if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer,
! 							false, NULL))
! 				elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
  
! 			/* successful, copy and store tuple */
! 			EvalPlanQualSetTuple(epqstate, erm->rti,
! 								 heap_copytuple(&tuple));
! 			ReleaseBuffer(buffer);
  		}
  		else
  		{
--- 2456,2489 ----
  				continue;
  			tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
  
! 			if (erm->relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
! 			{
! 				/*
! 				 * let the FDW fetch the tuple
! 				 */
! 				FdwRoutine *fdwroutine;
  
! 				fdwroutine = GetFdwRoutineForRelation(erm->relation, false);
! 				copyTuple = fdwroutine->ExecForeignFetch(epqstate->estate,
! 														 erm,
! 														 &tuple.t_self);
! 			}
! 			else
! 			{
! 				Buffer		buffer;
! 
! 				/* okay, fetch the tuple */
! 				if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer,
! 								false, NULL))
! 					elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
! 
! 				/* successful, copy tuple */
! 				copyTuple = heap_copytuple(&tuple);
! 				ReleaseBuffer(buffer);
! 			}
! 
! 			/* store tuple */
! 			EvalPlanQualSetTuple(epqstate, erm->rti, copyTuple);
  		}
  		else
  		{
*** a/src/backend/nodes/copyfuncs.c
--- b/src/backend/nodes/copyfuncs.c
***************
*** 995,1000 **** _copyPlanRowMark(const PlanRowMark *from)
--- 995,1001 ----
  	COPY_SCALAR_FIELD(strength);
  	COPY_SCALAR_FIELD(waitPolicy);
  	COPY_SCALAR_FIELD(isParent);
+ 	COPY_NODE_FIELD(fdw_private);
  
  	return newnode;
  }
*** a/src/backend/nodes/outfuncs.c
--- b/src/backend/nodes/outfuncs.c
***************
*** 856,861 **** _outPlanRowMark(StringInfo str, const PlanRowMark *node)
--- 856,862 ----
  	WRITE_ENUM_FIELD(strength, LockClauseStrength);
  	WRITE_ENUM_FIELD(waitPolicy, LockWaitPolicy);
  	WRITE_BOOL_FIELD(isParent);
+ 	WRITE_NODE_FIELD(fdw_private);
  }
  
  static void
*** a/src/backend/optimizer/plan/planner.c
--- b/src/backend/optimizer/plan/planner.c
***************
*** 20,25 ****
--- 20,26 ----
  #include "access/htup_details.h"
  #include "executor/executor.h"
  #include "executor/nodeAgg.h"
+ #include "foreign/fdwapi.h"
  #include "miscadmin.h"
  #include "nodes/makefuncs.h"
  #ifdef OPTIMIZER_DEBUG
***************
*** 2229,2234 **** preprocess_rowmarks(PlannerInfo *root)
--- 2230,2236 ----
  		newrc->strength = rc->strength;
  		newrc->waitPolicy = rc->waitPolicy;
  		newrc->isParent = false;
+ 		newrc->fdw_private = NIL;
  
  		prowmarks = lappend(prowmarks, newrc);
  	}
***************
*** 2254,2259 **** preprocess_rowmarks(PlannerInfo *root)
--- 2256,2262 ----
  		newrc->strength = LCS_NONE;
  		newrc->waitPolicy = LockWaitBlock;		/* doesn't matter */
  		newrc->isParent = false;
+ 		newrc->fdw_private = NIL;
  
  		prowmarks = lappend(prowmarks, newrc);
  	}
***************
*** 2274,2280 **** select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength)
  	}
  	else if (rte->relkind == RELKIND_FOREIGN_TABLE)
  	{
! 		/* For now, we force all foreign tables to use ROW_MARK_COPY */
  		return ROW_MARK_COPY;
  	}
  	else
--- 2277,2298 ----
  	}
  	else if (rte->relkind == RELKIND_FOREIGN_TABLE)
  	{
! 		/* For a non-target, non-locked table, let the FDW select the type */
! 		if (strength == LCS_NONE)
! 		{
! 			FdwRoutine *fdwroutine;
! 			RowMarkType type;
! 
! 			fdwroutine = GetFdwRoutineByRelId(rte->relid);
! 			if (fdwroutine->GetForeignRowMarkType != NULL)
! 			{
! 				type = fdwroutine->GetForeignRowMarkType(rte->relid, strength);
! 				if (type != ROW_MARK_REFERENCE && type != ROW_MARK_COPY)
! 					elog(ERROR, "unrecognized RowMarkType %d", (int) type);
! 				return type;
! 			}
! 		}
! 		/* Otherwise, we force all foreign tables to use ROW_MARK_COPY */
  		return ROW_MARK_COPY;
  	}
  	else
*** a/src/backend/optimizer/prep/prepunion.c
--- b/src/backend/optimizer/prep/prepunion.c
***************
*** 1395,1400 **** expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
--- 1395,1401 ----
  			newrc->strength = oldrc->strength;
  			newrc->waitPolicy = oldrc->waitPolicy;
  			newrc->isParent = false;
+ 			newrc->fdw_private = NIL;
  
  			/* Include child's rowmark type in parent's allMarkTypes */
  			oldrc->allMarkTypes |= newrc->allMarkTypes;
*** a/src/include/foreign/fdwapi.h
--- b/src/include/foreign/fdwapi.h
***************
*** 13,18 ****
--- 13,19 ----
  #define FDWAPI_H
  
  #include "nodes/execnodes.h"
+ #include "nodes/plannodes.h"
  #include "nodes/relation.h"
  
  /* To avoid including explain.h here, reference ExplainState thus: */
***************
*** 82,87 **** typedef void (*EndForeignModify_function) (EState *estate,
--- 83,102 ----
  
  typedef int (*IsForeignRelUpdatable_function) (Relation rel);
  
+ typedef RowMarkType (*GetForeignRowMarkType_function) (Oid relid,
+ 													   LockClauseStrength strength);
+ 
+ typedef void (*BeginForeignFetch_function) (EState *estate,
+ 											ExecRowMark *erm,
+ 											List *fdw_private,
+ 											int eflags);
+ 
+ typedef HeapTuple (*ExecForeignFetch_function) (EState *estate,
+ 												ExecRowMark *erm,
+ 												ItemPointer tupleid);
+ 
+ typedef void (*EndForeignFetch_function) (EState *estate, ExecRowMark *erm);
+ 
  typedef void (*ExplainForeignScan_function) (ForeignScanState *node,
  													struct ExplainState *es);
  
***************
*** 141,146 **** typedef struct FdwRoutine
--- 156,167 ----
  	EndForeignModify_function EndForeignModify;
  	IsForeignRelUpdatable_function IsForeignRelUpdatable;
  
+ 	/* Functions for EvalPlanQual rechecking */
+ 	GetForeignRowMarkType_function GetForeignRowMarkType;
+ 	BeginForeignFetch_function BeginForeignFetch;
+ 	ExecForeignFetch_function ExecForeignFetch;
+ 	EndForeignFetch_function EndForeignFetch;
+ 
  	/* Support functions for EXPLAIN */
  	ExplainForeignScan_function ExplainForeignScan;
  	ExplainForeignModify_function ExplainForeignModify;
*** a/src/include/nodes/execnodes.h
--- b/src/include/nodes/execnodes.h
***************
*** 420,426 **** typedef struct EState
   * subqueries-in-FROM will have an ExecRowMark with relation == NULL.  See
   * PlanRowMark for details about most of the fields.  In addition to fields
   * directly derived from PlanRowMark, we store curCtid, which is used by the
!  * WHERE CURRENT OF code.
   *
   * EState->es_rowMarks is a list of these structs.
   */
--- 420,427 ----
   * subqueries-in-FROM will have an ExecRowMark with relation == NULL.  See
   * PlanRowMark for details about most of the fields.  In addition to fields
   * directly derived from PlanRowMark, we store curCtid, which is used by the
!  * WHERE CURRENT OF code, and fdw_state, which is used by the FDW referencing
!  * a foreign table.
   *
   * EState->es_rowMarks is a list of these structs.
   */
***************
*** 434,439 **** typedef struct ExecRowMark
--- 435,441 ----
  	RowMarkType markType;		/* see enum in nodes/plannodes.h */
  	LockWaitPolicy waitPolicy;	/* NOWAIT and SKIP LOCKED */
  	ItemPointerData curCtid;	/* ctid of currently locked tuple, if any */
+ 	void	   *fdw_state;		/* foreign-data wrapper can keep state here */
  } ExecRowMark;
  
  /*
*** a/src/include/nodes/plannodes.h
--- b/src/include/nodes/plannodes.h
***************
*** 808,818 **** typedef struct Limit
   * FUNCTION scans) we have to copy the whole row value.  ROW_MARK_COPY is
   * pretty inefficient, since most of the time we'll never need the data; but
   * fortunately the case is not performance-critical in practice.  Note that
!  * we use ROW_MARK_COPY for non-target foreign tables, even if the FDW has a
!  * concept of rowid and so could theoretically support some form of
!  * ROW_MARK_REFERENCE.  Although copying the whole row value is inefficient,
!  * it's probably still faster than doing a second remote fetch, so it doesn't
!  * seem worth the extra complexity to permit ROW_MARK_REFERENCE.
   */
  typedef enum RowMarkType
  {
--- 808,816 ----
   * FUNCTION scans) we have to copy the whole row value.  ROW_MARK_COPY is
   * pretty inefficient, since most of the time we'll never need the data; but
   * fortunately the case is not performance-critical in practice.  Note that
!  * we use ROW_MARK_COPY for non-target foreign tables by default.  However,
!  * we allow the FDW to use ROW_MARK_REFERENCE for its foreign tables if the
!  * FDW has a concept of rowid and supports the ROW_MARK_REFERENCE operation.
   */
  typedef enum RowMarkType
  {
***************
*** 875,880 **** typedef struct PlanRowMark
--- 873,879 ----
  	LockClauseStrength strength;	/* LockingClause's strength, or LCS_NONE */
  	LockWaitPolicy waitPolicy;	/* NOWAIT and SKIP LOCKED options */
  	bool		isParent;		/* true if this is a "dummy" parent entry */
+ 	List	   *fdw_private;	/* private data for FDW, if foreign table */
  } PlanRowMark;
  
  
#20Tom Lane
tgl@sss.pgh.pa.us
In reply to: Etsuro Fujita (#19)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

Etsuro Fujita <fujita.etsuro@lab.ntt.co.jp> writes:

To support ROW_MARK_REFERENCE on (postgres_fdw) foreign tables, I'd like
to propose the following FDW APIs:

RowMarkType
GetForeignRowMarkType(Oid relid,
LockClauseStrength strength);

Decide which rowmark type to use for a foreign table (that has strength
= LCS_NONE), ie, ROW_MARK_REFERENCE or ROW_MARK_COPY. (For now, the
second argument takes LCS_NONE only, but is intended to be used for the
possible extension to the other cases.) This is called during
select_rowmark_type() in the planner.

Why would you restrict that to LCS_NONE? Seems like the point is to give
the FDW control of the rowmark behavior, not only partial control.
(For the same reason I disagree with the error check in the patch that
restricts which ROW_MARK options this function can return. If the FDW has
TIDs, seems like it could reasonably use whatever options tables can use.)

void
BeginForeignFetch(EState *estate,
ExecRowMark *erm,
List *fdw_private,
int eflags);

Begin a remote fetch. This is called during InitPlan() in the executor.

The begin/end functions seem like useless extra mechanism. Why wouldn't
the FDW just handle this during its regular scan setup? It could look to
see whether the foreign table is referenced by any ExecRowMarks (possibly
there's room to add some convenience functions to help with that). What's
more, that design would make it simpler if the basic row fetch needs to be
modified.

And I'd also like to propose to add a table/server option,
row_mark_reference, to postgres_fdw. When a user sets the option to
true for eg a foreign table, ROW_MARK_REFERENCE will be used for the
table, not ROW_MARK_COPY.

Why would we leave that in the hands of the user? Hardly anybody is going
to know what to do with the option, or even that they should do something
with it. It's basically only of value for debugging AFAICS, but if we
expose an option we're going to be stuck with supporting it forever.

regards, tom lane

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#21Etsuro Fujita
fujita.etsuro@lab.ntt.co.jp
In reply to: Tom Lane (#20)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

On 2015/04/08 7:44, Tom Lane wrote:

Etsuro Fujita <fujita.etsuro@lab.ntt.co.jp> writes:

To support ROW_MARK_REFERENCE on (postgres_fdw) foreign tables, I'd like
to propose the following FDW APIs:

RowMarkType
GetForeignRowMarkType(Oid relid,
LockClauseStrength strength);

Decide which rowmark type to use for a foreign table (that has strength
= LCS_NONE), ie, ROW_MARK_REFERENCE or ROW_MARK_COPY. (For now, the
second argument takes LCS_NONE only, but is intended to be used for the
possible extension to the other cases.) This is called during
select_rowmark_type() in the planner.

Why would you restrict that to LCS_NONE? Seems like the point is to give
the FDW control of the rowmark behavior, not only partial control.

The reason is because I think it's comparatively more promissing to
customize the ROW_MARK type for LCS_NONE than other cases since in many
workloads no re-fetch is needed, and because I think other cases would
need more discussions. So, as a first step, I restricted that to
LCS_NONE. But I've got the point, and agree that we should give the
full control to the FDW.

(For the same reason I disagree with the error check in the patch that
restricts which ROW_MARK options this function can return. If the FDW has
TIDs, seems like it could reasonably use whatever options tables can use.)

Will fix.

void
BeginForeignFetch(EState *estate,
ExecRowMark *erm,
List *fdw_private,
int eflags);

Begin a remote fetch. This is called during InitPlan() in the executor.

The begin/end functions seem like useless extra mechanism. Why wouldn't
the FDW just handle this during its regular scan setup? It could look to
see whether the foreign table is referenced by any ExecRowMarks (possibly
there's room to add some convenience functions to help with that). What's
more, that design would make it simpler if the basic row fetch needs to be
modified.

The reason is just because it's easy to understand the structure at
least to me since the begin/exec/end are all done in a higher level of
the executor. What's more, the begin/end can be done once per foreign
scan node for the multi-table update case. But I feel inclined to agree
with you on this point also.

And I'd also like to propose to add a table/server option,
row_mark_reference, to postgres_fdw. When a user sets the option to
true for eg a foreign table, ROW_MARK_REFERENCE will be used for the
table, not ROW_MARK_COPY.

Why would we leave that in the hands of the user? Hardly anybody is going
to know what to do with the option, or even that they should do something
with it. It's basically only of value for debugging AFAICS,

Agreed. (When begining to update postgres_fdw docs, I also started to
think so.)

I'll update the patch, which will contain only an infrastructure for
this in the PG core, and will not contain any postgres_fdw change.

Thank you for taking the time to review the patch!

Best regards,
Etsuro Fujita

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#22Etsuro Fujita
fujita.etsuro@lab.ntt.co.jp
In reply to: Etsuro Fujita (#21)
1 attachment(s)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

On 2015/04/09 12:07, Etsuro Fujita wrote:

I'll update the patch, which will contain only an infrastructure for
this in the PG core, and will not contain any postgres_fdw change.

I updated the patch based on your comments. Updated patch attached. In
the patch the following FDW APIs have been proposed:

+ RowMarkType
+ GetForeignRowMarkType (LockClauseStrength strength);
+ bool
+ LockForeignRow (EState *estate,
+                 ExecRowMark *erm,
+                 ItemPointer tupleid);
+ HeapTuple
+ FetchForeignRow (EState *estate,
+                  ExecRowMark *erm,
+                  ItemPointer tupleid);

I think that these APIs allow the FDW that has TIDs to use the rowmark
options such as ROW_MARK_REFERENCE, ROW_MARK_SHARE and
ROW_MARK_EXCLUSIVE for its foreign tables so as to match the local
semantics exactly, for example.

As you mentioned, it would be better to add helper functions to see
whether the foreign table is referenced by any ExecRowMarks. ISTM that
an easy way to do that is to modify ExecFindRowMark() so that it allows
for the missing case. I didn't contain such functions in the patch, though.

Best regards,
Etsuro Fujita

Attachments:

EvalPlanQual-v4.patchtext/x-diff; name=EvalPlanQual-v4.patchDownload
*** a/doc/src/sgml/fdwhandler.sgml
--- b/doc/src/sgml/fdwhandler.sgml
***************
*** 598,603 **** IsForeignRelUpdatable (Relation rel);
--- 598,687 ----
  
     </sect2>
  
+    <sect2 id="fdw-callbacks-evalplanqual-checking">
+     <title>FDW Routines For <function>EvalPlanQual</> Checking</title>
+ 
+     <para>
+      If an FDW supports <function>EvalPlanQual</> Checking, it should provide
+      some or all of the following callback functions depending on
+      the needs and capabilities of the FDW (see <filename>
+      src/backend/executor/README</> for details of <function>EvalPlanQual</>
+      Checking):
+     </para>
+ 
+     <para>
+ <programlisting>
+ RowMarkType
+ GetForeignRowMarkType (LockClauseStrength strength);
+ </programlisting>
+ 
+      Report which row-marking option to support for a lock strength
+      associated with a <literal>SELECT FOR UPDATE/SHARE</> request.
+      This is called at the beginning of query planning.
+      <literal>strength</> is a member of the <literal>LockClauseStrength</>
+      enum type.
+      The return value should be a member of the <literal>RowMarkType</>
+      enum type.  See
+      <filename>src/include/nodes/lockoptions.h</> and
+      <filename>src/include/nodes/plannodes.h</> for information about
+      these enum types.
+     </para>
+ 
+     <para>
+      If the <function>GetForeignRowMarkType</> pointer is set to
+      <literal>NULL</>, the default option is selected for any lock strength,
+      and both <function>LockForeignRow</> and <function>FetchForeignRow</>
+      described below will not be called at query execution time.
+     </para>
+ 
+     <para>
+ <programlisting>
+ bool
+ LockForeignRow (EState *estate,
+                 ExecRowMark *erm,
+                 ItemPointer tupleid);
+ </programlisting>
+ 
+      Lock one tuple in the foreign table.
+      <literal>estate</> is global execution state for the query.
+      <literal>erm</> is the <structname>ExecRowMark</> struct describing
+      the target foreign table.
+      <literal>tupleid</> identifies the tuple to be locked.
+      This function should return <literal>true</>, if the FDW lock the tuple
+      successfully.  Otherwise, return <literal>false</>.
+     </para>
+ 
+     <para>
+      If the <function>LockForeignRow</> pointer is set to
+      <literal>NULL</>, attempts to lock the tuple will fail
+      with an error message.
+     </para>
+ 
+     <para>
+ <programlisting>
+ HeapTuple
+ FetchForeignRow (EState *estate,
+                  ExecRowMark *erm,
+                  ItemPointer tupleid);
+ </programlisting>
+ 
+      Fetch one tuple from the foreign table.
+      <literal>estate</> is global execution state for the query.
+      <literal>erm</> is the <structname>ExecRowMark</> struct describing
+      the target foreign table.
+      <literal>tupleid</> identifies the tuple to be fetched.
+      This function should return the fetched tuple, if the FDW fetch the
+      tuple successfully.  Otherwise, return NULL.
+     </para>
+ 
+     <para>
+      If the <function>FetchForeignRow</> pointer is set to
+      <literal>NULL</>, attempts to fetch the tuple will fail
+      with an error message.
+     </para>
+ 
+    </sect2>
+ 
     <sect2 id="fdw-callbacks-explain">
      <title>FDW Routines for <command>EXPLAIN</></title>
  
***************
*** 1011,1017 **** GetForeignServerByName(const char *name, bool missing_ok);
       join conditions.  However, matching the local semantics exactly would
       require an additional remote access for every row, and might be
       impossible anyway depending on what locking semantics the external data
!      source provides.
      </para>
  
    </sect1>
--- 1095,1104 ----
       join conditions.  However, matching the local semantics exactly would
       require an additional remote access for every row, and might be
       impossible anyway depending on what locking semantics the external data
!      source provides.  If necessary, however, the FDW can change this behavior
!      so as to match the local semantics, using its callback functions
!      <function>GetForeignRowMarkType</>, <function>LockForeignRow</>, and
!      <function>FetchForeignRow</>.
      </para>
  
    </sect1>
*** a/src/backend/executor/execMain.c
--- b/src/backend/executor/execMain.c
***************
*** 855,860 **** InitPlan(QueryDesc *queryDesc, int eflags)
--- 855,861 ----
  		erm->markType = rc->markType;
  		erm->waitPolicy = rc->waitPolicy;
  		ItemPointerSetInvalid(&(erm->curCtid));
+ 		erm->fdw_state = NULL;
  		estate->es_rowMarks = lappend(estate->es_rowMarks, erm);
  	}
  
***************
*** 1098,1103 **** CheckValidResultRel(Relation resultRel, CmdType operation)
--- 1099,1106 ----
  static void
  CheckValidRowMarkRel(Relation rel, RowMarkType markType)
  {
+ 	FdwRoutine *fdwroutine;
+ 
  	switch (rel->rd_rel->relkind)
  	{
  		case RELKIND_RELATION:
***************
*** 1133,1143 **** CheckValidRowMarkRel(Relation rel, RowMarkType markType)
  							  RelationGetRelationName(rel))));
  			break;
  		case RELKIND_FOREIGN_TABLE:
! 			/* Should not get here; planner should have used ROW_MARK_COPY */
! 			ereport(ERROR,
! 					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
! 					 errmsg("cannot lock rows in foreign table \"%s\"",
! 							RelationGetRelationName(rel))));
  			break;
  		default:
  			ereport(ERROR,
--- 1136,1150 ----
  							  RelationGetRelationName(rel))));
  			break;
  		case RELKIND_FOREIGN_TABLE:
! 			/* Okay only if the FDW supports it */
! 			fdwroutine = GetFdwRoutineForRelation(rel, false);
! 			if ((markType != ROW_MARK_REFERENCE &&
! 				 fdwroutine->LockForeignRow == NULL) ||
! 				fdwroutine->FetchForeignRow == NULL)
! 				ereport(ERROR,
! 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
! 						 errmsg("cannot lock rows in foreign table \"%s\"",
! 								RelationGetRelationName(rel))));
  			break;
  		default:
  			ereport(ERROR,
***************
*** 2401,2407 **** EvalPlanQualFetchRowMarks(EPQState *epqstate)
  
  		if (erm->markType == ROW_MARK_REFERENCE)
  		{
! 			Buffer		buffer;
  
  			Assert(erm->relation != NULL);
  
--- 2408,2414 ----
  
  		if (erm->markType == ROW_MARK_REFERENCE)
  		{
! 			HeapTuple	copyTuple;
  
  			Assert(erm->relation != NULL);
  
***************
*** 2415,2428 **** EvalPlanQualFetchRowMarks(EPQState *epqstate)
  			tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
  
  			/* okay, fetch the tuple */
- 			if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer,
- 							false, NULL))
- 				elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
  
! 			/* successful, copy and store tuple */
! 			EvalPlanQualSetTuple(epqstate, erm->rti,
! 								 heap_copytuple(&tuple));
! 			ReleaseBuffer(buffer);
  		}
  		else
  		{
--- 2422,2454 ----
  			tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
  
  			/* okay, fetch the tuple */
  
! 			if (erm->relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
! 			{
! 				FdwRoutine *fdwroutine;
! 
! 				/* let the FDW do the work */
! 				fdwroutine = GetFdwRoutineForRelation(erm->relation, false);
! 				copyTuple = fdwroutine->FetchForeignRow(epqstate->estate,
! 														erm, &tuple.t_self);
! 				if (!copyTuple)
! 					elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
! 			}
! 			else
! 			{
! 				Buffer		buffer;
! 
! 				if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer,
! 								false, NULL))
! 					elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
! 
! 				/* successful, copy tuple */
! 				copyTuple = heap_copytuple(&tuple);
! 				ReleaseBuffer(buffer);
! 			}
! 
! 			/* store tuple */
! 			EvalPlanQualSetTuple(epqstate, erm->rti, copyTuple);
  		}
  		else
  		{
*** a/src/backend/executor/nodeLockRows.c
--- b/src/backend/executor/nodeLockRows.c
***************
*** 25,30 ****
--- 25,31 ----
  #include "access/xact.h"
  #include "executor/executor.h"
  #include "executor/nodeLockRows.h"
+ #include "foreign/fdwapi.h"
  #include "storage/bufmgr.h"
  #include "utils/rel.h"
  #include "utils/tqual.h"
***************
*** 112,117 **** lnext:
--- 113,138 ----
  		tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
  
  		/* okay, try to lock the tuple */
+ 
+ 		if (erm->relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+ 		{
+ 			FdwRoutine *fdwroutine;
+ 
+ 			/* Let the FDW do the work */
+ 			fdwroutine = GetFdwRoutineForRelation(erm->relation, false);
+ 			if (!fdwroutine->LockForeignRow(estate, erm, &tuple.t_self))
+ 			{
+ 				/* couldn't get the lock */
+ 				goto lnext;
+ 			}
+ 			/* got the lock successfully */
+ 
+ 			/* Remember locked tuple's TID for EvalPlanQual testing */
+ 			erm->curCtid = tuple.t_self;
+ 
+ 			continue;
+ 		}
+ 
  		switch (erm->markType)
  		{
  			case ROW_MARK_EXCLUSIVE:
***************
*** 253,259 **** lnext:
  			ExecAuxRowMark *aerm = (ExecAuxRowMark *) lfirst(lc);
  			ExecRowMark *erm = aerm->rowmark;
  			HeapTupleData tuple;
! 			Buffer		buffer;
  
  			/* ignore non-active child tables */
  			if (!ItemPointerIsValid(&(erm->curCtid)))
--- 274,280 ----
  			ExecAuxRowMark *aerm = (ExecAuxRowMark *) lfirst(lc);
  			ExecRowMark *erm = aerm->rowmark;
  			HeapTupleData tuple;
! 			HeapTuple	copyTuple;
  
  			/* ignore non-active child tables */
  			if (!ItemPointerIsValid(&(erm->curCtid)))
***************
*** 267,280 **** lnext:
  
  			/* okay, fetch the tuple */
  			tuple.t_self = erm->curCtid;
- 			if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer,
- 							false, NULL))
- 				elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
  
! 			/* successful, copy and store tuple */
! 			EvalPlanQualSetTuple(&node->lr_epqstate, erm->rti,
! 								 heap_copytuple(&tuple));
! 			ReleaseBuffer(buffer);
  		}
  
  		/*
--- 288,320 ----
  
  			/* okay, fetch the tuple */
  			tuple.t_self = erm->curCtid;
  
! 			if (erm->relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
! 			{
! 				FdwRoutine *fdwroutine;
! 
! 				/* let the FDW do the work */
! 				fdwroutine = GetFdwRoutineForRelation(erm->relation, false);
! 				copyTuple = fdwroutine->FetchForeignRow(node->lr_epqstate.estate,
! 														erm, &tuple.t_self);
! 				if (!copyTuple)
! 					elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
! 			}
! 			else
! 			{
! 				Buffer		buffer;
! 
! 				if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer,
! 								false, NULL))
! 					elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
! 
! 				/* successful, copy tuple */
! 				copyTuple = heap_copytuple(&tuple);
! 				ReleaseBuffer(buffer);
! 			}
! 
! 			/* store tuple */
! 			EvalPlanQualSetTuple(&node->lr_epqstate, erm->rti, copyTuple);
  		}
  
  		/*
*** a/src/backend/optimizer/plan/planner.c
--- b/src/backend/optimizer/plan/planner.c
***************
*** 20,25 ****
--- 20,26 ----
  #include "access/htup_details.h"
  #include "executor/executor.h"
  #include "executor/nodeAgg.h"
+ #include "foreign/fdwapi.h"
  #include "miscadmin.h"
  #include "nodes/makefuncs.h"
  #ifdef OPTIMIZER_DEBUG
***************
*** 2274,2280 **** select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength)
  	}
  	else if (rte->relkind == RELKIND_FOREIGN_TABLE)
  	{
! 		/* For now, we force all foreign tables to use ROW_MARK_COPY */
  		return ROW_MARK_COPY;
  	}
  	else
--- 2275,2288 ----
  	}
  	else if (rte->relkind == RELKIND_FOREIGN_TABLE)
  	{
! 		FdwRoutine *fdwroutine;
! 
! 		/* Let the FDW select the rowmark type, if possible */
! 		fdwroutine = GetFdwRoutineByRelId(rte->relid);
! 		if (fdwroutine->GetForeignRowMarkType != NULL)
! 			return fdwroutine->GetForeignRowMarkType(strength);
! 
! 		/* Otherwise, we force all foreign tables to use ROW_MARK_COPY */
  		return ROW_MARK_COPY;
  	}
  	else
*** a/src/include/foreign/fdwapi.h
--- b/src/include/foreign/fdwapi.h
***************
*** 13,18 ****
--- 13,19 ----
  #define FDWAPI_H
  
  #include "nodes/execnodes.h"
+ #include "nodes/plannodes.h"
  #include "nodes/relation.h"
  
  /* To avoid including explain.h here, reference ExplainState thus: */
***************
*** 82,87 **** typedef void (*EndForeignModify_function) (EState *estate,
--- 83,98 ----
  
  typedef int (*IsForeignRelUpdatable_function) (Relation rel);
  
+ typedef RowMarkType (*GetForeignRowMarkType_function) (LockClauseStrength strength);
+ 
+ typedef bool (*LockForeignRow_function) (EState *estate,
+ 										 ExecRowMark *erm,
+ 										 ItemPointer tupleid);
+ 
+ typedef HeapTuple (*FetchForeignRow_function) (EState *estate,
+ 											   ExecRowMark *erm,
+ 											   ItemPointer tupleid);
+ 
  typedef void (*ExplainForeignScan_function) (ForeignScanState *node,
  													struct ExplainState *es);
  
***************
*** 141,146 **** typedef struct FdwRoutine
--- 152,162 ----
  	EndForeignModify_function EndForeignModify;
  	IsForeignRelUpdatable_function IsForeignRelUpdatable;
  
+ 	/* Functions for EvalPlanQual rechecking */
+ 	GetForeignRowMarkType_function GetForeignRowMarkType;
+ 	LockForeignRow_function LockForeignRow;
+ 	FetchForeignRow_function FetchForeignRow;
+ 
  	/* Support functions for EXPLAIN */
  	ExplainForeignScan_function ExplainForeignScan;
  	ExplainForeignModify_function ExplainForeignModify;
*** a/src/include/nodes/execnodes.h
--- b/src/include/nodes/execnodes.h
***************
*** 420,426 **** typedef struct EState
   * subqueries-in-FROM will have an ExecRowMark with relation == NULL.  See
   * PlanRowMark for details about most of the fields.  In addition to fields
   * directly derived from PlanRowMark, we store curCtid, which is used by the
!  * WHERE CURRENT OF code.
   *
   * EState->es_rowMarks is a list of these structs.
   */
--- 420,426 ----
   * subqueries-in-FROM will have an ExecRowMark with relation == NULL.  See
   * PlanRowMark for details about most of the fields.  In addition to fields
   * directly derived from PlanRowMark, we store curCtid, which is used by the
!  * WHERE CURRENT OF code, and fdw_state, which is used by the FDW.
   *
   * EState->es_rowMarks is a list of these structs.
   */
***************
*** 434,439 **** typedef struct ExecRowMark
--- 434,440 ----
  	RowMarkType markType;		/* see enum in nodes/plannodes.h */
  	LockWaitPolicy waitPolicy;	/* NOWAIT and SKIP LOCKED */
  	ItemPointerData curCtid;	/* ctid of currently locked tuple, if any */
+ 	void	   *fdw_state;		/* foreign-data wrapper can keep state here */
  } ExecRowMark;
  
  /*
#23Etsuro Fujita
fujita.etsuro@lab.ntt.co.jp
In reply to: Etsuro Fujita (#22)
1 attachment(s)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

On 2015/04/10 21:40, Etsuro Fujita wrote:

On 2015/04/09 12:07, Etsuro Fujita wrote:

I'll update the patch, which will contain only an infrastructure for
this in the PG core, and will not contain any postgres_fdw change.

I updated the patch based on your comments. Updated patch attached. In
the patch the following FDW APIs have been proposed:

+ RowMarkType
+ GetForeignRowMarkType (LockClauseStrength strength);
+ bool
+ LockForeignRow (EState *estate,
+                 ExecRowMark *erm,
+                 ItemPointer tupleid);
+ HeapTuple
+ FetchForeignRow (EState *estate,
+                  ExecRowMark *erm,
+                  ItemPointer tupleid);

I think that these APIs allow the FDW that has TIDs to use the rowmark
options such as ROW_MARK_REFERENCE, ROW_MARK_SHARE and
ROW_MARK_EXCLUSIVE for its foreign tables so as to match the local
semantics exactly, for example.

As you mentioned, it would be better to add helper functions to see
whether the foreign table is referenced by any ExecRowMarks. ISTM that
an easy way to do that is to modify ExecFindRowMark() so that it allows
for the missing case. I didn't contain such functions in the patch, though.

I added that function and modified docs a bit. Please find attached an
updated version of the patch.

Best regards,
Etsuro Fujita

Attachments:

EvalPlanQual-v5.patchtext/x-diff; name=EvalPlanQual-v5.patchDownload
*** a/doc/src/sgml/fdwhandler.sgml
--- b/doc/src/sgml/fdwhandler.sgml
***************
*** 211,216 **** BeginForeignScan (ForeignScanState *node,
--- 211,230 ----
      </para>
  
      <para>
+      If the FDW supports <command>SELECT FOR UPDATE/SHARE</> row locking,
+      this routine should perform any initialization needed prior to the
+      actual row locking if the foreign table is referenced in a
+      <command>SELECT FOR UPDATE/SHARE</>.  To decide whether the foreign
+      table is referenced in a <command>SELECT FOR UPDATE/SHARE</> or not,
+      it's recommended to use <function>ExecFindRowMark</> with the missing_ok
+      argument allowing a NULL return if the structure is not.  Information
+      about the row locking is accessible through its return value
+      (<structname>ExecRowMark</> struct).  (The <structfield>fdw_state</>
+      field of <structname>ExecRowMark</> is available for the FDW to store
+      any private state it needs for the operation.)
+     </para>
+ 
+     <para>
       Note that when <literal>(eflags &amp; EXEC_FLAG_EXPLAIN_ONLY)</> is
       true, this function should not perform any externally-visible actions;
       it should only do the minimum required to make the node state valid
***************
*** 598,603 **** IsForeignRelUpdatable (Relation rel);
--- 612,699 ----
  
     </sect2>
  
+    <sect2 id="fdw-callbacks-row-locking">
+     <title>FDW Routines For <command>SELECT FOR UPDATE/SHARE</> row locking
+      </title>
+ 
+     <para>
+      If an FDW supports <command>SELECT FOR UPDATE/SHARE</> row locking,
+      it should provide the following callback functions:
+     </para>
+ 
+     <para>
+ <programlisting>
+ RowMarkType
+ GetForeignRowMarkType (LockClauseStrength strength);
+ </programlisting>
+ 
+      Report which row-marking option to support for a lock strength
+      associated with a <command>SELECT FOR UPDATE/SHARE</> request.
+      This is called at the beginning of query planning.
+      <literal>strength</> is a member of the <literal>LockClauseStrength</>
+      enum type.
+      The return value should be a member of the <literal>RowMarkType</>
+      enum type.  See
+      <filename>src/include/nodes/lockoptions.h</> and
+      <filename>src/include/nodes/plannodes.h</> for information about
+      these enum types.
+     </para>
+ 
+     <para>
+      If the <function>GetForeignRowMarkType</> pointer is set to
+      <literal>NULL</>, the default option is selected for any lock strength,
+      and both <function>LockForeignRow</> and <function>FetchForeignRow</>
+      described below will not be called at query execution time.
+     </para>
+ 
+     <para>
+ <programlisting>
+ bool
+ LockForeignRow (EState *estate,
+                 ExecRowMark *erm,
+                 ItemPointer tupleid);
+ </programlisting>
+ 
+      Lock one tuple in the foreign table.
+      <literal>estate</> is global execution state for the query.
+      <literal>erm</> is the <structname>ExecRowMark</> struct describing
+      the target foreign table.
+      <literal>tupleid</> identifies the tuple to be locked.
+      This function should return <literal>true</>, if the FDW lock the tuple
+      successfully.  Otherwise, return <literal>false</>.
+     </para>
+ 
+     <para>
+      If the <function>LockForeignRow</> pointer is set to
+      <literal>NULL</>, attempts to lock rows will fail
+      with an error message.
+     </para>
+ 
+     <para>
+ <programlisting>
+ HeapTuple
+ FetchForeignRow (EState *estate,
+                  ExecRowMark *erm,
+                  ItemPointer tupleid);
+ </programlisting>
+ 
+      Fetch one tuple from the foreign table.
+      <literal>estate</> is global execution state for the query.
+      <literal>erm</> is the <structname>ExecRowMark</> struct describing
+      the target foreign table.
+      <literal>tupleid</> identifies the tuple to be fetched.
+      This function should return the fetched tuple, if the FDW fetch the
+      tuple successfully.  Otherwise, return NULL.
+     </para>
+ 
+     <para>
+      If the <function>FetchForeignRow</> pointer is set to
+      <literal>NULL</>, attempts to lock rows will fail
+      with an error message.
+     </para>
+ 
+    </sect2>
+ 
     <sect2 id="fdw-callbacks-explain">
      <title>FDW Routines for <command>EXPLAIN</></title>
  
***************
*** 1011,1017 **** GetForeignServerByName(const char *name, bool missing_ok);
       join conditions.  However, matching the local semantics exactly would
       require an additional remote access for every row, and might be
       impossible anyway depending on what locking semantics the external data
!      source provides.
      </para>
  
    </sect1>
--- 1107,1116 ----
       join conditions.  However, matching the local semantics exactly would
       require an additional remote access for every row, and might be
       impossible anyway depending on what locking semantics the external data
!      source provides.  If necessary, however, the FDW can change this behavior
!      so as to match the local semantics, using its callback functions
!      <function>GetForeignRowMarkType</>, <function>LockForeignRow</>, and
!      <function>FetchForeignRow</>.
      </para>
  
    </sect1>
*** a/src/backend/executor/execMain.c
--- b/src/backend/executor/execMain.c
***************
*** 855,860 **** InitPlan(QueryDesc *queryDesc, int eflags)
--- 855,861 ----
  		erm->markType = rc->markType;
  		erm->waitPolicy = rc->waitPolicy;
  		ItemPointerSetInvalid(&(erm->curCtid));
+ 		erm->fdw_state = NULL;
  		estate->es_rowMarks = lappend(estate->es_rowMarks, erm);
  	}
  
***************
*** 1098,1103 **** CheckValidResultRel(Relation resultRel, CmdType operation)
--- 1099,1106 ----
  static void
  CheckValidRowMarkRel(Relation rel, RowMarkType markType)
  {
+ 	FdwRoutine *fdwroutine;
+ 
  	switch (rel->rd_rel->relkind)
  	{
  		case RELKIND_RELATION:
***************
*** 1133,1143 **** CheckValidRowMarkRel(Relation rel, RowMarkType markType)
  							  RelationGetRelationName(rel))));
  			break;
  		case RELKIND_FOREIGN_TABLE:
! 			/* Should not get here; planner should have used ROW_MARK_COPY */
! 			ereport(ERROR,
! 					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
! 					 errmsg("cannot lock rows in foreign table \"%s\"",
! 							RelationGetRelationName(rel))));
  			break;
  		default:
  			ereport(ERROR,
--- 1136,1150 ----
  							  RelationGetRelationName(rel))));
  			break;
  		case RELKIND_FOREIGN_TABLE:
! 			/* Okay only if the FDW supports it */
! 			fdwroutine = GetFdwRoutineForRelation(rel, false);
! 			if ((markType != ROW_MARK_REFERENCE &&
! 				 fdwroutine->LockForeignRow == NULL) ||
! 				fdwroutine->FetchForeignRow == NULL)
! 				ereport(ERROR,
! 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
! 						 errmsg("cannot lock rows in foreign table \"%s\"",
! 								RelationGetRelationName(rel))));
  			break;
  		default:
  			ereport(ERROR,
***************
*** 1886,1892 **** ExecBuildSlotValueDescription(Oid reloid,
   * ExecFindRowMark -- find the ExecRowMark struct for given rangetable index
   */
  ExecRowMark *
! ExecFindRowMark(EState *estate, Index rti)
  {
  	ListCell   *lc;
  
--- 1893,1899 ----
   * ExecFindRowMark -- find the ExecRowMark struct for given rangetable index
   */
  ExecRowMark *
! ExecFindRowMark(EState *estate, Index rti, bool missing_ok)
  {
  	ListCell   *lc;
  
***************
*** 1897,1903 **** ExecFindRowMark(EState *estate, Index rti)
  		if (erm->rti == rti)
  			return erm;
  	}
! 	elog(ERROR, "failed to find ExecRowMark for rangetable index %u", rti);
  	return NULL;				/* keep compiler quiet */
  }
  
--- 1904,1911 ----
  		if (erm->rti == rti)
  			return erm;
  	}
! 	if (!missing_ok)
! 		elog(ERROR, "failed to find ExecRowMark for rangetable index %u", rti);
  	return NULL;				/* keep compiler quiet */
  }
  
***************
*** 2401,2407 **** EvalPlanQualFetchRowMarks(EPQState *epqstate)
  
  		if (erm->markType == ROW_MARK_REFERENCE)
  		{
! 			Buffer		buffer;
  
  			Assert(erm->relation != NULL);
  
--- 2409,2415 ----
  
  		if (erm->markType == ROW_MARK_REFERENCE)
  		{
! 			HeapTuple	copyTuple;
  
  			Assert(erm->relation != NULL);
  
***************
*** 2415,2428 **** EvalPlanQualFetchRowMarks(EPQState *epqstate)
  			tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
  
  			/* okay, fetch the tuple */
- 			if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer,
- 							false, NULL))
- 				elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
  
! 			/* successful, copy and store tuple */
! 			EvalPlanQualSetTuple(epqstate, erm->rti,
! 								 heap_copytuple(&tuple));
! 			ReleaseBuffer(buffer);
  		}
  		else
  		{
--- 2423,2455 ----
  			tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
  
  			/* okay, fetch the tuple */
  
! 			if (erm->relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
! 			{
! 				FdwRoutine *fdwroutine;
! 
! 				/* let the FDW do the work */
! 				fdwroutine = GetFdwRoutineForRelation(erm->relation, false);
! 				copyTuple = fdwroutine->FetchForeignRow(epqstate->estate,
! 														erm, &tuple.t_self);
! 				if (copyTuple == NULL)
! 					elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
! 			}
! 			else
! 			{
! 				Buffer		buffer;
! 
! 				if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer,
! 								false, NULL))
! 					elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
! 
! 				/* successful, copy tuple */
! 				copyTuple = heap_copytuple(&tuple);
! 				ReleaseBuffer(buffer);
! 			}
! 
! 			/* store tuple */
! 			EvalPlanQualSetTuple(epqstate, erm->rti, copyTuple);
  		}
  		else
  		{
*** a/src/backend/executor/nodeLockRows.c
--- b/src/backend/executor/nodeLockRows.c
***************
*** 25,30 ****
--- 25,31 ----
  #include "access/xact.h"
  #include "executor/executor.h"
  #include "executor/nodeLockRows.h"
+ #include "foreign/fdwapi.h"
  #include "storage/bufmgr.h"
  #include "utils/rel.h"
  #include "utils/tqual.h"
***************
*** 112,117 **** lnext:
--- 113,141 ----
  		tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
  
  		/* okay, try to lock the tuple */
+ 
+ 		if (erm->relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+ 		{
+ 			FdwRoutine *fdwroutine;
+ 
+ 			/* Let the FDW do the work */
+ 			fdwroutine = GetFdwRoutineForRelation(erm->relation, false);
+ 			if (!fdwroutine->LockForeignRow(estate, erm, &tuple.t_self))
+ 			{
+ 				/* couldn't get the lock */
+ 				goto lnext;
+ 			}
+ 			/* got the lock successfully */
+ 
+ 			/*
+ 			 * Remember locked tuple's TID for EvalPlanQual testing, not for
+ 			 * WHERE CURRENT OF, which is not supported for foreign tables.
+ 			 */
+ 			erm->curCtid = tuple.t_self;
+ 
+ 			continue;
+ 		}
+ 
  		switch (erm->markType)
  		{
  			case ROW_MARK_EXCLUSIVE:
***************
*** 253,259 **** lnext:
  			ExecAuxRowMark *aerm = (ExecAuxRowMark *) lfirst(lc);
  			ExecRowMark *erm = aerm->rowmark;
  			HeapTupleData tuple;
! 			Buffer		buffer;
  
  			/* ignore non-active child tables */
  			if (!ItemPointerIsValid(&(erm->curCtid)))
--- 277,283 ----
  			ExecAuxRowMark *aerm = (ExecAuxRowMark *) lfirst(lc);
  			ExecRowMark *erm = aerm->rowmark;
  			HeapTupleData tuple;
! 			HeapTuple	copyTuple;
  
  			/* ignore non-active child tables */
  			if (!ItemPointerIsValid(&(erm->curCtid)))
***************
*** 267,280 **** lnext:
  
  			/* okay, fetch the tuple */
  			tuple.t_self = erm->curCtid;
- 			if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer,
- 							false, NULL))
- 				elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
  
! 			/* successful, copy and store tuple */
! 			EvalPlanQualSetTuple(&node->lr_epqstate, erm->rti,
! 								 heap_copytuple(&tuple));
! 			ReleaseBuffer(buffer);
  		}
  
  		/*
--- 291,323 ----
  
  			/* okay, fetch the tuple */
  			tuple.t_self = erm->curCtid;
  
! 			if (erm->relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
! 			{
! 				FdwRoutine *fdwroutine;
! 
! 				/* let the FDW do the work */
! 				fdwroutine = GetFdwRoutineForRelation(erm->relation, false);
! 				copyTuple = fdwroutine->FetchForeignRow(node->lr_epqstate.estate,
! 														erm, &tuple.t_self);
! 				if (copyTuple == NULL)
! 					elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
! 			}
! 			else
! 			{
! 				Buffer		buffer;
! 
! 				if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer,
! 								false, NULL))
! 					elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
! 
! 				/* successful, copy tuple */
! 				copyTuple = heap_copytuple(&tuple);
! 				ReleaseBuffer(buffer);
! 			}
! 
! 			/* store tuple */
! 			EvalPlanQualSetTuple(&node->lr_epqstate, erm->rti, copyTuple);
  		}
  
  		/*
***************
*** 367,373 **** ExecInitLockRows(LockRows *node, EState *estate, int eflags)
  			continue;
  
  		/* find ExecRowMark and build ExecAuxRowMark */
! 		erm = ExecFindRowMark(estate, rc->rti);
  		aerm = ExecBuildAuxRowMark(erm, outerPlan->targetlist);
  
  		/*
--- 410,416 ----
  			continue;
  
  		/* find ExecRowMark and build ExecAuxRowMark */
! 		erm = ExecFindRowMark(estate, rc->rti, false);
  		aerm = ExecBuildAuxRowMark(erm, outerPlan->targetlist);
  
  		/*
*** a/src/backend/executor/nodeModifyTable.c
--- b/src/backend/executor/nodeModifyTable.c
***************
*** 1257,1263 **** ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
  			continue;
  
  		/* find ExecRowMark (same for all subplans) */
! 		erm = ExecFindRowMark(estate, rc->rti);
  
  		/* build ExecAuxRowMark for each subplan */
  		for (i = 0; i < nplans; i++)
--- 1257,1263 ----
  			continue;
  
  		/* find ExecRowMark (same for all subplans) */
! 		erm = ExecFindRowMark(estate, rc->rti, false);
  
  		/* build ExecAuxRowMark for each subplan */
  		for (i = 0; i < nplans; i++)
*** a/src/backend/optimizer/plan/planner.c
--- b/src/backend/optimizer/plan/planner.c
***************
*** 20,25 ****
--- 20,26 ----
  #include "access/htup_details.h"
  #include "executor/executor.h"
  #include "executor/nodeAgg.h"
+ #include "foreign/fdwapi.h"
  #include "miscadmin.h"
  #include "nodes/makefuncs.h"
  #ifdef OPTIMIZER_DEBUG
***************
*** 2274,2280 **** select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength)
  	}
  	else if (rte->relkind == RELKIND_FOREIGN_TABLE)
  	{
! 		/* For now, we force all foreign tables to use ROW_MARK_COPY */
  		return ROW_MARK_COPY;
  	}
  	else
--- 2275,2288 ----
  	}
  	else if (rte->relkind == RELKIND_FOREIGN_TABLE)
  	{
! 		FdwRoutine *fdwroutine;
! 
! 		/* Let the FDW select the rowmark type, if possible */
! 		fdwroutine = GetFdwRoutineByRelId(rte->relid);
! 		if (fdwroutine->GetForeignRowMarkType != NULL)
! 			return fdwroutine->GetForeignRowMarkType(strength);
! 
! 		/* Otherwise, we force all foreign tables to use ROW_MARK_COPY */
  		return ROW_MARK_COPY;
  	}
  	else
*** a/src/include/executor/executor.h
--- b/src/include/executor/executor.h
***************
*** 195,201 **** extern void ExecConstraints(ResultRelInfo *resultRelInfo,
  				TupleTableSlot *slot, EState *estate);
  extern void ExecWithCheckOptions(ResultRelInfo *resultRelInfo,
  					 TupleTableSlot *slot, EState *estate);
! extern ExecRowMark *ExecFindRowMark(EState *estate, Index rti);
  extern ExecAuxRowMark *ExecBuildAuxRowMark(ExecRowMark *erm, List *targetlist);
  extern TupleTableSlot *EvalPlanQual(EState *estate, EPQState *epqstate,
  			 Relation relation, Index rti, int lockmode,
--- 195,201 ----
  				TupleTableSlot *slot, EState *estate);
  extern void ExecWithCheckOptions(ResultRelInfo *resultRelInfo,
  					 TupleTableSlot *slot, EState *estate);
! extern ExecRowMark *ExecFindRowMark(EState *estate, Index rti, bool missing_ok);
  extern ExecAuxRowMark *ExecBuildAuxRowMark(ExecRowMark *erm, List *targetlist);
  extern TupleTableSlot *EvalPlanQual(EState *estate, EPQState *epqstate,
  			 Relation relation, Index rti, int lockmode,
*** a/src/include/foreign/fdwapi.h
--- b/src/include/foreign/fdwapi.h
***************
*** 13,18 ****
--- 13,19 ----
  #define FDWAPI_H
  
  #include "nodes/execnodes.h"
+ #include "nodes/plannodes.h"
  #include "nodes/relation.h"
  
  /* To avoid including explain.h here, reference ExplainState thus: */
***************
*** 82,87 **** typedef void (*EndForeignModify_function) (EState *estate,
--- 83,98 ----
  
  typedef int (*IsForeignRelUpdatable_function) (Relation rel);
  
+ typedef RowMarkType (*GetForeignRowMarkType_function) (LockClauseStrength strength);
+ 
+ typedef bool (*LockForeignRow_function) (EState *estate,
+ 										 ExecRowMark *erm,
+ 										 ItemPointer tupleid);
+ 
+ typedef HeapTuple (*FetchForeignRow_function) (EState *estate,
+ 											   ExecRowMark *erm,
+ 											   ItemPointer tupleid);
+ 
  typedef void (*ExplainForeignScan_function) (ForeignScanState *node,
  													struct ExplainState *es);
  
***************
*** 141,146 **** typedef struct FdwRoutine
--- 152,162 ----
  	EndForeignModify_function EndForeignModify;
  	IsForeignRelUpdatable_function IsForeignRelUpdatable;
  
+ 	/* Functions for SELECT FOR UPDATE/SHARE row locking */
+ 	GetForeignRowMarkType_function GetForeignRowMarkType;
+ 	LockForeignRow_function LockForeignRow;
+ 	FetchForeignRow_function FetchForeignRow;
+ 
  	/* Support functions for EXPLAIN */
  	ExplainForeignScan_function ExplainForeignScan;
  	ExplainForeignModify_function ExplainForeignModify;
*** a/src/include/nodes/execnodes.h
--- b/src/include/nodes/execnodes.h
***************
*** 420,426 **** typedef struct EState
   * subqueries-in-FROM will have an ExecRowMark with relation == NULL.  See
   * PlanRowMark for details about most of the fields.  In addition to fields
   * directly derived from PlanRowMark, we store curCtid, which is used by the
!  * WHERE CURRENT OF code.
   *
   * EState->es_rowMarks is a list of these structs.
   */
--- 420,427 ----
   * subqueries-in-FROM will have an ExecRowMark with relation == NULL.  See
   * PlanRowMark for details about most of the fields.  In addition to fields
   * directly derived from PlanRowMark, we store curCtid, which is used by the
!  * WHERE CURRENT OF code, and fdw_state, which is used by the FDW if the
!  * relation is foreign.
   *
   * EState->es_rowMarks is a list of these structs.
   */
***************
*** 434,439 **** typedef struct ExecRowMark
--- 435,441 ----
  	RowMarkType markType;		/* see enum in nodes/plannodes.h */
  	LockWaitPolicy waitPolicy;	/* NOWAIT and SKIP LOCKED */
  	ItemPointerData curCtid;	/* ctid of currently locked tuple, if any */
+ 	void	   *fdw_state;		/* foreign-data wrapper can keep state here */
  } ExecRowMark;
  
  /*
#24Jim Nasby
Jim.Nasby@BlueTreble.com
In reply to: Etsuro Fujita (#23)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

On 4/13/15 4:58 AM, Etsuro Fujita wrote:

On 2015/04/10 21:40, Etsuro Fujita wrote:

On 2015/04/09 12:07, Etsuro Fujita wrote:

I'll update the patch, which will contain only an infrastructure for
this in the PG core, and will not contain any postgres_fdw change.

I updated the patch based on your comments. Updated patch attached. In
the patch the following FDW APIs have been proposed:

+ RowMarkType
+ GetForeignRowMarkType (LockClauseStrength strength);
+ bool
+ LockForeignRow (EState *estate,
+                 ExecRowMark *erm,
+                 ItemPointer tupleid);
+ HeapTuple
+ FetchForeignRow (EState *estate,
+                  ExecRowMark *erm,
+                  ItemPointer tupleid);

I think that these APIs allow the FDW that has TIDs to use the rowmark
options such as ROW_MARK_REFERENCE, ROW_MARK_SHARE and
ROW_MARK_EXCLUSIVE for its foreign tables so as to match the local
semantics exactly, for example.

As you mentioned, it would be better to add helper functions to see
whether the foreign table is referenced by any ExecRowMarks. ISTM that
an easy way to do that is to modify ExecFindRowMark() so that it allows
for the missing case. I didn't contain such functions in the patch, though.

I added that function and modified docs a bit. Please find attached an
updated version of the patch.

Why aren't we allowing SELECT FOR KEY SHARE?
--
Jim Nasby, Data Architect, Blue Treble Consulting
Data in Trouble? Get it in Treble! http://BlueTreble.com

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#25Etsuro Fujita
fujita.etsuro@lab.ntt.co.jp
In reply to: Jim Nasby (#24)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

On 2015/04/13 23:25, Jim Nasby wrote:

On 4/13/15 4:58 AM, Etsuro Fujita wrote:

On 2015/04/10 21:40, Etsuro Fujita wrote:

On 2015/04/09 12:07, Etsuro Fujita wrote:

I'll update the patch, which will contain only an infrastructure for
this in the PG core, and will not contain any postgres_fdw change.

I updated the patch based on your comments. Updated patch attached. In
the patch the following FDW APIs have been proposed:

+ RowMarkType
+ GetForeignRowMarkType (LockClauseStrength strength);
+ bool
+ LockForeignRow (EState *estate,
+                 ExecRowMark *erm,
+                 ItemPointer tupleid);
+ HeapTuple
+ FetchForeignRow (EState *estate,
+                  ExecRowMark *erm,
+                  ItemPointer tupleid);

I think that these APIs allow the FDW that has TIDs to use the rowmark
options such as ROW_MARK_REFERENCE, ROW_MARK_SHARE and
ROW_MARK_EXCLUSIVE for its foreign tables so as to match the local
semantics exactly, for example.

As you mentioned, it would be better to add helper functions to see
whether the foreign table is referenced by any ExecRowMarks. ISTM that
an easy way to do that is to modify ExecFindRowMark() so that it allows
for the missing case. I didn't contain such functions in the patch, though.

I added that function and modified docs a bit. Please find attached an
updated version of the patch.

Why aren't we allowing SELECT FOR KEY SHARE?

I omitted that mode (and the FOR NO KEY UPDATE mode) for simplicity, but
both modes have been allowed. However, I'm not sure if those modes are
useful because we don't have information about keys for a remote table.

Best regards,
Etsuro Fujita

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#26Kyotaro HORIGUCHI
horiguchi.kyotaro@lab.ntt.co.jp
In reply to: Etsuro Fujita (#25)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

Hello,

At Tue, 14 Apr 2015 12:10:35 +0900, Etsuro Fujita <fujita.etsuro@lab.ntt.co.jp> wrote in <552C852B.2050401@lab.ntt.co.jp>

On 2015/04/13 23:25, Jim Nasby wrote:

On 4/13/15 4:58 AM, Etsuro Fujita wrote:

On 2015/04/10 21:40, Etsuro Fujita wrote:

On 2015/04/09 12:07, Etsuro Fujita wrote:

I'll update the patch, which will contain only an infrastructure for
this in the PG core, and will not contain any postgres_fdw change.

I updated the patch based on your comments. Updated patch attached. In
the patch the following FDW APIs have been proposed:

+ RowMarkType
+ GetForeignRowMarkType (LockClauseStrength strength);
+ bool
+ LockForeignRow (EState *estate,
+                 ExecRowMark *erm,
+                 ItemPointer tupleid);
+ HeapTuple
+ FetchForeignRow (EState *estate,
+                  ExecRowMark *erm,
+                  ItemPointer tupleid);

I think that these APIs allow the FDW that has TIDs to use the rowmark
options such as ROW_MARK_REFERENCE, ROW_MARK_SHARE and
ROW_MARK_EXCLUSIVE for its foreign tables so as to match the local
semantics exactly, for example.

As you mentioned, it would be better to add helper functions to see
whether the foreign table is referenced by any ExecRowMarks. ISTM that
an easy way to do that is to modify ExecFindRowMark() so that it allows
for the missing case. I didn't contain such functions in the patch, though.

I added that function and modified docs a bit. Please find attached an
updated version of the patch.

Why aren't we allowing SELECT FOR KEY SHARE?

I omitted that mode (and the FOR NO KEY UPDATE mode) for simplicity, but
both modes have been allowed. However, I'm not sure if those modes are
useful because we don't have information about keys for a remote table.

If I understand this correctly, the two lock modes are no
relation with key column definitions, and are provided as a
weaker lock in exchange for some risks. Like advisory locks. we
can FOR_NO_KEY_UPDATE in the trunsactions that evetually update
"key columns" but also should accept the unexpected result. In
other words, the "key" in the context of "FOR NO KEY UPDATE" and
"FOR KEY SHARE" is only in the programmers' minds.

Apart from feasibility, I don't see no resason to omit them if
this is correct.

As an example, the following operations cause an "unexpected"
result.

Ex. 1
Session A=# create table t (a int primary key, b int);
Session A=# insert into t (select a, 1 from generate_series(0, 99) a);
Session A=# begin;
Session A=# select * from t where a = 1 for no key update;

Session B=# begin;
Session B=# select * from t where a = 1 for key share;
Session B=# update t set a = -a where a = 1;
<session B is blocked>

Ex. 2
Session A=# create table t (a int, b int); -- a is the key in mind.
Session A=# insert into t (select a, 1 from generate_series(0, 99) a);
Session A=# begin;
Session A=# select * from t where a = 1 for no key update;

Session B=# begin;
Session B=# select * from t where a = 1 for key share;

Session A=# update t set a = -a where a = 1;
UPDATE 1
Session A=# commit;

Session B=# select * from t where a = 1;
(0 rows) -- Woops.

regards,

--
Kyotaro Horiguchi
NTT Open Source Software Center

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#27Jim Nasby
Jim.Nasby@BlueTreble.com
In reply to: Kyotaro HORIGUCHI (#26)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

On 4/14/15 1:05 AM, Kyotaro HORIGUCHI wrote:

As an example, the following operations cause an "unexpected"
result.

Ex. 1
Session A=# create table t (a int primary key, b int);
Session A=# insert into t (select a, 1 from generate_series(0, 99) a);
Session A=# begin;
Session A=# select * from t where a = 1 for no key update;

Session B=# begin;
Session B=# select * from t where a = 1 for key share;
Session B=# update t set a = -a where a = 1;
<session B is blocked>

Ex. 2
Session A=# create table t (a int, b int); -- a is the key in mind.
Session A=# insert into t (select a, 1 from generate_series(0, 99) a);
Session A=# begin;
Session A=# select * from t where a = 1 for no key update;

Session B=# begin;
Session B=# select * from t where a = 1 for key share;

Session A=# update t set a = -a where a = 1;
UPDATE 1
Session A=# commit;

Session B=# select * from t where a = 1;
(0 rows) -- Woops.

Those results are indeed surprising, but since we allow it in a direct
connection I don't see why we wouldn't allow it in the Postgres FDW...

As for the FDW not knowing about keys, why would it need to? If you try
to do something illegal it's the remote side that should throw the
error, not the FDW.

Of course, if you try to do a locking operation on an FDW that doesn't
support it, the FDW should throw an error... but that's different.
--
Jim Nasby, Data Architect, Blue Treble Consulting
Data in Trouble? Get it in Treble! http://BlueTreble.com

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#28Etsuro Fujita
fujita.etsuro@lab.ntt.co.jp
In reply to: Jim Nasby (#27)
1 attachment(s)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

On 2015/04/15 2:27, Jim Nasby wrote:

On 4/14/15 1:05 AM, Kyotaro HORIGUCHI wrote:

As an example, the following operations cause an "unexpected"
result.

Those results are indeed surprising, but since we allow it in a direct
connection I don't see why we wouldn't allow it in the Postgres FDW...

As for the FDW not knowing about keys, why would it need to? If you try
to do something illegal it's the remote side that should throw the
error, not the FDW.

Of course, if you try to do a locking operation on an FDW that doesn't
support it, the FDW should throw an error... but that's different.

Ah, you are right. FOR NO KEY UPDATE and FOR KEY SHARE would be useful
in the Postgres FDW if we assume the user performs those properly based
on information about keys for a remote table.

Sorry, my explanation was not correct, but I want to make it clear that
the proposed patch also allows the FDW to change the behavior of FOR NO
KEY UPDATE and/or FOR KEY SHARE row locking so as to match the local
semantics exactly.

BTW, I revised docs a bit. Attached is an updated version of the patch.

Best regards,
Etsuro Fujita

Attachments:

EvalPlanQual-v6.patchtext/x-diff; name=EvalPlanQual-v6.patchDownload
*** a/doc/src/sgml/fdwhandler.sgml
--- b/doc/src/sgml/fdwhandler.sgml
***************
*** 211,216 **** BeginForeignScan (ForeignScanState *node,
--- 211,232 ----
      </para>
  
      <para>
+      If the FDW changes handling <command>SELECT FOR UPDATE/SHARE</> row
+      locking, this routine should perform any initialization needed prior to
+      the actual row locking if the foreign table is referenced in a
+      <command>SELECT FOR UPDATE/SHARE</>.  To decide whether the foreign
+      table is referenced in the command or not, it's recommended to use
+      <function>ExecFindRowMark</> with the missing_ok argument set to true,
+      which returns an <structname>ExecRowMark</> struct if the foreign table
+      is referenced in the command and otherwise returns a NULL.  Information
+      about the row locking is accessible through the
+      <structname>ExecRowMark</> struct.
+      (The <structfield>fdw_state</> field of <structname>ExecRowMark</> is
+      available for the FDW to store any private state it needs for the row
+      locking.)
+     </para>
+ 
+     <para>
       Note that when <literal>(eflags &amp; EXEC_FLAG_EXPLAIN_ONLY)</> is
       true, this function should not perform any externally-visible actions;
       it should only do the minimum required to make the node state valid
***************
*** 598,603 **** IsForeignRelUpdatable (Relation rel);
--- 614,701 ----
  
     </sect2>
  
+    <sect2 id="fdw-callbacks-row-locking">
+     <title>FDW Routines For <command>SELECT FOR UPDATE/SHARE</> row locking
+      </title>
+ 
+     <para>
+      If an FDW supports <command>SELECT FOR UPDATE/SHARE</> row locking,
+      it should provide the following callback functions:
+     </para>
+ 
+     <para>
+ <programlisting>
+ RowMarkType
+ GetForeignRowMarkType (LockClauseStrength strength);
+ </programlisting>
+ 
+      Report which row-marking option to support for a lock strength
+      associated with a <command>SELECT FOR UPDATE/SHARE</> request.
+      This is called at the beginning of query planning.
+      <literal>strength</> is a member of the <literal>LockClauseStrength</>
+      enum type.
+      The return value should be a member of the <literal>RowMarkType</>
+      enum type.  See
+      <filename>src/include/nodes/lockoptions.h</> and
+      <filename>src/include/nodes/plannodes.h</> for information about
+      these enum types.
+     </para>
+ 
+     <para>
+      If the <function>GetForeignRowMarkType</> pointer is set to
+      <literal>NULL</>, the default option is selected for any lock strength,
+      and both <function>LockForeignRow</> and <function>FetchForeignRow</>
+      described below will not be called at query execution time.
+     </para>
+ 
+     <para>
+ <programlisting>
+ bool
+ LockForeignRow (EState *estate,
+                 ExecRowMark *erm,
+                 ItemPointer tupleid);
+ </programlisting>
+ 
+      Lock one tuple in the foreign table.
+      <literal>estate</> is global execution state for the query.
+      <literal>erm</> is the <structname>ExecRowMark</> struct describing
+      the target foreign table.
+      <literal>tupleid</> identifies the tuple to be locked.
+      This function should return <literal>true</>, if the FDW lock the tuple
+      successfully.  Otherwise, return <literal>false</>.
+     </para>
+ 
+     <para>
+      If the <function>LockForeignRow</> pointer is set to
+      <literal>NULL</>, attempts to lock rows will fail
+      with an error message.
+     </para>
+ 
+     <para>
+ <programlisting>
+ HeapTuple
+ FetchForeignRow (EState *estate,
+                  ExecRowMark *erm,
+                  ItemPointer tupleid);
+ </programlisting>
+ 
+      Fetch one tuple from the foreign table.
+      <literal>estate</> is global execution state for the query.
+      <literal>erm</> is the <structname>ExecRowMark</> struct describing
+      the target foreign table.
+      <literal>tupleid</> identifies the tuple to be fetched.
+      This function should return the fetched tuple, if the FDW fetch the
+      tuple successfully.  Otherwise, return NULL.
+     </para>
+ 
+     <para>
+      If the <function>FetchForeignRow</> pointer is set to
+      <literal>NULL</>, attempts to lock rows will fail
+      with an error message.
+     </para>
+ 
+    </sect2>
+ 
     <sect2 id="fdw-callbacks-explain">
      <title>FDW Routines for <command>EXPLAIN</></title>
  
***************
*** 1011,1017 **** GetForeignServerByName(const char *name, bool missing_ok);
       join conditions.  However, matching the local semantics exactly would
       require an additional remote access for every row, and might be
       impossible anyway depending on what locking semantics the external data
!      source provides.
      </para>
  
    </sect1>
--- 1109,1118 ----
       join conditions.  However, matching the local semantics exactly would
       require an additional remote access for every row, and might be
       impossible anyway depending on what locking semantics the external data
!      source provides.  If necessary, however, the FDW can change this behavior
!      so as to match the local semantics, using its callback functions
!      <function>GetForeignRowMarkType</>, <function>LockForeignRow</>, and
!      <function>FetchForeignRow</>.
      </para>
  
    </sect1>
*** a/src/backend/executor/execMain.c
--- b/src/backend/executor/execMain.c
***************
*** 855,860 **** InitPlan(QueryDesc *queryDesc, int eflags)
--- 855,861 ----
  		erm->markType = rc->markType;
  		erm->waitPolicy = rc->waitPolicy;
  		ItemPointerSetInvalid(&(erm->curCtid));
+ 		erm->fdw_state = NULL;
  		estate->es_rowMarks = lappend(estate->es_rowMarks, erm);
  	}
  
***************
*** 1098,1103 **** CheckValidResultRel(Relation resultRel, CmdType operation)
--- 1099,1106 ----
  static void
  CheckValidRowMarkRel(Relation rel, RowMarkType markType)
  {
+ 	FdwRoutine *fdwroutine;
+ 
  	switch (rel->rd_rel->relkind)
  	{
  		case RELKIND_RELATION:
***************
*** 1133,1143 **** CheckValidRowMarkRel(Relation rel, RowMarkType markType)
  							  RelationGetRelationName(rel))));
  			break;
  		case RELKIND_FOREIGN_TABLE:
! 			/* Should not get here; planner should have used ROW_MARK_COPY */
! 			ereport(ERROR,
! 					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
! 					 errmsg("cannot lock rows in foreign table \"%s\"",
! 							RelationGetRelationName(rel))));
  			break;
  		default:
  			ereport(ERROR,
--- 1136,1150 ----
  							  RelationGetRelationName(rel))));
  			break;
  		case RELKIND_FOREIGN_TABLE:
! 			/* Okay only if the FDW supports it */
! 			fdwroutine = GetFdwRoutineForRelation(rel, false);
! 			if ((markType != ROW_MARK_REFERENCE &&
! 				 fdwroutine->LockForeignRow == NULL) ||
! 				fdwroutine->FetchForeignRow == NULL)
! 				ereport(ERROR,
! 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
! 						 errmsg("cannot lock rows in foreign table \"%s\"",
! 								RelationGetRelationName(rel))));
  			break;
  		default:
  			ereport(ERROR,
***************
*** 1886,1892 **** ExecBuildSlotValueDescription(Oid reloid,
   * ExecFindRowMark -- find the ExecRowMark struct for given rangetable index
   */
  ExecRowMark *
! ExecFindRowMark(EState *estate, Index rti)
  {
  	ListCell   *lc;
  
--- 1893,1899 ----
   * ExecFindRowMark -- find the ExecRowMark struct for given rangetable index
   */
  ExecRowMark *
! ExecFindRowMark(EState *estate, Index rti, bool missing_ok)
  {
  	ListCell   *lc;
  
***************
*** 1897,1903 **** ExecFindRowMark(EState *estate, Index rti)
  		if (erm->rti == rti)
  			return erm;
  	}
! 	elog(ERROR, "failed to find ExecRowMark for rangetable index %u", rti);
  	return NULL;				/* keep compiler quiet */
  }
  
--- 1904,1911 ----
  		if (erm->rti == rti)
  			return erm;
  	}
! 	if (!missing_ok)
! 		elog(ERROR, "failed to find ExecRowMark for rangetable index %u", rti);
  	return NULL;				/* keep compiler quiet */
  }
  
***************
*** 2401,2407 **** EvalPlanQualFetchRowMarks(EPQState *epqstate)
  
  		if (erm->markType == ROW_MARK_REFERENCE)
  		{
! 			Buffer		buffer;
  
  			Assert(erm->relation != NULL);
  
--- 2409,2415 ----
  
  		if (erm->markType == ROW_MARK_REFERENCE)
  		{
! 			HeapTuple	copyTuple;
  
  			Assert(erm->relation != NULL);
  
***************
*** 2415,2428 **** EvalPlanQualFetchRowMarks(EPQState *epqstate)
  			tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
  
  			/* okay, fetch the tuple */
- 			if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer,
- 							false, NULL))
- 				elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
  
! 			/* successful, copy and store tuple */
! 			EvalPlanQualSetTuple(epqstate, erm->rti,
! 								 heap_copytuple(&tuple));
! 			ReleaseBuffer(buffer);
  		}
  		else
  		{
--- 2423,2455 ----
  			tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
  
  			/* okay, fetch the tuple */
  
! 			if (erm->relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
! 			{
! 				FdwRoutine *fdwroutine;
! 
! 				/* let the FDW do the work */
! 				fdwroutine = GetFdwRoutineForRelation(erm->relation, false);
! 				copyTuple = fdwroutine->FetchForeignRow(epqstate->estate,
! 														erm, &tuple.t_self);
! 				if (copyTuple == NULL)
! 					elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
! 			}
! 			else
! 			{
! 				Buffer		buffer;
! 
! 				if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer,
! 								false, NULL))
! 					elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
! 
! 				/* successful, copy tuple */
! 				copyTuple = heap_copytuple(&tuple);
! 				ReleaseBuffer(buffer);
! 			}
! 
! 			/* store tuple */
! 			EvalPlanQualSetTuple(epqstate, erm->rti, copyTuple);
  		}
  		else
  		{
*** a/src/backend/executor/nodeLockRows.c
--- b/src/backend/executor/nodeLockRows.c
***************
*** 25,30 ****
--- 25,31 ----
  #include "access/xact.h"
  #include "executor/executor.h"
  #include "executor/nodeLockRows.h"
+ #include "foreign/fdwapi.h"
  #include "storage/bufmgr.h"
  #include "utils/rel.h"
  #include "utils/tqual.h"
***************
*** 112,117 **** lnext:
--- 113,141 ----
  		tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
  
  		/* okay, try to lock the tuple */
+ 
+ 		if (erm->relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+ 		{
+ 			FdwRoutine *fdwroutine;
+ 
+ 			/* Let the FDW do the work */
+ 			fdwroutine = GetFdwRoutineForRelation(erm->relation, false);
+ 			if (!fdwroutine->LockForeignRow(estate, erm, &tuple.t_self))
+ 			{
+ 				/* couldn't get the lock */
+ 				goto lnext;
+ 			}
+ 			/* got the lock successfully */
+ 
+ 			/*
+ 			 * Remember locked tuple's TID for EvalPlanQual testing, not for
+ 			 * WHERE CURRENT OF, which is not supported for foreign tables.
+ 			 */
+ 			erm->curCtid = tuple.t_self;
+ 
+ 			continue;
+ 		}
+ 
  		switch (erm->markType)
  		{
  			case ROW_MARK_EXCLUSIVE:
***************
*** 253,259 **** lnext:
  			ExecAuxRowMark *aerm = (ExecAuxRowMark *) lfirst(lc);
  			ExecRowMark *erm = aerm->rowmark;
  			HeapTupleData tuple;
! 			Buffer		buffer;
  
  			/* ignore non-active child tables */
  			if (!ItemPointerIsValid(&(erm->curCtid)))
--- 277,283 ----
  			ExecAuxRowMark *aerm = (ExecAuxRowMark *) lfirst(lc);
  			ExecRowMark *erm = aerm->rowmark;
  			HeapTupleData tuple;
! 			HeapTuple	copyTuple;
  
  			/* ignore non-active child tables */
  			if (!ItemPointerIsValid(&(erm->curCtid)))
***************
*** 267,280 **** lnext:
  
  			/* okay, fetch the tuple */
  			tuple.t_self = erm->curCtid;
- 			if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer,
- 							false, NULL))
- 				elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
  
! 			/* successful, copy and store tuple */
! 			EvalPlanQualSetTuple(&node->lr_epqstate, erm->rti,
! 								 heap_copytuple(&tuple));
! 			ReleaseBuffer(buffer);
  		}
  
  		/*
--- 291,323 ----
  
  			/* okay, fetch the tuple */
  			tuple.t_self = erm->curCtid;
  
! 			if (erm->relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
! 			{
! 				FdwRoutine *fdwroutine;
! 
! 				/* let the FDW do the work */
! 				fdwroutine = GetFdwRoutineForRelation(erm->relation, false);
! 				copyTuple = fdwroutine->FetchForeignRow(node->lr_epqstate.estate,
! 														erm, &tuple.t_self);
! 				if (copyTuple == NULL)
! 					elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
! 			}
! 			else
! 			{
! 				Buffer		buffer;
! 
! 				if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer,
! 								false, NULL))
! 					elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
! 
! 				/* successful, copy tuple */
! 				copyTuple = heap_copytuple(&tuple);
! 				ReleaseBuffer(buffer);
! 			}
! 
! 			/* store tuple */
! 			EvalPlanQualSetTuple(&node->lr_epqstate, erm->rti, copyTuple);
  		}
  
  		/*
***************
*** 367,373 **** ExecInitLockRows(LockRows *node, EState *estate, int eflags)
  			continue;
  
  		/* find ExecRowMark and build ExecAuxRowMark */
! 		erm = ExecFindRowMark(estate, rc->rti);
  		aerm = ExecBuildAuxRowMark(erm, outerPlan->targetlist);
  
  		/*
--- 410,416 ----
  			continue;
  
  		/* find ExecRowMark and build ExecAuxRowMark */
! 		erm = ExecFindRowMark(estate, rc->rti, false);
  		aerm = ExecBuildAuxRowMark(erm, outerPlan->targetlist);
  
  		/*
*** a/src/backend/executor/nodeModifyTable.c
--- b/src/backend/executor/nodeModifyTable.c
***************
*** 1257,1263 **** ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
  			continue;
  
  		/* find ExecRowMark (same for all subplans) */
! 		erm = ExecFindRowMark(estate, rc->rti);
  
  		/* build ExecAuxRowMark for each subplan */
  		for (i = 0; i < nplans; i++)
--- 1257,1263 ----
  			continue;
  
  		/* find ExecRowMark (same for all subplans) */
! 		erm = ExecFindRowMark(estate, rc->rti, false);
  
  		/* build ExecAuxRowMark for each subplan */
  		for (i = 0; i < nplans; i++)
*** a/src/backend/optimizer/plan/planner.c
--- b/src/backend/optimizer/plan/planner.c
***************
*** 20,25 ****
--- 20,26 ----
  #include "access/htup_details.h"
  #include "executor/executor.h"
  #include "executor/nodeAgg.h"
+ #include "foreign/fdwapi.h"
  #include "miscadmin.h"
  #include "nodes/makefuncs.h"
  #ifdef OPTIMIZER_DEBUG
***************
*** 2274,2280 **** select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength)
  	}
  	else if (rte->relkind == RELKIND_FOREIGN_TABLE)
  	{
! 		/* For now, we force all foreign tables to use ROW_MARK_COPY */
  		return ROW_MARK_COPY;
  	}
  	else
--- 2275,2288 ----
  	}
  	else if (rte->relkind == RELKIND_FOREIGN_TABLE)
  	{
! 		FdwRoutine *fdwroutine;
! 
! 		/* Let the FDW select the rowmark type, if possible */
! 		fdwroutine = GetFdwRoutineByRelId(rte->relid);
! 		if (fdwroutine->GetForeignRowMarkType != NULL)
! 			return fdwroutine->GetForeignRowMarkType(strength);
! 
! 		/* Otherwise, we force all foreign tables to use ROW_MARK_COPY */
  		return ROW_MARK_COPY;
  	}
  	else
*** a/src/include/executor/executor.h
--- b/src/include/executor/executor.h
***************
*** 195,201 **** extern void ExecConstraints(ResultRelInfo *resultRelInfo,
  				TupleTableSlot *slot, EState *estate);
  extern void ExecWithCheckOptions(ResultRelInfo *resultRelInfo,
  					 TupleTableSlot *slot, EState *estate);
! extern ExecRowMark *ExecFindRowMark(EState *estate, Index rti);
  extern ExecAuxRowMark *ExecBuildAuxRowMark(ExecRowMark *erm, List *targetlist);
  extern TupleTableSlot *EvalPlanQual(EState *estate, EPQState *epqstate,
  			 Relation relation, Index rti, int lockmode,
--- 195,201 ----
  				TupleTableSlot *slot, EState *estate);
  extern void ExecWithCheckOptions(ResultRelInfo *resultRelInfo,
  					 TupleTableSlot *slot, EState *estate);
! extern ExecRowMark *ExecFindRowMark(EState *estate, Index rti, bool missing_ok);
  extern ExecAuxRowMark *ExecBuildAuxRowMark(ExecRowMark *erm, List *targetlist);
  extern TupleTableSlot *EvalPlanQual(EState *estate, EPQState *epqstate,
  			 Relation relation, Index rti, int lockmode,
*** a/src/include/foreign/fdwapi.h
--- b/src/include/foreign/fdwapi.h
***************
*** 13,18 ****
--- 13,19 ----
  #define FDWAPI_H
  
  #include "nodes/execnodes.h"
+ #include "nodes/plannodes.h"
  #include "nodes/relation.h"
  
  /* To avoid including explain.h here, reference ExplainState thus: */
***************
*** 82,87 **** typedef void (*EndForeignModify_function) (EState *estate,
--- 83,98 ----
  
  typedef int (*IsForeignRelUpdatable_function) (Relation rel);
  
+ typedef RowMarkType (*GetForeignRowMarkType_function) (LockClauseStrength strength);
+ 
+ typedef bool (*LockForeignRow_function) (EState *estate,
+ 										 ExecRowMark *erm,
+ 										 ItemPointer tupleid);
+ 
+ typedef HeapTuple (*FetchForeignRow_function) (EState *estate,
+ 											   ExecRowMark *erm,
+ 											   ItemPointer tupleid);
+ 
  typedef void (*ExplainForeignScan_function) (ForeignScanState *node,
  													struct ExplainState *es);
  
***************
*** 141,146 **** typedef struct FdwRoutine
--- 152,162 ----
  	EndForeignModify_function EndForeignModify;
  	IsForeignRelUpdatable_function IsForeignRelUpdatable;
  
+ 	/* Functions for SELECT FOR UPDATE/SHARE row locking */
+ 	GetForeignRowMarkType_function GetForeignRowMarkType;
+ 	LockForeignRow_function LockForeignRow;
+ 	FetchForeignRow_function FetchForeignRow;
+ 
  	/* Support functions for EXPLAIN */
  	ExplainForeignScan_function ExplainForeignScan;
  	ExplainForeignModify_function ExplainForeignModify;
*** a/src/include/nodes/execnodes.h
--- b/src/include/nodes/execnodes.h
***************
*** 420,426 **** typedef struct EState
   * subqueries-in-FROM will have an ExecRowMark with relation == NULL.  See
   * PlanRowMark for details about most of the fields.  In addition to fields
   * directly derived from PlanRowMark, we store curCtid, which is used by the
!  * WHERE CURRENT OF code.
   *
   * EState->es_rowMarks is a list of these structs.
   */
--- 420,427 ----
   * subqueries-in-FROM will have an ExecRowMark with relation == NULL.  See
   * PlanRowMark for details about most of the fields.  In addition to fields
   * directly derived from PlanRowMark, we store curCtid, which is used by the
!  * WHERE CURRENT OF code, and fdw_state, which is used by the FDW if the
!  * relation is foreign.
   *
   * EState->es_rowMarks is a list of these structs.
   */
***************
*** 434,439 **** typedef struct ExecRowMark
--- 435,441 ----
  	RowMarkType markType;		/* see enum in nodes/plannodes.h */
  	LockWaitPolicy waitPolicy;	/* NOWAIT and SKIP LOCKED */
  	ItemPointerData curCtid;	/* ctid of currently locked tuple, if any */
+ 	void	   *fdw_state;		/* foreign-data wrapper can keep state here */
  } ExecRowMark;
  
  /*
#29Robert Haas
robertmhaas@gmail.com
In reply to: Etsuro Fujita (#28)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

On Thu, Apr 16, 2015 at 2:55 AM, Etsuro Fujita
<fujita.etsuro@lab.ntt.co.jp> wrote:

Ah, you are right. FOR NO KEY UPDATE and FOR KEY SHARE would be useful in
the Postgres FDW if we assume the user performs those properly based on
information about keys for a remote table.

Sorry, my explanation was not correct, but I want to make it clear that the
proposed patch also allows the FDW to change the behavior of FOR NO KEY
UPDATE and/or FOR KEY SHARE row locking so as to match the local semantics
exactly.

BTW, I revised docs a bit. Attached is an updated version of the patch.

Tom, you're listed as the committer for this in the CF app. Are you
still planning to take care of this?

It seems that time is beginning to run short.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#30Tom Lane
tgl@sss.pgh.pa.us
In reply to: Robert Haas (#29)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

Robert Haas <robertmhaas@gmail.com> writes:

Tom, you're listed as the committer for this in the CF app. Are you
still planning to take care of this?
It seems that time is beginning to run short.

Yeah, I will address this (and start looking at GROUPING SETS) next week.
I'm out of town right now.

regards, tom lane

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#31Tom Lane
tgl@sss.pgh.pa.us
In reply to: Etsuro Fujita (#28)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

Etsuro Fujita <fujita.etsuro@lab.ntt.co.jp> writes:

BTW, I revised docs a bit. Attached is an updated version of the patch.

I started to look at this and realized that it only touches the core code
and not postgres_fdw, which seems odd --- doesn't that mean the new
feature can't be tested?

regards, tom lane

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#32Tom Lane
tgl@sss.pgh.pa.us
In reply to: Etsuro Fujita (#28)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

Etsuro Fujita <fujita.etsuro@lab.ntt.co.jp> writes:

[ EvalPlanQual-v6.patch ]

I've started to study this in a little more detail, and I'm not terribly
happy with some of the API decisions in it.

In particular, I find the addition of "void *fdw_state" to ExecRowMark
to be pretty questionable. That does not seem like a good place to keep
random state. (I realize that WHERE CURRENT OF keeps some state in
ExecRowMark, but that's a crock not something to emulate.) ISTM that in
most scenarios, the state that LockForeignRow/FetchForeignRow would like
to get at is probably the FDW state associated with the ForeignScanState
that the tuple came from. Which this API doesn't help with particularly.
I wonder if we should instead add a "ScanState*" field and expect the
core code to set that up (ExecOpenScanRelation could do it with minor
API changes...).

I'm also a bit tempted to pass the TIDs to LockForeignRow and
FetchForeignRow as Datums not ItemPointers. We have the Datum format
available already at the call sites, so this is free as far as the core
code is concerned, and would only cost another line or so for the FDWs.
This is by no means sufficient to allow FDWs to use some other type than
"tid" for row identifiers; but it would be a down payment on that problem,
and at least would avoid nailing the rowids-are-tids assumption into yet
another global API.

Thoughts?

Also, as I mentioned, I'd be a whole lot happier if we had a way to test
this...

regards, tom lane

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#33Etsuro Fujita
fujita.etsuro@lab.ntt.co.jp
In reply to: Tom Lane (#32)
1 attachment(s)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

On 2015/05/11 8:50, Tom Lane wrote:

Etsuro Fujita <fujita.etsuro@lab.ntt.co.jp> writes:

[ EvalPlanQual-v6.patch ]

I've started to study this in a little more detail, and I'm not terribly
happy with some of the API decisions in it.

Thanks for taking the time to review the patch!

In particular, I find the addition of "void *fdw_state" to ExecRowMark
to be pretty questionable. That does not seem like a good place to keep
random state. (I realize that WHERE CURRENT OF keeps some state in
ExecRowMark, but that's a crock not something to emulate.) ISTM that in
most scenarios, the state that LockForeignRow/FetchForeignRow would like
to get at is probably the FDW state associated with the ForeignScanState
that the tuple came from. Which this API doesn't help with particularly.
I wonder if we should instead add a "ScanState*" field and expect the
core code to set that up (ExecOpenScanRelation could do it with minor
API changes...).

Sorry, I don't understand clearly what you mean, but that (the idea of
expecting the core to set it up) sounds inconsistent with your comment
on the earlier version of the API "BeginForeignFetch" [1]/messages/by-id/14504.1428446647@sss.pgh.pa.us.

I'm also a bit tempted to pass the TIDs to LockForeignRow and
FetchForeignRow as Datums not ItemPointers. We have the Datum format
available already at the call sites, so this is free as far as the core
code is concerned, and would only cost another line or so for the FDWs.
This is by no means sufficient to allow FDWs to use some other type than
"tid" for row identifiers; but it would be a down payment on that problem,
and at least would avoid nailing the rowids-are-tids assumption into yet
another global API.

That is a good idea.

Also, as I mentioned, I'd be a whole lot happier if we had a way to test
this...

Attached is a postgres_fdw patch that I used for the testing. If you
try it, edit postgresGetForeignRowMarkType as necessary. I have to
confess that I did the testing only in the normal conditions by the patch.

Sorry for the delay. I took a vacation until yesterday.

Best regards,
Etsuro Fujita

[1]: /messages/by-id/14504.1428446647@sss.pgh.pa.us

Attachments:

EvalPlanQual-postgres_fdw-test.patchtext/x-diff; name=EvalPlanQual-postgres_fdw-test.patchDownload
*** a/contrib/postgres_fdw/postgres_fdw.c
--- b/contrib/postgres_fdw/postgres_fdw.c
***************
*** 88,93 **** typedef struct PgFdwRelationInfo
--- 88,95 ----
   *
   * 1) SELECT statement text to be sent to the remote server
   * 2) Integer list of attribute numbers retrieved by the SELECT
+  * 3) SELECT statement text to be sent to the remote server
+  * 4) Integer list of attribute numbers retrieved by the SELECT
   *
   * These items are indexed with the enum FdwScanPrivateIndex, so an item
   * can be fetched with list_nth().  For example, to get the SELECT statement:
***************
*** 98,104 **** enum FdwScanPrivateIndex
  	/* SQL statement to execute remotely (as a String node) */
  	FdwScanPrivateSelectSql,
  	/* Integer list of attribute numbers retrieved by the SELECT */
! 	FdwScanPrivateRetrievedAttrs
  };
  
  /*
--- 100,110 ----
  	/* SQL statement to execute remotely (as a String node) */
  	FdwScanPrivateSelectSql,
  	/* Integer list of attribute numbers retrieved by the SELECT */
! 	FdwScanPrivateRetrievedAttrs,
! 	/* SQL statement to execute remotely (as a String node) */
! 	FdwScanPrivateSelectSql2,
! 	/* Integer list of attribute numbers retrieved by SELECT */
! 	FdwScanPrivateRetrievedAttrs2
  };
  
  /*
***************
*** 186,191 **** typedef struct PgFdwModifyState
--- 192,223 ----
  } PgFdwModifyState;
  
  /*
+  * Execution state for fetching/locking foreign rows.
+  */
+ typedef struct PgFdwFetchState
+ {
+ 	Relation	rel;			/* relcache entry for the foreign table */
+ 	AttInMetadata *attinmeta;	/* attribute datatype conversion metadata */
+ 
+ 	/* for remote query execution */
+ 	PGconn	   *conn;			/* connection for the fetch */
+ 	char	   *p_name;			/* name of prepared statement, if created */
+ 
+ 	/* extracted fdw_private data */
+ 	char	   *query;			/* text of SELECT command */
+ 	List	   *retrieved_attrs;	/* attr numbers retrieved by SELECT */
+ 
+ 	/* info about parameters for prepared statement */
+ 	int			p_nums;			/* number of parameters to transmit */
+ 	FmgrInfo   *p_flinfo;		/* output conversion functions for them */
+ 
+ 	HeapTuple	locked_tuple;
+ 
+ 	/* working memory context */
+ 	MemoryContext temp_cxt;		/* context for per-tuple temporary data */
+ } PgFdwFetchState;
+ 
+ /*
   * Workspace for analyzing a foreign table.
   */
  typedef struct PgFdwAnalyzeState
***************
*** 276,281 **** static TupleTableSlot *postgresExecForeignDelete(EState *estate,
--- 308,320 ----
  static void postgresEndForeignModify(EState *estate,
  						 ResultRelInfo *resultRelInfo);
  static int	postgresIsForeignRelUpdatable(Relation rel);
+ static RowMarkType postgresGetForeignRowMarkType(LockClauseStrength strength);
+ static bool postgresLockForeignRow(EState *estate,
+ 								   ExecRowMark *erm,
+ 								   ItemPointer tupleid);
+ static HeapTuple postgresFetchForeignRow(EState *estate,
+ 										 ExecRowMark *erm,
+ 										 ItemPointer tupleid);
  static void postgresExplainForeignScan(ForeignScanState *node,
  						   ExplainState *es);
  static void postgresExplainForeignModify(ModifyTableState *mtstate,
***************
*** 306,320 **** static void get_remote_estimate(const char *sql,
  static bool ec_member_matches_foreign(PlannerInfo *root, RelOptInfo *rel,
  						  EquivalenceClass *ec, EquivalenceMember *em,
  						  void *arg);
  static void create_cursor(ForeignScanState *node);
  static void fetch_more_data(ForeignScanState *node);
  static void close_cursor(PGconn *conn, unsigned int cursor_number);
! static void prepare_foreign_modify(PgFdwModifyState *fmstate);
! static const char **convert_prep_stmt_params(PgFdwModifyState *fmstate,
! 						 ItemPointer tupleid,
! 						 TupleTableSlot *slot);
  static void store_returning_result(PgFdwModifyState *fmstate,
  					   TupleTableSlot *slot, PGresult *res);
  static int postgresAcquireSampleRowsFunc(Relation relation, int elevel,
  							  HeapTuple *rows, int targrows,
  							  double *totalrows,
--- 345,370 ----
  static bool ec_member_matches_foreign(PlannerInfo *root, RelOptInfo *rel,
  						  EquivalenceClass *ec, EquivalenceMember *em,
  						  void *arg);
+ static List *create_foreign_fetch_info(PlannerInfo *root,
+ 									   RelOptInfo *baserel,
+ 									   RowMarkType markType);
  static void create_cursor(ForeignScanState *node);
  static void fetch_more_data(ForeignScanState *node);
  static void close_cursor(PGconn *conn, unsigned int cursor_number);
! static char *setup_prep_stmt(PGconn *conn, char *query);
! static const char **convert_prep_stmt_params(ItemPointer tupleid,
! 											 TupleTableSlot *slot,
! 											 int p_nums,
! 											 FmgrInfo *p_flinfo,
! 											 List *target_attrs,
! 											 MemoryContext temp_context);
  static void store_returning_result(PgFdwModifyState *fmstate,
  					   TupleTableSlot *slot, PGresult *res);
+ static void init_foreign_fetch_state(EState *estate,
+ 									 ExecRowMark *erm,
+ 									 List *fdw_private,
+ 									 int eflags);
+ static void finish_foreign_fetch_state(EState *estate, ExecRowMark *erm);
  static int postgresAcquireSampleRowsFunc(Relation relation, int elevel,
  							  HeapTuple *rows, int targrows,
  							  double *totalrows,
***************
*** 358,363 **** postgres_fdw_handler(PG_FUNCTION_ARGS)
--- 408,418 ----
  	routine->EndForeignModify = postgresEndForeignModify;
  	routine->IsForeignRelUpdatable = postgresIsForeignRelUpdatable;
  
+ 	/* Functions for SELECT FOR UPDATE/SHARE row locking */
+ 	routine->GetForeignRowMarkType = postgresGetForeignRowMarkType;
+ 	routine->LockForeignRow = postgresLockForeignRow;
+ 	routine->FetchForeignRow = postgresFetchForeignRow;
+ 
  	/* Support functions for EXPLAIN */
  	routine->ExplainForeignScan = postgresExplainForeignScan;
  	routine->ExplainForeignModify = postgresExplainForeignModify;
***************
*** 746,751 **** postgresGetForeignPlan(PlannerInfo *root,
--- 801,807 ----
  	PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) baserel->fdw_private;
  	Index		scan_relid = baserel->relid;
  	List	   *fdw_private;
+ 	List	   *fdw_private2 = NIL;
  	List	   *remote_conds = NIL;
  	List	   *local_exprs = NIL;
  	List	   *params_list = NIL;
***************
*** 836,855 **** postgresGetForeignPlan(PlannerInfo *root,
  			 * complete information about, and (b) it wouldn't work anyway on
  			 * older remote servers.  Likewise, we don't worry about NOWAIT.
  			 */
! 			switch (rc->strength)
  			{
! 				case LCS_NONE:
! 					/* No locking needed */
! 					break;
! 				case LCS_FORKEYSHARE:
! 				case LCS_FORSHARE:
! 					appendStringInfoString(&sql, " FOR SHARE");
! 					break;
! 				case LCS_FORNOKEYUPDATE:
! 				case LCS_FORUPDATE:
! 					appendStringInfoString(&sql, " FOR UPDATE");
! 					break;
  			}
  		}
  	}
  
--- 892,917 ----
  			 * complete information about, and (b) it wouldn't work anyway on
  			 * older remote servers.  Likewise, we don't worry about NOWAIT.
  			 */
! 			if (rc->markType == ROW_MARK_COPY)
  			{
! 				switch (rc->strength)
! 				{
! 					case LCS_NONE:
! 						/* No locking needed */
! 						break;
! 					case LCS_FORKEYSHARE:
! 					case LCS_FORSHARE:
! 						appendStringInfoString(&sql, " FOR SHARE");
! 						break;
! 					case LCS_FORNOKEYUPDATE:
! 					case LCS_FORUPDATE:
! 						appendStringInfoString(&sql, " FOR UPDATE");
! 						break;
! 				}
  			}
+ 			else
+ 				fdw_private2 = create_foreign_fetch_info(root, baserel,
+ 														 rc->markType);
  		}
  	}
  
***************
*** 859,864 **** postgresGetForeignPlan(PlannerInfo *root,
--- 921,928 ----
  	 */
  	fdw_private = list_make2(makeString(sql.data),
  							 retrieved_attrs);
+ 	if (fdw_private2)
+ 		fdw_private = list_concat(fdw_private, fdw_private2);
  
  	/*
  	 * Create the ForeignScan node from target list, local filtering
***************
*** 886,891 **** postgresBeginForeignScan(ForeignScanState *node, int eflags)
--- 950,956 ----
  	EState	   *estate = node->ss.ps.state;
  	PgFdwScanState *fsstate;
  	RangeTblEntry *rte;
+ 	ExecRowMark *erm;
  	Oid			userid;
  	ForeignTable *table;
  	ForeignServer *server;
***************
*** 986,991 **** postgresBeginForeignScan(ForeignScanState *node, int eflags)
--- 1051,1063 ----
  		fsstate->param_values = (const char **) palloc0(numParams * sizeof(char *));
  	else
  		fsstate->param_values = NULL;
+ 
+ 	/*
+ 	 * Initialize state for fetching/locking foreign rows if needed.
+ 	 */
+ 	erm = ExecFindRowMark(estate, fsplan->scan.scanrelid, true);
+ 	if (erm && erm->relation && erm->fdw_state == NULL)
+ 		init_foreign_fetch_state(estate, erm, fsplan->fdw_private, eflags);
  }
  
  /*
***************
*** 1093,1099 **** postgresReScanForeignScan(ForeignScanState *node)
--- 1165,1174 ----
  static void
  postgresEndForeignScan(ForeignScanState *node)
  {
+ 	ForeignScan *fsplan = (ForeignScan *) node->ss.ps.plan;
+ 	EState	   *estate = node->ss.ps.state;
  	PgFdwScanState *fsstate = (PgFdwScanState *) node->fdw_state;
+ 	ExecRowMark *erm;
  
  	/* if fsstate is NULL, we are in EXPLAIN; nothing to do */
  	if (fsstate == NULL)
***************
*** 1107,1112 **** postgresEndForeignScan(ForeignScanState *node)
--- 1182,1194 ----
  	ReleaseConnection(fsstate->conn);
  	fsstate->conn = NULL;
  
+ 	/*
+ 	 * Finish state for fetching/locking foreign rows if needed.
+ 	 */
+ 	erm = ExecFindRowMark(estate, fsplan->scan.scanrelid, true);
+ 	if (erm && erm->relation && erm->fdw_state != NULL)
+ 		finish_foreign_fetch_state(estate, erm);
+ 
  	/* MemoryContexts will be deleted automatically. */
  }
  
***************
*** 1391,1400 **** postgresExecForeignInsert(EState *estate,
  
  	/* Set up the prepared statement on the remote server, if we didn't yet */
  	if (!fmstate->p_name)
! 		prepare_foreign_modify(fmstate);
  
  	/* Convert parameters needed by prepared statement to text form */
! 	p_values = convert_prep_stmt_params(fmstate, NULL, slot);
  
  	/*
  	 * Execute the prepared statement, and check for success.
--- 1473,1486 ----
  
  	/* Set up the prepared statement on the remote server, if we didn't yet */
  	if (!fmstate->p_name)
! 		fmstate->p_name = setup_prep_stmt(fmstate->conn, fmstate->query);
  
  	/* Convert parameters needed by prepared statement to text form */
! 	p_values = convert_prep_stmt_params(NULL, slot,
! 										fmstate->p_nums,
! 										fmstate->p_flinfo,
! 										fmstate->target_attrs,
! 										fmstate->temp_cxt);
  
  	/*
  	 * Execute the prepared statement, and check for success.
***************
*** 1451,1457 **** postgresExecForeignUpdate(EState *estate,
  
  	/* Set up the prepared statement on the remote server, if we didn't yet */
  	if (!fmstate->p_name)
! 		prepare_foreign_modify(fmstate);
  
  	/* Get the ctid that was passed up as a resjunk column */
  	datum = ExecGetJunkAttribute(planSlot,
--- 1537,1543 ----
  
  	/* Set up the prepared statement on the remote server, if we didn't yet */
  	if (!fmstate->p_name)
! 		fmstate->p_name = setup_prep_stmt(fmstate->conn, fmstate->query);
  
  	/* Get the ctid that was passed up as a resjunk column */
  	datum = ExecGetJunkAttribute(planSlot,
***************
*** 1462,1470 **** postgresExecForeignUpdate(EState *estate,
  		elog(ERROR, "ctid is NULL");
  
  	/* Convert parameters needed by prepared statement to text form */
! 	p_values = convert_prep_stmt_params(fmstate,
! 										(ItemPointer) DatumGetPointer(datum),
! 										slot);
  
  	/*
  	 * Execute the prepared statement, and check for success.
--- 1548,1559 ----
  		elog(ERROR, "ctid is NULL");
  
  	/* Convert parameters needed by prepared statement to text form */
! 	p_values = convert_prep_stmt_params((ItemPointer) DatumGetPointer(datum),
! 										slot,
! 										fmstate->p_nums,
! 										fmstate->p_flinfo,
! 										fmstate->target_attrs,
! 										fmstate->temp_cxt);
  
  	/*
  	 * Execute the prepared statement, and check for success.
***************
*** 1521,1527 **** postgresExecForeignDelete(EState *estate,
  
  	/* Set up the prepared statement on the remote server, if we didn't yet */
  	if (!fmstate->p_name)
! 		prepare_foreign_modify(fmstate);
  
  	/* Get the ctid that was passed up as a resjunk column */
  	datum = ExecGetJunkAttribute(planSlot,
--- 1610,1616 ----
  
  	/* Set up the prepared statement on the remote server, if we didn't yet */
  	if (!fmstate->p_name)
! 		fmstate->p_name = setup_prep_stmt(fmstate->conn, fmstate->query);
  
  	/* Get the ctid that was passed up as a resjunk column */
  	datum = ExecGetJunkAttribute(planSlot,
***************
*** 1532,1540 **** postgresExecForeignDelete(EState *estate,
  		elog(ERROR, "ctid is NULL");
  
  	/* Convert parameters needed by prepared statement to text form */
! 	p_values = convert_prep_stmt_params(fmstate,
! 										(ItemPointer) DatumGetPointer(datum),
! 										NULL);
  
  	/*
  	 * Execute the prepared statement, and check for success.
--- 1621,1632 ----
  		elog(ERROR, "ctid is NULL");
  
  	/* Convert parameters needed by prepared statement to text form */
! 	p_values = convert_prep_stmt_params((ItemPointer) DatumGetPointer(datum),
! 										NULL,
! 										fmstate->p_nums,
! 										fmstate->p_flinfo,
! 										fmstate->target_attrs,
! 										fmstate->temp_cxt);
  
  	/*
  	 * Execute the prepared statement, and check for success.
***************
*** 1656,1661 **** postgresIsForeignRelUpdatable(Relation rel)
--- 1748,1927 ----
  }
  
  /*
+  * postgresGetForeignRowMarkType
+  *		Get rowmark type that we use for a given LockClauseStrength value.
+  */
+ static RowMarkType
+ postgresGetForeignRowMarkType(LockClauseStrength strength)
+ {
+ 	/* return ROW_MARK_COPY; */
+ 	switch (strength)
+ 	{
+ 		case LCS_NONE:
+ 			return ROW_MARK_REFERENCE;
+ 		case LCS_FORKEYSHARE:
+ 			return ROW_MARK_KEYSHARE;
+ 		case LCS_FORSHARE:
+ 			return ROW_MARK_SHARE;
+ 		case LCS_FORNOKEYUPDATE:
+ 			return ROW_MARK_NOKEYEXCLUSIVE;
+ 		case LCS_FORUPDATE:
+ 			return ROW_MARK_EXCLUSIVE;
+ 	}
+ 	return ROW_MARK_COPY;		/* shouldn't happen */
+ }
+ 
+ /*
+  * postgresLockForeignRow
+  *		Lock one tuple in a foreign table
+  */
+ static bool
+ postgresLockForeignRow(EState *estate,
+ 					   ExecRowMark *erm,
+ 					   ItemPointer tupleid)
+ {
+ 	PgFdwFetchState *ffstate = (PgFdwFetchState *) erm->fdw_state;
+ 	const char **p_values;
+ 	PGresult   *res;
+ 	HeapTuple	tuple;
+ 
+ 	ffstate->locked_tuple = NULL;
+ 
+ 	/* Set up the prepared statement on the remote server, if we didn't yet */
+ 	if (!ffstate->p_name)
+ 		ffstate->p_name = setup_prep_stmt(ffstate->conn, ffstate->query);
+ 
+ 	/* Convert parameters needed by prepared statement to text form */
+ 	p_values = convert_prep_stmt_params(tupleid, NULL,
+ 										ffstate->p_nums,
+ 										ffstate->p_flinfo,
+ 										NIL,
+ 										ffstate->temp_cxt);
+ 
+ 	/*
+ 	 * Execute the prepared statement, and check for success.
+ 	 *
+ 	 * We don't use a PG_TRY block here, so be careful not to throw error
+ 	 * without releasing the PGresult.
+ 	 */
+ 	res = PQexecPrepared(ffstate->conn,
+ 						 ffstate->p_name,
+ 						 ffstate->p_nums,
+ 						 p_values,
+ 						 NULL,
+ 						 NULL,
+ 						 0);
+ 	if (PQresultStatus(res) != PGRES_TUPLES_OK)
+ 		pgfdw_report_error(ERROR, res, ffstate->conn, true, ffstate->query);
+ 
+ 	/* PGresult must be released before leaving this function. */
+ 	PG_TRY();
+ 	{
+ 		/* Create the tuple */
+ 		tuple = make_tuple_from_result_row(res, 0,
+ 										   ffstate->rel,
+ 										   ffstate->attinmeta,
+ 										   ffstate->retrieved_attrs,
+ 										   ffstate->temp_cxt);
+ 		tuple->t_self = *tupleid;
+ 		tuple->t_tableOid = erm->relid;
+ 
+ 		PQclear(res);
+ 		res = NULL;
+ 	}
+ 	PG_CATCH();
+ 	{
+ 		if (res)
+ 			PQclear(res);
+ 		PG_RE_THROW();
+ 	}
+ 	PG_END_TRY();
+ 
+ 	MemoryContextReset(ffstate->temp_cxt);
+ 
+ 	/* Remember locked tuple for later processing */
+ 	ffstate->locked_tuple = tuple;
+ 
+ 	/* Got the lock successfully */
+ 	return true;
+ }
+ 
+ /*
+  * postgresFetchForeignRow
+  *		Fetch one tuple from a foreign table
+  */
+ static HeapTuple
+ postgresFetchForeignRow(EState *estate,
+ 						ExecRowMark *erm,
+ 						ItemPointer tupleid)
+ {
+ 	PgFdwFetchState *ffstate = (PgFdwFetchState *) erm->fdw_state;
+ 	const char **p_values;
+ 	PGresult   *res;
+ 	HeapTuple	tuple;
+ 
+ 	if (RowMarkRequiresRowShareLock(erm->markType))
+ 	{
+ 		Assert(ffstate->locked_tuple);
+ 		return ffstate->locked_tuple;
+ 	}
+ 
+ 	/* Set up the prepared statement on the remote server, if we didn't yet */
+ 	if (!ffstate->p_name)
+ 		ffstate->p_name = setup_prep_stmt(ffstate->conn, ffstate->query);
+ 
+ 	/* Convert parameters needed by prepared statement to text form */
+ 	p_values = convert_prep_stmt_params(tupleid, NULL,
+ 										ffstate->p_nums,
+ 										ffstate->p_flinfo,
+ 										NIL,
+ 										ffstate->temp_cxt);
+ 
+ 	/*
+ 	 * Execute the prepared statement, and check for success.
+ 	 *
+ 	 * We don't use a PG_TRY block here, so be careful not to throw error
+ 	 * without releasing the PGresult.
+ 	 */
+ 	res = PQexecPrepared(ffstate->conn,
+ 						 ffstate->p_name,
+ 						 ffstate->p_nums,
+ 						 p_values,
+ 						 NULL,
+ 						 NULL,
+ 						 0);
+ 	if (PQresultStatus(res) != PGRES_TUPLES_OK)
+ 		pgfdw_report_error(ERROR, res, ffstate->conn, true, ffstate->query);
+ 
+ 	/* PGresult must be released before leaving this function. */
+ 	PG_TRY();
+ 	{
+ 		/* Create the tuple */
+ 		tuple = make_tuple_from_result_row(res, 0,
+ 										   ffstate->rel,
+ 										   ffstate->attinmeta,
+ 										   ffstate->retrieved_attrs,
+ 										   ffstate->temp_cxt);
+ 		tuple->t_self = *tupleid;
+ 		tuple->t_tableOid = erm->relid;
+ 
+ 		PQclear(res);
+ 		res = NULL;
+ 	}
+ 	PG_CATCH();
+ 	{
+ 		if (res)
+ 			PQclear(res);
+ 		PG_RE_THROW();
+ 	}
+ 	PG_END_TRY();
+ 
+ 	MemoryContextReset(ffstate->temp_cxt);
+ 
+ 	return tuple;
+ }
+ 
+ /*
   * postgresExplainForeignScan
   *		Produce extra output for EXPLAIN of a ForeignScan on a foreign table
   */
***************
*** 1918,1923 **** ec_member_matches_foreign(PlannerInfo *root, RelOptInfo *rel,
--- 2184,2232 ----
  }
  
  /*
+  * Create the FDW-private information for fetching/locking foreign rows.
+  */
+ static List *
+ create_foreign_fetch_info(PlannerInfo *root,
+ 						  RelOptInfo *baserel,
+ 						  RowMarkType markType)
+ {
+ 	StringInfoData sql;
+ 	List	   *retrieved_attrs;
+ 	Bitmapset  *attrs_used = NULL;
+ 
+ 	/*
+ 	 * Build the query string to be sent for execution.
+ 	 */
+ 	initStringInfo(&sql);
+ 	/* Add a whole-row var to attrs_used to retrieve all the columns. */
+ 	attrs_used = bms_add_member(attrs_used,
+ 								0 - FirstLowInvalidHeapAttributeNumber);
+ 	deparseSelectSql(&sql, root, baserel, attrs_used, &retrieved_attrs);
+ 	appendStringInfoString(&sql, " WHERE ctid = $1");
+ 
+ 	switch (markType)
+ 	{
+ 		case ROW_MARK_EXCLUSIVE:
+ 		case ROW_MARK_NOKEYEXCLUSIVE:
+ 			appendStringInfoString(&sql, " FOR UPDATE");
+ 			break;
+ 		case ROW_MARK_SHARE:
+ 		case ROW_MARK_KEYSHARE:
+ 			appendStringInfoString(&sql, " FOR SHARE");
+ 			break;
+ 		default:
+ 			break;
+ 	}
+ 
+ 	/*
+ 	 * Build the fdw_private list that will be available to the executor.
+ 	 * Items in the list must match enum FdwFetchPrivateIndex, above.
+ 	 */
+ 	return list_make2(makeString(sql.data), retrieved_attrs);
+ }
+ 
+ /*
   * Create cursor for node's query with current parameter values.
   */
  static void
***************
*** 2154,2164 **** close_cursor(PGconn *conn, unsigned int cursor_number)
  }
  
  /*
!  * prepare_foreign_modify
   *		Establish a prepared statement for execution of INSERT/UPDATE/DELETE
   */
! static void
! prepare_foreign_modify(PgFdwModifyState *fmstate)
  {
  	char		prep_name[NAMEDATALEN];
  	char	   *p_name;
--- 2463,2474 ----
  }
  
  /*
!  * setup_prep_stmt
   *		Establish a prepared statement for execution of INSERT/UPDATE/DELETE
+  *		or re-fetching tuples for EvalPlanQual rechecking
   */
! static char *
! setup_prep_stmt(PGconn *conn, char *query)
  {
  	char		prep_name[NAMEDATALEN];
  	char	   *p_name;
***************
*** 2166,2172 **** prepare_foreign_modify(PgFdwModifyState *fmstate)
  
  	/* Construct name we'll use for the prepared statement. */
  	snprintf(prep_name, sizeof(prep_name), "pgsql_fdw_prep_%u",
! 			 GetPrepStmtNumber(fmstate->conn));
  	p_name = pstrdup(prep_name);
  
  	/*
--- 2476,2482 ----
  
  	/* Construct name we'll use for the prepared statement. */
  	snprintf(prep_name, sizeof(prep_name), "pgsql_fdw_prep_%u",
! 			 GetPrepStmtNumber(conn));
  	p_name = pstrdup(prep_name);
  
  	/*
***************
*** 2179,2196 **** prepare_foreign_modify(PgFdwModifyState *fmstate)
  	 * We don't use a PG_TRY block here, so be careful not to throw error
  	 * without releasing the PGresult.
  	 */
! 	res = PQprepare(fmstate->conn,
! 					p_name,
! 					fmstate->query,
! 					0,
! 					NULL);
  
  	if (PQresultStatus(res) != PGRES_COMMAND_OK)
! 		pgfdw_report_error(ERROR, res, fmstate->conn, true, fmstate->query);
  	PQclear(res);
  
  	/* This action shows that the prepare has been done. */
! 	fmstate->p_name = p_name;
  }
  
  /*
--- 2489,2502 ----
  	 * We don't use a PG_TRY block here, so be careful not to throw error
  	 * without releasing the PGresult.
  	 */
! 	res = PQprepare(conn, p_name, query, 0, NULL);
  
  	if (PQresultStatus(res) != PGRES_COMMAND_OK)
! 		pgfdw_report_error(ERROR, res, conn, true, query);
  	PQclear(res);
  
  	/* This action shows that the prepare has been done. */
! 	return p_name;
  }
  
  /*
***************
*** 2203,2238 **** prepare_foreign_modify(PgFdwModifyState *fmstate)
   * Data is constructed in temp_cxt; caller should reset that after use.
   */
  static const char **
! convert_prep_stmt_params(PgFdwModifyState *fmstate,
! 						 ItemPointer tupleid,
! 						 TupleTableSlot *slot)
  {
  	const char **p_values;
  	int			pindex = 0;
  	MemoryContext oldcontext;
  
! 	oldcontext = MemoryContextSwitchTo(fmstate->temp_cxt);
  
! 	p_values = (const char **) palloc(sizeof(char *) * fmstate->p_nums);
  
  	/* 1st parameter should be ctid, if it's in use */
  	if (tupleid != NULL)
  	{
  		/* don't need set_transmission_modes for TID output */
! 		p_values[pindex] = OutputFunctionCall(&fmstate->p_flinfo[pindex],
  											  PointerGetDatum(tupleid));
  		pindex++;
  	}
  
  	/* get following parameters from slot */
! 	if (slot != NULL && fmstate->target_attrs != NIL)
  	{
  		int			nestlevel;
  		ListCell   *lc;
  
  		nestlevel = set_transmission_modes();
  
! 		foreach(lc, fmstate->target_attrs)
  		{
  			int			attnum = lfirst_int(lc);
  			Datum		value;
--- 2509,2547 ----
   * Data is constructed in temp_cxt; caller should reset that after use.
   */
  static const char **
! convert_prep_stmt_params(ItemPointer tupleid,
! 						 TupleTableSlot *slot,
! 						 int p_nums,
! 						 FmgrInfo *p_flinfo,
! 						 List *target_attrs,
! 						 MemoryContext temp_context)
  {
  	const char **p_values;
  	int			pindex = 0;
  	MemoryContext oldcontext;
  
! 	oldcontext = MemoryContextSwitchTo(temp_context);
  
! 	p_values = (const char **) palloc(sizeof(char *) * p_nums);
  
  	/* 1st parameter should be ctid, if it's in use */
  	if (tupleid != NULL)
  	{
  		/* don't need set_transmission_modes for TID output */
! 		p_values[pindex] = OutputFunctionCall(&p_flinfo[pindex],
  											  PointerGetDatum(tupleid));
  		pindex++;
  	}
  
  	/* get following parameters from slot */
! 	if (slot != NULL && target_attrs != NIL)
  	{
  		int			nestlevel;
  		ListCell   *lc;
  
  		nestlevel = set_transmission_modes();
  
! 		foreach(lc, target_attrs)
  		{
  			int			attnum = lfirst_int(lc);
  			Datum		value;
***************
*** 2242,2248 **** convert_prep_stmt_params(PgFdwModifyState *fmstate,
  			if (isnull)
  				p_values[pindex] = NULL;
  			else
! 				p_values[pindex] = OutputFunctionCall(&fmstate->p_flinfo[pindex],
  													  value);
  			pindex++;
  		}
--- 2551,2557 ----
  			if (isnull)
  				p_values[pindex] = NULL;
  			else
! 				p_values[pindex] = OutputFunctionCall(&p_flinfo[pindex],
  													  value);
  			pindex++;
  		}
***************
*** 2250,2256 **** convert_prep_stmt_params(PgFdwModifyState *fmstate,
  		reset_transmission_modes(nestlevel);
  	}
  
! 	Assert(pindex == fmstate->p_nums);
  
  	MemoryContextSwitchTo(oldcontext);
  
--- 2559,2565 ----
  		reset_transmission_modes(nestlevel);
  	}
  
! 	Assert(pindex == p_nums);
  
  	MemoryContextSwitchTo(oldcontext);
  
***************
*** 2290,2295 **** store_returning_result(PgFdwModifyState *fmstate,
--- 2599,2705 ----
  }
  
  /*
+  * init_foreign_fetch_state
+  *		Initialize an execution state for fetching/locking foreign rows
+  */
+ static void
+ init_foreign_fetch_state(EState *estate,
+ 						 ExecRowMark *erm,
+ 						 List *fdw_private,
+ 						 int eflags)
+ {
+ 	PgFdwFetchState *ffstate;
+ 	Relation	rel = erm->relation;
+ 	RangeTblEntry *rte;
+ 	Oid			userid;
+ 	ForeignTable *table;
+ 	ForeignServer *server;
+ 	UserMapping *user;
+ 	Oid			typefnoid;
+ 	bool		isvarlena;
+ 
+ 	/* Begin constructing PgFdwFetchState. */
+ 	ffstate = (PgFdwFetchState *) palloc0(sizeof(PgFdwFetchState));
+ 	ffstate->rel = rel;
+ 
+ 	/*
+ 	 * Identify which user to do the remote access as.  This should match what
+ 	 * ExecCheckRTEPerms() does.
+ 	 */
+ 	rte = rt_fetch(erm->rti, estate->es_range_table);
+ 	userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
+ 
+ 	/* Get info about foreign table. */
+ 	table = GetForeignTable(RelationGetRelid(rel));
+ 	server = GetForeignServer(table->serverid);
+ 	user = GetUserMapping(userid, server->serverid);
+ 
+ 	/* Open connection; report that we'll create a prepared statement. */
+ 	ffstate->conn = GetConnection(server, user, true);
+ 	ffstate->p_name = NULL;		/* prepared statement not made yet */
+ 
+ 	/* Deconstruct fdw_private data. */
+ 	ffstate->query = strVal(list_nth(fdw_private,
+ 									 FdwScanPrivateSelectSql2));
+ 	ffstate->retrieved_attrs = (List *) list_nth(fdw_private,
+ 												 FdwScanPrivateRetrievedAttrs2);
+ 
+ 	/* Create context for per-tuple temp workspace. */
+ 	ffstate->temp_cxt = AllocSetContextCreate(estate->es_query_cxt,
+ 											  "postgres_fdw temporary data",
+ 											  ALLOCSET_SMALL_MINSIZE,
+ 											  ALLOCSET_SMALL_INITSIZE,
+ 											  ALLOCSET_SMALL_MAXSIZE);
+ 
+ 	/* Prepare for input conversion of SELECT results. */
+ 	ffstate->attinmeta = TupleDescGetAttInMetadata(RelationGetDescr(rel));
+ 
+ 	/* Prepare for output conversion of parameters used in prepared stmt. */
+ 	ffstate->p_flinfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo));
+ 	ffstate->p_nums = 0;
+ 
+ 	/* Only one transmittable parameter will be ctid */
+ 	getTypeOutputInfo(TIDOID, &typefnoid, &isvarlena);
+ 	fmgr_info(typefnoid, &ffstate->p_flinfo[ffstate->p_nums]);
+ 	ffstate->p_nums++;
+ 
+ 	erm->fdw_state = ffstate;
+ }
+ 
+ /*
+  * finish_foreign_fetch_state
+  *		Finish an execution state for fetching/locking foreign rows
+  */
+ static void
+ finish_foreign_fetch_state(EState *estate, ExecRowMark *erm)
+ {
+ 	PgFdwFetchState *ffstate = (PgFdwFetchState *) erm->fdw_state;
+ 
+ 	/* If we created a prepared statement, destroy it */
+ 	if (ffstate->p_name)
+ 	{
+ 		char		sql[64];
+ 		PGresult   *res;
+ 
+ 		snprintf(sql, sizeof(sql), "DEALLOCATE %s", ffstate->p_name);
+ 
+ 		/*
+ 		 * We don't use a PG_TRY block here, so be careful not to throw error
+ 		 * without releasing the PGresult.
+ 		 */
+ 		res = PQexec(ffstate->conn, sql);
+ 		if (PQresultStatus(res) != PGRES_COMMAND_OK)
+ 			pgfdw_report_error(ERROR, res, ffstate->conn, true, sql);
+ 		PQclear(res);
+ 		ffstate->p_name = NULL;
+ 	}
+ 
+ 	/* Release remote connection */
+ 	ReleaseConnection(ffstate->conn);
+ 	ffstate->conn = NULL;
+ }
+ 
+ /*
   * postgresAnalyzeForeignTable
   *		Test whether analyzing this foreign table is supported
   */
#34Tom Lane
tgl@sss.pgh.pa.us
In reply to: Etsuro Fujita (#33)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

Etsuro Fujita <fujita.etsuro@lab.ntt.co.jp> writes:

On 2015/05/11 8:50, Tom Lane wrote:

In particular, I find the addition of "void *fdw_state" to ExecRowMark
to be pretty questionable. That does not seem like a good place to keep
random state. (I realize that WHERE CURRENT OF keeps some state in
ExecRowMark, but that's a crock not something to emulate.) ISTM that in
most scenarios, the state that LockForeignRow/FetchForeignRow would like
to get at is probably the FDW state associated with the ForeignScanState
that the tuple came from. Which this API doesn't help with particularly.
I wonder if we should instead add a "ScanState*" field and expect the
core code to set that up (ExecOpenScanRelation could do it with minor
API changes...).

Sorry, I don't understand clearly what you mean, but that (the idea of
expecting the core to set it up) sounds inconsistent with your comment
on the earlier version of the API "BeginForeignFetch" [1].

Well, the other way that it could work would be for the FDW's
BeginForeignScan routine to search for a relevant ExecRowMark entry
(which, per that previous discussion, it'd likely need to do anyway) and
then plug a back-link to the ForeignScanState into the ExecRowMark.
But I don't see any very good reason to make every FDW that's concerned
with this do that, rather than doing it once in the core code. I'm also
thinking that having a link to the source scan node there might be useful
someday for regular tables as well as FDWs.

Also, as I mentioned, I'd be a whole lot happier if we had a way to test
this...

Attached is a postgres_fdw patch that I used for the testing. If you
try it, edit postgresGetForeignRowMarkType as necessary. I have to
confess that I did the testing only in the normal conditions by the patch.

Thanks, this is helpful. However, it pretty much proves my point about
wanting the back-link --- it seems like init_foreign_fetch_state, for
example, is uselessly repeating a lot of the setup already done for
the scan node itself.

regards, tom lane

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#35Tom Lane
tgl@sss.pgh.pa.us
In reply to: Tom Lane (#34)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

I wrote:

Etsuro Fujita <fujita.etsuro@lab.ntt.co.jp> writes:

On 2015/05/11 8:50, Tom Lane wrote:

I wonder if we should instead add a "ScanState*" field and expect the
core code to set that up (ExecOpenScanRelation could do it with minor
API changes...).

Sorry, I don't understand clearly what you mean, but that (the idea of
expecting the core to set it up) sounds inconsistent with your comment
on the earlier version of the API "BeginForeignFetch" [1].

Well, the other way that it could work would be for the FDW's
BeginForeignScan routine to search for a relevant ExecRowMark entry
(which, per that previous discussion, it'd likely need to do anyway) and
then plug a back-link to the ForeignScanState into the ExecRowMark.
But I don't see any very good reason to make every FDW that's concerned
with this do that, rather than doing it once in the core code. I'm also
thinking that having a link to the source scan node there might be useful
someday for regular tables as well as FDWs.

And on the third hand ... in view of the custom/foreign join pushdown
stuff that just went in, it would be a mistake to assume that there
necessarily is a distinct source scan node associated with each
ExecRowMark. The FDW can presumably find all the ExecRowMarks associated
with the rels that a join ForeignScan is scanning, but we can't assume
that ExecOpenScanRelation will be able to set up those links, and the FDW
might not want a simple link to the join scan node anyway. So it's
probably best to leave it as an unspecified void* instead of trying to
nail down the meaning more precisely.

I still don't much like calling it "fdw_state" though, because I don't
think it should be documented as only for the use of FDWs. (Custom scans
might have a use for a pointer field here too, for example.) Maybe just
call it "void *extra" and document it as being for the use of whatever
plan node is sourcing the relation's tuples.

regards, tom lane

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#36Tom Lane
tgl@sss.pgh.pa.us
In reply to: Tom Lane (#35)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

On further consideration ...

I don't much like the division of labor between LockForeignRow and
FetchForeignRow. In principle that would lead to not one but two
extra remote accesses per locked row in SELECT FOR UPDATE, at least
in the case that an EvalPlanQual recheck is required. (I see that
in your prototype patch for postgres_fdw you attempt to avoid that
by saving a retrieved tuple in LockForeignRow and then returning it
in FetchForeignRow, but that seems extremely ugly and bug-prone,
since there is nothing in the API spec insisting that those calls match
up one-to-one.) The fact that locking and fetching a tuple are separate
operations in the heapam API is a historical artifact that probably
doesn't make sense to duplicate in the FDW API.

Another problem is that we fail to detect whether an EvalPlanQual recheck
is required during a SELECT FOR UPDATE on a remote table, which we
certainly ought to do if the objective is to make postgres_fdw semantics
match the local ones.

What I'm inclined to do is merge LockForeignRow and FetchForeignRow
into one operation, which would have the semantics of returning a
palloc'd tuple, or NULL on a SKIP LOCKED failure, and it would be expected
to acquire a lock according to erm->markType (in particular, no lock just
a re-fetch for ROW_MARK_REFERENCE). In addition it needs some way of
reporting that the returned row is an updated version rather than the
original. Probably just an extra output boolean would do for that.

This'd require some refactoring in nodeLockRows, because we'd want to
be able to hang onto the returned tuple without necessarily provoking
an EvalPlanQual cycle, but it doesn't look too bad.

regards, tom lane

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#37Etsuro Fujita
fujita.etsuro@lab.ntt.co.jp
In reply to: Tom Lane (#36)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

On 2015/05/12 7:42, Tom Lane wrote:

On further consideration ...

Thanks for the consideration!

I don't much like the division of labor between LockForeignRow and
FetchForeignRow. In principle that would lead to not one but two
extra remote accesses per locked row in SELECT FOR UPDATE, at least
in the case that an EvalPlanQual recheck is required. (I see that
in your prototype patch for postgres_fdw you attempt to avoid that
by saving a retrieved tuple in LockForeignRow and then returning it
in FetchForeignRow, but that seems extremely ugly and bug-prone,
since there is nothing in the API spec insisting that those calls match
up one-to-one.) The fact that locking and fetching a tuple are separate
operations in the heapam API is a historical artifact that probably
doesn't make sense to duplicate in the FDW API.

I understand your concern about the postgres_fdw patch. However, I
think we should divide this into the two routines as the core patch
does, because I think that would allow an FDW author to implement these
routines so as to improve the efficiency for scenarios that seldom need
fetching, by not retrieving and saving locked tuples in LockForeignRow.

Another problem is that we fail to detect whether an EvalPlanQual recheck
is required during a SELECT FOR UPDATE on a remote table, which we
certainly ought to do if the objective is to make postgres_fdw semantics
match the local ones.

I think that is interesting in theory, but I'm not sure that is worth
much in practice.

Best regards,
Etsuro Fujita

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#38Tom Lane
tgl@sss.pgh.pa.us
In reply to: Etsuro Fujita (#37)
1 attachment(s)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

Etsuro Fujita <fujita.etsuro@lab.ntt.co.jp> writes:

On 2015/05/12 7:42, Tom Lane wrote:

I don't much like the division of labor between LockForeignRow and
FetchForeignRow. In principle that would lead to not one but two
extra remote accesses per locked row in SELECT FOR UPDATE, at least
in the case that an EvalPlanQual recheck is required. (I see that
in your prototype patch for postgres_fdw you attempt to avoid that
by saving a retrieved tuple in LockForeignRow and then returning it
in FetchForeignRow, but that seems extremely ugly and bug-prone,
since there is nothing in the API spec insisting that those calls match
up one-to-one.) The fact that locking and fetching a tuple are separate
operations in the heapam API is a historical artifact that probably
doesn't make sense to duplicate in the FDW API.

I understand your concern about the postgres_fdw patch. However, I
think we should divide this into the two routines as the core patch
does, because I think that would allow an FDW author to implement these
routines so as to improve the efficiency for scenarios that seldom need
fetching, by not retrieving and saving locked tuples in LockForeignRow.

I find it hard to envision a situation where an FDW could lock a row
without being able to fetch its contents more or less for free. We have
IIRC discussed changing the way that works even for heapam, since the
current design requires multiple buffer lock/unlock cycles which aren't
free either. In any case, I think that the temptation to do probably-
buggy stuff like what you did in your prototype would be too strong for
most people, and that we're much better off with a simpler one-step API.

An additional advantage of the way I changed this is that it makes the
logic in nodeLockRows simpler too; we no longer need the very messy
hack added by commit 2db576ba8c449fca.

Another problem is that we fail to detect whether an EvalPlanQual recheck
is required during a SELECT FOR UPDATE on a remote table, which we
certainly ought to do if the objective is to make postgres_fdw semantics
match the local ones.

I think that is interesting in theory, but I'm not sure that is worth
much in practice.

Hm, well, AFAICS the entire point of this patch is to make it possible for
FDWs to duplicate the semantics used for local tables, so I'm not sure
why you'd want to ignore that aspect of it.

Anyway, I redid the patch along those lines, improved the documentation,
and have committed it.

I did a very basic update of your postgres_fdw patch to test this with,
and attach that so that you don't have to repeat the effort. I'm not sure
whether we want to try to convert that into something committable. I'm
afraid that the extra round trips involved in doing row locking this way
will be so expensive that no one really wants it for postgres_fdw. It's
more credible that FDWs operating against local storage would have use
for it.

regards, tom lane

Attachments:

postgres-fdw-late-locking.patchtext/x-diff; charset=us-ascii; name=postgres-fdw-late-locking.patchDownload
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 0c44260..a122c9e 100644
*** a/contrib/postgres_fdw/postgres_fdw.c
--- b/contrib/postgres_fdw/postgres_fdw.c
*************** typedef struct PgFdwRelationInfo
*** 88,93 ****
--- 88,95 ----
   *
   * 1) SELECT statement text to be sent to the remote server
   * 2) Integer list of attribute numbers retrieved by the SELECT
+  * 3) SELECT statement text to be sent to the remote server
+  * 4) Integer list of attribute numbers retrieved by the SELECT
   *
   * These items are indexed with the enum FdwScanPrivateIndex, so an item
   * can be fetched with list_nth().  For example, to get the SELECT statement:
*************** enum FdwScanPrivateIndex
*** 98,104 ****
  	/* SQL statement to execute remotely (as a String node) */
  	FdwScanPrivateSelectSql,
  	/* Integer list of attribute numbers retrieved by the SELECT */
! 	FdwScanPrivateRetrievedAttrs
  };
  
  /*
--- 100,110 ----
  	/* SQL statement to execute remotely (as a String node) */
  	FdwScanPrivateSelectSql,
  	/* Integer list of attribute numbers retrieved by the SELECT */
! 	FdwScanPrivateRetrievedAttrs,
! 	/* SQL statement to execute remotely (as a String node) */
! 	FdwScanPrivateSelectSql2,
! 	/* Integer list of attribute numbers retrieved by SELECT */
! 	FdwScanPrivateRetrievedAttrs2
  };
  
  /*
*************** typedef struct PgFdwModifyState
*** 186,191 ****
--- 192,221 ----
  } PgFdwModifyState;
  
  /*
+  * Execution state for fetching/locking foreign rows.
+  */
+ typedef struct PgFdwFetchState
+ {
+ 	Relation	rel;			/* relcache entry for the foreign table */
+ 	AttInMetadata *attinmeta;	/* attribute datatype conversion metadata */
+ 
+ 	/* for remote query execution */
+ 	PGconn	   *conn;			/* connection for the fetch */
+ 	char	   *p_name;			/* name of prepared statement, if created */
+ 
+ 	/* extracted fdw_private data */
+ 	char	   *query;			/* text of SELECT command */
+ 	List	   *retrieved_attrs;	/* attr numbers retrieved by SELECT */
+ 
+ 	/* info about parameters for prepared statement */
+ 	int			p_nums;			/* number of parameters to transmit */
+ 	FmgrInfo   *p_flinfo;		/* output conversion functions for them */
+ 
+ 	/* working memory context */
+ 	MemoryContext temp_cxt;		/* context for per-tuple temporary data */
+ } PgFdwFetchState;
+ 
+ /*
   * Workspace for analyzing a foreign table.
   */
  typedef struct PgFdwAnalyzeState
*************** static TupleTableSlot *postgresExecForei
*** 276,281 ****
--- 306,317 ----
  static void postgresEndForeignModify(EState *estate,
  						 ResultRelInfo *resultRelInfo);
  static int	postgresIsForeignRelUpdatable(Relation rel);
+ static RowMarkType postgresGetForeignRowMarkType(RangeTblEntry *rte,
+ 							  LockClauseStrength strength);
+ static HeapTuple postgresRefetchForeignRow(EState *estate,
+ 						  ExecRowMark *erm,
+ 						  Datum rowid,
+ 						  bool *updated);
  static void postgresExplainForeignScan(ForeignScanState *node,
  						   ExplainState *es);
  static void postgresExplainForeignModify(ModifyTableState *mtstate,
*************** static void get_remote_estimate(const ch
*** 306,320 ****
  static bool ec_member_matches_foreign(PlannerInfo *root, RelOptInfo *rel,
  						  EquivalenceClass *ec, EquivalenceMember *em,
  						  void *arg);
  static void create_cursor(ForeignScanState *node);
  static void fetch_more_data(ForeignScanState *node);
  static void close_cursor(PGconn *conn, unsigned int cursor_number);
! static void prepare_foreign_modify(PgFdwModifyState *fmstate);
! static const char **convert_prep_stmt_params(PgFdwModifyState *fmstate,
! 						 ItemPointer tupleid,
! 						 TupleTableSlot *slot);
  static void store_returning_result(PgFdwModifyState *fmstate,
  					   TupleTableSlot *slot, PGresult *res);
  static int postgresAcquireSampleRowsFunc(Relation relation, int elevel,
  							  HeapTuple *rows, int targrows,
  							  double *totalrows,
--- 342,367 ----
  static bool ec_member_matches_foreign(PlannerInfo *root, RelOptInfo *rel,
  						  EquivalenceClass *ec, EquivalenceMember *em,
  						  void *arg);
+ static List *create_foreign_fetch_info(PlannerInfo *root,
+ 						  RelOptInfo *baserel,
+ 						  RowMarkType markType);
  static void create_cursor(ForeignScanState *node);
  static void fetch_more_data(ForeignScanState *node);
  static void close_cursor(PGconn *conn, unsigned int cursor_number);
! static char *setup_prep_stmt(PGconn *conn, char *query);
! static const char **convert_prep_stmt_params(ItemPointer tupleid,
! 						 TupleTableSlot *slot,
! 						 int p_nums,
! 						 FmgrInfo *p_flinfo,
! 						 List *target_attrs,
! 						 MemoryContext temp_context);
  static void store_returning_result(PgFdwModifyState *fmstate,
  					   TupleTableSlot *slot, PGresult *res);
+ static void init_foreign_fetch_state(EState *estate,
+ 						 ExecRowMark *erm,
+ 						 List *fdw_private,
+ 						 int eflags);
+ static void finish_foreign_fetch_state(EState *estate, ExecRowMark *erm);
  static int postgresAcquireSampleRowsFunc(Relation relation, int elevel,
  							  HeapTuple *rows, int targrows,
  							  double *totalrows,
*************** postgres_fdw_handler(PG_FUNCTION_ARGS)
*** 358,363 ****
--- 405,414 ----
  	routine->EndForeignModify = postgresEndForeignModify;
  	routine->IsForeignRelUpdatable = postgresIsForeignRelUpdatable;
  
+ 	/* Functions for SELECT FOR UPDATE/SHARE row locking */
+ 	routine->GetForeignRowMarkType = postgresGetForeignRowMarkType;
+ 	routine->RefetchForeignRow = postgresRefetchForeignRow;
+ 
  	/* Support functions for EXPLAIN */
  	routine->ExplainForeignScan = postgresExplainForeignScan;
  	routine->ExplainForeignModify = postgresExplainForeignModify;
*************** postgresGetForeignPlan(PlannerInfo *root
*** 746,751 ****
--- 797,803 ----
  	PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) baserel->fdw_private;
  	Index		scan_relid = baserel->relid;
  	List	   *fdw_private;
+ 	List	   *fdw_private2 = NIL;
  	List	   *remote_conds = NIL;
  	List	   *local_exprs = NIL;
  	List	   *params_list = NIL;
*************** postgresGetForeignPlan(PlannerInfo *root
*** 836,855 ****
  			 * complete information about, and (b) it wouldn't work anyway on
  			 * older remote servers.  Likewise, we don't worry about NOWAIT.
  			 */
! 			switch (rc->strength)
  			{
! 				case LCS_NONE:
! 					/* No locking needed */
! 					break;
! 				case LCS_FORKEYSHARE:
! 				case LCS_FORSHARE:
! 					appendStringInfoString(&sql, " FOR SHARE");
! 					break;
! 				case LCS_FORNOKEYUPDATE:
! 				case LCS_FORUPDATE:
! 					appendStringInfoString(&sql, " FOR UPDATE");
! 					break;
  			}
  		}
  	}
  
--- 888,913 ----
  			 * complete information about, and (b) it wouldn't work anyway on
  			 * older remote servers.  Likewise, we don't worry about NOWAIT.
  			 */
! 			if (rc->markType == ROW_MARK_COPY)
  			{
! 				switch (rc->strength)
! 				{
! 					case LCS_NONE:
! 						/* No locking needed */
! 						break;
! 					case LCS_FORKEYSHARE:
! 					case LCS_FORSHARE:
! 						appendStringInfoString(&sql, " FOR SHARE");
! 						break;
! 					case LCS_FORNOKEYUPDATE:
! 					case LCS_FORUPDATE:
! 						appendStringInfoString(&sql, " FOR UPDATE");
! 						break;
! 				}
  			}
+ 			else
+ 				fdw_private2 = create_foreign_fetch_info(root, baserel,
+ 														 rc->markType);
  		}
  	}
  
*************** postgresGetForeignPlan(PlannerInfo *root
*** 859,864 ****
--- 917,924 ----
  	 */
  	fdw_private = list_make2(makeString(sql.data),
  							 retrieved_attrs);
+ 	if (fdw_private2)
+ 		fdw_private = list_concat(fdw_private, fdw_private2);
  
  	/*
  	 * Create the ForeignScan node from target list, local filtering
*************** postgresBeginForeignScan(ForeignScanStat
*** 887,892 ****
--- 947,953 ----
  	EState	   *estate = node->ss.ps.state;
  	PgFdwScanState *fsstate;
  	RangeTblEntry *rte;
+ 	ExecRowMark *erm;
  	Oid			userid;
  	ForeignTable *table;
  	ForeignServer *server;
*************** postgresBeginForeignScan(ForeignScanStat
*** 987,992 ****
--- 1048,1060 ----
  		fsstate->param_values = (const char **) palloc0(numParams * sizeof(char *));
  	else
  		fsstate->param_values = NULL;
+ 
+ 	/*
+ 	 * Initialize state for fetching/locking foreign rows if needed.
+ 	 */
+ 	erm = ExecFindRowMark(estate, fsplan->scan.scanrelid, true);
+ 	if (erm && erm->relation && erm->ermExtra == NULL)
+ 		init_foreign_fetch_state(estate, erm, fsplan->fdw_private, eflags);
  }
  
  /*
*************** postgresReScanForeignScan(ForeignScanSta
*** 1094,1100 ****
--- 1162,1171 ----
  static void
  postgresEndForeignScan(ForeignScanState *node)
  {
+ 	ForeignScan *fsplan = (ForeignScan *) node->ss.ps.plan;
+ 	EState	   *estate = node->ss.ps.state;
  	PgFdwScanState *fsstate = (PgFdwScanState *) node->fdw_state;
+ 	ExecRowMark *erm;
  
  	/* if fsstate is NULL, we are in EXPLAIN; nothing to do */
  	if (fsstate == NULL)
*************** postgresEndForeignScan(ForeignScanState 
*** 1108,1113 ****
--- 1179,1191 ----
  	ReleaseConnection(fsstate->conn);
  	fsstate->conn = NULL;
  
+ 	/*
+ 	 * Finish state for fetching/locking foreign rows if needed.
+ 	 */
+ 	erm = ExecFindRowMark(estate, fsplan->scan.scanrelid, true);
+ 	if (erm && erm->relation && erm->ermExtra != NULL)
+ 		finish_foreign_fetch_state(estate, erm);
+ 
  	/* MemoryContexts will be deleted automatically. */
  }
  
*************** postgresExecForeignInsert(EState *estate
*** 1405,1414 ****
  
  	/* Set up the prepared statement on the remote server, if we didn't yet */
  	if (!fmstate->p_name)
! 		prepare_foreign_modify(fmstate);
  
  	/* Convert parameters needed by prepared statement to text form */
! 	p_values = convert_prep_stmt_params(fmstate, NULL, slot);
  
  	/*
  	 * Execute the prepared statement, and check for success.
--- 1483,1496 ----
  
  	/* Set up the prepared statement on the remote server, if we didn't yet */
  	if (!fmstate->p_name)
! 		fmstate->p_name = setup_prep_stmt(fmstate->conn, fmstate->query);
  
  	/* Convert parameters needed by prepared statement to text form */
! 	p_values = convert_prep_stmt_params(NULL, slot,
! 										fmstate->p_nums,
! 										fmstate->p_flinfo,
! 										fmstate->target_attrs,
! 										fmstate->temp_cxt);
  
  	/*
  	 * Execute the prepared statement, and check for success.
*************** postgresExecForeignUpdate(EState *estate
*** 1465,1471 ****
  
  	/* Set up the prepared statement on the remote server, if we didn't yet */
  	if (!fmstate->p_name)
! 		prepare_foreign_modify(fmstate);
  
  	/* Get the ctid that was passed up as a resjunk column */
  	datum = ExecGetJunkAttribute(planSlot,
--- 1547,1553 ----
  
  	/* Set up the prepared statement on the remote server, if we didn't yet */
  	if (!fmstate->p_name)
! 		fmstate->p_name = setup_prep_stmt(fmstate->conn, fmstate->query);
  
  	/* Get the ctid that was passed up as a resjunk column */
  	datum = ExecGetJunkAttribute(planSlot,
*************** postgresExecForeignUpdate(EState *estate
*** 1476,1484 ****
  		elog(ERROR, "ctid is NULL");
  
  	/* Convert parameters needed by prepared statement to text form */
! 	p_values = convert_prep_stmt_params(fmstate,
! 										(ItemPointer) DatumGetPointer(datum),
! 										slot);
  
  	/*
  	 * Execute the prepared statement, and check for success.
--- 1558,1569 ----
  		elog(ERROR, "ctid is NULL");
  
  	/* Convert parameters needed by prepared statement to text form */
! 	p_values = convert_prep_stmt_params((ItemPointer) DatumGetPointer(datum),
! 										slot,
! 										fmstate->p_nums,
! 										fmstate->p_flinfo,
! 										fmstate->target_attrs,
! 										fmstate->temp_cxt);
  
  	/*
  	 * Execute the prepared statement, and check for success.
*************** postgresExecForeignDelete(EState *estate
*** 1535,1541 ****
  
  	/* Set up the prepared statement on the remote server, if we didn't yet */
  	if (!fmstate->p_name)
! 		prepare_foreign_modify(fmstate);
  
  	/* Get the ctid that was passed up as a resjunk column */
  	datum = ExecGetJunkAttribute(planSlot,
--- 1620,1626 ----
  
  	/* Set up the prepared statement on the remote server, if we didn't yet */
  	if (!fmstate->p_name)
! 		fmstate->p_name = setup_prep_stmt(fmstate->conn, fmstate->query);
  
  	/* Get the ctid that was passed up as a resjunk column */
  	datum = ExecGetJunkAttribute(planSlot,
*************** postgresExecForeignDelete(EState *estate
*** 1546,1554 ****
  		elog(ERROR, "ctid is NULL");
  
  	/* Convert parameters needed by prepared statement to text form */
! 	p_values = convert_prep_stmt_params(fmstate,
! 										(ItemPointer) DatumGetPointer(datum),
! 										NULL);
  
  	/*
  	 * Execute the prepared statement, and check for success.
--- 1631,1642 ----
  		elog(ERROR, "ctid is NULL");
  
  	/* Convert parameters needed by prepared statement to text form */
! 	p_values = convert_prep_stmt_params((ItemPointer) DatumGetPointer(datum),
! 										NULL,
! 										fmstate->p_nums,
! 										fmstate->p_flinfo,
! 										fmstate->target_attrs,
! 										fmstate->temp_cxt);
  
  	/*
  	 * Execute the prepared statement, and check for success.
*************** postgresIsForeignRelUpdatable(Relation r
*** 1670,1675 ****
--- 1758,1857 ----
  }
  
  /*
+  * postgresGetForeignRowMarkType
+  *		Get rowmark type to use for a particular table
+  */
+ static RowMarkType
+ postgresGetForeignRowMarkType(RangeTblEntry *rte, LockClauseStrength strength)
+ {
+ 	switch (strength)
+ 	{
+ 		case LCS_NONE:
+ 			return ROW_MARK_REFERENCE;
+ 		case LCS_FORKEYSHARE:
+ 			return ROW_MARK_KEYSHARE;
+ 		case LCS_FORSHARE:
+ 			return ROW_MARK_SHARE;
+ 		case LCS_FORNOKEYUPDATE:
+ 			return ROW_MARK_NOKEYEXCLUSIVE;
+ 		case LCS_FORUPDATE:
+ 			return ROW_MARK_EXCLUSIVE;
+ 	}
+ 	return ROW_MARK_COPY;		/* shouldn't happen */
+ }
+ 
+ /*
+  * postgresRefetchForeignRow
+  *		Re-fetch one tuple from a foreign table, possibly locking it
+  */
+ static HeapTuple
+ postgresRefetchForeignRow(EState *estate,
+ 						  ExecRowMark *erm,
+ 						  Datum rowid,
+ 						  bool *updated)
+ {
+ 	PgFdwFetchState *ffstate = (PgFdwFetchState *) erm->ermExtra;
+ 	ItemPointer tupleid = (ItemPointer) DatumGetPointer(rowid);
+ 	const char **p_values;
+ 	PGresult   *res;
+ 	HeapTuple	tuple;
+ 
+ 	/* Set up the prepared statement on the remote server, if we didn't yet */
+ 	if (!ffstate->p_name)
+ 		ffstate->p_name = setup_prep_stmt(ffstate->conn, ffstate->query);
+ 
+ 	/* Convert parameters needed by prepared statement to text form */
+ 	p_values = convert_prep_stmt_params(tupleid, NULL,
+ 										ffstate->p_nums,
+ 										ffstate->p_flinfo,
+ 										NIL,
+ 										ffstate->temp_cxt);
+ 
+ 	/*
+ 	 * Execute the prepared statement, and check for success.
+ 	 *
+ 	 * We don't use a PG_TRY block here, so be careful not to throw error
+ 	 * without releasing the PGresult.
+ 	 */
+ 	res = PQexecPrepared(ffstate->conn,
+ 						 ffstate->p_name,
+ 						 ffstate->p_nums,
+ 						 p_values,
+ 						 NULL,
+ 						 NULL,
+ 						 0);
+ 	if (PQresultStatus(res) != PGRES_TUPLES_OK)
+ 		pgfdw_report_error(ERROR, res, ffstate->conn, true, ffstate->query);
+ 
+ 	/* PGresult must be released before leaving this function. */
+ 	PG_TRY();
+ 	{
+ 		/* Create the tuple */
+ 		tuple = make_tuple_from_result_row(res, 0,
+ 										   ffstate->rel,
+ 										   ffstate->attinmeta,
+ 										   ffstate->retrieved_attrs,
+ 										   ffstate->temp_cxt);
+ 		tuple->t_self = *tupleid;
+ 		tuple->t_tableOid = erm->relid;
+ 
+ 		PQclear(res);
+ 		res = NULL;
+ 	}
+ 	PG_CATCH();
+ 	{
+ 		if (res)
+ 			PQclear(res);
+ 		PG_RE_THROW();
+ 	}
+ 	PG_END_TRY();
+ 
+ 	MemoryContextReset(ffstate->temp_cxt);
+ 
+ 	return tuple;
+ }
+ 
+ /*
   * postgresExplainForeignScan
   *		Produce extra output for EXPLAIN of a ForeignScan on a foreign table
   */
*************** ec_member_matches_foreign(PlannerInfo *r
*** 1932,1937 ****
--- 2114,2162 ----
  }
  
  /*
+  * Create the FDW-private information for fetching/locking foreign rows.
+  */
+ static List *
+ create_foreign_fetch_info(PlannerInfo *root,
+ 						  RelOptInfo *baserel,
+ 						  RowMarkType markType)
+ {
+ 	StringInfoData sql;
+ 	List	   *retrieved_attrs;
+ 	Bitmapset  *attrs_used = NULL;
+ 
+ 	/*
+ 	 * Build the query string to be sent for execution.
+ 	 */
+ 	initStringInfo(&sql);
+ 	/* Add a whole-row var to attrs_used to retrieve all the columns. */
+ 	attrs_used = bms_add_member(attrs_used,
+ 								0 - FirstLowInvalidHeapAttributeNumber);
+ 	deparseSelectSql(&sql, root, baserel, attrs_used, &retrieved_attrs);
+ 	appendStringInfoString(&sql, " WHERE ctid = $1");
+ 
+ 	switch (markType)
+ 	{
+ 		case ROW_MARK_EXCLUSIVE:
+ 		case ROW_MARK_NOKEYEXCLUSIVE:
+ 			appendStringInfoString(&sql, " FOR UPDATE");
+ 			break;
+ 		case ROW_MARK_SHARE:
+ 		case ROW_MARK_KEYSHARE:
+ 			appendStringInfoString(&sql, " FOR SHARE");
+ 			break;
+ 		default:
+ 			break;
+ 	}
+ 
+ 	/*
+ 	 * Build the fdw_private list that will be available to the executor.
+ 	 * Items in the list must match enum FdwFetchPrivateIndex, above.
+ 	 */
+ 	return list_make2(makeString(sql.data), retrieved_attrs);
+ }
+ 
+ /*
   * Create cursor for node's query with current parameter values.
   */
  static void
*************** close_cursor(PGconn *conn, unsigned int 
*** 2168,2178 ****
  }
  
  /*
!  * prepare_foreign_modify
   *		Establish a prepared statement for execution of INSERT/UPDATE/DELETE
   */
! static void
! prepare_foreign_modify(PgFdwModifyState *fmstate)
  {
  	char		prep_name[NAMEDATALEN];
  	char	   *p_name;
--- 2393,2404 ----
  }
  
  /*
!  * setup_prep_stmt
   *		Establish a prepared statement for execution of INSERT/UPDATE/DELETE
+  *		or re-fetching tuples for EvalPlanQual rechecking
   */
! static char *
! setup_prep_stmt(PGconn *conn, char *query)
  {
  	char		prep_name[NAMEDATALEN];
  	char	   *p_name;
*************** prepare_foreign_modify(PgFdwModifyState 
*** 2180,2186 ****
  
  	/* Construct name we'll use for the prepared statement. */
  	snprintf(prep_name, sizeof(prep_name), "pgsql_fdw_prep_%u",
! 			 GetPrepStmtNumber(fmstate->conn));
  	p_name = pstrdup(prep_name);
  
  	/*
--- 2406,2412 ----
  
  	/* Construct name we'll use for the prepared statement. */
  	snprintf(prep_name, sizeof(prep_name), "pgsql_fdw_prep_%u",
! 			 GetPrepStmtNumber(conn));
  	p_name = pstrdup(prep_name);
  
  	/*
*************** prepare_foreign_modify(PgFdwModifyState 
*** 2193,2210 ****
  	 * We don't use a PG_TRY block here, so be careful not to throw error
  	 * without releasing the PGresult.
  	 */
! 	res = PQprepare(fmstate->conn,
! 					p_name,
! 					fmstate->query,
! 					0,
! 					NULL);
  
  	if (PQresultStatus(res) != PGRES_COMMAND_OK)
! 		pgfdw_report_error(ERROR, res, fmstate->conn, true, fmstate->query);
  	PQclear(res);
  
  	/* This action shows that the prepare has been done. */
! 	fmstate->p_name = p_name;
  }
  
  /*
--- 2419,2432 ----
  	 * We don't use a PG_TRY block here, so be careful not to throw error
  	 * without releasing the PGresult.
  	 */
! 	res = PQprepare(conn, p_name, query, 0, NULL);
  
  	if (PQresultStatus(res) != PGRES_COMMAND_OK)
! 		pgfdw_report_error(ERROR, res, conn, true, query);
  	PQclear(res);
  
  	/* This action shows that the prepare has been done. */
! 	return p_name;
  }
  
  /*
*************** prepare_foreign_modify(PgFdwModifyState 
*** 2217,2252 ****
   * Data is constructed in temp_cxt; caller should reset that after use.
   */
  static const char **
! convert_prep_stmt_params(PgFdwModifyState *fmstate,
! 						 ItemPointer tupleid,
! 						 TupleTableSlot *slot)
  {
  	const char **p_values;
  	int			pindex = 0;
  	MemoryContext oldcontext;
  
! 	oldcontext = MemoryContextSwitchTo(fmstate->temp_cxt);
  
! 	p_values = (const char **) palloc(sizeof(char *) * fmstate->p_nums);
  
  	/* 1st parameter should be ctid, if it's in use */
  	if (tupleid != NULL)
  	{
  		/* don't need set_transmission_modes for TID output */
! 		p_values[pindex] = OutputFunctionCall(&fmstate->p_flinfo[pindex],
  											  PointerGetDatum(tupleid));
  		pindex++;
  	}
  
  	/* get following parameters from slot */
! 	if (slot != NULL && fmstate->target_attrs != NIL)
  	{
  		int			nestlevel;
  		ListCell   *lc;
  
  		nestlevel = set_transmission_modes();
  
! 		foreach(lc, fmstate->target_attrs)
  		{
  			int			attnum = lfirst_int(lc);
  			Datum		value;
--- 2439,2477 ----
   * Data is constructed in temp_cxt; caller should reset that after use.
   */
  static const char **
! convert_prep_stmt_params(ItemPointer tupleid,
! 						 TupleTableSlot *slot,
! 						 int p_nums,
! 						 FmgrInfo *p_flinfo,
! 						 List *target_attrs,
! 						 MemoryContext temp_context)
  {
  	const char **p_values;
  	int			pindex = 0;
  	MemoryContext oldcontext;
  
! 	oldcontext = MemoryContextSwitchTo(temp_context);
  
! 	p_values = (const char **) palloc(sizeof(char *) * p_nums);
  
  	/* 1st parameter should be ctid, if it's in use */
  	if (tupleid != NULL)
  	{
  		/* don't need set_transmission_modes for TID output */
! 		p_values[pindex] = OutputFunctionCall(&p_flinfo[pindex],
  											  PointerGetDatum(tupleid));
  		pindex++;
  	}
  
  	/* get following parameters from slot */
! 	if (slot != NULL && target_attrs != NIL)
  	{
  		int			nestlevel;
  		ListCell   *lc;
  
  		nestlevel = set_transmission_modes();
  
! 		foreach(lc, target_attrs)
  		{
  			int			attnum = lfirst_int(lc);
  			Datum		value;
*************** convert_prep_stmt_params(PgFdwModifyStat
*** 2256,2262 ****
  			if (isnull)
  				p_values[pindex] = NULL;
  			else
! 				p_values[pindex] = OutputFunctionCall(&fmstate->p_flinfo[pindex],
  													  value);
  			pindex++;
  		}
--- 2481,2487 ----
  			if (isnull)
  				p_values[pindex] = NULL;
  			else
! 				p_values[pindex] = OutputFunctionCall(&p_flinfo[pindex],
  													  value);
  			pindex++;
  		}
*************** convert_prep_stmt_params(PgFdwModifyStat
*** 2264,2270 ****
  		reset_transmission_modes(nestlevel);
  	}
  
! 	Assert(pindex == fmstate->p_nums);
  
  	MemoryContextSwitchTo(oldcontext);
  
--- 2489,2495 ----
  		reset_transmission_modes(nestlevel);
  	}
  
! 	Assert(pindex == p_nums);
  
  	MemoryContextSwitchTo(oldcontext);
  
*************** store_returning_result(PgFdwModifyState 
*** 2304,2309 ****
--- 2529,2635 ----
  }
  
  /*
+  * init_foreign_fetch_state
+  *		Initialize an execution state for fetching/locking foreign rows
+  */
+ static void
+ init_foreign_fetch_state(EState *estate,
+ 						 ExecRowMark *erm,
+ 						 List *fdw_private,
+ 						 int eflags)
+ {
+ 	PgFdwFetchState *ffstate;
+ 	Relation	rel = erm->relation;
+ 	RangeTblEntry *rte;
+ 	Oid			userid;
+ 	ForeignTable *table;
+ 	ForeignServer *server;
+ 	UserMapping *user;
+ 	Oid			typefnoid;
+ 	bool		isvarlena;
+ 
+ 	/* Begin constructing PgFdwFetchState. */
+ 	ffstate = (PgFdwFetchState *) palloc0(sizeof(PgFdwFetchState));
+ 	ffstate->rel = rel;
+ 
+ 	/*
+ 	 * Identify which user to do the remote access as.  This should match what
+ 	 * ExecCheckRTEPerms() does.
+ 	 */
+ 	rte = rt_fetch(erm->rti, estate->es_range_table);
+ 	userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
+ 
+ 	/* Get info about foreign table. */
+ 	table = GetForeignTable(RelationGetRelid(rel));
+ 	server = GetForeignServer(table->serverid);
+ 	user = GetUserMapping(userid, server->serverid);
+ 
+ 	/* Open connection; report that we'll create a prepared statement. */
+ 	ffstate->conn = GetConnection(server, user, true);
+ 	ffstate->p_name = NULL;		/* prepared statement not made yet */
+ 
+ 	/* Deconstruct fdw_private data. */
+ 	ffstate->query = strVal(list_nth(fdw_private,
+ 									 FdwScanPrivateSelectSql2));
+ 	ffstate->retrieved_attrs = (List *) list_nth(fdw_private,
+ 											  FdwScanPrivateRetrievedAttrs2);
+ 
+ 	/* Create context for per-tuple temp workspace. */
+ 	ffstate->temp_cxt = AllocSetContextCreate(estate->es_query_cxt,
+ 											  "postgres_fdw temporary data",
+ 											  ALLOCSET_SMALL_MINSIZE,
+ 											  ALLOCSET_SMALL_INITSIZE,
+ 											  ALLOCSET_SMALL_MAXSIZE);
+ 
+ 	/* Prepare for input conversion of SELECT results. */
+ 	ffstate->attinmeta = TupleDescGetAttInMetadata(RelationGetDescr(rel));
+ 
+ 	/* Prepare for output conversion of parameters used in prepared stmt. */
+ 	ffstate->p_flinfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo));
+ 	ffstate->p_nums = 0;
+ 
+ 	/* Only one transmittable parameter will be ctid */
+ 	getTypeOutputInfo(TIDOID, &typefnoid, &isvarlena);
+ 	fmgr_info(typefnoid, &ffstate->p_flinfo[ffstate->p_nums]);
+ 	ffstate->p_nums++;
+ 
+ 	erm->ermExtra = ffstate;
+ }
+ 
+ /*
+  * finish_foreign_fetch_state
+  *		Finish an execution state for fetching/locking foreign rows
+  */
+ static void
+ finish_foreign_fetch_state(EState *estate, ExecRowMark *erm)
+ {
+ 	PgFdwFetchState *ffstate = (PgFdwFetchState *) erm->ermExtra;
+ 
+ 	/* If we created a prepared statement, destroy it */
+ 	if (ffstate->p_name)
+ 	{
+ 		char		sql[64];
+ 		PGresult   *res;
+ 
+ 		snprintf(sql, sizeof(sql), "DEALLOCATE %s", ffstate->p_name);
+ 
+ 		/*
+ 		 * We don't use a PG_TRY block here, so be careful not to throw error
+ 		 * without releasing the PGresult.
+ 		 */
+ 		res = PQexec(ffstate->conn, sql);
+ 		if (PQresultStatus(res) != PGRES_COMMAND_OK)
+ 			pgfdw_report_error(ERROR, res, ffstate->conn, true, sql);
+ 		PQclear(res);
+ 		ffstate->p_name = NULL;
+ 	}
+ 
+ 	/* Release remote connection */
+ 	ReleaseConnection(ffstate->conn);
+ 	ffstate->conn = NULL;
+ }
+ 
+ /*
   * postgresAnalyzeForeignTable
   *		Test whether analyzing this foreign table is supported
   */
#39Tom Lane
tgl@sss.pgh.pa.us
In reply to: Tom Lane (#38)
1 attachment(s)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

I wrote:

I did a very basic update of your postgres_fdw patch to test this with,
and attach that so that you don't have to repeat the effort. I'm not sure
whether we want to try to convert that into something committable. I'm
afraid that the extra round trips involved in doing row locking this way
will be so expensive that no one really wants it for postgres_fdw. It's
more credible that FDWs operating against local storage would have use
for it.

Of course, if we don't do that then we still have your original gripe
about ctid not being correct in EvalPlanQual results. I poked at this
a bit and it seems like we could arrange to pass ctid through even in
the ROW_MARK_COPY case, if we define the t_ctid field of a composite
Datum as being the thing to use. The attached WIP, mostly-comment-free
patch seems to fix the original test case. It would likely result in
ctid coming out as (0,0) not (4294967295,0) for FDWs that don't take
any special steps to return a valid ctid, as a result of the fact that
heap_form_tuple just leaves that field zeroed. We could fix that by
adding an ItemPointerSetInvalid(&(td->t_ctid)) call to heap_form_tuple,
but I dunno if we want to add even a small number of cycles for this
purpose to such a core function.

regards, tom lane

Attachments:

pass-ctid-through-whole-row-tuple.patchtext/x-diff; charset=us-ascii; name=pass-ctid-through-whole-row-tuple.patchDownload
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 0c44260..2350de3 100644
*** a/contrib/postgres_fdw/postgres_fdw.c
--- b/contrib/postgres_fdw/postgres_fdw.c
*************** make_tuple_from_result_row(PGresult *res
*** 2965,2971 ****
--- 2965,2974 ----
  	tuple = heap_form_tuple(tupdesc, values, nulls);
  
  	if (ctid)
+ 	{
  		tuple->t_self = *ctid;
+ 		tuple->t_data->t_ctid = *ctid;
+ 	}
  
  	/* Clean up */
  	MemoryContextReset(temp_context);
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index 43d3c44..4c39e06 100644
*** a/src/backend/executor/execMain.c
--- b/src/backend/executor/execMain.c
*************** EvalPlanQualFetchRowMarks(EPQState *epqs
*** 2613,2621 ****
  
  			/* build a temporary HeapTuple control structure */
  			tuple.t_len = HeapTupleHeaderGetDatumLength(td);
- 			ItemPointerSetInvalid(&(tuple.t_self));
  			/* relation might be a foreign table, if so provide tableoid */
  			tuple.t_tableOid = erm->relid;
  			tuple.t_data = td;
  
  			/* copy and store tuple */
--- 2613,2622 ----
  
  			/* build a temporary HeapTuple control structure */
  			tuple.t_len = HeapTupleHeaderGetDatumLength(td);
  			/* relation might be a foreign table, if so provide tableoid */
  			tuple.t_tableOid = erm->relid;
+ 			/* also copy t_ctid in case somebody supplied it */
+ 			tuple.t_self = td->t_ctid;
  			tuple.t_data = td;
  
  			/* copy and store tuple */
#40Etsuro Fujita
fujita.etsuro@lab.ntt.co.jp
In reply to: Tom Lane (#38)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

On 2015/05/13 3:24, Tom Lane wrote:

Etsuro Fujita <fujita.etsuro@lab.ntt.co.jp> writes:

On 2015/05/12 7:42, Tom Lane wrote:

I don't much like the division of labor between LockForeignRow and
FetchForeignRow. In principle that would lead to not one but two
extra remote accesses per locked row in SELECT FOR UPDATE, at least
in the case that an EvalPlanQual recheck is required. (I see that
in your prototype patch for postgres_fdw you attempt to avoid that
by saving a retrieved tuple in LockForeignRow and then returning it
in FetchForeignRow, but that seems extremely ugly and bug-prone,
since there is nothing in the API spec insisting that those calls match
up one-to-one.) The fact that locking and fetching a tuple are separate
operations in the heapam API is a historical artifact that probably
doesn't make sense to duplicate in the FDW API.

I understand your concern about the postgres_fdw patch. However, I
think we should divide this into the two routines as the core patch
does, because I think that would allow an FDW author to implement these
routines so as to improve the efficiency for scenarios that seldom need
fetching, by not retrieving and saving locked tuples in LockForeignRow.

I find it hard to envision a situation where an FDW could lock a row
without being able to fetch its contents more or less for free. We have
IIRC discussed changing the way that works even for heapam, since the
current design requires multiple buffer lock/unlock cycles which aren't
free either. In any case, I think that the temptation to do probably-
buggy stuff like what you did in your prototype would be too strong for
most people, and that we're much better off with a simpler one-step API.

OK

Another problem is that we fail to detect whether an EvalPlanQual recheck
is required during a SELECT FOR UPDATE on a remote table, which we
certainly ought to do if the objective is to make postgres_fdw semantics
match the local ones.

I think that is interesting in theory, but I'm not sure that is worth
much in practice.

Hm, well, AFAICS the entire point of this patch is to make it possible for
FDWs to duplicate the semantics used for local tables, so I'm not sure
why you'd want to ignore that aspect of it.

Sorry, my explanation was not correct. For me, the objective was not
necessarily to make it possible for FDWs to duplicate the semantics, but
to make it possible for FDWs to improve the efficiency of SELECT FOR
UPDATE on foreign tables (and UPDATE/DELETE involving foreign tables as
source relations) by making it possible for FDWs to duplicate the
semantics. And I didn't think that the idea of detecting such a thing
would be in itself directly useful for the improved efficiency. Maybe
I'm missing something, though.

Anyway, I redid the patch along those lines, improved the documentation,
and have committed it.

Thanks for improving and committing the patch. I'm so happy with the
committed version.

I did a very basic update of your postgres_fdw patch to test this with,
and attach that so that you don't have to repeat the effort. I'm not sure
whether we want to try to convert that into something committable. I'm
afraid that the extra round trips involved in doing row locking this way
will be so expensive that no one really wants it for postgres_fdw. It's
more credible that FDWs operating against local storage would have use
for it.

I think so too. And thanks for updating the patch.

Best regards,
Etsuro Fujita

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#41Etsuro Fujita
fujita.etsuro@lab.ntt.co.jp
In reply to: Tom Lane (#39)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

On 2015/05/13 6:22, Tom Lane wrote:

I wrote:

I did a very basic update of your postgres_fdw patch to test this with,
and attach that so that you don't have to repeat the effort. I'm not sure
whether we want to try to convert that into something committable. I'm
afraid that the extra round trips involved in doing row locking this way
will be so expensive that no one really wants it for postgres_fdw. It's
more credible that FDWs operating against local storage would have use
for it.

Of course, if we don't do that then we still have your original gripe
about ctid not being correct in EvalPlanQual results. I poked at this
a bit and it seems like we could arrange to pass ctid through even in
the ROW_MARK_COPY case, if we define the t_ctid field of a composite
Datum as being the thing to use. The attached WIP, mostly-comment-free
patch seems to fix the original test case. It would likely result in
ctid coming out as (0,0) not (4294967295,0) for FDWs that don't take
any special steps to return a valid ctid, as a result of the fact that
heap_form_tuple just leaves that field zeroed.

+1

I did the same thing when creating the first version of the patch [1]/messages/by-id/54B7691B.5080702@lab.ntt.co.jp.
In addition, I made another change to ForeignNext so that the FDWs to
get ctid coming out as the same value (0, 0) instead of (4294967295,0)
before and after an EvalPlanQual recheck. But IIRC, I think both of
them were rejected by you ...

We could fix that by
adding an ItemPointerSetInvalid(&(td->t_ctid)) call to heap_form_tuple,
but I dunno if we want to add even a small number of cycles for this
purpose to such a core function.

I thought so too when creating the first version.

Best regards,
Etsuro Fujita

[1]: /messages/by-id/54B7691B.5080702@lab.ntt.co.jp

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#42Tom Lane
tgl@sss.pgh.pa.us
In reply to: Etsuro Fujita (#41)
Re: EvalPlanQual behaves oddly for FDW queries involving system columns

Etsuro Fujita <fujita.etsuro@lab.ntt.co.jp> writes:

On 2015/05/13 6:22, Tom Lane wrote:

Of course, if we don't do that then we still have your original gripe
about ctid not being correct in EvalPlanQual results. I poked at this
a bit and it seems like we could arrange to pass ctid through even in
the ROW_MARK_COPY case, if we define the t_ctid field of a composite
Datum as being the thing to use.

I did the same thing when creating the first version of the patch [1].
In addition, I made another change to ForeignNext so that the FDWs to
get ctid coming out as the same value (0, 0) instead of (4294967295,0)
before and after an EvalPlanQual recheck. But IIRC, I think both of
them were rejected by you ...

Ah, right. AFAIR, people objected to the other things you'd done in
that patch, and I'd still say that the change in nodeForeignscan.c
is unnecessary. But the idea of using t_ctid to solve the problem
for the ROW_MARK_COPY code path seems reasonable enough.

We could fix that by
adding an ItemPointerSetInvalid(&(td->t_ctid)) call to heap_form_tuple,
but I dunno if we want to add even a small number of cycles for this
purpose to such a core function.

I thought so too when creating the first version.

On the other hand, that would only be three additional instructions
on typical machines, which is surely in the noise compared to the rest of
heap_form_tuple, and you could argue that it's a bug fix: leaving the
t_ctid field with an apparently valid value is not very appropriate.
So after thinking about it I think we ought to do that.

regards, tom lane

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#43Andres Freund
andres@anarazel.de
In reply to: Tom Lane (#38)
Re: [HACKERS] EvalPlanQual behaves oddly for FDW queries involving system columns

Hi,

On 2015-05-12 14:24:34 -0400, Tom Lane wrote:

I did a very basic update of your postgres_fdw patch to test this with,
and attach that so that you don't have to repeat the effort. I'm not sure
whether we want to try to convert that into something committable. I'm
afraid that the extra round trips involved in doing row locking this way
will be so expensive that no one really wants it for postgres_fdw. It's
more credible that FDWs operating against local storage would have use
for it.

Fujita-san, do you know of any FDWs that use this? I'm currently
converting the EPQ machinery to slots, and in course of that I (with
Horiguchi-san's help), converted RefetchForeignRow to return a slot. But
there's currently no in-core user of this facility... I guess I can
rebase the preliminary postgres_fdw patch here, but it bitrotted
significantly. I also feel like there should be some test coverage for
an API in a nontrivial part of the code...

Greetings,

Andres Freund

#44Etsuro Fujita
fujita.etsuro@lab.ntt.co.jp
In reply to: Andres Freund (#43)
Re: [HACKERS] EvalPlanQual behaves oddly for FDW queries involving system columns

Hi Andres,

(2019/02/28 5:33), Andres Freund wrote:

On 2015-05-12 14:24:34 -0400, Tom Lane wrote:

I did a very basic update of your postgres_fdw patch to test this with,
and attach that so that you don't have to repeat the effort. I'm not sure
whether we want to try to convert that into something committable. I'm
afraid that the extra round trips involved in doing row locking this way
will be so expensive that no one really wants it for postgres_fdw. It's
more credible that FDWs operating against local storage would have use
for it.

Fujita-san, do you know of any FDWs that use this?

No, I don't.

I'm currently
converting the EPQ machinery to slots, and in course of that I (with
Horiguchi-san's help), converted RefetchForeignRow to return a slot. But
there's currently no in-core user of this facility... I guess I can
rebase the preliminary postgres_fdw patch here, but it bitrotted
significantly.

I'll rebase that patch and help the testing, if you want me to.

I also feel like there should be some test coverage for
an API in a nontrivial part of the code...

Yeah, but as mentioned above, the row-locking API is provided for FDWs
operating against local storage, which we don't have in core, unfortunately.

Best regards,
Etsuro Fujita

#45Andres Freund
andres@anarazel.de
In reply to: Etsuro Fujita (#44)
Re: [HACKERS] EvalPlanQual behaves oddly for FDW queries involving system columns

Hi,

Thanks for the quick response.

On 2019-02-28 18:28:37 +0900, Etsuro Fujita wrote:

I'm currently
converting the EPQ machinery to slots, and in course of that I (with
Horiguchi-san's help), converted RefetchForeignRow to return a slot. But
there's currently no in-core user of this facility... I guess I can
rebase the preliminary postgres_fdw patch here, but it bitrotted
significantly.

I'll rebase that patch and help the testing, if you want me to.

That'd be awesome.

I also feel like there should be some test coverage for
an API in a nontrivial part of the code...

Yeah, but as mentioned above, the row-locking API is provided for FDWs
operating against local storage, which we don't have in core, unfortunately.

Yea. file_fdw exists, but doesn't support modifications...

Greetings,

Andres Freund

#46Andres Freund
andres@anarazel.de
In reply to: Andres Freund (#45)
Re: [HACKERS] EvalPlanQual behaves oddly for FDW queries involving system columns

Hi,

On 2019-02-28 10:18:36 -0800, Andres Freund wrote:

On 2019-02-28 18:28:37 +0900, Etsuro Fujita wrote:

I'm currently
converting the EPQ machinery to slots, and in course of that I (with
Horiguchi-san's help), converted RefetchForeignRow to return a slot. But
there's currently no in-core user of this facility... I guess I can
rebase the preliminary postgres_fdw patch here, but it bitrotted
significantly.

I'll rebase that patch and help the testing, if you want me to.

That'd be awesome.

FWIW, I pushed the EPQ patch, doing this conversion blindly. It'd be
awesome if you'd check that it actually works...

Thanks,

Andres

#47Etsuro Fujita
fujita.etsuro@lab.ntt.co.jp
In reply to: Andres Freund (#46)
Re: [HACKERS] EvalPlanQual behaves oddly for FDW queries involving system columns

Hi Andres,

(2019/03/02 3:57), Andres Freund wrote:

FWIW, I pushed the EPQ patch, doing this conversion blindly. It'd be
awesome if you'd check that it actually works...

I'll start the work later this week. I think I can post an (initial)
report on that next week, maybe.

Best regards,
Etsuro Fujita

#48Etsuro Fujita
fujita.etsuro@lab.ntt.co.jp
In reply to: Etsuro Fujita (#47)
1 attachment(s)
Re: [HACKERS] EvalPlanQual behaves oddly for FDW queries involving system columns

(2019/03/04 12:10), Etsuro Fujita wrote:

(2019/03/02 3:57), Andres Freund wrote:

FWIW, I pushed the EPQ patch, doing this conversion blindly. It'd be
awesome if you'd check that it actually works...

I'll start the work later this week. I think I can post an (initial)
report on that next week, maybe.

Here is an updated version of the patch [1]/messages/by-id/16016.1431455074@sss.pgh.pa.us. This version doesn't allow
pushing down joins to the remote if there is a possibility that EPQ will
be executed, but I think it would be useful to test the EPQ patch. I
haven't looked into the EPQ patch in detail yet, but I tested the patch
with the attached, and couldn't find any issues on the patch.

Best regards,
Etsuro Fujita

[1]: /messages/by-id/16016.1431455074@sss.pgh.pa.us

Attachments:

postgres-fdw-late-locking-2.patchtext/x-patch; name=postgres-fdw-late-locking-2.patchDownload
*** a/contrib/postgres_fdw/deparse.c
--- b/contrib/postgres_fdw/deparse.c
***************
*** 1216,1222 **** deparseLockingClause(deparse_expr_cxt *context)
  		{
  			PlanRowMark *rc = get_plan_rowmark(root->rowMarks, relid);
  
! 			if (rc)
  			{
  				/*
  				 * Relation is specified as a FOR UPDATE/SHARE target, so
--- 1216,1222 ----
  		{
  			PlanRowMark *rc = get_plan_rowmark(root->rowMarks, relid);
  
! 			if (rc && rc->markType == ROW_MARK_COPY)
  			{
  				/*
  				 * Relation is specified as a FOR UPDATE/SHARE target, so
*** a/contrib/postgres_fdw/postgres_fdw.c
--- b/contrib/postgres_fdw/postgres_fdw.c
***************
*** 32,37 ****
--- 32,38 ----
  #include "optimizer/pathnode.h"
  #include "optimizer/paths.h"
  #include "optimizer/planmain.h"
+ #include "optimizer/prep.h"
  #include "optimizer/restrictinfo.h"
  #include "optimizer/tlist.h"
  #include "parser/parsetree.h"
***************
*** 70,75 **** enum FdwScanPrivateIndex
--- 71,80 ----
  	FdwScanPrivateRetrievedAttrs,
  	/* Integer representing the desired fetch_size */
  	FdwScanPrivateFetchSize,
+ 	/* SQL statement to execute remotely (as a String node) */
+ 	FdwScanPrivateSelectSql2,
+ 	/* Integer list of attribute numbers retrieved by SELECT */
+ 	FdwScanPrivateRetrievedAttrs2,
  
  	/*
  	 * String describing join i.e. names of relations being joined and types
***************
*** 222,227 **** typedef struct PgFdwDirectModifyState
--- 227,256 ----
  	MemoryContext temp_cxt;		/* context for per-tuple temporary data */
  } PgFdwDirectModifyState;
  
+ /*
+  * Execution state for fetching/locking foreign rows.
+  */
+ typedef struct PgFdwFetchState
+ {
+ 	Relation	rel;			/* relcache entry for the foreign table */
+ 	AttInMetadata *attinmeta;	/* attribute datatype conversion metadata */
+ 
+ 	/* for remote query execution */
+ 	PGconn	   *conn;			/* connection for the fetch */
+ 	char	   *p_name;			/* name of prepared statement, if created */
+ 
+ 	/* extracted fdw_private data */
+ 	char	   *query;			/* text of SELECT command */
+ 	List	   *retrieved_attrs;	/* attr numbers retrieved by SELECT */
+ 
+ 	/* info about parameters for prepared statement */
+ 	int			p_nums;			/* number of parameters to transmit */
+ 	FmgrInfo   *p_flinfo;		/* output conversion functions for them */
+ 
+ 	/* working memory context */
+ 	MemoryContext temp_cxt;		/* context for per-tuple temporary data */
+ } PgFdwFetchState;
+ 
  /*
   * Workspace for analyzing a foreign table.
   */
***************
*** 333,338 **** static bool postgresPlanDirectModify(PlannerInfo *root,
--- 362,374 ----
  static void postgresBeginDirectModify(ForeignScanState *node, int eflags);
  static TupleTableSlot *postgresIterateDirectModify(ForeignScanState *node);
  static void postgresEndDirectModify(ForeignScanState *node);
+ static RowMarkType postgresGetForeignRowMarkType(RangeTblEntry *rte,
+ 							  LockClauseStrength strength);
+ static void postgresRefetchForeignRow(EState *estate,
+ 						  ExecRowMark *erm,
+ 						  Datum rowid,
+ 						  TupleTableSlot *slot,
+ 						  bool *updated);
  static void postgresExplainForeignScan(ForeignScanState *node,
  						   ExplainState *es);
  static void postgresExplainForeignModify(ModifyTableState *mtstate,
***************
*** 379,384 **** static void get_remote_estimate(const char *sql,
--- 415,423 ----
  static bool ec_member_matches_foreign(PlannerInfo *root, RelOptInfo *rel,
  						  EquivalenceClass *ec, EquivalenceMember *em,
  						  void *arg);
+ static List *create_foreign_fetch_info(PlannerInfo *root,
+ 						  RelOptInfo *baserel,
+ 						  RowMarkType markType);
  static void create_cursor(ForeignScanState *node);
  static void fetch_more_data(ForeignScanState *node);
  static void close_cursor(PGconn *conn, unsigned int cursor_number);
***************
*** 396,405 **** static TupleTableSlot *execute_foreign_modify(EState *estate,
  					   CmdType operation,
  					   TupleTableSlot *slot,
  					   TupleTableSlot *planSlot);
! static void prepare_foreign_modify(PgFdwModifyState *fmstate);
! static const char **convert_prep_stmt_params(PgFdwModifyState *fmstate,
! 						 ItemPointer tupleid,
! 						 TupleTableSlot *slot);
  static void store_returning_result(PgFdwModifyState *fmstate,
  					   TupleTableSlot *slot, PGresult *res);
  static void finish_foreign_modify(PgFdwModifyState *fmstate);
--- 435,447 ----
  					   CmdType operation,
  					   TupleTableSlot *slot,
  					   TupleTableSlot *planSlot);
! static char *setup_prep_stmt(PGconn *conn, char *query);
! static const char **convert_prep_stmt_params(ItemPointer tupleid,
! 						 TupleTableSlot *slot,
! 						 int p_nums,
! 						 FmgrInfo *p_flinfo,
! 						 List *target_attrs,
! 						 MemoryContext temp_context);
  static void store_returning_result(PgFdwModifyState *fmstate,
  					   TupleTableSlot *slot, PGresult *res);
  static void finish_foreign_modify(PgFdwModifyState *fmstate);
***************
*** 424,429 **** static void process_query_params(ExprContext *econtext,
--- 466,476 ----
  					 FmgrInfo *param_flinfo,
  					 List *param_exprs,
  					 const char **param_values);
+ static void init_foreign_fetch_state(EState *estate,
+ 						 ExecRowMark *erm,
+ 						 List *fdw_private,
+ 						 int eflags);
+ static void finish_foreign_fetch_state(EState *estate, ExecRowMark *erm);
  static int postgresAcquireSampleRowsFunc(Relation relation, int elevel,
  							  HeapTuple *rows, int targrows,
  							  double *totalrows,
***************
*** 493,500 **** postgres_fdw_handler(PG_FUNCTION_ARGS)
  	routine->IterateDirectModify = postgresIterateDirectModify;
  	routine->EndDirectModify = postgresEndDirectModify;
  
! 	/* Function for EvalPlanQual rechecks */
  	routine->RecheckForeignScan = postgresRecheckForeignScan;
  	/* Support functions for EXPLAIN */
  	routine->ExplainForeignScan = postgresExplainForeignScan;
  	routine->ExplainForeignModify = postgresExplainForeignModify;
--- 540,550 ----
  	routine->IterateDirectModify = postgresIterateDirectModify;
  	routine->EndDirectModify = postgresEndDirectModify;
  
! 	/* Functions for SELECT FOR UPDATE/SHARE row locking */
! 	routine->GetForeignRowMarkType = postgresGetForeignRowMarkType;
! 	routine->RefetchForeignRow = postgresRefetchForeignRow;
  	routine->RecheckForeignScan = postgresRecheckForeignScan;
+ 
  	/* Support functions for EXPLAIN */
  	routine->ExplainForeignScan = postgresExplainForeignScan;
  	routine->ExplainForeignModify = postgresExplainForeignModify;
***************
*** 1149,1154 **** postgresGetForeignPlan(PlannerInfo *root,
--- 1199,1206 ----
  	List	   *fdw_recheck_quals = NIL;
  	List	   *retrieved_attrs;
  	StringInfoData sql;
+ 	PlanRowMark *rc = NULL;
+ 	List	   *fdw_private2 = NIL;
  	ListCell   *lc;
  
  	if (IS_SIMPLE_REL(foreignrel))
***************
*** 1311,1316 **** postgresGetForeignPlan(PlannerInfo *root,
--- 1363,1383 ----
  	fdw_private = list_make3(makeString(sql.data),
  							 retrieved_attrs,
  							 makeInteger(fpinfo->fetch_size));
+ 
+ 	/* Append information about late row locking for rowmarked rels. */
+ 	if (root->rowMarks && IS_SIMPLE_REL(foreignrel))
+ 		rc = get_plan_rowmark(root->rowMarks, foreignrel->relid);
+ 	if (rc)
+ 	{
+ 		Assert(rc->markType != ROW_MARK_COPY);
+ 		fdw_private2 = create_foreign_fetch_info(root,
+ 												 foreignrel,
+ 												 rc->markType);
+ 	}
+ 	else
+ 		fdw_private2 = list_make2(NULL, NIL);
+ 	fdw_private = list_concat(fdw_private, fdw_private2);
+ 
  	if (IS_JOIN_REL(foreignrel) || IS_UPPER_REL(foreignrel))
  		fdw_private = lappend(fdw_private,
  							  makeString(fpinfo->relation_name->data));
***************
*** 1343,1348 **** postgresBeginForeignScan(ForeignScanState *node, int eflags)
--- 1410,1416 ----
  	EState	   *estate = node->ss.ps.state;
  	PgFdwScanState *fsstate;
  	RangeTblEntry *rte;
+ 	ExecRowMark *erm;
  	Oid			userid;
  	ForeignTable *table;
  	UserMapping *user;
***************
*** 1433,1438 **** postgresBeginForeignScan(ForeignScanState *node, int eflags)
--- 1501,1516 ----
  							 &fsstate->param_flinfo,
  							 &fsstate->param_exprs,
  							 &fsstate->param_values);
+ 
+ 	/*
+ 	 * Initialize state for fetching/locking foreign rows if needed.
+ 	 */
+ 	if (fsplan->scan.scanrelid > 0)
+ 	{
+ 		erm = ExecFindRowMark(estate, fsplan->scan.scanrelid, true);
+ 		if (erm && erm->relation && erm->ermExtra == NULL)
+ 			init_foreign_fetch_state(estate, erm, fsplan->fdw_private, eflags);
+ 	}
  }
  
  /*
***************
*** 1539,1545 **** postgresReScanForeignScan(ForeignScanState *node)
--- 1617,1626 ----
  static void
  postgresEndForeignScan(ForeignScanState *node)
  {
+ 	ForeignScan *fsplan = (ForeignScan *) node->ss.ps.plan;
+ 	EState	   *estate = node->ss.ps.state;
  	PgFdwScanState *fsstate = (PgFdwScanState *) node->fdw_state;
+ 	ExecRowMark *erm;
  
  	/* if fsstate is NULL, we are in EXPLAIN; nothing to do */
  	if (fsstate == NULL)
***************
*** 1553,1558 **** postgresEndForeignScan(ForeignScanState *node)
--- 1634,1649 ----
  	ReleaseConnection(fsstate->conn);
  	fsstate->conn = NULL;
  
+ 	/*
+ 	 * Finish state for fetching/locking foreign rows if needed.
+ 	 */
+ 	if (fsplan->scan.scanrelid > 0)
+ 	{
+ 		erm = ExecFindRowMark(estate, fsplan->scan.scanrelid, true);
+ 		if (erm && erm->relation && erm->ermExtra != NULL)
+ 			finish_foreign_fetch_state(estate, erm);
+ 	}
+ 
  	/* MemoryContexts will be deleted automatically. */
  }
  
***************
*** 2401,2406 **** postgresEndDirectModify(ForeignScanState *node)
--- 2492,2600 ----
  	/* MemoryContext will be deleted automatically. */
  }
  
+ /*
+  * postgresGetForeignRowMarkType
+  *		Get rowmark type to use for a particular table
+  */
+ static RowMarkType
+ postgresGetForeignRowMarkType(RangeTblEntry *rte, LockClauseStrength strength)
+ {
+ 	switch (strength)
+ 	{
+ 		case LCS_NONE:
+ 			return ROW_MARK_REFERENCE;
+ 		case LCS_FORKEYSHARE:
+ 			return ROW_MARK_KEYSHARE;
+ 		case LCS_FORSHARE:
+ 			return ROW_MARK_SHARE;
+ 		case LCS_FORNOKEYUPDATE:
+ 			return ROW_MARK_NOKEYEXCLUSIVE;
+ 		case LCS_FORUPDATE:
+ 			return ROW_MARK_EXCLUSIVE;
+ 	}
+ 	return ROW_MARK_COPY;		/* shouldn't happen */
+ }
+ 
+ /*
+  * postgresRefetchForeignRow
+  *		Re-fetch one tuple from a foreign table, possibly locking it
+  */
+ static void
+ postgresRefetchForeignRow(EState *estate,
+ 						  ExecRowMark *erm,
+ 						  Datum rowid,
+ 						  TupleTableSlot *slot,
+ 						  bool *updated)
+ {
+ 	PgFdwFetchState *ffstate = (PgFdwFetchState *) erm->ermExtra;
+ 	ItemPointer tupleid = (ItemPointer) DatumGetPointer(rowid);
+ 	const char **p_values;
+ 	PGresult   *res;
+ 	HeapTuple	tuple;
+ 
+ 	/* Set up the prepared statement on the remote server, if we didn't yet */
+ 	if (!ffstate->p_name)
+ 		ffstate->p_name = setup_prep_stmt(ffstate->conn, ffstate->query);
+ 
+ 	/* Convert parameters needed by prepared statement to text form */
+ 	p_values = convert_prep_stmt_params(tupleid, NULL,
+ 										ffstate->p_nums,
+ 										ffstate->p_flinfo,
+ 										NIL,
+ 										ffstate->temp_cxt);
+ 
+ 	/*
+ 	 * Execute the prepared statement.
+ 	 */
+ 	if (!PQsendQueryPrepared(ffstate->conn,
+ 							 ffstate->p_name,
+ 							 ffstate->p_nums,
+ 							 p_values,
+ 							 NULL,
+ 							 NULL,
+ 							 0))
+ 		pgfdw_report_error(ERROR, NULL, ffstate->conn, false, ffstate->query);
+ 
+ 	/*
+ 	 * Get the result, and check for success.
+ 	 *
+ 	 * We don't use a PG_TRY block here, so be careful not to throw error
+ 	 * without releasing the PGresult.
+ 	 */
+ 	res = pgfdw_get_result(ffstate->conn, ffstate->query);
+ 	if (PQresultStatus(res) != PGRES_TUPLES_OK)
+ 		pgfdw_report_error(ERROR, res, ffstate->conn, true, ffstate->query);
+ 
+ 	/* PGresult must be released before leaving this function. */
+ 	PG_TRY();
+ 	{
+ 		/* Create the tuple */
+ 		tuple = make_tuple_from_result_row(res, 0,
+ 										   ffstate->rel,
+ 										   ffstate->attinmeta,
+ 										   ffstate->retrieved_attrs,
+ 										   NULL,
+ 										   ffstate->temp_cxt);
+ 		tuple->t_self = *tupleid;
+ 		tuple->t_tableOid = erm->relid;
+ 
+ 		PQclear(res);
+ 		res = NULL;
+ 	}
+ 	PG_CATCH();
+ 	{
+ 		if (res)
+ 			PQclear(res);
+ 		PG_RE_THROW();
+ 	}
+ 	PG_END_TRY();
+ 
+ 	MemoryContextReset(ffstate->temp_cxt);
+ 
+ 	/* Store the tuple into the given slot */
+ 	ExecStoreHeapTuple(tuple, slot, true);
+ }
+ 
  /*
   * postgresExplainForeignScan
   *		Produce extra output for EXPLAIN of a ForeignScan on a foreign table
***************
*** 2966,2971 **** ec_member_matches_foreign(PlannerInfo *root, RelOptInfo *rel,
--- 3160,3214 ----
  	return true;
  }
  
+ /*
+  * Create the FDW-private information for fetching/locking foreign rows.
+  */
+ static List *
+ create_foreign_fetch_info(PlannerInfo *root,
+ 						  RelOptInfo *baserel,
+ 						  RowMarkType markType)
+ {
+ 	PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) baserel->fdw_private;
+ 	StringInfoData sql;
+ 	List	   *retrieved_attrs;
+ 	Bitmapset  *attrs_used = NULL;
+ 	Bitmapset  *save_attrs_used;
+ 
+ 	/*
+ 	 * Build the query string to be sent for execution.
+ 	 */
+ 	initStringInfo(&sql);
+ 	/* Add a whole-row var to attrs_used to retrieve all the columns. */
+ 	attrs_used = bms_add_member(attrs_used,
+ 								0 - FirstLowInvalidHeapAttributeNumber);
+ 	save_attrs_used = fpinfo->attrs_used;
+ 	fpinfo->attrs_used = attrs_used;
+ 	deparseSelectStmtForRel(&sql, root, baserel, NIL, NIL, NIL, false,
+ 							&retrieved_attrs, NULL);
+ 	fpinfo->attrs_used = save_attrs_used;
+ 	appendStringInfoString(&sql, " WHERE ctid = $1");
+ 
+ 	switch (markType)
+ 	{
+ 		case ROW_MARK_EXCLUSIVE:
+ 		case ROW_MARK_NOKEYEXCLUSIVE:
+ 			appendStringInfoString(&sql, " FOR UPDATE");
+ 			break;
+ 		case ROW_MARK_SHARE:
+ 		case ROW_MARK_KEYSHARE:
+ 			appendStringInfoString(&sql, " FOR SHARE");
+ 			break;
+ 		default:
+ 			break;
+ 	}
+ 
+ 	/*
+ 	 * Build the fdw_private list that will be available to the executor.
+ 	 * Items in the list must match enum FdwScanPrivateIndex, above.
+ 	 */
+ 	return list_make2(makeString(sql.data), retrieved_attrs);
+ }
+ 
  /*
   * Create cursor for node's query with current parameter values.
   */
***************
*** 3311,3317 **** execute_foreign_modify(EState *estate,
  
  	/* Set up the prepared statement on the remote server, if we didn't yet */
  	if (!fmstate->p_name)
! 		prepare_foreign_modify(fmstate);
  
  	/*
  	 * For UPDATE/DELETE, get the ctid that was passed up as a resjunk column
--- 3554,3560 ----
  
  	/* Set up the prepared statement on the remote server, if we didn't yet */
  	if (!fmstate->p_name)
! 		fmstate->p_name = setup_prep_stmt(fmstate->conn, fmstate->query);
  
  	/*
  	 * For UPDATE/DELETE, get the ctid that was passed up as a resjunk column
***************
*** 3331,3337 **** execute_foreign_modify(EState *estate,
  	}
  
  	/* Convert parameters needed by prepared statement to text form */
! 	p_values = convert_prep_stmt_params(fmstate, ctid, slot);
  
  	/*
  	 * Execute the prepared statement.
--- 3574,3584 ----
  	}
  
  	/* Convert parameters needed by prepared statement to text form */
! 	p_values = convert_prep_stmt_params(ctid, slot,
! 										fmstate->p_nums,
! 										fmstate->p_flinfo,
! 										fmstate->target_attrs,
! 										fmstate->temp_cxt);
  
  	/*
  	 * Execute the prepared statement.
***************
*** 3378,3388 **** execute_foreign_modify(EState *estate,
  }
  
  /*
!  * prepare_foreign_modify
   *		Establish a prepared statement for execution of INSERT/UPDATE/DELETE
   */
! static void
! prepare_foreign_modify(PgFdwModifyState *fmstate)
  {
  	char		prep_name[NAMEDATALEN];
  	char	   *p_name;
--- 3625,3636 ----
  }
  
  /*
!  * setup_prep_stmt
   *		Establish a prepared statement for execution of INSERT/UPDATE/DELETE
+  *		or re-fetching tuples for EvalPlanQual rechecking
   */
! static char *
! setup_prep_stmt(PGconn *conn, char *query)
  {
  	char		prep_name[NAMEDATALEN];
  	char	   *p_name;
***************
*** 3390,3396 **** prepare_foreign_modify(PgFdwModifyState *fmstate)
  
  	/* Construct name we'll use for the prepared statement. */
  	snprintf(prep_name, sizeof(prep_name), "pgsql_fdw_prep_%u",
! 			 GetPrepStmtNumber(fmstate->conn));
  	p_name = pstrdup(prep_name);
  
  	/*
--- 3638,3644 ----
  
  	/* Construct name we'll use for the prepared statement. */
  	snprintf(prep_name, sizeof(prep_name), "pgsql_fdw_prep_%u",
! 			 GetPrepStmtNumber(conn));
  	p_name = pstrdup(prep_name);
  
  	/*
***************
*** 3400,3411 **** prepare_foreign_modify(PgFdwModifyState *fmstate)
  	 * the prepared statements we use in this module are simple enough that
  	 * the remote server will make the right choices.
  	 */
! 	if (!PQsendPrepare(fmstate->conn,
! 					   p_name,
! 					   fmstate->query,
! 					   0,
! 					   NULL))
! 		pgfdw_report_error(ERROR, NULL, fmstate->conn, false, fmstate->query);
  
  	/*
  	 * Get the result, and check for success.
--- 3648,3655 ----
  	 * the prepared statements we use in this module are simple enough that
  	 * the remote server will make the right choices.
  	 */
! 	if (!PQsendPrepare(conn, p_name, query, 0, NULL))
! 		pgfdw_report_error(ERROR, NULL, conn, false, query);
  
  	/*
  	 * Get the result, and check for success.
***************
*** 3413,3425 **** prepare_foreign_modify(PgFdwModifyState *fmstate)
  	 * We don't use a PG_TRY block here, so be careful not to throw error
  	 * without releasing the PGresult.
  	 */
! 	res = pgfdw_get_result(fmstate->conn, fmstate->query);
  	if (PQresultStatus(res) != PGRES_COMMAND_OK)
! 		pgfdw_report_error(ERROR, res, fmstate->conn, true, fmstate->query);
  	PQclear(res);
  
  	/* This action shows that the prepare has been done. */
! 	fmstate->p_name = p_name;
  }
  
  /*
--- 3657,3669 ----
  	 * We don't use a PG_TRY block here, so be careful not to throw error
  	 * without releasing the PGresult.
  	 */
! 	res = pgfdw_get_result(conn, query);
  	if (PQresultStatus(res) != PGRES_COMMAND_OK)
! 		pgfdw_report_error(ERROR, res, conn, true, query);
  	PQclear(res);
  
  	/* This action shows that the prepare has been done. */
! 	return p_name;
  }
  
  /*
***************
*** 3432,3467 **** prepare_foreign_modify(PgFdwModifyState *fmstate)
   * Data is constructed in temp_cxt; caller should reset that after use.
   */
  static const char **
! convert_prep_stmt_params(PgFdwModifyState *fmstate,
! 						 ItemPointer tupleid,
! 						 TupleTableSlot *slot)
  {
  	const char **p_values;
  	int			pindex = 0;
  	MemoryContext oldcontext;
  
! 	oldcontext = MemoryContextSwitchTo(fmstate->temp_cxt);
  
! 	p_values = (const char **) palloc(sizeof(char *) * fmstate->p_nums);
  
  	/* 1st parameter should be ctid, if it's in use */
  	if (tupleid != NULL)
  	{
  		/* don't need set_transmission_modes for TID output */
! 		p_values[pindex] = OutputFunctionCall(&fmstate->p_flinfo[pindex],
  											  PointerGetDatum(tupleid));
  		pindex++;
  	}
  
  	/* get following parameters from slot */
! 	if (slot != NULL && fmstate->target_attrs != NIL)
  	{
  		int			nestlevel;
  		ListCell   *lc;
  
  		nestlevel = set_transmission_modes();
  
! 		foreach(lc, fmstate->target_attrs)
  		{
  			int			attnum = lfirst_int(lc);
  			Datum		value;
--- 3676,3714 ----
   * Data is constructed in temp_cxt; caller should reset that after use.
   */
  static const char **
! convert_prep_stmt_params(ItemPointer tupleid,
! 						 TupleTableSlot *slot,
! 						 int p_nums,
! 						 FmgrInfo *p_flinfo,
! 						 List *target_attrs,
! 						 MemoryContext temp_context)
  {
  	const char **p_values;
  	int			pindex = 0;
  	MemoryContext oldcontext;
  
! 	oldcontext = MemoryContextSwitchTo(temp_context);
  
! 	p_values = (const char **) palloc(sizeof(char *) * p_nums);
  
  	/* 1st parameter should be ctid, if it's in use */
  	if (tupleid != NULL)
  	{
  		/* don't need set_transmission_modes for TID output */
! 		p_values[pindex] = OutputFunctionCall(&p_flinfo[pindex],
  											  PointerGetDatum(tupleid));
  		pindex++;
  	}
  
  	/* get following parameters from slot */
! 	if (slot != NULL && target_attrs != NIL)
  	{
  		int			nestlevel;
  		ListCell   *lc;
  
  		nestlevel = set_transmission_modes();
  
! 		foreach(lc, target_attrs)
  		{
  			int			attnum = lfirst_int(lc);
  			Datum		value;
***************
*** 3471,3477 **** convert_prep_stmt_params(PgFdwModifyState *fmstate,
  			if (isnull)
  				p_values[pindex] = NULL;
  			else
! 				p_values[pindex] = OutputFunctionCall(&fmstate->p_flinfo[pindex],
  													  value);
  			pindex++;
  		}
--- 3718,3724 ----
  			if (isnull)
  				p_values[pindex] = NULL;
  			else
! 				p_values[pindex] = OutputFunctionCall(&p_flinfo[pindex],
  													  value);
  			pindex++;
  		}
***************
*** 3479,3485 **** convert_prep_stmt_params(PgFdwModifyState *fmstate,
  		reset_transmission_modes(nestlevel);
  	}
  
! 	Assert(pindex == fmstate->p_nums);
  
  	MemoryContextSwitchTo(oldcontext);
  
--- 3726,3732 ----
  		reset_transmission_modes(nestlevel);
  	}
  
! 	Assert(pindex == p_nums);
  
  	MemoryContextSwitchTo(oldcontext);
  
***************
*** 4067,4072 **** process_query_params(ExprContext *econtext,
--- 4314,4418 ----
  	reset_transmission_modes(nestlevel);
  }
  
+ /*
+  * init_foreign_fetch_state
+  *		Initialize an execution state for fetching/locking foreign rows
+  */
+ static void
+ init_foreign_fetch_state(EState *estate,
+ 						 ExecRowMark *erm,
+ 						 List *fdw_private,
+ 						 int eflags)
+ {
+ 	PgFdwFetchState *ffstate;
+ 	Relation	rel = erm->relation;
+ 	RangeTblEntry *rte;
+ 	Oid			userid;
+ 	ForeignTable *table;
+ 	UserMapping *user;
+ 	Oid			typefnoid;
+ 	bool		isvarlena;
+ 
+ 	/* Begin constructing PgFdwFetchState. */
+ 	ffstate = (PgFdwFetchState *) palloc0(sizeof(PgFdwFetchState));
+ 	ffstate->rel = rel;
+ 
+ 	/*
+ 	 * Identify which user to do the remote access as.  This should match what
+ 	 * ExecCheckRTEPerms() does.
+ 	 */
+ 	rte = rt_fetch(erm->rti, estate->es_range_table);
+ 	userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
+ 
+ 	/* Get info about foreign table. */
+ 	table = GetForeignTable(RelationGetRelid(rel));
+ 	user = GetUserMapping(userid, table->serverid);
+ 
+ 	/* Open connection; report that we'll create a prepared statement. */
+ 	ffstate->conn = GetConnection(user, true);
+ 	ffstate->p_name = NULL;		/* prepared statement not made yet */
+ 
+ 	/* Deconstruct fdw_private data. */
+ 	ffstate->query = strVal(list_nth(fdw_private,
+ 									 FdwScanPrivateSelectSql2));
+ 	ffstate->retrieved_attrs = (List *) list_nth(fdw_private,
+ 											  FdwScanPrivateRetrievedAttrs2);
+ 
+ 	/* Create context for per-tuple temp workspace. */
+ 	ffstate->temp_cxt = AllocSetContextCreate(estate->es_query_cxt,
+ 											  "postgres_fdw temporary data",
+ 											  ALLOCSET_SMALL_MINSIZE,
+ 											  ALLOCSET_SMALL_INITSIZE,
+ 											  ALLOCSET_SMALL_MAXSIZE);
+ 
+ 	/* Prepare for input conversion of SELECT results. */
+ 	ffstate->attinmeta = TupleDescGetAttInMetadata(RelationGetDescr(rel));
+ 
+ 	/* Prepare for output conversion of parameters used in prepared stmt. */
+ 	ffstate->p_flinfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo));
+ 	ffstate->p_nums = 0;
+ 
+ 	/* Only one transmittable parameter will be ctid */
+ 	getTypeOutputInfo(TIDOID, &typefnoid, &isvarlena);
+ 	fmgr_info(typefnoid, &ffstate->p_flinfo[ffstate->p_nums]);
+ 	ffstate->p_nums++;
+ 
+ 	erm->ermExtra = ffstate;
+ }
+ 
+ /*
+  * finish_foreign_fetch_state
+  *		Finish an execution state for fetching/locking foreign rows
+  */
+ static void
+ finish_foreign_fetch_state(EState *estate, ExecRowMark *erm)
+ {
+ 	PgFdwFetchState *ffstate = (PgFdwFetchState *) erm->ermExtra;
+ 
+ 	/* If we created a prepared statement, destroy it */
+ 	if (ffstate->p_name)
+ 	{
+ 		char		sql[64];
+ 		PGresult   *res;
+ 
+ 		snprintf(sql, sizeof(sql), "DEALLOCATE %s", ffstate->p_name);
+ 
+ 		/*
+ 		 * We don't use a PG_TRY block here, so be careful not to throw error
+ 		 * without releasing the PGresult.
+ 		 */
+ 		res = PQexec(ffstate->conn, sql);
+ 		if (PQresultStatus(res) != PGRES_COMMAND_OK)
+ 			pgfdw_report_error(ERROR, res, ffstate->conn, true, sql);
+ 		PQclear(res);
+ 		ffstate->p_name = NULL;
+ 	}
+ 
+ 	/* Release remote connection */
+ 	ReleaseConnection(ffstate->conn);
+ 	ffstate->conn = NULL;
+ }
+ 
  /*
   * postgresAnalyzeForeignTable
   *		Test whether analyzing this foreign table is supported
***************
*** 5099,5106 **** postgresGetForeignJoinPaths(PlannerInfo *root,
  	int			width;
  	Cost		startup_cost;
  	Cost		total_cost;
! 	Path	   *epq_path;		/* Path to create plan to be executed when
! 								 * EvalPlanQual gets triggered. */
  
  	/*
  	 * Skip if this join combination has been considered already.
--- 5445,5452 ----
  	int			width;
  	Cost		startup_cost;
  	Cost		total_cost;
! 	Path	   *epq_path = NULL;	/* Path to create plan to be executed when
! 									 * EvalPlanQual gets triggered. */
  
  	/*
  	 * Skip if this join combination has been considered already.
***************
*** 5108,5113 **** postgresGetForeignJoinPaths(PlannerInfo *root,
--- 5454,5468 ----
  	if (joinrel->fdw_private)
  		return;
  
+ 	/*
+ 	 * Don't allow pushing down joins to the remote if there is a possibility
+ 	 * that EvalPlanQual will be executed.
+ 	 */
+ 	if (root->parse->commandType == CMD_DELETE ||
+ 		root->parse->commandType == CMD_UPDATE ||
+ 		root->rowMarks)
+ 		return;
+ 
  	/*
  	 * This code does not work for joins with lateral references, since those
  	 * must have parameterized paths, which we don't generate yet.
***************
*** 5128,5157 **** postgresGetForeignJoinPaths(PlannerInfo *root,
  	/* attrs_used is only for base relations. */
  	fpinfo->attrs_used = NULL;
  
- 	/*
- 	 * If there is a possibility that EvalPlanQual will be executed, we need
- 	 * to be able to reconstruct the row using scans of the base relations.
- 	 * GetExistingLocalJoinPath will find a suitable path for this purpose in
- 	 * the path list of the joinrel, if one exists.  We must be careful to
- 	 * call it before adding any ForeignPath, since the ForeignPath might
- 	 * dominate the only suitable local path available.  We also do it before
- 	 * calling foreign_join_ok(), since that function updates fpinfo and marks
- 	 * it as pushable if the join is found to be pushable.
- 	 */
- 	if (root->parse->commandType == CMD_DELETE ||
- 		root->parse->commandType == CMD_UPDATE ||
- 		root->rowMarks)
- 	{
- 		epq_path = GetExistingLocalJoinPath(joinrel);
- 		if (!epq_path)
- 		{
- 			elog(DEBUG3, "could not push down foreign join because a local path suitable for EPQ checks was not found");
- 			return;
- 		}
- 	}
- 	else
- 		epq_path = NULL;
- 
  	if (!foreign_join_ok(root, joinrel, jointype, outerrel, innerrel, extra))
  	{
  		/* Free path required for EPQ if we copied one; we don't need it now */
--- 5483,5488 ----
#49Andres Freund
andres@anarazel.de
In reply to: Etsuro Fujita (#48)
Re: [HACKERS] EvalPlanQual behaves oddly for FDW queries involving system columns

On 2019-03-15 21:58:25 +0900, Etsuro Fujita wrote:

(2019/03/04 12:10), Etsuro Fujita wrote:

(2019/03/02 3:57), Andres Freund wrote:

FWIW, I pushed the EPQ patch, doing this conversion blindly. It'd be
awesome if you'd check that it actually works...

I'll start the work later this week. I think I can post an (initial)
report on that next week, maybe.

Here is an updated version of the patch [1]. This version doesn't allow
pushing down joins to the remote if there is a possibility that EPQ will be
executed, but I think it would be useful to test the EPQ patch. I haven't
looked into the EPQ patch in detail yet, but I tested the patch with the
attached, and couldn't find any issues on the patch.

Thanks a lot for that work!

#50Alvaro Herrera
alvherre@alvh.no-ip.org
In reply to: Etsuro Fujita (#48)
Re: [HACKERS] EvalPlanQual behaves oddly for FDW queries involving system columns

On 2019-Mar-15, Etsuro Fujita wrote:

(2019/03/04 12:10), Etsuro Fujita wrote:

(2019/03/02 3:57), Andres Freund wrote:

FWIW, I pushed the EPQ patch, doing this conversion blindly. It'd be
awesome if you'd check that it actually works...

I'll start the work later this week. I think I can post an (initial)
report on that next week, maybe.

Here is an updated version of the patch [1].

What happened to this patch? Seems it was forgotten ...

--
�lvaro Herrera 39�49'30"S 73�17'W

#51Etsuro Fujita
etsuro.fujita@gmail.com
In reply to: Alvaro Herrera (#50)
Re: [HACKERS] EvalPlanQual behaves oddly for FDW queries involving system columns

On Fri, May 28, 2021 at 8:25 AM Alvaro Herrera <alvherre@alvh.no-ip.org> wrote:

On 2019-Mar-15, Etsuro Fujita wrote:

(2019/03/04 12:10), Etsuro Fujita wrote:

(2019/03/02 3:57), Andres Freund wrote:

FWIW, I pushed the EPQ patch, doing this conversion blindly. It'd be
awesome if you'd check that it actually works...

I'll start the work later this week. I think I can post an (initial)
report on that next week, maybe.

Here is an updated version of the patch [1].

What happened to this patch? Seems it was forgotten ...

We didn't do anything about the patch. IIRC, the patch was created
for testing the preliminary work for pluggable table access methods
(ad0bda5d2), rather than making it into postgres_fdw.

Best regards,
Etsuro Fujita