Problem with transition tables on partitioned tables with foreign-table partitions

Started by Etsuro Fujita7 months ago10 messages
#1Etsuro Fujita
etsuro.fujita@gmail.com
1 attachment(s)

Hi,

While working on something else, I noticed that while we disallow
transition tables on foreign tables, we allow transition tables on
partitioned tables with foreign-table partitions, which produces
incorrect results. Here is an example using postgres_fdw:

create table parent (a text, b int) partition by list (a);
create table loct (a text, b int);
create foreign table child (a text, b int)
server loopback options (table_name 'loct');
alter table parent attach partition child for values in ('AAA');

create function dump_insert() returns trigger language plpgsql as
$$
begin
raise notice 'trigger = %, new table = %',
TG_NAME,
(select string_agg(new_table::text, ', ' order by a)
from new_table);
return null;
end;
$$;
create trigger parent_insert_trig
after insert on parent referencing new table as new_table
for each statement execute procedure dump_insert();

create function intercept_insert() returns trigger language plpgsql as
$$
begin
new.b = new.b + 1000;
return new;
end;
$$;
create trigger intercept_insert_loct
before insert on loct
for each row execute procedure intercept_insert();

insert into parent values ('AAA', 42);
NOTICE: trigger = parent_insert_trig, new table = (AAA,42)
INSERT 0 1

The trigger shows the original tuple created by the core, not the
actual tuple inserted into the foreign-table partition, as
postgres_fdw does not collect the actual tuple, of course!

UPDATE/DELETE also produce incorrect results:

create function dump_update() returns trigger language plpgsql as
$$
begin
raise notice 'trigger = %, old table = %, new table = %',
TG_NAME,
(select string_agg(old_table::text, ', ' order by a)
from old_table),
(select string_agg(new_table::text, ', ' order by a)
from new_table);
return null;
end;
$$;
create trigger parent_update_trig
after update on parent referencing old table as old_table new table
as new_table
for each statement execute procedure dump_update();

update parent set b = b + 1;
NOTICE: trigger = parent_update_trig, old table = <NULL>, new table = <NULL>
UPDATE 1

create function dump_delete() returns trigger language plpgsql as
$$
begin
raise notice 'trigger = %, old table = %',
TG_NAME,
(select string_agg(old_table::text, ', ' order by a)
from old_table);
return null;
end;
$$;
create trigger parent_delete_trig
after delete on parent referencing old table as old_table
for each statement execute procedure dump_delete();

delete from parent;
NOTICE: trigger = parent_delete_trig, old table = <NULL>
DELETE 1

In both cases the triggers fail to show transition tuples. The cause
of this is that postgres_fdw mistakenly performs direct modify for
UPDATE/DELETE on the partition, which skips
ExecARUpdateTriggers()/ExecARDeleteTriggers() entirely.

To fix, I think we could disallow creating transition-table triggers
on such partitioned tables, but I think that that is too restrictive
because some users might have been using such triggers, avoiding this
problem by e.g., modifying only plain-table partitions. So I would
like to propose to fix this by the following: 1) disable using direct
modify to modify foreign-table partitions if there are any
transition-table triggers on the partitioned table, and then 2) throw
an error in ExecARInsertTriggers()/ExecARUpdateTriggers()/ExecARDeleteTriggers()
if they collects transition tuple(s) from a foreign-table partition.

Attached is a WIP patch for that.

Best regards,
Etsuro Fujita

Attachments:

fix-problem-with-transition-tables-wip.patchapplication/octet-stream; name=fix-problem-with-transition-tables-wip.patchDownload
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
index 67f8e70f9c1..ee5f27d92e4 100644
--- a/src/backend/commands/trigger.c
+++ b/src/backend/commands/trigger.c
@@ -2544,6 +2544,15 @@ ExecARInsertTriggers(EState *estate, ResultRelInfo *relinfo,
 {
 	TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
 
+	if (relinfo->ri_FdwRoutine && transition_capture &&
+		transition_capture->tcs_insert_new_table)
+	{
+		Assert(relinfo->ri_RootResultRelInfo);
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot collect transition tuples from child foreign tables")));
+	}
+
 	if ((trigdesc && trigdesc->trig_insert_after_row) ||
 		(transition_capture && transition_capture->tcs_insert_new_table))
 		AfterTriggerSaveEvent(estate, relinfo, NULL, NULL,
@@ -2787,6 +2796,15 @@ ExecARDeleteTriggers(EState *estate,
 {
 	TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
 
+	if (relinfo->ri_FdwRoutine && transition_capture &&
+		transition_capture->tcs_delete_old_table)
+	{
+		Assert(relinfo->ri_RootResultRelInfo);
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot collect transition tuples from child foreign tables")));
+	}
+
 	if ((trigdesc && trigdesc->trig_delete_after_row) ||
 		(transition_capture && transition_capture->tcs_delete_old_table))
 	{
@@ -3115,6 +3133,16 @@ ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
 {
 	TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
 
+	if (relinfo->ri_FdwRoutine && transition_capture &&
+		(transition_capture->tcs_update_old_table ||
+		 transition_capture->tcs_update_new_table))
+	{
+		Assert(relinfo->ri_RootResultRelInfo);
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot collect transition tuples from child foreign tables")));
+	}
+
 	if ((trigdesc && trigdesc->trig_update_after_row) ||
 		(transition_capture &&
 		 (transition_capture->tcs_update_old_table ||
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 0b61aef962c..b4770fa632d 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -7174,6 +7174,8 @@ make_modifytable(PlannerInfo *root, Plan *subplan,
 	ModifyTable *node = makeNode(ModifyTable);
 	bool		returning_old_or_new = false;
 	bool		returning_old_or_new_valid = false;
+	bool		transition_tables = false;
+	bool		transition_tables_valid = false;
 	List	   *fdw_private_list;
 	Bitmapset  *direct_modify_plans;
 	ListCell   *lc;
@@ -7320,8 +7322,8 @@ make_modifytable(PlannerInfo *root, Plan *subplan,
 		 * callback functions needed for that and (2) there are no local
 		 * structures that need to be run for each modified row: row-level
 		 * triggers on the foreign table, stored generated columns, WITH CHECK
-		 * OPTIONs from parent views, or Vars returning OLD/NEW in the
-		 * RETURNING list.
+		 * OPTIONs from parent views, transition tables on the named relation,
+		 * or Vars returning OLD/NEW in the RETURNING list.
 		 */
 		direct_modify = false;
 		if (fdwroutine != NULL &&
@@ -7333,7 +7335,10 @@ make_modifytable(PlannerInfo *root, Plan *subplan,
 			!has_row_triggers(root, rti, operation) &&
 			!has_stored_generated_columns(root, rti))
 		{
-			/* returning_old_or_new is the same for all result relations */
+			/*
+			 * returning_old_or_new and transition_tables are the same for all
+			 * result relations, respectively
+			 */
 			if (!returning_old_or_new_valid)
 			{
 				returning_old_or_new =
@@ -7342,7 +7347,18 @@ make_modifytable(PlannerInfo *root, Plan *subplan,
 				returning_old_or_new_valid = true;
 			}
 			if (!returning_old_or_new)
-				direct_modify = fdwroutine->PlanDirectModify(root, node, rti, i);
+			{
+				if (!transition_tables_valid)
+				{
+					transition_tables = has_transition_tables(root,
+															  nominalRelation,
+															  operation);
+					transition_tables_valid = true;
+				}
+				if (!transition_tables)
+					direct_modify = fdwroutine->PlanDirectModify(root, node,
+																 rti, i);
+			}
 		}
 		if (direct_modify)
 			direct_modify_plans = bms_add_member(direct_modify_plans, i);
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 59233b64730..dcba4b72198 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -2303,6 +2303,58 @@ has_row_triggers(PlannerInfo *root, Index rti, CmdType event)
 	return result;
 }
 
+/*
+ * has_transition_tables
+ *
+ * Detect whether the specified relation has any transition tables for event.
+ */
+bool
+has_transition_tables(PlannerInfo *root, Index rti, CmdType event)
+{
+	RangeTblEntry *rte = planner_rt_fetch(rti, root);
+	Relation	relation;
+	TriggerDesc *trigDesc;
+	bool		result = false;
+
+	if (rte->rtekind == RTE_RELATION &&
+		rte->relkind == RELKIND_FOREIGN_TABLE)
+		return result;
+
+	/* Assume we already have adequate lock */
+	relation = table_open(rte->relid, NoLock);
+
+	trigDesc = relation->trigdesc;
+	switch (event)
+	{
+		case CMD_INSERT:
+			if (trigDesc &&
+				trigDesc->trig_insert_new_table)
+				result = true;
+			break;
+		case CMD_UPDATE:
+			if (trigDesc &&
+				(trigDesc->trig_update_old_table ||
+				 trigDesc->trig_update_new_table))
+				result = true;
+			break;
+		case CMD_DELETE:
+			if (trigDesc &&
+				trigDesc->trig_delete_old_table)
+				result = true;
+			break;
+			/* There is no separate event for MERGE, only INSERT/UPDATE/DELETE */
+		case CMD_MERGE:
+			result = false;
+			break;
+		default:
+			elog(ERROR, "unrecognized CmdType: %d", (int) event);
+			break;
+	}
+
+	table_close(relation, NoLock);
+	return result;
+}
+
 /*
  * has_stored_generated_columns
  *
diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h
index cd74e4b1e8b..ac80458467b 100644
--- a/src/include/optimizer/plancat.h
+++ b/src/include/optimizer/plancat.h
@@ -72,6 +72,8 @@ extern double get_function_rows(PlannerInfo *root, Oid funcid, Node *node);
 
 extern bool has_row_triggers(PlannerInfo *root, Index rti, CmdType event);
 
+extern bool has_transition_tables(PlannerInfo *root, Index rti, CmdType event);
+
 extern bool has_stored_generated_columns(PlannerInfo *root, Index rti);
 
 extern Bitmapset *get_dependent_generated_columns(PlannerInfo *root, Index rti,
#2Amit Langote
amitlangote09@gmail.com
In reply to: Etsuro Fujita (#1)
Re: Problem with transition tables on partitioned tables with foreign-table partitions

Hi Fujita-san,

On Tue, Jul 1, 2025 at 11:55 AM Etsuro Fujita <etsuro.fujita@gmail.com> wrote:

Hi,

While working on something else, I noticed that while we disallow
transition tables on foreign tables, we allow transition tables on
partitioned tables with foreign-table partitions, which produces
incorrect results. Here is an example using postgres_fdw:

create table parent (a text, b int) partition by list (a);
create table loct (a text, b int);
create foreign table child (a text, b int)
server loopback options (table_name 'loct');
alter table parent attach partition child for values in ('AAA');

create function dump_insert() returns trigger language plpgsql as
$$
begin
raise notice 'trigger = %, new table = %',
TG_NAME,
(select string_agg(new_table::text, ', ' order by a)
from new_table);
return null;
end;
$$;
create trigger parent_insert_trig
after insert on parent referencing new table as new_table
for each statement execute procedure dump_insert();

create function intercept_insert() returns trigger language plpgsql as
$$
begin
new.b = new.b + 1000;
return new;
end;
$$;
create trigger intercept_insert_loct
before insert on loct
for each row execute procedure intercept_insert();

insert into parent values ('AAA', 42);
NOTICE: trigger = parent_insert_trig, new table = (AAA,42)
INSERT 0 1

The trigger shows the original tuple created by the core, not the
actual tuple inserted into the foreign-table partition, as
postgres_fdw does not collect the actual tuple, of course!

Maybe I'm missing something, but given that the intercept_insert()
function is applied during the "remote" operation, isn't it expected
that the parent table's trigger for a "local" operation shows the
original tuple?

UPDATE/DELETE also produce incorrect results:

create function dump_update() returns trigger language plpgsql as
$$
begin
raise notice 'trigger = %, old table = %, new table = %',
TG_NAME,
(select string_agg(old_table::text, ', ' order by a)
from old_table),
(select string_agg(new_table::text, ', ' order by a)
from new_table);
return null;
end;
$$;
create trigger parent_update_trig
after update on parent referencing old table as old_table new table
as new_table
for each statement execute procedure dump_update();

update parent set b = b + 1;
NOTICE: trigger = parent_update_trig, old table = <NULL>, new table = <NULL>
UPDATE 1

create function dump_delete() returns trigger language plpgsql as
$$
begin
raise notice 'trigger = %, old table = %',
TG_NAME,
(select string_agg(old_table::text, ', ' order by a)
from old_table);
return null;
end;
$$;
create trigger parent_delete_trig
after delete on parent referencing old table as old_table
for each statement execute procedure dump_delete();

delete from parent;
NOTICE: trigger = parent_delete_trig, old table = <NULL>
DELETE 1

In both cases the triggers fail to show transition tuples. The cause
of this is that postgres_fdw mistakenly performs direct modify for
UPDATE/DELETE on the partition, which skips
ExecARUpdateTriggers()/ExecARDeleteTriggers() entirely.

Yes, that seems problematic.

To fix, I think we could disallow creating transition-table triggers
on such partitioned tables, but I think that that is too restrictive
because some users might have been using such triggers, avoiding this
problem by e.g., modifying only plain-table partitions.

+1

So I would
like to propose to fix this by the following: 1) disable using direct
modify to modify foreign-table partitions if there are any
transition-table triggers on the partitioned table, and then 2) throw
an error in ExecARInsertTriggers()/ExecARUpdateTriggers()/ExecARDeleteTriggers()
if they collects transition tuple(s) from a foreign-table partition.

Is (2) intended to catch cases that occur during a foreign insert and
foreign/non-direct update/delete?

--
Thanks, Amit Langote

#3Etsuro Fujita
etsuro.fujita@gmail.com
In reply to: Amit Langote (#2)
Re: Problem with transition tables on partitioned tables with foreign-table partitions

Hi Amit-san,

On Tue, Jul 1, 2025 at 4:42 PM Amit Langote <amitlangote09@gmail.com> wrote:

On Tue, Jul 1, 2025 at 11:55 AM Etsuro Fujita <etsuro.fujita@gmail.com> wrote:

While working on something else, I noticed that while we disallow
transition tables on foreign tables, we allow transition tables on
partitioned tables with foreign-table partitions, which produces
incorrect results. Here is an example using postgres_fdw:

create table parent (a text, b int) partition by list (a);
create table loct (a text, b int);
create foreign table child (a text, b int)
server loopback options (table_name 'loct');
alter table parent attach partition child for values in ('AAA');

create function dump_insert() returns trigger language plpgsql as
$$
begin
raise notice 'trigger = %, new table = %',
TG_NAME,
(select string_agg(new_table::text, ', ' order by a)
from new_table);
return null;
end;
$$;
create trigger parent_insert_trig
after insert on parent referencing new table as new_table
for each statement execute procedure dump_insert();

create function intercept_insert() returns trigger language plpgsql as
$$
begin
new.b = new.b + 1000;
return new;
end;
$$;
create trigger intercept_insert_loct
before insert on loct
for each row execute procedure intercept_insert();

insert into parent values ('AAA', 42);
NOTICE: trigger = parent_insert_trig, new table = (AAA,42)
INSERT 0 1

The trigger shows the original tuple created by the core, not the
actual tuple inserted into the foreign-table partition, as
postgres_fdw does not collect the actual tuple, of course!

Maybe I'm missing something, but given that the intercept_insert()
function is applied during the "remote" operation, isn't it expected
that the parent table's trigger for a "local" operation shows the
original tuple?

That is the question of how we define the after image of a row
inserted into a foreign table, but consider the case where the
partition is a plain table:

create table parent (a text, b int) partition by list (a);
create table child partition of parent for values in ('AAA');
create trigger intercept_insert_child
before insert on child
for each row execute procedure intercept_insert();
insert into parent values ('AAA', 42);
NOTICE: trigger = parent_insert_trig, new table = (AAA,1042)
INSERT 0 1

The trigger shows the final tuple, not the original tuple. So from a
consistency perspective, I thought it would be good if the trigger
does so even in the case where the partition is a foreign table.

So I would
like to propose to fix this by the following: 1) disable using direct
modify to modify foreign-table partitions if there are any
transition-table triggers on the partitioned table, and then 2) throw
an error in ExecARInsertTriggers()/ExecARUpdateTriggers()/ExecARDeleteTriggers()
if they collects transition tuple(s) from a foreign-table partition.

Is (2) intended to catch cases that occur during a foreign insert and
foreign/non-direct update/delete?

That is right; the patch forces the FDW to perform ExecForeign*
functions, and then throws an error in ExecAR* functions. One good
thing about this is that we are able to avoid throwing the error when
local/remote row-level BEFORE triggers return NULL.

Thanks for the comments!

Best regards,
Etsuro Fujita

#4Amit Langote
amitlangote09@gmail.com
In reply to: Etsuro Fujita (#3)
Re: Problem with transition tables on partitioned tables with foreign-table partitions

On Wed, Jul 2, 2025 at 7:05 PM Etsuro Fujita <etsuro.fujita@gmail.com> wrote:

On Tue, Jul 1, 2025 at 4:42 PM Amit Langote <amitlangote09@gmail.com> wrote:

On Tue, Jul 1, 2025 at 11:55 AM Etsuro Fujita <etsuro.fujita@gmail.com> wrote:

While working on something else, I noticed that while we disallow
transition tables on foreign tables, we allow transition tables on
partitioned tables with foreign-table partitions, which produces
incorrect results. Here is an example using postgres_fdw:

create table parent (a text, b int) partition by list (a);
create table loct (a text, b int);
create foreign table child (a text, b int)
server loopback options (table_name 'loct');
alter table parent attach partition child for values in ('AAA');

create function dump_insert() returns trigger language plpgsql as
$$
begin
raise notice 'trigger = %, new table = %',
TG_NAME,
(select string_agg(new_table::text, ', ' order by a)
from new_table);
return null;
end;
$$;
create trigger parent_insert_trig
after insert on parent referencing new table as new_table
for each statement execute procedure dump_insert();

create function intercept_insert() returns trigger language plpgsql as
$$
begin
new.b = new.b + 1000;
return new;
end;
$$;
create trigger intercept_insert_loct
before insert on loct
for each row execute procedure intercept_insert();

insert into parent values ('AAA', 42);
NOTICE: trigger = parent_insert_trig, new table = (AAA,42)
INSERT 0 1

The trigger shows the original tuple created by the core, not the
actual tuple inserted into the foreign-table partition, as
postgres_fdw does not collect the actual tuple, of course!

Maybe I'm missing something, but given that the intercept_insert()
function is applied during the "remote" operation, isn't it expected
that the parent table's trigger for a "local" operation shows the
original tuple?

That is the question of how we define the after image of a row
inserted into a foreign table, but consider the case where the
partition is a plain table:

create table parent (a text, b int) partition by list (a);
create table child partition of parent for values in ('AAA');
create trigger intercept_insert_child
before insert on child
for each row execute procedure intercept_insert();
insert into parent values ('AAA', 42);
NOTICE: trigger = parent_insert_trig, new table = (AAA,1042)
INSERT 0 1

The trigger shows the final tuple, not the original tuple. So from a
consistency perspective, I thought it would be good if the trigger
does so even in the case where the partition is a foreign table.

Ok, but if you define the trigger on the foreign table partition
(child) as follows, you do get what I think is the expected result?

create trigger intercept_insert_foreign_child
before insert on child
for each row execute procedure intercept_insert();

insert into parent values ('AAA', 42);
NOTICE: trigger = parent_insert_trig, new table = (AAA,1042)

-- 2042, because row modified by both triggers
table parent;
a | b
-----+------
AAA | 2042
(1 row)

Or perhaps you're saying that the row returned by this line in ExecInsert():

/*
* insert into foreign table: let the FDW do it
*/
slot = resultRelInfo->ri_FdwRoutine->ExecForeignInsert(estate,
resultRelInfo,
slot,
planSlot);

is not the expected "after image", and thus should not be added to the
parent's transition table?

IIUC, to prevent that, we now hit the following error in:

void
ExecARInsertTriggers(EState *estate, ResultRelInfo *relinfo,
TupleTableSlot *slot, List *recheckIndexes,
TransitionCaptureState *transition_capture)
{
TriggerDesc *trigdesc = relinfo->ri_TrigDesc;

if (relinfo->ri_FdwRoutine && transition_capture &&
transition_capture->tcs_insert_new_table)
{
Assert(relinfo->ri_RootResultRelInfo);
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot collect transition tuples from child
foreign tables")));
}

So I would
like to propose to fix this by the following: 1) disable using direct
modify to modify foreign-table partitions if there are any
transition-table triggers on the partitioned table, and then 2) throw
an error in ExecARInsertTriggers()/ExecARUpdateTriggers()/ExecARDeleteTriggers()
if they collects transition tuple(s) from a foreign-table partition.

Is (2) intended to catch cases that occur during a foreign insert and
foreign/non-direct update/delete?

That is right; the patch forces the FDW to perform ExecForeign*
functions, and then throws an error in ExecAR* functions. One good
thing about this is that we are able to avoid throwing the error when
local/remote row-level BEFORE triggers return NULL.

Given my question above, I’m not sure I fully understand the intention
behind adding these checks.

--
Thanks, Amit Langote

#5Etsuro Fujita
etsuro.fujita@gmail.com
In reply to: Amit Langote (#4)
Re: Problem with transition tables on partitioned tables with foreign-table partitions

On Wed, Jul 2, 2025 at 10:05 PM Amit Langote <amitlangote09@gmail.com> wrote:

On Wed, Jul 2, 2025 at 7:05 PM Etsuro Fujita <etsuro.fujita@gmail.com> wrote:

On Tue, Jul 1, 2025 at 4:42 PM Amit Langote <amitlangote09@gmail.com> wrote:

On Tue, Jul 1, 2025 at 11:55 AM Etsuro Fujita <etsuro.fujita@gmail.com> wrote:

While working on something else, I noticed that while we disallow
transition tables on foreign tables, we allow transition tables on
partitioned tables with foreign-table partitions, which produces
incorrect results. Here is an example using postgres_fdw:

create table parent (a text, b int) partition by list (a);
create table loct (a text, b int);
create foreign table child (a text, b int)
server loopback options (table_name 'loct');
alter table parent attach partition child for values in ('AAA');

create function dump_insert() returns trigger language plpgsql as
$$
begin
raise notice 'trigger = %, new table = %',
TG_NAME,
(select string_agg(new_table::text, ', ' order by a)
from new_table);
return null;
end;
$$;
create trigger parent_insert_trig
after insert on parent referencing new table as new_table
for each statement execute procedure dump_insert();

create function intercept_insert() returns trigger language plpgsql as
$$
begin
new.b = new.b + 1000;
return new;
end;
$$;
create trigger intercept_insert_loct
before insert on loct
for each row execute procedure intercept_insert();

insert into parent values ('AAA', 42);
NOTICE: trigger = parent_insert_trig, new table = (AAA,42)
INSERT 0 1

The trigger shows the original tuple created by the core, not the
actual tuple inserted into the foreign-table partition, as
postgres_fdw does not collect the actual tuple, of course!

Maybe I'm missing something, but given that the intercept_insert()
function is applied during the "remote" operation, isn't it expected
that the parent table's trigger for a "local" operation shows the
original tuple?

That is the question of how we define the after image of a row
inserted into a foreign table, but consider the case where the
partition is a plain table:

create table parent (a text, b int) partition by list (a);
create table child partition of parent for values in ('AAA');
create trigger intercept_insert_child
before insert on child
for each row execute procedure intercept_insert();
insert into parent values ('AAA', 42);
NOTICE: trigger = parent_insert_trig, new table = (AAA,1042)
INSERT 0 1

The trigger shows the final tuple, not the original tuple. So from a
consistency perspective, I thought it would be good if the trigger
does so even in the case where the partition is a foreign table.

Ok, but if you define the trigger on the foreign table partition
(child) as follows, you do get what I think is the expected result?

create trigger intercept_insert_foreign_child
before insert on child
for each row execute procedure intercept_insert();

insert into parent values ('AAA', 42);
NOTICE: trigger = parent_insert_trig, new table = (AAA,1042)

-- 2042, because row modified by both triggers
table parent;
a | b
-----+------
AAA | 2042
(1 row)

Or perhaps you're saying that the row returned by this line in ExecInsert():

/*
* insert into foreign table: let the FDW do it
*/
slot = resultRelInfo->ri_FdwRoutine->ExecForeignInsert(estate,
resultRelInfo,
slot,
planSlot);

is not the expected "after image", and thus should not be added to the
parent's transition table?

Yes, that is what I mean by "postgres_fdw does not collect the actual
tuple, of course!" above. My explanation was not good, though.

IIUC, to prevent that, we now hit the following error in:

void
ExecARInsertTriggers(EState *estate, ResultRelInfo *relinfo,
TupleTableSlot *slot, List *recheckIndexes,
TransitionCaptureState *transition_capture)
{
TriggerDesc *trigdesc = relinfo->ri_TrigDesc;

if (relinfo->ri_FdwRoutine && transition_capture &&
transition_capture->tcs_insert_new_table)
{
Assert(relinfo->ri_RootResultRelInfo);
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot collect transition tuples from child
foreign tables")));
}

Right, I added the check for that.

Thanks!

Best regards,
Etsuro Fujita

#6Etsuro Fujita
etsuro.fujita@gmail.com
In reply to: Etsuro Fujita (#1)
1 attachment(s)
Re: Problem with transition tables on partitioned tables with foreign-table partitions

On Tue, Jul 1, 2025 at 11:55 AM Etsuro Fujita <etsuro.fujita@gmail.com> wrote:

So I would
like to propose to fix this by the following: 1) disable using direct
modify to modify foreign-table partitions if there are any
transition-table triggers on the partitioned table, and then 2) throw
an error in ExecARInsertTriggers()/ExecARUpdateTriggers()/ExecARDeleteTriggers()
if they collects transition tuple(s) from a foreign-table partition.

Attached is a WIP patch for that.

Here is an updated version of the patch, in which I added 1) an Assert
to ExecAR* functions to ensure that the passed-in ResultRelInfo
pointer is not NULL, 2) added/tweaked comments a bit more, and 3)
added regression tests.

Best regards,
Etsuro Fujita

Attachments:

fix-problem-with-transition-tables-v2.patchapplication/octet-stream; name=fix-problem-with-transition-tables-v2.patchDownload
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 2185b42bb4f..b5ec7c97db2 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -8212,6 +8212,122 @@ DELETE FROM rem1;                 -- can't be pushed down
 (5 rows)
 
 DROP TRIGGER trig_row_after_delete ON rem1;
+-- We are allowed to create transition-table triggers on both kinds of
+-- inheritance even if they contain foreign tables as children, but currently
+-- collecting transition tuples from such foreign tables is not supported.
+CREATE TABLE local_tbl (a text, b int);
+CREATE FOREIGN TABLE foreign_tbl (a text, b int)
+  SERVER loopback OPTIONS (table_name 'local_tbl');
+INSERT INTO foreign_tbl VALUES ('AAA', 42);
+-- Test case for partition hierarchy
+CREATE TABLE parent_tbl (a text, b int) PARTITION BY LIST (a);
+ALTER TABLE parent_tbl ATTACH PARTITION foreign_tbl FOR VALUES IN ('AAA');
+CREATE FUNCTION trigger_nothing() RETURNS trigger
+  LANGUAGE plpgsql AS $$ BEGIN END; $$;
+CREATE TRIGGER parent_tbl_insert_trig
+  AFTER INSERT ON parent_tbl REFERENCING NEW TABLE AS new_table
+  FOR EACH STATEMENT EXECUTE PROCEDURE trigger_nothing();
+CREATE TRIGGER parent_tbl_update_trig
+  AFTER UPDATE ON parent_tbl REFERENCING OLD TABLE AS old_table NEW TABLE AS new_table
+  FOR EACH STATEMENT EXECUTE PROCEDURE trigger_nothing();
+CREATE TRIGGER parent_tbl_delete_trig
+  AFTER DELETE ON parent_tbl REFERENCING OLD TABLE AS old_table
+  FOR EACH STATEMENT EXECUTE PROCEDURE trigger_nothing();
+INSERT INTO parent_tbl VALUES ('AAA', 42);
+ERROR:  cannot collect transition tuples from child foreign tables
+COPY parent_tbl (a, b) FROM stdin;
+ERROR:  cannot collect transition tuples from child foreign tables
+CONTEXT:  COPY parent_tbl, line 1: "AAA	42"
+ALTER SERVER loopback OPTIONS (ADD batch_size '10');
+INSERT INTO parent_tbl VALUES ('AAA', 42);
+ERROR:  cannot collect transition tuples from child foreign tables
+COPY parent_tbl (a, b) FROM stdin;
+ERROR:  cannot collect transition tuples from child foreign tables
+CONTEXT:  COPY parent_tbl, line 1: "AAA	42"
+ALTER SERVER loopback OPTIONS (DROP batch_size);
+EXPLAIN (VERBOSE, COSTS OFF)
+UPDATE parent_tbl SET b = b + 1;
+                                           QUERY PLAN                                           
+------------------------------------------------------------------------------------------------
+ Update on public.parent_tbl
+   Foreign Update on public.foreign_tbl parent_tbl_1
+     Remote SQL: UPDATE public.local_tbl SET b = $2 WHERE ctid = $1
+   ->  Foreign Scan on public.foreign_tbl parent_tbl_1
+         Output: (parent_tbl_1.b + 1), parent_tbl_1.tableoid, parent_tbl_1.ctid, parent_tbl_1.*
+         Remote SQL: SELECT a, b, ctid FROM public.local_tbl FOR UPDATE
+(6 rows)
+
+UPDATE parent_tbl SET b = b + 1;
+ERROR:  cannot collect transition tuples from child foreign tables
+EXPLAIN (VERBOSE, COSTS OFF)
+DELETE FROM parent_tbl;
+                            QUERY PLAN                            
+------------------------------------------------------------------
+ Delete on public.parent_tbl
+   Foreign Delete on public.foreign_tbl parent_tbl_1
+     Remote SQL: DELETE FROM public.local_tbl WHERE ctid = $1
+   ->  Foreign Scan on public.foreign_tbl parent_tbl_1
+         Output: parent_tbl_1.tableoid, parent_tbl_1.ctid
+         Remote SQL: SELECT ctid FROM public.local_tbl FOR UPDATE
+(6 rows)
+
+DELETE FROM parent_tbl;
+ERROR:  cannot collect transition tuples from child foreign tables
+ALTER TABLE parent_tbl DETACH PARTITION foreign_tbl;
+DROP TABLE parent_tbl;
+-- Test case for non-partition hierarchy
+CREATE TABLE parent_tbl (a text, b int);
+ALTER FOREIGN TABLE foreign_tbl INHERIT parent_tbl;
+CREATE TRIGGER parent_tbl_update_trig
+  AFTER UPDATE ON parent_tbl REFERENCING OLD TABLE AS old_table NEW TABLE AS new_table
+  FOR EACH STATEMENT EXECUTE PROCEDURE trigger_nothing();
+CREATE TRIGGER parent_tbl_delete_trig
+  AFTER DELETE ON parent_tbl REFERENCING OLD TABLE AS old_table
+  FOR EACH STATEMENT EXECUTE PROCEDURE trigger_nothing();
+EXPLAIN (VERBOSE, COSTS OFF)
+UPDATE parent_tbl SET b = b + 1;
+                                              QUERY PLAN                                              
+------------------------------------------------------------------------------------------------------
+ Update on public.parent_tbl
+   Update on public.parent_tbl parent_tbl_1
+   Foreign Update on public.foreign_tbl parent_tbl_2
+     Remote SQL: UPDATE public.local_tbl SET b = $2 WHERE ctid = $1
+   ->  Result
+         Output: (parent_tbl.b + 1), parent_tbl.tableoid, parent_tbl.ctid, (NULL::record)
+         ->  Append
+               ->  Seq Scan on public.parent_tbl parent_tbl_1
+                     Output: parent_tbl_1.b, parent_tbl_1.tableoid, parent_tbl_1.ctid, NULL::record
+               ->  Foreign Scan on public.foreign_tbl parent_tbl_2
+                     Output: parent_tbl_2.b, parent_tbl_2.tableoid, parent_tbl_2.ctid, parent_tbl_2.*
+                     Remote SQL: SELECT a, b, ctid FROM public.local_tbl FOR UPDATE
+(12 rows)
+
+UPDATE parent_tbl SET b = b + 1;
+ERROR:  cannot collect transition tuples from child foreign tables
+EXPLAIN (VERBOSE, COSTS OFF)
+DELETE FROM parent_tbl;
+                               QUERY PLAN                               
+------------------------------------------------------------------------
+ Delete on public.parent_tbl
+   Delete on public.parent_tbl parent_tbl_1
+   Foreign Delete on public.foreign_tbl parent_tbl_2
+     Remote SQL: DELETE FROM public.local_tbl WHERE ctid = $1
+   ->  Append
+         ->  Seq Scan on public.parent_tbl parent_tbl_1
+               Output: parent_tbl_1.tableoid, parent_tbl_1.ctid
+         ->  Foreign Scan on public.foreign_tbl parent_tbl_2
+               Output: parent_tbl_2.tableoid, parent_tbl_2.ctid
+               Remote SQL: SELECT ctid FROM public.local_tbl FOR UPDATE
+(10 rows)
+
+DELETE FROM parent_tbl;
+ERROR:  cannot collect transition tuples from child foreign tables
+ALTER FOREIGN TABLE foreign_tbl NO INHERIT parent_tbl;
+DROP TABLE parent_tbl;
+-- Cleanup
+DROP FUNCTION trigger_nothing();
+DROP FOREIGN TABLE foreign_tbl;
+DROP TABLE local_tbl;
 -- ===================================================================
 -- test inheritance features
 -- ===================================================================
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index e534b40de3c..dab294a75be 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -2272,6 +2272,87 @@ EXPLAIN (verbose, costs off)
 DELETE FROM rem1;                 -- can't be pushed down
 DROP TRIGGER trig_row_after_delete ON rem1;
 
+-- We are allowed to create transition-table triggers on both kinds of
+-- inheritance even if they contain foreign tables as children, but currently
+-- collecting transition tuples from such foreign tables is not supported.
+
+CREATE TABLE local_tbl (a text, b int);
+CREATE FOREIGN TABLE foreign_tbl (a text, b int)
+  SERVER loopback OPTIONS (table_name 'local_tbl');
+
+INSERT INTO foreign_tbl VALUES ('AAA', 42);
+
+-- Test case for partition hierarchy
+CREATE TABLE parent_tbl (a text, b int) PARTITION BY LIST (a);
+ALTER TABLE parent_tbl ATTACH PARTITION foreign_tbl FOR VALUES IN ('AAA');
+
+CREATE FUNCTION trigger_nothing() RETURNS trigger
+  LANGUAGE plpgsql AS $$ BEGIN END; $$;
+
+CREATE TRIGGER parent_tbl_insert_trig
+  AFTER INSERT ON parent_tbl REFERENCING NEW TABLE AS new_table
+  FOR EACH STATEMENT EXECUTE PROCEDURE trigger_nothing();
+CREATE TRIGGER parent_tbl_update_trig
+  AFTER UPDATE ON parent_tbl REFERENCING OLD TABLE AS old_table NEW TABLE AS new_table
+  FOR EACH STATEMENT EXECUTE PROCEDURE trigger_nothing();
+CREATE TRIGGER parent_tbl_delete_trig
+  AFTER DELETE ON parent_tbl REFERENCING OLD TABLE AS old_table
+  FOR EACH STATEMENT EXECUTE PROCEDURE trigger_nothing();
+
+INSERT INTO parent_tbl VALUES ('AAA', 42);
+
+COPY parent_tbl (a, b) FROM stdin;
+AAA	42
+\.
+
+ALTER SERVER loopback OPTIONS (ADD batch_size '10');
+
+INSERT INTO parent_tbl VALUES ('AAA', 42);
+
+COPY parent_tbl (a, b) FROM stdin;
+AAA	42
+\.
+
+ALTER SERVER loopback OPTIONS (DROP batch_size);
+
+EXPLAIN (VERBOSE, COSTS OFF)
+UPDATE parent_tbl SET b = b + 1;
+UPDATE parent_tbl SET b = b + 1;
+
+EXPLAIN (VERBOSE, COSTS OFF)
+DELETE FROM parent_tbl;
+DELETE FROM parent_tbl;
+
+ALTER TABLE parent_tbl DETACH PARTITION foreign_tbl;
+DROP TABLE parent_tbl;
+
+-- Test case for non-partition hierarchy
+CREATE TABLE parent_tbl (a text, b int);
+ALTER FOREIGN TABLE foreign_tbl INHERIT parent_tbl;
+
+CREATE TRIGGER parent_tbl_update_trig
+  AFTER UPDATE ON parent_tbl REFERENCING OLD TABLE AS old_table NEW TABLE AS new_table
+  FOR EACH STATEMENT EXECUTE PROCEDURE trigger_nothing();
+CREATE TRIGGER parent_tbl_delete_trig
+  AFTER DELETE ON parent_tbl REFERENCING OLD TABLE AS old_table
+  FOR EACH STATEMENT EXECUTE PROCEDURE trigger_nothing();
+
+EXPLAIN (VERBOSE, COSTS OFF)
+UPDATE parent_tbl SET b = b + 1;
+UPDATE parent_tbl SET b = b + 1;
+
+EXPLAIN (VERBOSE, COSTS OFF)
+DELETE FROM parent_tbl;
+DELETE FROM parent_tbl;
+
+ALTER FOREIGN TABLE foreign_tbl NO INHERIT parent_tbl;
+DROP TABLE parent_tbl;
+
+-- Cleanup
+DROP FUNCTION trigger_nothing();
+DROP FOREIGN TABLE foreign_tbl;
+DROP TABLE local_tbl;
+
 -- ===================================================================
 -- test inheritance features
 -- ===================================================================
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
index 67f8e70f9c1..f23dce9485a 100644
--- a/src/backend/commands/trigger.c
+++ b/src/backend/commands/trigger.c
@@ -2544,6 +2544,17 @@ ExecARInsertTriggers(EState *estate, ResultRelInfo *relinfo,
 {
 	TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
 
+	Assert(relinfo);
+
+	if (relinfo->ri_FdwRoutine && transition_capture &&
+		transition_capture->tcs_insert_new_table)
+	{
+		Assert(relinfo->ri_RootResultRelInfo);
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot collect transition tuples from child foreign tables")));
+	}
+
 	if ((trigdesc && trigdesc->trig_insert_after_row) ||
 		(transition_capture && transition_capture->tcs_insert_new_table))
 		AfterTriggerSaveEvent(estate, relinfo, NULL, NULL,
@@ -2787,6 +2798,17 @@ ExecARDeleteTriggers(EState *estate,
 {
 	TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
 
+	Assert(relinfo);
+
+	if (relinfo->ri_FdwRoutine && transition_capture &&
+		transition_capture->tcs_delete_old_table)
+	{
+		Assert(relinfo->ri_RootResultRelInfo);
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot collect transition tuples from child foreign tables")));
+	}
+
 	if ((trigdesc && trigdesc->trig_delete_after_row) ||
 		(transition_capture && transition_capture->tcs_delete_old_table))
 	{
@@ -3115,6 +3137,18 @@ ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
 {
 	TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
 
+	Assert(relinfo);
+
+	if (relinfo->ri_FdwRoutine && transition_capture &&
+		(transition_capture->tcs_update_old_table ||
+		 transition_capture->tcs_update_new_table))
+	{
+		Assert(relinfo->ri_RootResultRelInfo);
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot collect transition tuples from child foreign tables")));
+	}
+
 	if ((trigdesc && trigdesc->trig_update_after_row) ||
 		(transition_capture &&
 		 (transition_capture->tcs_update_old_table ||
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 8a9f1d7a943..d3119288b1d 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -7224,6 +7224,8 @@ make_modifytable(PlannerInfo *root, Plan *subplan,
 	ModifyTable *node = makeNode(ModifyTable);
 	bool		returning_old_or_new = false;
 	bool		returning_old_or_new_valid = false;
+	bool		transition_tables = false;
+	bool		transition_tables_valid = false;
 	List	   *fdw_private_list;
 	Bitmapset  *direct_modify_plans;
 	ListCell   *lc;
@@ -7370,8 +7372,8 @@ make_modifytable(PlannerInfo *root, Plan *subplan,
 		 * callback functions needed for that and (2) there are no local
 		 * structures that need to be run for each modified row: row-level
 		 * triggers on the foreign table, stored generated columns, WITH CHECK
-		 * OPTIONs from parent views, or Vars returning OLD/NEW in the
-		 * RETURNING list.
+		 * OPTIONs from parent views, Vars returning OLD/NEW in the RETURNING
+		 * list, or transition tables on the named relation.
 		 */
 		direct_modify = false;
 		if (fdwroutine != NULL &&
@@ -7383,7 +7385,10 @@ make_modifytable(PlannerInfo *root, Plan *subplan,
 			!has_row_triggers(root, rti, operation) &&
 			!has_stored_generated_columns(root, rti))
 		{
-			/* returning_old_or_new is the same for all result relations */
+			/*
+			 * returning_old_or_new and transition_tables are the same for all
+			 * result relations, respectively
+			 */
 			if (!returning_old_or_new_valid)
 			{
 				returning_old_or_new =
@@ -7392,7 +7397,18 @@ make_modifytable(PlannerInfo *root, Plan *subplan,
 				returning_old_or_new_valid = true;
 			}
 			if (!returning_old_or_new)
-				direct_modify = fdwroutine->PlanDirectModify(root, node, rti, i);
+			{
+				if (!transition_tables_valid)
+				{
+					transition_tables = has_transition_tables(root,
+															  nominalRelation,
+															  operation);
+					transition_tables_valid = true;
+				}
+				if (!transition_tables)
+					direct_modify = fdwroutine->PlanDirectModify(root, node,
+																 rti, i);
+			}
 		}
 		if (direct_modify)
 			direct_modify_plans = bms_add_member(direct_modify_plans, i);
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 59233b64730..1321f41c4b7 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -2303,6 +2303,59 @@ has_row_triggers(PlannerInfo *root, Index rti, CmdType event)
 	return result;
 }
 
+/*
+ * has_transition_tables
+ *
+ * Detect whether the specified relation has any transition tables for event.
+ */
+bool
+has_transition_tables(PlannerInfo *root, Index rti, CmdType event)
+{
+	RangeTblEntry *rte = planner_rt_fetch(rti, root);
+	Relation	relation;
+	TriggerDesc *trigDesc;
+	bool		result = false;
+
+	/* Currently foreign tables cannot have transition tables */
+	if (rte->rtekind == RTE_RELATION &&
+		rte->relkind == RELKIND_FOREIGN_TABLE)
+		return result;
+
+	/* Assume we already have adequate lock */
+	relation = table_open(rte->relid, NoLock);
+
+	trigDesc = relation->trigdesc;
+	switch (event)
+	{
+		case CMD_INSERT:
+			if (trigDesc &&
+				trigDesc->trig_insert_new_table)
+				result = true;
+			break;
+		case CMD_UPDATE:
+			if (trigDesc &&
+				(trigDesc->trig_update_old_table ||
+				 trigDesc->trig_update_new_table))
+				result = true;
+			break;
+		case CMD_DELETE:
+			if (trigDesc &&
+				trigDesc->trig_delete_old_table)
+				result = true;
+			break;
+			/* There is no separate event for MERGE, only INSERT/UPDATE/DELETE */
+		case CMD_MERGE:
+			result = false;
+			break;
+		default:
+			elog(ERROR, "unrecognized CmdType: %d", (int) event);
+			break;
+	}
+
+	table_close(relation, NoLock);
+	return result;
+}
+
 /*
  * has_stored_generated_columns
  *
diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h
index cd74e4b1e8b..ac80458467b 100644
--- a/src/include/optimizer/plancat.h
+++ b/src/include/optimizer/plancat.h
@@ -72,6 +72,8 @@ extern double get_function_rows(PlannerInfo *root, Oid funcid, Node *node);
 
 extern bool has_row_triggers(PlannerInfo *root, Index rti, CmdType event);
 
+extern bool has_transition_tables(PlannerInfo *root, Index rti, CmdType event);
+
 extern bool has_stored_generated_columns(PlannerInfo *root, Index rti);
 
 extern Bitmapset *get_dependent_generated_columns(PlannerInfo *root, Index rti,
#7Amit Langote
amitlangote09@gmail.com
In reply to: Etsuro Fujita (#6)
Re: Problem with transition tables on partitioned tables with foreign-table partitions

Fujita-san,

On Wed, Jul 9, 2025 at 5:07 PM Etsuro Fujita <etsuro.fujita@gmail.com> wrote:

On Tue, Jul 1, 2025 at 11:55 AM Etsuro Fujita <etsuro.fujita@gmail.com> wrote:

So I would
like to propose to fix this by the following: 1) disable using direct
modify to modify foreign-table partitions if there are any
transition-table triggers on the partitioned table, and then 2) throw
an error in ExecARInsertTriggers()/ExecARUpdateTriggers()/ExecARDeleteTriggers()
if they collects transition tuple(s) from a foreign-table partition.

Attached is a WIP patch for that.

Here is an updated version of the patch, in which I added 1) an Assert
to ExecAR* functions to ensure that the passed-in ResultRelInfo
pointer is not NULL, 2) added/tweaked comments a bit more, and 3)
added regression tests.

Thanks for the new patch. LGTM.

While reading it, I noticed that the functions performing table_open()
are repeatedly called in this condition, which runs for every
qualifying foreign child relations:

if (fdwroutine != NULL &&
fdwroutine->PlanDirectModify != NULL &&
fdwroutine->BeginDirectModify != NULL &&
fdwroutine->IterateDirectModify != NULL &&
fdwroutine->EndDirectModify != NULL &&
withCheckOptionLists == NIL &&
!has_row_triggers(root, rti, operation) &&
!has_stored_generated_columns(root, rti))

That seems a bit expensive. It might be worth using *_valid flags to
avoid redundant table_open() calls like you're doing for transition
table checking. Maybe something to consider in a separate patch.

--
Thanks, Amit Langote

#8Amit Langote
amitlangote09@gmail.com
In reply to: Amit Langote (#7)
Re: Problem with transition tables on partitioned tables with foreign-table partitions

On Thu, Jul 10, 2025 at 22:20 Amit Langote <amitlangote09@gmail.com> wrote:

Fujita-san,

On Wed, Jul 9, 2025 at 5:07 PM Etsuro Fujita <etsuro.fujita@gmail.com>
wrote:

On Tue, Jul 1, 2025 at 11:55 AM Etsuro Fujita <etsuro.fujita@gmail.com>

wrote:

So I would
like to propose to fix this by the following: 1) disable using direct
modify to modify foreign-table partitions if there are any
transition-table triggers on the partitioned table, and then 2) throw
an error in

ExecARInsertTriggers()/ExecARUpdateTriggers()/ExecARDeleteTriggers()

if they collects transition tuple(s) from a foreign-table partition.

Attached is a WIP patch for that.

Here is an updated version of the patch, in which I added 1) an Assert
to ExecAR* functions to ensure that the passed-in ResultRelInfo
pointer is not NULL, 2) added/tweaked comments a bit more, and 3)
added regression tests.

Thanks for the new patch. LGTM.

While reading it, I noticed that the functions performing table_open()
are repeatedly called in this condition, which runs for every
qualifying foreign child relations:

if (fdwroutine != NULL &&
fdwroutine->PlanDirectModify != NULL &&
fdwroutine->BeginDirectModify != NULL &&
fdwroutine->IterateDirectModify != NULL &&
fdwroutine->EndDirectModify != NULL &&
withCheckOptionLists == NIL &&
!has_row_triggers(root, rti, operation) &&
!has_stored_generated_columns(root, rti))

That seems a bit expensive. It might be worth using *_valid flags to
avoid redundant table_open() calls like you're doing for transition
table checking. Maybe something to consider in a separate patch.

Ah, scratch that because I missed that transition table checking is done
for the “named” relation and these are checking it for child relations.

- Amit

Show quoted text
#9Etsuro Fujita
etsuro.fujita@gmail.com
In reply to: Amit Langote (#8)
1 attachment(s)
Re: Problem with transition tables on partitioned tables with foreign-table partitions

Amit-san,

On Thu, Jul 10, 2025 at 11:54 PM Amit Langote <amitlangote09@gmail.com> wrote:

On Thu, Jul 10, 2025 at 22:20 Amit Langote <amitlangote09@gmail.com> wrote:

On Wed, Jul 9, 2025 at 5:07 PM Etsuro Fujita <etsuro.fujita@gmail.com> wrote:

Here is an updated version of the patch, in which I added 1) an Assert
to ExecAR* functions to ensure that the passed-in ResultRelInfo
pointer is not NULL, 2) added/tweaked comments a bit more, and 3)
added regression tests.

Thanks for the new patch. LGTM.

Cool!

While reading it, I noticed that the functions performing table_open()
are repeatedly called in this condition, which runs for every
qualifying foreign child relations:

if (fdwroutine != NULL &&
fdwroutine->PlanDirectModify != NULL &&
fdwroutine->BeginDirectModify != NULL &&
fdwroutine->IterateDirectModify != NULL &&
fdwroutine->EndDirectModify != NULL &&
withCheckOptionLists == NIL &&
!has_row_triggers(root, rti, operation) &&
!has_stored_generated_columns(root, rti))

That seems a bit expensive. It might be worth using *_valid flags to
avoid redundant table_open() calls like you're doing for transition
table checking. Maybe something to consider in a separate patch.

Ah, scratch that because I missed that transition table checking is done for the “named” relation and these are checking it for child relations.

Ok, thanks for taking the time for this patch!

After re-reading the patch I noticed two minor things:

* The existing code in ExecAR* functions already dereferences the
passed-in ResultRelInfo pointer without checking that it is not NULL.
The Assert I added to those functions would be an overkill, so I
removed it. Sorry for the back and forth.

* I added a trigger function trigger_nothing() in the regression
tests, but I noticed an existing trigger function above the tests. To
make the tests a bit small, I replaced trigger_nothing() with the
existing trigger function and removed trigger_nothing().

Attached is a new version of the patch.

Best regards,
Etsuro Fujita

Attachments:

fix-problem-with-transition-tables-v3.patchapplication/octet-stream; name=fix-problem-with-transition-tables-v3.patchDownload
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 2185b42bb4f..fa9ee716067 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -8212,6 +8212,119 @@ DELETE FROM rem1;                 -- can't be pushed down
 (5 rows)
 
 DROP TRIGGER trig_row_after_delete ON rem1;
+-- We are allowed to create transition-table triggers on both kinds of
+-- inheritance even if they contain foreign tables as children, but currently
+-- collecting transition tuples from such foreign tables is not supported.
+CREATE TABLE local_tbl (a text, b int);
+CREATE FOREIGN TABLE foreign_tbl (a text, b int)
+  SERVER loopback OPTIONS (table_name 'local_tbl');
+INSERT INTO foreign_tbl VALUES ('AAA', 42);
+-- Test case for partition hierarchy
+CREATE TABLE parent_tbl (a text, b int) PARTITION BY LIST (a);
+ALTER TABLE parent_tbl ATTACH PARTITION foreign_tbl FOR VALUES IN ('AAA');
+CREATE TRIGGER parent_tbl_insert_trig
+  AFTER INSERT ON parent_tbl REFERENCING NEW TABLE AS new_table
+  FOR EACH STATEMENT EXECUTE PROCEDURE trigger_func();
+CREATE TRIGGER parent_tbl_update_trig
+  AFTER UPDATE ON parent_tbl REFERENCING OLD TABLE AS old_table NEW TABLE AS new_table
+  FOR EACH STATEMENT EXECUTE PROCEDURE trigger_func();
+CREATE TRIGGER parent_tbl_delete_trig
+  AFTER DELETE ON parent_tbl REFERENCING OLD TABLE AS old_table
+  FOR EACH STATEMENT EXECUTE PROCEDURE trigger_func();
+INSERT INTO parent_tbl VALUES ('AAA', 42);
+ERROR:  cannot collect transition tuples from child foreign tables
+COPY parent_tbl (a, b) FROM stdin;
+ERROR:  cannot collect transition tuples from child foreign tables
+CONTEXT:  COPY parent_tbl, line 1: "AAA	42"
+ALTER SERVER loopback OPTIONS (ADD batch_size '10');
+INSERT INTO parent_tbl VALUES ('AAA', 42);
+ERROR:  cannot collect transition tuples from child foreign tables
+COPY parent_tbl (a, b) FROM stdin;
+ERROR:  cannot collect transition tuples from child foreign tables
+CONTEXT:  COPY parent_tbl, line 1: "AAA	42"
+ALTER SERVER loopback OPTIONS (DROP batch_size);
+EXPLAIN (VERBOSE, COSTS OFF)
+UPDATE parent_tbl SET b = b + 1;
+                                           QUERY PLAN                                           
+------------------------------------------------------------------------------------------------
+ Update on public.parent_tbl
+   Foreign Update on public.foreign_tbl parent_tbl_1
+     Remote SQL: UPDATE public.local_tbl SET b = $2 WHERE ctid = $1
+   ->  Foreign Scan on public.foreign_tbl parent_tbl_1
+         Output: (parent_tbl_1.b + 1), parent_tbl_1.tableoid, parent_tbl_1.ctid, parent_tbl_1.*
+         Remote SQL: SELECT a, b, ctid FROM public.local_tbl FOR UPDATE
+(6 rows)
+
+UPDATE parent_tbl SET b = b + 1;
+ERROR:  cannot collect transition tuples from child foreign tables
+EXPLAIN (VERBOSE, COSTS OFF)
+DELETE FROM parent_tbl;
+                            QUERY PLAN                            
+------------------------------------------------------------------
+ Delete on public.parent_tbl
+   Foreign Delete on public.foreign_tbl parent_tbl_1
+     Remote SQL: DELETE FROM public.local_tbl WHERE ctid = $1
+   ->  Foreign Scan on public.foreign_tbl parent_tbl_1
+         Output: parent_tbl_1.tableoid, parent_tbl_1.ctid
+         Remote SQL: SELECT ctid FROM public.local_tbl FOR UPDATE
+(6 rows)
+
+DELETE FROM parent_tbl;
+ERROR:  cannot collect transition tuples from child foreign tables
+ALTER TABLE parent_tbl DETACH PARTITION foreign_tbl;
+DROP TABLE parent_tbl;
+-- Test case for non-partition hierarchy
+CREATE TABLE parent_tbl (a text, b int);
+ALTER FOREIGN TABLE foreign_tbl INHERIT parent_tbl;
+CREATE TRIGGER parent_tbl_update_trig
+  AFTER UPDATE ON parent_tbl REFERENCING OLD TABLE AS old_table NEW TABLE AS new_table
+  FOR EACH STATEMENT EXECUTE PROCEDURE trigger_func();
+CREATE TRIGGER parent_tbl_delete_trig
+  AFTER DELETE ON parent_tbl REFERENCING OLD TABLE AS old_table
+  FOR EACH STATEMENT EXECUTE PROCEDURE trigger_func();
+EXPLAIN (VERBOSE, COSTS OFF)
+UPDATE parent_tbl SET b = b + 1;
+                                              QUERY PLAN                                              
+------------------------------------------------------------------------------------------------------
+ Update on public.parent_tbl
+   Update on public.parent_tbl parent_tbl_1
+   Foreign Update on public.foreign_tbl parent_tbl_2
+     Remote SQL: UPDATE public.local_tbl SET b = $2 WHERE ctid = $1
+   ->  Result
+         Output: (parent_tbl.b + 1), parent_tbl.tableoid, parent_tbl.ctid, (NULL::record)
+         ->  Append
+               ->  Seq Scan on public.parent_tbl parent_tbl_1
+                     Output: parent_tbl_1.b, parent_tbl_1.tableoid, parent_tbl_1.ctid, NULL::record
+               ->  Foreign Scan on public.foreign_tbl parent_tbl_2
+                     Output: parent_tbl_2.b, parent_tbl_2.tableoid, parent_tbl_2.ctid, parent_tbl_2.*
+                     Remote SQL: SELECT a, b, ctid FROM public.local_tbl FOR UPDATE
+(12 rows)
+
+UPDATE parent_tbl SET b = b + 1;
+ERROR:  cannot collect transition tuples from child foreign tables
+EXPLAIN (VERBOSE, COSTS OFF)
+DELETE FROM parent_tbl;
+                               QUERY PLAN                               
+------------------------------------------------------------------------
+ Delete on public.parent_tbl
+   Delete on public.parent_tbl parent_tbl_1
+   Foreign Delete on public.foreign_tbl parent_tbl_2
+     Remote SQL: DELETE FROM public.local_tbl WHERE ctid = $1
+   ->  Append
+         ->  Seq Scan on public.parent_tbl parent_tbl_1
+               Output: parent_tbl_1.tableoid, parent_tbl_1.ctid
+         ->  Foreign Scan on public.foreign_tbl parent_tbl_2
+               Output: parent_tbl_2.tableoid, parent_tbl_2.ctid
+               Remote SQL: SELECT ctid FROM public.local_tbl FOR UPDATE
+(10 rows)
+
+DELETE FROM parent_tbl;
+ERROR:  cannot collect transition tuples from child foreign tables
+ALTER FOREIGN TABLE foreign_tbl NO INHERIT parent_tbl;
+DROP TABLE parent_tbl;
+-- Cleanup
+DROP FOREIGN TABLE foreign_tbl;
+DROP TABLE local_tbl;
 -- ===================================================================
 -- test inheritance features
 -- ===================================================================
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index e534b40de3c..b449e122abe 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -2272,6 +2272,83 @@ EXPLAIN (verbose, costs off)
 DELETE FROM rem1;                 -- can't be pushed down
 DROP TRIGGER trig_row_after_delete ON rem1;
 
+-- We are allowed to create transition-table triggers on both kinds of
+-- inheritance even if they contain foreign tables as children, but currently
+-- collecting transition tuples from such foreign tables is not supported.
+
+CREATE TABLE local_tbl (a text, b int);
+CREATE FOREIGN TABLE foreign_tbl (a text, b int)
+  SERVER loopback OPTIONS (table_name 'local_tbl');
+
+INSERT INTO foreign_tbl VALUES ('AAA', 42);
+
+-- Test case for partition hierarchy
+CREATE TABLE parent_tbl (a text, b int) PARTITION BY LIST (a);
+ALTER TABLE parent_tbl ATTACH PARTITION foreign_tbl FOR VALUES IN ('AAA');
+
+CREATE TRIGGER parent_tbl_insert_trig
+  AFTER INSERT ON parent_tbl REFERENCING NEW TABLE AS new_table
+  FOR EACH STATEMENT EXECUTE PROCEDURE trigger_func();
+CREATE TRIGGER parent_tbl_update_trig
+  AFTER UPDATE ON parent_tbl REFERENCING OLD TABLE AS old_table NEW TABLE AS new_table
+  FOR EACH STATEMENT EXECUTE PROCEDURE trigger_func();
+CREATE TRIGGER parent_tbl_delete_trig
+  AFTER DELETE ON parent_tbl REFERENCING OLD TABLE AS old_table
+  FOR EACH STATEMENT EXECUTE PROCEDURE trigger_func();
+
+INSERT INTO parent_tbl VALUES ('AAA', 42);
+
+COPY parent_tbl (a, b) FROM stdin;
+AAA	42
+\.
+
+ALTER SERVER loopback OPTIONS (ADD batch_size '10');
+
+INSERT INTO parent_tbl VALUES ('AAA', 42);
+
+COPY parent_tbl (a, b) FROM stdin;
+AAA	42
+\.
+
+ALTER SERVER loopback OPTIONS (DROP batch_size);
+
+EXPLAIN (VERBOSE, COSTS OFF)
+UPDATE parent_tbl SET b = b + 1;
+UPDATE parent_tbl SET b = b + 1;
+
+EXPLAIN (VERBOSE, COSTS OFF)
+DELETE FROM parent_tbl;
+DELETE FROM parent_tbl;
+
+ALTER TABLE parent_tbl DETACH PARTITION foreign_tbl;
+DROP TABLE parent_tbl;
+
+-- Test case for non-partition hierarchy
+CREATE TABLE parent_tbl (a text, b int);
+ALTER FOREIGN TABLE foreign_tbl INHERIT parent_tbl;
+
+CREATE TRIGGER parent_tbl_update_trig
+  AFTER UPDATE ON parent_tbl REFERENCING OLD TABLE AS old_table NEW TABLE AS new_table
+  FOR EACH STATEMENT EXECUTE PROCEDURE trigger_func();
+CREATE TRIGGER parent_tbl_delete_trig
+  AFTER DELETE ON parent_tbl REFERENCING OLD TABLE AS old_table
+  FOR EACH STATEMENT EXECUTE PROCEDURE trigger_func();
+
+EXPLAIN (VERBOSE, COSTS OFF)
+UPDATE parent_tbl SET b = b + 1;
+UPDATE parent_tbl SET b = b + 1;
+
+EXPLAIN (VERBOSE, COSTS OFF)
+DELETE FROM parent_tbl;
+DELETE FROM parent_tbl;
+
+ALTER FOREIGN TABLE foreign_tbl NO INHERIT parent_tbl;
+DROP TABLE parent_tbl;
+
+-- Cleanup
+DROP FOREIGN TABLE foreign_tbl;
+DROP TABLE local_tbl;
+
 -- ===================================================================
 -- test inheritance features
 -- ===================================================================
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
index 67f8e70f9c1..ee5f27d92e4 100644
--- a/src/backend/commands/trigger.c
+++ b/src/backend/commands/trigger.c
@@ -2544,6 +2544,15 @@ ExecARInsertTriggers(EState *estate, ResultRelInfo *relinfo,
 {
 	TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
 
+	if (relinfo->ri_FdwRoutine && transition_capture &&
+		transition_capture->tcs_insert_new_table)
+	{
+		Assert(relinfo->ri_RootResultRelInfo);
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot collect transition tuples from child foreign tables")));
+	}
+
 	if ((trigdesc && trigdesc->trig_insert_after_row) ||
 		(transition_capture && transition_capture->tcs_insert_new_table))
 		AfterTriggerSaveEvent(estate, relinfo, NULL, NULL,
@@ -2787,6 +2796,15 @@ ExecARDeleteTriggers(EState *estate,
 {
 	TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
 
+	if (relinfo->ri_FdwRoutine && transition_capture &&
+		transition_capture->tcs_delete_old_table)
+	{
+		Assert(relinfo->ri_RootResultRelInfo);
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot collect transition tuples from child foreign tables")));
+	}
+
 	if ((trigdesc && trigdesc->trig_delete_after_row) ||
 		(transition_capture && transition_capture->tcs_delete_old_table))
 	{
@@ -3115,6 +3133,16 @@ ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
 {
 	TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
 
+	if (relinfo->ri_FdwRoutine && transition_capture &&
+		(transition_capture->tcs_update_old_table ||
+		 transition_capture->tcs_update_new_table))
+	{
+		Assert(relinfo->ri_RootResultRelInfo);
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot collect transition tuples from child foreign tables")));
+	}
+
 	if ((trigdesc && trigdesc->trig_update_after_row) ||
 		(transition_capture &&
 		 (transition_capture->tcs_update_old_table ||
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 8a9f1d7a943..d3119288b1d 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -7224,6 +7224,8 @@ make_modifytable(PlannerInfo *root, Plan *subplan,
 	ModifyTable *node = makeNode(ModifyTable);
 	bool		returning_old_or_new = false;
 	bool		returning_old_or_new_valid = false;
+	bool		transition_tables = false;
+	bool		transition_tables_valid = false;
 	List	   *fdw_private_list;
 	Bitmapset  *direct_modify_plans;
 	ListCell   *lc;
@@ -7370,8 +7372,8 @@ make_modifytable(PlannerInfo *root, Plan *subplan,
 		 * callback functions needed for that and (2) there are no local
 		 * structures that need to be run for each modified row: row-level
 		 * triggers on the foreign table, stored generated columns, WITH CHECK
-		 * OPTIONs from parent views, or Vars returning OLD/NEW in the
-		 * RETURNING list.
+		 * OPTIONs from parent views, Vars returning OLD/NEW in the RETURNING
+		 * list, or transition tables on the named relation.
 		 */
 		direct_modify = false;
 		if (fdwroutine != NULL &&
@@ -7383,7 +7385,10 @@ make_modifytable(PlannerInfo *root, Plan *subplan,
 			!has_row_triggers(root, rti, operation) &&
 			!has_stored_generated_columns(root, rti))
 		{
-			/* returning_old_or_new is the same for all result relations */
+			/*
+			 * returning_old_or_new and transition_tables are the same for all
+			 * result relations, respectively
+			 */
 			if (!returning_old_or_new_valid)
 			{
 				returning_old_or_new =
@@ -7392,7 +7397,18 @@ make_modifytable(PlannerInfo *root, Plan *subplan,
 				returning_old_or_new_valid = true;
 			}
 			if (!returning_old_or_new)
-				direct_modify = fdwroutine->PlanDirectModify(root, node, rti, i);
+			{
+				if (!transition_tables_valid)
+				{
+					transition_tables = has_transition_tables(root,
+															  nominalRelation,
+															  operation);
+					transition_tables_valid = true;
+				}
+				if (!transition_tables)
+					direct_modify = fdwroutine->PlanDirectModify(root, node,
+																 rti, i);
+			}
 		}
 		if (direct_modify)
 			direct_modify_plans = bms_add_member(direct_modify_plans, i);
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 59233b64730..1321f41c4b7 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -2303,6 +2303,59 @@ has_row_triggers(PlannerInfo *root, Index rti, CmdType event)
 	return result;
 }
 
+/*
+ * has_transition_tables
+ *
+ * Detect whether the specified relation has any transition tables for event.
+ */
+bool
+has_transition_tables(PlannerInfo *root, Index rti, CmdType event)
+{
+	RangeTblEntry *rte = planner_rt_fetch(rti, root);
+	Relation	relation;
+	TriggerDesc *trigDesc;
+	bool		result = false;
+
+	/* Currently foreign tables cannot have transition tables */
+	if (rte->rtekind == RTE_RELATION &&
+		rte->relkind == RELKIND_FOREIGN_TABLE)
+		return result;
+
+	/* Assume we already have adequate lock */
+	relation = table_open(rte->relid, NoLock);
+
+	trigDesc = relation->trigdesc;
+	switch (event)
+	{
+		case CMD_INSERT:
+			if (trigDesc &&
+				trigDesc->trig_insert_new_table)
+				result = true;
+			break;
+		case CMD_UPDATE:
+			if (trigDesc &&
+				(trigDesc->trig_update_old_table ||
+				 trigDesc->trig_update_new_table))
+				result = true;
+			break;
+		case CMD_DELETE:
+			if (trigDesc &&
+				trigDesc->trig_delete_old_table)
+				result = true;
+			break;
+			/* There is no separate event for MERGE, only INSERT/UPDATE/DELETE */
+		case CMD_MERGE:
+			result = false;
+			break;
+		default:
+			elog(ERROR, "unrecognized CmdType: %d", (int) event);
+			break;
+	}
+
+	table_close(relation, NoLock);
+	return result;
+}
+
 /*
  * has_stored_generated_columns
  *
diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h
index cd74e4b1e8b..ac80458467b 100644
--- a/src/include/optimizer/plancat.h
+++ b/src/include/optimizer/plancat.h
@@ -72,6 +72,8 @@ extern double get_function_rows(PlannerInfo *root, Oid funcid, Node *node);
 
 extern bool has_row_triggers(PlannerInfo *root, Index rti, CmdType event);
 
+extern bool has_transition_tables(PlannerInfo *root, Index rti, CmdType event);
+
 extern bool has_stored_generated_columns(PlannerInfo *root, Index rti);
 
 extern Bitmapset *get_dependent_generated_columns(PlannerInfo *root, Index rti,
#10Etsuro Fujita
etsuro.fujita@gmail.com
In reply to: Etsuro Fujita (#9)
Re: Problem with transition tables on partitioned tables with foreign-table partitions

On Mon, Jul 14, 2025 at 8:00 PM Etsuro Fujita <etsuro.fujita@gmail.com> wrote:

After re-reading the patch I noticed two minor things:

* The existing code in ExecAR* functions already dereferences the
passed-in ResultRelInfo pointer without checking that it is not NULL.
The Assert I added to those functions would be an overkill, so I
removed it. Sorry for the back and forth.

* I added a trigger function trigger_nothing() in the regression
tests, but I noticed an existing trigger function above the tests. To
make the tests a bit small, I replaced trigger_nothing() with the
existing trigger function and removed trigger_nothing().

Attached is a new version of the patch.

I have pushed this and back-patched it to all supported versions.

Best regards,
Etsuro Fujita