Problem with transition tables on partitioned tables with foreign-table partitions
Hi,
While working on something else, I noticed that while we disallow
transition tables on foreign tables, we allow transition tables on
partitioned tables with foreign-table partitions, which produces
incorrect results. Here is an example using postgres_fdw:
create table parent (a text, b int) partition by list (a);
create table loct (a text, b int);
create foreign table child (a text, b int)
server loopback options (table_name 'loct');
alter table parent attach partition child for values in ('AAA');
create function dump_insert() returns trigger language plpgsql as
$$
begin
raise notice 'trigger = %, new table = %',
TG_NAME,
(select string_agg(new_table::text, ', ' order by a)
from new_table);
return null;
end;
$$;
create trigger parent_insert_trig
after insert on parent referencing new table as new_table
for each statement execute procedure dump_insert();
create function intercept_insert() returns trigger language plpgsql as
$$
begin
new.b = new.b + 1000;
return new;
end;
$$;
create trigger intercept_insert_loct
before insert on loct
for each row execute procedure intercept_insert();
insert into parent values ('AAA', 42);
NOTICE: trigger = parent_insert_trig, new table = (AAA,42)
INSERT 0 1
The trigger shows the original tuple created by the core, not the
actual tuple inserted into the foreign-table partition, as
postgres_fdw does not collect the actual tuple, of course!
UPDATE/DELETE also produce incorrect results:
create function dump_update() returns trigger language plpgsql as
$$
begin
raise notice 'trigger = %, old table = %, new table = %',
TG_NAME,
(select string_agg(old_table::text, ', ' order by a)
from old_table),
(select string_agg(new_table::text, ', ' order by a)
from new_table);
return null;
end;
$$;
create trigger parent_update_trig
after update on parent referencing old table as old_table new table
as new_table
for each statement execute procedure dump_update();
update parent set b = b + 1;
NOTICE: trigger = parent_update_trig, old table = <NULL>, new table = <NULL>
UPDATE 1
create function dump_delete() returns trigger language plpgsql as
$$
begin
raise notice 'trigger = %, old table = %',
TG_NAME,
(select string_agg(old_table::text, ', ' order by a)
from old_table);
return null;
end;
$$;
create trigger parent_delete_trig
after delete on parent referencing old table as old_table
for each statement execute procedure dump_delete();
delete from parent;
NOTICE: trigger = parent_delete_trig, old table = <NULL>
DELETE 1
In both cases the triggers fail to show transition tuples. The cause
of this is that postgres_fdw mistakenly performs direct modify for
UPDATE/DELETE on the partition, which skips
ExecARUpdateTriggers()/ExecARDeleteTriggers() entirely.
To fix, I think we could disallow creating transition-table triggers
on such partitioned tables, but I think that that is too restrictive
because some users might have been using such triggers, avoiding this
problem by e.g., modifying only plain-table partitions. So I would
like to propose to fix this by the following: 1) disable using direct
modify to modify foreign-table partitions if there are any
transition-table triggers on the partitioned table, and then 2) throw
an error in ExecARInsertTriggers()/ExecARUpdateTriggers()/ExecARDeleteTriggers()
if they collects transition tuple(s) from a foreign-table partition.
Attached is a WIP patch for that.
Best regards,
Etsuro Fujita
Attachments:
fix-problem-with-transition-tables-wip.patchapplication/octet-stream; name=fix-problem-with-transition-tables-wip.patchDownload
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
index 67f8e70f9c1..ee5f27d92e4 100644
--- a/src/backend/commands/trigger.c
+++ b/src/backend/commands/trigger.c
@@ -2544,6 +2544,15 @@ ExecARInsertTriggers(EState *estate, ResultRelInfo *relinfo,
{
TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
+ if (relinfo->ri_FdwRoutine && transition_capture &&
+ transition_capture->tcs_insert_new_table)
+ {
+ Assert(relinfo->ri_RootResultRelInfo);
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot collect transition tuples from child foreign tables")));
+ }
+
if ((trigdesc && trigdesc->trig_insert_after_row) ||
(transition_capture && transition_capture->tcs_insert_new_table))
AfterTriggerSaveEvent(estate, relinfo, NULL, NULL,
@@ -2787,6 +2796,15 @@ ExecARDeleteTriggers(EState *estate,
{
TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
+ if (relinfo->ri_FdwRoutine && transition_capture &&
+ transition_capture->tcs_delete_old_table)
+ {
+ Assert(relinfo->ri_RootResultRelInfo);
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot collect transition tuples from child foreign tables")));
+ }
+
if ((trigdesc && trigdesc->trig_delete_after_row) ||
(transition_capture && transition_capture->tcs_delete_old_table))
{
@@ -3115,6 +3133,16 @@ ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
{
TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
+ if (relinfo->ri_FdwRoutine && transition_capture &&
+ (transition_capture->tcs_update_old_table ||
+ transition_capture->tcs_update_new_table))
+ {
+ Assert(relinfo->ri_RootResultRelInfo);
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot collect transition tuples from child foreign tables")));
+ }
+
if ((trigdesc && trigdesc->trig_update_after_row) ||
(transition_capture &&
(transition_capture->tcs_update_old_table ||
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 0b61aef962c..b4770fa632d 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -7174,6 +7174,8 @@ make_modifytable(PlannerInfo *root, Plan *subplan,
ModifyTable *node = makeNode(ModifyTable);
bool returning_old_or_new = false;
bool returning_old_or_new_valid = false;
+ bool transition_tables = false;
+ bool transition_tables_valid = false;
List *fdw_private_list;
Bitmapset *direct_modify_plans;
ListCell *lc;
@@ -7320,8 +7322,8 @@ make_modifytable(PlannerInfo *root, Plan *subplan,
* callback functions needed for that and (2) there are no local
* structures that need to be run for each modified row: row-level
* triggers on the foreign table, stored generated columns, WITH CHECK
- * OPTIONs from parent views, or Vars returning OLD/NEW in the
- * RETURNING list.
+ * OPTIONs from parent views, transition tables on the named relation,
+ * or Vars returning OLD/NEW in the RETURNING list.
*/
direct_modify = false;
if (fdwroutine != NULL &&
@@ -7333,7 +7335,10 @@ make_modifytable(PlannerInfo *root, Plan *subplan,
!has_row_triggers(root, rti, operation) &&
!has_stored_generated_columns(root, rti))
{
- /* returning_old_or_new is the same for all result relations */
+ /*
+ * returning_old_or_new and transition_tables are the same for all
+ * result relations, respectively
+ */
if (!returning_old_or_new_valid)
{
returning_old_or_new =
@@ -7342,7 +7347,18 @@ make_modifytable(PlannerInfo *root, Plan *subplan,
returning_old_or_new_valid = true;
}
if (!returning_old_or_new)
- direct_modify = fdwroutine->PlanDirectModify(root, node, rti, i);
+ {
+ if (!transition_tables_valid)
+ {
+ transition_tables = has_transition_tables(root,
+ nominalRelation,
+ operation);
+ transition_tables_valid = true;
+ }
+ if (!transition_tables)
+ direct_modify = fdwroutine->PlanDirectModify(root, node,
+ rti, i);
+ }
}
if (direct_modify)
direct_modify_plans = bms_add_member(direct_modify_plans, i);
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 59233b64730..dcba4b72198 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -2303,6 +2303,58 @@ has_row_triggers(PlannerInfo *root, Index rti, CmdType event)
return result;
}
+/*
+ * has_transition_tables
+ *
+ * Detect whether the specified relation has any transition tables for event.
+ */
+bool
+has_transition_tables(PlannerInfo *root, Index rti, CmdType event)
+{
+ RangeTblEntry *rte = planner_rt_fetch(rti, root);
+ Relation relation;
+ TriggerDesc *trigDesc;
+ bool result = false;
+
+ if (rte->rtekind == RTE_RELATION &&
+ rte->relkind == RELKIND_FOREIGN_TABLE)
+ return result;
+
+ /* Assume we already have adequate lock */
+ relation = table_open(rte->relid, NoLock);
+
+ trigDesc = relation->trigdesc;
+ switch (event)
+ {
+ case CMD_INSERT:
+ if (trigDesc &&
+ trigDesc->trig_insert_new_table)
+ result = true;
+ break;
+ case CMD_UPDATE:
+ if (trigDesc &&
+ (trigDesc->trig_update_old_table ||
+ trigDesc->trig_update_new_table))
+ result = true;
+ break;
+ case CMD_DELETE:
+ if (trigDesc &&
+ trigDesc->trig_delete_old_table)
+ result = true;
+ break;
+ /* There is no separate event for MERGE, only INSERT/UPDATE/DELETE */
+ case CMD_MERGE:
+ result = false;
+ break;
+ default:
+ elog(ERROR, "unrecognized CmdType: %d", (int) event);
+ break;
+ }
+
+ table_close(relation, NoLock);
+ return result;
+}
+
/*
* has_stored_generated_columns
*
diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h
index cd74e4b1e8b..ac80458467b 100644
--- a/src/include/optimizer/plancat.h
+++ b/src/include/optimizer/plancat.h
@@ -72,6 +72,8 @@ extern double get_function_rows(PlannerInfo *root, Oid funcid, Node *node);
extern bool has_row_triggers(PlannerInfo *root, Index rti, CmdType event);
+extern bool has_transition_tables(PlannerInfo *root, Index rti, CmdType event);
+
extern bool has_stored_generated_columns(PlannerInfo *root, Index rti);
extern Bitmapset *get_dependent_generated_columns(PlannerInfo *root, Index rti,
Hi Fujita-san,
On Tue, Jul 1, 2025 at 11:55 AM Etsuro Fujita <etsuro.fujita@gmail.com> wrote:
Hi,
While working on something else, I noticed that while we disallow
transition tables on foreign tables, we allow transition tables on
partitioned tables with foreign-table partitions, which produces
incorrect results. Here is an example using postgres_fdw:create table parent (a text, b int) partition by list (a);
create table loct (a text, b int);
create foreign table child (a text, b int)
server loopback options (table_name 'loct');
alter table parent attach partition child for values in ('AAA');create function dump_insert() returns trigger language plpgsql as
$$
begin
raise notice 'trigger = %, new table = %',
TG_NAME,
(select string_agg(new_table::text, ', ' order by a)
from new_table);
return null;
end;
$$;
create trigger parent_insert_trig
after insert on parent referencing new table as new_table
for each statement execute procedure dump_insert();create function intercept_insert() returns trigger language plpgsql as
$$
begin
new.b = new.b + 1000;
return new;
end;
$$;
create trigger intercept_insert_loct
before insert on loct
for each row execute procedure intercept_insert();insert into parent values ('AAA', 42);
NOTICE: trigger = parent_insert_trig, new table = (AAA,42)
INSERT 0 1The trigger shows the original tuple created by the core, not the
actual tuple inserted into the foreign-table partition, as
postgres_fdw does not collect the actual tuple, of course!
Maybe I'm missing something, but given that the intercept_insert()
function is applied during the "remote" operation, isn't it expected
that the parent table's trigger for a "local" operation shows the
original tuple?
UPDATE/DELETE also produce incorrect results:
create function dump_update() returns trigger language plpgsql as
$$
begin
raise notice 'trigger = %, old table = %, new table = %',
TG_NAME,
(select string_agg(old_table::text, ', ' order by a)
from old_table),
(select string_agg(new_table::text, ', ' order by a)
from new_table);
return null;
end;
$$;
create trigger parent_update_trig
after update on parent referencing old table as old_table new table
as new_table
for each statement execute procedure dump_update();update parent set b = b + 1;
NOTICE: trigger = parent_update_trig, old table = <NULL>, new table = <NULL>
UPDATE 1create function dump_delete() returns trigger language plpgsql as
$$
begin
raise notice 'trigger = %, old table = %',
TG_NAME,
(select string_agg(old_table::text, ', ' order by a)
from old_table);
return null;
end;
$$;
create trigger parent_delete_trig
after delete on parent referencing old table as old_table
for each statement execute procedure dump_delete();delete from parent;
NOTICE: trigger = parent_delete_trig, old table = <NULL>
DELETE 1In both cases the triggers fail to show transition tuples. The cause
of this is that postgres_fdw mistakenly performs direct modify for
UPDATE/DELETE on the partition, which skips
ExecARUpdateTriggers()/ExecARDeleteTriggers() entirely.
Yes, that seems problematic.
To fix, I think we could disallow creating transition-table triggers
on such partitioned tables, but I think that that is too restrictive
because some users might have been using such triggers, avoiding this
problem by e.g., modifying only plain-table partitions.
+1
So I would
like to propose to fix this by the following: 1) disable using direct
modify to modify foreign-table partitions if there are any
transition-table triggers on the partitioned table, and then 2) throw
an error in ExecARInsertTriggers()/ExecARUpdateTriggers()/ExecARDeleteTriggers()
if they collects transition tuple(s) from a foreign-table partition.
Is (2) intended to catch cases that occur during a foreign insert and
foreign/non-direct update/delete?
--
Thanks, Amit Langote
Hi Amit-san,
On Tue, Jul 1, 2025 at 4:42 PM Amit Langote <amitlangote09@gmail.com> wrote:
On Tue, Jul 1, 2025 at 11:55 AM Etsuro Fujita <etsuro.fujita@gmail.com> wrote:
While working on something else, I noticed that while we disallow
transition tables on foreign tables, we allow transition tables on
partitioned tables with foreign-table partitions, which produces
incorrect results. Here is an example using postgres_fdw:create table parent (a text, b int) partition by list (a);
create table loct (a text, b int);
create foreign table child (a text, b int)
server loopback options (table_name 'loct');
alter table parent attach partition child for values in ('AAA');create function dump_insert() returns trigger language plpgsql as
$$
begin
raise notice 'trigger = %, new table = %',
TG_NAME,
(select string_agg(new_table::text, ', ' order by a)
from new_table);
return null;
end;
$$;
create trigger parent_insert_trig
after insert on parent referencing new table as new_table
for each statement execute procedure dump_insert();create function intercept_insert() returns trigger language plpgsql as
$$
begin
new.b = new.b + 1000;
return new;
end;
$$;
create trigger intercept_insert_loct
before insert on loct
for each row execute procedure intercept_insert();insert into parent values ('AAA', 42);
NOTICE: trigger = parent_insert_trig, new table = (AAA,42)
INSERT 0 1The trigger shows the original tuple created by the core, not the
actual tuple inserted into the foreign-table partition, as
postgres_fdw does not collect the actual tuple, of course!Maybe I'm missing something, but given that the intercept_insert()
function is applied during the "remote" operation, isn't it expected
that the parent table's trigger for a "local" operation shows the
original tuple?
That is the question of how we define the after image of a row
inserted into a foreign table, but consider the case where the
partition is a plain table:
create table parent (a text, b int) partition by list (a);
create table child partition of parent for values in ('AAA');
create trigger intercept_insert_child
before insert on child
for each row execute procedure intercept_insert();
insert into parent values ('AAA', 42);
NOTICE: trigger = parent_insert_trig, new table = (AAA,1042)
INSERT 0 1
The trigger shows the final tuple, not the original tuple. So from a
consistency perspective, I thought it would be good if the trigger
does so even in the case where the partition is a foreign table.
So I would
like to propose to fix this by the following: 1) disable using direct
modify to modify foreign-table partitions if there are any
transition-table triggers on the partitioned table, and then 2) throw
an error in ExecARInsertTriggers()/ExecARUpdateTriggers()/ExecARDeleteTriggers()
if they collects transition tuple(s) from a foreign-table partition.Is (2) intended to catch cases that occur during a foreign insert and
foreign/non-direct update/delete?
That is right; the patch forces the FDW to perform ExecForeign*
functions, and then throws an error in ExecAR* functions. One good
thing about this is that we are able to avoid throwing the error when
local/remote row-level BEFORE triggers return NULL.
Thanks for the comments!
Best regards,
Etsuro Fujita
On Wed, Jul 2, 2025 at 7:05 PM Etsuro Fujita <etsuro.fujita@gmail.com> wrote:
On Tue, Jul 1, 2025 at 4:42 PM Amit Langote <amitlangote09@gmail.com> wrote:
On Tue, Jul 1, 2025 at 11:55 AM Etsuro Fujita <etsuro.fujita@gmail.com> wrote:
While working on something else, I noticed that while we disallow
transition tables on foreign tables, we allow transition tables on
partitioned tables with foreign-table partitions, which produces
incorrect results. Here is an example using postgres_fdw:create table parent (a text, b int) partition by list (a);
create table loct (a text, b int);
create foreign table child (a text, b int)
server loopback options (table_name 'loct');
alter table parent attach partition child for values in ('AAA');create function dump_insert() returns trigger language plpgsql as
$$
begin
raise notice 'trigger = %, new table = %',
TG_NAME,
(select string_agg(new_table::text, ', ' order by a)
from new_table);
return null;
end;
$$;
create trigger parent_insert_trig
after insert on parent referencing new table as new_table
for each statement execute procedure dump_insert();create function intercept_insert() returns trigger language plpgsql as
$$
begin
new.b = new.b + 1000;
return new;
end;
$$;
create trigger intercept_insert_loct
before insert on loct
for each row execute procedure intercept_insert();insert into parent values ('AAA', 42);
NOTICE: trigger = parent_insert_trig, new table = (AAA,42)
INSERT 0 1The trigger shows the original tuple created by the core, not the
actual tuple inserted into the foreign-table partition, as
postgres_fdw does not collect the actual tuple, of course!Maybe I'm missing something, but given that the intercept_insert()
function is applied during the "remote" operation, isn't it expected
that the parent table's trigger for a "local" operation shows the
original tuple?That is the question of how we define the after image of a row
inserted into a foreign table, but consider the case where the
partition is a plain table:create table parent (a text, b int) partition by list (a);
create table child partition of parent for values in ('AAA');
create trigger intercept_insert_child
before insert on child
for each row execute procedure intercept_insert();
insert into parent values ('AAA', 42);
NOTICE: trigger = parent_insert_trig, new table = (AAA,1042)
INSERT 0 1The trigger shows the final tuple, not the original tuple. So from a
consistency perspective, I thought it would be good if the trigger
does so even in the case where the partition is a foreign table.
Ok, but if you define the trigger on the foreign table partition
(child) as follows, you do get what I think is the expected result?
create trigger intercept_insert_foreign_child
before insert on child
for each row execute procedure intercept_insert();
insert into parent values ('AAA', 42);
NOTICE: trigger = parent_insert_trig, new table = (AAA,1042)
-- 2042, because row modified by both triggers
table parent;
a | b
-----+------
AAA | 2042
(1 row)
Or perhaps you're saying that the row returned by this line in ExecInsert():
/*
* insert into foreign table: let the FDW do it
*/
slot = resultRelInfo->ri_FdwRoutine->ExecForeignInsert(estate,
resultRelInfo,
slot,
planSlot);
is not the expected "after image", and thus should not be added to the
parent's transition table?
IIUC, to prevent that, we now hit the following error in:
void
ExecARInsertTriggers(EState *estate, ResultRelInfo *relinfo,
TupleTableSlot *slot, List *recheckIndexes,
TransitionCaptureState *transition_capture)
{
TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
if (relinfo->ri_FdwRoutine && transition_capture &&
transition_capture->tcs_insert_new_table)
{
Assert(relinfo->ri_RootResultRelInfo);
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot collect transition tuples from child
foreign tables")));
}
So I would
like to propose to fix this by the following: 1) disable using direct
modify to modify foreign-table partitions if there are any
transition-table triggers on the partitioned table, and then 2) throw
an error in ExecARInsertTriggers()/ExecARUpdateTriggers()/ExecARDeleteTriggers()
if they collects transition tuple(s) from a foreign-table partition.Is (2) intended to catch cases that occur during a foreign insert and
foreign/non-direct update/delete?That is right; the patch forces the FDW to perform ExecForeign*
functions, and then throws an error in ExecAR* functions. One good
thing about this is that we are able to avoid throwing the error when
local/remote row-level BEFORE triggers return NULL.
Given my question above, I’m not sure I fully understand the intention
behind adding these checks.
--
Thanks, Amit Langote
On Wed, Jul 2, 2025 at 10:05 PM Amit Langote <amitlangote09@gmail.com> wrote:
On Wed, Jul 2, 2025 at 7:05 PM Etsuro Fujita <etsuro.fujita@gmail.com> wrote:
On Tue, Jul 1, 2025 at 4:42 PM Amit Langote <amitlangote09@gmail.com> wrote:
On Tue, Jul 1, 2025 at 11:55 AM Etsuro Fujita <etsuro.fujita@gmail.com> wrote:
While working on something else, I noticed that while we disallow
transition tables on foreign tables, we allow transition tables on
partitioned tables with foreign-table partitions, which produces
incorrect results. Here is an example using postgres_fdw:create table parent (a text, b int) partition by list (a);
create table loct (a text, b int);
create foreign table child (a text, b int)
server loopback options (table_name 'loct');
alter table parent attach partition child for values in ('AAA');create function dump_insert() returns trigger language plpgsql as
$$
begin
raise notice 'trigger = %, new table = %',
TG_NAME,
(select string_agg(new_table::text, ', ' order by a)
from new_table);
return null;
end;
$$;
create trigger parent_insert_trig
after insert on parent referencing new table as new_table
for each statement execute procedure dump_insert();create function intercept_insert() returns trigger language plpgsql as
$$
begin
new.b = new.b + 1000;
return new;
end;
$$;
create trigger intercept_insert_loct
before insert on loct
for each row execute procedure intercept_insert();insert into parent values ('AAA', 42);
NOTICE: trigger = parent_insert_trig, new table = (AAA,42)
INSERT 0 1The trigger shows the original tuple created by the core, not the
actual tuple inserted into the foreign-table partition, as
postgres_fdw does not collect the actual tuple, of course!Maybe I'm missing something, but given that the intercept_insert()
function is applied during the "remote" operation, isn't it expected
that the parent table's trigger for a "local" operation shows the
original tuple?That is the question of how we define the after image of a row
inserted into a foreign table, but consider the case where the
partition is a plain table:create table parent (a text, b int) partition by list (a);
create table child partition of parent for values in ('AAA');
create trigger intercept_insert_child
before insert on child
for each row execute procedure intercept_insert();
insert into parent values ('AAA', 42);
NOTICE: trigger = parent_insert_trig, new table = (AAA,1042)
INSERT 0 1The trigger shows the final tuple, not the original tuple. So from a
consistency perspective, I thought it would be good if the trigger
does so even in the case where the partition is a foreign table.Ok, but if you define the trigger on the foreign table partition
(child) as follows, you do get what I think is the expected result?create trigger intercept_insert_foreign_child
before insert on child
for each row execute procedure intercept_insert();insert into parent values ('AAA', 42);
NOTICE: trigger = parent_insert_trig, new table = (AAA,1042)-- 2042, because row modified by both triggers
table parent;
a | b
-----+------
AAA | 2042
(1 row)Or perhaps you're saying that the row returned by this line in ExecInsert():
/*
* insert into foreign table: let the FDW do it
*/
slot = resultRelInfo->ri_FdwRoutine->ExecForeignInsert(estate,
resultRelInfo,
slot,
planSlot);is not the expected "after image", and thus should not be added to the
parent's transition table?
Yes, that is what I mean by "postgres_fdw does not collect the actual
tuple, of course!" above. My explanation was not good, though.
IIUC, to prevent that, we now hit the following error in:
void
ExecARInsertTriggers(EState *estate, ResultRelInfo *relinfo,
TupleTableSlot *slot, List *recheckIndexes,
TransitionCaptureState *transition_capture)
{
TriggerDesc *trigdesc = relinfo->ri_TrigDesc;if (relinfo->ri_FdwRoutine && transition_capture &&
transition_capture->tcs_insert_new_table)
{
Assert(relinfo->ri_RootResultRelInfo);
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot collect transition tuples from child
foreign tables")));
}
Right, I added the check for that.
Thanks!
Best regards,
Etsuro Fujita
On Tue, Jul 1, 2025 at 11:55 AM Etsuro Fujita <etsuro.fujita@gmail.com> wrote:
So I would
like to propose to fix this by the following: 1) disable using direct
modify to modify foreign-table partitions if there are any
transition-table triggers on the partitioned table, and then 2) throw
an error in ExecARInsertTriggers()/ExecARUpdateTriggers()/ExecARDeleteTriggers()
if they collects transition tuple(s) from a foreign-table partition.Attached is a WIP patch for that.
Here is an updated version of the patch, in which I added 1) an Assert
to ExecAR* functions to ensure that the passed-in ResultRelInfo
pointer is not NULL, 2) added/tweaked comments a bit more, and 3)
added regression tests.
Best regards,
Etsuro Fujita
Attachments:
fix-problem-with-transition-tables-v2.patchapplication/octet-stream; name=fix-problem-with-transition-tables-v2.patchDownload
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 2185b42bb4f..b5ec7c97db2 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -8212,6 +8212,122 @@ DELETE FROM rem1; -- can't be pushed down
(5 rows)
DROP TRIGGER trig_row_after_delete ON rem1;
+-- We are allowed to create transition-table triggers on both kinds of
+-- inheritance even if they contain foreign tables as children, but currently
+-- collecting transition tuples from such foreign tables is not supported.
+CREATE TABLE local_tbl (a text, b int);
+CREATE FOREIGN TABLE foreign_tbl (a text, b int)
+ SERVER loopback OPTIONS (table_name 'local_tbl');
+INSERT INTO foreign_tbl VALUES ('AAA', 42);
+-- Test case for partition hierarchy
+CREATE TABLE parent_tbl (a text, b int) PARTITION BY LIST (a);
+ALTER TABLE parent_tbl ATTACH PARTITION foreign_tbl FOR VALUES IN ('AAA');
+CREATE FUNCTION trigger_nothing() RETURNS trigger
+ LANGUAGE plpgsql AS $$ BEGIN END; $$;
+CREATE TRIGGER parent_tbl_insert_trig
+ AFTER INSERT ON parent_tbl REFERENCING NEW TABLE AS new_table
+ FOR EACH STATEMENT EXECUTE PROCEDURE trigger_nothing();
+CREATE TRIGGER parent_tbl_update_trig
+ AFTER UPDATE ON parent_tbl REFERENCING OLD TABLE AS old_table NEW TABLE AS new_table
+ FOR EACH STATEMENT EXECUTE PROCEDURE trigger_nothing();
+CREATE TRIGGER parent_tbl_delete_trig
+ AFTER DELETE ON parent_tbl REFERENCING OLD TABLE AS old_table
+ FOR EACH STATEMENT EXECUTE PROCEDURE trigger_nothing();
+INSERT INTO parent_tbl VALUES ('AAA', 42);
+ERROR: cannot collect transition tuples from child foreign tables
+COPY parent_tbl (a, b) FROM stdin;
+ERROR: cannot collect transition tuples from child foreign tables
+CONTEXT: COPY parent_tbl, line 1: "AAA 42"
+ALTER SERVER loopback OPTIONS (ADD batch_size '10');
+INSERT INTO parent_tbl VALUES ('AAA', 42);
+ERROR: cannot collect transition tuples from child foreign tables
+COPY parent_tbl (a, b) FROM stdin;
+ERROR: cannot collect transition tuples from child foreign tables
+CONTEXT: COPY parent_tbl, line 1: "AAA 42"
+ALTER SERVER loopback OPTIONS (DROP batch_size);
+EXPLAIN (VERBOSE, COSTS OFF)
+UPDATE parent_tbl SET b = b + 1;
+ QUERY PLAN
+------------------------------------------------------------------------------------------------
+ Update on public.parent_tbl
+ Foreign Update on public.foreign_tbl parent_tbl_1
+ Remote SQL: UPDATE public.local_tbl SET b = $2 WHERE ctid = $1
+ -> Foreign Scan on public.foreign_tbl parent_tbl_1
+ Output: (parent_tbl_1.b + 1), parent_tbl_1.tableoid, parent_tbl_1.ctid, parent_tbl_1.*
+ Remote SQL: SELECT a, b, ctid FROM public.local_tbl FOR UPDATE
+(6 rows)
+
+UPDATE parent_tbl SET b = b + 1;
+ERROR: cannot collect transition tuples from child foreign tables
+EXPLAIN (VERBOSE, COSTS OFF)
+DELETE FROM parent_tbl;
+ QUERY PLAN
+------------------------------------------------------------------
+ Delete on public.parent_tbl
+ Foreign Delete on public.foreign_tbl parent_tbl_1
+ Remote SQL: DELETE FROM public.local_tbl WHERE ctid = $1
+ -> Foreign Scan on public.foreign_tbl parent_tbl_1
+ Output: parent_tbl_1.tableoid, parent_tbl_1.ctid
+ Remote SQL: SELECT ctid FROM public.local_tbl FOR UPDATE
+(6 rows)
+
+DELETE FROM parent_tbl;
+ERROR: cannot collect transition tuples from child foreign tables
+ALTER TABLE parent_tbl DETACH PARTITION foreign_tbl;
+DROP TABLE parent_tbl;
+-- Test case for non-partition hierarchy
+CREATE TABLE parent_tbl (a text, b int);
+ALTER FOREIGN TABLE foreign_tbl INHERIT parent_tbl;
+CREATE TRIGGER parent_tbl_update_trig
+ AFTER UPDATE ON parent_tbl REFERENCING OLD TABLE AS old_table NEW TABLE AS new_table
+ FOR EACH STATEMENT EXECUTE PROCEDURE trigger_nothing();
+CREATE TRIGGER parent_tbl_delete_trig
+ AFTER DELETE ON parent_tbl REFERENCING OLD TABLE AS old_table
+ FOR EACH STATEMENT EXECUTE PROCEDURE trigger_nothing();
+EXPLAIN (VERBOSE, COSTS OFF)
+UPDATE parent_tbl SET b = b + 1;
+ QUERY PLAN
+------------------------------------------------------------------------------------------------------
+ Update on public.parent_tbl
+ Update on public.parent_tbl parent_tbl_1
+ Foreign Update on public.foreign_tbl parent_tbl_2
+ Remote SQL: UPDATE public.local_tbl SET b = $2 WHERE ctid = $1
+ -> Result
+ Output: (parent_tbl.b + 1), parent_tbl.tableoid, parent_tbl.ctid, (NULL::record)
+ -> Append
+ -> Seq Scan on public.parent_tbl parent_tbl_1
+ Output: parent_tbl_1.b, parent_tbl_1.tableoid, parent_tbl_1.ctid, NULL::record
+ -> Foreign Scan on public.foreign_tbl parent_tbl_2
+ Output: parent_tbl_2.b, parent_tbl_2.tableoid, parent_tbl_2.ctid, parent_tbl_2.*
+ Remote SQL: SELECT a, b, ctid FROM public.local_tbl FOR UPDATE
+(12 rows)
+
+UPDATE parent_tbl SET b = b + 1;
+ERROR: cannot collect transition tuples from child foreign tables
+EXPLAIN (VERBOSE, COSTS OFF)
+DELETE FROM parent_tbl;
+ QUERY PLAN
+------------------------------------------------------------------------
+ Delete on public.parent_tbl
+ Delete on public.parent_tbl parent_tbl_1
+ Foreign Delete on public.foreign_tbl parent_tbl_2
+ Remote SQL: DELETE FROM public.local_tbl WHERE ctid = $1
+ -> Append
+ -> Seq Scan on public.parent_tbl parent_tbl_1
+ Output: parent_tbl_1.tableoid, parent_tbl_1.ctid
+ -> Foreign Scan on public.foreign_tbl parent_tbl_2
+ Output: parent_tbl_2.tableoid, parent_tbl_2.ctid
+ Remote SQL: SELECT ctid FROM public.local_tbl FOR UPDATE
+(10 rows)
+
+DELETE FROM parent_tbl;
+ERROR: cannot collect transition tuples from child foreign tables
+ALTER FOREIGN TABLE foreign_tbl NO INHERIT parent_tbl;
+DROP TABLE parent_tbl;
+-- Cleanup
+DROP FUNCTION trigger_nothing();
+DROP FOREIGN TABLE foreign_tbl;
+DROP TABLE local_tbl;
-- ===================================================================
-- test inheritance features
-- ===================================================================
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index e534b40de3c..dab294a75be 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -2272,6 +2272,87 @@ EXPLAIN (verbose, costs off)
DELETE FROM rem1; -- can't be pushed down
DROP TRIGGER trig_row_after_delete ON rem1;
+-- We are allowed to create transition-table triggers on both kinds of
+-- inheritance even if they contain foreign tables as children, but currently
+-- collecting transition tuples from such foreign tables is not supported.
+
+CREATE TABLE local_tbl (a text, b int);
+CREATE FOREIGN TABLE foreign_tbl (a text, b int)
+ SERVER loopback OPTIONS (table_name 'local_tbl');
+
+INSERT INTO foreign_tbl VALUES ('AAA', 42);
+
+-- Test case for partition hierarchy
+CREATE TABLE parent_tbl (a text, b int) PARTITION BY LIST (a);
+ALTER TABLE parent_tbl ATTACH PARTITION foreign_tbl FOR VALUES IN ('AAA');
+
+CREATE FUNCTION trigger_nothing() RETURNS trigger
+ LANGUAGE plpgsql AS $$ BEGIN END; $$;
+
+CREATE TRIGGER parent_tbl_insert_trig
+ AFTER INSERT ON parent_tbl REFERENCING NEW TABLE AS new_table
+ FOR EACH STATEMENT EXECUTE PROCEDURE trigger_nothing();
+CREATE TRIGGER parent_tbl_update_trig
+ AFTER UPDATE ON parent_tbl REFERENCING OLD TABLE AS old_table NEW TABLE AS new_table
+ FOR EACH STATEMENT EXECUTE PROCEDURE trigger_nothing();
+CREATE TRIGGER parent_tbl_delete_trig
+ AFTER DELETE ON parent_tbl REFERENCING OLD TABLE AS old_table
+ FOR EACH STATEMENT EXECUTE PROCEDURE trigger_nothing();
+
+INSERT INTO parent_tbl VALUES ('AAA', 42);
+
+COPY parent_tbl (a, b) FROM stdin;
+AAA 42
+\.
+
+ALTER SERVER loopback OPTIONS (ADD batch_size '10');
+
+INSERT INTO parent_tbl VALUES ('AAA', 42);
+
+COPY parent_tbl (a, b) FROM stdin;
+AAA 42
+\.
+
+ALTER SERVER loopback OPTIONS (DROP batch_size);
+
+EXPLAIN (VERBOSE, COSTS OFF)
+UPDATE parent_tbl SET b = b + 1;
+UPDATE parent_tbl SET b = b + 1;
+
+EXPLAIN (VERBOSE, COSTS OFF)
+DELETE FROM parent_tbl;
+DELETE FROM parent_tbl;
+
+ALTER TABLE parent_tbl DETACH PARTITION foreign_tbl;
+DROP TABLE parent_tbl;
+
+-- Test case for non-partition hierarchy
+CREATE TABLE parent_tbl (a text, b int);
+ALTER FOREIGN TABLE foreign_tbl INHERIT parent_tbl;
+
+CREATE TRIGGER parent_tbl_update_trig
+ AFTER UPDATE ON parent_tbl REFERENCING OLD TABLE AS old_table NEW TABLE AS new_table
+ FOR EACH STATEMENT EXECUTE PROCEDURE trigger_nothing();
+CREATE TRIGGER parent_tbl_delete_trig
+ AFTER DELETE ON parent_tbl REFERENCING OLD TABLE AS old_table
+ FOR EACH STATEMENT EXECUTE PROCEDURE trigger_nothing();
+
+EXPLAIN (VERBOSE, COSTS OFF)
+UPDATE parent_tbl SET b = b + 1;
+UPDATE parent_tbl SET b = b + 1;
+
+EXPLAIN (VERBOSE, COSTS OFF)
+DELETE FROM parent_tbl;
+DELETE FROM parent_tbl;
+
+ALTER FOREIGN TABLE foreign_tbl NO INHERIT parent_tbl;
+DROP TABLE parent_tbl;
+
+-- Cleanup
+DROP FUNCTION trigger_nothing();
+DROP FOREIGN TABLE foreign_tbl;
+DROP TABLE local_tbl;
+
-- ===================================================================
-- test inheritance features
-- ===================================================================
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
index 67f8e70f9c1..f23dce9485a 100644
--- a/src/backend/commands/trigger.c
+++ b/src/backend/commands/trigger.c
@@ -2544,6 +2544,17 @@ ExecARInsertTriggers(EState *estate, ResultRelInfo *relinfo,
{
TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
+ Assert(relinfo);
+
+ if (relinfo->ri_FdwRoutine && transition_capture &&
+ transition_capture->tcs_insert_new_table)
+ {
+ Assert(relinfo->ri_RootResultRelInfo);
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot collect transition tuples from child foreign tables")));
+ }
+
if ((trigdesc && trigdesc->trig_insert_after_row) ||
(transition_capture && transition_capture->tcs_insert_new_table))
AfterTriggerSaveEvent(estate, relinfo, NULL, NULL,
@@ -2787,6 +2798,17 @@ ExecARDeleteTriggers(EState *estate,
{
TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
+ Assert(relinfo);
+
+ if (relinfo->ri_FdwRoutine && transition_capture &&
+ transition_capture->tcs_delete_old_table)
+ {
+ Assert(relinfo->ri_RootResultRelInfo);
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot collect transition tuples from child foreign tables")));
+ }
+
if ((trigdesc && trigdesc->trig_delete_after_row) ||
(transition_capture && transition_capture->tcs_delete_old_table))
{
@@ -3115,6 +3137,18 @@ ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
{
TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
+ Assert(relinfo);
+
+ if (relinfo->ri_FdwRoutine && transition_capture &&
+ (transition_capture->tcs_update_old_table ||
+ transition_capture->tcs_update_new_table))
+ {
+ Assert(relinfo->ri_RootResultRelInfo);
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot collect transition tuples from child foreign tables")));
+ }
+
if ((trigdesc && trigdesc->trig_update_after_row) ||
(transition_capture &&
(transition_capture->tcs_update_old_table ||
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 8a9f1d7a943..d3119288b1d 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -7224,6 +7224,8 @@ make_modifytable(PlannerInfo *root, Plan *subplan,
ModifyTable *node = makeNode(ModifyTable);
bool returning_old_or_new = false;
bool returning_old_or_new_valid = false;
+ bool transition_tables = false;
+ bool transition_tables_valid = false;
List *fdw_private_list;
Bitmapset *direct_modify_plans;
ListCell *lc;
@@ -7370,8 +7372,8 @@ make_modifytable(PlannerInfo *root, Plan *subplan,
* callback functions needed for that and (2) there are no local
* structures that need to be run for each modified row: row-level
* triggers on the foreign table, stored generated columns, WITH CHECK
- * OPTIONs from parent views, or Vars returning OLD/NEW in the
- * RETURNING list.
+ * OPTIONs from parent views, Vars returning OLD/NEW in the RETURNING
+ * list, or transition tables on the named relation.
*/
direct_modify = false;
if (fdwroutine != NULL &&
@@ -7383,7 +7385,10 @@ make_modifytable(PlannerInfo *root, Plan *subplan,
!has_row_triggers(root, rti, operation) &&
!has_stored_generated_columns(root, rti))
{
- /* returning_old_or_new is the same for all result relations */
+ /*
+ * returning_old_or_new and transition_tables are the same for all
+ * result relations, respectively
+ */
if (!returning_old_or_new_valid)
{
returning_old_or_new =
@@ -7392,7 +7397,18 @@ make_modifytable(PlannerInfo *root, Plan *subplan,
returning_old_or_new_valid = true;
}
if (!returning_old_or_new)
- direct_modify = fdwroutine->PlanDirectModify(root, node, rti, i);
+ {
+ if (!transition_tables_valid)
+ {
+ transition_tables = has_transition_tables(root,
+ nominalRelation,
+ operation);
+ transition_tables_valid = true;
+ }
+ if (!transition_tables)
+ direct_modify = fdwroutine->PlanDirectModify(root, node,
+ rti, i);
+ }
}
if (direct_modify)
direct_modify_plans = bms_add_member(direct_modify_plans, i);
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 59233b64730..1321f41c4b7 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -2303,6 +2303,59 @@ has_row_triggers(PlannerInfo *root, Index rti, CmdType event)
return result;
}
+/*
+ * has_transition_tables
+ *
+ * Detect whether the specified relation has any transition tables for event.
+ */
+bool
+has_transition_tables(PlannerInfo *root, Index rti, CmdType event)
+{
+ RangeTblEntry *rte = planner_rt_fetch(rti, root);
+ Relation relation;
+ TriggerDesc *trigDesc;
+ bool result = false;
+
+ /* Currently foreign tables cannot have transition tables */
+ if (rte->rtekind == RTE_RELATION &&
+ rte->relkind == RELKIND_FOREIGN_TABLE)
+ return result;
+
+ /* Assume we already have adequate lock */
+ relation = table_open(rte->relid, NoLock);
+
+ trigDesc = relation->trigdesc;
+ switch (event)
+ {
+ case CMD_INSERT:
+ if (trigDesc &&
+ trigDesc->trig_insert_new_table)
+ result = true;
+ break;
+ case CMD_UPDATE:
+ if (trigDesc &&
+ (trigDesc->trig_update_old_table ||
+ trigDesc->trig_update_new_table))
+ result = true;
+ break;
+ case CMD_DELETE:
+ if (trigDesc &&
+ trigDesc->trig_delete_old_table)
+ result = true;
+ break;
+ /* There is no separate event for MERGE, only INSERT/UPDATE/DELETE */
+ case CMD_MERGE:
+ result = false;
+ break;
+ default:
+ elog(ERROR, "unrecognized CmdType: %d", (int) event);
+ break;
+ }
+
+ table_close(relation, NoLock);
+ return result;
+}
+
/*
* has_stored_generated_columns
*
diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h
index cd74e4b1e8b..ac80458467b 100644
--- a/src/include/optimizer/plancat.h
+++ b/src/include/optimizer/plancat.h
@@ -72,6 +72,8 @@ extern double get_function_rows(PlannerInfo *root, Oid funcid, Node *node);
extern bool has_row_triggers(PlannerInfo *root, Index rti, CmdType event);
+extern bool has_transition_tables(PlannerInfo *root, Index rti, CmdType event);
+
extern bool has_stored_generated_columns(PlannerInfo *root, Index rti);
extern Bitmapset *get_dependent_generated_columns(PlannerInfo *root, Index rti,
Fujita-san,
On Wed, Jul 9, 2025 at 5:07 PM Etsuro Fujita <etsuro.fujita@gmail.com> wrote:
On Tue, Jul 1, 2025 at 11:55 AM Etsuro Fujita <etsuro.fujita@gmail.com> wrote:
So I would
like to propose to fix this by the following: 1) disable using direct
modify to modify foreign-table partitions if there are any
transition-table triggers on the partitioned table, and then 2) throw
an error in ExecARInsertTriggers()/ExecARUpdateTriggers()/ExecARDeleteTriggers()
if they collects transition tuple(s) from a foreign-table partition.Attached is a WIP patch for that.
Here is an updated version of the patch, in which I added 1) an Assert
to ExecAR* functions to ensure that the passed-in ResultRelInfo
pointer is not NULL, 2) added/tweaked comments a bit more, and 3)
added regression tests.
Thanks for the new patch. LGTM.
While reading it, I noticed that the functions performing table_open()
are repeatedly called in this condition, which runs for every
qualifying foreign child relations:
if (fdwroutine != NULL &&
fdwroutine->PlanDirectModify != NULL &&
fdwroutine->BeginDirectModify != NULL &&
fdwroutine->IterateDirectModify != NULL &&
fdwroutine->EndDirectModify != NULL &&
withCheckOptionLists == NIL &&
!has_row_triggers(root, rti, operation) &&
!has_stored_generated_columns(root, rti))
That seems a bit expensive. It might be worth using *_valid flags to
avoid redundant table_open() calls like you're doing for transition
table checking. Maybe something to consider in a separate patch.
--
Thanks, Amit Langote
On Thu, Jul 10, 2025 at 22:20 Amit Langote <amitlangote09@gmail.com> wrote:
Fujita-san,
On Wed, Jul 9, 2025 at 5:07 PM Etsuro Fujita <etsuro.fujita@gmail.com>
wrote:On Tue, Jul 1, 2025 at 11:55 AM Etsuro Fujita <etsuro.fujita@gmail.com>
wrote:
So I would
like to propose to fix this by the following: 1) disable using direct
modify to modify foreign-table partitions if there are any
transition-table triggers on the partitioned table, and then 2) throw
an error inExecARInsertTriggers()/ExecARUpdateTriggers()/ExecARDeleteTriggers()
if they collects transition tuple(s) from a foreign-table partition.
Attached is a WIP patch for that.
Here is an updated version of the patch, in which I added 1) an Assert
to ExecAR* functions to ensure that the passed-in ResultRelInfo
pointer is not NULL, 2) added/tweaked comments a bit more, and 3)
added regression tests.Thanks for the new patch. LGTM.
While reading it, I noticed that the functions performing table_open()
are repeatedly called in this condition, which runs for every
qualifying foreign child relations:if (fdwroutine != NULL &&
fdwroutine->PlanDirectModify != NULL &&
fdwroutine->BeginDirectModify != NULL &&
fdwroutine->IterateDirectModify != NULL &&
fdwroutine->EndDirectModify != NULL &&
withCheckOptionLists == NIL &&
!has_row_triggers(root, rti, operation) &&
!has_stored_generated_columns(root, rti))That seems a bit expensive. It might be worth using *_valid flags to
avoid redundant table_open() calls like you're doing for transition
table checking. Maybe something to consider in a separate patch.
Ah, scratch that because I missed that transition table checking is done
for the “named” relation and these are checking it for child relations.
- Amit
Show quoted text
Amit-san,
On Thu, Jul 10, 2025 at 11:54 PM Amit Langote <amitlangote09@gmail.com> wrote:
On Thu, Jul 10, 2025 at 22:20 Amit Langote <amitlangote09@gmail.com> wrote:
On Wed, Jul 9, 2025 at 5:07 PM Etsuro Fujita <etsuro.fujita@gmail.com> wrote:
Here is an updated version of the patch, in which I added 1) an Assert
to ExecAR* functions to ensure that the passed-in ResultRelInfo
pointer is not NULL, 2) added/tweaked comments a bit more, and 3)
added regression tests.Thanks for the new patch. LGTM.
Cool!
While reading it, I noticed that the functions performing table_open()
are repeatedly called in this condition, which runs for every
qualifying foreign child relations:if (fdwroutine != NULL &&
fdwroutine->PlanDirectModify != NULL &&
fdwroutine->BeginDirectModify != NULL &&
fdwroutine->IterateDirectModify != NULL &&
fdwroutine->EndDirectModify != NULL &&
withCheckOptionLists == NIL &&
!has_row_triggers(root, rti, operation) &&
!has_stored_generated_columns(root, rti))That seems a bit expensive. It might be worth using *_valid flags to
avoid redundant table_open() calls like you're doing for transition
table checking. Maybe something to consider in a separate patch.
Ah, scratch that because I missed that transition table checking is done for the “named” relation and these are checking it for child relations.
Ok, thanks for taking the time for this patch!
After re-reading the patch I noticed two minor things:
* The existing code in ExecAR* functions already dereferences the
passed-in ResultRelInfo pointer without checking that it is not NULL.
The Assert I added to those functions would be an overkill, so I
removed it. Sorry for the back and forth.
* I added a trigger function trigger_nothing() in the regression
tests, but I noticed an existing trigger function above the tests. To
make the tests a bit small, I replaced trigger_nothing() with the
existing trigger function and removed trigger_nothing().
Attached is a new version of the patch.
Best regards,
Etsuro Fujita
Attachments:
fix-problem-with-transition-tables-v3.patchapplication/octet-stream; name=fix-problem-with-transition-tables-v3.patchDownload
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 2185b42bb4f..fa9ee716067 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -8212,6 +8212,119 @@ DELETE FROM rem1; -- can't be pushed down
(5 rows)
DROP TRIGGER trig_row_after_delete ON rem1;
+-- We are allowed to create transition-table triggers on both kinds of
+-- inheritance even if they contain foreign tables as children, but currently
+-- collecting transition tuples from such foreign tables is not supported.
+CREATE TABLE local_tbl (a text, b int);
+CREATE FOREIGN TABLE foreign_tbl (a text, b int)
+ SERVER loopback OPTIONS (table_name 'local_tbl');
+INSERT INTO foreign_tbl VALUES ('AAA', 42);
+-- Test case for partition hierarchy
+CREATE TABLE parent_tbl (a text, b int) PARTITION BY LIST (a);
+ALTER TABLE parent_tbl ATTACH PARTITION foreign_tbl FOR VALUES IN ('AAA');
+CREATE TRIGGER parent_tbl_insert_trig
+ AFTER INSERT ON parent_tbl REFERENCING NEW TABLE AS new_table
+ FOR EACH STATEMENT EXECUTE PROCEDURE trigger_func();
+CREATE TRIGGER parent_tbl_update_trig
+ AFTER UPDATE ON parent_tbl REFERENCING OLD TABLE AS old_table NEW TABLE AS new_table
+ FOR EACH STATEMENT EXECUTE PROCEDURE trigger_func();
+CREATE TRIGGER parent_tbl_delete_trig
+ AFTER DELETE ON parent_tbl REFERENCING OLD TABLE AS old_table
+ FOR EACH STATEMENT EXECUTE PROCEDURE trigger_func();
+INSERT INTO parent_tbl VALUES ('AAA', 42);
+ERROR: cannot collect transition tuples from child foreign tables
+COPY parent_tbl (a, b) FROM stdin;
+ERROR: cannot collect transition tuples from child foreign tables
+CONTEXT: COPY parent_tbl, line 1: "AAA 42"
+ALTER SERVER loopback OPTIONS (ADD batch_size '10');
+INSERT INTO parent_tbl VALUES ('AAA', 42);
+ERROR: cannot collect transition tuples from child foreign tables
+COPY parent_tbl (a, b) FROM stdin;
+ERROR: cannot collect transition tuples from child foreign tables
+CONTEXT: COPY parent_tbl, line 1: "AAA 42"
+ALTER SERVER loopback OPTIONS (DROP batch_size);
+EXPLAIN (VERBOSE, COSTS OFF)
+UPDATE parent_tbl SET b = b + 1;
+ QUERY PLAN
+------------------------------------------------------------------------------------------------
+ Update on public.parent_tbl
+ Foreign Update on public.foreign_tbl parent_tbl_1
+ Remote SQL: UPDATE public.local_tbl SET b = $2 WHERE ctid = $1
+ -> Foreign Scan on public.foreign_tbl parent_tbl_1
+ Output: (parent_tbl_1.b + 1), parent_tbl_1.tableoid, parent_tbl_1.ctid, parent_tbl_1.*
+ Remote SQL: SELECT a, b, ctid FROM public.local_tbl FOR UPDATE
+(6 rows)
+
+UPDATE parent_tbl SET b = b + 1;
+ERROR: cannot collect transition tuples from child foreign tables
+EXPLAIN (VERBOSE, COSTS OFF)
+DELETE FROM parent_tbl;
+ QUERY PLAN
+------------------------------------------------------------------
+ Delete on public.parent_tbl
+ Foreign Delete on public.foreign_tbl parent_tbl_1
+ Remote SQL: DELETE FROM public.local_tbl WHERE ctid = $1
+ -> Foreign Scan on public.foreign_tbl parent_tbl_1
+ Output: parent_tbl_1.tableoid, parent_tbl_1.ctid
+ Remote SQL: SELECT ctid FROM public.local_tbl FOR UPDATE
+(6 rows)
+
+DELETE FROM parent_tbl;
+ERROR: cannot collect transition tuples from child foreign tables
+ALTER TABLE parent_tbl DETACH PARTITION foreign_tbl;
+DROP TABLE parent_tbl;
+-- Test case for non-partition hierarchy
+CREATE TABLE parent_tbl (a text, b int);
+ALTER FOREIGN TABLE foreign_tbl INHERIT parent_tbl;
+CREATE TRIGGER parent_tbl_update_trig
+ AFTER UPDATE ON parent_tbl REFERENCING OLD TABLE AS old_table NEW TABLE AS new_table
+ FOR EACH STATEMENT EXECUTE PROCEDURE trigger_func();
+CREATE TRIGGER parent_tbl_delete_trig
+ AFTER DELETE ON parent_tbl REFERENCING OLD TABLE AS old_table
+ FOR EACH STATEMENT EXECUTE PROCEDURE trigger_func();
+EXPLAIN (VERBOSE, COSTS OFF)
+UPDATE parent_tbl SET b = b + 1;
+ QUERY PLAN
+------------------------------------------------------------------------------------------------------
+ Update on public.parent_tbl
+ Update on public.parent_tbl parent_tbl_1
+ Foreign Update on public.foreign_tbl parent_tbl_2
+ Remote SQL: UPDATE public.local_tbl SET b = $2 WHERE ctid = $1
+ -> Result
+ Output: (parent_tbl.b + 1), parent_tbl.tableoid, parent_tbl.ctid, (NULL::record)
+ -> Append
+ -> Seq Scan on public.parent_tbl parent_tbl_1
+ Output: parent_tbl_1.b, parent_tbl_1.tableoid, parent_tbl_1.ctid, NULL::record
+ -> Foreign Scan on public.foreign_tbl parent_tbl_2
+ Output: parent_tbl_2.b, parent_tbl_2.tableoid, parent_tbl_2.ctid, parent_tbl_2.*
+ Remote SQL: SELECT a, b, ctid FROM public.local_tbl FOR UPDATE
+(12 rows)
+
+UPDATE parent_tbl SET b = b + 1;
+ERROR: cannot collect transition tuples from child foreign tables
+EXPLAIN (VERBOSE, COSTS OFF)
+DELETE FROM parent_tbl;
+ QUERY PLAN
+------------------------------------------------------------------------
+ Delete on public.parent_tbl
+ Delete on public.parent_tbl parent_tbl_1
+ Foreign Delete on public.foreign_tbl parent_tbl_2
+ Remote SQL: DELETE FROM public.local_tbl WHERE ctid = $1
+ -> Append
+ -> Seq Scan on public.parent_tbl parent_tbl_1
+ Output: parent_tbl_1.tableoid, parent_tbl_1.ctid
+ -> Foreign Scan on public.foreign_tbl parent_tbl_2
+ Output: parent_tbl_2.tableoid, parent_tbl_2.ctid
+ Remote SQL: SELECT ctid FROM public.local_tbl FOR UPDATE
+(10 rows)
+
+DELETE FROM parent_tbl;
+ERROR: cannot collect transition tuples from child foreign tables
+ALTER FOREIGN TABLE foreign_tbl NO INHERIT parent_tbl;
+DROP TABLE parent_tbl;
+-- Cleanup
+DROP FOREIGN TABLE foreign_tbl;
+DROP TABLE local_tbl;
-- ===================================================================
-- test inheritance features
-- ===================================================================
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index e534b40de3c..b449e122abe 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -2272,6 +2272,83 @@ EXPLAIN (verbose, costs off)
DELETE FROM rem1; -- can't be pushed down
DROP TRIGGER trig_row_after_delete ON rem1;
+-- We are allowed to create transition-table triggers on both kinds of
+-- inheritance even if they contain foreign tables as children, but currently
+-- collecting transition tuples from such foreign tables is not supported.
+
+CREATE TABLE local_tbl (a text, b int);
+CREATE FOREIGN TABLE foreign_tbl (a text, b int)
+ SERVER loopback OPTIONS (table_name 'local_tbl');
+
+INSERT INTO foreign_tbl VALUES ('AAA', 42);
+
+-- Test case for partition hierarchy
+CREATE TABLE parent_tbl (a text, b int) PARTITION BY LIST (a);
+ALTER TABLE parent_tbl ATTACH PARTITION foreign_tbl FOR VALUES IN ('AAA');
+
+CREATE TRIGGER parent_tbl_insert_trig
+ AFTER INSERT ON parent_tbl REFERENCING NEW TABLE AS new_table
+ FOR EACH STATEMENT EXECUTE PROCEDURE trigger_func();
+CREATE TRIGGER parent_tbl_update_trig
+ AFTER UPDATE ON parent_tbl REFERENCING OLD TABLE AS old_table NEW TABLE AS new_table
+ FOR EACH STATEMENT EXECUTE PROCEDURE trigger_func();
+CREATE TRIGGER parent_tbl_delete_trig
+ AFTER DELETE ON parent_tbl REFERENCING OLD TABLE AS old_table
+ FOR EACH STATEMENT EXECUTE PROCEDURE trigger_func();
+
+INSERT INTO parent_tbl VALUES ('AAA', 42);
+
+COPY parent_tbl (a, b) FROM stdin;
+AAA 42
+\.
+
+ALTER SERVER loopback OPTIONS (ADD batch_size '10');
+
+INSERT INTO parent_tbl VALUES ('AAA', 42);
+
+COPY parent_tbl (a, b) FROM stdin;
+AAA 42
+\.
+
+ALTER SERVER loopback OPTIONS (DROP batch_size);
+
+EXPLAIN (VERBOSE, COSTS OFF)
+UPDATE parent_tbl SET b = b + 1;
+UPDATE parent_tbl SET b = b + 1;
+
+EXPLAIN (VERBOSE, COSTS OFF)
+DELETE FROM parent_tbl;
+DELETE FROM parent_tbl;
+
+ALTER TABLE parent_tbl DETACH PARTITION foreign_tbl;
+DROP TABLE parent_tbl;
+
+-- Test case for non-partition hierarchy
+CREATE TABLE parent_tbl (a text, b int);
+ALTER FOREIGN TABLE foreign_tbl INHERIT parent_tbl;
+
+CREATE TRIGGER parent_tbl_update_trig
+ AFTER UPDATE ON parent_tbl REFERENCING OLD TABLE AS old_table NEW TABLE AS new_table
+ FOR EACH STATEMENT EXECUTE PROCEDURE trigger_func();
+CREATE TRIGGER parent_tbl_delete_trig
+ AFTER DELETE ON parent_tbl REFERENCING OLD TABLE AS old_table
+ FOR EACH STATEMENT EXECUTE PROCEDURE trigger_func();
+
+EXPLAIN (VERBOSE, COSTS OFF)
+UPDATE parent_tbl SET b = b + 1;
+UPDATE parent_tbl SET b = b + 1;
+
+EXPLAIN (VERBOSE, COSTS OFF)
+DELETE FROM parent_tbl;
+DELETE FROM parent_tbl;
+
+ALTER FOREIGN TABLE foreign_tbl NO INHERIT parent_tbl;
+DROP TABLE parent_tbl;
+
+-- Cleanup
+DROP FOREIGN TABLE foreign_tbl;
+DROP TABLE local_tbl;
+
-- ===================================================================
-- test inheritance features
-- ===================================================================
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
index 67f8e70f9c1..ee5f27d92e4 100644
--- a/src/backend/commands/trigger.c
+++ b/src/backend/commands/trigger.c
@@ -2544,6 +2544,15 @@ ExecARInsertTriggers(EState *estate, ResultRelInfo *relinfo,
{
TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
+ if (relinfo->ri_FdwRoutine && transition_capture &&
+ transition_capture->tcs_insert_new_table)
+ {
+ Assert(relinfo->ri_RootResultRelInfo);
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot collect transition tuples from child foreign tables")));
+ }
+
if ((trigdesc && trigdesc->trig_insert_after_row) ||
(transition_capture && transition_capture->tcs_insert_new_table))
AfterTriggerSaveEvent(estate, relinfo, NULL, NULL,
@@ -2787,6 +2796,15 @@ ExecARDeleteTriggers(EState *estate,
{
TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
+ if (relinfo->ri_FdwRoutine && transition_capture &&
+ transition_capture->tcs_delete_old_table)
+ {
+ Assert(relinfo->ri_RootResultRelInfo);
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot collect transition tuples from child foreign tables")));
+ }
+
if ((trigdesc && trigdesc->trig_delete_after_row) ||
(transition_capture && transition_capture->tcs_delete_old_table))
{
@@ -3115,6 +3133,16 @@ ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
{
TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
+ if (relinfo->ri_FdwRoutine && transition_capture &&
+ (transition_capture->tcs_update_old_table ||
+ transition_capture->tcs_update_new_table))
+ {
+ Assert(relinfo->ri_RootResultRelInfo);
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot collect transition tuples from child foreign tables")));
+ }
+
if ((trigdesc && trigdesc->trig_update_after_row) ||
(transition_capture &&
(transition_capture->tcs_update_old_table ||
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 8a9f1d7a943..d3119288b1d 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -7224,6 +7224,8 @@ make_modifytable(PlannerInfo *root, Plan *subplan,
ModifyTable *node = makeNode(ModifyTable);
bool returning_old_or_new = false;
bool returning_old_or_new_valid = false;
+ bool transition_tables = false;
+ bool transition_tables_valid = false;
List *fdw_private_list;
Bitmapset *direct_modify_plans;
ListCell *lc;
@@ -7370,8 +7372,8 @@ make_modifytable(PlannerInfo *root, Plan *subplan,
* callback functions needed for that and (2) there are no local
* structures that need to be run for each modified row: row-level
* triggers on the foreign table, stored generated columns, WITH CHECK
- * OPTIONs from parent views, or Vars returning OLD/NEW in the
- * RETURNING list.
+ * OPTIONs from parent views, Vars returning OLD/NEW in the RETURNING
+ * list, or transition tables on the named relation.
*/
direct_modify = false;
if (fdwroutine != NULL &&
@@ -7383,7 +7385,10 @@ make_modifytable(PlannerInfo *root, Plan *subplan,
!has_row_triggers(root, rti, operation) &&
!has_stored_generated_columns(root, rti))
{
- /* returning_old_or_new is the same for all result relations */
+ /*
+ * returning_old_or_new and transition_tables are the same for all
+ * result relations, respectively
+ */
if (!returning_old_or_new_valid)
{
returning_old_or_new =
@@ -7392,7 +7397,18 @@ make_modifytable(PlannerInfo *root, Plan *subplan,
returning_old_or_new_valid = true;
}
if (!returning_old_or_new)
- direct_modify = fdwroutine->PlanDirectModify(root, node, rti, i);
+ {
+ if (!transition_tables_valid)
+ {
+ transition_tables = has_transition_tables(root,
+ nominalRelation,
+ operation);
+ transition_tables_valid = true;
+ }
+ if (!transition_tables)
+ direct_modify = fdwroutine->PlanDirectModify(root, node,
+ rti, i);
+ }
}
if (direct_modify)
direct_modify_plans = bms_add_member(direct_modify_plans, i);
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 59233b64730..1321f41c4b7 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -2303,6 +2303,59 @@ has_row_triggers(PlannerInfo *root, Index rti, CmdType event)
return result;
}
+/*
+ * has_transition_tables
+ *
+ * Detect whether the specified relation has any transition tables for event.
+ */
+bool
+has_transition_tables(PlannerInfo *root, Index rti, CmdType event)
+{
+ RangeTblEntry *rte = planner_rt_fetch(rti, root);
+ Relation relation;
+ TriggerDesc *trigDesc;
+ bool result = false;
+
+ /* Currently foreign tables cannot have transition tables */
+ if (rte->rtekind == RTE_RELATION &&
+ rte->relkind == RELKIND_FOREIGN_TABLE)
+ return result;
+
+ /* Assume we already have adequate lock */
+ relation = table_open(rte->relid, NoLock);
+
+ trigDesc = relation->trigdesc;
+ switch (event)
+ {
+ case CMD_INSERT:
+ if (trigDesc &&
+ trigDesc->trig_insert_new_table)
+ result = true;
+ break;
+ case CMD_UPDATE:
+ if (trigDesc &&
+ (trigDesc->trig_update_old_table ||
+ trigDesc->trig_update_new_table))
+ result = true;
+ break;
+ case CMD_DELETE:
+ if (trigDesc &&
+ trigDesc->trig_delete_old_table)
+ result = true;
+ break;
+ /* There is no separate event for MERGE, only INSERT/UPDATE/DELETE */
+ case CMD_MERGE:
+ result = false;
+ break;
+ default:
+ elog(ERROR, "unrecognized CmdType: %d", (int) event);
+ break;
+ }
+
+ table_close(relation, NoLock);
+ return result;
+}
+
/*
* has_stored_generated_columns
*
diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h
index cd74e4b1e8b..ac80458467b 100644
--- a/src/include/optimizer/plancat.h
+++ b/src/include/optimizer/plancat.h
@@ -72,6 +72,8 @@ extern double get_function_rows(PlannerInfo *root, Oid funcid, Node *node);
extern bool has_row_triggers(PlannerInfo *root, Index rti, CmdType event);
+extern bool has_transition_tables(PlannerInfo *root, Index rti, CmdType event);
+
extern bool has_stored_generated_columns(PlannerInfo *root, Index rti);
extern Bitmapset *get_dependent_generated_columns(PlannerInfo *root, Index rti,
On Mon, Jul 14, 2025 at 8:00 PM Etsuro Fujita <etsuro.fujita@gmail.com> wrote:
After re-reading the patch I noticed two minor things:
* The existing code in ExecAR* functions already dereferences the
passed-in ResultRelInfo pointer without checking that it is not NULL.
The Assert I added to those functions would be an overkill, so I
removed it. Sorry for the back and forth.* I added a trigger function trigger_nothing() in the regression
tests, but I noticed an existing trigger function above the tests. To
make the tests a bit small, I replaced trigger_nothing() with the
existing trigger function and removed trigger_nothing().Attached is a new version of the patch.
I have pushed this and back-patched it to all supported versions.
Best regards,
Etsuro Fujita