postgres_fdw: batch inserts vs. before row triggers
Hi,
(I added Tomas in CC:.)
One thing I noticed while reviewing the patch for fast copying into
foreign tables/partitions using batch insert [1]/messages/by-id/bc489202-9855-7550-d64c-ad2d83c24867@postgrespro.ru is that in
postgres_fdw we allow batch-inserting into foreign tables/partitions
with before row triggers, but such triggers might query the target
table/partition and act differently if the tuples that have already
been processed and prepared for batch-insertion are not there. Here
is an example using HEAD:
create extension postgres_fdw;
create server loopback foreign data wrapper postgres_fdw options
(dbname 'postgres');
create user mapping for current_user server loopback;
create table t (a int);
create foreign table ft (a int) server loopback options (table_name 't');
create function ft_rowcount_tf() returns trigger as $$ begin raise
notice '%: rows = %', tg_name, (select count(*) from ft); return new;
end; $$ language plpgsql;
create trigger ft_rowcount before insert on ft for each row execute
function ft_rowcount_tf();
insert into ft select i from generate_series(1, 10) i;
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 1
NOTICE: ft_rowcount: rows = 2
NOTICE: ft_rowcount: rows = 3
NOTICE: ft_rowcount: rows = 4
NOTICE: ft_rowcount: rows = 5
NOTICE: ft_rowcount: rows = 6
NOTICE: ft_rowcount: rows = 7
NOTICE: ft_rowcount: rows = 8
NOTICE: ft_rowcount: rows = 9
INSERT 0 10
This looks good, but when batch insert is enabled, the trigger
produces incorrect results:
alter foreign table ft options (add batch_size '10');
delete from ft;
insert into ft select i from generate_series(1, 10) i;
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 0
INSERT 0 10
So I think we should disable batch insert in such cases, just as we
disable multi insert when there are any before row triggers on the
target (local) tables/partitions in copyfrom.c. Attached is a patch
for that.
Best regards,
Etsuro Fujita
[1]: /messages/by-id/bc489202-9855-7550-d64c-ad2d83c24867@postgrespro.ru
Attachments:
postgres_fdw-disable-batching.patchapplication/octet-stream; name=postgres_fdw-disable-batching.patchDownload
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 30e95f585f..477de09a87 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -10008,6 +10008,34 @@ SELECT COUNT(*) FROM ftable;
2
(1 row)
+-- Disable batch inserting into foreign tables with BEFORE ROW INSERT triggers
+-- even if the batch_size option is enabled.
+ALTER FOREIGN TABLE ftable OPTIONS ( SET batch_size '10' );
+CREATE TRIGGER trig_row_before BEFORE INSERT ON ftable
+FOR EACH ROW EXECUTE PROCEDURE trigger_data(23,'skidoo');
+EXPLAIN (VERBOSE, COSTS OFF) INSERT INTO ftable VALUES (3), (4);
+ QUERY PLAN
+-------------------------------------------------------------
+ Insert on public.ftable
+ Remote SQL: INSERT INTO public.batch_table(x) VALUES ($1)
+ Batch Size: 1
+ -> Values Scan on "*VALUES*"
+ Output: "*VALUES*".column1
+(5 rows)
+
+INSERT INTO ftable VALUES (3), (4);
+NOTICE: trig_row_before(23, skidoo) BEFORE ROW INSERT ON ftable
+NOTICE: NEW: (3)
+NOTICE: trig_row_before(23, skidoo) BEFORE ROW INSERT ON ftable
+NOTICE: NEW: (4)
+SELECT COUNT(*) FROM ftable;
+ count
+-------
+ 4
+(1 row)
+
+-- Clean up
+DROP TRIGGER trig_row_before ON ftable;
DROP FOREIGN TABLE ftable;
DROP TABLE batch_table;
-- Use partitioning
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index c51dd68722..149e7309d0 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -2012,8 +2012,8 @@ postgresExecForeignBatchInsert(EState *estate,
* Determine the maximum number of tuples that can be inserted in bulk
*
* Returns the batch size specified for server or table. When batching is not
- * allowed (e.g. for tables with AFTER ROW triggers or with RETURNING clause),
- * returns 1.
+ * allowed (e.g. for tables with BEFORE/AFTER ROW triggers or with RETURNING
+ * clause), returns 1.
*/
static int
postgresGetForeignModifyBatchSize(ResultRelInfo *resultRelInfo)
@@ -2042,10 +2042,19 @@ postgresGetForeignModifyBatchSize(ResultRelInfo *resultRelInfo)
else
batch_size = get_batch_size_option(resultRelInfo->ri_RelationDesc);
- /* Disable batching when we have to use RETURNING. */
+ /*
+ * Disable batching when we have to use RETURNING or there are any
+ * BEFORE/AFTER ROW INSERT triggers on the target table.
+ *
+ * When there are any BEFORE ROW INSERT triggers on the table, we can't
+ * support it, because such triggers might query the table we're inserting
+ * into and act differently if the tuples that have already been processed
+ * and prepared for insertion are not there.
+ */
if (resultRelInfo->ri_projectReturning != NULL ||
(resultRelInfo->ri_TrigDesc &&
- resultRelInfo->ri_TrigDesc->trig_insert_after_row))
+ (resultRelInfo->ri_TrigDesc->trig_insert_before_row ||
+ resultRelInfo->ri_TrigDesc->trig_insert_after_row)))
return 1;
/*
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index ea35e61eb8..ed181dedff 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -3135,6 +3135,18 @@ CREATE FOREIGN TABLE ftable ( x int ) SERVER loopback OPTIONS ( table_name 'batc
EXPLAIN (VERBOSE, COSTS OFF) INSERT INTO ftable VALUES (1), (2);
INSERT INTO ftable VALUES (1), (2);
SELECT COUNT(*) FROM ftable;
+
+-- Disable batch inserting into foreign tables with BEFORE ROW INSERT triggers
+-- even if the batch_size option is enabled.
+ALTER FOREIGN TABLE ftable OPTIONS ( SET batch_size '10' );
+CREATE TRIGGER trig_row_before BEFORE INSERT ON ftable
+FOR EACH ROW EXECUTE PROCEDURE trigger_data(23,'skidoo');
+EXPLAIN (VERBOSE, COSTS OFF) INSERT INTO ftable VALUES (3), (4);
+INSERT INTO ftable VALUES (3), (4);
+SELECT COUNT(*) FROM ftable;
+
+-- Clean up
+DROP TRIGGER trig_row_before ON ftable;
DROP FOREIGN TABLE ftable;
DROP TABLE batch_table;
On Sun, Apr 17, 2022 at 6:20 PM Etsuro Fujita <etsuro.fujita@gmail.com> wrote:
Here
is an example using HEAD:create extension postgres_fdw;
create server loopback foreign data wrapper postgres_fdw options
(dbname 'postgres');
create user mapping for current_user server loopback;
create table t (a int);
create foreign table ft (a int) server loopback options (table_name 't');
create function ft_rowcount_tf() returns trigger as $$ begin raise
notice '%: rows = %', tg_name, (select count(*) from ft); return new;
end; $$ language plpgsql;
create trigger ft_rowcount before insert on ft for each row execute
function ft_rowcount_tf();insert into ft select i from generate_series(1, 10) i;
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 1
NOTICE: ft_rowcount: rows = 2
NOTICE: ft_rowcount: rows = 3
NOTICE: ft_rowcount: rows = 4
NOTICE: ft_rowcount: rows = 5
NOTICE: ft_rowcount: rows = 6
NOTICE: ft_rowcount: rows = 7
NOTICE: ft_rowcount: rows = 8
NOTICE: ft_rowcount: rows = 9
INSERT 0 10This looks good, but when batch insert is enabled, the trigger
produces incorrect results:alter foreign table ft options (add batch_size '10');
delete from ft;insert into ft select i from generate_series(1, 10) i;
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 0
INSERT 0 10
Actually, the results are correct, as we do batch-insert here. But I
just wanted to show that the trigger behaves *differently* when doing
batch-insert.
So I think we should disable batch insert in such cases, just as we
disable multi insert when there are any before row triggers on the
target (local) tables/partitions in copyfrom.c. Attached is a patch
for that.
If there are no objections from Tomas or anyone else, I'll commit the patch.
Best regards,
Etsuro Fujita
On 4/19/22 11:16, Etsuro Fujita wrote:
On Sun, Apr 17, 2022 at 6:20 PM Etsuro Fujita <etsuro.fujita@gmail.com> wrote:
Here
is an example using HEAD:create extension postgres_fdw;
create server loopback foreign data wrapper postgres_fdw options
(dbname 'postgres');
create user mapping for current_user server loopback;
create table t (a int);
create foreign table ft (a int) server loopback options (table_name 't');
create function ft_rowcount_tf() returns trigger as $$ begin raise
notice '%: rows = %', tg_name, (select count(*) from ft); return new;
end; $$ language plpgsql;
create trigger ft_rowcount before insert on ft for each row execute
function ft_rowcount_tf();insert into ft select i from generate_series(1, 10) i;
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 1
NOTICE: ft_rowcount: rows = 2
NOTICE: ft_rowcount: rows = 3
NOTICE: ft_rowcount: rows = 4
NOTICE: ft_rowcount: rows = 5
NOTICE: ft_rowcount: rows = 6
NOTICE: ft_rowcount: rows = 7
NOTICE: ft_rowcount: rows = 8
NOTICE: ft_rowcount: rows = 9
INSERT 0 10This looks good, but when batch insert is enabled, the trigger
produces incorrect results:alter foreign table ft options (add batch_size '10');
delete from ft;insert into ft select i from generate_series(1, 10) i;
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 0
INSERT 0 10Actually, the results are correct, as we do batch-insert here. But I
just wanted to show that the trigger behaves *differently* when doing
batch-insert.So I think we should disable batch insert in such cases, just as we
disable multi insert when there are any before row triggers on the
target (local) tables/partitions in copyfrom.c. Attached is a patch
for that.If there are no objections from Tomas or anyone else, I'll commit the patch.
+1, I think it's a bug to do batch insert in this case.
regards
--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
Hi,
On Tue, Apr 19, 2022 at 9:00 PM Tomas Vondra
<tomas.vondra@enterprisedb.com> wrote:
On 4/19/22 11:16, Etsuro Fujita wrote:
On Sun, Apr 17, 2022 at 6:20 PM Etsuro Fujita <etsuro.fujita@gmail.com> wrote:
So I think we should disable batch insert in such cases, just as we
disable multi insert when there are any before row triggers on the
target (local) tables/partitions in copyfrom.c. Attached is a patch
for that.If there are no objections from Tomas or anyone else, I'll commit the patch.
+1, I think it's a bug to do batch insert in this case.
Pushed and back-patched to v14, after tweaking a comment a little bit.
Thanks!
Best regards,
Etsuro Fujita
Hi,
Another thing I noticed while working on the "Fast COPY FROM based on
batch insert" patch is: batch inserts vs. WITH CHECK OPTION
constraints from parent views. Here is an example on a normal build
producing incorrect results.
CREATE TABLE base_tbl (a int, b int);
CREATE FUNCTION row_before_insert_trigfunc() RETURNS trigger AS
$$BEGIN NEW.a := NEW.a + 10; RETURN NEW; END$$ LANGUAGE plpgsql;
CREATE TRIGGER row_before_insert_trigger BEFORE INSERT ON base_tbl FOR
EACH ROW EXECUTE PROCEDURE row_before_insert_trigfunc();
CREATE FOREIGN TABLE foreign_tbl (a int, b int) SERVER loopback
OPTIONS (table_name 'base_tbl');
CREATE VIEW rw_view AS SELECT * FROM foreign_tbl WHERE a < b WITH CHECK OPTION;
ALTER SERVER loopback OPTIONS (ADD batch_size '10');
EXPLAIN VERBOSE INSERT INTO rw_view VALUES (0, 15), (0, 5);
QUERY PLAN
--------------------------------------------------------------------------------
Insert on public.foreign_tbl (cost=0.00..0.03 rows=0 width=0)
Remote SQL: INSERT INTO public.base_tbl(a, b) VALUES ($1, $2) RETURNING a, b
Batch Size: 10
-> Values Scan on "*VALUES*" (cost=0.00..0.03 rows=2 width=8)
Output: "*VALUES*".column1, "*VALUES*".column2
(5 rows)
INSERT INTO rw_view VALUES (0, 15), (0, 5);
INSERT 0 2
This isn't correct; the INSERT query should abort because the
second-inserted row violates the WCO constraint as it is changed to
(10, 5) by the BEFORE ROW trigger.
Also, the query caused an assertion failure on an assert-enabled build, like:
TRAP: FailedAssertion("*numSlots == 1", File: "postgres_fdw.c", Line:
4164, PID: 7775)
I think the root cause for these is that WCO constraints are enforced
locally, but in batch-insert mode postgres_fdw cannot currently
retrieve the data needed to enforce such constraints locally that was
actually inserted on the remote side (except for the first-inserted
row). And I think this leads to the incorrect results on the normal
build as the WCO constraint is enforced with the data passed from the
core for the second-inserted row, and leads to the assertion failure
on the assert-enabled build.
To fix, I modified postgresGetForeignModifyBatchSize() to disable
batch insert when there are any such constraints, like when there are
any AFTER ROW triggers on the foreign table. Attached is a patch for
that.
If there are no objections, I'll commit the patch.
Best regards,
Etsuro Fujita
Attachments:
postgres_fdw-disable-batching-further.patchapplication/octet-stream; name=postgres_fdw-disable-batching-further.patchDownload
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index ade797159d..e79d0d1503 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -6536,6 +6536,29 @@ SELECT * FROM foreign_tbl;
20 | 30
(1 row)
+-- We don't allow batch insert when there are any WCO constraints
+ALTER SERVER loopback OPTIONS (ADD batch_size '10');
+EXPLAIN (VERBOSE, COSTS OFF)
+INSERT INTO rw_view VALUES (0, 15), (0, 5);
+ QUERY PLAN
+--------------------------------------------------------------------------------
+ Insert on public.foreign_tbl
+ Remote SQL: INSERT INTO public.base_tbl(a, b) VALUES ($1, $2) RETURNING a, b
+ Batch Size: 1
+ -> Values Scan on "*VALUES*"
+ Output: "*VALUES*".column1, "*VALUES*".column2
+(5 rows)
+
+INSERT INTO rw_view VALUES (0, 15), (0, 5); -- should fail
+ERROR: new row violates check option for view "rw_view"
+DETAIL: Failing row contains (10, 5).
+SELECT * FROM foreign_tbl;
+ a | b
+----+----
+ 20 | 30
+(1 row)
+
+ALTER SERVER loopback OPTIONS (DROP batch_size);
DROP FOREIGN TABLE foreign_tbl CASCADE;
NOTICE: drop cascades to view rw_view
DROP TRIGGER row_before_insupd_trigger ON base_tbl;
@@ -6628,6 +6651,27 @@ SELECT * FROM foreign_tbl;
20 | 30
(1 row)
+-- We don't allow batch insert when there are any WCO constraints
+ALTER SERVER loopback OPTIONS (ADD batch_size '10');
+EXPLAIN (VERBOSE, COSTS OFF)
+INSERT INTO rw_view VALUES (0, 15), (0, 5);
+ QUERY PLAN
+--------------------------------------------------------
+ Insert on public.parent_tbl
+ -> Values Scan on "*VALUES*"
+ Output: "*VALUES*".column1, "*VALUES*".column2
+(3 rows)
+
+INSERT INTO rw_view VALUES (0, 15), (0, 5); -- should fail
+ERROR: new row violates check option for view "rw_view"
+DETAIL: Failing row contains (10, 5).
+SELECT * FROM foreign_tbl;
+ a | b
+----+----
+ 20 | 30
+(1 row)
+
+ALTER SERVER loopback OPTIONS (DROP batch_size);
DROP FOREIGN TABLE foreign_tbl CASCADE;
DROP TRIGGER row_before_insupd_trigger ON child_tbl;
DROP TABLE parent_tbl CASCADE;
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 048db542d3..a9c97455e1 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -2043,8 +2043,9 @@ postgresGetForeignModifyBatchSize(ResultRelInfo *resultRelInfo)
batch_size = get_batch_size_option(resultRelInfo->ri_RelationDesc);
/*
- * Disable batching when we have to use RETURNING or there are any
- * BEFORE/AFTER ROW INSERT triggers on the foreign table.
+ * Disable batching when we have to use RETURNING, there are any
+ * BEFORE/AFTER ROW INSERT triggers on the foreign table, or there are any
+ * WITH CHECK OPTION constraints from parent views.
*
* When there are any BEFORE ROW INSERT triggers on the table, we can't
* support it, because such triggers might query the table we're inserting
@@ -2054,7 +2055,8 @@ postgresGetForeignModifyBatchSize(ResultRelInfo *resultRelInfo)
if (resultRelInfo->ri_projectReturning != NULL ||
(resultRelInfo->ri_TrigDesc &&
(resultRelInfo->ri_TrigDesc->trig_insert_before_row ||
- resultRelInfo->ri_TrigDesc->trig_insert_after_row)))
+ resultRelInfo->ri_TrigDesc->trig_insert_after_row)) ||
+ resultRelInfo->ri_WithCheckOptions != NIL)
return 1;
/*
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index b7817c5a41..08ea77524c 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -1503,6 +1503,14 @@ UPDATE rw_view SET b = b + 15;
UPDATE rw_view SET b = b + 15; -- ok
SELECT * FROM foreign_tbl;
+-- We don't allow batch insert when there are any WCO constraints
+ALTER SERVER loopback OPTIONS (ADD batch_size '10');
+EXPLAIN (VERBOSE, COSTS OFF)
+INSERT INTO rw_view VALUES (0, 15), (0, 5);
+INSERT INTO rw_view VALUES (0, 15), (0, 5); -- should fail
+SELECT * FROM foreign_tbl;
+ALTER SERVER loopback OPTIONS (DROP batch_size);
+
DROP FOREIGN TABLE foreign_tbl CASCADE;
DROP TRIGGER row_before_insupd_trigger ON base_tbl;
DROP TABLE base_tbl;
@@ -1541,6 +1549,14 @@ UPDATE rw_view SET b = b + 15;
UPDATE rw_view SET b = b + 15; -- ok
SELECT * FROM foreign_tbl;
+-- We don't allow batch insert when there are any WCO constraints
+ALTER SERVER loopback OPTIONS (ADD batch_size '10');
+EXPLAIN (VERBOSE, COSTS OFF)
+INSERT INTO rw_view VALUES (0, 15), (0, 5);
+INSERT INTO rw_view VALUES (0, 15), (0, 5); -- should fail
+SELECT * FROM foreign_tbl;
+ALTER SERVER loopback OPTIONS (DROP batch_size);
+
DROP FOREIGN TABLE foreign_tbl CASCADE;
DROP TRIGGER row_before_insupd_trigger ON child_tbl;
DROP TABLE parent_tbl CASCADE;
On Tue, 19 Apr 2022 at 14:00, Tomas Vondra
<tomas.vondra@enterprisedb.com> wrote:
On 4/19/22 11:16, Etsuro Fujita wrote:
On Sun, Apr 17, 2022 at 6:20 PM Etsuro Fujita <etsuro.fujita@gmail.com> wrote:
Here
is an example using HEAD:create extension postgres_fdw;
create server loopback foreign data wrapper postgres_fdw options
(dbname 'postgres');
create user mapping for current_user server loopback;
create table t (a int);
create foreign table ft (a int) server loopback options (table_name 't');
create function ft_rowcount_tf() returns trigger as $$ begin raise
notice '%: rows = %', tg_name, (select count(*) from ft); return new;
end; $$ language plpgsql;
create trigger ft_rowcount before insert on ft for each row execute
function ft_rowcount_tf();insert into ft select i from generate_series(1, 10) i;
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 1
NOTICE: ft_rowcount: rows = 2
NOTICE: ft_rowcount: rows = 3
NOTICE: ft_rowcount: rows = 4
NOTICE: ft_rowcount: rows = 5
NOTICE: ft_rowcount: rows = 6
NOTICE: ft_rowcount: rows = 7
NOTICE: ft_rowcount: rows = 8
NOTICE: ft_rowcount: rows = 9
INSERT 0 10This looks good, but when batch insert is enabled, the trigger
produces incorrect results:alter foreign table ft options (add batch_size '10');
delete from ft;insert into ft select i from generate_series(1, 10) i;
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 0
NOTICE: ft_rowcount: rows = 0
INSERT 0 10Actually, the results are correct, as we do batch-insert here. But I
just wanted to show that the trigger behaves *differently* when doing
batch-insert.+1, I think it's a bug to do batch insert in this case.
I don't have a current version of the SQL spec, but one preliminary
version of SQL:2012 I retrieved via the wiki details that all BEFORE
triggers on INSERT/UPDATE/DELETE statements are all executed before
_any_ of that statements' affected data is modified.
See the "SQL:2011 (preliminary)" document you can grab on the wiki,
Part 2: for INSERT, in 15.10 (4) the BEFORE triggers on the changeset
are executed, and only after that in section 15.10 (5)(c) the
changeset is inserted into the target table. During the BEFORE-trigger
this table does not contain the rows of the changeset, thus a count(*)
on that table would result in a single value for all the BEFORE
triggers triggered on that statement, regardless of the FOR EACH ROW
specifier. The sections for DELETE are 15.7 (6) and 15.7 (7); and for
UPDATE 15.13(7) and 15.13(9) respectively.
I don't know about the semantics of triggers in the latest SQL
standard versions, but based on that sample it seems like we're
non-compliant on BEFORE trigger behaviour, and it doesn't seem like
it's documented in the trigger documentation.
I seem to recall a mail on this topic (changes in trigger execution
order with respect to the DML it is triggered by in the newest SQL
spec) but I can't seem to find that thread.
Kind regards,
Matthias van de Meent
Matthias van de Meent <boekewurm+postgres@gmail.com> writes:
I don't have a current version of the SQL spec, but one preliminary
version of SQL:2012 I retrieved via the wiki details that all BEFORE
triggers on INSERT/UPDATE/DELETE statements are all executed before
_any_ of that statements' affected data is modified.
...
I don't know about the semantics of triggers in the latest SQL
standard versions, but based on that sample it seems like we're
non-compliant on BEFORE trigger behaviour, and it doesn't seem like
it's documented in the trigger documentation.
I think we're compliant if you declare the trigger functions as
stable (or immutable, but in any case where this matters, I think
you'd be lying). They'll then run with the snapshot of the calling
query, in which those updates are not yet visible.
This is documented somewhere, but maybe not anywhere near triggers.
regards, tom lane
On Wed, 3 Aug 2022 at 23:57, Tom Lane <tgl@sss.pgh.pa.us> wrote:
Matthias van de Meent <boekewurm+postgres@gmail.com> writes:
I don't have a current version of the SQL spec, but one preliminary
version of SQL:2012 I retrieved via the wiki details that all BEFORE
triggers on INSERT/UPDATE/DELETE statements are all executed before
_any_ of that statements' affected data is modified.
...
I don't know about the semantics of triggers in the latest SQL
standard versions, but based on that sample it seems like we're
non-compliant on BEFORE trigger behaviour, and it doesn't seem like
it's documented in the trigger documentation.I think we're compliant if you declare the trigger functions as
stable (or immutable, but in any case where this matters, I think
you'd be lying). They'll then run with the snapshot of the calling
query, in which those updates are not yet visible.This is documented somewhere, but maybe not anywhere near triggers.
Thank you for this pointer.
Looking around a bit, it seems like this behaviour for functions is
indeed documented in xfunc.sgml, but rendered docs page [0]https://www.postgresql.org/docs/current/xfunc-volatility.html does not
seem to mention triggers, nor does the triggers page link to that part
of the xfunc document. This makes it quite easy to overlook that this
is expected (?) behaviour for VOLATILE functions only.
Kind regards,
Matthias van de Meent
[0]: https://www.postgresql.org/docs/current/xfunc-volatility.html
On Wed, Aug 3, 2022 at 2:24 PM Etsuro Fujita <etsuro.fujita@gmail.com> wrote:
To fix, I modified postgresGetForeignModifyBatchSize() to disable
batch insert when there are any such constraints, like when there are
any AFTER ROW triggers on the foreign table. Attached is a patch for
that.If there are no objections, I'll commit the patch.
Pushed after modifying the patch a bit so that in that function the
WCO test in the if test is done before the trigger test, as the former
would be cheaper than the latter.
Best regards,
Etsuro Fujita
Hi,
While working on something else, I notice some more oddities. Here is
an example:
create extension postgres_fdw;
create server loopback foreign data wrapper postgres_fdw options
(dbname 'postgres');
create user mapping for current_user server loopback;
create table t1 (a text, b int);
create foreign table ft1 (a text, b int) server loopback options
(table_name 't1');
create table w1 (a text, b int);
create function ft1_rowcount_trigf() returns trigger language plpgsql
as $$ begin raise notice '%: there are % rows in ft1', tg_name,
(select count(*) from ft1); return new; end; $$;
create trigger ft1_rowcount_trigger before insert on w1 for each row
execute function ft1_rowcount_trigf();
alter server loopback options (add batch_size '10');
with t as (insert into w1 values ('foo', 10), ('bar', 20) returning *)
insert into ft1 select * from t;
NOTICE: ft1_rowcount_trigger: there are 0 rows in ft1
NOTICE: ft1_rowcount_trigger: there are 0 rows in ft1
INSERT 0 2
The command tag shows that two rows were inserted into ft1, but:
select * from ft1;
a | b
-----+----
foo | 10
bar | 20
foo | 10
bar | 20
(4 rows)
ft1 has four rows, which is wrong. Also, when inserting the second
row (‘bar’, 20) into w1, the BEFORE ROW INSERT trigger should see the
first row (‘foo’, 10) in ft1, but it reports no rows were visible
there.
The reason for the former is that this bit added by commit b663a4136
is done not only when running the primary ModifyTable node but when
running the secondary ModifyTable node (with the wrong
ModifyTableState).
/*
* Insert remaining tuples for batch insert.
*/
if (proute)
relinfos = estate->es_tuple_routing_result_relations;
else
relinfos = estate->es_opened_result_relations;
foreach(lc, relinfos)
{
resultRelInfo = lfirst(lc);
if (resultRelInfo->ri_NumSlots > 0)
ExecBatchInsert(node, resultRelInfo,
resultRelInfo->ri_Slots,
resultRelInfo->ri_PlanSlots,
resultRelInfo->ri_NumSlots,
estate, node->canSetTag);
}
The reason for the latter is that that commit fails to flush pending
inserts before executing any BEFORE ROW triggers, so that rows are
visible to such triggers.
Attached is a patch for fixing these issues. In the patch I added to
the EState struct a List member es_insert_pending_result_relations to
store ResultRelInfos for foreign tables on which batch inserts are to
be performed, so that we avoid scanning through
es_tuple_routing_result_relations or es_opened_result_relations each
time when flushing pending inserts to the foreign tables.
Best regards,
Etsuro Fujita
Attachments:
fix-handling-of-pending-inserts.patchapplication/octet-stream; name=fix-handling-of-pending-inserts.patchDownload
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 558e94b845..6024e5ccc7 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -10448,7 +10448,130 @@ SELECT * FROM batch_table ORDER BY x;
50 | test50 | test50
(50 rows)
+-- Clean up
+DROP TABLE batch_table;
+DROP TABLE batch_table_p0;
+DROP TABLE batch_table_p1;
ALTER SERVER loopback OPTIONS (DROP batch_size);
+-- Test that pending inserts are handled properly when needed
+CREATE TABLE batch_table (a text, b int);
+CREATE FOREIGN TABLE ftable (a text, b int)
+ SERVER loopback
+ OPTIONS (table_name 'batch_table', batch_size '2');
+CREATE TABLE wtable (a text, b int);
+CREATE FUNCTION ftable_rowcount_trigf() RETURNS trigger LANGUAGE plpgsql AS
+$$
+begin
+ raise notice '%: there are % rows in ftable',
+ TG_NAME, (SELECT count(*) FROM ftable);
+ if TG_OP = 'DELETE' then
+ return OLD;
+ else
+ return NEW;
+ end if;
+end;
+$$;
+CREATE TRIGGER ftable_rowcount_trigger
+BEFORE INSERT OR UPDATE OR DELETE ON wtable
+FOR EACH ROW EXECUTE PROCEDURE ftable_rowcount_trigf();
+WITH t AS (
+ INSERT INTO wtable VALUES ('foo', 10), ('bar', 20) RETURNING *
+)
+INSERT INTO ftable SELECT * FROM t;
+NOTICE: ftable_rowcount_trigger: there are 0 rows in ftable
+NOTICE: ftable_rowcount_trigger: there are 1 rows in ftable
+SELECT * FROM wtable;
+ a | b
+-----+----
+ foo | 10
+ bar | 20
+(2 rows)
+
+SELECT * FROM ftable;
+ a | b
+-----+----
+ foo | 10
+ bar | 20
+(2 rows)
+
+DELETE FROM ftable;
+WITH t AS (
+ UPDATE wtable SET b = b + 100 RETURNING *
+)
+INSERT INTO ftable SELECT * FROM t;
+NOTICE: ftable_rowcount_trigger: there are 0 rows in ftable
+NOTICE: ftable_rowcount_trigger: there are 1 rows in ftable
+SELECT * FROM wtable;
+ a | b
+-----+-----
+ foo | 110
+ bar | 120
+(2 rows)
+
+SELECT * FROM ftable;
+ a | b
+-----+-----
+ foo | 110
+ bar | 120
+(2 rows)
+
+DELETE FROM ftable;
+WITH t AS (
+ DELETE FROM wtable RETURNING *
+)
+INSERT INTO ftable SELECT * FROM t;
+NOTICE: ftable_rowcount_trigger: there are 0 rows in ftable
+NOTICE: ftable_rowcount_trigger: there are 1 rows in ftable
+SELECT * FROM wtable;
+ a | b
+---+---
+(0 rows)
+
+SELECT * FROM ftable;
+ a | b
+-----+-----
+ foo | 110
+ bar | 120
+(2 rows)
+
+DELETE FROM ftable;
+-- Clean up
+DROP TRIGGER ftable_rowcount_trigger ON wtable;
+DROP TABLE wtable;
+DROP FOREIGN TABLE ftable;
+DROP TABLE batch_table;
+CREATE TABLE parent (a text, b int) PARTITION BY LIST (a);
+CREATE TABLE batch_table (a text, b int);
+CREATE FOREIGN TABLE ftable
+ PARTITION OF parent
+ FOR VALUES IN ('AAA')
+ SERVER loopback
+ OPTIONS (table_name 'batch_table', batch_size '2');
+CREATE TABLE ltable
+ PARTITION OF parent
+ FOR VALUES IN ('BBB');
+CREATE TRIGGER ftable_rowcount_trigger
+BEFORE INSERT ON ltable
+FOR EACH ROW EXECUTE PROCEDURE ftable_rowcount_trigf();
+INSERT INTO parent VALUES ('AAA', 42), ('BBB', 42), ('AAA', 42), ('BBB', 42);
+NOTICE: ftable_rowcount_trigger: there are 1 rows in ftable
+NOTICE: ftable_rowcount_trigger: there are 2 rows in ftable
+SELECT tableoid::regclass, * FROM parent;
+ tableoid | a | b
+----------+-----+----
+ ftable | AAA | 42
+ ftable | AAA | 42
+ ltable | BBB | 42
+ ltable | BBB | 42
+(4 rows)
+
+-- Clean up
+DROP TRIGGER ftable_rowcount_trigger ON ltable;
+DROP TABLE ltable;
+DROP FOREIGN TABLE ftable;
+DROP TABLE batch_table;
+DROP TABLE parent;
+DROP FUNCTION ftable_rowcount_trigf;
-- ===================================================================
-- test asynchronous execution
-- ===================================================================
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index b0dbb41fb5..8bffff3433 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -3372,8 +3372,90 @@ INSERT INTO batch_table SELECT i, 'test'||i, 'test'|| i FROM generate_series(1,
SELECT COUNT(*) FROM batch_table;
SELECT * FROM batch_table ORDER BY x;
+-- Clean up
+DROP TABLE batch_table;
+DROP TABLE batch_table_p0;
+DROP TABLE batch_table_p1;
+
ALTER SERVER loopback OPTIONS (DROP batch_size);
+-- Test that pending inserts are handled properly when needed
+CREATE TABLE batch_table (a text, b int);
+CREATE FOREIGN TABLE ftable (a text, b int)
+ SERVER loopback
+ OPTIONS (table_name 'batch_table', batch_size '2');
+CREATE TABLE wtable (a text, b int);
+CREATE FUNCTION ftable_rowcount_trigf() RETURNS trigger LANGUAGE plpgsql AS
+$$
+begin
+ raise notice '%: there are % rows in ftable',
+ TG_NAME, (SELECT count(*) FROM ftable);
+ if TG_OP = 'DELETE' then
+ return OLD;
+ else
+ return NEW;
+ end if;
+end;
+$$;
+CREATE TRIGGER ftable_rowcount_trigger
+BEFORE INSERT OR UPDATE OR DELETE ON wtable
+FOR EACH ROW EXECUTE PROCEDURE ftable_rowcount_trigf();
+
+WITH t AS (
+ INSERT INTO wtable VALUES ('foo', 10), ('bar', 20) RETURNING *
+)
+INSERT INTO ftable SELECT * FROM t;
+SELECT * FROM wtable;
+SELECT * FROM ftable;
+DELETE FROM ftable;
+
+WITH t AS (
+ UPDATE wtable SET b = b + 100 RETURNING *
+)
+INSERT INTO ftable SELECT * FROM t;
+SELECT * FROM wtable;
+SELECT * FROM ftable;
+DELETE FROM ftable;
+
+WITH t AS (
+ DELETE FROM wtable RETURNING *
+)
+INSERT INTO ftable SELECT * FROM t;
+SELECT * FROM wtable;
+SELECT * FROM ftable;
+DELETE FROM ftable;
+
+-- Clean up
+DROP TRIGGER ftable_rowcount_trigger ON wtable;
+DROP TABLE wtable;
+DROP FOREIGN TABLE ftable;
+DROP TABLE batch_table;
+
+CREATE TABLE parent (a text, b int) PARTITION BY LIST (a);
+CREATE TABLE batch_table (a text, b int);
+CREATE FOREIGN TABLE ftable
+ PARTITION OF parent
+ FOR VALUES IN ('AAA')
+ SERVER loopback
+ OPTIONS (table_name 'batch_table', batch_size '2');
+CREATE TABLE ltable
+ PARTITION OF parent
+ FOR VALUES IN ('BBB');
+CREATE TRIGGER ftable_rowcount_trigger
+BEFORE INSERT ON ltable
+FOR EACH ROW EXECUTE PROCEDURE ftable_rowcount_trigf();
+
+INSERT INTO parent VALUES ('AAA', 42), ('BBB', 42), ('AAA', 42), ('BBB', 42);
+SELECT tableoid::regclass, * FROM parent;
+
+-- Clean up
+DROP TRIGGER ftable_rowcount_trigger ON ltable;
+DROP TABLE ltable;
+DROP FOREIGN TABLE ftable;
+DROP TABLE batch_table;
+DROP TABLE parent;
+DROP FUNCTION ftable_rowcount_trigf;
+
-- ===================================================================
-- test asynchronous execution
-- ===================================================================
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index d78862e660..ef828e0496 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -1261,6 +1261,7 @@ InitResultRelInfo(ResultRelInfo *resultRelInfo,
resultRelInfo->ri_ChildToRootMap = NULL;
resultRelInfo->ri_ChildToRootMapValid = false;
resultRelInfo->ri_CopyMultiInsertBuffer = NULL;
+ resultRelInfo->ri_ModifyTableState = NULL;
}
/*
diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c
index 40e3c07693..262cabd940 100644
--- a/src/backend/executor/execPartition.c
+++ b/src/backend/executor/execPartition.c
@@ -1034,6 +1034,13 @@ ExecInitRoutingInfo(ModifyTableState *mtstate,
Assert(partRelInfo->ri_BatchSize >= 1);
+ /*
+ * If doing batch insert, setup back-link so we can easily find the
+ * mtstate again.
+ */
+ if (partRelInfo->ri_BatchSize > 1)
+ partRelInfo->ri_ModifyTableState = mtstate;
+
partRelInfo->ri_CopyMultiInsertBuffer = NULL;
/*
diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c
index 9df1f81ea8..0e595ffa6e 100644
--- a/src/backend/executor/execUtils.c
+++ b/src/backend/executor/execUtils.c
@@ -127,6 +127,7 @@ CreateExecutorState(void)
estate->es_result_relations = NULL;
estate->es_opened_result_relations = NIL;
estate->es_tuple_routing_result_relations = NIL;
+ estate->es_insert_pending_result_relations = NIL;
estate->es_trig_target_relations = NIL;
estate->es_param_list_info = NULL;
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index b7ea953b55..f9db5fc4de 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -142,6 +142,7 @@ static void ExecBatchInsert(ModifyTableState *mtstate,
int numSlots,
EState *estate,
bool canSetTag);
+static void ExecPendingInserts(EState *estate);
static void ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
ResultRelInfo *sourcePartInfo,
ResultRelInfo *destPartInfo,
@@ -761,6 +762,10 @@ ExecInsert(ModifyTableContext *context,
if (resultRelInfo->ri_TrigDesc &&
resultRelInfo->ri_TrigDesc->trig_insert_before_row)
{
+ /* Flush any pending inserts, so rows are visible to the triggers */
+ if (estate->es_insert_pending_result_relations != NIL)
+ ExecPendingInserts(estate);
+
if (!ExecBRInsertTriggers(estate, resultRelInfo, slot))
return NULL; /* "do nothing" */
}
@@ -794,6 +799,8 @@ ExecInsert(ModifyTableContext *context,
*/
if (resultRelInfo->ri_BatchSize > 1)
{
+ bool flushed = false;
+
/*
* When we've reached the desired batch size, perform the
* insertion.
@@ -806,6 +813,7 @@ ExecInsert(ModifyTableContext *context,
resultRelInfo->ri_NumSlots,
estate, canSetTag);
resultRelInfo->ri_NumSlots = 0;
+ flushed = true;
}
oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
@@ -848,6 +856,22 @@ ExecInsert(ModifyTableContext *context,
ExecCopySlot(resultRelInfo->ri_PlanSlots[resultRelInfo->ri_NumSlots],
planSlot);
+ /*
+ * If these are the first tuples stored in the buffers, add the
+ * target rel to the es_insert_pending_result_relations list,
+ * except in the case where flushing was done above, in which case
+ * the target rel would already have been added to the list, so no
+ * need to do this.
+ */
+ if (resultRelInfo->ri_NumSlots == 0 && !flushed)
+ {
+ Assert(!list_member_ptr(estate->es_insert_pending_result_relations,
+ resultRelInfo));
+ estate->es_insert_pending_result_relations =
+ lappend(estate->es_insert_pending_result_relations,
+ resultRelInfo);
+ }
+
resultRelInfo->ri_NumSlots++;
MemoryContextSwitchTo(oldContext);
@@ -1188,6 +1212,32 @@ ExecBatchInsert(ModifyTableState *mtstate,
estate->es_processed += numInserted;
}
+/*
+ * ExecPendingInserts -- flushes all pending inserts to the foreign tables
+ */
+static void
+ExecPendingInserts(EState *estate)
+{
+ ListCell *lc;
+
+ foreach(lc, estate->es_insert_pending_result_relations)
+ {
+ ResultRelInfo *resultRelInfo = (ResultRelInfo *) lfirst(lc);
+ ModifyTableState *mtstate = resultRelInfo->ri_ModifyTableState;
+
+ Assert(mtstate);
+ ExecBatchInsert(mtstate, resultRelInfo,
+ resultRelInfo->ri_Slots,
+ resultRelInfo->ri_PlanSlots,
+ resultRelInfo->ri_NumSlots,
+ estate, mtstate->canSetTag);
+ resultRelInfo->ri_NumSlots = 0;
+ }
+
+ list_free(estate->es_insert_pending_result_relations);
+ estate->es_insert_pending_result_relations = NIL;
+}
+
/*
* ExecDeletePrologue -- subroutine for ExecDelete
*
@@ -1203,9 +1253,15 @@ ExecDeletePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
/* BEFORE ROW DELETE triggers */
if (resultRelInfo->ri_TrigDesc &&
resultRelInfo->ri_TrigDesc->trig_delete_before_row)
+ {
+ /* Flush any pending inserts, so rows are visible to the triggers */
+ if (context->estate->es_insert_pending_result_relations != NIL)
+ ExecPendingInserts(context->estate);
+
return ExecBRDeleteTriggers(context->estate, context->epqstate,
resultRelInfo, tupleid, oldtuple,
epqreturnslot);
+ }
return true;
}
@@ -1780,9 +1836,15 @@ ExecUpdatePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
/* BEFORE ROW UPDATE triggers */
if (resultRelInfo->ri_TrigDesc &&
resultRelInfo->ri_TrigDesc->trig_update_before_row)
+ {
+ /* Flush any pending inserts, so rows are visible to the triggers */
+ if (context->estate->es_insert_pending_result_relations != NIL)
+ ExecPendingInserts(context->estate);
+
return ExecBRUpdateTriggers(context->estate, context->epqstate,
resultRelInfo, tupleid, oldtuple, slot,
&context->tmfd);
+ }
return true;
}
@@ -3452,9 +3514,6 @@ ExecModifyTable(PlanState *pstate)
HeapTupleData oldtupdata;
HeapTuple oldtuple;
ItemPointer tupleid;
- PartitionTupleRouting *proute = node->mt_partition_tuple_routing;
- List *relinfos = NIL;
- ListCell *lc;
CHECK_FOR_INTERRUPTS();
@@ -3756,21 +3815,8 @@ ExecModifyTable(PlanState *pstate)
/*
* Insert remaining tuples for batch insert.
*/
- if (proute)
- relinfos = estate->es_tuple_routing_result_relations;
- else
- relinfos = estate->es_opened_result_relations;
-
- foreach(lc, relinfos)
- {
- resultRelInfo = lfirst(lc);
- if (resultRelInfo->ri_NumSlots > 0)
- ExecBatchInsert(node, resultRelInfo,
- resultRelInfo->ri_Slots,
- resultRelInfo->ri_PlanSlots,
- resultRelInfo->ri_NumSlots,
- estate, node->canSetTag);
- }
+ if (estate->es_insert_pending_result_relations != NIL)
+ ExecPendingInserts(estate);
/*
* We're done, but fire AFTER STATEMENT triggers before exiting.
@@ -4295,6 +4341,13 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
}
else
resultRelInfo->ri_BatchSize = 1;
+
+ /*
+ * If doing batch insert, setup back-link so we can easily find the
+ * mtstate again.
+ */
+ if (resultRelInfo->ri_BatchSize > 1)
+ resultRelInfo->ri_ModifyTableState = mtstate;
}
/*
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 01b1727fc0..c1297a87b3 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -571,6 +571,9 @@ typedef struct ResultRelInfo
* one of its ancestors; see ExecCrossPartitionUpdateForeignKey().
*/
List *ri_ancestorResultRels;
+
+ /* for use by nodeModifyTable.c when performing batch-inserts */
+ struct ModifyTableState *ri_ModifyTableState;
} ResultRelInfo;
/* ----------------
@@ -692,6 +695,12 @@ typedef struct EState
int es_jit_flags;
struct JitContext *es_jit;
struct JitInstrumentation *es_jit_worker_instr;
+
+ /*
+ * The following list contains ResultRelInfos for foreign tables on which
+ * batch inserts are to be executed
+ */
+ List *es_insert_pending_result_relations;
} EState;
On Fri, Nov 18, 2022 at 8:46 PM Etsuro Fujita <etsuro.fujita@gmail.com> wrote:
Attached is a patch for fixing these issues.
Here is an updated patch. In the attached, I added an assertion to
ExecInsert(). Also, I tweaked comments and test cases a little bit,
for consistency. Also, I noticed a copy-and-pasteo in a comment in
ExecBatchInsert(), so I fixed it as well.
Barring objections, I will commit the patch.
Best regards,
Etsuro Fujita
Attachments:
fix-handling-of-pending-inserts-2.patchapplication/octet-stream; name=fix-handling-of-pending-inserts-2.patchDownload
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 558e94b845..2ab3f1efaa 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -10448,7 +10448,130 @@ SELECT * FROM batch_table ORDER BY x;
50 | test50 | test50
(50 rows)
+-- Clean up
+DROP TABLE batch_table;
+DROP TABLE batch_table_p0;
+DROP TABLE batch_table_p1;
ALTER SERVER loopback OPTIONS (DROP batch_size);
+-- Test that pending inserts are handled properly when needed
+CREATE TABLE batch_table (a text, b int);
+CREATE FOREIGN TABLE ftable (a text, b int)
+ SERVER loopback
+ OPTIONS (table_name 'batch_table', batch_size '2');
+CREATE TABLE ltable (a text, b int);
+CREATE FUNCTION ftable_rowcount_trigf() RETURNS trigger LANGUAGE plpgsql AS
+$$
+begin
+ raise notice '%: there are % rows in ftable',
+ TG_NAME, (SELECT count(*) FROM ftable);
+ if TG_OP = 'DELETE' then
+ return OLD;
+ else
+ return NEW;
+ end if;
+end;
+$$;
+CREATE TRIGGER ftable_rowcount_trigger
+BEFORE INSERT OR UPDATE OR DELETE ON ltable
+FOR EACH ROW EXECUTE PROCEDURE ftable_rowcount_trigf();
+WITH t AS (
+ INSERT INTO ltable VALUES ('AAA', 42), ('BBB', 42) RETURNING *
+)
+INSERT INTO ftable SELECT * FROM t;
+NOTICE: ftable_rowcount_trigger: there are 0 rows in ftable
+NOTICE: ftable_rowcount_trigger: there are 1 rows in ftable
+SELECT * FROM ltable;
+ a | b
+-----+----
+ AAA | 42
+ BBB | 42
+(2 rows)
+
+SELECT * FROM ftable;
+ a | b
+-----+----
+ AAA | 42
+ BBB | 42
+(2 rows)
+
+DELETE FROM ftable;
+WITH t AS (
+ UPDATE ltable SET b = b + 100 RETURNING *
+)
+INSERT INTO ftable SELECT * FROM t;
+NOTICE: ftable_rowcount_trigger: there are 0 rows in ftable
+NOTICE: ftable_rowcount_trigger: there are 1 rows in ftable
+SELECT * FROM ltable;
+ a | b
+-----+-----
+ AAA | 142
+ BBB | 142
+(2 rows)
+
+SELECT * FROM ftable;
+ a | b
+-----+-----
+ AAA | 142
+ BBB | 142
+(2 rows)
+
+DELETE FROM ftable;
+WITH t AS (
+ DELETE FROM ltable RETURNING *
+)
+INSERT INTO ftable SELECT * FROM t;
+NOTICE: ftable_rowcount_trigger: there are 0 rows in ftable
+NOTICE: ftable_rowcount_trigger: there are 1 rows in ftable
+SELECT * FROM ltable;
+ a | b
+---+---
+(0 rows)
+
+SELECT * FROM ftable;
+ a | b
+-----+-----
+ AAA | 142
+ BBB | 142
+(2 rows)
+
+DELETE FROM ftable;
+-- Clean up
+DROP FOREIGN TABLE ftable;
+DROP TABLE batch_table;
+DROP TRIGGER ftable_rowcount_trigger ON ltable;
+DROP TABLE ltable;
+CREATE TABLE parent (a text, b int) PARTITION BY LIST (a);
+CREATE TABLE batch_table (a text, b int);
+CREATE FOREIGN TABLE ftable
+ PARTITION OF parent
+ FOR VALUES IN ('AAA')
+ SERVER loopback
+ OPTIONS (table_name 'batch_table', batch_size '2');
+CREATE TABLE ltable
+ PARTITION OF parent
+ FOR VALUES IN ('BBB');
+CREATE TRIGGER ftable_rowcount_trigger
+BEFORE INSERT ON ltable
+FOR EACH ROW EXECUTE PROCEDURE ftable_rowcount_trigf();
+INSERT INTO parent VALUES ('AAA', 42), ('BBB', 42), ('AAA', 42), ('BBB', 42);
+NOTICE: ftable_rowcount_trigger: there are 1 rows in ftable
+NOTICE: ftable_rowcount_trigger: there are 2 rows in ftable
+SELECT tableoid::regclass, * FROM parent;
+ tableoid | a | b
+----------+-----+----
+ ftable | AAA | 42
+ ftable | AAA | 42
+ ltable | BBB | 42
+ ltable | BBB | 42
+(4 rows)
+
+-- Clean up
+DROP FOREIGN TABLE ftable;
+DROP TABLE batch_table;
+DROP TRIGGER ftable_rowcount_trigger ON ltable;
+DROP TABLE ltable;
+DROP TABLE parent;
+DROP FUNCTION ftable_rowcount_trigf;
-- ===================================================================
-- test asynchronous execution
-- ===================================================================
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index b0dbb41fb5..51560429e0 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -3372,8 +3372,94 @@ INSERT INTO batch_table SELECT i, 'test'||i, 'test'|| i FROM generate_series(1,
SELECT COUNT(*) FROM batch_table;
SELECT * FROM batch_table ORDER BY x;
+-- Clean up
+DROP TABLE batch_table;
+DROP TABLE batch_table_p0;
+DROP TABLE batch_table_p1;
+
ALTER SERVER loopback OPTIONS (DROP batch_size);
+-- Test that pending inserts are handled properly when needed
+CREATE TABLE batch_table (a text, b int);
+CREATE FOREIGN TABLE ftable (a text, b int)
+ SERVER loopback
+ OPTIONS (table_name 'batch_table', batch_size '2');
+CREATE TABLE ltable (a text, b int);
+CREATE FUNCTION ftable_rowcount_trigf() RETURNS trigger LANGUAGE plpgsql AS
+$$
+begin
+ raise notice '%: there are % rows in ftable',
+ TG_NAME, (SELECT count(*) FROM ftable);
+ if TG_OP = 'DELETE' then
+ return OLD;
+ else
+ return NEW;
+ end if;
+end;
+$$;
+CREATE TRIGGER ftable_rowcount_trigger
+BEFORE INSERT OR UPDATE OR DELETE ON ltable
+FOR EACH ROW EXECUTE PROCEDURE ftable_rowcount_trigf();
+
+WITH t AS (
+ INSERT INTO ltable VALUES ('AAA', 42), ('BBB', 42) RETURNING *
+)
+INSERT INTO ftable SELECT * FROM t;
+
+SELECT * FROM ltable;
+SELECT * FROM ftable;
+DELETE FROM ftable;
+
+WITH t AS (
+ UPDATE ltable SET b = b + 100 RETURNING *
+)
+INSERT INTO ftable SELECT * FROM t;
+
+SELECT * FROM ltable;
+SELECT * FROM ftable;
+DELETE FROM ftable;
+
+WITH t AS (
+ DELETE FROM ltable RETURNING *
+)
+INSERT INTO ftable SELECT * FROM t;
+
+SELECT * FROM ltable;
+SELECT * FROM ftable;
+DELETE FROM ftable;
+
+-- Clean up
+DROP FOREIGN TABLE ftable;
+DROP TABLE batch_table;
+DROP TRIGGER ftable_rowcount_trigger ON ltable;
+DROP TABLE ltable;
+
+CREATE TABLE parent (a text, b int) PARTITION BY LIST (a);
+CREATE TABLE batch_table (a text, b int);
+CREATE FOREIGN TABLE ftable
+ PARTITION OF parent
+ FOR VALUES IN ('AAA')
+ SERVER loopback
+ OPTIONS (table_name 'batch_table', batch_size '2');
+CREATE TABLE ltable
+ PARTITION OF parent
+ FOR VALUES IN ('BBB');
+CREATE TRIGGER ftable_rowcount_trigger
+BEFORE INSERT ON ltable
+FOR EACH ROW EXECUTE PROCEDURE ftable_rowcount_trigf();
+
+INSERT INTO parent VALUES ('AAA', 42), ('BBB', 42), ('AAA', 42), ('BBB', 42);
+
+SELECT tableoid::regclass, * FROM parent;
+
+-- Clean up
+DROP FOREIGN TABLE ftable;
+DROP TABLE batch_table;
+DROP TRIGGER ftable_rowcount_trigger ON ltable;
+DROP TABLE ltable;
+DROP TABLE parent;
+DROP FUNCTION ftable_rowcount_trigf;
+
-- ===================================================================
-- test asynchronous execution
-- ===================================================================
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index d78862e660..ef828e0496 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -1261,6 +1261,7 @@ InitResultRelInfo(ResultRelInfo *resultRelInfo,
resultRelInfo->ri_ChildToRootMap = NULL;
resultRelInfo->ri_ChildToRootMapValid = false;
resultRelInfo->ri_CopyMultiInsertBuffer = NULL;
+ resultRelInfo->ri_ModifyTableState = NULL;
}
/*
diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c
index 40e3c07693..262cabd940 100644
--- a/src/backend/executor/execPartition.c
+++ b/src/backend/executor/execPartition.c
@@ -1034,6 +1034,13 @@ ExecInitRoutingInfo(ModifyTableState *mtstate,
Assert(partRelInfo->ri_BatchSize >= 1);
+ /*
+ * If doing batch insert, setup back-link so we can easily find the
+ * mtstate again.
+ */
+ if (partRelInfo->ri_BatchSize > 1)
+ partRelInfo->ri_ModifyTableState = mtstate;
+
partRelInfo->ri_CopyMultiInsertBuffer = NULL;
/*
diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c
index 9df1f81ea8..0e595ffa6e 100644
--- a/src/backend/executor/execUtils.c
+++ b/src/backend/executor/execUtils.c
@@ -127,6 +127,7 @@ CreateExecutorState(void)
estate->es_result_relations = NULL;
estate->es_opened_result_relations = NIL;
estate->es_tuple_routing_result_relations = NIL;
+ estate->es_insert_pending_result_relations = NIL;
estate->es_trig_target_relations = NIL;
estate->es_param_list_info = NULL;
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index b7ea953b55..271ff2be8e 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -142,6 +142,7 @@ static void ExecBatchInsert(ModifyTableState *mtstate,
int numSlots,
EState *estate,
bool canSetTag);
+static void ExecPendingInserts(EState *estate);
static void ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
ResultRelInfo *sourcePartInfo,
ResultRelInfo *destPartInfo,
@@ -761,6 +762,10 @@ ExecInsert(ModifyTableContext *context,
if (resultRelInfo->ri_TrigDesc &&
resultRelInfo->ri_TrigDesc->trig_insert_before_row)
{
+ /* Flush any pending inserts, so rows are visible to the triggers */
+ if (estate->es_insert_pending_result_relations != NIL)
+ ExecPendingInserts(estate);
+
if (!ExecBRInsertTriggers(estate, resultRelInfo, slot))
return NULL; /* "do nothing" */
}
@@ -794,6 +799,8 @@ ExecInsert(ModifyTableContext *context,
*/
if (resultRelInfo->ri_BatchSize > 1)
{
+ bool flushed = false;
+
/*
* When we've reached the desired batch size, perform the
* insertion.
@@ -806,6 +813,7 @@ ExecInsert(ModifyTableContext *context,
resultRelInfo->ri_NumSlots,
estate, canSetTag);
resultRelInfo->ri_NumSlots = 0;
+ flushed = true;
}
oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
@@ -848,6 +856,24 @@ ExecInsert(ModifyTableContext *context,
ExecCopySlot(resultRelInfo->ri_PlanSlots[resultRelInfo->ri_NumSlots],
planSlot);
+ /*
+ * If these are the first tuples stored in the buffers, add the
+ * target rel to the es_insert_pending_result_relations list,
+ * except in the case where flushing was done above, in which case
+ * the target rel would already have been added to the list, so no
+ * need to do this.
+ */
+ if (resultRelInfo->ri_NumSlots == 0 && !flushed)
+ {
+ Assert(!list_member_ptr(estate->es_insert_pending_result_relations,
+ resultRelInfo));
+ estate->es_insert_pending_result_relations =
+ lappend(estate->es_insert_pending_result_relations,
+ resultRelInfo);
+ }
+ Assert(list_member_ptr(estate->es_insert_pending_result_relations,
+ resultRelInfo));
+
resultRelInfo->ri_NumSlots++;
MemoryContextSwitchTo(oldContext);
@@ -1166,9 +1192,8 @@ ExecBatchInsert(ModifyTableState *mtstate,
slot = rslots[i];
/*
- * AFTER ROW Triggers or RETURNING expressions might reference the
- * tableoid column, so (re-)initialize tts_tableOid before evaluating
- * them.
+ * AFTER ROW Triggers might reference the tableoid column, so
+ * (re-)initialize tts_tableOid before evaluating them.
*/
slot->tts_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
@@ -1188,6 +1213,32 @@ ExecBatchInsert(ModifyTableState *mtstate,
estate->es_processed += numInserted;
}
+/*
+ * ExecPendingInserts -- flushes all pending inserts to the foreign tables
+ */
+static void
+ExecPendingInserts(EState *estate)
+{
+ ListCell *lc;
+
+ foreach(lc, estate->es_insert_pending_result_relations)
+ {
+ ResultRelInfo *resultRelInfo = (ResultRelInfo *) lfirst(lc);
+ ModifyTableState *mtstate = resultRelInfo->ri_ModifyTableState;
+
+ Assert(mtstate);
+ ExecBatchInsert(mtstate, resultRelInfo,
+ resultRelInfo->ri_Slots,
+ resultRelInfo->ri_PlanSlots,
+ resultRelInfo->ri_NumSlots,
+ estate, mtstate->canSetTag);
+ resultRelInfo->ri_NumSlots = 0;
+ }
+
+ list_free(estate->es_insert_pending_result_relations);
+ estate->es_insert_pending_result_relations = NIL;
+}
+
/*
* ExecDeletePrologue -- subroutine for ExecDelete
*
@@ -1203,9 +1254,15 @@ ExecDeletePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
/* BEFORE ROW DELETE triggers */
if (resultRelInfo->ri_TrigDesc &&
resultRelInfo->ri_TrigDesc->trig_delete_before_row)
+ {
+ /* Flush any pending inserts, so rows are visible to the triggers */
+ if (context->estate->es_insert_pending_result_relations != NIL)
+ ExecPendingInserts(context->estate);
+
return ExecBRDeleteTriggers(context->estate, context->epqstate,
resultRelInfo, tupleid, oldtuple,
epqreturnslot);
+ }
return true;
}
@@ -1780,9 +1837,15 @@ ExecUpdatePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
/* BEFORE ROW UPDATE triggers */
if (resultRelInfo->ri_TrigDesc &&
resultRelInfo->ri_TrigDesc->trig_update_before_row)
+ {
+ /* Flush any pending inserts, so rows are visible to the triggers */
+ if (context->estate->es_insert_pending_result_relations != NIL)
+ ExecPendingInserts(context->estate);
+
return ExecBRUpdateTriggers(context->estate, context->epqstate,
resultRelInfo, tupleid, oldtuple, slot,
&context->tmfd);
+ }
return true;
}
@@ -3452,9 +3515,6 @@ ExecModifyTable(PlanState *pstate)
HeapTupleData oldtupdata;
HeapTuple oldtuple;
ItemPointer tupleid;
- PartitionTupleRouting *proute = node->mt_partition_tuple_routing;
- List *relinfos = NIL;
- ListCell *lc;
CHECK_FOR_INTERRUPTS();
@@ -3756,21 +3816,8 @@ ExecModifyTable(PlanState *pstate)
/*
* Insert remaining tuples for batch insert.
*/
- if (proute)
- relinfos = estate->es_tuple_routing_result_relations;
- else
- relinfos = estate->es_opened_result_relations;
-
- foreach(lc, relinfos)
- {
- resultRelInfo = lfirst(lc);
- if (resultRelInfo->ri_NumSlots > 0)
- ExecBatchInsert(node, resultRelInfo,
- resultRelInfo->ri_Slots,
- resultRelInfo->ri_PlanSlots,
- resultRelInfo->ri_NumSlots,
- estate, node->canSetTag);
- }
+ if (estate->es_insert_pending_result_relations != NIL)
+ ExecPendingInserts(estate);
/*
* We're done, but fire AFTER STATEMENT triggers before exiting.
@@ -4295,6 +4342,13 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
}
else
resultRelInfo->ri_BatchSize = 1;
+
+ /*
+ * If doing batch insert, setup back-link so we can easily find the
+ * mtstate again.
+ */
+ if (resultRelInfo->ri_BatchSize > 1)
+ resultRelInfo->ri_ModifyTableState = mtstate;
}
/*
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 0f10d4432b..6e0d8fde57 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -571,6 +571,9 @@ typedef struct ResultRelInfo
* one of its ancestors; see ExecCrossPartitionUpdateForeignKey().
*/
List *ri_ancestorResultRels;
+
+ /* for use by nodeModifyTable.c when performing batch-inserts */
+ struct ModifyTableState *ri_ModifyTableState;
} ResultRelInfo;
/* ----------------
@@ -692,6 +695,12 @@ typedef struct EState
int es_jit_flags;
struct JitContext *es_jit;
struct JitInstrumentation *es_jit_worker_instr;
+
+ /*
+ * The following list contains ResultRelInfos for foreign tables on which
+ * batch-inserts are to be executed
+ */
+ List *es_insert_pending_result_relations;
} EState;
On Thu, Nov 24, 2022 at 8:19 PM Etsuro Fujita <etsuro.fujita@gmail.com> wrote:
Here is an updated patch. In the attached, I added an assertion to
ExecInsert(). Also, I tweaked comments and test cases a little bit,
for consistency. Also, I noticed a copy-and-pasteo in a comment in
ExecBatchInsert(), so I fixed it as well.Barring objections, I will commit the patch.
I have committed the patch.
Best regards,
Etsuro Fujita
Etsuro Fujita <etsuro.fujita@gmail.com> writes:
I have committed the patch.
Apologies for not having paid attention to this thread, but ...
I don't think the committed patch is acceptable at all, at least
not in the back branches, because it creates a severe ABI break.
Specifically, by adding a field to ResultRelInfo you have changed
the array stride of es_result_relations, and that will break any
previously-compiled extension code that accesses that array.
I'm not terribly pleased with it having added a field to EState
either. That seems much more global than what we need here.
Couldn't we add the field to ModifyTableState, instead?
regards, tom lane
On Sat, Nov 26, 2022 at 1:57 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:
I don't think the committed patch is acceptable at all, at least
not in the back branches, because it creates a severe ABI break.
Specifically, by adding a field to ResultRelInfo you have changed
the array stride of es_result_relations, and that will break any
previously-compiled extension code that accesses that array.
Ugh.
I'm not terribly pleased with it having added a field to EState
either. That seems much more global than what we need here.
The field stores pending buffered inserts, and I added it to Estate so
that it can be shared across primary/secondary ModifyTable nodes.
(Re-)consider this:
create extension postgres_fdw;
create server loopback foreign data wrapper postgres_fdw options
(dbname 'postgres');
create user mapping for current_user server loopback;
create table t1 (a text, b int);
create foreign table ft1 (a text, b int) server loopback options
(table_name 't1');
create table w1 (a text, b int);
create function ft1_rowcount_trigf() returns trigger language plpgsql as
$$
begin
raise notice '%: there are % rows in ft1',
tg_name, (select count(*) from ft1);
return new;
end;
$$;
create trigger ft1_rowcount_trigger before insert on w1 for each row
execute function ft1_rowcount_trigf();
alter server loopback options (add batch_size '10');
with t as (insert into w1 values ('foo', 10), ('bar', 20) returning *)
insert into ft1 select * from t;
NOTICE: ft1_rowcount_trigger: there are 0 rows in ft1
NOTICE: ft1_rowcount_trigger: there are 1 rows in ft1
INSERT 0 2
For this query, the primary ModifyTable node doing batch insert is
executed concurrently with the secondary ModifyTable node doing the
modifying CTE, and in the secondary ModifyTable node, any pending
buffered insert done in the primary ModifyTable node needs to be
flushed before firing the BEFORE ROW trigger, so the row is visible to
the trigger. The field is useful for cases like this.
Couldn't we add the field to ModifyTableState, instead?
We could probably do so, but I thought having a global list would be
more efficient to handle pending buffered inserts than that.
Anyway I will work on this further. Thanks for looking at this!
Best regards,
Etsuro Fujita
Etsuro Fujita <etsuro.fujita@gmail.com> writes:
On Sat, Nov 26, 2022 at 1:57 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:
Couldn't we add the field to ModifyTableState, instead?
We could probably do so, but I thought having a global list would be
more efficient to handle pending buffered inserts than that.
OK, as long as there's a reason for doing it that way, it's OK
by me. I don't think that adding a field at the end of EState
is an ABI problem.
We have to do something else than add to ResultRelInfo, though.
regards, tom lane
On Sun, Nov 27, 2022 at 12:11 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:
Etsuro Fujita <etsuro.fujita@gmail.com> writes:
On Sat, Nov 26, 2022 at 1:57 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:
Couldn't we add the field to ModifyTableState, instead?
We could probably do so, but I thought having a global list would be
more efficient to handle pending buffered inserts than that.OK, as long as there's a reason for doing it that way, it's OK
by me. I don't think that adding a field at the end of EState
is an ABI problem.We have to do something else than add to ResultRelInfo, though.
OK, I removed from ResultRelInfo a field that I added in the commit to
save the owning ModifyTableState if insert-pending, and added to
EState another List member to save such ModifyTableStates, instead. I
am planning to apply this to not only back branches but HEAD, to make
back-patching easy, if there are no objections.
Best regards,
Etsuro Fujita
Attachments:
Avoid-ABI-break.patchapplication/octet-stream; name=Avoid-ABI-break.patchDownload
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index b6751da574..2824da64fe 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -1262,7 +1262,6 @@ InitResultRelInfo(ResultRelInfo *resultRelInfo,
resultRelInfo->ri_ChildToRootMap = NULL;
resultRelInfo->ri_ChildToRootMapValid = false;
resultRelInfo->ri_CopyMultiInsertBuffer = NULL;
- resultRelInfo->ri_ModifyTableState = NULL;
}
/*
diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c
index 8e6453aec2..f12707ff35 100644
--- a/src/backend/executor/execPartition.c
+++ b/src/backend/executor/execPartition.c
@@ -1038,13 +1038,6 @@ ExecInitRoutingInfo(ModifyTableState *mtstate,
Assert(partRelInfo->ri_BatchSize >= 1);
- /*
- * If doing batch insert, setup back-link so we can easily find the
- * mtstate again.
- */
- if (partRelInfo->ri_BatchSize > 1)
- partRelInfo->ri_ModifyTableState = mtstate;
-
partRelInfo->ri_CopyMultiInsertBuffer = NULL;
/*
diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c
index 9695de85b9..3cc77a60de 100644
--- a/src/backend/executor/execUtils.c
+++ b/src/backend/executor/execUtils.c
@@ -128,9 +128,11 @@ CreateExecutorState(void)
estate->es_result_relations = NULL;
estate->es_opened_result_relations = NIL;
estate->es_tuple_routing_result_relations = NIL;
- estate->es_insert_pending_result_relations = NIL;
estate->es_trig_target_relations = NIL;
+ estate->es_insert_pending_result_relations = NIL;
+ estate->es_insert_pending_modifytables = NIL;
+
estate->es_param_list_info = NULL;
estate->es_param_exec_vals = NULL;
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 271ff2be8e..882ae73c5c 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -858,10 +858,12 @@ ExecInsert(ModifyTableContext *context,
/*
* If these are the first tuples stored in the buffers, add the
- * target rel to the es_insert_pending_result_relations list,
- * except in the case where flushing was done above, in which case
- * the target rel would already have been added to the list, so no
- * need to do this.
+ * target rel and the mtstate to the
+ * es_insert_pending_result_relations and
+ * es_insert_pending_modifytables lists respectively, execpt in
+ * the case where flushing was done above, in which case they
+ * would already have been added to the lists, so no need to do
+ * this.
*/
if (resultRelInfo->ri_NumSlots == 0 && !flushed)
{
@@ -870,6 +872,8 @@ ExecInsert(ModifyTableContext *context,
estate->es_insert_pending_result_relations =
lappend(estate->es_insert_pending_result_relations,
resultRelInfo);
+ estate->es_insert_pending_modifytables =
+ lappend(estate->es_insert_pending_modifytables, mtstate);
}
Assert(list_member_ptr(estate->es_insert_pending_result_relations,
resultRelInfo));
@@ -1219,12 +1223,14 @@ ExecBatchInsert(ModifyTableState *mtstate,
static void
ExecPendingInserts(EState *estate)
{
- ListCell *lc;
+ ListCell *l1,
+ *l2;
- foreach(lc, estate->es_insert_pending_result_relations)
+ forboth(l1, estate->es_insert_pending_result_relations,
+ l2, estate->es_insert_pending_modifytables)
{
- ResultRelInfo *resultRelInfo = (ResultRelInfo *) lfirst(lc);
- ModifyTableState *mtstate = resultRelInfo->ri_ModifyTableState;
+ ResultRelInfo *resultRelInfo = (ResultRelInfo *) lfirst(l1);
+ ModifyTableState *mtstate = (ModifyTableState *) lfirst(l2);
Assert(mtstate);
ExecBatchInsert(mtstate, resultRelInfo,
@@ -1236,7 +1242,9 @@ ExecPendingInserts(EState *estate)
}
list_free(estate->es_insert_pending_result_relations);
+ list_free(estate->es_insert_pending_modifytables);
estate->es_insert_pending_result_relations = NIL;
+ estate->es_insert_pending_modifytables = NIL;
}
/*
@@ -4342,13 +4350,6 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
}
else
resultRelInfo->ri_BatchSize = 1;
-
- /*
- * If doing batch insert, setup back-link so we can easily find the
- * mtstate again.
- */
- if (resultRelInfo->ri_BatchSize > 1)
- resultRelInfo->ri_ModifyTableState = mtstate;
}
/*
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index a2008846c6..541fbda5b2 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -571,9 +571,6 @@ typedef struct ResultRelInfo
* one of its ancestors; see ExecCrossPartitionUpdateForeignKey().
*/
List *ri_ancestorResultRels;
-
- /* for use by nodeModifyTable.c when performing batch-inserts */
- struct ModifyTableState *ri_ModifyTableState;
} ResultRelInfo;
/* ----------------
@@ -698,10 +695,11 @@ typedef struct EState
struct JitInstrumentation *es_jit_worker_instr;
/*
- * The following list contains ResultRelInfos for foreign tables on which
- * batch-inserts are to be executed.
+ * Lists of ResultRelInfos for foreign tables on which batch-inserts are
+ * to be executed and owning ModifyTableStates, stored in the same order.
*/
List *es_insert_pending_result_relations;
+ List *es_insert_pending_modifytables;
} EState;
On Fri, Dec 2, 2022 at 4:54 PM Etsuro Fujita <etsuro.fujita@gmail.com> wrote:
On Sun, Nov 27, 2022 at 12:11 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:
OK, as long as there's a reason for doing it that way, it's OK
by me. I don't think that adding a field at the end of EState
is an ABI problem.We have to do something else than add to ResultRelInfo, though.
OK, I removed from ResultRelInfo a field that I added in the commit to
save the owning ModifyTableState if insert-pending, and added to
EState another List member to save such ModifyTableStates, instead. I
am planning to apply this to not only back branches but HEAD, to make
back-patching easy, if there are no objections.
There seems to be no objection, so pushed.
Best regards,
Etsuro Fujita