From 1f9069cc4e0f8e1642a24d4f9053908838abc049 Mon Sep 17 00:00:00 2001 From: Peter Smith Date: Wed, 2 Mar 2022 15:31:45 +1100 Subject: [PATCH v1] PG docs - Logical Replication Filtering. This patch introduces a new PG docs page for giving an overview of what kinds of logical replication filtering are available, and how to use them. The main new content addition to this page is the section describing "Row Filtering". Author: Peter Smith Reviewed By: Greg Nancarrow Discussion: https://www.postgresql.org/message-id/CAHut%2BPsiSQvWpEbxLK0hksjq0hRzko1_uY8EOTOx7dEfNhzeaQ%40mail.gmail.com --- doc/src/sgml/logical-replication.sgml | 399 ++++++++++++++++++++++++++++++- doc/src/sgml/ref/create_publication.sgml | 2 + 2 files changed, 399 insertions(+), 2 deletions(-) diff --git a/doc/src/sgml/logical-replication.sgml b/doc/src/sgml/logical-replication.sgml index 96b4886..f620faf 100644 --- a/doc/src/sgml/logical-replication.sgml +++ b/doc/src/sgml/logical-replication.sgml @@ -116,8 +116,10 @@ Publications can choose to limit the changes they produce to any combination of INSERT, UPDATE, - DELETE, and TRUNCATE, similar to how triggers are fired by - particular event types. By default, all operation types are replicated. + DELETE, and TRUNCATE by using + operation filters. By default, all operation types are + replicated. See + for details. @@ -317,6 +319,399 @@ + + Filtering + + + By default, all data from all published tables will be replicated to the + appropriate subscribers. + + + + The default data replication can be reduced by using filters. A user might + choose to use filters for behavioral or for performance reasons. + + + + There are 3 different ways to filter what data gets replicated. + + + + Operation Filters + + + Publications can choose to limit the changes they produce to any combination + of INSERT, UPDATE, DELETE, + and TRUNCATE, similar to how triggers are fired by + particular event types. + + + + By default, all operation types are replicated. + + + + Use the PUBLICATION ... + WITH (publish = value) + clause to modify which operations will do replication. + + + + Examples + + + Create a publication that only publishes INSERT + operations in one table: + +CREATE PUBLICATION insert_only FOR TABLE mydata WITH (publish = 'insert'); + + + + + + + + + Row Filters + + + (This feature is available since PostgreSQL 15) + + + + With row filtering, the decision whether to replicate row data depends on + the row data values. Rows that don't satisfy an optional WHERE + clause will be filtered out. This allows a database or set of tables to be + partially replicated. + + + + Use the PUBLICATION ... FOR TABLE + + table_name WHERE ( + expression) + to specify which rows will be replicated. + + + + The row filtering is defined per table. A new row filter can be added simply + by specifying a WHERE clause after the table name. The + WHERE clause must be enclosed by parentheses. + + + + Row filter Rules + + + If the row filter evaluates to false or + NULL then the row is not replicated. + + + + Row filters are applied before publishing the changes. + + + + The WHERE clause expression is evaluated with the same + role used for the replication connection. + + + + Row filters have no effect for TRUNCATE commands. + + + + + + UPDATE transformations + + + Whenever an UPDATE is processed, the row filter + expression is evaluated for both the old and new row (i.e. before + and after the data is updated). + + + + If both evaluations are true, it replicates the + UPDATE. + + + + If both evaluations are false, it doesn't replicate + anything. + + + + If only one of the old/new rows matches the row filter expression, the + UPDATE is transformed to INSERT or + DELETE, to avoid any data inconsistency, as follows: + +Case 1: old-row (no match) new-row (no match) --> (drop change) +Case 2: old-row (no match) new row (match) --> INSERT +Case 3: old-row (match) new-row (no match) --> DELETE +Case 4: old-row (match) new row (match) --> UPDATE + + + + + + + Partition tables + + + If the publication contains a partitioned table, the publication parameter + publish_via_partition_root determines which row filter + is used. + + + + + If publish_via_partition_root is false + (default), each partition's row filter is used. + + + + + + If publish_via_partition_root is true, + the root partitioned table's row filter is used. + + + + + + + + + + WHERE clause Expression Restrictions + + + The row filter WHERE clause for a table added to a + publication that publishes UPDATE and/or + DELETE operations must contain only columns that are + covered by the replica identity. The row filter + WHERE clause for a table added to a publication that + publishes INSERT can use any column. + + + + The WHERE clause allows only simple expressions that + don't have user-defined functions, operators, non-immutable built-in + functions, or references to system columns. + + + + + + Subscriber initial table synchronization + + + If you choose to do the initial table synchronization, only data that + satisfies the row filters is copied to the subscriber. + + + + If the subscription has several publications in which a table has been + published with different WHERE clauses, rows that satisfy + any of the expressions will be copied. (see + ). + + + + + Publication operations of filters are ignored during initial table + synchronization. + + + + + + If a subscriber is a pre-15 version, the initial table synchronization + won't use row filters even if they are defined in the publication. + + + + + + + Combining multiple row filters for the same table + + + If the subscription has several publications in which the same table has + been published with different row filters (for the same publish + operation), those expressions get OR'ed together, so that rows satisfying + any of the expressions will be replicated. + + + + This means all the other filters (for the same table) become redundant if: + + + + + one of the publications has no filter at all. + + + + + + one of the publications was created using FOR ALL TABLES. + + + + + + one of the publications was created using + FOR ALL TABLES IN SCHEMA and the table belongs to + that schema. + + + + + + + + + + PSQL commands to display Row Filters + + + PSQL commands \dRp+ and \d will + display row filter information. + + + + + + Examples + + + Create some tables to be used in the following examples. + +test_pub=# CREATE TABLE t1(a int, b int, c text); +CREATE TABLE +test_pub=# CREATE TABLE t2(d int, e int, f int); +CREATE TABLE + + + + + Create some publications. + + + + Notice that table t1 is a member of multiple + publications. + +test_pub=# CREATE PUBLICATION p1 FOR TABLE t1 WHERE (a > 5 AND c IS NULL); +CREATE PUBLICATION +test_pub=# CREATE PUBLICATION p2 FOR TABLE t1, t2 WHERE (e = 99); +CREATE PUBLICATION + + + + + The PSQL command \dRp+ shows what row filters are + defined for the publications. + +test_pub=# \dRp+ + Publication p1 + Owner | All tables | Inserts | Updates | Deletes | Truncates | Via root +----------+------------+---------+---------+---------+-----------+---------- + postgres | f | t | t | t | t | f +Tables: + "public.t1" WHERE ((a > 5) AND (c IS NULL)) + + Publication p2 + Owner | All tables | Inserts | Updates | Deletes | Truncates | Via root +----------+------------+---------+---------+---------+-----------+---------- + postgres | f | t | t | t | t | f +Tables: + "public.t1" + "public.t2" WHERE (e = 99) + + + + + The PSQL command \d shows what publications the table is + a member of, as well as that table's row filter in those publications. + + + + Notice that table t1 is a member of two publications, but + only has a WHERE clause for p1. + +test_pub=# \d t1 + Table "public.t1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | integer | | | + c | text | | | +Publications: + "p1" WHERE ((a > 5) AND (c IS NULL)) + "p2" + +test_pub=# \d t2 + Table "public.t2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + d | integer | | | + e | integer | | | + f | integer | | | +Publications: + "p2" WHERE (e = 99) + + + + + + + + + + Conflicts diff --git a/doc/src/sgml/ref/create_publication.sgml b/doc/src/sgml/ref/create_publication.sgml index 4979b9b..cb16e97 100644 --- a/doc/src/sgml/ref/create_publication.sgml +++ b/doc/src/sgml/ref/create_publication.sgml @@ -247,6 +247,8 @@ CREATE PUBLICATION name publish_via_partition_root determines if it uses the partition's row filter (if the parameter is false, the default) or the root partitioned table's row filter. + See for more details about + row filters. -- 1.8.3.1