From aea0c822489fbdeba14141e215f76e3777521bb2 Mon Sep 17 00:00:00 2001
From: Euler Taveira <euler.taveira@enterprisedb.com>
Date: Thu, 10 Mar 2022 17:38:24 -0300
Subject: [PATCH v4] doc: new section for row filter.

---
 doc/src/sgml/logical-replication.sgml    | 444 +++++++++++++++++++++++
 doc/src/sgml/ref/create_publication.sgml |   2 +
 2 files changed, 446 insertions(+)

diff --git a/doc/src/sgml/logical-replication.sgml b/doc/src/sgml/logical-replication.sgml
index 82326c3901..38ac77b6e1 100644
--- a/doc/src/sgml/logical-replication.sgml
+++ b/doc/src/sgml/logical-replication.sgml
@@ -118,6 +118,8 @@
    any combination of <command>INSERT</command>, <command>UPDATE</command>,
    <command>DELETE</command>, and <command>TRUNCATE</command>, similar to how triggers are fired by
    particular event types.  By default, all operation types are replicated.
+   (Row filters have no effect for <command>TRUNCATE</command>. See 
+   <xref linkend="logical-replication-row-filter"/>).
   </para>
 
   <para>
@@ -317,6 +319,448 @@
   </sect2>
  </sect1>
 
+ <sect1 id="logical-replication-row-filter">
+  <title>Row Filter</title>
+
+  <para>
+   The published tables replicate all data to the subscribers. The replicated
+   data can be selected using a <firstterm>row filter</firstterm>. The user
+   might choose to use row filters for behavioral, security or performance
+   reasons.
+  </para>
+
+  <para>
+   If a published table sets a row filter, a row is replicated if its data
+   satisfies the row filter expression. It means if row filters are used in a
+   set of tables, data will be partially replicated.
+  </para>
+
+  <para>
+   The row filter is defined per table. Use a <literal>WHERE</literal> clause
+   after the table name for each published table that requires data to be
+   filtered out. The <literal>WHERE</literal> clause must be enclosed by
+   parentheses. See <xref linkend="sql-createpublication"/> for details.
+  </para>
+
+  <sect2 id="logical-replication-row-filter-rules">
+   <title>Row Filter Rules</title>
+
+   <para>
+    Row filter is applied <emphasis>before</emphasis> publishing the changes.
+   </para>
+
+   <para>
+    If the row filter evaluates to <literal>false</literal> or
+    <literal>NULL</literal> then the row is not replicated.
+   </para>
+
+   <para>
+    The <literal>WHERE</literal> clause expression is evaluated with the same
+    role used for the replication connection. It means the role specified in the
+    <literal>CONNECTION</literal> clause of the <xref linkend="sql-createsubscription"/>.
+   </para>
+
+   <para>
+    Row filter has no effect for <command>TRUNCATE</command> command.
+   </para>
+
+  </sect2>
+
+  <sect2 id="logical-replication-row-filter-restrictions">
+   <title>Expression Restrictions</title>
+
+   <para>
+    The <literal>WHERE</literal> clause allows only simple expressions. It
+    cannot contain user-defined functions, operators, types, and collations,
+    system column references and non-immutable built-in functions.
+   </para>
+
+   <para>
+    If a publication publishes <literal>update</literal> or
+    <literal>delete</literal> operations, the <literal>WHERE</literal> clause
+    must contain only columns that are part of the replica identity (see
+    <xref linkend="sql-altertable-replica-identity"/>). If it publishes only
+    <literal>insert</literal> operation, the expression can use any column.
+   </para>
+
+  </sect2>
+
+  <sect2 id="logical-replication-row-filter-transformations">
+   <title>UPDATE Transformations</title>
+
+   <para>
+    Whenever an <command>UPDATE</command> is processed, the row filter
+    expression is evaluated for the old and new row. If both evaluations are
+    <literal>true</literal>, it replicates the <command>UPDATE</command> change.
+    If both evaluations are <literal>false</literal>, it doesn't replicate the
+    change. If only one of the rows matches the row filter expression, a
+    transformation is required. <command>UPDATE</command> should be transformed
+    into a <command>DELETE</command> or <command>INSERT</command> to avoid data
+    inconsistency. The row on the subscriber should reflect what is defined by
+    the row filter expression on the publisher.
+   </para>
+
+   <para>
+    If the old row satisfies the row filter expression (it was sent to the
+    subscriber) but the new row doesn't, after the <command>UPDATE</command>,
+    from a data consistency perspective, the old row should be removed from the
+    subscriber. Hence, <command>UPDATE</command> should be transformed into a
+    <command>DELETE</command>.
+   </para>
+
+   <para>
+    On the other hand, if the old row doesn't satisfy the row filter expression
+    (it wasn't sent to the subscriber) but the new row does, after the
+    <command>UPDATE</command>, from a data consistency perspective, that new row
+    should be inserted into the subscriber. Hence, <command>UPDATE</command>
+    should be transformed into an <command>INSERT</command>.
+   </para>
+
+  </sect2>
+
+  <sect2 id="logical-replication-row-filter-partitioned-table">
+   <title>Partitioned Table</title>
+
+   <para>
+    If the publication contains a partitioned table, the parameter
+    <literal>publish_via_partition_root</literal> determines which row filter
+    expression is used. If the parameter
+    <literal>publish_via_partition_root</literal> is true, the row filter
+    expression associated with the partitioned table is used. Otherwise, the row
+    filter expression associated with the individual partition is used.
+   </para>
+
+  </sect2>
+
+  <sect2 id="logical-replication-row-filter-initial-data-sync">
+   <title>Initial Data Synchronization</title>
+
+   <para>
+    If the subscription requires copying pre-existing data in the publications
+    and a publication contains <literal>WHERE</literal> clauses, only data that
+    satisfies the row filter expressions is copied to the subscriber.
+   </para>
+
+   <para>
+    If the subscription has several publications in which a table has been
+    published with different <literal>WHERE</literal> clauses, rows that satisfy
+    any of the expressions will be copied (see
+    <xref linkend="logical-replication-row-filter-combining"/> for details).
+   </para>
+
+   <para>
+    If the subscriber is in a release prior to 15, copy pre-existing data
+    doesn't use row filter expressions even if it is defined in the publication.
+    This is because old releases can only copy the entire table data.
+   </para>
+
+  </sect2>
+
+  <sect2 id="logical-replication-row-filter-combining">
+   <title>Combining Multiple Row Filters</title>
+
+   <para>
+    If the subscription has several publications in which the same table has
+    been published with different row filters (for the same
+    <literal>publish</literal> operation), rows that satisfy any of the row
+    filter expressions is replicated. It means all other row filters for the
+    same table become irrelevant if:
+
+    <itemizedlist>
+     <listitem>
+      <para>
+       one of the publications has no row filter;
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       one of the publications was created using
+       <literal>FOR ALL TABLES</literal> clause. This clause does not allow row
+       filters;
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       one of the publications was created using
+       <literal>FOR ALL TABLES IN SCHEMA</literal> clause and the table belongs
+       to the referred schema. This clause does not allow row filters.
+      </para>
+     </listitem>
+    </itemizedlist>
+   </para>
+
+  </sect2>
+
+  <sect2 id="logical-replication-row-filter-examples">
+   <title>Examples</title>
+
+   <para>
+    Create some tables to be used in the following examples.
+<programlisting>
+testpub=# CREATE TABLE t1(a int, b int, c text, primary key(a,c));
+CREATE TABLE
+testpub=# CREATE TABLE t2(d int, e int, f int, primary key(d));
+CREATE TABLE
+testpub=# CREATE TABLE t3(g int, h int, i int, primary key(g));
+CREATE TABLE
+</programlisting>
+   </para>
+
+   <para>
+    Create some publications.
+<programlisting>
+testpub=# CREATE PUBLICATION p1 FOR TABLE t1 WHERE (a > 5 AND c = 'NSW');
+CREATE PUBLICATION
+testpub=# CREATE PUBLICATION p2 FOR TABLE t1, t2 WHERE (e = 99);
+CREATE PUBLICATION
+testpub=# CREATE PUBLICATION p3 FOR TABLE t2 WHERE (d = 10), t3 WHERE (g = 10);
+CREATE PUBLICATION
+</programlisting>
+   </para>
+
+   <para>
+    The publication <literal>p1</literal> contains only one table
+    (<literal>t1</literal>) that has a row filter expression for it.
+   </para>
+
+   <para>
+    The publication <literal>p2</literal> contains 2 tables. The table
+    <literal>t1</literal> has no row filter and the table <literal>t2</literal>
+    has a row filter.
+   </para>
+
+   <para>
+    The publication <literal>p3</literal> contains 2 tables and each one has a
+    defined row filter.
+   </para>
+
+   <para>
+    The <command>psql</command> shows the row filter expressions (if defined)
+    for each table.
+
+<programlisting>
+testpub=# \dRp+
+                               Publication p1
+  Owner   | All tables | Inserts | Updates | Deletes | Truncates | Via root 
+----------+------------+---------+---------+---------+-----------+----------
+ postgres | f          | t       | t       | t       | t         | f
+Tables:
+    "public.t1" WHERE ((a > 5) AND (c = 'NSW'::text))
+
+                               Publication p2
+  Owner   | All tables | Inserts | Updates | Deletes | Truncates | Via root 
+----------+------------+---------+---------+---------+-----------+----------
+ postgres | f          | t       | t       | t       | t         | f
+Tables:
+    "public.t1"
+    "public.t2" WHERE (e = 99)
+
+                               Publication p3
+  Owner   | All tables | Inserts | Updates | Deletes | Truncates | Via root 
+----------+------------+---------+---------+---------+-----------+----------
+ postgres | f          | t       | t       | t       | t         | f
+Tables:
+    "public.t2" WHERE (d = 10)
+    "public.t3" WHERE (g = 10)
+
+testpub=# \d t1
+                 Table "public.t1"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           | not null | 
+ b      | integer |           |          | 
+ c      | text    |           | not null | 
+Indexes:
+    "t1_pkey" PRIMARY KEY, btree (a, c)
+Publications:
+    "p1" WHERE ((a > 5) AND (c = 'NSW'::text))
+    "p2"
+
+testpub=# \d t2
+                 Table "public.t2"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ d      | integer |           | not null | 
+ e      | integer |           |          | 
+ f      | integer |           |          | 
+Indexes:
+    "t2_pkey" PRIMARY KEY, btree (d)
+Publications:
+    "p2" WHERE (e = 99)
+    "p3" WHERE (d = 10)
+</programlisting>
+   </para>
+
+   <para>
+    On the subscriber, create the table <literal>t1</literal> with the same
+    definition as the one on the publisher and also create the subscription
+    <literal>s1</literal> that subscribes to the publication
+    <literal>p1</literal>.
+<programlisting>
+testsub=# CREATE TABLE t1(a int, b int, c text, primary key(a,c));
+CREATE TABLE
+testsub=# CREATE SUBSCRIPTION s1
+testsub-# CONNECTION 'host=10.1.2.3 port=5432 user=repuser dbname=testpub'
+testsub-# PUBLICATION p1;
+CREATE SUBSCRIPTION
+</programlisting>
+   </para>
+
+   <para>
+    Insert some rows.
+<programlisting>
+testpub=# INSERT INTO t1 VALUES (2, 102, 'NSW');
+INSERT 0 1
+testpub=# INSERT INTO t1 VALUES (3, 103, 'QLD');
+INSERT 0 1
+testpub=# INSERT INTO t1 VALUES (4, 104, 'VIC');
+INSERT 0 1
+testpub=# INSERT INTO t1 VALUES (5, 105, 'ACT');
+INSERT 0 1
+testpub=# INSERT INTO t1 VALUES (6, 106, 'NSW');
+INSERT 0 1
+testpub=# INSERT INTO t1 VALUES (7, 107, 'NT');
+INSERT 0 1
+testpub=# INSERT INTO t1 VALUES (8, 108, 'QLD');
+INSERT 0 1
+testpub=# INSERT INTO t1 VALUES (9, 109, 'NSW');
+INSERT 0 1
+</programlisting>
+<programlisting>
+testpub=# SELECT * FROM t1; 
+ a |  b  |  c
+---+-----+-----
+ 2 | 102 | NSW
+ 3 | 103 | QLD
+ 4 | 104 | VIC
+ 5 | 105 | ACT
+ 6 | 106 | NSW
+ 7 | 107 | NT
+ 8 | 108 | QLD
+ 9 | 109 | NSW
+(8 rows)
+</programlisting>
+   </para>
+
+   <para>
+    Notice that only the rows satisfying the <literal>t1</literal>
+    <literal>WHERE</literal> clause of the publication
+    <literal>p1</literal> are replicated.
+<programlisting>
+testsub=# SELECT * FROM t1;
+ a |  b  |  c  
+---+-----+-----
+ 6 | 106 | NSW
+ 9 | 109 | NSW
+(2 rows)
+</programlisting>
+   </para>
+
+   <para>
+    Update a row whose both (old and new) rows satisfy the <literal>t1</literal>
+    <literal>WHERE</literal> clause of the publication <literal>p1</literal>. It
+    replicates the <command>UPDATE</command> as expected.
+<programlisting>
+testpub=# UPDATE t1 SET b = 999 WHERE a = 6;
+UPDATE 1
+</programlisting>
+<programlisting>
+testpub=# SELECT * FROM t1;
+ a |  b  |  c  
+---+-----+-----
+ 2 | 102 | NSW
+ 3 | 103 | QLD
+ 4 | 104 | VIC
+ 5 | 105 | ACT
+ 7 | 107 | NT
+ 8 | 108 | QLD
+ 9 | 109 | NSW
+ 6 | 999 | NSW
+(8 rows)
+</programlisting>
+<programlisting>
+testsub=# SELECT * FROM t1;
+ a |  b  |  c  
+---+-----+-----
+ 9 | 109 | NSW
+ 6 | 999 | NSW
+(2 rows)
+</programlisting>
+   </para>
+
+   <para>
+    Update a row whose the old row doesn't satisfy the <literal>t1</literal>
+    <literal>WHERE</literal> clause of the publication <literal>p1</literal>
+    but the new row satisfies it. The <literal>UPDATE</literal> is transformed
+    into an <command>INSERT</command> and the change is replicated. See the new
+    row on the subscriber.
+<programlisting>
+testpub=# UPDATE t1 SET a = 555 WHERE a = 2;
+UPDATE 1
+</programlisting>
+<programlisting>
+testpub=# SELECT * FROM t1;
+  a  |  b  |  c  
+-----+-----+-----
+   3 | 103 | QLD
+   4 | 104 | VIC
+   5 | 105 | ACT
+   7 | 107 | NT
+   8 | 108 | QLD
+   9 | 109 | NSW
+   6 | 999 | NSW
+ 555 | 102 | NSW
+(8 rows)
+</programlisting>
+<programlisting>
+testsub=# SELECT * FROM t1;
+  a  |  b  |  c  
+-----+-----+-----
+   9 | 109 | NSW
+   6 | 999 | NSW
+ 555 | 102 | NSW
+(3 rows)
+</programlisting>
+   </para>
+
+   <para>
+    Update a row whose the old row satisfies the <literal>t1</literal>
+    <literal>WHERE</literal> clause of the publication <literal>p1</literal>
+    but the new row doesn't satisfy it. The <command>UPDATE</command> is
+    transformed into a <command>DELETE</command> and the change is replicated.
+    See that the row is removed from the subscriber.
+<programlisting>
+testpub=# UPDATE t1 SET c = 'VIC' WHERE a = 9;
+UPDATE 1
+</programlisting>
+<programlisting>
+testpub=# SELECT * FROM t1;
+  a  |  b  |  c  
+-----+-----+-----
+   3 | 103 | QLD
+   4 | 104 | VIC
+   5 | 105 | ACT
+   7 | 107 | NT
+   8 | 108 | QLD
+   6 | 999 | NSW
+ 999 | 102 | NSW
+   9 | 109 | VIC
+(8 rows)
+</programlisting>
+<programlisting>
+testsub=# SELECT * FROM t1;
+  a  |  b  |  c  
+-----+-----+-----
+   6 | 999 | NSW
+ 999 | 102 | NSW
+(2 rows)
+</programlisting>
+   </para>
+
+  </sect2>
+
+ </sect1>
+
  <sect1 id="logical-replication-conflicts">
   <title>Conflicts</title>
 
diff --git a/doc/src/sgml/ref/create_publication.sgml b/doc/src/sgml/ref/create_publication.sgml
index 4979b9b646..3591d51af3 100644
--- a/doc/src/sgml/ref/create_publication.sgml
+++ b/doc/src/sgml/ref/create_publication.sgml
@@ -247,6 +247,8 @@ CREATE PUBLICATION <replaceable class="parameter">name</replaceable>
    <literal>publish_via_partition_root</literal> determines if it uses the
    partition's row filter (if the parameter is false, the default) or the root
    partitioned table's row filter.
+   See <xref linkend="logical-replication-row-filter"/> for details about row
+   filters.
   </para>
 
   <para>
-- 
2.30.2

