From 47eab24dc65167ff4cd8c35fa86f596139f0afa3 Mon Sep 17 00:00:00 2001
From: Peter Smith <peter.b.smith@fujitsu.com>
Date: Fri, 8 Apr 2022 15:45:27 +1000
Subject: [PATCH v6] PG DOCS page for row filters.

This patch introduces a new documentation page for describing the "Row Filters" feature.

Author: Peter Smith, Euler Taveira
Reviewed By: Greg Nancarrow, Aleksander Alekseev, Amit Kapila, Ajin Cherian
Discussion: https://www.postgresql.org/message-id/CAHut%2BPtnsBr59%3D_NvxXp_%3DS-em0WxyuDOQmSTuHGb4sVhkHffg%40mail.gmail.com
---
 doc/src/sgml/logical-replication.sgml    | 551 +++++++++++++++++++++++++++++++
 doc/src/sgml/ref/create_publication.sgml |   2 +
 2 files changed, 553 insertions(+)
diff --git a/doc/src/sgml/logical-replication.sgml b/doc/src/sgml/logical-replication.sgml
index 555fbd7..6e23be9 100644
--- a/doc/src/sgml/logical-replication.sgml
+++ b/doc/src/sgml/logical-replication.sgml
@@ -118,6 +118,8 @@
    any combination of <command>INSERT</command>, <command>UPDATE</command>,
    <command>DELETE</command>, and <command>TRUNCATE</command>, similar to how triggers are fired by
    particular event types.  By default, all operation types are replicated.
+   (Row filters have no effect for <command>TRUNCATE</command>. See
+   <xref linkend="logical-replication-row-filter"/>.)
   </para>
 
   <para>
@@ -317,6 +319,555 @@
   </sect2>
  </sect1>
 
+ <sect1 id="logical-replication-row-filter">
+  <title>Row Filters</title>
+
+  <para>
+   By default, all data from all published tables will be replicated to the
+   appropriate subscribers.
+  </para>
+
+  <para>
+   The replicated data can be reduced by using a <firstterm>row filter</firstterm>.
+   A user might choose to use row filters for behavioral, security or performance
+   reasons.
+  </para>
+
+  <para>
+   If a published table sets a row filter, a row is replicated only if its data
+   satisfies the row filter expression. This allows a set of tables to be
+   partially replicated.
+  </para>
+
+  <para>
+   The row filter is defined per table. Use a <literal>WHERE</literal> clause
+   after the table name for each published table that requires data to be
+   filtered out. The <literal>WHERE</literal> clause must be enclosed by
+   parentheses. See <xref linkend="sql-createpublication"/> for details.
+  </para>
+
+  <sect2 id="logical-replication-row-filter-rules">
+   <title>Row Filter Rules</title>
+
+   <para>
+    Row filters are applied <emphasis>before</emphasis> publishing the changes.
+   </para>
+
+   <para>
+    If the row filter evaluates to <literal>false</literal> or
+    <literal>NULL</literal> then the row is not replicated.
+   </para>
+
+   <para>
+    The <literal>WHERE</literal> clause expression is evaluated with the same
+    role used for the replication connection (i.e. the role specified in the
+    <literal>CONNECTION</literal> clause of the <xref linkend="sql-createsubscription"/>).
+   </para>
+
+   <para>
+    Row filters have no effect for the <command>TRUNCATE</command> command.
+   </para>
+
+  </sect2>
+
+  <sect2 id="logical-replication-row-filter-restrictions">
+   <title>Expression Restrictions</title>
+
+   <para>
+    The <literal>WHERE</literal> clause allows only simple expressions. It
+    cannot contain user-defined functions, operators, types, and collations,
+    system column references or non-immutable built-in functions.
+   </para>
+
+   <para>
+    If a publication publishes <command>UPDATE</command> and/or
+    <command>DELETE</command> operations, the row filter <literal>WHERE</literal> clause
+    must contain only columns that are covered by the replica identity (see
+    <xref linkend="sql-altertable-replica-identity"/>). If a publication publishes only
+    <command>INSERT</command>, the row filter <literal>WHERE</literal> clause
+    can use any column.
+   </para>
+
+  </sect2>
+
+  <sect2 id="logical-replication-row-filter-transformations">
+   <title>UPDATE Transformations</title>
+
+   <para>
+    Whenever an <command>UPDATE</command> is processed, the row filter
+    expression is evaluated for both the old and new row (i.e. using the data
+    before and after the update).
+   </para>
+
+   <para>
+    If both evaluations are <literal>true</literal>, it replicates the
+    <command>UPDATE</command> change.
+   </para>
+
+   <para>
+    If both evaluations are <literal>false</literal>, it doesn't replicate
+    the change.
+   </para>
+
+   <para>
+    If only one of the old/new rows matches the row filter expression, the
+    <command>UPDATE</command> is transformed to <command>INSERT</command> or
+    <command>DELETE</command>, to avoid any data inconsistency. The row on the
+    subscriber should reflect what is defined by the row filter expression on
+    the publisher.
+
+   <itemizedlist>
+    <listitem>
+     <para>
+      If the old row satisfies the row filter expression (it was sent to the
+      subscriber) but the new row doesn't, then from a data consistency
+      perspective the old row should be removed from the subscriber.
+      So the <command>UPDATE</command> is transformed into a <command>DELETE</command>.
+     </para>
+    </listitem>
+    <listitem>
+     <para>
+      If the old row doesn't satisfy the row filter expression (it wasn't sent
+      to the subscriber) but the new row does, then from a data consistency
+      perspective the new row should be added to the subscriber.
+      So the <command>UPDATE</command> is transformed into an <command>INSERT</command>.
+     </para>
+    </listitem>
+   </itemizedlist>
+
+Summary:
+<synopsis>
+Case 1: old-row (no match)    new-row (no match)   -->   (drop change)
+Case 2: old-row (no match)    new-row (match)      -->   INSERT
+Case 3: old-row (match)       new-row (no match)   -->   DELETE
+Case 4: old-row (match)       new-row (match)      -->   UPDATE
+</synopsis>
+   </para>
+
+  </sect2>
+
+  <sect2 id="logical-replication-row-filter-partitioned-table">
+   <title>Partitioned Tables</title>
+
+   <para>
+    If the publication contains a partitioned table, the publication parameter
+    <literal>publish_via_partition_root</literal> determines which row filter
+    is used.
+    <itemizedlist>
+
+     <listitem>
+      <para>
+       If <literal>publish_via_partition_root</literal> is <literal>false</literal>
+       (default), each <emphasis>partition's</emphasis> row filter is used.
+      </para>
+     </listitem>
+
+     <listitem>
+      <para>
+       If <literal>publish_via_partition_root</literal> is <literal>true</literal>,
+       the <emphasis>root partitioned table's</emphasis> row filter is used.
+      </para>
+     </listitem>
+
+    </itemizedlist>
+   </para>
+
+  </sect2>
+
+  <sect2 id="logical-replication-row-filter-initial-data-sync">
+   <title>Initial Data Synchronization</title>
+
+   <para>
+    If the subscription requires copying pre-existing table data
+    and a publication contains <literal>WHERE</literal> clauses, only data that
+    satisfies the row filter expressions is copied to the subscriber.
+   </para>
+
+   <para>
+    If the subscription has several publications in which a table has been
+    published with different <literal>WHERE</literal> clauses, rows that satisfy
+    <emphasis>any</emphasis> of the expressions will be copied. See
+    <xref linkend="logical-replication-row-filter-combining"/> for details.
+   </para>
+
+   <note>
+    <para>
+     Publication <literal>publish</literal> operations are ignored when copying pre-existing table data.
+    </para>
+   </note>
+
+   <note>
+    <para>
+     If the subscriber is in a release prior to 15, copying pre-existing data
+     doesn't use row filters even if they are defined in the publication.
+     This is because old releases can only copy the entire table data.
+    </para>
+   </note>
+
+  </sect2>
+
+  <sect2 id="logical-replication-row-filter-combining">
+   <title>Combining Multiple Row Filters</title>
+
+   <para>
+    If the subscription has several publications in which the same table has
+    been published with different row filters (for the same <literal>publish</literal>
+    operation), those expressions get OR'ed together, so that rows satisfying
+    <emphasis>any</emphasis> of the expressions will be replicated.
+   </para>
+
+   <para>
+    This means all the other row filters for the same table become redundant if:
+    <itemizedlist>
+
+     <listitem>
+      <para>
+       one of the publications has no row filter.
+      </para>
+     </listitem>
+
+     <listitem>
+      <para>
+       one of the publications was created using <literal>FOR ALL TABLES</literal>.
+       This clause does not allow row filters.
+      </para>
+     </listitem>
+
+     <listitem>
+      <para>
+       one of the publications was created using
+       <literal>FOR ALL TABLES IN SCHEMA</literal> and the table belongs to
+       the referred schema. This clause does not allow row filters.
+      </para>
+     </listitem>
+
+    </itemizedlist>
+   </para>
+
+  </sect2>
+
+  <sect2 id="logical-replication-row-filter-examples">
+   <title>Examples</title>
+
+   <para>
+    Create some tables to be used in the following examples.
+<programlisting>
+testpub=# CREATE TABLE t1(a int, b int, c text, primary key(a,c));
+CREATE TABLE
+testpub=# CREATE TABLE t2(d int, e int, f int, primary key(d));
+CREATE TABLE
+testpub=# CREATE TABLE t3(g int, h int, i int, primary key(g));
+CREATE TABLE
+</programlisting>
+   </para>
+
+   <para>
+    Create some publications.
+<programlisting>
+testpub=# CREATE PUBLICATION p1 FOR TABLE t1 WHERE (a > 5 AND c = 'NSW');
+CREATE PUBLICATION
+testpub=# CREATE PUBLICATION p2 FOR TABLE t1, t2 WHERE (e = 99);
+CREATE PUBLICATION
+testpub=# CREATE PUBLICATION p3 FOR TABLE t2 WHERE (d = 10), t3 WHERE (g = 10);
+CREATE PUBLICATION
+</programlisting>
+    <itemizedlist>
+     <listitem>
+      <para>
+       Publication <literal>p1</literal> has 1 table (<literal>t1</literal>) and
+       that table has a row filter.
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       Publication <literal>p2</literal> has 2 tables. Table <literal>t1</literal>
+       has no row filter, and table <literal>t2</literal> has a row filter.
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       Publication <literal>p3</literal> has 2 tables, and both of them have a
+       row filter.
+      </para>
+     </listitem>
+    </itemizedlist>
+   </para>
+
+   <para>
+    The <application>psql</application> command <command>\dRp+</command> shows
+    the row filter expressions (if defined) for each table of the publications.
+<programlisting>
+testpub=# \dRp+
+                               Publication p1
+  Owner   | All tables | Inserts | Updates | Deletes | Truncates | Via root 
+----------+------------+---------+---------+---------+-----------+----------
+ postgres | f          | t       | t       | t       | t         | f
+Tables:
+    "public.t1" WHERE ((a > 5) AND (c = 'NSW'::text))
+
+                               Publication p2
+  Owner   | All tables | Inserts | Updates | Deletes | Truncates | Via root 
+----------+------------+---------+---------+---------+-----------+----------
+ postgres | f          | t       | t       | t       | t         | f
+Tables:
+    "public.t1"
+    "public.t2" WHERE (e = 99)
+
+                               Publication p3
+  Owner   | All tables | Inserts | Updates | Deletes | Truncates | Via root 
+----------+------------+---------+---------+---------+-----------+----------
+ postgres | f          | t       | t       | t       | t         | f
+Tables:
+    "public.t2" WHERE (d = 10)
+    "public.t3" WHERE (g = 10)
+</programlisting>
+   </para>
+
+   <para>
+    The <application>psql</application> command <command>\d</command> shows what
+    publications the table is a member of, as well as that table's row filter
+    expression (if defined) in those publications.
+<programlisting>
+testpub=# \d t1
+                 Table "public.t1"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           | not null | 
+ b      | integer |           |          | 
+ c      | text    |           | not null | 
+Indexes:
+    "t1_pkey" PRIMARY KEY, btree (a, c)
+Publications:
+    "p1" WHERE ((a > 5) AND (c = 'NSW'::text))
+    "p2"
+
+testpub=# \d t2
+                 Table "public.t2"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ d      | integer |           | not null | 
+ e      | integer |           |          | 
+ f      | integer |           |          | 
+Indexes:
+    "t2_pkey" PRIMARY KEY, btree (d)
+Publications:
+    "p2" WHERE (e = 99)
+    "p3" WHERE (d = 10)
+
+testpub=# \d t3
+                 Table "public.t3"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ g      | integer |           | not null | 
+ h      | integer |           |          | 
+ i      | integer |           |          | 
+Indexes:
+    "t3_pkey" PRIMARY KEY, btree (g)
+Publications:
+    "p3" WHERE (g = 10)
+</programlisting>
+    <itemizedlist>
+     <listitem>
+      <para>
+       Table <literal>t1</literal> is a member of 2 publications, but
+       has a row filter only in <literal>p1</literal>.
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       Table <literal>t2</literal> is a member of 2 publications, and
+       has a different row filter in each of them.
+      </para>
+     </listitem>
+    </itemizedlist>
+   </para>
+
+   <para>
+    On the subscriber node, create a table <literal>t1</literal> with the same
+    definition as the one on the publisher, and also create the subscription
+    <literal>s1</literal> that subscribes to the publication <literal>p1</literal>.
+<programlisting>
+testsub=# CREATE TABLE t1(a int, b int, c text, primary key(a,c));
+CREATE TABLE
+testsub=# CREATE SUBSCRIPTION s1
+testsub-# CONNECTION 'host=localhost dbname=testpub application_name=s1'
+testsub-# PUBLICATION p1;
+CREATE SUBSCRIPTION
+</programlisting>
+   </para>
+
+   <para>
+    Insert some rows.
+<programlisting>
+testpub=# INSERT INTO t1 VALUES (2, 102, 'NSW');
+INSERT 0 1
+testpub=# INSERT INTO t1 VALUES (3, 103, 'QLD');
+INSERT 0 1
+testpub=# INSERT INTO t1 VALUES (4, 104, 'VIC');
+INSERT 0 1
+testpub=# INSERT INTO t1 VALUES (5, 105, 'ACT');
+INSERT 0 1
+testpub=# INSERT INTO t1 VALUES (6, 106, 'NSW');
+INSERT 0 1
+testpub=# INSERT INTO t1 VALUES (7, 107, 'NT');
+INSERT 0 1
+testpub=# INSERT INTO t1 VALUES (8, 108, 'QLD');
+INSERT 0 1
+testpub=# INSERT INTO t1 VALUES (9, 109, 'NSW');
+INSERT 0 1
+
+testpub=# SELECT * FROM t1;
+ a |  b  |  c
+---+-----+-----
+ 2 | 102 | NSW
+ 3 | 103 | QLD
+ 4 | 104 | VIC
+ 5 | 105 | ACT
+ 6 | 106 | NSW
+ 7 | 107 | NT
+ 8 | 108 | QLD
+ 9 | 109 | NSW
+(8 rows)
+</programlisting>
+<programlisting>
+testsub=# SELECT * FROM t1;
+ a |  b  |  c  
+---+-----+-----
+ 6 | 106 | NSW
+ 9 | 109 | NSW
+(2 rows)
+</programlisting>
+    <itemizedlist>
+     <listitem>
+      <para>
+       Only the rows satisfying the <literal>t1 WHERE</literal>
+       clause of publication <literal>p1</literal> are replicated.
+      </para>
+     </listitem>
+    </itemizedlist>
+   </para>
+
+   <para>
+    Update some data, where the old and new row values both
+    satisfy the <literal>t1 WHERE</literal> clause of publication
+    <literal>p1</literal>.
+<programlisting>
+testpub=# UPDATE t1 SET b = 999 WHERE a = 6;
+UPDATE 1
+
+testpub=# SELECT * FROM t1;
+ a |  b  |  c  
+---+-----+-----
+ 2 | 102 | NSW
+ 3 | 103 | QLD
+ 4 | 104 | VIC
+ 5 | 105 | ACT
+ 7 | 107 | NT
+ 8 | 108 | QLD
+ 9 | 109 | NSW
+ 6 | 999 | NSW
+(8 rows)
+</programlisting>
+<programlisting>
+testsub=# SELECT * FROM t1;
+ a |  b  |  c  
+---+-----+-----
+ 9 | 109 | NSW
+ 6 | 999 | NSW
+(2 rows)
+</programlisting>
+    <itemizedlist>
+     <listitem>
+      <para>
+       The <command>UPDATE</command> replicates the change as normal.
+      </para>
+     </listitem>
+    </itemizedlist>
+   </para>
+
+   <para>
+    Update some data, where the old row values did not satisfy
+    the <literal>t1 WHERE</literal> clause of publication <literal>p1</literal>,
+    but the new row values do satisfy it.
+<programlisting>
+testpub=# UPDATE t1 SET a = 555 WHERE a = 2;
+UPDATE 1
+
+testpub=# SELECT * FROM t1;
+  a  |  b  |  c  
+-----+-----+-----
+   3 | 103 | QLD
+   4 | 104 | VIC
+   5 | 105 | ACT
+   7 | 107 | NT
+   8 | 108 | QLD
+   9 | 109 | NSW
+   6 | 999 | NSW
+ 555 | 102 | NSW
+(8 rows)
+</programlisting>
+<programlisting>
+testsub=# SELECT * FROM t1;
+  a  |  b  |  c  
+-----+-----+-----
+   9 | 109 | NSW
+   6 | 999 | NSW
+ 555 | 102 | NSW
+(3 rows)
+</programlisting>
+    <itemizedlist>
+     <listitem>
+      <para>
+       The <command>UPDATE</command> is transformed into an <command>INSERT</command>
+       and the change is replicated.
+      </para>
+     </listitem>
+    </itemizedlist>
+   </para>
+
+   <para>
+    Update some data, where the old row values satisfied
+    the <literal>t1 WHERE</literal> clause of publication <literal>p1</literal>,
+    but the new row values do not satisfy it.
+<programlisting>
+testpub=# UPDATE t1 SET c = 'VIC' WHERE a = 9;
+UPDATE 1
+
+testpub=# SELECT * FROM t1;
+  a  |  b  |  c  
+-----+-----+-----
+   3 | 103 | QLD
+   4 | 104 | VIC
+   5 | 105 | ACT
+   7 | 107 | NT
+   8 | 108 | QLD
+   6 | 999 | NSW
+ 555 | 102 | NSW
+   9 | 109 | VIC
+(8 rows)
+</programlisting>
+<programlisting>
+testsub=# SELECT * FROM t1;
+  a  |  b  |  c  
+-----+-----+-----
+   6 | 999 | NSW
+ 555 | 102 | NSW
+(2 rows)
+</programlisting>
+    <itemizedlist>
+     <listitem>
+      <para>
+       The <command>UPDATE</command> is transformed into a <command>DELETE</command>
+       and the change is replicated.
+      </para>
+     </listitem>
+    </itemizedlist>
+   </para>
+
+  </sect2>
+
+ </sect1>
+
  <sect1 id="logical-replication-conflicts">
   <title>Conflicts</title>
 
diff --git a/doc/src/sgml/ref/create_publication.sgml b/doc/src/sgml/ref/create_publication.sgml
index fb2d013..23d883c 100644
--- a/doc/src/sgml/ref/create_publication.sgml
+++ b/doc/src/sgml/ref/create_publication.sgml
@@ -254,6 +254,8 @@ CREATE PUBLICATION <replaceable class="parameter">name</replaceable>
    <literal>publish_via_partition_root</literal> determines if it uses the
    partition's row filter (if the parameter is false, the default) or the root
    partitioned table's row filter.
+   See <xref linkend="logical-replication-row-filter"/> for details about row
+   filters.
   </para>
 
   <para>
-- 
1.8.3.1