From c6179c3cf1395884d4a42b5ad983542a3fc4887c Mon Sep 17 00:00:00 2001
From: Corey Huinker <corey.huinker@gmail.com>
Date: Tue, 31 Oct 2023 03:52:41 -0400
Subject: [PATCH v2] Currently we do not show any examples of using ctid
 anywhere, nor do we address the often-requested but problematic use case of
 having a LIMIT clause on UPDATE and DELETE statements. These examples are a
 subtle way of addressing both those concerns.

---
 doc/src/sgml/ref/delete.sgml | 29 +++++++++++++++++++++++++++++
 doc/src/sgml/ref/select.sgml | 24 ++++++++++++++++++++++++
 doc/src/sgml/ref/update.sgml | 23 +++++++++++++++++++++++
 3 files changed, 76 insertions(+)

diff --git a/doc/src/sgml/ref/delete.sgml b/doc/src/sgml/ref/delete.sgml
index 1b81b4e7d7..4e08c6c85e 100644
--- a/doc/src/sgml/ref/delete.sgml
+++ b/doc/src/sgml/ref/delete.sgml
@@ -234,6 +234,35 @@ DELETE FROM films
    In some cases the join style is easier to write or faster to
    execute than the sub-select style.
   </para>
+  <para>
+   In situations where a single operation would consume too many resources,
+   either causing the operation to fail or negatively impacting other workloads,
+   it may be desirable to break up a large <command>DELETE</command> into
+   multiple separate commands. While doing this will actually increase the
+   total amount of work performed, it can break the work into chunks that have
+   a more acceptable impact on other workloads.  The
+   <glossterm linkend="glossary-sql-standard">SQL standard</glossterm> does
+   not define a <literal>LIMIT</literal> clause for <command>DELETE</command>
+   operations, but it is possible get the equivalent functionality through the
+   <literal>USING</literal> clause to a
+   <link linkend="queries-with">Common Table Expression</link> which identifies
+   a subset of rows to be deleted, locks those rows, and returns their system
+   column <link linkend="ddl-system-columns-ctid">ctid</link> values:
+<programlisting>
+WITH delete_batch AS (
+  SELECT l.ctid
+  FROM user_logs AS l
+  WHERE l.status = 'archived'
+  ORDER BY l.creation_date
+  LIMIT 10000
+  FOR UPDATE
+)
+DELETE FROM user_logs AS ul
+USING delete_branch AS del
+WHERE ul.ctid = del.ctid;
+</programlisting>
+  This allows for flexible search criteria within the CTE and an efficient self-join.
+  </para>
  </refsect1>
 
  <refsect1>
diff --git a/doc/src/sgml/ref/select.sgml b/doc/src/sgml/ref/select.sgml
index 42d78913cf..10e10ea249 100644
--- a/doc/src/sgml/ref/select.sgml
+++ b/doc/src/sgml/ref/select.sgml
@@ -1679,6 +1679,30 @@ SELECT * FROM (SELECT * FROM mytable FOR UPDATE) ss WHERE col1 = 5;
     condition is not textually within the sub-query.
    </para>
 
+   <para>
+    In cases where a <acronym>DML</acronym> operation involving many rows
+    must be performed, and that table experiences numerous other simultaneous
+    <acronym>DML</acronym> operations, a <literal>FOR UPDATE</literal> clause
+    used in conjunction with <literal>SKIP LOCKED</literal> can be useful for
+    performing partial <acronym>DML</acronym> operations:
+
+<programlisting>
+WITH mods AS (
+    SELECT ctid FROM mytable
+    WHERE status = 'active' AND retries > 10
+    ORDER BY id FOR UPDATE SKIP LOCKED
+)
+UPDATE mytable SET status = 'failed'
+FROM mods WHERE mytable.ctid = mods.ctid;
+</programlisting>
+
+    This allows the <acronym>DML</acronym> operation to be performed in parts, avoiding locking,
+    until such time as the set of rows that remain to be modified is small enough
+    that the locking will not affect overall performance, at which point the same
+    statement can be issued without the <literal>SKIP LOCKED</literal> clause to ensure
+    that no rows were overlooked. This technique has the additional benefit that it can reduce
+    the overal bloat of the updated table if the table can be vacuumed in between batch updates.
+   </para>
   <para>
    Previous releases failed to preserve a lock which is upgraded by a later
    savepoint.  For example, this code:
diff --git a/doc/src/sgml/ref/update.sgml b/doc/src/sgml/ref/update.sgml
index 2ab24b0523..c045d5dd49 100644
--- a/doc/src/sgml/ref/update.sgml
+++ b/doc/src/sgml/ref/update.sgml
@@ -442,6 +442,29 @@ COMMIT;
 <programlisting>
 UPDATE films SET kind = 'Dramatic' WHERE CURRENT OF c_films;
 </programlisting></para>
+
+  <para>
+   To break up a large <command>UPDATE</command> into more manageable pieces,
+   it is possible to do a self-join on the
+   <link linkend="ddl-system-columns-ctid">ctid</link> system column using
+   <link linkend="queries-with">Common Table Expression</link> to limit the
+   number of rows to be updated:
+<programlisting>
+WITH exceeded_max_retries AS (
+  SELECT w.ctid
+  FROM work_item AS w
+  WHERE w.status = 'active'
+  AND w.num_retries > 10
+  ORDER BY w.retry_timestamp
+  FOR UPDATE
+  LIMIT 5000
+)
+UPDATE work_item
+SET status = 'failed'
+FROM exceeded_max_retries AS emr
+WHERE work_item.ctid = emr.ctid
+</programlisting>
+  </para>
  </refsect1>
 
  <refsect1>
-- 
2.41.0

