From a6b57bf3a88c5df614b5dede99af3e99fe8e8089 Mon Sep 17 00:00:00 2001
From: Corey Huinker <corey.huinker@gmail.com>
Date: Mon, 12 Feb 2024 11:32:49 -0500
Subject: [PATCH v3] Documentation: Show alternatives to LIMIT on UPDATE and
 DELETE

Show examples of how to simulate UPDATE or DELETE with a LIMIT clause.

These examples also serve to show the existence and utility of ctid self-joins.
---
 doc/src/sgml/ref/delete.sgml | 18 ++++++++++++++++++
 doc/src/sgml/ref/update.sgml | 37 ++++++++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+)

diff --git a/doc/src/sgml/ref/delete.sgml b/doc/src/sgml/ref/delete.sgml
index 1b81b4e7d7..1544a28e18 100644
--- a/doc/src/sgml/ref/delete.sgml
+++ b/doc/src/sgml/ref/delete.sgml
@@ -234,6 +234,24 @@ DELETE FROM films
    In some cases the join style is easier to write or faster to
    execute than the sub-select style.
   </para>
+  <para id="delete-limit">
+   While there is no <literal>LIMIT</literal> clause for
+   <command>DELETE</command>, it is possible to get a similar effect
+   using the method for <command>UPDATE</command> operations described
+   in <link linkend="update-limit">the documentation of <command>UPDATE</command></link>:
+<programlisting>
+WITH delete_batch AS (
+  SELECT l.ctid
+  FROM user_logs AS l
+  WHERE l.status = 'archived'
+  ORDER BY l.creation_date
+  LIMIT 10000
+  FOR UPDATE
+)
+DELETE FROM user_logs AS ul
+USING delete_batch AS del
+WHERE ul.ctid = del.ctid;
+</programlisting></para>
  </refsect1>
 
  <refsect1>
diff --git a/doc/src/sgml/ref/update.sgml b/doc/src/sgml/ref/update.sgml
index 2ab24b0523..ed3dd029c7 100644
--- a/doc/src/sgml/ref/update.sgml
+++ b/doc/src/sgml/ref/update.sgml
@@ -442,6 +442,43 @@ COMMIT;
 <programlisting>
 UPDATE films SET kind = 'Dramatic' WHERE CURRENT OF c_films;
 </programlisting></para>
+  <para id="update-limit">
+   Updates affecting many rows can have negative effects on system performance,
+   such as table bloat, increased replica lag, increased lock contention,
+   and possible failure of the operation due to a deadlock. In such situations
+   it can make sense to perform the operation in smaller batches. Performing a
+   <command>VACUUM</command> operation on the table between batches can help
+   reduce table bloat. The
+   <glossterm linkend="glossary-sql-standard">SQL standard</glossterm> does
+   not define a <literal>LIMIT</literal> clause for <command>UPDATE</command>
+   operations, but it is possible to get a similar effect through the use of a
+   <link linkend="queries-with">Common Table Expression</link> and an
+   efficient self-join via the system column
+   <link linkend="ddl-system-columns-ctid">ctid</link>:
+<programlisting>
+WITH exceeded_max_retries AS (
+  SELECT w.ctid
+  FROM work_item AS w
+  WHERE w.status = 'active'
+  AND w.num_retries > 10
+  ORDER BY w.retry_timestamp
+  LIMIT 5000
+  FOR UPDATE
+)
+UPDATE work_item
+SET status = 'failed'
+FROM exceeded_max_retries AS emr
+WHERE work_item.ctid = emr.ctid;
+</programlisting>
+    If lock contention is a concern, then <literal>SKIP LOCKED</literal> can
+    be added to the <acronym>CTE</acronym>. However, one final
+    <command>UPDATE</command> without <literal>SKIP LOCKED</literal> or
+    <literal>LIMIT</literal> will be needed to ensure that no matching rows
+    were overlooked. The use of an <literal>ORDER BY</literal> clause allows
+    the command to prioritize which rows will be locked and updated. This can
+    also reduce contention with other update operations if they use the same
+    ordering.
+  </para>
  </refsect1>
 
  <refsect1>
-- 
2.43.0

