From 9c3f7b19ab8364cf151ca8b053150d29d37c390c Mon Sep 17 00:00:00 2001
From: Laurenz Albe <laurenz.albe@cybertec.at>
Date: Sun, 30 May 2021 13:15:55 +0200
Subject: [PATCH] Expand the docs on PostgreSQL extension coding and background
 worker development a little so that key topics like allocation, interrupt
 handling, exit callbacks, transaction callbacks, PG_TRY()/PG_CATCH(),
 resource owners, transaction and snapshot state, etc are at least briefly
 mentioned with a few pointers to where to learn more.

Add some detail on background worker signal handling.

Author: Craig Ringer
Reviewed-By: Laurenz Albe, Bharath Rupireddy
Discussion: https://postgr.es/m/CAGRY4nxanz6MQrOv_babTjg02VwcXkq+eBsQhqdFX6-PgTapDQ@mail.gmail.com
---
 doc/src/sgml/bgworker.sgml |  94 +++++++++++++++++++--
 doc/src/sgml/xfunc.sgml    | 163 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 248 insertions(+), 9 deletions(-)
diff --git a/doc/src/sgml/bgworker.sgml b/doc/src/sgml/bgworker.sgml
index 7fd673ab54..a99d048ed1 100644
--- a/doc/src/sgml/bgworker.sgml
+++ b/doc/src/sgml/bgworker.sgml
@@ -1,6 +1,6 @@
 <!-- doc/src/sgml/bgworker.sgml -->
 
-<chapter id="bgworker">
+<chapter id="bgworker" xreflabel="Background Worker Processes">
  <title>Background Worker Processes</title>
 
  <indexterm zone="bgworker">
@@ -29,6 +29,15 @@
   </para>
  </warning>
 
+ <para>
+  All code that will be executed in PostgreSQL background workers is expected
+  to follow the basic rules set out for all PostgreSQL extension code in <xref
+  linkend="xfunc-writing-code"/>.
+ </para>
+
+ <sect1>
+  <title>Background Worker initialization and resource allocation</title>
+
  <para>
   Background workers can be initialized at the time that
   <productname>PostgreSQL</productname> is started by including the module name in
@@ -95,6 +104,10 @@ typedef struct BackgroundWorker
        buffers, or any custom data structures which the worker itself may
        wish to create and use.
       </para>
+      <para>
+       See <xref linkend="xfunc-shared-addin"/> for information on how to
+       allocate these resources.
+      </para>
      </listitem>
     </varlistentry>
 
@@ -212,9 +225,9 @@ typedef struct BackgroundWorker
    Signals are initially blocked when control reaches the
    background worker's main function, and must be unblocked by it; this is to
    allow the process to customize its signal handlers, if necessary.
-   Signals can be unblocked in the new process by calling
-   <function>BackgroundWorkerUnblockSignals</function> and blocked by calling
-   <function>BackgroundWorkerBlockSignals</function>.
+   It is important that all background workers set up and unblock signal
+   handling before they enter their main loops.  Signal handling in background
+   workers is discussed separately in <xref linkend="bgworker-signals"/>.
   </para>
 
   <para>
@@ -296,13 +309,80 @@ typedef struct BackgroundWorker
   </para>
 
   <para>
-   The <filename>src/test/modules/worker_spi</filename> module
-   contains a working example,
-   which demonstrates some useful techniques.
+   Background workers that require inter-process communication and/or
+   synchronisation should use <productname>PostgreSQL</productname>'s built-in
+   IPC and concurrency features as discussed in
+   <xref linkend="xfunc-concurrency-synchronization"/> and
+   <xref linkend="xfunc-ipc"/>.
+  </para>
+
+  <para>
+   If a background worker needs to sleep or wait for activity, it should
+   always use
+   <link linkend="xfunc-sleeping-interrupts-blocking"><productname>PostgreSQL</productname>'s
+   interupt-aware APIs</link> for the purpose.  Do not use
+   <function>usleep()</function>, <function>system()</function> or other
+   blocking system calls.
+  </para>
+
+  <para>
+   The modules <filename>src/test/modules/worker_spi</filename> and
+   <filename>src/test/modules/test_shm_mq</filename> contain working examples
+   that demonstrates some useful techniques.
   </para>
 
   <para>
    The maximum number of registered background workers is limited by
    <xref linkend="guc-max-worker-processes"/>.
   </para>
+
+ </sect1>
+
+ <sect1 id="bgworker-signals" xreflabel="Signal Handling in Background Workers">
+  <title>Signal Handling in Background Workers</title>
+
+  <para>
+   Signals can be unblocked in the new process by calling
+   <function>BackgroundWorkerUnblockSignals</function> and blocked by calling
+   <function>BackgroundWorkerBlockSignals</function>.
+   The default signal handlers set up for background workers
+   <emphasis>do not interrupt queries or blocking calls into other
+   <productname>PostgreSQL</productname> code</emphasis>
+   so they are only suitable for simple background workers that frequently and
+   predictably return control to their main loop.  If your worker uses the
+   default background worker signal handling, it should call
+   <function>HandleMainLoopInterrupts()</function> on each pass through its
+   main loop.
+  </para>
+
+  <para>
+   Background workers that may make blocking calls into core
+   <productname>PostgreSQL</productname> code and/or run user-supplied queries
+   should generally replace the default background worker signal handlers with the
+   handlers used for normal user backends.  This will ensure that the worker will
+   respond in a timely manner to a termination request, query cancel request,
+   recovery conflict interrupt, deadlock detector request, etc.
+   To set up these handlers, run the following before unblocking interrupts:
+<programlisting>
+    pqsignal(SIGTERM, die);
+    pqsignal(SIGUSR1, procsignal_sigusr1_handler);
+    pqsignal(SIGHUP, SignalHandlerForConfigReload);
+</programlisting>
+   Then ensure that your main loop and any other code that could run for some
+   time contains <function>CHECK_FOR_INTERRUPTS()</function> calls.
+   A background worker using these signal handlers must use <link
+   linkend="xfunc-resource-management"><productname>PostgreSQL</productname>'s
+   resource management APIs and callbacks</link> to handle cleanup rather
+   than relying on control returning to the main loop, because the signal
+   handlers may call <function>proc_exit()</function> directly.
+   This is recommended practice for all types of extension code anyway.
+  </para>
+
+  <para>
+   See the comments in <filename>src/include/miscadmin.h</filename>
+   for more details on signal handling.
+  </para>
+
+ </sect1>
+
 </chapter>
diff --git a/doc/src/sgml/xfunc.sgml b/doc/src/sgml/xfunc.sgml
index 41bcc5b79d..cc79871f04 100644
--- a/doc/src/sgml/xfunc.sgml
+++ b/doc/src/sgml/xfunc.sgml
@@ -2593,7 +2593,7 @@ CREATE FUNCTION concat_text(text, text) RETURNS text
     </para>
    </sect2>
 
-   <sect2>
+   <sect2 id="xfunc-writing-code" xreflabel="Writing PostgreSQL Extension Code">
     <title>Writing Code</title>
 
     <para>
@@ -2650,7 +2650,8 @@ CREATE FUNCTION concat_text(text, text) RETURNS text
         <function>malloc</function> and <function>free</function>.
         The memory allocated by <function>palloc</function> will be
         freed automatically at the end of each transaction, preventing
-        memory leaks.
+        memory leaks. See <filename>src/backend/utils/mmgr/README</filename>
+        for more details on PostgreSQL's memory management.
        </para>
       </listitem>
 
@@ -2694,6 +2695,164 @@ CREATE FUNCTION concat_text(text, text) RETURNS text
         error messages to this effect.
        </para>
       </listitem>
+
+      <listitem>
+       <para>
+        Use <function>elog()</function> and <function>ereport()</function> for
+        logging output and raising errors instead of any direct stdio calls.
+        Do not attempt to use C++ exceptions or Windows Structured Exception
+        Handling, and never call <function>exit()</function> directly
+        (for a discussion of extensions in C++, see <xref linkend="extend-cpp"/>).
+        PostgreSQL uses its own <function>longjmp()</function> based
+        exception system for <literal>ERROR</literal>-level <function>elog()</function>
+        and <function>ereport()</function> calls. These can be handled and forwarded with
+        <literal>PG_TRY()</literal> and <literal>PG_RE_THROW()</literal>:
+<programlisting>
+          PG_TRY();
+          {
+            ...
+          }
+          PG_CATCH();
+          {
+            ....
+            /* Do not attempt to swallow the exception, always rethrow */
+            PG_RE_THROW();
+          }
+          PG_END_TRY();
+</programlisting>
+        See <filename>src/include/utils/elog.h</filename> and
+        <filename>src/backend/utils/error/elog.c</filename> for details.
+       </para>
+      </listitem>
+
+      <listitem id="xfunc-threading">
+       <para>
+        Individual <productname>PostgreSQL</productname> backends are single-threaded.
+        Never call any <productname>PostgreSQL</productname> function or access any
+        <productname>PostgreSQL</productname>-managed data structure from a thread
+        other than the main thread.  If at all possible, your extension should not
+        start any threads.  <productname>PostgreSQL</productname> generally uses
+        subprocesses (see <xref linkend="bgworker"/>).
+       </para>
+      </listitem>
+
+      <listitem id="xfunc-file-and-pipe-io">
+       <para>
+        To execute subprocesses and open files, use the routines provided by
+        the file descriptor manager like <function>BasicOpenFile</function>
+        and <function>OpenPipeStream</function> instead of a direct
+        <function>open()</function>, <function>fopen()</function>,
+        <function>popen()</function> or <function>system()</function>.
+        See <filename>src/include/storage/fd.h</filename>.
+       </para>
+      </listitem>
+
+      <listitem id="xfunc-sleeping-interrupts-blocking"
+                xreflabel="Sleeping, Blocking and Interrupt Handling in Extensions">
+       <para>
+        Extension code should always be structured as a non-blocking
+        loop over any ongoing external activities. Avoid making blocking calls
+        into 3rd party libraries or uninterruptible
+        system calls. Use <productname>PostgreSQL</productname>'s provided wait
+        primitives like <function>WaitEventSetWait</function> where necessary. Any
+        potentially long-running loop should periodically call <function>
+        CHECK_FOR_INTERRUPTS()</function> to give <productname>PostgreSQL</productname>
+        a chance to interrupt the function in the case of events like a shutdown
+        request or query cancellation.
+        This means that you should nor use <function>sleep()</function> or
+        <function>usleep()</function> for any nontrivial amount of time &mdash;
+        use <function>WaitLatch()</function> or its variants instead.
+        For details on interrupt handling, see comments near the definition of the
+        <literal>CHECK_FOR_INTERRUPTS()</literal> macro in
+        <filename>src/include/util/miscadmin.h</filename>.
+       </para>
+      </listitem>
+
+      <listitem id="xfunc-resource-management"
+                xreflabel="Resource Management in Extensions">
+       <para>
+        Use <productname>PostgreSQL</productname>'s resource tracking and
+        registration mechanisms and the associated callbacks instead of relying
+        on flow control based cleanup. Your extension function could be terminated
+        in mid-execution by <productname>PostgreSQL</productname> if any function
+        that it calls performs a <function>CHECK_FOR_INTERRUPTS()</function> check.
+        If this happens, it may not get the opportunity to take cleanup actions
+        via a <literal>PG_TRY()</literal> ... <literal>PG_CATCH()</literal>
+        block.  So all cleanup of resources not already managed by the
+        <productname>PostgreSQL</productname> runtime must be handled using
+        appropriate callbacks provided by <productname>PostgreSQL</productname>.
+        These include, but are not limited to, these callback registration APIs:
+        <itemizedlist>
+         <listitem><para><function>before_shmem_exit()</function></para></listitem>
+         <listitem><para><function>on_shmem_exit()</function></para></listitem>
+         <listitem><para><function>on_proc_exit()</function></para></listitem>
+         <listitem><para><literal>PG_ENSURE_ERROR_CLEANUP()</literal></para></listitem>
+         <listitem><para><function>RegisterXactCallback()</function></para></listitem>
+         <listitem><para><function>RegisterResourceReleaseCallback()</function></para></listitem>
+        </itemizedlist>
+        More information on resource management, cleanup and lifecycle can be
+        found in the <productname>PostgreSQL</productname> headers and sources.
+       </para>
+      </listitem>
+
+      <listitem id="xfunc-concurrency-synchronization"
+                xreflabel="Concurrency and Synchronization in Extensions">
+       <para>
+        Use the <productname>PostgreSQL</productname> runtime's concurrency and
+        synchronisation primitives rather than platform-native primitives when
+        coordinating activity between <productname>PostgreSQL</productname> processes.
+        These include signals and ProcSignal multiplexed signals,
+        heavyweight locks ("normal" database object locks),
+        <link linkend="xfunc-shared-addin">LWLock</link>s, Spinlocks, latches,
+        condition variables, and more.
+       </para>
+      </listitem>
+
+      <listitem id="xfunc-ipc"
+                xreflabel="Inter-Process Communication in Extensions">
+       <para>
+        Sometimes, relation-based state management is not sufficient to meet the
+        needs of an extension.  In that case, the extension may need to use
+        <productname>PostgreSQL</productname>'s shared-memory based inter-process
+        communication features, and should certainly do so instead of inventing
+        its own or trying to use platform level features. An extension may use
+        <link linkend="xfunc-shared-addin">"raw" shared memory requested from
+        the postmaster at startup</link> or higher level features like dynamic
+        shared memory segments (<acronym>DSM</acronym>),
+        dynamic shared areas (<acronym>DSA</acronym>),
+        or shared memory message queues (<acronym>shm_mq</acronym>).
+        Examples of the use of some these features can be found in the
+        <filename>src/test/modules/test_shm_mq/</filename> example extension
+        or other parts of the source code.
+       </para>
+      </listitem>
+
+      <listitem id="xfunc-relcache-syscache">
+       <para>
+        Look up system catalogs and table metadata using the relcache and syscache
+        APIs (<function>SearchSysCache...</function>,
+        <function>relation_open()</function>, etc) rather than attempting to run
+        SQL queries to fetch the information.  Ensure that your function holds
+        the necessary locks on the target objects.  Take care not to make any calls
+        that could trigger cache invalidations while still accessing any
+        syscache cache entries, as this can cause bugs that are hard to find.
+        See <filename>src/backend/utils/cache/syscache.c</filename>,
+        <filename>src/backend/utils/cache/relcache.c</filename>,
+        <filename>src/backend/access/common/relation.c</filename> and their
+        headers for details.
+       </para>
+      </listitem>
+
+      <listitem>
+       <para>
+        There are many README files in the <productname>PostgreSQL</productname>
+        source tree that discuss specific development topics in more detail than
+        be covered here.  Many subsystems' source files also have detailed comments
+        that explain their correct use.
+        The existing sources are your best reference for more complex tasks.
+       </para>
+      </listitem>
+
      </itemizedlist>
     </para>
    </sect2>
-- 
2.26.3