From 1b641b201c1745ca2a2628c21791329ebc87d1df Mon Sep 17 00:00:00 2001
From: Daniel Gustafsson <daniel@yesql.se>
Date: Wed, 10 Jul 2019 11:25:25 +0200
Subject: [PATCH 2/2] Online checksums patch for v13

Updated from previous patches and now using the global barriers
---
 doc/src/sgml/func.sgml                      |  65 ++
 doc/src/sgml/ref/initdb.sgml                |   7 +-
 doc/src/sgml/reference.sgml                 |   1 +
 doc/src/sgml/wal.sgml                       |  81 +++
 src/backend/access/rmgrdesc/xlogdesc.c      |  16 +
 src/backend/access/transam/xlog.c           | 131 +++-
 src/backend/access/transam/xlogfuncs.c      |  57 ++
 src/backend/catalog/system_views.sql        |   5 +
 src/backend/postmaster/Makefile             |   5 +-
 src/backend/postmaster/bgworker.c           |   7 +
 src/backend/postmaster/checksumhelper.c     | 909 ++++++++++++++++++++++++++++
 src/backend/postmaster/pgstat.c             |   6 +
 src/backend/replication/basebackup.c        |   2 +-
 src/backend/replication/logical/decode.c    |   1 +
 src/backend/storage/ipc/ipci.c              |   2 +
 src/backend/storage/lmgr/lwlocknames.txt    |   1 +
 src/backend/storage/page/README             |   3 +-
 src/backend/storage/page/bufpage.c          |   6 +-
 src/backend/utils/adt/pgstatfuncs.c         |   4 +-
 src/backend/utils/misc/guc.c                |  36 +-
 src/bin/pg_upgrade/controldata.c            |   9 +
 src/bin/pg_upgrade/pg_upgrade.h             |   2 +-
 src/include/access/xlog.h                   |  10 +-
 src/include/access/xlog_internal.h          |   7 +
 src/include/catalog/pg_control.h            |   1 +
 src/include/catalog/pg_proc.dat             |  16 +
 src/include/pgstat.h                        |   4 +-
 src/include/postmaster/checksumhelper.h     |  31 +
 src/include/storage/bufpage.h               |   1 +
 src/include/storage/checksum.h              |   7 +
 src/test/Makefile                           |   3 +-
 src/test/checksum/.gitignore                |   2 +
 src/test/checksum/Makefile                  |  24 +
 src/test/checksum/README                    |  22 +
 src/test/checksum/t/001_standby_checksum.pl | 104 ++++
 35 files changed, 1553 insertions(+), 35 deletions(-)
 create mode 100644 src/backend/postmaster/checksumhelper.c
 create mode 100644 src/include/postmaster/checksumhelper.h
 create mode 100644 src/test/checksum/.gitignore
 create mode 100644 src/test/checksum/Makefile
 create mode 100644 src/test/checksum/README
 create mode 100644 src/test/checksum/t/001_standby_checksum.pl

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index a7abf8c2ee..d9ec3f2516 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -20919,6 +20919,71 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup());
 
   </sect2>
 
+  <sect2 id="functions-admin-checksum">
+   <title>Data Checksum Functions</title>
+
+   <para>
+    The functions shown in <xref linkend="functions-checksums-table" /> can
+    be used to enable or disable data checksums in a running cluster.
+    See <xref linkend="checksums" /> for details.
+   </para>
+
+   <table id="functions-checksums-table">
+    <title>Checksum <acronym>SQL</acronym> Functions</title>
+    <tgroup cols="3">
+     <thead>
+      <row>
+       <entry>Function</entry>
+       <entry>Return Type</entry>
+       <entry>Description</entry>
+      </row>
+     </thead>
+     <tbody>
+      <row>
+       <entry>
+        <indexterm>
+         <primary>pg_enable_data_checksums</primary>
+        </indexterm>
+        <literal><function>pg_enable_data_checksums(<optional><parameter>cost_delay</parameter> <type>int</type>, <parameter>cost_limit</parameter> <type>int</type></optional>)</function></literal>
+       </entry>
+       <entry>
+        void
+       </entry>
+       <entry>
+        <para>
+         Initiates data checksums for the cluster. This will switch the data checksums mode
+         to <literal>in progress</literal> and start a background worker that will process
+         all data in the database and enable checksums for it. When all data pages have had
+         checksums enabled, the cluster will automatically switch to checksums
+         <literal>on</literal>.
+        </para>
+        <para>
+         If <parameter>cost_delay</parameter> and <parameter>cost_limit</parameter> are
+         specified, the speed of the process is throttled using the same principles as
+         <link linkend="runtime-config-resource-vacuum-cost">Cost-based Vacuum Delay</link>.
+        </para>
+       </entry>
+      </row>
+      <row>
+       <entry>
+        <indexterm>
+         <primary>pg_disable_data_checksums</primary>
+        </indexterm>
+        <literal><function>pg_disable_data_checksums()</function></literal>
+       </entry>
+       <entry>
+        void
+       </entry>
+       <entry>
+        Disables data checksums for the cluster.
+       </entry>
+      </row>
+     </tbody>
+    </tgroup>
+   </table>
+
+  </sect2>
+
   <sect2 id="functions-admin-dbobject">
    <title>Database Object Management Functions</title>
 
diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml
index da5c8f5307..b545ad73cb 100644
--- a/doc/src/sgml/ref/initdb.sgml
+++ b/doc/src/sgml/ref/initdb.sgml
@@ -217,9 +217,10 @@ PostgreSQL documentation
         Use checksums on data pages to help detect corruption by the
         I/O system that would otherwise be silent. Enabling checksums
         may incur a noticeable performance penalty. If set, checksums
-        are calculated for all objects, in all databases. All checksum
-        failures will be reported in the
-        <xref linkend="pg-stat-database-view"/> view.
+        are calculated for all objects, in all databases. All
+        checksum failures will be reported in the <xref
+        linkend="pg-stat-database-view"/> view.
+        See <xref linkend="checksums" /> for details.
        </para>
       </listitem>
      </varlistentry>
diff --git a/doc/src/sgml/reference.sgml b/doc/src/sgml/reference.sgml
index cef09dd38b..938ebf8477 100644
--- a/doc/src/sgml/reference.sgml
+++ b/doc/src/sgml/reference.sgml
@@ -284,6 +284,7 @@
    &pgtestfsync;
    &pgtesttiming;
    &pgupgrade;
+   &pgVerifyChecksums;
    &pgwaldump;
    &postgres;
    &postmaster;
diff --git a/doc/src/sgml/wal.sgml b/doc/src/sgml/wal.sgml
index 4eb8feb903..7838f3616a 100644
--- a/doc/src/sgml/wal.sgml
+++ b/doc/src/sgml/wal.sgml
@@ -230,6 +230,87 @@
   </para>
  </sect1>
 
+ <sect1 id="checksums">
+  <title>Data checksums</title>
+  <indexterm>
+   <primary>checksums</primary>
+  </indexterm>
+
+  <para>
+   Data pages are not checksum protected by default, but this can optionally be enabled for a cluster.
+   When enabled, each data page will be assigned a checksum that is updated when the page is
+   written and verified every time the page is read. Only data pages are protected by checksums,
+   internal data structures and temporary files are not.
+  </para>
+
+  <para>
+   Checksums are normally enabled when the cluster is initialized using
+   <link linkend="app-initdb-data-checksums"><application>initdb</application></link>. They
+   can also be enabled or disabled at runtime. In all cases, checksums are enabled or disabled
+   at the full cluster level, and cannot be specified individually for databases or tables.
+  </para>
+
+  <para>
+   The current state of checksums in the cluster can be verified by viewing the value
+   of the read-only configuration variable <xref linkend="guc-data-checksums" /> by
+   issuing the command <command>SHOW data_checksums</command>.
+  </para>
+
+  <para>
+   When attempting to recover from corrupt data it may be necessary to bypass the checksum
+   protection in order to recover data. To do this, temporarily set the configuration parameter
+   <xref linkend="guc-ignore-checksum-failure" />.
+  </para>
+
+  <sect2 id="checksums-enable-disable">
+   <title>On-line enabling of checksums</title>
+
+   <para>
+    Checksums can be enabled or disabled online, by calling the appropriate
+    <link linkend="functions-admin-checksum">functions</link>.
+    Disabling of checksums takes effect immediately when the function is called.
+   </para>
+
+   <para>
+    Enabling checksums will put the cluster in <literal>inprogress</literal> mode.
+    During this time, checksums will be written but not verified. In addition to
+    this, a background worker process is started that enables checksums on all
+    existing data in the cluster. Once this worker has completed processing all
+    databases in the cluster, the checksum mode will automatically switch to
+    <literal>on</literal>.
+   </para>
+
+   <para>
+    The process will initially wait for all open transactions to finish before
+    it starts, so that it can be certain that there are no tables that have been
+    created inside a transaction that has not committed yet and thus would not
+    be visible to the process enabling checksums. It will also, for each database,
+    wait for all pre-existing temporary tables to get removed before it finishes.
+    If long-lived temporary tables are used in the application it may be necessary
+    to terminate these application connections to allow the process to complete.
+    Information about open transactions and connections with temporary tables is
+    written to log.
+   </para>
+
+   <para>
+    If the cluster is stopped while in <literal>inprogress</literal> mode, for
+    any reason, then this process must be restarted manually. To do this,
+    re-execute the function <function>pg_enable_data_checksums()</function>
+    once the cluster has been restarted. It is not possible to resume the work,
+    the process has to start from scratch.
+   </para>
+
+   <note>
+    <para>
+     Enabling checksums can cause significant I/O to the system, as most of the
+     database pages will need to be rewritten, and will be written both to the
+     data files and the WAL.
+    </para>
+   </note>
+
+  </sect2>
+ </sect1>
+
   <sect1 id="wal-intro">
    <title>Write-Ahead Logging (<acronym>WAL</acronym>)</title>
 
diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c
index 33060f3042..ced4ab6d78 100644
--- a/src/backend/access/rmgrdesc/xlogdesc.c
+++ b/src/backend/access/rmgrdesc/xlogdesc.c
@@ -18,6 +18,7 @@
 #include "access/xlog.h"
 #include "access/xlog_internal.h"
 #include "catalog/pg_control.h"
+#include "storage/bufpage.h"
 #include "utils/guc.h"
 #include "utils/timestamp.h"
 
@@ -140,6 +141,18 @@ xlog_desc(StringInfo buf, XLogReaderState *record)
 						 xlrec.ThisTimeLineID, xlrec.PrevTimeLineID,
 						 timestamptz_to_str(xlrec.end_time));
 	}
+	else if (info == XLOG_CHECKSUMS)
+	{
+		xl_checksum_state xlrec;
+
+		memcpy(&xlrec, rec, sizeof(xl_checksum_state));
+		if (xlrec.new_checksumtype == PG_DATA_CHECKSUM_VERSION)
+			appendStringInfo(buf, "on");
+		else if (xlrec.new_checksumtype == PG_DATA_CHECKSUM_INPROGRESS_VERSION)
+			appendStringInfo(buf, "inprogress");
+		else
+			appendStringInfo(buf, "off");
+	}
 }
 
 const char *
@@ -185,6 +198,9 @@ xlog_identify(uint8 info)
 		case XLOG_FPI_FOR_HINT:
 			id = "FPI_FOR_HINT";
 			break;
+		case XLOG_CHECKSUMS:
+			id = "CHECKSUMS";
+			break;
 	}
 
 	return id;
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index e651a841bb..b6a073b82a 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -867,6 +867,7 @@ static void SetLatestXTime(TimestampTz xtime);
 static void SetCurrentChunkStartTime(TimestampTz xtime);
 static void CheckRequiredParameterValues(void);
 static void XLogReportParameters(void);
+static void XlogChecksums(ChecksumType new_type);
 static void checkTimeLineSwitch(XLogRecPtr lsn, TimeLineID newTLI,
 								TimeLineID prevTLI);
 static void LocalSetXLogInsertAllowed(void);
@@ -1050,7 +1051,7 @@ XLogInsertRecord(XLogRecData *rdata,
 		Assert(RedoRecPtr < Insert->RedoRecPtr);
 		RedoRecPtr = Insert->RedoRecPtr;
 	}
-	doPageWrites = (Insert->fullPageWrites || Insert->forcePageWrites);
+	doPageWrites = (Insert->fullPageWrites || Insert->forcePageWrites || DataChecksumsInProgress());
 
 	if (doPageWrites &&
 		(!prevDoPageWrites ||
@@ -4779,10 +4780,6 @@ ReadControlFile(void)
 		(SizeOfXLogLongPHD - SizeOfXLogShortPHD);
 
 	CalculateCheckpointSegments();
-
-	/* Make the initdb settings visible as GUC variables, too */
-	SetConfigOption("data_checksums", DataChecksumsEnabled() ? "yes" : "no",
-					PGC_INTERNAL, PGC_S_OVERRIDE);
 }
 
 /*
@@ -4819,12 +4816,93 @@ GetMockAuthenticationNonce(void)
  * Are checksums enabled for data pages?
  */
 bool
-DataChecksumsEnabled(void)
+DataChecksumsNeedWrite(void)
 {
 	Assert(ControlFile != NULL);
 	return (ControlFile->data_checksum_version > 0);
 }
 
+bool
+DataChecksumsNeedVerify(void)
+{
+	Assert(ControlFile != NULL);
+
+	/*
+	 * Only verify checksums if they are fully enabled in the cluster. In
+	 * inprogress state they are only updated, not verified.
+	 */
+	return (ControlFile->data_checksum_version == PG_DATA_CHECKSUM_VERSION);
+}
+
+bool
+DataChecksumsInProgress(void)
+{
+	Assert(ControlFile != NULL);
+	return (ControlFile->data_checksum_version == PG_DATA_CHECKSUM_INPROGRESS_VERSION);
+}
+
+void
+SetDataChecksumsInProgress(void)
+{
+	Assert(ControlFile != NULL);
+	if (ControlFile->data_checksum_version > 0)
+		return;
+
+	XlogChecksums(PG_DATA_CHECKSUM_INPROGRESS_VERSION);
+
+	LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
+	ControlFile->data_checksum_version = PG_DATA_CHECKSUM_INPROGRESS_VERSION;
+	UpdateControlFile();
+	LWLockRelease(ControlFileLock);
+	WaitForGlobalBarrier(EmitGlobalBarrier(GLOBBAR_CHECKSUM));
+}
+
+void
+SetDataChecksumsOn(void)
+{
+	Assert(ControlFile != NULL);
+
+	LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
+
+	if (ControlFile->data_checksum_version != PG_DATA_CHECKSUM_INPROGRESS_VERSION)
+	{
+		LWLockRelease(ControlFileLock);
+		elog(ERROR, "Checksums not in inprogress mode");
+	}
+
+	ControlFile->data_checksum_version = PG_DATA_CHECKSUM_VERSION;
+	UpdateControlFile();
+	LWLockRelease(ControlFileLock);
+	WaitForGlobalBarrier(EmitGlobalBarrier(GLOBBAR_CHECKSUM));
+
+	XlogChecksums(PG_DATA_CHECKSUM_VERSION);
+}
+
+void
+SetDataChecksumsOff(void)
+{
+	LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
+
+	ControlFile->data_checksum_version = 0;
+	UpdateControlFile();
+	LWLockRelease(ControlFileLock);
+	WaitForGlobalBarrier(EmitGlobalBarrier(GLOBBAR_CHECKSUM));
+
+	XlogChecksums(0);
+}
+
+/* guc hook */
+const char *
+show_data_checksums(void)
+{
+	if (ControlFile->data_checksum_version == PG_DATA_CHECKSUM_VERSION)
+		return "on";
+	else if (ControlFile->data_checksum_version == PG_DATA_CHECKSUM_INPROGRESS_VERSION)
+		return "inprogress";
+	else
+		return "off";
+}
+
 /*
  * Returns a fake LSN for unlogged relations.
  *
@@ -7746,6 +7824,18 @@ StartupXLOG(void)
 	CompleteCommitTsInitialization();
 
 	/*
+	 * If we reach this point with checksums in inprogress state, we notify
+	 * the user that they need to manually restart the process to enable
+	 * checksums.
+	 * This is because we cannot launch a dynamic background worker directly
+	 * from here, it has to be launched from a regular backend.
+	 */
+	if (ControlFile->data_checksum_version == PG_DATA_CHECKSUM_INPROGRESS_VERSION)
+		ereport(WARNING,
+				(errmsg("checksum state is \"inprogress\" with no worker"),
+				 errhint("Either disable or enable checksums by calling the pg_disable_data_checksums() or pg_enable_data_checksums() functions.")));
+
+	/*
 	 * All done with end-of-recovery actions.
 	 *
 	 * Now allow backends to write WAL and update the control file status in
@@ -9469,6 +9559,24 @@ XLogReportParameters(void)
 }
 
 /*
+ * Log the new state of checksums
+ */
+static void
+XlogChecksums(ChecksumType new_type)
+{
+	xl_checksum_state xlrec;
+	XLogRecPtr	recptr;
+
+	xlrec.new_checksumtype = new_type;
+
+	XLogBeginInsert();
+	XLogRegisterData((char *) &xlrec, sizeof(xl_checksum_state));
+
+	recptr = XLogInsert(RM_XLOG_ID, XLOG_CHECKSUMS);
+	XLogFlush(recptr);
+}
+
+/*
  * Update full_page_writes in shared memory, and write an
  * XLOG_FPW_CHANGE record if necessary.
  *
@@ -9919,6 +10027,17 @@ xlog_redo(XLogReaderState *record)
 		/* Keep track of full_page_writes */
 		lastFullPageWrites = fpw;
 	}
+	else if (info == XLOG_CHECKSUMS)
+	{
+		xl_checksum_state state;
+
+		memcpy(&state, XLogRecGetData(record), sizeof(xl_checksum_state));
+
+		LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
+		ControlFile->data_checksum_version = state.new_checksumtype;
+		UpdateControlFile();
+		LWLockRelease(ControlFileLock);
+	}
 }
 
 #ifdef WAL_DEBUG
diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c
index 4795c6fa94..a1e0d93e28 100644
--- a/src/backend/access/transam/xlogfuncs.c
+++ b/src/backend/access/transam/xlogfuncs.c
@@ -25,6 +25,7 @@
 #include "catalog/pg_type.h"
 #include "funcapi.h"
 #include "miscadmin.h"
+#include "postmaster/checksumhelper.h"
 #include "pgstat.h"
 #include "replication/walreceiver.h"
 #include "storage/smgr.h"
@@ -776,3 +777,59 @@ pg_promote(PG_FUNCTION_ARGS)
 			(errmsg("server did not promote within %d seconds", wait_seconds)));
 	PG_RETURN_BOOL(false);
 }
+
+/*
+ * Disables checksums for the cluster, unless already disabled.
+ *
+ * Has immediate effect - the checksums are set to off right away.
+ */
+Datum
+disable_data_checksums(PG_FUNCTION_ARGS)
+{
+	/*
+	 * If we don't need to write new checksums, then clearly they are already
+	 * disabled.
+	 */
+	if (!DataChecksumsNeedWrite())
+		ereport(ERROR,
+				(errmsg("data checksums already disabled")));
+
+	ShutdownChecksumHelperIfRunning();
+
+	SetDataChecksumsOff();
+
+	PG_RETURN_VOID();
+}
+
+/*
+ * Enables checksums for the cluster, unless already enabled.
+ *
+ * Supports vacuum-like cost-based throttling, to limit system load.
+ * Starts a background worker that updates checksums on existing data.
+ */
+Datum
+enable_data_checksums(PG_FUNCTION_ARGS)
+{
+	int			cost_delay = PG_GETARG_INT32(0);
+	int			cost_limit = PG_GETARG_INT32(1);
+
+	if (cost_delay < 0)
+		ereport(ERROR,
+				(errmsg("cost delay cannot be less than zero")));
+	if (cost_limit <= 0)
+		ereport(ERROR,
+				(errmsg("cost limit must be a positive value")));
+
+	/*
+	 * Allow state change from "off" or from "inprogress", since this is how
+	 * we restart the worker if necessary.
+	 */
+	if (DataChecksumsNeedVerify())
+		ereport(ERROR,
+				(errmsg("data checksums already enabled")));
+
+	SetDataChecksumsInProgress();
+	StartChecksumHelperLauncher(cost_delay, cost_limit);
+
+	PG_RETURN_VOID();
+}
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index ea4c85e395..1d8cd7d3a9 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1155,6 +1155,11 @@ CREATE OR REPLACE FUNCTION
   RETURNS boolean STRICT VOLATILE LANGUAGE INTERNAL AS 'pg_promote'
   PARALLEL SAFE;
 
+CREATE OR REPLACE FUNCTION pg_enable_data_checksums (
+        cost_delay int DEFAULT 0, cost_limit int DEFAULT 100)
+  RETURNS void STRICT VOLATILE LANGUAGE internal AS 'enable_data_checksums'
+  PARALLEL RESTRICTED;
+
 -- legacy definition for compatibility with 9.3
 CREATE OR REPLACE FUNCTION
   json_populate_record(base anyelement, from_json json, use_json_as_text boolean DEFAULT false)
diff --git a/src/backend/postmaster/Makefile b/src/backend/postmaster/Makefile
index 71c23211b2..ee8f8c1cd3 100644
--- a/src/backend/postmaster/Makefile
+++ b/src/backend/postmaster/Makefile
@@ -12,7 +12,8 @@ subdir = src/backend/postmaster
 top_builddir = ../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = autovacuum.o bgworker.o bgwriter.o checkpointer.o fork_process.o \
-	pgarch.o pgstat.o postmaster.o startup.o syslogger.o walwriter.o
+OBJS = autovacuum.o bgworker.o bgwriter.o checkpointer.o checksumhelper.o \
+	fork_process.o pgarch.o pgstat.o postmaster.o startup.o syslogger.o \
+	walwriter.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/postmaster/bgworker.c b/src/backend/postmaster/bgworker.c
index f300f9285b..f40b7044bd 100644
--- a/src/backend/postmaster/bgworker.c
+++ b/src/backend/postmaster/bgworker.c
@@ -20,6 +20,7 @@
 #include "pgstat.h"
 #include "port/atomics.h"
 #include "postmaster/bgworker_internals.h"
+#include "postmaster/checksumhelper.h"
 #include "postmaster/postmaster.h"
 #include "replication/logicallauncher.h"
 #include "replication/logicalworker.h"
@@ -129,6 +130,12 @@ static const struct
 	},
 	{
 		"ApplyWorkerMain", ApplyWorkerMain
+	},
+	{
+		"ChecksumHelperLauncherMain", ChecksumHelperLauncherMain
+	},
+	{
+		"ChecksumHelperWorkerMain", ChecksumHelperWorkerMain
 	}
 };
 
diff --git a/src/backend/postmaster/checksumhelper.c b/src/backend/postmaster/checksumhelper.c
new file mode 100644
index 0000000000..06db05979c
--- /dev/null
+++ b/src/backend/postmaster/checksumhelper.c
@@ -0,0 +1,909 @@
+/*-------------------------------------------------------------------------
+ *
+ * checksumhelper.c
+ *	  Background worker to walk the database and write checksums to pages
+ *
+ * When enabling data checksums on a database at initdb time, no extra process
+ * is required as each page is checksummed, and verified, at accesses.  When
+ * enabling checksums on an already running cluster, which was not initialized
+ * with checksums, this helper worker will ensure that all pages are
+ * checksummed before verification of the checksums is turned on.
+ *
+ * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/postmaster/checksumhelper.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/heapam.h"
+#include "access/htup_details.h"
+#include "access/xact.h"
+#include "catalog/pg_database.h"
+#include "commands/vacuum.h"
+#include "common/relpath.h"
+#include "miscadmin.h"
+#include "pgstat.h"
+#include "postmaster/bgworker.h"
+#include "postmaster/bgwriter.h"
+#include "postmaster/checksumhelper.h"
+#include "storage/bufmgr.h"
+#include "storage/checksum.h"
+#include "storage/lmgr.h"
+#include "storage/ipc.h"
+#include "storage/procarray.h"
+#include "storage/smgr.h"
+#include "tcop/tcopprot.h"
+#include "utils/lsyscache.h"
+#include "utils/ps_status.h"
+
+
+typedef enum
+{
+	SUCCESSFUL = 0,
+	ABORTED,
+	FAILED
+}			ChecksumHelperResult;
+
+typedef struct ChecksumHelperShmemStruct
+{
+	/*
+	 * Access to launcher_started and abort must be protected by
+	 * ChecksumHelperLock.
+	 */
+	bool		launcher_started;
+	bool		abort;
+
+	/*
+	 * Access to other members can be done without a lock, as while they
+	 * are in shared memory, they are never concurrently accessed. When
+	 * a worker is running, the launcher is only waiting for that worker
+	 * to finish.
+	 */
+	ChecksumHelperResult success;
+	bool		process_shared_catalogs;
+	/* Parameter values set on start */
+	int			cost_delay;
+	int			cost_limit;
+}			ChecksumHelperShmemStruct;
+
+/* Shared memory segment for checksumhelper */
+static ChecksumHelperShmemStruct * ChecksumHelperShmem;
+
+/* Bookkeeping for work to do */
+typedef struct ChecksumHelperDatabase
+{
+	Oid			dboid;
+	char	   *dbname;
+}			ChecksumHelperDatabase;
+
+typedef struct ChecksumHelperRelation
+{
+	Oid			reloid;
+	char		relkind;
+}			ChecksumHelperRelation;
+
+/* Prototypes */
+static List *BuildDatabaseList(void);
+static List *BuildRelationList(bool include_shared);
+static List *BuildTempTableList(void);
+static ChecksumHelperResult ProcessDatabase(ChecksumHelperDatabase * db);
+static void launcher_cancel_handler(SIGNAL_ARGS);
+
+/*
+ * Main entry point for checksumhelper launcher process.
+ */
+void
+StartChecksumHelperLauncher(int cost_delay, int cost_limit)
+{
+	BackgroundWorker bgw;
+	BackgroundWorkerHandle *bgw_handle;
+
+	LWLockAcquire(ChecksumHelperLock, LW_EXCLUSIVE);
+	if (ChecksumHelperShmem->abort)
+	{
+		LWLockRelease(ChecksumHelperLock);
+		ereport(ERROR,
+				(errmsg("could not start checksumhelper: has been canceled")));
+	}
+
+	if (ChecksumHelperShmem->launcher_started)
+	{
+		/* Failed to set means somebody else started */
+		LWLockRelease(ChecksumHelperLock);
+		ereport(ERROR,
+				(errmsg("could not start checksumhelper: already running")));
+	}
+
+	ChecksumHelperShmem->cost_delay = cost_delay;
+	ChecksumHelperShmem->cost_limit = cost_limit;
+
+	memset(&bgw, 0, sizeof(bgw));
+	bgw.bgw_flags = BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION;
+	bgw.bgw_start_time = BgWorkerStart_RecoveryFinished;
+	snprintf(bgw.bgw_library_name, BGW_MAXLEN, "postgres");
+	snprintf(bgw.bgw_function_name, BGW_MAXLEN, "ChecksumHelperLauncherMain");
+	snprintf(bgw.bgw_name, BGW_MAXLEN, "checksumhelper launcher");
+	snprintf(bgw.bgw_type, BGW_MAXLEN, "checksumhelper launcher");
+	bgw.bgw_restart_time = BGW_NEVER_RESTART;
+	bgw.bgw_notify_pid = MyProcPid;
+	bgw.bgw_main_arg = (Datum) 0;
+
+	ChecksumHelperShmem->launcher_started = true;
+	LWLockRelease(ChecksumHelperLock);
+
+	if (!RegisterDynamicBackgroundWorker(&bgw, &bgw_handle))
+	{
+		LWLockAcquire(ChecksumHelperLock, LW_EXCLUSIVE);
+		ChecksumHelperShmem->launcher_started = false;
+		LWLockRelease(ChecksumHelperLock);
+		ereport(ERROR,
+				(errmsg("failed to start checksum helper launcher")));
+	}
+}
+
+/*
+ * ShutdownChecksumHelperIfRunning
+ *		Request shutdown of the checksumhelper
+ *
+ * This does not turn off processing immediately, it signals the checksum
+ * process to end when done with the current block.
+ */
+void
+ShutdownChecksumHelperIfRunning(void)
+{
+	/* If the launcher isn't started, there is nothing to shut down */
+	LWLockAcquire(ChecksumHelperLock, LW_EXCLUSIVE);
+	if (ChecksumHelperShmem->launcher_started)
+		ChecksumHelperShmem->abort = true;
+	LWLockRelease(ChecksumHelperLock);
+}
+
+/*
+ * ProcessSingleRelationFork
+ *		Enable checksums in a single relation/fork.
+ *
+ * Returns true if successful, and false if *aborted*. On error, an actual
+ * error is raised in the lower levels.
+ */
+static bool
+ProcessSingleRelationFork(Relation reln, ForkNumber forkNum, BufferAccessStrategy strategy)
+{
+	BlockNumber numblocks = RelationGetNumberOfBlocksInFork(reln, forkNum);
+	BlockNumber b;
+	char		activity[NAMEDATALEN * 2 + 128];
+
+	for (b = 0; b < numblocks; b++)
+	{
+		Buffer		buf = ReadBufferExtended(reln, forkNum, b, RBM_NORMAL, strategy);
+
+		/*
+		 * Report to pgstat every 100 blocks (so as not to "spam")
+		 */
+		if ((b % 100) == 0)
+		{
+			snprintf(activity, sizeof(activity) - 1, "processing: %s.%s (%s block %d/%d)",
+					 get_namespace_name(RelationGetNamespace(reln)), RelationGetRelationName(reln),
+					 forkNames[forkNum], b, numblocks);
+			pgstat_report_activity(STATE_RUNNING, activity);
+		}
+
+		/* Need to get an exclusive lock before we can flag as dirty */
+		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+
+		/*
+		 * Mark the buffer as dirty and force a full page write.  We have to
+		 * re-write the page to WAL even if the checksum hasn't changed,
+		 * because if there is a replica it might have a slightly different
+		 * version of the page with an invalid checksum, caused by unlogged
+		 * changes (e.g. hintbits) on the master happening while checksums
+		 * were off. This can happen if there was a valid checksum on the page
+		 * at one point in the past, so only when checksums are first on, then
+		 * off, and then turned on again.
+		 */
+		START_CRIT_SECTION();
+		MarkBufferDirty(buf);
+		log_newpage_buffer(buf, false);
+		END_CRIT_SECTION();
+
+		UnlockReleaseBuffer(buf);
+
+		/*
+		 * This is the only place where we check if we are asked to abort, the
+		 * abortion will bubble up from here.
+		 * It's safe to check this without a lock, because if we miss it being
+		 * set, we will try again soon.
+		 */
+		if (ChecksumHelperShmem->abort)
+			return false;
+
+		vacuum_delay_point();
+	}
+
+	return true;
+}
+
+/*
+ * ProcessSingleRelationByOid
+ *		Process a single relation based on oid.
+ *
+ * Returns true if successful, and false if *aborted*. On error, an actual error
+ * is raised in the lower levels.
+ */
+static bool
+ProcessSingleRelationByOid(Oid relationId, BufferAccessStrategy strategy)
+{
+	Relation	rel;
+	ForkNumber	fnum;
+	bool		aborted = false;
+
+	StartTransactionCommand();
+
+	elog(DEBUG2, "Checksumhelper starting to process relation %d", relationId);
+	rel = try_relation_open(relationId, AccessShareLock);
+	if (rel == NULL)
+	{
+		/*
+		 * Relation no longer exist. We consider this a success, since there are no
+		 * pages in it that need checksums, and thus return true.
+		 */
+		elog(DEBUG1, "Checksumhelper skipping relation %d as it no longer exists", relationId);
+		CommitTransactionCommand();
+		pgstat_report_activity(STATE_IDLE, NULL);
+		return true;
+	}
+	RelationOpenSmgr(rel);
+
+	for (fnum = 0; fnum <= MAX_FORKNUM; fnum++)
+	{
+		if (smgrexists(rel->rd_smgr, fnum))
+		{
+			if (!ProcessSingleRelationFork(rel, fnum, strategy))
+			{
+				aborted = true;
+				break;
+			}
+		}
+	}
+	relation_close(rel, AccessShareLock);
+	elog(DEBUG2, "Checksumhelper done with relation %d: %s",
+		 relationId, (aborted ? "aborted" : "finished"));
+
+	CommitTransactionCommand();
+
+	pgstat_report_activity(STATE_IDLE, NULL);
+
+	return !aborted;
+}
+
+/*
+ * ProcessDatabase
+ *		Enable checksums in a single database.
+ *
+ * We do this by launching a dynamic background worker into this database, and
+ * waiting for it to finish.  We have to do this in a separate worker, since
+ * each process can only be connected to one database during its lifetime.
+ */
+static ChecksumHelperResult
+ProcessDatabase(ChecksumHelperDatabase * db)
+{
+	BackgroundWorker bgw;
+	BackgroundWorkerHandle *bgw_handle;
+	BgwHandleStatus status;
+	pid_t		pid;
+	char		activity[NAMEDATALEN + 64];
+
+	ChecksumHelperShmem->success = FAILED;
+
+	memset(&bgw, 0, sizeof(bgw));
+	bgw.bgw_flags = BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION;
+	bgw.bgw_start_time = BgWorkerStart_RecoveryFinished;
+	snprintf(bgw.bgw_library_name, BGW_MAXLEN, "postgres");
+	snprintf(bgw.bgw_function_name, BGW_MAXLEN, "ChecksumHelperWorkerMain");
+	snprintf(bgw.bgw_name, BGW_MAXLEN, "checksumhelper worker");
+	snprintf(bgw.bgw_type, BGW_MAXLEN, "checksumhelper worker");
+	bgw.bgw_restart_time = BGW_NEVER_RESTART;
+	bgw.bgw_notify_pid = MyProcPid;
+	bgw.bgw_main_arg = ObjectIdGetDatum(db->dboid);
+
+	if (!RegisterDynamicBackgroundWorker(&bgw, &bgw_handle))
+	{
+		ereport(LOG,
+				(errmsg("failed to start worker for checksumhelper in \"%s\"",
+						db->dbname)));
+		return FAILED;
+	}
+
+	status = WaitForBackgroundWorkerStartup(bgw_handle, &pid);
+	if (status != BGWH_STARTED)
+	{
+		ereport(LOG,
+				(errmsg("failed to wait for worker startup for checksumhelper in \"%s\"",
+						db->dbname)));
+		return FAILED;
+	}
+
+	ereport(DEBUG1,
+			(errmsg("started background worker for checksums in \"%s\"",
+					db->dbname)));
+
+	snprintf(activity, sizeof(activity) - 1,
+			 "Waiting for worker in database %s (pid %d)", db->dbname, pid);
+	pgstat_report_activity(STATE_RUNNING, activity);
+
+
+	status = WaitForBackgroundWorkerShutdown(bgw_handle);
+	if (status != BGWH_STOPPED)
+	{
+		ereport(LOG,
+				(errmsg("failed to wait for worker shutdown for checksumhelper in \"%s\"",
+						db->dbname)));
+		return FAILED;
+	}
+
+	if (ChecksumHelperShmem->success == ABORTED)
+		ereport(LOG,
+				(errmsg("checksumhelper was aborted during processing in \"%s\"",
+						db->dbname)));
+
+	ereport(DEBUG1,
+			(errmsg("background worker for checksums in \"%s\" completed",
+					db->dbname)));
+
+	pgstat_report_activity(STATE_IDLE, NULL);
+
+	return ChecksumHelperShmem->success;
+}
+
+static void
+launcher_exit(int code, Datum arg)
+{
+	LWLockAcquire(ChecksumHelperLock, LW_EXCLUSIVE);
+	ChecksumHelperShmem->abort = false;
+	ChecksumHelperShmem->launcher_started = false;
+	LWLockRelease(ChecksumHelperLock);
+}
+
+static void
+launcher_cancel_handler(SIGNAL_ARGS)
+{
+	LWLockAcquire(ChecksumHelperLock, LW_EXCLUSIVE);
+	ChecksumHelperShmem->abort = true;
+	LWLockRelease(ChecksumHelperLock);
+}
+
+static void
+WaitForAllTransactionsToFinish(void)
+{
+	TransactionId waitforxid;
+
+	LWLockAcquire(XidGenLock, LW_SHARED);
+	waitforxid = XidFromFullTransactionId(ShmemVariableCache->nextFullXid);
+	LWLockRelease(XidGenLock);
+
+	while (true)
+	{
+		TransactionId oldestxid = GetOldestActiveTransactionId();
+
+		elog(DEBUG1, "Checking old transactions");
+		if (TransactionIdPrecedes(oldestxid, waitforxid))
+		{
+			char activity[64];
+
+			/* Oldest running xid is older than us, so wait */
+			snprintf(activity, sizeof(activity), "Waiting for current transactions to finish (waiting for %d)", waitforxid);
+			pgstat_report_activity(STATE_RUNNING, activity);
+
+			/* Retry every 5 seconds */
+			ResetLatch(MyLatch);
+			(void) WaitLatch(MyLatch,
+							 WL_LATCH_SET | WL_TIMEOUT,
+							 5000,
+							 WAIT_EVENT_PG_SLEEP);
+		}
+		else
+		{
+			pgstat_report_activity(STATE_IDLE, NULL);
+			return;
+		}
+	}
+}
+
+void
+ChecksumHelperLauncherMain(Datum arg)
+{
+	List	   *DatabaseList;
+	HTAB	   *ProcessedDatabases = NULL;
+	List	   *FailedDatabases = NIL;
+	ListCell   *lc,
+			   *lc2;
+	HASHCTL     hash_ctl;
+	bool		found_failed = false;
+
+	on_shmem_exit(launcher_exit, 0);
+
+	ereport(DEBUG1,
+			(errmsg("checksumhelper launcher started")));
+
+	pqsignal(SIGTERM, die);
+	pqsignal(SIGINT, launcher_cancel_handler);
+
+	BackgroundWorkerUnblockSignals();
+
+	init_ps_display(pgstat_get_backend_desc(B_CHECKSUMHELPER_LAUNCHER), "", "", "");
+
+	memset(&hash_ctl, 0, sizeof(hash_ctl));
+	hash_ctl.keysize = sizeof(Oid);
+	hash_ctl.entrysize = sizeof(ChecksumHelperResult);
+	ProcessedDatabases = hash_create("Processed databases",
+									 64,
+									 &hash_ctl,
+									 HASH_ELEM);
+
+	/*
+	 * Initialize a connection to shared catalogs only.
+	 */
+	BackgroundWorkerInitializeConnection(NULL, NULL, 0);
+
+	/*
+	 * Set up so first run processes shared catalogs, but not once in every
+	 * db.
+	 */
+	ChecksumHelperShmem->process_shared_catalogs = true;
+
+	while (true)
+	{
+		int			processed_databases;
+
+		/*
+		 * Get a list of all databases to process. This may include databases
+		 * that were created during our runtime.
+		 *
+		 * Since a database can be created as a copy of any other database
+		 * (which may not have existed in our last run), we have to repeat
+		 * this loop until no new databases show up in the list. Since we wait
+		 * for all pre-existing transactions finish, this way we can be
+		 * certain that there are no databases left without checksums.
+		 */
+		DatabaseList = BuildDatabaseList();
+
+		/*
+		 * If there are no databases at all to checksum, we can exit
+		 * immediately as there is no work to do. This can probably never
+		 * happen, but just in case.
+		 */
+		if (DatabaseList == NIL || list_length(DatabaseList) == 0)
+			return;
+
+		processed_databases = 0;
+
+		foreach(lc, DatabaseList)
+		{
+			ChecksumHelperDatabase *db = (ChecksumHelperDatabase *) lfirst(lc);
+			ChecksumHelperResult result;
+			Oid *oid;
+
+			/* Skup if this database has been processed already */
+			if (hash_search(ProcessedDatabases, (void *) &db->dboid, HASH_FIND, NULL))
+			{
+				pfree(db->dbname);
+				pfree(db);
+				continue;
+			}
+
+			result = ProcessDatabase(db);
+
+			/* Make a copy of the oid so we can free the rest of the structure */
+			oid = palloc(sizeof(Oid));
+			*oid = db->dboid;
+			pfree(db->dbname);
+			pfree(db);
+
+			hash_search(ProcessedDatabases, (void *) oid, HASH_ENTER, NULL);
+			processed_databases++;
+
+			if (result == SUCCESSFUL)
+			{
+				/*
+				 * If one database has completed shared catalogs, we
+				 * don't have to process them again.
+				 */
+				if (ChecksumHelperShmem->process_shared_catalogs)
+					ChecksumHelperShmem->process_shared_catalogs = false;
+			}
+			else if (result == FAILED)
+			{
+				/*
+				 * Put failed databases on the remaining list.
+				 */
+				FailedDatabases = lappend(FailedDatabases, db);
+			}
+			else
+				/* Abort flag set, so exit the whole process */
+				return;
+		}
+
+		elog(DEBUG1, "Completed one loop of checksum enabling, %i databases processed", processed_databases);
+
+		list_free(DatabaseList);
+
+		/*
+		 * If no databases were processed in this run of the loop, we have now
+		 * finished all databases and no concurrently created ones can exist.
+		 */
+		if (processed_databases == 0)
+			break;
+	}
+
+	/*
+	 * FailedDatabases now has all databases that failed one way or another.
+	 * This can be because they actually failed for some reason, or because the
+	 * database was dropped between us getting the database list and trying to
+	 * process it. Get a fresh list of databases to detect the second case
+	 * where the database was dropped before we had started processing it. If a
+	 * database still exists, but enabling checksums failed then we fail the
+	 * entire checksumming process and exit with an error.
+	 */
+	DatabaseList = BuildDatabaseList();
+
+	foreach(lc, FailedDatabases)
+	{
+		ChecksumHelperDatabase *db = (ChecksumHelperDatabase *) lfirst(lc);
+		bool found = false;
+
+		foreach(lc2, DatabaseList)
+		{
+			ChecksumHelperDatabase *db2 = (ChecksumHelperDatabase *) lfirst(lc2);
+
+			if (db->dboid == db2->dboid)
+			{
+				found = true;
+				ereport(WARNING,
+						(errmsg("failed to enable checksums in \"%s\"",
+								db->dbname)));
+				break;
+			}
+		}
+
+		if (found)
+			found_failed = true;
+		else
+		{
+			ereport(LOG,
+					(errmsg("database \"%s\" has been dropped, skipping",
+							db->dbname)));
+		}
+	}
+
+	if (found_failed)
+	{
+		/* Disable checksums on cluster, because we failed */
+		SetDataChecksumsOff();
+		ereport(ERROR,
+				(errmsg("checksumhelper failed to enable checksums in all databases, aborting")));
+	}
+
+	/*
+	 * Force a checkpoint to get everything out to disk. XXX: this should
+	 * probably not be an IMMEDIATE checkpoint, but leave it there for now
+	 * for testing.
+	 */
+	RequestCheckpoint(CHECKPOINT_FORCE | CHECKPOINT_WAIT | CHECKPOINT_IMMEDIATE);
+
+	/*
+	 * Everything has been processed, so flag checksums enabled.
+	 */
+	SetDataChecksumsOn();
+
+	ereport(LOG,
+			(errmsg("checksums enabled, checksumhelper launcher shutting down")));
+}
+
+/*
+ * ChecksumHelperShmemSize
+ *		Compute required space for checksumhelper-related shared memory
+ */
+Size
+ChecksumHelperShmemSize(void)
+{
+	Size		size;
+
+	size = sizeof(ChecksumHelperShmemStruct);
+	size = MAXALIGN(size);
+
+	return size;
+}
+
+/*
+ * ChecksumHelperShmemInit
+ *		Allocate and initialize checksumhelper-related shared memory
+ */
+void
+ChecksumHelperShmemInit(void)
+{
+	bool		found;
+
+	ChecksumHelperShmem = (ChecksumHelperShmemStruct *)
+		ShmemInitStruct("ChecksumHelper Data",
+						ChecksumHelperShmemSize(),
+						&found);
+
+	if (!found)
+	{
+		MemSet(ChecksumHelperShmem, 0, ChecksumHelperShmemSize());
+	}
+}
+
+/*
+ * BuildDatabaseList
+ *		Compile a list of all currently available databases in the cluster
+ *
+ * This creates the list of databases for the checksumhelper workers to add
+ * checksums to.
+ */
+static List *
+BuildDatabaseList(void)
+{
+	List	   *DatabaseList = NIL;
+	Relation	rel;
+	TableScanDesc scan;
+	HeapTuple	tup;
+	MemoryContext ctx = CurrentMemoryContext;
+	MemoryContext oldctx;
+
+	StartTransactionCommand();
+
+	rel = table_open(DatabaseRelationId, AccessShareLock);
+
+	/*
+	 * Before we do this, wait for all pending transactions to finish. This
+	 * will ensure there are no concurrently running CREATE DATABASE, which
+	 * could cause us to miss the creation of a database that was copied
+	 * without checksums.
+	 */
+	WaitForAllTransactionsToFinish();
+
+	scan = table_beginscan_catalog(rel, 0, NULL);
+
+	while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection)))
+	{
+		Form_pg_database pgdb = (Form_pg_database) GETSTRUCT(tup);
+		ChecksumHelperDatabase *db;
+
+		oldctx = MemoryContextSwitchTo(ctx);
+
+		db = (ChecksumHelperDatabase *) palloc(sizeof(ChecksumHelperDatabase));
+
+		db->dboid = pgdb->oid;
+		db->dbname = pstrdup(NameStr(pgdb->datname));
+
+		DatabaseList = lappend(DatabaseList, db);
+
+		MemoryContextSwitchTo(oldctx);
+	}
+
+	table_endscan(scan);
+	table_close(rel, AccessShareLock);
+
+	CommitTransactionCommand();
+
+	return DatabaseList;
+}
+
+/*
+ * BuildRelationList
+ *		Compile a list of all relations in the database
+ *
+ * If shared is true, both shared relations and local ones are returned, else
+ * all non-shared relations are returned.
+ * Temp tables are not included.
+ */
+static List *
+BuildRelationList(bool include_shared)
+{
+	List	   *RelationList = NIL;
+	Relation	rel;
+	TableScanDesc scan;
+	HeapTuple	tup;
+	MemoryContext ctx = CurrentMemoryContext;
+	MemoryContext oldctx;
+
+	StartTransactionCommand();
+
+	rel = table_open(RelationRelationId, AccessShareLock);
+	scan = table_beginscan_catalog(rel, 0, NULL);
+
+	while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection)))
+	{
+		Form_pg_class pgc = (Form_pg_class) GETSTRUCT(tup);
+		ChecksumHelperRelation *relentry;
+
+		if (pgc->relpersistence == 't')
+			continue;
+
+		if (pgc->relisshared && !include_shared)
+			continue;
+
+		/*
+		 * Only include relations types that have local storage
+		 */
+		if (pgc->relkind == RELKIND_VIEW ||
+			pgc->relkind == RELKIND_COMPOSITE_TYPE ||
+			pgc->relkind == RELKIND_FOREIGN_TABLE)
+			continue;
+
+		oldctx = MemoryContextSwitchTo(ctx);
+		relentry = (ChecksumHelperRelation *) palloc(sizeof(ChecksumHelperRelation));
+
+		relentry->reloid = pgc->oid;
+		relentry->relkind = pgc->relkind;
+
+		RelationList = lappend(RelationList, relentry);
+
+		MemoryContextSwitchTo(oldctx);
+	}
+
+	table_endscan(scan);
+	table_close(rel, AccessShareLock);
+
+	CommitTransactionCommand();
+
+	return RelationList;
+}
+
+/*
+ * BuildTempTableList
+ *		Compile a list of all temporary tables in database
+ *
+ * Returns a List of oids.
+ */
+static List *
+BuildTempTableList(void)
+{
+	List	   *RelationList = NIL;
+	Relation	rel;
+	TableScanDesc scan;
+	HeapTuple	tup;
+	MemoryContext ctx = CurrentMemoryContext;
+	MemoryContext oldctx;
+
+	StartTransactionCommand();
+
+	rel = table_open(RelationRelationId, AccessShareLock);
+	scan = table_beginscan_catalog(rel, 0, NULL);
+
+	while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection)))
+	{
+		Form_pg_class pgc = (Form_pg_class) GETSTRUCT(tup);
+
+		if (pgc->relpersistence != 't')
+			continue;
+
+		oldctx = MemoryContextSwitchTo(ctx);
+		RelationList = lappend_oid(RelationList, pgc->oid);
+		MemoryContextSwitchTo(oldctx);
+	}
+
+	table_endscan(scan);
+	table_close(rel, AccessShareLock);
+
+	CommitTransactionCommand();
+
+	return RelationList;
+}
+
+/*
+ * Main function for enabling checksums in a single database
+ */
+void
+ChecksumHelperWorkerMain(Datum arg)
+{
+	Oid			dboid = DatumGetObjectId(arg);
+	List	   *RelationList = NIL;
+	List	   *InitialTempTableList = NIL;
+	ListCell   *lc;
+	BufferAccessStrategy strategy;
+	bool		aborted = false;
+
+	pqsignal(SIGTERM, die);
+
+	BackgroundWorkerUnblockSignals();
+
+	init_ps_display(pgstat_get_backend_desc(B_CHECKSUMHELPER_WORKER), "", "", "");
+
+	ereport(DEBUG1,
+			(errmsg("checksum worker starting for database oid %d", dboid)));
+
+	BackgroundWorkerInitializeConnectionByOid(dboid, InvalidOid, BGWORKER_BYPASS_ALLOWCONN);
+
+	/*
+	 * Get a list of all temp tables present as we start in this database.
+	 * We need to wait until they are all gone until we are done, since
+	 * we cannot access those files and modify them.
+	 */
+	InitialTempTableList = BuildTempTableList();
+
+	/*
+	 * Enable vacuum cost delay, if any.
+	 */
+	VacuumCostDelay = ChecksumHelperShmem->cost_delay;
+	VacuumCostLimit = ChecksumHelperShmem->cost_limit;
+	VacuumCostActive = (VacuumCostDelay > 0);
+	VacuumCostBalance = 0;
+	VacuumPageHit = 0;
+	VacuumPageMiss = 0;
+	VacuumPageDirty = 0;
+
+	/*
+	 * Create and set the vacuum strategy as our buffer strategy.
+	 */
+	strategy = GetAccessStrategy(BAS_VACUUM);
+
+	RelationList = BuildRelationList(ChecksumHelperShmem->process_shared_catalogs);
+	foreach(lc, RelationList)
+	{
+		ChecksumHelperRelation *rel = (ChecksumHelperRelation *) lfirst(lc);
+
+		if (!ProcessSingleRelationByOid(rel->reloid, strategy))
+		{
+			aborted = true;
+			break;
+		}
+	}
+	list_free_deep(RelationList);
+
+	if (aborted)
+	{
+		ChecksumHelperShmem->success = ABORTED;
+		ereport(DEBUG1,
+				(errmsg("checksum worker aborted in database oid %d", dboid)));
+		return;
+	}
+
+	/*
+	 * Wait for all temp tables that existed when we started to go away. This
+	 * is necessary since we cannot "reach" them to enable checksums.
+	 * Any temp tables created after we started will already have checksums
+	 * in them (due to the inprogress state), so those are safe.
+	 */
+	while (true)
+	{
+		List *CurrentTempTables;
+		ListCell *lc;
+		int numleft;
+		char activity[64];
+
+		CurrentTempTables = BuildTempTableList();
+		numleft = 0;
+		foreach(lc, InitialTempTableList)
+		{
+			if (list_member_oid(CurrentTempTables, lfirst_oid(lc)))
+				numleft++;
+		}
+		list_free(CurrentTempTables);
+
+		if (numleft == 0)
+			break;
+
+		/* At least one temp table left to wait for */
+		snprintf(activity, sizeof(activity), "Waiting for %d temp tables to be removed", numleft);
+		pgstat_report_activity(STATE_RUNNING, activity);
+
+		/* Retry every 5 seconds */
+		ResetLatch(MyLatch);
+		(void) WaitLatch(MyLatch,
+						 WL_LATCH_SET | WL_TIMEOUT,
+						 5000,
+						 WAIT_EVENT_PG_SLEEP);
+	}
+
+	list_free(InitialTempTableList);
+
+	ChecksumHelperShmem->success = SUCCESSFUL;
+	ereport(DEBUG1,
+			(errmsg("checksum worker completed in database oid %d", dboid)));
+}
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index a0631ee154..b29bab24d2 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -4310,6 +4310,12 @@ pgstat_get_backend_desc(BackendType backendType)
 		case B_WAL_WRITER:
 			backendDesc = "walwriter";
 			break;
+		case B_CHECKSUMHELPER_LAUNCHER:
+			backendDesc = "checksumhelper launcher";
+			break;
+		case B_CHECKSUMHELPER_WORKER:
+			backendDesc = "checksumhelper worker";
+			break;
 	}
 
 	return backendDesc;
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index c91f66dcbe..836d8e1a32 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -1384,7 +1384,7 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
 
 	_tarWriteHeader(tarfilename, NULL, statbuf, false);
 
-	if (!noverify_checksums && DataChecksumsEnabled())
+	if (!noverify_checksums && DataChecksumsNeedVerify())
 	{
 		char	   *filename;
 
diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c
index 5315d93af0..fe620265a8 100644
--- a/src/backend/replication/logical/decode.c
+++ b/src/backend/replication/logical/decode.c
@@ -199,6 +199,7 @@ DecodeXLogOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 		case XLOG_FPW_CHANGE:
 		case XLOG_FPI_FOR_HINT:
 		case XLOG_FPI:
+		case XLOG_CHECKSUMS:
 			break;
 		default:
 			elog(ERROR, "unexpected RM_XLOG_ID record type: %u", info);
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c
index d7d733530f..876c6279c5 100644
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -27,6 +27,7 @@
 #include "postmaster/autovacuum.h"
 #include "postmaster/bgworker_internals.h"
 #include "postmaster/bgwriter.h"
+#include "postmaster/checksumhelper.h"
 #include "postmaster/postmaster.h"
 #include "replication/logicallauncher.h"
 #include "replication/slot.h"
@@ -255,6 +256,7 @@ CreateSharedMemoryAndSemaphores(int port)
 	WalSndShmemInit();
 	WalRcvShmemInit();
 	ApplyLauncherShmemInit();
+	ChecksumHelperShmemInit();
 
 	/*
 	 * Set up other modules that need some shared memory space
diff --git a/src/backend/storage/lmgr/lwlocknames.txt b/src/backend/storage/lmgr/lwlocknames.txt
index db47843229..d50b4b13e1 100644
--- a/src/backend/storage/lmgr/lwlocknames.txt
+++ b/src/backend/storage/lmgr/lwlocknames.txt
@@ -49,3 +49,4 @@ MultiXactTruncationLock				41
 OldSnapshotTimeMapLock				42
 LogicalRepWorkerLock				43
 CLogTruncationLock					44
+ChecksumHelperLock					45
diff --git a/src/backend/storage/page/README b/src/backend/storage/page/README
index 5127d98da3..f873fb0eea 100644
--- a/src/backend/storage/page/README
+++ b/src/backend/storage/page/README
@@ -9,7 +9,8 @@ have a very low measured incidence according to research on large server farms,
 http://www.cs.toronto.edu/~bianca/papers/sigmetrics09.pdf, discussed
 2010/12/22 on -hackers list.
 
-Current implementation requires this be enabled system-wide at initdb time.
+Checksums can be enabled at initdb time, but can also be turned on and off
+using pg_enable_data_checksums()/pg_disable_data_checksums() at runtime.
 
 The checksum is not valid at all times on a data page!!
 The checksum is valid when the page leaves the shared pool and is checked
diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c
index 6b49810e37..6e3bfa045a 100644
--- a/src/backend/storage/page/bufpage.c
+++ b/src/backend/storage/page/bufpage.c
@@ -94,7 +94,7 @@ PageIsVerified(Page page, BlockNumber blkno)
 	 */
 	if (!PageIsNew(page))
 	{
-		if (DataChecksumsEnabled())
+		if (DataChecksumsNeedVerify())
 		{
 			checksum = pg_checksum_page((char *) page, blkno);
 
@@ -1171,7 +1171,7 @@ PageSetChecksumCopy(Page page, BlockNumber blkno)
 	static char *pageCopy = NULL;
 
 	/* If we don't need a checksum, just return the passed-in data */
-	if (PageIsNew(page) || !DataChecksumsEnabled())
+	if (PageIsNew(page) || !DataChecksumsNeedWrite())
 		return (char *) page;
 
 	/*
@@ -1198,7 +1198,7 @@ void
 PageSetChecksumInplace(Page page, BlockNumber blkno)
 {
 	/* If we don't need a checksum, just return */
-	if (PageIsNew(page) || !DataChecksumsEnabled())
+	if (PageIsNew(page) || !DataChecksumsNeedWrite())
 		return;
 
 	((PageHeader) page)->pd_checksum = pg_checksum_page((char *) page, blkno);
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 05240bfd14..61e856deac 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -1527,7 +1527,7 @@ pg_stat_get_db_checksum_failures(PG_FUNCTION_ARGS)
 	int64		result;
 	PgStat_StatDBEntry *dbentry;
 
-	if (!DataChecksumsEnabled())
+	if (!DataChecksumsNeedWrite())
 		PG_RETURN_NULL();
 
 	if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
@@ -1545,7 +1545,7 @@ pg_stat_get_db_checksum_last_failure(PG_FUNCTION_ARGS)
 	TimestampTz result;
 	PgStat_StatDBEntry *dbentry;
 
-	if (!DataChecksumsEnabled())
+	if (!DataChecksumsNeedWrite())
 		PG_RETURN_NULL();
 
 	if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 90ffd89339..bd07fe4170 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -33,6 +33,7 @@
 #include "access/transam.h"
 #include "access/twophase.h"
 #include "access/xact.h"
+#include "access/xlog.h"
 #include "access/xlog_internal.h"
 #include "catalog/namespace.h"
 #include "catalog/pg_authid.h"
@@ -71,6 +72,7 @@
 #include "replication/walreceiver.h"
 #include "replication/walsender.h"
 #include "storage/bufmgr.h"
+#include "storage/checksum.h"
 #include "storage/dsm_impl.h"
 #include "storage/standby.h"
 #include "storage/fd.h"
@@ -471,6 +473,16 @@ static struct config_enum_entry shared_memory_options[] = {
 };
 
 /*
+ * Options for data_checksums enum.
+ */
+static const struct config_enum_entry data_checksum_options[] = {
+	{"on", DATA_CHECKSUMS_ON, true},
+	{"off", DATA_CHECKSUMS_OFF, true},
+	{"inprogress", DATA_CHECKSUMS_INPROGRESS, true},
+	{NULL, 0, false}
+};
+
+/*
  * Options for enum values stored in other modules
  */
 extern const struct config_enum_entry wal_level_options[];
@@ -572,7 +584,7 @@ static int	max_identifier_length;
 static int	block_size;
 static int	segment_size;
 static int	wal_block_size;
-static bool data_checksums;
+static int	data_checksums_tmp;
 static bool integer_datetimes;
 static bool assert_enabled;
 static char *recovery_target_timeline_string;
@@ -1824,17 +1836,6 @@ static struct config_bool ConfigureNamesBool[] =
 	},
 
 	{
-		{"data_checksums", PGC_INTERNAL, PRESET_OPTIONS,
-			gettext_noop("Shows whether data checksums are turned on for this cluster."),
-			NULL,
-			GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE
-		},
-		&data_checksums,
-		false,
-		NULL, NULL, NULL
-	},
-
-	{
 		{"syslog_sequence_numbers", PGC_SIGHUP, LOGGING_WHERE,
 			gettext_noop("Add sequence number to syslog messages to avoid duplicate suppression."),
 			NULL
@@ -4537,6 +4538,17 @@ static struct config_enum ConfigureNamesEnum[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"data_checksums", PGC_INTERNAL, PRESET_OPTIONS,
+			gettext_noop("Shows whether data checksums are turned on for this cluster."),
+			NULL,
+			GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE
+		},
+		&data_checksums_tmp,
+		DATA_CHECKSUMS_OFF, data_checksum_options,
+		NULL, NULL, show_data_checksums
+	},
+
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, 0, NULL, NULL, NULL, NULL
diff --git a/src/bin/pg_upgrade/controldata.c b/src/bin/pg_upgrade/controldata.c
index 38236415be..9e196cc2dd 100644
--- a/src/bin/pg_upgrade/controldata.c
+++ b/src/bin/pg_upgrade/controldata.c
@@ -658,6 +658,15 @@ check_control_data(ControlData *oldctrl,
 	 */
 
 	/*
+	 * If checksums have been turned on in the old cluster, but the
+	 * checksumhelper have yet to finish, then disallow upgrading. The user
+	 * should either let the process finish, or turn off checksums, before
+	 * retrying.
+	 */
+	if (oldctrl->data_checksum_version == 2)
+		pg_fatal("checksum enabling in old cluster is in progress\n");
+
+	/*
 	 * We might eventually allow upgrades from checksum to no-checksum
 	 * clusters.
 	 */
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index f724ecf9ca..66758bbd7f 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -220,7 +220,7 @@ typedef struct
 	uint32		large_object;
 	bool		date_is_int;
 	bool		float8_pass_by_value;
-	bool		data_checksum_version;
+	uint32		data_checksum_version;
 } ControlData;
 
 /*
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index d519252aad..ae02c09c84 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -189,7 +189,7 @@ extern PGDLLIMPORT int wal_level;
  * of the bits make it to disk, but the checksum wouldn't match.  Also WAL-log
  * them if forced by wal_log_hints=on.
  */
-#define XLogHintBitIsNeeded() (DataChecksumsEnabled() || wal_log_hints)
+#define XLogHintBitIsNeeded() (DataChecksumsNeedWrite() || wal_log_hints)
 
 /* Do we need to WAL-log information required only for Hot Standby and logical replication? */
 #define XLogStandbyInfoActive() (wal_level >= WAL_LEVEL_REPLICA)
@@ -293,7 +293,13 @@ extern char *XLogFileNameP(TimeLineID tli, XLogSegNo segno);
 extern void UpdateControlFile(void);
 extern uint64 GetSystemIdentifier(void);
 extern char *GetMockAuthenticationNonce(void);
-extern bool DataChecksumsEnabled(void);
+extern bool DataChecksumsNeedWrite(void);
+extern bool DataChecksumsNeedVerify(void);
+extern bool DataChecksumsInProgress(void);
+extern void SetDataChecksumsInProgress(void);
+extern void SetDataChecksumsOn(void);
+extern void SetDataChecksumsOff(void);
+extern const char *show_data_checksums(void);
 extern XLogRecPtr GetFakeLSNForUnloggedRel(void);
 extern Size XLOGShmemSize(void);
 extern void XLOGShmemInit(void);
diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h
index 3f0de6625d..d4e3a3eab2 100644
--- a/src/include/access/xlog_internal.h
+++ b/src/include/access/xlog_internal.h
@@ -25,6 +25,7 @@
 #include "lib/stringinfo.h"
 #include "pgtime.h"
 #include "storage/block.h"
+#include "storage/checksum.h"
 #include "storage/relfilenode.h"
 
 
@@ -241,6 +242,12 @@ typedef struct xl_restore_point
 	char		rp_name[MAXFNAMELEN];
 } xl_restore_point;
 
+/* Information logged when checksum level is changed */
+typedef struct xl_checksum_state
+{
+	ChecksumType new_checksumtype;
+}			xl_checksum_state;
+
 /* End of recovery mark, when we don't do an END_OF_RECOVERY checkpoint */
 typedef struct xl_end_of_recovery
 {
diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h
index ff98d9e91a..8177414854 100644
--- a/src/include/catalog/pg_control.h
+++ b/src/include/catalog/pg_control.h
@@ -76,6 +76,7 @@ typedef struct CheckPoint
 #define XLOG_END_OF_RECOVERY			0x90
 #define XLOG_FPI_FOR_HINT				0xA0
 #define XLOG_FPI						0xB0
+#define XLOG_CHECKSUMS					0xC0
 
 
 /*
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index cf1f409351..c6101a07f2 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -10638,6 +10638,22 @@
   proargnames => '{max_data_alignment,database_block_size,blocks_per_segment,wal_block_size,bytes_per_wal_segment,max_identifier_length,max_index_columns,max_toast_chunk_size,large_object_chunk_size,float4_pass_by_value,float8_pass_by_value,data_page_checksum_version}',
   prosrc => 'pg_control_init' },
 
+{ oid => '4142',
+  descr => 'disable data checksums',
+  proname => 'pg_disable_data_checksums', provolatile => 'v', prorettype => 'bool',
+  proparallel => 'r',
+  proargtypes => '',
+  prosrc => 'disable_data_checksums' },
+
+{ oid => '4035',
+  descr => 'enable data checksums',
+  proname => 'pg_enable_data_checksums', provolatile => 'v', prorettype => 'void',
+  proparallel => 'r',
+  proargtypes => 'int4 int4', proallargtypes => '{int4,int4}',
+  proargmodes => '{i,i}',
+  proargnames => '{cost_delay,cost_limit}',
+  prosrc => 'enable_data_checksums' },
+
 # collation management functions
 { oid => '3445', descr => 'import collations from operating system',
   proname => 'pg_import_system_collations', procost => '100',
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index c997add881..346de83de9 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -727,7 +727,9 @@ typedef enum BackendType
 	B_STARTUP,
 	B_WAL_RECEIVER,
 	B_WAL_SENDER,
-	B_WAL_WRITER
+	B_WAL_WRITER,
+	B_CHECKSUMHELPER_LAUNCHER,
+	B_CHECKSUMHELPER_WORKER
 } BackendType;
 
 
diff --git a/src/include/postmaster/checksumhelper.h b/src/include/postmaster/checksumhelper.h
new file mode 100644
index 0000000000..556f801668
--- /dev/null
+++ b/src/include/postmaster/checksumhelper.h
@@ -0,0 +1,31 @@
+/*-------------------------------------------------------------------------
+ *
+ * checksumhelper.h
+ *	  header file for checksum helper background worker
+ *
+ *
+ * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/postmaster/checksumhelper.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef CHECKSUMHELPER_H
+#define CHECKSUMHELPER_H
+
+/* Shared memory */
+extern Size ChecksumHelperShmemSize(void);
+extern void ChecksumHelperShmemInit(void);
+
+/* Start the background processes for enabling checksums */
+void		StartChecksumHelperLauncher(int cost_delay, int cost_limit);
+
+/* Shutdown the background processes, if any */
+void		ShutdownChecksumHelperIfRunning(void);
+
+/* Background worker entrypoints */
+void		ChecksumHelperLauncherMain(Datum arg);
+void		ChecksumHelperWorkerMain(Datum arg);
+
+#endif							/* CHECKSUMHELPER_H */
diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h
index 4ef6d8ddd4..cf31f24b01 100644
--- a/src/include/storage/bufpage.h
+++ b/src/include/storage/bufpage.h
@@ -198,6 +198,7 @@ typedef PageHeaderData *PageHeader;
  */
 #define PG_PAGE_LAYOUT_VERSION		4
 #define PG_DATA_CHECKSUM_VERSION	1
+#define PG_DATA_CHECKSUM_INPROGRESS_VERSION		2
 
 /* ----------------------------------------------------------------
  *						page support macros
diff --git a/src/include/storage/checksum.h b/src/include/storage/checksum.h
index 7ef32a3baa..2c414aa1e7 100644
--- a/src/include/storage/checksum.h
+++ b/src/include/storage/checksum.h
@@ -15,6 +15,13 @@
 
 #include "storage/block.h"
 
+typedef enum ChecksumType
+{
+	DATA_CHECKSUMS_OFF = 0,
+	DATA_CHECKSUMS_ON,
+	DATA_CHECKSUMS_INPROGRESS
+}			ChecksumType;
+
 /*
  * Compute the checksum for a Postgres page.  The page must be aligned on a
  * 4-byte boundary.
diff --git a/src/test/Makefile b/src/test/Makefile
index efb206aa75..6469ac94a4 100644
--- a/src/test/Makefile
+++ b/src/test/Makefile
@@ -12,7 +12,8 @@ subdir = src/test
 top_builddir = ../..
 include $(top_builddir)/src/Makefile.global
 
-SUBDIRS = perl regress isolation modules authentication recovery subscription
+SUBDIRS = perl regress isolation modules authentication recovery subscription \
+			checksum
 
 # Test suites that are not safe by default but can be run if selected
 # by the user via the whitespace-separated list in variable
diff --git a/src/test/checksum/.gitignore b/src/test/checksum/.gitignore
new file mode 100644
index 0000000000..871e943d50
--- /dev/null
+++ b/src/test/checksum/.gitignore
@@ -0,0 +1,2 @@
+# Generated by test suite
+/tmp_check/
diff --git a/src/test/checksum/Makefile b/src/test/checksum/Makefile
new file mode 100644
index 0000000000..22a3b64dd8
--- /dev/null
+++ b/src/test/checksum/Makefile
@@ -0,0 +1,24 @@
+#-------------------------------------------------------------------------
+#
+# Makefile for src/test/checksum
+#
+# Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+# Portions Copyright (c) 1994, Regents of the University of California
+#
+# src/test/checksum/Makefile
+#
+#-------------------------------------------------------------------------
+
+subdir = src/test/checksum
+top_builddir = ../../..
+include $(top_builddir)/src/Makefile.global
+
+check:
+	$(prove_check)
+
+installcheck:
+	$(prove_installcheck)
+
+clean distclean maintainer-clean:
+	rm -rf tmp_check
+
diff --git a/src/test/checksum/README b/src/test/checksum/README
new file mode 100644
index 0000000000..e3fbd2bdb5
--- /dev/null
+++ b/src/test/checksum/README
@@ -0,0 +1,22 @@
+src/test/checksum/README
+
+Regression tests for data checksums
+===================================
+
+This directory contains a test suite for enabling data checksums
+in a running cluster with streaming replication.
+
+Running the tests
+=================
+
+    make check
+
+or
+
+    make installcheck
+
+NOTE: This creates a temporary installation (in the case of "check"),
+with multiple nodes, be they master or standby(s) for the purpose of
+the tests.
+
+NOTE: This requires the --enable-tap-tests argument to configure.
diff --git a/src/test/checksum/t/001_standby_checksum.pl b/src/test/checksum/t/001_standby_checksum.pl
new file mode 100644
index 0000000000..891743fa6c
--- /dev/null
+++ b/src/test/checksum/t/001_standby_checksum.pl
@@ -0,0 +1,104 @@
+# Test suite for testing enabling data checksums with streaming replication
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 10;
+
+my $MAX_TRIES = 30;
+
+# Initialize master node
+my $node_master = get_new_node('master');
+$node_master->init(allows_streaming => 1);
+$node_master->start;
+my $backup_name = 'my_backup';
+
+# Take backup
+$node_master->backup($backup_name);
+
+# Create streaming standby linking to master
+my $node_standby_1 = get_new_node('standby_1');
+$node_standby_1->init_from_backup($node_master, $backup_name,
+	has_streaming => 1);
+$node_standby_1->start;
+
+# Create some content on master to have un-checksummed data in the cluster
+$node_master->safe_psql('postgres',
+	"CREATE TABLE t AS SELECT generate_series(1,10000) AS a;");
+
+# Wait for standbys to catch up
+$node_master->wait_for_catchup($node_standby_1, 'replay',
+	$node_master->lsn('insert'));
+
+# Check that checksums are turned off
+my $result = $node_master->safe_psql('postgres',
+	"SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';");
+is($result, "off", 'ensure checksums are turned off on master');
+
+$result = $node_standby_1->safe_psql('postgres',
+	"SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';");
+is($result, "off", 'ensure checksums are turned off on standby_1');
+
+# Enable checksums for the cluster
+$node_master->safe_psql('postgres', "SELECT pg_enable_data_checksums();");
+
+# Ensure that the master has switched to inprogress immediately
+$result = $node_master->safe_psql('postgres',
+	"SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';");
+is($result, "inprogress", 'ensure checksums are in progress on master');
+
+# Wait for checksum enable to be replayed
+$node_master->wait_for_catchup($node_standby_1, 'replay');
+
+# Ensure that the standby has switched to inprogress or on
+# Normally it would be "inprogress", but it is theoretically possible for the master
+# to complete the checksum enabling *and* have the standby replay that record before
+# we reach the check below.
+$result = $node_standby_1->safe_psql('postgres',
+	"SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';");
+cmp_ok($result, '~~', ["inprogress", "on"], 'ensure checksums are on or in progress on standby_1');
+
+# Insert some more data which should be checksummed on INSERT
+$node_master->safe_psql('postgres',
+	"INSERT INTO t VALUES (generate_series(1,10000));");
+
+# Wait for checksums enabled on the master
+for (my $i = 0; $i < $MAX_TRIES; $i++)
+{
+	$result = $node_master->safe_psql('postgres',
+		"SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';");
+	last if ($result eq 'on');
+	sleep(1);
+}
+is ($result, "on", 'ensure checksums are enabled on master');
+
+# Wait for checksums enabled on the standby
+for (my $i = 0; $i < $MAX_TRIES; $i++)
+{
+	$result = $node_standby_1->safe_psql('postgres',
+		"SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';");
+	last if ($result eq 'on');
+	sleep(1);
+}
+is ($result, "on", 'ensure checksums are enabled on standby');
+
+$result = $node_master->safe_psql('postgres', "SELECT count(a) FROM t");
+is ($result, "20000", 'ensure we can safely read all data with checksums');
+
+# Disable checksums and ensure it's propagated to standby and that we can
+# still read all data
+$node_master->safe_psql('postgres', "SELECT pg_disable_data_checksums();");
+$result = $node_master->safe_psql('postgres',
+	"SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';");
+is($result, "off", 'ensure checksums are off on master');
+
+# Wait for checksum disable to be replayed
+$node_master->wait_for_catchup($node_standby_1, 'replay');
+
+# Ensure that the standby has switched to off
+$result = $node_standby_1->safe_psql('postgres',
+	"SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';");
+is($result, "off", 'ensure checksums are off on standby_1');
+
+$result = $node_master->safe_psql('postgres', "SELECT count(a) FROM t");
+is ($result, "20000", 'ensure we can safely read all data without checksums');
-- 
2.11.0

