From a706447e46daa3fbb55538b9e96809c799f1b99a Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Thu, 16 Nov 2023 14:42:31 +0900
Subject: [PATCH v4 4/4] Add basic test for promotion and restart race
 condition

This test fails after 7863ee4def65 is reverted.  test_injection_points
is extended so as it is possible to add condition variables to wait for
in the point callbacks, with a SQL function to broadcast condition
variables that may be sleeping.

I guess that this should be extended so as there is more than one
condition variable stored in shmem for this module, controlling which
variable to wait for directly in the callback itself, but that's not
really necessary now.
---
 src/backend/access/transam/xlog.c             |   7 +
 .../modules/test_injection_points/meson.build |   1 +
 .../t/002_invalid_checkpoint_after_promote.pl | 132 ++++++++++++++++++
 .../test_injection_points--1.0.sql            |  10 ++
 .../test_injection_points.c                   |  73 ++++++++++
 5 files changed, 223 insertions(+)
 create mode 100644 src/test/modules/test_injection_points/t/002_invalid_checkpoint_after_promote.pl

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 1159dff1a6..d3828bcee3 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -99,6 +99,7 @@
 #include "storage/sync.h"
 #include "utils/guc_hooks.h"
 #include "utils/guc_tables.h"
+#include "utils/injection_point.h"
 #include "utils/memutils.h"
 #include "utils/ps_status.h"
 #include "utils/relmapper.h"
@@ -7329,6 +7330,12 @@ CreateRestartPoint(int flags)
 
 	CheckPointGuts(lastCheckPoint.redo, flags);
 
+	/*
+	 * This location is important to be after CheckPointGuts() to ensure
+	 * that some work has happened.
+	 */
+	INJECTION_POINT("CreateRestartPoint");
+
 	/*
 	 * Remember the prior checkpoint's redo ptr for
 	 * UpdateCheckPointDistanceEstimate()
diff --git a/src/test/modules/test_injection_points/meson.build b/src/test/modules/test_injection_points/meson.build
index 6006b38f3d..6ebdc728b7 100644
--- a/src/test/modules/test_injection_points/meson.build
+++ b/src/test/modules/test_injection_points/meson.build
@@ -37,6 +37,7 @@ tests += {
   'tap': {
     'tests': [
       't/001_snapshot_status.pl',
+      't/002_invalid_checkpoint_after_promote.pl',
     ],
   }
 }
diff --git a/src/test/modules/test_injection_points/t/002_invalid_checkpoint_after_promote.pl b/src/test/modules/test_injection_points/t/002_invalid_checkpoint_after_promote.pl
new file mode 100644
index 0000000000..2da243e871
--- /dev/null
+++ b/src/test/modules/test_injection_points/t/002_invalid_checkpoint_after_promote.pl
@@ -0,0 +1,132 @@
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Time::HiRes qw(usleep nanosleep);
+use Test::More;
+
+# initialize primary node
+my $node_primary = PostgreSQL::Test::Cluster->new('master');
+$node_primary->init(allows_streaming => 1);
+$node_primary->append_conf(
+	'postgresql.conf', q[
+checkpoint_timeout = 30s
+log_checkpoints = on
+restart_after_crash = on
+]);
+$node_primary->start;
+my $backup_name = 'my_backup';
+$node_primary->backup($backup_name);
+
+# setup a standby
+my $node_standby = PostgreSQL::Test::Cluster->new('standby1');
+$node_standby->init_from_backup($node_primary, $backup_name, has_streaming => 1);
+$node_standby->start;
+
+# dummy table for the upcoming tests.
+$node_primary->safe_psql('postgres', 'checkpoint');
+$node_primary->safe_psql('postgres', 'CREATE TABLE prim_tab (a int);');
+
+# Register a injection point on the standby so as the follow-up
+# restart point running on it will wait.
+$node_primary->safe_psql('postgres', 'CREATE EXTENSION test_injection_points;');
+# Wait until the extension has been created on the standby
+$node_primary->wait_for_replay_catchup($node_standby);
+# This causes a restartpoint to wait on a standby.
+$node_standby->safe_psql('postgres',
+  "SELECT test_injection_points_attach('CreateRestartPoint', 'wait');");
+
+# Execute a restart point on the standby, that will be waited on.
+# This needs to be in the background as we'll wait on it.
+my $logstart = -s $node_standby->logfile;
+my $psql_session =
+  $node_standby->background_psql('postgres', on_error_stop => 0);
+$psql_session->query_until(qr/starting_checkpoint/, q(
+   \echo starting_checkpoint
+   CHECKPOINT;
+));
+
+# Switch one WAL segment to make the restartpoint remove it.
+$node_primary->safe_psql('postgres', 'INSERT INTO prim_tab VALUES (1);');
+$node_primary->safe_psql('postgres', 'SELECT pg_switch_wal();');
+$node_primary->wait_for_replay_catchup($node_standby);
+
+# Wait until the checkpointer is in the middle of the restartpoint
+# processing.
+ok( $node_standby->poll_query_until(
+	'postgres',
+	qq[SELECT count(*) FROM pg_stat_activity
+           WHERE backend_type = 'checkpointer' AND wait_event = 'test_injection_wait' ;],
+	'1'),
+    'checkpointer is waiting at restart point'
+    ) or die "Timed out while waiting for checkpointer to run restartpoint";
+
+
+# Restartpoint should have started on standby.
+my $log = slurp_file($node_standby->logfile, $logstart);
+my $checkpoint_start = 0;
+if ($log =~ m/restartpoint starting: immediate wait/)
+{
+	$checkpoint_start = 1;
+}
+is($checkpoint_start, 1, 'restartpoint has started');
+
+# promote during restartpoint
+$node_primary->stop;
+$node_standby->promote;
+
+# Update the start position before waking up the checkpointer!
+$logstart = -s $node_standby->logfile;
+
+# Now wake up the checkpointer
+$node_standby->safe_psql('postgres',
+  "SELECT test_injection_points_wake();");
+
+# wait until checkpoint completes on the newly-promoted standby.
+my $checkpoint_complete = 0;
+for (my $i = 0; $i < 3000; $i++)
+{
+	my $log = slurp_file($node_standby->logfile, $logstart);
+	if ($log =~ m/restartpoint complete/)
+	{
+		$checkpoint_complete = 1;
+		last;
+	}
+	usleep(100_000);
+}
+is($checkpoint_complete, 1, 'restartpoint has completed');
+
+# kill SIGKILL a backend, and all backend will restart. Note that previous checkpoint has not completed.
+my $psql_timeout = IPC::Run::timer(3600);
+my ($killme_stdin, $killme_stdout, $killme_stderr) = ('', '', '');
+my $killme = IPC::Run::start(
+	[ 'psql', '-XAtq', '-v', 'ON_ERROR_STOP=1', '-f', '-', '-d', $node_standby->connstr('postgres') ],
+	'<',
+	\$killme_stdin,
+	'>',
+	\$killme_stdout,
+	'2>',
+	\$killme_stderr,
+	$psql_timeout);
+$killme_stdin .= q[
+SELECT pg_backend_pid();
+];
+$killme->pump until $killme_stdout =~ /[[:digit:]]+[\r\n]$/;
+my $pid = $killme_stdout;
+chomp($pid);
+my $ret = PostgreSQL::Test::Utils::system_log('pg_ctl', 'kill', 'KILL', $pid);
+is($ret, 0, 'killed process with KILL');
+my $stdout;
+my $stderr;
+
+# after recovery, the server will not start, and log PANIC: could not locate a valid checkpoint record
+for (my $i = 0; $i < 30; $i++)
+{
+    ($ret, $stdout, $stderr) = $node_standby->psql('postgres', 'select 1');
+    last if $ret == 0;
+	sleep(1);
+}
+is($ret, 0, "psql connect success");
+is($stdout, 1, "psql select 1");
+
+done_testing();
diff --git a/src/test/modules/test_injection_points/test_injection_points--1.0.sql b/src/test/modules/test_injection_points/test_injection_points--1.0.sql
index 1c0a689ae2..05f97f0982 100644
--- a/src/test/modules/test_injection_points/test_injection_points--1.0.sql
+++ b/src/test/modules/test_injection_points/test_injection_points--1.0.sql
@@ -25,6 +25,16 @@ RETURNS void
 AS 'MODULE_PATHNAME', 'test_injection_points_run'
 LANGUAGE C STRICT PARALLEL UNSAFE;
 
+--
+-- test_injection_points_wake()
+--
+-- Wakes a condition variable executed in an injection point.
+--
+CREATE FUNCTION test_injection_points_wake()
+RETURNS void
+AS 'MODULE_PATHNAME', 'test_injection_points_wake'
+LANGUAGE C STRICT PARALLEL UNSAFE;
+
 --
 -- test_injection_points_detach()
 --
diff --git a/src/test/modules/test_injection_points/test_injection_points.c b/src/test/modules/test_injection_points/test_injection_points.c
index efb2c74c47..8b837d85d3 100644
--- a/src/test/modules/test_injection_points/test_injection_points.c
+++ b/src/test/modules/test_injection_points/test_injection_points.c
@@ -18,13 +18,56 @@
 #include "postgres.h"
 
 #include "fmgr.h"
+#include "storage/condition_variable.h"
+#include "storage/lwlock.h"
+#include "storage/shmem.h"
 #include "utils/builtins.h"
 #include "utils/injection_point.h"
+#include "utils/wait_event.h"
 
 PG_MODULE_MAGIC;
 
+/* Shared state information for injection points. */
+typedef struct TestInjectionPointSharedState
+{
+	/*
+	 * Wait variable that can be registered at a given point, and that can be
+	 * awakened via SQL.
+	 */
+	ConditionVariable	wait_point;
+} TestInjectionPointSharedState;
+
+/* Pointer to shared-memory state. */
+static TestInjectionPointSharedState *inj_state = NULL;
+
+/* Wait event when waiting on condition variable */
+static uint32 test_injection_wait_event = 0;
+
 extern PGDLLEXPORT void test_injection_error(const char *name);
 extern PGDLLEXPORT void test_injection_notice(const char *name);
+extern PGDLLEXPORT void test_injection_wait(const char *name);
+
+
+static void
+test_injection_init_shmem(void)
+{
+	bool		found;
+
+	if (inj_state != NULL)
+		return;
+
+	LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
+	inj_state = ShmemInitStruct("test_injection_points",
+								sizeof(TestInjectionPointSharedState),
+								&found);
+	if (!found)
+	{
+		/* First time through ... */
+		MemSet(inj_state, 0, sizeof(TestInjectionPointSharedState));
+		ConditionVariableInit(&inj_state->wait_point);
+	}
+	LWLockRelease(AddinShmemInitLock);
+}
 
 /* Set of callbacks available at point creation */
 void
@@ -39,6 +82,20 @@ test_injection_notice(const char *name)
 	elog(NOTICE, "notice triggered for injection point %s", name);
 }
 
+void
+test_injection_wait(const char *name)
+{
+	if (inj_state == NULL)
+		test_injection_init_shmem();
+	if (test_injection_wait_event == 0)
+		test_injection_wait_event = WaitEventExtensionNew("test_injection_wait");
+
+	/* And sleep.. */
+	ConditionVariablePrepareToSleep(&inj_state->wait_point);
+	ConditionVariableSleep(&inj_state->wait_point, test_injection_wait_event);
+	ConditionVariableCancelSleep();
+}
+
 /*
  * SQL function for creating an injection point.
  */
@@ -54,6 +111,8 @@ test_injection_points_attach(PG_FUNCTION_ARGS)
 		function = "test_injection_error";
 	else if (strcmp(mode, "notice") == 0)
 		function = "test_injection_notice";
+	else if (strcmp(mode, "wait") == 0)
+		function = "test_injection_wait";
 	else
 		elog(ERROR, "incorrect mode \"%s\" for injection point creation", mode);
 
@@ -76,6 +135,20 @@ test_injection_points_run(PG_FUNCTION_ARGS)
 	PG_RETURN_VOID();
 }
 
+/*
+ * SQL function for waking a condition variable.
+ */
+PG_FUNCTION_INFO_V1(test_injection_points_wake);
+Datum
+test_injection_points_wake(PG_FUNCTION_ARGS)
+{
+	if (inj_state == NULL)
+		test_injection_init_shmem();
+
+	ConditionVariableBroadcast(&inj_state->wait_point);
+	PG_RETURN_VOID();
+}
+
 /*
  * SQL function for dropping an injection point.
  */
-- 
2.42.0

