From c39dcb10df366a43e1511e5274704ddf02feb3e2 Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Tue, 14 Nov 2023 20:41:57 +0900
Subject: [PATCH v3 3/3] Add basic test for promotion and restart race
 condition

This test checks after 7863ee4def65 is reverted.  test_injection_points
is extended so as it is possible to add condition variables to wait for
in the point callbacks, with a SQL function to broadcast condition
variables that may be sleeping.
---
 src/backend/access/transam/xlog.c             |   7 +
 .../modules/test_injection_points/meson.build |   6 +
 .../t/002_invalid_checkpoint_after_promote.pl | 132 ++++++++++++++++++
 .../test_injection_points--1.0.sql            |  10 ++
 .../test_injection_points.c                   |  73 ++++++++++
 5 files changed, 228 insertions(+)
 create mode 100644 src/test/modules/test_injection_points/t/002_invalid_checkpoint_after_promote.pl

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 1159dff1a6..b2f9a78467 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -99,6 +99,7 @@
 #include "storage/sync.h"
 #include "utils/guc_hooks.h"
 #include "utils/guc_tables.h"
+#include "utils/injection_point.h"
 #include "utils/memutils.h"
 #include "utils/ps_status.h"
 #include "utils/relmapper.h"
@@ -7329,6 +7330,12 @@ CreateRestartPoint(int flags)
 
 	CheckPointGuts(lastCheckPoint.redo, flags);
 
+	/*
+	 * This location is important to be after CheckPointGuts() to ensure
+	 * that some work has happened.
+	 */
+	INJECTION_POINT_RUN("CreateRestartPoint");
+
 	/*
 	 * Remember the prior checkpoint's redo ptr for
 	 * UpdateCheckPointDistanceEstimate()
diff --git a/src/test/modules/test_injection_points/meson.build b/src/test/modules/test_injection_points/meson.build
index 7509a102ef..c6bb8eda77 100644
--- a/src/test/modules/test_injection_points/meson.build
+++ b/src/test/modules/test_injection_points/meson.build
@@ -34,4 +34,10 @@ tests += {
       'test_injection_points',
     ],
   },
+  'tap': {
+    'tests': [
+      't/001_snapshot_status.pl',
+      't/002_invalid_checkpoint_after_promote.pl',
+    ],
+  },
 }
diff --git a/src/test/modules/test_injection_points/t/002_invalid_checkpoint_after_promote.pl b/src/test/modules/test_injection_points/t/002_invalid_checkpoint_after_promote.pl
new file mode 100644
index 0000000000..5087bc5f9f
--- /dev/null
+++ b/src/test/modules/test_injection_points/t/002_invalid_checkpoint_after_promote.pl
@@ -0,0 +1,132 @@
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Time::HiRes qw(usleep nanosleep);
+use Test::More;
+
+# initialize primary node
+my $node_primary = PostgreSQL::Test::Cluster->new('master');
+$node_primary->init(allows_streaming => 1);
+$node_primary->append_conf(
+	'postgresql.conf', q[
+checkpoint_timeout = 30s
+log_checkpoints = on
+restart_after_crash = on
+]);
+$node_primary->start;
+my $backup_name = 'my_backup';
+$node_primary->backup($backup_name);
+
+# setup a standby
+my $node_standby = PostgreSQL::Test::Cluster->new('standby1');
+$node_standby->init_from_backup($node_primary, $backup_name, has_streaming => 1);
+$node_standby->start;
+
+# dummy table for the upcoming tests.
+$node_primary->safe_psql('postgres', 'checkpoint');
+$node_primary->safe_psql('postgres', 'CREATE TABLE prim_tab (a int);');
+
+# Register a injection point on the standby so as the follow-up
+# restart point running on it will wait.
+$node_primary->safe_psql('postgres', 'CREATE EXTENSION test_injection_points;');
+# Wait until the extension has been created on the standby
+$node_primary->wait_for_replay_catchup($node_standby);
+# This causes a restartpoint to wait on a standby.
+$node_standby->safe_psql('postgres',
+  "SELECT test_injection_points_create('CreateRestartPoint', 'wait');");
+
+# Execute a restart point on the standby, that will be waited on.
+# This needs to be in the background as we'll wait on it.
+my $logstart = -s $node_standby->logfile;
+my $psql_session =
+  $node_standby->background_psql('postgres', on_error_stop => 0);
+$psql_session->query_until(qr/starting_checkpoint/, q(
+   \echo starting_checkpoint
+   CHECKPOINT;
+));
+
+# Switch one WAL segment to make the restartpoint remove it.
+$node_primary->safe_psql('postgres', 'INSERT INTO prim_tab VALUES (1);');
+$node_primary->safe_psql('postgres', 'SELECT pg_switch_wal();');
+$node_primary->wait_for_replay_catchup($node_standby);
+
+# Wait until the checkpointer is in the middle of the restartpoint
+# processing.
+ok( $node_standby->poll_query_until(
+	'postgres',
+	qq[SELECT count(*) FROM pg_stat_activity
+           WHERE backend_type = 'checkpointer' AND wait_event = 'test_injection_wait' ;],
+	'1'),
+    'checkpointer is waiting at restart point'
+    ) or die "Timed out while waiting for checkpointer to run restartpoint";
+
+
+# Restartpoint should have started on standby.
+my $log = slurp_file($node_standby->logfile, $logstart);
+my $checkpoint_start = 0;
+if ($log =~ m/restartpoint starting: immediate wait/)
+{
+	$checkpoint_start = 1;
+}
+is($checkpoint_start, 1, 'restartpoint has started');
+
+# promote during restartpoint
+$node_primary->stop;
+$node_standby->promote;
+
+# Update the start position before waking up the checkpointer!
+$logstart = -s $node_standby->logfile;
+
+# Now wake up the checkpointer
+$node_standby->safe_psql('postgres',
+  "SELECT test_injection_points_wake();");
+
+# wait until checkpoint completes on the newly-promoted standby.
+my $checkpoint_complete = 0;
+for (my $i = 0; $i < 3000; $i++)
+{
+	my $log = slurp_file($node_standby->logfile, $logstart);
+	if ($log =~ m/restartpoint complete/)
+	{
+		$checkpoint_complete = 1;
+		last;
+	}
+	usleep(100_000);
+}
+is($checkpoint_complete, 1, 'restartpoint has completed');
+
+# kill SIGKILL a backend, and all backend will restart. Note that previous checkpoint has not completed.
+my $psql_timeout = IPC::Run::timer(3600);
+my ($killme_stdin, $killme_stdout, $killme_stderr) = ('', '', '');
+my $killme = IPC::Run::start(
+	[ 'psql', '-XAtq', '-v', 'ON_ERROR_STOP=1', '-f', '-', '-d', $node_standby->connstr('postgres') ],
+	'<',
+	\$killme_stdin,
+	'>',
+	\$killme_stdout,
+	'2>',
+	\$killme_stderr,
+	$psql_timeout);
+$killme_stdin .= q[
+SELECT pg_backend_pid();
+];
+$killme->pump until $killme_stdout =~ /[[:digit:]]+[\r\n]$/;
+my $pid = $killme_stdout;
+chomp($pid);
+my $ret = PostgreSQL::Test::Utils::system_log('pg_ctl', 'kill', 'KILL', $pid);
+is($ret, 0, 'killed process with KILL');
+my $stdout;
+my $stderr;
+
+# after recovery, the server will not start, and log PANIC: could not locate a valid checkpoint record
+for (my $i = 0; $i < 30; $i++)
+{
+    ($ret, $stdout, $stderr) = $node_standby->psql('postgres', 'select 1');
+    last if $ret == 0;
+	sleep(1);
+}
+is($ret, 0, "psql connect success");
+is($stdout, 1, "psql select 1");
+
+done_testing();
diff --git a/src/test/modules/test_injection_points/test_injection_points--1.0.sql b/src/test/modules/test_injection_points/test_injection_points--1.0.sql
index c4c42b5b69..2fefd48d98 100644
--- a/src/test/modules/test_injection_points/test_injection_points--1.0.sql
+++ b/src/test/modules/test_injection_points/test_injection_points--1.0.sql
@@ -25,6 +25,16 @@ RETURNS void
 AS 'MODULE_PATHNAME', 'test_injection_points_run'
 LANGUAGE C STRICT PARALLEL UNSAFE;
 
+--
+-- test_injection_points_wake()
+--
+-- Wakes a condition variable executed in an injection point.
+--
+CREATE FUNCTION test_injection_points_wake()
+RETURNS void
+AS 'MODULE_PATHNAME', 'test_injection_points_wake'
+LANGUAGE C STRICT PARALLEL UNSAFE;
+
 --
 -- test_injection_points_drop()
 --
diff --git a/src/test/modules/test_injection_points/test_injection_points.c b/src/test/modules/test_injection_points/test_injection_points.c
index e02b676773..b3e9c0c572 100644
--- a/src/test/modules/test_injection_points/test_injection_points.c
+++ b/src/test/modules/test_injection_points/test_injection_points.c
@@ -18,13 +18,56 @@
 #include "postgres.h"
 
 #include "fmgr.h"
+#include "storage/condition_variable.h"
+#include "storage/lwlock.h"
+#include "storage/shmem.h"
 #include "utils/builtins.h"
 #include "utils/injection_point.h"
+#include "utils/wait_event.h"
 
 PG_MODULE_MAGIC;
 
+/* Shared state information for injection points. */
+typedef struct TestInjectionPointSharedState
+{
+	/*
+	 * Wait variable that can be registered at a given point, and that can be
+	 * awakened via SQL.
+	 */
+	ConditionVariable	wait_point;
+} TestInjectionPointSharedState;
+
+/* Pointer to shared-memory state. */
+static TestInjectionPointSharedState *inj_state = NULL;
+
+/* Wait event when waiting on condition variable */
+static uint32 test_injection_wait_event = 0;
+
 extern PGDLLEXPORT void test_injection_error(const char *name);
 extern PGDLLEXPORT void test_injection_notice(const char *name);
+extern PGDLLEXPORT void test_injection_wait(const char *name);
+
+
+static void
+test_injection_init_shmem(void)
+{
+	bool		found;
+
+	if (inj_state != NULL)
+		return;
+
+	LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
+	inj_state = ShmemInitStruct("test_injection_points",
+								sizeof(TestInjectionPointSharedState),
+								&found);
+	if (!found)
+	{
+		/* First time through ... */
+		MemSet(inj_state, 0, sizeof(TestInjectionPointSharedState));
+		ConditionVariableInit(&inj_state->wait_point);
+	}
+	LWLockRelease(AddinShmemInitLock);
+}
 
 /* Set of callbacks available at point creation */
 void
@@ -39,6 +82,20 @@ test_injection_notice(const char *name)
 	elog(NOTICE, "notice triggered for injection point %s", name);
 }
 
+void
+test_injection_wait(const char *name)
+{
+	if (inj_state == NULL)
+		test_injection_init_shmem();
+	if (test_injection_wait_event == 0)
+		test_injection_wait_event = WaitEventExtensionNew("test_injection_wait");
+
+	/* And sleep.. */
+	ConditionVariablePrepareToSleep(&inj_state->wait_point);
+	ConditionVariableSleep(&inj_state->wait_point, test_injection_wait_event);
+	ConditionVariableCancelSleep();
+}
+
 /*
  * SQL function for creating an injection point.
  */
@@ -54,6 +111,8 @@ test_injection_points_create(PG_FUNCTION_ARGS)
 		function = "test_injection_error";
 	else if (strcmp(mode, "notice") == 0)
 		function = "test_injection_notice";
+	else if (strcmp(mode, "wait") == 0)
+		function = "test_injection_wait";
 	else
 		elog(ERROR, "incorrect mode \"%s\" for injection point creation", mode);
 
@@ -76,6 +135,20 @@ test_injection_points_run(PG_FUNCTION_ARGS)
 	PG_RETURN_VOID();
 }
 
+/*
+ * SQL function for waking a condition variable.
+ */
+PG_FUNCTION_INFO_V1(test_injection_points_wake);
+Datum
+test_injection_points_wake(PG_FUNCTION_ARGS)
+{
+	if (inj_state == NULL)
+		test_injection_init_shmem();
+
+	ConditionVariableBroadcast(&inj_state->wait_point);
+	PG_RETURN_VOID();
+}
+
 /*
  * SQL function for dropping an injection point.
  */
-- 
2.42.0

