From df29ebe3121e3b924f9e0fe40b05e55dad2bd4c8 Mon Sep 17 00:00:00 2001
From: Bertrand Drouvot <bertranddrouvot.pg@gmail.com>
Date: Mon, 10 Feb 2025 13:36:48 +0000
Subject: [PATCH v1 2/2] Fix race conditions in 035_standby_logical_decoding.pl

In rare circumstances (and on slow machines) it is possible that an
xl_running_xacts record is emitted and that the catalog_xmin of a logical slot
advances past the conflict point. In that case no conflict is reported and the
test fails.

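This commit adds a new injection point to prevent the catalog_xmin from
advancing past the conflict point. The test attaches it on the standby before
running the conflicting commands and wakes up and detaches it afterwards. As a
consequence, the test is now skipped on builds without injection point support.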
---
 src/backend/replication/logical/logical.c     |  3 +++
 .../t/035_standby_logical_decoding.pl         | 19 +++++++++++++++++++
 2 files changed, 22 insertions(+)
  11.7% src/backend/replication/logical/
  88.2% src/test/recovery/t/

diff --git a/src/backend/replication/logical/logical.c b/src/backend/replication/logical/logical.c
index 8ea846bfc3b..578837bfc1c 100644
--- a/src/backend/replication/logical/logical.c
+++ b/src/backend/replication/logical/logical.c
@@ -41,6 +41,7 @@
 #include "storage/proc.h"
 #include "storage/procarray.h"
 #include "utils/builtins.h"
+#include "utils/injection_point.h"
 #include "utils/inval.h"
 #include "utils/memutils.h"
 
@@ -1826,6 +1827,8 @@ LogicalConfirmReceivedLocation(XLogRecPtr lsn)
 		bool		updated_xmin = false;
 		bool		updated_restart = false;
 
+		INJECTION_POINT("before-confirm-xmin-location");
+
 		SpinLockAcquire(&MyReplicationSlot->mutex);
 
 		MyReplicationSlot->data.confirmed_flush = lsn;
diff --git a/src/test/recovery/t/035_standby_logical_decoding.pl b/src/test/recovery/t/035_standby_logical_decoding.pl
index 505e85d1eb6..d6b8d28a7e0 100644
--- a/src/test/recovery/t/035_standby_logical_decoding.pl
+++ b/src/test/recovery/t/035_standby_logical_decoding.pl
@@ -10,6 +10,11 @@ use PostgreSQL::Test::Cluster;
 use PostgreSQL::Test::Utils;
 use Test::More;
 
+if ($ENV{enable_injection_points} ne 'yes')
+{
+	plan skip_all => 'Injection points not supported by this build';
+}
+
 my ($stdout, $stderr, $cascading_stdout, $cascading_stderr, $handle);
 
 my $node_primary = PostgreSQL::Test::Cluster->new('primary');
@@ -256,6 +261,10 @@ sub wait_until_vacuum_can_remove
 	my $xid_horizon = $node_primary->safe_psql('testdb',
 		qq[select pg_snapshot_xmin(pg_current_snapshot());]);
 
+	# Ensure catalog_xmin can not advance
+	$node_standby->safe_psql('testdb',
+		"SELECT injection_points_attach('before-confirm-xmin-location', 'wait');");
+
 	# Launch our sql.
 	$node_primary->safe_psql('testdb', qq[$sql]);
 
@@ -269,6 +278,10 @@ sub wait_until_vacuum_can_remove
 	$node_primary->safe_psql(
 		'testdb', qq[VACUUM $vac_option verbose $to_vac;
 										  INSERT INTO flush_wal DEFAULT VALUES;]);
+
+	# Unlock the catalog_xmin update (if any)
+	$node_standby->safe_psql('testdb',
+		"SELECT injection_points_wakeup_detach('before-confirm-xmin-location');");
 }
 
 ########################
@@ -490,6 +503,12 @@ is($result, qq(10), 'check replicated inserts after subscription on standby');
 $node_subscriber->safe_psql('postgres', "DROP SUBSCRIPTION tap_sub");
 $node_subscriber->stop;
 
+# Create the injection_points extension
+$node_primary->safe_psql('testdb', 'CREATE EXTENSION injection_points;');
+
+# Wait until the extension has been created on the standby
+$node_primary->wait_for_replay_catchup($node_standby);
+
 ##################################################
 # Recovery conflict: Invalidate conflicting slots, including in-use slots
 # Scenario 1: hot_standby_feedback off and vacuum FULL
-- 
2.34.1

