From 31ebbadbca629abf8b7cf982d573ad2c838bf27f Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Thu, 26 Dec 2024 14:15:51 +0900
Subject: [PATCH v1 2/2] Fix handling of orphaned 2PC files in the future at
 recovery

Issue introduced by 728bd991c3c4, that has added support for 2PC files
in shared memory at recovery.

Before this feature was introduced, files in the future of the
transaction ID horizon were checked first, followed by a check if a
transaction ID is aborted or committed which could involve a pg_xact
lookup.  After this commit, these checks were done in a reversed order,
but files in the future do not have a state that can be checked in
pg_xact, hence this caused recovery to fail abruptly should an orphaned
2PC file in the future of the transaction horizon exist in pg_twophase
at the beginning of recovery.

A test is added to check this scenario, using an empty 2PC with a
transaction ID large enough to be in the future when running the test.

Author: Vitaly Davydov, Michael Paquier
Discussion: https://postgr.es/m/11e597-676ab680-8d-374f23c0@145466129
Backpatch-through: 13
---
 src/backend/access/transam/twophase.c | 40 ++++++++++-----------
 src/test/recovery/t/009_twophase.pl   | 51 ++++++++++-----------------
 2 files changed, 38 insertions(+), 53 deletions(-)

diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index d2649264f9..4b5a3146f1 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -2205,26 +2205,6 @@ ProcessTwoPhaseBuffer(TransactionId xid,
 	if (!fromdisk)
 		Assert(prepare_start_lsn != InvalidXLogRecPtr);
 
-	/* Already processed? */
-	if (TransactionIdDidCommit(xid) || TransactionIdDidAbort(xid))
-	{
-		if (fromdisk)
-		{
-			ereport(WARNING,
-					(errmsg("removing stale two-phase state file for transaction %u",
-							xid)));
-			RemoveTwoPhaseFile(xid, true);
-		}
-		else
-		{
-			ereport(WARNING,
-					(errmsg("removing stale two-phase state from memory for transaction %u",
-							xid)));
-			PrepareRedoRemove(xid, true);
-		}
-		return NULL;
-	}
-
 	/* Reject XID if too new */
 	if (TransactionIdFollowsOrEquals(xid, origNextXid))
 	{
@@ -2245,6 +2225,26 @@ ProcessTwoPhaseBuffer(TransactionId xid,
 		return NULL;
 	}
 
+	/* Already processed? */
+	if (TransactionIdDidCommit(xid) || TransactionIdDidAbort(xid))
+	{
+		if (fromdisk)
+		{
+			ereport(WARNING,
+					(errmsg("removing stale two-phase state file for transaction %u",
+							xid)));
+			RemoveTwoPhaseFile(xid, true);
+		}
+		else
+		{
+			ereport(WARNING,
+					(errmsg("removing stale two-phase state from memory for transaction %u",
+							xid)));
+			PrepareRedoRemove(xid, true);
+		}
+		return NULL;
+	}
+
 	if (fromdisk)
 	{
 		/* Read and validate file */
diff --git a/src/test/recovery/t/009_twophase.pl b/src/test/recovery/t/009_twophase.pl
index 4b3e0f77dc..854910ea68 100644
--- a/src/test/recovery/t/009_twophase.pl
+++ b/src/test/recovery/t/009_twophase.pl
@@ -534,42 +534,27 @@ is( $psql_out,
 	qq{27|issued to paris},
 	"Check expected t_009_tbl2 data on standby");
 
+###############################################################################
+# Check handling of orphaned 2PC files at recovery.
+###############################################################################
 
-# Exercise the 2PC recovery code in StartupSUBTRANS, which is concerned with
-# ensuring that enough pg_subtrans pages exist on disk to cover the range of
-# prepared transactions at server start time.  There's not much we can verify
-# directly, but let's at least get the code to run.
-$cur_standby->stop();
-configure_and_reload($cur_primary, "synchronous_standby_names = ''");
+$cur_primary->teardown_node;
 
-$cur_primary->safe_psql('postgres', "CHECKPOINT");
+# Grab location in logs of primary
+my $log_offset = -s $cur_primary->logfile;
+
+# Create a fake file with a transaction ID large enough to be in the future,
+# then check that the primary is able to start and remove this file at
+# recovery.
+
+my $future_2pc_file = $cur_primary->data_dir . '/pg_twophase/0000000000FFFFFF';
+append_to_file $future_2pc_file, "";
 
-my $start_lsn =
-  $cur_primary->safe_psql('postgres', 'select pg_current_wal_insert_lsn()');
-$cur_primary->safe_psql('postgres',
-	"CREATE TABLE test(); BEGIN; CREATE TABLE test1(); PREPARE TRANSACTION 'foo';"
-);
-my $osubtrans = $cur_primary->safe_psql('postgres',
-	"select 'pg_subtrans/'||f, s.size from pg_ls_dir('pg_subtrans') f, pg_stat_file('pg_subtrans/'||f) s"
-);
-$cur_primary->pgbench(
-	'--no-vacuum --client=5 --transactions=1000',
-	0,
-	[],
-	[],
-	'pgbench run to cause pg_subtrans traffic',
-	{
-		'009_twophase.pgb' => 'insert into test default values'
-	});
-# StartupSUBTRANS is exercised with a wide range of visible XIDs in this
-# stop/start sequence, because we left a prepared transaction open above.
-# Also, setting subtransaction_buffers to 32 above causes to switch SLRU
-# bank, for additional code coverage.
-$cur_primary->stop;
 $cur_primary->start;
-my $nsubtrans = $cur_primary->safe_psql('postgres',
-	"select 'pg_subtrans/'||f, s.size from pg_ls_dir('pg_subtrans') f, pg_stat_file('pg_subtrans/'||f) s"
-);
-isnt($osubtrans, $nsubtrans, "contents of pg_subtrans/ have changed");
+$cur_primary->log_check(
+	"future two-phase file removed at recovery",
+	$log_offset,
+	log_like =>
+	  [qr/removing future two-phase state file for transaction 16777215/]);
 
 done_testing();
-- 
2.45.2

