From c56dc1f15a71a988f84e573f4a54322643236143 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Mon, 20 Apr 2020 17:05:42 +1200
Subject: [PATCH v3 5/5] Truncate snapshot-too-old time map when CLOG is
 truncated.

It's not safe to leave xids in the map that have wrapped around.

Reported-by: Andres Freund
---
 src/backend/commands/vacuum.c                 |  3 +
 src/backend/utils/time/snapmgr.c              | 21 +++++
 src/include/utils/snapmgr.h                   |  1 +
 src/test/modules/snapshot_too_old/Makefile    |  2 +
 .../snapshot_too_old/t/001_truncate.pl        | 80 +++++++++++++++++++
 .../snapshot_too_old/test_sto--1.0.sql        |  5 ++
 src/test/modules/snapshot_too_old/test_sto.c  | 16 ++++
 7 files changed, 128 insertions(+)
 create mode 100644 src/test/modules/snapshot_too_old/t/001_truncate.pl

diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 5a110edb07..37ead45fa5 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -1627,6 +1627,9 @@ vac_truncate_clog(TransactionId frozenXID,
 	 */
 	AdvanceOldestCommitTsXid(frozenXID);
 
+	/* Make sure snapshot_too_old drops old XIDs. */
+	TruncateOldSnapshotTimeMapping(frozenXID);
+
 	/*
 	 * Truncate CLOG, multixact and CommitTs to the oldest computed value.
 	 */
diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index 19e6c52b80..edb47c9664 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -1974,6 +1974,27 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
 }
 
 
+/*
+ * Remove old xids from the timing map, so the CLOG can be truncated.
+ */
+void
+TruncateOldSnapshotTimeMapping(TransactionId frozenXID)
+{
+	LWLockAcquire(OldSnapshotTimeMapLock, LW_EXCLUSIVE);
+	while (oldSnapshotControl->count_used > 0 &&
+		   TransactionIdPrecedes(oldSnapshotControl->xid_by_minute[oldSnapshotControl->head_offset],
+								 frozenXID))
+	{
+		oldSnapshotControl->head_timestamp += USECS_PER_MINUTE;
+		oldSnapshotControl->head_offset =
+			(oldSnapshotControl->head_offset + 1) %
+			OLD_SNAPSHOT_TIME_MAP_ENTRIES;
+		oldSnapshotControl->count_used--;
+	}
+	LWLockRelease(OldSnapshotTimeMapLock);
+}
+
+
 /*
  * Setup a snapshot that replaces normal catalog snapshots that allows catalog
  * access to behave just like it did at a certain point in the past.
diff --git a/src/include/utils/snapmgr.h b/src/include/utils/snapmgr.h
index b28d13ce84..4f53aad956 100644
--- a/src/include/utils/snapmgr.h
+++ b/src/include/utils/snapmgr.h
@@ -135,6 +135,7 @@ extern TransactionId TransactionIdLimitedForOldSnapshots(TransactionId recentXmi
 														 Relation relation);
 extern void MaintainOldSnapshotTimeMapping(TimestampTz whenTaken,
 										   TransactionId xmin);
+extern void TruncateOldSnapshotTimeMapping(TransactionId frozenXID);
 
 extern char *ExportSnapshot(Snapshot snapshot);
 
diff --git a/src/test/modules/snapshot_too_old/Makefile b/src/test/modules/snapshot_too_old/Makefile
index be5ad77b7e..0a69f3a232 100644
--- a/src/test/modules/snapshot_too_old/Makefile
+++ b/src/test/modules/snapshot_too_old/Makefile
@@ -17,6 +17,8 @@ ISOLATION_OPTS = --temp-config $(top_srcdir)/src/test/modules/snapshot_too_old/s
 # because it'd be dangerous on a production system.
 NO_INSTALLCHECK = 1
 
+TAP_TESTS = 1
+
 ifdef USE_PGXS
 PG_CONFIG = pg_config
 PGXS := $(shell $(PG_CONFIG) --pgxs)
diff --git a/src/test/modules/snapshot_too_old/t/001_truncate.pl b/src/test/modules/snapshot_too_old/t/001_truncate.pl
new file mode 100644
index 0000000000..849c70c5ec
--- /dev/null
+++ b/src/test/modules/snapshot_too_old/t/001_truncate.pl
@@ -0,0 +1,80 @@
+# Test truncation of the old snapshot time mapping, to check
+# that we can't get into trouble when xids wrap around.
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 6;
+
+my $node = get_new_node('master');
+$node->init;
+$node->append_conf("postgresql.conf", "timezone = UTC");
+$node->append_conf("postgresql.conf", "old_snapshot_threshold=10");
+$node->append_conf("postgresql.conf", "max_prepared_transactions=10");
+$node->start;
+$node->psql('postgres', 'update pg_database set datallowconn = true');
+$node->psql('postgres', 'create extension test_sto');
+
+note "check time map is truncated when CLOG is";
+
+# build up a time map with 4 entries
+$node->psql('postgres', "select test_sto_clobber_snapshot_timestamp('3000-01-01 00:00:00Z')");
+$node->psql('postgres', "select pg_current_xact_id()");
+$node->psql('postgres', "select test_sto_clobber_snapshot_timestamp('3000-01-01 00:01:00Z')");
+$node->psql('postgres', "select pg_current_xact_id()");
+$node->psql('postgres', "select test_sto_clobber_snapshot_timestamp('3000-01-01 00:02:00Z')");
+$node->psql('postgres', "select pg_current_xact_id()");
+$node->psql('postgres', "select test_sto_clobber_snapshot_timestamp('3000-01-01 00:03:00Z')");
+$node->psql('postgres', "select pg_current_xact_id()");
+my $count;
+$node->psql('postgres', "select test_sto_time_map_size()", stdout => \$count);
+is($count, 4, "expected to have 4 entries in the old snapshot time map");
+
+# now cause frozen XID to advance
+$node->psql('postgres', 'vacuum freeze');
+$node->psql('template0', 'vacuum freeze');
+$node->psql('template1', 'vacuum freeze');
+
+# we expect all XIDs to have been truncated
+$node->psql('postgres', "select test_sto_time_map_size()", stdout => \$count);
+is($count, 0, "expected to have 0 entries in the old snapshot time map");
+
+# put two more in the map
+$node->psql('postgres', "select test_sto_clobber_snapshot_timestamp('3000-01-01 00:04:00Z')");
+$node->psql('postgres', "select pg_current_xact_id()");
+$node->psql('postgres', "select test_sto_clobber_snapshot_timestamp('3000-01-01 00:05:00Z')");
+$node->psql('postgres', "select pg_current_xact_id()");
+$node->psql('postgres', "select test_sto_time_map_size()", stdout => \$count);
+is($count, 2, "expected to have 2 entries in the old snapshot time map");
+
+# prepare a transaction, to stop xmin from getting further ahead
+$node->psql('postgres', "begin; select pg_current_xact_id(); prepare transaction 'tx1'");
+
+# add 16 more minutes (this tests wrapping around the mapping array, which is of size 10 + 10)...
+$node->psql('postgres', "select test_sto_clobber_snapshot_timestamp('3000-01-01 00:21:00Z')");
+$node->psql('postgres', "select pg_current_xact_id()");
+$node->psql('postgres', "select test_sto_time_map_size()", stdout => \$count);
+is($count, 18, "expected to have 18 entries in the old snapshot time map");
+
+# now cause frozen XID to advance
+$node->psql('postgres', 'vacuum freeze');
+$node->psql('template0', 'vacuum freeze');
+$node->psql('template1', 'vacuum freeze');
+
+# this should leave just 16
+$node->psql('postgres', "select test_sto_time_map_size()", stdout => \$count);
+is($count, 16, "expected to have 16 entries in the old snapshot time map");
+
+# commit tx1, and then freeze again to get rid of all of them
+$node->psql('postgres', "commit prepared 'tx1'");
+
+# now cause frozen XID to advance
+$node->psql('postgres', 'vacuum freeze');
+$node->psql('template0', 'vacuum freeze');
+$node->psql('template1', 'vacuum freeze');
+
+# we should now be back to empty
+$node->psql('postgres', "select test_sto_time_map_size()", stdout => \$count);
+is($count, 0, "expected to have 0 entries in the old snapshot time map");
+
+$node->stop;
diff --git a/src/test/modules/snapshot_too_old/test_sto--1.0.sql b/src/test/modules/snapshot_too_old/test_sto--1.0.sql
index c10afcf23a..b4cc970ba4 100644
--- a/src/test/modules/snapshot_too_old/test_sto--1.0.sql
+++ b/src/test/modules/snapshot_too_old/test_sto--1.0.sql
@@ -12,3 +12,8 @@ CREATE FUNCTION test_sto_reset_all_state()
 RETURNS VOID
 AS 'MODULE_PATHNAME', 'test_sto_reset_all_state'
 LANGUAGE C STRICT;
+
+CREATE FUNCTION test_sto_time_map_size()
+RETURNS int4
+AS 'MODULE_PATHNAME', 'test_sto_time_map_size'
+LANGUAGE C STRICT;
diff --git a/src/test/modules/snapshot_too_old/test_sto.c b/src/test/modules/snapshot_too_old/test_sto.c
index f6c9a1a000..5b874a9641 100644
--- a/src/test/modules/snapshot_too_old/test_sto.c
+++ b/src/test/modules/snapshot_too_old/test_sto.c
@@ -22,6 +22,7 @@
 PG_MODULE_MAGIC;
 PG_FUNCTION_INFO_V1(test_sto_reset_all_state);
 PG_FUNCTION_INFO_V1(test_sto_clobber_snapshot_timestamp);
+PG_FUNCTION_INFO_V1(test_sto_time_map_size);
 
 /*
  * Revert to initial state.  This is not safe except in carefully
@@ -72,3 +73,18 @@ test_sto_clobber_snapshot_timestamp(PG_FUNCTION_ARGS)
 
 	PG_RETURN_NULL();
 }
+
+/*
+ * How many xids are currently in the xid/time map?  Used only for testing.
+ */
+Datum
+test_sto_time_map_size(PG_FUNCTION_ARGS)
+{
+	int		result;
+
+	LWLockAcquire(OldSnapshotTimeMapLock, LW_SHARED);
+	result = oldSnapshotControl->count_used;
+	LWLockRelease(OldSnapshotTimeMapLock);
+
+	PG_RETURN_INT32(result);
+}
-- 
2.20.1

