From 1a1168caa9d4bfe5b36ab14ed31ae0ef98d267e8 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Mon, 20 Apr 2020 17:05:42 +1200
Subject: [PATCH v4 5/5] Truncate snapshot-too-old time map when CLOG is
 truncated.

It's not safe to leave xids in the map that have wrapped around.

Reported-by: Andres Freund
---
 src/backend/commands/vacuum.c                 |  3 +
 src/backend/utils/time/snapmgr.c              | 21 +++++
 src/include/utils/snapmgr.h                   |  1 +
 src/test/modules/snapshot_too_old/Makefile    |  4 +-
 .../snapshot_too_old/t/001_truncate.pl        | 81 +++++++++++++++++++
 5 files changed, 109 insertions(+), 1 deletion(-)
 create mode 100644 src/test/modules/snapshot_too_old/t/001_truncate.pl

diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 5a110edb07..37ead45fa5 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -1627,6 +1627,9 @@ vac_truncate_clog(TransactionId frozenXID,
 	 */
 	AdvanceOldestCommitTsXid(frozenXID);
 
+	/* Make sure snapshot_too_old drops old XIDs. */
+	TruncateOldSnapshotTimeMapping(frozenXID);
+
 	/*
 	 * Truncate CLOG, multixact and CommitTs to the oldest computed value.
 	 */
diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index 19e6c52b80..edb47c9664 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -1974,6 +1974,27 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
 }
 
 
+/*
+ * Remove old xids from the timing map, so the CLOG can be truncated.
+ */
+void
+TruncateOldSnapshotTimeMapping(TransactionId frozenXID)
+{
+	LWLockAcquire(OldSnapshotTimeMapLock, LW_EXCLUSIVE);
+	while (oldSnapshotControl->count_used > 0 &&
+		   TransactionIdPrecedes(oldSnapshotControl->xid_by_minute[oldSnapshotControl->head_offset],
+								 frozenXID))
+	{
+		oldSnapshotControl->head_timestamp += USECS_PER_MINUTE;
+		oldSnapshotControl->head_offset =
+			(oldSnapshotControl->head_offset + 1) %
+			OLD_SNAPSHOT_TIME_MAP_ENTRIES;
+		oldSnapshotControl->count_used--;
+	}
+	LWLockRelease(OldSnapshotTimeMapLock);
+}
+
+
 /*
  * Setup a snapshot that replaces normal catalog snapshots that allows catalog
  * access to behave just like it did at a certain point in the past.
diff --git a/src/include/utils/snapmgr.h b/src/include/utils/snapmgr.h
index b28d13ce84..4f53aad956 100644
--- a/src/include/utils/snapmgr.h
+++ b/src/include/utils/snapmgr.h
@@ -135,6 +135,7 @@ extern TransactionId TransactionIdLimitedForOldSnapshots(TransactionId recentXmi
 														 Relation relation);
 extern void MaintainOldSnapshotTimeMapping(TimestampTz whenTaken,
 										   TransactionId xmin);
+extern void TruncateOldSnapshotTimeMapping(TransactionId frozenXID);
 
 extern char *ExportSnapshot(Snapshot snapshot);
 
diff --git a/src/test/modules/snapshot_too_old/Makefile b/src/test/modules/snapshot_too_old/Makefile
index 81836e9953..f919944984 100644
--- a/src/test/modules/snapshot_too_old/Makefile
+++ b/src/test/modules/snapshot_too_old/Makefile
@@ -9,7 +9,7 @@ EXTENSION = test_sto
 DATA = test_sto--1.0.sql
 PGDESCFILE = "test_sto -- internal testing for snapshot too old errors"
 
-EXTRA_INSTALL = contrib/pg_visibility
+EXTRA_INSTALL = contrib/pg_visibility contrib/old_snapshot
 
 ISOLATION = sto_using_cursor sto_using_select sto_using_hash_index
 ISOLATION_OPTS = --temp-config $(top_srcdir)/src/test/modules/snapshot_too_old/sto.conf
@@ -19,6 +19,8 @@ ISOLATION_OPTS = --temp-config $(top_srcdir)/src/test/modules/snapshot_too_old/s
 # because it'd be dangerous on a production system.
 NO_INSTALLCHECK = 1
 
+TAP_TESTS = 1
+
 ifdef USE_PGXS
 PG_CONFIG = pg_config
 PGXS := $(shell $(PG_CONFIG) --pgxs)
diff --git a/src/test/modules/snapshot_too_old/t/001_truncate.pl b/src/test/modules/snapshot_too_old/t/001_truncate.pl
new file mode 100644
index 0000000000..c3bff45f65
--- /dev/null
+++ b/src/test/modules/snapshot_too_old/t/001_truncate.pl
@@ -0,0 +1,81 @@
+# Test truncation of the old snapshot time mapping, to check
+# that we can't get into trouble when xids wrap around.
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 6;
+
+my $node = get_new_node('master');
+$node->init;
+$node->append_conf("postgresql.conf", "timezone = UTC");
+$node->append_conf("postgresql.conf", "old_snapshot_threshold=10");
+$node->append_conf("postgresql.conf", "max_prepared_transactions=10");
+$node->start;
+$node->psql('postgres', 'update pg_database set datallowconn = true');
+$node->psql('postgres', 'create extension old_snapshot');
+$node->psql('postgres', 'create extension test_sto');
+
+note "check time map is truncated when CLOG is";
+
+# build up a time map with 4 entries
+$node->psql('postgres', "select test_sto_clobber_snapshot_timestamp('3000-01-01 00:00:00Z')");
+$node->psql('postgres', "select pg_current_xact_id()");
+$node->psql('postgres', "select test_sto_clobber_snapshot_timestamp('3000-01-01 00:01:00Z')");
+$node->psql('postgres', "select pg_current_xact_id()");
+$node->psql('postgres', "select test_sto_clobber_snapshot_timestamp('3000-01-01 00:02:00Z')");
+$node->psql('postgres', "select pg_current_xact_id()");
+$node->psql('postgres', "select test_sto_clobber_snapshot_timestamp('3000-01-01 00:03:00Z')");
+$node->psql('postgres', "select pg_current_xact_id()");
+my $count;
+$node->psql('postgres', "select count(*) from pg_old_snapshot_time_mapping()", stdout => \$count);
+is($count, 4, "expected to have 4 entries in the old snapshot time map");
+
+# now cause frozen XID to advance
+$node->psql('postgres', 'vacuum freeze');
+$node->psql('template0', 'vacuum freeze');
+$node->psql('template1', 'vacuum freeze');
+
+# we expect all XIDs to have been truncated
+$node->psql('postgres', "select count(*) from pg_old_snapshot_time_mapping()", stdout => \$count);
+is($count, 0, "expected to have 0 entries in the old snapshot time map");
+
+# put two more in the map
+$node->psql('postgres', "select test_sto_clobber_snapshot_timestamp('3000-01-01 00:04:00Z')");
+$node->psql('postgres', "select pg_current_xact_id()");
+$node->psql('postgres', "select test_sto_clobber_snapshot_timestamp('3000-01-01 00:05:00Z')");
+$node->psql('postgres', "select pg_current_xact_id()");
+$node->psql('postgres', "select count(*) from pg_old_snapshot_time_mapping()", stdout => \$count);
+is($count, 2, "expected to have 2 entries in the old snapshot time map");
+
+# prepare a transaction, to stop xmin from getting further ahead
+$node->psql('postgres', "begin; select pg_current_xact_id(); prepare transaction 'tx1'");
+
+# add 16 more minutes (this tests wrapping around the mapping array, which is of size 10 + 10)...
+$node->psql('postgres', "select test_sto_clobber_snapshot_timestamp('3000-01-01 00:21:00Z')");
+$node->psql('postgres', "select pg_current_xact_id()");
+$node->psql('postgres', "select count(*) from pg_old_snapshot_time_mapping()", stdout => \$count);
+is($count, 18, "expected to have 18 entries in the old snapshot time map");
+
+# now cause frozen XID to advance
+$node->psql('postgres', 'vacuum freeze');
+$node->psql('template0', 'vacuum freeze');
+$node->psql('template1', 'vacuum freeze');
+
+# this should leave just 16
+$node->psql('postgres', "select count(*) from pg_old_snapshot_time_mapping()", stdout => \$count);
+is($count, 16, "expected to have 16 entries in the old snapshot time map");
+
+# commit tx1, and then freeze again to get rid of all of them
+$node->psql('postgres', "commit prepared 'tx1'");
+
+# now cause frozen XID to advance
+$node->psql('postgres', 'vacuum freeze');
+$node->psql('template0', 'vacuum freeze');
+$node->psql('template1', 'vacuum freeze');
+
+# we should now be back to empty
+$node->psql('postgres', "select count(*) from pg_old_snapshot_time_mapping()", stdout => \$count);
+is($count, 0, "expected to have 0 entries in the old snapshot time map");
+
+$node->stop;
-- 
2.20.1

