From 49f29afa0f7ee3da13326fb42cc77d00c15160d7 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Mon, 20 Apr 2020 17:05:42 +1200
Subject: [PATCH v5 5/6] Truncate snapshot-too-old time map when CLOG is
 truncated.

It's not safe to leave xids in the map that have wrapped around,
although it's probably very hard to actually reach that state.

Reported-by: Andres Freund
---
 src/backend/commands/vacuum.c                 |   3 +
 src/backend/utils/time/snapmgr.c              |  21 ++++
 src/include/utils/snapmgr.h                   |   1 +
 src/test/modules/snapshot_too_old/Makefile    |   4 +-
 .../snapshot_too_old/t/001_truncate.pl        | 100 ++++++++++++++++++
 5 files changed, 128 insertions(+), 1 deletion(-)
 create mode 100644 src/test/modules/snapshot_too_old/t/001_truncate.pl

diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 5a110edb07..37ead45fa5 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -1627,6 +1627,9 @@ vac_truncate_clog(TransactionId frozenXID,
 	 */
 	AdvanceOldestCommitTsXid(frozenXID);
 
+	/* Make sure snapshot_too_old drops old XIDs. */
+	TruncateOldSnapshotTimeMapping(frozenXID);
+
 	/*
 	 * Truncate CLOG, multixact and CommitTs to the oldest computed value.
 	 */
diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index 19e6c52b80..edb47c9664 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -1974,6 +1974,27 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
 }
 
 
+/*
+ * Remove old xids from the timing map, so the CLOG can be truncated.
+ */
+void
+TruncateOldSnapshotTimeMapping(TransactionId frozenXID)
+{
+	LWLockAcquire(OldSnapshotTimeMapLock, LW_EXCLUSIVE);
+	while (oldSnapshotControl->count_used > 0 &&
+		   TransactionIdPrecedes(oldSnapshotControl->xid_by_minute[oldSnapshotControl->head_offset],
+								 frozenXID))
+	{
+		oldSnapshotControl->head_timestamp += USECS_PER_MINUTE;
+		oldSnapshotControl->head_offset =
+			(oldSnapshotControl->head_offset + 1) %
+			OLD_SNAPSHOT_TIME_MAP_ENTRIES;
+		oldSnapshotControl->count_used--;
+	}
+	LWLockRelease(OldSnapshotTimeMapLock);
+}
+
+
 /*
  * Setup a snapshot that replaces normal catalog snapshots that allows catalog
  * access to behave just like it did at a certain point in the past.
diff --git a/src/include/utils/snapmgr.h b/src/include/utils/snapmgr.h
index b28d13ce84..4f53aad956 100644
--- a/src/include/utils/snapmgr.h
+++ b/src/include/utils/snapmgr.h
@@ -135,6 +135,7 @@ extern TransactionId TransactionIdLimitedForOldSnapshots(TransactionId recentXmi
 														 Relation relation);
 extern void MaintainOldSnapshotTimeMapping(TimestampTz whenTaken,
 										   TransactionId xmin);
+extern void TruncateOldSnapshotTimeMapping(TransactionId frozenXID);
 
 extern char *ExportSnapshot(Snapshot snapshot);
 
diff --git a/src/test/modules/snapshot_too_old/Makefile b/src/test/modules/snapshot_too_old/Makefile
index 81836e9953..f919944984 100644
--- a/src/test/modules/snapshot_too_old/Makefile
+++ b/src/test/modules/snapshot_too_old/Makefile
@@ -9,7 +9,7 @@ EXTENSION = test_sto
 DATA = test_sto--1.0.sql
 PGDESCFILE = "test_sto -- internal testing for snapshot too old errors"
 
-EXTRA_INSTALL = contrib/pg_visibility
+EXTRA_INSTALL = contrib/pg_visibility contrib/old_snapshot
 
 ISOLATION = sto_using_cursor sto_using_select sto_using_hash_index
 ISOLATION_OPTS = --temp-config $(top_srcdir)/src/test/modules/snapshot_too_old/sto.conf
@@ -19,6 +19,8 @@ ISOLATION_OPTS = --temp-config $(top_srcdir)/src/test/modules/snapshot_too_old/s
 # because it'd be dangerous on a production system.
 NO_INSTALLCHECK = 1
 
+TAP_TESTS = 1
+
 ifdef USE_PGXS
 PG_CONFIG = pg_config
 PGXS := $(shell $(PG_CONFIG) --pgxs)
diff --git a/src/test/modules/snapshot_too_old/t/001_truncate.pl b/src/test/modules/snapshot_too_old/t/001_truncate.pl
new file mode 100644
index 0000000000..afcca232f2
--- /dev/null
+++ b/src/test/modules/snapshot_too_old/t/001_truncate.pl
@@ -0,0 +1,100 @@
+# Test truncation of the old snapshot time mapping, to check
+# that we can't get into trouble when xids wrap around.
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 6;
+
+my $node = get_new_node('master');
+$node->init;
+$node->append_conf("postgresql.conf", "timezone = UTC");
+$node->append_conf("postgresql.conf", "old_snapshot_threshold=10");
+$node->append_conf("postgresql.conf", "max_prepared_transactions=10");
+$node->append_conf("postgresql.conf", "autovacuum=off");
+$node->start;
+$node->psql('postgres', 'update pg_database set datallowconn = true');
+$node->psql('postgres', 'create extension old_snapshot');
+$node->psql('postgres', 'create extension test_sto');
+
+note "check time map is truncated when CLOG is";
+
+sub set_time
+{
+	my $time = shift;
+	$node->psql('postgres', "select test_sto_clobber_snapshot_timestamp('$time')");
+}
+
+sub advance_xid
+{
+	my $time = shift;
+	$node->psql('postgres', "select pg_current_xact_id()");
+}
+
+sub summarize_mapping
+{
+	my $out;
+	$node->psql('postgres',
+				"select count(*),
+						to_char(min(end_timestamp), 'HH24:MI:SS'),
+						to_char(max(end_timestamp), 'HH24:MI:SS')
+						from pg_old_snapshot_time_mapping()",
+				stdout => \$out);
+	return $out;
+}
+
+sub vacuum_freeze_all
+{
+	$node->psql('postgres', 'vacuum freeze');
+	$node->psql('template0', 'vacuum freeze');
+	$node->psql('template1', 'vacuum freeze');
+}
+
+# build up a time map with 4 entries
+set_time('3000-01-01 00:00:00Z');
+advance_xid();
+set_time('3000-01-01 00:01:00Z');
+advance_xid();
+set_time('3000-01-01 00:02:00Z');
+advance_xid();
+set_time('3000-01-01 00:03:00Z');
+advance_xid();
+is(summarize_mapping(), "4|00:00:00|00:03:00");
+
+# now cause frozen XID to advance
+vacuum_freeze_all();
+
+# we expect all XIDs to have been truncated
+is(summarize_mapping(), "0||");
+
+# put two more in the map
+set_time('3000-01-01 00:04:00Z');
+advance_xid();
+set_time('3000-01-01 00:05:00Z');
+advance_xid();
+is(summarize_mapping(), "2|00:04:00|00:05:00");
+
+# prepare a transaction, to stop xmin from getting further ahead
+$node->psql('postgres', "begin; select pg_current_xact_id(); prepare transaction 'tx1'");
+
+# add 16 more minutes; we should now have 18
+set_time('3000-01-01 00:21:00Z');
+advance_xid();
+is(summarize_mapping(), "18|00:04:00|00:21:00");
+
+# now cause frozen XID to advance
+vacuum_freeze_all();
+
+# this should leave just 16, because 2 were truncated
+is(summarize_mapping(), "16|00:06:00|00:21:00");
+
+# commit tx1, and then freeze again to get rid of all of them
+$node->psql('postgres', "commit prepared 'tx1'");
+
+# now cause frozen XID to advance
+vacuum_freeze_all();
+
+# we should now be back to empty
+is(summarize_mapping(), "0||");
+
+$node->stop;
-- 
2.20.1

