From 0725a21385afd4356452f3951f8613a0d7e50b31 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Mon, 20 Apr 2020 17:05:42 +1200
Subject: [PATCH v6 5/6] Truncate snapshot-too-old time map when CLOG is
 truncated.

It's not safe to leave xids in the map that have wrapped around,
although it's probably very hard to actually reach that state.

Reported-by: Andres Freund
---
 src/backend/commands/vacuum.c                 |   3 +
 src/backend/utils/time/snapmgr.c              |  21 ++++
 src/include/utils/snapmgr.h                   |   1 +
 src/test/modules/snapshot_too_old/Makefile    |   4 +-
 .../snapshot_too_old/t/001_truncate.pl        | 100 ++++++++++++++++++
 5 files changed, 128 insertions(+), 1 deletion(-)
 create mode 100644 src/test/modules/snapshot_too_old/t/001_truncate.pl

diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 22228f5684..459c9126fc 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -1645,6 +1645,9 @@ vac_truncate_clog(TransactionId frozenXID,
 	 */
 	AdvanceOldestCommitTsXid(frozenXID);
 
+	/* Make sure snapshot_too_old drops old XIDs. */
+	TruncateOldSnapshotTimeMapping(frozenXID);
+
 	/*
 	 * Truncate CLOG, multixact and CommitTs to the oldest computed value.
 	 */
diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index a94465235d..6958df3265 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -1995,6 +1995,27 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
 }
 
 
+/*
+ * Remove old xids from the timing map, so the CLOG can be truncated.
+ */
+void
+TruncateOldSnapshotTimeMapping(TransactionId frozenXID)
+{
+	LWLockAcquire(OldSnapshotTimeMapLock, LW_EXCLUSIVE);
+	while (oldSnapshotControl->count_used > 0 &&
+		   TransactionIdPrecedes(oldSnapshotControl->xid_by_minute[oldSnapshotControl->head_offset],
+								 frozenXID))
+	{
+		oldSnapshotControl->head_timestamp += USECS_PER_MINUTE;
+		oldSnapshotControl->head_offset =
+			(oldSnapshotControl->head_offset + 1) %
+			OLD_SNAPSHOT_TIME_MAP_ENTRIES;
+		oldSnapshotControl->count_used--;
+	}
+	LWLockRelease(OldSnapshotTimeMapLock);
+}
+
+
 /*
  * Setup a snapshot that replaces normal catalog snapshots that allows catalog
  * access to behave just like it did at a certain point in the past.
diff --git a/src/include/utils/snapmgr.h b/src/include/utils/snapmgr.h
index b6b403e293..4560f1f03b 100644
--- a/src/include/utils/snapmgr.h
+++ b/src/include/utils/snapmgr.h
@@ -141,6 +141,7 @@ extern bool TransactionIdLimitedForOldSnapshots(TransactionId recentXmin,
 extern void SetOldSnapshotThresholdTimestamp(TimestampTz ts, TransactionId xlimit);
 extern void MaintainOldSnapshotTimeMapping(TimestampTz whenTaken,
 										   TransactionId xmin);
+extern void TruncateOldSnapshotTimeMapping(TransactionId frozenXID);
 
 extern char *ExportSnapshot(Snapshot snapshot);
 
diff --git a/src/test/modules/snapshot_too_old/Makefile b/src/test/modules/snapshot_too_old/Makefile
index 81836e9953..f919944984 100644
--- a/src/test/modules/snapshot_too_old/Makefile
+++ b/src/test/modules/snapshot_too_old/Makefile
@@ -9,7 +9,7 @@ EXTENSION = test_sto
 DATA = test_sto--1.0.sql
 PGDESCFILE = "test_sto -- internal testing for snapshot too old errors"
 
-EXTRA_INSTALL = contrib/pg_visibility
+EXTRA_INSTALL = contrib/pg_visibility contrib/old_snapshot
 
 ISOLATION = sto_using_cursor sto_using_select sto_using_hash_index
 ISOLATION_OPTS = --temp-config $(top_srcdir)/src/test/modules/snapshot_too_old/sto.conf
@@ -19,6 +19,8 @@ ISOLATION_OPTS = --temp-config $(top_srcdir)/src/test/modules/snapshot_too_old/s
 # because it'd be dangerous on a production system.
 NO_INSTALLCHECK = 1
 
+TAP_TESTS = 1
+
 ifdef USE_PGXS
 PG_CONFIG = pg_config
 PGXS := $(shell $(PG_CONFIG) --pgxs)
diff --git a/src/test/modules/snapshot_too_old/t/001_truncate.pl b/src/test/modules/snapshot_too_old/t/001_truncate.pl
new file mode 100644
index 0000000000..afcca232f2
--- /dev/null
+++ b/src/test/modules/snapshot_too_old/t/001_truncate.pl
@@ -0,0 +1,100 @@
+# Test truncation of the old snapshot time mapping, to check
+# that we can't get into trouble when xids wrap around.
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 6;
+
+my $node = get_new_node('master');
+$node->init;
+$node->append_conf("postgresql.conf", "timezone = UTC");
+$node->append_conf("postgresql.conf", "old_snapshot_threshold=10");
+$node->append_conf("postgresql.conf", "max_prepared_transactions=10");
+$node->append_conf("postgresql.conf", "autovacuum=off");
+$node->start;
+$node->psql('postgres', 'update pg_database set datallowconn = true');
+$node->psql('postgres', 'create extension old_snapshot');
+$node->psql('postgres', 'create extension test_sto');
+
+note "check time map is truncated when CLOG is";
+
+sub set_time
+{
+	my $time = shift;
+	$node->psql('postgres', "select test_sto_clobber_snapshot_timestamp('$time')");
+}
+
+sub advance_xid
+{
+	my $time = shift;
+	$node->psql('postgres', "select pg_current_xact_id()");
+}
+
+sub summarize_mapping
+{
+	my $out;
+	$node->psql('postgres',
+				"select count(*),
+						to_char(min(end_timestamp), 'HH24:MI:SS'),
+						to_char(max(end_timestamp), 'HH24:MI:SS')
+						from pg_old_snapshot_time_mapping()",
+				stdout => \$out);
+	return $out;
+}
+
+sub vacuum_freeze_all
+{
+	$node->psql('postgres', 'vacuum freeze');
+	$node->psql('template0', 'vacuum freeze');
+	$node->psql('template1', 'vacuum freeze');
+}
+
+# build up a time map with 4 entries
+set_time('3000-01-01 00:00:00Z');
+advance_xid();
+set_time('3000-01-01 00:01:00Z');
+advance_xid();
+set_time('3000-01-01 00:02:00Z');
+advance_xid();
+set_time('3000-01-01 00:03:00Z');
+advance_xid();
+is(summarize_mapping(), "4|00:00:00|00:03:00");
+
+# now cause frozen XID to advance
+vacuum_freeze_all();
+
+# we expect all XIDs to have been truncated
+is(summarize_mapping(), "0||");
+
+# put two more in the map
+set_time('3000-01-01 00:04:00Z');
+advance_xid();
+set_time('3000-01-01 00:05:00Z');
+advance_xid();
+is(summarize_mapping(), "2|00:04:00|00:05:00");
+
+# prepare a transaction, to stop xmin from getting further ahead
+$node->psql('postgres', "begin; select pg_current_xact_id(); prepare transaction 'tx1'");
+
+# add 16 more minutes; we should now have 18
+set_time('3000-01-01 00:21:00Z');
+advance_xid();
+is(summarize_mapping(), "18|00:04:00|00:21:00");
+
+# now cause frozen XID to advance
+vacuum_freeze_all();
+
+# this should leave just 16, because 2 were truncated
+is(summarize_mapping(), "16|00:06:00|00:21:00");
+
+# commit tx1, and then freeze again to get rid of all of them
+$node->psql('postgres', "commit prepared 'tx1'");
+
+# now cause frozen XID to advance
+vacuum_freeze_all();
+
+# we should now be back to empty
+is(summarize_mapping(), "0||");
+
+$node->stop;
-- 
2.20.1

