From 1e1259df48373934be43049d8aa4dad328e96b96 Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Mon, 5 Apr 2021 21:21:14 -0700
Subject: [PATCH] snapshot caching vs subtransactions.

---
 src/backend/storage/ipc/procarray.c |  8 ++++++
 src/test/regress/expected/mvcc.out  | 42 +++++++++++++++++++++++++++
 src/test/regress/parallel_schedule  |  2 +-
 src/test/regress/serial_schedule    |  1 +
 src/test/regress/sql/mvcc.sql       | 44 +++++++++++++++++++++++++++++
 5 files changed, 96 insertions(+), 1 deletion(-)
 create mode 100644 src/test/regress/expected/mvcc.out
 create mode 100644 src/test/regress/sql/mvcc.sql

diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index e113a85aed4..bf776286de0 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -1210,6 +1210,11 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running)
 	 */
 	MaintainLatestCompletedXidRecovery(running->latestCompletedXid);
 
+	/*
+	 * NB: No need to increment ShmemVariableCache->xactCompletionCount here,
+	 * nobody can see it yet.
+	 */
+
 	LWLockRelease(ProcArrayLock);
 
 	/* ShmemVariableCache->nextXid must be beyond any observed xid. */
@@ -3915,6 +3920,9 @@ XidCacheRemoveRunningXids(TransactionId xid,
 	/* Also advance global latestCompletedXid while holding the lock */
 	MaintainLatestCompletedXid(latestXid);
 
+	/* ... and xactCompletionCount */
+	ShmemVariableCache->xactCompletionCount++;
+
 	LWLockRelease(ProcArrayLock);
 }
 
diff --git a/src/test/regress/expected/mvcc.out b/src/test/regress/expected/mvcc.out
new file mode 100644
index 00000000000..16ed4ddf2d8
--- /dev/null
+++ b/src/test/regress/expected/mvcc.out
@@ -0,0 +1,42 @@
+--
+-- Verify that index scans encountering dead rows produced by an
+-- aborted subtransaction of the current transaction can utilize the
+-- kill_prior_tuple optimization
+--
+-- NB: The table size is currently *not* expected to stay the same, we
+-- don't have logic to trigger opportunistic pruning in cases like
+-- this.
+BEGIN;
+SET LOCAL enable_seqscan = false;
+SET LOCAL enable_indexonlyscan = false;
+SET LOCAL enable_bitmapscan = false;
+-- Can't easily use a unique index, since dead tuples can be found
+-- independent of the kill_prior_tuple optimization.
+CREATE TABLE clean_aborted_self(key int, data text);
+CREATE INDEX clean_aborted_self_key ON clean_aborted_self(key);
+INSERT INTO clean_aborted_self (key, data) VALUES (-1, 'just to allocate metapage');
+-- save index size from before the changes, for comparison
+SELECT pg_relation_size('clean_aborted_self_key') AS clean_aborted_self_key_before \gset
+DO $$
+BEGIN
+    -- iterate often enough to see index growth even on larger-than-default page sizes
+    FOR i IN 1..100 LOOP
+        BEGIN
+	    -- perform index scan over all the inserted keys to get them to be seen as dead
+            IF EXISTS(SELECT * FROM clean_aborted_self WHERE key > 0 AND key < 100) THEN
+	        RAISE data_corrupted USING MESSAGE = 'these rows should not exist';
+            END IF;
+            INSERT INTO clean_aborted_self SELECT g.i, 'rolling back in a sec' FROM generate_series(1, 100) g(i);
+	    -- just some error that's not normally thrown
+	    RAISE reading_sql_data_not_permitted USING MESSAGE = 'round and round again';
+	EXCEPTION WHEN reading_sql_data_not_permitted THEN END;
+    END LOOP;
+END;$$;
+-- show sizes only if they differ
+SELECT :clean_aborted_self_key_before AS size_before, pg_relation_size('clean_aborted_self_key') size_after
+WHERE :clean_aborted_self_key_before != pg_relation_size('clean_aborted_self_key');
+ size_before | size_after 
+-------------+------------
+(0 rows)
+
+ROLLBACK;
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 2e898390892..a0913008577 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -29,7 +29,7 @@ test: strings numerology point lseg line box path polygon circle date time timet
 # geometry depends on point, lseg, box, path, polygon and circle
 # horology depends on interval, timetz, timestamp, timestamptz
 # ----------
-test: geometry horology regex type_sanity opr_sanity misc_sanity comments expressions unicode xid
+test: geometry horology regex type_sanity opr_sanity misc_sanity comments expressions unicode xid mvcc
 
 # ----------
 # These four each depend on the previous one
diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule
index a46f3d01789..56448476015 100644
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -11,6 +11,7 @@ test: int4
 test: int8
 test: oid
 test: xid
+test: mvcc
 test: float4
 test: float8
 test: bit
diff --git a/src/test/regress/sql/mvcc.sql b/src/test/regress/sql/mvcc.sql
new file mode 100644
index 00000000000..b22a86dc5e5
--- /dev/null
+++ b/src/test/regress/sql/mvcc.sql
@@ -0,0 +1,44 @@
+--
+-- Verify that index scans encountering dead rows produced by an
+-- aborted subtransaction of the current transaction can utilize the
+-- kill_prior_tuple optimization
+--
+-- NB: The table size is currently *not* expected to stay the same, we
+-- don't have logic to trigger opportunistic pruning in cases like
+-- this.
+BEGIN;
+
+SET LOCAL enable_seqscan = false;
+SET LOCAL enable_indexonlyscan = false;
+SET LOCAL enable_bitmapscan = false;
+
+-- Can't easily use a unique index, since dead tuples can be found
+-- independent of the kill_prior_tuple optimization.
+CREATE TABLE clean_aborted_self(key int, data text);
+CREATE INDEX clean_aborted_self_key ON clean_aborted_self(key);
+INSERT INTO clean_aborted_self (key, data) VALUES (-1, 'just to allocate metapage');
+
+-- save index size from before the changes, for comparison
+SELECT pg_relation_size('clean_aborted_self_key') AS clean_aborted_self_key_before \gset
+
+DO $$
+BEGIN
+    -- iterate often enough to see index growth even on larger-than-default page sizes
+    FOR i IN 1..100 LOOP
+        BEGIN
+	    -- perform index scan over all the inserted keys to get them to be seen as dead
+            IF EXISTS(SELECT * FROM clean_aborted_self WHERE key > 0 AND key < 100) THEN
+	        RAISE data_corrupted USING MESSAGE = 'these rows should not exist';
+            END IF;
+            INSERT INTO clean_aborted_self SELECT g.i, 'rolling back in a sec' FROM generate_series(1, 100) g(i);
+	    -- just some error that's not normally thrown
+	    RAISE reading_sql_data_not_permitted USING MESSAGE = 'round and round again';
+	EXCEPTION WHEN reading_sql_data_not_permitted THEN END;
+    END LOOP;
+END;$$;
+
+-- show sizes only if they differ
+SELECT :clean_aborted_self_key_before AS size_before, pg_relation_size('clean_aborted_self_key') size_after
+WHERE :clean_aborted_self_key_before != pg_relation_size('clean_aborted_self_key');
+
+ROLLBACK;
-- 
2.31.0.121.g9198c13e34

