From 00a8f347fb5a57f3cafe082cdd9c3ea8f6a62053 Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Fri, 31 Mar 2023 14:58:50 -0700
Subject: [PATCH v1] WIP: Add tests exercising hash and gist killtuples
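
The tests try to exercise the killtuples paths of the hash and gist index
AMs: index entries pointing to deleted or updated rows are marked dead
during scans, and subsequent inserts into the same ranges should prune
those dead entries. Reaching these paths is not guaranteed, so the tests
are scheduled to run in isolation; the tables/indexes are left around so
an affected index can later be inspected with amcheck or such.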

---
 src/test/regress/expected/gist_killtuples.out | 78 ++++++++++++++++
 .../expected/hash_index_killtuples.out        | 89 +++++++++++++++++++
 src/test/regress/parallel_schedule            |  4 +
 src/test/regress/sql/gist_killtuples.sql      | 49 ++++++++++
 .../regress/sql/hash_index_killtuples.sql     | 57 ++++++++++++
 5 files changed, 277 insertions(+)
 create mode 100644 src/test/regress/expected/gist_killtuples.out
 create mode 100644 src/test/regress/expected/hash_index_killtuples.out
 create mode 100644 src/test/regress/sql/gist_killtuples.sql
 create mode 100644 src/test/regress/sql/hash_index_killtuples.sql

diff --git a/src/test/regress/expected/gist_killtuples.out b/src/test/regress/expected/gist_killtuples.out
new file mode 100644
index 00000000000..18d39c871a1
--- /dev/null
+++ b/src/test/regress/expected/gist_killtuples.out
@@ -0,0 +1,78 @@
+-- Basic test of gist's killtuples / pruning during split implementation.
+-- This is not guaranteed to reach those paths, as concurrent activity could
+-- prevent cleanup of dead tuples. But it's likely to reach them, which seems
+-- better than nothing.
+set enable_seqscan=off;
+set enable_bitmapscan=off;
+CREATE TABLE test_gist_killtuples (
+	k point,
+	v int DEFAULT 0
+);
+CREATE INDEX ON test_gist_killtuples USING gist(k);
+-- create index entries pointing to deleted tuples
+INSERT INTO test_gist_killtuples(k, v) SELECT point(generate_series(1, 1000), 0), 0;
+-- via deletes
+DELETE FROM test_gist_killtuples WHERE k << point(500, 0);
+-- and updates
+UPDATE test_gist_killtuples SET k = k + point(1, 0), v = 1 WHERE k >> point(500, 0);
+-- make the index see tuples are dead via killtuples
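+-- (a scan that finds a heap tuple dead to all transactions reports this back
+-- to the AM via kill_prior_tuple, and the AM marks the index entry as dead)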
+PREPARE engage_killtuples AS
+SELECT sum(k <-> point(0, 0)) FROM test_gist_killtuples WHERE k >> point(0, 0);
+EXPLAIN (COSTS OFF) EXECUTE engage_killtuples;
+                                   QUERY PLAN                                   
+--------------------------------------------------------------------------------
+ Aggregate
+   ->  Index Only Scan using test_gist_killtuples_k_idx on test_gist_killtuples
+         Index Cond: (k >> '(0,0)'::point)
+(3 rows)
+
+EXECUTE engage_killtuples;
+  sum   
+--------
+ 376250
+(1 row)
+
+-- create new index entries in the same ranges; this should see the dead entries and prune the pages
+INSERT INTO test_gist_killtuples(k, v) SELECT point(generate_series(1, 300), 0), 2;
+UPDATE test_gist_killtuples SET k = k + point(1, 0), v = 3 WHERE k >> point(600, 0);
+-- do some basic cross-checking of index scans vs. a sequential scan
+-- use a prepared statement, so we can explain and execute without repeating the statement
+PREPARE check_query AS SELECT tgk.v, min(tgk.k <-> point(0, 0)), max(tgk.k <-> point(0, 0)), count(*),
+  brute = ROW(tgk.v, min(tgk.k <-> point(0, 0)), max(tgk.k <-> point(0, 0)), count(*)) AS are_equal
+FROM (
+    SELECT v, min(k <-> point(0, 0)), max(k <-> point(0, 0)), count(*)
+    FROM test_gist_killtuples GROUP BY v ORDER BY v
+  ) brute,
+  test_gist_killtuples tgk
+WHERE tgk.k >> point(brute.min - 1, 0) AND tgk.k << point(brute.max + 1, 0)
+GROUP BY brute, tgk.v
+ORDER BY tgk.v;
+EXPLAIN (COSTS OFF) EXECUTE check_query;
+                                                                                       QUERY PLAN                                                                                        
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ GroupAggregate
+   Group Key: tgk.v, brute.*
+   ->  Sort
+         Sort Key: tgk.v, brute.*
+         ->  Nested Loop
+               ->  Subquery Scan on brute
+                     ->  GroupAggregate
+                           Group Key: test_gist_killtuples.v
+                           ->  Sort
+                                 Sort Key: test_gist_killtuples.v
+                                 ->  Seq Scan on test_gist_killtuples
+               ->  Index Scan using test_gist_killtuples_k_idx on test_gist_killtuples tgk
+                     Index Cond: ((k >> point((brute.min - '1'::double precision), '0'::double precision)) AND (k << point((brute.max + '1'::double precision), '0'::double precision)))
+(13 rows)
+
+EXECUTE check_query;
+ v | min | max  | count | are_equal 
+---+-----+------+-------+-----------
+ 0 | 500 |  500 |     1 | t
+ 1 | 502 |  600 |    99 | t
+ 2 |   1 |  300 |   300 | t
+ 3 | 602 | 1002 |   401 | t
+(4 rows)
+
+-- Leave the table/index around, so that an index that has been affected by
+-- killtuples can later be inspected with amcheck or such.
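+-- For manual inspection one could, assuming a pageinspect version that has
+-- gist_page_items() with a "dead" column, look for killed entries (not run
+-- here, to keep the output stable):
+--   SELECT itemoffset, ctid, dead
+--   FROM gist_page_items(get_raw_page('test_gist_killtuples_k_idx', 1),
+--                        'test_gist_killtuples_k_idx'::regclass)
+--   WHERE dead;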
diff --git a/src/test/regress/expected/hash_index_killtuples.out b/src/test/regress/expected/hash_index_killtuples.out
new file mode 100644
index 00000000000..2487b98b18a
--- /dev/null
+++ b/src/test/regress/expected/hash_index_killtuples.out
@@ -0,0 +1,89 @@
+-- Basic test of hash's killtuples / dead tuple pruning implementation.
+-- This is not guaranteed to reach those paths, as concurrent activity could
+-- prevent cleanup of dead tuples. But it's likely to reach them, which seems
+-- better than nothing.
+set enable_seqscan=off;
+set enable_bitmapscan=off;
+set enable_material=off;
+-- Hash indexes are fairly dense; 1000 tuples aren't enough to easily reach
+-- the killtuples logic. Might need to be adjusted upward for larger page sizes.
+\set high 2000
+CREATE TABLE test_hash_killtuples (
+	k int8,
+	v int DEFAULT 0
+);
+CREATE INDEX ON test_hash_killtuples USING hash(k);
+-- create index entries pointing to deleted tuples
+INSERT INTO test_hash_killtuples(k, v) SELECT generate_series(1, :high), 0;
+-- via deletes
+DELETE FROM test_hash_killtuples WHERE k < (:high / 2);
+-- and updates
+UPDATE test_hash_killtuples SET k = k + 1, v = 1 WHERE k > (:high / 2);
+-- make the index see tuples are dead via killtuples
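+-- (a scan that finds heap tuples dead to all transactions reports this back
+-- to the AM, which marks the corresponding index entries as dead)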
+PREPARE engage_killtuples AS
+SELECT count(*) FROM generate_series(1, :high + 1) g(i) WHERE EXISTS (SELECT * FROM test_hash_killtuples thk WHERE thk.k = g.i);
+EXPLAIN (COSTS OFF) EXECUTE engage_killtuples;
+                                     QUERY PLAN                                      
+-------------------------------------------------------------------------------------
+ Aggregate
+   ->  Nested Loop Semi Join
+         ->  Function Scan on generate_series g
+         ->  Index Scan using test_hash_killtuples_k_idx on test_hash_killtuples thk
+               Index Cond: (k = g.i)
+(5 rows)
+
+EXECUTE engage_killtuples;
+ count 
+-------
+  1001
+(1 row)
+
+-- create new index entries in the same ranges; this should see the dead entries and prune the pages
+INSERT INTO test_hash_killtuples(k, v) SELECT generate_series(1, :high * 0.3), 2;
+UPDATE test_hash_killtuples SET k = k + 1, v = 3 WHERE k > (:high * 0.6);
+-- do some basic cross-checking of index scans vs. a sequential scan
+-- use a prepared statement, so we can explain and execute without repeating the statement
+PREPARE check_query AS SELECT thk.v, min(thk.k), max(thk.k), count(*),
+  brute = ROW(thk.v, min(thk.k), max(thk.k), count(*)) AS are_equal
+FROM (
+    SELECT v, min(k), max(k), count(*)
+    FROM test_hash_killtuples GROUP BY v ORDER BY v
+  ) brute,
+  -- hash only does equality lookups, so simulate a range scan using generate_series
+  generate_series(brute.min, brute.max) g(i),
+  test_hash_killtuples thk
+WHERE
+  thk.k = g.i
+GROUP BY brute, thk.v
+ORDER BY thk.v;
+EXPLAIN (COSTS OFF) EXECUTE check_query;
+                                        QUERY PLAN                                         
+-------------------------------------------------------------------------------------------
+ GroupAggregate
+   Group Key: thk.v, brute.*
+   ->  Sort
+         Sort Key: thk.v, brute.*
+         ->  Nested Loop
+               ->  Nested Loop
+                     ->  Subquery Scan on brute
+                           ->  GroupAggregate
+                                 Group Key: test_hash_killtuples.v
+                                 ->  Sort
+                                       Sort Key: test_hash_killtuples.v
+                                       ->  Seq Scan on test_hash_killtuples
+                     ->  Function Scan on generate_series g
+               ->  Index Scan using test_hash_killtuples_k_idx on test_hash_killtuples thk
+                     Index Cond: (k = g.i)
+(15 rows)
+
+EXECUTE check_query;
+ v | min  | max  | count | are_equal 
+---+------+------+-------+-----------
+ 0 | 1000 | 1000 |     1 | t
+ 1 | 1002 | 1200 |   199 | t
+ 2 |    1 |  600 |   600 | t
+ 3 | 1202 | 2002 |   801 | t
+(4 rows)
+
+-- Leave the table/index around, so that an index that has been affected by
+-- killtuples can later be inspected with amcheck or such.
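+-- For manual inspection one could, assuming pageinspect is available, look
+-- at the dead-item counts of bucket pages (not run here, to keep the output
+-- stable):
+--   SELECT live_items, dead_items
+--   FROM hash_page_stats(get_raw_page('test_hash_killtuples_k_idx', 1));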
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 36240356393..c36b310e8f6 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -121,6 +121,10 @@ test: plancache limit plpgsql copy2 temp domain rangefuncs prepare conversion tr
 # ----------
 test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression memoize stats
 
+# these tests benefit from running in isolation, so that dead rows can be removed
+test: hash_index_killtuples
+test: gist_killtuples
+
 # event_trigger cannot run concurrently with any test that runs DDL
 # oidjoins is read-only, though, and should run late for best coverage
 test: event_trigger oidjoins
diff --git a/src/test/regress/sql/gist_killtuples.sql b/src/test/regress/sql/gist_killtuples.sql
new file mode 100644
index 00000000000..60664a18eee
--- /dev/null
+++ b/src/test/regress/sql/gist_killtuples.sql
@@ -0,0 +1,49 @@
+-- Basic test of gist's killtuples / pruning during split implementation.
+-- This is not guaranteed to reach those paths, as concurrent activity could
+-- prevent cleanup of dead tuples. But it's likely to reach them, which seems
+-- better than nothing.
+set enable_seqscan=off;
+set enable_bitmapscan=off;
+
+CREATE TABLE test_gist_killtuples (
+	k point,
+	v int DEFAULT 0
+);
+
+CREATE INDEX ON test_gist_killtuples USING gist(k);
+
+-- create index entries pointing to deleted tuples
+INSERT INTO test_gist_killtuples(k, v) SELECT point(generate_series(1, 1000), 0), 0;
+-- via deletes
+DELETE FROM test_gist_killtuples WHERE k << point(500, 0);
+-- and updates
+UPDATE test_gist_killtuples SET k = k + point(1, 0), v = 1 WHERE k >> point(500, 0);
+
+-- make the index see tuples are dead via killtuples
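+-- (a scan that finds a heap tuple dead to all transactions reports this back
+-- to the AM via kill_prior_tuple, and the AM marks the index entry as dead)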
+PREPARE engage_killtuples AS
+SELECT sum(k <-> point(0, 0)) FROM test_gist_killtuples WHERE k >> point(0, 0);
+EXPLAIN (COSTS OFF) EXECUTE engage_killtuples;
+EXECUTE engage_killtuples;
+
+-- create new index entries in the same ranges; this should see the dead entries and prune the pages
+INSERT INTO test_gist_killtuples(k, v) SELECT point(generate_series(1, 300), 0), 2;
+UPDATE test_gist_killtuples SET k = k + point(1, 0), v = 3 WHERE k >> point(600, 0);
+
+-- do some basic cross-checking of index scans vs. a sequential scan
+-- use a prepared statement, so we can explain and execute without repeating the statement
+PREPARE check_query AS SELECT tgk.v, min(tgk.k <-> point(0, 0)), max(tgk.k <-> point(0, 0)), count(*),
+  brute = ROW(tgk.v, min(tgk.k <-> point(0, 0)), max(tgk.k <-> point(0, 0)), count(*)) AS are_equal
+FROM (
+    SELECT v, min(k <-> point(0, 0)), max(k <-> point(0, 0)), count(*)
+    FROM test_gist_killtuples GROUP BY v ORDER BY v
+  ) brute,
+  test_gist_killtuples tgk
+WHERE tgk.k >> point(brute.min - 1, 0) AND tgk.k << point(brute.max + 1, 0)
+GROUP BY brute, tgk.v
+ORDER BY tgk.v;
+
+EXPLAIN (COSTS OFF) EXECUTE check_query;
+EXECUTE check_query;
+
+-- Leave the table/index around, so that an index that has been affected by
+-- killtuples can later be inspected with amcheck or such.
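+-- For manual inspection one could, assuming a pageinspect version that has
+-- gist_page_items() with a "dead" column, look for killed entries (not run
+-- here, to keep the output stable):
+--   SELECT itemoffset, ctid, dead
+--   FROM gist_page_items(get_raw_page('test_gist_killtuples_k_idx', 1),
+--                        'test_gist_killtuples_k_idx'::regclass)
+--   WHERE dead;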
diff --git a/src/test/regress/sql/hash_index_killtuples.sql b/src/test/regress/sql/hash_index_killtuples.sql
new file mode 100644
index 00000000000..abd15f9a464
--- /dev/null
+++ b/src/test/regress/sql/hash_index_killtuples.sql
@@ -0,0 +1,57 @@
+-- Basic test of hash's killtuples / dead tuple pruning implementation.
+-- This is not guaranteed to reach those paths, as concurrent activity could
+-- prevent cleanup of dead tuples. But it's likely to reach them, which seems
+-- better than nothing.
+set enable_seqscan=off;
+set enable_bitmapscan=off;
+set enable_material=off;
+
+-- Hash indexes are fairly dense; 1000 tuples aren't enough to easily reach
+-- the killtuples logic. Might need to be adjusted upward for larger page sizes.
+\set high 2000
+
+CREATE TABLE test_hash_killtuples (
+	k int8,
+	v int DEFAULT 0
+);
+
+CREATE INDEX ON test_hash_killtuples USING hash(k);
+
+-- create index entries pointing to deleted tuples
+INSERT INTO test_hash_killtuples(k, v) SELECT generate_series(1, :high), 0;
+-- via deletes
+DELETE FROM test_hash_killtuples WHERE k < (:high / 2);
+-- and updates
+UPDATE test_hash_killtuples SET k = k + 1, v = 1 WHERE k > (:high / 2);
+
+-- make the index see tuples are dead via killtuples
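+-- (a scan that finds heap tuples dead to all transactions reports this back
+-- to the AM, which marks the corresponding index entries as dead)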
+PREPARE engage_killtuples AS
+SELECT count(*) FROM generate_series(1, :high + 1) g(i) WHERE EXISTS (SELECT * FROM test_hash_killtuples thk WHERE thk.k = g.i);
+EXPLAIN (COSTS OFF) EXECUTE engage_killtuples;
+EXECUTE engage_killtuples;
+
+-- create new index entries in the same ranges; this should see the dead entries and prune the pages
+INSERT INTO test_hash_killtuples(k, v) SELECT generate_series(1, :high * 0.3), 2;
+UPDATE test_hash_killtuples SET k = k + 1, v = 3 WHERE k > (:high * 0.6);
+
+-- do some basic cross-checking of index scans vs. a sequential scan
+-- use a prepared statement, so we can explain and execute without repeating the statement
+PREPARE check_query AS SELECT thk.v, min(thk.k), max(thk.k), count(*),
+  brute = ROW(thk.v, min(thk.k), max(thk.k), count(*)) AS are_equal
+FROM (
+    SELECT v, min(k), max(k), count(*)
+    FROM test_hash_killtuples GROUP BY v ORDER BY v
+  ) brute,
+  -- hash only does equality lookups, so simulate a range scan using generate_series
+  generate_series(brute.min, brute.max) g(i),
+  test_hash_killtuples thk
+WHERE
+  thk.k = g.i
+GROUP BY brute, thk.v
+ORDER BY thk.v;
+
+EXPLAIN (COSTS OFF) EXECUTE check_query;
+EXECUTE check_query;
+
+-- Leave the table/index around, so that an index that has been affected by
+-- killtuples can later be inspected with amcheck or such.
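+-- For manual inspection one could, assuming pageinspect is available, look
+-- at the dead-item counts of bucket pages (not run here, to keep the output
+-- stable):
+--   SELECT live_items, dead_items
+--   FROM hash_page_stats(get_raw_page('test_hash_killtuples_k_idx', 1));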
-- 
2.38.0

