From adb42e0093cdc5f1e4546d4499760144478679a5 Mon Sep 17 00:00:00 2001 From: nkey Date: Fri, 13 Dec 2024 00:53:49 +0100 Subject: [PATCH v3] amcheck: Fix bt_index_parent_check behavior with CREATE INDEX CONCURRENTLY Modify snapshot handling in verify_nbtree to properly handle indexes created with CREATE INDEX CONCURRENTLY. Previously, the function could fail to find some tuples in bloom when verifying such indexes because it was using the SnapshotAny, while indexes were build using MVCC snapshot. This could lead to false alarms in bt_index_parent_check's validation. Add a regression test that demonstrates the issue. Discussion: https://postgr.es/m/CANtu0ojmVd27fEhfpST7RG2KZvwkX%3DdMyKUqg0KM87FkOSdz8Q%40mail.gmail.com --- contrib/amcheck/meson.build | 1 + .../t/006_cic_bt_index_parent_check.pl | 51 ++++++++++++++ contrib/amcheck/verify_nbtree.c | 68 ++++++++----------- 3 files changed, 81 insertions(+), 39 deletions(-) create mode 100644 contrib/amcheck/t/006_cic_bt_index_parent_check.pl diff --git a/contrib/amcheck/meson.build b/contrib/amcheck/meson.build index fc08e32539a..2eb7ff11bd7 100644 --- a/contrib/amcheck/meson.build +++ b/contrib/amcheck/meson.build @@ -45,6 +45,7 @@ tests += { 't/003_cic_2pc.pl', 't/004_verify_nbtree_unique.pl', 't/005_pitr.pl', + 't/006_cic_bt_index_parent_check.pl', ], }, } diff --git a/contrib/amcheck/t/006_cic_bt_index_parent_check.pl b/contrib/amcheck/t/006_cic_bt_index_parent_check.pl new file mode 100644 index 00000000000..1d8bb84f225 --- /dev/null +++ b/contrib/amcheck/t/006_cic_bt_index_parent_check.pl @@ -0,0 +1,51 @@ +# Copyright (c) 2021-2024, PostgreSQL Global Development Group + +# Test bt_index_parent_check with index created with CREATE INDEX CONCURRENTLY +use strict; +use warnings FATAL => 'all'; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; + +use Test::More; + +Test::More->builder->todo_start('filesystem bug') + if PostgreSQL::Test::Utils::has_wal_read_bug; + +my ($node, $result); + +# +# Test set-up +# +$node = PostgreSQL::Test::Cluster->new('CIC_bt_index_parent_check_test'); +$node->init; +$node->append_conf('postgresql.conf', 'fsync = off'); +$node->append_conf('postgresql.conf', 'autovacuum = off'); +$node->append_conf('postgresql.conf', + 'lock_timeout = ' . (1000 * $PostgreSQL::Test::Utils::timeout_default)); +$node->start; +$node->safe_psql('postgres', q(CREATE EXTENSION amcheck)); +$node->safe_psql('postgres', q(CREATE TABLE tbl(i int primary key))); +# Insert two rows into index +$node->safe_psql('postgres', q(INSERT INTO tbl SELECT i FROM generate_series(1, 2) s(i);)); + +# start background transaction +my $in_progress_h = $node->background_psql('postgres'); +$in_progress_h->query_safe( + q( +BEGIN; +SELECT pg_current_xact_id(); +)); + +# delete one row from table, while background transaction is in progress +$node->safe_psql('postgres', q(DELETE FROM tbl WHERE i = 1;)); +# create index concurrently, which will skip the deleted row +$node->safe_psql('postgres', q(CREATE INDEX CONCURRENTLY idx ON tbl(i);)); + +# check index using bt_index_parent_check +$result = $node->psql('postgres', q(SELECT bt_index_parent_check('idx', heapallindexed => true))); +# it fails, because it is expect to find the deleted row in index +is($result, '0', 'bt_index_parent_check after removed rows'); + +$in_progress_h->quit; +done_testing(); diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c index ffe4f721672..89526900d21 100644 --- a/contrib/amcheck/verify_nbtree.c +++ b/contrib/amcheck/verify_nbtree.c @@ -506,7 +506,6 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace, BTMetaPageData *metad; uint32 previouslevel; BtreeLevel current; - Snapshot snapshot = SnapshotAny; if (!readonly) elog(DEBUG1, "verifying consistency of tree structure for index \"%s\"", @@ -557,38 +556,35 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace, state->heaptuplespresent = 0; /* - * Register our own snapshot in !readonly case, rather than asking + * Register our own snapshot for heapallindexed, rather than asking * table_index_build_scan() to do this for us later. This needs to * happen before index fingerprinting begins, so we can later be * certain that index fingerprinting should have reached all tuples * returned by table_index_build_scan(). */ - if (!state->readonly) - { - snapshot = RegisterSnapshot(GetTransactionSnapshot()); + state->snapshot = RegisterSnapshot(GetTransactionSnapshot()); - /* - * GetTransactionSnapshot() always acquires a new MVCC snapshot in - * READ COMMITTED mode. A new snapshot is guaranteed to have all - * the entries it requires in the index. - * - * We must defend against the possibility that an old xact - * snapshot was returned at higher isolation levels when that - * snapshot is not safe for index scans of the target index. This - * is possible when the snapshot sees tuples that are before the - * index's indcheckxmin horizon. Throwing an error here should be - * very rare. It doesn't seem worth using a secondary snapshot to - * avoid this. - */ - if (IsolationUsesXactSnapshot() && rel->rd_index->indcheckxmin && - !TransactionIdPrecedes(HeapTupleHeaderGetXmin(rel->rd_indextuple->t_data), - snapshot->xmin)) - ereport(ERROR, - (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), - errmsg("index \"%s\" cannot be verified using transaction snapshot", - RelationGetRelationName(rel)))); - } - } + /* + * GetTransactionSnapshot() always acquires a new MVCC snapshot in + * READ COMMITTED mode. A new snapshot is guaranteed to have all + * the entries it requires in the index. + * + * We must defend against the possibility that an old xact + * snapshot was returned at higher isolation levels when that + * snapshot is not safe for index scans of the target index. This + * is possible when the snapshot sees tuples that are before the + * index's indcheckxmin horizon. Throwing an error here should be + * very rare. It doesn't seem worth using a secondary snapshot to + * avoid this. + */ + if (IsolationUsesXactSnapshot() && rel->rd_index->indcheckxmin && + !TransactionIdPrecedes(HeapTupleHeaderGetXmin(rel->rd_indextuple->t_data), + state->snapshot->xmin)) + ereport(ERROR, + (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), + errmsg("index \"%s\" cannot be verified using transaction snapshot", + RelationGetRelationName(rel)))); +} /* * We need a snapshot to check the uniqueness of the index. For better @@ -600,9 +596,7 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace, state->indexinfo = BuildIndexInfo(state->rel); if (state->indexinfo->ii_Unique) { - if (snapshot != SnapshotAny) - state->snapshot = snapshot; - else + if (state->snapshot == InvalidSnapshot) state->snapshot = RegisterSnapshot(GetTransactionSnapshot()); } } @@ -679,13 +673,12 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace, /* * Create our own scan for table_index_build_scan(), rather than * getting it to do so for us. This is required so that we can - * actually use the MVCC snapshot registered earlier in !readonly - * case. + * actually use the MVCC snapshot registered earlier. * * Note that table_index_build_scan() calls heap_endscan() for us. */ scan = table_beginscan_strat(state->heaprel, /* relation */ - snapshot, /* snapshot */ + state->snapshot, /* snapshot */ 0, /* number of keys */ NULL, /* scan key */ true, /* buffer access strategy OK */ @@ -693,7 +686,7 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace, /* * Scan will behave as the first scan of a CREATE INDEX CONCURRENTLY - * behaves in !readonly case. + * behaves. * * It's okay that we don't actually use the same lock strength for the * heap relation as any other ii_Concurrent caller would in !readonly @@ -702,7 +695,7 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace, * that needs to be sure that there was no concurrent recycling of * TIDs. */ - indexinfo->ii_Concurrent = !state->readonly; + indexinfo->ii_Concurrent = true; /* * Don't wait for uncommitted tuple xact commit/abort when index is a @@ -726,14 +719,11 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace, state->heaptuplespresent, RelationGetRelationName(heaprel), 100.0 * bloom_prop_bits_set(state->filter)))); - if (snapshot != SnapshotAny) - UnregisterSnapshot(snapshot); - bloom_free(state->filter); } /* Be tidy: */ - if (snapshot == SnapshotAny && state->snapshot != InvalidSnapshot) + if (state->snapshot != InvalidSnapshot) UnregisterSnapshot(state->snapshot); MemoryContextDelete(state->targetcontext); } -- 2.43.0