From 7719ab1ebdc3d56711505238eabe4f3f6af56af1 Mon Sep 17 00:00:00 2001 From: Andrey Borodin Date: Sat, 19 Oct 2024 12:46:29 +0500 Subject: [PATCH v7 1/3] Prototype B-tree vacuum streamlineing Signed-off-by: Andrey Borodin Signed-off-by: Junwang Zhao --- src/backend/access/nbtree/nbtree.c | 70 +++++++++++----- src/test/modules/test_misc/meson.build | 1 + .../modules/test_misc/t/007_vacuum_btree.pl | 81 +++++++++++++++++++ 3 files changed, 131 insertions(+), 21 deletions(-) create mode 100644 src/test/modules/test_misc/t/007_vacuum_btree.pl diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index f4f79f2..0ee2b2b 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -34,6 +34,7 @@ #include "storage/smgr.h" #include "utils/fmgrprotos.h" #include "utils/index_selfuncs.h" +#include "utils/injection_point.h" #include "utils/memutils.h" @@ -88,7 +89,7 @@ typedef struct BTParallelScanDescData *BTParallelScanDesc; static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, BTCycleId cycleid); -static void btvacuumpage(BTVacState *vstate, BlockNumber scanblkno); +static BlockNumber btvacuumpage(BTVacState *vstate, Buffer buf); static BTVacuumPosting btreevacuumposting(BTVacState *vstate, IndexTuple posting, OffsetNumber updatedoffset, @@ -969,8 +970,9 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, Relation rel = info->index; BTVacState vstate; BlockNumber num_pages; - BlockNumber scanblkno; bool needLock; + BlockRangeReadStreamPrivate p; + ReadStream *stream = NULL; /* * Reset fields that track information about the entire index now. This @@ -1039,9 +1041,17 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, */ needLock = !RELATION_IS_LOCAL(rel); - scanblkno = BTREE_METAPAGE + 1; + p.current_blocknum = BTREE_METAPAGE + 1; + stream = read_stream_begin_relation(READ_STREAM_FULL, + info->strategy, + rel, + MAIN_FORKNUM, + block_range_read_stream_cb, + &p, + 0); for (;;) { + Buffer buf; /* Get the current relation length */ if (needLock) LockRelationForExtension(rel, ExclusiveLock); @@ -1054,17 +1064,31 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, num_pages); /* Quit if we've scanned the whole relation */ - if (scanblkno >= num_pages) + if (p.current_blocknum >= num_pages) break; - /* Iterate over pages, then loop back to recheck length */ - for (; scanblkno < num_pages; scanblkno++) + + /* In 007_vacuum_btree test we need to coordinate two distinguishable points here */ + INJECTION_POINT("nbtree-vacuum-1"); + INJECTION_POINT("nbtree-vacuum-2"); + + p.last_exclusive = num_pages; + + /* Iterate over pages, then loop back to recheck relation length */ + while(BufferIsValid(buf = read_stream_next_buffer(stream, NULL))) { - btvacuumpage(&vstate, scanblkno); + BlockNumber current_block = btvacuumpage(&vstate, buf); if (info->report_progress) pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE, - scanblkno); + current_block); } + Assert(read_stream_next_buffer(stream, NULL) == InvalidBuffer); + /* + * After reaching the end we have to reset stream to use it again. + * Extra restart in case of just one iteration does not cost us much. + */ + read_stream_reset(stream); } + read_stream_end(stream); /* Set statistics num_pages field to final size of index */ stats->num_pages = num_pages; @@ -1094,9 +1118,11 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, * after our cycleid was acquired) whose right half page happened to reuse * a block that we might have processed at some point before it was * recycled (i.e. before the page split). + * + * Returns BlockNumber of a scanned page (not backtracked). */ -static void -btvacuumpage(BTVacState *vstate, BlockNumber scanblkno) +static BlockNumber +btvacuumpage(BTVacState *vstate, Buffer buf) { IndexVacuumInfo *info = vstate->info; IndexBulkDeleteResult *stats = vstate->stats; @@ -1107,7 +1133,7 @@ btvacuumpage(BTVacState *vstate, BlockNumber scanblkno) bool attempt_pagedel; BlockNumber blkno, backtrack_to; - Buffer buf; + BlockNumber scanblkno = BufferGetBlockNumber(buf); Page page; BTPageOpaque opaque; @@ -1121,14 +1147,6 @@ backtrack: /* call vacuum_delay_point while not holding any buffer lock */ vacuum_delay_point(); - /* - * We can't use _bt_getbuf() here because it always applies - * _bt_checkpage(), which will barf on an all-zero page. We want to - * recycle all-zero pages, not fail. Also, we want to use a nondefault - * buffer access strategy. - */ - buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, - info->strategy); _bt_lockbuf(rel, buf, BT_READ); page = BufferGetPage(buf); opaque = NULL; @@ -1164,7 +1182,7 @@ backtrack: errmsg_internal("right sibling %u of scanblkno %u unexpectedly in an inconsistent state in index \"%s\"", blkno, scanblkno, RelationGetRelationName(rel)))); _bt_relbuf(rel, buf); - return; + return scanblkno; } /* @@ -1184,7 +1202,7 @@ backtrack: { /* Done with current scanblkno (and all lower split pages) */ _bt_relbuf(rel, buf); - return; + return scanblkno; } } @@ -1415,8 +1433,18 @@ backtrack: if (backtrack_to != P_NONE) { blkno = backtrack_to; + + /* + * We can't use _bt_getbuf() here because it always applies + * _bt_checkpage(), which will barf on an all-zero page. We want to + * recycle all-zero pages, not fail. Also, we want to use a nondefault + * buffer access strategy. + */ + buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, + info->strategy); goto backtrack; } + return scanblkno; } /* diff --git a/src/test/modules/test_misc/meson.build b/src/test/modules/test_misc/meson.build index 283ffa7..a456a5e 100644 --- a/src/test/modules/test_misc/meson.build +++ b/src/test/modules/test_misc/meson.build @@ -15,6 +15,7 @@ tests += { 't/004_io_direct.pl', 't/005_timeouts.pl', 't/006_signal_autovacuum.pl', + 't/007_vacuum_btree.pl', ], }, } diff --git a/src/test/modules/test_misc/t/007_vacuum_btree.pl b/src/test/modules/test_misc/t/007_vacuum_btree.pl new file mode 100644 index 0000000..e8df4c0 --- /dev/null +++ b/src/test/modules/test_misc/t/007_vacuum_btree.pl @@ -0,0 +1,81 @@ +# Copyright (c) 2024, PostgreSQL Global Development Group + +# This test verifies that B-tree vacuum can restart read stream. +# To do so we need to insert some data during vacuum. So we wait in injection point +# after first vacuum scan. During this wait we insert some data forcing page split. +# this split will trigger relation extension and subsequent read_stream_reset(). + +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use Test::More; + +if ($ENV{enable_injection_points} ne 'yes') +{ + plan skip_all => 'Injection points not supported by this build'; +} + +# Initialize postgres +my $psql_err = ''; +my $psql_out = ''; +my $node = PostgreSQL::Test::Cluster->new('node'); +$node->init; + +# This ensures autovacuum do not run +$node->append_conf('postgresql.conf', 'autovacuum = off'); +$node->start; + +# Check if the extension injection_points is available, as it may be +# possible that this script is run with installcheck, where the module +# would not be installed by default. +if (!$node->check_extension('injection_points')) +{ + plan skip_all => 'Extension injection_points not installed'; +} + +$node->safe_psql('postgres', 'CREATE EXTENSION injection_points;'); + +# From this point, vacuum worker will wait at startup. +$node->safe_psql('postgres', + "SELECT injection_points_attach('nbtree-vacuum-2', 'wait');"); + +my $psql_session = $node->background_psql('postgres'); + +$psql_session->query_until( + qr/starting_bg_psql/, + q(\echo starting_bg_psql + create table a as select random() r from generate_series(1,100) x; + create index on a(r); + delete from a; + vacuum a; + )); + +#print $node->safe_psql('postgres','select * from pg_stat_activity'); + +# Wait until an vacuum worker starts. +$node->wait_for_event('client backend', 'nbtree-vacuum-2'); + +$node->safe_psql('postgres', + "SELECT injection_points_attach('nbtree-vacuum-1', 'wait');"); + +# Here's the key point of a test: during vacuum we add some page splits. +# This will force vacuum into doing another scan thus reseting read stream. +$node->safe_psql('postgres', + "insert into a select x from generate_series(1,3000) x;"); + +$node->safe_psql('postgres', + "SELECT injection_points_detach('nbtree-vacuum-2');"); +$node->safe_psql('postgres', + "SELECT injection_points_wakeup('nbtree-vacuum-2');"); + +# Observe that second scan is reached. +$node->wait_for_event('client backend', 'nbtree-vacuum-1'); + +$node->safe_psql('postgres', + "SELECT injection_points_detach('nbtree-vacuum-1');"); +$node->safe_psql('postgres', + "SELECT injection_points_wakeup('nbtree-vacuum-1');"); + +ok($psql_session->quit); + +done_testing(); -- 2.39.5 (Apple Git-154)