From 62c0fd0583d7c163c36855eae1c8b6817a6f5e69 Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Tue, 9 Mar 2021 19:21:40 -0800 Subject: [PATCH v2 1/2] Don't consider newly inserted tuples in nbtree VACUUM. Skip full index scan during a VACUUM for nbtree indexes in the case where VACUUM never called btbulkdelete(), even when pg_class stats for the index relation would be considered "stale" by criteria applied using vacuum_cleanup_index_scale_factor. Rely on ANALYZE instead -- it can be relied on to keep pg_class.reltuples up to date, per the amvacuumcleanup contract. Also remove the vacuum_cleanup_index_scale_factor GUC/param in passing (though just disable it on the Postgres 13 branch). VACUUM will still do scans of the index despite never reaching btbulkdelete() in one remaining case: the case where a previous VACUUM operation is known to have performed index page deletion of pages that have yet be placed in the free space map for recycling. Backpatch to Postgres 13 due to an unanticipated interaction with the autovacuum_vacuum_insert_threshold feature added by commit b07642db and the "skip full scan" feature added by commit 857f9c36. This interaction has been tied to a regression with an append-only insert benchmark [1]. It is reasonable to expect a certain amount of overhead from vacuuming that just sets visibility map bits, but it does not seem reasonable to perform a full index scans in btvacuumcleanup() purely to set the pg_class.reltuples stats in affected indexes. There is another reason to backpatch: a bugfix commit tied to the nbtree deduplication feature (bugfix commit 48e12913) taught nbtree VACUUM to track IndexBulkDeleteResult.num_index_tuples using an inherently approximate approach. This made sense -- getting an accurate count in the presence of posting list tuples just isn't worth the cycles for a btvacuumcleanup()-only VACUUM. But btvacuumcleanup() should still indicate that its final num_index_tuples value is just an estimate when its approximate approach to tracking live tuples gets used. Have btvacuumcleanup() acknowledge that its approach to counting is approximate by setting IndexBulkDeleteResult.estimated_count to 'true'. This prevents vacuumlazy.c from setting the index's pg_class.reltuples to a value that significantly underestimates the number of live tuples (at least in certain scenarios with large posting list tuples). This is the same approach that hashvacuumcleanup() has always taken. Index AMs have had the option of giving only approximate num_index_tuples statistics since commit e57345975cf, which updated the relevant index AM API. [1] https://smalldatum.blogspot.com/2021/01/insert-benchmark-postgres-is-still.html Author: Peter Geoghegan Reviewed-By: Masahiko Sawada Discussion: https://postgr.es/m/CAD21AoA4WHthN5uU6+WScZ7+J_RcEjmcuH94qcoUPuB42ShXzg@mail.gmail.com Backpatch: 13-, where autovacuum_vacuum_insert_threshold was added. --- src/backend/access/nbtree/nbtpage.c | 22 +++-- src/backend/access/nbtree/nbtree.c | 102 +++++++++++----------- doc/src/sgml/config.sgml | 41 --------- doc/src/sgml/ref/create_index.sgml | 14 --- src/test/regress/expected/btree_index.out | 29 ------ src/test/regress/sql/btree_index.sql | 19 ---- 6 files changed, 60 insertions(+), 167 deletions(-) diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 75628e0eb9..ac6c38b155 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -168,6 +168,9 @@ _bt_getmeta(Relation rel, Buffer metabuf) * * This routine checks if provided cleanup-related information is matching * to those written in the metapage. On mismatch, metapage is overwritten. + * + * Postgres 13 ignores btm_last_cleanup_num_heap_tuples value here + * following backbranch disabling of vacuum_cleanup_index_scale_factor. */ void _bt_update_meta_cleanup_info(Relation rel, TransactionId oldestBtpoXact, @@ -176,22 +179,15 @@ _bt_update_meta_cleanup_info(Relation rel, TransactionId oldestBtpoXact, Buffer metabuf; Page metapg; BTMetaPageData *metad; - bool needsRewrite = false; - XLogRecPtr recptr; /* read the metapage and check if it needs rewrite */ metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ); metapg = BufferGetPage(metabuf); metad = BTPageGetMeta(metapg); - /* outdated version of metapage always needs rewrite */ - if (metad->btm_version < BTREE_NOVAC_VERSION) - needsRewrite = true; - else if (metad->btm_oldest_btpo_xact != oldestBtpoXact || - metad->btm_last_cleanup_num_heap_tuples != numHeapTuples) - needsRewrite = true; - - if (!needsRewrite) + /* Don't miss chance to upgrade index/metapage when BTREE_MIN_VERSION */ + if (metad->btm_version >= BTREE_NOVAC_VERSION && + metad->btm_oldest_btpo_xact == oldestBtpoXact) { _bt_relbuf(rel, metabuf); return; @@ -209,13 +205,14 @@ _bt_update_meta_cleanup_info(Relation rel, TransactionId oldestBtpoXact, /* update cleanup-related information */ metad->btm_oldest_btpo_xact = oldestBtpoXact; - metad->btm_last_cleanup_num_heap_tuples = numHeapTuples; + metad->btm_last_cleanup_num_heap_tuples = -1; MarkBufferDirty(metabuf); /* write wal record if needed */ if (RelationNeedsWAL(rel)) { xl_btree_metadata md; + XLogRecPtr recptr; XLogBeginInsert(); XLogRegisterBuffer(0, metabuf, REGBUF_WILL_INIT | REGBUF_STANDARD); @@ -227,7 +224,7 @@ _bt_update_meta_cleanup_info(Relation rel, TransactionId oldestBtpoXact, md.fastroot = metad->btm_fastroot; md.fastlevel = metad->btm_fastlevel; md.oldest_btpo_xact = oldestBtpoXact; - md.last_cleanup_num_heap_tuples = numHeapTuples; + md.last_cleanup_num_heap_tuples = -1; /* Disabled */ md.allequalimage = metad->btm_allequalimage; XLogRegisterBufData(0, (char *) &md, sizeof(xl_btree_metadata)); @@ -238,6 +235,7 @@ _bt_update_meta_cleanup_info(Relation rel, TransactionId oldestBtpoXact, } END_CRIT_SECTION(); + _bt_relbuf(rel, metabuf); } diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index d857afee1c..c37bbe53f9 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -794,6 +794,9 @@ _bt_parallel_advance_array_keys(IndexScanDesc scan) * When we return false, VACUUM can even skip the cleanup-only call to * btvacuumscan (i.e. there will be no btvacuumscan call for this index at * all). Otherwise, a cleanup-only btvacuumscan call is required. + * + * Postgres 13 ignores btm_last_cleanup_num_heap_tuples value here following + * backbranch disabling of vacuum_cleanup_index_scale_factor. */ static bool _bt_vacuum_needs_cleanup(IndexVacuumInfo *info) @@ -801,60 +804,44 @@ _bt_vacuum_needs_cleanup(IndexVacuumInfo *info) Buffer metabuf; Page metapg; BTMetaPageData *metad; - bool result = false; + uint32 btm_version; + TransactionId prev_btm_oldest_btpo_xact; + /* + * Copy details from metapage to local variables quickly. + * + * Note that we deliberately avoid using cached version of metapage here. + */ metabuf = _bt_getbuf(info->index, BTREE_METAPAGE, BT_READ); metapg = BufferGetPage(metabuf); metad = BTPageGetMeta(metapg); + btm_version = metad->btm_version; - if (metad->btm_version < BTREE_NOVAC_VERSION) + if (btm_version < BTREE_NOVAC_VERSION) { /* - * Do cleanup if metapage needs upgrade, because we don't have - * cleanup-related meta-information yet. + * Metapage needs to be dynamically upgraded to store fields that are + * only present when btm_version >= BTREE_NOVAC_VERSION */ - result = true; + _bt_relbuf(info->index, metabuf); + return true; } - else if (TransactionIdIsValid(metad->btm_oldest_btpo_xact) && - TransactionIdPrecedes(metad->btm_oldest_btpo_xact, - RecentGlobalXmin)) + + prev_btm_oldest_btpo_xact = metad->btm_oldest_btpo_xact; + _bt_relbuf(info->index, metabuf); + + if (TransactionIdIsValid(prev_btm_oldest_btpo_xact) && + TransactionIdPrecedes(prev_btm_oldest_btpo_xact, RecentGlobalXmin)) { /* * If any oldest btpo.xact from a previously deleted page in the index * is older than RecentGlobalXmin, then at least one deleted page can * be recycled -- don't skip cleanup. */ - result = true; - } - else - { - BTOptions *relopts; - float8 cleanup_scale_factor; - float8 prev_num_heap_tuples; - - /* - * If table receives enough insertions and no cleanup was performed, - * then index would appear have stale statistics. If scale factor is - * set, we avoid that by performing cleanup if the number of inserted - * tuples exceeds vacuum_cleanup_index_scale_factor fraction of - * original tuples count. - */ - relopts = (BTOptions *) info->index->rd_options; - cleanup_scale_factor = (relopts && - relopts->vacuum_cleanup_index_scale_factor >= 0) - ? relopts->vacuum_cleanup_index_scale_factor - : vacuum_cleanup_index_scale_factor; - prev_num_heap_tuples = metad->btm_last_cleanup_num_heap_tuples; - - if (cleanup_scale_factor <= 0 || - prev_num_heap_tuples <= 0 || - (info->num_heap_tuples - prev_num_heap_tuples) / - prev_num_heap_tuples >= cleanup_scale_factor) - result = true; + return true; } - _bt_relbuf(info->index, metabuf); - return result; + return false; } /* @@ -907,9 +894,6 @@ btvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) * still need to do a pass over the index, to recycle any newly-recyclable * pages or to obtain index statistics. _bt_vacuum_needs_cleanup * determines if either are needed. - * - * Since we aren't going to actually delete any leaf items, there's no - * need to go through all the vacuum-cycle-ID pushups. */ if (stats == NULL) { @@ -917,8 +901,23 @@ btvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) if (!_bt_vacuum_needs_cleanup(info)) return NULL; + /* + * Since we aren't going to actually delete any leaf items, there's no + * need to go through all the vacuum-cycle-ID pushups here. + * + * Posting list tuples are a source of inaccuracy for cleanup-only + * scans. btvacuumscan() will assume that the number of index tuples + * from each page can be used as num_index_tuples, even though + * num_index_tuples is supposed to represent the number of TIDs in the + * index. This naive approach can underestimate the number of tuples + * in the index significantly. + * + * We handle the problem by making num_index_tuples an estimate in + * cleanup-only case. + */ stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); btvacuumscan(info, stats, NULL, NULL, 0); + stats->estimated_count = true; } /* @@ -926,12 +925,6 @@ btvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) * double-counting some index tuples, so disbelieve any total that exceeds * the underlying heap's count ... if we know that accurately. Otherwise * this might just make matters worse. - * - * Posting list tuples are another source of inaccuracy. Cleanup-only - * btvacuumscan calls assume that the number of index tuples can be used - * as num_index_tuples, even though num_index_tuples is supposed to - * represent the number of TIDs in the index. This naive approach can - * underestimate the number of tuples in the index. */ if (!info->estimated_count) { @@ -971,7 +964,6 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, * Reset counts that will be incremented during the scan; needed in case * of multiple scans during a single VACUUM command */ - stats->estimated_count = false; stats->num_index_tuples = 0; stats->pages_deleted = 0; @@ -1059,8 +1051,12 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexFreeSpaceMapVacuum(rel); /* - * Maintain the oldest btpo.xact and a count of the current number of heap - * tuples in the metapage (for the benefit of _bt_vacuum_needs_cleanup). + * Maintain the oldest btpo.xact using _bt_update_meta_cleanup_info, for + * the benefit of _bt_vacuum_needs_cleanup. + * + * Note: We deliberately don't store the count of heap tuples here + * anymore. The numHeapTuples argument to _bt_update_meta_cleanup_info() + * is left in place on Postgres 13. * * The page with the oldest btpo.xact is typically a page deleted by this * VACUUM operation, since pages deleted by a previous VACUUM operation @@ -1070,8 +1066,7 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, * statistics, despite not counting as deleted pages for the purposes of * determining the oldest btpo.xact.) */ - _bt_update_meta_cleanup_info(rel, vstate.oldestBtpoXact, - info->num_heap_tuples); + _bt_update_meta_cleanup_info(rel, vstate.oldestBtpoXact, -1); /* update statistics */ stats->num_pages = num_pages; @@ -1399,7 +1394,10 @@ backtrack: * We don't count the number of live TIDs during cleanup-only calls to * btvacuumscan (i.e. when callback is not set). We count the number * of index tuples directly instead. This avoids the expense of - * directly examining all of the tuples on each page. + * directly examining all of the tuples on each page. VACUUM will + * treat num_index_tuples as an estimate in cleanup-only case, so it + * doesn't matter that this underestimates num_index_tuples + * significantly in some cases. */ if (minoff > maxoff) attempt_pagedel = (blkno == scanblkno); diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index ca0d9bd917..dd2778611f 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -8349,47 +8349,6 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; - - vacuum_cleanup_index_scale_factor (floating point) - - vacuum_cleanup_index_scale_factor - configuration parameter - - - - - Specifies the fraction of the total number of heap tuples counted in - the previous statistics collection that can be inserted without - incurring an index scan at the VACUUM cleanup stage. - This setting currently applies to B-tree indexes only. - - - - If no tuples were deleted from the heap, B-tree indexes are still - scanned at the VACUUM cleanup stage when at least one - of the following conditions is met: the index statistics are stale, or - the index contains deleted pages that can be recycled during cleanup. - Index statistics are considered to be stale if the number of newly - inserted tuples exceeds the vacuum_cleanup_index_scale_factor - fraction of the total number of heap tuples detected by the previous - statistics collection. The total number of heap tuples is stored in - the index meta-page. Note that the meta-page does not include this data - until VACUUM finds no dead tuples, so B-tree index - scan at the cleanup stage can only be skipped if the second and - subsequent VACUUM cycles detect no dead tuples. - - - - The value can range from 0 to - 10000000000. - When vacuum_cleanup_index_scale_factor is set to - 0, index scans are never skipped during - VACUUM cleanup. The default value is 0.1. - - - - - bytea_output (enum) diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml index 3537dfaade..5a1fd71478 100644 --- a/doc/src/sgml/ref/create_index.sgml +++ b/doc/src/sgml/ref/create_index.sgml @@ -434,20 +434,6 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] - - - vacuum_cleanup_index_scale_factor (floating point) - - vacuum_cleanup_index_scale_factor - storage parameter - - - - - Per-index value for . - - - diff --git a/src/test/regress/expected/btree_index.out b/src/test/regress/expected/btree_index.out index cfd4338e36..bc113a70b4 100644 --- a/src/test/regress/expected/btree_index.out +++ b/src/test/regress/expected/btree_index.out @@ -308,35 +308,6 @@ alter table btree_tall_tbl alter COLUMN t set storage plain; create index btree_tall_idx on btree_tall_tbl (t, id) with (fillfactor = 10); insert into btree_tall_tbl select g, repeat('x', 250) from generate_series(1, 130) g; --- --- Test vacuum_cleanup_index_scale_factor --- --- Simple create -create table btree_test(a int); -create index btree_idx1 on btree_test(a) with (vacuum_cleanup_index_scale_factor = 40.0); -select reloptions from pg_class WHERE oid = 'btree_idx1'::regclass; - reloptions ------------------------------------------- - {vacuum_cleanup_index_scale_factor=40.0} -(1 row) - --- Fail while setting improper values -create index btree_idx_err on btree_test(a) with (vacuum_cleanup_index_scale_factor = -10.0); -ERROR: value -10.0 out of bounds for option "vacuum_cleanup_index_scale_factor" -DETAIL: Valid values are between "0.000000" and "10000000000.000000". -create index btree_idx_err on btree_test(a) with (vacuum_cleanup_index_scale_factor = 100.0); -create index btree_idx_err on btree_test(a) with (vacuum_cleanup_index_scale_factor = 'string'); -ERROR: invalid value for floating point option "vacuum_cleanup_index_scale_factor": string -create index btree_idx_err on btree_test(a) with (vacuum_cleanup_index_scale_factor = true); -ERROR: invalid value for floating point option "vacuum_cleanup_index_scale_factor": true --- Simple ALTER INDEX -alter index btree_idx1 set (vacuum_cleanup_index_scale_factor = 70.0); -select reloptions from pg_class WHERE oid = 'btree_idx1'::regclass; - reloptions ------------------------------------------- - {vacuum_cleanup_index_scale_factor=70.0} -(1 row) - -- -- Test for multilevel page deletion -- diff --git a/src/test/regress/sql/btree_index.sql b/src/test/regress/sql/btree_index.sql index 96f53818ff..c60312db2d 100644 --- a/src/test/regress/sql/btree_index.sql +++ b/src/test/regress/sql/btree_index.sql @@ -150,25 +150,6 @@ create index btree_tall_idx on btree_tall_tbl (t, id) with (fillfactor = 10); insert into btree_tall_tbl select g, repeat('x', 250) from generate_series(1, 130) g; --- --- Test vacuum_cleanup_index_scale_factor --- - --- Simple create -create table btree_test(a int); -create index btree_idx1 on btree_test(a) with (vacuum_cleanup_index_scale_factor = 40.0); -select reloptions from pg_class WHERE oid = 'btree_idx1'::regclass; - --- Fail while setting improper values -create index btree_idx_err on btree_test(a) with (vacuum_cleanup_index_scale_factor = -10.0); -create index btree_idx_err on btree_test(a) with (vacuum_cleanup_index_scale_factor = 100.0); -create index btree_idx_err on btree_test(a) with (vacuum_cleanup_index_scale_factor = 'string'); -create index btree_idx_err on btree_test(a) with (vacuum_cleanup_index_scale_factor = true); - --- Simple ALTER INDEX -alter index btree_idx1 set (vacuum_cleanup_index_scale_factor = 70.0); -select reloptions from pg_class WHERE oid = 'btree_idx1'::regclass; - -- -- Test for multilevel page deletion -- base-commit: 21d5a065fd5f0ed71e0f6726a869c64d13716ceb -- 2.27.0