From 0603ab5d37a02e05d7a30fbd806eadd17a6232e4 Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Tue, 9 Mar 2021 12:58:39 -0800 Subject: [PATCH v2 2/2] VACUUM ANALYZE: Always update pg_class.reltuples. Stop assuming that pg_class must have been updated with accurate statistics within VACUUM ANALYZE -- update pg_class for indexes at the same time as the table relation in all cases. VACUUM sometimes gives less accurate statistics than ANALYZE by design, per the contract for amvacuumcleanup() routines established by commit e57345975cf back in 2006. It seems advisable to effectively assume that that always happens. The worst possible consequence of this approach is that it might inaccurately set pg_class.reltuples for indexes whose heap relation ends up with the same inaccurate value anyway, which doesn't seem too bad. We already consistently called vac_update_relstats() (to update pg_class) for the heap/table relation twice during any VACUUM ANALYZE -- once in vacuumlazy.c, and once in analyze.c. We now make sure that we call vac_update_relstats() at least once (though often twice) for each index. This is follow up work to the recent commit that dealt with issues with btvacuumcleanup(). Though this is technically a long-standing issue (e.g., hashvacuumcleanup() is just as affected as btvacuumcleanup()), it seems like a good idea to fix both issues together now. Having VACUUM ANALYZE understand that pg_class.reltuples might not have been set in vacuumlazy.c by the time do_analyze_rel() is reached probably matters much more in light of the btvacuumcleanup() changes. Author: Peter Geoghegan Reviewed-By: Masahiko Sawada Discussion: https://postgr.es/m/CAH2-WzknxdComjhqo4SUxVFk_Q1171GJO2ZgHZ1Y6pion6u8rA@mail.gmail.com Backpatch: 13-, just like the last commit. --- src/backend/access/heap/vacuumlazy.c | 3 +- src/backend/commands/analyze.c | 52 +++++++++++++++++----------- 2 files changed, 34 insertions(+), 21 deletions(-) diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index d8f847b0e6..22ca7f5c54 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -1737,7 +1737,8 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, end_parallel_vacuum(indstats, lps, nindexes); /* Update index statistics */ - update_index_statistics(Irel, indstats, nindexes); + if (vacrelstats->useindex) + update_index_statistics(Irel, indstats, nindexes); /* If no indexes, make log report that lazy_vacuum_heap would've made */ if (vacuumed_pages) diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 7295cf0215..eb32edd201 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -600,8 +600,16 @@ do_analyze_rel(Relation onerel, VacuumParams *params, PROGRESS_ANALYZE_PHASE_FINALIZE_ANALYZE); /* - * Update pages/tuples stats in pg_class ... but not if we're doing - * inherited stats. + * Update pages/tuples stats in pg_class, and report ANALYZE to the stats + * collector ... but not if we're doing inherited stats. + * + * We assume that VACUUM hasn't set pg_class.reltuples already, even + * during a VACUUM ANALYZE. Although VACUUM often updates pg_class, + * exceptions exists. A "VACUUM (ANALYZE, INDEX_CLEANUP OFF)" command + * will never update pg_class entries for index relations. It's also + * possible that an individual index's pg_class entry won't be updated + * during VACUUM if the index AM returns NULL from its amvacuumcleanup() + * routine. */ if (!inh) { @@ -609,6 +617,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params, visibilitymap_count(onerel, &relallvisible, NULL); + /* Update pg_class for table relation */ vac_update_relstats(onerel, relpages, totalrows, @@ -617,15 +626,8 @@ do_analyze_rel(Relation onerel, VacuumParams *params, InvalidTransactionId, InvalidMultiXactId, in_outer_xact); - } - /* - * Same for indexes. Vacuum always scans all indexes, so if we're part of - * VACUUM ANALYZE, don't overwrite the accurate count already inserted by - * VACUUM. - */ - if (!inh && !(params->options & VACOPT_VACUUM)) - { + /* Same for indexes */ for (ind = 0; ind < nindexes; ind++) { AnlIndexData *thisdata = &indexdata[ind]; @@ -641,20 +643,30 @@ do_analyze_rel(Relation onerel, VacuumParams *params, InvalidMultiXactId, in_outer_xact); } + + /* + * Now report ANALYZE to the stats collector. + * + * We deliberately don't report to the stats collector when doing + * inherited stats. because the stats collector only tracks per-table + * stats. + * + * Reset the changes_since_analyze counter only if we analyzed all + * columns; otherwise, there is still work for auto-analyze to do. + */ + pgstat_report_analyze(onerel, totalrows, totaldeadrows, + (va_cols == NIL)); } /* - * Report ANALYZE to the stats collector, too. However, if doing - * inherited stats we shouldn't report, because the stats collector only - * tracks per-table stats. Reset the changes_since_analyze counter only - * if we analyzed all columns; otherwise, there is still work for - * auto-analyze to do. + * If this isn't part of VACUUM ANALYZE, let index AMs do cleanup. + * + * Note that most index AMs perform a no-op as a matter of policy for + * amvacuumcleanup() when called in ANALYZE-only mode. In practice only + * ginvacuumcleanup() currently does real work when called through here + * (and even GIN makes it a no-op unless we're running is an autovacuum + * worker). */ - if (!inh) - pgstat_report_analyze(onerel, totalrows, totaldeadrows, - (va_cols == NIL)); - - /* If this isn't part of VACUUM ANALYZE, let index AMs do cleanup */ if (!(params->options & VACOPT_VACUUM)) { for (ind = 0; ind < nindexes; ind++) -- 2.27.0