Adjust autovacuum naptime automatically
Hi hackers,
There is a comment in autovacuum.c:
| XXX todo: implement sleep scale factor that existed in contrib code.
and the attached is a patch to implement it.
In contrib code, sleep scale factor was used to adjust naptime only to
lengthen the naptime. But I changed the behavior to be able to shorten it.
In the case of a heavily update workload, the default naptime (60 seconds)
is too long to keep the number of dead tuples low. With my patch, the naptime
will be adjusted to around 3 seconds in the case of pgbench (scale=10, 80 tps)
with the other autovacuum parameters at their defaults.
I have something that I want to discuss with you:
- Can we use the process exit code to make the autovacuum daemon communicate
with the postmaster? I used it to notify whether there are any vacuum jobs or not.
- I removed autovacuum_naptime guc variable, because it is adjusted
automatically now. Is it appropriate?
Comments welcome.
Regards,
---
ITAGAKI Takahiro
NTT Open Source Software Center
Attachments:
autovacuum_adjust_naptime-0817.patch (application/octet-stream; name=autovacuum_adjust_naptime-0817.patch) Download
diff -cpr pgsql-orig/src/backend/postmaster/autovacuum.c pgsql/src/backend/postmaster/autovacuum.c
*** pgsql-orig/src/backend/postmaster/autovacuum.c Thu Aug 17 10:06:50 2006
--- pgsql/src/backend/postmaster/autovacuum.c Thu Aug 17 10:42:10 2006
***************
*** 54,60 ****
* GUC parameters
*/
bool autovacuum_start_daemon = false;
- int autovacuum_naptime;
int autovacuum_vac_thresh;
double autovacuum_vac_scale;
int autovacuum_anl_thresh;
--- 54,59 ----
*************** int autovacuum_vac_cost_limit;
*** 66,71 ****
--- 65,82 ----
/* Flag to tell if we are in the autovacuum daemon process */
static bool am_autovacuum = false;
+ /* Autovac daemon exitcode */
+ #define AUTOVACUUM_ACCELERATE 0
+ #define AUTOVACUUM_DECELERATE 1
+
+ /* Nap time of autovac daemon (only valid in postmaster) */
+ #define AUTOVACUUM_NAPTIME_MIN 1
+ #define AUTOVACUUM_NAPTIME_MAX 300
+ #define AUTOVACUUM_NAPTIME_DEFAULT 60
+ #define AUTOVACUUM_ACCEL_SCALE 0.5
+ #define AUTOVACUUM_DECEL_SCALE 1.2
+ int autovacuum_naptime = AUTOVACUUM_NAPTIME_DEFAULT;
+
/* Last time autovac daemon started/stopped (only valid in postmaster) */
static time_t last_autovac_start_time = 0;
static time_t last_autovac_stop_time = 0;
*************** static pid_t autovac_forkexec(void);
*** 101,107 ****
#endif
NON_EXEC_STATIC void AutoVacMain(int argc, char *argv[]);
static void process_whole_db(void);
! static void do_autovacuum(PgStat_StatDBEntry *dbentry);
static List *autovac_get_database_list(void);
static void test_rel_for_autovac(Oid relid, PgStat_StatTabEntry *tabentry,
Form_pg_class classForm,
--- 112,118 ----
#endif
NON_EXEC_STATIC void AutoVacMain(int argc, char *argv[]);
static void process_whole_db(void);
! static int do_autovacuum(PgStat_StatDBEntry *dbentry);
static List *autovac_get_database_list(void);
static void test_rel_for_autovac(Oid relid, PgStat_StatTabEntry *tabentry,
Form_pg_class classForm,
*************** autovac_start(void)
*** 136,143 ****
*
* Note that since we will be re-called from the postmaster main loop, we
* will get another chance later if we do nothing now.
- *
- * XXX todo: implement sleep scale factor that existed in contrib code.
*/
curtime = time(NULL);
--- 147,152 ----
*************** autovac_start(void)
*** 186,194 ****
* autovac_stopped --- called by postmaster when subprocess exit is detected
*/
void
! autovac_stopped(void)
{
last_autovac_stop_time = time(NULL);
}
#ifdef EXEC_BACKEND
--- 195,224 ----
* autovac_stopped --- called by postmaster when subprocess exit is detected
*/
void
! autovac_stopped(int exitcode)
{
last_autovac_stop_time = time(NULL);
+
+ switch (exitcode)
+ {
+ case AUTOVACUUM_ACCELERATE:
+ autovacuum_naptime = (int)
+ floor(autovacuum_naptime * AUTOVACUUM_ACCEL_SCALE);
+ break;
+ case AUTOVACUUM_DECELERATE:
+ autovacuum_naptime = (int)
+ ceil(autovacuum_naptime * AUTOVACUUM_DECEL_SCALE);
+ break;
+ default: /* should not happen */
+ elog(WARNING, "autovacuum invalid exitcode %d", exitcode);
+ break;
+ }
+ if (autovacuum_naptime < AUTOVACUUM_NAPTIME_MIN)
+ autovacuum_naptime = AUTOVACUUM_NAPTIME_MIN;
+ if (autovacuum_naptime > AUTOVACUUM_NAPTIME_MAX)
+ autovacuum_naptime = AUTOVACUUM_NAPTIME_MAX;
+
+ elog(DEBUG1, "autovacuum: adjust naptime to %d", autovacuum_naptime);
}
#ifdef EXEC_BACKEND
*************** AutoVacMain(int argc, char *argv[])
*** 225,230 ****
--- 255,261 ----
TransactionId nextXid;
autovac_dbase *db;
bool whole_db;
+ bool vacuumed = false;
sigjmp_buf local_sigjmp_buf;
/* we are a postmaster subprocess now */
*************** AutoVacMain(int argc, char *argv[])
*** 296,302 ****
* callback will be registered to do ProcKill, which will clean up
* necessary state.
*/
! proc_exit(0);
}
/* We can now handle ereport(ERROR) */
--- 327,333 ----
* callback will be registered to do ProcKill, which will clean up
* necessary state.
*/
! proc_exit(AUTOVACUUM_ACCELERATE);
}
/* We can now handle ereport(ERROR) */
*************** AutoVacMain(int argc, char *argv[])
*** 461,473 ****
* And do an appropriate amount of work
*/
if (whole_db)
process_whole_db();
else
! do_autovacuum(db->entry);
}
/* One iteration done, go away */
! proc_exit(0);
}
/*
--- 492,507 ----
* And do an appropriate amount of work
*/
if (whole_db)
+ {
process_whole_db();
+ vacuumed = true;
+ }
else
! vacuumed = (do_autovacuum(db->entry) > 0);
}
/* One iteration done, go away */
! proc_exit(vacuumed ? AUTOVACUUM_ACCELERATE : AUTOVACUUM_DECELERATE);
}
/*
*************** process_whole_db(void)
*** 574,584 ****
* dbentry must be a valid pointer to the database entry in the stats
* databases' hash table, and it will be used to determine whether vacuum or
* analyze is needed on a per-table basis.
*
* Note that CHECK_FOR_INTERRUPTS is supposed to be used in certain spots in
* order not to ignore shutdown commands for too long.
*/
! static void
do_autovacuum(PgStat_StatDBEntry *dbentry)
{
Relation classRel,
--- 608,619 ----
* dbentry must be a valid pointer to the database entry in the stats
* databases' hash table, and it will be used to determine whether vacuum or
* analyze is needed on a per-table basis.
+ * Returns the number of vacuumed tables.
*
* Note that CHECK_FOR_INTERRUPTS is supposed to be used in certain spots in
* order not to ignore shutdown commands for too long.
*/
! static int
do_autovacuum(PgStat_StatDBEntry *dbentry)
{
Relation classRel,
*************** do_autovacuum(PgStat_StatDBEntry *dbentr
*** 589,594 ****
--- 624,630 ----
List *toast_table_ids = NIL;
ListCell *cell;
PgStat_StatDBEntry *shared;
+ int numVacuumed = 0;
/* Start a transaction so our commands have one to play into. */
StartTransactionCommand();
*************** do_autovacuum(PgStat_StatDBEntry *dbentr
*** 723,732 ****
--- 759,771 ----
tab->dovacuum,
tab->doanalyze,
false);
+ numVacuumed++;
}
/* Finally close out the last transaction. */
CommitTransactionCommand();
+
+ return numVacuumed;
}
/*
diff -cpr pgsql-orig/src/backend/postmaster/postmaster.c pgsql/src/backend/postmaster/postmaster.c
*** pgsql-orig/src/backend/postmaster/postmaster.c Thu Aug 17 10:06:50 2006
--- pgsql/src/backend/postmaster/postmaster.c Thu Aug 17 10:14:03 2006
*************** reaper(SIGNAL_ARGS)
*** 2106,2113 ****
if (AutoVacPID != 0 && pid == AutoVacPID)
{
AutoVacPID = 0;
! autovac_stopped();
! if (exitstatus != 0)
HandleChildCrash(pid, exitstatus,
_("autovacuum process"));
continue;
--- 2106,2114 ----
if (AutoVacPID != 0 && pid == AutoVacPID)
{
AutoVacPID = 0;
! if (WIFEXITED(exitstatus))
! autovac_stopped(WEXITSTATUS(exitstatus));
! else
HandleChildCrash(pid, exitstatus,
_("autovacuum process"));
continue;
diff -cpr pgsql-orig/src/backend/utils/misc/guc.c pgsql/src/backend/utils/misc/guc.c
*** pgsql-orig/src/backend/utils/misc/guc.c Thu Aug 17 10:06:52 2006
--- pgsql/src/backend/utils/misc/guc.c Thu Aug 17 10:14:03 2006
*************** static struct config_int ConfigureNamesI
*** 1539,1553 ****
},
{
- {"autovacuum_naptime", PGC_SIGHUP, AUTOVACUUM,
- gettext_noop("Time to sleep between autovacuum runs, in seconds."),
- NULL,
- GUC_UNIT_S
- },
- &autovacuum_naptime,
- 60, 1, INT_MAX, NULL, NULL
- },
- {
{"autovacuum_vacuum_threshold", PGC_SIGHUP, AUTOVACUUM,
gettext_noop("Minimum number of tuple updates or deletes prior to vacuum."),
NULL
--- 1539,1544 ----
diff -cpr pgsql-orig/src/backend/utils/misc/postgresql.conf.sample pgsql/src/backend/utils/misc/postgresql.conf.sample
*** pgsql-orig/src/backend/utils/misc/postgresql.conf.sample Thu Aug 17 10:06:52 2006
--- pgsql/src/backend/utils/misc/postgresql.conf.sample Thu Aug 17 10:14:03 2006
***************
*** 361,367 ****
#---------------------------------------------------------------------------
#autovacuum = off # enable autovacuum subprocess?
- #autovacuum_naptime = 60 # time between autovacuum runs, in secs
#autovacuum_vacuum_threshold = 1000 # min # of tuple updates before
# vacuum
#autovacuum_analyze_threshold = 500 # min # of tuple updates before
--- 361,366 ----
diff -cpr pgsql-orig/src/include/postmaster/autovacuum.h pgsql/src/include/postmaster/autovacuum.h
*** pgsql-orig/src/include/postmaster/autovacuum.h Thu Aug 17 10:06:55 2006
--- pgsql/src/include/postmaster/autovacuum.h Thu Aug 17 10:14:03 2006
*************** extern bool IsAutoVacuumProcess(void);
*** 31,37 ****
/* Functions to start autovacuum process, called from postmaster */
extern void autovac_init(void);
extern int autovac_start(void);
! extern void autovac_stopped(void);
#ifdef EXEC_BACKEND
extern void AutoVacMain(int argc, char *argv[]);
--- 31,37 ----
/* Functions to start autovacuum process, called from postmaster */
extern void autovac_init(void);
extern int autovac_start(void);
! extern void autovac_stopped(int exitcode);
#ifdef EXEC_BACKEND
extern void AutoVacMain(int argc, char *argv[]);
ITAGAKI Takahiro wrote:
In the case of a heavily update workload, the default naptime (60 seconds)
is too long to keep the number of dead tuples low. With my patch, the naptime
will be adjusted around 3 seconds at the case of pgbench (scale=10, 80 tps)
with default other autovacuum parameters.
Interesting. To be frank I don't know what the sleep scale factor was
supposed to do.
I have something that I want to discuss with you:
- Can we use the process-exitcode to make autovacuum daemon to communicate
with postmaster? I used it to notify there are any vacuum jobs or not.
I can only tell you we do this in Mammoth Replicator and it works for
us. Whether this is a very good idea, I don't know. I didn't find any
other means to communicate stuff from dying processes to the postmaster.
- I removed autovacuum_naptime guc variable, because it is adjusted
automatically now. Is it appropriate?
I think we should provide the user with a way to stop the naptime from
changing at all. Eventually we will have the promised "maintenance
windows" feature which will mean the user will not have to worry at all
about the naptime, but in the meantime I think we should keep it.
--
Alvaro Herrera http://www.CommandPrompt.com/
PostgreSQL Replication, Consulting, Custom Development, 24x7 support
Alvaro Herrera wrote:
ITAGAKI Takahiro wrote:
In the case of a heavily update workload, the default naptime (60 seconds)
is too long to keep the number of dead tuples low. With my patch, the naptime
will be adjusted around 3 seconds at the case of pgbench (scale=10, 80 tps)
with default other autovacuum parameters.
What is this based on? That is, based on what information is it
deciding to reduce the naptime?
Interesting. To be frank I don't know what the sleep scale factor was
supposed to do.
I'm not sure that sleep scale factor is a good idea or not at this
point, but what I was thinking back in the day when i originally wrote
the contrib autovacuum is that I didn't want the system to get bogged
down constantly vacuuming. So, if it just spent a long time working on
one database, it would sleep for long time.
Given that we can now specify the vacuum cost delay settings for
autovacuum and disable tables and everything else, I'm not sure we need this
anymore, at least not as it was originally designed. It sounds like
Itagaki is doing things a little different with his patch, but I'm not
sure I understand it.
- I removed autovacuum_naptime guc variable, because it is adjusted
automatically now. Is it appropriate?
I think we should provide the user with a way to stop the naptime from
changing at all. Eventually we will have the promised "maintenance
windows" feature which will mean the user will not have to worry at all
about the naptime, but in the meantime I think we should keep it.
I'm not sure that's true. I believe we will want the naptime GUC option
even after we have the maintenance window. I think we might ignore the
naptime during the maintenance window, but even after we have the
maintenance window, we will still vacuum during the day as required.
My vision of the maintenance window has always been very simple, that
is, during the maintenance window the thresholds get reduced by some
factor (probably a GUC variable) so during the day it might take 10000
updates on a table to cause a vacuum but during the naptime it might be
10% of that, 1000. Is this in-line with what others were thinking?
"Matthew T. O'Connor" <matthew@zeut.net> wrote:
Sorry, I should have explained more.
What is this based on? That is, based on what information is it
deciding to reduce the naptime?
If there are some vacuum or analyze jobs, the naptime is shortened
(i.e, autovacuum is accelerated). And if there are no jobs, the naptime
is lengthened (autovacuum is decelerated).
Given that we can now specify the vacuum cost delay settings for
autovacuum and disable tables and everything else, I'm not sure we need this
anymore, at least not as it was originally designed. It sounds like
Itagaki is doing things a little different with his patch, but I'm not
sure I understand it.
I noticed my method is based on different views from contrib/pg_autovacuum.
I'm afraid of the lack of vacuum by autovacuum. So if the database seems to
require frequent vacuums, I'll accelerate autovacuum, and vice versa.
If we have a small heavily-updated table and a large rarely-updated table,
we should vacuum the small one soon after vacuum on the large one is done,
even if the large vacuum takes long time. -- but hmm, it may be better to
have multiple autovacuums in such a case primarily.
My vision of the maintenance window has always been very simple, that
is, during the maintenance window the thresholds get reduced by some
factor (probably a GUC variable) so during the day it might take 10000
updates on a table to cause a vacuum but during the naptime it might be
10% of that, 1000. Is this in-line with what others were thinking?
I agree. We can use autovacuum thresholds and cost-delay parameters to
control the frequency and priority of vacuum. I don't think it is good
to control vacuums by changing naptime.
Regards,
---
ITAGAKI Takahiro
NTT Open Source Software Center
ITAGAKI Takahiro wrote:
"Matthew T. O'Connor" <matthew@zeut.net> wrote:
What is this based on? That is, based on what information is it
deciding to reduce the naptime?
If there are some vacuum or analyze jobs, the naptime is shortened
(i.e, autovacuum is accelerated). And if there are no jobs, the naptime
is lengthened (autovacuum is decelerated).
Yeah, I looked through the patch after I sent this email. It's an
interesting perspective, but I want to see some performance numbers or
significant bloat reduction before I agree this is a good idea. Again,
when a table is busy, constant vacuuming will help keep down bloat, but
at the expense of throughput.
I noticed my method is based on different views from contrib/pg_autovacuum.
I'm afraid of the lack of vacuum by autovacuum. So if the database seems to
require frequent vacuums, I'll accelerate autovacuum, and vice versa.
If we have a small heavily-updated table and a large rarely-updated table,
we should vacuum the small one soon after vacuum on the large one is done,
even if the large vacuum takes long time. -- but hmm, it may be better to
have multiple autovacuums in such a case primarily.
Yes, I think we are heading in this direction. As of 8.2 PostgreSQL
will allow multiple vacuums at the same time (just not on the same
table), autovacuum hasn't been trained on this yet, but I think it will
eventually.
I agree. We can use autovacuum thresholds and cost-delay parameters to
control the frequency and priority of vacuum. I don't think it is good
to control vacuums by changing naptime.
Now I'm confused, are you now saying that you don't like the concept
behind your patch? Or am I misunderstanding. I think your idea might
be a good one, I'm just not sure yet.
Matt
Matthew T. O'Connor wrote:
My vision of the maintenance window has always been very simple, that
is, during the maintenance window the thresholds get reduced by some
factor (probably a GUC variable) so during the day it might take 10000
updates on a table to cause a vacuum but during the naptime it might be
10% of that, 1000. Is this in-line with what others were thinking?
My vision is a little more complex than that. You define group of
tables, and separately you define time intervals. For each combination
of group and interval you can configure certain parameters, like a
multiplier for the autovacuum thresholds and factors; and also the
"enable" bit. So you can disable vacuum for some intervals, and refine
the equation factors for some others. This is all configured in tables,
not in GUC, so you have more flexibility in choosing stuff for different
groups of tables (say, you really want the small-but-high-update tables
to be still vacuumed even during peak periods, but you don't want that
big fat table to be vacuumed at all during the same period).
I had intended to work on this during the code sprint, but got
distracted. I intend to do it for 8.3 instead.
--
Alvaro Herrera http://www.CommandPrompt.com/
The PostgreSQL Company - Command Prompt, Inc.
Alvaro Herrera wrote:
My vision is a little more complex than that. You define group of
tables, and separately you define time intervals. For each combination
of group and interval you can configure certain parameters, like a
multiplier for the autovacuum thresholds and factors; and also the
"enable" bit. So you can disable vacuum for some intervals, and refine
the equation factors for some others. This is all configured in tables,
not in GUC, so you have more flexibility in choosing stuff for different
groups of tables (say, you really want the small-but-high-update tables
to be still vacuumed even during peak periods, but you don't want that
big fat table to be vacuumed at all during the same period).
That sounds good. I worry a bit that it's going to get overly complex.
I suppose if we create the concept of a default window that all new
tables will be automatically be added to when created, then out of the
box we can create 1 default 24 hour maintenance window that would
effectively give us the same functionality we have now.
Could we also use these groups to be used for concurrent vacuums? That
is autovacuum will loop through each group of tables independently thus
allowing multiple simultaneous vacuums on different tables and giving us
a solution to the constantly updated table problem.
On Thu, Aug 17, 2006 at 03:00:00PM +0900, ITAGAKI Takahiro wrote:
"Matthew T. O'Connor" <matthew@zeut.net> wrote:
Sorry, I should have explained more.
What is this based on? That is, based on what information is it
deciding to reduce the naptime?
If there are some vacuum or analyze jobs, the naptime is shortened
(i.e, autovacuum is accelerated). And if there are no jobs, the naptime
is lengthened (autovacuum is decelerated).
IMO, the only reason at all for naptime is because there is a
non-trivial cost associated with checking a database to see if any
vacuuming is needed.
One problem that I've run across is that in a cluster with a lot of
databases it can take a very long time to cycle through all of them.
Perhaps a better idea would be to check a number of databases on each
pass. That way you won't bog the server down while checking, but it
won't take as long to get to all the databases.
Also, autovac should immediately continue checking databases after it
finishes vacuuming one. The reason for this is that while vacuuming,
the vacuum_cost_delay settings will almost certainly be in effect, which
will prevent autovac from hammering the system. Since the system won't
be hammered during the vacuum, it's ok to check more databases
immediately after finishing vacuuming on one.
Does anyone have any info on how much load there actually is when
checking databases to see if they need vacuuming?
--
Jim C. Nasby, Sr. Engineering Consultant jnasby@pervasive.com
Pervasive Software http://pervasive.com work: 512-231-6117
vcard: http://jim.nasby.net/pervasive.vcf cell: 512-569-9461
Jim C. Nasby wrote:
On Thu, Aug 17, 2006 at 03:00:00PM +0900, ITAGAKI Takahiro wrote:
IMO, the only reason at all for naptime is because there is a
non-trivial cost associated with checking a database to see if any
vacuuming is needed.
This cost is reduced significantly in the integrated version as compared
to the contrib version, but yes still not zero.
One problem that I've run across is that in a cluster with a lot of
databases it can take a very long time to cycle through all of them.
Perhaps a better idea would be to check a number of databases on each
pass. That way you won't bog the server down while checking, but it
won't take as long to get to all the databases.
Also, autovac should immediately continue checking databases after it
finishes vacuuming one. The reason for this is that while vacuuming,
the vacuum_cost_delay settings will almost certainly be in effect, which
will prevent autovac from hammering the system. Since the system won't
be hammered during the vacuum, it's ok to check more databases
immediately after finishing vacuuming on one.
This is basically what Itagaki's patch does.
Does anyone have any info on how much load there actually is when
checking databases to see if they need vacuuming?
I haven't.