#!/usr/local/bin/perl
use DBI;
use strict;
use Getopt::Long;

our $STAT_DATA_TABLE          = 'public.pg_stat_vacuum';
our $STAT_THRESHOLD_VIEW = 'public.pg_stat_autovacuum_thresholds';

my $pghost     = $ENV{PGHOST}     || '';
my $pgport     = $ENV{PGPORT}     || '5432';
my $pguser     = $ENV{PGUSER}     || 'postgres';
my $pgpass     = $ENV{PGPASS}     || '';
my $pgdatabase = $ENV{PGDATABASE} || '';
my $verbose    = 0;

my $vacuum_scale  = 1000;
my $vacuum_th     = 2;
my $analyze_scale = 500;
my $analyze_th    = 1;
my $sleep_base    = 15;
my $max_loop_time = 15;
my $reset_stats   = 0;
my $create_schema = 0;

my %options = (
               'database|db=s'     => \$pgdatabase,
               'host=s'            => \$pghost,
               'port=s'            => \$pgport,
               'user=s'            => \$pguser,
               'pass=s'            => \$pgpass,
               'vacuum_scale|V=i'  => \$vacuum_scale,
               'vacuum_th|v=i'     => \$vacuum_th,
               'analyze_scale|A=i' => \$analyze_scale,
               'analyze_th|a=i'    => \$analyze_th,
               'sleep_base|s=i'    => \$sleep_base,
               'max_loop|m=i'      => \$max_loop_time,
               'verbose=i'         => \$verbose,
               'reset-stats'       => \$reset_stats,
               'create-schema'     => \$create_schema
              );

GetOptions(%options);

my $DONE    = 0;
my $RUNNING = 0;
$SIG{INT} = $SIG{TERM} = \&term;

my $pgdsn = 'dbi:Pg';
$pgdsn .= ':dbname=' . $pgdatabase if $pgdatabase;
$pgdsn .= ';host=' . $pghost       if $pghost;
$pgdsn .= ';port=' . $pgport       if $pgport;

my $dbh =
  DBI->connect($pgdsn, $pguser, $pgpass, {AutoCommit => 1, RaiseError => 1});

create_schema($dbh) if $create_schema;
reset_stats($dbh)   if $reset_stats;

my $sql = <<SQL;
    SELECT reloid, nspname, relname, reltuples, 
    num_del_v, num_upd_v, num_del_a, num_upd_a, num_ins_a,
    (num_del_v + num_upd_v > ($vacuum_scale + reltuples * $vacuum_th)) as do_vacuum,
    (num_del_a + num_upd_a + num_ins_a > ($analyze_scale + reltuples * $analyze_th)) as do_analyze
    FROM $STAT_THRESHOLD_VIEW 
    WHERE (num_del_v + num_upd_v > ($vacuum_scale + reltuples * $vacuum_th)) 
        OR (num_del_a + num_upd_a + num_ins_a > ($analyze_scale + reltuples * $analyze_th))
    ORDER BY do_vacuum desc, do_analyze desc
SQL

# inactivity backoff stuff
my $inactive_loops = 0;
my @FIBS = (0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610);

print STDERR "Autovacuum Starting.\n" if $verbose;
my $sth = $dbh->prepare($sql);
while (!$DONE)
{
    $sth->execute();
    my $tables = $sth->fetchall_arrayref({});

    my $to_process = $sth->rows();
    print STDERR "$to_process tables to process.\n"
      if $verbose > 2;

    # Scaling inactivity timer... 
    if ($to_process)
    {
        $inactive_loops-- if $inactive_loops > 0;
    }
    else
    {
        my $to_sleep = $sleep_base * $FIBS[$inactive_loops];
        print STDERR "Sleeping $to_sleep seconds...\n"
          if $verbose > 2;
        sleep $to_sleep;
        $inactive_loops++ if $inactive_loops < 15;
        next;
    }

    my $loop_start = time;
    my $completed;
    for my $tbl (@$tables)
    {
        last if $DONE;
        $RUNNING++;
        vacuum($dbh, $tbl) if $tbl->{do_vacuum};
        analyze($dbh, $tbl) if $tbl->{do_analyze};
        $completed++;
        $RUNNING = 0;
        last if time - $loop_start > $max_loop_time;
    }
    print STDERR ($to_process - $completed) . " tables remain.\n"
      if $verbose > 2 && ($to_process - $completed) > 0;
}

print STDERR "Autovacuum Exiting.\n" if $verbose;
$dbh->disconnect();
exit;

sub vacuum
{
    my ($dbh, $tbl) = @_;

    my $fqtbl = qq{"$tbl->{nspname}"."$tbl->{relname}"};

    print STDERR
      "Vacuuming $fqtbl (t:$tbl->{reltuples},d:$tbl->{num_del_v},u:$tbl->{num_upd_v})\n"
      if $verbose > 1;
    $dbh->do("VACUUM $fqtbl");

    if (
        get_bool(
                 $dbh, "select true from $STAT_DATA_TABLE where reloid=?",
                 $tbl->{reloid}
                )
       )
    {
        $dbh->do(
            qq{
            UPDATE $STAT_DATA_TABLE SET 
                num_del_v=pg_stat_get_tuples_deleted(vtbl.reloid), 
                num_ins_v=pg_stat_get_tuples_inserted(vtbl.reloid),
                num_upd_v=pg_stat_get_tuples_updated(vtbl.reloid)
            FROM (select ?::oid as reloid) vtbl
            WHERE $STAT_DATA_TABLE.reloid=vtbl.reloid
        }, undef, $tbl->{reloid}
        );
    }
    else
    {
        $dbh->do(
            qq{
            INSERT INTO $STAT_DATA_TABLE (reloid, num_del_v, num_ins_v, num_upd_v)
                SELECT vtbl.reloid, pg_stat_get_tuples_deleted(vtbl.reloid),
                    pg_stat_get_tuples_inserted(vtbl.reloid),
                    pg_stat_get_tuples_updated(vtbl.reloid)
                FROM (select ?::oid as reloid) vtbl;
        }, undef, $tbl->{reloid}
        );
    }
}

sub analyze
{
    my ($dbh, $tbl) = @_;
    my $fqtbl = qq{"$tbl->{nspname}"."$tbl->{relname}"};

    print STDERR
      "Analyzing $fqtbl  (t:$tbl->{reltuples},d:$tbl->{num_del_a},u:$tbl->{num_upd_a},i:$tbl->{num_ins_a})\n"
      if $verbose > 1;
    $dbh->do("ANALYZE $fqtbl\n");

    if (
        get_bool(
                 $dbh, "select true from $STAT_DATA_TABLE where reloid=?",
                 $tbl->{reloid}
                )
       )
    {
        $dbh->do(
            qq{
            UPDATE $STAT_DATA_TABLE SET 
                num_del_a=pg_stat_get_tuples_deleted(vtbl.reloid), 
                num_ins_a=pg_stat_get_tuples_inserted(vtbl.reloid),
                num_upd_a=pg_stat_get_tuples_updated(vtbl.reloid)
            FROM (select ?::oid as reloid) vtbl
            WHERE $STAT_DATA_TABLE.reloid=vtbl.reloid
        }, undef, $tbl->{reloid}
        );
    }
    else
    {
        $dbh->do(
            qq{
            INSERT INTO $STAT_DATA_TABLE (reloid, num_del_a, num_ins_a, num_upd_a)
                SELECT vtbl.reloid, pg_stat_get_tuples_deleted(vtbl.reloid),
                    pg_stat_get_tuples_inserted(vtbl.reloid),
                    pg_stat_get_tuples_updated(vtbl.reloid)
                FROM (select ?::oid as reloid) vtbl
        }, undef, $tbl->{reloid}
        );
    }
}

sub get_bool
{
    my ($dbh, $stmt, @params) = @_;
    my ($rc) = $dbh->selectrow_array($stmt, undef, @params);
    return $rc;
}

sub reset_stats
{
    my $dbh = shift;
    $dbh->do("truncate table $STAT_DATA_TABLE");
}

sub create_schema
{
    my $dbh = shift;

    $dbh->do(<<SQL);
    CREATE TABLE $STAT_DATA_TABLE 
    (
	reloid oid not null primary key
    ,	num_del_v bigint
    ,	num_ins_v bigint
    ,	num_upd_v bigint
    ,	num_del_a bigint
    ,	num_ins_a bigint
    ,	num_upd_a bigint
    );

    CREATE OR REPLACE VIEW $STAT_THRESHOLD_VIEW AS
    SELECT 
        c.oid AS reloid, n.nspname, c.relname, c.relpages, c.reltuples, 
        pg_stat_get_tuples_deleted(c.oid) - coalesce(a.num_del_v, 0) AS num_del_v, 
        pg_stat_get_tuples_inserted(c.oid) - coalesce(a.num_ins_v,0) AS num_ins_v, 
        pg_stat_get_tuples_updated(c.oid) - coalesce(a.num_upd_v,0) AS num_upd_v, 
        pg_stat_get_tuples_deleted(c.oid) - coalesce(a.num_del_a,0) AS num_del_a, 
        pg_stat_get_tuples_inserted(c.oid) - coalesce(a.num_ins_a,0) AS num_ins_a, 
        pg_stat_get_tuples_updated(c.oid) - coalesce(a.num_upd_a,0) AS num_upd_a 
    FROM pg_class c LEFT JOIN $STAT_DATA_TABLE a 
	ON a.reloid = c.oid JOIN pg_namespace n ON c.relnamespace = n.oid
    WHERE c.relkind = 'r';
SQL
}

sub term
{
    $DONE++;
    print STDERR "Got TERM signal.\n"
      if $verbose;
    print STDERR "Vacuum still running, will exit after it completes\n"
      if $RUNNING && $verbose > 1;
}

