#!/usr/bin/perl -w

#
#    Copyright 2008 Niklas Edmundsson <nikke@acc.umu.se>,
#                   Tomas Ögren <stric@acc.umu.se>,
#                   David Cameron <cameron@ndgf.org>
#
#    Originally developed by Niklas Edmundsson and Tomas Ögren as
#    cleanbyage. Modified, renamed cache-clean and maintained
#    for ARC by David Cameron.
#
#    Released under Apache License Version 2.0
#

use Sys::Hostname;
use File::Find ();
use File::Path;
use Getopt::Std;
use Fcntl ':mode';
use DirHandle;
use File::Basename;

use strict;
use warnings;


BEGIN {
    unshift @INC, dirname($0).'/../../lib/arc';
}

use ConfigCentral;

# Set the variable $File::Find::dont_use_nlink if you're using AFS,
# since AFS cheats.

# for the convenience of &wanted calls, including -eval statements:
use vars qw/*name *dir *prune/;
*name   = *File::Find::name;
*dir    = *File::Find::dir;
*prune  = *File::Find::prune;

sub wanted;
sub debug;
sub printsize;
sub diskspace;

my(%opts);

my $configfile;

# Min free percentage
my $defminfree = 20;
my $minfree = $defminfree;
my $mincleanfree = $defminfree;

my %files;
my $totsize = 0;
my $totlocksize = 0;
my $totlockfiles = 0;

my $debuglevel = 'INFO';
LogUtils::level($debuglevel);
LogUtils::timestamps(1);
my $log = LogUtils->getLogger('cache-clean');

getopts('hsdc:m:M:i:e:D:', \%opts);

if(defined($opts{c})) {
    $configfile = $opts{c};
}
if(defined($opts{m})) {
    $minfree = $opts{m};
}
if(defined($opts{M})) {
    $mincleanfree = $opts{M};
}
if(defined($opts{D})) {
    $debuglevel = $opts{D};
    LogUtils::level($debuglevel);
}
if(defined($opts{s})) {
    $debuglevel = 'ERROR';
    LogUtils::level($debuglevel);
}

if($minfree < 0 || $minfree > 100) {
    die "Bad value for -m: $minfree\n";
}
if($mincleanfree < 0 || $mincleanfree > 100) {
    die "Bad value for -M: $mincleanfree\n";
}
if($mincleanfree < $minfree) {
    die "-M can't be smaller than -m (now $mincleanfree/$minfree)\n";
}

sub usage {
    print <<EOH;
   usage: cache-clean -h | -s | -m <NN> -M <NN> [-D debug_level]
            [ -c <arex_config_file> | <dir1> [<dir2> [...]] ]
    -h      - This help
    -s      - Statistics mode, show cache usage stats, dont delete anything
    -c      - path to an A-REX config file, xml or ini format
    -m NN   - Min free space to start cleaning (percent)
    -M NN   - Free space to stop cleaning (percent)
    -D      - Debug level, FATAL, ERROR, WARNING, INFO, VERBOSE or DEBUG.
              Default is INFO

   Caches are given by dir1, dir2.. or taken from the config file specified
   by -c or ARC_CONFIG

EOH
    exit 1;
}

usage() if(defined($opts{'h'}) || ((!defined($opts{'M'}) || !defined($opts{'m'})) && !defined($opts{'s'})));

if (!$configfile && $ENV{'ARC_CONFIG'} && -e $ENV{'ARC_CONFIG'}) {
    $configfile = $ENV{'ARC_CONFIG'};
}

$log->info('Cache cleaning started');

my @caches = @ARGV;

if (!@caches) {
    die 'No config file found\n' unless $configfile;
    my $config = ConfigCentral::parseConfig($configfile);
    die "Failed parsing A-REX config file '$configfile'\n" unless $config;
    die "No users set up in config file '$configfile'\n"
        unless $config->{control} and ref $config->{control} eq 'HASH';
    for my $control (values %{$config->{control}}) {
        next unless ref $control eq 'HASH';
        next unless $control->{cachedir} and ref $control->{cachedir} eq 'ARRAY';
        for (@{$control->{cachedir}}) {
            $log->warning("\n Warning: cache-clean cannot deal with substitutions - $_") and next if /%/;
            $log->warning("\n Warning: ignoring malformed cahe location - $_\n") and next unless m{^(/\S+)};
            push @caches, $1;
        }
    }
    die "No caches found in config file '$configfile'\n" unless @caches;
}

# ConfigCentral sets debug level to level in conf file, so we have to reset it here
LogUtils::level($debuglevel);

foreach my $filesystem (@caches) {

    $filesystem =~ s|/+$|| if ($filesystem ne "/");
    next if ($filesystem eq "");

    if ($filesystem =~ /%/) {
        $log->warning("$filesystem: Warning: cache-clean cannot deal with substitutions");
        next;
    }

    if (! -d $filesystem || ! -d $filesystem."/data") {
        $log->info("$filesystem: Cache is empty");
        next;
    }

    # follow sym links to real filesystem
    my $symlinkdest = $filesystem;
    while ($symlinkdest) {
        $filesystem = $symlinkdest;
        $symlinkdest = readlink($symlinkdest);
        $symlinkdest =~ s|/+$|| if $symlinkdest;
    }

    my $fsvalues = diskspace($filesystem);

    if(!($fsvalues)) {
        $log->warning("Unable to stat $filesystem");
        next;
    }

    my $fssize = $fsvalues->{total};
    my $fsfree = $fsvalues->{free};

    my $minfbytes=$fssize*$minfree/100;
    $log->info(join("", "$filesystem: used space ", printsize($fssize-$fsfree), " / ", printsize($fssize), " (", sprintf("%.2f",100-100*$fsfree/$fssize), "%)"));
    if ($fsfree > $minfbytes && !$opts{'s'}) {
        $log->info(join("", "Used space is lower than upper limit (", 100-$minfree, "%)"));
        next;
    }

    $minfbytes=$fssize*$mincleanfree/100;
    
    %files = ();
    $totsize = 0;
    $totlocksize = 0;
    $totlockfiles = 0;

    File::Find::find({wanted => \&wanted}, $filesystem."/data");

    if($opts{'s'}) {
        print "\nUsage statistics: $filesystem\n";
        print "Total deletable files found: ",scalar keys %files," ($totlockfiles files locked)\n";
        print "Total size of deletable files found: ",printsize($totsize)," (",printsize($totlocksize)," locked)\n";
        print "Used space on file system: ",printsize($fssize-$fsfree)," / ",printsize($fssize), " (",sprintf("%.2f",100-100*$fsfree/$fssize),"%)\n";
        my $increment = $totsize / 10;
        if($increment < 1) {
            print "Total size too small to show usage histogram\n";
            next;
        }

        printf "%-21s %-25s %s\n", "At size (% of total)", "Newest file", "Oldest file";
        my $nextinc = $increment;
        my $accumulated = 0;
        my ($newatime, $lastatime);

        foreach my $fil (sort { $files{$b}{atime} <=> $files{$a}{atime} } 
              keys %files) {
            $accumulated += $files{$fil}{size};
            if(!$newatime) {
                $newatime = $files{$fil}{atime};
            }

            if($accumulated > $nextinc) {
                printf "%-21s %-25s %s\n", 
                printsize($accumulated)." (".int(($accumulated/$totsize)*100)."%)",
                scalar localtime($newatime),
                scalar localtime($files{$fil}{atime});
                while($nextinc < $accumulated) {
                    $nextinc += $increment;
                }
                $newatime = undef;
                $lastatime = undef;
            }
            else {
                $lastatime = $files{$fil}{atime};
            }
        }
        printf "%-21s %-25s %s\n", 
        printsize($accumulated)." (100%)", "-",
        scalar localtime($lastatime) if($lastatime);
        next;
    }


    foreach my $fil (sort { $files{$a}{atime} <=> $files{$b}{atime} } keys %files) {
        last if $fsfree > $minfbytes;

        next if (-e "$fil.lock");
        next if (-d "$fil");

        if ( unlink $fil ) {
            $fsfree+=$files{$fil}{size};
            if (defined($opts{'D'}) && -e "$fil.meta") {
                open FILE, "$fil.meta";
                my @lines = <FILE>;
                close FILE;
                my @values = split(' ', $lines[0]);
                $log->verbose("Deleting file: $fil  atime: $files{$fil}{atime}  size: $files{$fil}{size}  url: $values[0]");
            }
            else {
                $log->verbose("Deleting file: $fil  atime: $files{$fil}{atime}  size: $files{$fil}{size}");
            }
        } else {
            $log->warning("Error deleting file '$fil': $!");
        }
	next if (! -e "$fil.meta");
        # not critical if this fails
        if ( unlink "$fil.meta" ) {
            my $lastslash = rindex($fil, "/");
            if ( rmdir(substr($fil, 0, $lastslash))) {
                $log->verbose("Deleting directory ".substr($fil, 0, $lastslash));
            }
        } else {
            $log->warning("Error deleting file '$fil.meta': $!");
        } 
    }
    $log->info(join("", "Cleaning finished, used space now ", printsize($fssize-$fsfree), " / ", printsize($fssize), " (", sprintf("%.2f",100-100*$fsfree/$fssize),"%)"));
}
exit 0;

sub wanted {

    return if $name =~ m|\.lock$|;
    return if $name =~ m|\.meta$|;

    my ($atime, $blocks);

    ($atime, $blocks) = (lstat($_))[8,12];

    return unless defined $atime;

    return unless !(-d _) || -f _ || -l _;
    if (-e "$name.lock") {
        $totlocksize += 512 * $blocks;
        $totlockfiles++;
        return;
    }

    $files{$name}{atime}=$atime;
    $files{$name}{size}=512 * $blocks;
    $totsize += 512 * $blocks;
}

sub printsize($)
{
    my $size = shift;

    if($size > 1024*1024*1024*1024) {
        $size = int($size/(1024*1024*1024*1024));
        return "$size TB";
    }
    if($size > 1024*1024*1024) {
        $size = int($size/(1024*1024*1024));
        return "$size GB";
    }
    if($size > 1024*1024) {
        $size = int($size/(1024*1024));
        return "$size MB";
    }
    if($size > 1024) {
        $size = int($size/1024);
        return "$size kB";
    }

    return $size;
}

#
# Returns disk space (total and free) in bytes on a filesystem
# Taken from arc1/trunk/src/services/a-rex/infoproviders/HostInfo.pm
# TODO: Put in common place
#
sub diskspace ($) {
    my $path = shift;
    my ($diskfree, $disktotal);

    if ( -d "$path") {
        # check if on afs
        if ($path =~ m#/afs/#) {
            my @dfstring =`fs listquota $path 2>/dev/null`;
            if ($? != 0) {
                $log->warning("Failed running: fs listquota $path");
            } elsif ($dfstring[-1] =~ /\s+(\d+)\s+(\d+)\s+\d+%\s+\d+%/) {
                $disktotal = int $1/1024;
                $diskfree  = int(($1 - $2)/1024);
            } else {
                $log->warning("Failed interpreting output of: fs listquota $path");
            }
        # "ordinary" disk
        } else {
            my @dfstring =`df -k $path 2>/dev/null`;
            if ($? != 0) {
                $log->warning("Failed running: df -k $path");

            # The first column may be printed on a separate line.
            # The relevant numbers are always on the last line.
            } elsif ($dfstring[-1] =~ /\s+(\d+)\s+\d+\s+(\d+)\s+\d+%\s+\//) {
                $disktotal = $1*1024;
                $diskfree  = $2*1024;
            } else {
                $log->warning("Failed interpreting output of: df -k $path");
            }
        }
    } else {
        $log->warning("Not a directory: $path");
    }

    return undef unless defined($disktotal) and defined($diskfree);
    return {total => $disktotal, free => $diskfree};
}
