#!/usr/bin/perl -w

# apt-cacher-cleanup.pl
# Script to clean the cache for the Apt-cacher package caching system.
# For more information visit www.apt-cacher.org
#
# Copyright (C) 2002-03, Jonathan Oxer <jon@debian.org>
# Portions  (C) 2002, Jacob Lundberg <jacob@chaos2.org>
# Distributed under the terms of the GNU Public Licence (GPL).

#use strict;
#############################################################################
### configuration ###########################################################
# Include the library for the config file parser
require '/usr/share/apt-cacher/apt-cacher-lib.pl';

# Read in the config file and set the necessary variables
my $configfile = '/etc/apt-cacher/apt-cacher.conf';

my $configref;
eval {
        $configref = read_config($configfile);
};
my %config = %$configref;

# not sure what to do if we can't read the config file...
die "Could not read config file: $@" if $@;

# check whether we're actually meant to clean the cache
if ( $config{clean_cache} ne 1 ) {
	exit 0;
}

# check whether we have access to internal gzip inflation
$gzip = _have_compress_zlib();


#############################################################################
### Preparation of the package lists ########################################

# Make a temp directory in the cache dir if it isn't there yet
my $temp_dir = "$config{cache_dir}/temp";
if (!-d $temp_dir) {
	mkdir($temp_dir, 0755);
} else {
	# Clean out anything left behind previously
	unlink(<$temp_dir/*>);
}

# Move into the temp dir to start processing the Packages lists
chdir($temp_dir) || die "apt-cacher-cleanup.pl: can't open temp directory";

# Loop through each package list in turn and process it.
# We drop the HTTP header by searching for "Package:" (package list)
# or "\x1f\x8b" (gzip header).  We will use Compress:Zlib if we can;
# it's secure and consumes less memory.  The goal of this section is
# to create a list of all Filename fields in the hash %keeplist.

# First though we check if there actually are any Packages files there,
# and if there aren't we can bail right now.
if ( ! <$config{cache_dir}/*Packages*> ) {
	exit 0;
}

if ($gzip) {
	foreach $packagelist ( <$config{cache_dir}/*Packages*> ) {
		# Skip empty files
		next if -z $packagelist;
		# read the whole file as one long line
		my $barelist;
		local $/ = undef;
		open(LISTFILE, "<$packagelist");
		$barelist = <LISTFILE>;
		close(LISTFILE);
		# discard the HTTP header
		$barelist =~ s/^.*?(\x1f\x8b|Package:)/$1/s;
		# decompress if it's gzipped
		if ($1 eq "\x1f\x8b") {
			$barelist = Compress::Zlib::memGunzip($barelist);
		}
		# select lines containing "Filename: ", and remove it
		# from the lines selected (so they just contain the name)
		# slow form: $barelist =~ s#(?ms:^.*?)(?:Filename: .*/|$)##g;
		# $barelist =~ s#(?ms:^.*?)Filename: .*/##g;
		$barelist =~ s#^.*?Filename: [^\n]*/##gms;
		$barelist =~ s/\n[^\n:]*:.*$/\n/s;
		map { $keeplist{$_} = 1 } split("\n", $barelist);
	}
} else { # if ! $gzip
	foreach $packagelist ( <$config{cache_dir}/*Packages*> ) {
		# Skip empty files
		next if -z $packagelist;
		# read the whole file as one long line
		my $barelist;
		local $/ = undef;
		open(LISTFILE, "<$packagelist");
		$barelist = <LISTFILE>;
		close(LISTFILE);
		# discard the HTTP header
		$barelist =~ s/^.*?(\x1f\x8b|Package:)/$1/s;
		# write the file into the temp dir
		my $cache_dir = $config{cache_dir};
		$packagelist =~ s/$cache_dir/$temp_dir/;
		open(LISTFILE, ">$packagelist");
		print(LISTFILE $barelist);
		close(LISTFILE);
	}
	# Since we couldn't unzip the packages ourselves, we call
	# zgrep to list them, then sort them and zap all duplicates.
	#`zgrep -h "Filename: " *Packages* | sort | uniq > packagelist.txt`;
	$barelist = `zgrep -h "Filename: " *Packages*`;
	$barelist =~ s#^Filename: .*/##gm;
	map { $keeplist{$_} = 1 } split("\n", $barelist);
	unlink(<$config{cache_dir}/*Packages*>);
}


#############################################################################
### Cleaning up the actual packages #########################################

# Move back to the cache dir to chomp on the actual .deb packages
chdir("$config{cache_dir}") || die "apt-cacher-cleanup.pl: can't open cache directory";

# Loop through the list of packages and process each in turn
foreach $package ( <*.deb> ) {
	# Look for this package in the package list
	if ( $keeplist{$package} ) {
		#print "$package: OK\n";
	} else {
		# If it isn't in the list, nuke it
		#print "$package: no match (deleting)\n";
		unlink("$config{cache_dir}/$package");
	}
}

# Woohoo, all done!
exit 0;


#############################################################################
### Support functions #######################################################

# Test whether or not the compression lib is available to us.
sub _have_compress_zlib {
	# try and load the compression library
	eval { require Compress::Zlib; };
	if ($@) {
		return undef;
	} else {
		return 1;
	}
}
