# HG changeset patch # User Sascha Teichmann # Date 1294491427 0 # Node ID fb7a900a649f214276c53402704e880a672967bf # Parent 09fcac0bdc25a7e67319496a8f67c6e114a5cb33 contrib: Added script to remove old deb files from directories. diff -r 09fcac0bdc25 -r fb7a900a649f contrib/bin/README --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/contrib/bin/README Sat Jan 08 12:57:07 2011 +0000 @@ -0,0 +1,19 @@ +delete-old-debs.py +------------------ +Script to remove old deb files from a given list +of directories. Tree Packager builds a lot of deb files +when running for a long time. Not all of them are needed forever. +We had a directory with over 13,000(!) deb files eating +up 15 GiB of hard disk space. This led to performance +problems and problems generating the corresponding repository. +This script lets you keep only the n youngest deb +files per package in the directory. It may be used inside +a cron job to garbage collect old packages. + +Usage: delete-old-debs.py [options] dir ... + +Options: + -h, --help show this help message and exit + -v, --verbose verbose output + -d, --dry-run don't remove the old deb files + -k KEEP, --keep=KEEP number of files to keep. Default: 3 diff -r 09fcac0bdc25 -r fb7a900a649f contrib/bin/delete-old-debs.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/contrib/bin/delete-old-debs.py Sat Jan 08 12:57:07 2011 +0000 @@ -0,0 +1,138 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- +# +# Copyright (C) 2011 by Intevation GmbH +# Authors: +# Sascha L. Teichmann +# +# This program is free software under the GPL (>=v2) +# Read the file COPYING coming with the software for details. 

import sys
import os
import re

import subprocess
import logging

from heapq import nsmallest

from optparse import OptionParser

log = logging.getLogger(__name__)
log.setLevel(logging.WARNING)
log.addHandler(logging.StreamHandler(sys.stderr))

DEFAULT_KEEP = 3

# One "Field: value" line as printed by 'dpkg-deb -f'. Field names may
# contain digits and hyphens (e.g. "Installed-Size").
FIELD = re.compile(r"([a-zA-Z][a-zA-Z0-9-]*):\s*(.+)")


def _version_relation(relation):
    """Build a comparison method backed by 'dpkg --compare-versions'.

    The returned function takes (self, other) and is true when
    ``self.version <relation> other.version`` holds according to dpkg.
    The relation is bound through this factory's argument so every
    generated method keeps its own relation (a lambda created in a loop
    would see only the last one).
    """
    def compare(self, other):
        try:
            theirs = other.version
        except AttributeError:
            # Not something carrying a version: let Python try the
            # reflected operation or fall back to its default.
            return NotImplemented
        # dpkg exits with 0 when the relation is satisfied.
        return subprocess.call([
            "dpkg", "--compare-versions",
            self.version, relation, theirs]) == 0
    return compare


# Map rich comparison to 'dpkg --compare-versions'.
# The ordering relations are swapped (< asks dpkg for 'gt' and so on) so
# that the *newest* version is the *smallest* element; equality is left
# untouched because reversing an order does not change what is equal.
RICH_CMP = dict(
    ("__%s__" % method, _version_relation(relation))
    for method, relation in (
        ("eq", "eq"), ("ne", "ne"),
        ("lt", "gt"), ("gt", "lt"),
        ("le", "ge"), ("ge", "le")))


class DebCmp(object):
    """Helper class to make deb files comparable by their versions.

    Instances order in reverse Debian version order: the deb with the
    highest version compares as the smallest. Every comparison spawns
    a 'dpkg' process.
    """

    # Special methods are looked up on the type, not on the instance,
    # so the comparison functions have to be class attributes.
    __eq__ = RICH_CMP["__eq__"]
    __ne__ = RICH_CMP["__ne__"]
    __lt__ = RICH_CMP["__lt__"]
    __gt__ = RICH_CMP["__gt__"]
    __le__ = RICH_CMP["__le__"]
    __ge__ = RICH_CMP["__ge__"]

    # Equality is decided by dpkg, so no consistent hash can be derived
    # from the version string; make instances explicitly unhashable.
    __hash__ = None

    def __init__(self, version, path):
        self.version = version
        self.path = path

    def __repr__(self):
        return "%s(%r, %r)" % (
            type(self).__name__, self.version, self.path)


def deb_info(deb, fields=("Package", "Version")):
    """Extract some meta info from a deb file.

    deb    -- path of the deb file.
    fields -- names of the control fields to query.

    Returns a dict mapping field names to their values. Fields that
    are not present are missing from the dict; the dict is empty if
    'dpkg-deb' fails on the file.
    """
    fields = list(fields)
    po = subprocess.Popen(
        ["dpkg-deb", "-f", deb] + fields,
        stdout=subprocess.PIPE,
        # Text output on Python 2 and Python 3 alike.
        universal_newlines=True)
    out = po.communicate()[0]
    if po.returncode != 0:
        log.warning("dpkg-deb failed for '%s' (exit code %d)"
                    % (deb, po.returncode))
        return {}

    if len(fields) == 1:
        # With exactly one requested field dpkg-deb prints the bare
        # value without the "Field: " prefix.
        value = out.strip()
        if value:
            return {fields[0]: value}
        return {}

    return dict(
        m.groups() for m in map(FIELD.match, out.splitlines()) if m)


def oldest_debs(deb_dir, keep=DEFAULT_KEEP):
    """Given a directory containing deb files this generator yields
    the paths of the files that are older than the youngest
    keep-th per package.

    deb_dir -- directory to scan (not recursive).
    keep    -- number of youngest deb files to keep per package.

    Files whose package name or version cannot be determined are
    skipped with a warning and are never yielded.
    """

    log.info("scanning dir '%s'" % deb_dir)

    packages = {}

    found = 0
    for name in os.listdir(deb_dir):
        if not name.endswith(".deb"):
            continue
        deb = os.path.join(deb_dir, name)
        if not os.path.isfile(deb):
            continue
        info = deb_info(deb)
        try:
            package, version = info["Package"], info["Version"]
        except KeyError:
            log.warning(
                "cannot read package info from '%s', skipping" % deb)
            continue
        packages.setdefault(package, []).append(DebCmp(version, deb))
        found += 1
        if found % 10 == 0:
            log.info("%d debs found" % found)

    log.info("%d debs found" % found)
    log.info("number packages: %s" % len(packages))

    for debs in packages.values():
        if len(debs) <= keep:
            continue
        # Full sorting is not required. DebCmp orders newest first,
        # so the 'keep' smallest elements are the youngest debs.
        stay = frozenset(d.path for d in nsmallest(keep, debs))

        for deb in debs:
            if deb.path not in stay:
                yield deb.path


def main(argv=None):
    """Command line entry point.

    argv -- list of command line arguments without the program name;
            defaults to sys.argv[1:].
    """
    usage = "usage: %prog [options] dir ..."
    parser = OptionParser(usage=usage)
    parser.add_option(
        "-v", "--verbose", action="store_true",
        dest="verbose",
        help="verbose output")
    parser.add_option(
        "-d", "--dry-run", action="store_true",
        dest="dry_run",
        help="don't remove the old deb files")
    parser.add_option(
        "-k", "--keep", action="store",
        dest="keep", type="int", default=DEFAULT_KEEP,
        help="number of files to keep. Default: %d" % DEFAULT_KEEP)

    options, args = parser.parse_args(argv)

    # In dry-run mode files are only reported, never deleted.
    remove = (lambda path: None) if options.dry_run else os.remove
    # Always keep at least the youngest deb of each package.
    keep = max(1, options.keep)
    if options.verbose:
        log.setLevel(logging.INFO)

    for deb_dir in args:

        if not os.path.isdir(deb_dir):
            log.warning("'%s' is not a directory" % deb_dir)
            continue

        for deb in oldest_debs(deb_dir, keep):
            # Logged at INFO level so that --verbose (in particular
            # together with --dry-run) shows what is removed.
            log.info("remove '%s'" % deb)
            remove(deb)
            # The matching upload description: same name, '.changes'
            # instead of '.deb'.
            changes = deb[:-3] + "changes"
            if os.path.isfile(changes):
                log.info("remove '%s'" % changes)
                remove(changes)


if __name__ == "__main__":
    main()