changeset 535:fb7a900a649f

contrib: Added script to remove old deb files from directories.
author Sascha Teichmann <teichmann@intevation.de>
date Sat, 08 Jan 2011 12:57:07 +0000
parents 09fcac0bdc25
children 8a61185a3357
files contrib/bin/README contrib/bin/delete-old-debs.py
diffstat 2 files changed, 157 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/bin/README	Sat Jan 08 12:57:07 2011 +0000
@@ -0,0 +1,19 @@
+delete-old-debs.py
+------------------
+Script to remove old deb files from a given list
+of directories. Tree Packager builds a lot of deb files
+when running for a long time. Not all of them are needed forever.
+We had a directory with over 13,000(!) deb files eating
+up 15 GiB of hard disk space. This led to performance
+problems and problems generating the corresponding repository.
+This script lets you keep only the n newest deb files
+per package in each directory. It may be used inside
+a cron job to garbage-collect old packages.
+
+Usage: delete-old-debs.py [options] dir ...
+
+Options:
+  -h, --help            show this help message and exit
+  -v, --verbose         verbose output
+  -d, --dry-run         don't remove the old deb files
+  -k KEEP, --keep=KEEP  number of files to keep. Default: 3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/bin/delete-old-debs.py	Sat Jan 08 12:57:07 2011 +0000
@@ -0,0 +1,138 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+#
+# Copyright (C) 2011 by Intevation GmbH
+# Authors:
+# Sascha L. Teichmann <sascha.teichmann@intevation.de>
+#
+# This program is free software under the GPL (>=v2)
+# Read the file COPYING coming with the software for details.
+
+import sys
+import os
+import re
+
+import subprocess
+import logging
+
+from heapq import nsmallest
+
+from optparse import OptionParser
+
+# Module-wide logger: messages go to stderr. Only warnings and above are
+# shown unless the level is lowered (main() does so for --verbose).
+log = logging.getLogger(__name__)
+log.setLevel(logging.WARNING)
+log.addHandler(logging.StreamHandler(sys.stderr))
+
+# Default number of deb files kept per package.
+DEFAULT_KEEP = 3
+
+# Matches one "Name: value" line, e.g. of 'dpkg-deb -f' output.
+# Raw string so that "\s" is a regex escape, not an invalid string escape.
+FIELD = re.compile(r"([a-zA-Z]+):\s*(.+)")
+
+# Map rich comparison to 'dpkg --compare-versions'.
+# The operators are deliberately inverted (== to !=, < to >= and so on)
+# to reverse the order in a heap: heapq.nsmallest() then yields the
+# entries with the highest versions.
+_REVERSED_OPS = (("eq", "ne"), ("ne", "eq"),
+                 ("lt", "ge"), ("gt", "le"),
+                 ("le", "gt"), ("ge", "lt"))
+
+
+def _dpkg_compare(op):
+    """Build a comparison method that asks dpkg whether
+       'self.version <op> other.version' holds.
+
+       A factory function is used instead of a lambda inside the loop so
+       that every method is bound to its own operator. A lambda would
+       only see the last value of the loop variable (late binding).
+    """
+    def compare(se, ot):
+        # An exit status of 0 is treated as "relation holds".
+        return subprocess.call([
+            "dpkg", "--compare-versions",
+            se.version, op, ot.version]) == 0
+    return compare
+
+
+RICH_CMP = dict(
+    ("__%s__" % name, _dpkg_compare(op)) for name, op in _REVERSED_OPS)
+
+
+class DebCmp(object):
+    """Helper class to make deb files comparable
+       by their versions.
+
+       version is the version string handed to dpkg, path the deb file
+       it belongs to. Comparisons run 'dpkg --compare-versions' with
+       inverted operators, see RICH_CMP.
+    """
+
+    def __init__(self, version, path):
+        self.version = version
+        self.path    = path
+
+
+# Rich comparison methods are looked up on the type, not on the instance,
+# so they have to be installed on the class to take effect.
+for _name, _method in RICH_CMP.items():
+    setattr(DebCmp, _name, _method)
+
+
+def deb_info(deb, fields=("Package", "Version")):
+    """Extract some meta info from a deb file.
+
+       Runs 'dpkg-deb -f' on the file deb for the given control fields
+       and returns a dict mapping field name to value, built from the
+       output lines that look like 'Name: value'. Fields for which no
+       such line is printed (e.g. because dpkg-deb failed) are missing
+       from the result.
+    """
+    po = subprocess.Popen(
+        ["dpkg-deb", "-f", deb] + list(fields),
+        stdout=subprocess.PIPE,
+        # Read text instead of bytes so that FIELD (a text pattern)
+        # also matches when run with Python 3.
+        universal_newlines=True)
+    out = po.communicate()[0]
+    return dict(m.groups()
+                for m in map(FIELD.match, out.splitlines()) if m)
+
+
+def oldest_debs(deb_dir, keep=DEFAULT_KEEP):
+    """Given directory containing deb files this function
+       yields the paths of the files that are older than the
+       keep youngest ones per package.
+
+       Only regular files ending in '.deb' directly inside deb_dir are
+       considered. Debs whose package name or version cannot be
+       determined are skipped with a warning and are never yielded.
+       This is a generator: the directory is scanned when the first
+       item is requested.
+    """
+
+    log.info("scanning dir '%s'" % deb_dir)
+
+    packages = {}
+
+    num = 0
+    for f in os.listdir(deb_dir):
+        if not f.endswith(".deb"): continue
+        deb = os.path.join(deb_dir, f)
+        if not os.path.isfile(deb): continue
+        info = deb_info(deb)
+        try:
+            name, version = info['Package'], info['Version']
+        except KeyError:
+            # One unreadable file must not abort the whole clean up.
+            log.warning("cannot read package info of '%s'" % deb)
+            continue
+        packages.setdefault(name, []).append(DebCmp(version, deb))
+        # Count first, then report, so the progress number is exact.
+        num += 1
+        if (num % 10) == 0:
+            log.info("%d debs found" % num)
+
+    if log.isEnabledFor(logging.INFO):
+        log.info("%d debs found" % num)
+        log.info("number packages: %s" % len(packages))
+
+    # items() instead of iteritems() works with Python 2 and 3.
+    for package, debs in packages.items():
+        if len(debs) > keep:
+            # Full sorting is not required. DebCmp is meant to order by
+            # descending version, so the 'smallest' entries are the
+            # youngest debs.
+            stay = frozenset([d.path for d in nsmallest(keep, debs)])
+
+            for deb in debs:
+                if deb.path not in stay:
+                    yield deb.path
+
+
+def main():
+    """Command line entry point.
+
+       Parses the options, then removes the old deb files (and a
+       '.changes' file of the same base name, if present) from all
+       directories given as arguments. With --dry-run nothing is
+       deleted. Arguments that are not directories are skipped with
+       a warning.
+    """
+    usage = "usage: %prog [options] dir ..."
+    parser = OptionParser(usage=usage)
+    parser.add_option(
+        "-v", "--verbose", action="store_true",
+        dest="verbose",
+        help="verbose output")
+    parser.add_option(
+        "-d", "--dry-run", action="store_true",
+        dest="dry_run",
+        help="don't remove the old deb files")
+    parser.add_option(
+        "-k", "--keep", action="store",
+        dest="keep", type="int", default=DEFAULT_KEEP,
+        help="number of files to keep. Default: %d" % DEFAULT_KEEP)
+
+    options, args = parser.parse_args()
+
+    if options.dry_run:
+        # Same call signature as os.remove, but does nothing.
+        remove = lambda x: None
+    else:
+        remove = os.remove
+    # Always keep at least one deb per package.
+    keep = max(1, options.keep)
+    if options.verbose: log.setLevel(logging.INFO)
+
+    for deb_dir in args:
+
+        if not os.path.isdir(deb_dir):
+            log.warning("'%s' is not a directory" % deb_dir)
+            continue
+
+        for deb in oldest_debs(deb_dir, keep):
+            # Logged at INFO level: the logger is never set to DEBUG, so
+            # debug messages would not show up even with --verbose.
+            log.info("remove '%s'" % deb)
+            remove(deb)
+            # oldest_debs() yields plain path strings:
+            # "foo.deb" -> "foo.changes"
+            changes = deb[:-3] + "changes"
+            if os.path.isfile(changes):
+                log.info("remove '%s'" % changes)
+                remove(changes)
+
+
+if __name__ == "__main__":
+    main()
This site is hosted by Intevation GmbH (Datenschutzerklärung und Impressum | Privacy Policy and Imprint)