Mercurial > dive4elements > river
view flys-backend/contrib/import-kms.py @ 4255:670e98f5a441
Fixed leak while merging facets.
The ThemeList that is used by OutputHelper to sort the
Facets for an Output now uses a list to store the ManagedFacets.
The correct order is made up by sorting the List using
Collections.sort() function of the Java JDK. Therfore, the
ManagedFacet class implements the Comparable interface. The
return value of its compareTo(other) method depends on the
value of the 'position' field.
author | Ingo Weinzierl <weinzierl.ingo@googlemail.com> |
---|---|
date | Thu, 25 Oct 2012 14:01:46 +0200 |
parents | 80669241956c |
children |
line wrap: on
line source
#!/usr/bin/env python import sys import logging import re import os import sqlite3 as db import locale import codecs from optparse import OptionParser log = logging.getLogger(__name__) log.setLevel(logging.WARNING) log.addHandler(logging.StreamHandler(sys.stderr)) RANGE = re.compile("([^#]*)#(.*)") DEFAULT_DATABASE = "flys.db" SQL_NEXT_ID = "SELECT coalesce(max(id), -1) + 1 FROM %s" SQL_SELECT_ID = "SELECT id FROM %s WHERE %s = ?" SQL_INSERT_ID = "INSERT INTO %s (id, %s) VALUES (?, ?)" SQL_SELECT_RANGE_ID = """ SELECT id FROM ranges WHERE river_id = ? AND a = ? AND b = ? """ SQL_INSERT_RANGE_ID = """ INSERT INTO ranges (id, river_id, a, b) VALUES (?, ?, ?, ?) """ SQL_SELECT_ANNOTATION_ID = """ SELECT id FROM annotations WHERE range_id = ? AND attribute_id = ? AND position_id = ? """ SQL_INSERT_ANNOTATION_ID = """ INSERT INTO annotations (id, range_id, attribute_id, position_id) VALUES (?, ?, ?, ?) """ def encode(s): try: return unicode(s, "latin-1") except UnicodeDecodeError: return unicode.encode(s, locale.getpreferredencoding()) class hashabledict(dict): def __key(self): return tuple((k, self[k]) for k in sorted(self)) def __hash__(self): return hash(self.__key()) def __eq__(self, other): return self.__key() == other.__key() def cache(f): def func(*args, **kw): key = (args, hashabledict(kw)) try: return f.__cache__[key] except KeyError: value = f(*args, **kw) f.__cache__[key] = value return value f.__cache__ = {} return func NEXT_IDS = {} def next_id(cur, relation): idx = NEXT_IDS.get(relation) if idx is None: cur.execute(SQL_NEXT_ID % relation) idx = cur.fetchone()[0] NEXT_IDS[relation] = idx + 1 return idx def get_id(cur, relation, attribute, value): select_stmt = SQL_SELECT_ID % (relation, attribute) #log.debug(select_stmt) cur.execute(select_stmt, (value,)) row = cur.fetchone() if row: return row[0] idx = next_id(cur, relation) insert_stmnt = SQL_INSERT_ID % (relation, attribute) #log.debug(insert_stmnt) cur.execute(insert_stmnt, (idx, value)) cur.connection.commit() log.debug("insert %s '%s' id: '%d'" % (relation, value, idx)) return idx #@cache def get_river_id(cur, name): return get_id(cur, "rivers", "name", name) #@cache def get_attribute_id(cur, value): return get_id(cur, "attributes", "value", value) #@cache def get_position_id(cur, value): return get_id(cur, "positions", "value", value) #@cache def get_range_id(cur, river_id, a, b): cur.execute(SQL_SELECT_RANGE_ID, (river_id, a, b)) row = cur.fetchone() if row: return row[0] idx = next_id(cur, "ranges") cur.execute(SQL_INSERT_RANGE_ID, (idx, river_id, a, b)) cur.connection.commit() return idx #@cache def get_annotation_id(cur, range_id, attribute_id, position_id): cur.execute(SQL_SELECT_ANNOTATION_ID, ( range_id, attribute_id, position_id)) row = cur.fetchone() if row: return row[0] idx = next_id(cur, "annotations") cur.execute(SQL_INSERT_ANNOTATION_ID, ( idx, range_id, attribute_id, position_id)) cur.connection.commit() return idx def files(root, accept=lambda x: True): if os.path.isfile(root): if accept(root): yield root elif os.path.isdir(root): stack = [ root ] while stack: cur = stack.pop() for f in os.listdir(cur): p = os.path.join(cur, f) if os.path.isdir(p): stack.append(p) elif os.path.isfile(p) and accept(p): yield p def feed_km(cur, river_id, km_file): log.info("processing: %s" % km_file) for line in codecs.open(km_file, "r", "latin-1"): line = line.strip() if not line or line.startswith('*'): continue parts = [x.strip() for x in line.split(';')] if len(parts) < 3: log.error("cannot process: '%s'" % line) continue m = RANGE.match(parts[2]) try: if m: x = [float(x.replace(",", ".")) for x in m.groups()] a, b = min(x), max(x) if a == b: b = None else: a, b = float(parts[2].replace(",", ".")), None except ValueError: log.error("cannot process: '%s'" % line) continue attribute = parts[0] position = parts[1] attribute_id = get_attribute_id(cur, attribute) if attribute else None position_id = get_position_id(cur, position) if position else None range_id = get_range_id(cur, river_id, a, b) get_annotation_id(cur, range_id, attribute_id, position_id) def main(): usage = "usage: %prog [options] river km-file ..." parser = OptionParser(usage=usage) parser.add_option( "-v", "--verbose", action="store_true", dest="verbose", help="verbose output") parser.add_option( "-r", "--recursive", action="store_true", dest="recursive", default=False, help="recursive") parser.add_option( "-d", "--database", action="store", dest="database", help="database to connect with", default=DEFAULT_DATABASE) options, args = parser.parse_args() if options.verbose: log.setLevel(logging.INFO) if len(args) < 1: log.error("missing river argument") sys.exit(1) river = unicode(args[0], locale.getpreferredencoding()) with db.connect(options.database) as con: cur = con.cursor() river_id = get_river_id(cur, river) for arg in args[1:]: if options.recursive: for km_file in files( arg, lambda x: x.lower().endswith(".km")): feed_km(cur, river_id, km_file) else: feed_km(cur, river_id, arg) if __name__ == '__main__': main()