sascha@162: #!/usr/bin/env python sascha@162: sascha@162: import sys sascha@162: import logging sascha@162: import re sascha@162: import os sascha@162: sascha@162: import sqlite3 as db sascha@162: import locale sascha@162: import codecs sascha@162: sascha@162: from optparse import OptionParser sascha@162: sascha@162: log = logging.getLogger(__name__) sascha@162: log.setLevel(logging.WARNING) sascha@162: log.addHandler(logging.StreamHandler(sys.stderr)) sascha@162: sascha@162: RANGE = re.compile("([^#]*)#(.*)") sascha@162: DEFAULT_DATABASE = "flys.db" sascha@162: sascha@162: SQL_NEXT_ID = "SELECT coalesce(max(id), -1) + 1 FROM %s" sascha@162: SQL_SELECT_ID = "SELECT id FROM %s WHERE %s = ?" sascha@162: SQL_INSERT_ID = "INSERT INTO %s (id, %s) VALUES (?, ?)" sascha@162: sascha@162: SQL_SELECT_RANGE_ID = """ sascha@162: SELECT id FROM ranges WHERE river_id = ? AND a = ? AND b = ? sascha@162: """ sascha@162: SQL_INSERT_RANGE_ID = """ sascha@162: INSERT INTO ranges (id, river_id, a, b) VALUES (?, ?, ?, ?) sascha@162: """ sascha@162: SQL_SELECT_ANNOTATION_ID = """ sascha@162: SELECT id FROM annotations sascha@162: WHERE range_id = ? AND attribute_id = ? AND position_id = ? sascha@162: """ sascha@162: SQL_INSERT_ANNOTATION_ID = """ sascha@162: INSERT INTO annotations (id, range_id, attribute_id, position_id) sascha@162: VALUES (?, ?, ?, ?) sascha@162: """ sascha@162: sascha@162: def encode(s): sascha@162: try: sascha@162: return unicode(s, "latin-1") sascha@162: except UnicodeDecodeError: sascha@162: return unicode.encode(s, locale.getpreferredencoding()) sascha@162: sascha@162: class hashabledict(dict): sascha@162: def __key(self): sascha@162: return tuple((k, self[k]) for k in sorted(self)) sascha@162: def __hash__(self): sascha@162: return hash(self.__key()) sascha@162: def __eq__(self, other): sascha@162: return self.__key() == other.__key() sascha@162: sascha@162: def cache(f): sascha@162: def func(*args, **kw): sascha@162: key = (args, hashabledict(kw)) sascha@162: try: sascha@162: return f.__cache__[key] sascha@162: except KeyError: sascha@162: value = f(*args, **kw) sascha@162: f.__cache__[key] = value sascha@162: return value sascha@162: f.__cache__ = {} sascha@162: return func sascha@162: sascha@162: NEXT_IDS = {} sascha@162: def next_id(cur, relation): sascha@162: idx = NEXT_IDS.get(relation) sascha@162: if idx is None: sascha@162: cur.execute(SQL_NEXT_ID % relation) sascha@162: idx = cur.fetchone()[0] sascha@162: NEXT_IDS[relation] = idx + 1 sascha@162: return idx sascha@162: sascha@162: def get_id(cur, relation, attribute, value): sascha@162: select_stmt = SQL_SELECT_ID % (relation, attribute) sascha@162: #log.debug(select_stmt) sascha@162: cur.execute(select_stmt, (value,)) sascha@162: row = cur.fetchone() sascha@162: if row: return row[0] sascha@162: idx = next_id(cur, relation) sascha@162: insert_stmnt = SQL_INSERT_ID % (relation, attribute) sascha@162: #log.debug(insert_stmnt) sascha@162: cur.execute(insert_stmnt, (idx, value)) sascha@162: cur.connection.commit() sascha@162: log.debug("insert %s '%s' id: '%d'" % (relation, value, idx)) sascha@162: return idx sascha@162: sascha@162: #@cache sascha@162: def get_river_id(cur, name): sascha@162: return get_id(cur, "rivers", "name", name) sascha@162: sascha@162: #@cache sascha@162: def get_attribute_id(cur, value): sascha@162: return get_id(cur, "attributes", "value", value) sascha@162: sascha@162: #@cache sascha@162: def get_position_id(cur, value): sascha@162: return get_id(cur, "positions", "value", value) sascha@162: sascha@162: #@cache sascha@162: def get_range_id(cur, river_id, a, b): sascha@162: cur.execute(SQL_SELECT_RANGE_ID, (river_id, a, b)) sascha@162: row = cur.fetchone() sascha@162: if row: return row[0] sascha@162: idx = next_id(cur, "ranges") sascha@162: cur.execute(SQL_INSERT_RANGE_ID, (idx, river_id, a, b)) sascha@162: cur.connection.commit() sascha@162: return idx sascha@162: sascha@162: #@cache sascha@162: def get_annotation_id(cur, range_id, attribute_id, position_id): sascha@162: cur.execute(SQL_SELECT_ANNOTATION_ID, ( sascha@162: range_id, attribute_id, position_id)) sascha@162: row = cur.fetchone() sascha@162: if row: return row[0] sascha@162: idx = next_id(cur, "annotations") sascha@162: cur.execute(SQL_INSERT_ANNOTATION_ID, ( sascha@162: idx, range_id, attribute_id, position_id)) sascha@162: cur.connection.commit() sascha@162: return idx sascha@162: sascha@162: def files(root, accept=lambda x: True): sascha@162: if os.path.isfile(root): sascha@162: if accept(root): yield root sascha@162: elif os.path.isdir(root): sascha@162: stack = [ root ] sascha@162: while stack: sascha@162: cur = stack.pop() sascha@162: for f in os.listdir(cur): sascha@162: p = os.path.join(cur, f) sascha@162: if os.path.isdir(p): sascha@162: stack.append(p) sascha@162: elif os.path.isfile(p) and accept(p): sascha@162: yield p sascha@162: sascha@162: def feed_km(cur, river_id, km_file): sascha@162: sascha@162: log.info("processing: %s" % km_file) sascha@162: sascha@162: for line in codecs.open(km_file, "r", "latin-1"): sascha@162: line = line.strip() sascha@162: if not line or line.startswith('*'): sascha@162: continue sascha@162: parts = [x.strip() for x in line.split(';')] sascha@162: if len(parts) < 3: sascha@162: log.error("cannot process: '%s'" % line) sascha@162: continue sascha@162: m = RANGE.match(parts[2]) sascha@162: try: sascha@162: if m: sascha@162: x = [float(x.replace(",", ".")) for x in m.groups()] sascha@162: a, b = min(x), max(x) sascha@162: if a == b: b = None sascha@162: else: sascha@162: a, b = float(parts[2].replace(",", ".")), None sascha@162: except ValueError: sascha@162: log.error("cannot process: '%s'" % line) sascha@162: continue sascha@162: sascha@162: attribute = parts[0] sascha@162: position = parts[1] sascha@162: attribute_id = get_attribute_id(cur, attribute) if attribute else None sascha@162: position_id = get_position_id(cur, position) if position else None sascha@162: sascha@162: range_id = get_range_id(cur, river_id, a, b) sascha@162: sascha@162: get_annotation_id(cur, range_id, attribute_id, position_id) sascha@162: sascha@162: def main(): sascha@162: sascha@162: usage = "usage: %prog [options] river km-file ..." sascha@162: parser = OptionParser(usage=usage) sascha@162: parser.add_option( sascha@162: "-v", "--verbose", action="store_true", sascha@162: dest="verbose", sascha@162: help="verbose output") sascha@162: parser.add_option( sascha@162: "-r", "--recursive", action="store_true", sascha@162: dest="recursive", default=False, sascha@162: help="recursive") sascha@162: parser.add_option( sascha@162: "-d", "--database", action="store", sascha@162: dest="database", sascha@162: help="database to connect with", sascha@162: default=DEFAULT_DATABASE) sascha@162: sascha@162: options, args = parser.parse_args() sascha@162: sascha@162: if options.verbose: sascha@162: log.setLevel(logging.INFO) sascha@162: sascha@162: if len(args) < 1: sascha@162: log.error("missing river argument") sascha@162: sys.exit(1) sascha@162: sascha@162: river = unicode(args[0], locale.getpreferredencoding()) sascha@162: sascha@162: with db.connect(options.database) as con: sascha@162: cur = con.cursor() sascha@162: river_id = get_river_id(cur, river) sascha@162: sascha@162: for arg in args[1:]: sascha@162: if options.recursive: sascha@162: for km_file in files( sascha@162: arg, lambda x: x.lower().endswith(".km")): sascha@162: feed_km(cur, river_id, km_file) sascha@162: else: sascha@162: feed_km(cur, river_id, arg) sascha@162: sascha@162: sascha@162: if __name__ == '__main__': sascha@162: main()