Mercurial > dive4elements > river
diff flys-backend/contrib/import-kms.py @ 162:80669241956c
Initial database import scripts. Not finished, yet.
flys-backend/trunk@1333 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author | Sascha L. Teichmann <sascha.teichmann@intevation.de> |
---|---|
date | Tue, 22 Feb 2011 10:34:53 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/flys-backend/contrib/import-kms.py Tue Feb 22 10:34:53 2011 +0000 @@ -0,0 +1,213 @@ +#!/usr/bin/env python + +import sys +import logging +import re +import os + +import sqlite3 as db +import locale +import codecs + +from optparse import OptionParser + +log = logging.getLogger(__name__) +log.setLevel(logging.WARNING) +log.addHandler(logging.StreamHandler(sys.stderr)) + +RANGE = re.compile("([^#]*)#(.*)") +DEFAULT_DATABASE = "flys.db" + +SQL_NEXT_ID = "SELECT coalesce(max(id), -1) + 1 FROM %s" +SQL_SELECT_ID = "SELECT id FROM %s WHERE %s = ?" +SQL_INSERT_ID = "INSERT INTO %s (id, %s) VALUES (?, ?)" + +SQL_SELECT_RANGE_ID = """ +SELECT id FROM ranges WHERE river_id = ? AND a = ? AND b = ? +""" +SQL_INSERT_RANGE_ID = """ +INSERT INTO ranges (id, river_id, a, b) VALUES (?, ?, ?, ?) +""" +SQL_SELECT_ANNOTATION_ID = """ +SELECT id FROM annotations +WHERE range_id = ? AND attribute_id = ? AND position_id = ? +""" +SQL_INSERT_ANNOTATION_ID = """ +INSERT INTO annotations (id, range_id, attribute_id, position_id) +VALUES (?, ?, ?, ?) +""" + +def encode(s): + try: + return unicode(s, "latin-1") + except UnicodeDecodeError: + return unicode.encode(s, locale.getpreferredencoding()) + +class hashabledict(dict): + def __key(self): + return tuple((k, self[k]) for k in sorted(self)) + def __hash__(self): + return hash(self.__key()) + def __eq__(self, other): + return self.__key() == other.__key() + +def cache(f): + def func(*args, **kw): + key = (args, hashabledict(kw)) + try: + return f.__cache__[key] + except KeyError: + value = f(*args, **kw) + f.__cache__[key] = value + return value + f.__cache__ = {} + return func + +NEXT_IDS = {} +def next_id(cur, relation): + idx = NEXT_IDS.get(relation) + if idx is None: + cur.execute(SQL_NEXT_ID % relation) + idx = cur.fetchone()[0] + NEXT_IDS[relation] = idx + 1 + return idx + +def get_id(cur, relation, attribute, value): + select_stmt = SQL_SELECT_ID % (relation, attribute) + #log.debug(select_stmt) + cur.execute(select_stmt, (value,)) + row = cur.fetchone() + if row: return row[0] + idx = next_id(cur, relation) + insert_stmnt = SQL_INSERT_ID % (relation, attribute) + #log.debug(insert_stmnt) + cur.execute(insert_stmnt, (idx, value)) + cur.connection.commit() + log.debug("insert %s '%s' id: '%d'" % (relation, value, idx)) + return idx + +#@cache +def get_river_id(cur, name): + return get_id(cur, "rivers", "name", name) + +#@cache +def get_attribute_id(cur, value): + return get_id(cur, "attributes", "value", value) + +#@cache +def get_position_id(cur, value): + return get_id(cur, "positions", "value", value) + +#@cache +def get_range_id(cur, river_id, a, b): + cur.execute(SQL_SELECT_RANGE_ID, (river_id, a, b)) + row = cur.fetchone() + if row: return row[0] + idx = next_id(cur, "ranges") + cur.execute(SQL_INSERT_RANGE_ID, (idx, river_id, a, b)) + cur.connection.commit() + return idx + +#@cache +def get_annotation_id(cur, range_id, attribute_id, position_id): + cur.execute(SQL_SELECT_ANNOTATION_ID, ( + range_id, attribute_id, position_id)) + row = cur.fetchone() + if row: return row[0] + idx = next_id(cur, "annotations") + cur.execute(SQL_INSERT_ANNOTATION_ID, ( + idx, range_id, attribute_id, position_id)) + cur.connection.commit() + return idx + +def files(root, accept=lambda x: True): + if os.path.isfile(root): + if accept(root): yield root + elif os.path.isdir(root): + stack = [ root ] + while stack: + cur = stack.pop() + for f in os.listdir(cur): + p = os.path.join(cur, f) + if os.path.isdir(p): + stack.append(p) + elif os.path.isfile(p) and accept(p): + yield p + +def feed_km(cur, river_id, km_file): + + log.info("processing: %s" % km_file) + + for line in codecs.open(km_file, "r", "latin-1"): + line = line.strip() + if not line or line.startswith('*'): + continue + parts = [x.strip() for x in line.split(';')] + if len(parts) < 3: + log.error("cannot process: '%s'" % line) + continue + m = RANGE.match(parts[2]) + try: + if m: + x = [float(x.replace(",", ".")) for x in m.groups()] + a, b = min(x), max(x) + if a == b: b = None + else: + a, b = float(parts[2].replace(",", ".")), None + except ValueError: + log.error("cannot process: '%s'" % line) + continue + + attribute = parts[0] + position = parts[1] + attribute_id = get_attribute_id(cur, attribute) if attribute else None + position_id = get_position_id(cur, position) if position else None + + range_id = get_range_id(cur, river_id, a, b) + + get_annotation_id(cur, range_id, attribute_id, position_id) + +def main(): + + usage = "usage: %prog [options] river km-file ..." + parser = OptionParser(usage=usage) + parser.add_option( + "-v", "--verbose", action="store_true", + dest="verbose", + help="verbose output") + parser.add_option( + "-r", "--recursive", action="store_true", + dest="recursive", default=False, + help="recursive") + parser.add_option( + "-d", "--database", action="store", + dest="database", + help="database to connect with", + default=DEFAULT_DATABASE) + + options, args = parser.parse_args() + + if options.verbose: + log.setLevel(logging.INFO) + + if len(args) < 1: + log.error("missing river argument") + sys.exit(1) + + river = unicode(args[0], locale.getpreferredencoding()) + + with db.connect(options.database) as con: + cur = con.cursor() + river_id = get_river_id(cur, river) + + for arg in args[1:]: + if options.recursive: + for km_file in files( + arg, lambda x: x.lower().endswith(".km")): + feed_km(cur, river_id, km_file) + else: + feed_km(cur, river_id, arg) + + +if __name__ == '__main__': + main()