diff flys-backend/contrib/import-kms.py @ 162:80669241956c

Initial database import scripts. Not finished, yet. flys-backend/trunk@1333 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author Sascha L. Teichmann <sascha.teichmann@intevation.de>
date Tue, 22 Feb 2011 10:34:53 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/flys-backend/contrib/import-kms.py	Tue Feb 22 10:34:53 2011 +0000
@@ -0,0 +1,213 @@
+#!/usr/bin/env python
+
+import sys
+import logging
+import re
+import os
+
+import sqlite3 as db
+import locale
+import codecs
+
+from optparse import OptionParser
+
+log = logging.getLogger(__name__) 
+log.setLevel(logging.WARNING)
+log.addHandler(logging.StreamHandler(sys.stderr))
+
+RANGE = re.compile("([^#]*)#(.*)")
+DEFAULT_DATABASE = "flys.db"
+
+SQL_NEXT_ID   = "SELECT coalesce(max(id), -1) + 1 FROM %s"
+SQL_SELECT_ID = "SELECT id FROM %s WHERE %s = ?"
+SQL_INSERT_ID = "INSERT INTO %s (id, %s) VALUES (?, ?)"
+
+SQL_SELECT_RANGE_ID = """
+SELECT id FROM ranges WHERE river_id = ? AND a = ? AND b = ?
+"""
+SQL_INSERT_RANGE_ID = """
+INSERT INTO ranges (id, river_id, a, b) VALUES (?, ?, ?, ?)
+"""
+SQL_SELECT_ANNOTATION_ID = """
+SELECT id FROM annotations
+WHERE range_id = ? AND attribute_id = ? AND position_id = ?
+"""
+SQL_INSERT_ANNOTATION_ID = """
+INSERT INTO annotations (id, range_id, attribute_id, position_id) 
+VALUES (?, ?, ?, ?)
+"""
+
+def encode(s):
+    try:
+        return unicode(s, "latin-1")
+    except UnicodeDecodeError:
+        return unicode.encode(s, locale.getpreferredencoding())
+
+class hashabledict(dict):
+    def __key(self):
+        return tuple((k, self[k]) for k in sorted(self))
+    def __hash__(self):
+        return hash(self.__key())
+    def __eq__(self, other):
+        return self.__key() == other.__key()
+
+def cache(f):
+    def func(*args, **kw):
+        key = (args, hashabledict(kw))
+        try:
+            return f.__cache__[key]
+        except KeyError:
+            value = f(*args, **kw)
+            f.__cache__[key] = value
+            return value
+    f.__cache__ = {}
+    return func
+
+NEXT_IDS = {}
+def next_id(cur, relation):
+    idx = NEXT_IDS.get(relation)
+    if idx is None:
+        cur.execute(SQL_NEXT_ID % relation)
+        idx = cur.fetchone()[0]
+    NEXT_IDS[relation] = idx + 1
+    return idx
+
+def get_id(cur, relation, attribute, value):
+    select_stmt = SQL_SELECT_ID % (relation, attribute)
+    #log.debug(select_stmt)
+    cur.execute(select_stmt, (value,))
+    row = cur.fetchone()
+    if row: return row[0]
+    idx = next_id(cur, relation)
+    insert_stmnt = SQL_INSERT_ID % (relation, attribute)
+    #log.debug(insert_stmnt)
+    cur.execute(insert_stmnt, (idx, value))
+    cur.connection.commit()
+    log.debug("insert %s '%s' id: '%d'" % (relation, value, idx))
+    return idx
+
+#@cache
+def get_river_id(cur, name):
+    return get_id(cur, "rivers", "name", name)
+
+#@cache
+def get_attribute_id(cur, value):
+    return get_id(cur, "attributes", "value", value)
+
+#@cache
+def get_position_id(cur, value):
+    return get_id(cur, "positions", "value", value)
+
+#@cache
+def get_range_id(cur, river_id, a, b):
+    cur.execute(SQL_SELECT_RANGE_ID, (river_id, a, b))
+    row = cur.fetchone()
+    if row: return row[0]
+    idx = next_id(cur, "ranges")
+    cur.execute(SQL_INSERT_RANGE_ID, (idx, river_id, a, b))
+    cur.connection.commit()
+    return idx
+
+#@cache
+def get_annotation_id(cur, range_id, attribute_id, position_id):
+    cur.execute(SQL_SELECT_ANNOTATION_ID, (
+        range_id, attribute_id, position_id))
+    row = cur.fetchone()
+    if row: return row[0]
+    idx = next_id(cur, "annotations")
+    cur.execute(SQL_INSERT_ANNOTATION_ID, (
+        idx, range_id, attribute_id, position_id))
+    cur.connection.commit()
+    return idx
+
+def files(root, accept=lambda x: True):
+    if os.path.isfile(root):
+        if accept(root): yield root
+    elif os.path.isdir(root):
+        stack = [ root ]
+        while stack:
+            cur = stack.pop()
+            for f in os.listdir(cur):
+                p = os.path.join(cur, f)
+                if os.path.isdir(p):
+                    stack.append(p)
+                elif os.path.isfile(p) and accept(p):
+                    yield p
+
+def feed_km(cur, river_id, km_file):
+
+    log.info("processing: %s" % km_file)
+
+    for line in codecs.open(km_file, "r", "latin-1"):
+        line = line.strip()
+        if not line or line.startswith('*'):
+            continue
+        parts = [x.strip() for x in line.split(';')]
+        if len(parts) < 3:
+            log.error("cannot process: '%s'" % line)
+            continue
+        m = RANGE.match(parts[2])
+        try:
+            if m:
+                x = [float(x.replace(",", ".")) for x in m.groups()]
+                a, b = min(x), max(x)
+                if a == b: b = None
+            else:
+                a, b = float(parts[2].replace(",", ".")), None
+        except ValueError:
+            log.error("cannot process: '%s'" % line)
+            continue
+
+        attribute = parts[0]
+        position  = parts[1]
+        attribute_id = get_attribute_id(cur, attribute) if attribute else None
+        position_id  = get_position_id(cur, position) if position else None
+
+        range_id = get_range_id(cur, river_id, a, b)
+
+        get_annotation_id(cur, range_id, attribute_id, position_id)
+
+def main():
+
+    usage = "usage: %prog [options] river km-file ..."
+    parser = OptionParser(usage=usage)
+    parser.add_option(
+        "-v", "--verbose", action="store_true",
+        dest="verbose",
+        help="verbose output")
+    parser.add_option(
+        "-r", "--recursive", action="store_true",
+        dest="recursive", default=False,
+        help="recursive")
+    parser.add_option(
+        "-d", "--database", action="store",
+        dest="database",
+        help="database to connect with",
+        default=DEFAULT_DATABASE)
+
+    options, args = parser.parse_args()
+
+    if options.verbose:
+        log.setLevel(logging.INFO)
+    
+    if len(args) < 1:
+        log.error("missing river argument")
+        sys.exit(1)
+
+    river = unicode(args[0], locale.getpreferredencoding())
+
+    with db.connect(options.database) as con:
+        cur = con.cursor()
+        river_id = get_river_id(cur, river)
+
+        for arg in args[1:]:
+            if options.recursive:
+                for km_file in files(
+                    arg, lambda x: x.lower().endswith(".km")):
+                    feed_km(cur, river_id, km_file)
+            else:
+                feed_km(cur, river_id, arg)
+        
+
+if __name__ == '__main__':
+    main()

http://dive4elements.wald.intevation.org