Mercurial > dive4elements > river

--- a/flys-backend/contrib/import-gew.py	Wed Mar 06 15:46:40 2013 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,223 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import sys
-import os
-import codecs
-import re
-
-HAUPTWERT  = re.compile(r"\s*([^\s]+)\s+([^\s+]+)\s+([QWDT-])")
-WHITESPACE = re.compile(r"\s+")
-
-class KM(object):
-
-    def __init__(self, filename):
-        self.filename = filename
-        self.load_values()
-
-    def load_values(self):
-        with codecs.open(self.filename, "r", "latin-1") as f:
-            for line in f:
-                line = line.strip()
-                if not line or line.startswith("*"):
-                    parts = [s.strip() for s in line.split(";")]
-                    # TODO: Use code from import-kms.py
-
-class AbflussTafel(object):
-
-    def __init__(self, filename):
-        self.filename = filename
-        self.name     = ""
-        self.values = []
-        self.load_values()
-
-    def load_values(self):
-        with codecs.open(self.filename, "r", "latin-1") as f:
-            first = True
-            for line in f:
-                line = line.strip()
-                if not line: continue
-                if line.startswith("#! name="):
-                    self.name = line[8:]
-                    continue
-                if line.startswith("#") or line.startswith("*"):
-                    continue
-                line = line.replace(",", ".")
-                splits = WHITESPACE.split(line)
-
-                if len(splits) < 2 or len(splits) > 11:
-                    continue
-
-                w = float(splits[0])
-
-                shift = 0
-
-                if len(splits) != 11 and first:
-                    shift = 11 - len(splits)
-
-                for idx, q in enumerate(splits[1:]):
-                    i_w = w + shift + idx
-                    i_q = float(q)
-                    w_q = (i_w/100.0, i_q/100.0)
-                    self.values.append(w_q)
-
-                first = False
-
-
-class Hauptwert(object):
-    def __init__(self, name, value, kind):
-        self.name  = name
-        self.extra = value
-        self.kind  = kind
-
-class Pegel(object):
-    def __init__(self, name, start, stop, sta, at, html):
-        self.name       = name
-        self.start      = start
-        self.stop       = stop
-        self.sta        = sta
-        self.at         = at
-        self.html       = html
-        self.aeo        = 0.0
-        self.nullpunkt  = 0.0
-        self.km         = 0.0
-        self.hauptwerte = []
-        self.load_hauptwerte()
-        self.at_data = AbflussTafel(self.at)
-
-    def load_hauptwerte(self):
-        with codecs.open(self.sta, "r", "latin-1") as f:
-            for line_no, line in enumerate(f):
-                line = line.rstrip()
-                if line_no == 0:
-                    first = False
-                    name = line[16:37].strip()
-                    line = [s.replace(",", ".") for s in line[37:].split()]
-                    self.aeo = float(line[0])
-                    self.nullpunkt = float(line[1])
-                    print >> sys.stderr, "pegel name: '%s'" % name
-                    print >> sys.stderr, "pegel aeo: '%f'" % self.aeo
-                    print >> sys.stderr, "pegel nullpunkt: '%f'" % self.nullpunkt
-                elif line_no == 1:
-                    self.km = float(line[29:36].strip().replace(",", "."))
-                    print >> sys.stderr, "km: '%f'" % self.km
-                else:
-                    if not line: continue
-                    line = line.replace(",", ".")
-                    m = HAUPTWERT.match(line)
-                    if not m: continue
-                    self.hauptwerte.append(Hauptwert(
-                        m.group(1), float(m.group(2)), m.group(3)))
-
-class Gewaesser(object):
-
-    def __init__(self, name=None, b_b=None, wst=None):
-        self.name = name
-        self.b_b = b_b
-        self.wst = wst
-        self.pegel = []
-
-    def load_pegel(self):
-        dir_name = os.path.dirname(self.wst)
-        pegel_glt = find_file(dir_name, "PEGEL.GLT")
-        if not pegel_glt:
-            print >> sys.stderr, "Missing PEGEL.GLT for %r" % self.name
-            return
-
-        print >> sys.stderr, "pegel_glt: %r" % pegel_glt
-
-        with codecs.open(pegel_glt, "r", "latin-1") as f:
-            for line in f:
-                line = line.strip()
-                if not line or line.startswith("#"):
-                    continue
-                # using re to cope with quoted columns,
-                # shlex has unicode problems.
-                parts = [p for p in re.split("( |\\\".*?\\\"|'.*?')", line)
-                         if p.strip()]
-                if len(parts) < 7:
-                    print >> sys.stderr, "too less colums (need 7): %r" % line
-                    continue
-
-                print >> sys.stderr, "%r" % parts
-                self.pegel.append(Pegel(
-                    parts[0],
-                    min(float(parts[2]), float(parts[3])),
-                    max(float(parts[2]), float(parts[3])),
-                    norm_path(parts[4], dir_name),
-                    norm_path(parts[5], dir_name),
-                    parts[6]))
-
-
-    def __repr__(self):
-        return u"Gewaesser(name=%r, b_b=%r, wst=%r)" % (
-            self.name, self.b_b, self.wst)
-
-def norm_path(path, ref):
-    if not os.path.isabs(path):
-        path = os.path.normpath(os.path.join(ref, path))
-    return path
-
-def find_file(path, what):
-    what = what.lower()
-    for filename in os.listdir(path):
-        p = os.path.join(path, filename)
-        if os.path.isfile(p) and filename.lower() == what:
-            return p
-    return None
-
-
-def read_gew(filename):
-
-    gewaesser = []
-
-    current = Gewaesser()
-
-    filename = os.path.abspath(filename)
-    dirname = os.path.dirname(filename)
-
-    with codecs.open(filename, "r", "latin-1") as f:
-        for line in f:
-            line = line.strip()
-            if not line or line.startswith("*"):
-                continue
-
-            if line.startswith(u"Gewässer:"):
-                if current.name:
-                    gewaesser.append(current)
-                    current = Gewaesser()
-                current.name = line[len(u"Gewässer:"):].strip()
-            elif line.startswith(u"B+B-Info:"):
-                current.b_b = norm_path(line[len(u"B+B-Info:"):].strip(),
-                                        dirname)
-            elif line.startswith(u"WSTDatei:"):
-                current.wst = norm_path(line[len(u"WSTDatei:"):].strip(),
-                                        dirname)
-
-        if current.name:
-            gewaesser.append(current)
-
-    return gewaesser
-
-def main():
-
-    if len(sys.argv) < 2:
-        print >> sys.stderr, "missing gew file"
-        sys.exit(1)
-
-    gew_filename = sys.argv[1]
-
-    if not os.path.isfile(gew_filename):
-        print >> sys.stderr, "'%s' is not a file" % gew_filename
-        sys.exit(1)
-
-    gewaesser = read_gew(gew_filename)
-
-    for gew in gewaesser:
-        gew.load_pegel()
-
-
-
-if __name__ == '__main__':
-    main()
-# vim: set fileencoding=utf-8 :
--- a/flys-backend/contrib/import-kms.py	Wed Mar 06 15:46:40 2013 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,213 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-import logging
-import re
-import os
-
-import sqlite3 as db
-import locale
-import codecs
-
-from optparse import OptionParser
-
-log = logging.getLogger(__name__)
-log.setLevel(logging.WARNING)
-log.addHandler(logging.StreamHandler(sys.stderr))
-
-RANGE = re.compile("([^#]*)#(.*)")
-DEFAULT_DATABASE = "flys.db"
-
-SQL_NEXT_ID   = "SELECT coalesce(max(id), -1) + 1 FROM %s"
-SQL_SELECT_ID = "SELECT id FROM %s WHERE %s = ?"
-SQL_INSERT_ID = "INSERT INTO %s (id, %s) VALUES (?, ?)"
-
-SQL_SELECT_RANGE_ID = """
-SELECT id FROM ranges WHERE river_id = ? AND a = ? AND b = ?
-"""
-SQL_INSERT_RANGE_ID = """
-INSERT INTO ranges (id, river_id, a, b) VALUES (?, ?, ?, ?)
-"""
-SQL_SELECT_ANNOTATION_ID = """
-SELECT id FROM annotations
-WHERE range_id = ? AND attribute_id = ? AND position_id = ?
-"""
-SQL_INSERT_ANNOTATION_ID = """
-INSERT INTO annotations (id, range_id, attribute_id, position_id)
-VALUES (?, ?, ?, ?)
-"""
-
-def encode(s):
-    try:
-        return unicode(s, "latin-1")
-    except UnicodeDecodeError:
-        return unicode.encode(s, locale.getpreferredencoding())
-
-class hashabledict(dict):
-    def __key(self):
-        return tuple((k, self[k]) for k in sorted(self))
-    def __hash__(self):
-        return hash(self.__key())
-    def __eq__(self, other):
-        return self.__key() == other.__key()
-
-def cache(f):
-    def func(*args, **kw):
-        key = (args, hashabledict(kw))
-        try:
-            return f.__cache__[key]
-        except KeyError:
-            value = f(*args, **kw)
-            f.__cache__[key] = value
-            return value
-    f.__cache__ = {}
-    return func
-
-NEXT_IDS = {}
-def next_id(cur, relation):
-    idx = NEXT_IDS.get(relation)
-    if idx is None:
-        cur.execute(SQL_NEXT_ID % relation)
-        idx = cur.fetchone()[0]
-    NEXT_IDS[relation] = idx + 1
-    return idx
-
-def get_id(cur, relation, attribute, value):
-    select_stmt = SQL_SELECT_ID % (relation, attribute)
-    #log.debug(select_stmt)
-    cur.execute(select_stmt, (value,))
-    row = cur.fetchone()
-    if row: return row[0]
-    idx = next_id(cur, relation)
-    insert_stmnt = SQL_INSERT_ID % (relation, attribute)
-    #log.debug(insert_stmnt)
-    cur.execute(insert_stmnt, (idx, value))
-    cur.connection.commit()
-    log.debug("insert %s '%s' id: '%d'" % (relation, value, idx))
-    return idx
-
-#@cache
-def get_river_id(cur, name):
-    return get_id(cur, "rivers", "name", name)
-
-#@cache
-def get_attribute_id(cur, value):
-    return get_id(cur, "attributes", "value", value)
-
-#@cache
-def get_position_id(cur, value):
-    return get_id(cur, "positions", "value", value)
-
-#@cache
-def get_range_id(cur, river_id, a, b):
-    cur.execute(SQL_SELECT_RANGE_ID, (river_id, a, b))
-    row = cur.fetchone()
-    if row: return row[0]
-    idx = next_id(cur, "ranges")
-    cur.execute(SQL_INSERT_RANGE_ID, (idx, river_id, a, b))
-    cur.connection.commit()
-    return idx
-
-#@cache
-def get_annotation_id(cur, range_id, attribute_id, position_id):
-    cur.execute(SQL_SELECT_ANNOTATION_ID, (
-        range_id, attribute_id, position_id))
-    row = cur.fetchone()
-    if row: return row[0]
-    idx = next_id(cur, "annotations")
-    cur.execute(SQL_INSERT_ANNOTATION_ID, (
-        idx, range_id, attribute_id, position_id))
-    cur.connection.commit()
-    return idx
-
-def files(root, accept=lambda x: True):
-    if os.path.isfile(root):
-        if accept(root): yield root
-    elif os.path.isdir(root):
-        stack = [ root ]
-        while stack:
-            cur = stack.pop()
-            for f in os.listdir(cur):
-                p = os.path.join(cur, f)
-                if os.path.isdir(p):
-                    stack.append(p)
-                elif os.path.isfile(p) and accept(p):
-                    yield p
-
-def feed_km(cur, river_id, km_file):
-
-    log.info("processing: %s" % km_file)
-
-    for line in codecs.open(km_file, "r", "latin-1"):
-        line = line.strip()
-        if not line or line.startswith('*'):
-            continue
-        parts = [x.strip() for x in line.split(';')]
-        if len(parts) < 3:
-            log.error("cannot process: '%s'" % line)
-            continue
-        m = RANGE.match(parts[2])
-        try:
-            if m:
-                x = [float(x.replace(",", ".")) for x in m.groups()]
-                a, b = min(x), max(x)
-                if a == b: b = None
-            else:
-                a, b = float(parts[2].replace(",", ".")), None
-        except ValueError:
-            log.error("cannot process: '%s'" % line)
-            continue
-
-        attribute = parts[0]
-        position  = parts[1]
-        attribute_id = get_attribute_id(cur, attribute) if attribute else None
-        position_id  = get_position_id(cur, position) if position else None
-
-        range_id = get_range_id(cur, river_id, a, b)
-
-        get_annotation_id(cur, range_id, attribute_id, position_id)
-
-def main():
-
-    usage = "usage: %prog [options] river km-file ..."
-    parser = OptionParser(usage=usage)
-    parser.add_option(
-        "-v", "--verbose", action="store_true",
-        dest="verbose",
-        help="verbose output")
-    parser.add_option(
-        "-r", "--recursive", action="store_true",
-        dest="recursive", default=False,
-        help="recursive")
-    parser.add_option(
-        "-d", "--database", action="store",
-        dest="database",
-        help="database to connect with",
-        default=DEFAULT_DATABASE)
-
-    options, args = parser.parse_args()
-
-    if options.verbose:
-        log.setLevel(logging.INFO)
-
-    if len(args) < 1:
-        log.error("missing river argument")
-        sys.exit(1)
-
-    river = unicode(args[0], locale.getpreferredencoding())
-
-    with db.connect(options.database) as con:
-        cur = con.cursor()
-        river_id = get_river_id(cur, river)
-
-        for arg in args[1:]:
-            if options.recursive:
-                for km_file in files(
-                    arg, lambda x: x.lower().endswith(".km")):
-                    feed_km(cur, river_id, km_file)
-            else:
-                feed_km(cur, river_id, arg)
-
-
-if __name__ == '__main__':
-    main()