# HG changeset patch # User Tom Gottfried # Date 1362583684 -3600 # Node ID 08c5e3a646dc789cd78b6dc8fc133af088866ec9 # Parent 6553c8e364dbc7e08171a718fb83a0bb22a2a40e removed obsolete files from backend/contrib diff -r 6553c8e364db -r 08c5e3a646dc flys-backend/contrib/import-gew.py --- a/flys-backend/contrib/import-gew.py Wed Mar 06 15:46:40 2013 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,223 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import sys -import os -import codecs -import re - -HAUPTWERT = re.compile(r"\s*([^\s]+)\s+([^\s+]+)\s+([QWDT-])") -WHITESPACE = re.compile(r"\s+") - -class KM(object): - - def __init__(self, filename): - self.filename = filename - self.load_values() - - def load_values(self): - with codecs.open(self.filename, "r", "latin-1") as f: - for line in f: - line = line.strip() - if not line or line.startswith("*"): - parts = [s.strip() for s in line.split(";")] - # TODO: Use code from import-kms.py - -class AbflussTafel(object): - - def __init__(self, filename): - self.filename = filename - self.name = "" - self.values = [] - self.load_values() - - def load_values(self): - with codecs.open(self.filename, "r", "latin-1") as f: - first = True - for line in f: - line = line.strip() - if not line: continue - if line.startswith("#! name="): - self.name = line[8:] - continue - if line.startswith("#") or line.startswith("*"): - continue - line = line.replace(",", ".") - splits = WHITESPACE.split(line) - - if len(splits) < 2 or len(splits) > 11: - continue - - w = float(splits[0]) - - shift = 0 - - if len(splits) != 11 and first: - shift = 11 - len(splits) - - for idx, q in enumerate(splits[1:]): - i_w = w + shift + idx - i_q = float(q) - w_q = (i_w/100.0, i_q/100.0) - self.values.append(w_q) - - first = False - - -class Hauptwert(object): - def __init__(self, name, value, kind): - self.name = name - self.extra = value - self.kind = kind - -class Pegel(object): - def __init__(self, name, start, stop, sta, at, html): - self.name = name - self.start = start - self.stop = stop - self.sta = sta - self.at = at - self.html = html - self.aeo = 0.0 - self.nullpunkt = 0.0 - self.km = 0.0 - self.hauptwerte = [] - self.load_hauptwerte() - self.at_data = AbflussTafel(self.at) - - def load_hauptwerte(self): - with codecs.open(self.sta, "r", "latin-1") as f: - for line_no, line in enumerate(f): - line = line.rstrip() - if line_no == 0: - first = False - name = line[16:37].strip() - line = [s.replace(",", ".") for s in line[37:].split()] - self.aeo = float(line[0]) - self.nullpunkt = float(line[1]) - print >> sys.stderr, "pegel name: '%s'" % name - print >> sys.stderr, "pegel aeo: '%f'" % self.aeo - print >> sys.stderr, "pegel nullpunkt: '%f'" % self.nullpunkt - elif line_no == 1: - self.km = float(line[29:36].strip().replace(",", ".")) - print >> sys.stderr, "km: '%f'" % self.km - else: - if not line: continue - line = line.replace(",", ".") - m = HAUPTWERT.match(line) - if not m: continue - self.hauptwerte.append(Hauptwert( - m.group(1), float(m.group(2)), m.group(3))) - -class Gewaesser(object): - - def __init__(self, name=None, b_b=None, wst=None): - self.name = name - self.b_b = b_b - self.wst = wst - self.pegel = [] - - def load_pegel(self): - dir_name = os.path.dirname(self.wst) - pegel_glt = find_file(dir_name, "PEGEL.GLT") - if not pegel_glt: - print >> sys.stderr, "Missing PEGEL.GLT for %r" % self.name - return - - print >> sys.stderr, "pegel_glt: %r" % pegel_glt - - with codecs.open(pegel_glt, "r", "latin-1") as f: - for line in f: - line = line.strip() - if not line or line.startswith("#"): - continue - # using re to cope with quoted columns, - # shlex has unicode problems. - parts = [p for p in re.split("( |\\\".*?\\\"|'.*?')", line) - if p.strip()] - if len(parts) < 7: - print >> sys.stderr, "too less colums (need 7): %r" % line - continue - - print >> sys.stderr, "%r" % parts - self.pegel.append(Pegel( - parts[0], - min(float(parts[2]), float(parts[3])), - max(float(parts[2]), float(parts[3])), - norm_path(parts[4], dir_name), - norm_path(parts[5], dir_name), - parts[6])) - - - def __repr__(self): - return u"Gewaesser(name=%r, b_b=%r, wst=%r)" % ( - self.name, self.b_b, self.wst) - -def norm_path(path, ref): - if not os.path.isabs(path): - path = os.path.normpath(os.path.join(ref, path)) - return path - -def find_file(path, what): - what = what.lower() - for filename in os.listdir(path): - p = os.path.join(path, filename) - if os.path.isfile(p) and filename.lower() == what: - return p - return None - - -def read_gew(filename): - - gewaesser = [] - - current = Gewaesser() - - filename = os.path.abspath(filename) - dirname = os.path.dirname(filename) - - with codecs.open(filename, "r", "latin-1") as f: - for line in f: - line = line.strip() - if not line or line.startswith("*"): - continue - - if line.startswith(u"Gewässer:"): - if current.name: - gewaesser.append(current) - current = Gewaesser() - current.name = line[len(u"Gewässer:"):].strip() - elif line.startswith(u"B+B-Info:"): - current.b_b = norm_path(line[len(u"B+B-Info:"):].strip(), - dirname) - elif line.startswith(u"WSTDatei:"): - current.wst = norm_path(line[len(u"WSTDatei:"):].strip(), - dirname) - - if current.name: - gewaesser.append(current) - - return gewaesser - -def main(): - - if len(sys.argv) < 2: - print >> sys.stderr, "missing gew file" - sys.exit(1) - - gew_filename = sys.argv[1] - - if not os.path.isfile(gew_filename): - print >> sys.stderr, "'%s' is not a file" % gew_filename - sys.exit(1) - - gewaesser = read_gew(gew_filename) - - for gew in gewaesser: - gew.load_pegel() - - - -if __name__ == '__main__': - main() -# vim: set fileencoding=utf-8 : diff -r 6553c8e364db -r 08c5e3a646dc flys-backend/contrib/import-kms.py --- a/flys-backend/contrib/import-kms.py Wed Mar 06 15:46:40 2013 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,213 +0,0 @@ -#!/usr/bin/env python - -import sys -import logging -import re -import os - -import sqlite3 as db -import locale -import codecs - -from optparse import OptionParser - -log = logging.getLogger(__name__) -log.setLevel(logging.WARNING) -log.addHandler(logging.StreamHandler(sys.stderr)) - -RANGE = re.compile("([^#]*)#(.*)") -DEFAULT_DATABASE = "flys.db" - -SQL_NEXT_ID = "SELECT coalesce(max(id), -1) + 1 FROM %s" -SQL_SELECT_ID = "SELECT id FROM %s WHERE %s = ?" -SQL_INSERT_ID = "INSERT INTO %s (id, %s) VALUES (?, ?)" - -SQL_SELECT_RANGE_ID = """ -SELECT id FROM ranges WHERE river_id = ? AND a = ? AND b = ? -""" -SQL_INSERT_RANGE_ID = """ -INSERT INTO ranges (id, river_id, a, b) VALUES (?, ?, ?, ?) -""" -SQL_SELECT_ANNOTATION_ID = """ -SELECT id FROM annotations -WHERE range_id = ? AND attribute_id = ? AND position_id = ? -""" -SQL_INSERT_ANNOTATION_ID = """ -INSERT INTO annotations (id, range_id, attribute_id, position_id) -VALUES (?, ?, ?, ?) -""" - -def encode(s): - try: - return unicode(s, "latin-1") - except UnicodeDecodeError: - return unicode.encode(s, locale.getpreferredencoding()) - -class hashabledict(dict): - def __key(self): - return tuple((k, self[k]) for k in sorted(self)) - def __hash__(self): - return hash(self.__key()) - def __eq__(self, other): - return self.__key() == other.__key() - -def cache(f): - def func(*args, **kw): - key = (args, hashabledict(kw)) - try: - return f.__cache__[key] - except KeyError: - value = f(*args, **kw) - f.__cache__[key] = value - return value - f.__cache__ = {} - return func - -NEXT_IDS = {} -def next_id(cur, relation): - idx = NEXT_IDS.get(relation) - if idx is None: - cur.execute(SQL_NEXT_ID % relation) - idx = cur.fetchone()[0] - NEXT_IDS[relation] = idx + 1 - return idx - -def get_id(cur, relation, attribute, value): - select_stmt = SQL_SELECT_ID % (relation, attribute) - #log.debug(select_stmt) - cur.execute(select_stmt, (value,)) - row = cur.fetchone() - if row: return row[0] - idx = next_id(cur, relation) - insert_stmnt = SQL_INSERT_ID % (relation, attribute) - #log.debug(insert_stmnt) - cur.execute(insert_stmnt, (idx, value)) - cur.connection.commit() - log.debug("insert %s '%s' id: '%d'" % (relation, value, idx)) - return idx - -#@cache -def get_river_id(cur, name): - return get_id(cur, "rivers", "name", name) - -#@cache -def get_attribute_id(cur, value): - return get_id(cur, "attributes", "value", value) - -#@cache -def get_position_id(cur, value): - return get_id(cur, "positions", "value", value) - -#@cache -def get_range_id(cur, river_id, a, b): - cur.execute(SQL_SELECT_RANGE_ID, (river_id, a, b)) - row = cur.fetchone() - if row: return row[0] - idx = next_id(cur, "ranges") - cur.execute(SQL_INSERT_RANGE_ID, (idx, river_id, a, b)) - cur.connection.commit() - return idx - -#@cache -def get_annotation_id(cur, range_id, attribute_id, position_id): - cur.execute(SQL_SELECT_ANNOTATION_ID, ( - range_id, attribute_id, position_id)) - row = cur.fetchone() - if row: return row[0] - idx = next_id(cur, "annotations") - cur.execute(SQL_INSERT_ANNOTATION_ID, ( - idx, range_id, attribute_id, position_id)) - cur.connection.commit() - return idx - -def files(root, accept=lambda x: True): - if os.path.isfile(root): - if accept(root): yield root - elif os.path.isdir(root): - stack = [ root ] - while stack: - cur = stack.pop() - for f in os.listdir(cur): - p = os.path.join(cur, f) - if os.path.isdir(p): - stack.append(p) - elif os.path.isfile(p) and accept(p): - yield p - -def feed_km(cur, river_id, km_file): - - log.info("processing: %s" % km_file) - - for line in codecs.open(km_file, "r", "latin-1"): - line = line.strip() - if not line or line.startswith('*'): - continue - parts = [x.strip() for x in line.split(';')] - if len(parts) < 3: - log.error("cannot process: '%s'" % line) - continue - m = RANGE.match(parts[2]) - try: - if m: - x = [float(x.replace(",", ".")) for x in m.groups()] - a, b = min(x), max(x) - if a == b: b = None - else: - a, b = float(parts[2].replace(",", ".")), None - except ValueError: - log.error("cannot process: '%s'" % line) - continue - - attribute = parts[0] - position = parts[1] - attribute_id = get_attribute_id(cur, attribute) if attribute else None - position_id = get_position_id(cur, position) if position else None - - range_id = get_range_id(cur, river_id, a, b) - - get_annotation_id(cur, range_id, attribute_id, position_id) - -def main(): - - usage = "usage: %prog [options] river km-file ..." - parser = OptionParser(usage=usage) - parser.add_option( - "-v", "--verbose", action="store_true", - dest="verbose", - help="verbose output") - parser.add_option( - "-r", "--recursive", action="store_true", - dest="recursive", default=False, - help="recursive") - parser.add_option( - "-d", "--database", action="store", - dest="database", - help="database to connect with", - default=DEFAULT_DATABASE) - - options, args = parser.parse_args() - - if options.verbose: - log.setLevel(logging.INFO) - - if len(args) < 1: - log.error("missing river argument") - sys.exit(1) - - river = unicode(args[0], locale.getpreferredencoding()) - - with db.connect(options.database) as con: - cur = con.cursor() - river_id = get_river_id(cur, river) - - for arg in args[1:]: - if options.recursive: - for km_file in files( - arg, lambda x: x.lower().endswith(".km")): - feed_km(cur, river_id, km_file) - else: - feed_km(cur, river_id, arg) - - -if __name__ == '__main__': - main()