Mercurial > dive4elements > river
diff flys-backend/contrib/import-gew.py @ 162:80669241956c
Initial database import scripts. Not finished, yet.
flys-backend/trunk@1333 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author | Sascha L. Teichmann <sascha.teichmann@intevation.de> |
---|---|
date | Tue, 22 Feb 2011 10:34:53 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/flys-backend/contrib/import-gew.py Tue Feb 22 10:34:53 2011 +0000 @@ -0,0 +1,223 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys +import os +import codecs +import re + +HAUPTWERT = re.compile(r"\s*([^\s]+)\s+([^\s+]+)\s+([QWDT-])") +WHITESPACE = re.compile(r"\s+") + +class KM(object): + + def __init__(self, filename): + self.filename = filename + self.load_values() + + def load_values(self): + with codecs.open(self.filename, "r", "latin-1") as f: + for line in f: + line = line.strip() + if not line or line.startswith("*"): + parts = [s.strip() for s in line.split(";")] + # TODO: Use code from import-kms.py + +class AbflussTafel(object): + + def __init__(self, filename): + self.filename = filename + self.name = "" + self.values = [] + self.load_values() + + def load_values(self): + with codecs.open(self.filename, "r", "latin-1") as f: + first = True + for line in f: + line = line.strip() + if not line: continue + if line.startswith("#! name="): + self.name = line[8:] + continue + if line.startswith("#") or line.startswith("*"): + continue + line = line.replace(",", ".") + splits = WHITESPACE.split(line) + + if len(splits) < 2 or len(splits) > 11: + continue + + w = float(splits[0]) + + shift = 0 + + if len(splits) != 11 and first: + shift = 11 - len(splits) + + for idx, q in enumerate(splits[1:]): + i_w = w + shift + idx + i_q = float(q) + w_q = (i_w/100.0, i_q/100.0) + self.values.append(w_q) + + first = False + + +class Hauptwert(object): + def __init__(self, name, value, kind): + self.name = name + self.extra = value + self.kind = kind + +class Pegel(object): + def __init__(self, name, start, stop, sta, at, html): + self.name = name + self.start = start + self.stop = stop + self.sta = sta + self.at = at + self.html = html + self.aeo = 0.0 + self.nullpunkt = 0.0 + self.km = 0.0 + self.hauptwerte = [] + self.load_hauptwerte() + self.at_data = AbflussTafel(self.at) + + def load_hauptwerte(self): + with codecs.open(self.sta, "r", "latin-1") as f: + for line_no, line in enumerate(f): + line = line.rstrip() + if line_no == 0: + first = False + name = line[16:37].strip() + line = [s.replace(",", ".") for s in line[37:].split()] + self.aeo = float(line[0]) + self.nullpunkt = float(line[1]) + print >> sys.stderr, "pegel name: '%s'" % name + print >> sys.stderr, "pegel aeo: '%f'" % self.aeo + print >> sys.stderr, "pegel nullpunkt: '%f'" % self.nullpunkt + elif line_no == 1: + self.km = float(line[29:36].strip().replace(",", ".")) + print >> sys.stderr, "km: '%f'" % self.km + else: + if not line: continue + line = line.replace(",", ".") + m = HAUPTWERT.match(line) + if not m: continue + self.hauptwerte.append(Hauptwert( + m.group(1), float(m.group(2)), m.group(3))) + +class Gewaesser(object): + + def __init__(self, name=None, b_b=None, wst=None): + self.name = name + self.b_b = b_b + self.wst = wst + self.pegel = [] + + def load_pegel(self): + dir_name = os.path.dirname(self.wst) + pegel_glt = find_file(dir_name, "PEGEL.GLT") + if not pegel_glt: + print >> sys.stderr, "Missing PEGEL.GLT for %r" % self.name + return + + print >> sys.stderr, "pegel_glt: %r" % pegel_glt + + with codecs.open(pegel_glt, "r", "latin-1") as f: + for line in f: + line = line.strip() + if not line or line.startswith("#"): + continue + # using re to cope with quoted columns, + # shlex has unicode problems. + parts = [p for p in re.split("( |\\\".*?\\\"|'.*?')", line) + if p.strip()] + if len(parts) < 7: + print >> sys.stderr, "too less colums (need 7): %r" % line + continue + + print >> sys.stderr, "%r" % parts + self.pegel.append(Pegel( + parts[0], + min(float(parts[2]), float(parts[3])), + max(float(parts[2]), float(parts[3])), + norm_path(parts[4], dir_name), + norm_path(parts[5], dir_name), + parts[6])) + + + def __repr__(self): + return u"Gewaesser(name=%r, b_b=%r, wst=%r)" % ( + self.name, self.b_b, self.wst) + +def norm_path(path, ref): + if not os.path.isabs(path): + path = os.path.normpath(os.path.join(ref, path)) + return path + +def find_file(path, what): + what = what.lower() + for filename in os.listdir(path): + p = os.path.join(path, filename) + if os.path.isfile(p) and filename.lower() == what: + return p + return None + + +def read_gew(filename): + + gewaesser = [] + + current = Gewaesser() + + filename = os.path.abspath(filename) + dirname = os.path.dirname(filename) + + with codecs.open(filename, "r", "latin-1") as f: + for line in f: + line = line.strip() + if not line or line.startswith("*"): + continue + + if line.startswith(u"Gewässer:"): + if current.name: + gewaesser.append(current) + current = Gewaesser() + current.name = line[len(u"Gewässer:"):].strip() + elif line.startswith(u"B+B-Info:"): + current.b_b = norm_path(line[len(u"B+B-Info:"):].strip(), + dirname) + elif line.startswith(u"WSTDatei:"): + current.wst = norm_path(line[len(u"WSTDatei:"):].strip(), + dirname) + + if current.name: + gewaesser.append(current) + + return gewaesser + +def main(): + + if len(sys.argv) < 2: + print >> sys.stderr, "missing gew file" + sys.exit(1) + + gew_filename = sys.argv[1] + + if not os.path.isfile(gew_filename): + print >> sys.stderr, "'%s' is not a file" % gew_filename + sys.exit(1) + + gewaesser = read_gew(gew_filename) + + for gew in gewaesser: + gew.load_pegel() + + + +if __name__ == '__main__': + main() +# vim: set fileencoding=utf-8 :