view flys-backend/contrib/import-gew.py @ 4769:92a08725bc63

Do not die in W80 parser if invalid date is given.
author Sascha L. Teichmann <teichmann@intevation.de>
date Fri, 04 Jan 2013 15:46:33 +0100
parents 80669241956c
children
line wrap: on
line source
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import sys
import os
import codecs
import re

# One main-value line: optional leading whitespace, a non-whitespace token
# (group 1), a token of characters that are neither whitespace nor '+'
# (group 2), then one of the letters Q, W, D, T or a '-' (group 3).
# NOTE(review): the '+' inside the second character class also excludes
# literal plus signs from group 2 -- confirm whether that is intended.
HAUPTWERT  = re.compile(r"\s*([^\s]+)\s+([^\s+]+)\s+([QWDT-])")
# Separator used to split a table row into its columns.
WHITESPACE = re.compile(r"\s+")

class KM(object):
    """Reader stub for a semicolon-separated KM file.

    The file is read as latin-1 when the object is created.  The parsed
    columns are not stored anywhere yet (see the TODO in load_values).
    """

    def __init__(self, filename):
        """Remember *filename* and read the file right away."""
        self.filename = filename
        self.load_values()

    def load_values(self):
        """Walk the file and split every data line on ';'.

        Blank lines and lines starting with '*' are skipped, the same way
        the other parsers in this file treat them.  The resulting columns
        are currently discarded.
        """
        with codecs.open(self.filename, "r", "latin-1") as f:
            for line in f:
                line = line.strip()
                # Skip blank/'*' lines; previously the guard was inverted
                # and only those lines were split.
                if not line or line.startswith("*"):
                    continue
                parts = [s.strip() for s in line.split(";")]
                # TODO: Use code from import-kms.py

class AbflussTafel(object):
    """Table of (w, q) pairs read from a whitespace-separated text file.

    After construction ``name`` holds the text following a "#! name="
    line ("" if the file has none) and ``values`` holds one (w, q) tuple
    per table cell, both numbers divided by 100.
    """

    def __init__(self, filename):
        """Store *filename* and load the table from it immediately."""
        self.filename = filename
        self.name = ""
        self.values = []
        self.load_values()

    def load_values(self):
        """Fill ``name`` and ``values`` from the file (read as latin-1).

        Each data row is a base w followed by up to ten q columns; decimal
        commas are accepted.  Column k of a row belongs to w = base + k.
        Rows with fewer than 2 or more than 11 columns are ignored.
        """
        name_tag = "#! name="
        is_first_row = True

        with codecs.open(self.filename, "r", "latin-1") as table:
            for raw in table:
                row = raw.strip()
                if not row:
                    continue
                if row.startswith(name_tag):
                    self.name = row[len(name_tag):]
                    continue
                if row.startswith(("#", "*")):
                    continue

                columns = WHITESPACE.split(row.replace(",", "."))
                n_columns = len(columns)
                if not 2 <= n_columns <= 11:
                    continue

                # Only the first data row may be short; its q columns are
                # then shifted so that they line up with the last columns
                # of a full 11-column row.
                offset = 11 - n_columns if is_first_row else 0
                first_w = float(columns[0]) + offset

                for step, raw_q in enumerate(columns[1:]):
                    self.values.append(
                        ((first_w + step) / 100.0, float(raw_q) / 100.0))

                is_first_row = False


class Hauptwert(object):
    """One main-value record: a name, a value and a kind.

    The value passed to the constructor is exposed as the attribute
    ``extra``.
    """
    def __init__(self, name, value, kind):
        self.name, self.extra, self.kind = name, value, kind

class Pegel(object):
    """A gauge with its main values and its discharge table.

    Constructing a Pegel reads two files right away: the file *sta* is
    parsed by load_hauptwerte() and the file *at* is handed to
    AbflussTafel.

    Attributes:
        name, start, stop, html: stored as passed in.
        sta, at:    paths of the two files described above.
        aeo:        first number of the header line of *sta* (default 0.0).
        nullpunkt:  second number of that header line (default 0.0).
        km:         number taken from the second line of *sta* (default 0.0).
        hauptwerte: list of Hauptwert objects parsed from *sta*.
        at_data:    the AbflussTafel built from *at*.
    """

    def __init__(self, name, start, stop, sta, at, html):
        self.name       = name
        self.start      = start
        self.stop       = stop
        self.sta        = sta
        self.at         = at
        self.html       = html
        self.aeo        = 0.0
        self.nullpunkt  = 0.0
        self.km         = 0.0
        self.hauptwerte = []
        self.load_hauptwerte()
        self.at_data = AbflussTafel(self.at)

    def load_hauptwerte(self):
        """Parse the *sta* file (latin-1) into aeo, nullpunkt, km, hauptwerte.

        Line 1 is fixed-width: columns 16-36 hold a name (only logged),
        the rest holds whitespace-separated numbers of which the first two
        become ``aeo`` and ``nullpunkt``.  Line 2 carries ``km`` in columns
        29-35.  Every following line that matches HAUPTWERT is appended to
        ``hauptwerte``; all other lines are ignored.  Decimal commas are
        accepted everywhere.

        Raises:
            ValueError, IndexError: if one of the two header lines does
                not contain the expected numbers.
        """
        with codecs.open(self.sta, "r", "latin-1") as f:
            for line_no, line in enumerate(f):
                line = line.rstrip()
                if line_no == 0:
                    name = line[16:37].strip()
                    line = [s.replace(",", ".") for s in line[37:].split()]
                    self.aeo = float(line[0])
                    self.nullpunkt = float(line[1])
                    print >> sys.stderr, "pegel name: '%s'" % name
                    print >> sys.stderr, "pegel aeo: '%f'" % self.aeo
                    print >> sys.stderr, "pegel nullpunkt: '%f'" % self.nullpunkt
                elif line_no == 1:
                    self.km = float(line[29:36].strip().replace(",", "."))
                    print >> sys.stderr, "km: '%f'" % self.km
                else:
                    if not line: continue
                    line = line.replace(",", ".")
                    m = HAUPTWERT.match(line)
                    if not m: continue
                    self.hauptwerte.append(Hauptwert(
                        m.group(1), float(m.group(2)), m.group(3)))

class Gewaesser(object):
    """A river section of a GEW file together with its gauges.

    Attributes:
        name:  river name (None until set).
        b_b:   path taken from the "B+B-Info:" entry (None until set).
        wst:   path taken from the "WSTDatei:" entry (None until set).
        pegel: list of Pegel objects, filled by load_pegel().
    """

    def __init__(self, name=None, b_b=None, wst=None):
        self.name = name
        self.b_b = b_b
        self.wst = wst
        self.pegel = []

    def load_pegel(self):
        """Read PEGEL.GLT next to the WST file and append a Pegel per row.

        The file is looked up case-insensitively in the directory of
        ``self.wst``.  Blank lines and lines starting with '#' are skipped.
        Each remaining row needs at least 7 columns, separated by single
        blanks; columns may be wrapped in single or double quotes.  Columns
        3 and 4 are the two km bounds (stored as min/max), columns 5 and 6
        are paths resolved relative to the PEGEL.GLT directory.

        A missing WST path or a missing PEGEL.GLT is reported on stderr and
        leaves ``pegel`` untouched.
        """
        # A section without a "WSTDatei:" entry has no directory to search;
        # os.path.dirname(None) would raise and abort the whole import.
        if not self.wst:
            print >> sys.stderr, "Missing WSTDatei for %r" % self.name
            return

        dir_name = os.path.dirname(self.wst)
        pegel_glt = find_file(dir_name, "PEGEL.GLT")
        if not pegel_glt:
            print >> sys.stderr, "Missing PEGEL.GLT for %r" % self.name
            return

        print >> sys.stderr, "pegel_glt: %r" % pegel_glt

        with codecs.open(pegel_glt, "r", "latin-1") as f:
            for line in f:
                line = line.strip()
                if not line or line.startswith("#"):
                    continue
                # using re to cope with quoted columns,
                # shlex has unicode problems.
                parts = [p for p in re.split("( |\\\".*?\\\"|'.*?')", line)
                         if p.strip()]
                if len(parts) < 7:
                    print >> sys.stderr, "too less colums (need 7): %r" % line
                    continue

                print >> sys.stderr, "%r" % parts
                # Parse both bounds once; the file may list them in
                # either order.
                km_a = float(parts[2])
                km_b = float(parts[3])
                self.pegel.append(Pegel(
                    parts[0],
                    min(km_a, km_b),
                    max(km_a, km_b),
                    norm_path(parts[4], dir_name),
                    norm_path(parts[5], dir_name),
                    parts[6]))


    def __repr__(self):
        return u"Gewaesser(name=%r, b_b=%r, wst=%r)" % (
            self.name, self.b_b, self.wst)

def norm_path(path, ref):
    """Return *path* unchanged if absolute, else normalised below *ref*."""
    if os.path.isabs(path):
        return path
    return os.path.normpath(os.path.join(ref, path))

def find_file(path, what):
    """Find a regular file in directory *path* whose name equals *what*.

    The comparison ignores case.  Returns the joined path of the first
    match in os.listdir() order, or None if there is none.
    """
    wanted = what.lower()
    for entry in os.listdir(path):
        if entry.lower() != wanted:
            continue
        candidate = os.path.join(path, entry)
        if os.path.isfile(candidate):
            return candidate
    return None
    

def read_gew(filename):
    """Parse a GEW index file (latin-1) into a list of Gewaesser objects.

    A line starting with "Gewässer:" opens a new section and names it;
    "B+B-Info:" and "WSTDatei:" lines set the ``b_b`` and ``wst`` paths of
    the current section, resolved relative to the directory of *filename*.
    Blank lines, lines starting with '*' and unrecognised lines are
    ignored.  Only sections that ended up with a non-empty name are
    returned.
    """
    name_tag = u"Gewässer:"
    # Path-carrying prefixes and the attribute each of them fills.
    path_tags = ((u"B+B-Info:", "b_b"), (u"WSTDatei:", "wst"))

    filename = os.path.abspath(filename)
    base_dir = os.path.dirname(filename)

    rivers = []
    river = Gewaesser()

    with codecs.open(filename, "r", "latin-1") as gew_file:
        for raw in gew_file:
            entry = raw.strip()
            if not entry or entry.startswith("*"):
                continue

            if entry.startswith(name_tag):
                # A new section begins: keep the previous one if it was
                # named, otherwise keep filling the same object.
                if river.name:
                    rivers.append(river)
                    river = Gewaesser()
                river.name = entry[len(name_tag):].strip()
                continue

            for tag, attr in path_tags:
                if entry.startswith(tag):
                    setattr(river, attr,
                            norm_path(entry[len(tag):].strip(), base_dir))
                    break

    # The last section has no following header that would store it.
    if river.name:
        rivers.append(river)

    return rivers

def main():
    """Command-line entry point: read a GEW file and load all gauges.

    Expects the GEW file as first argument.  Exits with status 1 and a
    message on stderr if the argument is missing or is not a file.
    """
    args = sys.argv[1:]
    if not args:
        print >> sys.stderr, "missing gew file"
        sys.exit(1)

    gew_filename = args[0]
    if not os.path.isfile(gew_filename):
        print >> sys.stderr, "'%s' is not a file" % gew_filename
        sys.exit(1)

    for gew in read_gew(gew_filename):
        gew.load_pegel()

    

# Run the importer only when this file is executed as a script.
if __name__ == '__main__':
    main()
# vim: set fileencoding=utf-8 :

http://dive4elements.wald.intevation.org