view backend/src/main/java/org/dive4elements/river/importer/parsers/FlowVelocityModelParser.java @ 9650:a2a42a6bac6b

Importer (s/u-info) extensions: outer try/catch for parse and log of line no, catching parsing exception if not enough value fields, parsing error and warning log messages with line number, detecting and rejecting duplicate data series, better differentiation between error and warning log messages
author mschaefer
date Mon, 23 Mar 2020 14:57:03 +0100
parents c43d8c1a4455
children
line wrap: on
line source
/* Copyright (C) 2011, 2012, 2013 by Bundesanstalt für Gewässerkunde
 * Software engineering by Intevation GmbH
 *
 * This file is Free Software under the GNU AGPL (>=v3)
 * and comes with ABSOLUTELY NO WARRANTY! Check out the
 * documentation coming with Dive4Elements River for details.
 */

package org.dive4elements.river.importer.parsers;

import java.io.File;
import java.io.IOException;
import java.math.BigDecimal;
import java.text.NumberFormat;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.List;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;
import org.dive4elements.river.backend.utils.EpsilonComparator;
import org.dive4elements.river.importer.ImportDischargeZone;
import org.dive4elements.river.importer.ImportFlowVelocityModel;
import org.dive4elements.river.importer.ImportFlowVelocityModelValue;
import org.dive4elements.river.importer.common.AbstractParser;


/**
 * Line based parser for flow velocity model files.
 *
 * <p>Lines starting with {@code START_META_CHAR} are treated as meta lines;
 * a meta line matching {@link #META_REGEX} defines the discharge zone of the
 * model. Every other line is treated as a data line holding at least five
 * {@code SEPERATOR_CHAR} separated decimal columns.</p>
 *
 * <p>One {@link ImportFlowVelocityModel} is collected per parsed file; the
 * file name is used as the model description.</p>
 */
public class FlowVelocityModelParser extends LineParser {

    private static final Logger log =
            Logger.getLogger(FlowVelocityModelParser.class);

    /**
     * Meta line of the form {@code ...Rechnung [unter] <main value> (Pegel
     * <gauge>)...}; group 1 is the main value text, group 2 the gauge text.
     */
    private static final Pattern META_REGEX =
            Pattern.compile(".*Rechnung [unter ]*(.*) \\(Pegel (.*)\\).*");

    /** Gauge text {@code <name> Q=<value>m3/s}; group 1 name, group 2 Q. */
    private static final Pattern META_GAUGE =
            Pattern.compile("(.*) Q=(\\w*)m3/s");

    /*
     * NOTE: the letter/digit parts of the A, B and E patterns were formerly
     * written with nested quantifiers, e.g. "([a-zA-Z]+)+(\\d+)*". They
     * accept exactly the same strings as the flat form used here, but nested
     * quantifiers can make the regex engine backtrack exponentially on long
     * non-matching input.
     */

    /** Single named main value, e.g. {@code MQ} or {@code HQ100}. */
    private static final Pattern META_MAINVALUE_A =
            Pattern.compile("[a-zA-Z]+\\d*[\\w()]*");

    /** Named range {@code <lower> - <upper>}; group 1 lower, group 2 upper. */
    private static final Pattern META_MAINVALUE_B =
            Pattern.compile(
                    "([a-zA-Z]+\\d*)\\s*-\\s*([a-zA-Z]+\\d*\\S*)");

    /**
     * Single value with numeric factor, {@code <factor> <name>}; the factor
     * is either an integer (groups 1/2) or a decimal with comma (groups 3/4).
     */
    private static final Pattern META_MAINVALUE_C =
            Pattern.compile("([0-9]++)\\s?(\\S*)|([0-9]++,[0-9]++)\\s?(\\S*)");

    /**
     * Range {@code <factor> <name> bis <factor> <name>}. Lower part: groups
     * 2/3 (integer factor) or 4/5 (decimal factor); upper part: groups 7/8
     * or 9/10.
     */
    private static final Pattern META_MAINVALUE_D =
            Pattern.compile(
                    "(([0-9]*)\\s?(\\w*)|([0-9]++,[0-9]++)\\s?(\\w*))\\s*"
                            + "bis (([0-9]*)\\s?(\\S*)|([0-9]++,[0-9]++)\\s?(\\S*))");

    /** Named range {@code <lower> bis <upper>}; group 1 lower, group 2 upper. */
    private static final Pattern META_MAINVALUE_E =
            Pattern.compile(
                    "([a-zA-Z]+\\d*)\\s*bis ([a-zA-Z]+\\d*\\S*)");


    /** Models collected so far, one per finished file. */
    private final List<ImportFlowVelocityModel> models;

    /** Model of the file currently being parsed. */
    private ImportFlowVelocityModel current;

    /** Description of the current model (name of the parsed file). */
    protected String description;

    /** Stations already seen in the current file, used to reject duplicates. */
    protected TreeSet<Double> kmExists;


    public FlowVelocityModelParser() {
        this.models = new ArrayList<>();
        this.kmExists = new TreeSet<>(EpsilonComparator.CMP);
    }


    /**
     * @return the list of models parsed so far (the internal list, not a
     *         copy).
     */
    public List<ImportFlowVelocityModel> getModels() {
        return this.models;
    }

    /**
     * Remembers the file name as model description and delegates the actual
     * parsing to the super class.
     *
     * @param file the file to parse.
     * @throws IOException if thrown by the super class parse.
     */
    @Override
    public void parse(final File file) throws IOException {
        this.description = file.getName();

        super.parse(file);
    }

    /** Starts a fresh model and forgets the stations seen so far. */
    @Override
    protected void reset() {
        this.current = new ImportFlowVelocityModel(this.description);
        this.kmExists.clear();
    }


    /** Adds the current model to the list of parsed models. */
    @Override
    protected void finish() {
        this.models.add(this.current);
    }


    /**
     * Dispatches a line to the meta line or the data line handler.
     *
     * @param lineNum number of the line (currently not used by this parser).
     * @param line the line content.
     */
    @Override
    protected void handleLine(final int lineNum, final String line) {
        if (line.startsWith(START_META_CHAR)) {
            handleMetaLine(stripMetaLine(line));
        }
        else {
            handleDataLine(line);
        }
    }


    /**
     * Extracts the discharge zone from a meta line matching
     * {@link #META_REGEX} and sets it on the current model. Meta lines that
     * do not match are ignored; lines with an unparseable main value or
     * gauge part are skipped with a warning.
     *
     * @param line the meta line as returned by {@code stripMetaLine}.
     */
    protected void handleMetaLine(final String line) {
        final Matcher m = META_REGEX.matcher(line);

        if (!m.matches()) {
            return;
        }

        final String mainValueStr = m.group(1);
        log.debug("mainValueStr = '" + mainValueStr + "'");
        final String gaugeStr     = m.group(2);

        final Object[] valueData = handleMainValueString(mainValueStr);
        final Object[] gaugeData = handleGaugeString(gaugeStr);

        if (valueData == null || valueData.length < 2) {
            log.warn("skip invalid MainValue part in '" + line + "'");
            return;
        }

        if (gaugeData == null || gaugeData.length < 2) {
            log.warn("skip invalid gauge part in '" + line + "'");
            return;
        }

        if (log.isDebugEnabled()) {
            log.debug("Found meta information:");
            log.debug("   Gauge: " + gaugeData[0]);
            log.debug("   Value: " + gaugeData[1]);
            log.debug("   Lower: " + valueData[0]);
            log.debug("   upper: " + valueData[1]);
        }

        this.current.setDischargeZone(new ImportDischargeZone(
                (String) gaugeData[0],
                (BigDecimal) gaugeData[1],
                (String) valueData[0],
                (String) valueData[1]
                ));
    }


    /**
     * Parses the main value part of a meta line.
     *
     * <p>The patterns are tried in the order A, B, C, E, D. The purely named
     * range pattern E has to be tried before D: D accepts an empty numeric
     * factor and therefore matches every string E matches, which formerly
     * made the E branch unreachable and produced names with a leading
     * blank (e.g. {@code " MQ"}).</p>
     *
     * @param mainValueStr the main value text (group 1 of
     *        {@link #META_REGEX}).
     * @return a two element array {@code { lower, upper }} of strings, or
     *         {@code null} if the text matches none of the known patterns.
     */
    protected Object[] handleMainValueString(final String mainValueStr) {
        final String text = mainValueStr.trim();

        final Matcher mA = META_MAINVALUE_A.matcher(text);
        if (mA.matches()) {
            log.debug("mainValueStr matches META_MAINVALUE_A");
            final String name = mA.group(0);

            return new Object[] { name, name };
        }

        final Matcher mB = META_MAINVALUE_B.matcher(text);
        if (mB.matches()) {
            log.debug("mainValueStr matches META_MAINVALUE_B");
            final String lower = mB.group(1);
            final String upper = mB.group(2);

            return new Object[] { lower, upper };
        }

        final Matcher mC = META_MAINVALUE_C.matcher(text);
        if (mC.matches()) {
            log.debug("mainValueStr matches META_MAINVALUE_C");
            final String facA  = mC.group(1);
            final String nameA = mC.group(2);
            final String facB  = mC.group(3);
            final String nameB = mC.group(4);

            // Only the groups of the alternative that matched are non-null.
            final String fac  = facA  != null ? facA  : facB;
            final String name = nameA != null ? nameA : nameB;

            final String mainValue = joinFactorAndName(fac, name);

            return new Object[] { mainValue, mainValue };
        }

        // E before D, see method comment.
        final Matcher mE = META_MAINVALUE_E.matcher(text);
        if (mE.matches()) {
            log.debug("mainValueStr matches META_MAINVALUE_E");
            final String lower = mE.group(1);
            final String upper = mE.group(2);

            return new Object[] { lower, upper };
        }

        final Matcher mD = META_MAINVALUE_D.matcher(text);
        if (mD.matches()) {
            log.debug("mainValueStr matches META_MAINVALUE_D");
            final String loFacA  = mD.group(2);
            final String loNameA = mD.group(3);
            final String loFacB  = mD.group(4);
            final String loNameB = mD.group(5);

            final String upFacA  = mD.group(7);
            final String upNameA = mD.group(8);
            final String upFacB  = mD.group(9);
            final String upNameB = mD.group(10);

            // Only the groups of the alternative that matched are non-null.
            final String loFac  = loFacA  != null ? loFacA  : loFacB;
            final String loName = loNameA != null ? loNameA : loNameB;

            final String upFac  = upFacA  != null ? upFacA  : upFacB;
            final String upName = upNameA != null ? upNameA : upNameB;

            final String loMainValue = joinFactorAndName(loFac, loName);
            final String upMainValue = joinFactorAndName(upFac, upName);

            return new Object[] { loMainValue, upMainValue };
        }

        log.debug("mainValueStr not matched");
        return null;
    }


    /**
     * Joins factor and name with a single blank and removes the blank again
     * if one of both parts is empty.
     */
    private static String joinFactorAndName(
            final String factor,
            final String name) {
        return (factor + " " + name).trim();
    }


    /**
     * Parses the gauge part of a meta line.
     *
     * @param gaugeStr the gauge text (group 2 of {@link #META_REGEX}).
     * @return a two element array {@code { String name, BigDecimal q }}, or
     *         {@code null} if the text does not match {@link #META_GAUGE}
     *         or the Q value cannot be parsed.
     */
    protected Object[] handleGaugeString(final String gaugeStr) {
        final Matcher m = META_GAUGE.matcher(gaugeStr);

        if (m.matches()) {
            final String name = m.group(1);
            final String qStr = m.group(2);

            try {
                return new Object[] {
                        name,
                        AbstractParser.parseDecimal(qStr) };
            }
            catch (final ParseException pe) {
                log.warn("Could not parse Q value: '" + qStr + "'");
            }
        }

        return null;
    }


    /**
     * Parses a data line and adds its values to the current model.
     *
     * <p>The first five columns are read as station (km), q, total, main and
     * stress, in this order. Lines with fewer than five columns, lines with
     * unparseable numbers and lines repeating an already seen station (as
     * decided by {@code EpsilonComparator.CMP}) are skipped with a
     * warning.</p>
     *
     * @param line the data line.
     */
    protected void handleDataLine(final String line) {
        final String[] cols = line.split(SEPERATOR_CHAR);

        if (cols.length < 5) {
            log.warn("skip invalid data line: '" + line + "'");
            return;
        }

        try {
            final BigDecimal km = AbstractParser.parseDecimal(cols[0]);

            final Double key = Double.valueOf(km.doubleValue());

            if (this.kmExists.contains(key)) {
                log.warn("duplicate station '" + km + "': -> ignored");
                return;
            }

            final BigDecimal q = AbstractParser.parseDecimal(cols[1]);
            final BigDecimal total = AbstractParser.parseDecimal(cols[2]);
            final BigDecimal main = AbstractParser.parseDecimal(cols[3]);
            final BigDecimal stress = AbstractParser.parseDecimal(cols[4]);

            this.current.addValue(new ImportFlowVelocityModelValue(km, q, total, main, stress));

            // Remember the station only after all columns parsed successfully.
            this.kmExists.add(key);
        }
        catch (final ParseException pe) {
            log.warn("Unparseable flow velocity values:", pe);
        }
    }
}
// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org