view backend/src/main/java/org/dive4elements/river/importer/uinfo/parsers/SalixParser.java @ 9660:f0cad5212f49

Importer (s/u-info) extensions: iota (salix): detecting, logging, cancelling in case of wrong column titles/units, detecting, logging and skipping lines with missing values
author mschaefer
date Mon, 23 Mar 2020 15:40:12 +0100
parents 66a43d9f65c8
children
line wrap: on
line source
/* Copyright (C) 2017 by Bundesanstalt für Gewässerkunde
 * Software engineering by
 *  Björnsen Beratende Ingenieure GmbH
 *  Dr. Schumacher Ingenieurbüro für Wasser und Umwelt
 *
 * This file is Free Software under the GNU AGPL (>=v3)
 * and comes with ABSOLUTELY NO WARRANTY! Check out the
 * documentation coming with Dive4Elements River for details.
 */

package org.dive4elements.river.importer.uinfo.parsers;

import java.io.File;
import java.util.ArrayList;
import java.util.EnumMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;
import org.dive4elements.river.importer.Config;
import org.dive4elements.river.importer.ImportRiver;
import org.dive4elements.river.importer.common.AbstractParser;
import org.dive4elements.river.importer.common.ParsingState;
import org.dive4elements.river.importer.uinfo.importitem.SalixKmLineImport;
import org.dive4elements.river.importer.uinfo.importitem.SalixSeriesImport;
import org.dive4elements.river.model.uinfo.Salix;
import org.dive4elements.river.model.uinfo.SalixValue;

/**
 * Reads and parses a salix (iota) import file.
 * <p>
 * In addition to the processing done by {@link AbstractParser}, this parser recognizes the
 * "# Iota..." and "# Auswerter: ..." meta lines, locates the iota and (MW-MNW) columns by
 * their column titles, checks the units given in those titles, and creates one
 * {@link SalixKmLineImport} per data line that carries both values.
 *
 * @author Matthias Schäfer
 *
 */
public class SalixParser extends AbstractParser<Salix, SalixValue, SalixKmLineImport, SalixSeriesImport> {

    /***** FIELDS *****/

    private static final Logger log = Logger.getLogger(SalixParser.class);

    private static final String IMPORT_FILENAME = "Iota.csv";

    /** The only unit accepted for both value columns. */
    private static final String SUPPORTED_UNIT = "m";

    private static final Pattern META_FIRST = Pattern.compile("^#\\sIota.*", Pattern.CASE_INSENSITIVE);

    private static final Pattern META_EVALUATOR = Pattern.compile("^#\\sAuswerter:\\s*([^;]*).*", Pattern.CASE_INSENSITIVE);

    /**
     * Column title patterns of the two value columns; group 1 of each pattern is the unit
     * given in square brackets.
     * <p>
     * The unit group deliberately excludes ']' so that it ends at the closing bracket of the
     * unit itself. A greedy ".*" would swallow everything up to the last ']' of the title
     * (e.g. "Iota [m] (see [1])" would yield the unit "m] (see [1") and make the unit check fail.
     */
    private enum ColTitlePattern {
        FACTOR("Iota\\s*\\[([^\\]]*)\\].*"), //
        MWMNW("\\(MW-MNW\\).*\\[([^\\]]*)\\].*");

        private final Pattern pattern;

        ColTitlePattern(final String regexp) {
            this.pattern = Pattern.compile(regexp, Pattern.CASE_INSENSITIVE);
        }

        public Pattern getPattern() {
            return this.pattern;
        }
    }

    /** Index of the column matched by each title pattern; -1 if no column title matched. */
    private final EnumMap<ColTitlePattern, Integer> cols;

    /** Unit extracted from the matched column title; empty if no column title matched. */
    private final EnumMap<ColTitlePattern, String> units;


    /***** CONSTRUCTORS *****/

    public SalixParser(final File importPath, final File rootRelativePath, final ImportRiver river) {
        super(importPath, rootRelativePath, river);
        this.cols = new EnumMap<>(ColTitlePattern.class);
        this.units = new EnumMap<>(ColTitlePattern.class);
    }

    /***** METHODS *****/

    @Override
    protected Logger getLog() {
        return log;
    }

    /**
     * Whether this import type shall be skipped
     */
    public static boolean shallSkip() {
        return Config.INSTANCE.skipUInfoSalix();
    }

    /**
     * Creates a list of parsers for all salix import files in a directory
     *
     * @return a list with one parser if the file "Iota.csv" exists in importDir, otherwise an empty list
     */
    public static List<SalixParser> createParsers(final File importDir, final File relativeDir, final ImportRiver river) {
        final List<SalixParser> parsers = new ArrayList<>();
        final File importFile = new File(importDir, IMPORT_FILENAME);
        if (importFile.exists()) {
            parsers.add(new SalixParser(importFile, new File(relativeDir, IMPORT_FILENAME), river));
        }
        return parsers;
    }

    @Override
    protected SalixSeriesImport createSeriesImport(final String filename) {
        return new SalixSeriesImport(filename);
    }

    /**
     * Handles the salix specific meta lines.
     *
     * @return true if the current line is either the first meta line or the evaluator meta line
     */
    @Override
    protected boolean handleMetaOther() {
        // Short-circuit keeps the original order: the evaluator pattern is only tried
        // if the first-line pattern did not match.
        return handleMetaFirst() || handleMetaEvaluator();
    }

    /**
     * Checks the current line against the "# Iota..." pattern and registers the pattern as matched.
     */
    private boolean handleMetaFirst() {
        final Matcher m = META_FIRST.matcher(this.currentLine);
        if (m.matches()) {
            this.metaPatternsMatched.add(META_FIRST);
            return true;
        }
        return false;
    }

    /**
     * Checks the current line against the "# Auswerter: ..." pattern, registers the pattern
     * as matched and stores the trimmed evaluator text in the series header.
     */
    private boolean handleMetaEvaluator() {
        final Matcher m = META_EVALUATOR.matcher(this.currentLine);
        if (m.matches()) {
            this.metaPatternsMatched.add(META_EVALUATOR);
            this.seriesHeader.setEvaluationBy(parseMetaInfo(m.group(1).trim()));
            return true;
        }
        return false;
    }

    /**
     * Identifies the iota and (MW-MNW) columns from the column titles and checks their units.
     * <p>
     * Header parsing is stopped (with a logged line error) if one of the two columns cannot be
     * identified or if one of the units differs from the supported unit.
     *
     * @return false if the superclass implementation returned false, otherwise true
     */
    @Override
    protected boolean handleMetaColumnTitles() {
        if (!super.handleMetaColumnTitles()) {
            return false;
        }
        // Reset to "not found" so that stale results cannot survive
        for (final ColTitlePattern col : ColTitlePattern.values()) {
            this.cols.put(col, -1);
            this.units.put(col, "");
        }
        // Column 0 is not checked against the value column patterns
        for (int i = 1; i <= this.columnTitles.size() - 1; i++) {
            for (final ColTitlePattern col : ColTitlePattern.values()) {
                final Matcher m = col.getPattern().matcher(this.columnTitles.get(i));
                if (m.matches()) {
                    this.cols.put(col, i);
                    // Trim so that e.g. "[ m ]" is accepted by the unit check below
                    this.units.put(col, m.group(1).trim());
                    break;
                }
            }
        }
        if ((this.cols.get(ColTitlePattern.FACTOR) < 0) || (this.cols.get(ColTitlePattern.MWMNW) < 0)) {
            logLineError("Column of the iota value and/or mnw-mw-diff could not be identified");
            this.headerParsingState = ParsingState.STOP;
            return true;
        }
        if (!SUPPORTED_UNIT.equals(this.units.get(ColTitlePattern.FACTOR)) || !SUPPORTED_UNIT.equals(this.units.get(ColTitlePattern.MWMNW))) {
            logLineError("Column of the iota value and/or mnw-mw-diff have unsupported units");
            this.headerParsingState = ParsingState.STOP;
        }
        return true;
    }

    /**
     * Creates the import item of a data line from its iota and (MW-MNW) values.
     *
     * @return the new import item, or null (after logging a line warning) if one of the two
     *         values was parsed as null or NaN
     */
    @Override
    protected SalixKmLineImport createKmLineImport(final Double km, final String[] values) {
        final Number factor = parseColumnValue(values, ColTitlePattern.FACTOR, "iota");
        if (factor == null) {
            return null;
        }
        final Number mnwmw = parseColumnValue(values, ColTitlePattern.MWMNW, "MNW-MW-diff");
        if (mnwmw == null) {
            return null;
        }
        return new SalixKmLineImport(km, factor.doubleValue(), mnwmw.doubleValue());
    }

    /**
     * Parses the value of one of the identified columns of a data line.
     *
     * @param values the values of the data line
     * @param col the column to read
     * @param label the column name used in the warning message
     * @return the parsed value, or null (after logging a line warning) if it is null or NaN
     */
    private Number parseColumnValue(final String[] values, final ColTitlePattern col, final String label) {
        final Number value = parseDoubleCheckNull(values, this.cols.get(col));
        if ((value == null) || Double.isNaN(value.doubleValue())) {
            logLineWarning(INVALID_VALUE_ERROR_FORMAT, label);
            return null;
        }
        return value;
    }
}

http://dive4elements.wald.intevation.org