view backend/src/main/java/org/dive4elements/river/importer/sinfo/parsers/DailyDischargeParser.java @ 9658:d86c7cb68b41

Importer (s/u-info) extensions: daily discharge: detecting, logging and skipping lines with missing date or q, or duplicate date, detecting wrong column titles and cancelling the import, specific error message if gauge not found
author mschaefer
date Mon, 23 Mar 2020 15:33:40 +0100
parents abe069eb180c
children
line wrap: on
line source
/* Copyright (C) 2017 by Bundesanstalt für Gewässerkunde
 * Software engineering by
 *  Björnsen Beratende Ingenieure GmbH
 *  Dr. Schumacher Ingenieurbüro für Wasser und Umwelt
 *
 * This file is Free Software under the GNU AGPL (>=v3)
 * and comes with ABSOLUTELY NO WARRANTY! Check out the
 * documentation coming with Dive4Elements River for details.
 */

package org.dive4elements.river.importer.sinfo.parsers;

import java.io.File;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;
import org.dive4elements.river.importer.Config;
import org.dive4elements.river.importer.ImportRiver;
import org.dive4elements.river.importer.common.AbstractParser;
import org.dive4elements.river.importer.common.ParsingState;
import org.dive4elements.river.importer.sinfo.importitem.DailyDischargeDayLineImport;
import org.dive4elements.river.importer.sinfo.importitem.DailyDischargeSeriesImport;
import org.dive4elements.river.model.sinfo.DailyDischarge;
import org.dive4elements.river.model.sinfo.DailyDischargeValue;

/**
 * Reads and parses a daily discharge file.
 * <p>
 * Each data line is expected to carry a date (format {@code dd.MM.yyyy}) in its first column and a discharge value in
 * its second column. Lines with an invalid date, a missing/NaN discharge, or a date that has already been seen in the
 * same file are logged and skipped. The import of a file is stopped if the gauge named in the header cannot be found
 * or if no valid column title line is present.
 * <p>
 * Instances are not thread-safe: each parser owns its own date formatter and its own set of already imported dates.
 *
 * @author Matthias Schäfer
 *
 */
public class DailyDischargeParser extends AbstractParser<DailyDischarge, DailyDischargeValue, DailyDischargeDayLineImport, DailyDischargeSeriesImport> {

    /***** FIELDS *****/

    private static final Logger log = Logger.getLogger(DailyDischargeParser.class);

    /** File name pattern of the daily discharge import files; group 1 is the part preceding the fixed suffix. */
    static final Pattern IMPORT_FILENAME = Pattern.compile("^(.+)_mittlerer_Tagesabfluss\\.csv", Pattern.CASE_INSENSITIVE);

    /** Header line carrying the gauge (station) name in group 1. */
    private static final Pattern META_GAUGENAME = Pattern.compile("^#\\s*Stations-*Name:\\s*(\\S[^;]*).*", Pattern.CASE_INSENSITIVE);

    // private static final Pattern META_GAUGENUMBER = Pattern.compile("^#\\s*Stations-*Nummer:\\s*(\\S[^;]*).*",
    // Pattern.CASE_INSENSITIVE);

    // Header lines that are recognized but whose content is not evaluated
    private static final Pattern META_BETREIBER = Pattern.compile("^#\\s*Betreiber:.*", Pattern.CASE_INSENSITIVE);
    private static final Pattern META_PARAMETER = Pattern.compile("^#\\s*Parameter-Name:.*", Pattern.CASE_INSENSITIVE);
    private static final Pattern META_ZEITREIHE = Pattern.compile("^#\\s*# Zeitreihe.*", Pattern.CASE_INSENSITIVE);
    private static final Pattern META_REIHEBEGINN = Pattern.compile("^#\\s*Beginn der Zeitreihe:.*", Pattern.CASE_INSENSITIVE);
    private static final Pattern META_REIHEENDE = Pattern.compile("^#\\s*Ende der Zeitreihe:.*", Pattern.CASE_INSENSITIVE);

    /** Column title line; group 1 is the date column title, group 2 the discharge column title. */
    private static final Pattern META_COLUMNTITLES = Pattern.compile("^#*\\s*(Datum)\\s*;\\s*(Q[^;]*)", Pattern.CASE_INSENSITIVE);

    /** Date format of the first column of the data lines. */
    private static final String DATE_PATTERN = "dd.MM.yyyy";

    /**
     * Strict (non-lenient) formatter for the date column.
     * Held per instance because {@link SimpleDateFormat} is not safe for concurrent use.
     */
    private final DateFormat dateFormat;

    /** Dates already imported from the current file, used to detect duplicate lines. */
    private final Set<Date> dates;


    /***** CONSTRUCTORS *****/

    /**
     * Creates a parser for a single daily discharge file.
     *
     * @param importPath
     *            the file to import
     * @param rootRelativePath
     *            the path of the file relative to the import root
     * @param river
     *            the river the file belongs to
     */
    public DailyDischargeParser(final File importPath, final File rootRelativePath, final ImportRiver river) {
        super(importPath, rootRelativePath, river);
        this.dateFormat = new SimpleDateFormat(DATE_PATTERN);
        // Reject impossible dates (e.g. 31.02.2020) instead of silently rolling them over to another day
        this.dateFormat.setLenient(false);
        this.dates = new HashSet<>();
    }


    /***** METHODS *****/

    @Override
    protected Logger getLog() {
        return log;
    }

    /**
     * Whether this import type shall be skipped
     *
     * @return the value of the corresponding importer configuration switch
     */
    public static boolean shallSkip() {
        return Config.INSTANCE.skipSInfoDailyDischarge();
    }

    /**
     * Creates a list of parsers for all daily discharge import files in a directory
     *
     * @param importDir
     *            the directory to search for files matching {@link #IMPORT_FILENAME}
     * @param relativeDir
     *            the root-relative counterpart of {@code importDir}
     * @param river
     *            the river the files belong to
     * @return one parser per matching file; empty if the directory does not exist
     */
    public static List<DailyDischargeParser> createParsers(final File importDir, final File relativeDir, final ImportRiver river) {
        final List<DailyDischargeParser> parsers = new ArrayList<>();
        if (importDir.exists()) {
            for (final File file : listFiles(importDir, IMPORT_FILENAME))
                parsers.add(new DailyDischargeParser(file, new File(relativeDir, file.getName()), river));
        }
        return parsers;
    }

    /**
     * @return always {@link KmMode#NONE}
     */
    @Override
    protected KmMode kmMode() {
        return KmMode.NONE;
    }

    @Override
    protected DailyDischargeSeriesImport createSeriesImport(final String filename) {
        return new DailyDischargeSeriesImport(filename);
    }

    /**
     * Creates the import item of one data line.
     * <p>
     * The line is skipped (with a line warning) if the date cannot be parsed, if the discharge is missing or NaN,
     * or if the date has already occurred in this file. The date is registered as seen only for accepted lines.
     *
     * @param km
     *            not used by this parser
     * @param values
     *            the column values of the line: index 0 the date, index 1 the discharge
     * @return the import item, or {@code null} if the line is to be skipped
     */
    @Override
    protected DailyDischargeDayLineImport createKmLineImport(final Double km, final String[] values) {
        final Date day = parseDay(values);
        if (day == null) {
            logLineWarning("Invalid date");
            return null;
        }
        final Number q = parseDoubleCheckNull(values, 1);
        if ((q == null) || Double.isNaN(q.doubleValue())) {
            logLineWarning(INVALID_VALUE_ERROR_FORMAT, "discharge");
            return null;
        }
        // Set.add reports whether the date was new, so duplicate check and registration are a single hash lookup
        if (!this.dates.add(day)) {
            logLineWarning("Duplicate date");
            return null;
        }
        return new DailyDischargeDayLineImport(day, q.doubleValue());
    }

    /**
     * Parses the date column (index 0) of a data line.
     *
     * @return the parsed day, or {@code null} if the column is missing or does not hold a valid date
     */
    private Date parseDay(final String[] values) {
        if ((values == null) || (values.length == 0) || (values[0] == null))
            return null;
        try {
            return this.dateFormat.parse(values[0].trim());
        }
        catch (final ParseException e) {
            // Not a valid date; the caller logs the line warning
            return null;
        }
    }

    /**
     * Handles the header lines specific to daily discharge files.
     *
     * @return {@code true} if the current line is the gauge name line or one of the recognized-but-ignored header lines
     */
    @Override
    protected boolean handleMetaOther() {
        // else if (handleMetaGaugeNumber())
        // return true;
        return handleMetaGaugeName()
                || META_BETREIBER.matcher(this.currentLine).matches()
                || META_PARAMETER.matcher(this.currentLine).matches()
                || META_ZEITREIHE.matcher(this.currentLine).matches()
                || META_REIHEBEGINN.matcher(this.currentLine).matches()
                || META_REIHEENDE.matcher(this.currentLine).matches();
    }

    /**
     * Reads the gauge name from the current line into the series header if the line matches {@link #META_GAUGENAME}.
     *
     * @return whether the current line was a gauge name line
     */
    private boolean handleMetaGaugeName() {
        final Matcher m = META_GAUGENAME.matcher(this.currentLine);
        if (m.matches()) {
            this.metaPatternsMatched.add(META_GAUGENAME);
            this.seriesHeader.setGaugeName(m.group(1).trim());
            return true;
        }
        return false;
    }

    // private boolean handleMetaGaugeNumber() {
    // final Matcher m = META_GAUGENUMBER.matcher(this.currentLine);
    // if (m.matches()) {
    // this.metaPatternsMatched.add(META_GAUGENUMBER);
    // this.seriesHeader.setGaugeNumber(Long.parseLong(m.group(1)));
    // return true;
    // }
    // return false;
    // }

    /**
     * Replaces the stored column titles with those of the current line if it matches {@link #META_COLUMNTITLES}.
     *
     * @return whether the current line was a valid column title line
     */
    @Override
    protected boolean handleMetaColumnTitles() {
        final Matcher m = META_COLUMNTITLES.matcher(this.currentLine);
        if (!m.matches()) {
            return false;
        }
        this.metaPatternsMatched.add(META_COLUMNTITLES);
        this.columnTitles.clear();
        this.columnTitles.add(m.group(1));
        this.columnTitles.add(m.group(2));
        return true;
    }

    /**
     * Check meta data after all meta lines (#) have been read
     * <p>
     * Looks up the gauge by the number and name stored in the series header. Header parsing is stopped with an error
     * if the gauge is not found or if fewer than two column titles have been read.
     *
     * @return whether the meta data are complete and valid
     */
    @Override
    protected boolean checkMetaData() {
        if (!super.checkRiverExists())
            return false;
        this.seriesHeader.setGauge(this.river.getPeer().findGauge(this.seriesHeader.getGaugeNumber(), this.seriesHeader.getGaugeName()));
        if (this.seriesHeader.getGauge() == null) {
            logError("Gauge not found (%s)", this.seriesHeader.getGaugeName());
            this.headerParsingState = ParsingState.STOP;
            return false;
        }
        if (!super.checkMetaData())
            return false;
        if (this.columnTitles.size() <= 1) {
            logError("No valid column title line (Datum, Q) found");
            this.headerParsingState = ParsingState.STOP;
            return false;
        }
        return true;
    }

}

http://dive4elements.wald.intevation.org