view backend/src/main/java/org/dive4elements/river/importer/sinfo/parsers/DailyDischargeParser.java @ 9663:7c1da1b3f6b8

Importer (s/u-info) extensions: further year/date checks for collision files
author mschaefer
date Fri, 03 Apr 2020 11:56:45 +0200
parents d86c7cb68b41
children
line wrap: on
line source
/* Copyright (C) 2017 by Bundesanstalt für Gewässerkunde
 * Software engineering by
 *  Björnsen Beratende Ingenieure GmbH
 *  Dr. Schumacher Ingenieurbüro für Wasser und Umwelt
 *
 * This file is Free Software under the GNU AGPL (>=v3)
 * and comes with ABSOLUTELY NO WARRANTY! Check out the
 * documentation coming with Dive4Elements River for details.
 */

package org.dive4elements.river.importer.sinfo.parsers;

import java.io.File;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;
import org.dive4elements.river.importer.Config;
import org.dive4elements.river.importer.ImportRiver;
import org.dive4elements.river.importer.common.AbstractParser;
import org.dive4elements.river.importer.common.ParsingState;
import org.dive4elements.river.importer.sinfo.importitem.DailyDischargeDayLineImport;
import org.dive4elements.river.importer.sinfo.importitem.DailyDischargeSeriesImport;
import org.dive4elements.river.model.sinfo.DailyDischarge;
import org.dive4elements.river.model.sinfo.DailyDischargeValue;

/**
 * Reads and parses a daily discharge file
 *
 * @author Matthias Schäfer
 *
 */
public class DailyDischargeParser extends AbstractParser<DailyDischarge, DailyDischargeValue, DailyDischargeDayLineImport, DailyDischargeSeriesImport> {

    /***** FIELDS *****/

    private static final Logger log = Logger.getLogger(DailyDischargeParser.class);

    /** File name pattern of the daily discharge import files (group 1: the file name prefix). */
    static final Pattern IMPORT_FILENAME = Pattern.compile("^(.+)_mittlerer_Tagesabfluss\\.csv", Pattern.CASE_INSENSITIVE);

    /** Meta line holding the gauge (station) name; group 1 is the name up to the first semicolon. */
    private static final Pattern META_GAUGENAME = Pattern.compile("^#\\s*Stations-*Name:\\s*(\\S[^;]*).*", Pattern.CASE_INSENSITIVE);

    // Gauge number parsing is currently disabled; the pattern is kept for reference.
    // private static final Pattern META_GAUGENUMBER = Pattern.compile("^#\\s*Stations-*Nummer:\\s*(\\S[^;]*).*",
    // Pattern.CASE_INSENSITIVE);

    // Meta lines that are recognized (so they are not reported as unknown) but whose content is not evaluated
    private static final Pattern META_BETREIBER = Pattern.compile("^#\\s*Betreiber:.*", Pattern.CASE_INSENSITIVE);
    private static final Pattern META_PARAMETER = Pattern.compile("^#\\s*Parameter-Name:.*", Pattern.CASE_INSENSITIVE);
    private static final Pattern META_ZEITREIHE = Pattern.compile("^#\\s*# Zeitreihe.*", Pattern.CASE_INSENSITIVE);
    private static final Pattern META_REIHEBEGINN = Pattern.compile("^#\\s*Beginn der Zeitreihe:.*", Pattern.CASE_INSENSITIVE);
    private static final Pattern META_REIHEENDE = Pattern.compile("^#\\s*Ende der Zeitreihe:.*", Pattern.CASE_INSENSITIVE);

    /** Column title line: group 1 is the date column title, group 2 the discharge column title. */
    private static final Pattern META_COLUMNTITLES = Pattern.compile("^#*\\s*(Datum)\\s*;\\s*(Q[^;]*)", Pattern.CASE_INSENSITIVE);

    /** Format of the date column. */
    private static final String DATE_PATTERN = "dd.MM.yyyy";

    /**
     * Date parser of this instance. SimpleDateFormat is not thread-safe, so every parser owns its formatter
     * instead of sharing a static one.
     */
    private final DateFormat dateFormat;

    /** Days already imported from this file; a hash set keeps the duplicate check constant-time per line. */
    private final Set<Date> dates;


    /***** CONSTRUCTORS *****/

    /**
     * Creates a parser for a single daily discharge file.
     *
     * @param importPath the file to parse
     * @param rootRelativePath the path of the file relative to the import root
     * @param river the river the file is imported for
     */
    public DailyDischargeParser(final File importPath, final File rootRelativePath, final ImportRiver river) {
        super(importPath, rootRelativePath, river);
        this.dateFormat = new SimpleDateFormat(DATE_PATTERN);
        // Strict parsing: a lenient formatter would silently roll an impossible date (e.g. 31.04.) over to the
        // next valid day, storing the value under a wrong date and making the real next day look like a duplicate
        this.dateFormat.setLenient(false);
        this.dates = new HashSet<>();
    }


    /***** METHODS *****/

    @Override
    protected Logger getLog() {
        return log;
    }

    /**
     * Whether this import type shall be skipped
     */
    public static boolean shallSkip() {
        return Config.INSTANCE.skipSInfoDailyDischarge();
    }

    /**
     * Creates a list of parsers for all daily discharge import files in a directory
     *
     * @param importDir the directory to search for files matching {@link #IMPORT_FILENAME}
     * @param relativeDir the root-relative path of that directory
     * @param river the river the files are imported for
     * @return one parser per matching file; empty if the directory does not exist
     */
    public static List<DailyDischargeParser> createParsers(final File importDir, final File relativeDir, final ImportRiver river) {
        final List<DailyDischargeParser> parsers = new ArrayList<>();
        if (importDir.exists()) {
            for (final File file : listFiles(importDir, IMPORT_FILENAME)) {
                parsers.add(new DailyDischargeParser(file, new File(relativeDir, file.getName()), river));
            }
        }
        return parsers;
    }

    @Override
    protected KmMode kmMode() {
        return KmMode.NONE;
    }

    @Override
    protected DailyDischargeSeriesImport createSeriesImport(final String filename) {
        return new DailyDischargeSeriesImport(filename);
    }

    /**
     * Creates the import item of a data line: column 0 is the day, column 1 the discharge.
     *
     * @param km not used by this parser
     * @param values the column values of the current line
     * @return the day line import, or null (after logging a line warning) if the date is invalid, the discharge is
     *         missing or not a number, or the day has already occurred in this file
     */
    @Override
    protected DailyDischargeDayLineImport createKmLineImport(final Double km, final String[] values) {
        final Date day;
        try {
            day = this.dateFormat.parse(values[0]);
        }
        catch (final Exception e) {
            // Deliberately broad: covers unparseable text as well as a missing/null date column
            logLineWarning("Invalid date");
            return null;
        }
        final Number q = parseDoubleCheckNull(values, 1);
        if ((q == null) || Double.isNaN(q.doubleValue())) {
            logLineWarning(INVALID_VALUE_ERROR_FORMAT, "discharge");
            return null;
        }
        // add() returns false if the day is already present, so check and registration are a single lookup
        if (!this.dates.add(day)) {
            logLineWarning("Duplicate date");
            return null;
        }
        return new DailyDischargeDayLineImport(day, q.doubleValue());
    }

    /**
     * Handles the meta lines specific to daily discharge files.
     *
     * @return true if the current line is the gauge name line or one of the recognized but ignored meta lines
     */
    @Override
    protected boolean handleMetaOther() {
        // Gauge number handling is currently disabled:
        // else if (handleMetaGaugeNumber())
        // return true;
        return handleMetaGaugeName()
                || META_BETREIBER.matcher(this.currentLine).matches()
                || META_PARAMETER.matcher(this.currentLine).matches()
                || META_ZEITREIHE.matcher(this.currentLine).matches()
                || META_REIHEBEGINN.matcher(this.currentLine).matches()
                || META_REIHEENDE.matcher(this.currentLine).matches();
    }

    /**
     * Stores the trimmed gauge name in the series header if the current line is the gauge name meta line.
     *
     * @return true if the current line matched
     */
    private boolean handleMetaGaugeName() {
        final Matcher m = META_GAUGENAME.matcher(this.currentLine);
        if (!m.matches()) {
            return false;
        }
        this.metaPatternsMatched.add(META_GAUGENAME);
        this.seriesHeader.setGaugeName(m.group(1).trim());
        return true;
    }

    // private boolean handleMetaGaugeNumber() {
    // final Matcher m = META_GAUGENUMBER.matcher(this.currentLine);
    // if (m.matches()) {
    // this.metaPatternsMatched.add(META_GAUGENUMBER);
    // this.seriesHeader.setGaugeNumber(Long.parseLong(m.group(1)));
    // return true;
    // }
    // return false;
    // }

    /**
     * Replaces the column titles by the date and discharge titles if the current line is the column title line.
     *
     * @return true if the current line matched
     */
    @Override
    protected boolean handleMetaColumnTitles() {
        final Matcher m = META_COLUMNTITLES.matcher(this.currentLine);
        if (!m.matches()) {
            return false;
        }
        this.metaPatternsMatched.add(META_COLUMNTITLES);
        this.columnTitles.clear();
        this.columnTitles.add(m.group(1));
        this.columnTitles.add(m.group(2));
        return true;
    }

    /**
     * Check meta data after all meta lines (#) have been read
     *
     * @return false if the river check fails, the gauge cannot be found, the inherited check fails, or fewer than
     *         two column titles were read; header parsing is stopped for the gauge and column title failures
     */
    @Override
    protected boolean checkMetaData() {
        if (!super.checkRiverExists()) {
            return false;
        }
        this.seriesHeader.setGauge(this.river.getPeer().findGauge(this.seriesHeader.getGaugeNumber(), this.seriesHeader.getGaugeName()));
        if (this.seriesHeader.getGauge() == null) {
            logError("Gauge not found (%s)", this.seriesHeader.getGaugeName());
            this.headerParsingState = ParsingState.STOP;
            return false;
        }
        if (!super.checkMetaData()) {
            return false;
        }
        if (this.columnTitles.size() <= 1) {
            logError("No valid column title line (Datum, Q) found");
            this.headerParsingState = ParsingState.STOP;
            return false;
        }
        return true;
    }

}

http://dive4elements.wald.intevation.org