Mercurial > dive4elements > river
view backend/src/main/java/org/dive4elements/river/importer/sinfo/parsers/DailyDischargeParser.java @ 9658:d86c7cb68b41
Importer (s/u-info) extensions: daily discharge: detecting, logging and skipping lines with missing date or q, or duplicate date,
detecting wrong column titles and cancelling the import,
specific error message if gauge not found
author | mschaefer |
---|---|
date | Mon, 23 Mar 2020 15:33:40 +0100 |
parents | abe069eb180c |
children |
line wrap: on
line source
/* Copyright (C) 2017 by Bundesanstalt für Gewässerkunde
 * Software engineering by
 *  Björnsen Beratende Ingenieure GmbH
 *  Dr. Schumacher Ingenieurbüro für Wasser und Umwelt
 *
 * This file is Free Software under the GNU AGPL (>=v3)
 * and comes with ABSOLUTELY NO WARRANTY! Check out the
 * documentation coming with Dive4Elements River for details.
 */

package org.dive4elements.river.importer.sinfo.parsers;

import java.io.File;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;
import org.dive4elements.river.importer.Config;
import org.dive4elements.river.importer.ImportRiver;
import org.dive4elements.river.importer.common.AbstractParser;
import org.dive4elements.river.importer.common.ParsingState;
import org.dive4elements.river.importer.sinfo.importitem.DailyDischargeDayLineImport;
import org.dive4elements.river.importer.sinfo.importitem.DailyDischargeSeriesImport;
import org.dive4elements.river.model.sinfo.DailyDischarge;
import org.dive4elements.river.model.sinfo.DailyDischargeValue;

/**
 * Reads and parses a daily discharge file.
 * <p>
 * A file is picked up when its name matches {@link #IMPORT_FILENAME}. The meta lines handled here are the
 * gauge name (stored in the series header) and a fixed set of lines that are recognized but not evaluated.
 * Each data line is expected to carry a date in the format {@code dd.MM.yyyy} in its first column and a
 * discharge value in its second column; lines with an invalid date, an invalid discharge or an already
 * seen date are logged and skipped.
 *
 * @author Matthias Schäfer
 *
 */
public class DailyDischargeParser extends AbstractParser<DailyDischarge, DailyDischargeValue, DailyDischargeDayLineImport, DailyDischargeSeriesImport> {

    /***** FIELDS *****/

    private static final Logger log = Logger.getLogger(DailyDischargeParser.class);

    /** File name pattern of the daily discharge import files. */
    static final Pattern IMPORT_FILENAME = Pattern.compile("^(.+)_mittlerer_Tagesabfluss\\.csv", Pattern.CASE_INSENSITIVE);

    /** Meta line carrying the gauge name; group 1 is the name. */
    private static final Pattern META_GAUGENAME = Pattern.compile("^#\\s*Stations-*Name:\\s*(\\S[^;]*).*", Pattern.CASE_INSENSITIVE);

    // private static final Pattern META_GAUGENUMBER = Pattern.compile("^#\\s*Stations-*Nummer:\\s*(\\S[^;]*).*",
    // Pattern.CASE_INSENSITIVE);

    // The following meta lines are recognized (so they count as handled) but their content is not evaluated.
    private static final Pattern META_BETREIBER = Pattern.compile("^#\\s*Betreiber:.*", Pattern.CASE_INSENSITIVE);

    private static final Pattern META_PARAMETER = Pattern.compile("^#\\s*Parameter-Name:.*", Pattern.CASE_INSENSITIVE);

    private static final Pattern META_ZEITREIHE = Pattern.compile("^#\\s*# Zeitreihe.*", Pattern.CASE_INSENSITIVE);

    private static final Pattern META_REIHEBEGINN = Pattern.compile("^#\\s*Beginn der Zeitreihe:.*", Pattern.CASE_INSENSITIVE);

    private static final Pattern META_REIHEENDE = Pattern.compile("^#\\s*Ende der Zeitreihe:.*", Pattern.CASE_INSENSITIVE);

    /** Column title line; group 1 is the date column title, group 2 the discharge column title. */
    private static final Pattern META_COLUMNTITLES = Pattern.compile("^#*\\s*(Datum)\\s*;\\s*(Q[^;]*)", Pattern.CASE_INSENSITIVE);

    /** Pattern of the date column. */
    private static final String DATE_PATTERN = "dd.MM.yyyy";

    /**
     * Date parser of this instance. SimpleDateFormat is not thread-safe, hence every parser owns its own
     * instance instead of sharing a static one.
     */
    private final DateFormat dateFormat;

    /**
     * Dates already accepted from the current file, used to detect duplicates. A hash set keeps the
     * duplicate check at constant cost per line (a list scan would make long daily series quadratic).
     */
    private final Set<Date> dates;


    /***** CONSTRUCTORS *****/

    public DailyDischargeParser(final File importPath, final File rootRelativePath, final ImportRiver river) {
        super(importPath, rootRelativePath, river);
        this.dateFormat = createDateFormat();
        this.dates = new HashSet<>();
    }


    /***** METHODS *****/

    /**
     * Creates the strict date parser for the date column.
     * <p>
     * Lenient parsing (the JDK default) would silently roll over impossible dates such as 31.02.2020 to
     * another day, which stores a wrong date and can make a later, correct line look like a duplicate.
     */
    private static DateFormat createDateFormat() {
        final DateFormat format = new SimpleDateFormat(DATE_PATTERN);
        format.setLenient(false);
        return format;
    }

    @Override
    protected Logger getLog() {
        return log;
    }

    /**
     * Whether this import type shall be skipped
     */
    public static boolean shallSkip() {
        return Config.INSTANCE.skipSInfoDailyDischarge();
    }

    /**
     * Creates a list of parsers for all daily discharge import files in a directory
     *
     * @param importDir
     *            directory to search for files matching {@link #IMPORT_FILENAME}
     * @param relativeDir
     *            directory the relative path of each found file is built from
     * @param river
     *            river passed on to every created parser
     * @return one parser per found file; empty if the directory does not exist
     */
    public static List<DailyDischargeParser> createParsers(final File importDir, final File relativeDir, final ImportRiver river) {
        final List<DailyDischargeParser> parsers = new ArrayList<>();
        if (importDir.exists()) {
            for (final File file : listFiles(importDir, IMPORT_FILENAME)) {
                parsers.add(new DailyDischargeParser(file, new File(relativeDir, file.getName()), river));
            }
        }
        return parsers;
    }

    @Override
    protected KmMode kmMode() {
        return KmMode.NONE;
    }

    @Override
    protected DailyDischargeSeriesImport createSeriesImport(final String filename) {
        return new DailyDischargeSeriesImport(filename);
    }

    /**
     * Creates the import item of one data line.
     *
     * @param km
     *            not used by this parser
     * @param values
     *            column values of the line; index 0 is parsed as date, index 1 as discharge
     * @return the day line import, or null (after logging a warning) if the date is invalid, the
     *         discharge is missing or not a number, or the date has already been accepted before
     */
    @Override
    protected DailyDischargeDayLineImport createKmLineImport(final Double km, final String[] values) {
        final Date day;
        try {
            day = this.dateFormat.parse(values[0]);
        }
        catch (final Exception e) {
            // Deliberately broad: besides an unparseable text this also covers a missing date column
            logLineWarning("Invalid date");
            return null;
        }
        final Number q = parseDoubleCheckNull(values, 1);
        if ((q == null) || Double.isNaN(q.doubleValue())) {
            logLineWarning(INVALID_VALUE_ERROR_FORMAT, "discharge");
            return null;
        }
        // Registered only after the discharge check, so a skipped line does not block a later valid line of the same day
        if (!this.dates.add(day)) {
            logLineWarning("Duplicate date");
            return null;
        }
        return new DailyDischargeDayLineImport(day, q.doubleValue());
    }

    /**
     * Handles the meta lines specific to daily discharge files: stores the gauge name, and accepts the
     * known informational lines without evaluating them.
     *
     * @return true if the current line has been recognized
     */
    @Override
    protected boolean handleMetaOther() {
        if (handleMetaGaugeName()) {
            return true;
        }
        // else if (handleMetaGaugeNumber())
        // return true;
        return META_BETREIBER.matcher(this.currentLine).matches()
                || META_PARAMETER.matcher(this.currentLine).matches()
                || META_ZEITREIHE.matcher(this.currentLine).matches()
                || META_REIHEBEGINN.matcher(this.currentLine).matches()
                || META_REIHEENDE.matcher(this.currentLine).matches();
    }

    /**
     * Stores the trimmed gauge name in the series header if the current line is the gauge name meta line.
     *
     * @return true if the current line is the gauge name meta line
     */
    private boolean handleMetaGaugeName() {
        final Matcher m = META_GAUGENAME.matcher(this.currentLine);
        if (!m.matches()) {
            return false;
        }
        this.metaPatternsMatched.add(META_GAUGENAME);
        this.seriesHeader.setGaugeName(m.group(1).trim());
        return true;
    }

    // private boolean handleMetaGaugeNumber() {
    // final Matcher m = META_GAUGENUMBER.matcher(this.currentLine);
    // if (m.matches()) {
    // this.metaPatternsMatched.add(META_GAUGENUMBER);
    // this.seriesHeader.setGaugeNumber(Long.parseLong(m.group(1)));
    // return true;
    // }
    // return false;
    // }

    /**
     * Replaces the column titles by the two titles of the current line if it is the column title line.
     *
     * @return true if the current line is the column title line
     */
    @Override
    protected boolean handleMetaColumnTitles() {
        final Matcher m = META_COLUMNTITLES.matcher(this.currentLine);
        if (!m.matches()) {
            return false;
        }
        this.metaPatternsMatched.add(META_COLUMNTITLES);
        this.columnTitles.clear();
        this.columnTitles.add(m.group(1));
        this.columnTitles.add(m.group(2));
        return true;
    }

    /**
     * Check meta data after all meta lines (#) have been read
     * <p>
     * Looks up the gauge by the number and name of the series header and requires a column title line with
     * two titles. If the gauge is not found or the column titles are missing, an error is logged and the
     * header parsing state is set to STOP.
     *
     * @return true if the meta data are complete and the gauge has been found
     */
    @Override
    protected boolean checkMetaData() {
        if (!super.checkRiverExists()) {
            return false;
        }
        this.seriesHeader.setGauge(this.river.getPeer().findGauge(this.seriesHeader.getGaugeNumber(), this.seriesHeader.getGaugeName()));
        if (this.seriesHeader.getGauge() == null) {
            logError("Gauge not found (%s)", this.seriesHeader.getGaugeName());
            this.headerParsingState = ParsingState.STOP;
            return false;
        }
        if (!super.checkMetaData()) {
            return false;
        }
        if (this.columnTitles.size() <= 1) {
            logError("No valid column title line (Datum, Q) found");
            this.headerParsingState = ParsingState.STOP;
            return false;
        }
        return true;
    }
}