changeset 9658:d86c7cb68b41

Importer (s/u-info) extensions for daily discharge: detect, log and skip lines with a missing date or Q value, or with a duplicate date; detect wrong column titles and cancel the import; report a specific error message if the gauge is not found
author mschaefer
date Mon, 23 Mar 2020 15:33:40 +0100
parents a79881a892c9
children 75bd347147ad
files backend/src/main/java/org/dive4elements/river/importer/sinfo/importitem/DailyDischargeDayLineImport.java backend/src/main/java/org/dive4elements/river/importer/sinfo/importitem/DailyDischargeSeriesImport.java backend/src/main/java/org/dive4elements/river/importer/sinfo/parsers/DailyDischargeParser.java
diffstat 3 files changed, 71 insertions(+), 32 deletions(-) [+]
line wrap: on
line diff
--- a/backend/src/main/java/org/dive4elements/river/importer/sinfo/importitem/DailyDischargeDayLineImport.java	Mon Mar 23 15:26:50 2020 +0100
+++ b/backend/src/main/java/org/dive4elements/river/importer/sinfo/importitem/DailyDischargeDayLineImport.java	Mon Mar 23 15:33:40 2020 +0100
@@ -46,7 +46,7 @@
 
     @Override
     protected DailyDischargeValue queryValueItem(final Session session, final DailyDischarge parent) {
-        final Query query = session.createQuery("FROM DailyDischargeValue WHERE (DailyDischarge=:parent) AND (day=:day)");
+        final Query query = session.createQuery("FROM DailyDischargeValue WHERE (dailyDischarge=:parent) AND (day=:day)");
         query.setParameter("parent", parent);
         query.setParameter("day", this.day);
         final List rows = query.list();
--- a/backend/src/main/java/org/dive4elements/river/importer/sinfo/importitem/DailyDischargeSeriesImport.java	Mon Mar 23 15:26:50 2020 +0100
+++ b/backend/src/main/java/org/dive4elements/river/importer/sinfo/importitem/DailyDischargeSeriesImport.java	Mon Mar 23 15:33:40 2020 +0100
@@ -79,7 +79,7 @@
     }
 
     @Override
-    public List<DailyDischarge> querySeriesItem(final Session session, final River river) {
+    public List<DailyDischarge> querySeriesItem(final Session session, final River river, final boolean doQueryParent) {
         final Query query = session.createQuery("FROM DailyDischarge WHERE gauge=:gauge");
         query.setParameter("gauge", this.gauge);
         return query.list();
--- a/backend/src/main/java/org/dive4elements/river/importer/sinfo/parsers/DailyDischargeParser.java	Mon Mar 23 15:26:50 2020 +0100
+++ b/backend/src/main/java/org/dive4elements/river/importer/sinfo/parsers/DailyDischargeParser.java	Mon Mar 23 15:33:40 2020 +0100
@@ -11,7 +11,6 @@
 package org.dive4elements.river.importer.sinfo.parsers;
 
 import java.io.File;
-import java.io.FilenameFilter;
 import java.text.DateFormat;
 import java.text.SimpleDateFormat;
 import java.util.ArrayList;
@@ -42,21 +41,31 @@
 
     private static final Logger log = Logger.getLogger(DailyDischargeParser.class);
 
-    static final Pattern IMPORT_FILENAME = Pattern.compile("^(.+)_mittlerer_Tagesabfluss.csv", Pattern.CASE_INSENSITIVE);
+    static final Pattern IMPORT_FILENAME = Pattern.compile("^(.+)_mittlerer_Tagesabfluss\\.csv", Pattern.CASE_INSENSITIVE);
 
     private static final Pattern META_GAUGENAME = Pattern.compile("^#\\s*Stations-*Name:\\s*(\\S[^;]*).*", Pattern.CASE_INSENSITIVE);
 
-    private static final Pattern META_GAUGENUMBER = Pattern.compile("^#\\s*Stations-*Nummer:\\s*(\\S[^;]*).*", Pattern.CASE_INSENSITIVE);
+    // private static final Pattern META_GAUGENUMBER = Pattern.compile("^#\\s*Stations-*Nummer:\\s*(\\S[^;]*).*",
+    // Pattern.CASE_INSENSITIVE);
 
-    private static final Pattern META_COLUMNTITLES = Pattern.compile("^#*\\s*Datum\\s*;\\s*Q.*", Pattern.CASE_INSENSITIVE);
+    private static final Pattern META_BETREIBER = Pattern.compile("^#\\s*Betreiber:.*", Pattern.CASE_INSENSITIVE);
+    private static final Pattern META_PARAMETER = Pattern.compile("^#\\s*Parameter-Name:.*", Pattern.CASE_INSENSITIVE);
+    private static final Pattern META_ZEITREIHE = Pattern.compile("^#\\s*# Zeitreihe.*", Pattern.CASE_INSENSITIVE);
+    private static final Pattern META_REIHEBEGINN = Pattern.compile("^#\\s*Beginn der Zeitreihe:.*", Pattern.CASE_INSENSITIVE);
+    private static final Pattern META_REIHEENDE = Pattern.compile("^#\\s*Ende der Zeitreihe:.*", Pattern.CASE_INSENSITIVE);
+
+    private static final Pattern META_COLUMNTITLES = Pattern.compile("^#*\\s*(Datum)\\s*;\\s*(Q[^;]*)", Pattern.CASE_INSENSITIVE);
 
     private static final DateFormat dateFormat = new SimpleDateFormat("dd.MM.yyyy");
 
+    private final List<Date> dates;
+
 
     /***** CONSTRUCTORS *****/
 
     public DailyDischargeParser(final File importPath, final File rootRelativePath, final ImportRiver river) {
         super(importPath, rootRelativePath, river);
+        this.dates = new ArrayList<>();
     }
 
 
@@ -80,13 +89,7 @@
     public static List<DailyDischargeParser> createParsers(final File importDir, final File relativeDir, final ImportRiver river) {
         final List<DailyDischargeParser> parsers = new ArrayList<>();
         if (importDir.exists()) {
-            final File[] files = importDir.listFiles(new FilenameFilter() {
-                @Override
-                public boolean accept(final File dir, final String name) {
-                    return IMPORT_FILENAME.matcher(name).matches();
-                }
-            });
-            for (final File file : files)
+            for (final File file : listFiles(importDir, IMPORT_FILENAME))
                 parsers.add(new DailyDischargeParser(file, new File(relativeDir, file.getName()), river));
         }
         return parsers;
@@ -109,21 +112,37 @@
             day = dateFormat.parse(values[0]);
         }
         catch (final Exception e) {
-            logError("Invalid date in line " + this.in.getLineNumber());
+            logLineWarning("Invalid date");
             return null;
         }
-        if (parseDoubleWithNull(values[1]) == null) {
-            logError("Invalid discharge value in line " + this.in.getLineNumber());
+        final Number q = parseDoubleCheckNull(values, 1);
+        if ((q == null) || Double.isNaN(q.doubleValue())) {
+            logLineWarning(INVALID_VALUE_ERROR_FORMAT, "discharge");
             return null;
         }
-        return new DailyDischargeDayLineImport(day, Double.valueOf(parseDoubleWithNull(values[1]).doubleValue()));
+        if (this.dates.contains(day)) {
+            logLineWarning("Duplicate date");
+            return null;
+        }
+        this.dates.add(day);
+        return new DailyDischargeDayLineImport(day, q.doubleValue());
     }
 
     @Override
     protected boolean handleMetaOther() {
         if (handleMetaGaugeName())
             return true;
-        else if (handleMetaGaugeNumber())
+        // else if (handleMetaGaugeNumber())
+        // return true;
+        else if (META_BETREIBER.matcher(this.currentLine).matches())
+            return true;
+        else if (META_PARAMETER.matcher(this.currentLine).matches())
+            return true;
+        else if (META_ZEITREIHE.matcher(this.currentLine).matches())
+            return true;
+        else if (META_REIHEBEGINN.matcher(this.currentLine).matches())
+            return true;
+        else if (META_REIHEENDE.matcher(this.currentLine).matches())
             return true;
         else
             return false;
@@ -139,30 +158,50 @@
         return false;
     }
 
-    private boolean handleMetaGaugeNumber() {
-        final Matcher m = META_GAUGENUMBER.matcher(this.currentLine);
-        if (m.matches()) {
-            this.metaPatternsMatched.add(META_GAUGENUMBER);
-            this.seriesHeader.setGaugeNumber(Long.parseLong(m.group(1)));
-            return true;
-        }
-        return false;
-    }
+    // private boolean handleMetaGaugeNumber() {
+    // final Matcher m = META_GAUGENUMBER.matcher(this.currentLine);
+    // if (m.matches()) {
+    // this.metaPatternsMatched.add(META_GAUGENUMBER);
+    // this.seriesHeader.setGaugeNumber(Long.parseLong(m.group(1)));
+    // return true;
+    // }
+    // return false;
+    // }
 
     @Override
     protected boolean handleMetaColumnTitles() {
-        if (!META_COLUMNTITLES.matcher(this.currentLine).matches())
+        final Matcher m = META_COLUMNTITLES.matcher(this.currentLine);
+        if (!m.matches()) {
             return false;
+        }
         this.metaPatternsMatched.add(META_COLUMNTITLES);
         this.columnTitles.clear();
-        final String[] titles = this.currentLine.split(SEPARATOR_CHAR, 0);
-        for (int i = 0; i <= titles.length - 1; i++)
-            this.columnTitles.add(titles[i].trim());
+        this.columnTitles.add(m.group(1));
+        this.columnTitles.add(m.group(2));
+        return true;
+    }
+
+    /**
+     * Check meta data after all meta lines (#) have been read
+     */
+    @Override
+    protected boolean checkMetaData() {
+        if (!super.checkRiverExists())
+            return false;
         this.seriesHeader.setGauge(this.river.getPeer().findGauge(this.seriesHeader.getGaugeNumber(), this.seriesHeader.getGaugeName()));
         if (this.seriesHeader.getGauge() == null) {
-            logError("Gauge not found, file skipped");
+            logError("Gauge not found (%s)", this.seriesHeader.getGaugeName());
             this.headerParsingState = ParsingState.STOP;
+            return false;
+        }
+        if (super.checkMetaData() == false)
+            return false;
+        if (this.columnTitles.size() <= 1) {
+            logError("No valid column title line (Datum, Q) found");
+            this.headerParsingState = ParsingState.STOP;
+            return false;
         }
         return true;
     }
+
 }

http://dive4elements.wald.intevation.org