diff backend/src/main/java/org/dive4elements/river/importer/common/AbstractParser.java @ 9650:a2a42a6bac6b

Importer (s/u-info) extensions: outer try/catch around parsing that logs the line number; catch the parsing exception raised when a line has too few value fields; include the line number in parsing error and warning log messages; detect and reject duplicate data series; differentiate better between error and warning log messages
author mschaefer
date Mon, 23 Mar 2020 14:57:03 +0100
parents ddebd4c2fe93
children
line wrap: on
line diff
--- a/backend/src/main/java/org/dive4elements/river/importer/common/AbstractParser.java	Wed Dec 04 16:59:25 2019 +0100
+++ b/backend/src/main/java/org/dive4elements/river/importer/common/AbstractParser.java	Mon Mar 23 14:57:03 2020 +0100
@@ -13,7 +13,6 @@
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FilenameFilter;
-import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.LineNumberReader;
 import java.math.BigDecimal;
@@ -30,6 +29,8 @@
 import org.apache.log4j.Logger;
 import org.dive4elements.river.backend.utils.EpsilonComparator;
 import org.dive4elements.river.importer.ImportRiver;
+import org.dive4elements.river.importer.ImporterSession;
+import org.hibernate.Session;
 
 /**
  * Abstract base class for a parser of one FLYS csv data file.<br />
@@ -76,6 +77,8 @@
 
     private static DecimalFormat bigDecimalFormat;
 
+    protected static final String INVALID_VALUE_ERROR_FORMAT = "Invalid or missing %s value";
+
     static {
         bigDecimalFormat = (DecimalFormat) NumberFormat.getInstance(Locale.ROOT);
         bigDecimalFormat.setParseBigDecimal(true);
@@ -161,7 +164,7 @@
     }
 
 
-    /***** METHODS *****/
+    /***** FILE-METHODS *****/
 
     /**
      * Lists all files from a directory having a type extension (starting with dot)
@@ -181,10 +184,29 @@
     }
 
     /**
+     * Lists all files from a directory matching a file name pattern
+     */
+    protected static List<File> listFiles(final File importDir, final Pattern filenamePattern) {
+        final File[] files = importDir.listFiles(new FilenameFilter() {
+            @Override
+            public boolean accept(final File dir, final String name) {
+                return filenamePattern.matcher(name).matches();
+            }
+        });
+        final List<File> fl = new ArrayList<>();
+        if (files != null)
+            for (final File file : files)
+                fl.add(file);
+        return fl;
+    }
+
+    /***** PARSE-METHODS *****/
+
+    /**
      * Parses a file and adds series and values to the parser's collection
      */
     @Override
-    public void parse() throws IOException {
+    public void parse() throws Exception {
         logStartInfo();
         this.seriesHeader = createSeriesImport(this.importPath.getName().replaceAll("\\.csv", ""));
         this.metaPatternsMatched.clear();
@@ -195,24 +217,32 @@
                 this.in = new LineNumberReader(new InputStreamReader(new FileInputStream(this.importPath), ENCODING));
             }
             catch (final Exception e) {
-                logError("Could not open (" + e.getMessage() + ")");
+                logError("Could not open (%s)", e.getMessage());
                 this.headerParsingState = ParsingState.STOP;
             }
-            this.currentLine = null;
-            while (this.headerParsingState != ParsingState.STOP) {
-                this.currentLine = this.in.readLine();
-                if (this.currentLine == null)
-                    break;
-                this.currentLine = this.currentLine.trim();
-                if (this.currentLine.isEmpty())
-                    continue;
-                if (this.headerParsingState == ParsingState.CONTINUE)
-                    handleMetaLine();
-                else
-                    handleDataLine();
+            try {
+                this.currentLine = null;
+                while (this.headerParsingState != ParsingState.STOP) {
+                    this.currentLine = this.in.readLine();
+                    if (this.currentLine == null)
+                        break;
+                    this.currentLine = this.currentLine.trim();
+                    if (this.currentLine.isEmpty())
+                        continue;
+                    if (this.headerParsingState == ParsingState.CONTINUE) {
+                        handleMetaLine();
+                        if (this.headerParsingState == ParsingState.DONE)
+                            checkMetaData();
+                    }
+                    else
+                        handleDataLine();
+                }
+                if (this.headerParsingState != ParsingState.STOP)
+                    getLog().info(String.format("Number of values found: %d", this.seriesHeader.getValueCount()));
             }
-            if (this.headerParsingState != ParsingState.STOP)
-                getLog().info("Number of values found: " + this.seriesHeader.getValueCount());
+            catch (final Exception e) {
+                throw new Exception(String.format("Parsing error (last read line: %d)", this.in.getLineNumber() + 1), e);
+            }
         }
         finally {
             if (this.in != null) {
@@ -228,25 +258,7 @@
      * Writes the parse start info to the log
      */
     protected void logStartInfo() {
-        getLog().info("Start parsing:;'" + this.rootRelativePath + "'");
-    }
-
-    /**
-     * Stores the parsed series and values in the database
-     */
-    @Override
-    public void store() {
-        if (this.headerParsingState != ParsingState.STOP) {
-            this.seriesHeader.store(this.river.getPeer());
-            final String counts = String.format("parse=%d, insert=%d, update/ignore=%d", this.seriesHeader.getValueCount(),
-                    this.seriesHeader.getValueStoreCount(StoreMode.INSERT), this.seriesHeader.getValueStoreCount(StoreMode.UPDATE));
-            if (this.seriesHeader.getValueCount() > this.seriesHeader.getValueStoreCount(StoreMode.INSERT))
-                logWarning("Number of value inserts less than number parsed: " + counts);
-            else
-                getLog().info("Number of values records: " + counts);
-        }
-        else
-            logWarning("Severe parsing errors, not storing series '" + this.seriesHeader.getFilename() + "'");
+        getLog().info(String.format("Start parsing:;'%s'", this.rootRelativePath));
     }
 
     /**
@@ -259,9 +271,11 @@
     /**
      * Parses a number string with dot or comma as decimal char, and returning null in case of an error
      */
-    public static Number parseDoubleWithNull(final String text) {
+    public static Number parseDoubleCheckNull(final String[] values, final int index) {
+        if (index > values.length - 1)
+            return null;
         try {
-            return parseDouble(text);
+            return parseDouble(values[index]);
         }
         catch (final Exception e) {
             return null;
@@ -273,11 +287,25 @@
      *
      * @throws ParseException
      */
-    public static Number parseDouble(final String text) throws ParseException {
+    private static Number parseDouble(final String text) throws ParseException {
         return numberFormat.parse(text.replace(',', '.'));
     }
 
     /**
+     * Parses an integer number string, returning null in case of an error or if the field is missing
+     */
+    public static Integer parseIntegerCheckNull(final String[] values, final int index) {
+        if (index > values.length - 1)
+            return null;
+        try {
+            return Integer.valueOf((values[index]));
+        }
+        catch (final Exception e) {
+            return null;
+        }
+    }
+
+    /**
      * Parses a number string as a BigDecimal, replacing a comma with a dot first
      */
     public static BigDecimal parseDecimal(final String text) throws ParseException {
@@ -285,50 +313,13 @@
     }
 
     /**
-     * Gets the class's logger
-     */
-    protected abstract Logger getLog();
-
-    /**
-     * Logs an error message, appending the relative file path
-     */
-    protected void logError(final String message) {
-        getLog().error(message + ";" + this.rootRelativePath);
-    }
-
-    /**
-     * Logs a warning message, appending the relative file path
-     */
-    protected void logWarning(final String message) {
-        getLog().warn(message + ";" + this.rootRelativePath);
-    }
-
-    /**
-     * Logs an info message, appending the relative file path
-     */
-    protected void logInfo(final String message) {
-        getLog().info(message + ";" + this.rootRelativePath);
-    }
-
-    /**
-     * Logs a debug message, appending the relative file path
-     */
-    protected void logDebug(final String message) {
-        getLog().debug(message + ";" + this.rootRelativePath);
-    }
-
-    /**
-     * Logs a trace message, appending the relative file path
-     */
-    protected void logTrace(final String message) {
-        getLog().trace(message + ";" + this.rootRelativePath);
-    }
-
-    /**
      * Creates a new series import object
      */
     protected abstract HEADER createSeriesImport(final String filename);
 
+
+    /***** METAHEADER-PARSE-METHODS *****/
+
     protected void handleMetaLine() {
         if (META_SUBGROUP.matcher(this.currentLine).matches())
             return;
@@ -348,10 +339,12 @@
         else {
             if (this.currentLine.startsWith(START_META_CHAR)) {
                 if (this.headerParsingState != ParsingState.IGNORE)
-                    logWarning("Not matching any known meta type in line " + this.in.getLineNumber() + ", ignored");
+                    logLineWarning("Not matching any known meta type");
                 else
                     this.headerParsingState = ParsingState.CONTINUE;
             }
+            else
+                this.headerParsingState = ParsingState.DONE; // no more meta data expected, if neither meta line nor empty line
         }
     }
 
@@ -396,7 +389,8 @@
     /**
      * Parses a header line for the km table column header line
      *
-     * @return Whether the line has been handled and we are ready for reading the km values lines
+     * @return Whether the line has been handled (also in case of State=STOP),<br>
+     *         and we are ready for reading the km values lines (or cancel parsing)
      */
     protected boolean handleMetaColumnTitles() {
         if (META_COLUMNTITLES.matcher(this.currentLine).matches()) {
@@ -411,33 +405,76 @@
     }
 
     /**
+     * Checks the meta data after all meta data lines (#) have been read; sets the state to STOP and returns false on failure
+     */
+    protected boolean checkMetaData() {
+        if (this.columnTitles.size() <= 1) {
+            logError("No valid header line with column titles found");
+            this.headerParsingState = ParsingState.STOP;
+            return false;
+        }
+        if (checkSeriesExistsAlready()) {
+            logError("Data series/filename exists already in the database");
+            this.headerParsingState = ParsingState.STOP;
+            return false;
+        }
+        return true;
+    }
+
+    /**
+     * Checks the existence of the active series in the database
+     */
+    protected boolean checkSeriesExistsAlready() {
+        if (!checkRiverExists())
+            return false;
+        final Session session = ImporterSession.getInstance().getDatabaseSession();
+        final List<DB_SERIES> rows = this.seriesHeader.querySeriesItem(session, this.river.getPeer(), true);
+        return !rows.isEmpty();
+    }
+
+    /**
+     * Checks the existence of the active river in the database
+     */
+    protected boolean checkRiverExists() {
+        return (this.river.getPeer(false) != null);
+    }
+
+
+    /***** VALUELINE-PARSE-METHODS *****/
+
+    /**
      * Parses a values line and adds the values record
      */
     protected void handleDataLine() {
         final String[] values = this.currentLine.split(SEPARATOR_CHAR, 0);
         // Skip import line without data or only km
-        if (values.length < 2)
+        if (values.length < 2) {
+            logLineWarning("Too few data");
             return;
+        }
         Double km = Double.NaN;
         if (kmMode() != KmMode.NONE) {
             try {
                 km = Double.valueOf(parseDouble(values[0]).doubleValue());
                 if (kmMode() == KmMode.UNIQUE) {
                     if (this.kmExists.contains(km)) {
-                        logWarning("Ignoring duplicate station '" + values[0] + "' in line " + this.in.getLineNumber());
+                        logLineWarning("Duplicate km '%s'", values[0]);
                         return;
                     }
                     this.kmExists.add(km);
                 }
             }
             catch (final Exception e) {
-                logError("Not parseable km in line " + this.in.getLineNumber() + ": " + e.getMessage());
+                logLineWarning("Invalid km: %s", e.getMessage());
                 return;
             }
         }
         final KMLINE value = createKmLineImport(km, values);
-        if (value != null)
-            this.seriesHeader.addValue(value);
+        if (value != null) {
+            final boolean added = this.seriesHeader.addValue(value);
+            if (!added)
+                logLineWarning("Duplicate data line");
+        }
     }
 
     /**
@@ -454,4 +491,118 @@
      * @return value item, or null if parse error
      */
     protected abstract KMLINE createKmLineImport(final Double km, final String[] values);
+
+
+    /***** STORE-METHODS *****/
+
+    /**
+     * Stores the parsed series and values in the database
+     */
+    @Override
+    public void store() {
+        if (this.headerParsingState != ParsingState.STOP) {
+            this.seriesHeader.store(this.river.getPeer());
+            final String counts = String.format("parse=%d, insert=%d, update/ignore=%d", this.seriesHeader.getValueCount(),
+                    this.seriesHeader.getValueStoreCount(StoreMode.INSERT), this.seriesHeader.getValueStoreCount(StoreMode.UPDATE));
+            if (this.seriesHeader.getValueCount() > this.seriesHeader.getValueStoreCount(StoreMode.INSERT))
+                logWarning("Number of value inserts less than number parsed: %s", counts);
+            else
+                getLog().info("Number of values records: " + counts);
+        }
+        else
+            logWarning("Severe parsing errors, not storing series '%s'", this.seriesHeader.getFilename());
+    }
+
+
+    /***** LOG-METHODS *****/
+
+    /**
+     * Gets the class's logger
+     */
+    protected abstract Logger getLog();
+
+    /**
+     * Logs an error message, appending the relative file path
+     */
+    protected void logError(final String message) {
+        getLog().error(buildLogMessage(message));
+    }
+
+    /**
+     * Logs a formatted error message (String.format with args), appending the relative file path
+     */
+    protected void logError(final String format, final Object... args) {
+        getLog().error(buildLogMessage(String.format(format, args)));
+    }
+
+    /**
+     * Logs an error message with current line number, appending the relative file path
+     */
+    protected void logLineError(final String message) {
+        getLog().error(buildLineLogMessage(message));
+    }
+
+    /**
+     * Logs a formatted error message prefixed with the current line number, appending the relative file path
+     */
+    protected void logLineError(final String format, final Object... args) {
+        getLog().error(buildLineLogMessage(String.format(format, args)));
+    }
+
+    /**
+     * Logs a warning message, appending the relative file path
+     */
+    protected void logWarning(final String message) {
+        getLog().warn(buildLogMessage(message));
+    }
+
+    /**
+     * Logs a formatted warning message (String.format with args), appending the relative file path
+     */
+    protected void logWarning(final String format, final Object... args) {
+        getLog().warn(buildLogMessage(String.format(format, args)));
+    }
+
+    /**
+     * Logs a warning message prefixed with the current line number, appending the relative file path
+     */
+    protected void logLineWarning(final String message) {
+        getLog().warn(buildLineLogMessage(message));
+    }
+
+    /**
+     * Logs a formatted warning message prefixed with the current line number, appending the relative file path
+     */
+    protected void logLineWarning(final String format, final Object... args) {
+        getLog().warn(buildLineLogMessage(String.format(format, args)));
+    }
+
+    /**
+     * Logs an info message, appending the relative file path
+     */
+    protected void logInfo(final String message) {
+        getLog().info(buildLogMessage(message));
+    }
+
+    /**
+     * Logs a debug message, appending the relative file path
+     */
+    protected void logDebug(final String message) {
+        getLog().debug(buildLogMessage(message));
+    }
+
+    /**
+     * Logs a trace message, appending the relative file path
+     */
+    protected void logTrace(final String message) {
+        getLog().trace(buildLogMessage(message));
+    }
+
+    private String buildLogMessage(final String message) {
+        return String.format("%s;%s", message, this.rootRelativePath);
+    }
+
+    private String buildLineLogMessage(final String message) {
+        return String.format("Line %d: %s;%s", this.in.getLineNumber(), message, this.rootRelativePath);
+    }
 }

http://dive4elements.wald.intevation.org