diff backend/src/main/java/org/dive4elements/river/importer/parsers/BedHeightParser.java @ 8975:a0a0a7f912ab

Added new columns bed_height.comment and bed_height.sounding_width_info; extended the bed height parser to read the new metadata and the min_height/max_height columns
author mschaefer
date Tue, 03 Apr 2018 10:40:57 +0200
parents 5e38e2924c07
children 2693bfaf503d
line wrap: on
line diff
--- a/backend/src/main/java/org/dive4elements/river/importer/parsers/BedHeightParser.java	Tue Apr 03 10:37:30 2018 +0200
+++ b/backend/src/main/java/org/dive4elements/river/importer/parsers/BedHeightParser.java	Tue Apr 03 10:40:57 2018 +0200
@@ -9,45 +9,40 @@
 package org.dive4elements.river.importer.parsers;
 
 import java.io.File;
-
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.LineNumberReader;
 import java.math.BigDecimal;
-
 import java.text.NumberFormat;
 import java.text.ParseException;
-
 import java.util.ArrayList;
 import java.util.Date;
+import java.util.EnumMap;
 import java.util.List;
+import java.util.Locale;
 import java.util.TreeSet;
-import java.util.Locale;
-
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-import java.io.IOException;
-import java.io.LineNumberReader;
-import java.io.FileInputStream;
-import java.io.InputStreamReader;
-
 import org.apache.log4j.Logger;
-
+import org.dive4elements.river.backend.utils.DateUtil;
+import org.dive4elements.river.backend.utils.EpsilonComparator;
 import org.dive4elements.river.importer.ImportBedHeight;
+import org.dive4elements.river.importer.ImportBedHeightType;
 import org.dive4elements.river.importer.ImportBedHeightValue;
-import org.dive4elements.river.importer.ImportBedHeightType;
 import org.dive4elements.river.importer.ImportElevationModel;
 import org.dive4elements.river.importer.ImportLocationSystem;
 import org.dive4elements.river.importer.ImportRange;
 import org.dive4elements.river.importer.ImportTimeInterval;
 import org.dive4elements.river.importer.ImportUnit;
-import org.dive4elements.river.model.BedHeightType;
 import org.dive4elements.river.importer.ImporterSession;
-import org.dive4elements.river.backend.utils.EpsilonComparator;
-import org.dive4elements.river.backend.utils.DateUtil;
+import org.dive4elements.river.model.BedHeightType;
 
 public class BedHeightParser {
 
     private static final Logger log =
-        Logger.getLogger(BedHeightParser.class);
+            Logger.getLogger(BedHeightParser.class);
 
     public static final String ENCODING = "ISO-8859-1";
 
@@ -57,41 +52,65 @@
     public static final String SEPERATOR_CHAR  = ";";
 
     public static final Pattern META_YEAR =
-        Pattern.compile("^Jahr: [^0-9]*(\\d*).*");
+            Pattern.compile("^Jahr: [^0-9]*(\\d*).*", Pattern.CASE_INSENSITIVE);
 
     public static final Pattern META_TIMEINTERVAL =
-        Pattern.compile("^Zeitraum: Epoche (\\d*)-(\\d*).*");
+            Pattern.compile("^Zeitraum: Epoche (\\d*)-(\\d*).*", Pattern.CASE_INSENSITIVE);
 
     public static final Pattern META_TYPE =
-        Pattern.compile("^Aufnahmeart: (.*).*");
+            Pattern.compile("^Aufnahmeart: (.*).*", Pattern.CASE_INSENSITIVE);
 
     public static final Pattern META_LOCATION_SYSTEM =
-        Pattern.compile("^Lagesystem: (.*).*");
+            Pattern.compile("^Lagesystem: (.*).*", Pattern.CASE_INSENSITIVE);
 
     public static final Pattern META_CUR_ELEVATION_SYSTEM =
-        Pattern.compile("^H.hensystem:\\s(.*)?? \\[(.*)\\].*");
+            Pattern.compile("^H.hensystem:\\s(.*)?? \\[(.*)\\].*", Pattern.CASE_INSENSITIVE);
 
     public static final Pattern META_OLD_ELEVATION_SYSTEM =
-        Pattern.compile("^urspr.ngliches H.hensystem:\\s(.*)?? \\[(.*)\\].*");
+            Pattern.compile("^urspr.ngliches H.hensystem:\\s(.*)?? \\[(.*)\\].*", Pattern.CASE_INSENSITIVE);
 
     public static final Pattern META_RANGE =
-        Pattern.compile("^Strecke:\\D*(\\d++.?\\d*) ?- ?(\\d++.?\\d*).*");
+            Pattern.compile("^Strecke:\\D*(\\d++.?\\d*) ?- ?(\\d++.?\\d*).*", Pattern.CASE_INSENSITIVE);
 
     public static final Pattern META_EVALUATION_BY =
-        Pattern.compile("^Auswerter: (.*).*");
+            Pattern.compile("^Auswerter: (.*).*", Pattern.CASE_INSENSITIVE);
+
+    public static final Pattern META_SOUNDING_WIDTH = Pattern.compile("^ausgewertete Peilbreite:\\s*(\\S.*).*", Pattern.CASE_INSENSITIVE);
 
     public static final Pattern META_COMMENTS =
-        Pattern.compile("^Weitere Bemerkungen: (.*).*");
+            Pattern.compile("^Weitere Bemerkungen: (.*).*", Pattern.CASE_INSENSITIVE);
 
+    private static final Pattern META_COLUMNTITLES = Pattern.compile("^Fluss-km\\s*;.+", Pattern.CASE_INSENSITIVE);
 
-    protected static NumberFormat nf = NumberFormat.getInstance(
-        DEFAULT_LOCALE);
+    private enum ColTitlePattern { // titles of the data columns that the parser looks for in the column titles line
+        KM("Fluss-km.*"), // river station (km)
+        HEIGHT("mittlere Sohlh.he\\s*\\[(.*)\\].*"), // mean bed height; the bracketed text (presumably the unit) is captured
+        UNCERTAINTY("Unsicherheit\\s*\\[(.*)\\].*"), // uncertainty
+        GAP("Datenl.cke.*"), // data gap
+        WIDTH("Peilbreite\\s*\\[(.*)\\].*"), // sounding width
+        MINHEIGHT("Minimale Sohlh.he\\s*\\[(.*)\\].*"), // minimum bed height
+        MAXHEIGHT("Maximale Sohlh.he\\s*\\[(.*)\\].*"); // maximum bed height
+
+        private final Pattern titlePattern; // compiled once per constant, case-insensitive
+
+        ColTitlePattern(final String titleRegex) {
+            titlePattern = Pattern.compile(titleRegex, Pattern.CASE_INSENSITIVE);
+        }
+
+        public Pattern getPattern() {
+            return titlePattern;
+        }
+    }
+
+    private final EnumMap<ColTitlePattern, Integer> cols = new EnumMap<>(ColTitlePattern.class);
+
+    protected static NumberFormat nf = NumberFormat.getInstance(DEFAULT_LOCALE);
 
 
     protected List<ImportBedHeight> bedHeights;
 
 
-    protected ImportBedHeight newImportBedHeight(String description) {
+    protected ImportBedHeight newImportBedHeight(final String description) {
         return new ImportBedHeight(description);
     }
 
@@ -99,30 +118,26 @@
     protected TreeSet<Double> kmExists;
 
     public BedHeightParser() {
-        bedHeights = new ArrayList<ImportBedHeight>();
-        kmExists = new TreeSet<Double>(EpsilonComparator.CMP);
+        this.bedHeights = new ArrayList<>();
+        this.kmExists = new TreeSet<>(EpsilonComparator.CMP);
     }
 
 
     public List<ImportBedHeight> getBedHeights() {
-        return bedHeights;
+        return this.bedHeights;
     }
 
 
-    public void parse(File file) throws IOException {
+    public void parse(final File file) throws IOException {
         log.info("Parsing bed height single file '" + file + "'");
-
-        ImportBedHeight obj = newImportBedHeight(
-            file.getName().replaceAll("\\.csv", ""));
-
-        kmExists.clear();
-
+        final ImportBedHeight obj = newImportBedHeight(file.getName().replaceAll("\\.csv", ""));
+        this.kmExists.clear();
+        this.cols.clear();
+        for (final ColTitlePattern col : ColTitlePattern.values())
+            this.cols.put(col, -1);
         LineNumberReader in = null;
         try {
-            in =
-                new LineNumberReader(
-                new InputStreamReader(
-                new FileInputStream(file), ENCODING));
+            in = new LineNumberReader(new InputStreamReader(new FileInputStream(file), ENCODING));
 
             String line = null;
             while ((line = in.readLine()) != null) {
@@ -139,7 +154,7 @@
             }
 
             log.info("File contained " + obj.getValueCount() + " values.");
-            bedHeights.add(obj);
+            this.bedHeights.add(obj);
         }
         finally {
             if (in != null) {
@@ -149,8 +164,8 @@
     }
 
 
-    protected static String stripMetaLine(String line) {
-        String tmp = line.substring(1, line.length());
+    protected static String stripMetaLine(final String line) {
+        final String tmp = line.substring(1, line.length());
 
         if (tmp.startsWith(" ")) {
             return tmp.substring(1, tmp.length());
@@ -161,8 +176,8 @@
     }
 
 
-    protected void handleMetaLine(ImportBedHeight obj, String line) {
-        String meta = stripMetaLine(line);
+    protected void handleMetaLine(final ImportBedHeight obj, final String line) {
+        final String meta = stripMetaLine(line);
 
         if (handleMetaYear(obj, meta)) {
             return;
@@ -191,17 +206,22 @@
         else if (handleMetaOldElevationModel(obj, meta)) {
             return;
         }
+        else if (handleMetaSoundingWidth(obj, meta)) {
+            return;
+        }
+        else if (handleMetaColumnTitles(obj, meta)) {
+            return;
+        }
         else {
             log.warn("BHP: Meta line did not match any known type: " + line);
         }
     }
 
 
-    protected boolean handleMetaYear(ImportBedHeight obj, String line) {
-        Matcher m = META_YEAR.matcher(line);
-
+    protected boolean handleMetaYear(final ImportBedHeight obj, final String line) {
+        final Matcher m = META_YEAR.matcher(line);
         if (m.matches()) {
-            String tmp = m.group(1);
+            final String tmp = m.group(1).trim();
             if (tmp.length() > 0) {
                 obj.setYear(Integer.parseInt(tmp));
             }
@@ -210,232 +230,190 @@
             }
             return true;
         }
-
-        return false;
-    }
-
-
-    protected boolean handleMetaTimeInterval(
-        ImportBedHeight obj,
-        String line
-    ) {
-        Matcher m = META_TIMEINTERVAL.matcher(line);
-
-        if (m.matches()) {
-            String lo = m.group(1);
-            String up = m.group(2);
-
-            log.debug("Found time interval: " + lo + " - " + up);
-
-            try {
-                int lower = Integer.valueOf(lo);
-                int upper = Integer.valueOf(up);
-
-                Date fromYear = DateUtil.getStartDateFromYear(lower);
-                Date toYear   = DateUtil.getEndDateFromYear(upper);
-
-                obj.setTimeInterval(new ImportTimeInterval(fromYear, toYear));
-            }
-            catch (NumberFormatException e) {
-                log.warn("BHP: could not parse timeinterval", e);
-            }
-
-            return true;
-        }
-
-        return false;
-    }
-
-
-    protected boolean handleMetaComment(ImportBedHeight obj, String line) {
-        Matcher m = META_COMMENTS.matcher(line);
-
-        if (m.matches()) {
-            String tmp = m.group(1);
-
-            obj.setDescription(tmp);
-
-            return true;
-        }
-
         return false;
     }
 
 
-    protected boolean handleMetaEvaluationBy(
-        ImportBedHeight obj,
-        String                line
-    ) {
-        Matcher m = META_EVALUATION_BY.matcher(line);
-
+    protected boolean handleMetaTimeInterval(final ImportBedHeight obj, final String line) {
+        final Matcher m = META_TIMEINTERVAL.matcher(line);
         if (m.matches()) {
-            String tmp = m.group(1);
-            tmp = tmp.replace(";", "");
-
-            obj.setEvaluationBy(tmp);
-
+            final String lo = m.group(1).trim();
+            final String up = m.group(2).trim();
+            log.debug("Found time interval: " + lo + " - " + up);
+            try {
+                final int lower = Integer.valueOf(lo);
+                final int upper = Integer.valueOf(up);
+                final Date fromYear = DateUtil.getStartDateFromYear(lower);
+                final Date toYear   = DateUtil.getEndDateFromYear(upper);
+                obj.setTimeInterval(new ImportTimeInterval(fromYear, toYear));
+            }
+            catch (final NumberFormatException e) {
+                log.warn("BHP: could not parse timeinterval", e);
+            }
             return true;
         }
-
         return false;
     }
 
 
-    protected boolean handleMetaRange(ImportBedHeight obj, String line) {
-        Matcher m = META_RANGE.matcher(line);
-
-        if (m.matches() && m.groupCount() >= 2) {
-            String a = m.group(1).replace(";", "");
-            String b = m.group(2).replace(";", "");
-
-            try {
-                BigDecimal lower = new BigDecimal(nf.parse(a).doubleValue());
-                BigDecimal upper = new BigDecimal(nf.parse(b).doubleValue());
-
-                obj.setRange(new ImportRange(lower, upper));
-
-                return true;
-            }
-            catch (ParseException e) {
-                log.warn("BHP: could not parse range", e);
-            }
+    protected boolean handleMetaComment(final ImportBedHeight obj, final String line) {
+        final Matcher m = META_COMMENTS.matcher(line);
+        if (m.matches()) {
+            final String tmp = m.group(1).replace(SEPERATOR_CHAR, "").trim();
+            obj.setComment(tmp);
+            return true;
         }
-
         return false;
     }
 
 
-    protected boolean handleMetaType(ImportBedHeight obj, String line) {
-        Matcher m = META_TYPE.matcher(line);
-
+    protected boolean handleMetaEvaluationBy(final ImportBedHeight obj, final String line) {
+        final Matcher m = META_EVALUATION_BY.matcher(line);
         if (m.matches()) {
-            String tmp = m.group(1).replace(";", "").trim();
+            final String tmp = m.group(1).replace(";", "").trim();
+            obj.setEvaluationBy(tmp);
+            return true;
+        }
+        return false;
+    }
 
-            BedHeightType bht = BedHeightType.fetchBedHeightTypeForType(
-                tmp,
-                ImporterSession.getInstance().getDatabaseSession());
 
+    protected boolean handleMetaRange(final ImportBedHeight obj, final String line) {
+        final Matcher m = META_RANGE.matcher(line);
+        if (m.matches() && m.groupCount() >= 2) {
+            final String a = m.group(1).replace(";", "").trim();
+            final String b = m.group(2).replace(";", "").trim();
+            try {
+                final BigDecimal lower = new BigDecimal(nf.parse(a).doubleValue());
+                final BigDecimal upper = new BigDecimal(nf.parse(b).doubleValue());
+                obj.setRange(new ImportRange(lower, upper));
+                return true;
+            }
+            catch (final ParseException e) {
+                log.warn("BHP: could not parse range", e);
+            }
+        }
+        return false;
+    }
+
+
+    protected boolean handleMetaType(final ImportBedHeight obj, final String line) {
+        final Matcher m = META_TYPE.matcher(line);
+        if (m.matches()) {
+            final String tmp = m.group(1).replace(";", "").trim();
+            final BedHeightType bht = BedHeightType.fetchBedHeightTypeForType(tmp, ImporterSession.getInstance().getDatabaseSession());
             if (bht != null) {
                 obj.setType(new ImportBedHeightType(bht));
                 return true;
             }
-
             log.error("Unknown bed height type: '" + tmp + "'. File ignored.");
         }
-
-        return false;
-    }
-
-
-    protected boolean handleMetaLocationSystem(
-        ImportBedHeight obj,
-        String          line
-    ) {
-        Matcher m = META_LOCATION_SYSTEM.matcher(line);
-
-        if (m.matches()) {
-            String tmp = m.group(1).replace(";", "");
-
-            obj.setLocationSystem(new ImportLocationSystem(tmp, tmp));
-
-            return true;
-        }
-
-        return false;
-    }
-
-
-    protected boolean handleMetaCurElevationModel(
-        ImportBedHeight obj,
-        String          line
-    ) {
-        Matcher m = META_CUR_ELEVATION_SYSTEM.matcher(line);
-
-        if (m.matches()) {
-            String name = m.group(1);
-            String unit = m.group(2);
-
-            obj.setCurElevationModel(new ImportElevationModel(
-                name,
-                new ImportUnit(unit)
-            ));
-
-            return true;
-        }
-
         return false;
     }
 
 
-    protected boolean handleMetaOldElevationModel(
-        ImportBedHeight obj,
-        String          line
-    ) {
-        Matcher m = META_OLD_ELEVATION_SYSTEM.matcher(line);
-
+    protected boolean handleMetaLocationSystem(final ImportBedHeight obj, final String line) {
+        final Matcher m = META_LOCATION_SYSTEM.matcher(line);
         if (m.matches()) {
-            String name = m.group(1);
-            String unit = m.group(2);
-
-            obj.setOldElevationModel(new ImportElevationModel(
-                name,
-                new ImportUnit(unit)
-            ));
-
+            final String tmp = m.group(1).replace(";", "").trim();
+            obj.setLocationSystem(new ImportLocationSystem(tmp, tmp));
             return true;
         }
-
         return false;
     }
 
-    private Double parse(String []values, int idx, String msg)  {
 
-        if (idx >= 0 && idx < values.length && !values[idx].isEmpty()) {
-            try {
-                return nf.parse(values[idx]).doubleValue();
-            }
-            catch (ParseException e) {
-                log.warn("BSP: unparseable " + msg + " '" + values[idx] + "'");
-            }
+    protected boolean handleMetaCurElevationModel(final ImportBedHeight obj, final String line) {
+        final Matcher m = META_CUR_ELEVATION_SYSTEM.matcher(line);
+        if (m.matches()) {
+            final String name = m.group(1).trim();
+            final String unit = m.group(2).trim();
+            obj.setCurElevationModel(new ImportElevationModel(name, new ImportUnit(unit)));
+            return true;
         }
-
-        return null;
+        return false;
     }
 
-    protected void handleDataLine(ImportBedHeight obj, String line) {
-        String[] values = line.split(SEPERATOR_CHAR, 0);
 
+    protected boolean handleMetaOldElevationModel(final ImportBedHeight obj, final String line) {
+        final Matcher m = META_OLD_ELEVATION_SYSTEM.matcher(line);
+        if (m.matches()) {
+            final String name = m.group(1).trim();
+            final String unit = m.group(2).trim();
+            obj.setOldElevationModel(new ImportElevationModel(name, new ImportUnit(unit)));
+            return true;
+        }
+        return false;
+    }
+
+    /** Handles the "ausgewertete Peilbreite:" meta line: stores its text (separators removed, trimmed) as sounding width info; returns whether the line matched. */
+    protected boolean handleMetaSoundingWidth(final ImportBedHeight obj, final String line) {
+        final Matcher widthMatcher = META_SOUNDING_WIDTH.matcher(line);
+        if (!widthMatcher.matches()) {
+            return false;
+        }
+        obj.setSoundingWidthInfo(widthMatcher.group(1).replace(SEPERATOR_CHAR, "").trim());
+        return true;
+    }
+
+
+    /**
+     * Tries to parse a meta line as the column titles line; the index of every title matching a ColTitlePattern is stored in cols. Returns true if the line is a titles line (obj is not used).
+     */
+    protected boolean handleMetaColumnTitles(final ImportBedHeight obj, final String line) {
+        final Matcher m = META_COLUMNTITLES.matcher(line);
+        if (m.matches()) {
+            // Titles are trimmed before matching so that blanks around the separator do not leave a column unmapped
+            final String[] titles = line.split(SEPERATOR_CHAR, 0);
+            for (int i = 0; i < titles.length; i++) {
+                for (final ColTitlePattern col : ColTitlePattern.values()) {
+                    if (col.getPattern().matcher(titles[i].trim()).matches()) {
+                        this.cols.put(col, i);
+                        break;
+                    }
+                }
+            }
+            return true;
+        }
+        return false;
+    }
+
+    protected void handleDataLine(final ImportBedHeight obj, final String line) {
+        final String[] values = line.split(SEPERATOR_CHAR, 0);
         if (values.length < 2) {
             // Do not import line without data or only km
             return;
         }
-
         Double km;
         try {
             km = new Double(nf.parse(values[0]).doubleValue());
-
-            if (kmExists.contains(km)) {
-                log.warn("duplicate station '" + km + "': -> ignored");
+            if (this.kmExists.contains(km)) {
+                log.warn("duplicate station '" + values[0] + "': -> ignored");
                 return;
             }
-
-            kmExists.add(km);
+            this.kmExists.add(km);
         }
-        catch (ParseException e) {
-            log.error("Error parsing km '" + values[0] + "': " +
-                e.getMessage());
+        catch (final ParseException e) {
+            log.error("Error parsing km '" + values[0] + "': " + e.getMessage());
             return;
         }
-
-        ImportBedHeightValue value = new ImportBedHeightValue(
-            (ImportBedHeight) obj,
-            km,
-            parse(values, 1, "height"),
-            parse(values, 2, "uncertainty"),
-            parse(values, 3, "data gap"),
-            parse(values, 4, "sounding width"));
+        final ImportBedHeightValue value = new ImportBedHeightValue(obj, km, parse(values, ColTitlePattern.HEIGHT),
+                parse(values, ColTitlePattern.UNCERTAINTY), parse(values, ColTitlePattern.GAP), parse(values, ColTitlePattern.WIDTH),
+                parse(values, ColTitlePattern.MINHEIGHT), parse(values, ColTitlePattern.MAXHEIGHT));
 
         obj.addValue(value);
     }
+
+    /** Parses the cell of column col from a split data line; returns null if the column is unmapped, the cell is missing or blank, or the number is unparseable. */
+    private Double parse(final String[] values, final ColTitlePattern col) {
+        final int idx = this.cols.get(col).intValue();
+        final String cell = ((idx >= 0) && (idx < values.length)) ? values[idx].trim() : ""; // trimmed: nf.parse rejects leading blanks
+        try {
+            return cell.isEmpty() ? null : Double.valueOf(nf.parse(cell).doubleValue());
+        }
+        catch (final ParseException e) {
+            log.warn("unparseable " + col.toString() + " '" + values[idx] + "'");
+            return null;
+        }
+    }
 }
-// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org