changeset 6328:53d08f33d094

Backend: Moved guessing of main values and their time intervals out of the STA parser. The same code will be useful to extend the WST parser to better handle official lines.
author Sascha L. Teichmann <teichmann@intevation.de>
date Thu, 13 Jun 2013 17:15:34 +0200
parents 447ed3dee890
children 5a7c48dbfeaa
files backend/src/main/java/org/dive4elements/river/importer/parsers/NameAndTimeInterval.java backend/src/main/java/org/dive4elements/river/importer/parsers/StaFileParser.java backend/src/main/java/org/dive4elements/river/importer/parsers/WstParser.java
diffstat 3 files changed, 199 insertions(+), 176 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/backend/src/main/java/org/dive4elements/river/importer/parsers/NameAndTimeInterval.java	Thu Jun 13 17:15:34 2013 +0200
@@ -0,0 +1,196 @@
+/* Copyright (C) 2011, 2012, 2013 by Bundesanstalt für Gewässerkunde
+ * Software engineering by Intevation GmbH
+ *
+ * This file is Free Software under the GNU AGPL (>=v3)
+ * and comes with ABSOLUTELY NO WARRANTY! Check out the
+ * documentation coming with Dive4Elements River for details.
+ */
+package org.dive4elements.river.importer.parsers;
+
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.List;
+import java.util.regex.Pattern;
+
+import org.apache.log4j.Logger;
+import org.dive4elements.river.importer.ImportTimeInterval;
+import org.dive4elements.river.utils.DateGuesser;
+
+public class NameAndTimeInterval {
+
+    private static Logger log = Logger.getLogger(NameAndTimeInterval.class);
+
+    // TODO: To be extended.
+    private static final Pattern MAIN_VALUE = Pattern.compile(
+        "^(HQ|MHW|GLQ|NMQ|HQEXT)(\\d*)$");
+
+    private String             name;
+    private ImportTimeInterval timeInterval;
+
+    public NameAndTimeInterval() { // name and timeInterval remain null
+    }
+
+    public NameAndTimeInterval(String name) { // no time interval
+        this(name, null);
+    }
+
+    public NameAndTimeInterval(String name, ImportTimeInterval timeInterval) {
+        this.name         = name;
+        this.timeInterval = timeInterval;
+    }
+
+    public String getName() {
+        return name;
+    }
+
+    public ImportTimeInterval getTimeInterval() { // may be null
+        return timeInterval;
+    }
+
+    @Override
+    public String toString() { // joins both fields into one string
+        return "name: " + name + " time interval: " + timeInterval;
+    }
+
+    // Tests s, with blanks removed and upper-cased, against MAIN_VALUE.
+    public static boolean isMainValue(String s) {
+        return MAIN_VALUE.matcher(s.replace(" ", "").toUpperCase()).matches();
+    }
+
+    /**
+     * Splits a possibly bracketed name such as HQ(1994) or
+     * W(Q(1994 - 1999)) into the name proper and a time interval.
+     * A bracketed main value, e.g. W(HQ100), stays part of the name.
+     * Names nested deeper than that are returned unmodified.
+     *
+     * @param name The raw name.
+     * @return The parsed name; its time interval is null if no date
+     *         could be extracted.
+     */
+    public static NameAndTimeInterval parseName(String name) {
+        List<String> parts = new ArrayList<String>();
+        unbracket(name, 0, parts);
+
+        // parts holds the segments from outermost to innermost.
+        switch (parts.size()) {
+            case 1: // No date at all -> use first part.
+                return new NameAndTimeInterval(parts.get(0).trim());
+
+            case 2: { // e.g. HQ(1994) or HQ(1994 - 1999)
+                String outer = parts.get(0).trim();
+                String inner = parts.get(1).trim();
+
+                if (isMainValue(inner)) { // e.g. W(HQ100)
+                    return new NameAndTimeInterval(
+                        outer + "(" + inner + ")", null);
+                }
+
+                ImportTimeInterval interval = getTimeInterval(inner);
+
+                // Without a usable date the name stays unmodified.
+                return interval != null
+                    ? new NameAndTimeInterval(outer, interval)
+                    : new NameAndTimeInterval(name, null);
+            }
+
+            case 3: { // e.g W(Q(1994)) or W(Q(1994 - 1999))
+                ImportTimeInterval interval =
+                    getTimeInterval(parts.get(2).trim());
+
+                if (interval == null) { // No date at all.
+                    return new NameAndTimeInterval(name, null);
+                }
+
+                String type =
+                    parts.get(0).trim() + "(" +
+                    parts.get(1).trim() + ")";
+
+                return new NameAndTimeInterval(type, interval);
+            }
+
+            default: // 0 (should not happen) or more than 3 elements:
+                     // return unmodified.
+                return new NameAndTimeInterval(name);
+        }
+    }
+
+    /**
+     * Turns the date part of a name, either a single date or a range
+     * "from - to", into a time interval. An end date that cannot be
+     * parsed only yields a warning and is passed on as null.
+     * @return The interval or null without a usable (start) date.
+     */
+    private static ImportTimeInterval getTimeInterval(String datePart) {
+        int minus = datePart.indexOf('-');
+        if (minus < 0) { // '-' not found
+            Date date = null;
+            try {
+                date = DateGuesser.guessDate(datePart);
+            }
+            catch (IllegalArgumentException iae) {
+                log.warn("STA: Invalid date '" + datePart + "'");
+                return null;
+            }
+            return new ImportTimeInterval(date);
+        }
+
+        // Found '-' so we have <from> - <to>
+        String startPart = datePart.substring(0, minus).trim();
+        // Skip the '-' itself; it is not part of the end date.
+        String endPart   = datePart.substring(minus + 1).trim();
+
+        Date startDate = null;
+        Date endDate   = null;
+        try {
+            startDate = DateGuesser.guessDate(startPart);
+        }
+        catch (IllegalArgumentException iae) {
+            log.warn("STA: Invalid start date '" + startPart + "'");
+        }
+        try {
+            endDate = DateGuesser.guessDate(endPart);
+        }
+        catch (IllegalArgumentException iae) {
+            log.warn("STA: Invalid end date '" + endPart + "'");
+        }
+
+        if (startDate == null) {
+            log.warn("STA: Need start date.");
+            return null;
+        }
+        return new ImportTimeInterval(startDate, endDate);
+    }
+
+    // Collects the text segments of s between brackets, starting at index.
+    // Segments are put at the front of result; returns where to continue.
+    private static int unbracket(String s, int index, List<String> result) {
+        StringBuilder segment = new StringBuilder();
+        while (index < s.length()) {
+            char ch = s.charAt(index);
+            if (ch == '(') {
+                index = unbracket(s, index + 1, result);
+            }
+            else if (ch == ')') {
+                result.add(0, segment.toString());
+                return index + 1;
+            }
+            else {
+                segment.append(ch);
+                ++index;
+            }
+        }
+        result.add(0, segment.toString());
+        return index;
+    }
+
+    /*
+    public static void main(String [] args) {
+        for (String arg: args) {
+            NameAndTimeInterval nti = parseName(arg);
+            System.out.println(arg + " -> " + nti);
+        }
+    }
+    */
+}
+// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :
+
--- a/backend/src/main/java/org/dive4elements/river/importer/parsers/StaFileParser.java	Thu Jun 13 13:12:48 2013 +0200
+++ b/backend/src/main/java/org/dive4elements/river/importer/parsers/StaFileParser.java	Thu Jun 13 17:15:34 2013 +0200
@@ -19,10 +19,8 @@
 import java.util.regex.Pattern;
 import java.util.regex.Matcher;
 
-import java.util.Date;
 import java.util.HashMap;
 import java.util.ArrayList;
-import java.util.List;
 
 import org.apache.log4j.Logger;
 
@@ -30,8 +28,6 @@
 import org.dive4elements.river.importer.ImportMainValue;
 import org.dive4elements.river.importer.ImportNamedMainValue;
 import org.dive4elements.river.importer.ImportGauge;
-import org.dive4elements.river.importer.ImportTimeInterval;
-import org.dive4elements.river.utils.DateGuesser;
 
 public class StaFileParser
 {
@@ -46,42 +42,6 @@
         Pattern.compile("\\s*([^\\s]+)\\s+([^\\s]+)\\s+([" +
             Pattern.quote(TYPES) + "]).*");
 
-    // TODO: To be extented.
-    private static final Pattern MAIN_VALUE = Pattern.compile(
-        "^(HQ|MHW|GLQ|NMQ|HQEXT)(\\d*)$");
-
-    private static boolean isMainValue(String s) {
-        s = s.replace(" ", "").toUpperCase();
-        return MAIN_VALUE.matcher(s).matches();
-    }
-
-
-    public static final class NameAndTimeInterval {
-        private String             name;
-        private ImportTimeInterval timeInterval;
-
-        public NameAndTimeInterval(String name) {
-            this(name, null);
-        }
-
-        public NameAndTimeInterval(String name, ImportTimeInterval timeInterval) {
-            this.name         = name;
-            this.timeInterval = timeInterval;
-        }
-
-        public String getName() {
-            return name;
-        }
-
-        public ImportTimeInterval getTimeInterval() {
-            return timeInterval;
-        }
-
-        @Override
-        public String toString() {
-            return "name: " + name + " time interval: " + timeInterval;
-        }
-    } // class NameAndTimeInterval
 
     public StaFileParser() {
     }
@@ -204,7 +164,8 @@
                         types.put(typeString, type);
                     }
                     String name = m.group(1);
-                    NameAndTimeInterval nat = parseName(name);
+                    NameAndTimeInterval nat =
+                        NameAndTimeInterval.parseName(name);
                     ImportNamedMainValue namedMainValue =
                         new ImportNamedMainValue(type, nat.getName());
                     namedMainValues.add(namedMainValue);
@@ -235,139 +196,5 @@
         return true;
     }
 
-    protected static NameAndTimeInterval parseName(String name) {
-        List<String> result = new ArrayList<String>();
-
-        unbracket(name, 0, result);
-
-        int length = result.size();
-
-        if (length < 1) { // Should not happen.
-            return new NameAndTimeInterval(name);
-        }
-
-        if (length == 1) { // No date at all -> use first part.
-            return new NameAndTimeInterval(result.get(0).trim());
-        }
-
-        if (length == 2) { // e.g. HQ(1994) or HQ(1994 - 1999)
-
-            String type = result.get(0).trim();
-            ImportTimeInterval timeInterval = null;
-
-            String datePart = result.get(1).trim();
-            if (isMainValue(datePart)) { // e.g. W(HQ100)
-                type += "(" + datePart + ")";
-                timeInterval = null;
-            }
-            else {
-                timeInterval = getTimeInterval(result.get(1).trim());
-
-                if (timeInterval == null) { // No date at all.
-                    type = name;
-                }
-            }
-
-            return new NameAndTimeInterval(type, timeInterval);
-        }
-
-        if (length == 3) { // e.g W(Q(1994)) or W(Q(1994 - 1999))
-
-            String type =
-                result.get(0).trim() + "(" +
-                result.get(1).trim() + ")";
-
-            ImportTimeInterval timeInterval = getTimeInterval(
-                result.get(2).trim());
-
-            if (timeInterval == null) { // No date at all.
-                type = name;
-            }
-
-            return new NameAndTimeInterval(type, timeInterval);
-        }
-
-        // more than 3 elements return unmodified.
-
-        return new NameAndTimeInterval(name);
-    }
-
-    private static ImportTimeInterval getTimeInterval(String datePart) {
-
-        int minus = datePart.indexOf('-');
-
-        if (minus < 0) { // '-' not found
-
-            Date date = null;
-            try {
-                date = DateGuesser.guessDate(datePart);
-            }
-            catch (IllegalArgumentException iae) {
-                log.warn("STA: Invalid date '" + datePart + "'");
-                return null;
-            }
-
-            return new ImportTimeInterval(date);
-        }
-
-        // Found '-' so we have <from> - <to>
-        String startPart = datePart.substring(0, minus).trim();
-        String endPart   = datePart.substring(minus).trim();
-
-        Date startDate = null;
-        Date endDate   = null;
-
-        try {
-            startDate = DateGuesser.guessDate(startPart);
-        }
-        catch (IllegalArgumentException iae) {
-            log.warn("STA: Invalid start date '" + startPart + "'");
-        }
-
-        try {
-            endDate = DateGuesser.guessDate(endPart);
-        }
-        catch (IllegalArgumentException iae) {
-            log.warn("STA: Invalid end date '" + endPart + "'");
-        }
-
-        if (startDate == null) {
-            log.warn("STA: Need start date.");
-            return null;
-        }
-
-        return new ImportTimeInterval(startDate, endDate);
-    }
-
-    private static int unbracket(String s, int index, List<String> result) {
-        StringBuilder sb = new StringBuilder();
-        int length = s.length();
-        while (index < length) {
-            char c = s.charAt(index);
-            switch (c) {
-                case '(':
-                    index = unbracket(s, index+1, result);
-                    break;
-                case ')':
-                    result.add(0, sb.toString());
-                    return index+1;
-                default:
-                    sb.append(c);
-                    ++index;
-            }
-        }
-        result.add(0, sb.toString());
-
-        return index;
-    }
-
-    /*
-    public static void main(String [] args) {
-        for (String arg: args) {
-            NameAndTimeInterval nti = parseName(arg);
-            System.out.println(arg + " -> " + nti);
-        }
-    }
-    */
 }
 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :
--- a/backend/src/main/java/org/dive4elements/river/importer/parsers/WstParser.java	Thu Jun 13 13:12:48 2013 +0200
+++ b/backend/src/main/java/org/dive4elements/river/importer/parsers/WstParser.java	Thu Jun 13 17:15:34 2013 +0200
@@ -77,7 +77,7 @@
         this.wst = wst;
     }
 
-    public ImportTimeInterval guessDate(String string) {
+    public static ImportTimeInterval guessDate(String string) {
         try {
             return new ImportTimeInterval(
                 DateGuesser.guessDate(string));

http://dive4elements.wald.intevation.org