diff flys-backend/src/main/java/org/dive4elements/river/importer/parsers/StaFileParser.java @ 5828:dfb26b03b179

Moved directories to org.dive4elements.river
author Sascha L. Teichmann <teichmann@intevation.de>
date Thu, 25 Apr 2013 11:53:11 +0200
parents flys-backend/src/main/java/de/intevation/flys/importer/parsers/StaFileParser.java@54077c9c9305
children 18619c1e7c2a
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/flys-backend/src/main/java/org/dive4elements/river/importer/parsers/StaFileParser.java	Thu Apr 25 11:53:11 2013 +0200
@@ -0,0 +1,338 @@
+package de.intevation.flys.importer.parsers;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.LineNumberReader;
+import java.io.FileInputStream;
+import java.io.InputStreamReader;
+
+import java.math.BigDecimal;
+
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;
+
+import java.util.Date;
+import java.util.HashMap;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.log4j.Logger;
+
+import de.intevation.flys.importer.ImportMainValueType;
+import de.intevation.flys.importer.ImportMainValue;
+import de.intevation.flys.importer.ImportNamedMainValue;
+import de.intevation.flys.importer.ImportGauge;
+import de.intevation.flys.importer.ImportTimeInterval;
+import de.intevation.flys.utils.DateGuesser;
+
+public class StaFileParser
+{
+    private static Logger log = Logger.getLogger(StaFileParser.class);
+
+    public static final String ENCODING = "ISO-8859-1";
+
+    public static final String TYPES =
+        System.getProperty("flys.backend.main.value.types", "QWTD");
+
+    public static final boolean NOT_PARSE_GAUGE_NUMBERS =
+        Boolean.getBoolean("flys.backend.sta.not.parse.gauge.numbers");
+
+    public static final Pattern QWTD_ =
+        Pattern.compile("\\s*([^\\s]+)\\s+([^\\s]+)\\s+([" +
+            Pattern.quote(TYPES) + "]).*");
+
+    public static final class NameAndTimeInterval {
+        private String             name;
+        private ImportTimeInterval timeInterval;
+
+        public NameAndTimeInterval(String name) {
+            this(name, null);
+        }
+
+        public NameAndTimeInterval(String name, ImportTimeInterval timeInterval) {
+            this.name         = name;
+            this.timeInterval = timeInterval;
+        }
+
+        public String getName() {
+            return name;
+        }
+
+        public ImportTimeInterval getTimeInterval() {
+            return timeInterval;
+        }
+    } // class NameAndTimeInterval
+
+    public StaFileParser() {
+    }
+
+    public boolean parse(ImportGauge gauge) throws IOException {
+
+        File file = gauge.getStaFile();
+
+        log.info("parsing STA file: " + file);
+        LineNumberReader in = null;
+        try {
+            in =
+                new LineNumberReader(
+                new InputStreamReader(
+                new FileInputStream(file), ENCODING));
+
+            String line = in.readLine();
+
+            if (line == null) {
+                log.warn("STA file is empty.");
+                return false;
+            }
+
+            if (line.length() < 37) {
+                log.warn("First line in STA file is too short.");
+                return false;
+            }
+
+            String gaugeName = line.substring(16, 28).trim();
+
+            Long gaugeNumber = null;
+
+            if (!NOT_PARSE_GAUGE_NUMBERS) {
+                String gaugeNumberString = line.substring(8, 16).trim();
+
+                try {
+                    gaugeNumber = Long.parseLong(gaugeNumberString);
+                }
+                catch (NumberFormatException nfe) {
+                    log.warn("STA: '" + gaugeNumberString +
+                        "' is not a valid long number.");
+                }
+            }
+
+            gauge.setName(gaugeName);
+            gauge.setOfficialNumber(gaugeNumber);
+
+            if (log.isDebugEnabled()) {
+                log.debug(
+                    "name/number: '" + gaugeName + "' '" + gaugeNumber + "'");
+            }
+
+            String [] values = line.substring(38).trim().split("\\s+", 2);
+
+            if (values.length < 2) {
+                log.warn("STA: Not enough columns for aeo and datum.");
+            }
+            try {
+                gauge.setAeo(new BigDecimal(values[0].replace(",", ".")));
+                gauge.setDatum(new BigDecimal(values[1].replace(",", ".")));
+            }
+            catch (NumberFormatException nfe) {
+                log.warn("STA: cannot parse aeo or datum.");
+                return false;
+            }
+
+            line = in.readLine();
+
+            if (line == null) {
+                log.warn("STA file has not enough lines");
+                return false;
+            }
+
+            if (line.length() < 36) {
+                log.warn("STA: second line is too short");
+                return false;
+            }
+
+            try {
+                gauge.setStation(
+                    new BigDecimal(line.substring(29, 36).trim()));
+            }
+            catch (NumberFormatException nfe) {
+                log.warn("STA: parsing of the datum of the gauge failed");
+                return false;
+            }
+
+            // overread the next six lines
+            for (int i = 0; i < 6; ++i) {
+                if ((line = in.readLine()) == null) {
+                    log.warn("STA file is too short");
+                    return false;
+                }
+            }
+
+            HashMap<String, ImportMainValueType> types =
+                new HashMap<String, ImportMainValueType>();
+
+            ArrayList<ImportNamedMainValue> namedMainValues =
+                new ArrayList<ImportNamedMainValue>();
+
+            ArrayList<ImportMainValue> mainValues =
+                new ArrayList<ImportMainValue>();
+
+            while ((line = in.readLine()) != null) {
+                Matcher m = QWTD_.matcher(line);
+                if (m.matches()) {
+                    BigDecimal value;
+                    try {
+                        value = new BigDecimal(m.group(2).replace(",", "."));
+                    }
+                    catch (NumberFormatException nfe) {
+                        log.warn("STA: value not parseable in line "
+                            + in.getLineNumber());
+                        continue;
+                    }
+                    String typeString = m.group(3);
+                    log.debug("\t type: " + typeString);
+                    ImportMainValueType type = types.get(typeString);
+                    if (type == null) {
+                        type = new ImportMainValueType(typeString);
+                        types.put(typeString, type);
+                    }
+                    String name = m.group(1);
+                    NameAndTimeInterval nat = parseName(name);
+                    ImportNamedMainValue namedMainValue =
+                        new ImportNamedMainValue(type, nat.getName());
+                    namedMainValues.add(namedMainValue);
+
+                    ImportMainValue mainValue = new ImportMainValue(
+                        gauge,
+                        namedMainValue,
+                        value,
+                        nat.getTimeInterval());
+
+                    mainValues.add(mainValue);
+                }
+                else {
+                    // TODO: treat as a comment
+                }
+            }
+            gauge.setMainValueTypes(
+                new ArrayList<ImportMainValueType>(types.values()));
+            gauge.setNamedMainValues(namedMainValues);
+            gauge.setMainValues(mainValues);
+        }
+        finally {
+            if (in != null) {
+                in.close();
+            }
+        }
+        log.info("finished parsing STA file: " + file);
+        return true;
+    }
+
+    protected NameAndTimeInterval parseName(String name) {
+        List<String> result = new ArrayList<String>();
+
+        unbracket(name, 0, result);
+
+        int length = result.size();
+
+        if (length < 1) { // Should not happen.
+            return new NameAndTimeInterval(name);
+        }
+
+        if (length == 1) { // No date at all -> use first part.
+            return new NameAndTimeInterval(result.get(0).trim());
+        }
+
+        if (length == 2) { // e.g. W(1994) or W(1994 - 1999)
+            String type = result.get(0).trim();
+
+            ImportTimeInterval timeInterval = getTimeInterval(
+                result.get(1).trim());
+
+            if (timeInterval == null) { // No date at all.
+                type = name;
+            }
+
+            return new NameAndTimeInterval(type, timeInterval);
+        }
+
+        if (length == 3) { // e.g W(Q(1994)) or W(Q(1994 - 1999))
+
+            String type =
+                result.get(0).trim() + "(" + 
+                result.get(1).trim() + ")";
+
+            ImportTimeInterval timeInterval = getTimeInterval(
+                result.get(2).trim());
+
+            if (timeInterval == null) { // No date at all.
+                type = name;
+            }
+
+            return new NameAndTimeInterval(type, timeInterval);
+        }
+
+        // more than 3 elements return unmodified.
+
+        return new NameAndTimeInterval(name);
+    }
+
+    private static ImportTimeInterval getTimeInterval(String datePart) {
+
+        int minus = datePart.indexOf('-');
+
+        if (minus < 0) { // '-' not found
+
+            Date date = null;
+            try {
+                date = DateGuesser.guessDate(datePart);
+            }
+            catch (IllegalArgumentException iae) {
+                log.warn("STA: Invalid date '" + datePart + "'");
+                return null;
+            }
+
+            return new ImportTimeInterval(date);
+        }
+
+        // Found '-' so we have <from> - <to>
+        String startPart = datePart.substring(0, minus).trim();
+        String endPart   = datePart.substring(minus).trim();
+
+        Date startDate = null;
+        Date endDate   = null;
+
+        try {
+            startDate = DateGuesser.guessDate(startPart);
+        }
+        catch (IllegalArgumentException iae) {
+            log.warn("STA: Invalid start date '" + startPart + "'");
+        }
+
+        try {
+            endDate = DateGuesser.guessDate(endPart);
+        }
+        catch (IllegalArgumentException iae) {
+            log.warn("STA: Invalid end date '" + endPart + "'");
+        }
+
+        if (startDate == null) {
+            log.warn("STA: Need start date.");
+            return null;
+        }
+
+        return new ImportTimeInterval(startDate, endDate);
+    }
+
+    private static int unbracket(String s, int index, List<String> result) {
+        StringBuilder sb = new StringBuilder();
+        int length = s.length();
+        while (index < length) {
+            char c = s.charAt(index);
+            switch (c) {
+                case '(':
+                    index = unbracket(s, index+1, result);
+                    break;
+                case ')':
+                    result.add(0, sb.toString());
+                    return index+1;
+                default:
+                    sb.append(c);
+                    ++index;
+            }
+        }
+        result.add(0, sb.toString());
+
+        return index;
+    }
+}
+// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org