diff flys-backend/src/main/java/de/intevation/flys/importer/parsers/SedimentYieldParser.java @ 2840:71175502d868

Added a parser for sediment yield files; started parsing and store values in db. flys-backend/trunk@4277 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author Ingo Weinzierl <ingo.weinzierl@intevation.de>
date Thu, 19 Apr 2012 12:48:53 +0000
parents
children 0d27d02b1208
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/flys-backend/src/main/java/de/intevation/flys/importer/parsers/SedimentYieldParser.java	Thu Apr 19 12:48:53 2012 +0000
@@ -0,0 +1,390 @@
+package de.intevation.flys.importer.parsers;
+
+import java.io.File;
+import java.io.IOException;
+
+import java.text.NumberFormat;
+import java.text.ParseException;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.log4j.Logger;
+
+import de.intevation.flys.importer.ImportGrainFraction;
+import de.intevation.flys.importer.ImportSedimentYield;
+import de.intevation.flys.importer.ImportSedimentYieldValue;
+import de.intevation.flys.importer.ImportTimeInterval;
+import de.intevation.flys.importer.ImportUnit;
+import de.intevation.flys.model.GrainFraction;
+
+
+public class SedimentYieldParser extends LineParser {
+
+    private static final Logger log =
+        Logger.getLogger(SedimentYieldParser.class);
+
+
+    public static final NumberFormat nf = NumberFormat.getInstance(DEFAULT_LOCALE);
+
+
+    public static final String FRAKTION_START = "Fraktion:";
+
+    public static final String FRACTION_COARSE_STR =
+        "_Grobkorn.csv";
+
+    public static final String FRACTION_FINE_MIDDLE_STR =
+        "_Fein-Mittel-Kies.csv";
+
+    public static final String FRACTION_SAND =
+        "_Sand.csv";
+
+    public static final String FRACTION_SUSP_SAND =
+        "_susp_Sand.csv";
+
+    public static final String FRACTION_SUSP_SAND_BED =
+        "_susp_Sand_bettbildAnteil.csv";
+
+    public static final String FRACTION_SUSPENDED_SEDIMENT =
+        "_Schwebstoff.csv";
+
+
+    public static final Pattern TIMEINTERVAL_SINGLE =
+        Pattern.compile("\\D*([0-9]+?)\\D*");
+
+    public static final Pattern TIMEINTERVAL_EPOCH =
+        Pattern.compile("\\D*([0-9]+?)\\s*-\\s*([0-9]+?)\\D*");
+
+    public static final Pattern META_FRACTION =
+        Pattern.compile("^Fraktion: (.*)");
+
+    public static final Pattern META_UNIT =
+        Pattern.compile("^Einheit: \\[(.*)\\].*");
+
+    public static final Pattern META_COLUMN_NAMES =
+        Pattern.compile("^Fluss-km.*");
+
+    public static final Pattern META_GRAIN_FRACTION_A =
+        Pattern.compile("\\D*(([0-9]+?,[0-9]+?)\\s*-|([0-9]++)\\s*-)(([0-9]+?,[0-9]+?)|([0-9]++))\\s*([a-zA-Z]+?)\\W*\\D*");
+
+    public static final Pattern META_GRAIN_FRACTION_B =
+        Pattern.compile("(<|>){1}\\s*(\\w++)\\s*(([0-9]+?,[0-9]+?)\\s*-|([0-9]++)\\s*-)(([0-9]+?,[0-9]+?)|([0-9]++))\\s*([a-zA-Z]+?)");
+
+    public static final Pattern META_GRAIN_FRACTION_C =
+        Pattern.compile("(<|>){1}\\s*((([0-9]+?,[0-9]+?)|([0-9]++))\\s*(\\w+))");
+
+
+    protected List<ImportSedimentYield> sedimentYields;
+
+    protected ImportSedimentYield[] current;
+
+    protected ImportGrainFraction grainFraction;
+
+    protected ImportUnit unit;
+
+    protected String description;
+
+    protected String[] columnNames;
+
+
+    public SedimentYieldParser() {
+        sedimentYields = new ArrayList<ImportSedimentYield>();
+    }
+
+
+    @Override
+    public void parse(File file) throws IOException {
+        description = file.getName();
+
+        super.parse(file);
+    }
+
+
+    @Override
+    protected void reset() {
+        current       = null;
+        grainFraction = null;
+        unit          = null;
+    }
+
+
+    @Override
+    protected void finish() {
+        if (current != null) {
+            for (ImportSedimentYield isy: current) {
+                sedimentYields.add(isy);
+            }
+        }
+
+        description = null;
+    }
+
+
+    @Override
+    protected void handleLine(String line) {
+        if (line.startsWith(START_META_CHAR)) {
+            handleMetaLine(stripMetaLine(line));
+        }
+        else {
+            handleDataLine(line);
+        }
+    }
+
+
+    protected void handleMetaLine(String line) {
+        if (handleMetaUnit(line)) {
+            return;
+        }
+        else if (handleMetaFraction(line)) {
+            return;
+        }
+        else if (handleColumnNames(line)) {
+            return;
+        }
+        else {
+            log.warn("Unknown meta line: '" + line + "'");
+        }
+    }
+
+
+    protected boolean handleMetaUnit(String line) {
+        Matcher m = META_UNIT.matcher(line);
+
+        if (m.matches()) {
+            unit = new ImportUnit(m.group(1));
+            return true;
+        }
+
+        return false;
+    }
+
+
+    public boolean handleMetaFraction(String line) {
+        Matcher m = META_FRACTION.matcher(line);
+
+        if (m.matches()) {
+            String tmp = m.group(1);
+
+            this.grainFraction = buildGrainFraction(tmp);
+
+            return true;
+        }
+        else if (line.startsWith(FRAKTION_START)) {
+            String newLine = line.replace(FRAKTION_START, "").trim();
+            if (newLine.length() == 0) {
+                log.debug("Found total grain fraction.");
+                this.grainFraction = new ImportGrainFraction(GrainFraction.TOTAL);
+
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+
+    public boolean handleColumnNames(String line) {
+        Matcher m = META_COLUMN_NAMES.matcher(line);
+
+        if (m.matches()) {
+            columnNames = line.split(SEPERATOR_CHAR);
+
+            initializeSedimentYields();
+
+            return true;
+        }
+
+        return false;
+    }
+
+
+    protected void handleDataLine(String line) {
+        String[] vals = line.split(SEPERATOR_CHAR);
+
+        if (vals == null || vals.length < columnNames.length-1) {
+            log.warn("skip invalid data line: '" + line + "'");
+            return;
+        }
+
+        try {
+            Double km = nf.parse(vals[0]).doubleValue();
+
+            for (int i = 1, n = columnNames.length-1; i < n; i++) {
+                String curVal = vals[i];
+
+                if (curVal != null && curVal.length() > 0) {
+                    current[i-1].addValue(new ImportSedimentYieldValue(
+                        km, nf.parse(vals[i]).doubleValue()
+                    ));
+                }
+            }
+        }
+        catch (ParseException pe) {
+            log.warn("Error while parsing numbers in '" + line + "':", pe);
+        }
+    }
+
+
+    private void initializeSedimentYields() {
+        // skip first column (Fluss-km) and last column (Hinweise)
+        current = new ImportSedimentYield[columnNames.length-2];
+
+        for (int i = 0, n = columnNames.length; i < n-2; i++) {
+            current[i] = new ImportSedimentYield(this.description);
+            current[i].setTimeInterval(getTimeInterval(columnNames[i+1]));
+            current[i].setUnit(unit);
+            current[i].setGrainFraction(grainFraction);
+        }
+    }
+
+
+    private ImportTimeInterval getTimeInterval(String column) {
+        try {
+            Matcher a = TIMEINTERVAL_EPOCH.matcher(column);
+            if (a.matches()) {
+                int yearA = nf.parse(a.group(1)).intValue();
+                int yearB = nf.parse(a.group(2)).intValue();
+
+                return new ImportTimeInterval(
+                    getDateFromYear(yearA),
+                    getDateFromYear(yearB)
+                );
+            }
+
+            Matcher b = TIMEINTERVAL_SINGLE.matcher(column);
+            if (b.matches()) {
+                int year = nf.parse(b.group(1)).intValue();
+
+                return new ImportTimeInterval(getDateFromYear(year));
+            }
+
+            log.warn("Unknown time interval string: '" + column + "'");
+        }
+        catch (ParseException pe) {
+            log.warn("Error while parsing years: " + column, pe);
+        }
+
+        return null;
+    }
+
+
+    private ImportGrainFraction buildGrainFraction(String gfStr) {
+        Matcher a = META_GRAIN_FRACTION_A.matcher(gfStr);
+        if (a.matches()) {
+            String lowerA = a.group(2);
+            String lowerB = a.group(3);
+
+            String upperA = a.group(4);
+            String upperB = a.group(5);
+
+            String unitStr = a.group(7);
+            String lower = lowerA != null ? lowerA : lowerB;
+            String upper = upperA != null ? upperA : upperB;
+
+            try {
+                return new ImportGrainFraction(
+                    getGrainFractionTypeName(this.description),
+                    nf.parse(lower).doubleValue(),
+                    nf.parse(upper).doubleValue(),
+                    new ImportUnit(unitStr)
+                );
+            }
+            catch (ParseException pe) {
+                log.warn("Error while parsing ranges of: '" + gfStr + "'");
+            }
+        }
+
+        Matcher b = META_GRAIN_FRACTION_B.matcher(gfStr);
+        if (b.matches()) {
+            String lowerA  = b.group(4);
+            String lowerB  = b.group(5);
+            String upperA  = b.group(6);
+            String upperB  = b.group(7);
+            String unitStr = b.group(9);
+
+            String lower = lowerA != null ? lowerA : lowerB;
+            String upper = upperA != null ? upperA : upperB;
+
+            try {
+                return new ImportGrainFraction(
+                    getGrainFractionTypeName(this.description),
+                    nf.parse(lower).doubleValue(),
+                    nf.parse(upper).doubleValue(),
+                    new ImportUnit(unitStr)
+                );
+            }
+            catch (ParseException pe) {
+                log.warn("Error while parsing ranges of: '" + gfStr + "'");
+            }
+        }
+
+        Matcher c = META_GRAIN_FRACTION_C.matcher(gfStr);
+        if (c.matches()) {
+            String oper     = c.group(1);
+            String valueStr = c.group(3);
+            String unitStr  = c.group(6);
+
+            try {
+                Double value = nf.parse(valueStr).doubleValue();
+
+                if (oper.equals(">")) {
+                    return new ImportGrainFraction(
+                        getGrainFractionTypeName(this.description),
+                        value,
+                        null,
+                        new ImportUnit(unitStr)
+                    );
+                }
+                else {
+                    return new ImportGrainFraction(
+                        getGrainFractionTypeName(this.description),
+                        null,
+                        value,
+                        new ImportUnit(unitStr)
+                    );
+                }
+            }
+            catch (ParseException pe) {
+                log.warn("Error while parsing ranges of: '" + gfStr + "'");
+            }
+        }
+
+        log.warn("Unknow grain fraction: '" + gfStr + "'");
+
+        return null;
+    }
+
+
+    public static String getGrainFractionTypeName(String filename) {
+        if (filename.endsWith(FRACTION_COARSE_STR)) {
+            return GrainFraction.COARSE;
+        }
+        else if (filename.endsWith(FRACTION_FINE_MIDDLE_STR)) {
+            return GrainFraction.FINE_MIDDLE;
+        }
+        else if (filename.endsWith(FRACTION_SAND)) {
+            return GrainFraction.SAND;
+        }
+        else if (filename.endsWith(FRACTION_SUSP_SAND)) {
+            return GrainFraction.SUSP_SAND;
+        }
+        else if (filename.endsWith(FRACTION_SUSP_SAND_BED)) {
+            return GrainFraction.SUSP_SAND_BED;
+        }
+        else if (filename.endsWith(FRACTION_SUSPENDED_SEDIMENT)) {
+            return GrainFraction.SUSPENDED_SEDIMENT;
+        }
+        else {
+            log.warn("Unknown grain fraction type: '" + filename + "'");
+            return "unknown";
+        }
+    }
+
+
+    public List<ImportSedimentYield> getSedimentYields() {
+        return sedimentYields;
+    }
+}
+// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org