changeset 2840:71175502d868

Added a parser for sediment yield files; started parsing and storing values in the db. flys-backend/trunk@4277 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author Ingo Weinzierl <ingo.weinzierl@intevation.de>
date Thu, 19 Apr 2012 12:48:53 +0000
parents 163c037f2c7e
children 6be2bf2492f9
files flys-backend/ChangeLog flys-backend/doc/schema/oracle-minfo.sql flys-backend/src/main/java/de/intevation/flys/importer/ImportGrainFraction.java flys-backend/src/main/java/de/intevation/flys/importer/ImportRiver.java flys-backend/src/main/java/de/intevation/flys/importer/ImportSedimentYield.java flys-backend/src/main/java/de/intevation/flys/importer/parsers/SedimentYieldParser.java flys-backend/src/main/java/de/intevation/flys/model/GrainFraction.java flys-backend/src/main/java/de/intevation/flys/model/SedimentYield.java
diffstat 8 files changed, 482 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/flys-backend/ChangeLog	Thu Apr 19 07:34:50 2012 +0000
+++ b/flys-backend/ChangeLog	Thu Apr 19 12:48:53 2012 +0000
@@ -1,3 +1,22 @@
+2012-04-19  Ingo Weinzierl <ingo@intevation.de>
+
+	* doc/schema/oracle-minfo.sql,
+	  src/main/java/de/intevation/flys/importer/ImportSedimentYield.java,
+	  src/main/java/de/intevation/flys/model/SedimentYield.java: Added a
+	  column 'description' to the sediment_yield relation.
+
+	* src/main/java/de/intevation/flys/importer/parsers/SedimentYieldParser.java:
+	  New parser for sediment yield data.
+
+	* src/main/java/de/intevation/flys/model/GrainFraction.java: Added constants
+	  that represent the names of the grain fraction types.
+
+	* src/main/java/de/intevation/flys/importer/ImportGrainFraction.java: New
+	  constructor that takes a name only.
+
+	* src/main/java/de/intevation/flys/importer/ImportRiver.java: Improved the
+	  process of parsing sediment yield files.
+
 2012-04-19  Ingo Weinzierl <ingo@intevation.de>
 
 	* src/main/java/de/intevation/flys/importer/Config.java: Added new config
--- a/flys-backend/doc/schema/oracle-minfo.sql	Thu Apr 19 07:34:50 2012 +0000
+++ b/flys-backend/doc/schema/oracle-minfo.sql	Thu Apr 19 12:48:53 2012 +0000
@@ -259,6 +259,7 @@
     grain_fraction_id   NUMBER(38,0),
     unit_id             NUMBER(38,0) NOT NULL,
     time_interval_id    NUMBER(38,0) NOT NULL,
+    description         VARCHAR(256),
     PRIMARY KEY (id),
     CONSTRAINT fk_sy_river_id FOREIGN KEY (river_id) REFERENCES rivers(id),
     CONSTRAINT fk_sy_grain_fraction_id FOREIGN KEY (grain_fraction_id) REFERENCES grain_fraction(id),
--- a/flys-backend/src/main/java/de/intevation/flys/importer/ImportGrainFraction.java	Thu Apr 19 07:34:50 2012 +0000
+++ b/flys-backend/src/main/java/de/intevation/flys/importer/ImportGrainFraction.java	Thu Apr 19 12:48:53 2012 +0000
@@ -26,6 +26,11 @@
     private GrainFraction peer;
 
 
+    public ImportGrainFraction(String name) {
+        this.name = name;
+    }
+
+
     public ImportGrainFraction(
         String     name,
         Double     lower,
--- a/flys-backend/src/main/java/de/intevation/flys/importer/ImportRiver.java	Thu Apr 19 07:34:50 2012 +0000
+++ b/flys-backend/src/main/java/de/intevation/flys/importer/ImportRiver.java	Thu Apr 19 12:48:53 2012 +0000
@@ -34,6 +34,7 @@
 import de.intevation.flys.importer.parsers.AnnotationClassifier;
 import de.intevation.flys.importer.parsers.PegelGltParser;
 import de.intevation.flys.importer.parsers.SedimentDensityParser;
+import de.intevation.flys.importer.parsers.SedimentYieldParser;
 import de.intevation.flys.importer.parsers.WstParser;
 
 import org.hibernate.Session;
@@ -410,19 +411,41 @@
         File[] singles = singleDir.listFiles();
         File[] epochs  = epochDir.listFiles();
 
+        SedimentYieldParser parser = new SedimentYieldParser();
+
         if (singles == null || singles.length == 0) {
             log.warn("Cannot parse directory '" + singleDir + "'");
         }
         else {
-            // TODO
+            for (File file: singles) {
+                if (file.isDirectory()) {
+                    for (File child: file.listFiles()) {
+                        parser.parse(child);
+                    }
+                }
+                else {
+                    parser.parse(file);
+                }
+            }
         }
 
         if (epochs == null || epochs.length == 0) {
             log.warn("Cannot parse directory '" + epochDir + "'");
         }
         else {
-            // TODO
+            for (File file: epochs) {
+                if (file.isDirectory()) {
+                    for (File child: file.listFiles()) {
+                        parser.parse(child);
+                    }
+                }
+                else {
+                    parser.parse(file);
+                }
+            }
         }
+
+        sedimentYields = parser.getSedimentYields();
     }
 
 
--- a/flys-backend/src/main/java/de/intevation/flys/importer/ImportSedimentYield.java	Thu Apr 19 07:34:50 2012 +0000
+++ b/flys-backend/src/main/java/de/intevation/flys/importer/ImportSedimentYield.java	Thu Apr 19 12:48:53 2012 +0000
@@ -27,13 +27,16 @@
 
     private ImportTimeInterval timeInterval;
 
+    private String description;
+
     private List<ImportSedimentYieldValue> values;
 
     private SedimentYield peer;
 
 
-    public ImportSedimentYield() {
-        this.values = new ArrayList<ImportSedimentYieldValue>();
+    public ImportSedimentYield(String description) {
+        this.values      = new ArrayList<ImportSedimentYieldValue>();
+        this.description = description;
     }
 
 
@@ -102,19 +105,21 @@
                 "   river=:river and " +
                 "   grainFraction=:grainFraction and " +
                 "   unit=:unit and " +
-                "   timeInterval=:timeInterval"
+                "   timeInterval=:timeInterval and " +
+                "   description=:description"
             );
 
             query.setParameter("river", river);
             query.setParameter("grainFraction", gf);
             query.setParameter("unit", u);
             query.setParameter("timeInterval", ti);
+            query.setParameter("description", description);
 
             List<SedimentYield> yields = query.list();
             if (yields.isEmpty()) {
                 log.debug("create new SedimentYield");
 
-                peer = new SedimentYield(river, u, ti, gf);
+                peer = new SedimentYield(river, u, ti, gf, description);
                 session.save(peer);
             }
             else {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/flys-backend/src/main/java/de/intevation/flys/importer/parsers/SedimentYieldParser.java	Thu Apr 19 12:48:53 2012 +0000
@@ -0,0 +1,390 @@
+package de.intevation.flys.importer.parsers;
+
+import java.io.File;
+import java.io.IOException;
+
+import java.text.NumberFormat;
+import java.text.ParseException;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.log4j.Logger;
+
+import de.intevation.flys.importer.ImportGrainFraction;
+import de.intevation.flys.importer.ImportSedimentYield;
+import de.intevation.flys.importer.ImportSedimentYieldValue;
+import de.intevation.flys.importer.ImportTimeInterval;
+import de.intevation.flys.importer.ImportUnit;
+import de.intevation.flys.model.GrainFraction;
+
+
+public class SedimentYieldParser extends LineParser {
+
+    private static final Logger log =
+        Logger.getLogger(SedimentYieldParser.class);
+
+
+    public static final NumberFormat nf = NumberFormat.getInstance(DEFAULT_LOCALE);
+
+
+    public static final String FRAKTION_START = "Fraktion:";
+
+    public static final String FRACTION_COARSE_STR =
+        "_Grobkorn.csv";
+
+    public static final String FRACTION_FINE_MIDDLE_STR =
+        "_Fein-Mittel-Kies.csv";
+
+    public static final String FRACTION_SAND =
+        "_Sand.csv";
+
+    public static final String FRACTION_SUSP_SAND =
+        "_susp_Sand.csv";
+
+    public static final String FRACTION_SUSP_SAND_BED =
+        "_susp_Sand_bettbildAnteil.csv";
+
+    public static final String FRACTION_SUSPENDED_SEDIMENT =
+        "_Schwebstoff.csv";
+
+
+    public static final Pattern TIMEINTERVAL_SINGLE =
+        Pattern.compile("\\D*([0-9]+?)\\D*");
+
+    public static final Pattern TIMEINTERVAL_EPOCH =
+        Pattern.compile("\\D*([0-9]+?)\\s*-\\s*([0-9]+?)\\D*");
+
+    public static final Pattern META_FRACTION =
+        Pattern.compile("^Fraktion: (.*)");
+
+    public static final Pattern META_UNIT =
+        Pattern.compile("^Einheit: \\[(.*)\\].*");
+
+    public static final Pattern META_COLUMN_NAMES =
+        Pattern.compile("^Fluss-km.*");
+
+    public static final Pattern META_GRAIN_FRACTION_A =
+        Pattern.compile("\\D*(([0-9]+?,[0-9]+?)\\s*-|([0-9]++)\\s*-)(([0-9]+?,[0-9]+?)|([0-9]++))\\s*([a-zA-Z]+?)\\W*\\D*");
+
+    public static final Pattern META_GRAIN_FRACTION_B =
+        Pattern.compile("(<|>){1}\\s*(\\w++)\\s*(([0-9]+?,[0-9]+?)\\s*-|([0-9]++)\\s*-)(([0-9]+?,[0-9]+?)|([0-9]++))\\s*([a-zA-Z]+?)");
+
+    public static final Pattern META_GRAIN_FRACTION_C =
+        Pattern.compile("(<|>){1}\\s*((([0-9]+?,[0-9]+?)|([0-9]++))\\s*(\\w+))");
+
+
+    protected List<ImportSedimentYield> sedimentYields;
+
+    protected ImportSedimentYield[] current;
+
+    protected ImportGrainFraction grainFraction;
+
+    protected ImportUnit unit;
+
+    protected String description;
+
+    protected String[] columnNames;
+
+
+    /**
+     * Creates a parser whose list of collected sediment yields starts empty.
+     */
+    public SedimentYieldParser() {
+        this.sedimentYields = new ArrayList<ImportSedimentYield>();
+    }
+
+
+    /**
+     * Stores the file's name as the description used for the yields built
+     * from this file, then delegates the parsing to the superclass.
+     *
+     * @param file the sediment yield file to parse.
+     * @throws IOException propagated from the superclass implementation.
+     */
+    @Override
+    public void parse(File file) throws IOException {
+        this.description = file.getName();
+        super.parse(file);
+    }
+
+
+    /**
+     * Clears the yields under construction, the grain fraction and the
+     * unit. The list of collected yields and the column names are not
+     * touched here.
+     */
+    @Override
+    protected void reset() {
+        unit          = null;
+        grainFraction = null;
+        current       = null;
+    }
+
+
+    /**
+     * Appends the yields under construction (if any) to the list of
+     * collected yields and forgets the description.
+     */
+    @Override
+    protected void finish() {
+        if (current != null) {
+            for (int i = 0; i < current.length; i++) {
+                sedimentYields.add(current[i]);
+            }
+        }
+
+        description = null;
+    }
+
+
+    /**
+     * Dispatches a line: lines starting with the meta character are
+     * stripped and treated as meta lines, all others as data lines.
+     *
+     * @param line the line to handle.
+     */
+    @Override
+    protected void handleLine(String line) {
+        if (!line.startsWith(START_META_CHAR)) {
+            handleDataLine(line);
+            return;
+        }
+
+        handleMetaLine(stripMetaLine(line));
+    }
+
+
+    /**
+     * Offers a meta line to the unit, fraction and column name handlers in
+     * that order; the first one that accepts it wins. A line that no
+     * handler accepts is logged as a warning.
+     *
+     * @param line the meta line as passed on by handleLine().
+     */
+    protected void handleMetaLine(String line) {
+        // '||' short-circuits, so later handlers only run if the earlier
+        // ones rejected the line.
+        boolean recognized =
+            handleMetaUnit(line)
+            || handleMetaFraction(line)
+            || handleColumnNames(line);
+
+        if (!recognized) {
+            log.warn("Unknown meta line: '" + line + "'");
+        }
+    }
+
+
+    /**
+     * Recognizes an "Einheit: [...]" meta line and stores the text between
+     * the brackets as the current unit.
+     *
+     * @param line the meta line to inspect.
+     * @return true if the line was a unit line, otherwise false.
+     */
+    protected boolean handleMetaUnit(String line) {
+        Matcher unitMatcher = META_UNIT.matcher(line);
+
+        if (!unitMatcher.matches()) {
+            return false;
+        }
+
+        unit = new ImportUnit(unitMatcher.group(1));
+        return true;
+    }
+
+
+    /**
+     * Recognizes a "Fraktion:" meta line and sets the current grain
+     * fraction from it.
+     *
+     * A line without any text after "Fraktion:" (ignoring whitespace)
+     * selects the total grain fraction. Otherwise the text after
+     * "Fraktion: " is handed to buildGrainFraction(), whose result may be
+     * null if the text is not understood.
+     *
+     * @param line the meta line to inspect.
+     * @return true if the line was handled as a fraction line, otherwise
+     * false.
+     */
+    public boolean handleMetaFraction(String line) {
+        Matcher m = META_FRACTION.matcher(line);
+
+        if (m.matches()) {
+            String spec = m.group(1);
+
+            // "Fraktion: " followed only by whitespace also matches the
+            // pattern (with an empty capture). Treat it like the bare
+            // "Fraktion:" line below instead of passing an empty string to
+            // buildGrainFraction(), which would warn and return null.
+            if (spec.trim().length() == 0) {
+                log.debug("Found total grain fraction.");
+                this.grainFraction = new ImportGrainFraction(GrainFraction.TOTAL);
+
+                return true;
+            }
+
+            this.grainFraction = buildGrainFraction(spec);
+
+            return true;
+        }
+        else if (line.startsWith(FRAKTION_START)) {
+            String newLine = line.replace(FRAKTION_START, "").trim();
+            if (newLine.length() == 0) {
+                log.debug("Found total grain fraction.");
+                this.grainFraction = new ImportGrainFraction(GrainFraction.TOTAL);
+
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+
+    /**
+     * Recognizes the column header line (it starts with "Fluss-km"),
+     * splits it into column names and creates the yields for its columns.
+     *
+     * @param line the meta line to inspect.
+     * @return true if the line was the column header, otherwise false.
+     */
+    public boolean handleColumnNames(String line) {
+        if (!META_COLUMN_NAMES.matcher(line).matches()) {
+            return false;
+        }
+
+        columnNames = line.split(SEPERATOR_CHAR);
+        initializeSedimentYields();
+
+        return true;
+    }
+
+
+    /**
+     * Parses one data line: the first cell is the km, each following cell
+     * (except the last header column) is a value for the yield of the
+     * corresponding column. Empty cells are skipped.
+     *
+     * Lines that arrive before a column header was seen, lines with too
+     * few cells and lines with unparsable numbers are logged and skipped;
+     * values added before a parse error occurred stay in place.
+     *
+     * @param line the data line to parse.
+     */
+    protected void handleDataLine(String line) {
+        // Without a column header there is nothing to assign values to;
+        // skip the line instead of failing with a NullPointerException.
+        if (columnNames == null || current == null) {
+            log.warn(
+                "skip data line without preceding column header: '" +
+                line + "'");
+            return;
+        }
+
+        // A negative limit keeps trailing empty cells. The one-argument
+        // split() drops them, which made rows whose last columns are empty
+        // look too short although empty cells are tolerated below.
+        String[] vals = line.split(SEPERATOR_CHAR, -1);
+
+        if (vals.length < columnNames.length-1) {
+            log.warn("skip invalid data line: '" + line + "'");
+            return;
+        }
+
+        try {
+            Double km = nf.parse(vals[0]).doubleValue();
+
+            // Column 0 is the km, the last header column is not a yield.
+            for (int i = 1, n = columnNames.length-1; i < n; i++) {
+                String curVal = vals[i];
+
+                if (curVal != null && curVal.length() > 0) {
+                    current[i-1].addValue(new ImportSedimentYieldValue(
+                        km, nf.parse(curVal).doubleValue()
+                    ));
+                }
+            }
+        }
+        catch (ParseException pe) {
+            log.warn("Error while parsing numbers in '" + line + "':", pe);
+        }
+    }
+
+
+    /**
+     * Creates one sediment yield per value column of the current header.
+     * Each yield gets the current description, unit and grain fraction and
+     * the time interval derived from its column name (which may be null).
+     */
+    private void initializeSedimentYields() {
+        // skip first column (Fluss-km) and last column (Hinweise); a header
+        // with fewer than two columns yields no value columns instead of a
+        // negative array size.
+        int count = Math.max(0, columnNames.length - 2);
+
+        current = new ImportSedimentYield[count];
+
+        for (int i = 0; i < count; i++) {
+            ImportSedimentYield isy = new ImportSedimentYield(this.description);
+            isy.setTimeInterval(getTimeInterval(columnNames[i+1]));
+            isy.setUnit(unit);
+            isy.setGrainFraction(grainFraction);
+
+            current[i] = isy;
+        }
+    }
+
+
+    /**
+     * Derives a time interval from a column header. A header containing
+     * two numbers separated by '-' is tried first, then a header
+     * containing a single number.
+     *
+     * @param column the column header text.
+     * @return the time interval, or null if the header matches neither
+     * pattern or its numbers cannot be parsed.
+     */
+    private ImportTimeInterval getTimeInterval(String column) {
+        Matcher epoch  = TIMEINTERVAL_EPOCH.matcher(column);
+        Matcher single = TIMEINTERVAL_SINGLE.matcher(column);
+
+        try {
+            if (epoch.matches()) {
+                int from = nf.parse(epoch.group(1)).intValue();
+                int to   = nf.parse(epoch.group(2)).intValue();
+
+                return new ImportTimeInterval(
+                    getDateFromYear(from),
+                    getDateFromYear(to)
+                );
+            }
+
+            if (single.matches()) {
+                int year = nf.parse(single.group(1)).intValue();
+
+                return new ImportTimeInterval(getDateFromYear(year));
+            }
+
+            log.warn("Unknown time interval string: '" + column + "'");
+        }
+        catch (ParseException pe) {
+            log.warn("Error while parsing years: " + column, pe);
+        }
+
+        return null;
+    }
+
+
+    /**
+     * Builds a grain fraction from the text following "Fraktion: ".
+     *
+     * Three notations are tried in order: a plain range
+     * (META_GRAIN_FRACTION_A), a range preceded by '<' or '>' and a word
+     * (META_GRAIN_FRACTION_B), and a single bound preceded by '<' or '>'
+     * (META_GRAIN_FRACTION_C). The type name of the fraction is derived
+     * from the current description (the file name). If a notation matches
+     * but its numbers cannot be parsed, the remaining notations are still
+     * tried.
+     *
+     * @param gfStr the grain fraction text.
+     * @return the grain fraction, or null if no notation led to a result.
+     */
+    private ImportGrainFraction buildGrainFraction(String gfStr) {
+        Matcher a = META_GRAIN_FRACTION_A.matcher(gfStr);
+        if (a.matches()) {
+            // Groups 2/3: lower bound written with/without decimal comma.
+            String lowerA = a.group(2);
+            String lowerB = a.group(3);
+
+            // Group 4 encloses both upper bound alternatives, so it is
+            // always set on a match; group 5 is only a fallback.
+            String upperA = a.group(4);
+            String upperB = a.group(5);
+
+            String unitStr = a.group(7);
+            String lower = lowerA != null ? lowerA : lowerB;
+            String upper = upperA != null ? upperA : upperB;
+
+            try {
+                return new ImportGrainFraction(
+                    getGrainFractionTypeName(this.description),
+                    nf.parse(lower).doubleValue(),
+                    nf.parse(upper).doubleValue(),
+                    new ImportUnit(unitStr)
+                );
+            }
+            catch (ParseException pe) {
+                // Keep the cause in the log, as the other parse methods do.
+                log.warn("Error while parsing ranges of: '" + gfStr + "'", pe);
+            }
+        }
+
+        Matcher b = META_GRAIN_FRACTION_B.matcher(gfStr);
+        if (b.matches()) {
+            // Groups 4/5: lower bound written with/without decimal comma.
+            String lowerA  = b.group(4);
+            String lowerB  = b.group(5);
+            // Group 6 encloses both upper bound alternatives, so it is
+            // always set on a match; group 7 is only a fallback.
+            String upperA  = b.group(6);
+            String upperB  = b.group(7);
+            String unitStr = b.group(9);
+
+            String lower = lowerA != null ? lowerA : lowerB;
+            String upper = upperA != null ? upperA : upperB;
+
+            try {
+                return new ImportGrainFraction(
+                    getGrainFractionTypeName(this.description),
+                    nf.parse(lower).doubleValue(),
+                    nf.parse(upper).doubleValue(),
+                    new ImportUnit(unitStr)
+                );
+            }
+            catch (ParseException pe) {
+                log.warn("Error while parsing ranges of: '" + gfStr + "'", pe);
+            }
+        }
+
+        Matcher c = META_GRAIN_FRACTION_C.matcher(gfStr);
+        if (c.matches()) {
+            // Group 1: '<' or '>', group 3: the bound, group 6: the unit.
+            String oper     = c.group(1);
+            String valueStr = c.group(3);
+            String unitStr  = c.group(6);
+
+            try {
+                Double value = nf.parse(valueStr).doubleValue();
+
+                if (oper.equals(">")) {
+                    // "> value": only a lower bound is known.
+                    return new ImportGrainFraction(
+                        getGrainFractionTypeName(this.description),
+                        value,
+                        null,
+                        new ImportUnit(unitStr)
+                    );
+                }
+                else {
+                    // "< value": only an upper bound is known.
+                    return new ImportGrainFraction(
+                        getGrainFractionTypeName(this.description),
+                        null,
+                        value,
+                        new ImportUnit(unitStr)
+                    );
+                }
+            }
+            catch (ParseException pe) {
+                log.warn("Error while parsing ranges of: '" + gfStr + "'", pe);
+            }
+        }
+
+        log.warn("Unknow grain fraction: '" + gfStr + "'");
+
+        return null;
+    }
+
+
+    /**
+     * Maps the suffix of a sediment yield file name to the name of the
+     * grain fraction type.
+     *
+     * @param filename the name of the parsed file.
+     * @return one of the GrainFraction type names, or "unknown" (after
+     * logging a warning) if the file name has none of the known suffixes.
+     */
+    public static String getGrainFractionTypeName(String filename) {
+        if (filename.endsWith(FRACTION_COARSE_STR)) {
+            return GrainFraction.COARSE;
+        }
+        else if (filename.endsWith(FRACTION_FINE_MIDDLE_STR)) {
+            return GrainFraction.FINE_MIDDLE;
+        }
+        // "_susp_Sand.csv" itself ends with "_Sand.csv", so the more
+        // specific suffix has to be tested before the plain sand suffix;
+        // otherwise suspended sand files are classified as sand.
+        else if (filename.endsWith(FRACTION_SUSP_SAND)) {
+            return GrainFraction.SUSP_SAND;
+        }
+        else if (filename.endsWith(FRACTION_SAND)) {
+            return GrainFraction.SAND;
+        }
+        else if (filename.endsWith(FRACTION_SUSP_SAND_BED)) {
+            return GrainFraction.SUSP_SAND_BED;
+        }
+        else if (filename.endsWith(FRACTION_SUSPENDED_SEDIMENT)) {
+            return GrainFraction.SUSPENDED_SEDIMENT;
+        }
+        else {
+            log.warn("Unknown grain fraction type: '" + filename + "'");
+            return "unknown";
+        }
+    }
+
+
+    /**
+     * Returns the list of sediment yields collected so far. The internal
+     * list itself is returned, not a copy.
+     *
+     * @return the collected sediment yields.
+     */
+    public List<ImportSedimentYield> getSedimentYields() {
+        return this.sedimentYields;
+    }
+}
+// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :
--- a/flys-backend/src/main/java/de/intevation/flys/model/GrainFraction.java	Thu Apr 19 07:34:50 2012 +0000
+++ b/flys-backend/src/main/java/de/intevation/flys/model/GrainFraction.java	Thu Apr 19 12:48:53 2012 +0000
@@ -20,6 +20,15 @@
 public class GrainFraction
 implements   Serializable
 {
+    public static final String TOTAL              = "total";
+    public static final String COARSE             = "coarse";
+    public static final String FINE_MIDDLE        = "fine_middle";
+    public static final String SAND               = "sand";
+    public static final String SUSP_SAND          = "susp_sand";
+    public static final String SUSP_SAND_BED      = "susp_sand_bed";
+    public static final String SUSPENDED_SEDIMENT = "suspended_sediment";
+
+
     private static Logger logger = Logger.getLogger(GrainFraction.class);
 
     private Integer id;
--- a/flys-backend/src/main/java/de/intevation/flys/model/SedimentYield.java	Thu Apr 19 07:34:50 2012 +0000
+++ b/flys-backend/src/main/java/de/intevation/flys/model/SedimentYield.java	Thu Apr 19 12:48:53 2012 +0000
@@ -34,6 +34,8 @@
 
     private TimeInterval timeInterval;
 
+    private String description;
+
     private List<SedimentYieldValue> values;
 
 
@@ -61,6 +63,19 @@
         this.grainFraction = grainFraction;
     }
 
+
+    public SedimentYield(
+        River         river,
+        Unit          unit,
+        TimeInterval  timeInterval,
+        GrainFraction grainFraction,
+        String        description
+    ) {
+        this(river, unit, timeInterval, grainFraction);
+
+        this.description = description;
+    }
+
     @Id
     @SequenceGenerator(
         name           = "SEQUENCE_SEDIMENT_YIELD_ID_SEQ",
@@ -117,5 +132,14 @@
     public void setTimeInterval(TimeInterval timeInterval) {
         this.timeInterval = timeInterval;
     }
+
+    @Column(name = "description")
+    public String getDescription() {
+        return description;
+    }
+
+    public void setDescription(String description) {
+        this.description = description;
+    }
 }
 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org