Mercurial > dive4elements > river
changeset 2840:71175502d868
Added a parser for sediment yield files; started parsing and store values in db.
flys-backend/trunk@4277 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author | Ingo Weinzierl <ingo.weinzierl@intevation.de> |
---|---|
date | Thu, 19 Apr 2012 12:48:53 +0000 |
parents | 163c037f2c7e |
children | 6be2bf2492f9 |
files | flys-backend/ChangeLog flys-backend/doc/schema/oracle-minfo.sql flys-backend/src/main/java/de/intevation/flys/importer/ImportGrainFraction.java flys-backend/src/main/java/de/intevation/flys/importer/ImportRiver.java flys-backend/src/main/java/de/intevation/flys/importer/ImportSedimentYield.java flys-backend/src/main/java/de/intevation/flys/importer/parsers/SedimentYieldParser.java flys-backend/src/main/java/de/intevation/flys/model/GrainFraction.java flys-backend/src/main/java/de/intevation/flys/model/SedimentYield.java |
diffstat | 8 files changed, 482 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
--- a/flys-backend/ChangeLog Thu Apr 19 07:34:50 2012 +0000 +++ b/flys-backend/ChangeLog Thu Apr 19 12:48:53 2012 +0000 @@ -1,3 +1,22 @@ +2012-04-19 Ingo Weinzierl <ingo@intevation.de> + + * doc/schema/oracle-minfo.sql, + src/main/java/de/intevation/flys/importer/ImportSedimentYield.java, + src/main/java/de/intevation/flys/model/SedimentYield.java: Added a + column 'description' to the sediment_yield relation. + + * src/main/java/de/intevation/flys/importer/parsers/SedimentYieldParser.java: + New parser for sediment yield data. + + * src/main/java/de/intevation/flys/model/GrainFraction.java: Added constants + that represent the names of the grain fraction types. + + * src/main/java/de/intevation/flys/importer/ImportGrainFraction.java: New + constructor that takes a name only. + + * src/main/java/de/intevation/flys/importer/ImportRiver.java: Improved the + process of parsing sediment yield files. + 2012-04-19 Ingo Weinzierl <ingo@intevation.de> * src/main/java/de/intevation/flys/importer/Config.java: Added new config
--- a/flys-backend/doc/schema/oracle-minfo.sql Thu Apr 19 07:34:50 2012 +0000 +++ b/flys-backend/doc/schema/oracle-minfo.sql Thu Apr 19 12:48:53 2012 +0000 @@ -259,6 +259,7 @@ grain_fraction_id NUMBER(38,0), unit_id NUMBER(38,0) NOT NULL, time_interval_id NUMBER(38,0) NOT NULL, + description VARCHAR(256), PRIMARY KEY (id), CONSTRAINT fk_sy_river_id FOREIGN KEY (river_id) REFERENCES rivers(id), CONSTRAINT fk_sy_grain_fraction_id FOREIGN KEY (grain_fraction_id) REFERENCES grain_fraction(id),
--- a/flys-backend/src/main/java/de/intevation/flys/importer/ImportGrainFraction.java Thu Apr 19 07:34:50 2012 +0000 +++ b/flys-backend/src/main/java/de/intevation/flys/importer/ImportGrainFraction.java Thu Apr 19 12:48:53 2012 +0000 @@ -26,6 +26,11 @@ private GrainFraction peer; + public ImportGrainFraction(String name) { + this.name = name; + } + + public ImportGrainFraction( String name, Double lower,
--- a/flys-backend/src/main/java/de/intevation/flys/importer/ImportRiver.java Thu Apr 19 07:34:50 2012 +0000 +++ b/flys-backend/src/main/java/de/intevation/flys/importer/ImportRiver.java Thu Apr 19 12:48:53 2012 +0000 @@ -34,6 +34,7 @@ import de.intevation.flys.importer.parsers.AnnotationClassifier; import de.intevation.flys.importer.parsers.PegelGltParser; import de.intevation.flys.importer.parsers.SedimentDensityParser; +import de.intevation.flys.importer.parsers.SedimentYieldParser; import de.intevation.flys.importer.parsers.WstParser; import org.hibernate.Session; @@ -410,19 +411,41 @@ File[] singles = singleDir.listFiles(); File[] epochs = epochDir.listFiles(); + SedimentYieldParser parser = new SedimentYieldParser(); + if (singles == null || singles.length == 0) { log.warn("Cannot parse directory '" + singleDir + "'"); } else { - // TODO + for (File file: singles) { + if (file.isDirectory()) { + for (File child: file.listFiles()) { + parser.parse(child); + } + } + else { + parser.parse(file); + } + } } if (epochs == null || epochs.length == 0) { log.warn("Cannot parse directory '" + epochDir + "'"); } else { - // TODO + for (File file: epochs) { + if (file.isDirectory()) { + for (File child: file.listFiles()) { + parser.parse(child); + } + } + else { + parser.parse(file); + } + } } + + sedimentYields = parser.getSedimentYields(); }
--- a/flys-backend/src/main/java/de/intevation/flys/importer/ImportSedimentYield.java Thu Apr 19 07:34:50 2012 +0000 +++ b/flys-backend/src/main/java/de/intevation/flys/importer/ImportSedimentYield.java Thu Apr 19 12:48:53 2012 +0000 @@ -27,13 +27,16 @@ private ImportTimeInterval timeInterval; + private String description; + private List<ImportSedimentYieldValue> values; private SedimentYield peer; - public ImportSedimentYield() { - this.values = new ArrayList<ImportSedimentYieldValue>(); + public ImportSedimentYield(String description) { + this.values = new ArrayList<ImportSedimentYieldValue>(); + this.description = description; } @@ -102,19 +105,21 @@ " river=:river and " + " grainFraction=:grainFraction and " + " unit=:unit and " + - " timeInterval=:timeInterval" + " timeInterval=:timeInterval and " + + " description=:description" ); query.setParameter("river", river); query.setParameter("grainFraction", gf); query.setParameter("unit", u); query.setParameter("timeInterval", ti); + query.setParameter("description", description); List<SedimentYield> yields = query.list(); if (yields.isEmpty()) { log.debug("create new SedimentYield"); - peer = new SedimentYield(river, u, ti, gf); + peer = new SedimentYield(river, u, ti, gf, description); session.save(peer); } else {
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/flys-backend/src/main/java/de/intevation/flys/importer/parsers/SedimentYieldParser.java Thu Apr 19 12:48:53 2012 +0000 @@ -0,0 +1,390 @@ +package de.intevation.flys.importer.parsers; + +import java.io.File; +import java.io.IOException; + +import java.text.NumberFormat; +import java.text.ParseException; + +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.log4j.Logger; + +import de.intevation.flys.importer.ImportGrainFraction; +import de.intevation.flys.importer.ImportSedimentYield; +import de.intevation.flys.importer.ImportSedimentYieldValue; +import de.intevation.flys.importer.ImportTimeInterval; +import de.intevation.flys.importer.ImportUnit; +import de.intevation.flys.model.GrainFraction; + + +public class SedimentYieldParser extends LineParser { + + private static final Logger log = + Logger.getLogger(SedimentYieldParser.class); + + + public static final NumberFormat nf = NumberFormat.getInstance(DEFAULT_LOCALE); + + + public static final String FRAKTION_START = "Fraktion:"; + + public static final String FRACTION_COARSE_STR = + "_Grobkorn.csv"; + + public static final String FRACTION_FINE_MIDDLE_STR = + "_Fein-Mittel-Kies.csv"; + + public static final String FRACTION_SAND = + "_Sand.csv"; + + public static final String FRACTION_SUSP_SAND = + "_susp_Sand.csv"; + + public static final String FRACTION_SUSP_SAND_BED = + "_susp_Sand_bettbildAnteil.csv"; + + public static final String FRACTION_SUSPENDED_SEDIMENT = + "_Schwebstoff.csv"; + + + public static final Pattern TIMEINTERVAL_SINGLE = + Pattern.compile("\\D*([0-9]+?)\\D*"); + + public static final Pattern TIMEINTERVAL_EPOCH = + Pattern.compile("\\D*([0-9]+?)\\s*-\\s*([0-9]+?)\\D*"); + + public static final Pattern META_FRACTION = + Pattern.compile("^Fraktion: (.*)"); + + public static final Pattern META_UNIT = + Pattern.compile("^Einheit: \\[(.*)\\].*"); + + public static final Pattern META_COLUMN_NAMES = + Pattern.compile("^Fluss-km.*"); + + public static final Pattern META_GRAIN_FRACTION_A = + Pattern.compile("\\D*(([0-9]+?,[0-9]+?)\\s*-|([0-9]++)\\s*-)(([0-9]+?,[0-9]+?)|([0-9]++))\\s*([a-zA-Z]+?)\\W*\\D*"); + + public static final Pattern META_GRAIN_FRACTION_B = + Pattern.compile("(<|>){1}\\s*(\\w++)\\s*(([0-9]+?,[0-9]+?)\\s*-|([0-9]++)\\s*-)(([0-9]+?,[0-9]+?)|([0-9]++))\\s*([a-zA-Z]+?)"); + + public static final Pattern META_GRAIN_FRACTION_C = + Pattern.compile("(<|>){1}\\s*((([0-9]+?,[0-9]+?)|([0-9]++))\\s*(\\w+))"); + + + protected List<ImportSedimentYield> sedimentYields; + + protected ImportSedimentYield[] current; + + protected ImportGrainFraction grainFraction; + + protected ImportUnit unit; + + protected String description; + + protected String[] columnNames; + + + public SedimentYieldParser() { + sedimentYields = new ArrayList<ImportSedimentYield>(); + } + + + @Override + public void parse(File file) throws IOException { + description = file.getName(); + + super.parse(file); + } + + + @Override + protected void reset() { + current = null; + grainFraction = null; + unit = null; + } + + + @Override + protected void finish() { + if (current != null) { + for (ImportSedimentYield isy: current) { + sedimentYields.add(isy); + } + } + + description = null; + } + + + @Override + protected void handleLine(String line) { + if (line.startsWith(START_META_CHAR)) { + handleMetaLine(stripMetaLine(line)); + } + else { + handleDataLine(line); + } + } + + + protected void handleMetaLine(String line) { + if (handleMetaUnit(line)) { + return; + } + else if (handleMetaFraction(line)) { + return; + } + else if (handleColumnNames(line)) { + return; + } + else { + log.warn("Unknown meta line: '" + line + "'"); + } + } + + + protected boolean handleMetaUnit(String line) { + Matcher m = META_UNIT.matcher(line); + + if (m.matches()) { + unit = new ImportUnit(m.group(1)); + return true; + } + + return false; + } + + + public boolean handleMetaFraction(String line) { + Matcher m = META_FRACTION.matcher(line); + + if (m.matches()) { + String tmp = m.group(1); + + this.grainFraction = buildGrainFraction(tmp); + + return true; + } + else if (line.startsWith(FRAKTION_START)) { + String newLine = line.replace(FRAKTION_START, "").trim(); + if (newLine.length() == 0) { + log.debug("Found total grain fraction."); + this.grainFraction = new ImportGrainFraction(GrainFraction.TOTAL); + + return true; + } + } + + return false; + } + + + public boolean handleColumnNames(String line) { + Matcher m = META_COLUMN_NAMES.matcher(line); + + if (m.matches()) { + columnNames = line.split(SEPERATOR_CHAR); + + initializeSedimentYields(); + + return true; + } + + return false; + } + + + protected void handleDataLine(String line) { + String[] vals = line.split(SEPERATOR_CHAR); + + if (vals == null || vals.length < columnNames.length-1) { + log.warn("skip invalid data line: '" + line + "'"); + return; + } + + try { + Double km = nf.parse(vals[0]).doubleValue(); + + for (int i = 1, n = columnNames.length-1; i < n; i++) { + String curVal = vals[i]; + + if (curVal != null && curVal.length() > 0) { + current[i-1].addValue(new ImportSedimentYieldValue( + km, nf.parse(vals[i]).doubleValue() + )); + } + } + } + catch (ParseException pe) { + log.warn("Error while parsing numbers in '" + line + "':", pe); + } + } + + + private void initializeSedimentYields() { + // skip first column (Fluss-km) and last column (Hinweise) + current = new ImportSedimentYield[columnNames.length-2]; + + for (int i = 0, n = columnNames.length; i < n-2; i++) { + current[i] = new ImportSedimentYield(this.description); + current[i].setTimeInterval(getTimeInterval(columnNames[i+1])); + current[i].setUnit(unit); + current[i].setGrainFraction(grainFraction); + } + } + + + private ImportTimeInterval getTimeInterval(String column) { + try { + Matcher a = TIMEINTERVAL_EPOCH.matcher(column); + if (a.matches()) { + int yearA = nf.parse(a.group(1)).intValue(); + int yearB = nf.parse(a.group(2)).intValue(); + + return new ImportTimeInterval( + getDateFromYear(yearA), + getDateFromYear(yearB) + ); + } + + Matcher b = TIMEINTERVAL_SINGLE.matcher(column); + if (b.matches()) { + int year = nf.parse(b.group(1)).intValue(); + + return new ImportTimeInterval(getDateFromYear(year)); + } + + log.warn("Unknown time interval string: '" + column + "'"); + } + catch (ParseException pe) { + log.warn("Error while parsing years: " + column, pe); + } + + return null; + } + + + private ImportGrainFraction buildGrainFraction(String gfStr) { + Matcher a = META_GRAIN_FRACTION_A.matcher(gfStr); + if (a.matches()) { + String lowerA = a.group(2); + String lowerB = a.group(3); + + String upperA = a.group(4); + String upperB = a.group(5); + + String unitStr = a.group(7); + String lower = lowerA != null ? lowerA : lowerB; + String upper = upperA != null ? upperA : upperB; + + try { + return new ImportGrainFraction( + getGrainFractionTypeName(this.description), + nf.parse(lower).doubleValue(), + nf.parse(upper).doubleValue(), + new ImportUnit(unitStr) + ); + } + catch (ParseException pe) { + log.warn("Error while parsing ranges of: '" + gfStr + "'"); + } + } + + Matcher b = META_GRAIN_FRACTION_B.matcher(gfStr); + if (b.matches()) { + String lowerA = b.group(4); + String lowerB = b.group(5); + String upperA = b.group(6); + String upperB = b.group(7); + String unitStr = b.group(9); + + String lower = lowerA != null ? lowerA : lowerB; + String upper = upperA != null ? upperA : upperB; + + try { + return new ImportGrainFraction( + getGrainFractionTypeName(this.description), + nf.parse(lower).doubleValue(), + nf.parse(upper).doubleValue(), + new ImportUnit(unitStr) + ); + } + catch (ParseException pe) { + log.warn("Error while parsing ranges of: '" + gfStr + "'"); + } + } + + Matcher c = META_GRAIN_FRACTION_C.matcher(gfStr); + if (c.matches()) { + String oper = c.group(1); + String valueStr = c.group(3); + String unitStr = c.group(6); + + try { + Double value = nf.parse(valueStr).doubleValue(); + + if (oper.equals(">")) { + return new ImportGrainFraction( + getGrainFractionTypeName(this.description), + value, + null, + new ImportUnit(unitStr) + ); + } + else { + return new ImportGrainFraction( + getGrainFractionTypeName(this.description), + null, + value, + new ImportUnit(unitStr) + ); + } + } + catch (ParseException pe) { + log.warn("Error while parsing ranges of: '" + gfStr + "'"); + } + } + + log.warn("Unknow grain fraction: '" + gfStr + "'"); + + return null; + } + + + public static String getGrainFractionTypeName(String filename) { + if (filename.endsWith(FRACTION_COARSE_STR)) { + return GrainFraction.COARSE; + } + else if (filename.endsWith(FRACTION_FINE_MIDDLE_STR)) { + return GrainFraction.FINE_MIDDLE; + } + else if (filename.endsWith(FRACTION_SAND)) { + return GrainFraction.SAND; + } + else if (filename.endsWith(FRACTION_SUSP_SAND)) { + return GrainFraction.SUSP_SAND; + } + else if (filename.endsWith(FRACTION_SUSP_SAND_BED)) { + return GrainFraction.SUSP_SAND_BED; + } + else if (filename.endsWith(FRACTION_SUSPENDED_SEDIMENT)) { + return GrainFraction.SUSPENDED_SEDIMENT; + } + else { + log.warn("Unknown grain fraction type: '" + filename + "'"); + return "unknown"; + } + } + + + public List<ImportSedimentYield> getSedimentYields() { + return sedimentYields; + } +} +// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :
--- a/flys-backend/src/main/java/de/intevation/flys/model/GrainFraction.java Thu Apr 19 07:34:50 2012 +0000 +++ b/flys-backend/src/main/java/de/intevation/flys/model/GrainFraction.java Thu Apr 19 12:48:53 2012 +0000 @@ -20,6 +20,15 @@ public class GrainFraction implements Serializable { + public static final String TOTAL = "total"; + public static final String COARSE = "coarse"; + public static final String FINE_MIDDLE = "fine_middle"; + public static final String SAND = "sand"; + public static final String SUSP_SAND = "susp_sand"; + public static final String SUSP_SAND_BED = "susp_sand_bed"; + public static final String SUSPENDED_SEDIMENT = "suspended_sediment"; + + private static Logger logger = Logger.getLogger(GrainFraction.class); private Integer id;
--- a/flys-backend/src/main/java/de/intevation/flys/model/SedimentYield.java Thu Apr 19 07:34:50 2012 +0000 +++ b/flys-backend/src/main/java/de/intevation/flys/model/SedimentYield.java Thu Apr 19 12:48:53 2012 +0000 @@ -34,6 +34,8 @@ private TimeInterval timeInterval; + private String description; + private List<SedimentYieldValue> values; @@ -61,6 +63,19 @@ this.grainFraction = grainFraction; } + + public SedimentYield( + River river, + Unit unit, + TimeInterval timeInterval, + GrainFraction grainFraction, + String description + ) { + this(river, unit, timeInterval, grainFraction); + + this.description = description; + } + @Id @SequenceGenerator( name = "SEQUENCE_SEDIMENT_YIELD_ID_SEQ", @@ -117,5 +132,14 @@ public void setTimeInterval(TimeInterval timeInterval) { this.timeInterval = timeInterval; } + + @Column(name = "description") + public String getDescription() { + return description; + } + + public void setDescription(String description) { + this.description = description; + } } // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :