teichmann@5844: /* Copyright (C) 2011, 2012, 2013 by Bundesanstalt für Gewässerkunde teichmann@5844: * Software engineering by Intevation GmbH teichmann@5844: * teichmann@5844: * This file is Free Software under the GNU AGPL (>=v3) teichmann@5844: * and comes with ABSOLUTELY NO WARRANTY! Check out the teichmann@5844: * documentation coming with Dive4Elements River for details. teichmann@5844: */ teichmann@5844: teichmann@5829: package org.dive4elements.river.importer.parsers; ingo@2840: ingo@2840: import java.io.File; ingo@2840: import java.io.IOException; ingo@2840: ingo@2840: import java.text.NumberFormat; ingo@2840: import java.text.ParseException; ingo@2840: ingo@2840: import java.util.ArrayList; ingo@2840: import java.util.List; ingo@2840: import java.util.regex.Matcher; ingo@2840: import java.util.regex.Pattern; ingo@2840: ingo@2840: import org.apache.log4j.Logger; ingo@2840: teichmann@5829: import org.dive4elements.river.importer.ImportGrainFraction; teichmann@5829: import org.dive4elements.river.importer.ImportSedimentYield; teichmann@5829: import org.dive4elements.river.importer.ImportSedimentYieldValue; teichmann@5829: import org.dive4elements.river.importer.ImportTimeInterval; teichmann@5829: import org.dive4elements.river.importer.ImportUnit; teichmann@5829: import org.dive4elements.river.model.GrainFraction; ingo@2840: ingo@2840: ingo@2840: public class SedimentYieldParser extends LineParser { ingo@2840: ingo@2840: private static final Logger log = ingo@2840: Logger.getLogger(SedimentYieldParser.class); ingo@2840: ingo@2840: ingo@2840: public static final NumberFormat nf = NumberFormat.getInstance(DEFAULT_LOCALE); ingo@2840: ingo@2840: ingo@2840: public static final String FRAKTION_START = "Fraktion:"; ingo@2840: ingo@2840: public static final String FRACTION_COARSE_STR = tom@5184: ".*Grobkorn.*"; ingo@2840: ingo@2840: public static final String FRACTION_FINE_MIDDLE_STR = tom@5184: ".*Fein-Mittel-Kies.*"; tom@5277: ingo@2840: public static final String FRACTION_SAND = tom@5184: ".*Sand.*"; tom@5277: ingo@2840: public static final String FRACTION_SUSP_SAND = tom@5184: ".*susp_Sand.*"; tom@5277: ingo@2840: public static final String FRACTION_SUSP_SAND_BED = tom@5184: ".*bettbild_Anteil_susp_Sand.*"; rrenkert@4368: rrenkert@4368: public static final String FRACTION_SUSP_SAND_BED_EPOCH = tom@5184: ".*susp_Sand_bettbildAnteil.*"; ingo@2840: ingo@2840: public static final String FRACTION_SUSPENDED_SEDIMENT = tom@5184: ".*Schwebstoff.*"; ingo@2840: ingo@3941: public static final String FRACTION_TOTAL = tom@5184: ".*gesamt.*"; ingo@3941: ingo@2840: ingo@2840: public static final Pattern TIMEINTERVAL_SINGLE = ingo@2840: Pattern.compile("\\D*([0-9]+?)\\D*"); ingo@2840: ingo@2840: public static final Pattern TIMEINTERVAL_EPOCH = ingo@2840: Pattern.compile("\\D*([0-9]+?)\\s*-\\s*([0-9]+?)\\D*"); ingo@2840: ingo@2840: public static final Pattern META_FRACTION = ingo@2840: Pattern.compile("^Fraktion: (.*)"); ingo@2840: ingo@2840: public static final Pattern META_UNIT = ingo@2840: Pattern.compile("^Einheit: \\[(.*)\\].*"); ingo@2840: ingo@2840: public static final Pattern META_COLUMN_NAMES = ingo@2840: Pattern.compile("^Fluss-km.*"); ingo@2840: ingo@2840: public static final Pattern META_GRAIN_FRACTION_A = ingo@2840: Pattern.compile("\\D*(([0-9]+?,[0-9]+?)\\s*-|([0-9]++)\\s*-)(([0-9]+?,[0-9]+?)|([0-9]++))\\s*([a-zA-Z]+?)\\W*\\D*"); ingo@2840: ingo@2840: public static final Pattern META_GRAIN_FRACTION_B = ingo@2840: Pattern.compile("(<|>){1}\\s*(\\w++)\\s*(([0-9]+?,[0-9]+?)\\s*-|([0-9]++)\\s*-)(([0-9]+?,[0-9]+?)|([0-9]++))\\s*([a-zA-Z]+?)"); ingo@2840: ingo@2840: public static final Pattern META_GRAIN_FRACTION_C = ingo@2840: Pattern.compile("(<|>){1}\\s*((([0-9]+?,[0-9]+?)|([0-9]++))\\s*(\\w+))"); ingo@2840: ingo@2840: ingo@2840: protected List sedimentYields; ingo@2840: ingo@2840: protected ImportSedimentYield[] current; ingo@2840: ingo@2840: protected ImportGrainFraction grainFraction; ingo@2840: ingo@2840: protected ImportUnit unit; ingo@2840: ingo@2840: protected String description; ingo@2840: ingo@2840: protected String[] columnNames; ingo@2840: ingo@2840: ingo@2840: public SedimentYieldParser() { ingo@2840: sedimentYields = new ArrayList(); ingo@2840: } ingo@2840: ingo@2840: ingo@2840: @Override ingo@2840: public void parse(File file) throws IOException { ingo@2840: description = file.getName(); ingo@2840: ingo@2840: super.parse(file); ingo@2840: } ingo@2840: ingo@2840: ingo@2840: @Override ingo@2840: protected void reset() { ingo@2840: current = null; ingo@2840: grainFraction = null; ingo@2840: unit = null; ingo@2840: } ingo@2840: ingo@2840: ingo@2840: @Override ingo@2840: protected void finish() { ingo@2840: if (current != null) { ingo@2840: for (ImportSedimentYield isy: current) { ingo@2840: sedimentYields.add(isy); ingo@2840: } ingo@2840: } ingo@2840: ingo@2840: description = null; ingo@2840: } ingo@2840: ingo@2840: ingo@2840: @Override ingo@4193: protected void handleLine(int lineNum, String line) { ingo@2840: if (line.startsWith(START_META_CHAR)) { ingo@2840: handleMetaLine(stripMetaLine(line)); ingo@2840: } ingo@2840: else { ingo@2840: handleDataLine(line); ingo@2840: } ingo@2840: } ingo@2840: ingo@2840: ingo@2840: protected void handleMetaLine(String line) { ingo@2840: if (handleMetaUnit(line)) { ingo@2840: return; ingo@2840: } ingo@2840: else if (handleMetaFraction(line)) { ingo@2840: return; ingo@2840: } ingo@2840: else if (handleColumnNames(line)) { ingo@2840: return; ingo@2840: } ingo@2840: else { sascha@3662: log.warn("SYP: Unknown meta line: '" + line + "'"); ingo@2840: } ingo@2840: } ingo@2840: ingo@2840: ingo@2840: protected boolean handleMetaUnit(String line) { ingo@2840: Matcher m = META_UNIT.matcher(line); ingo@2840: ingo@2840: if (m.matches()) { ingo@2840: unit = new ImportUnit(m.group(1)); ingo@2840: return true; ingo@2840: } ingo@2840: ingo@2840: return false; ingo@2840: } ingo@2840: ingo@2840: ingo@2840: public boolean handleMetaFraction(String line) { ingo@2840: Matcher m = META_FRACTION.matcher(line); ingo@2840: ingo@2840: if (m.matches()) { ingo@2840: String tmp = m.group(1); ingo@2840: ingo@2840: this.grainFraction = buildGrainFraction(tmp); ingo@2840: ingo@2840: return true; ingo@2840: } ingo@2840: else if (line.startsWith(FRAKTION_START)) { ingo@2840: String newLine = line.replace(FRAKTION_START, "").trim(); ingo@2840: if (newLine.length() == 0) { ingo@2840: log.debug("Found total grain fraction."); ingo@2840: this.grainFraction = new ImportGrainFraction(GrainFraction.TOTAL); ingo@2840: ingo@2840: return true; ingo@2840: } ingo@2840: } ingo@2840: ingo@2840: return false; ingo@2840: } ingo@2840: ingo@2840: ingo@2840: public boolean handleColumnNames(String line) { ingo@2840: Matcher m = META_COLUMN_NAMES.matcher(line); ingo@2840: ingo@2840: if (m.matches()) { ingo@2840: columnNames = line.split(SEPERATOR_CHAR); ingo@2840: ingo@2840: initializeSedimentYields(); ingo@2840: ingo@2840: return true; ingo@2840: } ingo@2840: ingo@2840: return false; ingo@2840: } ingo@2840: ingo@2840: ingo@2840: protected void handleDataLine(String line) { ingo@2840: String[] vals = line.split(SEPERATOR_CHAR); ingo@2840: ingo@2840: if (vals == null || vals.length < columnNames.length-1) { sascha@3662: log.warn("SYP: skip invalid data line: '" + line + "'"); ingo@2840: return; ingo@2840: } ingo@2840: ingo@2840: try { ingo@2840: Double km = nf.parse(vals[0]).doubleValue(); ingo@2840: ingo@2840: for (int i = 1, n = columnNames.length-1; i < n; i++) { ingo@2840: String curVal = vals[i]; ingo@2840: ingo@2840: if (curVal != null && curVal.length() > 0) { ingo@2840: current[i-1].addValue(new ImportSedimentYieldValue( ingo@2840: km, nf.parse(vals[i]).doubleValue() ingo@2840: )); ingo@2840: } ingo@2840: } ingo@2840: } ingo@2840: catch (ParseException pe) { tom@5490: log.warn("SYP: unparseable number in data row '" + line + "':", pe); ingo@2840: } ingo@2840: } ingo@2840: ingo@2840: ingo@2840: private void initializeSedimentYields() { ingo@2840: // skip first column (Fluss-km) and last column (Hinweise) ingo@2840: current = new ImportSedimentYield[columnNames.length-2]; ingo@2840: ingo@2840: for (int i = 0, n = columnNames.length; i < n-2; i++) { ingo@2840: current[i] = new ImportSedimentYield(this.description); ingo@2840: current[i].setTimeInterval(getTimeInterval(columnNames[i+1])); ingo@2840: current[i].setUnit(unit); ingo@2840: current[i].setGrainFraction(grainFraction); ingo@2840: } ingo@2840: } ingo@2840: ingo@2840: ingo@2840: private ImportTimeInterval getTimeInterval(String column) { ingo@2840: try { ingo@2840: Matcher a = TIMEINTERVAL_EPOCH.matcher(column); ingo@2840: if (a.matches()) { ingo@2840: int yearA = nf.parse(a.group(1)).intValue(); ingo@2840: int yearB = nf.parse(a.group(2)).intValue(); ingo@2840: ingo@2840: return new ImportTimeInterval( tom@5845: getStartDateFromYear(yearA), tom@5845: getEndDateFromYear(yearB) ingo@2840: ); ingo@2840: } ingo@2840: ingo@2840: Matcher b = TIMEINTERVAL_SINGLE.matcher(column); ingo@2840: if (b.matches()) { ingo@2840: int year = nf.parse(b.group(1)).intValue(); ingo@2840: tom@5845: return new ImportTimeInterval(getStartDateFromYear(year)); ingo@2840: } ingo@2840: sascha@3662: log.warn("SYP: Unknown time interval string: '" + column + "'"); ingo@2840: } ingo@2840: catch (ParseException pe) { tom@5490: log.warn("SYP: Could not parse years: " + column, pe); ingo@2840: } ingo@2840: ingo@2840: return null; ingo@2840: } ingo@2840: ingo@2840: ingo@2840: private ImportGrainFraction buildGrainFraction(String gfStr) { ingo@2840: Matcher a = META_GRAIN_FRACTION_A.matcher(gfStr); ingo@2840: if (a.matches()) { ingo@2840: String lowerA = a.group(2); ingo@2840: String lowerB = a.group(3); ingo@2840: ingo@2840: String upperA = a.group(4); ingo@2840: String upperB = a.group(5); ingo@2840: ingo@2840: String lower = lowerA != null ? lowerA : lowerB; ingo@2840: String upper = upperA != null ? upperA : upperB; ingo@2840: ingo@2840: try { ingo@2840: return new ImportGrainFraction( ingo@2840: getGrainFractionTypeName(this.description), ingo@2840: nf.parse(lower).doubleValue(), tom@5439: nf.parse(upper).doubleValue() ingo@2840: ); ingo@2840: } ingo@2840: catch (ParseException pe) { tom@5490: log.warn("SYP: Could not parse ranges of: '" + gfStr + "'"); ingo@2840: } ingo@2840: } ingo@2840: ingo@2840: Matcher b = META_GRAIN_FRACTION_B.matcher(gfStr); ingo@2840: if (b.matches()) { ingo@2840: String lowerA = b.group(4); ingo@2840: String lowerB = b.group(5); ingo@2840: String upperA = b.group(6); ingo@2840: String upperB = b.group(7); ingo@2840: ingo@2840: String lower = lowerA != null ? lowerA : lowerB; ingo@2840: String upper = upperA != null ? upperA : upperB; ingo@2840: ingo@2840: try { ingo@2840: return new ImportGrainFraction( ingo@2840: getGrainFractionTypeName(this.description), ingo@2840: nf.parse(lower).doubleValue(), tom@5439: nf.parse(upper).doubleValue() ingo@2840: ); ingo@2840: } ingo@2840: catch (ParseException pe) { tom@5490: log.warn("SYP: Could not parse ranges of: '" + gfStr + "'"); ingo@2840: } ingo@2840: } ingo@2840: ingo@2840: Matcher c = META_GRAIN_FRACTION_C.matcher(gfStr); ingo@2840: if (c.matches()) { ingo@2840: String oper = c.group(1); ingo@2840: String valueStr = c.group(3); ingo@2840: ingo@2840: try { ingo@2840: Double value = nf.parse(valueStr).doubleValue(); ingo@2840: ingo@2840: if (oper.equals(">")) { ingo@2840: return new ImportGrainFraction( ingo@2840: getGrainFractionTypeName(this.description), ingo@2840: value, tom@5439: null ingo@2840: ); ingo@2840: } ingo@2840: else { ingo@2840: return new ImportGrainFraction( ingo@2840: getGrainFractionTypeName(this.description), ingo@2840: null, tom@5439: value ingo@2840: ); ingo@2840: } ingo@2840: } ingo@2840: catch (ParseException pe) { tom@5490: log.warn("SYP: Could not parse ranges of: '" + gfStr + "'"); ingo@2840: } ingo@2840: } ingo@2840: tom@5184: log.warn("SYP: Unknown grain fraction: '" + gfStr + "'"); ingo@2840: ingo@2840: return null; ingo@2840: } ingo@2840: ingo@2840: ingo@2840: public static String getGrainFractionTypeName(String filename) { tom@5184: if (Pattern.matches(FRACTION_COARSE_STR, filename)) { tom@5184: return GrainFraction.COARSE; ingo@2840: } tom@5184: else if (Pattern.matches(FRACTION_FINE_MIDDLE_STR, filename)) { ingo@2840: return GrainFraction.FINE_MIDDLE; ingo@2840: } tom@5184: else if (Pattern.matches(FRACTION_SUSP_SAND_BED, filename) || tom@5184: Pattern.matches(FRACTION_SUSP_SAND_BED_EPOCH, filename)) { tom@5277: return GrainFraction.SUSP_SAND_BED; tom@5277: } tom@5184: else if (Pattern.matches(FRACTION_SUSP_SAND, filename)) { tom@5277: return GrainFraction.SUSP_SAND; tom@5277: } tom@5184: else if (Pattern.matches(FRACTION_SAND, filename)) { ingo@2840: return GrainFraction.SAND; ingo@2840: } tom@5184: else if (Pattern.matches(FRACTION_SUSPENDED_SEDIMENT, filename)) { ingo@2840: return GrainFraction.SUSPENDED_SEDIMENT; ingo@2840: } tom@5184: else if (Pattern.matches(FRACTION_TOTAL, filename)) { ingo@3941: return GrainFraction.TOTAL; ingo@3941: } ingo@2840: else { sascha@3662: log.warn("SYP: Unknown grain fraction '" + filename + "'"); ingo@2840: return "unknown"; ingo@2840: } ingo@2840: } ingo@2840: ingo@2840: ingo@2840: public List getSedimentYields() { ingo@2840: return sedimentYields; ingo@2840: } ingo@2840: } ingo@2840: // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :