/* Copyright (C) 2011, 2012, 2013 by Bundesanstalt für Gewässerkunde
 * Software engineering by Intevation GmbH
 *
 * This file is Free Software under the GNU AGPL (>=v3)
 * and comes with ABSOLUTELY NO WARRANTY! Check out the
 * documentation coming with Dive4Elements River for details.
 */

package org.dive4elements.river.importer.parsers;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.math.BigDecimal;
import java.text.NumberFormat;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Date;
import java.util.EnumMap;
import java.util.List;
import java.util.Locale;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;
import org.dive4elements.river.backend.utils.DateUtil;
import org.dive4elements.river.backend.utils.EpsilonComparator;
import org.dive4elements.river.importer.ImportBedHeight;
import org.dive4elements.river.importer.ImportBedHeightType;
import org.dive4elements.river.importer.ImportBedHeightValue;
import org.dive4elements.river.importer.ImportElevationModel;
import org.dive4elements.river.importer.ImportLocationSystem;
import org.dive4elements.river.importer.ImportRange;
import org.dive4elements.river.importer.ImportTimeInterval;
import org.dive4elements.river.importer.ImportUnit;
import org.dive4elements.river.importer.ImporterSession;
import org.dive4elements.river.importer.common.AbstractParser;
import org.dive4elements.river.model.BedHeightType;

/**
 * Parser for bed height files.
 *
 * <p>A file is read line by line using {@link #ENCODING}. Lines starting with
 * {@link #START_META_CHAR} are treated as meta lines and matched against the
 * {@code META_*} patterns; one of them is the column title line, which
 * determines the position of each value column. All other non-empty lines are
 * treated as data lines whose fields are separated by {@link #SEPERATOR_CHAR}
 * and whose numbers are parsed with {@link #DEFAULT_LOCALE}.</p>
 *
 * <p>Each successfully read file yields one {@link ImportBedHeight}, available
 * through {@link #getBedHeights()}.</p>
 *
 * <p>NOTE(review): instances keep per-file state and share the static
 * {@link #nf}; nothing here synchronizes access, so concurrent use looks
 * unsafe - confirm before sharing a parser between threads.</p>
 */
public class BedHeightParser {

    private static final Logger log = Logger.getLogger(BedHeightParser.class);

    /** Character encoding used to read the input files. */
    public static final String ENCODING = "ISO-8859-1";

    /** Locale used for number parsing. */
    public static final Locale DEFAULT_LOCALE = Locale.GERMAN;

    /** Prefix marking a meta line. */
    public static final String START_META_CHAR = "#";

    /** Field separator of data lines and of the column title line. */
    public static final String SEPERATOR_CHAR = ";";

    public static final Pattern META_YEAR =
        Pattern.compile("^Jahr: [^0-9]*(\\d*).*", Pattern.CASE_INSENSITIVE);

    public static final Pattern META_TIMEINTERVAL =
        Pattern.compile("^Zeitraum: Epoche (\\d*)-(\\d*).*", Pattern.CASE_INSENSITIVE);

    public static final Pattern META_TYPE =
        Pattern.compile("^Aufnahmeart: (.*).*", Pattern.CASE_INSENSITIVE);

    public static final Pattern META_LOCATION_SYSTEM =
        Pattern.compile("^Lagesystem: (.*).*", Pattern.CASE_INSENSITIVE);

    public static final Pattern META_CUR_ELEVATION_SYSTEM =
        Pattern.compile("^H.hensystem:\\s(.*)?? \\[(.*)\\].*", Pattern.CASE_INSENSITIVE);

    public static final Pattern META_OLD_ELEVATION_SYSTEM =
        Pattern.compile("^urspr.ngliches H.hensystem:\\s(.*)?? \\[(.*)\\].*", Pattern.CASE_INSENSITIVE);

    public static final Pattern META_RANGE =
        Pattern.compile("^Strecke:\\D*(\\d++.?\\d*) ?- ?(\\d++.?\\d*).*", Pattern.CASE_INSENSITIVE);

    public static final Pattern META_EVALUATION_BY =
        Pattern.compile("^Auswerter: (.*).*", Pattern.CASE_INSENSITIVE);

    public static final Pattern META_SOUNDING_WIDTH =
        Pattern.compile("^ausgewertete Peilbreite:\\s*(\\S.*).*", Pattern.CASE_INSENSITIVE);

    public static final Pattern META_COMMENTS =
        Pattern.compile("^Weitere Bemerkungen: (.*).*", Pattern.CASE_INSENSITIVE);

    private static final Pattern META_COLUMNTITLES =
        Pattern.compile("^Fluss-km\\s*;.+", Pattern.CASE_INSENSITIVE);

    /**
     * Known value columns together with the (case-insensitive) pattern their
     * column title has to match.
     */
    private enum ColTitlePattern {
        KM("Fluss-km.*"),
        HEIGHT("mittlere Sohlh.he\\s*\\[(.*)\\].*"),
        UNCERTAINTY("Unsicherheit\\s*\\[(.*)\\].*"),
        GAP("Datenl.cke.*"),
        WIDTH("Peilbreite\\s*\\[(.*)\\].*"),
        MINHEIGHT("Minimale Sohlh.he\\s*\\[(.*)\\].*"),
        MAXHEIGHT("Maximale Sohlh.he\\s*\\[(.*)\\].*"),
        HEIGHT01("Feld\\s*1\\s*\\[(.*)\\].*"),
        HEIGHT02("Feld\\s*2\\s*\\[(.*)\\].*"),
        HEIGHT03("Feld\\s*3\\s*\\[(.*)\\].*"),
        HEIGHT04("Feld\\s*4\\s*\\[(.*)\\].*"),
        HEIGHT05("Feld\\s*5\\s*\\[(.*)\\].*"),
        HEIGHT06("Feld\\s*6\\s*\\[(.*)\\].*"),
        HEIGHT07("Feld\\s*7\\s*\\[(.*)\\].*"),
        HEIGHT08("Feld\\s*8\\s*\\[(.*)\\].*"),
        HEIGHT09("Feld\\s*9\\s*\\[(.*)\\].*"),
        HEIGHT10("Feld\\s*10\\s*\\[(.*)\\].*");

        private final Pattern pattern;

        ColTitlePattern(final String regexp) {
            this.pattern = Pattern.compile(regexp, Pattern.CASE_INSENSITIVE);
        }

        public Pattern getPattern() {
            return this.pattern;
        }

        /**
         * Returns the {@code HEIGHTnn} constant for a section index.
         *
         * @param index section number, 1 to 10
         * @return the matching constant
         * @throws IllegalArgumentException if no constant exists for the index
         */
        public static ColTitlePattern getSectionPattern(final int index) {
            // Locale.ROOT: the default locale must not influence the digits
            // used to build the constant name.
            return ColTitlePattern.valueOf(String.format(Locale.ROOT, "HEIGHT%02d", index));
        }
    }

    /** Column index per known column of the current file; -1 means "not present". */
    private final EnumMap<ColTitlePattern, Integer> cols = new EnumMap<>(ColTitlePattern.class);

    /** Number format for {@link #DEFAULT_LOCALE}, shared by all instances. */
    protected static NumberFormat nf = NumberFormat.getInstance(DEFAULT_LOCALE);

    /** Results of all files parsed so far by this instance. */
    protected List<ImportBedHeight> bedHeights;

    /** Stations already seen in the current file, compared via {@link EpsilonComparator#CMP}. */
    protected TreeSet<Double> kmExists;

    public BedHeightParser() {
        this.bedHeights = new ArrayList<>();
        this.kmExists = new TreeSet<>(EpsilonComparator.CMP);
    }

    /**
     * Creates the import object that collects the content of one file.
     *
     * @param description description passed to the new object
     * @return a new, empty import object
     */
    protected ImportBedHeight newImportBedHeight(final String description) {
        return new ImportBedHeight(description);
    }

    /**
     * @return the import objects of all files parsed so far (the internal
     *         list, not a copy)
     */
    public List<ImportBedHeight> getBedHeights() {
        return this.bedHeights;
    }

    /**
     * Parses one file and, if reading succeeds, appends the result to
     * {@link #getBedHeights()}.
     *
     * <p>The file name with every occurrence of {@code .csv} removed is used
     * as description. Duplicate-station tracking and column positions are
     * reset before reading.</p>
     *
     * @param file the file to read
     * @throws IOException if the file cannot be opened or read
     */
    public void parse(final File file) throws IOException {
        log.info("Parsing bed height single file '" + file + "'");

        final ImportBedHeight obj = newImportBedHeight(file.getName().replaceAll("\\.csv", ""));

        this.kmExists.clear();
        this.cols.clear();
        for (final ColTitlePattern col : ColTitlePattern.values()) {
            this.cols.put(col, -1);
        }

        // The stream is a resource of its own so that it is closed even if
        // creating the reader fails (e.g. unsupported encoding).
        try (FileInputStream stream = new FileInputStream(file);
             LineNumberReader in = new LineNumberReader(new InputStreamReader(stream, ENCODING))) {

            String line;
            while ((line = in.readLine()) != null) {
                line = line.trim();
                if (line.isEmpty()) {
                    continue;
                }
                if (line.startsWith(START_META_CHAR)) {
                    handleMetaLine(obj, line);
                }
                else {
                    handleDataLine(obj, line);
                }
            }

            log.info("File contained " + obj.getValueCount() + " values.");
            this.bedHeights.add(obj);
        }
    }

    /**
     * Removes the first character of a meta line and, if present, one
     * directly following space.
     *
     * @param line a meta line; must not be empty
     * @return the remaining text
     */
    protected static String stripMetaLine(final String line) {
        final String tmp = line.substring(1);
        return tmp.startsWith(" ") ? tmp.substring(1) : tmp;
    }

    /**
     * Offers a meta line to the meta handlers in a fixed order and stops at
     * the first one that accepts it. Logs a warning if none does.
     *
     * @param obj  import object receiving the meta data
     * @param line the complete meta line, including the leading marker
     */
    protected void handleMetaLine(final ImportBedHeight obj, final String line) {
        final String meta = stripMetaLine(line);

        // Short-circuit evaluation keeps "first match wins".
        final boolean handled = handleMetaYear(obj, meta)
            || handleMetaTimeInterval(obj, meta)
            || handleMetaComment(obj, meta)
            || handleMetaEvaluationBy(obj, meta)
            || handleMetaRange(obj, meta)
            || handleMetaType(obj, meta)
            || handleMetaLocationSystem(obj, meta)
            || handleMetaCurElevationModel(obj, meta)
            || handleMetaOldElevationModel(obj, meta)
            || handleMetaSoundingWidth(obj, meta)
            || handleMetaColumnTitles(obj, meta);

        if (!handled) {
            log.warn("BHP: Meta line did not match any known type: " + line);
        }
    }

    /**
     * Handles a {@link #META_YEAR} line.
     *
     * @return {@code true} if the line matched the pattern, even if no usable
     *         year could be extracted
     */
    protected boolean handleMetaYear(final ImportBedHeight obj, final String line) {
        final Matcher m = META_YEAR.matcher(line);
        if (!m.matches()) {
            return false;
        }
        final String tmp = m.group(1).trim();
        if (tmp.isEmpty()) {
            log.warn("BHP: No year given.");
            return true;
        }
        try {
            obj.setYear(Integer.parseInt(tmp));
        }
        catch (final NumberFormatException e) {
            // The pattern only guarantees digits, not that they fit an int.
            log.warn("BHP: could not parse year", e);
        }
        return true;
    }

    /**
     * Handles a {@link #META_TIMEINTERVAL} line by converting both captured
     * years with {@link DateUtil} and setting the resulting interval.
     *
     * @return {@code true} if the line matched the pattern, even if the years
     *         could not be parsed
     */
    protected boolean handleMetaTimeInterval(final ImportBedHeight obj, final String line) {
        final Matcher m = META_TIMEINTERVAL.matcher(line);
        if (!m.matches()) {
            return false;
        }
        final String lo = m.group(1).trim();
        final String up = m.group(2).trim();
        log.debug("Found time interval: " + lo + " - " + up);
        try {
            final int lower = Integer.parseInt(lo);
            final int upper = Integer.parseInt(up);
            final Date fromYear = DateUtil.getStartDateFromYear(lower);
            final Date toYear = DateUtil.getEndDateFromYear(upper);
            obj.setTimeInterval(new ImportTimeInterval(fromYear, toYear));
        }
        catch (final NumberFormatException e) {
            log.warn("BHP: could not parse timeinterval", e);
        }
        return true;
    }

    /**
     * Handles a {@link #META_COMMENTS} line; the captured text without
     * separator characters becomes the notes of {@code obj}.
     *
     * @return {@code true} if the line matched the pattern
     */
    protected boolean handleMetaComment(final ImportBedHeight obj, final String line) {
        final Matcher m = META_COMMENTS.matcher(line);
        if (!m.matches()) {
            return false;
        }
        obj.setNotes(m.group(1).replace(SEPERATOR_CHAR, "").trim());
        return true;
    }

    /**
     * Handles a {@link #META_EVALUATION_BY} line.
     *
     * @return {@code true} if the line matched the pattern
     */
    protected boolean handleMetaEvaluationBy(final ImportBedHeight obj, final String line) {
        final Matcher m = META_EVALUATION_BY.matcher(line);
        if (!m.matches()) {
            return false;
        }
        obj.setEvaluationBy(m.group(1).replace(SEPERATOR_CHAR, "").trim());
        return true;
    }

    /**
     * Handles a {@link #META_RANGE} line.
     *
     * @return {@code true} only if the line matched and both bounds could be
     *         parsed; a matching line with unparseable bounds is reported as
     *         not handled
     */
    protected boolean handleMetaRange(final ImportBedHeight obj, final String line) {
        final Matcher m = META_RANGE.matcher(line);
        if (!m.matches()) {
            return false;
        }
        final String a = m.group(1).replace(SEPERATOR_CHAR, "").trim();
        final String b = m.group(2).replace(SEPERATOR_CHAR, "").trim();
        try {
            final BigDecimal lower = AbstractParser.parseDecimal(a);
            final BigDecimal upper = AbstractParser.parseDecimal(b);
            obj.setRange(new ImportRange(lower, upper));
            return true;
        }
        catch (final Exception e) {
            // Broad catch kept: the exception types of parseDecimal are not
            // visible from this file.
            log.warn("BHP: could not parse range", e);
            return false;
        }
    }

    /**
     * Handles a {@link #META_TYPE} line by looking the captured name up via
     * {@link BedHeightType#fetchBedHeightTypeForType}.
     *
     * @return {@code true} only if the line matched and the lookup returned a
     *         type; an unknown type is logged as error and reported as not
     *         handled
     */
    protected boolean handleMetaType(final ImportBedHeight obj, final String line) {
        final Matcher m = META_TYPE.matcher(line);
        if (!m.matches()) {
            return false;
        }
        final String tmp = m.group(1).replace(SEPERATOR_CHAR, "").trim();
        final BedHeightType bht = BedHeightType.fetchBedHeightTypeForType(
            tmp, ImporterSession.getInstance().getDatabaseSession());
        if (bht == null) {
            log.error("Unknown bed height type: '" + tmp + "'. File ignored.");
            return false;
        }
        obj.setType(new ImportBedHeightType(bht));
        return true;
    }

    /**
     * Handles a {@link #META_LOCATION_SYSTEM} line; the captured text is
     * passed as both arguments of the new {@link ImportLocationSystem}.
     *
     * @return {@code true} if the line matched the pattern
     */
    protected boolean handleMetaLocationSystem(final ImportBedHeight obj, final String line) {
        final Matcher m = META_LOCATION_SYSTEM.matcher(line);
        if (!m.matches()) {
            return false;
        }
        final String tmp = m.group(1).replace(SEPERATOR_CHAR, "").trim();
        obj.setLocationSystem(new ImportLocationSystem(tmp, tmp));
        return true;
    }

    /**
     * Handles a {@link #META_CUR_ELEVATION_SYSTEM} line; group 1 is used as
     * model name, the bracketed group 2 as unit.
     *
     * @return {@code true} if the line matched the pattern
     */
    protected boolean handleMetaCurElevationModel(final ImportBedHeight obj, final String line) {
        final Matcher m = META_CUR_ELEVATION_SYSTEM.matcher(line);
        if (!m.matches()) {
            return false;
        }
        final String name = m.group(1).trim();
        final String unit = m.group(2).trim();
        obj.setCurElevationModel(new ImportElevationModel(name, new ImportUnit(unit)));
        return true;
    }

    /**
     * Handles a {@link #META_OLD_ELEVATION_SYSTEM} line; group 1 is used as
     * model name, the bracketed group 2 as unit.
     *
     * @return {@code true} if the line matched the pattern
     */
    protected boolean handleMetaOldElevationModel(final ImportBedHeight obj, final String line) {
        final Matcher m = META_OLD_ELEVATION_SYSTEM.matcher(line);
        if (!m.matches()) {
            return false;
        }
        final String name = m.group(1).trim();
        final String unit = m.group(2).trim();
        obj.setOldElevationModel(new ImportElevationModel(name, new ImportUnit(unit)));
        return true;
    }

    /**
     * Handles a {@link #META_SOUNDING_WIDTH} line.
     *
     * @return {@code true} if the line matched the pattern
     */
    protected boolean handleMetaSoundingWidth(final ImportBedHeight obj, final String line) {
        final Matcher m = META_SOUNDING_WIDTH.matcher(line);
        if (!m.matches()) {
            return false;
        }
        obj.setSoundingWidthInfo(m.group(1).replace(SEPERATOR_CHAR, "").trim());
        return true;
    }

    /**
     * Tries to parse a line as column titles line.
     *
     * <p>Each title is compared against the known column patterns in
     * declaration order; the first matching pattern gets the title's index
     * assigned. Titles matching no pattern are skipped.</p>
     *
     * @return {@code true} if the line is a column titles line
     */
    protected boolean handleMetaColumnTitles(final ImportBedHeight obj, final String line) {
        if (!META_COLUMNTITLES.matcher(line).matches()) {
            return false;
        }
        final String[] titles = line.split(SEPERATOR_CHAR, 0);
        for (int i = 0; i < titles.length; i++) {
            // Patterns are anchored by matches(); blanks after the separator
            // must not prevent a column from being recognised.
            final String title = titles[i].trim();
            for (final ColTitlePattern col : ColTitlePattern.values()) {
                if (col.getPattern().matcher(title).matches()) {
                    this.cols.put(col, i);
                    break;
                }
            }
        }
        return true;
    }

    /**
     * Parses one data line and adds the resulting value to {@code obj}.
     *
     * <p>Lines with fewer than two fields, with an unparseable first field or
     * with a station already seen in the current file are skipped (the latter
     * two with a warning).</p>
     *
     * @param obj  import object receiving the value
     * @param line the trimmed data line
     */
    protected void handleDataLine(final ImportBedHeight obj, final String line) {
        final String[] values = line.split(SEPERATOR_CHAR, 0);
        if (values.length < 2) {
            // Do not import line without data or only km
            return;
        }

        final Double km;
        try {
            km = Double.valueOf(nf.parse(values[0]).doubleValue());
        }
        catch (final ParseException e) {
            log.warn("Error parsing km '" + values[0] + "': " + e.getMessage());
            return;
        }

        // add() reports an already known station (per the set's comparator).
        if (!this.kmExists.add(km)) {
            log.warn("duplicate station '" + values[0] + "': -> ignored");
            return;
        }

        final ImportBedHeightValue value = new ImportBedHeightValue(obj, km,
            parse(values, ColTitlePattern.HEIGHT),
            parse(values, ColTitlePattern.UNCERTAINTY),
            parse(values, ColTitlePattern.GAP),
            parse(values, ColTitlePattern.WIDTH),
            parse(values, ColTitlePattern.MINHEIGHT),
            parse(values, ColTitlePattern.MAXHEIGHT));
        for (int i = 1; i <= 10; i++) {
            value.setSectionHeight(i, parse(values, ColTitlePattern.getSectionPattern(i)));
        }

        obj.addValue(value);
    }

    /**
     * Reads the number of a column from the fields of a data line.
     *
     * @param values fields of the data line
     * @param col    column to read
     * @return the parsed number, or {@code null} if the column is not part of
     *         the file, the line has no such field, the field is blank, or it
     *         cannot be parsed (the last case is logged as warning)
     */
    private Double parse(final String[] values, final ColTitlePattern col) {
        final Integer idx = this.cols.get(col);
        if (idx == null || idx < 0 || idx >= values.length) {
            return null;
        }
        // Parse the trimmed text: the number format does not accept leading blanks.
        final String raw = values[idx].trim();
        if (raw.isEmpty()) {
            return null;
        }
        try {
            return nf.parse(raw).doubleValue();
        }
        catch (final ParseException e) {
            log.warn("unparseable " + col.toString() + " '" + values[idx] + "'");
            return null;
        }
    }
}