Mercurial > dive4elements > river
view backend/src/main/java/org/dive4elements/river/importer/parsers/BedHeightParser.java @ 8988:ae76f618d990
Checks added for missing import directory
author | mschaefer |
---|---|
date | Sun, 08 Apr 2018 18:09:32 +0200 |
parents | a0a0a7f912ab |
children | 2693bfaf503d |
line wrap: on
line source
/* Copyright (C) 2011, 2012, 2013 by Bundesanstalt für Gewässerkunde * Software engineering by Intevation GmbH * * This file is Free Software under the GNU AGPL (>=v3) * and comes with ABSOLUTELY NO WARRANTY! Check out the * documentation coming with Dive4Elements River for details. */ package org.dive4elements.river.importer.parsers; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.io.LineNumberReader; import java.math.BigDecimal; import java.text.NumberFormat; import java.text.ParseException; import java.util.ArrayList; import java.util.Date; import java.util.EnumMap; import java.util.List; import java.util.Locale; import java.util.TreeSet; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.log4j.Logger; import org.dive4elements.river.backend.utils.DateUtil; import org.dive4elements.river.backend.utils.EpsilonComparator; import org.dive4elements.river.importer.ImportBedHeight; import org.dive4elements.river.importer.ImportBedHeightType; import org.dive4elements.river.importer.ImportBedHeightValue; import org.dive4elements.river.importer.ImportElevationModel; import org.dive4elements.river.importer.ImportLocationSystem; import org.dive4elements.river.importer.ImportRange; import org.dive4elements.river.importer.ImportTimeInterval; import org.dive4elements.river.importer.ImportUnit; import org.dive4elements.river.importer.ImporterSession; import org.dive4elements.river.model.BedHeightType; public class BedHeightParser { private static final Logger log = Logger.getLogger(BedHeightParser.class); public static final String ENCODING = "ISO-8859-1"; public static final Locale DEFAULT_LOCALE = Locale.GERMAN; public static final String START_META_CHAR = "#"; public static final String SEPERATOR_CHAR = ";"; public static final Pattern META_YEAR = Pattern.compile("^Jahr: [^0-9]*(\\d*).*", Pattern.CASE_INSENSITIVE); public static final Pattern META_TIMEINTERVAL = Pattern.compile("^Zeitraum: Epoche (\\d*)-(\\d*).*", Pattern.CASE_INSENSITIVE); public static final Pattern META_TYPE = Pattern.compile("^Aufnahmeart: (.*).*", Pattern.CASE_INSENSITIVE); public static final Pattern META_LOCATION_SYSTEM = Pattern.compile("^Lagesystem: (.*).*", Pattern.CASE_INSENSITIVE); public static final Pattern META_CUR_ELEVATION_SYSTEM = Pattern.compile("^H.hensystem:\\s(.*)?? \\[(.*)\\].*", Pattern.CASE_INSENSITIVE); public static final Pattern META_OLD_ELEVATION_SYSTEM = Pattern.compile("^urspr.ngliches H.hensystem:\\s(.*)?? \\[(.*)\\].*", Pattern.CASE_INSENSITIVE); public static final Pattern META_RANGE = Pattern.compile("^Strecke:\\D*(\\d++.?\\d*) ?- ?(\\d++.?\\d*).*", Pattern.CASE_INSENSITIVE); public static final Pattern META_EVALUATION_BY = Pattern.compile("^Auswerter: (.*).*", Pattern.CASE_INSENSITIVE); public static final Pattern META_SOUNDING_WIDTH = Pattern.compile("^ausgewertete Peilbreite:\\s*(\\S.*).*", Pattern.CASE_INSENSITIVE); public static final Pattern META_COMMENTS = Pattern.compile("^Weitere Bemerkungen: (.*).*", Pattern.CASE_INSENSITIVE); private static final Pattern META_COLUMNTITLES = Pattern.compile("^Fluss-km\\s*;.+", Pattern.CASE_INSENSITIVE); private enum ColTitlePattern { KM("Fluss-km.*"), // HEIGHT("mittlere Sohlh.he\\s*\\[(.*)\\].*"), // UNCERTAINTY("Unsicherheit\\s*\\[(.*)\\].*"), // GAP("Datenl.cke.*"), // WIDTH("Peilbreite\\s*\\[(.*)\\].*"), // MINHEIGHT("Minimale Sohlh.he\\s*\\[(.*)\\].*"), // MAXHEIGHT("Maximale Sohlh.he\\s*\\[(.*)\\].*"); private final Pattern pattern; private ColTitlePattern(final String regexp) { this.pattern = Pattern.compile(regexp, Pattern.CASE_INSENSITIVE); } public Pattern getPattern() { return this.pattern; } } private final EnumMap<ColTitlePattern, Integer> cols = new EnumMap<>(ColTitlePattern.class); protected static NumberFormat nf = NumberFormat.getInstance(DEFAULT_LOCALE); protected List<ImportBedHeight> bedHeights; protected ImportBedHeight newImportBedHeight(final String description) { return new ImportBedHeight(description); } protected TreeSet<Double> kmExists; public BedHeightParser() { this.bedHeights = new ArrayList<>(); this.kmExists = new TreeSet<>(EpsilonComparator.CMP); } public List<ImportBedHeight> getBedHeights() { return this.bedHeights; } public void parse(final File file) throws IOException { log.info("Parsing bed height single file '" + file + "'"); final ImportBedHeight obj = newImportBedHeight(file.getName().replaceAll("\\.csv", "")); this.kmExists.clear(); this.cols.clear(); for (final ColTitlePattern col : ColTitlePattern.values()) this.cols.put(col, -1); LineNumberReader in = null; try { in = new LineNumberReader(new InputStreamReader(new FileInputStream(file), ENCODING)); String line = null; while ((line = in.readLine()) != null) { if ((line = line.trim()).length() == 0) { continue; } if (line.startsWith(START_META_CHAR)) { handleMetaLine(obj, line); } else { handleDataLine(obj, line); } } log.info("File contained " + obj.getValueCount() + " values."); this.bedHeights.add(obj); } finally { if (in != null) { in.close(); } } } protected static String stripMetaLine(final String line) { final String tmp = line.substring(1, line.length()); if (tmp.startsWith(" ")) { return tmp.substring(1, tmp.length()); } else { return tmp; } } protected void handleMetaLine(final ImportBedHeight obj, final String line) { final String meta = stripMetaLine(line); if (handleMetaYear(obj, meta)) { return; } else if (handleMetaTimeInterval(obj, meta)) { return; } else if (handleMetaComment(obj, meta)) { return; } else if (handleMetaEvaluationBy(obj, meta)) { return; } else if (handleMetaRange(obj, meta)) { return; } else if (handleMetaType(obj, meta)) { return; } else if (handleMetaLocationSystem(obj, meta)) { return; } else if (handleMetaCurElevationModel(obj, meta)) { return; } else if (handleMetaOldElevationModel(obj, meta)) { return; } else if (handleMetaSoundingWidth(obj, meta)) { return; } else if (handleMetaColumnTitles(obj, meta)) { return; } else { log.warn("BHP: Meta line did not match any known type: " + line); } } protected boolean handleMetaYear(final ImportBedHeight obj, final String line) { final Matcher m = META_YEAR.matcher(line); if (m.matches()) { final String tmp = m.group(1).trim(); if (tmp.length() > 0) { obj.setYear(Integer.parseInt(tmp)); } else { log.warn("BHP: No year given."); } return true; } return false; } protected boolean handleMetaTimeInterval(final ImportBedHeight obj, final String line) { final Matcher m = META_TIMEINTERVAL.matcher(line); if (m.matches()) { final String lo = m.group(1).trim(); final String up = m.group(2).trim(); log.debug("Found time interval: " + lo + " - " + up); try { final int lower = Integer.valueOf(lo); final int upper = Integer.valueOf(up); final Date fromYear = DateUtil.getStartDateFromYear(lower); final Date toYear = DateUtil.getEndDateFromYear(upper); obj.setTimeInterval(new ImportTimeInterval(fromYear, toYear)); } catch (final NumberFormatException e) { log.warn("BHP: could not parse timeinterval", e); } return true; } return false; } protected boolean handleMetaComment(final ImportBedHeight obj, final String line) { final Matcher m = META_COMMENTS.matcher(line); if (m.matches()) { final String tmp = m.group(1).replace(SEPERATOR_CHAR, "").trim(); obj.setComment(tmp); return true; } return false; } protected boolean handleMetaEvaluationBy(final ImportBedHeight obj, final String line) { final Matcher m = META_EVALUATION_BY.matcher(line); if (m.matches()) { final String tmp = m.group(1).replace(";", "").trim(); obj.setEvaluationBy(tmp); return true; } return false; } protected boolean handleMetaRange(final ImportBedHeight obj, final String line) { final Matcher m = META_RANGE.matcher(line); if (m.matches() && m.groupCount() >= 2) { final String a = m.group(1).replace(";", "").trim(); final String b = m.group(2).replace(";", "").trim(); try { final BigDecimal lower = new BigDecimal(nf.parse(a).doubleValue()); final BigDecimal upper = new BigDecimal(nf.parse(b).doubleValue()); obj.setRange(new ImportRange(lower, upper)); return true; } catch (final ParseException e) { log.warn("BHP: could not parse range", e); } } return false; } protected boolean handleMetaType(final ImportBedHeight obj, final String line) { final Matcher m = META_TYPE.matcher(line); if (m.matches()) { final String tmp = m.group(1).replace(";", "").trim(); final BedHeightType bht = BedHeightType.fetchBedHeightTypeForType(tmp, ImporterSession.getInstance().getDatabaseSession()); if (bht != null) { obj.setType(new ImportBedHeightType(bht)); return true; } log.error("Unknown bed height type: '" + tmp + "'. File ignored."); } return false; } protected boolean handleMetaLocationSystem(final ImportBedHeight obj, final String line) { final Matcher m = META_LOCATION_SYSTEM.matcher(line); if (m.matches()) { final String tmp = m.group(1).replace(";", "").trim(); obj.setLocationSystem(new ImportLocationSystem(tmp, tmp)); return true; } return false; } protected boolean handleMetaCurElevationModel(final ImportBedHeight obj, final String line) { final Matcher m = META_CUR_ELEVATION_SYSTEM.matcher(line); if (m.matches()) { final String name = m.group(1).trim(); final String unit = m.group(2).trim(); obj.setCurElevationModel(new ImportElevationModel(name, new ImportUnit(unit))); return true; } return false; } protected boolean handleMetaOldElevationModel(final ImportBedHeight obj, final String line) { final Matcher m = META_OLD_ELEVATION_SYSTEM.matcher(line); if (m.matches()) { final String name = m.group(1).trim(); final String unit = m.group(2).trim(); obj.setOldElevationModel(new ImportElevationModel(name, new ImportUnit(unit))); return true; } return false; } protected boolean handleMetaSoundingWidth(final ImportBedHeight obj, final String line) { final Matcher m = META_SOUNDING_WIDTH.matcher(line); if (m.matches()) { final String tmp = m.group(1).replace(SEPERATOR_CHAR, "").trim(); obj.setSoundingWidthInfo(tmp); return true; } return false; } /** * Tries to parse a line as column titles line */ protected boolean handleMetaColumnTitles(final ImportBedHeight obj, final String line) { final Matcher m = META_COLUMNTITLES.matcher(line); if (m.matches()) { final Matcher cm; final String[] titles = line.split(SEPERATOR_CHAR, 0); for (int i = 0; i <= titles.length - 1; i++) { for (final ColTitlePattern col : ColTitlePattern.values()) { if (col.getPattern().matcher(titles[i]).matches()) { this.cols.put(col, i); break; } } } return true; } return false; } protected void handleDataLine(final ImportBedHeight obj, final String line) { final String[] values = line.split(SEPERATOR_CHAR, 0); if (values.length < 2) { // Do not import line without data or only km return; } Double km; try { km = new Double(nf.parse(values[0]).doubleValue()); if (this.kmExists.contains(km)) { log.warn("duplicate station '" + values[0] + "': -> ignored"); return; } this.kmExists.add(km); } catch (final ParseException e) { log.error("Error parsing km '" + values[0] + "': " + e.getMessage()); return; } final ImportBedHeightValue value = new ImportBedHeightValue(obj, km, parse(values, ColTitlePattern.HEIGHT), parse(values, ColTitlePattern.UNCERTAINTY), parse(values, ColTitlePattern.GAP), parse(values, ColTitlePattern.WIDTH), parse(values, ColTitlePattern.MINHEIGHT), parse(values, ColTitlePattern.MAXHEIGHT)); obj.addValue(value); } private Double parse(final String[] values, final ColTitlePattern col) { final int idx = this.cols.get(col).intValue(); if ((idx >= 0) && (idx < values.length) && !values[idx].trim().isEmpty()) { try { return nf.parse(values[idx]).doubleValue(); } catch (final ParseException e) { log.warn("unparseable " + col.toString() + " '" + values[idx] + "'"); } } return null; } }