Mercurial > dive4elements > river
diff backend/src/main/java/org/dive4elements/river/importer/parsers/BedHeightParser.java @ 8975:a0a0a7f912ab
Added new columns bed_height.comment and sounding_width_info; extended the bed height parser for the new meta data and the min/max_height columns
author | mschaefer |
---|---|
date | Tue, 03 Apr 2018 10:40:57 +0200 |
parents | 5e38e2924c07 |
children | 2693bfaf503d |
line wrap: on
line diff
--- a/backend/src/main/java/org/dive4elements/river/importer/parsers/BedHeightParser.java Tue Apr 03 10:37:30 2018 +0200 +++ b/backend/src/main/java/org/dive4elements/river/importer/parsers/BedHeightParser.java Tue Apr 03 10:40:57 2018 +0200 @@ -9,45 +9,40 @@ package org.dive4elements.river.importer.parsers; import java.io.File; - +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.LineNumberReader; import java.math.BigDecimal; - import java.text.NumberFormat; import java.text.ParseException; - import java.util.ArrayList; import java.util.Date; +import java.util.EnumMap; import java.util.List; +import java.util.Locale; import java.util.TreeSet; -import java.util.Locale; - import java.util.regex.Matcher; import java.util.regex.Pattern; -import java.io.IOException; -import java.io.LineNumberReader; -import java.io.FileInputStream; -import java.io.InputStreamReader; - import org.apache.log4j.Logger; - +import org.dive4elements.river.backend.utils.DateUtil; +import org.dive4elements.river.backend.utils.EpsilonComparator; import org.dive4elements.river.importer.ImportBedHeight; +import org.dive4elements.river.importer.ImportBedHeightType; import org.dive4elements.river.importer.ImportBedHeightValue; -import org.dive4elements.river.importer.ImportBedHeightType; import org.dive4elements.river.importer.ImportElevationModel; import org.dive4elements.river.importer.ImportLocationSystem; import org.dive4elements.river.importer.ImportRange; import org.dive4elements.river.importer.ImportTimeInterval; import org.dive4elements.river.importer.ImportUnit; -import org.dive4elements.river.model.BedHeightType; import org.dive4elements.river.importer.ImporterSession; -import org.dive4elements.river.backend.utils.EpsilonComparator; -import org.dive4elements.river.backend.utils.DateUtil; +import org.dive4elements.river.model.BedHeightType; public class BedHeightParser { private static final Logger log = - Logger.getLogger(BedHeightParser.class); + Logger.getLogger(BedHeightParser.class); public static final String ENCODING = "ISO-8859-1"; @@ -57,41 +52,65 @@ public static final String SEPERATOR_CHAR = ";"; public static final Pattern META_YEAR = - Pattern.compile("^Jahr: [^0-9]*(\\d*).*"); + Pattern.compile("^Jahr: [^0-9]*(\\d*).*", Pattern.CASE_INSENSITIVE); public static final Pattern META_TIMEINTERVAL = - Pattern.compile("^Zeitraum: Epoche (\\d*)-(\\d*).*"); + Pattern.compile("^Zeitraum: Epoche (\\d*)-(\\d*).*", Pattern.CASE_INSENSITIVE); public static final Pattern META_TYPE = - Pattern.compile("^Aufnahmeart: (.*).*"); + Pattern.compile("^Aufnahmeart: (.*).*", Pattern.CASE_INSENSITIVE); public static final Pattern META_LOCATION_SYSTEM = - Pattern.compile("^Lagesystem: (.*).*"); + Pattern.compile("^Lagesystem: (.*).*", Pattern.CASE_INSENSITIVE); public static final Pattern META_CUR_ELEVATION_SYSTEM = - Pattern.compile("^H.hensystem:\\s(.*)?? \\[(.*)\\].*"); + Pattern.compile("^H.hensystem:\\s(.*)?? \\[(.*)\\].*", Pattern.CASE_INSENSITIVE); public static final Pattern META_OLD_ELEVATION_SYSTEM = - Pattern.compile("^urspr.ngliches H.hensystem:\\s(.*)?? \\[(.*)\\].*"); + Pattern.compile("^urspr.ngliches H.hensystem:\\s(.*)?? \\[(.*)\\].*", Pattern.CASE_INSENSITIVE); public static final Pattern META_RANGE = - Pattern.compile("^Strecke:\\D*(\\d++.?\\d*) ?- ?(\\d++.?\\d*).*"); + Pattern.compile("^Strecke:\\D*(\\d++.?\\d*) ?- ?(\\d++.?\\d*).*", Pattern.CASE_INSENSITIVE); public static final Pattern META_EVALUATION_BY = - Pattern.compile("^Auswerter: (.*).*"); + Pattern.compile("^Auswerter: (.*).*", Pattern.CASE_INSENSITIVE); + + public static final Pattern META_SOUNDING_WIDTH = Pattern.compile("^ausgewertete Peilbreite:\\s*(\\S.*).*", Pattern.CASE_INSENSITIVE); public static final Pattern META_COMMENTS = - Pattern.compile("^Weitere Bemerkungen: (.*).*"); + Pattern.compile("^Weitere Bemerkungen: (.*).*", Pattern.CASE_INSENSITIVE); + private static final Pattern META_COLUMNTITLES = Pattern.compile("^Fluss-km\\s*;.+", Pattern.CASE_INSENSITIVE); - protected static NumberFormat nf = NumberFormat.getInstance( - DEFAULT_LOCALE); + private enum ColTitlePattern { + KM("Fluss-km.*"), // + HEIGHT("mittlere Sohlh.he\\s*\\[(.*)\\].*"), // + UNCERTAINTY("Unsicherheit\\s*\\[(.*)\\].*"), // + GAP("Datenl.cke.*"), // + WIDTH("Peilbreite\\s*\\[(.*)\\].*"), // + MINHEIGHT("Minimale Sohlh.he\\s*\\[(.*)\\].*"), // + MAXHEIGHT("Maximale Sohlh.he\\s*\\[(.*)\\].*"); + + private final Pattern pattern; + + private ColTitlePattern(final String regexp) { + this.pattern = Pattern.compile(regexp, Pattern.CASE_INSENSITIVE); + } + + public Pattern getPattern() { + return this.pattern; + } + } + + private final EnumMap<ColTitlePattern, Integer> cols = new EnumMap<>(ColTitlePattern.class); + + protected static NumberFormat nf = NumberFormat.getInstance(DEFAULT_LOCALE); protected List<ImportBedHeight> bedHeights; - protected ImportBedHeight newImportBedHeight(String description) { + protected ImportBedHeight newImportBedHeight(final String description) { return new ImportBedHeight(description); } @@ -99,30 +118,26 @@ protected TreeSet<Double> kmExists; public BedHeightParser() { - bedHeights = new ArrayList<ImportBedHeight>(); - kmExists = new TreeSet<Double>(EpsilonComparator.CMP); + this.bedHeights = new ArrayList<>(); + this.kmExists = new TreeSet<>(EpsilonComparator.CMP); } public List<ImportBedHeight> getBedHeights() { - return bedHeights; + return this.bedHeights; } - public void parse(File file) throws IOException { + public void parse(final File file) throws IOException { log.info("Parsing bed height single file '" + file + "'"); - - ImportBedHeight obj = newImportBedHeight( - file.getName().replaceAll("\\.csv", "")); - - kmExists.clear(); - + final ImportBedHeight obj = newImportBedHeight(file.getName().replaceAll("\\.csv", "")); + this.kmExists.clear(); + this.cols.clear(); + for (final ColTitlePattern col : ColTitlePattern.values()) + this.cols.put(col, -1); LineNumberReader in = null; try { - in = - new LineNumberReader( - new InputStreamReader( - new FileInputStream(file), ENCODING)); + in = new LineNumberReader(new InputStreamReader(new FileInputStream(file), ENCODING)); String line = null; while ((line = in.readLine()) != null) { @@ -139,7 +154,7 @@ } log.info("File contained " + obj.getValueCount() + " values."); - bedHeights.add(obj); + this.bedHeights.add(obj); } finally { if (in != null) { @@ -149,8 +164,8 @@ } - protected static String stripMetaLine(String line) { - String tmp = line.substring(1, line.length()); + protected static String stripMetaLine(final String line) { + final String tmp = line.substring(1, line.length()); if (tmp.startsWith(" ")) { return tmp.substring(1, tmp.length()); @@ -161,8 +176,8 @@ } - protected void handleMetaLine(ImportBedHeight obj, String line) { - String meta = stripMetaLine(line); + protected void handleMetaLine(final ImportBedHeight obj, final String line) { + final String meta = stripMetaLine(line); if (handleMetaYear(obj, meta)) { return; @@ -191,17 +206,22 @@ else if (handleMetaOldElevationModel(obj, meta)) { return; } + else if (handleMetaSoundingWidth(obj, meta)) { + return; + } + else if (handleMetaColumnTitles(obj, meta)) { + return; + } else { log.warn("BHP: Meta line did not match any known type: " + line); } } - protected boolean handleMetaYear(ImportBedHeight obj, String line) { - Matcher m = META_YEAR.matcher(line); - + protected boolean handleMetaYear(final ImportBedHeight obj, final String line) { + final Matcher m = META_YEAR.matcher(line); if (m.matches()) { - String tmp = m.group(1); + final String tmp = m.group(1).trim(); if (tmp.length() > 0) { obj.setYear(Integer.parseInt(tmp)); } @@ -210,232 +230,190 @@ } return true; } - - return false; - } - - - protected boolean handleMetaTimeInterval( - ImportBedHeight obj, - String line - ) { - Matcher m = META_TIMEINTERVAL.matcher(line); - - if (m.matches()) { - String lo = m.group(1); - String up = m.group(2); - - log.debug("Found time interval: " + lo + " - " + up); - - try { - int lower = Integer.valueOf(lo); - int upper = Integer.valueOf(up); - - Date fromYear = DateUtil.getStartDateFromYear(lower); - Date toYear = DateUtil.getEndDateFromYear(upper); - - obj.setTimeInterval(new ImportTimeInterval(fromYear, toYear)); - } - catch (NumberFormatException e) { - log.warn("BHP: could not parse timeinterval", e); - } - - return true; - } - - return false; - } - - - protected boolean handleMetaComment(ImportBedHeight obj, String line) { - Matcher m = META_COMMENTS.matcher(line); - - if (m.matches()) { - String tmp = m.group(1); - - obj.setDescription(tmp); - - return true; - } - return false; } - protected boolean handleMetaEvaluationBy( - ImportBedHeight obj, - String line - ) { - Matcher m = META_EVALUATION_BY.matcher(line); - + protected boolean handleMetaTimeInterval(final ImportBedHeight obj, final String line) { + final Matcher m = META_TIMEINTERVAL.matcher(line); if (m.matches()) { - String tmp = m.group(1); - tmp = tmp.replace(";", ""); - - obj.setEvaluationBy(tmp); - + final String lo = m.group(1).trim(); + final String up = m.group(2).trim(); + log.debug("Found time interval: " + lo + " - " + up); + try { + final int lower = Integer.valueOf(lo); + final int upper = Integer.valueOf(up); + final Date fromYear = DateUtil.getStartDateFromYear(lower); + final Date toYear = DateUtil.getEndDateFromYear(upper); + obj.setTimeInterval(new ImportTimeInterval(fromYear, toYear)); + } + catch (final NumberFormatException e) { + log.warn("BHP: could not parse timeinterval", e); + } return true; } - return false; } - protected boolean handleMetaRange(ImportBedHeight obj, String line) { - Matcher m = META_RANGE.matcher(line); - - if (m.matches() && m.groupCount() >= 2) { - String a = m.group(1).replace(";", ""); - String b = m.group(2).replace(";", ""); - - try { - BigDecimal lower = new BigDecimal(nf.parse(a).doubleValue()); - BigDecimal upper = new BigDecimal(nf.parse(b).doubleValue()); - - obj.setRange(new ImportRange(lower, upper)); - - return true; - } - catch (ParseException e) { - log.warn("BHP: could not parse range", e); - } + protected boolean handleMetaComment(final ImportBedHeight obj, final String line) { + final Matcher m = META_COMMENTS.matcher(line); + if (m.matches()) { + final String tmp = m.group(1).replace(SEPERATOR_CHAR, "").trim(); + obj.setComment(tmp); + return true; } - return false; } - protected boolean handleMetaType(ImportBedHeight obj, String line) { - Matcher m = META_TYPE.matcher(line); - + protected boolean handleMetaEvaluationBy(final ImportBedHeight obj, final String line) { + final Matcher m = META_EVALUATION_BY.matcher(line); if (m.matches()) { - String tmp = m.group(1).replace(";", "").trim(); + final String tmp = m.group(1).replace(";", "").trim(); + obj.setEvaluationBy(tmp); + return true; + } + return false; + } - BedHeightType bht = BedHeightType.fetchBedHeightTypeForType( - tmp, - ImporterSession.getInstance().getDatabaseSession()); + protected boolean handleMetaRange(final ImportBedHeight obj, final String line) { + final Matcher m = META_RANGE.matcher(line); + if (m.matches() && m.groupCount() >= 2) { + final String a = m.group(1).replace(";", "").trim(); + final String b = m.group(2).replace(";", "").trim(); + try { + final BigDecimal lower = new BigDecimal(nf.parse(a).doubleValue()); + final BigDecimal upper = new BigDecimal(nf.parse(b).doubleValue()); + obj.setRange(new ImportRange(lower, upper)); + return true; + } + catch (final ParseException e) { + log.warn("BHP: could not parse range", e); + } + } + return false; + } + + + protected boolean handleMetaType(final ImportBedHeight obj, final String line) { + final Matcher m = META_TYPE.matcher(line); + if (m.matches()) { + final String tmp = m.group(1).replace(";", "").trim(); + final BedHeightType bht = BedHeightType.fetchBedHeightTypeForType(tmp, ImporterSession.getInstance().getDatabaseSession()); if (bht != null) { obj.setType(new ImportBedHeightType(bht)); return true; } - log.error("Unknown bed height type: '" + tmp + "'. File ignored."); } - - return false; - } - - - protected boolean handleMetaLocationSystem( - ImportBedHeight obj, - String line - ) { - Matcher m = META_LOCATION_SYSTEM.matcher(line); - - if (m.matches()) { - String tmp = m.group(1).replace(";", ""); - - obj.setLocationSystem(new ImportLocationSystem(tmp, tmp)); - - return true; - } - - return false; - } - - - protected boolean handleMetaCurElevationModel( - ImportBedHeight obj, - String line - ) { - Matcher m = META_CUR_ELEVATION_SYSTEM.matcher(line); - - if (m.matches()) { - String name = m.group(1); - String unit = m.group(2); - - obj.setCurElevationModel(new ImportElevationModel( - name, - new ImportUnit(unit) - )); - - return true; - } - return false; } - protected boolean handleMetaOldElevationModel( - ImportBedHeight obj, - String line - ) { - Matcher m = META_OLD_ELEVATION_SYSTEM.matcher(line); - + protected boolean handleMetaLocationSystem(final ImportBedHeight obj, final String line) { + final Matcher m = META_LOCATION_SYSTEM.matcher(line); if (m.matches()) { - String name = m.group(1); - String unit = m.group(2); - - obj.setOldElevationModel(new ImportElevationModel( - name, - new ImportUnit(unit) - )); - + final String tmp = m.group(1).replace(";", "").trim(); + obj.setLocationSystem(new ImportLocationSystem(tmp, tmp)); return true; } - return false; } - private Double parse(String []values, int idx, String msg) { - if (idx >= 0 && idx < values.length && !values[idx].isEmpty()) { - try { - return nf.parse(values[idx]).doubleValue(); - } - catch (ParseException e) { - log.warn("BSP: unparseable " + msg + " '" + values[idx] + "'"); - } + protected boolean handleMetaCurElevationModel(final ImportBedHeight obj, final String line) { + final Matcher m = META_CUR_ELEVATION_SYSTEM.matcher(line); + if (m.matches()) { + final String name = m.group(1).trim(); + final String unit = m.group(2).trim(); + obj.setCurElevationModel(new ImportElevationModel(name, new ImportUnit(unit))); + return true; } - - return null; + return false; } - protected void handleDataLine(ImportBedHeight obj, String line) { - String[] values = line.split(SEPERATOR_CHAR, 0); + protected boolean handleMetaOldElevationModel(final ImportBedHeight obj, final String line) { + final Matcher m = META_OLD_ELEVATION_SYSTEM.matcher(line); + if (m.matches()) { + final String name = m.group(1).trim(); + final String unit = m.group(2).trim(); + obj.setOldElevationModel(new ImportElevationModel(name, new ImportUnit(unit))); + return true; + } + return false; + } + + protected boolean handleMetaSoundingWidth(final ImportBedHeight obj, final String line) { + final Matcher m = META_SOUNDING_WIDTH.matcher(line); + if (m.matches()) { + final String tmp = m.group(1).replace(SEPERATOR_CHAR, "").trim(); + obj.setSoundingWidthInfo(tmp); + return true; + } + return false; + } + + + /** + * Tries to parse a line as column titles line + */ + protected boolean handleMetaColumnTitles(final ImportBedHeight obj, final String line) { + final Matcher m = META_COLUMNTITLES.matcher(line); + if (m.matches()) { + final Matcher cm; + final String[] titles = line.split(SEPERATOR_CHAR, 0); + for (int i = 0; i <= titles.length - 1; i++) { + for (final ColTitlePattern col : ColTitlePattern.values()) { + if (col.getPattern().matcher(titles[i]).matches()) { + this.cols.put(col, i); + break; + } + } + } + return true; + } + return false; + } + + protected void handleDataLine(final ImportBedHeight obj, final String line) { + final String[] values = line.split(SEPERATOR_CHAR, 0); if (values.length < 2) { // Do not import line without data or only km return; } - Double km; try { km = new Double(nf.parse(values[0]).doubleValue()); - - if (kmExists.contains(km)) { - log.warn("duplicate station '" + km + "': -> ignored"); + if (this.kmExists.contains(km)) { + log.warn("duplicate station '" + values[0] + "': -> ignored"); return; } - - kmExists.add(km); + this.kmExists.add(km); } - catch (ParseException e) { - log.error("Error parsing km '" + values[0] + "': " + - e.getMessage()); + catch (final ParseException e) { + log.error("Error parsing km '" + values[0] + "': " + e.getMessage()); return; } - - ImportBedHeightValue value = new ImportBedHeightValue( - (ImportBedHeight) obj, - km, - parse(values, 1, "height"), - parse(values, 2, "uncertainty"), - parse(values, 3, "data gap"), - parse(values, 4, "sounding width")); + final ImportBedHeightValue value = new ImportBedHeightValue(obj, km, parse(values, ColTitlePattern.HEIGHT), + parse(values, ColTitlePattern.UNCERTAINTY), parse(values, ColTitlePattern.GAP), parse(values, ColTitlePattern.WIDTH), + parse(values, ColTitlePattern.MINHEIGHT), parse(values, ColTitlePattern.MAXHEIGHT)); obj.addValue(value); } + + private Double parse(final String[] values, final ColTitlePattern col) { + final int idx = this.cols.get(col).intValue(); + if ((idx >= 0) && (idx < values.length) && !values[idx].trim().isEmpty()) { + try { + return nf.parse(values[idx]).doubleValue(); + } + catch (final ParseException e) { + log.warn("unparseable " + col.toString() + " '" + values[idx] + "'"); + } + } + return null; + } } -// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :