Mercurial > dive4elements > river
diff backend/src/main/java/org/dive4elements/river/importer/sinfo/parsers/TkhParser.java @ 8971:50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
author | mschaefer |
---|---|
date | Tue, 03 Apr 2018 10:18:30 +0200 |
parents | |
children | ae76f618d990 |
line wrap: on
line diff
/* Copyright (C) 2017 by Bundesanstalt für Gewässerkunde
 * Software engineering by
 *  Björnsen Beratende Ingenieure GmbH
 *  Dr. Schumacher Ingenieurbüro für Wasser und Umwelt
 *
 * This file is Free Software under the GNU AGPL (>=v3)
 * and comes with ABSOLUTELY NO WARRANTY! Check out the
 * documentation coming with Dive4Elements River for details.
 */

package org.dive4elements.river.importer.sinfo.parsers;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;
import org.dive4elements.river.importer.Config;
import org.dive4elements.river.importer.ImportRiver;
import org.dive4elements.river.importer.common.AbstractParser;
import org.dive4elements.river.importer.common.ParsingState;
import org.dive4elements.river.importer.sinfo.importitem.TkhColumnSeriesImport;
import org.dive4elements.river.importer.sinfo.importitem.TkhKmLineImport;
import org.dive4elements.river.importer.sinfo.importitem.TkhSeriesImport;
import org.dive4elements.river.model.sinfo.TkhColumn;
import org.dive4elements.river.model.sinfo.TkhValue;

/**
 * Reads and parses the header of a TKH file and delegates parsing and storing of the individual value columns
 * to one {@link TkhColumnParser} per recognised column.
 * <p>
 * This parser only evaluates the meta (header) lines itself; the data lines are read by the column parsers, which
 * is why {@link #createSeriesImport(String)} and {@link #createKmLineImport(Double, String[])} are unsupported here.
 *
 * @author Matthias Schäfer
 *
 */
public class TkhParser extends AbstractParser<TkhColumn, TkhValue, TkhKmLineImport, TkhColumnSeriesImport> {

    /***** FIELDS *****/

    private static final Logger log = Logger.getLogger(TkhParser.class);

    /** Matches a trailing ".csv" file extension only (any letter case), so the base name stays untouched. */
    private static final Pattern CSV_EXTENSION = Pattern.compile("\\.csv$", Pattern.CASE_INSENSITIVE);

    /** Header line "# Bezugsjahr: yyyy"; group 1 is the four-digit year. */
    private static final Pattern META_YEAR = Pattern.compile("^#\\sBezugsjahr:\\s*([12]\\d\\d\\d).*", Pattern.CASE_INSENSITIVE);

    /** Header line "# Auswerter: ..."; group 1 is the text up to the first semicolon. */
    private static final Pattern META_EVALUATOR = Pattern.compile("^#\\sAuswerter:\\s*([^;]*).*", Pattern.CASE_INSENSITIVE);

    /** Header line "# Peilung: ..."; group 1 is the text up to the first semicolon. */
    private static final Pattern META_SOUNDING = Pattern.compile("^#\\sPeilung:\\s*([^;]*).*", Pattern.CASE_INSENSITIVE);

    /** Header line "# Typ: ..."; group 1 is the text up to the first semicolon. */
    private static final Pattern META_TYPE = Pattern.compile("^#\\sTyp:\\s*([^;]*).*", Pattern.CASE_INSENSITIVE);

    /**
     * Title of a value column, "Transportkörperhöhe (name) [cm]". Each umlaut may be written as any single
     * character or as "oe"; group 7 is the text between the parentheses.
     */
    private static final Pattern COLUMN_TITLE = Pattern.compile("Transportk((.)|(oe))rperh((.)|(oe))he\\s*\\((.+?)\\)\\s*\\[cm\\].*", Pattern.CASE_INSENSITIVE);

    /** The series group built from the file header; shared by all column parsers of this file. */
    private final TkhSeriesImport tkhGroup;

    /** One parser per recognised value column, filled while the column title line is handled. */
    private final List<TkhColumnParser> colParsers;


    /***** CONSTRUCTORS *****/

    /**
     * Creates a parser for a single TKH import file.
     *
     * @param importPath the file to import; its name without the ".csv" extension becomes the group's filename
     * @param rootRelativePath path of the file relative to the import root (used in log output)
     * @param river the river the imported series belongs to
     */
    public TkhParser(final File importPath, final File rootRelativePath, final ImportRiver river) {
        super(importPath, rootRelativePath, river);
        // Strip the extension only: an unanchored ".csv" replacement would also remove matches inside the base name
        final String baseName = CSV_EXTENSION.matcher(importPath.getName()).replaceAll("");
        this.tkhGroup = new TkhSeriesImport(baseName);
        this.seriesHeader = new TkhColumnSeriesImport(this.tkhGroup.getFilename(), this.tkhGroup, null, null);
        this.colParsers = new ArrayList<>();
    }


    /***** METHODS *****/

    @Override
    protected Logger getLog() {
        return log;
    }

    /**
     * Whether this import type shall be skipped
     *
     * @return the value of the importer configuration's S-INFO TKH skip setting
     */
    public static boolean shallSkip() {
        return Config.INSTANCE.skipSInfoTkh();
    }

    /**
     * Creates a list of parsers for all TKH import files (".csv") in a directory
     *
     * @param importDir the directory to scan for import files
     * @param relativeDir the same directory relative to the import root; combined with each file name
     * @param river the river the imported series belong to
     * @return one parser per file found, possibly empty
     */
    public static List<TkhParser> createParsers(final File importDir, final File relativeDir, final ImportRiver river) {
        final List<TkhParser> parsers = new ArrayList<>();
        for (final File file : listFiles(importDir, ".csv")) {
            parsers.add(new TkhParser(file, new File(relativeDir, file.getName()), river));
        }
        return parsers;
    }

    /**
     * Parses the header of the import file and then lets every column parser created from the column title line
     * parse its own column.
     * <p>
     * Each non-empty line is passed to {@code handleMetaLine()} until the end of the file is reached or the header
     * parsing state leaves {@link ParsingState#CONTINUE}. If the state is {@link ParsingState#STOP} afterwards
     * (including the case that the file could not be opened), an error is logged and no column is parsed.
     *
     * @throws IOException if reading or closing the file fails
     */
    @Override
    public void parse() throws IOException {
        getLog().info("Start parsing:;'" + this.rootRelativePath + "'");
        // Reset all state so that the parser can be run repeatedly
        this.metaPatternsMatched.clear();
        this.kmExists.clear();
        this.colParsers.clear();
        this.headerParsingState = ParsingState.CONTINUE;
        try {
            try {
                this.in = new LineNumberReader(new InputStreamReader(new FileInputStream(this.importPath), ENCODING));
            }
            catch (final Exception e) {
                // Deliberately broad: any failure to open the file is reported and stops the parsing
                logError("Could not open (" + e.getMessage() + ")");
                this.headerParsingState = ParsingState.STOP;
            }
            this.currentLine = null;
            while (this.headerParsingState == ParsingState.CONTINUE) {
                this.currentLine = this.in.readLine();
                if (this.currentLine == null) {
                    break;
                }
                this.currentLine = this.currentLine.trim();
                if (this.currentLine.isEmpty()) {
                    continue;
                }
                handleMetaLine();
            }
        }
        finally {
            if (this.in != null) {
                this.in.close();
                this.in = null;
            }
        }
        if (this.headerParsingState == ParsingState.STOP) {
            logError("Parsing of the file stopped due to a severe error");
            return;
        }
        for (final TkhColumnParser colParser : this.colParsers) {
            colParser.parse();
        }
    }

    /**
     * Tries the TKH specific meta lines on the current line, in the order year, type, sounding, evaluator.
     *
     * @return true if the current line matched one of them
     */
    @Override
    protected boolean handleMetaOther() {
        return handleMetaYear() || handleMetaType() || handleMetaSounding() || handleMetaEvaluator();
    }

    /**
     * Handles a "Bezugsjahr" line and stores the year in the series group.
     *
     * @return true if the current line is a year line
     */
    private boolean handleMetaYear() {
        final Matcher m = META_YEAR.matcher(this.currentLine);
        if (!m.matches()) {
            return false;
        }
        this.metaPatternsMatched.add(META_YEAR);
        // The pattern guarantees four digits, so parseInt cannot fail here
        this.tkhGroup.setYear(Integer.parseInt(m.group(1)));
        return true;
    }

    /**
     * Recognises a "Typ" line. Its value is not stored, and unlike the other meta handlers of this class the
     * pattern is not added to {@code metaPatternsMatched}.
     *
     * @return true if the current line is a type line
     */
    private boolean handleMetaType() {
        return META_TYPE.matcher(this.currentLine).matches();
    }

    /**
     * Handles a "Peilung" line and stores its text as the sounding info of the series group.
     *
     * @return true if the current line is a sounding line
     */
    private boolean handleMetaSounding() {
        final Matcher m = META_SOUNDING.matcher(this.currentLine);
        if (!m.matches()) {
            return false;
        }
        this.metaPatternsMatched.add(META_SOUNDING);
        this.tkhGroup.setSounding_info(parseMetaInfo(m.group(1).trim()));
        return true;
    }

    /**
     * Handles an "Auswerter" line and stores its text as the evaluator of the series group.
     *
     * @return true if the current line is an evaluator line
     */
    private boolean handleMetaEvaluator() {
        final Matcher m = META_EVALUATOR.matcher(this.currentLine);
        if (!m.matches()) {
            return false;
        }
        this.metaPatternsMatched.add(META_EVALUATOR);
        this.tkhGroup.setEvaluation_by(parseMetaInfo(m.group(1).trim()));
        return true;
    }

    /**
     * Handles the column title line: after the base class accepted it, copies km range info and comment from the
     * series header to the series group and creates a column parser for every column whose title matches
     * {@link #COLUMN_TITLE}. Columns with other titles are skipped with a warning.
     *
     * @return false if the base class did not accept the line as the column title line, otherwise true
     */
    @Override
    protected boolean handleMetaColumnTitles() {
        if (!super.handleMetaColumnTitles()) {
            return false;
        }
        this.tkhGroup.setKmrange_info(this.seriesHeader.getKmrange_info());
        this.tkhGroup.setComment(this.seriesHeader.getComment());
        // Column 0 is not a value column (presumably the km/station column - confirm against AbstractParser)
        for (int i = 1; i <= this.columnTitles.size() - 1; i++) {
            final Matcher m = COLUMN_TITLE.matcher(this.columnTitles.get(i));
            if (m.matches()) {
                // Group 7 is the text in parentheses, passed on as the column's name
                this.colParsers.add(new TkhColumnParser(this.importPath, this.rootRelativePath, this.river, this.tkhGroup, i, m.group(7).trim()));
            }
            else {
                logWarning("No title found in column " + i + ", skipped");
            }
        }
        return true;
    }

    /**
     * Stores the series group and all of its columns, unless the header parsing ended with
     * {@link ParsingState#STOP}; in that case only a warning is logged.
     */
    @Override
    public void store() {
        if (this.headerParsingState == ParsingState.STOP) {
            logWarning("Severe parsing errors, not storing series '" + this.tkhGroup.getFilename() + "'");
            return;
        }
        // Requested before the columns are stored (presumably fetches or creates the group's database record)
        this.tkhGroup.getPeer(this.river.getPeer());
        for (final TkhColumnParser colParser : this.colParsers) {
            colParser.store();
        }
    }

    /**
     * Not supported: the series header is created in the constructor, and the value series are created by the
     * column parsers.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    protected TkhColumnSeriesImport createSeriesImport(final String filename) {
        throw new UnsupportedOperationException();
    }

    /**
     * Not supported: data lines are handled by the column parsers, not by this header parser.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    protected TkhKmLineImport createKmLineImport(final Double km, final String[] values) {
        throw new UnsupportedOperationException();
    }
}