Mercurial > dive4elements > river
view backend/src/main/java/org/dive4elements/river/importer/sinfo/parsers/InfrastructureParser.java @ 9659:75bd347147ad
Importer (s/u-info) extensions: infrastructures: detecting, logging, cancelling in case of wrong column titles,
detecting, logging and skipping lines with duplicate km+bank
author | mschaefer |
---|---|
date | Mon, 23 Mar 2020 15:37:37 +0100 |
parents | ac41551a8e4d |
children |
line wrap: on
line source
/* Copyright (C) 2017 by Bundesanstalt für Gewässerkunde
 * Software engineering by
 *  Björnsen Beratende Ingenieure GmbH
 *  Dr. Schumacher Ingenieurbüro für Wasser und Umwelt
 *
 * This file is Free Software under the GNU AGPL (>=v3)
 * and comes with ABSOLUTELY NO WARRANTY! Check out the
 * documentation coming with Dive4Elements River for details.
 */

package org.dive4elements.river.importer.sinfo.parsers;

import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;
import org.dive4elements.river.importer.Config;
import org.dive4elements.river.importer.ImportAnnotationType;
import org.dive4elements.river.importer.ImportAttribute;
import org.dive4elements.river.importer.ImportRiver;
import org.dive4elements.river.importer.common.AbstractParser;
import org.dive4elements.river.importer.common.ParsingState;
import org.dive4elements.river.importer.sinfo.importitem.InfrastructureKmLineImport;
import org.dive4elements.river.importer.sinfo.importitem.InfrastructureSeriesImport;
import org.dive4elements.river.model.sinfo.Infrastructure;
import org.dive4elements.river.model.sinfo.InfrastructureValue;

/**
 * Reads and parses an infrastructure file.
 * <p>
 * Besides the meta lines handled here (group, type, provider, evaluator, year) the parser needs two data
 * columns identified by their titles: a height column ("Höhe [unit]") and a river bank column ("Uferseite").
 * If one of them is missing, header parsing is stopped.
 *
 * @author Matthias Schäfer
 *
 */
public class InfrastructureParser extends AbstractParser<Infrastructure, InfrastructureValue, InfrastructureKmLineImport, InfrastructureSeriesImport> {

    /***** FIELDS *****/

    private static final Logger log = Logger.getLogger(InfrastructureParser.class);

    // Meta line patterns; group 1 captures the value up to (not including) the first ';'
    private static final Pattern META_GROUP = Pattern.compile("^#\\sInfrastruktur:\\s*([^;]*).*", Pattern.CASE_INSENSITIVE);

    private static final Pattern META_TYPE = Pattern.compile("^#\\sTyp\\/Bezeichnung:\\s*([^;]*).*", Pattern.CASE_INSENSITIVE);

    private static final Pattern META_PROVIDER = Pattern.compile("^#\\sDatenherkunft:\\s*([^;]*).*", Pattern.CASE_INSENSITIVE);

    private static final Pattern META_EVALUATOR = Pattern.compile("^#\\sAuswerter:\\s*([^;]*).*", Pattern.CASE_INSENSITIVE);

    // Group 1 captures a four digit year starting with 1 or 2
    private static final Pattern META_YEAR = Pattern.compile("^#\\sStand:\\s*([12]\\d\\d\\d).*", Pattern.CASE_INSENSITIVE);

    // "H.he" accepts any single character in place of the umlaut; "Hoehe" is the transliterated spelling
    private static final Pattern HEIGHT_COLUMNTITLE = Pattern.compile("((H.he)|(Hoehe))\\s*\\[(.*)\\].*", Pattern.CASE_INSENSITIVE);

    private static final Pattern BANK_COLUMNTITLE = Pattern.compile("Uferseite.*", Pattern.CASE_INSENSITIVE);

    private static final String DB_BANK_LEFT = "links"; // TODO: improve database design to make this secure

    private static final String DB_BANK_RIGHT = "rechts";

    private static final String DB_BANK_NULL = "";

    /** Index of the height column within the column titles, or -1 while not (yet) identified */
    private int heightColIndex;

    /** Index of the bank column within the column titles, or -1 while not (yet) identified */
    private int bankColIndex;

    /** Maps the normalized (trimmed, lower case) bank value of a file line to its import attribute */
    private final Map<String, ImportAttribute> bankAttributes;


    /***** CONSTRUCTORS *****/

    /**
     * Creates a parser for a single infrastructure file.
     *
     * @param importPath the file to import
     * @param rootRelativePath the path of the file as passed on to the base class
     * @param river the river the file belongs to
     */
    public InfrastructureParser(final File importPath, final File rootRelativePath, final ImportRiver river) {
        super(importPath, rootRelativePath, river);
        this.heightColIndex = -1;
        this.bankColIndex = -1;
        this.bankAttributes = new HashMap<>();
        this.bankAttributes.put("links", new ImportAttribute(DB_BANK_LEFT));
        this.bankAttributes.put("rechts", new ImportAttribute(DB_BANK_RIGHT));
        this.bankAttributes.put("", new ImportAttribute(DB_BANK_NULL));
    }


    /***** METHODS *****/

    @Override
    protected Logger getLog() {
        return log;
    }

    /**
     * Whether this import type shall be skipped
     */
    public static boolean shallSkip() {
        return Config.INSTANCE.skipSInfoInfrastructure();
    }

    /**
     * Creates a list of parsers for all infrastructure import files in a directory
     *
     * @param importDir the directory to scan for ".csv" files
     * @param relativeDir the directory the file names are appended to for the root relative paths
     * @param river the river the files belong to
     * @return one parser per file found; empty if the directory does not exist
     */
    public static List<InfrastructureParser> createParsers(final File importDir, final File relativeDir, final ImportRiver river) {
        final List<InfrastructureParser> parsers = new ArrayList<>();
        if (importDir.exists()) {
            for (final File file : listFiles(importDir, ".csv")) {
                parsers.add(new InfrastructureParser(file, new File(relativeDir, file.getName()), river));
            }
        }
        return parsers;
    }

    @Override
    protected InfrastructureSeriesImport createSeriesImport(final String filename) {
        return new InfrastructureSeriesImport(filename);
    }

    /**
     * NOTE(review): presumably lets the base class accept several lines per km (e.g. one per bank);
     * the exact semantics are defined in AbstractParser - confirm there.
     */
    @Override
    protected KmMode kmMode() {
        return KmMode.DUPLICATES;
    }

    /**
     * Tries the infrastructure specific meta line handlers in a fixed order and stops at the first one
     * that recognizes the current line.
     *
     * @return whether the current line was recognized by one of the handlers
     */
    @Override
    protected boolean handleMetaOther() {
        // Short-circuit evaluation keeps the order and the "first match wins" behaviour
        return handleMetaGroup() || handleMetaType() || handleMetaProvider() || handleMetaEvaluator() || handleMetaYear();
    }

    /**
     * Handles the "# Infrastruktur:" meta line: classifies the group name via the river's annotation
     * classifier and stores the result in the series header. Without a classifier an error is logged and
     * header parsing is stopped.
     *
     * @return whether the current line is a group meta line
     */
    private boolean handleMetaGroup() {
        final Matcher m = META_GROUP.matcher(this.currentLine);
        if (!m.matches()) {
            return false;
        }
        this.metaPatternsMatched.add(META_GROUP);
        final String groupName = m.group(1).trim();
        if (this.river.getAnnotationClassifier() != null) {
            final ImportAnnotationType group = this.river.getAnnotationClassifier().classifyDescription(groupName,
                    this.river.getAnnotationClassifier().getDefaultType());
            this.seriesHeader.setGroup(group);
            log.info(String.format("Group name in file: '%s', will be assigned to database name '%s'", groupName, group.getName()));
        }
        else {
            log.error("No annotation types file configured, cannot process group '" + groupName + "'");
            this.headerParsingState = ParsingState.STOP;
        }
        return true;
    }

    /**
     * Handles the "# Typ/Bezeichnung:" meta line: classifies the type name via the river's annotation
     * classifier and stores the result in the series header. Without a classifier an error is logged and
     * header parsing is stopped.
     *
     * @return whether the current line is a type meta line
     */
    private boolean handleMetaType() {
        final Matcher m = META_TYPE.matcher(this.currentLine);
        if (!m.matches()) {
            return false;
        }
        this.metaPatternsMatched.add(META_TYPE);
        final String typeName = m.group(1).trim();
        if (this.river.getAnnotationClassifier() != null) {
            final ImportAnnotationType type = this.river.getAnnotationClassifier().classifyDescription(typeName,
                    this.river.getAnnotationClassifier().getDefaultType());
            this.seriesHeader.setType(type);
            log.info(String.format("Type name in file: '%s', will be assigned to database name '%s'", typeName, type.getName()));
        }
        else {
            log.error("No annotation types file configured, cannot process type '" + typeName + "'");
            this.headerParsingState = ParsingState.STOP;
        }
        return true;
    }

    /**
     * Handles the "# Datenherkunft:" meta line and stores the provider in the series header.
     *
     * @return whether the current line is a provider meta line
     */
    private boolean handleMetaProvider() {
        final Matcher m = META_PROVIDER.matcher(this.currentLine);
        if (!m.matches()) {
            return false;
        }
        this.metaPatternsMatched.add(META_PROVIDER);
        this.seriesHeader.setProvider(parseMetaInfo(m.group(1).trim()));
        return true;
    }

    /**
     * Handles the "# Auswerter:" meta line and stores the evaluator in the series header.
     *
     * @return whether the current line is an evaluator meta line
     */
    private boolean handleMetaEvaluator() {
        final Matcher m = META_EVALUATOR.matcher(this.currentLine);
        if (!m.matches()) {
            return false;
        }
        this.metaPatternsMatched.add(META_EVALUATOR);
        this.seriesHeader.setEvaluation_by(parseMetaInfo(m.group(1).trim()));
        return true;
    }

    /**
     * Handles the "# Stand:" meta line and stores the year in the series header.
     *
     * @return whether the current line is a year meta line
     */
    private boolean handleMetaYear() {
        final Matcher m = META_YEAR.matcher(this.currentLine);
        if (!m.matches()) {
            return false;
        }
        this.metaPatternsMatched.add(META_YEAR);
        // The pattern guarantees exactly four digits, so parseInt cannot fail here
        this.seriesHeader.setYear(Integer.parseInt(m.group(1)));
        return true;
    }

    /**
     * Identifies the height and bank columns by their titles after the base class has handled the column
     * titles line. If one of the two columns cannot be found, a line error is logged and header parsing is
     * stopped. If several titles match the same pattern, the last one wins.
     *
     * @return false if the base class did not handle the line as column titles, otherwise true
     */
    @Override
    protected boolean handleMetaColumnTitles() {
        if (!super.handleMetaColumnTitles()) {
            return false;
        }
        // Index 0 is skipped - presumably the km column; confirm against AbstractParser
        for (int i = 1; i < this.columnTitles.size(); i++) {
            final String title = this.columnTitles.get(i);
            if (HEIGHT_COLUMNTITLE.matcher(title).matches()) {
                this.heightColIndex = i;
            }
            else if (BANK_COLUMNTITLE.matcher(title).matches()) {
                this.bankColIndex = i;
            }
        }
        if (this.bankColIndex < 0) {
            logLineError("Column of river side value could not be identified, missing column title 'Uferseite'");
            this.headerParsingState = ParsingState.STOP;
        }
        if (this.heightColIndex < 0) {
            logLineError("Column of height values could not be identified, missing column title 'Höhe...'");
            this.headerParsingState = ParsingState.STOP;
        }
        return true;
    }

    /**
     * Creates the import item of a data line from its height and bank column values.
     *
     * @param km the station of the line
     * @param values the column values of the line
     * @return the import item, or null (after logging a line error) if the height is missing or NaN, or the
     *         bank value is missing or not one of the known values
     */
    @Override
    protected InfrastructureKmLineImport createKmLineImport(final Double km, final String[] values) {
        final Number height = parseDoubleCheckNull(values, this.heightColIndex);
        if ((height == null) || Double.isNaN(height.doubleValue())) {
            logLineError(INVALID_VALUE_ERROR_FORMAT, "height");
            return null;
        }
        String bank = null;
        if ((this.bankColIndex >= 0) && (values.length > this.bankColIndex)) {
            // Locale.ROOT keeps the normalization independent of the JVM default locale
            // (e.g. a Turkish locale would turn "LINKS" into "lınks" and reject a valid line)
            bank = values[this.bankColIndex].trim().toLowerCase(Locale.ROOT);
        }
        if ((bank == null) || !this.bankAttributes.containsKey(bank)) {
            logLineError("Invalid or missing bank value");
            return null;
        }
        final InfrastructureKmLineImport kmLine = new InfrastructureKmLineImport(km, height.doubleValue(), this.bankAttributes.get(bank));
        logTrace("createKmLineImport(" + km.toString() + ") => " + kmLine.getStation());
        return kmLine;
    }
}