view backend/src/main/java/org/dive4elements/river/importer/sinfo/parsers/InfrastructureParser.java @ 9657:a79881a892c9

Importer (s/u-info) extensions, depth evolution: corrected the directory name Bezug_aktueller_GlW; wrong units are now detected and logged and the import is cancelled; the meta data year values are checked for plausibility and the import is cancelled in case of errors; missing change values are detected and logged and the affected lines are skipped
author mschaefer
date Mon, 23 Mar 2020 15:26:50 +0100
parents ac41551a8e4d
children 75bd347147ad
/* Copyright (C) 2017 by Bundesanstalt für Gewässerkunde
 * Software engineering by
 *  Björnsen Beratende Ingenieure GmbH
 *  Dr. Schumacher Ingenieurbüro für Wasser und Umwelt
 *
 * This file is Free Software under the GNU AGPL (>=v3)
 * and comes with ABSOLUTELY NO WARRANTY! Check out the
 * documentation coming with Dive4Elements River for details.
 */

package org.dive4elements.river.importer.sinfo.parsers;

import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;
import org.dive4elements.river.importer.Config;
import org.dive4elements.river.importer.ImportAnnotationType;
import org.dive4elements.river.importer.ImportAttribute;
import org.dive4elements.river.importer.ImportRiver;
import org.dive4elements.river.importer.common.AbstractParser;
import org.dive4elements.river.importer.common.ParsingState;
import org.dive4elements.river.importer.sinfo.importitem.InfrastructureKmLineImport;
import org.dive4elements.river.importer.sinfo.importitem.InfrastructureSeriesImport;
import org.dive4elements.river.model.sinfo.Infrastructure;
import org.dive4elements.river.model.sinfo.InfrastructureValue;

/**
 * Reads and parses an infrastructure file
 *
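 * <p>
 * Illustrative sketch of the expected file header, derived from the meta patterns below
 * (the station column and the general CSV layout are handled by {@link AbstractParser};
 * real files may differ):
 * </p>
 *
 * <pre>
 *   # Infrastruktur: (group name)
 *   # Typ/Bezeichnung: (type name)
 *   # Datenherkunft: (provider)
 *   # Auswerter: (evaluator)
 *   # Stand: 2015
 *   # (station column) ; Uferseite ; Höhe [m]
 * </pre>
 *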
 * @author Matthias Schäfer
 *
 */
public class InfrastructureParser extends AbstractParser<Infrastructure, InfrastructureValue, InfrastructureKmLineImport, InfrastructureSeriesImport> {

    /***** FIELDS *****/

    private static final Logger log = Logger.getLogger(InfrastructureParser.class);

    private static final Pattern META_GROUP = Pattern.compile("^#\\sInfrastruktur:\\s*([^;]*).*", Pattern.CASE_INSENSITIVE);

    private static final Pattern META_TYPE = Pattern.compile("^#\\sTyp\\/Bezeichnung:\\s*([^;]*).*", Pattern.CASE_INSENSITIVE);

    private static final Pattern META_PROVIDER = Pattern.compile("^#\\sDatenherkunft:\\s*([^;]*).*", Pattern.CASE_INSENSITIVE);

    private static final Pattern META_EVALUATOR = Pattern.compile("^#\\sAuswerter:\\s*([^;]*).*", Pattern.CASE_INSENSITIVE);

    private static final Pattern META_YEAR = Pattern.compile("^#\\sStand:\\s*([12]\\d\\d\\d).*", Pattern.CASE_INSENSITIVE);

    private static final Pattern HEIGHT_COLUMNTITLE = Pattern.compile("((H.he)|(Hoehe))\\s*\\[(.*)\\].*", Pattern.CASE_INSENSITIVE);

    private static final Pattern BANK_COLUMNTITLE = Pattern.compile("Uferseite.*", Pattern.CASE_INSENSITIVE);

    private static final String DB_BANK_LEFT = "links"; // TODO: improve database design to make this secure

    private static final String DB_BANK_RIGHT = "rechts";

    private static final String DB_BANK_NULL = "";

    private int heightColIndex;

    private int bankColIndex;

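    /** Maps the lower-cased content of the 'Uferseite' column to the bank attribute stored in the database ('links', 'rechts' or empty). */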
    private final HashMap<String, ImportAttribute> bankAttributes;


    /***** CONSTRUCTORS *****/

    public InfrastructureParser(final File importPath, final File rootRelativePath, final ImportRiver river) {
        super(importPath, rootRelativePath, river);
        this.heightColIndex = -1;
        this.bankColIndex = -1;
        this.bankAttributes = new HashMap<>();
        this.bankAttributes.put("links", new ImportAttribute(DB_BANK_LEFT));
        this.bankAttributes.put("rechts", new ImportAttribute(DB_BANK_RIGHT));
        this.bankAttributes.put("", new ImportAttribute(DB_BANK_NULL));
    }


    /***** METHODS *****/

    @Override
    protected Logger getLog() {
        return log;
    }

    /**
     * Whether this import type shall be skipped
     */
    public static boolean shallSkip() {
        return Config.INSTANCE.skipSInfoInfrastructure();
    }

    /**
     * Creates a list of parsers for all infrastructure import files in a directory
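     *
     * <p>
     * Hypothetical usage sketch (the real call site lives elsewhere in the importer framework;
     * the parse() call assumes the entry point offered by {@link AbstractParser}):
     * </p>
     *
     * <pre>
     *   for (final InfrastructureParser parser : InfrastructureParser.createParsers(dir, relDir, river))
     *       parser.parse();
     * </pre>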
     */
    public static List<InfrastructureParser> createParsers(final File importDir, final File relativeDir, final ImportRiver river) {
        final List<InfrastructureParser> parsers = new ArrayList<>();
        if (importDir.exists())
            for (final File file : listFiles(importDir, ".csv"))
                parsers.add(new InfrastructureParser(file, new File(relativeDir, file.getName()), river));
        return parsers;
    }

    @Override
    protected InfrastructureSeriesImport createSeriesImport(final String filename) {
        return new InfrastructureSeriesImport(filename);
    }

    @Override
    protected KmMode kmMode() {
        return KmMode.DUPLICATES;
    }

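    /**
     * Tries the infrastructure specific meta patterns (group, type, provider, evaluator, year) on the current header line.
     */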
    @Override
    protected boolean handleMetaOther() {
        return handleMetaGroup()
                || handleMetaType()
                || handleMetaProvider()
                || handleMetaEvaluator()
                || handleMetaYear();
    }

    private boolean handleMetaGroup() {
        final Matcher m = META_GROUP.matcher(this.currentLine);
        if (m.matches()) {
            this.metaPatternsMatched.add(META_GROUP);
            if (this.river.getAnnotationClassifier() != null) {
                final ImportAnnotationType group = this.river.getAnnotationClassifier().classifyDescription(m.group(1).trim(),
                        this.river.getAnnotationClassifier().getDefaultType());
                this.seriesHeader.setGroup(group);
                log.info(String.format("Group name in file: '%s', will be assigned to database name '%s'", m.group(1).trim(), group.getName()));
            }
            else {
                log.error("No annotation types file configured, cannot process group '" + m.group(1).trim() + "'");
                this.headerParsingState = ParsingState.STOP;
            }
            return true;
        }
        return false;
    }

    private boolean handleMetaType() {
        final Matcher m = META_TYPE.matcher(this.currentLine);
        if (m.matches()) {
            this.metaPatternsMatched.add(META_TYPE);
            if (this.river.getAnnotationClassifier() != null) {
                final ImportAnnotationType type = this.river.getAnnotationClassifier().classifyDescription(m.group(1).trim(),
                        this.river.getAnnotationClassifier().getDefaultType());
                this.seriesHeader.setType(type);
                log.info(String.format("Type name in file: '%s', will be assigned to database name '%s'", m.group(1).trim(), type.getName()));
            }
            else {
                log.error("No annotation types file configured, cannot process type '" + m.group(1).trim() + "'");
                this.headerParsingState = ParsingState.STOP;
            }
            return true;
        }
        return false;
    }

    private boolean handleMetaProvider() {
        final Matcher m = META_PROVIDER.matcher(this.currentLine);
        if (m.matches()) {
            this.metaPatternsMatched.add(META_PROVIDER);
            this.seriesHeader.setProvider(parseMetaInfo(m.group(1).trim()));
            return true;
        }
        return false;
    }

    private boolean handleMetaEvaluator() {
        final Matcher m = META_EVALUATOR.matcher(this.currentLine);
        if (m.matches()) {
            this.metaPatternsMatched.add(META_EVALUATOR);
            this.seriesHeader.setEvaluation_by(parseMetaInfo(m.group(1).trim()));
            return true;
        }
        return false;
    }

    private boolean handleMetaYear() {
        final Matcher m = META_YEAR.matcher(this.currentLine);
        if (m.matches()) {
            this.metaPatternsMatched.add(META_YEAR);
            this.seriesHeader.setYear(Integer.parseInt(m.group(1)));
            return true;
        }
        return false;
    }

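    /**
     * Determines the indexes of the height ('Höhe...') and bank ('Uferseite') columns from the column title line;
     * a missing bank column is only a warning, a missing height column stops the parsing of the file.
     */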
    @Override
    protected boolean handleMetaColumnTitles() {
        if (super.handleMetaColumnTitles()) {
            for (int i = 1; i < this.columnTitles.size(); i++) {
                if (HEIGHT_COLUMNTITLE.matcher(this.columnTitles.get(i)).matches())
                    this.heightColIndex = i;
                else if (BANK_COLUMNTITLE.matcher(this.columnTitles.get(i)).matches())
                    this.bankColIndex = i;
            }
            if (this.bankColIndex < 0)
                logWarning("Column of river side value could not be identified, missing column title 'Uferseite'");
            if (this.heightColIndex < 0) {
                logError("Column of height values could not be identified, missing column title 'Höhe...'");
                this.headerParsingState = ParsingState.STOP;
                return false;
            }
            return true;
        }
        else
            return false;
    }

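    /**
     * Creates the import item for a data line; returns null and logs an error if the height or the bank value is invalid.
     */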
    @Override
    protected InfrastructureKmLineImport createKmLineImport(final Double km, final String[] values) {
        // Parse the height value once; skip the line if it is not a valid number.
        final Double height = parseDoubleWithNull(values[this.heightColIndex]);
        if (height == null) {
            logError("Invalid height value in line " + this.in.getLineNumber());
            return null;
        }
        // The bank cell must contain one of the known values ('links', 'rechts' or empty).
        if ((this.bankColIndex >= 0) && this.bankAttributes.containsKey(values[this.bankColIndex].trim().toLowerCase())) {
            final InfrastructureKmLineImport kmLine = new InfrastructureKmLineImport(km, height.doubleValue(),
                    this.bankAttributes.get(values[this.bankColIndex].trim().toLowerCase()));
            logTrace("createKmLineImport(" + km.toString() + ") => " + kmLine.getStation());
            return kmLine;
        }
        else {
            logError("Invalid bank value in line " + this.in.getLineNumber());
            return null;
        }
    }
}
