view backend/src/main/java/org/dive4elements/river/importer/sinfo/parsers/InfrastructureParser.java @ 9657:a79881a892c9

Importer (s/u-info) extensions, depth evolution: corrected the directory name Bezug_aktueller_GlW; wrong units are now detected and logged and the import is cancelled; the meta data year values are checked for plausibility and the import is cancelled in case of errors; missing change values are detected and logged and the affected lines are skipped
author mschaefer
date Mon, 23 Mar 2020 15:26:50 +0100
parents ac41551a8e4d
children 75bd347147ad
/* Copyright (C) 2017 by Bundesanstalt für Gewässerkunde
 * Software engineering by
 *  Björnsen Beratende Ingenieure GmbH
 *  Dr. Schumacher Ingenieurbüro für Wasser und Umwelt
 *
 * This file is Free Software under the GNU AGPL (>=v3)
 * and comes with ABSOLUTELY NO WARRANTY! Check out the
 * documentation coming with Dive4Elements River for details.
 */

package org.dive4elements.river.importer.sinfo.parsers;

import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;
import org.dive4elements.river.importer.Config;
import org.dive4elements.river.importer.ImportAnnotationType;
import org.dive4elements.river.importer.ImportAttribute;
import org.dive4elements.river.importer.ImportRiver;
import org.dive4elements.river.importer.common.AbstractParser;
import org.dive4elements.river.importer.common.ParsingState;
import org.dive4elements.river.importer.sinfo.importitem.InfrastructureKmLineImport;
import org.dive4elements.river.importer.sinfo.importitem.InfrastructureSeriesImport;
import org.dive4elements.river.model.sinfo.Infrastructure;
import org.dive4elements.river.model.sinfo.InfrastructureValue;

/**
 * Reads and parses an infrastructure file
 *
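 * <p>
 * Illustrative sketch of the expected file header, derived from the meta patterns below
 * (the station column and the general CSV layout are handled by {@link AbstractParser};
 * real files may differ):
 * </p>
 *
 * <pre>
 *   # Infrastruktur: (group name)
 *   # Typ/Bezeichnung: (type name)
 *   # Datenherkunft: (provider)
 *   # Auswerter: (evaluator)
 *   # Stand: 2015
 *   # (station column) ; Uferseite ; Höhe [m]
 * </pre>
 *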
 * @author Matthias Schäfer
 *
 */
public class InfrastructureParser extends AbstractParser<Infrastructure, InfrastructureValue, InfrastructureKmLineImport, InfrastructureSeriesImport> {

    /***** FIELDS *****/

    private static final Logger log = Logger.getLogger(InfrastructureParser.class);

    private static final Pattern META_GROUP = Pattern.compile("^#\\sInfrastruktur:\\s*([^;]*).*", Pattern.CASE_INSENSITIVE);

    private static final Pattern META_TYPE = Pattern.compile("^#\\sTyp\\/Bezeichnung:\\s*([^;]*).*", Pattern.CASE_INSENSITIVE);

    private static final Pattern META_PROVIDER = Pattern.compile("^#\\sDatenherkunft:\\s*([^;]*).*", Pattern.CASE_INSENSITIVE);

    private static final Pattern META_EVALUATOR = Pattern.compile("^#\\sAuswerter:\\s*([^;]*).*", Pattern.CASE_INSENSITIVE);

    private static final Pattern META_YEAR = Pattern.compile("^#\\sStand:\\s*([12]\\d\\d\\d).*", Pattern.CASE_INSENSITIVE);

    private static final Pattern HEIGHT_COLUMNTITLE = Pattern.compile("((H.he)|(Hoehe))\\s*\\[(.*)\\].*", Pattern.CASE_INSENSITIVE);

    private static final Pattern BANK_COLUMNTITLE = Pattern.compile("Uferseite.*", Pattern.CASE_INSENSITIVE);

    private static final String DB_BANK_LEFT = "links"; // TODO: improve database design to make this secure

    private static final String DB_BANK_RIGHT = "rechts";

    private static final String DB_BANK_NULL = "";

    private int heightColIndex;

    private int bankColIndex;

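    /** Maps the lower-cased content of the 'Uferseite' column to the bank attribute stored in the database ('links', 'rechts' or empty). */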
    private final HashMap<String, ImportAttribute> bankAttributes;


    /***** CONSTRUCTORS *****/

    public InfrastructureParser(final File importPath, final File rootRelativePath, final ImportRiver river) {
        super(importPath, rootRelativePath, river);
        this.heightColIndex = -1;
        this.bankColIndex = -1;
        this.bankAttributes = new HashMap<>();
        this.bankAttributes.put("links", new ImportAttribute(DB_BANK_LEFT));
        this.bankAttributes.put("rechts", new ImportAttribute(DB_BANK_RIGHT));
        this.bankAttributes.put("", new ImportAttribute(DB_BANK_NULL));
    }


    /***** METHODS *****/

    @Override
    protected Logger getLog() {
        return log;
    }

    /**
     * Whether this import type shall be skipped
     */
    public static boolean shallSkip() {
        return Config.INSTANCE.skipSInfoInfrastructure();
    }

    /**
     * Creates a list of parsers for all infrastructure import files in a directory
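     *
     * <p>
     * Hypothetical usage sketch (the real call site lives elsewhere in the importer framework;
     * the parse() call assumes the entry point offered by {@link AbstractParser}):
     * </p>
     *
     * <pre>
     *   for (final InfrastructureParser parser : InfrastructureParser.createParsers(dir, relDir, river))
     *       parser.parse();
     * </pre>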
     */
    public static List<InfrastructureParser> createParsers(final File importDir, final File relativeDir, final ImportRiver river) {
        final List<InfrastructureParser> parsers = new ArrayList<>();
        if (importDir.exists())
            for (final File file : listFiles(importDir, ".csv"))
                parsers.add(new InfrastructureParser(file, new File(relativeDir, file.getName()), river));
        return parsers;
    }

    @Override
    protected InfrastructureSeriesImport createSeriesImport(final String filename) {
        return new InfrastructureSeriesImport(filename);
    }

    @Override
    protected KmMode kmMode() {
        return KmMode.DUPLICATES;
    }

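    /**
     * Tries the infrastructure specific meta patterns (group, type, provider, evaluator, year) on the current header line.
     */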
    @Override
    protected boolean handleMetaOther() {
        return handleMetaGroup()
                || handleMetaType()
                || handleMetaProvider()
                || handleMetaEvaluator()
                || handleMetaYear();
    }

    private boolean handleMetaGroup() {
        final Matcher m = META_GROUP.matcher(this.currentLine);
        if (m.matches()) {
            this.metaPatternsMatched.add(META_GROUP);
            if (this.river.getAnnotationClassifier() != null) {
                final ImportAnnotationType group = this.river.getAnnotationClassifier().classifyDescription(m.group(1).trim(),
                        this.river.getAnnotationClassifier().getDefaultType());
                this.seriesHeader.setGroup(group);
                log.info(String.format("Group name in file: '%s', will be assigned to database name '%s'", m.group(1).trim(), group.getName()));
            }
            else {
                log.error("No annotation types file configured, cannot process group '" + m.group(1).trim() + "'");
                this.headerParsingState = ParsingState.STOP;
            }
            return true;
        }
        return false;
    }

    private boolean handleMetaType() {
        final Matcher m = META_TYPE.matcher(this.currentLine);
        if (m.matches()) {
            this.metaPatternsMatched.add(META_TYPE);
            if (this.river.getAnnotationClassifier() != null) {
                final ImportAnnotationType type = this.river.getAnnotationClassifier().classifyDescription(m.group(1).trim(),
                        this.river.getAnnotationClassifier().getDefaultType());
                this.seriesHeader.setType(type);
                log.info(String.format("Type name in file: '%s', will be assigned to database name '%s'", m.group(1).trim(), type.getName()));
            }
            else {
                log.error("No annotation types file configured, cannot process type '" + m.group(1).trim() + "'");
                this.headerParsingState = ParsingState.STOP;
            }
            return true;
        }
        return false;
    }

    private boolean handleMetaProvider() {
        final Matcher m = META_PROVIDER.matcher(this.currentLine);
        if (m.matches()) {
            this.metaPatternsMatched.add(META_PROVIDER);
            this.seriesHeader.setProvider(parseMetaInfo(m.group(1).trim()));
            return true;
        }
        return false;
    }

    private boolean handleMetaEvaluator() {
        final Matcher m = META_EVALUATOR.matcher(this.currentLine);
        if (m.matches()) {
            this.metaPatternsMatched.add(META_EVALUATOR);
            this.seriesHeader.setEvaluation_by(parseMetaInfo(m.group(1).trim()));
            return true;
        }
        return false;
    }

    private boolean handleMetaYear() {
        final Matcher m = META_YEAR.matcher(this.currentLine);
        if (m.matches()) {
            this.metaPatternsMatched.add(META_YEAR);
            this.seriesHeader.setYear(Integer.parseInt(m.group(1)));
            return true;
        }
        return false;
    }

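    /**
     * Determines the indexes of the height ('Höhe...') and bank ('Uferseite') columns from the column title line;
     * a missing bank column is only a warning, a missing height column stops the parsing of the file.
     */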
    @Override
    protected boolean handleMetaColumnTitles() {
        if (super.handleMetaColumnTitles()) {
            for (int i = 1; i < this.columnTitles.size(); i++) {
                if (HEIGHT_COLUMNTITLE.matcher(this.columnTitles.get(i)).matches())
                    this.heightColIndex = i;
                else if (BANK_COLUMNTITLE.matcher(this.columnTitles.get(i)).matches())
                    this.bankColIndex = i;
            }
            if (this.bankColIndex < 0)
                logWarning("Column of river side value could not be identified, missing column title 'Uferseite'");
            if (this.heightColIndex < 0) {
                logError("Column of height values could not be identified, missing column title 'Höhe...'");
                this.headerParsingState = ParsingState.STOP;
                return false;
            }
            return true;
        }
        else
            return false;
    }

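    /**
     * Creates the import item for a data line; returns null and logs an error if the height or the bank value is invalid.
     */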
    @Override
    protected InfrastructureKmLineImport createKmLineImport(final Double km, final String[] values) {
        // Parse the height value once; skip the line if it is not a valid number.
        final Double height = parseDoubleWithNull(values[this.heightColIndex]);
        if (height == null) {
            logError("Invalid height value in line " + this.in.getLineNumber());
            return null;
        }
        // The bank cell must contain one of the known values ('links', 'rechts' or empty).
        if ((this.bankColIndex >= 0) && this.bankAttributes.containsKey(values[this.bankColIndex].trim().toLowerCase())) {
            final InfrastructureKmLineImport kmLine = new InfrastructureKmLineImport(km, height.doubleValue(),
                    this.bankAttributes.get(values[this.bankColIndex].trim().toLowerCase()));
            logTrace("createKmLineImport(" + km.toString() + ") => " + kmLine.getStation());
            return kmLine;
        }
        else {
            logError("Invalid bank value in line " + this.in.getLineNumber());
            return null;
        }
    }
}
