view backend/src/main/java/org/dive4elements/river/importer/sinfo/parsers/CollisionParser.java @ 9658:d86c7cb68b41

Importer (s/u-info) extensions: daily discharge: detecting, logging and skipping lines with missing date or q, or duplicate date, detecting wrong column titles and cancelling the import, specific error message if gauge not found
author mschaefer
date Mon, 23 Mar 2020 15:33:40 +0100
parents 8a2a777a8372
children 7c1da1b3f6b8
line wrap: on
line source
/* Copyright (C) 2017 by Bundesanstalt für Gewässerkunde
 * Software engineering by
 *  Björnsen Beratende Ingenieure GmbH
 *  Dr. Schumacher Ingenieurbüro für Wasser und Umwelt
 *
 * This file is Free Software under the GNU AGPL (>=v3)
 * and comes with ABSOLUTELY NO WARRANTY! Check out the
 * documentation coming with Dive4Elements River for details.
 */

package org.dive4elements.river.importer.sinfo.parsers;

import java.io.File;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;
import org.dive4elements.river.importer.Config;
import org.dive4elements.river.importer.ImportRiver;
import org.dive4elements.river.importer.ImporterSession;
import org.dive4elements.river.importer.common.AbstractParser;
import org.dive4elements.river.importer.common.ParsingState;
import org.dive4elements.river.importer.sinfo.importitem.CollisionKmLineImport;
import org.dive4elements.river.importer.sinfo.importitem.CollisionSeriesImport;
import org.dive4elements.river.importer.sinfo.importitem.CollisionTypeImport;
import org.dive4elements.river.model.sinfo.Collision;
import org.dive4elements.river.model.sinfo.CollisionType;
import org.dive4elements.river.model.sinfo.CollisionValue;
import org.hibernate.Session;

/**
 * Reads and parses a collision file.
 * <p>
 * Besides the meta data handled by the base parser, the file header must contain a year line
 * ({@code # Jahr: yyyy}) and a column title line with the titles {@code Datum}, {@code Pegelstand [cm]},
 * {@code Bezugspegel} and {@code Unfallart}. If any of these is missing, the header parsing is stopped.
 *
 * @author Matthias Schäfer
 *
 */
public class CollisionParser extends AbstractParser<Collision, CollisionValue, CollisionKmLineImport, CollisionSeriesImport> {

    /***** FIELDS *****/

    private static final Logger log = Logger.getLogger(CollisionParser.class);

    /** Meta line carrying the four-digit year of the series (capturing group 1). */
    private static final Pattern META_YEAR = Pattern.compile("^#\\sJahr:\\s*([12]\\d\\d\\d).*", Pattern.CASE_INSENSITIVE);

    /**
     * Title patterns (case-insensitive) of the data columns this parser needs to locate.
     */
    private enum ColTitlePattern {
        DATE("Datum.*"), //
        GAUGE_W("Pegelstand\\s*\\[cm\\].*"), //
        GAUGE_NAME("Bezugspegel.*"), //
        TYPE("Unfallart.*");

        private final Pattern pattern;

        ColTitlePattern(final String regexp) {
            this.pattern = Pattern.compile(regexp, Pattern.CASE_INSENSITIVE);
        }

        public Pattern getPattern() {
            return this.pattern;
        }
    }

    /**
     * Format of the event date column. Kept per parser instance because {@link SimpleDateFormat} is not
     * synchronized and therefore must not be shared between parsers.
     */
    private final DateFormat dateFormat = new SimpleDateFormat("dd.MM.yyyy");

    /** Column index found for each title pattern; -1 while the column has not been identified. */
    private final EnumMap<ColTitlePattern, Integer> cols = new EnumMap<>(ColTitlePattern.class);

    /** Known collision types, keyed by their trimmed, lower-cased name. */
    private final HashMap<String, CollisionTypeImport> types;


    /***** CONSTRUCTORS *****/

    /**
     * Creates the parser and preloads the collision types already present in the database.
     */
    public CollisionParser(final File importPath, final File rootRelativePath, final ImportRiver river) {
        super(importPath, rootRelativePath, river);
        logDebug("CollisionParser.new: calling ImporterSession.getInstance");
        final Session session = ImporterSession.getInstance().getDatabaseSession();
        this.types = new HashMap<>();
        for (final CollisionType type : CollisionType.getTypes(session))
            this.types.put(type.getName().trim().toLowerCase(), new CollisionTypeImport(type.getName()));
    }


    /***** METHODS *****/

    @Override
    protected Logger getLog() {
        return log;
    }

    /**
     * Whether this import type shall be skipped
     */
    public static boolean shallSkip() {
        return Config.INSTANCE.skipSInfoCollision();
    }

    /**
     * Creates a list of parsers for all collision import files in a directory
     *
     * @return one parser per .csv file of the directory; empty if the directory does not exist
     */
    public static List<CollisionParser> createParsers(final File importDir, final File relativeDir, final ImportRiver river) {
        final List<CollisionParser> parsers = new ArrayList<>();
        if (importDir.exists()) {
            for (final File file : listFiles(importDir, ".csv"))
                parsers.add(new CollisionParser(file, new File(relativeDir, file.getName()), river));
        }
        return parsers;
    }

    @Override
    protected CollisionSeriesImport createSeriesImport(final String filename) {
        return new CollisionSeriesImport(filename);
    }

    @Override
    protected KmMode kmMode() {
        return KmMode.DUPLICATES;
    }

    /**
     * Handles the meta lines specific to collision files, which is the year line only.
     *
     * @return whether the current line has been recognized
     */
    @Override
    protected boolean handleMetaOther() {
        return handleMetaYear();
    }

    /**
     * Tries to read the current line as the year meta line and stores the year in the series header.
     *
     * @return whether the current line is the year meta line
     */
    private boolean handleMetaYear() {
        final Matcher m = META_YEAR.matcher(this.currentLine);
        if (m.matches()) {
            this.metaPatternsMatched.add(META_YEAR);
            this.seriesHeader.setYear(Integer.parseInt(m.group(1)));
            return true;
        }
        return false;
    }

    /**
     * Identifies the columns of the column title line and checks that all required columns and the year
     * meta info are present; otherwise an error is logged and the header parsing state is set to STOP.
     *
     * @return false if the base parser did not accept the current line as column title line, otherwise true
     */
    @Override
    protected boolean handleMetaColumnTitles() {
        if (!super.handleMetaColumnTitles())
            return false;
        for (final ColTitlePattern col : ColTitlePattern.values())
            this.cols.put(col, -1);
        // The search starts at index 1; NOTE(review): index 0 is presumably the km column - confirm in AbstractParser
        for (int i = 1; i <= this.columnTitles.size() - 1; i++) {
            for (final ColTitlePattern col : ColTitlePattern.values()) {
                if (col.getPattern().matcher(this.columnTitles.get(i)).matches()) {
                    this.cols.put(col, i);
                    break;
                }
            }
        }
        if (isColumnMissing(ColTitlePattern.DATE, "Column of the event dates could not be identified, missing column title 'Datum'"))
            return true;
        if (isColumnMissing(ColTitlePattern.GAUGE_W, "Column of the waterlevel could not be identified, missing column title 'Pegelstand [cm]'"))
            return true;
        // This check must test GAUGE_NAME: a missing gauge name column would otherwise only show up
        // as an invalid array index (-1) when the first data line is parsed
        if (isColumnMissing(ColTitlePattern.GAUGE_NAME, "Column of the reference gauge could not be identified, missing column title 'Bezugspegel'"))
            return true;
        if (isColumnMissing(ColTitlePattern.TYPE, "Column of the collision types could not be identified, missing column title 'Unfallart'"))
            return true;
        if (!this.metaPatternsMatched.contains(META_YEAR)) {
            logError("Required meta info for the year is missing");
            this.headerParsingState = ParsingState.STOP;
            return true;
        }
        return true;
    }

    /**
     * Checks whether a required column has been identified; if not, logs the error message and stops the
     * header parsing.
     *
     * @return true if the column is missing
     */
    private boolean isColumnMissing(final ColTitlePattern col, final String message) {
        if (this.cols.get(col) >= 0)
            return false;
        logLineError(message);
        this.headerParsingState = ParsingState.STOP;
        return true;
    }

    /**
     * Creates the import item of a data line.
     *
     * @param km station of the line
     * @param values column values of the line, indexed like the column titles
     * @return the new import item, or null (after logging a warning) if the date or the waterlevel of the
     *         line is missing or invalid
     */
    @Override
    protected CollisionKmLineImport createKmLineImport(final Double km, final String[] values) {
        final Date eventDate;
        try {
            eventDate = this.dateFormat.parse(values[this.cols.get(ColTitlePattern.DATE)]);
        }
        catch (final Exception e) {
            logLineWarning("Invalid or missing date");
            return null;
        }
        // Reuse a known collision type (name compared case-insensitively), or register a new one
        final String typeName = values[this.cols.get(ColTitlePattern.TYPE)].trim();
        final String typeKey = typeName.toLowerCase();
        CollisionTypeImport type = this.types.get(typeKey);
        if (type == null) {
            type = new CollisionTypeImport(typeName);
            this.types.put(typeKey, type);
        }
        final String gaugeName = values[this.cols.get(ColTitlePattern.GAUGE_NAME)].trim();
        final Number gaugeW = parseDoubleCheckNull(values, this.cols.get(ColTitlePattern.GAUGE_W));
        if ((gaugeW == null) || Double.isNaN(gaugeW.doubleValue())) {
            logLineWarning(INVALID_VALUE_ERROR_FORMAT, "waterlevel");
            return null;
        }
        return new CollisionKmLineImport(km, type, eventDate, gaugeName, gaugeW.doubleValue());
    }
}

http://dive4elements.wald.intevation.org