view backend/src/main/java/org/dive4elements/river/importer/sinfo/parsers/FlowDepthParser.java @ 9656:31549fdfaf4f

Importer (s/u-info) extensions: flow-depth: uniform formatting of from-to series names, warning instead of cancelling in case of missing column values, detecting, logging and skipping columns with wrong unit, better counting of inserted/updated values for each column
author mschaefer
date Mon, 23 Mar 2020 15:21:39 +0100
parents 4c5eeaff554c
children
line wrap: on
line source
/* Copyright (C) 2017 by Bundesanstalt für Gewässerkunde
 * Software engineering by
 *  Björnsen Beratende Ingenieure GmbH
 *  Dr. Schumacher Ingenieurbüro für Wasser und Umwelt
 *
 * This file is Free Software under the GNU AGPL (>=v3)
 * and comes with ABSOLUTELY NO WARRANTY! Check out the
 * documentation coming with Dive4Elements River for details.
 */

package org.dive4elements.river.importer.sinfo.parsers;

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;
import org.dive4elements.river.importer.Config;
import org.dive4elements.river.importer.ImportRiver;
import org.dive4elements.river.importer.ImporterSession;
import org.dive4elements.river.importer.common.AbstractParser;
import org.dive4elements.river.importer.common.ParsingState;
import org.dive4elements.river.importer.sinfo.importitem.FlowDepthColumnSeriesImport;
import org.dive4elements.river.importer.sinfo.importitem.FlowDepthKmLineImport;
import org.dive4elements.river.importer.sinfo.importitem.FlowDepthSeriesImport;
import org.dive4elements.river.model.sinfo.FlowDepth;
import org.dive4elements.river.model.sinfo.FlowDepthColumn;
import org.dive4elements.river.model.sinfo.FlowDepthValue;
import org.hibernate.Session;

/**
 * Parser for the header of a flow depth import file.<br />
 * The meta lines and the column title line are read here; for each value column with a valid title a
 * {@link FlowDepthColumnParser} is created, which is then asked to parse and to store that column.
 *
 * @author Matthias Schäfer
 *
 */
public class FlowDepthParser extends AbstractParser<FlowDepthColumn, FlowDepthValue, FlowDepthKmLineImport, FlowDepthColumnSeriesImport> {

    /***** FIELDS *****/

    private static final Logger log = Logger.getLogger(FlowDepthParser.class);

    /** Meta line "# Bezugsjahr: yyyy"; group 1 is the four-digit year */
    private static final Pattern META_YEAR = Pattern.compile("^#\\sBezugsjahr:\\s*([12]\\d\\d\\d).*", Pattern.CASE_INSENSITIVE);

    /** Meta line "# Auswerter: ..."; group 1 is the text up to the first semicolon */
    private static final Pattern META_EVALUATOR = Pattern.compile("^#\\sAuswerter:\\s*([^;]*).*", Pattern.CASE_INSENSITIVE);

    /** Meta line "# Peilung: ..."; group 1 is the text up to the first semicolon */
    private static final Pattern META_SOUNDING = Pattern.compile("^#\\sPeilung:\\s*([^;]*).*", Pattern.CASE_INSENSITIVE);

    /** Meta line "# Typ: ..."; group 1 is the text up to the first semicolon */
    private static final Pattern META_TYPE = Pattern.compile("^#\\sTyp:\\s*([^;]*).*", Pattern.CASE_INSENSITIVE);

    /** Title of a value column, "Fliesstiefe (label) [m]" with any single character accepted in place of "ss"; group 4 is the label */
    private static final Pattern COLUMN_TITLE = Pattern.compile("Flie((.)|(ss))tiefe\\s*\\((.+?)\\)\\s*\\[m\\].*", Pattern.CASE_INSENSITIVE);

    /** Series (group) import item shared by all column parsers of this file */
    private final FlowDepthSeriesImport flowdepthGroup;

    /** One parser per value column with a valid title; rebuilt on every call of {@link #parse()} */
    private final List<FlowDepthColumnParser> columnParsers;


    /***** CONSTRUCTORS *****/

    /**
     * Creates the parser for one import file.<br />
     * The group name is the file name with every occurrence of ".csv" removed.
     *
     * @param importPath the file to import
     * @param rootRelativePath path of the file used in log messages
     * @param river the river the file is imported for
     */
    public FlowDepthParser(final File importPath, final File rootRelativePath, final ImportRiver river) {
        super(importPath, rootRelativePath, river);
        final String groupName = importPath.getName().replaceAll("\\.csv", "");
        this.flowdepthGroup = new FlowDepthSeriesImport(groupName);
        this.seriesHeader = new FlowDepthColumnSeriesImport(this.flowdepthGroup.getFilename(), this.flowdepthGroup, null, null);
        this.columnParsers = new ArrayList<>();
    }


    /***** METHODS *****/

    @Override
    protected Logger getLog() {
        return log;
    }

    /**
     * Whether this import type shall be skipped, as reported by the importer configuration
     */
    public static boolean shallSkip() {
        return Config.INSTANCE.skipSInfoFlowDepth();
    }

    /**
     * Creates a list of parsers, one for each ".csv" file listed for a directory
     *
     * @return the parsers; an empty list if the directory does not exist
     */
    public static List<FlowDepthParser> createParsers(final File importDir, final File relativeDir, final ImportRiver river) {
        final List<FlowDepthParser> result = new ArrayList<>();
        if (!importDir.exists()) {
            return result;
        }
        for (final File csvFile : listFiles(importDir, ".csv")) {
            final File relativeFile = new File(relativeDir, csvFile.getName());
            result.add(new FlowDepthParser(csvFile, relativeFile, river));
        }
        return result;
    }

    /**
     * Parses the header of the import file and then lets each column parser parse its column.<br />
     * Only header lines are read here; reading ends as soon as the header parsing state leaves CONTINUE
     * or the end of the file is reached.
     */
    @Override
    public void parse() throws Exception {
        getLog().info("Start parsing:;'" + this.rootRelativePath + "'");
        // The series header was already created by the constructor, so only the per-run state is reset here
        this.metaPatternsMatched.clear();
        this.kmExists.clear();
        this.columnParsers.clear();
        this.headerParsingState = ParsingState.CONTINUE;
        try {
            try {
                final FileInputStream stream = new FileInputStream(this.importPath);
                final InputStreamReader reader = new InputStreamReader(stream, ENCODING);
                this.in = new LineNumberReader(reader);
            }
            catch (final Exception e) {
                // With the state set to STOP the read loop below is not entered
                logError("Could not open (%s)", e.getMessage());
                this.headerParsingState = ParsingState.STOP;
            }
            this.currentLine = null;
            while (this.headerParsingState == ParsingState.CONTINUE) {
                final String rawLine = this.in.readLine();
                if (rawLine == null) {
                    // End of file
                    this.currentLine = null;
                    break;
                }
                this.currentLine = rawLine.trim();
                if (!this.currentLine.isEmpty()) {
                    handleMetaLine();
                    if (this.headerParsingState == ParsingState.DONE) {
                        checkMetaData();
                    }
                }
            }
        }
        finally {
            if (this.in != null) {
                this.in.close();
                this.in = null;
            }
        }
        if (this.headerParsingState == ParsingState.STOP) {
            logError("Parsing of the file stopped due to a severe error");
            return;
        }
        for (final FlowDepthColumnParser columnParser : this.columnParsers) {
            columnParser.parse();
        }
    }

    /**
     * Tries the file type specific meta patterns on the current line, in the order year, type, sounding, evaluator
     *
     * @return whether one of them matched
     */
    @Override
    protected boolean handleMetaOther() {
        return handleMetaYear() || handleMetaType() || handleMetaSounding() || handleMetaEvaluator();
    }

    /**
     * Handles a year meta line: registers the pattern as matched and sets the year of the group
     */
    private boolean handleMetaYear() {
        final Matcher matcher = META_YEAR.matcher(this.currentLine);
        if (!matcher.matches()) {
            return false;
        }
        this.metaPatternsMatched.add(META_YEAR);
        this.flowdepthGroup.setYear(Integer.parseInt(matcher.group(1)));
        return true;
    }

    /**
     * Recognizes a type meta line; its value is neither stored nor is the pattern registered as matched
     */
    private boolean handleMetaType() {
        return META_TYPE.matcher(this.currentLine).matches();
    }

    /**
     * Handles a sounding meta line: registers the pattern as matched and sets the sounding info of the group
     */
    private boolean handleMetaSounding() {
        final Matcher matcher = META_SOUNDING.matcher(this.currentLine);
        if (!matcher.matches()) {
            return false;
        }
        this.metaPatternsMatched.add(META_SOUNDING);
        this.flowdepthGroup.setSounding_info(parseMetaInfo(matcher.group(1).trim()));
        return true;
    }

    /**
     * Handles an evaluator meta line: registers the pattern as matched and sets the evaluator of the group
     */
    private boolean handleMetaEvaluator() {
        final Matcher matcher = META_EVALUATOR.matcher(this.currentLine);
        if (!matcher.matches()) {
            return false;
        }
        this.metaPatternsMatched.add(META_EVALUATOR);
        this.flowdepthGroup.setEvaluation_by(parseMetaInfo(matcher.group(1).trim()));
        return true;
    }

    /**
     * Handles the column titles line: copies km range info and notes from the series header to the group
     * and creates a column parser for every column from index 1 on whose title matches the expected
     * title/unit pattern; any other column gets a warning and is left out.
     */
    @Override
    protected boolean handleMetaColumnTitles() {
        if (!super.handleMetaColumnTitles()) {
            return false;
        }
        this.flowdepthGroup.setKmrange_info(this.seriesHeader.getKmrange_info());
        this.flowdepthGroup.setNotes(this.seriesHeader.getNotes());
        // Index 0 is not a value column, so start at 1
        for (int col = 1; col < this.columnTitles.size(); col++) {
            final Matcher matcher = COLUMN_TITLE.matcher(this.columnTitles.get(col));
            if (!matcher.matches()) {
                // Column number in the message is 1-based
                logLineWarning("Invalid title/unit in column %d (%s)", col + 1, this.columnTitles.get(col));
                continue;
            }
            final String label = matcher.group(4).trim();
            this.columnParsers.add(new FlowDepthColumnParser(this.importPath, this.rootRelativePath, this.river, this.flowdepthGroup, col, label));
        }
        return true;
    }

    /**
     * Checks the existence of the active series in the database
     *
     * @return false if the river check fails or the query for the group returns no rows
     */
    @Override
    protected boolean checkSeriesExistsAlready() {
        if (!checkRiverExists()) {
            return false;
        }
        final Session dbSession = ImporterSession.getInstance().getDatabaseSession();
        final List<FlowDepth> found = this.flowdepthGroup.querySeriesItem(dbSession, this.river.getPeer());
        return !found.isEmpty();
    }


    /**
     * Fetches the peer of the group and lets each column parser store its column;
     * only a warning is logged if header parsing was stopped.
     */
    @Override
    public void store() {
        if (this.headerParsingState == ParsingState.STOP) {
            logWarning("Severe parsing errors, not storing series '%s'", this.flowdepthGroup.getFilename());
            return;
        }
        this.flowdepthGroup.getPeer(this.river.getPeer());
        for (final FlowDepthColumnParser columnParser : this.columnParsers) {
            columnParser.store();
        }
    }

    /**
     * Not supported; the series header is created in the constructor
     */
    @Override
    protected FlowDepthColumnSeriesImport createSeriesImport(final String filename) {
        throw new UnsupportedOperationException();
    }

    /**
     * Not supported; this parser reads no data lines itself
     */
    @Override
    protected FlowDepthKmLineImport createKmLineImport(final Double km, final String[] values) {
        throw new UnsupportedOperationException();
    }
}

http://dive4elements.wald.intevation.org