view backend/src/main/java/org/dive4elements/river/importer/sinfo/parsers/TkhParser.java @ 9658:d86c7cb68b41

Importer (s/u-info) extensions: daily discharge: detecting, logging and skipping lines with missing date or q, or duplicate date, detecting wrong column titles and cancelling the import, specific error message if gauge not found
author mschaefer
date Mon, 23 Mar 2020 15:33:40 +0100
parents 1f57381b3bb5
children
line wrap: on
line source
/* Copyright (C) 2017 by Bundesanstalt für Gewässerkunde
 * Software engineering by
 *  Björnsen Beratende Ingenieure GmbH
 *  Dr. Schumacher Ingenieurbüro für Wasser und Umwelt
 *
 * This file is Free Software under the GNU AGPL (>=v3)
 * and comes with ABSOLUTELY NO WARRANTY! Check out the
 * documentation coming with Dive4Elements River for details.
 */

package org.dive4elements.river.importer.sinfo.parsers;

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;
import org.dive4elements.river.importer.Config;
import org.dive4elements.river.importer.ImportRiver;
import org.dive4elements.river.importer.ImporterSession;
import org.dive4elements.river.importer.common.AbstractParser;
import org.dive4elements.river.importer.common.ParsingState;
import org.dive4elements.river.importer.sinfo.importitem.TkhColumnSeriesImport;
import org.dive4elements.river.importer.sinfo.importitem.TkhKmLineImport;
import org.dive4elements.river.importer.sinfo.importitem.TkhSeriesImport;
import org.dive4elements.river.model.sinfo.Tkh;
import org.dive4elements.river.model.sinfo.TkhColumn;
import org.dive4elements.river.model.sinfo.TkhValue;
import org.hibernate.Session;

/**
 * Reads and parses the header of a TKH file and handles the parse and store of the columns
 *
 * @author Matthias Schäfer
 *
 */
public class TkhParser extends AbstractParser<TkhColumn, TkhValue, TkhKmLineImport, TkhColumnSeriesImport> {

    /***** FIELDS *****/

    private static final Logger log = Logger.getLogger(TkhParser.class);

    private static final Pattern META_DATUM = Pattern.compile("^#\\sDatum:\\s*([^;]*).*", Pattern.CASE_INSENSITIVE);

    private static final Pattern META_EVALUATOR = Pattern.compile("^#\\sAuswerter:\\s*([^;]*).*", Pattern.CASE_INSENSITIVE);

    private static final Pattern META_SOUNDING = Pattern.compile("^#\\sPeilung:\\s*([^;]*).*", Pattern.CASE_INSENSITIVE);

    private static final Pattern META_TYPE = Pattern.compile("^#\\sTyp:\\s*([^;]*).*", Pattern.CASE_INSENSITIVE);

    private static final Pattern COLUMN_TITLE = Pattern.compile("Transportk((.)|(oe))rperh((.)|(oe))he\\s*\\((.+?)\\)\\s*\\[cm\\].*", Pattern.CASE_INSENSITIVE);

    public enum GroupDirectory {
        NONE(Tkh.Group.NONE, ""), //
        CALCULATION(Tkh.Group.CALCULATION, "Berechnungsergebnisse"), //
        MEASUREMENT(Tkh.Group.MEASUREMENT, "Naturmessungen");

        private final Tkh.Group group;
        private final String dirname;

        GroupDirectory(final Tkh.Group group, final String dirname) {
            this.group = group;
            this.dirname = dirname;
        }

        public Tkh.Group getGroup() {
            return this.group;
        }

        public String getDirName() {
            return this.dirname;
        }

        public static GroupDirectory forDirName(final String dirname) {
            for (final GroupDirectory gd : GroupDirectory.values()) {
                if (dirname.equalsIgnoreCase(gd.getDirName()))
                    return gd;
            }
            return NONE;
        }
    }

    private static final DateFormat dateFormat = new SimpleDateFormat("dd.MM.yyyy");

    private final TkhSeriesImport tkhGroup;

    private final List<TkhColumnParser> colParsers;


    /***** CONSTRUCTORS *****/

    public TkhParser(final File importPath, final File rootRelativePath, final ImportRiver river) {
        super(importPath, rootRelativePath, river);
        this.tkhGroup = new TkhSeriesImport(importPath.getName().replaceAll("\\.csv", ""));
        this.tkhGroup.setGroup(GroupDirectory.forDirName(this.importPath.getParentFile().getName()).getGroup());
        this.seriesHeader = new TkhColumnSeriesImport(this.tkhGroup.getFilename(), this.tkhGroup, null, null);
        this.colParsers = new ArrayList<>();
    }


    /***** METHODS *****/

    @Override
    protected Logger getLog() {
        return log;
    }

    /**
     * Whether this import type shall be skipped
     */
    public static boolean shallSkip() {
        return Config.INSTANCE.skipSInfoTkh();
    }

    /**
     * Creates a list of parsers for all collision import files in a directory
     */
    public static List<TkhParser> createParsers(final File importDir, final File relativeDir, final ImportRiver river) {
        final List<TkhParser> parsers = new ArrayList<>();
        if (importDir.exists())
            for (final File file : listFiles(importDir, ".csv"))
                parsers.add(new TkhParser(file, new File(relativeDir, file.getName()), river));
        return parsers;
    }

    @Override
    public void parse() throws Exception {
        getLog().info("Start parsing:;'" + this.rootRelativePath + "'");
        // this.seriesHeader = createSeriesImport(this.importPath.getName().replaceAll("\\.csv", ""));
        this.metaPatternsMatched.clear();
        this.kmExists.clear();
        this.colParsers.clear();
        this.headerParsingState = ParsingState.CONTINUE;
        try {
            try {
                this.in = new LineNumberReader(new InputStreamReader(new FileInputStream(this.importPath), ENCODING));
            }
            catch (final Exception e) {
                logError("Could not open (%s)", e.getMessage());
                this.headerParsingState = ParsingState.STOP;
            }
            this.currentLine = null;
            while (this.headerParsingState == ParsingState.CONTINUE) {
                this.currentLine = this.in.readLine();
                if (this.currentLine == null)
                    break;
                this.currentLine = this.currentLine.trim();
                if (this.currentLine.isEmpty())
                    continue;
                handleMetaLine();
                if (this.headerParsingState == ParsingState.DONE)
                    checkMetaData();
            }
        }
        finally {
            if (this.in != null) {
                this.in.close();
                this.in = null;
            }
        }
        if (this.headerParsingState == ParsingState.STOP) {
            logError("Parsing of the file stopped due to a severe error");
            return;
        }
        for (final TkhColumnParser colParser : this.colParsers)
            colParser.parse();
    }

    @Override
    protected boolean handleMetaOther() {
        if (handleMetaDay())
            return true;
        else if (handleMetaType())
            return true;
        else if (handleMetaSounding())
            return true;
        else if (handleMetaEvaluator())
            return true;
        else
            return false;
    }

    private boolean handleMetaDay() {
        final Matcher m = META_DATUM.matcher(this.currentLine);
        if (m.matches()) {
            Date day = null;
            try {
                if (!m.group(1).isEmpty())
                    day = dateFormat.parse(m.group(1));
            }
            catch (final Exception e) {
                logLineWarning("Invalid date");
            }
            this.metaPatternsMatched.add(META_DATUM);
            this.tkhGroup.setDay(day);
            if (day == null)
                logLineWarning("No date specified");
            return true;
        }
        return false;
    }

    private boolean handleMetaType() {
        final Matcher m = META_TYPE.matcher(this.currentLine);
        return m.matches();
    }

    private boolean handleMetaSounding() {
        final Matcher m = META_SOUNDING.matcher(this.currentLine);
        if (m.matches()) {
            this.metaPatternsMatched.add(META_SOUNDING);
            this.tkhGroup.setSounding_info(parseMetaInfo(m.group(1).trim()));
            return true;
        }
        return false;
    }

    private boolean handleMetaEvaluator() {
        final Matcher m = META_EVALUATOR.matcher(this.currentLine);
        if (m.matches()) {
            this.metaPatternsMatched.add(META_EVALUATOR);
            this.tkhGroup.setEvaluation_by(parseMetaInfo(m.group(1).trim()));
            return true;
        }
        return false;
    }

    @Override
    protected boolean handleMetaColumnTitles() {
        if (!super.handleMetaColumnTitles())
            return false;
        this.tkhGroup.setKmrange_info(this.seriesHeader.getKmrange_info());
        this.tkhGroup.setNotes(this.seriesHeader.getNotes());
        for (int i = 1; i <= this.columnTitles.size() - 1; i++) {
            final Matcher m = COLUMN_TITLE.matcher(this.columnTitles.get(i));
            if (m.matches())
                this.colParsers.add(new TkhColumnParser(this.importPath, this.rootRelativePath, this.river, this.tkhGroup, i, m.group(7).trim()));
            else
                logLineWarning("Invalid title in column %d (%s)", i + 1, this.columnTitles.get(i));
        }
        return true;
    }

    /**
     * Checks the existence of the active series in the database
     */
    @Override
    protected boolean checkSeriesExistsAlready() {
        if (!checkRiverExists())
            return false;
        final Session session = ImporterSession.getInstance().getDatabaseSession();
        final List<Tkh> rows = this.tkhGroup.querySeriesItem(session, this.river.getPeer());
        return !rows.isEmpty();
    }


    @Override
    public void store() {
        if (this.headerParsingState != ParsingState.STOP) {
            this.tkhGroup.getPeer(this.river.getPeer());
            for (final TkhColumnParser colParser : this.colParsers)
                colParser.store();
        }
        else
            logWarning("Severe parsing errors, not storing series '%s'", this.tkhGroup.getFilename());
    }

    @Override
    protected TkhColumnSeriesImport createSeriesImport(final String filename) {
        throw new UnsupportedOperationException();
    }

    @Override
    protected TkhKmLineImport createKmLineImport(final Double km, final String[] values) {
        throw new UnsupportedOperationException();
    }
}

http://dive4elements.wald.intevation.org