view backend/src/main/java/org/dive4elements/river/importer/sinfo/parsers/TkhParser.java @ 9636:ac41551a8e4d

Bundu/Bzws: Error message for missing channel/year, Nachtrag Pos. 20: import of two levels of infrastructure types
author mschaefer
date Mon, 11 Nov 2019 16:29:36 +0100
parents 4c5eeaff554c
children 1f57381b3bb5
line wrap: on
line source
/* Copyright (C) 2017 by Bundesanstalt für Gewässerkunde
 * Software engineering by
 *  Björnsen Beratende Ingenieure GmbH
 *  Dr. Schumacher Ingenieurbüro für Wasser und Umwelt
 *
 * This file is Free Software under the GNU AGPL (>=v3)
 * and comes with ABSOLUTELY NO WARRANTY! Check out the
 * documentation coming with Dive4Elements River for details.
 */

package org.dive4elements.river.importer.sinfo.parsers;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;
import org.dive4elements.river.importer.Config;
import org.dive4elements.river.importer.ImportRiver;
import org.dive4elements.river.importer.common.AbstractParser;
import org.dive4elements.river.importer.common.ParsingState;
import org.dive4elements.river.importer.sinfo.importitem.TkhColumnSeriesImport;
import org.dive4elements.river.importer.sinfo.importitem.TkhKmLineImport;
import org.dive4elements.river.importer.sinfo.importitem.TkhSeriesImport;
import org.dive4elements.river.model.sinfo.Tkh;
import org.dive4elements.river.model.sinfo.TkhColumn;
import org.dive4elements.river.model.sinfo.TkhValue;

/**
 * Reads and parses the header of a TKH (Transportkörperhöhe / dune height) file
 * and delegates the parsing and storing of the value columns to one
 * {@link TkhColumnParser} per recognized column.
 *
 * @author Matthias Schäfer
 *
 */
public class TkhParser extends AbstractParser<TkhColumn, TkhValue, TkhKmLineImport, TkhColumnSeriesImport> {

    /***** FIELDS *****/

    private static final Logger log = Logger.getLogger(TkhParser.class);

    // Meta line patterns: each matches a "# <keyword>: <value>" header line;
    // group(1) captures the value text up to (but excluding) the first ';'.
    private static final Pattern META_DATUM = Pattern.compile("^#\\sDatum:\\s*([^;]*).*", Pattern.CASE_INSENSITIVE);

    private static final Pattern META_EVALUATOR = Pattern.compile("^#\\sAuswerter:\\s*([^;]*).*", Pattern.CASE_INSENSITIVE);

    private static final Pattern META_SOUNDING = Pattern.compile("^#\\sPeilung:\\s*([^;]*).*", Pattern.CASE_INSENSITIVE);

    private static final Pattern META_TYPE = Pattern.compile("^#\\sTyp:\\s*([^;]*).*", Pattern.CASE_INSENSITIVE);

    // Matches column titles like "Transportkörperhöhe (<name>) [cm]"; the umlaut may
    // appear as any single character or as "oe". group(7) captures the column name.
    private static final Pattern COLUMN_TITLE = Pattern.compile("Transportk((.)|(oe))rperh((.)|(oe))he\\s*\\((.+?)\\)\\s*\\[cm\\].*", Pattern.CASE_INSENSITIVE);

    /**
     * Association of an import sub-directory name with the TKH group it contains.
     */
    public enum GroupDirectory {
        NONE(Tkh.Group.NONE, ""), //
        CALCULATION(Tkh.Group.CALCULATION, "Berechnungsergebnisse"), //
        MEASUREMENT(Tkh.Group.MEASUREMENT, "Naturmessungen");

        private final Tkh.Group group;
        private final String dirname;

        GroupDirectory(final Tkh.Group group, final String dirname) {
            this.group = group;
            this.dirname = dirname;
        }

        public Tkh.Group getGroup() {
            return this.group;
        }

        public String getDirName() {
            return this.dirname;
        }

        /**
         * Finds the group matching a directory name (case-insensitive),
         * or {@link #NONE} if no group directory matches.
         */
        public static GroupDirectory forDirName(final String dirname) {
            for (final GroupDirectory gd : GroupDirectory.values()) {
                if (dirname.equalsIgnoreCase(gd.getDirName()))
                    return gd;
            }
            return NONE;
        }
    }

    // NOTE(review): SimpleDateFormat is not thread-safe; sharing it statically is only
    // safe if the importer parses files single-threaded — TODO confirm.
    private static final DateFormat dateFormat = new SimpleDateFormat("dd.MM.yyyy");

    /** Series (group) import item shared by all column parsers of this file. */
    private final TkhSeriesImport tkhGroup;

    /** One parser per recognized value column; filled by handleMetaColumnTitles. */
    private final List<TkhColumnParser> colParsers;


    /***** CONSTRUCTORS *****/

    public TkhParser(final File importPath, final File rootRelativePath, final ImportRiver river) {
        super(importPath, rootRelativePath, river);
        // Series name is the file name without its .csv extension
        // (anchored so only the trailing extension is stripped).
        this.tkhGroup = new TkhSeriesImport(importPath.getName().replaceAll("\\.csv$", ""));
        // The parent directory decides whether this is a calculation or a measurement series.
        this.tkhGroup.setGroup(GroupDirectory.forDirName(this.importPath.getParentFile().getName()).getGroup());
        this.seriesHeader = new TkhColumnSeriesImport(this.tkhGroup.getFilename(), this.tkhGroup, null, null);
        this.colParsers = new ArrayList<>();
    }


    /***** METHODS *****/

    @Override
    protected Logger getLog() {
        return log;
    }

    /**
     * Whether this import type shall be skipped
     */
    public static boolean shallSkip() {
        return Config.INSTANCE.skipSInfoTkh();
    }

    /**
     * Creates a list of parsers for all TKH import files (*.csv) in a directory
     */
    public static List<TkhParser> createParsers(final File importDir, final File relativeDir, final ImportRiver river) {
        final List<TkhParser> parsers = new ArrayList<>();
        if (importDir.exists())
            for (final File file : listFiles(importDir, ".csv"))
                parsers.add(new TkhParser(file, new File(relativeDir, file.getName()), river));
        return parsers;
    }

    /**
     * Parses the header (meta lines) of the file and then runs the column parsers
     * created from the column title line. Stops on severe errors.
     */
    @Override
    public void parse() throws IOException {
        getLog().info("Start parsing:;'" + this.rootRelativePath + "'");
        this.metaPatternsMatched.clear();
        this.kmExists.clear();
        this.colParsers.clear();
        this.headerParsingState = ParsingState.CONTINUE;
        try {
            try {
                this.in = new LineNumberReader(new InputStreamReader(new FileInputStream(this.importPath), ENCODING));
            }
            catch (final Exception e) {
                // Open failure sets STOP, so the read loop below is never entered.
                logError("Could not open (" + e.getMessage() + ")");
                this.headerParsingState = ParsingState.STOP;
            }
            this.currentLine = null;
            while (this.headerParsingState == ParsingState.CONTINUE) {
                this.currentLine = this.in.readLine();
                if (this.currentLine == null)
                    break;
                this.currentLine = this.currentLine.trim();
                if (this.currentLine.isEmpty())
                    continue;
                handleMetaLine();
            }
        }
        finally {
            if (this.in != null) {
                this.in.close();
                this.in = null;
            }
        }
        if (this.headerParsingState == ParsingState.STOP) {
            logError("Parsing of the file stopped due to a severe error");
            return;
        }
        // Header ok: parse each recognized value column separately.
        for (final TkhColumnParser colParser : this.colParsers)
            colParser.parse();
    }

    /**
     * Tries the TKH-specific meta line patterns; returns true if one of them
     * consumed the current line.
     */
    @Override
    protected boolean handleMetaOther() {
        return handleMetaDay() || handleMetaType() || handleMetaSounding() || handleMetaEvaluator();
    }

    /**
     * Handles a "# Datum:" line; an empty or unparseable date is stored as null
     * and reported as a warning (an unparseable one additionally as an error).
     */
    private boolean handleMetaDay() {
        final Matcher m = META_DATUM.matcher(this.currentLine);
        if (m.matches()) {
            Date day = null;
            try {
                if (!m.group(1).isEmpty())
                    day = dateFormat.parse(m.group(1));
            }
            catch (final Exception e) {
                logError("Invalid date in line " + this.in.getLineNumber());
            }
            this.metaPatternsMatched.add(META_DATUM);
            this.tkhGroup.setDay(day);
            if (day == null)
                logWarning("No date specified");
            return true;
        }
        return false;
    }

    /**
     * Consumes a "# Typ:" line without storing its value.
     * NOTE(review): intentionally not added to metaPatternsMatched — presumably the
     * type line is optional and ignored; confirm against the file specification.
     */
    private boolean handleMetaType() {
        final Matcher m = META_TYPE.matcher(this.currentLine);
        return m.matches();
    }

    /** Handles a "# Peilung:" line and stores the sounding info on the series. */
    private boolean handleMetaSounding() {
        final Matcher m = META_SOUNDING.matcher(this.currentLine);
        if (m.matches()) {
            this.metaPatternsMatched.add(META_SOUNDING);
            this.tkhGroup.setSounding_info(parseMetaInfo(m.group(1).trim()));
            return true;
        }
        return false;
    }

    /** Handles a "# Auswerter:" line and stores the evaluator info on the series. */
    private boolean handleMetaEvaluator() {
        final Matcher m = META_EVALUATOR.matcher(this.currentLine);
        if (m.matches()) {
            this.metaPatternsMatched.add(META_EVALUATOR);
            this.tkhGroup.setEvaluation_by(parseMetaInfo(m.group(1).trim()));
            return true;
        }
        return false;
    }

    /**
     * Parses the column title line and creates a column parser for each title
     * matching the TKH column pattern; other columns are skipped with a warning.
     * Column 0 (the km column) is never a value column.
     */
    @Override
    protected boolean handleMetaColumnTitles() {
        if (!super.handleMetaColumnTitles())
            return false;
        this.tkhGroup.setKmrange_info(this.seriesHeader.getKmrange_info());
        this.tkhGroup.setNotes(this.seriesHeader.getNotes());
        for (int i = 1; i < this.columnTitles.size(); i++) {
            final Matcher m = COLUMN_TITLE.matcher(this.columnTitles.get(i));
            if (m.matches())
                this.colParsers.add(new TkhColumnParser(this.importPath, this.rootRelativePath, this.river, this.tkhGroup, i, m.group(7).trim()));
            else
                logWarning("No title found in column " + i + ", skipped");
        }
        return true;
    }

    /**
     * Stores the series and all successfully parsed columns, unless parsing
     * was stopped by a severe error.
     */
    @Override
    public void store() {
        if (this.headerParsingState != ParsingState.STOP) {
            this.tkhGroup.getPeer(this.river.getPeer());
            for (final TkhColumnParser colParser : this.colParsers)
                colParser.store();
        }
        else
            logWarning("Severe parsing errors, not storing series '" + this.tkhGroup.getFilename() + "'");
    }

    /** Not used: the series import is created in the constructor instead. */
    @Override
    protected TkhColumnSeriesImport createSeriesImport(final String filename) {
        throw new UnsupportedOperationException();
    }

    /** Not used: km lines are parsed by the per-column parsers instead. */
    @Override
    protected TkhKmLineImport createKmLineImport(final Double km, final String[] values) {
        throw new UnsupportedOperationException();
    }
}

http://dive4elements.wald.intevation.org