view backend/src/main/java/org/dive4elements/river/importer/parsers/BedHeightParser.java @ 9034:8aa7d9eaaa21

Added bed_height_values section heights height01 to height10
author mschaefer
date Mon, 30 Apr 2018 10:13:15 +0200
parents 2693bfaf503d
children 4c5eeaff554c
line wrap: on
line source
/* Copyright (C) 2011, 2012, 2013 by Bundesanstalt für Gewässerkunde
 * Software engineering by Intevation GmbH
 *
 * This file is Free Software under the GNU AGPL (>=v3)
 * and comes with ABSOLUTELY NO WARRANTY! Check out the
 * documentation coming with Dive4Elements River for details.
 */

package org.dive4elements.river.importer.parsers;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.math.BigDecimal;
import java.text.NumberFormat;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Date;
import java.util.EnumMap;
import java.util.List;
import java.util.Locale;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;
import org.dive4elements.river.backend.utils.DateUtil;
import org.dive4elements.river.backend.utils.EpsilonComparator;
import org.dive4elements.river.importer.ImportBedHeight;
import org.dive4elements.river.importer.ImportBedHeightType;
import org.dive4elements.river.importer.ImportBedHeightValue;
import org.dive4elements.river.importer.ImportElevationModel;
import org.dive4elements.river.importer.ImportLocationSystem;
import org.dive4elements.river.importer.ImportRange;
import org.dive4elements.river.importer.ImportTimeInterval;
import org.dive4elements.river.importer.ImportUnit;
import org.dive4elements.river.importer.ImporterSession;
import org.dive4elements.river.importer.common.AbstractParser;
import org.dive4elements.river.model.BedHeightType;

/**
 * Parser for single bed height files.
 *
 * <p>A file is read line by line using {@link #ENCODING}. Empty lines are
 * skipped, lines starting with {@link #START_META_CHAR} are treated as meta
 * lines and matched against the {@code META_*} patterns, and every other line
 * is treated as a data line whose columns are separated by
 * {@link #SEPERATOR_CHAR}. Each parsed file yields one {@link ImportBedHeight}
 * that is appended to the list returned by {@link #getBedHeights()}.</p>
 *
 * <p>The parser keeps per-file state (column mapping, set of seen stations)
 * in instance fields and shares one static {@link NumberFormat}; since
 * {@code NumberFormat} instances are not synchronized, one parser must not be
 * used from several threads at the same time.</p>
 */
public class BedHeightParser {

    private static final Logger log =
            Logger.getLogger(BedHeightParser.class);

    /** Name of the character set used to decode the input files. */
    public static final String ENCODING = "ISO-8859-1";

    /** Locale used by {@link #nf} to parse the numeric columns. */
    public static final Locale DEFAULT_LOCALE = Locale.GERMAN;

    /** Prefix that marks a line as meta line. */
    public static final String START_META_CHAR = "#";

    /** Separator between the columns of a line. */
    public static final String SEPERATOR_CHAR  = ";";

    /** Year meta line; group 1 holds the (possibly empty) run of digits. */
    public static final Pattern META_YEAR =
            Pattern.compile("^Jahr: [^0-9]*(\\d*).*", Pattern.CASE_INSENSITIVE);

    /** Time interval meta line; groups 1 and 2 hold the two year digit runs. */
    public static final Pattern META_TIMEINTERVAL =
            Pattern.compile("^Zeitraum: Epoche (\\d*)-(\\d*).*", Pattern.CASE_INSENSITIVE);

    /** Bed height type meta line; group 1 holds the type text. */
    public static final Pattern META_TYPE =
            Pattern.compile("^Aufnahmeart: (.*).*", Pattern.CASE_INSENSITIVE);

    /** Location system meta line; group 1 holds the system name. */
    public static final Pattern META_LOCATION_SYSTEM =
            Pattern.compile("^Lagesystem: (.*).*", Pattern.CASE_INSENSITIVE);

    /** Current elevation system meta line; group 1 = name, group 2 = text in brackets (used as unit). */
    public static final Pattern META_CUR_ELEVATION_SYSTEM =
            Pattern.compile("^H.hensystem:\\s(.*)?? \\[(.*)\\].*", Pattern.CASE_INSENSITIVE);

    /** Original elevation system meta line; group 1 = name, group 2 = text in brackets (used as unit). */
    public static final Pattern META_OLD_ELEVATION_SYSTEM =
            Pattern.compile("^urspr.ngliches H.hensystem:\\s(.*)?? \\[(.*)\\].*", Pattern.CASE_INSENSITIVE);

    /**
     * Range meta line; groups 1 and 2 hold the two bounds. Note that the
     * character between integer and fraction digits is matched by an
     * unescaped {@code .}, i.e. by any single character.
     */
    public static final Pattern META_RANGE =
            Pattern.compile("^Strecke:\\D*(\\d++.?\\d*) ?- ?(\\d++.?\\d*).*", Pattern.CASE_INSENSITIVE);

    /** Evaluator meta line; group 1 holds the free text. */
    public static final Pattern META_EVALUATION_BY =
            Pattern.compile("^Auswerter: (.*).*", Pattern.CASE_INSENSITIVE);

    /** Sounding width meta line; group 1 holds the free text starting at the first non-blank character. */
    public static final Pattern META_SOUNDING_WIDTH = Pattern.compile("^ausgewertete Peilbreite:\\s*(\\S.*).*", Pattern.CASE_INSENSITIVE);

    /** Comment meta line; group 1 holds the free text. */
    public static final Pattern META_COMMENTS =
            Pattern.compile("^Weitere Bemerkungen: (.*).*", Pattern.CASE_INSENSITIVE);

    /** Column titles meta line: starts with the km column title followed by at least one more column. */
    private static final Pattern META_COLUMNTITLES = Pattern.compile("^Fluss-km\\s*;.+", Pattern.CASE_INSENSITIVE);

    /**
     * Known column titles of the data section, each with the
     * (case-insensitive) pattern its title has to match completely.
     */
    private enum ColTitlePattern {
        KM("Fluss-km.*"), //
        HEIGHT("mittlere Sohlh.he\\s*\\[(.*)\\].*"), //
        UNCERTAINTY("Unsicherheit\\s*\\[(.*)\\].*"), //
        GAP("Datenl.cke.*"), //
        WIDTH("Peilbreite\\s*\\[(.*)\\].*"), //
        MINHEIGHT("Minimale Sohlh.he\\s*\\[(.*)\\].*"), //
        MAXHEIGHT("Maximale Sohlh.he\\s*\\[(.*)\\].*"), //
        HEIGHT01("Feld\\s*1\\s*\\[(.*)\\].*"), //
        HEIGHT02("Feld\\s*2\\s*\\[(.*)\\].*"), //
        HEIGHT03("Feld\\s*3\\s*\\[(.*)\\].*"), //
        HEIGHT04("Feld\\s*4\\s*\\[(.*)\\].*"), //
        HEIGHT05("Feld\\s*5\\s*\\[(.*)\\].*"), //
        HEIGHT06("Feld\\s*6\\s*\\[(.*)\\].*"), //
        HEIGHT07("Feld\\s*7\\s*\\[(.*)\\].*"), //
        HEIGHT08("Feld\\s*8\\s*\\[(.*)\\].*"), //
        HEIGHT09("Feld\\s*9\\s*\\[(.*)\\].*"), //
        HEIGHT10("Feld\\s*10\\s*\\[(.*)\\].*");

        private final Pattern pattern;

        private ColTitlePattern(final String regexp) {
            this.pattern = Pattern.compile(regexp, Pattern.CASE_INSENSITIVE);
        }

        /** Returns the compiled title pattern of this column. */
        public Pattern getPattern() {
            return this.pattern;
        }

        /**
         * Returns the section height column for a section number.
         *
         * @param index section number, 1 to 10
         * @return the constant {@code HEIGHT01} .. {@code HEIGHT10}
         * @throws IllegalArgumentException if no constant exists for the index
         */
        public static ColTitlePattern getSectionPattern(final int index) {
            // Locale.ROOT: with the default locale "%02d" may be rendered with
            // non-ASCII digits, which would make the valueOf lookup fail.
            return ColTitlePattern.valueOf(String.format(Locale.ROOT, "HEIGHT%02d", index));
        }
    }

    /**
     * Zero-based column index per known column title of the file currently
     * being parsed; -1 means the column has not been found.
     */
    private final EnumMap<ColTitlePattern, Integer> cols = new EnumMap<>(ColTitlePattern.class);

    /**
     * Number format for {@link #DEFAULT_LOCALE}, shared by all parsers.
     * {@link NumberFormat} instances are not synchronized.
     */
    protected static NumberFormat nf = NumberFormat.getInstance(DEFAULT_LOCALE);


    /** Results of all files parsed by this instance so far, in parse order. */
    protected List<ImportBedHeight> bedHeights;


    /**
     * Factory hook creating the result object for one file.
     *
     * @param description description passed to the {@link ImportBedHeight} constructor
     * @return a new {@link ImportBedHeight}
     */
    protected ImportBedHeight newImportBedHeight(final String description) {
        return new ImportBedHeight(description);
    }


    /** Stations (km) already seen in the current file, compared with {@link EpsilonComparator#CMP}. */
    protected TreeSet<Double> kmExists;

    /** Creates a parser with an empty result list. */
    public BedHeightParser() {
        this.bedHeights = new ArrayList<>();
        this.kmExists = new TreeSet<>(EpsilonComparator.CMP);
    }


    /**
     * Returns the internal (mutable) list of parsed bed heights.
     */
    public List<ImportBedHeight> getBedHeights() {
        return this.bedHeights;
    }


    /**
     * Parses one file and appends the result to {@link #getBedHeights()}.
     *
     * <p>The description of the result is the file name with every occurrence
     * of {@code .csv} removed. The per-file state (seen stations, column
     * mapping) is reset before reading.</p>
     *
     * @param file the file to read
     * @throws IOException if the file cannot be opened, read or closed
     */
    public void parse(final File file) throws IOException {
        log.info("Parsing bed height single file '" + file + "'");
        final ImportBedHeight obj = newImportBedHeight(file.getName().replaceAll("\\.csv", ""));

        // Reset the per-file state; all columns start as "not found".
        this.kmExists.clear();
        this.cols.clear();
        for (final ColTitlePattern col : ColTitlePattern.values()) {
            this.cols.put(col, -1);
        }

        // try-with-resources also closes the underlying stream if wrapping it fails.
        try (LineNumberReader in = new LineNumberReader(
                new InputStreamReader(new FileInputStream(file), ENCODING))) {

            String line;
            while ((line = in.readLine()) != null) {
                line = line.trim();
                if (line.isEmpty()) {
                    continue;
                }

                if (line.startsWith(START_META_CHAR)) {
                    handleMetaLine(obj, line);
                }
                else {
                    handleDataLine(obj, line);
                }
            }

            log.info("File contained " + obj.getValueCount() + " values.");
            this.bedHeights.add(obj);
        }
    }


    /**
     * Removes the first character of a meta line and, if present, one single
     * space directly following it. The first character is removed without
     * checking that it actually is {@link #START_META_CHAR}.
     *
     * @param line the meta line
     * @return the remaining text; an empty input is returned unchanged
     */
    protected static String stripMetaLine(final String line) {
        if (line.isEmpty()) {
            return line;
        }
        final String withoutMarker = line.substring(1);
        return withoutMarker.startsWith(" ") ? withoutMarker.substring(1) : withoutMarker;
    }


    /**
     * Dispatches a meta line to the {@code handleMeta*} methods. The handlers
     * are tried in a fixed order and the first one returning {@code true}
     * wins; if none accepts the line a warning is logged.
     *
     * @param obj  the bed height being filled
     * @param line the complete meta line including the leading marker
     */
    protected void handleMetaLine(final ImportBedHeight obj, final String line) {
        final String meta = stripMetaLine(line);

        // Short-circuit evaluation keeps the original handler order.
        final boolean handled = handleMetaYear(obj, meta)
                || handleMetaTimeInterval(obj, meta)
                || handleMetaComment(obj, meta)
                || handleMetaEvaluationBy(obj, meta)
                || handleMetaRange(obj, meta)
                || handleMetaType(obj, meta)
                || handleMetaLocationSystem(obj, meta)
                || handleMetaCurElevationModel(obj, meta)
                || handleMetaOldElevationModel(obj, meta)
                || handleMetaSoundingWidth(obj, meta)
                || handleMetaColumnTitles(obj, meta);

        if (!handled) {
            log.warn("BHP: Meta line did not match any known type: " + line);
        }
    }


    /**
     * Handles a {@link #META_YEAR} line.
     *
     * @return {@code true} if the line matched the pattern, even when no
     *         usable year could be extracted (a warning is logged then)
     */
    protected boolean handleMetaYear(final ImportBedHeight obj, final String line) {
        final Matcher m = META_YEAR.matcher(line);
        if (!m.matches()) {
            return false;
        }
        final String digits = m.group(1).trim();
        if (digits.isEmpty()) {
            log.warn("BHP: No year given.");
            return true;
        }
        try {
            obj.setYear(Integer.parseInt(digits));
        }
        catch (final NumberFormatException e) {
            // The pattern accepts digit runs of any length; do not let an
            // out-of-range value abort the whole file.
            log.warn("BHP: could not parse year", e);
        }
        return true;
    }


    /**
     * Handles a {@link #META_TIMEINTERVAL} line and sets the time interval
     * from the start date of the first year to the end date of the second.
     *
     * @return {@code true} if the line matched the pattern, even when the
     *         years could not be parsed (a warning is logged then)
     */
    protected boolean handleMetaTimeInterval(final ImportBedHeight obj, final String line) {
        final Matcher m = META_TIMEINTERVAL.matcher(line);
        if (!m.matches()) {
            return false;
        }
        final String lo = m.group(1).trim();
        final String up = m.group(2).trim();
        log.debug("Found time interval: " + lo + " - " + up);
        try {
            final int lower = Integer.parseInt(lo);
            final int upper = Integer.parseInt(up);
            final Date fromYear = DateUtil.getStartDateFromYear(lower);
            final Date toYear   = DateUtil.getEndDateFromYear(upper);
            obj.setTimeInterval(new ImportTimeInterval(fromYear, toYear));
        }
        catch (final NumberFormatException e) {
            log.warn("BHP: could not parse timeinterval", e);
        }
        return true;
    }


    /**
     * Handles a {@link #META_COMMENTS} line; separators are removed from the
     * text and it is trimmed before it is stored as comment.
     *
     * @return {@code true} if the line matched the pattern
     */
    protected boolean handleMetaComment(final ImportBedHeight obj, final String line) {
        final Matcher m = META_COMMENTS.matcher(line);
        if (!m.matches()) {
            return false;
        }
        obj.setComment(m.group(1).replace(SEPERATOR_CHAR, "").trim());
        return true;
    }


    /**
     * Handles a {@link #META_EVALUATION_BY} line; separators are removed from
     * the text and it is trimmed before it is stored.
     *
     * @return {@code true} if the line matched the pattern
     */
    protected boolean handleMetaEvaluationBy(final ImportBedHeight obj, final String line) {
        final Matcher m = META_EVALUATION_BY.matcher(line);
        if (!m.matches()) {
            return false;
        }
        obj.setEvaluationBy(m.group(1).replace(SEPERATOR_CHAR, "").trim());
        return true;
    }


    /**
     * Handles a {@link #META_RANGE} line and sets the range from the two
     * captured bounds.
     *
     * @return {@code true} only if the line matched and both bounds could be
     *         parsed; a parse failure is logged and reported as {@code false}
     */
    protected boolean handleMetaRange(final ImportBedHeight obj, final String line) {
        final Matcher m = META_RANGE.matcher(line);
        if (!m.matches()) {
            return false;
        }
        final String a = m.group(1).replace(SEPERATOR_CHAR, "").trim();
        final String b = m.group(2).replace(SEPERATOR_CHAR, "").trim();
        try {
            final BigDecimal lower = AbstractParser.parseDecimal(a);
            final BigDecimal upper = AbstractParser.parseDecimal(b);
            obj.setRange(new ImportRange(lower, upper));
            return true;
        }
        catch (final Exception e) {
            log.warn("BHP: could not parse range", e);
            return false;
        }
    }


    /**
     * Handles a {@link #META_TYPE} line by looking up the captured type text
     * via {@link BedHeightType#fetchBedHeightTypeForType} in the importer's
     * database session.
     *
     * @return {@code true} only if the line matched and the type was found;
     *         an unknown type is logged as error and reported as {@code false}
     */
    protected boolean handleMetaType(final ImportBedHeight obj, final String line) {
        final Matcher m = META_TYPE.matcher(line);
        if (!m.matches()) {
            return false;
        }
        final String tmp = m.group(1).replace(SEPERATOR_CHAR, "").trim();
        final BedHeightType bht = BedHeightType.fetchBedHeightTypeForType(tmp, ImporterSession.getInstance().getDatabaseSession());
        if (bht == null) {
            // NOTE(review): the message announces that the file is ignored, but
            // this class keeps parsing it - confirm where the file is skipped.
            log.error("Unknown bed height type: '" + tmp + "'. File ignored.");
            return false;
        }
        obj.setType(new ImportBedHeightType(bht));
        return true;
    }


    /**
     * Handles a {@link #META_LOCATION_SYSTEM} line; the cleaned text is passed
     * as both constructor arguments of {@link ImportLocationSystem}.
     *
     * @return {@code true} if the line matched the pattern
     */
    protected boolean handleMetaLocationSystem(final ImportBedHeight obj, final String line) {
        final Matcher m = META_LOCATION_SYSTEM.matcher(line);
        if (!m.matches()) {
            return false;
        }
        final String tmp = m.group(1).replace(SEPERATOR_CHAR, "").trim();
        obj.setLocationSystem(new ImportLocationSystem(tmp, tmp));
        return true;
    }


    /**
     * Handles a {@link #META_CUR_ELEVATION_SYSTEM} line and sets the current
     * elevation model from the captured name and unit.
     *
     * @return {@code true} if the line matched the pattern
     */
    protected boolean handleMetaCurElevationModel(final ImportBedHeight obj, final String line) {
        final Matcher m = META_CUR_ELEVATION_SYSTEM.matcher(line);
        if (!m.matches()) {
            return false;
        }
        final String name = m.group(1).trim();
        final String unit = m.group(2).trim();
        obj.setCurElevationModel(new ImportElevationModel(name, new ImportUnit(unit)));
        return true;
    }


    /**
     * Handles a {@link #META_OLD_ELEVATION_SYSTEM} line and sets the old
     * elevation model from the captured name and unit.
     *
     * @return {@code true} if the line matched the pattern
     */
    protected boolean handleMetaOldElevationModel(final ImportBedHeight obj, final String line) {
        final Matcher m = META_OLD_ELEVATION_SYSTEM.matcher(line);
        if (!m.matches()) {
            return false;
        }
        final String name = m.group(1).trim();
        final String unit = m.group(2).trim();
        obj.setOldElevationModel(new ImportElevationModel(name, new ImportUnit(unit)));
        return true;
    }

    /**
     * Handles a {@link #META_SOUNDING_WIDTH} line; separators are removed from
     * the text and it is trimmed before it is stored.
     *
     * @return {@code true} if the line matched the pattern
     */
    protected boolean handleMetaSoundingWidth(final ImportBedHeight obj, final String line) {
        final Matcher m = META_SOUNDING_WIDTH.matcher(line);
        if (!m.matches()) {
            return false;
        }
        obj.setSoundingWidthInfo(m.group(1).replace(SEPERATOR_CHAR, "").trim());
        return true;
    }


    /**
     * Tries to parse a line as column titles line.
     *
     * <p>The line is split at the separator; for every title the first
     * matching {@code ColTitlePattern} (in declaration order) gets the
     * title's zero-based position as column index. Titles are trimmed before
     * matching so that blanks around the separator do not hide a column.</p>
     *
     * @return {@code true} if the line is a column titles line
     */
    protected boolean handleMetaColumnTitles(final ImportBedHeight obj, final String line) {
        if (!META_COLUMNTITLES.matcher(line).matches()) {
            return false;
        }
        final String[] titles = line.split(SEPERATOR_CHAR, 0);
        for (int i = 0; i < titles.length; i++) {
            final String title = titles[i].trim();
            for (final ColTitlePattern col : ColTitlePattern.values()) {
                if (col.getPattern().matcher(title).matches()) {
                    this.cols.put(col, i);
                    break;
                }
            }
        }
        return true;
    }

    /**
     * Parses one data line and adds the resulting value to {@code obj}.
     *
     * <p>Lines with fewer than two columns, lines whose first column is not a
     * parseable number and lines whose station was already seen in the
     * current file are skipped (the latter two with a log message).</p>
     *
     * @param obj  the bed height receiving the value
     * @param line the data line
     */
    protected void handleDataLine(final ImportBedHeight obj, final String line) {
        final String[] values = line.split(SEPERATOR_CHAR, 0);
        if (values.length < 2) {
            // Do not import line without data or only km
            return;
        }

        final Double km;
        try {
            km = Double.valueOf(nf.parse(values[0].trim()).doubleValue());
        }
        catch (final ParseException e) {
            log.error("Error parsing km '" + values[0] + "': " + e.getMessage());
            return;
        }

        // add() returns false if an equal station (per comparator) is already present.
        if (!this.kmExists.add(km)) {
            log.warn("duplicate station '" + values[0] + "': -> ignored");
            return;
        }

        final ImportBedHeightValue value = new ImportBedHeightValue(obj, km,
                parseColumn(values, ColTitlePattern.HEIGHT),
                parseColumn(values, ColTitlePattern.UNCERTAINTY),
                parseColumn(values, ColTitlePattern.GAP),
                parseColumn(values, ColTitlePattern.WIDTH),
                parseColumn(values, ColTitlePattern.MINHEIGHT),
                parseColumn(values, ColTitlePattern.MAXHEIGHT));
        for (int i = 1; i <= 10; i++) {
            value.setSectionHeight(i, parseColumn(values, ColTitlePattern.getSectionPattern(i)));
        }

        obj.addValue(value);
    }

    /**
     * Reads the number in the given column of a split data line.
     *
     * @param values the columns of the data line
     * @param col    the column to read
     * @return the parsed number, or {@code null} if the column is unknown,
     *         lies beyond the end of the line, is blank or cannot be parsed
     *         (the latter is logged as warning)
     */
    private Double parseColumn(final String[] values, final ColTitlePattern col) {
        final Integer idx = this.cols.get(col);
        // idx is null when no file has been started yet (mapping never initialised).
        if (idx == null || idx < 0 || idx >= values.length) {
            return null;
        }
        // Parse the trimmed text: NumberFormat does not skip leading blanks.
        final String raw = values[idx].trim();
        if (raw.isEmpty()) {
            return null;
        }
        try {
            return nf.parse(raw).doubleValue();
        }
        catch (final ParseException e) {
            log.warn("unparseable " + col.toString() + " '" + values[idx] + "'");
            return null;
        }
    }
}

http://dive4elements.wald.intevation.org