view backend/src/main/java/org/dive4elements/river/importer/parsers/WaterlevelParser.java @ 9650:a2a42a6bac6b

Importer (s/u-info) extensions: outer try/catch for parse and log of line no, catching parsing exception if not enough value fields, parsing error and warning log messages with line number, detecting and rejecting duplicate data series, better differentiation between error and warning log messages
author mschaefer
date Mon, 23 Mar 2020 14:57:03 +0100
parents c43d8c1a4455
children
line wrap: on
line source
/* Copyright (C) 2011, 2012, 2013 by Bundesanstalt für Gewässerkunde
 * Software engineering by Intevation GmbH
 *
 * This file is Free Software under the GNU AGPL (>=v3)
 * and comes with ABSOLUTELY NO WARRANTY! Check out the
 * documentation coming with Dive4Elements River for details.
 */

package org.dive4elements.river.importer.parsers;

import java.io.File;
import java.io.IOException;
import java.math.BigDecimal;
import java.text.NumberFormat;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;
import org.dive4elements.river.backend.utils.DateUtil;
import org.dive4elements.river.importer.ImportRange;
import org.dive4elements.river.importer.ImportTimeInterval;
import org.dive4elements.river.importer.ImportUnit;
import org.dive4elements.river.importer.ImportWst;
import org.dive4elements.river.importer.ImportWstColumn;
import org.dive4elements.river.importer.ImportWstColumnValue;
import org.dive4elements.river.importer.ImportWstQRange;
import org.dive4elements.river.importer.common.AbstractParser;


/**
 * Parses CSV waterlevel files.
 *
 * <p>Each parsed file yields one {@link ImportWst} with exactly one column.
 * Lines are dispatched by {@link #handleLine(int, String)} to one of three
 * handlers: meta lines (unit declaration), Q range header lines
 * ("Abfluss [unit];value") and data lines ("station;value").
 *
 * <p>As these waterlevels are probably used in fixation analysis
 * only, functionality to export them to "fixation"-wsts
 * has been added (the ImportWaterlevel*- stuff is actually
 * not needed to do so.)
 */
public class WaterlevelParser extends LineParser {

    private static final Logger log = Logger.getLogger(WaterlevelParser.class);

    // NOTE(review): not referenced anywhere inside this class; kept so the
    // file-level import stays in use. Candidate for removal.
    private static final NumberFormat nf =
            NumberFormat.getInstance(DEFAULT_LOCALE);

    /** Matches a Q range header; group 1 is the unit, group 2 the Q value. */
    private static final Pattern META_Q_RANGE =
            Pattern.compile("Abfluss\\s\\[(.*)\\];(.*)");

    /** Matches the unit meta line; group 1 is the unit name. */
    public static final Pattern META_UNIT =
            Pattern.compile("^Einheit: \\[(.*)\\].*");

    /** Length added to a km range whose start and end station coincide. */
    public static final BigDecimal INTERVAL_GAP = new BigDecimal("0.00001");

    /** All waterlevels parsed so far, one entry per finished file. */
    private final List<ImportWst> waterlevels;

    /** The waterlevel currently being filled. */
    private ImportWst current;

    /** The Waterlevel-Wst s will always have but one column. */
    private ImportWstColumn column;

    /** The current (incomplete) Q Range, or null if none has been opened. */
    private ImportWstQRange currentQRange;

    /** The current (incomplete) km range for Q Range, or null if none. */
    private ImportRange currentRange;

    /** Name of the file being parsed; used as wst and column description. */
    private String currentDescription;


    public WaterlevelParser() {
        this.waterlevels = new ArrayList<>();
    }


    /**
     * @return the (live, mutable) list of waterlevels parsed so far.
     */
    public List<ImportWst> getWaterlevels() {
        return this.waterlevels;
    }


    /**
     * Remembers the file name as description and delegates the actual
     * parsing to the super class.
     *
     * @param file the CSV file to parse.
     * @throws IOException propagated from the super class parse.
     */
    @Override
    public void parse(final File file) throws IOException {
        this.currentDescription = file.getName();

        super.parse(file);
    }


    /**
     * Starts a fresh single-column waterlevel named after the current
     * description and clears any Q range state left over from a
     * previously parsed file.
     */
    @Override
    protected void reset() {
        this.currentQRange = null;
        // Also drop the km range; otherwise a range object of the previous
        // file would stay referenced while the next file is parsed.
        this.currentRange  = null;
        this.current       = new ImportWst(this.currentDescription);
        this.current.setNumberColumns(1);
        this.column        = this.current.getColumn(0);
        this.column.setName(this.currentDescription);
        this.column.setDescription(this.currentDescription);

        // Try to extract and set the TimeInterval: groups 1 and 2 of the
        // shared pattern are read as start and end year.
        final Matcher m = WaterlevelDifferencesParser.YEARS_IN_COLUMN.matcher(
                this.currentDescription);

        if (m.matches()) {
            final int startYear = Integer.parseInt(m.group(1));
            final int endYear   = Integer.parseInt(m.group(2));
            final ImportTimeInterval time = new ImportTimeInterval(
                    DateUtil.getStartDateFromYear(startYear),
                    DateUtil.getEndDateFromYear(endYear)
                    );
            this.column.setTimeInterval(time);
        } else {
            log.debug("No time interval in column header found: "
                    + this.currentDescription);
        }

        this.current.setKind(7);
    }


    /**
     * Closes the still open Q range (if any) and stores the current
     * waterlevel in the result list.
     */
    @Override
    protected void finish() {
        if (this.current != null) {
            closeCurrentQRange();
            this.waterlevels.add(this.current);
        }
    }

    /**
     * Dispatches a line: meta lines first, then Q range headers,
     * everything else is treated as a data row.
     *
     * @param lineNum number of the line (not used by this parser).
     * @param line the line content.
     */
    @Override
    protected void handleLine(final int lineNum, final String line) {
        if (line.startsWith(START_META_CHAR)) {
            handleMetaLine(stripMetaLine(line));
            return;
        }
        // A Q range header whose value cannot be parsed is not consumed
        // and falls through to the data line handling, as before.
        if (!handleQRange(line)) {
            handleDataLine(line);
        }
    }


    /**
     * Sets the unit of the current waterlevel if the meta line declares
     * one; all other meta lines are ignored.
     *
     * @param meta the meta line as returned by stripMetaLine.
     */
    private void handleMetaLine(final String meta) {
        final Matcher m = META_UNIT.matcher(meta);

        if (m.matches()) {
            final String unit = m.group(1);
            log.debug("Found unit: '" + unit + "'");

            this.current.setUnit(new ImportUnit(unit));
        }
    }


    /**
     * Tries to interpret the line as Q range header. On success the
     * previously open Q range is closed and a new one is opened.
     *
     * @param line the line to inspect.
     * @return true if the line was a Q range header with a parseable
     *         value, false otherwise.
     */
    private boolean handleQRange(final String line) {
        final Matcher m = META_Q_RANGE.matcher(line);

        if (!m.matches()) {
            return false;
        }

        final String valueStr = m.group(2);

        // Parse first and only then touch the parser state: closing the
        // previous range before a failing parse would leave it registered
        // AND still current, so it would be added a second time later on.
        final BigDecimal q;
        try {
            q = AbstractParser.parseDecimal(valueStr);
        }
        catch (final ParseException pe) {
            log.warn("Unparseable Q range: '" + line + "'");
            return false;
        }

        closeCurrentQRange();

        this.currentQRange = new ImportWstQRange(null, q);
        this.currentRange = new ImportRange();

        log.debug("Found new Q range: Q=" + valueStr);

        return true;
    }


    /**
     * Completes the currently open Q range and adds it to the column.
     *
     * <p>The end of the km range is set to the station of the last column
     * value, or expanded by {@link #INTERVAL_GAP} in case the range would
     * have length 0. A Q range that received no data rows has no start
     * station and is dropped with a warning instead of failing.
     * Does nothing if no Q range is open.
     */
    private void closeCurrentQRange() {
        if (this.currentQRange == null) {
            return;
        }

        final List<ImportWstColumnValue> cValues =
                this.column.getColumnValues();

        final boolean hasStart = this.currentRange != null
                && this.currentRange.getA() != null;

        if (hasStart && !cValues.isEmpty()) {
            // TODO: should otherwise be extended to
            // (first station of next range - INTERVAL_GAP),
            // assuming always ascending stations
            final BigDecimal lastStation = cValues.get(cValues.size() - 1)
                    .getPosition();
            if (lastStation.compareTo(this.currentRange.getA()) == 0) {
                this.currentRange.setB(lastStation.add(INTERVAL_GAP));
            }
            else {
                this.currentRange.setB(lastStation);
            }

            this.currentQRange.setRange(this.currentRange);
            this.column.addColumnQRange(this.currentQRange);
        }
        else {
            log.warn("skip Q range without waterlevel values");
        }

        // The range is done either way; never close it twice.
        this.currentQRange = null;
        this.currentRange = null;
    }


    /**
     * Parses a "station;value" row and adds it to the column. The first
     * station seen after a Q range header becomes the start of that
     * range. Invalid rows are logged and skipped.
     *
     * @param line the data row.
     */
    private void handleDataLine(final String line) {
        final String[] cols = line.split(SEPERATOR_CHAR);

        if (cols == null || cols.length < 2) {
            log.warn("skip invalid waterlevel line: '" + line + "'");
            return;
        }

        try {
            // Store the value and remember the position for QRange, if needed.
            final BigDecimal station = AbstractParser.parseDecimal(cols[0]);
            final BigDecimal value = AbstractParser.parseDecimal(cols[1]);

            this.column.addColumnValue(station, value);

            // Data rows may precede the first Q range header; in that case
            // there is no open range to update.
            if (this.currentRange != null
                    && this.currentRange.getA() == null) {
                this.currentRange.setA(station);
            }
        }
        catch (final ParseException pe) {
            log.warn("Unparseable number in data row: " + line);
        }
    }
}
// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org