view backend/src/main/java/org/dive4elements/river/importer/parsers/WaterlevelDifferencesParser.java @ 9650:a2a42a6bac6b

Importer (s/u-info) extensions: outer try/catch for parse and log of line no, catching parsing exception if not enough value fields, parsing error and warning log messages with line number, detecting and rejecting duplicate data series, better differentiation between error and warning log messages
author mschaefer
date Mon, 23 Mar 2020 14:57:03 +0100
parents c43d8c1a4455
children
line wrap: on
line source
/* Copyright (C) 2011, 2012, 2013 by Bundesanstalt für Gewässerkunde
 * Software engineering by Intevation GmbH
 *
 * This file is Free Software under the GNU AGPL (>=v3)
 * and comes with ABSOLUTELY NO WARRANTY! Check out the
 * documentation coming with Dive4Elements River for details.
 */

package org.dive4elements.river.importer.parsers;

import java.io.File;
import java.io.IOException;
import java.math.BigDecimal;
import java.text.NumberFormat;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;
import org.dive4elements.river.backend.utils.DateUtil;
import org.dive4elements.river.importer.ImportTimeInterval;
import org.dive4elements.river.importer.ImportUnit;
import org.dive4elements.river.importer.ImportWst;
import org.dive4elements.river.importer.ImportWstColumn;
import org.dive4elements.river.importer.ImportWstColumnValue;
import org.dive4elements.river.importer.ImportWstQRange;
import org.dive4elements.river.importer.common.AbstractParser;


/**
 * Parse WaterlevelDifferences CSV file.
 *
 * <p>Lines starting with {@code START_META_CHAR} are meta lines: they either
 * carry the unit (see {@link #META_UNIT}) or the column header
 * ("Fluss-km;&lt;name&gt;;&lt;name&gt;..."). Every other line is a data line
 * of the form {@code station;value;value;...}, contributing one value per
 * column. The result of each parsed file is one {@link ImportWst}, available
 * through {@link #getDifferences()}.
 */
public class WaterlevelDifferencesParser extends LineParser {

    private static final Logger log =
            Logger.getLogger(WaterlevelDifferencesParser.class);

    /** Matches the unit meta line; group 1 is the text inside the brackets. */
    public static final Pattern META_UNIT =
            Pattern.compile("^Einheit: \\[(.*)\\].*");

    /** Matches column names ending in "YYYY-YYYY"; groups 1 and 2 are the years. */
    public static final Pattern YEARS_IN_COLUMN =
            Pattern.compile(".*(\\d{4})-(\\d{4})$");

    /**
     * Matches the column header meta line; group 1 is everything after
     * "Fluss-km;". Compiled once instead of on every meta line.
     */
    private static final Pattern META_COLUMN_NAMES =
            Pattern.compile("Fluss-km;(.*)");

    /** Minimal length given to a Q range whose start and end station coincide. */
    public static final BigDecimal INTERVAL_GAP = new BigDecimal("0.00001");

    /** Q value (-1, scale 0) of the single Q range attached to every column. */
    private static final BigDecimal DIFFERENCE_Q = BigDecimal.valueOf(-1L);

    /** List of parsed differences as ImportWst s. */
    private final List<ImportWst> differences;

    /** Columns of the current dataset; null until a column header was read. */
    private ImportWstColumn[] columns;

    /** The currently processed dataset. */
    private ImportWst current;


    public WaterlevelDifferencesParser() {
        this.differences = new ArrayList<>();
    }


    /** Get the differences as wst parsed so far. */
    public List<ImportWst> getDifferences() {
        return this.differences;
    }


    /**
     * Parse a waterlevel differences CSV file and create an ImportWst object
     * from it.
     *
     * @param file the CSV file; its name becomes the description of the
     *             created ImportWst, which is marked as kind 6
     * @throws IOException if the underlying line parser fails to read the file
     */
    @Override
    public void parse(final File file) throws IOException {
        this.current = new ImportWst(file.getName());
        this.current.setKind(6);

        super.parse(file);
    }


    /** No rewind implemented. */
    @Override
    protected void reset() {
    }


    /**
     * Finalize the current dataset: register it in the list of differences
     * and give every column a single Q range with Q = -1 spanning its first
     * to its last station. Afterwards the per-file state is cleared.
     *
     * <p>A file without a column header produces no dataset; it is skipped
     * with a warning instead of failing with a NullPointerException.
     */
    @Override
    protected void finish() {
        if (this.columns == null) {
            log.warn("No column header found, skip waterlevel-diff dataset");
        }
        else {
            if (this.current != null) {
                // TODO figure out whether a current.addColumn(col) per column
                //      is needed here, as the columns are registered at
                //      their construction time.
                this.differences.add(this.current);
            }

            for (int i = 0; i < this.columns.length; i++) {
                addDifferenceQRange(i, this.columns[i]);
            }
        }

        this.current = null;
        this.columns = null;
    }


    /**
     * Add a single Q range with Q = -1 to the given column, reaching from the
     * position of its first value to the position of its last value.
     *
     * <p>The range is expanded by {@link #INTERVAL_GAP} in case it would
     * have length 0.
     * TODO: should otherwise be extended to
     * (first station of next range - INTERVAL_GAP),
     * assuming always ascending stations
     *
     * @param index  index of the column, used for logging only
     * @param column the column to add the Q range to
     */
    private void addDifferenceQRange(
            final int index,
            final ImportWstColumn column) {
        final List<ImportWstColumnValue> cValues = column.getColumnValues();

        // A column without any value has no station to span a range over.
        if (cValues == null || cValues.isEmpty()) {
            log.warn("No values in waterlevel-diff column " + index
                    + ", skip Q range");
            return;
        }

        final BigDecimal from = cValues.get(0).getPosition();
        BigDecimal to = cValues.get(cValues.size() - 1).getPosition();
        if (from.compareTo(to) == 0) {
            to = to.add(INTERVAL_GAP);
        }

        column.addColumnQRange(new ImportWstQRange(from, to, DIFFERENCE_Q));
    }


    /**
     * Dispatch one line: meta lines (starting with START_META_CHAR) are
     * stripped and handled as meta data, all other lines as data.
     *
     * @param lineNum number of the line (currently unused)
     * @param line    the line content
     */
    @Override
    protected void handleLine(final int lineNum, final String line) {
        if (line.startsWith(START_META_CHAR)) {
            handleMetaLine(stripMetaLine(line));
        }
        else {
            handleDataLine(line);
        }
    }


    /** Try the meta line as unit first, then as column header. */
    private void handleMetaLine(final String meta) {
        if (!handleMetaUnit(meta)) {
            handleMetaColumnNames(meta);
        }
    }


    /**
     * Set the unit of the current dataset if the meta line is a unit line.
     *
     * @param meta the stripped meta line
     * @return true if the line matched {@link #META_UNIT}
     */
    private boolean handleMetaUnit(final String meta) {
        final Matcher m = META_UNIT.matcher(meta);

        if (!m.matches()) {
            return false;
        }

        final String unit = m.group(1);
        log.debug("Found unit: '" + unit + "'");

        this.current.setUnit(new ImportUnit(unit));

        return true;
    }


    /**
     * Set up the columns if the meta line is the column header
     * ("Fluss-km;" followed by the separated column names).
     *
     * @param meta the stripped meta line
     * @return true if the line was a column header
     */
    private boolean handleMetaColumnNames(final String meta) {
        final Matcher m = META_COLUMN_NAMES.matcher(meta);

        if (!m.matches()) {
            return false;
        }

        final String[] cols = m.group(1).split(SEPERATOR_CHAR);

        log.debug("Found " + cols.length + " columns.");

        initColumns(cols);

        return true;
    }


    /** Setup column structures with name, description and time interval. */
    private void initColumns(final String[] cols) {
        this.current.setNumberColumns(cols.length);
        this.columns = this.current.getColumns().toArray(
                new ImportWstColumn[cols.length]);

        for (int i = 0; i < cols.length; i++) {
            // Column names may be quoted in the CSV header.
            final String name = cols[i].replace("\"", "");

            log.debug("Create new column '" + name + "'");
            final ImportWstColumn column = this.current.getColumn(i);
            column.setName(name);
            column.setDescription(name);

            // A name ending in "YYYY-YYYY" defines the column's time interval.
            final Matcher m = YEARS_IN_COLUMN.matcher(name);

            if (m.matches()) {
                final int startYear = Integer.parseInt(m.group(1));
                final int endYear   = Integer.parseInt(m.group(2));
                final ImportTimeInterval time = new ImportTimeInterval(
                        DateUtil.getStartDateFromYear(startYear),
                        DateUtil.getEndDateFromYear(endYear)
                        );
                column.setTimeInterval(time);
            } else {
                log.debug("No time interval in column header found: " + name);
            }
        }
    }


    /** Handle one line of data, add one value for all columns.
     *
     * <p>Lines that cannot be used (too few fields, no column header read
     * yet, unparsable station) are skipped with a warning; single
     * unparsable or empty values only skip that value.
     *
     * @param line the line to parse
     */
    private void handleDataLine(final String line) {
        // Split by separator, do not exclude trailing empty string.
        final String[] cols = line.split(SEPERATOR_CHAR, -1);

        if (cols.length < 2) {
            log.warn("skip invalid waterlevel-diff line: '" + line + "'");
            return;
        }

        // Without a column header there is nothing to attach the values to.
        if (this.columns == null) {
            log.warn("skip waterlevel-diff line before column header: '"
                    + line + "'");
            return;
        }

        // The first value in a line like 12,9;4,3;4,5 is the station,
        // later real values.
        final BigDecimal station;
        try {
            station = AbstractParser.parseDecimal(cols[0]);
        }
        catch (final ParseException pe) {
            log.warn("Could not parse station: '" + line + "'");
            return;
        }

        for (int i = 0; i < this.columns.length; i++) {
            final int idx = i + 1;

            if (idx >= cols.length) {
                // All remaining columns are missing as well: warn once.
                log.warn("Insufficient column numbers: " + line);
                break;
            }

            final String value = cols[idx];

            if (value == null || value.isEmpty()) {
                continue;
            }

            try {
                this.columns[i].addColumnValue(
                        station,
                        AbstractParser.parseDecimal(value));
            }
            catch (final ParseException pe) {
                log.warn("Could not parse value: '" + value + "'");
            }
        }
    }
}
// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org