view backend/src/main/java/org/dive4elements/river/importer/parsers/SedimentLoadLSParser.java @ 8042:9342d7fe0ee7

Introduce LineParserException for CSV file parsing.
author Sascha L. Teichmann <teichmann@intevation.de>
date Wed, 16 Jul 2014 18:33:28 +0200
parents fd3a24336e6a
children bd0dea643440
line wrap: on
line source
/* Copyright (C) 2011, 2012, 2013 by Bundesanstalt für Gewässerkunde
 * Software engineering by Intevation GmbH
 *
 * This file is Free Software under the GNU AGPL (>=v3)
 * and comes with ABSOLUTELY NO WARRANTY! Check out the
 * documentation coming with Dive4Elements River for details.
 */

package org.dive4elements.river.importer.parsers;

import java.io.File;
import java.io.IOException;

import java.text.NumberFormat;
import java.text.ParseException;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;

import org.dive4elements.river.importer.ImporterSession;
import org.dive4elements.river.importer.ImportGrainFraction;
import org.dive4elements.river.importer.ImportSedimentLoadLS;
import org.dive4elements.river.importer.ImportSedimentLoadLSValue;
import org.dive4elements.river.importer.ImportTimeInterval;
import org.dive4elements.river.importer.ImportUnit;

import org.dive4elements.river.model.GrainFraction;

import org.dive4elements.river.utils.DateUtil;
import org.dive4elements.river.utils.EpsilonComparator;

/**
 * Parses sediment load longitudinal section files.
 *
 * <p>Lines starting with {@code START_META_CHAR} are handled as meta lines
 * (unit, grain-size interval, grain fraction name, column header); every
 * other line is handled as a data row. For each data column named in the
 * column header one {@link ImportSedimentLoadLS} is created; the parsed
 * objects are collected when a file is finished and can be fetched via
 * {@link #getSedimentLoadLSs()}.</p>
 *
 * <p>NOTE(review): {@code START_META_CHAR}, {@code SEPERATOR_CHAR},
 * {@code DEFAULT_LOCALE}, {@code inputFile} and {@code stripMetaLine()}
 * are not declared in this file; presumably they are inherited from
 * {@link LineParser}.</p>
 */
public class SedimentLoadLSParser extends LineParser {

    private static final Logger log =
        Logger.getLogger(SedimentLoadLSParser.class);


    /**
     * Number format used for all numeric values of the file.
     * java.text.NumberFormat instances are not synchronized, so this
     * shared instance must not be used from several threads at once.
     */
    public static final NumberFormat nf = NumberFormat.getInstance(DEFAULT_LOCALE);


    /** Column name containing a single run of digits (a year). */
    public static final Pattern TIMEINTERVAL_SINGLE =
        Pattern.compile("\\D*([0-9]+?)\\D*");

    /** Column name containing two runs of digits separated by '-'. */
    public static final Pattern TIMEINTERVAL_EPOCH =
        Pattern.compile("\\D*([0-9]+?)\\s*-\\s*([0-9]+?)\\D*");

    /** Meta line carrying the grain-size interval. */
    public static final Pattern META_FRACTION =
        Pattern.compile("^Fraktion: (.*)");

    /** Meta line carrying the name of the grain fraction. */
    public static final Pattern META_FRACTION_NAME =
        Pattern.compile("^Fraktionsname: (.*)");

    /** Meta line carrying the unit in square brackets. */
    public static final Pattern META_UNIT =
        Pattern.compile("^Einheit: \\[(.*)\\].*");

    /** Meta line carrying the column header. */
    public static final Pattern META_COLUMN_NAMES =
        Pattern.compile("^Fluss-km.*");

    /** Grain-size interval of the form "&lt;lower&gt;-&lt;upper&gt; mm". */
    public static final Pattern META_GRAIN_SIZE =
        Pattern.compile("([0-9]*,*[0-9]+)-([0-9]*,*[0-9]+) *mm");


    /** Results of all files finished so far. */
    protected List<ImportSedimentLoadLS> sedimentLoadLSs;

    /** One entry per data column of the file currently being parsed. */
    protected ImportSedimentLoadLS[] current;

    protected ImportGrainFraction grainFraction;

    protected ImportUnit unit;

    /** Name of the file currently being parsed. */
    protected String description;

    /** Column header of the current file, split at the separator. */
    protected String[] columnNames;

    /** Upper grain-size bound as found in the file (unparsed). */
    private String upper;

    /** Lower grain-size bound as found in the file (unparsed). */
    private String lower;


    public SedimentLoadLSParser() {
        sedimentLoadLSs = new ArrayList<ImportSedimentLoadLS>();
    }


    /**
     * Remembers the file name as description and delegates the actual
     * line-wise parsing to the super class.
     *
     * @param file the file to parse.
     * @throws IOException as thrown by the super class implementation.
     */
    @Override
    public void parse(File file) throws IOException {
        description = file.getName();

        super.parse(file);
    }


    /**
     * Clears all per-file state. This includes the column header and the
     * grain-size bounds, so that nothing read from a previously parsed
     * file can leak into the next one.
     */
    @Override
    protected void reset() {
        current       = null;
        grainFraction = null;
        unit          = null;
        columnNames   = null;
        lower         = null;
        upper         = null;
    }


    /**
     * Appends the sediment loads of the current file (if any) to the
     * overall result list and drops the description.
     */
    @Override
    protected void finish() {
        if (current != null) {
            for (ImportSedimentLoadLS isy: current) {
                sedimentLoadLSs.add(isy);
            }
        }

        description = null;
    }


    /**
     * Dispatches a line either to the meta line or to the data line
     * handling.
     *
     * @param lineNum number of the line (unused here).
     * @param line    the line content.
     * @throws LineParserException if the column header is incomplete.
     */
    @Override
    protected void handleLine(int lineNum, String line) throws LineParserException {
        if (line.startsWith(START_META_CHAR)) {
            handleMetaLine(stripMetaLine(line));
        }
        else {
            handleDataLine(line);
        }
    }


    /**
     * Tries the meta line handlers in fixed order (unit, grain-size
     * interval, fraction name, column header) and stops at the first one
     * that accepts the line. Lines accepted by none are logged.
     *
     * @param line the meta line as returned by {@code stripMetaLine()}.
     * @throws LineParserException if the column header is incomplete.
     */
    protected void handleMetaLine(String line) throws LineParserException {
        if (handleMetaUnit(line)
            || handleMetaFraction(line)
            || handleMetaFractionName(line)
            || handleColumnNames(line)
        ) {
            return;
        }
        log.warn("SLLSP: Unknown meta line: '" + line + "'");
    }


    /**
     * Handles the unit meta line.
     *
     * @param line the meta line.
     * @return true if the line was a unit line.
     */
    protected boolean handleMetaUnit(String line) {
        Matcher m = META_UNIT.matcher(line);

        if (!m.matches()) {
            return false;
        }

        unit = new ImportUnit(m.group(1));
        return true;
    }


    /**
     * Handles the grain-size interval meta line and remembers the lower
     * and upper bound for the later validation of the grain fraction.
     * If the interval cannot be recognized, previously remembered bounds
     * are discarded so that no stale interval gets validated.
     *
     * @param line the meta line.
     * @return true if the line was a grain-size line, regardless of
     *         whether its interval could be recognized.
     */
    public boolean handleMetaFraction(String line) {
        Matcher m = META_FRACTION.matcher(line);

        if (!m.matches()) {
            return false;
        }

        Matcher sizes = META_GRAIN_SIZE.matcher(m.group(1));
        if (sizes.matches()) {
            lower = sizes.group(1);
            upper = sizes.group(2);
        }
        else {
            lower = null;
            upper = null;
            log.warn("SLLSP: Unrecognized grain-size interval. Ignored.");
        }

        return true;
    }


    /**
     * Handles the grain fraction name meta line. The name is looked up
     * via the {@link ImporterSession}; if grain-size bounds were read
     * before, they are compared against the bounds of the grain fraction
     * found and a warning is logged on mismatch.
     *
     * @param line the meta line.
     * @return true if the line named a known grain fraction, false if
     *         it is no fraction name line or the fraction is unknown.
     */
    public boolean handleMetaFractionName(String line) {
        Matcher m = META_FRACTION_NAME.matcher(line);

        if (!m.matches()) {
            return false;
        }

        String name = m.group(1);

        GrainFraction gf = ImporterSession.getInstance().getGrainFraction(name);

        if (gf == null) {
            log.error("SLLSP: Unknown grain fraction: '" + name + "'");
            return false;
        }

        if (lower != null && upper != null) {
            // Validate grain size interval
            try {
                double lowval = nf.parse(lower).doubleValue();
                double upval  = nf.parse(upper).doubleValue();

                if (EpsilonComparator.CMP.compare(lowval,
                        gf.getLower()) != 0 ||
                    EpsilonComparator.CMP.compare(upval,
                        gf.getUpper()) != 0) {
                    log.warn("SLLSP: Invalid grain size for grain fraction '" +
                             name + "'. Ignored.");
                }
            }
            catch (ParseException pe) {
                log.warn("SLLSP: Could not parse grain-size interval. Ignored.");
            }
        }

        grainFraction = new ImportGrainFraction(gf);
        return true;
    }


    /**
     * Handles the column header meta line and sets up one sediment load
     * per data column.
     *
     * @param line the meta line.
     * @return true if the line was the column header.
     * @throws LineParserException if the header has less than three
     *         columns.
     */
    public boolean handleColumnNames(String line) throws LineParserException {
        Matcher m = META_COLUMN_NAMES.matcher(line);

        if (!m.matches()) {
            return false;
        }

        String[] names = line.split(SEPERATOR_CHAR);

        // 'Fluss-km', 'Hinweise' and at least one data column required
        if (names.length < 3) {
            throw new LineParserException("SLLSP: missing columns.");
        }

        // Only store the header once it is known to be usable.
        columnNames = names;

        initializeSedimentLoadLSs();

        return true;
    }


    /**
     * Handles a data row: the first cell is the km, the following cells
     * are the values of the data columns. Empty cells are skipped. Rows
     * found before the column header and rows with too few cells are
     * logged and ignored.
     *
     * @param line the data line.
     */
    protected void handleDataLine(String line) {
        if (columnNames == null || current == null) {
            // No column header seen yet, so the cells cannot be assigned.
            log.warn("SLLSP: skip data line before column header: '"
                + line + "'");
            return;
        }

        // Limit -1 keeps trailing empty cells; the default split() drops
        // them, which made rows with empty last columns look too short.
        String[] vals = line.split(SEPERATOR_CHAR, -1);

        // The last column (Hinweise) is not read.
        int n = columnNames.length-1;

        if (vals.length < n) {
            log.warn("SLLSP: skip invalid data line: '" + line + "'");
            return;
        }

        try {
            Double km = nf.parse(vals[0]).doubleValue();

            for (int i = 1; i < n; i++) {
                String curVal = vals[i];

                if (curVal != null && curVal.length() > 0) {
                    current[i-1].addValue(new ImportSedimentLoadLSValue(
                        km, nf.parse(curVal).doubleValue()
                    ));
                }
            }
        }
        catch (ParseException pe) {
            log.warn("SLLSP: unparseable number in data row '" + line + "':", pe);
        }
    }


    /** Initialize SedimentLoadLSs from columns, set the kind
     * with respect to file location (official epoch or not?) */
    private void initializeSedimentLoadLSs() {
        // skip first column (Fluss-km) and last column (Hinweise)
        int numLoads = columnNames.length-2;

        current = new ImportSedimentLoadLS[numLoads];

        // Kind is 1 if the file path contains "amtliche Epochen", else 0.
        Integer kind =
            inputFile.getAbsolutePath().contains("amtliche Epochen") ? 1 : 0;

        for (int i = 0; i < numLoads; i++) {
            ImportSedimentLoadLS load = new ImportSedimentLoadLS(this.description);
            load.setTimeInterval(getTimeInterval(columnNames[i+1]));
            load.setUnit(unit);
            load.setGrainFraction(grainFraction);
            load.setKind(kind);
            current[i] = load;
        }
    }


    /**
     * Derives the time interval from a column name. A name with two
     * numbers separated by '-' yields an interval from the start of the
     * first year to the end of the second year; a name with a single
     * number yields an interval with only a start date.
     *
     * @param column the column name.
     * @return the time interval or null if the column name matches
     *         neither form or its numbers cannot be parsed.
     */
    private ImportTimeInterval getTimeInterval(String column) {
        try {
            Matcher a = TIMEINTERVAL_EPOCH.matcher(column);
            if (a.matches()) {
                int yearA = nf.parse(a.group(1)).intValue();
                int yearB = nf.parse(a.group(2)).intValue();

                return new ImportTimeInterval(
                    DateUtil.getStartDateFromYear(yearA),
                    DateUtil.getEndDateFromYear(yearB)
                );
            }

            Matcher b = TIMEINTERVAL_SINGLE.matcher(column);
            if (b.matches()) {
                int year = nf.parse(b.group(1)).intValue();

                return new ImportTimeInterval(DateUtil.getStartDateFromYear(year));
            }

            log.warn("SLLSP: Unknown time interval string: '" + column + "'");
        }
        catch (ParseException pe) {
            log.warn("SLLSP: Could not parse years: " + column, pe);
        }

        return null;
    }


    /**
     * @return the sediment loads of all files finished so far. The
     *         internal list is returned directly, not a copy.
     */
    public List<ImportSedimentLoadLS> getSedimentLoadLSs() {
        return sedimentLoadLSs;
    }
}
// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org