view backend/src/main/java/org/dive4elements/river/importer/parsers/SedimentYieldParser.java @ 7252:c894b7b45c4c

Replaced some tabs by spaces. Guys, please configure your editors not to insert tabs!
author Sascha L. Teichmann <teichmann@intevation.de>
date Sat, 05 Oct 2013 12:48:44 +0200
parents 563f3793d334
children 9ec6def1a7dd
line wrap: on
line source
/* Copyright (C) 2011, 2012, 2013 by Bundesanstalt für Gewässerkunde
 * Software engineering by Intevation GmbH
 *
 * This file is Free Software under the GNU AGPL (>=v3)
 * and comes with ABSOLUTELY NO WARRANTY! Check out the
 * documentation coming with Dive4Elements River for details.
 */

package org.dive4elements.river.importer.parsers;

import java.io.File;
import java.io.IOException;

import java.text.NumberFormat;
import java.text.ParseException;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;

import org.dive4elements.river.importer.ImportGrainFraction;
import org.dive4elements.river.importer.ImportSedimentYield;
import org.dive4elements.river.importer.ImportSedimentYieldValue;
import org.dive4elements.river.importer.ImportTimeInterval;
import org.dive4elements.river.importer.ImportUnit;
import org.dive4elements.river.model.GrainFraction;


/**
 * Parses Sediment Yield files.
 *
 * <p>A file consists of meta lines (lines starting with
 * <code>START_META_CHAR</code>) and data lines. The meta lines provide the
 * unit (<code>Einheit: [...]</code>), the grain fraction
 * (<code>Fraktion: ...</code>) and the column header
 * (<code>Fluss-km...</code>). Each column between the first one (km) and the
 * last one (remarks) of the header yields one {@link ImportSedimentYield};
 * the data lines add the values to these yields.</p>
 *
 * <p>The parser keeps per-file state in its fields and shares one static
 * {@link NumberFormat}; <code>NumberFormat</code> instances are documented
 * as not synchronized, so a parser must not be used from several threads
 * at once.</p>
 */
public class SedimentYieldParser extends LineParser {

    private static final Logger log =
        Logger.getLogger(SedimentYieldParser.class);


    /** Number format (created for DEFAULT_LOCALE) used for all numbers. */
    public static final NumberFormat nf = NumberFormat.getInstance(DEFAULT_LOCALE);


    /** Prefix of the meta line that carries the grain fraction. */
    public static final String FRAKTION_START = "Fraktion:";

    /** File name regex selecting {@link GrainFraction#COARSE}. */
    public static final String FRACTION_COARSE_STR =
        ".*Grobkorn.*";

    /** File name regex selecting {@link GrainFraction#FINE_MIDDLE}. */
    public static final String FRACTION_FINE_MIDDLE_STR =
        ".*Fein.Mittel.Kies.*";

    /** File name regex selecting {@link GrainFraction#SAND}. */
    public static final String FRACTION_SAND =
        ".*Sand.*";

    /** File name regex selecting {@link GrainFraction#SUSP_SAND}. */
    public static final String FRACTION_SUSP_SAND =
        ".*susp.Sand.*";

    /** File name regex selecting {@link GrainFraction#SUSP_SAND_BED}. */
    public static final String FRACTION_SUSP_SAND_BED =
        ".*bettbild.Anteil.susp.Sand.*";

    /** Alternative file name regex selecting
     * {@link GrainFraction#SUSP_SAND_BED}. */
    public static final String FRACTION_SUSP_SAND_BED_EPOCH =
        ".*susp.Sand.bettbildAnteil.*";

    /** File name regex selecting {@link GrainFraction#SUSPENDED_SEDIMENT}. */
    public static final String FRACTION_SUSPENDED_SEDIMENT =
        ".*Schwebstoff.*";

    /** File name regex selecting {@link GrainFraction#TOTAL}. */
    public static final String FRACTION_TOTAL =
        ".*gesamt.*";


    /** Column title containing a single year; group 1 is the year. */
    public static final Pattern TIMEINTERVAL_SINGLE =
        Pattern.compile("\\D*([0-9]+?)\\D*");

    /** Column title containing a range of years;
     * group 1 is the first year, group 2 the last year. */
    public static final Pattern TIMEINTERVAL_EPOCH =
        Pattern.compile("\\D*([0-9]+?)\\s*-\\s*([0-9]+?)\\D*");

    /** Grain fraction meta line; group 1 is the fraction description. */
    public static final Pattern META_FRACTION =
        Pattern.compile("^Fraktion: (.*)");

    /** Unit meta line; group 1 is the unit name inside the brackets. */
    public static final Pattern META_UNIT =
        Pattern.compile("^Einheit: \\[(.*)\\].*");

    /** Column header meta line. */
    public static final Pattern META_COLUMN_NAMES =
        Pattern.compile("^Fluss-km.*");

    /**
     * Fraction given as "lower - upper unit".
     * Groups: 2 = decimal lower bound, 3 = integer lower bound,
     * 4 = upper bound (decimal or integer).
     */
    public static final Pattern META_GRAIN_FRACTION_A =
        Pattern.compile("\\D*(([0-9]+?,[0-9]+?)\\s*-|([0-9]++)\\s*-)(([0-9]+?,[0-9]+?)|([0-9]++))\\s*([a-zA-Z]+?)\\W*\\D*");

    /**
     * Fraction given as "operator word lower - upper unit".
     * Groups: 4 = decimal lower bound, 5 = integer lower bound,
     * 6 = upper bound (decimal or integer).
     */
    public static final Pattern META_GRAIN_FRACTION_B =
        Pattern.compile("(<|>){1}\\s*(\\w++)\\s*(([0-9]+?,[0-9]+?)\\s*-|([0-9]++)\\s*-)(([0-9]+?,[0-9]+?)|([0-9]++))\\s*([a-zA-Z]+?)");

    /**
     * Fraction given as "operator value unit".
     * Groups: 1 = operator ("&lt;" or "&gt;"), 3 = value.
     */
    public static final Pattern META_GRAIN_FRACTION_C =
        Pattern.compile("(<|>){1}\\s*((([0-9]+?,[0-9]+?)|([0-9]++))\\s*(\\w+))");


    /** All sediment yields of the files parsed so far. */
    protected List<ImportSedimentYield> sedimentYields;

    /** Sediment yields of the file currently parsed, one per data column;
     * null until the column header was seen. */
    protected ImportSedimentYield[] current;

    /** Grain fraction of the file currently parsed. */
    protected ImportGrainFraction grainFraction;

    /** Unit of the file currently parsed. */
    protected ImportUnit unit;

    /** Name of the file currently parsed; used as description of the
     * yields and to derive the grain fraction type. */
    protected String description;

    /** Cells of the column header of the file currently parsed. */
    protected String[] columnNames;


    public SedimentYieldParser() {
        sedimentYields = new ArrayList<ImportSedimentYield>();
    }


    /**
     * Remembers the name of the file as description and parses it.
     *
     * @param file The file to parse.
     * @throws IOException if the superclass fails to read the file.
     */
    @Override
    public void parse(File file) throws IOException {
        description = file.getName();

        super.parse(file);
    }


    /** Drops the per-file state. */
    @Override
    protected void reset() {
        current       = null;
        grainFraction = null;
        unit          = null;
        // The header belongs to exactly one file. Keeping it would let data
        // lines of a later, header-less file be checked against a stale
        // header while 'current' is already null.
        columnNames   = null;
    }


    /** Moves the yields of the current file to the overall result. */
    @Override
    protected void finish() {
        if (current != null) {
            for (ImportSedimentYield isy: current) {
                sedimentYields.add(isy);
            }
        }

        description = null;
    }


    /** Dispatches a line to the meta or to the data line handling. */
    @Override
    protected void handleLine(int lineNum, String line) {
        if (line.startsWith(START_META_CHAR)) {
            handleMetaLine(stripMetaLine(line));
        }
        else {
            handleDataLine(line);
        }
    }


    /**
     * Tries the known meta line handlers in turn (unit, fraction, column
     * header) and logs a warning if none of them accepts the line.
     *
     * @param line The meta line as returned by stripMetaLine().
     */
    protected void handleMetaLine(String line) {
        if (handleMetaUnit(line)) {
            return;
        }
        if (handleMetaFraction(line)) {
            return;
        }
        if (handleColumnNames(line)) {
            return;
        }

        log.warn("SYP: Unknown meta line: '" + line + "'");
    }


    /**
     * Extracts the unit from a line matching {@link #META_UNIT}.
     *
     * @return true if the line was a unit line.
     */
    protected boolean handleMetaUnit(String line) {
        Matcher m = META_UNIT.matcher(line);

        if (!m.matches()) {
            return false;
        }

        unit = new ImportUnit(m.group(1));
        return true;
    }


    /**
     * Extracts the grain fraction from a fraction meta line. A line that
     * consists of nothing but {@link #FRAKTION_START} selects the total
     * grain fraction.
     *
     * @return true if the line was a fraction line.
     */
    public boolean handleMetaFraction(String line) {
        Matcher m = META_FRACTION.matcher(line);

        if (m.matches()) {
            // May be null if the description is not understood.
            this.grainFraction = buildGrainFraction(m.group(1));
            return true;
        }

        if (line.startsWith(FRAKTION_START)) {
            String rest = line.replace(FRAKTION_START, "").trim();
            if (rest.length() == 0) {
                log.debug("Found total grain fraction.");
                this.grainFraction =
                    new ImportGrainFraction(GrainFraction.TOTAL);
                return true;
            }
        }

        return false;
    }


    /**
     * Stores the cells of a column header line and creates the sediment
     * yields for its columns.
     *
     * @return true if the line was a column header line.
     */
    public boolean handleColumnNames(String line) {
        Matcher m = META_COLUMN_NAMES.matcher(line);

        if (!m.matches()) {
            return false;
        }

        columnNames = line.split(SEPERATOR_CHAR);

        initializeSedimentYields();

        return true;
    }


    /**
     * Parses a data line: the first cell is the km, each following cell up
     * to (but not including) the last header column is the value of the
     * sediment yield of that column. Empty cells are skipped. Lines that
     * cannot be handled are logged and ignored.
     *
     * @param line The data line.
     */
    protected void handleDataLine(String line) {
        if (columnNames == null || current == null) {
            // Data before (or without) a column header: nothing to add to.
            log.warn("SYP: skip data line without column header: '"
                + line + "'");
            return;
        }

        // Limit -1 keeps trailing empty cells. Without it a row whose last
        // columns are empty looks too short and would be dropped entirely.
        String[] vals = line.split(SEPERATOR_CHAR, -1);

        if (vals.length < columnNames.length-1) {
            log.warn("SYP: skip invalid data line: '" + line + "'");
            return;
        }

        try {
            Double km = nf.parse(vals[0].trim()).doubleValue();

            for (int i = 1, n = columnNames.length-1; i < n; i++) {
                // Trim: NumberFormat.parse() does not accept leading blanks.
                String curVal = vals[i].trim();

                if (curVal.length() > 0) {
                    current[i-1].addValue(new ImportSedimentYieldValue(
                        km, nf.parse(curVal).doubleValue()
                    ));
                }
            }
        }
        catch (ParseException pe) {
            log.warn("SYP: unparseable number in data row '" + line + "':", pe);
        }
    }


    /** Initialize SedimentYields from columns, set the kind
     * with respect to file location (official epoch or not?) */
    private void initializeSedimentYields() {
        // skip first column (Fluss-km) and last column (Hinweise);
        // never negative, even for a header with a single column.
        int count = Math.max(0, columnNames.length-2);

        current = new ImportSedimentYield[count];

        boolean official =
            inputFile.getAbsolutePath().contains("amtliche Epochen");

        Integer kind = Integer.valueOf(official ? 1 : 0);

        for (int i = 0; i < count; i++) {
            ImportSedimentYield isy = new ImportSedimentYield(this.description);
            isy.setTimeInterval(getTimeInterval(columnNames[i+1]));
            isy.setUnit(unit);
            isy.setGrainFraction(grainFraction);
            isy.setKind(kind);
            current[i] = isy;
        }
    }


    /**
     * Derives the time interval from a column title, which is either a
     * range of years or a single year.
     *
     * @return the time interval or null if the title is not understood.
     */
    private ImportTimeInterval getTimeInterval(String column) {
        try {
            Matcher a = TIMEINTERVAL_EPOCH.matcher(column);
            if (a.matches()) {
                int yearA = nf.parse(a.group(1)).intValue();
                int yearB = nf.parse(a.group(2)).intValue();

                return new ImportTimeInterval(
                    getStartDateFromYear(yearA),
                    getEndDateFromYear(yearB)
                );
            }

            Matcher b = TIMEINTERVAL_SINGLE.matcher(column);
            if (b.matches()) {
                int year = nf.parse(b.group(1)).intValue();

                return new ImportTimeInterval(getStartDateFromYear(year));
            }

            log.warn("SYP: Unknown time interval string: '" + column + "'");
        }
        catch (ParseException pe) {
            log.warn("SYP: Could not parse years: " + column, pe);
        }

        return null;
    }


    /**
     * Builds the grain fraction from the text of a fraction meta line.
     * The three META_GRAIN_FRACTION patterns are tried in order; if the
     * numbers of a matching pattern cannot be parsed the next pattern is
     * tried.
     *
     * @param gfStr The fraction description.
     * @return the grain fraction or null if the text is not understood.
     */
    private ImportGrainFraction buildGrainFraction(String gfStr) {
        Matcher a = META_GRAIN_FRACTION_A.matcher(gfStr);
        if (a.matches()) {
            String lower = a.group(2) != null ? a.group(2) : a.group(3);

            // Group 4 encloses both upper bound alternatives.
            ImportGrainFraction gf = buildRange(lower, a.group(4), gfStr);
            if (gf != null) {
                return gf;
            }
        }

        Matcher b = META_GRAIN_FRACTION_B.matcher(gfStr);
        if (b.matches()) {
            String lower = b.group(4) != null ? b.group(4) : b.group(5);

            // Group 6 encloses both upper bound alternatives.
            ImportGrainFraction gf = buildRange(lower, b.group(6), gfStr);
            if (gf != null) {
                return gf;
            }
        }

        Matcher c = META_GRAIN_FRACTION_C.matcher(gfStr);
        if (c.matches()) {
            String oper     = c.group(1);
            String valueStr = c.group(3);

            try {
                Double value = nf.parse(valueStr).doubleValue();

                String type = getGrainFractionTypeName(this.description);

                // "> x" is an open range starting at x,
                // "< x" an open range ending at x.
                if (oper.equals(">")) {
                    return new ImportGrainFraction(type, value, null);
                }
                return new ImportGrainFraction(type, null, value);
            }
            catch (ParseException pe) {
                log.warn(
                    "SYP: Could not parse ranges of: '" + gfStr + "'", pe);
            }
        }

        log.warn("SYP: Unknown grain fraction: '" + gfStr + "'");

        return null;
    }


    /**
     * Builds a grain fraction with a lower and an upper bound.
     *
     * @param lower The text of the lower bound.
     * @param upper The text of the upper bound.
     * @param gfStr The complete fraction description (for logging only).
     * @return the grain fraction or null if a bound cannot be parsed.
     */
    private ImportGrainFraction buildRange(
        String lower,
        String upper,
        String gfStr
    ) {
        try {
            return new ImportGrainFraction(
                getGrainFractionTypeName(this.description),
                nf.parse(lower).doubleValue(),
                nf.parse(upper).doubleValue()
            );
        }
        catch (ParseException pe) {
            log.warn("SYP: Could not parse ranges of: '" + gfStr + "'", pe);
            return null;
        }
    }


    /**
     * Derives the name of the grain fraction type from a file name by
     * matching it against the FRACTION_* expressions. The more specific
     * "susp. Sand" expressions are checked before the plain "Sand" one.
     *
     * @param filename The name of the file; may be null.
     * @return the name of the grain fraction type or "unknown" if the
     *         file name is null or matches none of the expressions.
     */
    public static String getGrainFractionTypeName(String filename) {
        if (filename == null) {
            // Pattern.matches() would fail with a NullPointerException.
            log.warn("SYP: Unknown grain fraction '" + filename + "'");
            return "unknown";
        }

        if (Pattern.matches(FRACTION_COARSE_STR, filename)) {
            return GrainFraction.COARSE;
        }
        if (Pattern.matches(FRACTION_FINE_MIDDLE_STR, filename)) {
            return GrainFraction.FINE_MIDDLE;
        }
        if (Pattern.matches(FRACTION_SUSP_SAND_BED, filename) ||
            Pattern.matches(FRACTION_SUSP_SAND_BED_EPOCH, filename)) {
            return GrainFraction.SUSP_SAND_BED;
        }
        if (Pattern.matches(FRACTION_SUSP_SAND, filename)) {
            return GrainFraction.SUSP_SAND;
        }
        if (Pattern.matches(FRACTION_SAND, filename)) {
            return GrainFraction.SAND;
        }
        if (Pattern.matches(FRACTION_SUSPENDED_SEDIMENT, filename)) {
            return GrainFraction.SUSPENDED_SEDIMENT;
        }
        if (Pattern.matches(FRACTION_TOTAL, filename)) {
            return GrainFraction.TOTAL;
        }

        log.warn("SYP: Unknown grain fraction '" + filename + "'");
        return "unknown";
    }


    /** @return the sediment yields of all files parsed so far. */
    public List<ImportSedimentYield> getSedimentYields() {
        return sedimentYields;
    }
}
// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org