view backend/src/main/java/org/dive4elements/river/importer/parsers/SedimentYieldParser.java @ 6328:53d08f33d094

Backend: Moved guessing of main values and their time intervals out of the STA parser. The same code will be useful to extend the WST parser to better handle official lines.
author Sascha L. Teichmann <teichmann@intevation.de>
date Thu, 13 Jun 2013 17:15:34 +0200
parents 231baa52daa3
children 1e2dfd91274d
line wrap: on
line source
/* Copyright (C) 2011, 2012, 2013 by Bundesanstalt für Gewässerkunde
 * Software engineering by Intevation GmbH
 *
 * This file is Free Software under the GNU AGPL (>=v3)
 * and comes with ABSOLUTELY NO WARRANTY! Check out the
 * documentation coming with Dive4Elements River for details.
 */

package org.dive4elements.river.importer.parsers;

import java.io.File;
import java.io.IOException;

import java.text.NumberFormat;
import java.text.ParseException;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;

import org.dive4elements.river.importer.ImportGrainFraction;
import org.dive4elements.river.importer.ImportSedimentYield;
import org.dive4elements.river.importer.ImportSedimentYieldValue;
import org.dive4elements.river.importer.ImportTimeInterval;
import org.dive4elements.river.importer.ImportUnit;
import org.dive4elements.river.model.GrainFraction;


/**
 * Line based parser for sediment yield files.
 *
 * <p>Lines starting with {@code START_META_CHAR} are handled as meta lines.
 * A meta line may carry the unit ({@code Einheit: [...]}), the grain
 * fraction ({@code Fraktion: ...}) or the column header
 * ({@code Fluss-km...}). Every other line is handled as a data row: the
 * first column is parsed as the km value, the last column is skipped and
 * each column in between contributes one value to the
 * {@link ImportSedimentYield} created for that column.</p>
 *
 * <p>The grain fraction type name is derived from the name of the parsed
 * file, see {@link #getGrainFractionTypeName(String)}.</p>
 *
 * <p>NOTE(review): {@code START_META_CHAR}, {@code SEPERATOR_CHAR},
 * {@code DEFAULT_LOCALE}, {@code stripMetaLine} and the
 * {@code get*DateFromYear} helpers are not declared here; presumably they
 * are inherited from {@link LineParser}.</p>
 */
public class SedimentYieldParser extends LineParser {

    private static final Logger log =
        Logger.getLogger(SedimentYieldParser.class);


    /**
     * Number format used for all numeric values of this parser.
     * Note that {@link NumberFormat} instances are not synchronized, so this
     * shared instance must not be used from several threads concurrently.
     */
    public static final NumberFormat nf = NumberFormat.getInstance(DEFAULT_LOCALE);


    /** Prefix of the meta line that declares the grain fraction. */
    public static final String FRAKTION_START = "Fraktion:";

    /** File name regex mapped to {@code GrainFraction.COARSE}. */
    public static final String FRACTION_COARSE_STR =
        ".*Grobkorn.*";

    /** File name regex mapped to {@code GrainFraction.FINE_MIDDLE}. */
    public static final String FRACTION_FINE_MIDDLE_STR =
        ".*Fein.Mittel.Kies.*";

    /** File name regex mapped to {@code GrainFraction.SAND}. */
    public static final String FRACTION_SAND =
        ".*Sand.*";

    /** File name regex mapped to {@code GrainFraction.SUSP_SAND}. */
    public static final String FRACTION_SUSP_SAND =
        ".*susp.Sand.*";

    /** File name regex mapped to {@code GrainFraction.SUSP_SAND_BED}. */
    public static final String FRACTION_SUSP_SAND_BED =
        ".*bettbild.Anteil.susp.Sand.*";

    /** Alternative file name regex mapped to {@code GrainFraction.SUSP_SAND_BED}. */
    public static final String FRACTION_SUSP_SAND_BED_EPOCH =
        ".*susp.Sand.bettbildAnteil.*";

    /** File name regex mapped to {@code GrainFraction.SUSPENDED_SEDIMENT}. */
    public static final String FRACTION_SUSPENDED_SEDIMENT =
        ".*Schwebstoff.*";

    /** File name regex mapped to {@code GrainFraction.TOTAL}. */
    public static final String FRACTION_TOTAL =
        ".*gesamt.*";


    // Precompiled versions of the file name regexes above, so that
    // getGrainFractionTypeName() does not recompile them on every call.
    private static final Pattern COARSE_PATTERN =
        Pattern.compile(FRACTION_COARSE_STR);

    private static final Pattern FINE_MIDDLE_PATTERN =
        Pattern.compile(FRACTION_FINE_MIDDLE_STR);

    private static final Pattern SAND_PATTERN =
        Pattern.compile(FRACTION_SAND);

    private static final Pattern SUSP_SAND_PATTERN =
        Pattern.compile(FRACTION_SUSP_SAND);

    private static final Pattern SUSP_SAND_BED_PATTERN =
        Pattern.compile(FRACTION_SUSP_SAND_BED);

    private static final Pattern SUSP_SAND_BED_EPOCH_PATTERN =
        Pattern.compile(FRACTION_SUSP_SAND_BED_EPOCH);

    private static final Pattern SUSPENDED_SEDIMENT_PATTERN =
        Pattern.compile(FRACTION_SUSPENDED_SEDIMENT);

    private static final Pattern TOTAL_PATTERN =
        Pattern.compile(FRACTION_TOTAL);


    /** Column name containing a single year, captured in group 1. */
    public static final Pattern TIMEINTERVAL_SINGLE =
        Pattern.compile("\\D*([0-9]+?)\\D*");

    /** Column name containing a year range "A - B", captured in groups 1 and 2. */
    public static final Pattern TIMEINTERVAL_EPOCH =
        Pattern.compile("\\D*([0-9]+?)\\s*-\\s*([0-9]+?)\\D*");

    /** Meta line declaring the grain fraction; group 1 is the fraction text. */
    public static final Pattern META_FRACTION =
        Pattern.compile("^Fraktion: (.*)");

    /** Meta line declaring the unit; group 1 is the text inside the brackets. */
    public static final Pattern META_UNIT =
        Pattern.compile("^Einheit: \\[(.*)\\].*");

    /** Meta line holding the column header. */
    public static final Pattern META_COLUMN_NAMES =
        Pattern.compile("^Fluss-km.*");

    /**
     * Grain fraction given as a range "lower-upper unit".
     * Groups: 2 = decimal lower bound, 3 = integer lower bound,
     * 4 = upper bound (decimal or integer).
     */
    public static final Pattern META_GRAIN_FRACTION_A =
        Pattern.compile("\\D*(([0-9]+?,[0-9]+?)\\s*-|([0-9]++)\\s*-)(([0-9]+?,[0-9]+?)|([0-9]++))\\s*([a-zA-Z]+?)\\W*\\D*");

    /**
     * Grain fraction given as an operator and a word followed by a range.
     * Groups: 4 = decimal lower bound, 5 = integer lower bound,
     * 6 = upper bound (decimal or integer).
     */
    public static final Pattern META_GRAIN_FRACTION_B =
        Pattern.compile("(<|>){1}\\s*(\\w++)\\s*(([0-9]+?,[0-9]+?)\\s*-|([0-9]++)\\s*-)(([0-9]+?,[0-9]+?)|([0-9]++))\\s*([a-zA-Z]+?)");

    /**
     * Grain fraction given as a single bound "&lt; value unit" or
     * "&gt; value unit". Groups: 1 = operator, 3 = value.
     */
    public static final Pattern META_GRAIN_FRACTION_C =
        Pattern.compile("(<|>){1}\\s*((([0-9]+?,[0-9]+?)|([0-9]++))\\s*(\\w+))");


    /** All sediment yields collected by the finished parse runs. */
    protected List<ImportSedimentYield> sedimentYields;

    /** Sediment yields of the file being parsed, one per value column. */
    protected ImportSedimentYield[] current;

    /** Grain fraction read from the most recent fraction meta line. */
    protected ImportGrainFraction grainFraction;

    /** Unit read from the most recent unit meta line. */
    protected ImportUnit unit;

    /** Name of the file being parsed; {@code null} outside of a parse run. */
    protected String description;

    /** Column names of the most recent column header line. */
    protected String[] columnNames;


    /** Creates a parser with an empty result list. */
    public SedimentYieldParser() {
        sedimentYields = new ArrayList<ImportSedimentYield>();
    }


    /**
     * Parses the given file. The file name is stored as description because
     * it is used for the created sediment yields and to determine the grain
     * fraction type.
     *
     * @param file the file to parse.
     * @throws IOException if the super class fails to read the file.
     */
    @Override
    public void parse(File file) throws IOException {
        description = file.getName();

        super.parse(file);
    }


    /** Clears the per-file state. */
    @Override
    protected void reset() {
        current       = null;
        grainFraction = null;
        unit          = null;
        // Column names belong to 'current'; drop them as well so that stale
        // headers of a previous file are never applied to new data rows.
        columnNames   = null;
    }


    /** Moves the sediment yields of the current file into the result list. */
    @Override
    protected void finish() {
        if (current != null) {
            for (ImportSedimentYield isy: current) {
                sedimentYields.add(isy);
            }
        }

        description = null;
    }


    /**
     * Dispatches a line to the meta line or the data line handling.
     *
     * @param lineNum the number of the line (unused here).
     * @param line the content of the line.
     */
    @Override
    protected void handleLine(int lineNum, String line) {
        if (line.startsWith(START_META_CHAR)) {
            handleMetaLine(stripMetaLine(line));
        }
        else {
            handleDataLine(line);
        }
    }


    /**
     * Tries the unit, fraction and column header handlers in this order and
     * logs a warning if none of them accepts the line.
     *
     * @param line the meta line as returned by {@code stripMetaLine}.
     */
    protected void handleMetaLine(String line) {
        boolean handled =
            handleMetaUnit(line)
            || handleMetaFraction(line)
            || handleColumnNames(line);

        if (!handled) {
            log.warn("SYP: Unknown meta line: '" + line + "'");
        }
    }


    /**
     * Reads the unit from a meta line matching {@link #META_UNIT}.
     *
     * @param line the meta line.
     * @return true if the line was a unit line.
     */
    protected boolean handleMetaUnit(String line) {
        Matcher m = META_UNIT.matcher(line);

        if (!m.matches()) {
            return false;
        }

        unit = new ImportUnit(m.group(1));
        return true;
    }


    /**
     * Reads the grain fraction from a meta line starting with
     * {@link #FRAKTION_START}. A line without any text after the prefix
     * denotes the total grain fraction; otherwise the text after
     * "Fraktion: " is interpreted by the grain fraction patterns.
     *
     * @param line the meta line.
     * @return true if the line was a fraction line.
     */
    public boolean handleMetaFraction(String line) {
        if (!line.startsWith(FRAKTION_START)) {
            return false;
        }

        String remainder = line.substring(FRAKTION_START.length()).trim();

        // Check the empty case first: "Fraktion: " (with trailing blanks)
        // also matches META_FRACTION and would otherwise be reported as an
        // unknown grain fraction instead of the total fraction.
        if (remainder.length() == 0) {
            log.debug("Found total grain fraction.");
            this.grainFraction = new ImportGrainFraction(GrainFraction.TOTAL);

            return true;
        }

        if (META_FRACTION.matcher(line).matches()) {
            // 'remainder' equals the trimmed group 1 of META_FRACTION;
            // trimming makes the anchored patterns B and C tolerant of
            // surrounding blanks.
            this.grainFraction = buildGrainFraction(remainder);

            return true;
        }

        return false;
    }


    /**
     * Reads the column header from a meta line matching
     * {@link #META_COLUMN_NAMES} and creates the sediment yields for the
     * value columns.
     *
     * @param line the meta line.
     * @return true if the line was a column header line.
     */
    public boolean handleColumnNames(String line) {
        if (!META_COLUMN_NAMES.matcher(line).matches()) {
            return false;
        }

        columnNames = line.split(SEPERATOR_CHAR);

        initializeSedimentYields();

        return true;
    }


    /**
     * Parses a data row and adds one value per non-empty value column to
     * the corresponding sediment yield. Rows that arrive before a column
     * header, rows with too few columns and rows with unparseable numbers
     * are skipped with a warning.
     *
     * @param line the data line.
     */
    protected void handleDataLine(String line) {
        if (columnNames == null || current == null) {
            // Without a header there is no sediment yield to add values to.
            log.warn("SYP: skip data line before column header: '"
                + line + "'");
            return;
        }

        String[] vals = line.split(SEPERATOR_CHAR);

        if (vals.length < columnNames.length-1) {
            log.warn("SYP: skip invalid data line: '" + line + "'");
            return;
        }

        try {
            Double km = nf.parse(vals[0]).doubleValue();

            // Column 0 is the km, the last header column is skipped.
            for (int i = 1, n = columnNames.length-1; i < n; i++) {
                String curVal = vals[i];

                if (curVal != null && curVal.length() > 0) {
                    current[i-1].addValue(new ImportSedimentYieldValue(
                        km, nf.parse(curVal).doubleValue()
                    ));
                }
            }
        }
        catch (ParseException pe) {
            log.warn("SYP: unparseable number in data row '" + line + "':", pe);
        }
    }


    /**
     * Creates one sediment yield per value column of the current header,
     * using the unit and grain fraction read so far.
     */
    private void initializeSedimentYields() {
        // skip first column (Fluss-km) and last column (Hinweise)
        int count = columnNames.length - 2;

        if (count < 0) {
            // A header with a single column would otherwise lead to a
            // NegativeArraySizeException.
            log.warn("SYP: column header has too few columns.");
            count = 0;
        }

        // NOTE(review): a second header line in the same file replaces
        // 'current' without storing the previous yields - confirm that
        // files only ever contain one header.
        current = new ImportSedimentYield[count];

        for (int i = 0; i < count; i++) {
            ImportSedimentYield isy = new ImportSedimentYield(this.description);
            isy.setTimeInterval(getTimeInterval(columnNames[i+1]));
            isy.setUnit(unit);
            isy.setGrainFraction(grainFraction);
            current[i] = isy;
        }
    }


    /**
     * Derives the time interval from a column name. A year range is tried
     * first, then a single year.
     *
     * @param column the column name.
     * @return the time interval or null if the name contains no usable year.
     */
    private ImportTimeInterval getTimeInterval(String column) {
        try {
            Matcher epoch = TIMEINTERVAL_EPOCH.matcher(column);
            if (epoch.matches()) {
                int yearA = nf.parse(epoch.group(1)).intValue();
                int yearB = nf.parse(epoch.group(2)).intValue();

                return new ImportTimeInterval(
                    getStartDateFromYear(yearA),
                    getEndDateFromYear(yearB)
                );
            }

            Matcher single = TIMEINTERVAL_SINGLE.matcher(column);
            if (single.matches()) {
                int year = nf.parse(single.group(1)).intValue();

                return new ImportTimeInterval(getStartDateFromYear(year));
            }

            log.warn("SYP: Unknown time interval string: '" + column + "'");
        }
        catch (ParseException pe) {
            log.warn("SYP: Could not parse years: " + column, pe);
        }

        return null;
    }


    /**
     * Builds the grain fraction described by the text of a fraction meta
     * line. The patterns A (range), B (operator, word and range) and
     * C (single bound) are tried in this order.
     *
     * @param gfStr the text after "Fraktion: ".
     * @return the grain fraction or null if the text could not be
     * interpreted.
     */
    private ImportGrainFraction buildGrainFraction(String gfStr) {
        Matcher a = META_GRAIN_FRACTION_A.matcher(gfStr);
        if (a.matches()) {
            String lower = a.group(2) != null ? a.group(2) : a.group(3);

            // Group 4 wraps both upper bound alternatives.
            ImportGrainFraction gf = buildRange(lower, a.group(4), gfStr);
            if (gf != null) {
                return gf;
            }
        }

        Matcher b = META_GRAIN_FRACTION_B.matcher(gfStr);
        if (b.matches()) {
            String lower = b.group(4) != null ? b.group(4) : b.group(5);

            // Group 6 wraps both upper bound alternatives.
            ImportGrainFraction gf = buildRange(lower, b.group(6), gfStr);
            if (gf != null) {
                return gf;
            }
        }

        Matcher c = META_GRAIN_FRACTION_C.matcher(gfStr);
        if (c.matches()) {
            String oper     = c.group(1);
            String valueStr = c.group(3);

            try {
                Double value = nf.parse(valueStr).doubleValue();

                if (oper.equals(">")) {
                    // "> value": only a lower bound is known.
                    return new ImportGrainFraction(
                        getGrainFractionTypeName(this.description),
                        value,
                        null
                    );
                }
                else {
                    // "< value": only an upper bound is known.
                    return new ImportGrainFraction(
                        getGrainFractionTypeName(this.description),
                        null,
                        value
                    );
                }
            }
            catch (ParseException pe) {
                log.warn(
                    "SYP: Could not parse ranges of: '" + gfStr + "'", pe);
            }
        }

        log.warn("SYP: Unknown grain fraction: '" + gfStr + "'");

        return null;
    }


    /**
     * Builds a grain fraction with a lower and an upper bound.
     *
     * @param lower the lower bound as text.
     * @param upper the upper bound as text.
     * @param gfStr the complete fraction text, used for logging only.
     * @return the grain fraction or null if a bound is not parseable.
     */
    private ImportGrainFraction buildRange(
        String lower,
        String upper,
        String gfStr
    ) {
        try {
            return new ImportGrainFraction(
                getGrainFractionTypeName(this.description),
                nf.parse(lower).doubleValue(),
                nf.parse(upper).doubleValue()
            );
        }
        catch (ParseException pe) {
            log.warn("SYP: Could not parse ranges of: '" + gfStr + "'", pe);
            return null;
        }
    }


    /**
     * Determines the grain fraction type name from a file name. The more
     * specific patterns are tested before the more general ones (e.g.
     * "susp.Sand" before "Sand").
     *
     * @param filename the name of the parsed file; may be null.
     * @return the matching {@code GrainFraction} name or "unknown" if no
     * pattern matches or the file name is null.
     */
    public static String getGrainFractionTypeName(String filename) {
        if (filename == null) {
            // Pattern matching would throw a NullPointerException.
            log.warn("SYP: Unknown grain fraction '" + filename + "'");
            return "unknown";
        }

        if (COARSE_PATTERN.matcher(filename).matches()) {
            return GrainFraction.COARSE;
        }
        else if (FINE_MIDDLE_PATTERN.matcher(filename).matches()) {
            return GrainFraction.FINE_MIDDLE;
        }
        else if (SUSP_SAND_BED_PATTERN.matcher(filename).matches() ||
            SUSP_SAND_BED_EPOCH_PATTERN.matcher(filename).matches()) {
            return GrainFraction.SUSP_SAND_BED;
        }
        else if (SUSP_SAND_PATTERN.matcher(filename).matches()) {
            return GrainFraction.SUSP_SAND;
        }
        else if (SAND_PATTERN.matcher(filename).matches()) {
            return GrainFraction.SAND;
        }
        else if (SUSPENDED_SEDIMENT_PATTERN.matcher(filename).matches()) {
            return GrainFraction.SUSPENDED_SEDIMENT;
        }
        else if (TOTAL_PATTERN.matcher(filename).matches()) {
            return GrainFraction.TOTAL;
        }
        else {
            log.warn("SYP: Unknown grain fraction '" + filename + "'");
            return "unknown";
        }
    }


    /**
     * @return the sediment yields collected by all finished parse runs.
     */
    public List<ImportSedimentYield> getSedimentYields() {
        return sedimentYields;
    }
}
// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org