view backend/src/main/java/org/dive4elements/river/importer/parsers/BedHeightParser.java @ 6328:53d08f33d094

Backend: Moved guessing of main values and their time intervals out of the STA parser. The same code will be useful to extend the WST parser to better handle official lines.
author Sascha L. Teichmann <teichmann@intevation.de>
date Thu, 13 Jun 2013 17:15:34 +0200
parents fe8e3d5c25d4
children cc21c197d204 b5e0b28c1516
line wrap: on
line source
/* Copyright (C) 2011, 2012, 2013 by Bundesanstalt für Gewässerkunde
 * Software engineering by Intevation GmbH
 *
 * This file is Free Software under the GNU AGPL (>=v3)
 * and comes with ABSOLUTELY NO WARRANTY! Check out the
 * documentation coming with Dive4Elements River for details.
 */

package org.dive4elements.river.importer.parsers;

import java.io.File;

import java.math.BigDecimal;

import java.text.NumberFormat;
import java.text.ParseException;

import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Locale;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import java.io.IOException;
import java.io.LineNumberReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;

import org.apache.log4j.Logger;

import org.dive4elements.river.importer.ImportBedHeight;
import org.dive4elements.river.importer.ImportBedHeightType;
import org.dive4elements.river.importer.ImportElevationModel;
import org.dive4elements.river.importer.ImportLocationSystem;
import org.dive4elements.river.importer.ImportRange;
import org.dive4elements.river.importer.ImportTimeInterval;
import org.dive4elements.river.importer.ImportUnit;
import org.dive4elements.river.model.BedHeightType;
import org.dive4elements.river.importer.ImporterSession;
import org.dive4elements.river.importer.parsers.LineParser;

/**
 * Base class for parsers of bed height files.
 *
 * <p>A file is read line by line. Empty lines are skipped, lines starting
 * with {@link #START_META_CHAR} are treated as meta data and matched against
 * the {@code META_*} patterns, and every other line is handed to
 * {@link #handleDataLine(ImportBedHeight, String)}. Each parsed file yields
 * one {@link ImportBedHeight} which is collected in {@link #getBedHeights()}.
 */
public abstract class BedHeightParser {

    private static final Logger log =
        Logger.getLogger(BedHeightParser.class);

    /** Character encoding used to read the input files. */
    public static final String ENCODING = "ISO-8859-1";

    /** Locale used by {@link #nf} to parse numbers. */
    public static final Locale DEFAULT_LOCALE = Locale.GERMAN;

    /** Prefix that marks a line as a meta line. */
    public static final String START_META_CHAR = "#";
    /** Column separator; not used in this class itself. */
    public static final String SEPERATOR_CHAR  = ";";

    /** Meta line "Jahr: ..."; group 1 is the first run of digits (may be empty). */
    public static final Pattern META_YEAR =
        Pattern.compile("^Jahr: [^0-9]*(\\d*).*");

    /** Meta line "Zeitraum: Epoche a-b"; groups 1 and 2 are the two digit runs. */
    public static final Pattern META_TIMEINTERVAL =
        Pattern.compile("^Zeitraum: Epoche (\\d*)-(\\d*).*");

    /** Meta line "Aufnahmeart: ..."; group 1 is the rest of the line. */
    public static final Pattern META_TYPE =
        Pattern.compile("^Aufnahmeart: (.*).*");

    /** Meta line "Lagesystem: ..."; group 1 is the rest of the line. */
    public static final Pattern META_LOCATION_SYSTEM =
        Pattern.compile("^Lagesystem: (.*).*");

    /**
     * Meta line "H.hensystem: NAME [NUM ][UNIT]" (the '.' matches any single
     * character). Group 1 is the name, the optional group 2 is the text
     * between name and '[' including its trailing blank, group 3 is the
     * bracketed text.
     */
    public static final Pattern META_CUR_ELEVATION_SYSTEM =
        Pattern.compile("^H.hensystem:\\s(\\w++) (.* )??\\[(.*)\\].*");

    /**
     * Same layout as {@link #META_CUR_ELEVATION_SYSTEM} but for lines
     * starting with "urspr.ngliches H.hensystem:".
     */
    public static final Pattern META_OLD_ELEVATION_SYSTEM =
        Pattern.compile("^urspr.ngliches H.hensystem:\\s(\\w++) (.* )??\\[(.*)\\].*");

    /** Meta line "ausgewertete Peilbreite: ..."; group 1 is a (possibly empty) digit run. */
    public static final Pattern META_SOUNDING_WIDTH =
        Pattern.compile("^ausgewertete Peilbreite: (\\d*).*");

    /** Meta line "Strecke: ... a - b"; groups 1 and 2 are the two bounds. */
    public static final Pattern META_RANGE =
        Pattern.compile("^Strecke:\\D*(\\d++.?\\d*) ?- ?(\\d++.?\\d*).*");

    /** Meta line "Auswerter: ..."; group 1 is the rest of the line. */
    public static final Pattern META_EVALUATION_BY =
        Pattern.compile("^Auswerter: (.*).*");

    /** Meta line "Weitere Bemerkungen: ..."; group 1 is the rest of the line. */
    public static final Pattern META_COMMENTS =
        Pattern.compile("^Weitere Bemerkungen: (.*).*");


    /**
     * Number format for {@link #DEFAULT_LOCALE}, shared by all instances.
     * NOTE(review): java.text.NumberFormat is documented as not synchronized;
     * confirm that parsers are never run concurrently.
     */
    protected static NumberFormat nf = NumberFormat.getInstance(DEFAULT_LOCALE);


    /** Results of all {@link #parse(File)} calls on this instance. */
    protected List<ImportBedHeight> bedHeights;


    /**
     * Creates the object that receives the content of one file.
     *
     * @param description the file name with every ".csv" removed.
     * @return a fresh import object.
     */
    protected abstract ImportBedHeight newImportBedHeight(String description);

    /** Handle a line of file that contains data (in contrast to comments, meta). */
    protected abstract void handleDataLine(
        ImportBedHeight importBedHeight,
        String          line
    );


    /** Creates a parser with an empty result list. */
    public BedHeightParser() {
        this.bedHeights = new ArrayList<ImportBedHeight>();
    }


    /**
     * @return the internal (live) list of parsed bed heights.
     */
    public List<ImportBedHeight> getBedHeights() {
        return bedHeights;
    }


    /**
     * Parses a single file and appends the result to {@link #bedHeights}.
     * The result is only appended if the whole file could be read.
     *
     * @param file the file to read, decoded using {@link #ENCODING}.
     * @throws IOException if the file cannot be opened or read.
     */
    public void parse(File file) throws IOException {
        log.info("Parsing bed height single file '" + file + "'");

        ImportBedHeight obj = newImportBedHeight(file.getName().replaceAll("\\.csv", ""));

        LineNumberReader in =
            new LineNumberReader(
            new InputStreamReader(
            new FileInputStream(file), ENCODING));

        try {
            for (String raw = in.readLine(); raw != null; raw = in.readLine()) {
                String line = raw.trim();

                if (line.length() == 0) {
                    continue;
                }

                if (line.startsWith(START_META_CHAR)) {
                    handleMetaLine(obj, line);
                }
                else {
                    handleDataLine(obj, line);
                }
            }

            log.info("File contained " + obj.getValueCount() + " values.");
            bedHeights.add(obj);
        }
        finally {
            in.close();
        }
    }


    /**
     * Removes the first character of the line and, if present, one blank
     * directly following it.
     *
     * @param line a non-empty meta line.
     * @return the line without its leading marker.
     */
    protected static String stripMetaLine(String line) {
        String rest = line.substring(1);

        if (rest.startsWith(" ")) {
            return rest.substring(1);
        }

        return rest;
    }


    /**
     * Offers a meta line to the individual handlers in a fixed order and
     * stops at the first one that accepts it. A warning is logged if no
     * handler accepts the line.
     *
     * @param obj  the object that receives the meta data.
     * @param line the untouched meta line including its marker.
     */
    protected void handleMetaLine(ImportBedHeight obj, String line) {
        String meta = stripMetaLine(line);

        // Short-circuit evaluation keeps the original handler order.
        boolean handled =
               handleMetaYear(obj, meta)
            || handleMetaTimeInterval(obj, meta)
            || handleMetaSoundingWidth(obj, meta)
            || handleMetaComment(obj, meta)
            || handleMetaEvaluationBy(obj, meta)
            || handleMetaRange(obj, meta)
            || handleMetaType(obj, meta)
            || handleMetaLocationSystem(obj, meta)
            || handleMetaCurElevationModel(obj, meta)
            || handleMetaOldElevationModel(obj, meta);

        if (!handled) {
            log.warn("BHP: Meta line did not match any known type: " + line);
        }
    }


    /**
     * Handles a {@link #META_YEAR} line.
     *
     * @return true if the line matched the pattern, even if no usable year
     *         could be extracted from it.
     */
    protected boolean handleMetaYear(ImportBedHeight obj, String line) {
        Matcher m = META_YEAR.matcher(line);

        if (!m.matches()) {
            return false;
        }

        String digits = m.group(1);

        if (digits.length() == 0) {
            log.warn("BHP: No year given.");
            return true;
        }

        try {
            obj.setYear(Integer.parseInt(digits));
        }
        catch (NumberFormatException e) {
            // A digit run too long for an int must not abort the whole file.
            log.warn("BHP: could not parse year '" + digits + "'", e);
        }

        return true;
    }


    /**
     * Handles a {@link #META_TIMEINTERVAL} line.
     *
     * @return true if the line matched the pattern, even if the years could
     *         not be parsed.
     */
    protected boolean handleMetaTimeInterval(ImportBedHeight obj, String line) {
        Matcher m = META_TIMEINTERVAL.matcher(line);

        if (!m.matches()) {
            return false;
        }

        String lo = m.group(1);
        String up = m.group(2);

        log.debug("Found time interval: " + lo + " - " + up);

        try {
            int lower = Integer.parseInt(lo);
            int upper = Integer.parseInt(up);

            Date fromYear = LineParser.getStartDateFromYear(lower);
            Date toYear   = LineParser.getEndDateFromYear(upper);

            obj.setTimeInterval(new ImportTimeInterval(fromYear, toYear));
        }
        catch (NumberFormatException e) {
            log.warn("BHP: could not parse timeinterval", e);
        }

        return true;
    }


    /**
     * Handles a {@link #META_SOUNDING_WIDTH} line. If the width cannot be
     * parsed it is set to 0.
     *
     * @return true if the line matched the pattern.
     */
    protected boolean handleMetaSoundingWidth(ImportBedHeight obj, String line) {
        Matcher m = META_SOUNDING_WIDTH.matcher(line);

        if (!m.matches()) {
            return false;
        }

        String tmp = m.group(1);

        try {
            obj.setSoundingWidth(Integer.valueOf(tmp));
            return true;
        }
        catch (NumberFormatException e) {
            log.warn("BHP: Could not parse sounding width in line '" + line +
                "'. -> Set default value '0'");
        }

        obj.setSoundingWidth(0);
        return true;
    }


    /**
     * Handles a {@link #META_COMMENTS} line by storing its text as the
     * description of the object.
     *
     * @return true if the line matched the pattern.
     */
    protected boolean handleMetaComment(ImportBedHeight obj, String line) {
        Matcher m = META_COMMENTS.matcher(line);

        if (!m.matches()) {
            return false;
        }

        obj.setDescription(m.group(1));

        return true;
    }


    /**
     * Handles a {@link #META_EVALUATION_BY} line; all ';' are removed from
     * the value.
     *
     * @return true if the line matched the pattern.
     */
    protected boolean handleMetaEvaluationBy(
        ImportBedHeight obj,
        String                line
    ) {
        Matcher m = META_EVALUATION_BY.matcher(line);

        if (!m.matches()) {
            return false;
        }

        obj.setEvaluationBy(m.group(1).replace(";", ""));

        return true;
    }


    /**
     * Handles a {@link #META_RANGE} line.
     *
     * @return true only if the line matched the pattern and both bounds
     *         could be parsed; a parse failure is logged and reported as
     *         false.
     */
    protected boolean handleMetaRange(ImportBedHeight obj, String line) {
        Matcher m = META_RANGE.matcher(line);

        if (!m.matches()) {
            return false;
        }

        String a = m.group(1).replace(";", "");
        String b = m.group(2).replace(";", "");

        try {
            // NOTE(review): new BigDecimal(double) keeps the exact binary
            // expansion of the double. Left unchanged because ranges built
            // elsewhere may be compared against these values - confirm
            // before switching to BigDecimal.valueOf(double).
            // NOTE(review): the bounds are parsed with the German locale and
            // the pattern accepts any character between the digit runs;
            // presumably the input always uses a decimal comma - verify.
            BigDecimal lower = new BigDecimal(nf.parse(a).doubleValue());
            BigDecimal upper = new BigDecimal(nf.parse(b).doubleValue());

            obj.setRange(new ImportRange(lower, upper));

            return true;
        }
        catch (ParseException e) {
            log.warn("BHP: could not parse range", e);
        }

        return false;
    }


    /**
     * Handles a {@link #META_TYPE} line by looking up the named bed height
     * type via the importer's database session.
     *
     * @return true only if the line matched the pattern and the type is
     *         known; an unknown type is logged as an error and reported as
     *         false.
     */
    protected boolean handleMetaType(ImportBedHeight obj, String line) {
        Matcher m = META_TYPE.matcher(line);

        if (!m.matches()) {
            return false;
        }

        String tmp = m.group(1).replace(";", "").trim();

        BedHeightType bht = BedHeightType.fetchBedHeightTypeForType(
            tmp, ImporterSession.getInstance().getDatabaseSession());

        if (bht == null) {
            log.error("Unknown bed height type: '" + tmp + "'. File ignored.");
            return false;
        }

        obj.setType(new ImportBedHeightType(bht));
        return true;
    }


    /**
     * Handles a {@link #META_LOCATION_SYSTEM} line; the value (with all ';'
     * removed) is used for both arguments of the location system.
     *
     * @return true if the line matched the pattern.
     */
    protected boolean handleMetaLocationSystem(
        ImportBedHeight obj,
        String          line
    ) {
        Matcher m = META_LOCATION_SYSTEM.matcher(line);

        if (!m.matches()) {
            return false;
        }

        String tmp = m.group(1).replace(";", "");

        obj.setLocationSystem(new ImportLocationSystem(tmp, tmp));

        return true;
    }


    /**
     * Handles a {@link #META_CUR_ELEVATION_SYSTEM} line.
     *
     * @return true if the line matched the pattern.
     */
    protected boolean handleMetaCurElevationModel(
        ImportBedHeight obj,
        String          line
    ) {
        Matcher m = META_CUR_ELEVATION_SYSTEM.matcher(line);

        if (!m.matches()) {
            return false;
        }

        obj.setCurElevationModel(new ImportElevationModel(
            elevationModelName(m.group(1), m.group(2)),
            new ImportUnit(m.group(3))
        ));

        return true;
    }


    /**
     * Handles a {@link #META_OLD_ELEVATION_SYSTEM} line.
     *
     * @return true if the line matched the pattern.
     */
    protected boolean handleMetaOldElevationModel(
        ImportBedHeight obj,
        String          line
    ) {
        Matcher m = META_OLD_ELEVATION_SYSTEM.matcher(line);

        if (!m.matches()) {
            return false;
        }

        obj.setOldElevationModel(new ImportElevationModel(
            elevationModelName(m.group(1), m.group(2)),
            new ImportUnit(m.group(3))
        ));

        return true;
    }


    /**
     * Joins the two name parts of an elevation system.
     *
     * @param name the mandatory first part.
     * @param num  the optional second part; null if the optional group of
     *             the pattern did not take part in the match.
     * @return {@code name} alone if {@code num} is null, otherwise both
     *         parts separated by a blank ({@code num} is used unchanged).
     */
    private static String elevationModelName(String name, String num) {
        // Without this check a missing part would show up as the text "null".
        if (num == null) {
            return name;
        }

        return name + " " + num;
    }
}
// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org