view backend/src/main/java/org/dive4elements/river/importer/parsers/DA66Parser.java @ 6328:53d08f33d094

Backend: Moved guessing of main values and their time intervals out of the STA parser. The same code will be useful to extend the WST parser to better handle official lines.
author Sascha L. Teichmann <teichmann@intevation.de>
date Thu, 13 Jun 2013 17:15:34 +0200
parents 4c3ccf2b0304
children 3bb1c62ad732
line wrap: on
line source
/* Copyright (C) 2011, 2012, 2013 by Bundesanstalt für Gewässerkunde
 * Software engineering by Intevation GmbH
 *
 * This file is Free Software under the GNU AGPL (>=v3)
 * and comes with ABSOLUTELY NO WARRANTY! Check out the
 * documentation coming with Dive4Elements River for details.
 */

package org.dive4elements.river.importer.parsers;

import org.dive4elements.artifacts.common.utils.FileTools;

import org.dive4elements.river.importer.XY;

import org.dive4elements.river.utils.EpsilonComparator;

import java.io.File;
import java.io.IOException;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;


/**
 * Parser for cross-section files in da66 format.
 *
 * Builds a map from station to the list of points ({@link XY}) of the
 * cross section at that station, i.e. a
 * {@code Map<Double, List<XY>>}.
 *
 * A da66 file is consumed line by line. The first two characters of a
 * line select how it is treated: "00" starts a new station, "66" carries
 * up to four points for the current station, "88" is only logged.
 */
public class DA66Parser extends LineParser implements CrossSectionParser
{
    /** Private logger. */
    private static final Logger logger = Logger.getLogger(DA66Parser.class);

    /** Line prefix starting a new station. */
    private static final String HEAD_HEAD = "00";
    /** Line prefix of lines carrying point values. */
    private static final String HEAD_GEOM = "66";
    /** Line prefix of end lines. Probably never used. */
    private static final String HEAD_ENDG = "88";

    /**
     * Trailing letters which mark a negative y value. The position of a
     * letter in this string plus one is the digit it stands for
     * (J = 1, K = 2, ..., R = 9).
     */
    private static final String NEGATIVE_LETTERS = "JKLMNOPQR";

    /** Regex to match lines of files in da66 format. */
    private static final Pattern LINE_PATTERN =
        Pattern.compile("^([0-9 -]{2})" + // Type (00|66|88)
                        "([0-9 -]{5})" + // unset
                        "([0-9 -]{2})" + // id
                        "([0-9 -]{9})" + // station
                        "([0-9 -]{2})" + // running number
                        "([0-9 -]{1})?" + // point id
                        /*
                        Would be great if we could express the pattern as this:
                        ([0-9 -]{1})([0-9 -JKMLMNOPQR]{7})([0-9 -]{7})+
                        */
                        "([0-9 -JKMLMNOPQR]{7})?" + // y
                        "([0-9 -]{7})?" + // z
                        "([0-9 -]{1})?" + // point id
                        "([0-9 -JKMLMNOPQR]{7})?" + // y
                        "([0-9 -]{7})?" + // z
                        "([0-9 -]{1})?" + // point id
                        "([0-9 -JKMLMNOPQR]{7})?" + // y
                        "([0-9 -]{7})?" + // z
                        "([0-9 -]{1})?" + // point id
                        "([0-9 -JKMLMNOPQR]{7})?" + // y
                        "([0-9 -]{7})?" // z
                        );


    /** Indices to match group of main regex. */
    private enum FIELD {
        HEAD      ( 1),
        UNSET     ( 2),
        ID        ( 3),
        STATION   ( 4),
        RUNNR     ( 5),
        POINT_1_ID( 6),
        POINT_1_Y ( 7),
        POINT_1_Z ( 8),
        POINT_2_ID( 9),
        POINT_2_Y (10),
        POINT_2_Z (11),
        POINT_3_ID(12),
        POINT_3_Y (13),
        POINT_3_Z (14),
        POINT_4_ID(15),
        POINT_4_Y (16),
        POINT_4_Z (17);

        /** Index of the capturing group in LINE_PATTERN. */
        private final int idx;

        FIELD(int idx) {
            this.idx = idx;
        }

        int getIdx() {
            return idx;
        }
    }


    /**
     * The (id, y, z) field triples of the up to four points which a
     * single "66" line can carry, in the order they appear in the line.
     */
    private static final FIELD [][] POINT_FIELDS = {
        { FIELD.POINT_1_ID, FIELD.POINT_1_Y, FIELD.POINT_1_Z },
        { FIELD.POINT_2_ID, FIELD.POINT_2_Y, FIELD.POINT_2_Z },
        { FIELD.POINT_3_ID, FIELD.POINT_3_Y, FIELD.POINT_3_Z },
        { FIELD.POINT_4_ID, FIELD.POINT_4_Y, FIELD.POINT_4_Z }
    };


    /** Header lines of da66 can define a type. */
    private enum Type {
        DATE                     ( 0),
        HEKTOSTONE_LEFT          ( 1), //grm. "Standlinie"
        HEKTOSTONE_RIGHT         ( 2),
        CHANNEL_LEFT             ( 3), //grm. "Fahrrinne"
        CHANNEL_RIGHT            ( 4),
        CHANNEL_2_LEFT           ( 5),
        CHANNEL_2_RIGHT          ( 6),
        GIW_1972                 ( 7),
        GROIN_DIST_LEFT          ( 8), //grm. "Buhnenkopfabstand links"
        GROIN_HEIGHT_LEFT        ( 9),
        GROIN_SLOPE_LEFT         (10),
        GROIN_DIST_RIGHT         (11),
        GROIN_HEIGHT_RIGHT       (12),
        GROIN_SLOPE_RIGHT        (13),
        STRIKE_LEFT              (14), //grm. "Streichlinie links"
        AXIS                     (15),
        STRIKE_RIGHT             (16),
        GROIN_BACK_SLOPE_LEFT    (17), //grm. "Buhnenrueckenneigung"
        GROIN_BACK_SLOPE_RIGHT   (18),
        GIW_1932                 (19),
        GIW_1982                 (20),
        STAND_ISLAND_1           (21),
        STAND_ISLAND_2           (22),
        STAND_ISLAND_3           (23),
        STAND_ISLAND_4           (24),
        UNSPECIFIED_1            (25),
        UNSPECIFIED_2            (26),
        HHW                      (27),
        OLD_PROFILE_NULL         (28),
        AW_1978                  (29),
        SIGN_LEFT                (30),
        SIGN_RIGHT               (31),
        DIST_SIGNAL_CHANNEL_LEFT (32),
        DIST_SIGNAL_CHANNEL_RIGHT(33),
        UNSPECIFIED_3            (34),
        UNSPECIFIED_4            (35),
        UNSPECIFIED_5            (36),
        UNSPECIFIED_6            (37),
        SHORE_LEFT               (38),
        SHORE_RIGHT              (39),
        UNSPECIFIED_7            (40);

        /** Numeric id of the type. */
        private final int id;

        Type(int id) {
            this.id = id;
        }

        public int getId() {
            return id;
        }
    }


    /** Available types, keyed by their numeric id. */
    private static final Map<Integer, Type> typeMap =
        new HashMap<Integer, Type>();


    /** Types we can deal with. */
    private static final List<Type> implementedTypes =
        new ArrayList<Type>();


    static {
        for (Type t: Type.values()) {
            typeMap.put(Integer.valueOf(t.getId()), t);
        }
        // TODO populate and respect header type.
        //implementedTypes.add(..);
    }


    /** The current line to which add points. */
    private List<XY> currentLine;


    /** Data collected so far, keyed by station. */
    protected Map<Double, List<XY>> data;


    /** Creates a parser with no data and an empty current line. */
    public DA66Parser() {
        data = new TreeMap<Double, List<XY>>(EpsilonComparator.CMP);
        // Make sure points can be collected even if reset() was not
        // called before the first line is handled.
        currentLine = new ArrayList<XY>();
    }


    /** Get the description of the cross section parsed. */
    @Override
    public String getDescription() {
        return FileTools.removeExtension(getFileName());
    }


    /**
     * Get the year of this cross sections measurement.
     * @return always null, the year is not extracted from da66 files.
     */
    @Override
    public Integer getYear() {
        return null;
    }


    /**
     * Return the data parsed.
     * @return map of stations (km) to list of points.
     */
    @Override
    public Map<Double, List<XY>> getData() {
        return data;
    }


    /**
     * Walk a directory tree, parse its *.d66 files (file name compared
     * case-insensitively) and hand the parser to the callback after each
     * successfully parsed file.
     *
     * The parser is reset before each file, so the callback has to
     * consume the data before the next file is parsed.
     *
     * @param root     File or directory to start the walk at.
     * @param callback Decides via accept() if a file is to be parsed and
     *                 is notified via parsed() afterwards. May be null,
     *                 in which case all matching files are parsed.
     */
    public void parseDA66s(File root, final Callback callback) {

        FileTools.walkTree(root, new FileTools.FileVisitor() {
            @Override
            public boolean visit(File file) {
                boolean wanted = file.isFile() && file.canRead()
                    && file.getName().toLowerCase().endsWith(".d66")
                    && (callback == null || callback.accept(file));

                if (!wanted) {
                    return true;
                }

                reset();
                try {
                    parse(file);
                    logger.info("parsing done");
                    if (callback != null) {
                        callback.parsed(DA66Parser.this);
                    }
                }
                catch (IOException ioe) {
                    // Keep file name and cause, otherwise the failure
                    // cannot be tracked down.
                    logger.error(
                        "IOException while parsing file " + file, ioe);
                    return false;
                }
                return true;
            }
        });
    }


    /**
     * Get the Index of the last cross-section lines point.
     * @return last points index, -1 if not available.
     */
    private int lastPointIdx() {
        if (currentLine == null || currentLine.isEmpty()) {
            return -1;
        }
        XY lastPoint = currentLine.get(currentLine.size() - 1);
        return lastPoint.getIndex();
    }


    /**
     * Returns the station in km. Values greater than 10000 are taken to
     * be given in cm and are converted, all other values are returned
     * unchanged.
     */
    private double stationInKm(double station) {
        return station > 10000
            ? station / 100000d
            : station;
    }


    /**
     * Apply the convention how to deal with numbers &lt; -99.999 .
     *
     * A value ending in '-' is negative, the '-' characters are dropped.
     * A value ending in one of the letters J..R is negative as well and
     * the letter stands for a digit (J = 1, ..., R = 9). All other
     * values are returned unchanged.
     *
     * The digits are trimmed before the sign is prepended: a sign
     * followed by padding blanks ("-  1234") would not be parsable as a
     * number.
     *
     * @param orig The raw field content, must not be null.
     * @return the value in a form suitable for number parsing.
     */
    private String applyLetterConvention(String orig) {
        if (orig.length() == 0) {
            return orig;
        }

        char last = orig.charAt(orig.length() - 1);

        String digits;
        if (last == '-') {
            digits = orig.replace("-", "");
        }
        else {
            int pos = NEGATIVE_LETTERS.indexOf(last);
            if (pos < 0) {
                // No convention applied.
                return orig;
            }
            digits = orig.replace(last, (char)('1' + pos));
        }

        return "-" + digits.trim();
    }

    /**
     * Add a Point (YZ,Index) to the current cross section line.
     * The parsed y and z values are divided by 1000 before being stored.
     * @param y The y coordinate of new point.
     * @param z The z coordinate of new point.
     * @param idx Ignored (besides the null check), the index of the new
     *            point is the index of the last point plus one.
     * @return true if point could been added, false otherwise (e.g. not
     *         parsable y or z values).
     */
    private boolean addPoint(String y, String z, String idx) {
        if (z == null || y == null || idx == null) {
            logger.error("Incomplete point definition");
            return false;
        }

        double iy;
        double iz;
        try {
            // Only y is subject to the letter convention.
            iy = Double.parseDouble(applyLetterConvention(y)) / 1000d;
            iz = Double.parseDouble(z) / 1000d;
        }
        catch (NumberFormatException nfe) {
            logger.error("Could not parse Number: " + nfe.getMessage());
            return false;
        }

        // We ignore idx, and increment instead.
        int lastPointIdx = lastPointIdx();
        int index = lastPointIdx <= 0 ? 1 : lastPointIdx + 1;

        currentLine.add(new XY(iy, iz, index));
        return true;
    }


    /** Called before consuming first line of file. */
    public void reset() {
        data.clear();
        currentLine = new ArrayList<XY>();
    }


    /**
     * Called for each line. Try to extract info from a da66 line.
     * Lines which are too short to carry a two character type are
     * reported like any other unknown line.
     */
    @Override
    protected void handleLine(int lineNum, String line) {
        if (line.startsWith(HEAD_HEAD)) {
            handleHeadLine(line);
        }
        else if (line.startsWith(HEAD_GEOM)) {
            handleGeometryLine(line);
        }
        else if (line.startsWith(HEAD_ENDG)) {
            logger.debug("Hit a 88");
        }
        else {
            logger.error("Do not know how to treat da66 line:");
            logger.error(line);
        }
    }


    /** Starts a new station from a "00" line. */
    private void handleHeadLine(String line) {
        // Always start a fresh line. If the header turns out to be bad
        // the list stays unregistered, so the points which follow are
        // dropped instead of being appended to the previous station.
        currentLine = new ArrayList<XY>();

        Matcher m = LINE_PATTERN.matcher(line);
        if (!m.find()) {
            logger.error("HEAD line bad.");
            return;
        }

        // TODO 'move' last line to match river axis
        // TODO find river axis intersection
        String rawStation = m.group(FIELD.STATION.getIdx());
        try {
            double station = stationInKm(Double.parseDouble(rawStation));
            data.put(station, currentLine);
        }
        catch (NumberFormatException nfe) {
            logger.error("HEAD line bad. Could not parse station: '"
                + rawStation + "'");
        }
    }


    /** Adds the points of a "66" line to the current station. */
    private void handleGeometryLine(String line) {
        Matcher m = LINE_PATTERN.matcher(line);
        if (!m.find()) {
            logger.warn("Line could not be parsed: ");
            logger.warn(line);
            return;
        }

        // TODO if last station differs, error and abort
        for (FIELD [] point: POINT_FIELDS) {
            String id = m.group(point[0].getIdx());
            if (id == null) {
                // Point not present in this line.
                continue;
            }
            String y = m.group(point[1].getIdx());
            String z = m.group(point[2].getIdx());
            if (!addPoint(y, z, id)) {
                // Problematic point.
                logger.error("A point could not be added");
            }
        }
    }


    /** Called when file is fully consumed. */
    @Override
    protected void finish() {
        // TODO 'move' last line to match river axis
        logger.info("Parsed " + data.size() + " lines");
    }


    /** Parses files given as arguments. */
    public static void main(String [] args) {

        DA66Parser parser = new DA66Parser();

        // Plain progress messages, not warnings or errors.
        logger.info("Start parsing files.");
        for (String arg: args) {
            parser.parseDA66s(new File(arg), null);
            logger.info("Parsing a file.");
        }
        logger.info("Finished parsing files.");
    }
}
// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org