view flys-backend/src/main/java/de/intevation/flys/importer/PRFParser.java @ 1197:ce3dacc6ea92

PRFParser: extract km from lines. TODO: extract data. flys-backend/trunk@2297 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author Sascha L. Teichmann <sascha.teichmann@intevation.de>
date Thu, 07 Jul 2011 09:29:31 +0000
parents 46127af605ba
children 661a9304f2f5
line wrap: on
line source
package de.intevation.flys.importer;

import java.util.Map;
import java.util.Stack;
import java.util.TreeMap;

import java.util.regex.Pattern;
import java.util.regex.Matcher;

import java.io.File;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.io.FileInputStream;
import java.io.IOException;

import org.apache.log4j.Logger;

public class PRFParser
{
    /** Logger shared by all instances; never reassigned, hence final. */
    private static final Logger log = Logger.getLogger(PRFParser.class);

    /**
     * Character encoding used to read PRF files. Taken from the system
     * property <code>flys.backend.prf.encoding</code>; ISO-8859-1 is used
     * when the property is not set.
     */
    public static final String ENCODING =
        System.getProperty("flys.backend.prf.encoding", "ISO-8859-1");

    /**
     * Pattern of the data format description expected in the first line
     * of a PRF file, e.g. <code>(1x,20(f6.2,f8.2))</code>. The two
     * closing parentheses are optional.
     * The six capturing groups are all unsigned integers; in order they
     * are stored by <code>DataFormat(Matcher)</code> as deleteChars,
     * maxRepetitions, firstIntegerPlaces, firstFractionPlaces,
     * secondIntegerPlaces and secondFractionPlaces.
     */
    public static final Pattern DATA_PATTERN =
        Pattern.compile(
            "\\((\\d+)x\\s*,\\s*(\\d+)\\(" +
            "\\s*f(\\d+)\\.(\\d+)\\s*,\\s*f(\\d+)\\.(\\d+)\\s*\\)?\\)?");

    /**
     * Pattern of the km format description, e.g. <code>(1x,f8.3)</code>.
     * The closing parenthesis is optional.
     * The three capturing groups are stored by
     * <code>KMFormat(Matcher)</code> as deleteChars, integerPlaces and
     * fractionPlaces.
     */
    public static final Pattern KM_PATTERN =
        Pattern.compile("\\((\\d+)x\\s*,\\s*f(\\d+)\\.(\\d+)\\s*\\)?");

    /**
     * Holds the six integers captured from a data format description.
     * The extraction of the data values themselves is not implemented
     * yet.
     */
    public static class DataFormat {

        protected int deleteChars;
        protected int maxRepetitions;
        protected int firstIntegerPlaces;
        protected int firstFractionPlaces;
        protected int secondIntegerPlaces;
        protected int secondFractionPlaces;

        /** Creates a format with all numbers left at zero. */
        public DataFormat() {
        }

        /**
         * Creates a format from a matcher that has already matched
         * successfully and offers at least six integer groups.
         *
         * @param m the successful match; groups 1 to 6 are read in order
         * @throws NumberFormatException if a group does not fit into
         *         an int
         */
        public DataFormat(Matcher m) {
            this.deleteChars          = intGroup(m, 1);
            this.maxRepetitions       = intGroup(m, 2);
            this.firstIntegerPlaces   = intGroup(m, 3);
            this.firstFractionPlaces  = intGroup(m, 4);
            this.secondIntegerPlaces  = intGroup(m, 5);
            this.secondFractionPlaces = intGroup(m, 6);
        }

        /** Returns capturing group <code>index</code> of m as an int. */
        private static int intGroup(Matcher m, int index) {
            return Integer.parseInt(m.group(index));
        }

        /**
         * Placeholder for the extraction of data from a line.
         * Currently ignores both arguments.
         *
         * @param line the data line
         * @param dest the map meant to receive the extracted values
         * @return always true at the moment
         * @throws NumberFormatException declared for the future
         *         implementation; never thrown at the moment
         */
        public boolean extractData(String line, Map<Double, Double> dest)
        throws NumberFormatException
        {
            //TODO: Implement me!
            return true;
        }
    } // class DataFormat

    /**
     * Describes where the km value sits inside a line and how many
     * of its digits are fraction digits.
     */
    public static class KMFormat {
        /** Number of leading characters of a line to ignore. */
        protected int deleteChars;
        /** Number of characters read after the ignored prefix. */
        protected int integerPlaces;
        /** Number of decimal places the read number is shifted by. */
        protected int fractionPlaces;

        /** <code>1/shift</code>; factor applied to the parsed number. */
        protected double scale;
        /** <code>10^fractionPlaces</code>; used for rounding. */
        protected double shift;

        /** Creates a format with all numbers left at zero. */
        public KMFormat() {
        }

        /**
         * Creates a format from a matcher that has already matched
         * successfully and offers at least three integer groups.
         *
         * @param m the successful match; groups 1 to 3 are read in order
         * @throws NumberFormatException if a group does not fit into
         *         an int
         */
        public KMFormat(Matcher m) {
            deleteChars    = Integer.parseInt(m.group(1));
            integerPlaces  = Integer.parseInt(m.group(2));
            fractionPlaces = Integer.parseInt(m.group(3));

            shift = Math.pow(10, fractionPlaces);
            scale = 1d/shift;
        }

        /**
         * Extracts the km value from a line. The
         * <code>integerPlaces</code> characters following the first
         * <code>deleteChars</code> characters are trimmed and parsed as
         * a number. If <code>fractionPlaces</code> is positive the number
         * is multiplied by <code>scale</code> and rounded to
         * <code>fractionPlaces</code> decimal places.
         *
         * @param line the line to extract the km value from
         * @return the km value
         * @throws NumberFormatException if the line is too short to
         *         contain the whole field or the field is not a number
         */
        public double extractKm(String line) throws NumberFormatException {

            int end = deleteChars + integerPlaces;

            // A line that ends inside the field would make substring()
            // throw StringIndexOutOfBoundsException, which callers
            // catching NumberFormatException would not see.
            if (line.length() <= deleteChars || line.length() < end) {
                throw new NumberFormatException("line too short");
            }

            String kmS = line.substring(deleteChars, end);

            double km = Double.parseDouble(kmS.trim());

            if (fractionPlaces <= 0) {
                return km;
            }

            // Round instead of truncating: scale*km*shift may end up
            // slightly below the intended integer (29 -> 28.999...),
            // and a cast to int would then lose a whole unit.
            return Math.round((scale*km)*shift)/shift;
        }
    } // class KMFormat

    /**
     * Result of parsing: one entry per km station, ordered by station.
     * The inner map of a station is the one handed to
     * <code>DataFormat.extractData</code> for lines of that station.
     */
    protected Map<Double, Map<Double, Double>> data;

    /** Creates a parser with an empty result map. */
    public PRFParser() {
        this.data = new TreeMap<Double, Map<Double, Double>>();
    }


    /**
     * Parses a single PRF file and adds the km stations found in it to
     * <code>data</code>. Entries from earlier calls are kept; call
     * <code>reset()</code> first to start from an empty result.
     *
     * <p>Expected layout of the file:</p>
     * <ol>
     * <li>line 1: data format description matching
     *     <code>DATA_PATTERN</code></li>
     * <li>line 2: an integer equal to the repetition count given
     *     in line 1</li>
     * <li>line 3: km format description matching
     *     <code>KM_PATTERN</code></li>
     * <li>line 4: a non-negative integer, the number of lines to skip</li>
     * <li>all further lines: that many lines are skipped, then every
     *     line is treated as a data line. Whenever the data extraction
     *     of a line returns false the skip count is applied again.</li>
     * </ol>
     *
     * @param file the PRF file to parse
     * @return true if the whole file was processed; false if the file
     *         cannot be read, its header is malformed or a data line
     *         cannot be evaluated. Stations stored before the failure
     *         stay in <code>data</code>.
     */
    public boolean parse(File file) {

        if (!(file.isFile() && file.canRead())) {
            log.warn("cannot open file '" + file + "'");
            return false;
        }

        log.info("parsing PRF file: '" + file + "'");

        // Kept separately so the stream can still be closed if wrapping
        // it in a reader fails (e.g. unsupported ENCODING).
        FileInputStream  fis = null;
        LineNumberReader in  = null;

        try {
            fis = new FileInputStream(file);
            in  = new LineNumberReader(new InputStreamReader(fis, ENCODING));

            // line 1: data format description
            String line = in.readLine();

            if (line == null || (line = line.trim()).length() == 0) {
                log.warn("file is empty.");
                return false;
            }

            Matcher m = DATA_PATTERN.matcher(line);

            if (!m.matches()) {
                log.warn("First line does not look like a PRF data pattern.");
                return false;
            }

            DataFormat dataFormat = new DataFormat(m);

            // line 2: has to repeat the repetition count of line 1
            if ((line = in.readLine()) == null
            || (line = line.trim()).length() == 0) {
                log.warn("premature EOF. Expected integer in line 2");
                return false;
            }

            try {
                if (Integer.parseInt(line) != dataFormat.maxRepetitions) {
                    log.warn("Expected " +
                        dataFormat.maxRepetitions + " in line 2");
                    return false;
                }
            }
            catch (NumberFormatException nfe) {
                log.warn("invalid integer in line 2", nfe);
                return false;
            }

            // line 3: km format description
            if ((line = in.readLine()) == null) {
                log.warn(
                    "premature EOF. Expected pattern for km extraction");
                return false;
            }

            // trimmed like line 1, so surrounding blanks do not
            // prevent the match
            m = KM_PATTERN.matcher(line.trim());

            if (!m.matches()) {
                log.warn(
                    "line " + in.getLineNumber() +
                    " does not look like a PRF km extraction pattern.");
                return false;
            }

            KMFormat kmFormat = new KMFormat(m);

            // line 4: number of lines to skip
            if ((line = in.readLine()) == null
            || (line = line.trim()).length() == 0) {
                log.warn("premature EOF. Expected skip row count.");
                return false;
            }

            int lineSkipCount;
            try {
                lineSkipCount = Integer.parseInt(line);
            }
            catch (NumberFormatException nfe) {
                log.warn(
                    "line " + in.getLineNumber() +
                    " is not a non-negative integer.");
                return false;
            }

            // Reported like any other malformed header instead of
            // throwing an exception no caller expects.
            if (lineSkipCount < 0) {
                log.warn(
                    "line " + in.getLineNumber() +
                    " is not a non-negative integer.");
                return false;
            }

            int skip = lineSkipCount;

            while ((line = in.readLine()) != null) {
                if (skip > 0) {
                    --skip;
                    continue;
                }
                double km;
                try {
                    km = kmFormat.extractKm(line);
                }
                catch (NumberFormatException iae) {
                    log.warn("cannot extract km in line " + in.getLineNumber());
                    return false;
                }

                Double station = Double.valueOf(km);

                Map<Double, Double> kmData = data.get(station);

                if (kmData == null) {
                    log.debug("found new km: " + station);
                    kmData = new TreeMap<Double, Double>();
                    data.put(station, kmData);
                }

                try {
                    if (!dataFormat.extractData(line, kmData)) {
                        skip = lineSkipCount;
                    }
                }
                catch (NumberFormatException nfe) {
                    log.warn("cannot extract data from line " + in.getLineNumber());
                    return false;
                }
            }
        }
        catch (IOException ioe) {
            // pass the exception as throwable to keep its stack trace
            log.error("cannot read PRF file '" + file + "'", ioe);
            return false;
        }
        finally {
            try {
                if (in != null) {
                    // also closes the wrapped stream
                    in.close();
                }
                else if (fis != null) {
                    fis.close();
                }
            }
            catch (IOException ioe) {
                log.error("cannot close PRF file '" + file + "'", ioe);
            }
        }

        return true;
    }

    /** Removes all stations collected so far from the result map. */
    public void reset() {
        this.data.clear();
    }

    /**
     * Walks the file tree below <code>root</code> without recursion
     * and parses every regular file whose name ends with ".prf"
     * (case-insensitive). One parser is reused for all files and reset
     * before each of them; the outcome of each file is only logged.
     *
     * @param root a directory to search or a single file
     */
    public static void parsePRFs(File root) {

        Stack<File> pending = new Stack<File>();
        pending.push(root);

        PRFParser parser = new PRFParser();

        while (!pending.isEmpty()) {
            File current = pending.pop();

            if (current.isDirectory()) {
                // listFiles() yields null if the directory is unreadable
                File [] children = current.listFiles();
                if (children != null) {
                    for (int i = 0; i < children.length; ++i) {
                        pending.push(children[i]);
                    }
                }
                continue;
            }

            boolean isPrf = current.isFile()
                && current.getName().toLowerCase().endsWith(".prf");

            if (!isPrf) {
                continue;
            }

            parser.reset();
            String outcome = parser.parse(current) ? "succeeded" : "failed";
            log.info("parsing " + outcome);
        }
    }

    /**
     * Command line entry point: treats every argument as a file or
     * directory and passes it to <code>parsePRFs</code>.
     *
     * @param args paths of PRF files or of directories containing them
     */
    public static void main(String [] args) {

        for (int i = 0; i < args.length; ++i) {
            File root = new File(args[i]);
            parsePRFs(root);
        }
    }
}
// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org