diff backend/src/main/java/org/dive4elements/river/importer/parsers/PRFParser.java @ 5838:5aa05a7a34b7

Rename modules to more fitting names.
author Sascha L. Teichmann <teichmann@intevation.de>
date Thu, 25 Apr 2013 15:23:37 +0200
parents flys-backend/src/main/java/org/dive4elements/river/importer/parsers/PRFParser.java@18619c1e7c2a
children 4dd33b86dc61
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/backend/src/main/java/org/dive4elements/river/importer/parsers/PRFParser.java	Thu Apr 25 15:23:37 2013 +0200
@@ -0,0 +1,419 @@
+package org.dive4elements.river.importer.parsers;
+
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.List;
+import java.util.ArrayList;
+import java.util.Collections;
+
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;
+
+import java.io.File;
+import java.io.InputStreamReader;
+import java.io.LineNumberReader;
+import java.io.FileInputStream;
+import java.io.IOException;
+
+import org.apache.log4j.Logger;
+
+import org.dive4elements.river.importer.XY;
+
+import org.dive4elements.artifacts.common.utils.FileTools;
+
+
+/**
+ * Parse files in .prf format and generate a mapping of double
+ * (km) to List of Points (XY).
+ */
+public class PRFParser implements CrossSectionParser
+{
+    private static Logger log = Logger.getLogger(PRFParser.class);
+
+    public static final String ENCODING =
+        System.getProperty("flys.backend.prf.encoding", "ISO-8859-1");
+
+    public static final Pattern DATA_PATTERN =
+        Pattern.compile(
+            "\\((\\d+)x\\s*,\\s*(\\d+)\\(" +
+            "\\s*f(\\d+)\\.(\\d+)\\s*,\\s*f(\\d+)\\.(\\d+)\\s*\\)?\\)?");
+
+    public static final Pattern KM_PATTERN =
+        Pattern.compile("\\((\\d+)x\\s*,\\s*f(\\d+)\\.(\\d+)\\s*\\)?");
+
+    public static final Pattern YEAR_PATTERN =
+        Pattern.compile("(\\d{4})");
+
+    public static final int MIN_YEAR = 1800;
+    public static final int MAX_YEAR = 2100;
+
+    public static class DataFormat {
+
+        protected int deleteChars;
+        protected int maxRepetitions;
+        protected int firstIntegerPlaces;
+        protected int firstFractionPlaces;
+        protected int secondIntegerPlaces;
+        protected int secondFractionPlaces;
+
+        protected double firstShift;
+        protected double secondShift;
+
+        public DataFormat() {
+        }
+
+        public DataFormat(Matcher m) {
+            deleteChars          = Integer.parseInt(m.group(1));
+            maxRepetitions       = Integer.parseInt(m.group(2));
+            firstIntegerPlaces   = Integer.parseInt(m.group(3));
+            firstFractionPlaces  = Integer.parseInt(m.group(4));
+            secondIntegerPlaces  = Integer.parseInt(m.group(5));
+            secondFractionPlaces = Integer.parseInt(m.group(6));
+
+            firstShift  = Math.pow(10, firstFractionPlaces);
+            secondShift = Math.pow(10, secondFractionPlaces);
+        }
+
+        public int extractData(String line, List<XY> kmData) {
+            int L = line.length();
+            if (L <= deleteChars) {
+                return -1;
+            }
+
+            int pos = deleteChars;
+
+            boolean debug = log.isDebugEnabled();
+
+
+            int rep = 0;
+            for (;rep < maxRepetitions; ++rep) {
+                if (pos >= L || pos + firstIntegerPlaces >= L) {
+                    break;
+                }
+                String first = line.substring(
+                    pos, pos + firstIntegerPlaces);
+
+                String second = line.substring(
+                    pos + firstIntegerPlaces,
+                    Math.min(L, pos+firstIntegerPlaces+secondIntegerPlaces));
+
+                double x, y;
+                try {
+                    x = Double.parseDouble(first);
+                    y = Double.parseDouble(second);
+                }
+                catch (NumberFormatException nfe) {
+                    // broken line -> substract from dataset skip
+                    return -1;
+                }
+
+                if (first.indexOf('.') < 0) {
+                    x /= firstShift;
+                }
+
+                if (firstFractionPlaces > 0) {
+                    x = (int)(x*firstShift)/firstShift;
+                }
+
+                if (second.indexOf('.') < 0) {
+                    y /= secondShift;
+                }
+
+                if (secondFractionPlaces > 0) {
+                    y = (int)(y*secondShift)/secondShift;
+                }
+
+                kmData.add(new XY(x, y, kmData.size()));
+
+                pos += firstIntegerPlaces + secondIntegerPlaces;
+            }
+
+            return rep == maxRepetitions ? 1 : 0;
+        }
+    } // class DataFormat
+
+    public static class KMFormat {
+
+        protected int deleteChars;
+        protected int integerPlaces;
+        protected int fractionPlaces;
+
+        protected double shift;
+
+        public KMFormat() {
+        }
+
+        public KMFormat(Matcher m) {
+            deleteChars    = Integer.parseInt(m.group(1));
+            integerPlaces  = Integer.parseInt(m.group(2));
+            fractionPlaces = Integer.parseInt(m.group(3));
+
+            shift = Math.pow(10, fractionPlaces);
+        }
+
+        public double extractKm(String line) throws NumberFormatException {
+
+            if (line.length() <= deleteChars) {
+                throw new NumberFormatException("line too short");
+            }
+
+            String kmS =
+                line.substring(deleteChars, deleteChars+integerPlaces);
+
+            double km = Double.parseDouble(kmS.trim());
+
+            if (kmS.indexOf('.') < 0) {
+                km /= shift;
+            }
+
+            return fractionPlaces > 0
+                ? ((int)(km*shift))/shift
+                : km;
+        }
+    } // class KMFormat
+
+    protected Map<Double, List<XY>> data;
+
+    protected Integer year;
+
+    protected String description;
+
+
+    public PRFParser() {
+        data = new TreeMap<Double, List<XY>>();
+    }
+
+    @Override
+    public Integer getYear() {
+        return year;
+    }
+
+    public void setYear(Integer year) {
+        this.year = year;
+    }
+
+    @Override
+    public String getDescription() {
+        return description;
+    }
+
+    public void setDescription(String description) {
+        this.description = description;
+    }
+
+    @Override
+    public Map<Double, List<XY>> getData() {
+        return data;
+    }
+
+    public void setData(Map<Double, List<XY>> data) {
+        this.data = data;
+    }
+
+    protected void sortLists() {
+        for (List<XY> xy: data.values()) {
+            Collections.sort(xy);
+        }
+    }
+
+    public static final Integer findYear(String s) {
+        Matcher m = YEAR_PATTERN.matcher(s);
+        while (m.find()) {
+            int year = Integer.parseInt(m.group(1));
+            if (year >= MIN_YEAR && year <= MAX_YEAR) {
+                return Integer.valueOf(year);
+            }
+        }
+        return null;
+    }
+
+    public boolean parse(File file) {
+
+        if (!(file.isFile() && file.canRead())) {
+            log.warn("PRF: cannot open file '" + file + "'");
+            return false;
+        }
+
+        log.info("parsing PRF file: '" + file + "'");
+
+        description = FileTools.removeExtension(file.getName());
+
+        year = findYear(file.getName());
+
+        if (year == null) {
+            File parent = file.getParentFile();
+            if (parent != null) {
+                description = parent.getName() + "/" + description;
+                year = findYear(parent.getName());
+            }
+        }
+
+        if (year != null) {
+            log.info("year of sounding: " + year);
+        }
+
+        LineNumberReader in = null;
+
+        try {
+            in =
+                new LineNumberReader(
+                new InputStreamReader(
+                new FileInputStream(file), ENCODING));
+
+            String line = in.readLine();
+
+            if (line == null || (line = line.trim()).length() == 0) {
+                log.warn("PRF: file is empty.");
+                return false;
+            }
+
+            Matcher m = DATA_PATTERN.matcher(line);
+
+            if (!m.matches()) {
+                log.warn("PRF: First line does not look like a PRF data pattern.");
+                return false;
+            }
+
+            DataFormat dataFormat = new DataFormat(m);
+
+            if ((line = in.readLine()) == null
+            || (line = line.trim()).length() == 0) {
+                log.warn("PRF: premature EOF. Expected integer in line 2");
+                return false;
+            }
+
+            try {
+                if (Integer.parseInt(line) != dataFormat.maxRepetitions) {
+                    log.warn("PRF: Expected " +
+                        dataFormat.maxRepetitions + " in line 2");
+                    return false;
+                }
+            }
+            catch (NumberFormatException nfe) {
+                log.warn("PRF: invalid integer in line 2", nfe);
+                return false;
+            }
+
+            if ((line = in.readLine()) == null) {
+                log.warn(
+                    "PRF: premature EOF. Expected pattern for km extraction");
+                return false;
+            }
+
+            m = KM_PATTERN.matcher(line);
+
+            if (!m.matches()) {
+                log.warn(
+                    "PRF: line 4 does not look like a PRF km extraction pattern.");
+                return false;
+            }
+
+            KMFormat kmFormat = new KMFormat(m);
+
+            if ((line = in.readLine()) == null
+            || (line = line.trim()).length() == 0) {
+                log.warn("PRF: premature EOF. Expected skip row count.");
+                return false;
+            }
+
+            int lineSkipCount;
+            try {
+                if ((lineSkipCount = Integer.parseInt(line)) < 0) {
+                    throw new IllegalArgumentException(lineSkipCount + " < 0");
+                }
+            }
+            catch (NumberFormatException nfe) {
+                log.warn(
+                    "PRF: line 5 is not an positive integer.");
+                return false;
+            }
+
+            int skip = lineSkipCount;
+
+            while ((line = in.readLine()) != null) {
+                if (skip > 0) {
+                    --skip;
+                    continue;
+                }
+                double km;
+                try {
+                    km = kmFormat.extractKm(line);
+                }
+                catch (NumberFormatException iae) {
+                    log.warn("PRF: cannot extract km in line " + in.getLineNumber());
+                    return false;
+                }
+
+                Double station = Double.valueOf(km);
+
+                List<XY> kmData = data.get(station);
+
+                if (kmData == null) {
+                    //log.debug("found new km: " + station);
+                    kmData = new ArrayList<XY>();
+                    data.put(station, kmData);
+                }
+
+                int c = dataFormat.extractData(line, kmData);
+                if (c < 1) {
+                    skip = lineSkipCount + c;
+                }
+            }
+
+            // sort all the lists by x and index
+            sortLists();
+        }
+        catch (IOException ioe) {
+            log.error("Error reading PRF file.", ioe);
+            return false;
+        }
+        finally {
+            if (in != null) {
+                try {
+                    in.close();
+                }
+                catch (IOException ioe) {
+                    log.error("Error closing PRF file.", ioe);
+                }
+            }
+        }
+
+        return true;
+    }
+
+    public void reset() {
+        data.clear();
+        year        = null;
+        description = null;
+    }
+
+    public void parsePRFs(File root, final CrossSectionParser.Callback callback) {
+
+        FileTools.walkTree(root, new FileTools.FileVisitor() {
+            @Override
+            public boolean visit(File file) {
+                if (file.isFile() && file.canRead()
+                && file.getName().toLowerCase().endsWith(".prf")
+                && (callback == null || callback.accept(file))) {
+                    reset();
+                    boolean success = parse(file);
+                    log.info("parsing " + (success ? "succeeded" : "failed"));
+                    if (success && callback != null) {
+                        callback.parsed(PRFParser.this);
+                    }
+                }
+                return true;
+            }
+        });
+    }
+
+    public static void main(String [] args) {
+
+        PRFParser parser = new PRFParser();
+
+        for (String arg: args) {
+            parser.parsePRFs(new File(arg), null);
+        }
+    }
+}
+// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org