Mercurial > dive4elements > river
diff flys-backend/src/main/java/de/intevation/flys/importer/parsers/PRFParser.java @ 1211:f08fe480092c
Moved file parsers to separate package.
flys-backend/trunk@2337 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author | Sascha L. Teichmann <sascha.teichmann@intevation.de> |
---|---|
date | Fri, 15 Jul 2011 13:07:45 +0000 |
parents | |
children | cc88db4a5b34 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/flys-backend/src/main/java/de/intevation/flys/importer/parsers/PRFParser.java Fri Jul 15 13:07:45 2011 +0000 @@ -0,0 +1,424 @@ +package de.intevation.flys.importer.parsers; + +import java.util.Map; +import java.util.Stack; +import java.util.TreeMap; +import java.util.List; +import java.util.ArrayList; +import java.util.Collections; + +import java.util.regex.Pattern; +import java.util.regex.Matcher; + +import java.io.File; +import java.io.InputStreamReader; +import java.io.LineNumberReader; +import java.io.FileInputStream; +import java.io.IOException; + +import org.apache.log4j.Logger; + +import de.intevation.flys.importer.XY; + +public class PRFParser +{ + private static Logger log = Logger.getLogger(PRFParser.class); + + public static final String ENCODING = + System.getProperty("flys.backend.prf.encoding", "ISO-8859-1"); + + public static final Pattern DATA_PATTERN = + Pattern.compile( + "\\((\\d+)x\\s*,\\s*(\\d+)\\(" + + "\\s*f(\\d+)\\.(\\d+)\\s*,\\s*f(\\d+)\\.(\\d+)\\s*\\)?\\)?"); + + public static final Pattern KM_PATTERN = + Pattern.compile("\\((\\d+)x\\s*,\\s*f(\\d+)\\.(\\d+)\\s*\\)?"); + + public static final Pattern YEAR_PATTERN = + Pattern.compile("(\\d{4})"); + + public static final int MIN_YEAR = 1800; + public static final int MAX_YEAR = 2100; + + public interface Callback { + boolean prfAccept(File file); + void prfParsed(PRFParser parser); + } // interface Parser + + public static class DataFormat { + + protected int deleteChars; + protected int maxRepetitions; + protected int firstIntegerPlaces; + protected int firstFractionPlaces; + protected int secondIntegerPlaces; + protected int secondFractionPlaces; + + protected double firstShift; + protected double secondShift; + + public DataFormat() { + } + + public DataFormat(Matcher m) { + deleteChars = Integer.parseInt(m.group(1)); + maxRepetitions = Integer.parseInt(m.group(2)); + firstIntegerPlaces = Integer.parseInt(m.group(3)); + firstFractionPlaces = Integer.parseInt(m.group(4)); + secondIntegerPlaces = Integer.parseInt(m.group(5)); + secondFractionPlaces = Integer.parseInt(m.group(6)); + + firstShift = Math.pow(10, firstFractionPlaces); + secondShift = Math.pow(10, secondFractionPlaces); + } + + public int extractData(String line, List<XY> kmData) { + int L = line.length(); + if (L <= deleteChars) { + return -1; + } + + int pos = deleteChars; + + boolean debug = log.isDebugEnabled(); + + + int rep = 0; + for (;rep < maxRepetitions; ++rep) { + if (pos >= L || pos + firstIntegerPlaces >= L) { + break; + } + String first = line.substring( + pos, pos + firstIntegerPlaces); + + String second = line.substring( + pos + firstIntegerPlaces, + Math.min(L, pos+firstIntegerPlaces+secondIntegerPlaces)); + + double x, y; + try { + x = Double.parseDouble(first); + y = Double.parseDouble(second); + } + catch (NumberFormatException nfe) { + // broken line -> substract from dataset skip + return -1; + } + + if (first.indexOf('.') < 0) { + x /= firstShift; + } + + if (firstFractionPlaces > 0) { + x = (int)(x*firstShift)/firstShift; + } + + if (second.indexOf('.') < 0) { + y /= secondShift; + } + + if (secondFractionPlaces > 0) { + y = (int)(y*secondShift)/secondShift; + } + + kmData.add(new XY(x, y, kmData.size())); + + pos += firstIntegerPlaces + secondIntegerPlaces; + } + + return rep == maxRepetitions ? 1 : 0; + } + } // class DataFormat + + public static class KMFormat { + + protected int deleteChars; + protected int integerPlaces; + protected int fractionPlaces; + + protected double shift; + + public KMFormat() { + } + + public KMFormat(Matcher m) { + deleteChars = Integer.parseInt(m.group(1)); + integerPlaces = Integer.parseInt(m.group(2)); + fractionPlaces = Integer.parseInt(m.group(3)); + + shift = Math.pow(10, fractionPlaces); + } + + public double extractKm(String line) throws NumberFormatException { + + if (line.length() <= deleteChars) { + throw new NumberFormatException("line too short"); + } + + String kmS = + line.substring(deleteChars, deleteChars+integerPlaces); + + double km = Double.parseDouble(kmS.trim()); + + if (kmS.indexOf('.') < 0) { + km /= shift; + } + + return fractionPlaces > 0 + ? ((int)(km*shift))/shift + : km; + } + } // class KMFormat + + protected Map<Double, List<XY>> data; + + protected Integer year; + + protected String description; + + + public PRFParser() { + data = new TreeMap<Double, List<XY>>(); + } + + public Integer getYear() { + return year; + } + + public void setYear(Integer year) { + this.year = year; + } + + public String getDescription() { + return description; + } + + public void setDescription(String description) { + this.description = description; + } + + public Map<Double, List<XY>> getData() { + return data; + } + + public void setData(Map<Double, List<XY>> data) { + this.data = data; + } + + protected void sortLists() { + for (List<XY> xy: data.values()) { + Collections.sort(xy); + } + } + + public static final Integer findYear(String s) { + Matcher m = YEAR_PATTERN.matcher(s); + while (m.find()) { + int year = Integer.parseInt(m.group(1)); + if (year >= MIN_YEAR && year <= MAX_YEAR) { + return Integer.valueOf(year); + } + } + return null; + } + + public boolean parse(File file) { + + if (!(file.isFile() && file.canRead())) { + log.warn("cannot open file '" + file + "'"); + return false; + } + + log.info("parsing PRF file: '" + file + "'"); + + description = file.getName(); + + year = findYear(file.getName()); + + if (year == null) { + File parent = file.getParentFile(); + if (parent != null) { + description = parent.getName() + "/" + description; + year = findYear(parent.getName()); + } + } + + if (year != null) { + log.info("year of sounding: " + year); + } + + LineNumberReader in = null; + + try { + in = + new LineNumberReader( + new InputStreamReader( + new FileInputStream(file), ENCODING)); + + String line = in.readLine(); + + if (line == null || (line = line.trim()).length() == 0) { + log.warn("file is empty."); + return false; + } + + Matcher m = DATA_PATTERN.matcher(line); + + if (!m.matches()) { + log.warn("First line does not look like a PRF data pattern."); + return false; + } + + DataFormat dataFormat = new DataFormat(m); + + if ((line = in.readLine()) == null + || (line = line.trim()).length() == 0) { + log.warn("premature EOF. Expected integer in line 2"); + return false; + } + + try { + if (Integer.parseInt(line) != dataFormat.maxRepetitions) { + log.warn("Expected " + + dataFormat.maxRepetitions + " in line 2"); + return false; + } + } + catch (NumberFormatException nfe) { + log.warn("invalid integer in line 2", nfe); + return false; + } + + if ((line = in.readLine()) == null) { + log.warn( + "premature EOF. Expected pattern for km extraction"); + return false; + } + + m = KM_PATTERN.matcher(line); + + if (!m.matches()) { + log.warn( + "line 4 does not look like a PRF km extraction pattern."); + return false; + } + + KMFormat kmFormat = new KMFormat(m); + + if ((line = in.readLine()) == null + || (line = line.trim()).length() == 0) { + log.warn("premature EOF. Expected skip row count."); + return false; + } + + int lineSkipCount; + try { + if ((lineSkipCount = Integer.parseInt(line)) < 0) { + throw new IllegalArgumentException(lineSkipCount + " < 0"); + } + } + catch (NumberFormatException nfe) { + log.warn( + "line 5 is not an positive integer."); + return false; + } + + int skip = lineSkipCount; + + while ((line = in.readLine()) != null) { + if (skip > 0) { + --skip; + continue; + } + double km; + try { + km = kmFormat.extractKm(line); + } + catch (NumberFormatException iae) { + log.warn("cannot extract km in line + " + in.getLineNumber()); + return false; + } + + Double station = Double.valueOf(km); + + List<XY> kmData = data.get(station); + + if (kmData == null) { + //log.debug("found new km: " + station); + kmData = new ArrayList<XY>(); + data.put(station, kmData); + } + + int c = dataFormat.extractData(line, kmData); + if (c < 1) { + skip = lineSkipCount + c; + } + } + + // sort all the lists by x and index + sortLists(); + } + catch (IOException ioe) { + log.error(ioe); + return false; + } + finally { + if (in != null) { + try { + in.close(); + } + catch (IOException ioe) { + log.error(ioe); + } + } + } + + return true; + } + + public void reset() { + data.clear(); + year = null; + description = null; + } + + public void parsePRFs(File root, Callback callback) { + + Stack<File> stack = new Stack<File>(); + stack.push(root); + + while (!stack.empty()) { + File file = stack.pop(); + if (file.isDirectory()) { + File [] files = file.listFiles(); + if (files != null) { + for (File f: files) { + stack.push(f); + } + } + } + else if (file.isFile() + && file.getName().toLowerCase().endsWith(".prf") + && (callback == null || callback.prfAccept(file)) + ) { + reset(); + boolean success = parse(file); + log.info("parsing " + (success ? "succeeded" : "failed")); + if (success && callback != null) { + callback.prfParsed(this); + } + } + } + } + + public static void main(String [] args) { + + PRFParser parser = new PRFParser(); + + for (String arg: args) { + parser.parsePRFs(new File(arg), null); + } + } +} +// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :