teichmann@5829: package org.dive4elements.river.importer.parsers; sascha@1211: sascha@1211: import java.util.Map; sascha@1211: import java.util.TreeMap; sascha@1211: import java.util.List; sascha@1211: import java.util.ArrayList; sascha@1211: import java.util.Collections; sascha@1211: sascha@1211: import java.util.regex.Pattern; sascha@1211: import java.util.regex.Matcher; sascha@1211: sascha@1211: import java.io.File; sascha@1211: import java.io.InputStreamReader; sascha@1211: import java.io.LineNumberReader; sascha@1211: import java.io.FileInputStream; sascha@1211: import java.io.IOException; sascha@1211: sascha@1211: import org.apache.log4j.Logger; sascha@1211: teichmann@5829: import org.dive4elements.river.importer.XY; sascha@1211: teichmann@5829: import org.dive4elements.artifacts.common.utils.FileTools; sascha@1213: felix@4680: felix@4680: /** felix@4680: * Parse files in .prf format and generate a mapping of double felix@4680: * (km) to List of Points (XY). felix@4680: */ felix@4715: public class PRFParser implements CrossSectionParser sascha@1211: { sascha@1211: private static Logger log = Logger.getLogger(PRFParser.class); sascha@1211: sascha@1211: public static final String ENCODING = sascha@1211: System.getProperty("flys.backend.prf.encoding", "ISO-8859-1"); sascha@1211: sascha@1211: public static final Pattern DATA_PATTERN = sascha@1211: Pattern.compile( sascha@1211: "\\((\\d+)x\\s*,\\s*(\\d+)\\(" + sascha@1211: "\\s*f(\\d+)\\.(\\d+)\\s*,\\s*f(\\d+)\\.(\\d+)\\s*\\)?\\)?"); sascha@1211: sascha@1211: public static final Pattern KM_PATTERN = sascha@1211: Pattern.compile("\\((\\d+)x\\s*,\\s*f(\\d+)\\.(\\d+)\\s*\\)?"); sascha@1211: sascha@1211: public static final Pattern YEAR_PATTERN = sascha@1211: Pattern.compile("(\\d{4})"); sascha@1211: sascha@1211: public static final int MIN_YEAR = 1800; sascha@1211: public static final int MAX_YEAR = 2100; sascha@1211: sascha@1211: public static class DataFormat { sascha@1211: sascha@1211: protected int deleteChars; sascha@1211: protected int maxRepetitions; sascha@1211: protected int firstIntegerPlaces; sascha@1211: protected int firstFractionPlaces; sascha@1211: protected int secondIntegerPlaces; sascha@1211: protected int secondFractionPlaces; sascha@1211: sascha@1211: protected double firstShift; sascha@1211: protected double secondShift; sascha@1211: sascha@1211: public DataFormat() { sascha@1211: } sascha@1211: sascha@1211: public DataFormat(Matcher m) { sascha@1211: deleteChars = Integer.parseInt(m.group(1)); sascha@1211: maxRepetitions = Integer.parseInt(m.group(2)); sascha@1211: firstIntegerPlaces = Integer.parseInt(m.group(3)); sascha@1211: firstFractionPlaces = Integer.parseInt(m.group(4)); sascha@1211: secondIntegerPlaces = Integer.parseInt(m.group(5)); sascha@1211: secondFractionPlaces = Integer.parseInt(m.group(6)); sascha@1211: sascha@1211: firstShift = Math.pow(10, firstFractionPlaces); sascha@1211: secondShift = Math.pow(10, secondFractionPlaces); sascha@1211: } sascha@1211: sascha@1211: public int extractData(String line, List kmData) { sascha@1211: int L = line.length(); sascha@1211: if (L <= deleteChars) { sascha@1211: return -1; sascha@1211: } sascha@1211: sascha@1211: int pos = deleteChars; sascha@1211: sascha@1211: boolean debug = log.isDebugEnabled(); sascha@1211: sascha@1211: sascha@1211: int rep = 0; sascha@1211: for (;rep < maxRepetitions; ++rep) { sascha@1211: if (pos >= L || pos + firstIntegerPlaces >= L) { sascha@1211: break; sascha@1211: } sascha@1211: String first = line.substring( sascha@1211: pos, pos + firstIntegerPlaces); sascha@1211: sascha@1211: String second = line.substring( sascha@3334: pos + firstIntegerPlaces, sascha@1211: Math.min(L, pos+firstIntegerPlaces+secondIntegerPlaces)); sascha@1211: sascha@1211: double x, y; sascha@1211: try { sascha@1211: x = Double.parseDouble(first); sascha@1211: y = Double.parseDouble(second); sascha@1211: } sascha@1211: catch (NumberFormatException nfe) { sascha@1211: // broken line -> substract from dataset skip sascha@1211: return -1; sascha@1211: } sascha@1211: sascha@1211: if (first.indexOf('.') < 0) { sascha@1211: x /= firstShift; sascha@1211: } sascha@1211: sascha@1211: if (firstFractionPlaces > 0) { sascha@1211: x = (int)(x*firstShift)/firstShift; sascha@1211: } sascha@1211: sascha@1211: if (second.indexOf('.') < 0) { sascha@1211: y /= secondShift; sascha@1211: } sascha@1211: sascha@1211: if (secondFractionPlaces > 0) { sascha@1211: y = (int)(y*secondShift)/secondShift; sascha@1211: } sascha@1211: sascha@1211: kmData.add(new XY(x, y, kmData.size())); sascha@1211: sascha@1211: pos += firstIntegerPlaces + secondIntegerPlaces; sascha@1211: } sascha@1211: sascha@1211: return rep == maxRepetitions ? 1 : 0; sascha@1211: } sascha@1211: } // class DataFormat sascha@1211: sascha@1211: public static class KMFormat { sascha@1211: sascha@1211: protected int deleteChars; sascha@1211: protected int integerPlaces; sascha@1211: protected int fractionPlaces; sascha@1211: sascha@1211: protected double shift; sascha@1211: sascha@1211: public KMFormat() { sascha@1211: } sascha@1211: sascha@1211: public KMFormat(Matcher m) { sascha@1211: deleteChars = Integer.parseInt(m.group(1)); sascha@1211: integerPlaces = Integer.parseInt(m.group(2)); sascha@1211: fractionPlaces = Integer.parseInt(m.group(3)); sascha@1211: sascha@1211: shift = Math.pow(10, fractionPlaces); sascha@1211: } sascha@1211: sascha@1211: public double extractKm(String line) throws NumberFormatException { sascha@1211: sascha@1211: if (line.length() <= deleteChars) { sascha@1211: throw new NumberFormatException("line too short"); sascha@1211: } sascha@1211: sascha@1211: String kmS = sascha@1211: line.substring(deleteChars, deleteChars+integerPlaces); sascha@1211: sascha@1211: double km = Double.parseDouble(kmS.trim()); sascha@1211: sascha@1211: if (kmS.indexOf('.') < 0) { sascha@1211: km /= shift; sascha@1211: } sascha@1211: sascha@1211: return fractionPlaces > 0 sascha@1211: ? ((int)(km*shift))/shift sascha@1211: : km; sascha@1211: } sascha@1211: } // class KMFormat sascha@1211: sascha@1211: protected Map> data; sascha@1211: sascha@1211: protected Integer year; sascha@1211: sascha@1211: protected String description; sascha@1211: sascha@1211: sascha@1211: public PRFParser() { sascha@1211: data = new TreeMap>(); sascha@1211: } sascha@1211: felix@4715: @Override sascha@1211: public Integer getYear() { sascha@1211: return year; sascha@1211: } sascha@1211: sascha@1211: public void setYear(Integer year) { sascha@1211: this.year = year; sascha@1211: } sascha@1211: felix@4715: @Override sascha@1211: public String getDescription() { sascha@1211: return description; sascha@1211: } sascha@1211: sascha@1211: public void setDescription(String description) { sascha@1211: this.description = description; sascha@1211: } sascha@1211: felix@4715: @Override sascha@1211: public Map> getData() { sascha@1211: return data; sascha@1211: } sascha@1211: sascha@1211: public void setData(Map> data) { sascha@1211: this.data = data; sascha@1211: } sascha@1211: sascha@1211: protected void sortLists() { sascha@1211: for (List xy: data.values()) { sascha@1211: Collections.sort(xy); sascha@1211: } sascha@1211: } sascha@1211: sascha@1211: public static final Integer findYear(String s) { sascha@1211: Matcher m = YEAR_PATTERN.matcher(s); sascha@1211: while (m.find()) { sascha@1211: int year = Integer.parseInt(m.group(1)); sascha@1211: if (year >= MIN_YEAR && year <= MAX_YEAR) { sascha@1211: return Integer.valueOf(year); sascha@1211: } sascha@1211: } sascha@1211: return null; sascha@1211: } sascha@1211: sascha@1211: public boolean parse(File file) { sascha@1211: sascha@1211: if (!(file.isFile() && file.canRead())) { sascha@3660: log.warn("PRF: cannot open file '" + file + "'"); sascha@1211: return false; sascha@1211: } sascha@1211: sascha@1211: log.info("parsing PRF file: '" + file + "'"); sascha@1211: felix@4729: description = FileTools.removeExtension(file.getName()); sascha@1211: sascha@1211: year = findYear(file.getName()); sascha@1211: sascha@1211: if (year == null) { sascha@1211: File parent = file.getParentFile(); sascha@1211: if (parent != null) { sascha@1211: description = parent.getName() + "/" + description; sascha@1211: year = findYear(parent.getName()); sascha@1211: } sascha@1211: } sascha@1211: sascha@1211: if (year != null) { sascha@1211: log.info("year of sounding: " + year); sascha@1211: } sascha@1211: sascha@1211: LineNumberReader in = null; sascha@1211: sascha@1211: try { sascha@1211: in = sascha@1211: new LineNumberReader( sascha@1211: new InputStreamReader( sascha@1211: new FileInputStream(file), ENCODING)); sascha@1211: sascha@1211: String line = in.readLine(); sascha@1211: sascha@1211: if (line == null || (line = line.trim()).length() == 0) { sascha@3660: log.warn("PRF: file is empty."); sascha@1211: return false; sascha@1211: } sascha@1211: sascha@1211: Matcher m = DATA_PATTERN.matcher(line); sascha@1211: sascha@1211: if (!m.matches()) { sascha@3660: log.warn("PRF: First line does not look like a PRF data pattern."); sascha@1211: return false; sascha@1211: } sascha@1211: sascha@1211: DataFormat dataFormat = new DataFormat(m); sascha@1211: sascha@1211: if ((line = in.readLine()) == null sascha@1211: || (line = line.trim()).length() == 0) { sascha@3660: log.warn("PRF: premature EOF. Expected integer in line 2"); sascha@1211: return false; sascha@1211: } sascha@1211: sascha@1211: try { sascha@1211: if (Integer.parseInt(line) != dataFormat.maxRepetitions) { sascha@3660: log.warn("PRF: Expected " + sascha@1211: dataFormat.maxRepetitions + " in line 2"); sascha@1211: return false; sascha@1211: } sascha@1211: } sascha@1211: catch (NumberFormatException nfe) { sascha@3660: log.warn("PRF: invalid integer in line 2", nfe); sascha@1211: return false; sascha@1211: } sascha@1211: sascha@1211: if ((line = in.readLine()) == null) { sascha@1211: log.warn( sascha@3660: "PRF: premature EOF. Expected pattern for km extraction"); sascha@1211: return false; sascha@1211: } sascha@1211: sascha@1211: m = KM_PATTERN.matcher(line); sascha@1211: sascha@1211: if (!m.matches()) { sascha@1211: log.warn( sascha@3660: "PRF: line 4 does not look like a PRF km extraction pattern."); sascha@1211: return false; sascha@1211: } sascha@1211: sascha@1211: KMFormat kmFormat = new KMFormat(m); sascha@1211: sascha@1211: if ((line = in.readLine()) == null sascha@1211: || (line = line.trim()).length() == 0) { sascha@3660: log.warn("PRF: premature EOF. Expected skip row count."); sascha@1211: return false; sascha@1211: } sascha@1211: sascha@1211: int lineSkipCount; sascha@1211: try { sascha@1211: if ((lineSkipCount = Integer.parseInt(line)) < 0) { sascha@1211: throw new IllegalArgumentException(lineSkipCount + " < 0"); sascha@1211: } sascha@1211: } sascha@1211: catch (NumberFormatException nfe) { sascha@1211: log.warn( sascha@3660: "PRF: line 5 is not an positive integer."); sascha@1211: return false; sascha@1211: } sascha@1211: sascha@1211: int skip = lineSkipCount; sascha@1211: sascha@1211: while ((line = in.readLine()) != null) { sascha@1211: if (skip > 0) { sascha@1211: --skip; sascha@1211: continue; sascha@1211: } sascha@1211: double km; sascha@1211: try { sascha@1211: km = kmFormat.extractKm(line); sascha@1211: } sascha@1211: catch (NumberFormatException iae) { sascha@3660: log.warn("PRF: cannot extract km in line " + in.getLineNumber()); sascha@1211: return false; sascha@1211: } sascha@1211: sascha@1211: Double station = Double.valueOf(km); sascha@1211: sascha@1211: List kmData = data.get(station); sascha@1211: sascha@1211: if (kmData == null) { sascha@1211: //log.debug("found new km: " + station); sascha@1211: kmData = new ArrayList(); sascha@1211: data.put(station, kmData); sascha@1211: } sascha@1211: sascha@1211: int c = dataFormat.extractData(line, kmData); sascha@1211: if (c < 1) { sascha@1211: skip = lineSkipCount + c; sascha@1211: } sascha@1211: } sascha@1211: sascha@1211: // sort all the lists by x and index sascha@1211: sortLists(); sascha@1211: } sascha@1211: catch (IOException ioe) { sascha@3659: log.error("Error reading PRF file.", ioe); sascha@1211: return false; sascha@1211: } sascha@1211: finally { sascha@1211: if (in != null) { sascha@1211: try { sascha@1211: in.close(); sascha@1211: } sascha@1211: catch (IOException ioe) { sascha@3659: log.error("Error closing PRF file.", ioe); sascha@1211: } sascha@1211: } sascha@1211: } sascha@1211: sascha@1211: return true; sascha@1211: } sascha@1211: sascha@1211: public void reset() { sascha@1211: data.clear(); sascha@1211: year = null; sascha@1211: description = null; sascha@1211: } sascha@1211: felix@4715: public void parsePRFs(File root, final CrossSectionParser.Callback callback) { sascha@1211: sascha@1213: FileTools.walkTree(root, new FileTools.FileVisitor() { sascha@1213: @Override sascha@1213: public boolean visit(File file) { sascha@1216: if (file.isFile() && file.canRead() sascha@1213: && file.getName().toLowerCase().endsWith(".prf") felix@4715: && (callback == null || callback.accept(file))) { sascha@1213: reset(); sascha@1213: boolean success = parse(file); sascha@1213: log.info("parsing " + (success ? "succeeded" : "failed")); sascha@1213: if (success && callback != null) { felix@4715: callback.parsed(PRFParser.this); sascha@1211: } sascha@1211: } sascha@1213: return true; sascha@1211: } sascha@1213: }); sascha@1211: } sascha@1211: sascha@1211: public static void main(String [] args) { sascha@1211: sascha@1211: PRFParser parser = new PRFParser(); sascha@1211: sascha@1211: for (String arg: args) { sascha@1211: parser.parsePRFs(new File(arg), null); sascha@1211: } sascha@1211: } sascha@1211: } sascha@1211: // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :