Mercurial > dive4elements > river
view flys-backend/src/main/java/de/intevation/flys/importer/PRFParser.java @ 1208:2b57edd94735
Bumped Hibernate up to 3.6.5
flys-backend/trunk@2326 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author | Sascha L. Teichmann <sascha.teichmann@intevation.de> |
---|---|
date | Wed, 13 Jul 2011 12:38:11 +0000 |
parents | c7370734b872 |
children |
line wrap: on
line source
package de.intevation.flys.importer; import java.util.Map; import java.util.Stack; import java.util.TreeMap; import java.util.List; import java.util.ArrayList; import java.util.Collections; import java.util.regex.Pattern; import java.util.regex.Matcher; import java.io.File; import java.io.InputStreamReader; import java.io.LineNumberReader; import java.io.FileInputStream; import java.io.IOException; import org.apache.log4j.Logger; public class PRFParser { private static Logger log = Logger.getLogger(PRFParser.class); public static final String ENCODING = System.getProperty("flys.backend.prf.encoding", "ISO-8859-1"); public static final Pattern DATA_PATTERN = Pattern.compile( "\\((\\d+)x\\s*,\\s*(\\d+)\\(" + "\\s*f(\\d+)\\.(\\d+)\\s*,\\s*f(\\d+)\\.(\\d+)\\s*\\)?\\)?"); public static final Pattern KM_PATTERN = Pattern.compile("\\((\\d+)x\\s*,\\s*f(\\d+)\\.(\\d+)\\s*\\)?"); public static final Pattern YEAR_PATTERN = Pattern.compile("(\\d{4})"); public static final int MIN_YEAR = 1800; public static final int MAX_YEAR = 2100; public interface Callback { boolean prfAccept(File file); void prfParsed(PRFParser parser); } // interface Parser public static class DataFormat { protected int deleteChars; protected int maxRepetitions; protected int firstIntegerPlaces; protected int firstFractionPlaces; protected int secondIntegerPlaces; protected int secondFractionPlaces; protected double firstShift; protected double secondShift; public DataFormat() { } public DataFormat(Matcher m) { deleteChars = Integer.parseInt(m.group(1)); maxRepetitions = Integer.parseInt(m.group(2)); firstIntegerPlaces = Integer.parseInt(m.group(3)); firstFractionPlaces = Integer.parseInt(m.group(4)); secondIntegerPlaces = Integer.parseInt(m.group(5)); secondFractionPlaces = Integer.parseInt(m.group(6)); firstShift = Math.pow(10, firstFractionPlaces); secondShift = Math.pow(10, secondFractionPlaces); } public int extractData(String line, List<XY> kmData) { int L = line.length(); if (L <= deleteChars) { return -1; } int pos = deleteChars; boolean debug = log.isDebugEnabled(); int rep = 0; for (;rep < maxRepetitions; ++rep) { if (pos >= L || pos + firstIntegerPlaces >= L) { break; } String first = line.substring( pos, pos + firstIntegerPlaces); String second = line.substring( pos + firstIntegerPlaces, Math.min(L, pos+firstIntegerPlaces+secondIntegerPlaces)); double x, y; try { x = Double.parseDouble(first); y = Double.parseDouble(second); } catch (NumberFormatException nfe) { // broken line -> substract from dataset skip return -1; } if (first.indexOf('.') < 0) { x /= firstShift; } if (firstFractionPlaces > 0) { x = (int)(x*firstShift)/firstShift; } if (second.indexOf('.') < 0) { y /= secondShift; } if (secondFractionPlaces > 0) { y = (int)(y*secondShift)/secondShift; } kmData.add(new XY(x, y, kmData.size())); pos += firstIntegerPlaces + secondIntegerPlaces; } return rep == maxRepetitions ? 1 : 0; } } // class DataFormat public static class KMFormat { protected int deleteChars; protected int integerPlaces; protected int fractionPlaces; protected double shift; public KMFormat() { } public KMFormat(Matcher m) { deleteChars = Integer.parseInt(m.group(1)); integerPlaces = Integer.parseInt(m.group(2)); fractionPlaces = Integer.parseInt(m.group(3)); shift = Math.pow(10, fractionPlaces); } public double extractKm(String line) throws NumberFormatException { if (line.length() <= deleteChars) { throw new NumberFormatException("line too short"); } String kmS = line.substring(deleteChars, deleteChars+integerPlaces); double km = Double.parseDouble(kmS.trim()); if (kmS.indexOf('.') < 0) { km /= shift; } return fractionPlaces > 0 ? ((int)(km*shift))/shift : km; } } // class KMFormat protected Map<Double, List<XY>> data; protected Integer year; protected String description; public PRFParser() { data = new TreeMap<Double, List<XY>>(); } public Integer getYear() { return year; } public void setYear(Integer year) { this.year = year; } public String getDescription() { return description; } public void setDescription(String description) { this.description = description; } public Map<Double, List<XY>> getData() { return data; } public void setData(Map<Double, List<XY>> data) { this.data = data; } protected void sortLists() { for (List<XY> xy: data.values()) { Collections.sort(xy); } } public static final Integer findYear(String s) { Matcher m = YEAR_PATTERN.matcher(s); while (m.find()) { int year = Integer.parseInt(m.group(1)); if (year >= MIN_YEAR && year <= MAX_YEAR) { return Integer.valueOf(year); } } return null; } public boolean parse(File file) { if (!(file.isFile() && file.canRead())) { log.warn("cannot open file '" + file + "'"); return false; } log.info("parsing PRF file: '" + file + "'"); description = file.getName(); year = findYear(file.getName()); if (year == null) { File parent = file.getParentFile(); if (parent != null) { description = parent.getName() + "/" + description; year = findYear(parent.getName()); } } if (year != null) { log.info("year of sounding: " + year); } LineNumberReader in = null; try { in = new LineNumberReader( new InputStreamReader( new FileInputStream(file), ENCODING)); String line = in.readLine(); if (line == null || (line = line.trim()).length() == 0) { log.warn("file is empty."); return false; } Matcher m = DATA_PATTERN.matcher(line); if (!m.matches()) { log.warn("First line does not look like a PRF data pattern."); return false; } DataFormat dataFormat = new DataFormat(m); if ((line = in.readLine()) == null || (line = line.trim()).length() == 0) { log.warn("premature EOF. Expected integer in line 2"); return false; } try { if (Integer.parseInt(line) != dataFormat.maxRepetitions) { log.warn("Expected " + dataFormat.maxRepetitions + " in line 2"); return false; } } catch (NumberFormatException nfe) { log.warn("invalid integer in line 2", nfe); return false; } if ((line = in.readLine()) == null) { log.warn( "premature EOF. Expected pattern for km extraction"); return false; } m = KM_PATTERN.matcher(line); if (!m.matches()) { log.warn( "line 4 does not look like a PRF km extraction pattern."); return false; } KMFormat kmFormat = new KMFormat(m); if ((line = in.readLine()) == null || (line = line.trim()).length() == 0) { log.warn("premature EOF. Expected skip row count."); return false; } int lineSkipCount; try { if ((lineSkipCount = Integer.parseInt(line)) < 0) { throw new IllegalArgumentException(lineSkipCount + " < 0"); } } catch (NumberFormatException nfe) { log.warn( "line 5 is not an positive integer."); return false; } int skip = lineSkipCount; while ((line = in.readLine()) != null) { if (skip > 0) { --skip; continue; } double km; try { km = kmFormat.extractKm(line); } catch (NumberFormatException iae) { log.warn("cannot extract km in line + " + in.getLineNumber()); return false; } Double station = Double.valueOf(km); List<XY> kmData = data.get(station); if (kmData == null) { //log.debug("found new km: " + station); kmData = new ArrayList<XY>(); data.put(station, kmData); } int c = dataFormat.extractData(line, kmData); if (c < 1) { skip = lineSkipCount + c; } } // sort all the lists by x and index sortLists(); } catch (IOException ioe) { log.error(ioe); return false; } finally { if (in != null) { try { in.close(); } catch (IOException ioe) { log.error(ioe); } } } return true; } public void reset() { data.clear(); year = null; description = null; } public void parsePRFs(File root, Callback callback) { Stack<File> stack = new Stack<File>(); stack.push(root); while (!stack.empty()) { File file = stack.pop(); if (file.isDirectory()) { File [] files = file.listFiles(); if (files != null) { for (File f: files) { stack.push(f); } } } else if (file.isFile() && file.getName().toLowerCase().endsWith(".prf") && (callback == null || callback.prfAccept(file)) ) { reset(); boolean success = parse(file); log.info("parsing " + (success ? "succeeded" : "failed")); if (success && callback != null) { callback.prfParsed(this); } } } } public static void main(String [] args) { PRFParser parser = new PRFParser(); for (String arg: args) { parser.parsePRFs(new File(arg), null); } } } // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :