Mercurial > dive4elements > river
changeset 1211:f08fe480092c
Moved file parsers to separate package.
flys-backend/trunk@2337 c6561f87-3c4e-4783-a992-168aeb5c3f6f
line wrap: on
line diff
--- a/flys-backend/ChangeLog Fri Jul 15 11:12:14 2011 +0000 +++ b/flys-backend/ChangeLog Fri Jul 15 13:07:45 2011 +0000 @@ -1,3 +1,11 @@ +2011-07-15 Sascha L. Teichmann <sascha.teichmann@intevation.de> + + * src/main/java/de/intevation/flys/importer/parsers/*.java: + New package. Moved the file parsers (*.gew, *.sta, *.at, *.glt, *.prf, *.km, *.wst) + into this package. + + * src/main/java/de/intevation/flys/importer/*.java: Adjusted the imports. + 2011-07-15 Sascha L. Teichmann <sascha.teichmann@intevation.de> * src/main/java/de/intevation/flys/model/HYKFormation.java,
--- a/flys-backend/src/main/java/de/intevation/flys/importer/AnnotationClassifier.java Fri Jul 15 11:12:14 2011 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,229 +0,0 @@ -package de.intevation.flys.importer; - -import org.w3c.dom.Document; -import org.w3c.dom.NodeList; -import org.w3c.dom.Element; - -import javax.xml.xpath.XPathConstants; - -import java.util.Map; -import java.util.HashMap; -import java.util.List; -import java.util.ArrayList; - -import java.util.regex.Pattern; -import java.util.regex.Matcher; - -import org.apache.log4j.Logger; - -import de.intevation.artifacts.common.utils.XMLUtils; - -public class AnnotationClassifier -{ - private static Logger log = Logger.getLogger(Importer.class); - - public static final String TYPES_XPATH = - "/annotation/types/type"; - - public static final String FILE_PATTERNS_XPATH = - "/annotation/patterns/file"; - - public static final String DESCRIPTION_PATTERNS_XPATH = - "/annotation/patterns/line"; - - - public static class Pair { - - protected Pattern pattern; - protected ImportAnnotationType annType; - - public Pair(Pattern pattern, ImportAnnotationType annType) { - this.pattern = pattern; - this.annType = annType; - } - - public ImportAnnotationType match(String s) { - Matcher m = pattern.matcher(s); - return m.matches() ? 
annType : null; - } - } // class Pair - - - protected Map<String, ImportAnnotationType> types; - protected List<Pair> filePatterns; - protected List<Pair> descPatterns; - - protected ImportAnnotationType defaultType; - - public AnnotationClassifier() { - } - - public AnnotationClassifier(Document rules) { - types = new HashMap<String, ImportAnnotationType>(); - filePatterns = new ArrayList<Pair>(); - descPatterns = new ArrayList<Pair>(); - - buildRules(rules); - } - - protected void buildRules(Document rules) { - buildTypes(rules); - buildFilePatterns(rules); - buildDescriptionPatterns(rules); - } - - protected void buildTypes(Document rules) { - - NodeList typeList = (NodeList)XMLUtils.xpath( - rules, - TYPES_XPATH, - XPathConstants.NODESET, - null); - - if (typeList == null) { - log.info("no rules found."); - return; - } - - for (int i = 0, N = typeList.getLength(); i < N; ++i) { - Element typeElement = (Element)typeList.item(i); - String name = typeElement.getAttribute("name"); - if (name.length() == 0) { - log.warn("rule has no name"); - continue; - } - - ImportAnnotationType aic = new ImportAnnotationType(name); - - types.put(name, aic); - - if (typeElement.getAttribute("default").equals("true")) { - defaultType = aic; - } - } - } - - protected void buildFilePatterns(Document rules) { - - NodeList patternList = (NodeList)XMLUtils.xpath( - rules, - FILE_PATTERNS_XPATH, - XPathConstants.NODESET, - null); - - if (patternList == null) { - log.info("no file patterns found."); - return; - } - - for (int i = 0, N = patternList.getLength(); i < N; ++i) { - Element element = (Element)patternList.item(i); - Pair pair = buildPair(element); - if (pair != null) { - filePatterns.add(pair); - } - } - } - - protected void buildDescriptionPatterns(Document rules) { - - NodeList patternList = (NodeList)XMLUtils.xpath( - rules, - DESCRIPTION_PATTERNS_XPATH, - XPathConstants.NODESET, - null); - - if (patternList == null) { - log.info("no line patterns found."); - return; - } - - 
for (int i = 0, N = patternList.getLength(); i < N; ++i) { - Element element = (Element)patternList.item(i); - Pair pair = buildPair(element); - if (pair != null) { - descPatterns.add(pair); - } - } - } - - protected Pair buildPair(Element element) { - String pattern = element.getAttribute("pattern"); - String type = element.getAttribute("type"); - - if (pattern.length() == 0) { - log.warn("pattern has no 'pattern' attribute."); - return null; - } - - if (type.length() == 0) { - log.warn("pattern has no 'type' attribute."); - return null; - } - - ImportAnnotationType annType = types.get(type); - - if (annType == null) { - log.warn("pattern has unknown type '" + type + "'"); - return null; - } - - Pattern p; - - try { - p = Pattern.compile(pattern, - Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE); - } - catch (IllegalArgumentException iae) { - log.warn("pattern '" + pattern + "' is invalid.", iae); - return null; - } - - return new Pair(p, annType); - } - - public ImportAnnotationType getDefaultType() { - return defaultType; - } - - public ImportAnnotationType classifyFile(String filename) { - return classifyFile(filename, null); - } - - public ImportAnnotationType classifyFile( - String filename, - ImportAnnotationType def - ) { - if (filename.toLowerCase().endsWith(".km")) { - filename = filename.substring(0, filename.length()-3); - } - - for (Pair pair: filePatterns) { - ImportAnnotationType annType = pair.match(filename); - if (annType != null) { - return annType; - } - } - - return def; - } - - public ImportAnnotationType classifyDescription(String description) { - return classifyDescription(description, null); - } - - public ImportAnnotationType classifyDescription( - String description, - ImportAnnotationType def - ) { - for (Pair pair: descPatterns) { - ImportAnnotationType annType = pair.match(description); - if (annType != null) { - return annType; - } - } - - return def; - } -} -// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :
--- a/flys-backend/src/main/java/de/intevation/flys/importer/AnnotationsParser.java Fri Jul 15 11:12:14 2011 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,189 +0,0 @@ -package de.intevation.flys.importer; - -import java.util.HashMap; -import java.util.TreeSet; -import java.util.List; -import java.util.ArrayList; - -import java.io.IOException; -import java.io.File; -import java.io.LineNumberReader; -import java.io.InputStreamReader; -import java.io.FileInputStream; - -import java.math.BigDecimal; - -import org.apache.log4j.Logger; - -import de.intevation.flys.utils.FileTools; - -public class AnnotationsParser -{ - private static Logger log = Logger.getLogger(AnnotationsParser.class); - - public static final String ENCODING = "ISO-8859-1"; - - public static final String [] TO_SCAN = { - "Basisdaten", - "Streckendaten" - }; - - protected HashMap<String, ImportAttribute> attributes; - protected HashMap<String, ImportPosition> positions; - protected TreeSet<ImportAnnotation> annotations; - protected AnnotationClassifier classifier; - - public AnnotationsParser() { - this(null); - } - - public AnnotationsParser(AnnotationClassifier classifier) { - attributes = new HashMap<String, ImportAttribute>(); - positions = new HashMap<String, ImportPosition>(); - annotations = new TreeSet<ImportAnnotation>(); - this.classifier = classifier; - } - - public void parseFile(File file) throws IOException { - log.info("parsing km file: '" + file + "'"); - - ImportAnnotationType defaultIAT = null; - - if (classifier != null) { - defaultIAT = classifier.classifyFile( - file.getName(), - classifier.getDefaultType()); - } - - LineNumberReader in = null; - try { - in = - new LineNumberReader( - new InputStreamReader( - new FileInputStream(file), ENCODING)); - - String line = null; - while ((line = in.readLine()) != null) { - if ((line = line.trim()).length() == 0 - || line.startsWith("*")) { - continue; - } - - String [] parts = line.split("\\s*;\\s*"); - - if (parts.length < 3) { 
- log.warn("not enough columns in line " - + in.getLineNumber()); - continue; - } - - ImportPosition position = positions.get(parts[0]); - if (position == null) { - position = new ImportPosition(parts[0]); - positions.put(parts[0], position); - } - - ImportAttribute attribute = attributes.get(parts[1]); - if (attribute == null) { - attribute = new ImportAttribute(parts[1]); - attributes.put(parts[1], attribute); - } - - String [] r = parts[2].replace(",", ".").split("\\s*#\\s*"); - - BigDecimal from, to; - - try { - from = new BigDecimal(r[0]); - to = r.length < 2 ? null : new BigDecimal(r[1]); - if (to != null && from.compareTo(to) > 0) { - BigDecimal t = from; from = to; to = t; - } - } - catch (NumberFormatException nfe) { - log.warn("invalid number in line " + in.getLineNumber()); - continue; - } - - ImportEdge edge = null; - - if (parts.length == 4) { // Only 'Unterkante' - try { - edge = new ImportEdge( - null, - new BigDecimal(parts[3].trim().replace(',', '.'))); - } - catch (NumberFormatException nfe) { - log.warn("cannot parse 'Unterkante' in line " + - in.getLineNumber()); - } - } - else if (parts.length > 4) { // 'Unterkante' and 'Oberkante' - String bottom = parts[3].trim().replace(',', '.'); - String top = parts[4].trim().replace(',', '.'); - try { - BigDecimal b = bottom.length() == 0 - ? null - : new BigDecimal(bottom); - BigDecimal t = top.length() == 0 - ? null - : new BigDecimal(top); - edge = new ImportEdge(t, b); - } - catch (NumberFormatException nfe) { - log.warn( - "cannot parse 'Unterkante' or 'Oberkante' in line " - + in.getLineNumber()); - } - } - - ImportRange range = new ImportRange(from, to); - - ImportAnnotationType type = classifier != null - ? 
classifier.classifyDescription(line, defaultIAT) - : null; - - ImportAnnotation annotation = new ImportAnnotation( - attribute, position, range, edge, type); - - if (!annotations.add(annotation)) { - log.warn("duplicated annotation '" + parts[0] + - "' in line " + in.getLineNumber()); - } - } - } - finally { - if (in != null) { - in.close(); - } - } - } - - public void parse(File root) throws IOException { - - for (String toScan: TO_SCAN) { - File directory = FileTools.repair(new File(root, toScan)); - if (!directory.isDirectory()) { - log.warn("'" + directory + "' is not a directory."); - continue; - } - File [] files = directory.listFiles(); - if (files == null) { - log.warn("cannot list directory '" + directory + "'"); - continue; - } - - for (File file: files) { - if (file.isFile() && file.canRead() - && file.getName().toLowerCase().endsWith(".km")) { - parseFile(file); - } - } - } // for all directories to scan - } - - public List<ImportAnnotation> getAnnotations() { - return new ArrayList<ImportAnnotation>(annotations); - } -} -// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :
--- a/flys-backend/src/main/java/de/intevation/flys/importer/AtFileParser.java Fri Jul 15 11:12:14 2011 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,204 +0,0 @@ -package de.intevation.flys.importer; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.InputStreamReader; -import java.io.IOException; -import java.math.BigDecimal; - -import org.apache.log4j.Logger; - -import de.intevation.flys.importer.ImportDischargeTable; -import de.intevation.flys.importer.ImportDischargeTableValue; - -import java.util.regex.Pattern; -import java.util.regex.Matcher; - -import java.util.Date; -import java.util.Calendar; - -public class AtFileParser { - - public static final String ENCODING = "ISO-8859-1"; - - private static Logger logger = Logger.getLogger(AtFileParser.class); - - - // regular expression from hell to find out time range - public static final Pattern DATE_LINE = Pattern.compile( - "^\\*\\s*Abflu[^t]+tafel?\\s*([^\\d]+)" + - "(\\d{1,2})?\\.?(\\d{1,2})?\\.?(\\d{2,4})\\s*(?:(?:bis)|-)?\\s*" + - "(?:(\\d{1,2})?\\.?(\\d{1,2})?\\.?(\\d{2,4}))?\\s*.*$"); - - public AtFileParser() { - } - - - public ImportDischargeTable parse(File file) throws IOException { - return parse(file, "", 0); - } - - public ImportDischargeTable parse( - File file, - String prefix, - int kind - ) - throws IOException { - - logger.info("parsing AT file: " + file); - - BufferedReader br = null; - - String line = null; - - boolean beginning = true; - - ImportDischargeTable dischargeTable = - new ImportDischargeTable(kind, prefix + file.getName()); - - Date from = null; - Date to = null; - - try { - br = new BufferedReader( - new InputStreamReader( - new FileInputStream(file), ENCODING)); - - while ((line = br.readLine()) != null) { - - String tmp = line.trim(); - - if (tmp.length() == 0) { - continue; - } - - Matcher m = DATE_LINE.matcher(tmp); - if (m.matches()) { - from = guessDate(m.group(2), m.group(3), m.group(4)); - to = 
guessDate(m.group(5), m.group(6), m.group(7)); - if (from == null) { - Date t = from; from = to; to = t; - } - continue; - } - - if (tmp.startsWith("#! name=")) { - // XXX Skip the name, because we don't know where to save - // it at the moment - - //String name = tmp.substring(8); - continue; - } - - if (tmp.startsWith("#") || tmp.startsWith("*")) { - continue; - } - - String[] splits = tmp.replace(',', '.').split("\\s+"); - - if ((splits.length < 2) || (splits.length > 11)) { - logger.warn("Found an invalid row in the AT file."); - continue; - } - - String strW = splits[0].trim(); - double W = Double.parseDouble(strW); - - /* shift is used to differenciate between lines with - * exactly 10 Qs and lines with less than 10 Qs. The shift - * is only modified when it is the first line. - */ - int shift = -1; - - if (splits.length != 11 && beginning) { - shift = 10 - splits.length; - } - - - for (int i = 1; i < splits.length; i++) { - double iW = W + shift + i; - double iQ = Double.parseDouble(splits[i].trim()); - - dischargeTable.addDischargeTableValue( - new ImportDischargeTableValue( - new BigDecimal(iQ/100.0), - new BigDecimal(iW/100.0))); - } - - beginning = false; - } - } - catch (NumberFormatException pe) { - logger.warn(pe.getMessage()); - } - finally { - if (br != null) { - br.close(); - } - } - - if (from != null) { - if (to != null && from.compareTo(to) > 0) { - Date t = from; from = to; to = t; - } - logger.info("from: " + from + " to: " + to); - ImportTimeInterval interval = new ImportTimeInterval(from, to); - dischargeTable.setTimeInterval(interval); - } - - logger.info("Finished parsing AT file: " + file); - - return dischargeTable; - } - - public static Date guessDate(String day, String month, String year) { - if (day == null && month == null && year == null) { - return null; - } - - logger.debug("day: " + day + " month: " + month + " year: " + year); - - int dayI = 15; - if (day != null) { - try { - dayI = Integer.parseInt(day.trim()); - } - catch 
(NumberFormatException nfe) { - } - } - - int monthI = 6; - if (month != null) { - try { - monthI = Integer.parseInt(month.trim()); - } - catch (NumberFormatException nfe) { - } - } - - int yearI = 1900; - if (year != null) { - try { - yearI = Integer.parseInt(year.trim()); - if (yearI < 100) { - if (yearI < 20) { - yearI += 2000; - } - else { - yearI += 1900; - } - } - } - catch (NumberFormatException nfe) { - } - } - - Calendar cal = Calendar.getInstance(); - cal.set(yearI, monthI-1, dayI, 12, 0, 0); - long ms = cal.getTimeInMillis(); - cal.setTimeInMillis(ms - ms%1000); - return cal.getTime(); - } -} -// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :
--- a/flys-backend/src/main/java/de/intevation/flys/importer/ImportGauge.java Fri Jul 15 11:12:14 2011 +0000 +++ b/flys-backend/src/main/java/de/intevation/flys/importer/ImportGauge.java Fri Jul 15 13:07:45 2011 +0000 @@ -19,6 +19,9 @@ import org.apache.log4j.Logger; +import de.intevation.flys.importer.parsers.AtFileParser; +import de.intevation.flys.importer.parsers.StaFileParser; + public class ImportGauge { private static Logger log = Logger.getLogger(ImportGauge.class);
--- a/flys-backend/src/main/java/de/intevation/flys/importer/ImportRiver.java Fri Jul 15 11:12:14 2011 +0000 +++ b/flys-backend/src/main/java/de/intevation/flys/importer/ImportRiver.java Fri Jul 15 13:07:45 2011 +0000 @@ -20,6 +20,12 @@ import de.intevation.flys.utils.FileTools; import de.intevation.flys.utils.FileTools.HashedFile; +import de.intevation.flys.importer.parsers.PRFParser; +import de.intevation.flys.importer.parsers.AnnotationsParser; +import de.intevation.flys.importer.parsers.AnnotationClassifier; +import de.intevation.flys.importer.parsers.PegelGltParser; +import de.intevation.flys.importer.parsers.WstParser; + import org.hibernate.Session; import org.hibernate.Query;
--- a/flys-backend/src/main/java/de/intevation/flys/importer/Importer.java Fri Jul 15 11:12:14 2011 +0000 +++ b/flys-backend/src/main/java/de/intevation/flys/importer/Importer.java Fri Jul 15 13:07:45 2011 +0000 @@ -2,6 +2,9 @@ import de.intevation.artifacts.common.utils.XMLUtils; +import de.intevation.flys.importer.parsers.InfoGewParser; +import de.intevation.flys.importer.parsers.AnnotationClassifier; + import java.io.File; import java.io.IOException;
--- a/flys-backend/src/main/java/de/intevation/flys/importer/InfoGewParser.java Fri Jul 15 11:12:14 2011 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,133 +0,0 @@ -package de.intevation.flys.importer; - -import java.io.File; - -import java.util.List; -import java.util.ArrayList; - -import java.util.regex.Pattern; -import java.util.regex.Matcher; - -import java.io.IOException; -import java.io.LineNumberReader; -import java.io.FileInputStream; -import java.io.InputStreamReader; - -import org.apache.log4j.Logger; - -import de.intevation.flys.utils.FileTools; - -public class InfoGewParser -{ - private static Logger log = Logger.getLogger(InfoGewParser.class); - - public static final String ENCODING = "ISO-8859-1"; - - public static final Pattern GEWAESSER = - Pattern.compile("^\\s*Gew\u00e4sser\\s*:\\s*(.+)"); - - public static final Pattern WST_DATEI = - Pattern.compile("^\\s*WSTDatei\\s*:\\s*(.+)"); - - public static final Pattern BB_INFO = - Pattern.compile("^\\s*B\\+B-Info\\s*:\\s*(.+)"); - - protected ArrayList<ImportRiver> rivers; - - protected AnnotationClassifier annotationClassifier; - - public InfoGewParser() { - this(null); - } - - public InfoGewParser(AnnotationClassifier annotationClassifier) { - rivers = new ArrayList<ImportRiver>(); - this.annotationClassifier = annotationClassifier; - } - - public List<ImportRiver> getRivers() { - return rivers; - } - - public static final String normalize(String f) { - return f.replace("\\", "/").replace("/", File.separator); - } - - public void parse(File file) throws IOException { - - LineNumberReader in = null; - - File root = file.getParentFile(); - - try { - in = - new LineNumberReader( - new InputStreamReader( - new FileInputStream(file), ENCODING)); - - String line = null; - - String riverName = null; - File wstFile = null; - File bbInfoFile = null; - - while ((line = in.readLine()) != null) { - if ((line = line.trim()).length() == 0) { - continue; - } - Matcher m = GEWAESSER.matcher(line); - - if 
(m.matches()) { - String river = m.group(1); - log.info("Found river '" + river + "'"); - if (riverName != null) { - rivers.add(new ImportRiver( - riverName, - wstFile, - bbInfoFile, - annotationClassifier)); - } - riverName = river; - wstFile = null; - bbInfoFile = null; - } - else if ((m = WST_DATEI.matcher(line)).matches()) { - String wstFilename = m.group(1); - File wst = new File(wstFilename = normalize(wstFilename)); - if (!wst.isAbsolute()) { - wst = new File(root, wstFilename); - } - wst = FileTools.repair(wst); - log.info("Found wst file '" + wst + "'"); - if (!wst.isFile() || !wst.canRead()) { - log.warn("cannot access WST file '" + wstFilename + "'"); - continue; - } - wstFile = wst; - } - else if ((m = BB_INFO.matcher(line)).matches()) { - //TODO: Make it relative to the wst file. - String bbInfo = m.group(1); - bbInfoFile = new File(normalize(bbInfo)); - } - } - if (riverName != null) { - rivers.add(new ImportRiver( - riverName, - wstFile, - bbInfoFile, - annotationClassifier)); - } - } - finally { - if (in != null) { - in.close(); - } - } - - for (ImportRiver river: rivers) { - river.parseDependencies(); - } - } -} -// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :
--- a/flys-backend/src/main/java/de/intevation/flys/importer/PRFParser.java Fri Jul 15 11:12:14 2011 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,422 +0,0 @@ -package de.intevation.flys.importer; - -import java.util.Map; -import java.util.Stack; -import java.util.TreeMap; -import java.util.List; -import java.util.ArrayList; -import java.util.Collections; - -import java.util.regex.Pattern; -import java.util.regex.Matcher; - -import java.io.File; -import java.io.InputStreamReader; -import java.io.LineNumberReader; -import java.io.FileInputStream; -import java.io.IOException; - -import org.apache.log4j.Logger; - -public class PRFParser -{ - private static Logger log = Logger.getLogger(PRFParser.class); - - public static final String ENCODING = - System.getProperty("flys.backend.prf.encoding", "ISO-8859-1"); - - public static final Pattern DATA_PATTERN = - Pattern.compile( - "\\((\\d+)x\\s*,\\s*(\\d+)\\(" + - "\\s*f(\\d+)\\.(\\d+)\\s*,\\s*f(\\d+)\\.(\\d+)\\s*\\)?\\)?"); - - public static final Pattern KM_PATTERN = - Pattern.compile("\\((\\d+)x\\s*,\\s*f(\\d+)\\.(\\d+)\\s*\\)?"); - - public static final Pattern YEAR_PATTERN = - Pattern.compile("(\\d{4})"); - - public static final int MIN_YEAR = 1800; - public static final int MAX_YEAR = 2100; - - public interface Callback { - boolean prfAccept(File file); - void prfParsed(PRFParser parser); - } // interface Parser - - public static class DataFormat { - - protected int deleteChars; - protected int maxRepetitions; - protected int firstIntegerPlaces; - protected int firstFractionPlaces; - protected int secondIntegerPlaces; - protected int secondFractionPlaces; - - protected double firstShift; - protected double secondShift; - - public DataFormat() { - } - - public DataFormat(Matcher m) { - deleteChars = Integer.parseInt(m.group(1)); - maxRepetitions = Integer.parseInt(m.group(2)); - firstIntegerPlaces = Integer.parseInt(m.group(3)); - firstFractionPlaces = Integer.parseInt(m.group(4)); - secondIntegerPlaces = 
Integer.parseInt(m.group(5)); - secondFractionPlaces = Integer.parseInt(m.group(6)); - - firstShift = Math.pow(10, firstFractionPlaces); - secondShift = Math.pow(10, secondFractionPlaces); - } - - public int extractData(String line, List<XY> kmData) { - int L = line.length(); - if (L <= deleteChars) { - return -1; - } - - int pos = deleteChars; - - boolean debug = log.isDebugEnabled(); - - - int rep = 0; - for (;rep < maxRepetitions; ++rep) { - if (pos >= L || pos + firstIntegerPlaces >= L) { - break; - } - String first = line.substring( - pos, pos + firstIntegerPlaces); - - String second = line.substring( - pos + firstIntegerPlaces, - Math.min(L, pos+firstIntegerPlaces+secondIntegerPlaces)); - - double x, y; - try { - x = Double.parseDouble(first); - y = Double.parseDouble(second); - } - catch (NumberFormatException nfe) { - // broken line -> substract from dataset skip - return -1; - } - - if (first.indexOf('.') < 0) { - x /= firstShift; - } - - if (firstFractionPlaces > 0) { - x = (int)(x*firstShift)/firstShift; - } - - if (second.indexOf('.') < 0) { - y /= secondShift; - } - - if (secondFractionPlaces > 0) { - y = (int)(y*secondShift)/secondShift; - } - - kmData.add(new XY(x, y, kmData.size())); - - pos += firstIntegerPlaces + secondIntegerPlaces; - } - - return rep == maxRepetitions ? 
1 : 0; - } - } // class DataFormat - - public static class KMFormat { - - protected int deleteChars; - protected int integerPlaces; - protected int fractionPlaces; - - protected double shift; - - public KMFormat() { - } - - public KMFormat(Matcher m) { - deleteChars = Integer.parseInt(m.group(1)); - integerPlaces = Integer.parseInt(m.group(2)); - fractionPlaces = Integer.parseInt(m.group(3)); - - shift = Math.pow(10, fractionPlaces); - } - - public double extractKm(String line) throws NumberFormatException { - - if (line.length() <= deleteChars) { - throw new NumberFormatException("line too short"); - } - - String kmS = - line.substring(deleteChars, deleteChars+integerPlaces); - - double km = Double.parseDouble(kmS.trim()); - - if (kmS.indexOf('.') < 0) { - km /= shift; - } - - return fractionPlaces > 0 - ? ((int)(km*shift))/shift - : km; - } - } // class KMFormat - - protected Map<Double, List<XY>> data; - - protected Integer year; - - protected String description; - - - public PRFParser() { - data = new TreeMap<Double, List<XY>>(); - } - - public Integer getYear() { - return year; - } - - public void setYear(Integer year) { - this.year = year; - } - - public String getDescription() { - return description; - } - - public void setDescription(String description) { - this.description = description; - } - - public Map<Double, List<XY>> getData() { - return data; - } - - public void setData(Map<Double, List<XY>> data) { - this.data = data; - } - - protected void sortLists() { - for (List<XY> xy: data.values()) { - Collections.sort(xy); - } - } - - public static final Integer findYear(String s) { - Matcher m = YEAR_PATTERN.matcher(s); - while (m.find()) { - int year = Integer.parseInt(m.group(1)); - if (year >= MIN_YEAR && year <= MAX_YEAR) { - return Integer.valueOf(year); - } - } - return null; - } - - public boolean parse(File file) { - - if (!(file.isFile() && file.canRead())) { - log.warn("cannot open file '" + file + "'"); - return false; - } - - log.info("parsing 
PRF file: '" + file + "'"); - - description = file.getName(); - - year = findYear(file.getName()); - - if (year == null) { - File parent = file.getParentFile(); - if (parent != null) { - description = parent.getName() + "/" + description; - year = findYear(parent.getName()); - } - } - - if (year != null) { - log.info("year of sounding: " + year); - } - - LineNumberReader in = null; - - try { - in = - new LineNumberReader( - new InputStreamReader( - new FileInputStream(file), ENCODING)); - - String line = in.readLine(); - - if (line == null || (line = line.trim()).length() == 0) { - log.warn("file is empty."); - return false; - } - - Matcher m = DATA_PATTERN.matcher(line); - - if (!m.matches()) { - log.warn("First line does not look like a PRF data pattern."); - return false; - } - - DataFormat dataFormat = new DataFormat(m); - - if ((line = in.readLine()) == null - || (line = line.trim()).length() == 0) { - log.warn("premature EOF. Expected integer in line 2"); - return false; - } - - try { - if (Integer.parseInt(line) != dataFormat.maxRepetitions) { - log.warn("Expected " + - dataFormat.maxRepetitions + " in line 2"); - return false; - } - } - catch (NumberFormatException nfe) { - log.warn("invalid integer in line 2", nfe); - return false; - } - - if ((line = in.readLine()) == null) { - log.warn( - "premature EOF. Expected pattern for km extraction"); - return false; - } - - m = KM_PATTERN.matcher(line); - - if (!m.matches()) { - log.warn( - "line 4 does not look like a PRF km extraction pattern."); - return false; - } - - KMFormat kmFormat = new KMFormat(m); - - if ((line = in.readLine()) == null - || (line = line.trim()).length() == 0) { - log.warn("premature EOF. 
Expected skip row count."); - return false; - } - - int lineSkipCount; - try { - if ((lineSkipCount = Integer.parseInt(line)) < 0) { - throw new IllegalArgumentException(lineSkipCount + " < 0"); - } - } - catch (NumberFormatException nfe) { - log.warn( - "line 5 is not an positive integer."); - return false; - } - - int skip = lineSkipCount; - - while ((line = in.readLine()) != null) { - if (skip > 0) { - --skip; - continue; - } - double km; - try { - km = kmFormat.extractKm(line); - } - catch (NumberFormatException iae) { - log.warn("cannot extract km in line + " + in.getLineNumber()); - return false; - } - - Double station = Double.valueOf(km); - - List<XY> kmData = data.get(station); - - if (kmData == null) { - //log.debug("found new km: " + station); - kmData = new ArrayList<XY>(); - data.put(station, kmData); - } - - int c = dataFormat.extractData(line, kmData); - if (c < 1) { - skip = lineSkipCount + c; - } - } - - // sort all the lists by x and index - sortLists(); - } - catch (IOException ioe) { - log.error(ioe); - return false; - } - finally { - if (in != null) { - try { - in.close(); - } - catch (IOException ioe) { - log.error(ioe); - } - } - } - - return true; - } - - public void reset() { - data.clear(); - year = null; - description = null; - } - - public void parsePRFs(File root, Callback callback) { - - Stack<File> stack = new Stack<File>(); - stack.push(root); - - while (!stack.empty()) { - File file = stack.pop(); - if (file.isDirectory()) { - File [] files = file.listFiles(); - if (files != null) { - for (File f: files) { - stack.push(f); - } - } - } - else if (file.isFile() - && file.getName().toLowerCase().endsWith(".prf") - && (callback == null || callback.prfAccept(file)) - ) { - reset(); - boolean success = parse(file); - log.info("parsing " + (success ? 
"succeeded" : "failed")); - if (success && callback != null) { - callback.prfParsed(this); - } - } - } - } - - public static void main(String [] args) { - - PRFParser parser = new PRFParser(); - - for (String arg: args) { - parser.parsePRFs(new File(arg), null); - } - } -} -// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :
--- a/flys-backend/src/main/java/de/intevation/flys/importer/PegelGltParser.java Fri Jul 15 11:12:14 2011 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,99 +0,0 @@ -package de.intevation.flys.importer; - -import java.io.File; - -import java.util.List; -import java.util.ArrayList; - -import java.io.IOException; -import java.io.LineNumberReader; -import java.io.FileInputStream; -import java.io.InputStreamReader; - -import java.math.BigDecimal; - -import org.apache.log4j.Logger; - -import de.intevation.flys.utils.FileTools; - -public class PegelGltParser -{ - private static Logger log = Logger.getLogger(PegelGltParser.class); - - public static final String ENCODING = "ISO-8859-1"; - - public static final String KM = "km:"; - - protected List<ImportGauge> gauges; - - public PegelGltParser() { - gauges = new ArrayList<ImportGauge>(); - } - - public List<ImportGauge> getGauges() { - return gauges; - } - - public void parse(File file) throws IOException { - - File parent = file.getParentFile(); - - log.info("parsing GLT file '" + file + "'"); - LineNumberReader in = null; - try { - in = - new LineNumberReader( - new InputStreamReader( - new FileInputStream(file), ENCODING)); - - String line = null; - while ((line = in.readLine()) != null) { - if ((line = line.trim()).length() == 0) { - continue; - } - - int kmPos = line.indexOf(KM); - if (kmPos < 0) { - log.warn("no gauge found in line " + in.getLineNumber()); - continue; - } - - String gaugeName = line.substring(0, kmPos).trim(); - log.info("Found gauge '" + gaugeName + "'"); - - line = line.substring(kmPos + KM.length()).trim(); - - String [] parts = line.split("\\s+"); - if (parts.length < 4) { - log.warn("line " + in.getLineNumber() - + " has not enough columns"); - continue; - } - - BigDecimal from = new BigDecimal(parts[0].replace(",", ".")); - BigDecimal to = new BigDecimal(parts[1].replace(",", ".")); - if (from.compareTo(from) > 0) { - BigDecimal t = from; from = to; to = t; - } - ImportRange range = 
new ImportRange(from, to); - File staFile = FileTools.repair(new File(parent, parts[2])); - File atFile = FileTools.repair(new File(parent, parts[3])); - - if (log.isDebugEnabled()) { - log.debug("\tfrom: " + from); - log.debug("\tto: " + to); - log.debug("\tsta: " + staFile); - log.debug("\tat: " + atFile); - } - - gauges.add(new ImportGauge(range, staFile, atFile)); - } - } - finally { - if (in != null) { - in.close(); - } - } - } -} -// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :
--- a/flys-backend/src/main/java/de/intevation/flys/importer/StaFileParser.java Fri Jul 15 11:12:14 2011 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,160 +0,0 @@ -package de.intevation.flys.importer; - -import java.io.File; -import java.io.IOException; -import java.io.LineNumberReader; -import java.io.FileInputStream; -import java.io.InputStreamReader; - -import java.math.BigDecimal; - -import java.util.regex.Pattern; -import java.util.regex.Matcher; - -import java.util.HashMap; -import java.util.ArrayList; - -import org.apache.log4j.Logger; - -public class StaFileParser -{ - private static Logger log = Logger.getLogger(StaFileParser.class); - - public static final String ENCODING = "ISO-8859-1"; - - public static final String TYPES = - System.getProperty("flys.backend.main.value.types", "QWTD"); - - public static final Pattern QWTD_ = - Pattern.compile("\\s*([^\\s]+)\\s+([^\\s]+)\\s+([" + - Pattern.quote(TYPES) + "]).*"); - - public StaFileParser() { - } - - public boolean parse(ImportGauge gauge) throws IOException { - - File file = gauge.getStaFile(); - - log.info("parsing STA file: " + file); - LineNumberReader in = null; - try { - in = - new LineNumberReader( - new InputStreamReader( - new FileInputStream(file), ENCODING)); - - String line = in.readLine(); - - if (line == null) { - log.warn("STA file is empty."); - return false; - } - - if (line.length() < 37) { - log.warn("first line in STA file is too short."); - return false; - } - - gauge.setName(line.substring(16, 37).trim()); - - String [] values = line.substring(38).trim().split("\\s+", 2); - - if (values.length < 2) { - log.warn("Not enough columns for aeo and datum"); - } - try { - gauge.setAeo(new BigDecimal(values[0].replace(",", "."))); - gauge.setDatum(new BigDecimal(values[1].replace(",", "."))); - } - catch (NumberFormatException nfe) { - log.warn("cannot parse aeo or datum"); - return false; - } - - line = in.readLine(); - - if (line == null) { - log.warn("STA file has not enough 
lines"); - return false; - } - - if (line.length() < 36) { - log.warn("second line is too short"); - return false; - } - - try { - gauge.setStation( - new BigDecimal(line.substring(29, 36).trim())); - } - catch (NumberFormatException nfe) { - log.warn("parsing of the datum of the gauge failed"); - return false; - } - - // overread the next six lines - for (int i = 0; i < 6; ++i) { - if ((line = in.readLine()) == null) { - log.warn("STA file is too short"); - return false; - } - } - - HashMap<String, ImportMainValueType> types = - new HashMap<String, ImportMainValueType>(); - - ArrayList<ImportNamedMainValue> namedMainValues = - new ArrayList<ImportNamedMainValue>(); - - ArrayList<ImportMainValue> mainValues = - new ArrayList<ImportMainValue>(); - - while ((line = in.readLine()) != null) { - Matcher m = QWTD_.matcher(line); - if (m.matches()) { - BigDecimal value; - try { - value = new BigDecimal(m.group(2).replace(",", ".")); - } - catch (NumberFormatException nfe) { - log.warn("value not parseable in line " - + in.getLineNumber()); - continue; - } - String typeString = m.group(3); - log.debug("\t type: " + typeString); - ImportMainValueType type = types.get(typeString); - if (type == null) { - type = new ImportMainValueType(typeString); - types.put(typeString, type); - } - String name = m.group(1); - ImportNamedMainValue namedMainValue = - new ImportNamedMainValue(type, name); - namedMainValues.add(namedMainValue); - - ImportMainValue mainValue = - new ImportMainValue(gauge, namedMainValue, value); - - mainValues.add(mainValue); - } - else { - // TODO: treat as a comment - } - } - gauge.setMainValueTypes( - new ArrayList<ImportMainValueType>(types.values())); - gauge.setNamedMainValues(namedMainValues); - gauge.setMainValues(mainValues); - } - finally { - if (in != null) { - in.close(); - } - } - log.info("finished parsing STA file: " + file); - return true; - } -} -// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :
--- a/flys-backend/src/main/java/de/intevation/flys/importer/WstParser.java Fri Jul 15 11:12:14 2011 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,425 +0,0 @@ -package de.intevation.flys.importer; - -import java.util.ArrayList; -import java.util.HashSet; - -import java.io.File; -import java.io.IOException; -import java.io.LineNumberReader; -import java.io.InputStreamReader; -import java.io.FileInputStream; - -import java.text.NumberFormat; - -import org.apache.log4j.Logger; - -import de.intevation.flys.utils.StringUtil; - -import java.util.regex.Pattern; -import java.util.regex.Matcher; - -import java.math.BigDecimal; - -public class WstParser -{ - private static Logger log = Logger.getLogger(WstParser.class); - - public static final String COLUMN_BEZ_TEXT = "column-bez-text"; - public static final String COLUMN_BEZ_BREITE = "column-bez-breite"; - public static final String COLUMN_QUELLE = "column-quelle"; - public static final String COLUMN_DATUM = "column-datum"; - - public static final BigDecimal UNDEFINED_ZERO = - new BigDecimal(0.0); - public static final BigDecimal MIN_RANGE = - new BigDecimal(-Double.MAX_VALUE); - public static final BigDecimal MAX_RANGE = - new BigDecimal(Double.MAX_VALUE); - - public static final String ENCODING = "ISO-8859-1"; - - public static final Pattern UNIT_COMMENT = - Pattern.compile("\\*\\s*[kK][mM]\\s+(.+)"); - - public static final Pattern UNIT = - Pattern.compile("[^\\[]*\\[([^]]+)\\].*"); - - public static final BigDecimal INTERVAL_GAP = - new BigDecimal(0.00001); - - protected ImportWst wst; - - protected ImportRange lastRange; - - public WstParser() { - } - - public ImportWst getWst() { - return wst; - } - - public void setWst(ImportWst wst) { - this.wst = wst; - } - - public void parse(File file) throws IOException { - - log.info("Parsing WST file '" + file + "'"); - - wst = new ImportWst(file.getName()); - - LineNumberReader in = null; - try { - in = - new LineNumberReader( - new InputStreamReader( - new 
FileInputStream(file), ENCODING)); - - String input; - boolean first = true; - int columnCount = 0; - - String [] lsBezeichner = null; - String [] langBezeichner = null; - int [] colNaWidths = null; - String [] quellen = null; - String [] daten = null; - - BigDecimal [] aktAbfluesse = null; - BigDecimal [] firstAbfluesse = null; - - BigDecimal minKm = MAX_RANGE; - BigDecimal maxKm = MIN_RANGE; - - boolean columnHeaderChecked = false; - - String einheit = "Wasserstand [NN + m]"; - - HashSet<BigDecimal> kms = new HashSet<BigDecimal>(); - - while ((input = in.readLine()) != null) { - String line = input; - if (first) { // fetch number of columns - if ((line = line.trim()).length() == 0) { - continue; - } - try { - columnCount = Integer.parseInt(line); - if (columnCount <= 0) { - throw new NumberFormatException( - "number columns <= 0"); - } - log.debug("Number of columns: " + columnCount); - wst.setNumberColumns(columnCount); - lsBezeichner = new String[columnCount]; - } - catch (NumberFormatException nfe) { - log.warn(nfe); - continue; - } - first = false; - continue; - } - - line = line.replace(',', '.'); - - if (line.startsWith("*\u001f")) { - BigDecimal [] data = - parseLineAsDouble(line, columnCount, false, true); - - if (aktAbfluesse != null) { - addInterval(minKm, maxKm, aktAbfluesse); - minKm = MAX_RANGE; - maxKm = MIN_RANGE; - } - - aktAbfluesse = new BigDecimal[columnCount]; - log.debug("new q range: " + columnCount); - for (int i = 0; i < Math.min(columnCount, data.length); ++i) { - if (data[i] != null) { - log.debug(" column: " + data[i]); - aktAbfluesse[i] = data[i]; - } - } - - if (firstAbfluesse == null) { - firstAbfluesse = (BigDecimal [])aktAbfluesse.clone(); - } - continue; - } - - if (line.startsWith("*!")) { - String spezial = line.substring(2).trim(); - - if (spezial.length() == 0) { - continue; - } - - if (spezial.startsWith(COLUMN_BEZ_TEXT)) { - spezial = spezial.substring(COLUMN_BEZ_TEXT.length()).trim(); - if (spezial.length() == 0) { - 
continue; - } - langBezeichner = StringUtil.splitQuoted(spezial, '"'); - } - else if (spezial.startsWith(COLUMN_BEZ_BREITE)) { - spezial = spezial.substring(COLUMN_BEZ_BREITE.length()).trim(); - - if (spezial.length() == 0) { - continue; - } - - String[] split = spezial.split("\\s+"); - - colNaWidths = new int[split.length]; - for (int i=0; i < split.length; i++) { - colNaWidths[i] = Integer.parseInt(split[i]); - } - } - else if (spezial.startsWith(COLUMN_QUELLE)) { - if (spezial.length() == 0) { - continue; - } - quellen = StringUtil.splitQuoted(spezial, '"'); - } - else if (spezial.startsWith(COLUMN_DATUM)) { - spezial = spezial.substring(COLUMN_DATUM.length()).trim(); - if (spezial.length() == 0) { - continue; - } - daten = StringUtil.splitQuoted(spezial, '"'); - } - continue; - } - - if (line.length() < 11) { - continue; - } - - if (line.startsWith("*")) { - Matcher m = UNIT_COMMENT.matcher(line); - if (m.matches()) { - log.debug("unit comment found"); - // XXX: This hack is needed because desktop - // FLYS is broken figuring out the unit - String [] units = m.group(1).split("\\s{2,}"); - m = UNIT.matcher(units[0]); - einheit = m.matches() ? 
m.group(1) : units[0]; - log.debug("unit: " + einheit); - } - continue; - } - - if (firstAbfluesse != null) { - if (!columnHeaderChecked) { - int unknownCount = 0; - HashSet<String> uniqueColumnNames = - new HashSet<String>(); - for (int i = 0; i < lsBezeichner.length; ++i) { - if (lsBezeichner[i] == null - || lsBezeichner[i].length() == 0) { - double q = firstAbfluesse[i].doubleValue(); - if (q < 0.001) { - lsBezeichner[i] = - "<unbekannt #" + unknownCount + ">"; - ++unknownCount; - } - else { - lsBezeichner[i] = "Q="+format(q); - } - } - String candidate = lsBezeichner[i]; - int collision = 1; - while (!uniqueColumnNames.add(candidate)) { - candidate = lsBezeichner[i] + - " (" + collision + ")"; - ++collision; - } - wst.getColumn(i).setName(candidate); - } - columnHeaderChecked = true; - } - - BigDecimal [] data = - parseLineAsDouble(line, columnCount, true, false); - - BigDecimal kaem = data[0]; - - if (!kms.add(kaem)) { - log.warn( - "km " + kaem + - " (line " + in.getLineNumber() + - ") found more than once. 
-> ignored"); - continue; - } - - if (kaem.compareTo(minKm) < 0) { - minKm = kaem; - } - if (kaem.compareTo(maxKm) > 0) { - maxKm = kaem; - } - - // extract values - for (int i = 0; i < columnCount; ++i) { - addValue(kaem, data[i+1], i); - } - - } - else { // firstAbfluesse == null - if (langBezeichner != null) { - lsBezeichner = StringUtil.fitArray( - langBezeichner, lsBezeichner); - } - else if (colNaWidths != null) { - for (int j = 0, i = 0, N = input.length(); - j < colNaWidths.length && i < N; - i += colNaWidths[j++] - ) { - lsBezeichner[j] = input.substring( - i, i+colNaWidths[j]).trim(); - } - } - else { - // first column begins at position 8 in line - for (int i = 8, col = 0; i < input.length(); i += 9) { - if ((i + 9) > input.length()) { - i = input.length() - 10; - } - // one column header is 9 chars wide - lsBezeichner[col++] = - input.substring(i, i + 9).trim(); - - if (col == lsBezeichner.length) { - break; - } - } - } - } - - } - addInterval(minKm, maxKm, aktAbfluesse); - } - finally { - if (in != null) { - in.close(); - } - } - } - - protected void addValue(BigDecimal km, BigDecimal w, int index) { - if (w != null) { - ImportWstColumn column = wst.getColumn(index); - column.addColumnValue(km, w); - } - } - - private static final NumberFormat NF = getNumberFormat(); - - private static final NumberFormat getNumberFormat() { - NumberFormat nf = NumberFormat.getInstance(); - nf.setMinimumFractionDigits(2); - nf.setMaximumFractionDigits(2); - return nf; - } - - protected static String format(double value) { - return NF.format(value); - } - - protected void addInterval( - BigDecimal from, - BigDecimal to, - BigDecimal [] values - ) { - log.debug("addInterval: " + from + " " + to); - - if (values == null || from == MAX_RANGE) { - return; - } - - if (to.compareTo(from) < 0) { - BigDecimal t = from; from = to; to = t; - } - - ImportRange range = new ImportRange(from, to); - - // little workaround to make the q ranges tightly fit. 
- // Leave a very small gap to ensure that the range queries - // still work. - - if (lastRange != null) { - double d1 = Math.abs( - lastRange.getB().doubleValue() - range.getA().doubleValue()); - double d2 = Math.abs( - range.getB().doubleValue() - lastRange.getA().doubleValue()); - - if (d1 < d2) { - lastRange.setB(range.getA().subtract(INTERVAL_GAP)); - } - else { - range.setA(lastRange.getB().subtract(INTERVAL_GAP)); - } - } - - for (int i = 0; i < values.length; ++i) { - ImportWstColumn column = wst.getColumn(i); - ImportWstQRange wstQRange = new ImportWstQRange(range, values[i]); - column.addColumnQRange(wstQRange); - } - - lastRange = range; - } - - private static final BigDecimal [] parseLineAsDouble( - String line, - int count, - boolean bStation, - boolean bParseEmptyAsZero - ) { - String [] tokens = parseLine(line, count, bStation); - - BigDecimal [] doubles = new BigDecimal[tokens.length]; - - for (int i = 0; i < doubles.length; ++i) { - String token = tokens[i].trim(); - if (token.length() != 0) { - doubles[i] = new BigDecimal(token); - } - else if (bParseEmptyAsZero) { - doubles[i] = UNDEFINED_ZERO; - } - } - - return doubles; - } - - private static String [] parseLine( - String line, - int tokenCount, - boolean bParseStation - ) { - ArrayList<String> strings = new ArrayList<String>(); - - if (bParseStation) { - if (line.length() < 8) { - throw new IllegalArgumentException("station too short"); - } - strings.add(line.substring(0, 8)); - } - - int pos = 9; - for (int i = 0; i < tokenCount; ++i) { - if (line.length() >= pos + 8) { - strings.add(line.substring(pos, pos + 8)); - } - else { - strings.add(""); - } - pos += 9; - } - - return strings.toArray(new String[strings.size()]); - } -} -// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/flys-backend/src/main/java/de/intevation/flys/importer/parsers/AnnotationClassifier.java Fri Jul 15 13:07:45 2011 +0000 @@ -0,0 +1,232 @@ +package de.intevation.flys.importer.parsers; + +import org.w3c.dom.Document; +import org.w3c.dom.NodeList; +import org.w3c.dom.Element; + +import javax.xml.xpath.XPathConstants; + +import java.util.Map; +import java.util.HashMap; +import java.util.List; +import java.util.ArrayList; + +import java.util.regex.Pattern; +import java.util.regex.Matcher; + +import org.apache.log4j.Logger; + +import de.intevation.artifacts.common.utils.XMLUtils; + +import de.intevation.flys.importer.ImportAnnotationType; +import de.intevation.flys.importer.Importer; + +public class AnnotationClassifier +{ + private static Logger log = Logger.getLogger(Importer.class); + + public static final String TYPES_XPATH = + "/annotation/types/type"; + + public static final String FILE_PATTERNS_XPATH = + "/annotation/patterns/file"; + + public static final String DESCRIPTION_PATTERNS_XPATH = + "/annotation/patterns/line"; + + + public static class Pair { + + protected Pattern pattern; + protected ImportAnnotationType annType; + + public Pair(Pattern pattern, ImportAnnotationType annType) { + this.pattern = pattern; + this.annType = annType; + } + + public ImportAnnotationType match(String s) { + Matcher m = pattern.matcher(s); + return m.matches() ? 
annType : null; + } + } // class Pair + + + protected Map<String, ImportAnnotationType> types; + protected List<Pair> filePatterns; + protected List<Pair> descPatterns; + + protected ImportAnnotationType defaultType; + + public AnnotationClassifier() { + } + + public AnnotationClassifier(Document rules) { + types = new HashMap<String, ImportAnnotationType>(); + filePatterns = new ArrayList<Pair>(); + descPatterns = new ArrayList<Pair>(); + + buildRules(rules); + } + + protected void buildRules(Document rules) { + buildTypes(rules); + buildFilePatterns(rules); + buildDescriptionPatterns(rules); + } + + protected void buildTypes(Document rules) { + + NodeList typeList = (NodeList)XMLUtils.xpath( + rules, + TYPES_XPATH, + XPathConstants.NODESET, + null); + + if (typeList == null) { + log.info("no rules found."); + return; + } + + for (int i = 0, N = typeList.getLength(); i < N; ++i) { + Element typeElement = (Element)typeList.item(i); + String name = typeElement.getAttribute("name"); + if (name.length() == 0) { + log.warn("rule has no name"); + continue; + } + + ImportAnnotationType aic = new ImportAnnotationType(name); + + types.put(name, aic); + + if (typeElement.getAttribute("default").equals("true")) { + defaultType = aic; + } + } + } + + protected void buildFilePatterns(Document rules) { + + NodeList patternList = (NodeList)XMLUtils.xpath( + rules, + FILE_PATTERNS_XPATH, + XPathConstants.NODESET, + null); + + if (patternList == null) { + log.info("no file patterns found."); + return; + } + + for (int i = 0, N = patternList.getLength(); i < N; ++i) { + Element element = (Element)patternList.item(i); + Pair pair = buildPair(element); + if (pair != null) { + filePatterns.add(pair); + } + } + } + + protected void buildDescriptionPatterns(Document rules) { + + NodeList patternList = (NodeList)XMLUtils.xpath( + rules, + DESCRIPTION_PATTERNS_XPATH, + XPathConstants.NODESET, + null); + + if (patternList == null) { + log.info("no line patterns found."); + return; + } + + 
for (int i = 0, N = patternList.getLength(); i < N; ++i) { + Element element = (Element)patternList.item(i); + Pair pair = buildPair(element); + if (pair != null) { + descPatterns.add(pair); + } + } + } + + protected Pair buildPair(Element element) { + String pattern = element.getAttribute("pattern"); + String type = element.getAttribute("type"); + + if (pattern.length() == 0) { + log.warn("pattern has no 'pattern' attribute."); + return null; + } + + if (type.length() == 0) { + log.warn("pattern has no 'type' attribute."); + return null; + } + + ImportAnnotationType annType = types.get(type); + + if (annType == null) { + log.warn("pattern has unknown type '" + type + "'"); + return null; + } + + Pattern p; + + try { + p = Pattern.compile(pattern, + Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE); + } + catch (IllegalArgumentException iae) { + log.warn("pattern '" + pattern + "' is invalid.", iae); + return null; + } + + return new Pair(p, annType); + } + + public ImportAnnotationType getDefaultType() { + return defaultType; + } + + public ImportAnnotationType classifyFile(String filename) { + return classifyFile(filename, null); + } + + public ImportAnnotationType classifyFile( + String filename, + ImportAnnotationType def + ) { + if (filename.toLowerCase().endsWith(".km")) { + filename = filename.substring(0, filename.length()-3); + } + + for (Pair pair: filePatterns) { + ImportAnnotationType annType = pair.match(filename); + if (annType != null) { + return annType; + } + } + + return def; + } + + public ImportAnnotationType classifyDescription(String description) { + return classifyDescription(description, null); + } + + public ImportAnnotationType classifyDescription( + String description, + ImportAnnotationType def + ) { + for (Pair pair: descPatterns) { + ImportAnnotationType annType = pair.match(description); + if (annType != null) { + return annType; + } + } + + return def; + } +} +// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/flys-backend/src/main/java/de/intevation/flys/importer/parsers/AnnotationsParser.java Fri Jul 15 13:07:45 2011 +0000 @@ -0,0 +1,196 @@ +package de.intevation.flys.importer.parsers; + +import java.util.HashMap; +import java.util.TreeSet; +import java.util.List; +import java.util.ArrayList; + +import java.io.IOException; +import java.io.File; +import java.io.LineNumberReader; +import java.io.InputStreamReader; +import java.io.FileInputStream; + +import java.math.BigDecimal; + +import org.apache.log4j.Logger; + +import de.intevation.flys.utils.FileTools; + +import de.intevation.flys.importer.ImportAnnotation; +import de.intevation.flys.importer.ImportRange; +import de.intevation.flys.importer.ImportEdge; +import de.intevation.flys.importer.ImportAnnotationType; +import de.intevation.flys.importer.ImportAttribute; +import de.intevation.flys.importer.ImportPosition; + +public class AnnotationsParser +{ + private static Logger log = Logger.getLogger(AnnotationsParser.class); + + public static final String ENCODING = "ISO-8859-1"; + + public static final String [] TO_SCAN = { + "Basisdaten", + "Streckendaten" + }; + + protected HashMap<String, ImportAttribute> attributes; + protected HashMap<String, ImportPosition> positions; + protected TreeSet<ImportAnnotation> annotations; + protected AnnotationClassifier classifier; + + public AnnotationsParser() { + this(null); + } + + public AnnotationsParser(AnnotationClassifier classifier) { + attributes = new HashMap<String, ImportAttribute>(); + positions = new HashMap<String, ImportPosition>(); + annotations = new TreeSet<ImportAnnotation>(); + this.classifier = classifier; + } + + public void parseFile(File file) throws IOException { + log.info("parsing km file: '" + file + "'"); + + ImportAnnotationType defaultIAT = null; + + if (classifier != null) { + defaultIAT = classifier.classifyFile( + file.getName(), + classifier.getDefaultType()); + } + + LineNumberReader in = null; 
+ try { + in = + new LineNumberReader( + new InputStreamReader( + new FileInputStream(file), ENCODING)); + + String line = null; + while ((line = in.readLine()) != null) { + if ((line = line.trim()).length() == 0 + || line.startsWith("*")) { + continue; + } + + String [] parts = line.split("\\s*;\\s*"); + + if (parts.length < 3) { + log.warn("not enough columns in line " + + in.getLineNumber()); + continue; + } + + ImportPosition position = positions.get(parts[0]); + if (position == null) { + position = new ImportPosition(parts[0]); + positions.put(parts[0], position); + } + + ImportAttribute attribute = attributes.get(parts[1]); + if (attribute == null) { + attribute = new ImportAttribute(parts[1]); + attributes.put(parts[1], attribute); + } + + String [] r = parts[2].replace(",", ".").split("\\s*#\\s*"); + + BigDecimal from, to; + + try { + from = new BigDecimal(r[0]); + to = r.length < 2 ? null : new BigDecimal(r[1]); + if (to != null && from.compareTo(to) > 0) { + BigDecimal t = from; from = to; to = t; + } + } + catch (NumberFormatException nfe) { + log.warn("invalid number in line " + in.getLineNumber()); + continue; + } + + ImportEdge edge = null; + + if (parts.length == 4) { // Only 'Unterkante' + try { + edge = new ImportEdge( + null, + new BigDecimal(parts[3].trim().replace(',', '.'))); + } + catch (NumberFormatException nfe) { + log.warn("cannot parse 'Unterkante' in line " + + in.getLineNumber()); + } + } + else if (parts.length > 4) { // 'Unterkante' and 'Oberkante' + String bottom = parts[3].trim().replace(',', '.'); + String top = parts[4].trim().replace(',', '.'); + try { + BigDecimal b = bottom.length() == 0 + ? null + : new BigDecimal(bottom); + BigDecimal t = top.length() == 0 + ? 
null + : new BigDecimal(top); + edge = new ImportEdge(t, b); + } + catch (NumberFormatException nfe) { + log.warn( + "cannot parse 'Unterkante' or 'Oberkante' in line " + + in.getLineNumber()); + } + } + + ImportRange range = new ImportRange(from, to); + + ImportAnnotationType type = classifier != null + ? classifier.classifyDescription(line, defaultIAT) + : null; + + ImportAnnotation annotation = new ImportAnnotation( + attribute, position, range, edge, type); + + if (!annotations.add(annotation)) { + log.warn("duplicated annotation '" + parts[0] + + "' in line " + in.getLineNumber()); + } + } + } + finally { + if (in != null) { + in.close(); + } + } + } + + public void parse(File root) throws IOException { + + for (String toScan: TO_SCAN) { + File directory = FileTools.repair(new File(root, toScan)); + if (!directory.isDirectory()) { + log.warn("'" + directory + "' is not a directory."); + continue; + } + File [] files = directory.listFiles(); + if (files == null) { + log.warn("cannot list directory '" + directory + "'"); + continue; + } + + for (File file: files) { + if (file.isFile() && file.canRead() + && file.getName().toLowerCase().endsWith(".km")) { + parseFile(file); + } + } + } // for all directories to scan + } + + public List<ImportAnnotation> getAnnotations() { + return new ArrayList<ImportAnnotation>(annotations); + } +} +// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/flys-backend/src/main/java/de/intevation/flys/importer/parsers/AtFileParser.java Fri Jul 15 13:07:45 2011 +0000 @@ -0,0 +1,206 @@ +package de.intevation.flys.importer.parsers; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStreamReader; +import java.io.IOException; +import java.math.BigDecimal; + +import org.apache.log4j.Logger; + +import de.intevation.flys.importer.ImportDischargeTable; +import de.intevation.flys.importer.ImportDischargeTableValue; + +import java.util.regex.Pattern; +import java.util.regex.Matcher; + +import java.util.Date; +import java.util.Calendar; + +import de.intevation.flys.importer.ImportTimeInterval; + +public class AtFileParser { + + public static final String ENCODING = "ISO-8859-1"; + + private static Logger logger = Logger.getLogger(AtFileParser.class); + + + // regular expression from hell to find out time range + public static final Pattern DATE_LINE = Pattern.compile( + "^\\*\\s*Abflu[^t]+tafel?\\s*([^\\d]+)" + + "(\\d{1,2})?\\.?(\\d{1,2})?\\.?(\\d{2,4})\\s*(?:(?:bis)|-)?\\s*" + + "(?:(\\d{1,2})?\\.?(\\d{1,2})?\\.?(\\d{2,4}))?\\s*.*$"); + + public AtFileParser() { + } + + + public ImportDischargeTable parse(File file) throws IOException { + return parse(file, "", 0); + } + + public ImportDischargeTable parse( + File file, + String prefix, + int kind + ) + throws IOException { + + logger.info("parsing AT file: " + file); + + BufferedReader br = null; + + String line = null; + + boolean beginning = true; + + ImportDischargeTable dischargeTable = + new ImportDischargeTable(kind, prefix + file.getName()); + + Date from = null; + Date to = null; + + try { + br = new BufferedReader( + new InputStreamReader( + new FileInputStream(file), ENCODING)); + + while ((line = br.readLine()) != null) { + + String tmp = line.trim(); + + if (tmp.length() == 0) { + continue; + } + + Matcher m = DATE_LINE.matcher(tmp); + if (m.matches()) { + 
from = guessDate(m.group(2), m.group(3), m.group(4)); + to = guessDate(m.group(5), m.group(6), m.group(7)); + if (from == null) { + Date t = from; from = to; to = t; + } + continue; + } + + if (tmp.startsWith("#! name=")) { + // XXX Skip the name, because we don't know where to save + // it at the moment + + //String name = tmp.substring(8); + continue; + } + + if (tmp.startsWith("#") || tmp.startsWith("*")) { + continue; + } + + String[] splits = tmp.replace(',', '.').split("\\s+"); + + if ((splits.length < 2) || (splits.length > 11)) { + logger.warn("Found an invalid row in the AT file."); + continue; + } + + String strW = splits[0].trim(); + double W = Double.parseDouble(strW); + + /* shift is used to differenciate between lines with + * exactly 10 Qs and lines with less than 10 Qs. The shift + * is only modified when it is the first line. + */ + int shift = -1; + + if (splits.length != 11 && beginning) { + shift = 10 - splits.length; + } + + + for (int i = 1; i < splits.length; i++) { + double iW = W + shift + i; + double iQ = Double.parseDouble(splits[i].trim()); + + dischargeTable.addDischargeTableValue( + new ImportDischargeTableValue( + new BigDecimal(iQ/100.0), + new BigDecimal(iW/100.0))); + } + + beginning = false; + } + } + catch (NumberFormatException pe) { + logger.warn(pe.getMessage()); + } + finally { + if (br != null) { + br.close(); + } + } + + if (from != null) { + if (to != null && from.compareTo(to) > 0) { + Date t = from; from = to; to = t; + } + logger.info("from: " + from + " to: " + to); + ImportTimeInterval interval = new ImportTimeInterval(from, to); + dischargeTable.setTimeInterval(interval); + } + + logger.info("Finished parsing AT file: " + file); + + return dischargeTable; + } + + public static Date guessDate(String day, String month, String year) { + if (day == null && month == null && year == null) { + return null; + } + + logger.debug("day: " + day + " month: " + month + " year: " + year); + + int dayI = 15; + if (day != null) { + 
try { + dayI = Integer.parseInt(day.trim()); + } + catch (NumberFormatException nfe) { + } + } + + int monthI = 6; + if (month != null) { + try { + monthI = Integer.parseInt(month.trim()); + } + catch (NumberFormatException nfe) { + } + } + + int yearI = 1900; + if (year != null) { + try { + yearI = Integer.parseInt(year.trim()); + if (yearI < 100) { + if (yearI < 20) { + yearI += 2000; + } + else { + yearI += 1900; + } + } + } + catch (NumberFormatException nfe) { + } + } + + Calendar cal = Calendar.getInstance(); + cal.set(yearI, monthI-1, dayI, 12, 0, 0); + long ms = cal.getTimeInMillis(); + cal.setTimeInMillis(ms - ms%1000); + return cal.getTime(); + } +} +// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/flys-backend/src/main/java/de/intevation/flys/importer/parsers/InfoGewParser.java Fri Jul 15 13:07:45 2011 +0000 @@ -0,0 +1,135 @@ +package de.intevation.flys.importer.parsers; + +import java.io.File; + +import java.util.List; +import java.util.ArrayList; + +import java.util.regex.Pattern; +import java.util.regex.Matcher; + +import java.io.IOException; +import java.io.LineNumberReader; +import java.io.FileInputStream; +import java.io.InputStreamReader; + +import org.apache.log4j.Logger; + +import de.intevation.flys.utils.FileTools; + +import de.intevation.flys.importer.ImportRiver; + +public class InfoGewParser +{ + private static Logger log = Logger.getLogger(InfoGewParser.class); + + public static final String ENCODING = "ISO-8859-1"; + + public static final Pattern GEWAESSER = + Pattern.compile("^\\s*Gew\u00e4sser\\s*:\\s*(.+)"); + + public static final Pattern WST_DATEI = + Pattern.compile("^\\s*WSTDatei\\s*:\\s*(.+)"); + + public static final Pattern BB_INFO = + Pattern.compile("^\\s*B\\+B-Info\\s*:\\s*(.+)"); + + protected ArrayList<ImportRiver> rivers; + + protected AnnotationClassifier annotationClassifier; + + public InfoGewParser() { + this(null); + } + + public InfoGewParser(AnnotationClassifier annotationClassifier) { + rivers = new ArrayList<ImportRiver>(); + this.annotationClassifier = annotationClassifier; + } + + public List<ImportRiver> getRivers() { + return rivers; + } + + public static final String normalize(String f) { + return f.replace("\\", "/").replace("/", File.separator); + } + + public void parse(File file) throws IOException { + + LineNumberReader in = null; + + File root = file.getParentFile(); + + try { + in = + new LineNumberReader( + new InputStreamReader( + new FileInputStream(file), ENCODING)); + + String line = null; + + String riverName = null; + File wstFile = null; + File bbInfoFile = null; + + while ((line = in.readLine()) != null) { + if ((line = line.trim()).length() == 0) 
{ + continue; + } + Matcher m = GEWAESSER.matcher(line); + + if (m.matches()) { + String river = m.group(1); + log.info("Found river '" + river + "'"); + if (riverName != null) { + rivers.add(new ImportRiver( + riverName, + wstFile, + bbInfoFile, + annotationClassifier)); + } + riverName = river; + wstFile = null; + bbInfoFile = null; + } + else if ((m = WST_DATEI.matcher(line)).matches()) { + String wstFilename = m.group(1); + File wst = new File(wstFilename = normalize(wstFilename)); + if (!wst.isAbsolute()) { + wst = new File(root, wstFilename); + } + wst = FileTools.repair(wst); + log.info("Found wst file '" + wst + "'"); + if (!wst.isFile() || !wst.canRead()) { + log.warn("cannot access WST file '" + wstFilename + "'"); + continue; + } + wstFile = wst; + } + else if ((m = BB_INFO.matcher(line)).matches()) { + //TODO: Make it relative to the wst file. + String bbInfo = m.group(1); + bbInfoFile = new File(normalize(bbInfo)); + } + } + if (riverName != null) { + rivers.add(new ImportRiver( + riverName, + wstFile, + bbInfoFile, + annotationClassifier)); + } + } + finally { + if (in != null) { + in.close(); + } + } + + for (ImportRiver river: rivers) { + river.parseDependencies(); + } + } +} +// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :
// flys-backend/src/main/java/de/intevation/flys/importer/parsers/PRFParser.java
package de.intevation.flys.importer.parsers;

import java.util.Map;
import java.util.Stack;
import java.util.TreeMap;
import java.util.List;
import java.util.ArrayList;
import java.util.Collections;

import java.util.regex.Pattern;
import java.util.regex.Matcher;

import java.io.File;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.io.FileInputStream;
import java.io.IOException;

import org.apache.log4j.Logger;

import de.intevation.flys.importer.XY;

/**
 * Parser for PRF files.
 *
 * <p>The parser expects a four line header: a data format pattern
 * (see {@link #DATA_PATTERN}), the number of value pairs per line, a km
 * format pattern (see {@link #KM_PATTERN}) and a skip row count. All
 * following lines are data lines from which the km and the (x, y) pairs
 * are extracted using fixed column widths.</p>
 *
 * <p>The result of a parse is a map from km to the list of points found
 * for that km, plus a year and a description derived from the file and
 * directory names.</p>
 */
public class PRFParser
{
    private static Logger log = Logger.getLogger(PRFParser.class);

    /** Encoding of the PRF files; overridable via system property. */
    public static final String ENCODING =
        System.getProperty("flys.backend.prf.encoding", "ISO-8859-1");

    /** Pattern of the first header line describing the data columns. */
    public static final Pattern DATA_PATTERN =
        Pattern.compile(
            "\\((\\d+)x\\s*,\\s*(\\d+)\\(" +
            "\\s*f(\\d+)\\.(\\d+)\\s*,\\s*f(\\d+)\\.(\\d+)\\s*\\)?\\)?");

    /** Pattern of the third header line describing the km column. */
    public static final Pattern KM_PATTERN =
        Pattern.compile("\\((\\d+)x\\s*,\\s*f(\\d+)\\.(\\d+)\\s*\\)?");

    /** Pattern used to find year candidates in file/directory names. */
    public static final Pattern YEAR_PATTERN =
        Pattern.compile("(\\d{4})");

    public static final int MIN_YEAR = 1800;
    public static final int MAX_YEAR = 2100;

    /** Hooks used by {@link #parsePRFs(File, Callback)}. */
    public interface Callback {
        /** Return false to skip the given file. */
        boolean prfAccept(File file);
        /** Called after a file was parsed successfully. */
        void    prfParsed(PRFParser parser);
    } // interface Callback

    /**
     * Column layout of the (x, y) value pairs of a data line as described
     * by {@link PRFParser#DATA_PATTERN}.
     */
    public static class DataFormat {

        protected int deleteChars;
        protected int maxRepetitions;
        protected int firstIntegerPlaces;
        protected int firstFractionPlaces;
        protected int secondIntegerPlaces;
        protected int secondFractionPlaces;

        protected double firstShift;
        protected double secondShift;

        public DataFormat() {
        }

        /**
         * @param m A matcher of {@link PRFParser#DATA_PATTERN} that has
         *          already matched successfully.
         */
        public DataFormat(Matcher m) {
            deleteChars          = Integer.parseInt(m.group(1));
            maxRepetitions       = Integer.parseInt(m.group(2));
            firstIntegerPlaces   = Integer.parseInt(m.group(3));
            firstFractionPlaces  = Integer.parseInt(m.group(4));
            secondIntegerPlaces  = Integer.parseInt(m.group(5));
            secondFractionPlaces = Integer.parseInt(m.group(6));

            firstShift  = Math.pow(10, firstFractionPlaces);
            secondShift = Math.pow(10, secondFractionPlaces);
        }

        /**
         * Extracts up to {@code maxRepetitions} (x, y) pairs from a data
         * line and appends them to {@code kmData}.
         *
         * @param line   The data line.
         * @param kmData The list the extracted points are appended to.
         * @return -1 if the line is too short or contains a value that
         *         cannot be parsed, 1 if {@code maxRepetitions} pairs were
         *         read, 0 if the line ended before that.
         */
        public int extractData(String line, List<XY> kmData) {
            int L = line.length();
            if (L <= deleteChars) {
                return -1;
            }

            int pos = deleteChars;

            int rep = 0;
            for (;rep < maxRepetitions; ++rep) {
                if (pos >= L || pos + firstIntegerPlaces >= L) {
                    break;
                }
                String first = line.substring(
                    pos, pos + firstIntegerPlaces);

                // The second field may be cut off at the end of the line.
                String second = line.substring(
                    pos + firstIntegerPlaces,
                    Math.min(L, pos+firstIntegerPlaces+secondIntegerPlaces));

                double x, y;
                try {
                    x = Double.parseDouble(first);
                    y = Double.parseDouble(second);
                }
                catch (NumberFormatException nfe) {
                    // broken line -> substract from dataset skip
                    return -1;
                }

                // Values written without a decimal point carry an
                // implicit one.
                if (first.indexOf('.') < 0) {
                    x /= firstShift;
                }

                // NOTE(review): this truncation is done in binary floating
                // point, so e.g. 0.29 with two fraction places yields 0.28.
                // Decimal arithmetic would avoid that -- confirm that no
                // consumer relies on the current values before changing.
                if (firstFractionPlaces > 0) {
                    x = (int)(x*firstShift)/firstShift;
                }

                if (second.indexOf('.') < 0) {
                    y /= secondShift;
                }

                if (secondFractionPlaces > 0) {
                    y = (int)(y*secondShift)/secondShift;
                }

                kmData.add(new XY(x, y, kmData.size()));

                pos += firstIntegerPlaces + secondIntegerPlaces;
            }

            return rep == maxRepetitions ? 1 : 0;
        }
    } // class DataFormat

    /**
     * Column layout of the km value of a data line as described by
     * {@link PRFParser#KM_PATTERN}.
     */
    public static class KMFormat {

        protected int deleteChars;
        protected int integerPlaces;
        protected int fractionPlaces;

        protected double shift;

        public KMFormat() {
        }

        /**
         * @param m A matcher of {@link PRFParser#KM_PATTERN} that has
         *          already matched successfully.
         */
        public KMFormat(Matcher m) {
            deleteChars    = Integer.parseInt(m.group(1));
            integerPlaces  = Integer.parseInt(m.group(2));
            fractionPlaces = Integer.parseInt(m.group(3));

            shift = Math.pow(10, fractionPlaces);
        }

        /**
         * Extracts the km value of a data line.
         *
         * @param line The data line.
         * @return The km value.
         * @throws NumberFormatException if the line is too short or the
         *         km column does not contain a number.
         */
        public double extractKm(String line) throws NumberFormatException {

            if (line.length() <= deleteChars) {
                throw new NumberFormatException("line too short");
            }

            // Clamp the end of the field to the end of the line. Without
            // this a line ending inside the km column raised a
            // StringIndexOutOfBoundsException which no caller handles.
            String kmS = line.substring(
                deleteChars,
                Math.min(line.length(), deleteChars+integerPlaces));

            double km = Double.parseDouble(kmS.trim());

            if (kmS.indexOf('.') < 0) {
                km /= shift;
            }

            return fractionPlaces > 0
                ? ((int)(km*shift))/shift
                : km;
        }
    } // class KMFormat

    /** Parsed points by km. */
    protected Map<Double, List<XY>> data;

    /** Year found in the file or directory name; may be null. */
    protected Integer year;

    /** File name, prefixed by the directory name if that was searched. */
    protected String description;


    public PRFParser() {
        data = new TreeMap<Double, List<XY>>();
    }

    public Integer getYear() {
        return year;
    }

    public void setYear(Integer year) {
        this.year = year;
    }

    public String getDescription() {
        return description;
    }

    public void setDescription(String description) {
        this.description = description;
    }

    public Map<Double, List<XY>> getData() {
        return data;
    }

    public void setData(Map<Double, List<XY>> data) {
        this.data = data;
    }

    /** Sorts the point list of every km. */
    protected void sortLists() {
        for (List<XY> xy: data.values()) {
            Collections.sort(xy);
        }
    }

    /**
     * Searches a string for the first four digit sequence that lies in
     * [{@link #MIN_YEAR}, {@link #MAX_YEAR}].
     *
     * @param s The string to search.
     * @return The year or null if none was found.
     */
    public static final Integer findYear(String s) {
        Matcher m = YEAR_PATTERN.matcher(s);
        while (m.find()) {
            int year = Integer.parseInt(m.group(1));
            if (year >= MIN_YEAR && year <= MAX_YEAR) {
                return Integer.valueOf(year);
            }
        }
        return null;
    }

    /**
     * Parses a single PRF file into this parser. Results are added to
     * the data already present; call {@link #reset()} before to start
     * from scratch.
     *
     * @param file The file to parse.
     * @return true if the file was parsed, false if it cannot be read,
     *         its header is malformed or a km cannot be extracted.
     */
    public boolean parse(File file) {

        if (!(file.isFile() && file.canRead())) {
            log.warn("cannot open file '" + file + "'");
            return false;
        }

        log.info("parsing PRF file: '" + file + "'");

        description = file.getName();

        year = findYear(file.getName());

        // Fall back to the name of the directory containing the file.
        if (year == null) {
            File parent = file.getParentFile();
            if (parent != null) {
                description = parent.getName() + "/" + description;
                year = findYear(parent.getName());
            }
        }

        if (year != null) {
            log.info("year of sounding: " + year);
        }

        LineNumberReader in = null;

        try {
            in =
                new LineNumberReader(
                new InputStreamReader(
                new FileInputStream(file), ENCODING));

            String line = in.readLine();

            if (line == null || (line = line.trim()).length() == 0) {
                log.warn("file is empty.");
                return false;
            }

            Matcher m = DATA_PATTERN.matcher(line);

            if (!m.matches()) {
                log.warn("First line does not look like a PRF data pattern.");
                return false;
            }

            DataFormat dataFormat = new DataFormat(m);

            if ((line = in.readLine()) == null
            || (line = line.trim()).length() == 0) {
                log.warn("premature EOF. Expected integer in line 2");
                return false;
            }

            try {
                if (Integer.parseInt(line) != dataFormat.maxRepetitions) {
                    log.warn("Expected " +
                        dataFormat.maxRepetitions + " in line 2");
                    return false;
                }
            }
            catch (NumberFormatException nfe) {
                log.warn("invalid integer in line 2", nfe);
                return false;
            }

            if ((line = in.readLine()) == null) {
                log.warn(
                    "premature EOF. Expected pattern for km extraction");
                return false;
            }

            // Trimmed like the data pattern line so that surrounding
            // white space does not break the match.
            m = KM_PATTERN.matcher(line.trim());

            if (!m.matches()) {
                log.warn(
                    "line 4 does not look like a PRF km extraction pattern.");
                return false;
            }

            KMFormat kmFormat = new KMFormat(m);

            if ((line = in.readLine()) == null
            || (line = line.trim()).length() == 0) {
                log.warn("premature EOF. Expected skip row count.");
                return false;
            }

            // A negative count is rejected the same way as a non-number.
            // It used to raise an IllegalArgumentException which was not
            // caught here and escaped this method.
            int lineSkipCount;
            try {
                lineSkipCount = Integer.parseInt(line);
            }
            catch (NumberFormatException nfe) {
                lineSkipCount = -1;
            }

            if (lineSkipCount < 0) {
                log.warn(
                    "line 5 is not an positive integer.");
                return false;
            }

            int skip = lineSkipCount;

            while ((line = in.readLine()) != null) {
                if (skip > 0) {
                    --skip;
                    continue;
                }
                double km;
                try {
                    km = kmFormat.extractKm(line);
                }
                catch (NumberFormatException iae) {
                    log.warn("cannot extract km in line " + in.getLineNumber());
                    return false;
                }

                Double station = Double.valueOf(km);

                List<XY> kmData = data.get(station);

                if (kmData == null) {
                    //log.debug("found new km: " + station);
                    kmData = new ArrayList<XY>();
                    data.put(station, kmData);
                }

                // c == 1: line was full, the next line continues the data.
                // c == 0: line ended early, skip lineSkipCount lines.
                // c == -1: broken line, skip one line less.
                int c = dataFormat.extractData(line, kmData);
                if (c < 1) {
                    skip = lineSkipCount + c;
                }
            }

            // sort all the lists by x and index
            sortLists();
        }
        catch (IOException ioe) {
            // Pass the exception as throwable to keep the stack trace.
            log.error("error reading PRF file '" + file + "'", ioe);
            return false;
        }
        finally {
            if (in != null) {
                try {
                    in.close();
                }
                catch (IOException ioe) {
                    log.error("error closing PRF file '" + file + "'", ioe);
                }
            }
        }

        return true;
    }

    /** Discards the results of previous parses. */
    public void reset() {
        data.clear();
        year        = null;
        description = null;
    }

    /**
     * Walks a directory tree and parses every file with the extension
     * ".prf" (case insensitive). The parser is reset before each file,
     * so the callback has to pick up the results in
     * {@link Callback#prfParsed(PRFParser)}.
     *
     * @param root     The file or directory to start from.
     * @param callback Optional filter and result receiver; may be null.
     */
    public void parsePRFs(File root, Callback callback) {

        Stack<File> stack = new Stack<File>();
        stack.push(root);

        while (!stack.empty()) {
            File file = stack.pop();
            if (file.isDirectory()) {
                File [] files = file.listFiles();
                if (files != null) {
                    for (File f: files) {
                        stack.push(f);
                    }
                }
            }
            else if (file.isFile()
                && file.getName().toLowerCase().endsWith(".prf")
                && (callback == null || callback.prfAccept(file))
            ) {
                reset();
                boolean success = parse(file);
                log.info("parsing " + (success ? "succeeded" : "failed"));
                if (success && callback != null) {
                    callback.prfParsed(this);
                }
            }
        }
    }

    /** Parses all PRF files below the paths given as arguments. */
    public static void main(String [] args) {

        PRFParser parser = new PRFParser();

        for (String arg: args) {
            parser.parsePRFs(new File(arg), null);
        }
    }
}
// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :
// flys-backend/src/main/java/de/intevation/flys/importer/parsers/PegelGltParser.java
package de.intevation.flys.importer.parsers;

import java.io.File;

import java.util.List;
import java.util.ArrayList;

import java.io.IOException;
import java.io.LineNumberReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;

import java.math.BigDecimal;

import org.apache.log4j.Logger;

import de.intevation.flys.utils.FileTools;

import de.intevation.flys.importer.ImportGauge;
import de.intevation.flys.importer.ImportRange;

/**
 * Parser for GLT files.
 *
 * <p>Each non-empty line has the form
 * {@code <gauge name> km: <from> <to> <sta file> <at file>}. For every
 * such line an {@link ImportGauge} is created; the STA and AT file names
 * are resolved relative to the directory of the GLT file.</p>
 */
public class PegelGltParser
{
    private static Logger log = Logger.getLogger(PegelGltParser.class);

    public static final String ENCODING = "ISO-8859-1";

    /** Marker separating the gauge name from the columns. */
    public static final String KM = "km:";

    /** Gauges found so far. */
    protected List<ImportGauge> gauges;

    public PegelGltParser() {
        gauges = new ArrayList<ImportGauge>();
    }

    public List<ImportGauge> getGauges() {
        return gauges;
    }

    /**
     * Parses a GLT file and appends the gauges found to
     * {@link #getGauges()}. Lines without the km marker, with too few
     * columns or with unparseable km values are skipped with a warning.
     *
     * @param file The GLT file.
     * @throws IOException if reading the file fails.
     */
    public void parse(File file) throws IOException {

        File parent = file.getParentFile();

        log.info("parsing GLT file '" + file + "'");
        LineNumberReader in = null;
        try {
            in =
                new LineNumberReader(
                new InputStreamReader(
                new FileInputStream(file), ENCODING));

            String line = null;
            while ((line = in.readLine()) != null) {
                if ((line = line.trim()).length() == 0) {
                    continue;
                }

                int kmPos = line.indexOf(KM);
                if (kmPos < 0) {
                    log.warn("no gauge found in line " + in.getLineNumber());
                    continue;
                }

                String gaugeName = line.substring(0, kmPos).trim();
                log.info("Found gauge '" + gaugeName + "'");

                line = line.substring(kmPos + KM.length()).trim();

                String [] parts = line.split("\\s+");
                if (parts.length < 4) {
                    log.warn("line " + in.getLineNumber()
                        + " has not enough columns");
                    continue;
                }

                BigDecimal from;
                BigDecimal to;
                try {
                    from = new BigDecimal(parts[0].replace(",", "."));
                    to   = new BigDecimal(parts[1].replace(",", "."));
                }
                catch (NumberFormatException nfe) {
                    // One bad line should not abort the whole file.
                    log.warn("line " + in.getLineNumber()
                        + " has unparseable km values");
                    continue;
                }

                // Ensure from <= to. The original compared 'from' with
                // itself, so reversed ranges were never swapped.
                if (from.compareTo(to) > 0) {
                    BigDecimal t = from; from = to; to = t;
                }
                ImportRange range = new ImportRange(from, to);
                File staFile = FileTools.repair(new File(parent, parts[2]));
                File atFile  = FileTools.repair(new File(parent, parts[3]));

                if (log.isDebugEnabled()) {
                    log.debug("\tfrom: " + from);
                    log.debug("\tto: " + to);
                    log.debug("\tsta: " + staFile);
                    log.debug("\tat: " + atFile);
                }

                gauges.add(new ImportGauge(range, staFile, atFile));
            }
        }
        finally {
            if (in != null) {
                in.close();
            }
        }
    }
}
// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :
// flys-backend/src/main/java/de/intevation/flys/importer/parsers/StaFileParser.java
package de.intevation.flys.importer.parsers;

import java.io.File;
import java.io.IOException;
import java.io.LineNumberReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;

import java.math.BigDecimal;

import java.util.regex.Pattern;
import java.util.regex.Matcher;

import java.util.HashMap;
import java.util.ArrayList;

import org.apache.log4j.Logger;

import de.intevation.flys.importer.ImportMainValueType;
import de.intevation.flys.importer.ImportMainValue;
import de.intevation.flys.importer.ImportNamedMainValue;
import de.intevation.flys.importer.ImportGauge;

/**
 * Parser for the STA file of a gauge.
 *
 * <p>The first line carries the gauge name in columns 16-36 followed by
 * the two values stored as aeo and datum. The second line carries the
 * station in columns 29-35. The next six lines are skipped. Every
 * following line matching {@link #QWTD_} contributes one main value.</p>
 */
public class StaFileParser
{
    private static Logger log = Logger.getLogger(StaFileParser.class);

    public static final String ENCODING = "ISO-8859-1";

    /** Accepted type characters; overridable via system property. */
    public static final String TYPES =
        System.getProperty("flys.backend.main.value.types", "QWTD");

    /** Main value line: name, value, one of the type characters. */
    public static final Pattern QWTD_ =
        Pattern.compile("\\s*([^\\s]+)\\s+([^\\s]+)\\s+([" +
            Pattern.quote(TYPES) + "]).*");

    public StaFileParser() {
    }

    /**
     * Parses the STA file of the given gauge and stores name, aeo, datum,
     * station and the main values in the gauge.
     *
     * @param gauge The gauge; its STA file is read.
     * @return true if the file was parsed, false if the header is
     *         incomplete or contains unparseable numbers.
     * @throws IOException if reading the file fails.
     */
    public boolean parse(ImportGauge gauge) throws IOException {

        File file = gauge.getStaFile();

        log.info("parsing STA file: " + file);
        LineNumberReader in = null;
        try {
            in =
                new LineNumberReader(
                new InputStreamReader(
                new FileInputStream(file), ENCODING));

            String line = in.readLine();

            if (line == null) {
                log.warn("STA file is empty.");
                return false;
            }

            // substring(38) below needs at least 38 characters; the
            // former limit of 37 let a 37 character line run into a
            // StringIndexOutOfBoundsException.
            if (line.length() < 38) {
                log.warn("first line in STA file is too short.");
                return false;
            }

            gauge.setName(line.substring(16, 37).trim());

            String [] values = line.substring(38).trim().split("\\s+", 2);

            // Stop here: continuing would access values[1] and fail
            // with an ArrayIndexOutOfBoundsException.
            if (values.length < 2) {
                log.warn("Not enough columns for aeo and datum");
                return false;
            }
            try {
                gauge.setAeo(new BigDecimal(values[0].replace(",", ".")));
                gauge.setDatum(new BigDecimal(values[1].replace(",", ".")));
            }
            catch (NumberFormatException nfe) {
                log.warn("cannot parse aeo or datum");
                return false;
            }

            line = in.readLine();

            if (line == null) {
                log.warn("STA file has not enough lines");
                return false;
            }

            if (line.length() < 36) {
                log.warn("second line is too short");
                return false;
            }

            try {
                // Decimal comma is accepted like for the other numbers.
                gauge.setStation(new BigDecimal(
                    line.substring(29, 36).trim().replace(",", ".")));
            }
            catch (NumberFormatException nfe) {
                log.warn("parsing of the station of the gauge failed");
                return false;
            }

            // overread the next six lines
            for (int i = 0; i < 6; ++i) {
                if ((line = in.readLine()) == null) {
                    log.warn("STA file is too short");
                    return false;
                }
            }

            // One type object per type character.
            HashMap<String, ImportMainValueType> types =
                new HashMap<String, ImportMainValueType>();

            ArrayList<ImportNamedMainValue> namedMainValues =
                new ArrayList<ImportNamedMainValue>();

            ArrayList<ImportMainValue> mainValues =
                new ArrayList<ImportMainValue>();

            while ((line = in.readLine()) != null) {
                Matcher m = QWTD_.matcher(line);
                if (m.matches()) {
                    BigDecimal value;
                    try {
                        value = new BigDecimal(m.group(2).replace(",", "."));
                    }
                    catch (NumberFormatException nfe) {
                        log.warn("value not parseable in line "
                            + in.getLineNumber());
                        continue;
                    }
                    String typeString = m.group(3);
                    log.debug("\t type: " + typeString);
                    ImportMainValueType type = types.get(typeString);
                    if (type == null) {
                        type = new ImportMainValueType(typeString);
                        types.put(typeString, type);
                    }
                    String name = m.group(1);
                    ImportNamedMainValue namedMainValue =
                        new ImportNamedMainValue(type, name);
                    namedMainValues.add(namedMainValue);

                    ImportMainValue mainValue =
                        new ImportMainValue(gauge, namedMainValue, value);

                    mainValues.add(mainValue);
                }
                else {
                    // TODO: treat as a comment
                }
            }
            gauge.setMainValueTypes(
                new ArrayList<ImportMainValueType>(types.values()));
            gauge.setNamedMainValues(namedMainValues);
            gauge.setMainValues(mainValues);
        }
        finally {
            if (in != null) {
                in.close();
            }
        }
        log.info("finished parsing STA file: " + file);
        return true;
    }
}
// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :
// flys-backend/src/main/java/de/intevation/flys/importer/parsers/WstParser.java
package de.intevation.flys.importer.parsers;

import java.util.ArrayList;
import java.util.HashSet;

import java.io.File;
import java.io.IOException;
import java.io.LineNumberReader;
import java.io.InputStreamReader;
import java.io.FileInputStream;

import java.text.NumberFormat;

import org.apache.log4j.Logger;

import de.intevation.flys.utils.StringUtil;

import java.util.regex.Pattern;
import java.util.regex.Matcher;

import java.math.BigDecimal;

import de.intevation.flys.importer.ImportWstQRange;
import de.intevation.flys.importer.ImportWstColumn;
import de.intevation.flys.importer.ImportRange;
import de.intevation.flys.importer.ImportWst;

/**
 * Parser for WST files.
 *
 * <p>The first non-empty line holds the number of columns. Lines
 * starting with "*" followed by the control character U+001F carry one
 * Q value per column and start a new Q range. Lines starting with "*!"
 * carry special directives, other lines starting with "*" are comments.
 * Until the first Q line was seen remaining lines are treated as column
 * headers, afterwards as data lines consisting of a km and one value per
 * column in fixed width fields.</p>
 */
public class WstParser
{
    private static Logger log = Logger.getLogger(WstParser.class);

    public static final String COLUMN_BEZ_TEXT   = "column-bez-text";
    public static final String COLUMN_BEZ_BREITE = "column-bez-breite";
    public static final String COLUMN_QUELLE     = "column-quelle";
    public static final String COLUMN_DATUM      = "column-datum";

    /** Value used for empty Q fields. */
    public static final BigDecimal UNDEFINED_ZERO =
        new BigDecimal(0.0);
    /** Sentinels for the running km maximum/minimum. */
    public static final BigDecimal MIN_RANGE =
        new BigDecimal(-Double.MAX_VALUE);
    public static final BigDecimal MAX_RANGE =
        new BigDecimal(Double.MAX_VALUE);

    public static final String ENCODING = "ISO-8859-1";

    public static final Pattern UNIT_COMMENT =
        Pattern.compile("\\*\\s*[kK][mM]\\s+(.+)");

    public static final Pattern UNIT =
        Pattern.compile("[^\\[]*\\[([^]]+)\\].*");

    // Built from a String: new BigDecimal(0.00001) yields the inexact
    // binary expansion of the double, not 0.00001.
    public static final BigDecimal INTERVAL_GAP =
        new BigDecimal("0.00001");

    /** Result of the last {@link #parse(File)}. */
    protected ImportWst wst;

    /** Range added by the previous {@link #addInterval} call. */
    protected ImportRange lastRange;

    public WstParser() {
    }

    public ImportWst getWst() {
        return wst;
    }

    public void setWst(ImportWst wst) {
        this.wst = wst;
    }

    /**
     * Parses a WST file into a new {@link ImportWst} which is available
     * via {@link #getWst()} afterwards.
     *
     * @param file The WST file.
     * @throws IOException if reading the file fails.
     */
    public void parse(File file) throws IOException {

        log.info("Parsing WST file '" + file + "'");

        wst = new ImportWst(file.getName());

        // Ranges of a previously parsed file must not be fitted against
        // the ranges of this one.
        lastRange = null;

        LineNumberReader in = null;
        try {
            in =
                new LineNumberReader(
                new InputStreamReader(
                new FileInputStream(file), ENCODING));

            String input;
            boolean first = true;
            int columnCount = 0;

            String [] lsBezeichner   = null;
            String [] langBezeichner = null;
            int    [] colNaWidths    = null;
            String [] quellen        = null;
            String [] daten          = null;

            BigDecimal [] aktAbfluesse   = null;
            BigDecimal [] firstAbfluesse = null;

            BigDecimal minKm = MAX_RANGE;
            BigDecimal maxKm = MIN_RANGE;

            boolean columnHeaderChecked = false;

            String einheit = "Wasserstand [NN + m]";

            HashSet<BigDecimal> kms = new HashSet<BigDecimal>();

            while ((input = in.readLine()) != null) {
                String line = input;
                if (first) { // fetch number of columns
                    if ((line = line.trim()).length() == 0) {
                        continue;
                    }
                    try {
                        columnCount = Integer.parseInt(line);
                        if (columnCount <= 0) {
                            throw new NumberFormatException(
                                "number columns <= 0");
                        }
                        log.debug("Number of columns: " + columnCount);
                        wst.setNumberColumns(columnCount);
                        lsBezeichner = new String[columnCount];
                    }
                    catch (NumberFormatException nfe) {
                        log.warn(nfe);
                        continue;
                    }
                    first = false;
                    continue;
                }

                line = line.replace(',', '.');

                // Q line: closes the current Q range and opens a new one.
                if (line.startsWith("*\u001f")) {
                    BigDecimal [] data =
                        parseLineAsDouble(line, columnCount, false, true);

                    if (aktAbfluesse != null) {
                        addInterval(minKm, maxKm, aktAbfluesse);
                        minKm = MAX_RANGE;
                        maxKm = MIN_RANGE;
                    }

                    aktAbfluesse = new BigDecimal[columnCount];
                    log.debug("new q range: " + columnCount);
                    for (int i = 0; i < Math.min(columnCount, data.length); ++i) {
                        if (data[i] != null) {
                            log.debug("  column: " + data[i]);
                            aktAbfluesse[i] = data[i];
                        }
                    }

                    if (firstAbfluesse == null) {
                        firstAbfluesse = (BigDecimal [])aktAbfluesse.clone();
                    }
                    continue;
                }

                // Special directives.
                if (line.startsWith("*!")) {
                    String spezial = line.substring(2).trim();

                    if (spezial.length() == 0) {
                        continue;
                    }

                    if (spezial.startsWith(COLUMN_BEZ_TEXT)) {
                        spezial = spezial.substring(COLUMN_BEZ_TEXT.length()).trim();
                        if (spezial.length() == 0) {
                            continue;
                        }
                        langBezeichner = StringUtil.splitQuoted(spezial, '"');
                    }
                    else if (spezial.startsWith(COLUMN_BEZ_BREITE)) {
                        spezial = spezial.substring(COLUMN_BEZ_BREITE.length()).trim();

                        if (spezial.length() == 0) {
                            continue;
                        }

                        String[] split = spezial.split("\\s+");

                        colNaWidths = new int[split.length];
                        for (int i=0; i < split.length; i++) {
                            colNaWidths[i] = Integer.parseInt(split[i]);
                        }
                    }
                    else if (spezial.startsWith(COLUMN_QUELLE)) {
                        // Strip the keyword like the sibling branches do;
                        // without this the emptiness check below could
                        // never trigger and the keyword itself was passed
                        // on to splitQuoted().
                        spezial = spezial.substring(COLUMN_QUELLE.length()).trim();
                        if (spezial.length() == 0) {
                            continue;
                        }
                        quellen = StringUtil.splitQuoted(spezial, '"');
                    }
                    else if (spezial.startsWith(COLUMN_DATUM)) {
                        spezial = spezial.substring(COLUMN_DATUM.length()).trim();
                        if (spezial.length() == 0) {
                            continue;
                        }
                        daten = StringUtil.splitQuoted(spezial, '"');
                    }
                    continue;
                }

                if (line.length() < 11) {
                    continue;
                }

                if (line.startsWith("*")) {
                    Matcher m = UNIT_COMMENT.matcher(line);
                    if (m.matches()) {
                        log.debug("unit comment found");
                        // XXX: This hack is needed because desktop
                        // FLYS is broken figuring out the unit
                        String [] units = m.group(1).split("\\s{2,}");
                        m = UNIT.matcher(units[0]);
                        einheit = m.matches() ? m.group(1) : units[0];
                        log.debug("unit: " + einheit);
                    }
                    continue;
                }

                if (firstAbfluesse != null) {
                    // Before the first data line: make sure that every
                    // column has a unique, non-empty name.
                    if (!columnHeaderChecked) {
                        int unknownCount = 0;
                        HashSet<String> uniqueColumnNames =
                            new HashSet<String>();
                        for (int i = 0; i < lsBezeichner.length; ++i) {
                            if (lsBezeichner[i] == null
                            ||  lsBezeichner[i].length() == 0) {
                                double q = firstAbfluesse[i].doubleValue();
                                if (q < 0.001) {
                                    lsBezeichner[i] =
                                        "<unbekannt #" + unknownCount + ">";
                                    ++unknownCount;
                                }
                                else {
                                    lsBezeichner[i] = "Q="+format(q);
                                }
                            }
                            String candidate = lsBezeichner[i];
                            int collision = 1;
                            while (!uniqueColumnNames.add(candidate)) {
                                candidate = lsBezeichner[i] +
                                    " (" + collision + ")";
                                ++collision;
                            }
                            wst.getColumn(i).setName(candidate);
                        }
                        columnHeaderChecked = true;
                    }

                    BigDecimal [] data =
                        parseLineAsDouble(line, columnCount, true, false);

                    BigDecimal kaem = data[0];

                    // A blank station field is parsed as null and would
                    // end in a NullPointerException below.
                    if (kaem == null) {
                        log.warn(
                            "no km found in line " + in.getLineNumber() +
                            ". -> ignored");
                        continue;
                    }

                    if (!kms.add(kaem)) {
                        log.warn(
                            "km " + kaem +
                            " (line " + in.getLineNumber() +
                            ") found more than once. -> ignored");
                        continue;
                    }

                    if (kaem.compareTo(minKm) < 0) {
                        minKm = kaem;
                    }
                    if (kaem.compareTo(maxKm) > 0) {
                        maxKm = kaem;
                    }

                    // extract values
                    for (int i = 0; i < columnCount; ++i) {
                        addValue(kaem, data[i+1], i);
                    }

                }
                else { // firstAbfluesse == null
                    if (langBezeichner != null) {
                        lsBezeichner = StringUtil.fitArray(
                            langBezeichner, lsBezeichner);
                    }
                    else if (colNaWidths != null) {
                        // Bounded by the number of columns and the end of
                        // the line so that surplus widths or a short
                        // header line cannot overrun.
                        for (int j = 0, i = 0, N = input.length();
                             j < colNaWidths.length
                             && j < lsBezeichner.length
                             && i < N;
                             i += colNaWidths[j++]
                        ) {
                            lsBezeichner[j] = input.substring(
                                i, Math.min(N, i+colNaWidths[j])).trim();
                        }
                    }
                    else {
                        // first column begins at position 8 in line
                        for (int i = 8, col = 0; i < input.length(); i += 9) {
                            if ((i + 9) > input.length()) {
                                i = input.length() - 10;
                            }
                            // one column header is 9 chars wide
                            lsBezeichner[col++] =
                                input.substring(i, i + 9).trim();

                            if (col == lsBezeichner.length) {
                                break;
                            }
                        }
                    }
                }

            }
            // Close the last Q range.
            addInterval(minKm, maxKm, aktAbfluesse);
        }
        finally {
            if (in != null) {
                in.close();
            }
        }
    }

    /** Adds the value w at km to the column with the given index. */
    protected void addValue(BigDecimal km, BigDecimal w, int index) {
        if (w != null) {
            ImportWstColumn column = wst.getColumn(index);
            column.addColumnValue(km, w);
        }
    }

    private static final NumberFormat NF = getNumberFormat();

    private static final NumberFormat getNumberFormat() {
        NumberFormat nf = NumberFormat.getInstance();
        nf.setMinimumFractionDigits(2);
        nf.setMaximumFractionDigits(2);
        return nf;
    }

    /** Formats a value with exactly two fraction digits. */
    protected static String format(double value) {
        return NF.format(value);
    }

    /**
     * Adds a Q range [from, to] with one Q value per column to the
     * columns of the current WST.
     *
     * @param from   Lower km; the sentinel {@link #MAX_RANGE} signals
     *               that no km was seen and makes this call a no-op.
     * @param to     Upper km.
     * @param values Q value per column; null makes this call a no-op.
     */
    protected void addInterval(
        BigDecimal    from,
        BigDecimal    to,
        BigDecimal [] values
    ) {
        log.debug("addInterval: " + from + " " + to);

        // Identity comparison is intended: 'from' is the sentinel object
        // itself if no km was recorded.
        if (values == null || from == MAX_RANGE) {
            return;
        }

        if (to.compareTo(from) < 0) {
            BigDecimal t = from; from = to; to = t;
        }

        ImportRange range = new ImportRange(from, to);

        // little workaround to make the q ranges tightly fit.
        // Leave a very small gap to ensure that the range queries
        // still work.

        if (lastRange != null) {
            double d1 = Math.abs(
                lastRange.getB().doubleValue() - range.getA().doubleValue());
            double d2 = Math.abs(
                range.getB().doubleValue() - lastRange.getA().doubleValue());

            if (d1 < d2) {
                lastRange.setB(range.getA().subtract(INTERVAL_GAP));
            }
            else {
                range.setA(lastRange.getB().subtract(INTERVAL_GAP));
            }
        }

        for (int i = 0; i < values.length; ++i) {
            ImportWstColumn column = wst.getColumn(i);
            ImportWstQRange wstQRange = new ImportWstQRange(range, values[i]);
            column.addColumnQRange(wstQRange);
        }

        lastRange = range;
    }

    /**
     * Splits a line into fixed width fields and converts them to numbers.
     *
     * @param line              The line.
     * @param count             Number of value fields.
     * @param bStation          If true the first result is the station.
     * @param bParseEmptyAsZero If true empty fields result in
     *                          {@link #UNDEFINED_ZERO}, else in null.
     * @return The numbers; the station first if requested.
     * @throws NumberFormatException if a non-empty field is not a number.
     */
    private static final BigDecimal [] parseLineAsDouble(
        String  line,
        int     count,
        boolean bStation,
        boolean bParseEmptyAsZero
    ) {
        String [] tokens = parseLine(line, count, bStation);

        BigDecimal [] doubles = new BigDecimal[tokens.length];

        for (int i = 0; i < doubles.length; ++i) {
            String token = tokens[i].trim();
            if (token.length() != 0) {
                doubles[i] = new BigDecimal(token);
            }
            else if (bParseEmptyAsZero) {
                doubles[i] = UNDEFINED_ZERO;
            }
        }

        return doubles;
    }

    /**
     * Splits a line into fixed width fields: an optional station in
     * columns 0-7 and {@code tokenCount} values of eight characters
     * starting at column 9 with a distance of nine characters. Fields
     * beyond the end of the line result in empty strings.
     */
    private static String [] parseLine(
        String  line,
        int     tokenCount,
        boolean bParseStation
    ) {
        ArrayList<String> strings = new ArrayList<String>();

        if (bParseStation) {
            if (line.length() < 8) {
                throw new IllegalArgumentException("station too short");
            }
            strings.add(line.substring(0, 8));
        }

        int pos = 9;
        for (int i = 0; i < tokenCount; ++i) {
            if (line.length() >= pos + 8) {
                strings.add(line.substring(pos, pos + 8));
            }
            else {
                strings.add("");
            }
            pos += 9;
        }

        return strings.toArray(new String[strings.size()]);
    }
}
// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :