# HG changeset patch # User Sascha L. Teichmann # Date 1308486372 0 # Node ID 763c4137d6e1a2a075287b30e1252ae247e4068b # Parent e09f00ecb915d1173f01b0c02f9dfaf71ef2079a Added classification of annotation types. Needs testing! flys-backend/trunk@2162 c6561f87-3c4e-4783-a992-168aeb5c3f6f diff -r e09f00ecb915 -r 763c4137d6e1 flys-backend/ChangeLog --- a/flys-backend/ChangeLog Sat Jun 18 20:13:31 2011 +0000 +++ b/flys-backend/ChangeLog Sun Jun 19 12:26:12 2011 +0000 @@ -1,4 +1,50 @@ -2011-06-14 Sascha L. Teichmann +2011-06-19 Sascha L. Teichmann + + Added classification of annotation types. Needs testing! + + * doc/annotation-types.xml: New. Rules to classify the different + types of annotations. The classification works like this: + + There are unique types like 'Bruecke', 'Pegel' and so on. + They are defined in the /annotations/types section and + identified by their name. One of the types can be set + as the default type if no rule applies. + + In the /annotations/patterns section there are two types of patterns. + + 1 - file patterns: If a KM file is opened its filename is + matched against the regular expressions of these + patterns. If a match is found the corresponding type + is used as the default type in the open file. + If no match is found the global default type is used + as the default type. + + 2 - line patterns: For each line of an open KM file these + patterns are applied to find a match. If a match is + found the corresponding type is used as the type of + the annotation. If no match is found the + file default is assumed to be the right type. For + the file default see 1. + + * src/main/java/de/intevation/flys/importer/Importer.java: + To activate the annotation type classification set + the system property + + 'flys.backend.importer.annotation.types' + + to the path of an XML document structured like the annotation-types.xml + file. If the system property is not set no classification + is done. 
+ + * src/main/java/de/intevation/flys/importer/AnnotationClassifier.java: + New. Implements the classification. + + * src/main/java/de/intevation/flys/importer/AnnotationsParser.java, + src/main/java/de/intevation/flys/importer/InfoGewParser.java, + src/main/java/de/intevation/flys/importer/ImportRiver.java: + Looped through the annotation type classification. + +2011-06-18 Sascha L. Teichmann * src/main/java/de/intevation/flys/model/River.java: Added method to find gauge by a position lying in its range. diff -r e09f00ecb915 -r 763c4137d6e1 flys-backend/doc/annotation-types.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/flys-backend/doc/annotation-types.xml Sun Jun 19 12:26:12 2011 +0000 @@ -0,0 +1,56 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r e09f00ecb915 -r 763c4137d6e1 flys-backend/src/main/java/de/intevation/flys/importer/AnnotationClassifier.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/flys-backend/src/main/java/de/intevation/flys/importer/AnnotationClassifier.java Sun Jun 19 12:26:12 2011 +0000 @@ -0,0 +1,233 @@ +package de.intevation.flys.importer; + +import org.w3c.dom.Document; +import org.w3c.dom.NodeList; +import org.w3c.dom.Element; + +import javax.xml.xpath.XPathConstants; + +import java.io.File; +import java.io.IOException; + +import java.util.Map; +import java.util.HashMap; +import java.util.List; +import java.util.ArrayList; + +import java.util.regex.Pattern; +import java.util.regex.Matcher; + +import org.apache.log4j.Logger; + +import de.intevation.artifacts.common.utils.XMLUtils; + +public class AnnotationClassifier +{ + private static Logger log = Logger.getLogger(Importer.class); + + public static final String TYPES_XPATH = + "/annotations/types/type"; + + public static final String FILE_PATTERNS_XPATH = + "/annotations/patterns/file"; + + public static final String DESCRIPTION_PATTERNS_XPATH = + "/annotations/patterns/line"; + + + public static 
class Pair { + + protected Pattern pattern; + protected ImportAnnotationType annType; + + public Pair(Pattern pattern, ImportAnnotationType annType) { + this.pattern = pattern; + this.annType = annType; + } + + public ImportAnnotationType match(String s) { + Matcher m = pattern.matcher(s); + return m.matches() ? annType : null; + } + } // class Pair + + + protected Map types; + protected List filePatterns; + protected List descPatterns; + + protected ImportAnnotationType defaultType; + + public AnnotationClassifier() { + } + + public AnnotationClassifier(Document rules) { + types = new HashMap(); + filePatterns = new ArrayList(); + descPatterns = new ArrayList(); + + buildRules(rules); + } + + protected void buildRules(Document rules) { + + buildTypes(rules); + buildFilePatterns(rules); + buildDescriptionPatterns(rules); + } + + protected void buildTypes(Document rules) { + + NodeList typeList = (NodeList)XMLUtils.xpath( + rules, + TYPES_XPATH, + XPathConstants.NODESET, + null); + + if (typeList == null) { + log.info("no rules found."); + return; + } + + for (int i = 0, N = typeList.getLength(); i < N; ++i) { + Element typeElement = (Element)typeList.item(i); + String name = typeElement.getAttribute("name"); + if (name.length() == 0) { + log.warn("rule has no name"); + continue; + } + + ImportAnnotationType aic = new ImportAnnotationType(name); + + types.put(name, aic); + + if (typeElement.getAttribute("default").equals("true")) { + defaultType = aic; + } + } + } + + protected void buildFilePatterns(Document rules) { + + NodeList patternList = (NodeList)XMLUtils.xpath( + rules, + FILE_PATTERNS_XPATH, + XPathConstants.NODESET, + null); + + if (patternList == null) { + log.info("no file patterns found."); + return; + } + + for (int i = 0, N = patternList.getLength(); i < N; ++i) { + Element element = (Element)patternList.item(i); + Pair pair = buildPair(element); + if (pair != null) { + filePatterns.add(pair); + } + } + } + + protected void 
buildDescriptionPatterns(Document rules) { + + NodeList patternList = (NodeList)XMLUtils.xpath( + rules, + DESCRIPTION_PATTERNS_XPATH, + XPathConstants.NODESET, + null); + + if (patternList == null) { + log.info("no line patterns found."); + return; + } + + for (int i = 0, N = patternList.getLength(); i < N; ++i) { + Element element = (Element)patternList.item(i); + Pair pair = buildPair(element); + if (pair != null) { + descPatterns.add(pair); + } + } + } + + protected Pair buildPair(Element element) { + String pattern = element.getAttribute("pattern"); + String type = element.getAttribute("type"); + + if (pattern.length() == 0) { + log.warn("pattern has no 'pattern' attribute."); + return null; + } + + if (type.length() == 0) { + log.warn("pattern has no 'type' attribute."); + return null; + } + + ImportAnnotationType annType = types.get(type); + + if (annType == null) { + log.warn("pattern has unknown type '" + type + "'"); + return null; + } + + Pattern p; + + try { + p = Pattern.compile(pattern, + Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE); + } + catch (IllegalArgumentException iae) { + log.warn("pattern '" + pattern + "' is invalid.", iae); + return null; + } + + return new Pair(p, annType); + } + + public ImportAnnotationType getDefaultType() { + return defaultType; + } + + public ImportAnnotationType classifyFile(String filename) { + return classifyFile(filename, null); + } + + public ImportAnnotationType classifyFile( + String filename, + ImportAnnotationType def + ) { + if (filename.toLowerCase().endsWith(".km")) { + filename = filename.substring(0, filename.length()-3); + } + + for (Pair pair: filePatterns) { + ImportAnnotationType annType = pair.match(filename); + if (annType != null) { + return annType; + } + } + + return def; + } + + public ImportAnnotationType classifyDescription(String description) { + return classifyDescription(description, null); + } + + public ImportAnnotationType classifyDescription( + String description, + 
ImportAnnotationType def + ) { + for (Pair pair: descPatterns) { + ImportAnnotationType annType = pair.match(description); + if (annType != null) { + return annType; + } + } + + return def; + } +} +// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 : diff -r e09f00ecb915 -r 763c4137d6e1 flys-backend/src/main/java/de/intevation/flys/importer/AnnotationsParser.java --- a/flys-backend/src/main/java/de/intevation/flys/importer/AnnotationsParser.java Sat Jun 18 20:13:31 2011 +0000 +++ b/flys-backend/src/main/java/de/intevation/flys/importer/AnnotationsParser.java Sun Jun 19 12:26:12 2011 +0000 @@ -31,15 +31,30 @@ protected HashMap attributes; protected HashMap positions; protected TreeSet annotations; + protected AnnotationClassifier classifier; public AnnotationsParser() { + this(null); + } + + public AnnotationsParser(AnnotationClassifier classifier) { attributes = new HashMap(); positions = new HashMap(); annotations = new TreeSet(); + this.classifier = classifier; } public void parseFile(File file) throws IOException { log.info("parsing km file: '" + file + "'"); + + ImportAnnotationType defaultIAT = null; + + if (classifier != null) { + defaultIAT = classifier.classifyFile( + file.getName(), + classifier.getDefaultType()); + } + LineNumberReader in = null; try { in = @@ -124,7 +139,9 @@ ImportRange range = new ImportRange(from, to); - ImportAnnotationType type = null; // TODO: do classification + ImportAnnotationType type = classifier != null + ? 
classifier.classifyDescription(line, defaultIAT) + : null; ImportAnnotation annotation = new ImportAnnotation( attribute, position, range, edge, type); diff -r e09f00ecb915 -r 763c4137d6e1 flys-backend/src/main/java/de/intevation/flys/importer/ImportRiver.java --- a/flys-backend/src/main/java/de/intevation/flys/importer/ImportRiver.java Sat Jun 18 20:13:31 2011 +0000 +++ b/flys-backend/src/main/java/de/intevation/flys/importer/ImportRiver.java Sun Jun 19 12:26:12 2011 +0000 @@ -60,14 +60,21 @@ protected ImportWst wst; + protected AnnotationClassifier annotationClassifier; + protected River peer; public ImportRiver() { + this(null); + } + + public ImportRiver(AnnotationClassifier annotationClassifier) { extraWsts = new ArrayList(); fixations = new ArrayList(); officialLines = new ArrayList(); floodWater = new ArrayList(); floodProtection = new ArrayList(); + this.annotationClassifier = annotationClassifier; } public ImportRiver(String name, File wstFile, File bbInfoFile) { @@ -330,7 +337,8 @@ public void parseAnnotations() throws IOException { File riverDir = wstFile.getParentFile().getParentFile(); - AnnotationsParser aparser = new AnnotationsParser(); + AnnotationsParser aparser = + new AnnotationsParser(annotationClassifier); aparser.parse(riverDir); annotations = aparser.getAnnotations(); diff -r e09f00ecb915 -r 763c4137d6e1 flys-backend/src/main/java/de/intevation/flys/importer/Importer.java --- a/flys-backend/src/main/java/de/intevation/flys/importer/Importer.java Sat Jun 18 20:13:31 2011 +0000 +++ b/flys-backend/src/main/java/de/intevation/flys/importer/Importer.java Sun Jun 19 12:26:12 2011 +0000 @@ -1,5 +1,7 @@ package de.intevation.flys.importer; +import de.intevation.artifacts.common.utils.XMLUtils; + import java.io.File; import java.io.IOException; @@ -12,6 +14,8 @@ import org.hibernate.Transaction; import org.hibernate.HibernateException; +import org.w3c.dom.Document; + public class Importer { private static Logger log = 
Logger.getLogger(Importer.class); @@ -19,6 +23,9 @@ public static final boolean DRY_RUN = Boolean.getBoolean("flys.backend.importer.dry.run"); + public static final String ANNOTATION_TYPES = + "flys.backend.importer.annotation.types"; + protected List rivers; public Importer() { @@ -79,9 +86,34 @@ } } + public static AnnotationClassifier getAnnotationClassifier() { + String annotationTypes = System.getProperty(ANNOTATION_TYPES); + + if (annotationTypes == null) { + return null; + } + + File file = new File(annotationTypes); + + if (!(file.isFile() && file.canRead())) { + log.warn("annotation type file '" + file + "' is not readable."); + return null; + } + + Document rules = XMLUtils.parseDocument(file); + + if (rules == null) { + log.warn("cannot parse annotation types file."); + return null; + } + + return new AnnotationClassifier(rules); + } + public static void main(String [] args) { - InfoGewParser infoGewParser = new InfoGewParser(); + InfoGewParser infoGewParser = new InfoGewParser( + getAnnotationClassifier()); for (String gew: args) { log.info("parsing info gew file: " + gew); diff -r e09f00ecb915 -r 763c4137d6e1 flys-backend/src/main/java/de/intevation/flys/importer/InfoGewParser.java --- a/flys-backend/src/main/java/de/intevation/flys/importer/InfoGewParser.java Sat Jun 18 20:13:31 2011 +0000 +++ b/flys-backend/src/main/java/de/intevation/flys/importer/InfoGewParser.java Sun Jun 19 12:26:12 2011 +0000 @@ -34,8 +34,15 @@ protected ArrayList rivers; + protected AnnotationClassifier annotationClassifier; + public InfoGewParser() { + this(null); + } + + public InfoGewParser(AnnotationClassifier annotationClassifier) { rivers = new ArrayList(); + this.annotationClassifier = annotationClassifier; } public List getRivers() { @@ -52,7 +59,7 @@ File root = file.getParentFile(); - ImportRiver importRiver = new ImportRiver(); + ImportRiver importRiver = new ImportRiver(annotationClassifier); try { in = new LineNumberReader(