Mercurial > dive4elements > river
changeset 765:763c4137d6e1
Added classification of annotation types. Needs testing!
flys-backend/trunk@2162 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author | Sascha L. Teichmann <sascha.teichmann@intevation.de> |
---|---|
date | Sun, 19 Jun 2011 12:26:12 +0000 |
parents | e09f00ecb915 |
children | aa9e3da95c31 |
files | flys-backend/ChangeLog flys-backend/doc/annotation-types.xml flys-backend/src/main/java/de/intevation/flys/importer/AnnotationClassifier.java flys-backend/src/main/java/de/intevation/flys/importer/AnnotationsParser.java flys-backend/src/main/java/de/intevation/flys/importer/ImportRiver.java flys-backend/src/main/java/de/intevation/flys/importer/Importer.java flys-backend/src/main/java/de/intevation/flys/importer/InfoGewParser.java |
diffstat | 7 files changed, 404 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
--- a/flys-backend/ChangeLog Sat Jun 18 20:13:31 2011 +0000 +++ b/flys-backend/ChangeLog Sun Jun 19 12:26:12 2011 +0000 @@ -1,4 +1,50 @@ -2011-06-14 Sascha L. Teichmann <sascha.teichmann@intevation.de> +2011-06-19 Sascha L. Teichmann <sascha.teichmann@intevation.de> + + Added classification of annotation types. Needs testing! + + * doc/annotation-types.xml: New. Rules to classify the different + types of annotations. The classification works like this: + + There are unique types like 'Brücke', 'Pegel' and so on. + They are defined in the /annotation/types section and + identified by their name. One of the types can be set + as the default type, which is used if no rule applies. + + The /annotation/patterns section contains two kinds of patterns. + + 1 - file patterns: If a KM file is opened its filename is + matched against the regular expressions of these + patterns. If a match is found the corresponding type + is used as the default type in the open file. + If no match is found the global default type is used + as the default type. + + 2 - line patterns: For each line of an open KM file these + patterns are applied to find a match. If a match is + found the corresponding type is used as the type of + the annotation. If no match is found the + file default is assumed to be the right type. For + the file default see 1. + + * src/main/java/de/intevation/flys/importer/Importer.java: + To activate the annotation type classification set + the system property + + 'flys.backend.importer.annotation.types' + + to the path of an XML file looking like the annotation-types.xml + file. If the system property is not set no classification + is done. + + * src/main/java/de/intevation/flys/importer/AnnotationClassifier.java: + New. Implements the classification. 
+ + * src/main/java/de/intevation/flys/importer/AnnotationsParser.java, + src/main/java/de/intevation/flys/importer/InfoGewParser.java, + src/main/java/de/intevation/flys/importer/ImportRiver.java: + Looped through the annotation type classification. + +2011-06-18 Sascha L. Teichmann <sascha.teichmann@intevation.de> * src/main/java/de/intevation/flys/model/River.java: Added method to find gauge by a position lying in its range.
<?xml version="1.0" encoding="UTF-8" ?>
<!--
    flys-backend/doc/annotation-types.xml

    Rules for classifying imported annotations by type. The importer loads
    a file of this shape when the system property
    'flys.backend.importer.annotation.types' holds its path.
-->
<annotation>
    <!--
        The known annotation types, identified by their name. The type
        flagged with default="true" is the global fallback that is used
        when no pattern applies.
    -->
    <types>
        <type name="Abzweigung"/>
        <type name="Berechnungsstrecke"/>
        <type name="Brücke"/>
        <type name="Bundesland"/>
        <type name="Deich"/>
        <type name="Einmündung"/>
        <type name="Fähre"/>
        <type name="Gemeinde"/>
        <type name="Grenze"/>
        <type name="Hafen"/>
        <type name="HW-Schutz"/>
        <type name="Landkreis"/>
        <type name="Meldestelle"/>
        <type name="Messstelle"/>
        <type name="Pegel"/>
        <type name="Stauwehr"/>
        <type name="Staatsgrenze"/>
        <type name="Staat"/>
        <type name="WSA"/>
        <type name="Zufluß"/>
        <type name="Sonstige" default="true"/>
    </types>

    <!--
        Patterns are regular expressions which have to match the whole
        input; upper and lower case are not distinguished. Every 'type'
        attribute must name one of the types declared above, otherwise the
        classifier logs a warning and drops the pattern.
    -->
    <patterns>
        <!--
            File patterns: matched against the name of a KM file with its
            ".km" suffix removed. A hit becomes the default type for all
            annotations of that file.
        -->
        <file pattern="^Brücken$" type="Brücke"/>
        <file pattern="^Deich.*$" type="Deich"/>
        <file pattern="^Hafen$" type="Hafen"/>
        <file pattern="^Pegel-alle$" type="Pegel"/>
        <file pattern="^Pegel$" type="Pegel"/>
        <file pattern="^Wehr$" type="Stauwehr"/>
        <file pattern="^Stauwehr$" type="Stauwehr"/>
        <file pattern="^Zufluß$" type="Zufluß"/>

        <!--
            Line patterns: matched against each annotation line of a KM
            file. The first hit determines the type of the annotation; if
            none hits, the default type of the file is used.
        -->
        <line pattern="^Abz\.?[:\s].*$" type="Abzweigung"/>
        <line pattern="^Berechnungsstrecke.*$" type="Berechnungsstrecke"/>
        <line pattern="^Brücke[:\s].*$" type="Brücke"/>
        <line pattern="^Bundesland[:\s].*$" type="Bundesland"/>
        <!--
            The next three rules used to reference the undeclared type
            "Abzweig" and were therefore discarded when the rules were
            loaded. They now point to their identically named types.
        -->
        <line pattern="^Einmündung[:\s].*$" type="Einmündung"/>
        <line pattern="^Fähre[:\s].*$" type="Fähre"/>
        <line pattern="^Gemeinde[:\s].*$" type="Gemeinde"/>
        <line pattern="^Grenze[:\s].*$" type="Grenze"/>
        <line pattern="^Hafen[:\s].*$" type="Hafen"/>
        <line pattern="^HW-Schutz[:\s].*$" type="HW-Schutz"/>
        <line pattern="^Landkreis[:\s].*$" type="Landkreis"/>
        <line pattern="^Meldestelle[:\s].*$" type="Meldestelle"/>
        <line pattern="^Pegel[:\s].*$" type="Pegel"/>
        <line pattern="^Staatsgrenze[:\s].*$" type="Staatsgrenze"/>
        <line pattern="^Staat[:\s].*$" type="Staat"/>
        <line pattern="^Wehr[:\s].*$" type="Stauwehr"/>
        <line pattern="^WSA[:\s].*$" type="WSA"/>
        <line pattern="^Zufluß[:\s].*$" type="Zufluß"/>
    </patterns>
</annotation>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/flys-backend/src/main/java/de/intevation/flys/importer/AnnotationClassifier.java Sun Jun 19 12:26:12 2011 +0000 @@ -0,0 +1,233 @@ +package de.intevation.flys.importer; + +import org.w3c.dom.Document; +import org.w3c.dom.NodeList; +import org.w3c.dom.Element; + +import javax.xml.xpath.XPathConstants; + +import java.io.File; +import java.io.IOException; + +import java.util.Map; +import java.util.HashMap; +import java.util.List; +import java.util.ArrayList; + +import java.util.regex.Pattern; +import java.util.regex.Matcher; + +import org.apache.log4j.Logger; + +import de.intevation.artifacts.common.utils.XMLUtils; + +public class AnnotationClassifier +{ + private static Logger log = Logger.getLogger(Importer.class); + + public static final String TYPES_XPATH = + "/annotations/types/type"; + + public static final String FILE_PATTERNS_XPATH = + "/annotations/patterns/file"; + + public static final String DESCRIPTION_PATTERNS_XPATH = + "/annotations/patterns/line"; + + + public static class Pair { + + protected Pattern pattern; + protected ImportAnnotationType annType; + + public Pair(Pattern pattern, ImportAnnotationType annType) { + this.pattern = pattern; + this.annType = annType; + } + + public ImportAnnotationType match(String s) { + Matcher m = pattern.matcher(s); + return m.matches() ? 
annType : null; + } + } // class Pair + + + protected Map<String, ImportAnnotationType> types; + protected List<Pair> filePatterns; + protected List<Pair> descPatterns; + + protected ImportAnnotationType defaultType; + + public AnnotationClassifier() { + } + + public AnnotationClassifier(Document rules) { + types = new HashMap<String, ImportAnnotationType>(); + filePatterns = new ArrayList<Pair>(); + descPatterns = new ArrayList<Pair>(); + + buildRules(rules); + } + + protected void buildRules(Document rules) { + + buildTypes(rules); + buildFilePatterns(rules); + buildDescriptionPatterns(rules); + } + + protected void buildTypes(Document rules) { + + NodeList typeList = (NodeList)XMLUtils.xpath( + rules, + TYPES_XPATH, + XPathConstants.NODESET, + null); + + if (typeList == null) { + log.info("no rules found."); + return; + } + + for (int i = 0, N = typeList.getLength(); i < N; ++i) { + Element typeElement = (Element)typeList.item(i); + String name = typeElement.getAttribute("name"); + if (name.length() == 0) { + log.warn("rule has no name"); + continue; + } + + ImportAnnotationType aic = new ImportAnnotationType(name); + + types.put(name, aic); + + if (typeElement.getAttribute("default").equals("true")) { + defaultType = aic; + } + } + } + + protected void buildFilePatterns(Document rules) { + + NodeList patternList = (NodeList)XMLUtils.xpath( + rules, + FILE_PATTERNS_XPATH, + XPathConstants.NODESET, + null); + + if (patternList == null) { + log.info("no file patterns found."); + return; + } + + for (int i = 0, N = patternList.getLength(); i < N; ++i) { + Element element = (Element)patternList.item(i); + Pair pair = buildPair(element); + if (pair != null) { + filePatterns.add(pair); + } + } + } + + protected void buildDescriptionPatterns(Document rules) { + + NodeList patternList = (NodeList)XMLUtils.xpath( + rules, + DESCRIPTION_PATTERNS_XPATH, + XPathConstants.NODESET, + null); + + if (patternList == null) { + log.info("no line patterns found."); + return; + } + 
+ for (int i = 0, N = patternList.getLength(); i < N; ++i) { + Element element = (Element)patternList.item(i); + Pair pair = buildPair(element); + if (pair != null) { + descPatterns.add(pair); + } + } + } + + protected Pair buildPair(Element element) { + String pattern = element.getAttribute("pattern"); + String type = element.getAttribute("type"); + + if (pattern.length() == 0) { + log.warn("pattern has no 'pattern' attribute."); + return null; + } + + if (type.length() == 0) { + log.warn("pattern has no 'type' attribute."); + return null; + } + + ImportAnnotationType annType = types.get(type); + + if (annType == null) { + log.warn("pattern has unknown type '" + type + "'"); + return null; + } + + Pattern p; + + try { + p = Pattern.compile(pattern, + Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE); + } + catch (IllegalArgumentException iae) { + log.warn("pattern '" + pattern + "' is invalid.", iae); + return null; + } + + return new Pair(p, annType); + } + + public ImportAnnotationType getDefaultType() { + return defaultType; + } + + public ImportAnnotationType classifyFile(String filename) { + return classifyFile(filename, null); + } + + public ImportAnnotationType classifyFile( + String filename, + ImportAnnotationType def + ) { + if (filename.toLowerCase().endsWith(".km")) { + filename = filename.substring(0, filename.length()-3); + } + + for (Pair pair: filePatterns) { + ImportAnnotationType annType = pair.match(filename); + if (annType != null) { + return annType; + } + } + + return def; + } + + public ImportAnnotationType classifyDescription(String description) { + return classifyDescription(description, null); + } + + public ImportAnnotationType classifyDescription( + String description, + ImportAnnotationType def + ) { + for (Pair pair: descPatterns) { + ImportAnnotationType annType = pair.match(description); + if (annType != null) { + return annType; + } + } + + return def; + } +} +// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :
--- a/flys-backend/src/main/java/de/intevation/flys/importer/AnnotationsParser.java Sat Jun 18 20:13:31 2011 +0000 +++ b/flys-backend/src/main/java/de/intevation/flys/importer/AnnotationsParser.java Sun Jun 19 12:26:12 2011 +0000 @@ -31,15 +31,30 @@ protected HashMap<String, ImportAttribute> attributes; protected HashMap<String, ImportPosition> positions; protected TreeSet<ImportAnnotation> annotations; + protected AnnotationClassifier classifier; public AnnotationsParser() { + this(null); + } + + public AnnotationsParser(AnnotationClassifier classifier) { attributes = new HashMap<String, ImportAttribute>(); positions = new HashMap<String, ImportPosition>(); annotations = new TreeSet<ImportAnnotation>(); + this.classifier = classifier; } public void parseFile(File file) throws IOException { log.info("parsing km file: '" + file + "'"); + + ImportAnnotationType defaultIAT = null; + + if (classifier != null) { + defaultIAT = classifier.classifyFile( + file.getName(), + classifier.getDefaultType()); + } + LineNumberReader in = null; try { in = @@ -124,7 +139,9 @@ ImportRange range = new ImportRange(from, to); - ImportAnnotationType type = null; // TODO: do classification + ImportAnnotationType type = classifier != null + ? classifier.classifyDescription(line, defaultIAT) + : null; ImportAnnotation annotation = new ImportAnnotation( attribute, position, range, edge, type);
--- a/flys-backend/src/main/java/de/intevation/flys/importer/ImportRiver.java Sat Jun 18 20:13:31 2011 +0000 +++ b/flys-backend/src/main/java/de/intevation/flys/importer/ImportRiver.java Sun Jun 19 12:26:12 2011 +0000 @@ -60,14 +60,21 @@ protected ImportWst wst; + protected AnnotationClassifier annotationClassifier; + protected River peer; public ImportRiver() { + this(null); + } + + public ImportRiver(AnnotationClassifier annotationClassifier) { extraWsts = new ArrayList<ImportWst>(); fixations = new ArrayList<ImportWst>(); officialLines = new ArrayList<ImportWst>(); floodWater = new ArrayList<ImportWst>(); floodProtection = new ArrayList<ImportWst>(); + this.annotationClassifier = annotationClassifier; } public ImportRiver(String name, File wstFile, File bbInfoFile) { @@ -330,7 +337,8 @@ public void parseAnnotations() throws IOException { File riverDir = wstFile.getParentFile().getParentFile(); - AnnotationsParser aparser = new AnnotationsParser(); + AnnotationsParser aparser = + new AnnotationsParser(annotationClassifier); aparser.parse(riverDir); annotations = aparser.getAnnotations();
--- a/flys-backend/src/main/java/de/intevation/flys/importer/Importer.java Sat Jun 18 20:13:31 2011 +0000 +++ b/flys-backend/src/main/java/de/intevation/flys/importer/Importer.java Sun Jun 19 12:26:12 2011 +0000 @@ -1,5 +1,7 @@ package de.intevation.flys.importer; +import de.intevation.artifacts.common.utils.XMLUtils; + import java.io.File; import java.io.IOException; @@ -12,6 +14,8 @@ import org.hibernate.Transaction; import org.hibernate.HibernateException; +import org.w3c.dom.Document; + public class Importer { private static Logger log = Logger.getLogger(Importer.class); @@ -19,6 +23,9 @@ public static final boolean DRY_RUN = Boolean.getBoolean("flys.backend.importer.dry.run"); + public static final String ANNOTATION_TYPES = + "flys.backend.importer.annotation.types"; + protected List<ImportRiver> rivers; public Importer() { @@ -79,9 +86,34 @@ } } + public static AnnotationClassifier getAnnotationClassifier() { + String annotationTypes = System.getProperty(ANNOTATION_TYPES); + + if (annotationTypes == null) { + return null; + } + + File file = new File(annotationTypes); + + if (!(file.isFile() && file.canRead())) { + log.warn("annotation type file '" + file + "' is not readable."); + return null; + } + + Document rules = XMLUtils.parseDocument(file); + + if (rules == null) { + log.warn("cannot parse annotation types file."); + return null; + } + + return new AnnotationClassifier(rules); + } + public static void main(String [] args) { - InfoGewParser infoGewParser = new InfoGewParser(); + InfoGewParser infoGewParser = new InfoGewParser( + getAnnotationClassifier()); for (String gew: args) { log.info("parsing info gew file: " + gew);
--- a/flys-backend/src/main/java/de/intevation/flys/importer/InfoGewParser.java Sat Jun 18 20:13:31 2011 +0000 +++ b/flys-backend/src/main/java/de/intevation/flys/importer/InfoGewParser.java Sun Jun 19 12:26:12 2011 +0000 @@ -34,8 +34,15 @@ protected ArrayList<ImportRiver> rivers; + protected AnnotationClassifier annotationClassifier; + public InfoGewParser() { + this(null); + } + + public InfoGewParser(AnnotationClassifier annotationClassifier) { rivers = new ArrayList<ImportRiver>(); + this.annotationClassifier = annotationClassifier; } public List<ImportRiver> getRivers() { @@ -52,7 +59,7 @@ File root = file.getParentFile(); - ImportRiver importRiver = new ImportRiver(); + ImportRiver importRiver = new ImportRiver(annotationClassifier); try { in = new LineNumberReader(