diff flys-backend/src/main/java/org/dive4elements/river/importer/parsers/AnnotationClassifier.java @ 5828:dfb26b03b179

Moved directories to org.dive4elements.river
author Sascha L. Teichmann <teichmann@intevation.de>
date Thu, 25 Apr 2013 11:53:11 +0200
parents flys-backend/src/main/java/de/intevation/flys/importer/parsers/AnnotationClassifier.java@0d27d02b1208
children 18619c1e7c2a
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/flys-backend/src/main/java/org/dive4elements/river/importer/parsers/AnnotationClassifier.java	Thu Apr 25 11:53:11 2013 +0200
@@ -0,0 +1,232 @@
+package de.intevation.flys.importer.parsers;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.NodeList;
+import org.w3c.dom.Element;
+
+import javax.xml.xpath.XPathConstants;
+
+import java.util.Map;
+import java.util.HashMap;
+import java.util.List;
+import java.util.ArrayList;
+
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;
+
+import org.apache.log4j.Logger;
+
+import de.intevation.artifacts.common.utils.XMLUtils;
+
+import de.intevation.flys.importer.ImportAnnotationType;
+import de.intevation.flys.importer.Importer;
+
+public class AnnotationClassifier
+{
+    private static Logger log = Logger.getLogger(Importer.class);
+
+    public static final String TYPES_XPATH =
+        "/annotation/types/type";
+
+    public static final String FILE_PATTERNS_XPATH =
+        "/annotation/patterns/file";
+
+    public static final String DESCRIPTION_PATTERNS_XPATH =
+        "/annotation/patterns/line";
+
+
+    public static class Pair {
+
+        protected Pattern              pattern;
+        protected ImportAnnotationType annType;
+
+        public Pair(Pattern pattern, ImportAnnotationType annType) {
+            this.pattern  = pattern;
+            this.annType = annType;
+        }
+
+        public ImportAnnotationType match(String s) {
+            Matcher m = pattern.matcher(s);
+            return m.matches() ? annType : null;
+        }
+    } // class Pair
+
+
+    protected Map<String, ImportAnnotationType> types;
+    protected List<Pair>                        filePatterns;
+    protected List<Pair>                        descPatterns;
+
+    protected ImportAnnotationType defaultType;
+
+    public AnnotationClassifier() {
+    }
+
+    public AnnotationClassifier(Document rules) {
+        types        = new HashMap<String, ImportAnnotationType>();
+        filePatterns = new ArrayList<Pair>();
+        descPatterns = new ArrayList<Pair>();
+
+        buildRules(rules);
+    }
+
+    protected void buildRules(Document rules) {
+        buildTypes(rules);
+        buildFilePatterns(rules);
+        buildDescriptionPatterns(rules);
+    }
+
+    protected void buildTypes(Document rules) {
+
+        NodeList typeList = (NodeList)XMLUtils.xpath(
+            rules,
+            TYPES_XPATH,
+            XPathConstants.NODESET,
+            null);
+
+        if (typeList == null) {
+            log.info("no rules found.");
+            return;
+        }
+
+        for (int i = 0, N = typeList.getLength(); i < N; ++i) {
+            Element typeElement = (Element)typeList.item(i);
+            String name = typeElement.getAttribute("name");
+            if (name.length() == 0) {
+                log.warn("ANNCLASS: rule has no name");
+                continue;
+            }
+
+            ImportAnnotationType aic = new ImportAnnotationType(name);
+
+            types.put(name, aic);
+
+            if (typeElement.getAttribute("default").equals("true")) {
+                defaultType = aic;
+            }
+        }
+    }
+
+    protected void buildFilePatterns(Document rules) {
+
+        NodeList patternList = (NodeList)XMLUtils.xpath(
+            rules,
+            FILE_PATTERNS_XPATH,
+            XPathConstants.NODESET,
+            null);
+
+        if (patternList == null) {
+            log.info("no file patterns found.");
+            return;
+        }
+
+        for (int i = 0, N = patternList.getLength(); i < N; ++i) {
+            Element element = (Element)patternList.item(i);
+            Pair pair = buildPair(element);
+            if (pair != null) {
+                filePatterns.add(pair);
+            }
+        }
+    }
+
+    protected void buildDescriptionPatterns(Document rules) {
+
+        NodeList patternList = (NodeList)XMLUtils.xpath(
+            rules,
+            DESCRIPTION_PATTERNS_XPATH,
+            XPathConstants.NODESET,
+            null);
+
+        if (patternList == null) {
+            log.info("no line patterns found.");
+            return;
+        }
+
+        for (int i = 0, N = patternList.getLength(); i < N; ++i) {
+            Element element = (Element)patternList.item(i);
+            Pair pair = buildPair(element);
+            if (pair != null) {
+                descPatterns.add(pair);
+            }
+        }
+    }
+
+    protected Pair buildPair(Element element) {
+        String pattern = element.getAttribute("pattern");
+        String type    = element.getAttribute("type");
+
+        if (pattern.length() == 0) {
+            log.warn("ANNCLASS: pattern has no 'pattern' attribute.");
+            return null;
+        }
+
+        if (type.length() == 0) {
+            log.warn("ANNCLASS: pattern has no 'type' attribute.");
+            return null;
+        }
+
+        ImportAnnotationType annType = types.get(type);
+
+        if (annType == null) {
+            log.warn("ANNCLASS: pattern has unknown type '" + type + "'");
+            return null;
+        }
+
+        Pattern p;
+
+        try {
+            p = Pattern.compile(pattern,
+                    Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE);
+        }
+        catch (IllegalArgumentException iae) {
+            log.warn("ANNCLASS: pattern '" + pattern + "' is invalid.", iae);
+            return null;
+        }
+
+        return new Pair(p, annType);
+    }
+
+    public ImportAnnotationType getDefaultType() {
+        return defaultType;
+    }
+
+    public ImportAnnotationType classifyFile(String filename) {
+        return classifyFile(filename, null);
+    }
+
+    public ImportAnnotationType classifyFile(
+        String                filename,
+        ImportAnnotationType def
+    ) {
+        if (filename.toLowerCase().endsWith(".km")) {
+            filename = filename.substring(0, filename.length()-3);
+        }
+
+        for (Pair pair: filePatterns) {
+            ImportAnnotationType annType = pair.match(filename);
+            if (annType != null) {
+                return annType;
+            }
+        }
+
+        return def;
+    }
+
+    public ImportAnnotationType classifyDescription(String description) {
+        return classifyDescription(description, null);
+    }
+
+    public ImportAnnotationType classifyDescription(
+        String                description,
+        ImportAnnotationType def
+    ) {
+        for (Pair pair: descPatterns) {
+            ImportAnnotationType annType = pair.match(description);
+            if (annType != null) {
+                return annType;
+            }
+        }
+
+        return def;
+    }
+}
+// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org