Mercurial > dive4elements > river
diff flys-backend/src/main/java/de/intevation/flys/importer/parsers/AnnotationClassifier.java @ 1211:f08fe480092c
Moved file parsers to separate package.
flys-backend/trunk@2337 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author | Sascha L. Teichmann <sascha.teichmann@intevation.de> |
---|---|
date | Fri, 15 Jul 2011 13:07:45 +0000 |
parents | |
children | 0d27d02b1208 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/flys-backend/src/main/java/de/intevation/flys/importer/parsers/AnnotationClassifier.java Fri Jul 15 13:07:45 2011 +0000 @@ -0,0 +1,232 @@ +package de.intevation.flys.importer.parsers; + +import org.w3c.dom.Document; +import org.w3c.dom.NodeList; +import org.w3c.dom.Element; + +import javax.xml.xpath.XPathConstants; + +import java.util.Map; +import java.util.HashMap; +import java.util.List; +import java.util.ArrayList; + +import java.util.regex.Pattern; +import java.util.regex.Matcher; + +import org.apache.log4j.Logger; + +import de.intevation.artifacts.common.utils.XMLUtils; + +import de.intevation.flys.importer.ImportAnnotationType; +import de.intevation.flys.importer.Importer; + +public class AnnotationClassifier +{ + private static Logger log = Logger.getLogger(Importer.class); + + public static final String TYPES_XPATH = + "/annotation/types/type"; + + public static final String FILE_PATTERNS_XPATH = + "/annotation/patterns/file"; + + public static final String DESCRIPTION_PATTERNS_XPATH = + "/annotation/patterns/line"; + + + public static class Pair { + + protected Pattern pattern; + protected ImportAnnotationType annType; + + public Pair(Pattern pattern, ImportAnnotationType annType) { + this.pattern = pattern; + this.annType = annType; + } + + public ImportAnnotationType match(String s) { + Matcher m = pattern.matcher(s); + return m.matches() ? annType : null; + } + } // class Pair + + + protected Map<String, ImportAnnotationType> types; + protected List<Pair> filePatterns; + protected List<Pair> descPatterns; + + protected ImportAnnotationType defaultType; + + public AnnotationClassifier() { + } + + public AnnotationClassifier(Document rules) { + types = new HashMap<String, ImportAnnotationType>(); + filePatterns = new ArrayList<Pair>(); + descPatterns = new ArrayList<Pair>(); + + buildRules(rules); + } + + protected void buildRules(Document rules) { + buildTypes(rules); + buildFilePatterns(rules); + buildDescriptionPatterns(rules); + } + + protected void buildTypes(Document rules) { + + NodeList typeList = (NodeList)XMLUtils.xpath( + rules, + TYPES_XPATH, + XPathConstants.NODESET, + null); + + if (typeList == null) { + log.info("no rules found."); + return; + } + + for (int i = 0, N = typeList.getLength(); i < N; ++i) { + Element typeElement = (Element)typeList.item(i); + String name = typeElement.getAttribute("name"); + if (name.length() == 0) { + log.warn("rule has no name"); + continue; + } + + ImportAnnotationType aic = new ImportAnnotationType(name); + + types.put(name, aic); + + if (typeElement.getAttribute("default").equals("true")) { + defaultType = aic; + } + } + } + + protected void buildFilePatterns(Document rules) { + + NodeList patternList = (NodeList)XMLUtils.xpath( + rules, + FILE_PATTERNS_XPATH, + XPathConstants.NODESET, + null); + + if (patternList == null) { + log.info("no file patterns found."); + return; + } + + for (int i = 0, N = patternList.getLength(); i < N; ++i) { + Element element = (Element)patternList.item(i); + Pair pair = buildPair(element); + if (pair != null) { + filePatterns.add(pair); + } + } + } + + protected void buildDescriptionPatterns(Document rules) { + + NodeList patternList = (NodeList)XMLUtils.xpath( + rules, + DESCRIPTION_PATTERNS_XPATH, + XPathConstants.NODESET, + null); + + if (patternList == null) { + log.info("no line patterns found."); + return; + } + + for (int i = 0, N = patternList.getLength(); i < N; ++i) { + Element element = (Element)patternList.item(i); + Pair pair = buildPair(element); + if (pair != null) { + descPatterns.add(pair); + } + } + } + + protected Pair buildPair(Element element) { + String pattern = element.getAttribute("pattern"); + String type = element.getAttribute("type"); + + if (pattern.length() == 0) { + log.warn("pattern has no 'pattern' attribute."); + return null; + } + + if (type.length() == 0) { + log.warn("pattern has no 'type' attribute."); + return null; + } + + ImportAnnotationType annType = types.get(type); + + if (annType == null) { + log.warn("pattern has unknown type '" + type + "'"); + return null; + } + + Pattern p; + + try { + p = Pattern.compile(pattern, + Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE); + } + catch (IllegalArgumentException iae) { + log.warn("pattern '" + pattern + "' is invalid.", iae); + return null; + } + + return new Pair(p, annType); + } + + public ImportAnnotationType getDefaultType() { + return defaultType; + } + + public ImportAnnotationType classifyFile(String filename) { + return classifyFile(filename, null); + } + + public ImportAnnotationType classifyFile( + String filename, + ImportAnnotationType def + ) { + if (filename.toLowerCase().endsWith(".km")) { + filename = filename.substring(0, filename.length()-3); + } + + for (Pair pair: filePatterns) { + ImportAnnotationType annType = pair.match(filename); + if (annType != null) { + return annType; + } + } + + return def; + } + + public ImportAnnotationType classifyDescription(String description) { + return classifyDescription(description, null); + } + + public ImportAnnotationType classifyDescription( + String description, + ImportAnnotationType def + ) { + for (Pair pair: descPatterns) { + ImportAnnotationType annType = pair.match(description); + if (annType != null) { + return annType; + } + } + + return def; + } +} +// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :