Mercurial > dive4elements > river
view flys-backend/src/main/java/de/intevation/flys/importer/AnnotationClassifier.java @ 1207:7121a40671ff
HashedFile: Forgot to call the file hashing, so only the file lengths were compared.
flys-backend/trunk@2316 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author | Sascha L. Teichmann <sascha.teichmann@intevation.de> |
---|---|
date | Mon, 11 Jul 2011 10:02:13 +0000 |
parents | 6dc847194625 |
children |
line wrap: on
line source
package de.intevation.flys.importer;

import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.w3c.dom.Element;

import javax.xml.xpath.XPathConstants;

import java.util.Map;
import java.util.HashMap;
import java.util.List;
import java.util.ArrayList;

import java.util.regex.Pattern;
import java.util.regex.Matcher;

import org.apache.log4j.Logger;

import de.intevation.artifacts.common.utils.XMLUtils;

/**
 * Maps file names and description lines to {@link ImportAnnotationType}s
 * using regular-expression rules read from an XML document.
 *
 * <p>The rules document is queried with three XPath expressions:</p>
 * <ul>
 *   <li>{@link #TYPES_XPATH}: elements with a <code>name</code> attribute and
 *       an optional <code>default="true"</code> attribute,</li>
 *   <li>{@link #FILE_PATTERNS_XPATH}: elements with <code>pattern</code> and
 *       <code>type</code> attributes, matched against file names,</li>
 *   <li>{@link #DESCRIPTION_PATTERNS_XPATH}: elements with
 *       <code>pattern</code> and <code>type</code> attributes, matched against
 *       description lines.</li>
 * </ul>
 *
 * <p>Patterns are tried in the order in which they were read; the first
 * pattern that matches the whole input decides the type.</p>
 */
public class AnnotationClassifier
{
    // Log under this class's own category (previously Importer.class).
    private static final Logger log =
        Logger.getLogger(AnnotationClassifier.class);

    /** XPath selecting the annotation type definitions. */
    public static final String TYPES_XPATH =
        "/annotation/types/type";

    /** XPath selecting the patterns applied to file names. */
    public static final String FILE_PATTERNS_XPATH =
        "/annotation/patterns/file";

    /** XPath selecting the patterns applied to description lines. */
    public static final String DESCRIPTION_PATTERNS_XPATH =
        "/annotation/patterns/line";

    /** File name suffix stripped (ignoring case) before matching. */
    private static final String KM_SUFFIX = ".km";

    /**
     * Couples a compiled regular expression with the annotation type
     * that is reported when the expression matches.
     */
    public static class Pair {

        protected Pattern              pattern;
        protected ImportAnnotationType annType;

        /**
         * @param pattern the compiled expression to test inputs against.
         * @param annType the type returned by {@link #match(String)} on
         *                success.
         */
        public Pair(Pattern pattern, ImportAnnotationType annType) {
            this.pattern  = pattern;
            this.annType = annType;
        }

        /**
         * Tests whether the whole string matches the pattern.
         *
         * @param s the string to test; must not be <code>null</code>.
         * @return the annotation type of this pair if the complete string
         *         matches, <code>null</code> otherwise.
         */
        public ImportAnnotationType match(String s) {
            Matcher m = pattern.matcher(s);
            return m.matches() ? annType : null;
        }
    } // class Pair

    // The collections are initialised eagerly so that an instance created
    // with the no-arg constructor is a usable (empty) classifier instead of
    // failing with a NullPointerException on the first classify call.

    /** Known annotation types keyed by their name. */
    protected Map<String, ImportAnnotationType> types =
        new HashMap<String, ImportAnnotationType>();

    /** Patterns applied by the classifyFile methods, in rule order. */
    protected List<Pair> filePatterns = new ArrayList<Pair>();

    /** Patterns applied by the classifyDescription methods, in rule order. */
    protected List<Pair> descPatterns = new ArrayList<Pair>();

    /** Type flagged with default="true" in the rules; may be null. */
    protected ImportAnnotationType defaultType;

    /**
     * Creates a classifier without any rules. Every classification
     * returns the caller supplied default.
     */
    public AnnotationClassifier() {
    }

    /**
     * Creates a classifier and loads its rules from the given document.
     *
     * <p>Note: this calls the overridable {@link #buildRules(Document)};
     * overriding methods run before the subclass constructor body.</p>
     *
     * @param rules the XML document containing types and patterns.
     */
    public AnnotationClassifier(Document rules) {
        buildRules(rules);
    }

    /**
     * Loads types first, then file and description patterns. The order
     * matters because patterns are resolved against the known types.
     *
     * @param rules the XML document containing types and patterns.
     */
    protected void buildRules(Document rules) {
        buildTypes(rules);
        buildFilePatterns(rules);
        buildDescriptionPatterns(rules);
    }

    /**
     * Registers one {@link ImportAnnotationType} per element selected by
     * {@link #TYPES_XPATH}. Elements without a name are skipped with a
     * warning. The last element carrying <code>default="true"</code>
     * becomes the default type.
     *
     * @param rules the XML document to read the types from.
     */
    protected void buildTypes(Document rules) {
        NodeList typeList = (NodeList)XMLUtils.xpath(
            rules,
            TYPES_XPATH,
            XPathConstants.NODESET,
            null);

        if (typeList == null) {
            log.info("no rules found.");
            return;
        }

        for (int i = 0, N = typeList.getLength(); i < N; ++i) {
            Element typeElement = (Element)typeList.item(i);
            String name = typeElement.getAttribute("name");
            if (name.length() == 0) {
                log.warn("rule has no name");
                continue;
            }
            ImportAnnotationType aic = new ImportAnnotationType(name);
            types.put(name, aic);
            if (typeElement.getAttribute("default").equals("true")) {
                defaultType = aic;
            }
        }
    }

    /**
     * Reads the file name patterns selected by
     * {@link #FILE_PATTERNS_XPATH} and appends them to
     * {@link #filePatterns}.
     *
     * @param rules the XML document to read the patterns from.
     */
    protected void buildFilePatterns(Document rules) {
        collectPatterns(
            rules,
            FILE_PATTERNS_XPATH,
            filePatterns,
            "no file patterns found.");
    }

    /**
     * Reads the description line patterns selected by
     * {@link #DESCRIPTION_PATTERNS_XPATH} and appends them to
     * {@link #descPatterns}.
     *
     * @param rules the XML document to read the patterns from.
     */
    protected void buildDescriptionPatterns(Document rules) {
        collectPatterns(
            rules,
            DESCRIPTION_PATTERNS_XPATH,
            descPatterns,
            "no line patterns found.");
    }

    /**
     * Evaluates an XPath against the rules and appends every element that
     * {@link #buildPair(Element)} accepts to the target list.
     *
     * @param rules          the XML document to query.
     * @param xpath          the expression selecting the pattern elements.
     * @param target         the list receiving the valid pairs.
     * @param missingMessage info message logged if the query yields null.
     */
    private void collectPatterns(
        Document   rules,
        String     xpath,
        List<Pair> target,
        String     missingMessage
    ) {
        NodeList patternList = (NodeList)XMLUtils.xpath(
            rules,
            xpath,
            XPathConstants.NODESET,
            null);

        if (patternList == null) {
            log.info(missingMessage);
            return;
        }

        for (int i = 0, N = patternList.getLength(); i < N; ++i) {
            Pair pair = buildPair((Element)patternList.item(i));
            if (pair != null) {
                target.add(pair);
            }
        }
    }

    /**
     * Converts a pattern element into a {@link Pair}.
     *
     * <p>The element needs a non-empty <code>pattern</code> attribute
     * holding a valid regular expression and a non-empty <code>type</code>
     * attribute naming an already registered type. The expression is
     * compiled case insensitively with Unicode aware case folding.</p>
     *
     * @param element the pattern element.
     * @return the pair, or <code>null</code> (after logging a warning) if
     *         the element is incomplete, names an unknown type or holds
     *         an invalid expression.
     */
    protected Pair buildPair(Element element) {
        String pattern = element.getAttribute("pattern");
        String type    = element.getAttribute("type");

        if (pattern.length() == 0) {
            log.warn("pattern has no 'pattern' attribute.");
            return null;
        }

        if (type.length() == 0) {
            log.warn("pattern has no 'type' attribute.");
            return null;
        }

        ImportAnnotationType annType = types.get(type);

        if (annType == null) {
            log.warn("pattern has unknown type '" + type + "'");
            return null;
        }

        Pattern p;

        try {
            p = Pattern.compile(
                pattern,
                Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE);
        }
        catch (IllegalArgumentException iae) {
            // PatternSyntaxException is an IllegalArgumentException.
            log.warn("pattern '" + pattern + "' is invalid.", iae);
            return null;
        }

        return new Pair(p, annType);
    }

    /**
     * @return the type flagged as default in the rules, or
     *         <code>null</code> if none was flagged.
     */
    public ImportAnnotationType getDefaultType() {
        return defaultType;
    }

    /**
     * Classifies a file name, yielding <code>null</code> if nothing matches.
     *
     * @param filename the file name to classify.
     * @return the matching type or <code>null</code>.
     */
    public ImportAnnotationType classifyFile(String filename) {
        return classifyFile(filename, null);
    }

    /**
     * Classifies a file name. A trailing ".km" (in any letter case) is
     * removed before the file patterns are applied.
     *
     * @param filename the file name to classify; <code>null</code> yields
     *                 the default.
     * @param def      the value returned if no pattern matches.
     * @return the type of the first matching file pattern, else
     *         <code>def</code>.
     */
    public ImportAnnotationType classifyFile(
        String               filename,
        ImportAnnotationType def
    ) {
        if (filename == null) {
            return def;
        }

        // regionMatches(ignoreCase) is independent of the default locale
        // and returns false if the name is shorter than the suffix.
        int stem = filename.length() - KM_SUFFIX.length();
        if (filename.regionMatches(
            true, stem, KM_SUFFIX, 0, KM_SUFFIX.length())
        ) {
            filename = filename.substring(0, stem);
        }

        return firstMatch(filePatterns, filename, def);
    }

    /**
     * Classifies a description line, yielding <code>null</code> if nothing
     * matches.
     *
     * @param description the description to classify.
     * @return the matching type or <code>null</code>.
     */
    public ImportAnnotationType classifyDescription(String description) {
        return classifyDescription(description, null);
    }

    /**
     * Classifies a description line using the description patterns.
     *
     * @param description the description to classify; <code>null</code>
     *                    yields the default.
     * @param def         the value returned if no pattern matches.
     * @return the type of the first matching description pattern, else
     *         <code>def</code>.
     */
    public ImportAnnotationType classifyDescription(
        String               description,
        ImportAnnotationType def
    ) {
        return firstMatch(descPatterns, description, def);
    }

    /**
     * Returns the type of the first pair matching the input.
     *
     * @param pairs the candidate pairs, tried in list order.
     * @param input the string to match; <code>null</code> yields the default.
     * @param def   the value returned if no pair matches.
     */
    private static ImportAnnotationType firstMatch(
        List<Pair>           pairs,
        String               input,
        ImportAnnotationType def
    ) {
        if (input == null) {
            return def;
        }

        for (Pair pair: pairs) {
            ImportAnnotationType annType = pair.match(input);
            if (annType != null) {
                return annType;
            }
        }

        return def;
    }
}
// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :