package org.dive4elements.river.importer.parsers;

import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.w3c.dom.Element;

import javax.xml.xpath.XPathConstants;

import java.util.Map;
import java.util.HashMap;
import java.util.List;
import java.util.ArrayList;

import java.util.regex.Pattern;
import java.util.regex.Matcher;

import org.apache.log4j.Logger;

import org.dive4elements.artifacts.common.utils.XMLUtils;

import org.dive4elements.river.importer.ImportAnnotationType;
import org.dive4elements.river.importer.Importer;

/**
 * Classifies annotations into {@link ImportAnnotationType}s by matching
 * file names and description lines against regular expressions that are
 * read from an XML rules document.
 *
 * <p>The rules document is queried with three XPath expressions:
 * {@link #TYPES_XPATH} declares the known types (attributes
 * <code>name</code> and <code>default</code>), while
 * {@link #FILE_PATTERNS_XPATH} and {@link #DESCRIPTION_PATTERNS_XPATH}
 * declare patterns (attributes <code>pattern</code> and <code>type</code>)
 * that map to one of the declared types.</p>
 */
public class AnnotationClassifier
{
    // NOTE(review): the logger is registered under Importer.class, not this
    // class. The "ANNCLASS:" message prefixes suggest this is intentional,
    // so the category is left unchanged.
    private static final Logger log = Logger.getLogger(Importer.class);

    public static final String TYPES_XPATH =
        "/annotation/types/type";

    public static final String FILE_PATTERNS_XPATH =
        "/annotation/patterns/file";

    public static final String DESCRIPTION_PATTERNS_XPATH =
        "/annotation/patterns/line";


    /** A compiled pattern together with the type it stands for. */
    public static class Pair {

        protected Pattern              pattern;
        protected ImportAnnotationType annType;

        public Pair(Pattern pattern, ImportAnnotationType annType) {
            this.pattern = pattern;
            this.annType = annType;
        }

        /**
         * Tests the given string against the pattern.
         *
         * @param s The string to test. The whole string has to match.
         * @return The type of this pair if the string matches,
         *         <code>null</code> otherwise.
         */
        public ImportAnnotationType match(String s) {
            Matcher m = pattern.matcher(s);
            return m.matches() ? annType : null;
        }
    } // class Pair


    // The collections are created eagerly so that an instance built with
    // the no-argument constructor can be queried without running into a
    // NullPointerException; it simply has no rules.

    /** Declared types, keyed by their name. */
    protected Map<String, ImportAnnotationType> types =
        new HashMap<String, ImportAnnotationType>();

    /** Patterns applied to file names, in document order. */
    protected List<Pair> filePatterns = new ArrayList<Pair>();

    /** Patterns applied to description lines, in document order. */
    protected List<Pair> descPatterns = new ArrayList<Pair>();

    /** The type flagged as default in the rules, <code>null</code> if none. */
    protected ImportAnnotationType defaultType;

    /** Creates a classifier without any rules. */
    public AnnotationClassifier() {
    }

    /**
     * Creates a classifier from the given rules document.
     *
     * @param rules The XML document containing types and patterns.
     */
    public AnnotationClassifier(Document rules) {
        // NOTE(review): buildRules() is overridable and is called from the
        // constructor; kept as is because subclasses may depend on it.
        buildRules(rules);
    }

    /**
     * Reads types and patterns from the rules document. The types are
     * read first because the patterns refer to them by name.
     *
     * @param rules The XML document containing types and patterns.
     */
    protected void buildRules(Document rules) {
        buildTypes(rules);
        buildFilePatterns(rules);
        buildDescriptionPatterns(rules);
    }

    /**
     * Registers every element found under {@link #TYPES_XPATH} that has a
     * non-empty <code>name</code> attribute. An element whose
     * <code>default</code> attribute equals "true" becomes the default
     * type; if several are flagged, the last one wins.
     *
     * @param rules The XML document containing the type declarations.
     */
    protected void buildTypes(Document rules) {

        NodeList typeList = (NodeList)XMLUtils.xpath(
            rules,
            TYPES_XPATH,
            XPathConstants.NODESET,
            null);

        if (typeList == null) {
            log.info("no rules found.");
            return;
        }

        for (int i = 0, N = typeList.getLength(); i < N; ++i) {
            Element typeElement = (Element)typeList.item(i);
            String name = typeElement.getAttribute("name");
            if (name.length() == 0) {
                log.warn("ANNCLASS: rule has no name");
                continue;
            }

            ImportAnnotationType aic = new ImportAnnotationType(name);

            types.put(name, aic);

            if (typeElement.getAttribute("default").equals("true")) {
                defaultType = aic;
            }
        }
    }

    /**
     * Reads the file name patterns found under
     * {@link #FILE_PATTERNS_XPATH}.
     *
     * @param rules The XML document containing the patterns.
     */
    protected void buildFilePatterns(Document rules) {
        collectPairs(
            rules,
            FILE_PATTERNS_XPATH,
            "no file patterns found.",
            filePatterns);
    }

    /**
     * Reads the description line patterns found under
     * {@link #DESCRIPTION_PATTERNS_XPATH}.
     *
     * @param rules The XML document containing the patterns.
     */
    protected void buildDescriptionPatterns(Document rules) {
        collectPairs(
            rules,
            DESCRIPTION_PATTERNS_XPATH,
            "no line patterns found.",
            descPatterns);
    }

    /**
     * Evaluates an XPath expression on the rules document and appends a
     * pair for every resulting element that describes a valid pattern.
     *
     * @param rules       The XML document containing the patterns.
     * @param xpath       The expression selecting the pattern elements.
     * @param missingInfo Logged at info level if the query yields
     *                    <code>null</code>.
     * @param target      The list receiving the pairs.
     */
    private void collectPairs(
        Document   rules,
        String     xpath,
        String     missingInfo,
        List<Pair> target
    ) {
        NodeList patternList = (NodeList)XMLUtils.xpath(
            rules,
            xpath,
            XPathConstants.NODESET,
            null);

        if (patternList == null) {
            log.info(missingInfo);
            return;
        }

        for (int i = 0, N = patternList.getLength(); i < N; ++i) {
            Pair pair = buildPair((Element)patternList.item(i));
            if (pair != null) {
                target.add(pair);
            }
        }
    }

    /**
     * Builds a pair from a pattern element. The element needs a non-empty
     * <code>pattern</code> attribute holding a valid regular expression
     * and a non-empty <code>type</code> attribute naming an already
     * registered type. The expression is compiled case insensitively.
     *
     * @param element The pattern element.
     * @return The pair, or <code>null</code> if the element is unusable.
     *         The reason is logged as a warning.
     */
    protected Pair buildPair(Element element) {
        String pattern = element.getAttribute("pattern");
        String type    = element.getAttribute("type");

        if (pattern.length() == 0) {
            log.warn("ANNCLASS: pattern has no 'pattern' attribute.");
            return null;
        }

        if (type.length() == 0) {
            log.warn("ANNCLASS: pattern has no 'type' attribute.");
            return null;
        }

        ImportAnnotationType annType = types.get(type);

        if (annType == null) {
            log.warn("ANNCLASS: pattern has unknown type '" + type + "'");
            return null;
        }

        Pattern p;

        try {
            p = Pattern.compile(pattern,
                    Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE);
        }
        catch (IllegalArgumentException iae) {
            log.warn("ANNCLASS: pattern '" + pattern + "' is invalid.", iae);
            return null;
        }

        return new Pair(p, annType);
    }

    /**
     * @return The type flagged as default in the rules or
     *         <code>null</code> if there is none.
     */
    public ImportAnnotationType getDefaultType() {
        return defaultType;
    }

    /**
     * Classifies a file name, yielding <code>null</code> if no file
     * pattern matches.
     *
     * @param filename The file name to classify.
     * @return The type of the first matching pattern or <code>null</code>.
     */
    public ImportAnnotationType classifyFile(String filename) {
        return classifyFile(filename, null);
    }

    /**
     * Classifies a file name. A trailing ".km" (in any case) is removed
     * before the file patterns are tried in document order.
     *
     * @param filename The file name to classify.
     * @param def      The result if no pattern matches or if the file
     *                 name is <code>null</code>.
     * @return The type of the first matching pattern or <code>def</code>.
     */
    public ImportAnnotationType classifyFile(
        String               filename,
        ImportAnnotationType def
    ) {
        if (filename == null) {
            return def;
        }

        if (filename.toLowerCase().endsWith(".km")) {
            filename = filename.substring(0, filename.length()-3);
        }

        return firstMatch(filePatterns, filename, def);
    }

    /**
     * Classifies a description line, yielding <code>null</code> if no
     * line pattern matches.
     *
     * @param description The description to classify.
     * @return The type of the first matching pattern or <code>null</code>.
     */
    public ImportAnnotationType classifyDescription(String description) {
        return classifyDescription(description, null);
    }

    /**
     * Classifies a description line by trying the line patterns in
     * document order.
     *
     * @param description The description to classify.
     * @param def         The result if no pattern matches or if the
     *                    description is <code>null</code>.
     * @return The type of the first matching pattern or <code>def</code>.
     */
    public ImportAnnotationType classifyDescription(
        String               description,
        ImportAnnotationType def
    ) {
        if (description == null) {
            return def;
        }

        return firstMatch(descPatterns, description, def);
    }

    /**
     * Returns the type of the first pair matching the given string.
     *
     * @param pairs The pairs to try, in order.
     * @param s     The string to match, must not be <code>null</code>.
     * @param def   The result if no pair matches.
     * @return The type of the first matching pair or <code>def</code>.
     */
    private static ImportAnnotationType firstMatch(
        List<Pair>           pairs,
        String               s,
        ImportAnnotationType def
    ) {
        for (Pair pair: pairs) {
            ImportAnnotationType annType = pair.match(s);
            if (annType != null) {
                return annType;
            }
        }

        return def;
    }
}
// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :