Mercurial > dive4elements > river
view backend/src/main/java/org/dive4elements/river/importer/parsers/AnnotationClassifier.java @ 6890:8ca5d0e3c438 double-precision
BWASTR-Import: name of tributary contains name of river, not the other way round.
author | Tom Gottfried <tom.gottfried@intevation.de> |
---|---|
date | Wed, 21 Aug 2013 15:32:53 +0200 |
parents | 4c3ccf2b0304 |
children | 9344aa0fb021 |
line wrap: on
line source
/* Copyright (C) 2011, 2012, 2013 by Bundesanstalt für Gewässerkunde * Software engineering by Intevation GmbH * * This file is Free Software under the GNU AGPL (>=v3) * and comes with ABSOLUTELY NO WARRANTY! Check out the * documentation coming with Dive4Elements River for details. */ package org.dive4elements.river.importer.parsers; import org.w3c.dom.Document; import org.w3c.dom.NodeList; import org.w3c.dom.Element; import javax.xml.xpath.XPathConstants; import java.util.Map; import java.util.HashMap; import java.util.List; import java.util.ArrayList; import java.util.regex.Pattern; import java.util.regex.Matcher; import org.apache.log4j.Logger; import org.dive4elements.artifacts.common.utils.XMLUtils; import org.dive4elements.river.importer.ImportAnnotationType; import org.dive4elements.river.importer.Importer; public class AnnotationClassifier { private static Logger log = Logger.getLogger(Importer.class); public static final String TYPES_XPATH = "/annotation/types/type"; public static final String FILE_PATTERNS_XPATH = "/annotation/patterns/file"; public static final String DESCRIPTION_PATTERNS_XPATH = "/annotation/patterns/line"; public static class Pair { protected Pattern pattern; protected ImportAnnotationType annType; public Pair(Pattern pattern, ImportAnnotationType annType) { this.pattern = pattern; this.annType = annType; } public ImportAnnotationType match(String s) { Matcher m = pattern.matcher(s); return m.matches() ? annType : null; } } // class Pair protected Map<String, ImportAnnotationType> types; protected List<Pair> filePatterns; protected List<Pair> descPatterns; protected ImportAnnotationType defaultType; public AnnotationClassifier() { } public AnnotationClassifier(Document rules) { types = new HashMap<String, ImportAnnotationType>(); filePatterns = new ArrayList<Pair>(); descPatterns = new ArrayList<Pair>(); buildRules(rules); } protected void buildRules(Document rules) { buildTypes(rules); buildFilePatterns(rules); buildDescriptionPatterns(rules); } protected void buildTypes(Document rules) { NodeList typeList = (NodeList)XMLUtils.xpath( rules, TYPES_XPATH, XPathConstants.NODESET, null); if (typeList == null) { log.info("no rules found."); return; } for (int i = 0, N = typeList.getLength(); i < N; ++i) { Element typeElement = (Element)typeList.item(i); String name = typeElement.getAttribute("name"); if (name.length() == 0) { log.warn("ANNCLASS: rule has no name"); continue; } ImportAnnotationType aic = new ImportAnnotationType(name); types.put(name, aic); if (typeElement.getAttribute("default").equals("true")) { defaultType = aic; } } } protected void buildFilePatterns(Document rules) { NodeList patternList = (NodeList)XMLUtils.xpath( rules, FILE_PATTERNS_XPATH, XPathConstants.NODESET, null); if (patternList == null) { log.info("no file patterns found."); return; } for (int i = 0, N = patternList.getLength(); i < N; ++i) { Element element = (Element)patternList.item(i); Pair pair = buildPair(element); if (pair != null) { filePatterns.add(pair); } } } protected void buildDescriptionPatterns(Document rules) { NodeList patternList = (NodeList)XMLUtils.xpath( rules, DESCRIPTION_PATTERNS_XPATH, XPathConstants.NODESET, null); if (patternList == null) { log.info("no line patterns found."); return; } for (int i = 0, N = patternList.getLength(); i < N; ++i) { Element element = (Element)patternList.item(i); Pair pair = buildPair(element); if (pair != null) { descPatterns.add(pair); } } } protected Pair buildPair(Element element) { String pattern = element.getAttribute("pattern"); String type = element.getAttribute("type"); if (pattern.length() == 0) { log.warn("ANNCLASS: pattern has no 'pattern' attribute."); return null; } if (type.length() == 0) { log.warn("ANNCLASS: pattern has no 'type' attribute."); return null; } ImportAnnotationType annType = types.get(type); if (annType == null) { log.warn("ANNCLASS: pattern has unknown type '" + type + "'"); return null; } Pattern p; try { p = Pattern.compile(pattern, Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE); } catch (IllegalArgumentException iae) { log.warn("ANNCLASS: pattern '" + pattern + "' is invalid.", iae); return null; } return new Pair(p, annType); } public ImportAnnotationType getDefaultType() { return defaultType; } public ImportAnnotationType classifyFile(String filename) { return classifyFile(filename, null); } public ImportAnnotationType classifyFile( String filename, ImportAnnotationType def ) { if (filename.toLowerCase().endsWith(".km")) { filename = filename.substring(0, filename.length()-3); } for (Pair pair: filePatterns) { ImportAnnotationType annType = pair.match(filename); if (annType != null) { return annType; } } return def; } public ImportAnnotationType classifyDescription(String description) { return classifyDescription(description, null); } public ImportAnnotationType classifyDescription( String description, ImportAnnotationType def ) { for (Pair pair: descPatterns) { ImportAnnotationType annType = pair.match(description); if (annType != null) { return annType; } } return def; } } // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :