comparison flys-backend/src/main/java/org/dive4elements/river/importer/parsers/AnnotationClassifier.java @ 5828:dfb26b03b179

Moved directories to org.dive4elements.river
author Sascha L. Teichmann <teichmann@intevation.de>
date Thu, 25 Apr 2013 11:53:11 +0200
parents flys-backend/src/main/java/de/intevation/flys/importer/parsers/AnnotationClassifier.java@0d27d02b1208
children 18619c1e7c2a
comparison
equal deleted inserted replaced
5827:e308d4ecd35a 5828:dfb26b03b179
1 package de.intevation.flys.importer.parsers;
2
3 import org.w3c.dom.Document;
4 import org.w3c.dom.NodeList;
5 import org.w3c.dom.Element;
6
7 import javax.xml.xpath.XPathConstants;
8
9 import java.util.Map;
10 import java.util.HashMap;
11 import java.util.List;
12 import java.util.ArrayList;
13
14 import java.util.regex.Pattern;
15 import java.util.regex.Matcher;
16
17 import org.apache.log4j.Logger;
18
19 import de.intevation.artifacts.common.utils.XMLUtils;
20
21 import de.intevation.flys.importer.ImportAnnotationType;
22 import de.intevation.flys.importer.Importer;
23
24 public class AnnotationClassifier
25 {
26 private static Logger log = Logger.getLogger(Importer.class);
27
28 public static final String TYPES_XPATH =
29 "/annotation/types/type";
30
31 public static final String FILE_PATTERNS_XPATH =
32 "/annotation/patterns/file";
33
34 public static final String DESCRIPTION_PATTERNS_XPATH =
35 "/annotation/patterns/line";
36
37
38 public static class Pair {
39
40 protected Pattern pattern;
41 protected ImportAnnotationType annType;
42
43 public Pair(Pattern pattern, ImportAnnotationType annType) {
44 this.pattern = pattern;
45 this.annType = annType;
46 }
47
48 public ImportAnnotationType match(String s) {
49 Matcher m = pattern.matcher(s);
50 return m.matches() ? annType : null;
51 }
52 } // class Pair
53
54
55 protected Map<String, ImportAnnotationType> types;
56 protected List<Pair> filePatterns;
57 protected List<Pair> descPatterns;
58
59 protected ImportAnnotationType defaultType;
60
61 public AnnotationClassifier() {
62 }
63
64 public AnnotationClassifier(Document rules) {
65 types = new HashMap<String, ImportAnnotationType>();
66 filePatterns = new ArrayList<Pair>();
67 descPatterns = new ArrayList<Pair>();
68
69 buildRules(rules);
70 }
71
72 protected void buildRules(Document rules) {
73 buildTypes(rules);
74 buildFilePatterns(rules);
75 buildDescriptionPatterns(rules);
76 }
77
78 protected void buildTypes(Document rules) {
79
80 NodeList typeList = (NodeList)XMLUtils.xpath(
81 rules,
82 TYPES_XPATH,
83 XPathConstants.NODESET,
84 null);
85
86 if (typeList == null) {
87 log.info("no rules found.");
88 return;
89 }
90
91 for (int i = 0, N = typeList.getLength(); i < N; ++i) {
92 Element typeElement = (Element)typeList.item(i);
93 String name = typeElement.getAttribute("name");
94 if (name.length() == 0) {
95 log.warn("ANNCLASS: rule has no name");
96 continue;
97 }
98
99 ImportAnnotationType aic = new ImportAnnotationType(name);
100
101 types.put(name, aic);
102
103 if (typeElement.getAttribute("default").equals("true")) {
104 defaultType = aic;
105 }
106 }
107 }
108
109 protected void buildFilePatterns(Document rules) {
110
111 NodeList patternList = (NodeList)XMLUtils.xpath(
112 rules,
113 FILE_PATTERNS_XPATH,
114 XPathConstants.NODESET,
115 null);
116
117 if (patternList == null) {
118 log.info("no file patterns found.");
119 return;
120 }
121
122 for (int i = 0, N = patternList.getLength(); i < N; ++i) {
123 Element element = (Element)patternList.item(i);
124 Pair pair = buildPair(element);
125 if (pair != null) {
126 filePatterns.add(pair);
127 }
128 }
129 }
130
131 protected void buildDescriptionPatterns(Document rules) {
132
133 NodeList patternList = (NodeList)XMLUtils.xpath(
134 rules,
135 DESCRIPTION_PATTERNS_XPATH,
136 XPathConstants.NODESET,
137 null);
138
139 if (patternList == null) {
140 log.info("no line patterns found.");
141 return;
142 }
143
144 for (int i = 0, N = patternList.getLength(); i < N; ++i) {
145 Element element = (Element)patternList.item(i);
146 Pair pair = buildPair(element);
147 if (pair != null) {
148 descPatterns.add(pair);
149 }
150 }
151 }
152
153 protected Pair buildPair(Element element) {
154 String pattern = element.getAttribute("pattern");
155 String type = element.getAttribute("type");
156
157 if (pattern.length() == 0) {
158 log.warn("ANNCLASS: pattern has no 'pattern' attribute.");
159 return null;
160 }
161
162 if (type.length() == 0) {
163 log.warn("ANNCLASS: pattern has no 'type' attribute.");
164 return null;
165 }
166
167 ImportAnnotationType annType = types.get(type);
168
169 if (annType == null) {
170 log.warn("ANNCLASS: pattern has unknown type '" + type + "'");
171 return null;
172 }
173
174 Pattern p;
175
176 try {
177 p = Pattern.compile(pattern,
178 Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE);
179 }
180 catch (IllegalArgumentException iae) {
181 log.warn("ANNCLASS: pattern '" + pattern + "' is invalid.", iae);
182 return null;
183 }
184
185 return new Pair(p, annType);
186 }
187
188 public ImportAnnotationType getDefaultType() {
189 return defaultType;
190 }
191
192 public ImportAnnotationType classifyFile(String filename) {
193 return classifyFile(filename, null);
194 }
195
196 public ImportAnnotationType classifyFile(
197 String filename,
198 ImportAnnotationType def
199 ) {
200 if (filename.toLowerCase().endsWith(".km")) {
201 filename = filename.substring(0, filename.length()-3);
202 }
203
204 for (Pair pair: filePatterns) {
205 ImportAnnotationType annType = pair.match(filename);
206 if (annType != null) {
207 return annType;
208 }
209 }
210
211 return def;
212 }
213
214 public ImportAnnotationType classifyDescription(String description) {
215 return classifyDescription(description, null);
216 }
217
218 public ImportAnnotationType classifyDescription(
219 String description,
220 ImportAnnotationType def
221 ) {
222 for (Pair pair: descPatterns) {
223 ImportAnnotationType annType = pair.match(description);
224 if (annType != null) {
225 return annType;
226 }
227 }
228
229 return def;
230 }
231 }
232 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org