Mercurial > dive4elements > river
comparison flys-backend/src/main/java/de/intevation/flys/importer/AnnotationClassifier.java @ 765:763c4137d6e1
Added classification of annotation types. Needs testing!
flys-backend/trunk@2162 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author | Sascha L. Teichmann <sascha.teichmann@intevation.de> |
---|---|
date | Sun, 19 Jun 2011 12:26:12 +0000 |
parents | |
children | aa9e3da95c31 |
comparison
equal
deleted
inserted
replaced
764:e09f00ecb915 | 765:763c4137d6e1 |
---|---|
1 package de.intevation.flys.importer; | |
2 | |
3 import org.w3c.dom.Document; | |
4 import org.w3c.dom.NodeList; | |
5 import org.w3c.dom.Element; | |
6 | |
7 import javax.xml.xpath.XPathConstants; | |
8 | |
9 import java.io.File; | |
10 import java.io.IOException; | |
11 | |
12 import java.util.Map; | |
13 import java.util.HashMap; | |
14 import java.util.List; | |
15 import java.util.ArrayList; | |
16 | |
17 import java.util.regex.Pattern; | |
18 import java.util.regex.Matcher; | |
19 | |
20 import org.apache.log4j.Logger; | |
21 | |
22 import de.intevation.artifacts.common.utils.XMLUtils; | |
23 | |
24 public class AnnotationClassifier | |
25 { | |
26 private static Logger log = Logger.getLogger(Importer.class); | |
27 | |
28 public static final String TYPES_XPATH = | |
29 "/annotations/types/type"; | |
30 | |
31 public static final String FILE_PATTERNS_XPATH = | |
32 "/annotations/patterns/file"; | |
33 | |
34 public static final String DESCRIPTION_PATTERNS_XPATH = | |
35 "/annotations/patterns/line"; | |
36 | |
37 | |
38 public static class Pair { | |
39 | |
40 protected Pattern pattern; | |
41 protected ImportAnnotationType annType; | |
42 | |
43 public Pair(Pattern pattern, ImportAnnotationType annType) { | |
44 this.pattern = pattern; | |
45 this.annType = annType; | |
46 } | |
47 | |
48 public ImportAnnotationType match(String s) { | |
49 Matcher m = pattern.matcher(s); | |
50 return m.matches() ? annType : null; | |
51 } | |
52 } // class Pair | |
53 | |
54 | |
55 protected Map<String, ImportAnnotationType> types; | |
56 protected List<Pair> filePatterns; | |
57 protected List<Pair> descPatterns; | |
58 | |
59 protected ImportAnnotationType defaultType; | |
60 | |
61 public AnnotationClassifier() { | |
62 } | |
63 | |
64 public AnnotationClassifier(Document rules) { | |
65 types = new HashMap<String, ImportAnnotationType>(); | |
66 filePatterns = new ArrayList<Pair>(); | |
67 descPatterns = new ArrayList<Pair>(); | |
68 | |
69 buildRules(rules); | |
70 } | |
71 | |
72 protected void buildRules(Document rules) { | |
73 | |
74 buildTypes(rules); | |
75 buildFilePatterns(rules); | |
76 buildDescriptionPatterns(rules); | |
77 } | |
78 | |
79 protected void buildTypes(Document rules) { | |
80 | |
81 NodeList typeList = (NodeList)XMLUtils.xpath( | |
82 rules, | |
83 TYPES_XPATH, | |
84 XPathConstants.NODESET, | |
85 null); | |
86 | |
87 if (typeList == null) { | |
88 log.info("no rules found."); | |
89 return; | |
90 } | |
91 | |
92 for (int i = 0, N = typeList.getLength(); i < N; ++i) { | |
93 Element typeElement = (Element)typeList.item(i); | |
94 String name = typeElement.getAttribute("name"); | |
95 if (name.length() == 0) { | |
96 log.warn("rule has no name"); | |
97 continue; | |
98 } | |
99 | |
100 ImportAnnotationType aic = new ImportAnnotationType(name); | |
101 | |
102 types.put(name, aic); | |
103 | |
104 if (typeElement.getAttribute("default").equals("true")) { | |
105 defaultType = aic; | |
106 } | |
107 } | |
108 } | |
109 | |
110 protected void buildFilePatterns(Document rules) { | |
111 | |
112 NodeList patternList = (NodeList)XMLUtils.xpath( | |
113 rules, | |
114 FILE_PATTERNS_XPATH, | |
115 XPathConstants.NODESET, | |
116 null); | |
117 | |
118 if (patternList == null) { | |
119 log.info("no file patterns found."); | |
120 return; | |
121 } | |
122 | |
123 for (int i = 0, N = patternList.getLength(); i < N; ++i) { | |
124 Element element = (Element)patternList.item(i); | |
125 Pair pair = buildPair(element); | |
126 if (pair != null) { | |
127 filePatterns.add(pair); | |
128 } | |
129 } | |
130 } | |
131 | |
132 protected void buildDescriptionPatterns(Document rules) { | |
133 | |
134 NodeList patternList = (NodeList)XMLUtils.xpath( | |
135 rules, | |
136 DESCRIPTION_PATTERNS_XPATH, | |
137 XPathConstants.NODESET, | |
138 null); | |
139 | |
140 if (patternList == null) { | |
141 log.info("no line patterns found."); | |
142 return; | |
143 } | |
144 | |
145 for (int i = 0, N = patternList.getLength(); i < N; ++i) { | |
146 Element element = (Element)patternList.item(i); | |
147 Pair pair = buildPair(element); | |
148 if (pair != null) { | |
149 descPatterns.add(pair); | |
150 } | |
151 } | |
152 } | |
153 | |
154 protected Pair buildPair(Element element) { | |
155 String pattern = element.getAttribute("pattern"); | |
156 String type = element.getAttribute("type"); | |
157 | |
158 if (pattern.length() == 0) { | |
159 log.warn("pattern has no 'pattern' attribute."); | |
160 return null; | |
161 } | |
162 | |
163 if (type.length() == 0) { | |
164 log.warn("pattern has no 'type' attribute."); | |
165 return null; | |
166 } | |
167 | |
168 ImportAnnotationType annType = types.get(type); | |
169 | |
170 if (annType == null) { | |
171 log.warn("pattern has unknown type '" + type + "'"); | |
172 return null; | |
173 } | |
174 | |
175 Pattern p; | |
176 | |
177 try { | |
178 p = Pattern.compile(pattern, | |
179 Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE); | |
180 } | |
181 catch (IllegalArgumentException iae) { | |
182 log.warn("pattern '" + pattern + "' is invalid.", iae); | |
183 return null; | |
184 } | |
185 | |
186 return new Pair(p, annType); | |
187 } | |
188 | |
189 public ImportAnnotationType getDefaultType() { | |
190 return defaultType; | |
191 } | |
192 | |
193 public ImportAnnotationType classifyFile(String filename) { | |
194 return classifyFile(filename, null); | |
195 } | |
196 | |
197 public ImportAnnotationType classifyFile( | |
198 String filename, | |
199 ImportAnnotationType def | |
200 ) { | |
201 if (filename.toLowerCase().endsWith(".km")) { | |
202 filename = filename.substring(0, filename.length()-3); | |
203 } | |
204 | |
205 for (Pair pair: filePatterns) { | |
206 ImportAnnotationType annType = pair.match(filename); | |
207 if (annType != null) { | |
208 return annType; | |
209 } | |
210 } | |
211 | |
212 return def; | |
213 } | |
214 | |
215 public ImportAnnotationType classifyDescription(String description) { | |
216 return classifyDescription(description, null); | |
217 } | |
218 | |
219 public ImportAnnotationType classifyDescription( | |
220 String description, | |
221 ImportAnnotationType def | |
222 ) { | |
223 for (Pair pair: descPatterns) { | |
224 ImportAnnotationType annType = pair.match(description); | |
225 if (annType != null) { | |
226 return annType; | |
227 } | |
228 } | |
229 | |
230 return def; | |
231 } | |
232 } | |
233 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 : |