comparison flys-backend/src/main/java/de/intevation/flys/importer/AnnotationClassifier.java @ 765:763c4137d6e1

Added classification of annotation types. Needs testing! flys-backend/trunk@2162 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author Sascha L. Teichmann <sascha.teichmann@intevation.de>
date Sun, 19 Jun 2011 12:26:12 +0000
parents
children aa9e3da95c31
comparison
equal deleted inserted replaced
764:e09f00ecb915 765:763c4137d6e1
1 package de.intevation.flys.importer;
2
3 import org.w3c.dom.Document;
4 import org.w3c.dom.NodeList;
5 import org.w3c.dom.Element;
6
7 import javax.xml.xpath.XPathConstants;
8
9 import java.io.File;
10 import java.io.IOException;
11
12 import java.util.Map;
13 import java.util.HashMap;
14 import java.util.List;
15 import java.util.ArrayList;
16
17 import java.util.regex.Pattern;
18 import java.util.regex.Matcher;
19
20 import org.apache.log4j.Logger;
21
22 import de.intevation.artifacts.common.utils.XMLUtils;
23
24 public class AnnotationClassifier
25 {
26 private static Logger log = Logger.getLogger(Importer.class);
27
28 public static final String TYPES_XPATH =
29 "/annotations/types/type";
30
31 public static final String FILE_PATTERNS_XPATH =
32 "/annotations/patterns/file";
33
34 public static final String DESCRIPTION_PATTERNS_XPATH =
35 "/annotations/patterns/line";
36
37
38 public static class Pair {
39
40 protected Pattern pattern;
41 protected ImportAnnotationType annType;
42
43 public Pair(Pattern pattern, ImportAnnotationType annType) {
44 this.pattern = pattern;
45 this.annType = annType;
46 }
47
48 public ImportAnnotationType match(String s) {
49 Matcher m = pattern.matcher(s);
50 return m.matches() ? annType : null;
51 }
52 } // class Pair
53
54
55 protected Map<String, ImportAnnotationType> types;
56 protected List<Pair> filePatterns;
57 protected List<Pair> descPatterns;
58
59 protected ImportAnnotationType defaultType;
60
61 public AnnotationClassifier() {
62 }
63
64 public AnnotationClassifier(Document rules) {
65 types = new HashMap<String, ImportAnnotationType>();
66 filePatterns = new ArrayList<Pair>();
67 descPatterns = new ArrayList<Pair>();
68
69 buildRules(rules);
70 }
71
72 protected void buildRules(Document rules) {
73
74 buildTypes(rules);
75 buildFilePatterns(rules);
76 buildDescriptionPatterns(rules);
77 }
78
79 protected void buildTypes(Document rules) {
80
81 NodeList typeList = (NodeList)XMLUtils.xpath(
82 rules,
83 TYPES_XPATH,
84 XPathConstants.NODESET,
85 null);
86
87 if (typeList == null) {
88 log.info("no rules found.");
89 return;
90 }
91
92 for (int i = 0, N = typeList.getLength(); i < N; ++i) {
93 Element typeElement = (Element)typeList.item(i);
94 String name = typeElement.getAttribute("name");
95 if (name.length() == 0) {
96 log.warn("rule has no name");
97 continue;
98 }
99
100 ImportAnnotationType aic = new ImportAnnotationType(name);
101
102 types.put(name, aic);
103
104 if (typeElement.getAttribute("default").equals("true")) {
105 defaultType = aic;
106 }
107 }
108 }
109
110 protected void buildFilePatterns(Document rules) {
111
112 NodeList patternList = (NodeList)XMLUtils.xpath(
113 rules,
114 FILE_PATTERNS_XPATH,
115 XPathConstants.NODESET,
116 null);
117
118 if (patternList == null) {
119 log.info("no file patterns found.");
120 return;
121 }
122
123 for (int i = 0, N = patternList.getLength(); i < N; ++i) {
124 Element element = (Element)patternList.item(i);
125 Pair pair = buildPair(element);
126 if (pair != null) {
127 filePatterns.add(pair);
128 }
129 }
130 }
131
132 protected void buildDescriptionPatterns(Document rules) {
133
134 NodeList patternList = (NodeList)XMLUtils.xpath(
135 rules,
136 DESCRIPTION_PATTERNS_XPATH,
137 XPathConstants.NODESET,
138 null);
139
140 if (patternList == null) {
141 log.info("no line patterns found.");
142 return;
143 }
144
145 for (int i = 0, N = patternList.getLength(); i < N; ++i) {
146 Element element = (Element)patternList.item(i);
147 Pair pair = buildPair(element);
148 if (pair != null) {
149 descPatterns.add(pair);
150 }
151 }
152 }
153
154 protected Pair buildPair(Element element) {
155 String pattern = element.getAttribute("pattern");
156 String type = element.getAttribute("type");
157
158 if (pattern.length() == 0) {
159 log.warn("pattern has no 'pattern' attribute.");
160 return null;
161 }
162
163 if (type.length() == 0) {
164 log.warn("pattern has no 'type' attribute.");
165 return null;
166 }
167
168 ImportAnnotationType annType = types.get(type);
169
170 if (annType == null) {
171 log.warn("pattern has unknown type '" + type + "'");
172 return null;
173 }
174
175 Pattern p;
176
177 try {
178 p = Pattern.compile(pattern,
179 Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE);
180 }
181 catch (IllegalArgumentException iae) {
182 log.warn("pattern '" + pattern + "' is invalid.", iae);
183 return null;
184 }
185
186 return new Pair(p, annType);
187 }
188
189 public ImportAnnotationType getDefaultType() {
190 return defaultType;
191 }
192
193 public ImportAnnotationType classifyFile(String filename) {
194 return classifyFile(filename, null);
195 }
196
197 public ImportAnnotationType classifyFile(
198 String filename,
199 ImportAnnotationType def
200 ) {
201 if (filename.toLowerCase().endsWith(".km")) {
202 filename = filename.substring(0, filename.length()-3);
203 }
204
205 for (Pair pair: filePatterns) {
206 ImportAnnotationType annType = pair.match(filename);
207 if (annType != null) {
208 return annType;
209 }
210 }
211
212 return def;
213 }
214
215 public ImportAnnotationType classifyDescription(String description) {
216 return classifyDescription(description, null);
217 }
218
219 public ImportAnnotationType classifyDescription(
220 String description,
221 ImportAnnotationType def
222 ) {
223 for (Pair pair: descPatterns) {
224 ImportAnnotationType annType = pair.match(description);
225 if (annType != null) {
226 return annType;
227 }
228 }
229
230 return def;
231 }
232 }
233 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org