Mercurial > dive4elements > river
comparison backend/src/main/java/org/dive4elements/river/importer/parsers/StaFileParser.java @ 6328:53d08f33d094
Backend: Moved guessing of main values and their time intervals out of the STA parser. The same code will be useful to extend the WST parser to better handle official lines.
author | Sascha L. Teichmann <teichmann@intevation.de> |
---|---|
date | Thu, 13 Jun 2013 17:15:34 +0200 |
parents | ec54bb4fe407 |
children | b58a71a5720c |
comparison
equal
deleted
inserted
replaced
6327:447ed3dee890 | 6328:53d08f33d094 |
---|---|
17 import java.math.BigDecimal; | 17 import java.math.BigDecimal; |
18 | 18 |
19 import java.util.regex.Pattern; | 19 import java.util.regex.Pattern; |
20 import java.util.regex.Matcher; | 20 import java.util.regex.Matcher; |
21 | 21 |
22 import java.util.Date; | |
23 import java.util.HashMap; | 22 import java.util.HashMap; |
24 import java.util.ArrayList; | 23 import java.util.ArrayList; |
25 import java.util.List; | |
26 | 24 |
27 import org.apache.log4j.Logger; | 25 import org.apache.log4j.Logger; |
28 | 26 |
29 import org.dive4elements.river.importer.ImportMainValueType; | 27 import org.dive4elements.river.importer.ImportMainValueType; |
30 import org.dive4elements.river.importer.ImportMainValue; | 28 import org.dive4elements.river.importer.ImportMainValue; |
31 import org.dive4elements.river.importer.ImportNamedMainValue; | 29 import org.dive4elements.river.importer.ImportNamedMainValue; |
32 import org.dive4elements.river.importer.ImportGauge; | 30 import org.dive4elements.river.importer.ImportGauge; |
33 import org.dive4elements.river.importer.ImportTimeInterval; | |
34 import org.dive4elements.river.utils.DateGuesser; | |
35 | 31 |
36 public class StaFileParser | 32 public class StaFileParser |
37 { | 33 { |
38 private static Logger log = Logger.getLogger(StaFileParser.class); | 34 private static Logger log = Logger.getLogger(StaFileParser.class); |
39 | 35 |
44 | 40 |
45 public static final Pattern QWTD_ = | 41 public static final Pattern QWTD_ = |
46 Pattern.compile("\\s*([^\\s]+)\\s+([^\\s]+)\\s+([" + | 42 Pattern.compile("\\s*([^\\s]+)\\s+([^\\s]+)\\s+([" + |
47 Pattern.quote(TYPES) + "]).*"); | 43 Pattern.quote(TYPES) + "]).*"); |
48 | 44 |
49 // TODO: To be extented. | |
50 private static final Pattern MAIN_VALUE = Pattern.compile( | |
51 "^(HQ|MHW|GLQ|NMQ|HQEXT)(\\d*)$"); | |
52 | |
53 private static boolean isMainValue(String s) { | |
54 s = s.replace(" ", "").toUpperCase(); | |
55 return MAIN_VALUE.matcher(s).matches(); | |
56 } | |
57 | |
58 | |
59 public static final class NameAndTimeInterval { | |
60 private String name; | |
61 private ImportTimeInterval timeInterval; | |
62 | |
63 public NameAndTimeInterval(String name) { | |
64 this(name, null); | |
65 } | |
66 | |
67 public NameAndTimeInterval(String name, ImportTimeInterval timeInterval) { | |
68 this.name = name; | |
69 this.timeInterval = timeInterval; | |
70 } | |
71 | |
72 public String getName() { | |
73 return name; | |
74 } | |
75 | |
76 public ImportTimeInterval getTimeInterval() { | |
77 return timeInterval; | |
78 } | |
79 | |
80 @Override | |
81 public String toString() { | |
82 return "name: " + name + " time interval: " + timeInterval; | |
83 } | |
84 } // class NameAndTimeInterval | |
85 | 45 |
86 public StaFileParser() { | 46 public StaFileParser() { |
87 } | 47 } |
88 | 48 |
89 public boolean parse(ImportGauge gauge) throws IOException { | 49 public boolean parse(ImportGauge gauge) throws IOException { |
202 if (type == null) { | 162 if (type == null) { |
203 type = new ImportMainValueType(typeString); | 163 type = new ImportMainValueType(typeString); |
204 types.put(typeString, type); | 164 types.put(typeString, type); |
205 } | 165 } |
206 String name = m.group(1); | 166 String name = m.group(1); |
207 NameAndTimeInterval nat = parseName(name); | 167 NameAndTimeInterval nat = |
168 NameAndTimeInterval.parseName(name); | |
208 ImportNamedMainValue namedMainValue = | 169 ImportNamedMainValue namedMainValue = |
209 new ImportNamedMainValue(type, nat.getName()); | 170 new ImportNamedMainValue(type, nat.getName()); |
210 namedMainValues.add(namedMainValue); | 171 namedMainValues.add(namedMainValue); |
211 | 172 |
212 ImportMainValue mainValue = new ImportMainValue( | 173 ImportMainValue mainValue = new ImportMainValue( |
233 } | 194 } |
234 log.info("finished parsing STA file: " + file); | 195 log.info("finished parsing STA file: " + file); |
235 return true; | 196 return true; |
236 } | 197 } |
237 | 198 |
238 protected static NameAndTimeInterval parseName(String name) { | |
239 List<String> result = new ArrayList<String>(); | |
240 | |
241 unbracket(name, 0, result); | |
242 | |
243 int length = result.size(); | |
244 | |
245 if (length < 1) { // Should not happen. | |
246 return new NameAndTimeInterval(name); | |
247 } | |
248 | |
249 if (length == 1) { // No date at all -> use first part. | |
250 return new NameAndTimeInterval(result.get(0).trim()); | |
251 } | |
252 | |
253 if (length == 2) { // e.g. HQ(1994) or HQ(1994 - 1999) | |
254 | |
255 String type = result.get(0).trim(); | |
256 ImportTimeInterval timeInterval = null; | |
257 | |
258 String datePart = result.get(1).trim(); | |
259 if (isMainValue(datePart)) { // e.g. W(HQ100) | |
260 type += "(" + datePart + ")"; | |
261 timeInterval = null; | |
262 } | |
263 else { | |
264 timeInterval = getTimeInterval(result.get(1).trim()); | |
265 | |
266 if (timeInterval == null) { // No date at all. | |
267 type = name; | |
268 } | |
269 } | |
270 | |
271 return new NameAndTimeInterval(type, timeInterval); | |
272 } | |
273 | |
274 if (length == 3) { // e.g W(Q(1994)) or W(Q(1994 - 1999)) | |
275 | |
276 String type = | |
277 result.get(0).trim() + "(" + | |
278 result.get(1).trim() + ")"; | |
279 | |
280 ImportTimeInterval timeInterval = getTimeInterval( | |
281 result.get(2).trim()); | |
282 | |
283 if (timeInterval == null) { // No date at all. | |
284 type = name; | |
285 } | |
286 | |
287 return new NameAndTimeInterval(type, timeInterval); | |
288 } | |
289 | |
290 // more than 3 elements return unmodified. | |
291 | |
292 return new NameAndTimeInterval(name); | |
293 } | |
294 | |
295 private static ImportTimeInterval getTimeInterval(String datePart) { | |
296 | |
297 int minus = datePart.indexOf('-'); | |
298 | |
299 if (minus < 0) { // '-' not found | |
300 | |
301 Date date = null; | |
302 try { | |
303 date = DateGuesser.guessDate(datePart); | |
304 } | |
305 catch (IllegalArgumentException iae) { | |
306 log.warn("STA: Invalid date '" + datePart + "'"); | |
307 return null; | |
308 } | |
309 | |
310 return new ImportTimeInterval(date); | |
311 } | |
312 | |
313 // Found '-' so we have <from> - <to> | |
314 String startPart = datePart.substring(0, minus).trim(); | |
315 String endPart = datePart.substring(minus).trim(); | |
316 | |
317 Date startDate = null; | |
318 Date endDate = null; | |
319 | |
320 try { | |
321 startDate = DateGuesser.guessDate(startPart); | |
322 } | |
323 catch (IllegalArgumentException iae) { | |
324 log.warn("STA: Invalid start date '" + startPart + "'"); | |
325 } | |
326 | |
327 try { | |
328 endDate = DateGuesser.guessDate(endPart); | |
329 } | |
330 catch (IllegalArgumentException iae) { | |
331 log.warn("STA: Invalid end date '" + endPart + "'"); | |
332 } | |
333 | |
334 if (startDate == null) { | |
335 log.warn("STA: Need start date."); | |
336 return null; | |
337 } | |
338 | |
339 return new ImportTimeInterval(startDate, endDate); | |
340 } | |
341 | |
342 private static int unbracket(String s, int index, List<String> result) { | |
343 StringBuilder sb = new StringBuilder(); | |
344 int length = s.length(); | |
345 while (index < length) { | |
346 char c = s.charAt(index); | |
347 switch (c) { | |
348 case '(': | |
349 index = unbracket(s, index+1, result); | |
350 break; | |
351 case ')': | |
352 result.add(0, sb.toString()); | |
353 return index+1; | |
354 default: | |
355 sb.append(c); | |
356 ++index; | |
357 } | |
358 } | |
359 result.add(0, sb.toString()); | |
360 | |
361 return index; | |
362 } | |
363 | |
364 /* | |
365 public static void main(String [] args) { | |
366 for (String arg: args) { | |
367 NameAndTimeInterval nti = parseName(arg); | |
368 System.out.println(arg + " -> " + nti); | |
369 } | |
370 } | |
371 */ | |
372 } | 199 } |
373 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 : | 200 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 : |