comparison backend/src/main/java/org/dive4elements/river/importer/parsers/StaFileParser.java @ 6328:53d08f33d094

Backend: Moved the guessing of main values and their time intervals out of the STA parser. The same code will be useful to extend the WST parser to better handle official lines.
author Sascha L. Teichmann <teichmann@intevation.de>
date Thu, 13 Jun 2013 17:15:34 +0200
parents ec54bb4fe407
children b58a71a5720c
comparison
equal deleted inserted replaced
6327:447ed3dee890 6328:53d08f33d094
17 import java.math.BigDecimal; 17 import java.math.BigDecimal;
18 18
19 import java.util.regex.Pattern; 19 import java.util.regex.Pattern;
20 import java.util.regex.Matcher; 20 import java.util.regex.Matcher;
21 21
22 import java.util.Date;
23 import java.util.HashMap; 22 import java.util.HashMap;
24 import java.util.ArrayList; 23 import java.util.ArrayList;
25 import java.util.List;
26 24
27 import org.apache.log4j.Logger; 25 import org.apache.log4j.Logger;
28 26
29 import org.dive4elements.river.importer.ImportMainValueType; 27 import org.dive4elements.river.importer.ImportMainValueType;
30 import org.dive4elements.river.importer.ImportMainValue; 28 import org.dive4elements.river.importer.ImportMainValue;
31 import org.dive4elements.river.importer.ImportNamedMainValue; 29 import org.dive4elements.river.importer.ImportNamedMainValue;
32 import org.dive4elements.river.importer.ImportGauge; 30 import org.dive4elements.river.importer.ImportGauge;
33 import org.dive4elements.river.importer.ImportTimeInterval;
34 import org.dive4elements.river.utils.DateGuesser;
35 31
36 public class StaFileParser 32 public class StaFileParser
37 { 33 {
38 private static Logger log = Logger.getLogger(StaFileParser.class); 34 private static Logger log = Logger.getLogger(StaFileParser.class);
39 35
44 40
45 public static final Pattern QWTD_ = 41 public static final Pattern QWTD_ =
46 Pattern.compile("\\s*([^\\s]+)\\s+([^\\s]+)\\s+([" + 42 Pattern.compile("\\s*([^\\s]+)\\s+([^\\s]+)\\s+([" +
47 Pattern.quote(TYPES) + "]).*"); 43 Pattern.quote(TYPES) + "]).*");
48 44
49 // TODO: To be extended.
50 private static final Pattern MAIN_VALUE = Pattern.compile(
51 "^(HQ|MHW|GLQ|NMQ|HQEXT)(\\d*)$");
52
53 private static boolean isMainValue(String s) {
54 s = s.replace(" ", "").toUpperCase();
55 return MAIN_VALUE.matcher(s).matches();
56 }
57
58
59 public static final class NameAndTimeInterval {
60 private String name;
61 private ImportTimeInterval timeInterval;
62
63 public NameAndTimeInterval(String name) {
64 this(name, null);
65 }
66
67 public NameAndTimeInterval(String name, ImportTimeInterval timeInterval) {
68 this.name = name;
69 this.timeInterval = timeInterval;
70 }
71
72 public String getName() {
73 return name;
74 }
75
76 public ImportTimeInterval getTimeInterval() {
77 return timeInterval;
78 }
79
80 @Override
81 public String toString() {
82 return "name: " + name + " time interval: " + timeInterval;
83 }
84 } // class NameAndTimeInterval
85 45
86 public StaFileParser() { 46 public StaFileParser() {
87 } 47 }
88 48
89 public boolean parse(ImportGauge gauge) throws IOException { 49 public boolean parse(ImportGauge gauge) throws IOException {
202 if (type == null) { 162 if (type == null) {
203 type = new ImportMainValueType(typeString); 163 type = new ImportMainValueType(typeString);
204 types.put(typeString, type); 164 types.put(typeString, type);
205 } 165 }
206 String name = m.group(1); 166 String name = m.group(1);
207 NameAndTimeInterval nat = parseName(name); 167 NameAndTimeInterval nat =
168 NameAndTimeInterval.parseName(name);
208 ImportNamedMainValue namedMainValue = 169 ImportNamedMainValue namedMainValue =
209 new ImportNamedMainValue(type, nat.getName()); 170 new ImportNamedMainValue(type, nat.getName());
210 namedMainValues.add(namedMainValue); 171 namedMainValues.add(namedMainValue);
211 172
212 ImportMainValue mainValue = new ImportMainValue( 173 ImportMainValue mainValue = new ImportMainValue(
233 } 194 }
234 log.info("finished parsing STA file: " + file); 195 log.info("finished parsing STA file: " + file);
235 return true; 196 return true;
236 } 197 }
237 198
238 protected static NameAndTimeInterval parseName(String name) {
239 List<String> result = new ArrayList<String>();
240
241 unbracket(name, 0, result);
242
243 int length = result.size();
244
245 if (length < 1) { // Should not happen.
246 return new NameAndTimeInterval(name);
247 }
248
249 if (length == 1) { // No date at all -> use first part.
250 return new NameAndTimeInterval(result.get(0).trim());
251 }
252
253 if (length == 2) { // e.g. HQ(1994) or HQ(1994 - 1999)
254
255 String type = result.get(0).trim();
256 ImportTimeInterval timeInterval = null;
257
258 String datePart = result.get(1).trim();
259 if (isMainValue(datePart)) { // e.g. W(HQ100)
260 type += "(" + datePart + ")";
261 timeInterval = null;
262 }
263 else {
264 timeInterval = getTimeInterval(result.get(1).trim());
265
266 if (timeInterval == null) { // No date at all.
267 type = name;
268 }
269 }
270
271 return new NameAndTimeInterval(type, timeInterval);
272 }
273
274 if (length == 3) { // e.g W(Q(1994)) or W(Q(1994 - 1999))
275
276 String type =
277 result.get(0).trim() + "(" +
278 result.get(1).trim() + ")";
279
280 ImportTimeInterval timeInterval = getTimeInterval(
281 result.get(2).trim());
282
283 if (timeInterval == null) { // No date at all.
284 type = name;
285 }
286
287 return new NameAndTimeInterval(type, timeInterval);
288 }
289
290 // more than 3 elements return unmodified.
291
292 return new NameAndTimeInterval(name);
293 }
294
295 private static ImportTimeInterval getTimeInterval(String datePart) {
296
297 int minus = datePart.indexOf('-');
298
299 if (minus < 0) { // '-' not found
300
301 Date date = null;
302 try {
303 date = DateGuesser.guessDate(datePart);
304 }
305 catch (IllegalArgumentException iae) {
306 log.warn("STA: Invalid date '" + datePart + "'");
307 return null;
308 }
309
310 return new ImportTimeInterval(date);
311 }
312
313 // Found '-' so we have <from> - <to>
314 String startPart = datePart.substring(0, minus).trim();
315 String endPart = datePart.substring(minus).trim();
316
317 Date startDate = null;
318 Date endDate = null;
319
320 try {
321 startDate = DateGuesser.guessDate(startPart);
322 }
323 catch (IllegalArgumentException iae) {
324 log.warn("STA: Invalid start date '" + startPart + "'");
325 }
326
327 try {
328 endDate = DateGuesser.guessDate(endPart);
329 }
330 catch (IllegalArgumentException iae) {
331 log.warn("STA: Invalid end date '" + endPart + "'");
332 }
333
334 if (startDate == null) {
335 log.warn("STA: Need start date.");
336 return null;
337 }
338
339 return new ImportTimeInterval(startDate, endDate);
340 }
341
342 private static int unbracket(String s, int index, List<String> result) {
343 StringBuilder sb = new StringBuilder();
344 int length = s.length();
345 while (index < length) {
346 char c = s.charAt(index);
347 switch (c) {
348 case '(':
349 index = unbracket(s, index+1, result);
350 break;
351 case ')':
352 result.add(0, sb.toString());
353 return index+1;
354 default:
355 sb.append(c);
356 ++index;
357 }
358 }
359 result.add(0, sb.toString());
360
361 return index;
362 }
363
364 /*
365 public static void main(String [] args) {
366 for (String arg: args) {
367 NameAndTimeInterval nti = parseName(arg);
368 System.out.println(arg + " -> " + nti);
369 }
370 }
371 */
372 } 199 }
373 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 : 200 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org