Mercurial > dive4elements > river
comparison backend/src/main/java/org/dive4elements/river/importer/parsers/NameAndTimeInterval.java @ 6328:53d08f33d094
Backend: Moved guessing of main values and their time intervals out of the STA parser. The same code will be useful to extend the WST parser to better handle official lines.
author | Sascha L. Teichmann <teichmann@intevation.de> |
---|---|
date | Thu, 13 Jun 2013 17:15:34 +0200 |
parents | |
children | 9231940bd192 |
comparison
equal
deleted
inserted
replaced
6327:447ed3dee890 | 6328:53d08f33d094 |
---|---|
1 /* Copyright (C) 2011, 2012, 2013 by Bundesanstalt für Gewässerkunde | |
2 * Software engineering by Intevation GmbH | |
3 * | |
4 * This file is Free Software under the GNU AGPL (>=v3) | |
5 * and comes with ABSOLUTELY NO WARRANTY! Check out the | |
6 * documentation coming with Dive4Elements River for details. | |
7 */ | |
8 package org.dive4elements.river.importer.parsers; | |
9 | |
10 import java.util.ArrayList; | |
11 import java.util.Date; | |
12 import java.util.List; | |
13 import java.util.regex.Pattern; | |
14 | |
15 import org.apache.log4j.Logger; | |
16 import org.dive4elements.river.importer.ImportTimeInterval; | |
17 import org.dive4elements.river.utils.DateGuesser; | |
18 | |
19 public class NameAndTimeInterval { | |
20 | |
21 private static Logger log = Logger.getLogger(NameAndTimeInterval.class); | |
22 | |
23 // TODO: To be extented. | |
24 private static final Pattern MAIN_VALUE = Pattern.compile( | |
25 "^(HQ|MHW|GLQ|NMQ|HQEXT)(\\d*)$"); | |
26 | |
27 private String name; | |
28 private ImportTimeInterval timeInterval; | |
29 | |
30 public NameAndTimeInterval() { | |
31 } | |
32 | |
33 public NameAndTimeInterval(String name) { | |
34 this(name, null); | |
35 } | |
36 | |
37 public NameAndTimeInterval(String name, ImportTimeInterval timeInterval) { | |
38 this.name = name; | |
39 this.timeInterval = timeInterval; | |
40 } | |
41 | |
42 public String getName() { | |
43 return name; | |
44 } | |
45 | |
46 public ImportTimeInterval getTimeInterval() { | |
47 return timeInterval; | |
48 } | |
49 | |
50 @Override | |
51 public String toString() { | |
52 return "name: " + name + " time interval: " + timeInterval; | |
53 } | |
54 | |
55 public static boolean isMainValue(String s) { | |
56 s = s.replace(" ", "").toUpperCase(); | |
57 return MAIN_VALUE.matcher(s).matches(); | |
58 } | |
59 | |
60 public static NameAndTimeInterval parseName(String name) { | |
61 List<String> result = new ArrayList<String>(); | |
62 | |
63 unbracket(name, 0, result); | |
64 | |
65 int length = result.size(); | |
66 | |
67 if (length < 1) { // Should not happen. | |
68 return new NameAndTimeInterval(name); | |
69 } | |
70 | |
71 if (length == 1) { // No date at all -> use first part. | |
72 return new NameAndTimeInterval(result.get(0).trim()); | |
73 } | |
74 | |
75 if (length == 2) { // e.g. HQ(1994) or HQ(1994 - 1999) | |
76 | |
77 String type = result.get(0).trim(); | |
78 ImportTimeInterval timeInterval = null; | |
79 | |
80 String datePart = result.get(1).trim(); | |
81 if (isMainValue(datePart)) { // e.g. W(HQ100) | |
82 type += "(" + datePart + ")"; | |
83 timeInterval = null; | |
84 } | |
85 else { | |
86 timeInterval = getTimeInterval(result.get(1).trim()); | |
87 | |
88 if (timeInterval == null) { // No date at all. | |
89 type = name; | |
90 } | |
91 } | |
92 | |
93 return new NameAndTimeInterval(type, timeInterval); | |
94 } | |
95 | |
96 if (length == 3) { // e.g W(Q(1994)) or W(Q(1994 - 1999)) | |
97 | |
98 String type = | |
99 result.get(0).trim() + "(" + | |
100 result.get(1).trim() + ")"; | |
101 | |
102 ImportTimeInterval timeInterval = getTimeInterval( | |
103 result.get(2).trim()); | |
104 | |
105 if (timeInterval == null) { // No date at all. | |
106 type = name; | |
107 } | |
108 | |
109 return new NameAndTimeInterval(type, timeInterval); | |
110 } | |
111 | |
112 // more than 3 elements return unmodified. | |
113 | |
114 return new NameAndTimeInterval(name); | |
115 } | |
116 | |
117 private static ImportTimeInterval getTimeInterval(String datePart) { | |
118 | |
119 int minus = datePart.indexOf('-'); | |
120 | |
121 if (minus < 0) { // '-' not found | |
122 | |
123 Date date = null; | |
124 try { | |
125 date = DateGuesser.guessDate(datePart); | |
126 } | |
127 catch (IllegalArgumentException iae) { | |
128 log.warn("STA: Invalid date '" + datePart + "'"); | |
129 return null; | |
130 } | |
131 | |
132 return new ImportTimeInterval(date); | |
133 } | |
134 | |
135 // Found '-' so we have <from> - <to> | |
136 String startPart = datePart.substring(0, minus).trim(); | |
137 String endPart = datePart.substring(minus).trim(); | |
138 | |
139 Date startDate = null; | |
140 Date endDate = null; | |
141 | |
142 try { | |
143 startDate = DateGuesser.guessDate(startPart); | |
144 } | |
145 catch (IllegalArgumentException iae) { | |
146 log.warn("STA: Invalid start date '" + startPart + "'"); | |
147 } | |
148 | |
149 try { | |
150 endDate = DateGuesser.guessDate(endPart); | |
151 } | |
152 catch (IllegalArgumentException iae) { | |
153 log.warn("STA: Invalid end date '" + endPart + "'"); | |
154 } | |
155 | |
156 if (startDate == null) { | |
157 log.warn("STA: Need start date."); | |
158 return null; | |
159 } | |
160 | |
161 return new ImportTimeInterval(startDate, endDate); | |
162 } | |
163 | |
164 private static int unbracket(String s, int index, List<String> result) { | |
165 StringBuilder sb = new StringBuilder(); | |
166 int length = s.length(); | |
167 while (index < length) { | |
168 char c = s.charAt(index); | |
169 switch (c) { | |
170 case '(': | |
171 index = unbracket(s, index+1, result); | |
172 break; | |
173 case ')': | |
174 result.add(0, sb.toString()); | |
175 return index+1; | |
176 default: | |
177 sb.append(c); | |
178 ++index; | |
179 } | |
180 } | |
181 result.add(0, sb.toString()); | |
182 | |
183 return index; | |
184 } | |
185 | |
186 /* | |
187 public static void main(String [] args) { | |
188 for (String arg: args) { | |
189 NameAndTimeInterval nti = parseName(arg); | |
190 System.out.println(arg + " -> " + nti); | |
191 } | |
192 } | |
193 */ | |
194 } | |
195 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 : | |
196 |