Mercurial > dive4elements > river
comparison flys-backend/src/main/java/org/dive4elements/river/importer/parsers/StaFileParser.java @ 5828:dfb26b03b179
Moved directories to org.dive4elements.river
author | Sascha L. Teichmann <teichmann@intevation.de> |
---|---|
date | Thu, 25 Apr 2013 11:53:11 +0200 |
parents | flys-backend/src/main/java/de/intevation/flys/importer/parsers/StaFileParser.java@54077c9c9305 |
children | 18619c1e7c2a |
comparison
equal
deleted
inserted
replaced
5827:e308d4ecd35a | 5828:dfb26b03b179 |
---|---|
1 package de.intevation.flys.importer.parsers; | |
2 | |
3 import java.io.File; | |
4 import java.io.IOException; | |
5 import java.io.LineNumberReader; | |
6 import java.io.FileInputStream; | |
7 import java.io.InputStreamReader; | |
8 | |
9 import java.math.BigDecimal; | |
10 | |
11 import java.util.regex.Pattern; | |
12 import java.util.regex.Matcher; | |
13 | |
14 import java.util.Date; | |
15 import java.util.HashMap; | |
16 import java.util.ArrayList; | |
17 import java.util.List; | |
18 | |
19 import org.apache.log4j.Logger; | |
20 | |
21 import de.intevation.flys.importer.ImportMainValueType; | |
22 import de.intevation.flys.importer.ImportMainValue; | |
23 import de.intevation.flys.importer.ImportNamedMainValue; | |
24 import de.intevation.flys.importer.ImportGauge; | |
25 import de.intevation.flys.importer.ImportTimeInterval; | |
26 import de.intevation.flys.utils.DateGuesser; | |
27 | |
28 public class StaFileParser | |
29 { | |
30 private static Logger log = Logger.getLogger(StaFileParser.class); | |
31 | |
32 public static final String ENCODING = "ISO-8859-1"; | |
33 | |
34 public static final String TYPES = | |
35 System.getProperty("flys.backend.main.value.types", "QWTD"); | |
36 | |
37 public static final boolean NOT_PARSE_GAUGE_NUMBERS = | |
38 Boolean.getBoolean("flys.backend.sta.not.parse.gauge.numbers"); | |
39 | |
40 public static final Pattern QWTD_ = | |
41 Pattern.compile("\\s*([^\\s]+)\\s+([^\\s]+)\\s+([" + | |
42 Pattern.quote(TYPES) + "]).*"); | |
43 | |
44 public static final class NameAndTimeInterval { | |
45 private String name; | |
46 private ImportTimeInterval timeInterval; | |
47 | |
48 public NameAndTimeInterval(String name) { | |
49 this(name, null); | |
50 } | |
51 | |
52 public NameAndTimeInterval(String name, ImportTimeInterval timeInterval) { | |
53 this.name = name; | |
54 this.timeInterval = timeInterval; | |
55 } | |
56 | |
57 public String getName() { | |
58 return name; | |
59 } | |
60 | |
61 public ImportTimeInterval getTimeInterval() { | |
62 return timeInterval; | |
63 } | |
64 } // class NameAndTimeInterval | |
65 | |
66 public StaFileParser() { | |
67 } | |
68 | |
69 public boolean parse(ImportGauge gauge) throws IOException { | |
70 | |
71 File file = gauge.getStaFile(); | |
72 | |
73 log.info("parsing STA file: " + file); | |
74 LineNumberReader in = null; | |
75 try { | |
76 in = | |
77 new LineNumberReader( | |
78 new InputStreamReader( | |
79 new FileInputStream(file), ENCODING)); | |
80 | |
81 String line = in.readLine(); | |
82 | |
83 if (line == null) { | |
84 log.warn("STA file is empty."); | |
85 return false; | |
86 } | |
87 | |
88 if (line.length() < 37) { | |
89 log.warn("First line in STA file is too short."); | |
90 return false; | |
91 } | |
92 | |
93 String gaugeName = line.substring(16, 28).trim(); | |
94 | |
95 Long gaugeNumber = null; | |
96 | |
97 if (!NOT_PARSE_GAUGE_NUMBERS) { | |
98 String gaugeNumberString = line.substring(8, 16).trim(); | |
99 | |
100 try { | |
101 gaugeNumber = Long.parseLong(gaugeNumberString); | |
102 } | |
103 catch (NumberFormatException nfe) { | |
104 log.warn("STA: '" + gaugeNumberString + | |
105 "' is not a valid long number."); | |
106 } | |
107 } | |
108 | |
109 gauge.setName(gaugeName); | |
110 gauge.setOfficialNumber(gaugeNumber); | |
111 | |
112 if (log.isDebugEnabled()) { | |
113 log.debug( | |
114 "name/number: '" + gaugeName + "' '" + gaugeNumber + "'"); | |
115 } | |
116 | |
117 String [] values = line.substring(38).trim().split("\\s+", 2); | |
118 | |
119 if (values.length < 2) { | |
120 log.warn("STA: Not enough columns for aeo and datum."); | |
121 } | |
122 try { | |
123 gauge.setAeo(new BigDecimal(values[0].replace(",", "."))); | |
124 gauge.setDatum(new BigDecimal(values[1].replace(",", "."))); | |
125 } | |
126 catch (NumberFormatException nfe) { | |
127 log.warn("STA: cannot parse aeo or datum."); | |
128 return false; | |
129 } | |
130 | |
131 line = in.readLine(); | |
132 | |
133 if (line == null) { | |
134 log.warn("STA file has not enough lines"); | |
135 return false; | |
136 } | |
137 | |
138 if (line.length() < 36) { | |
139 log.warn("STA: second line is too short"); | |
140 return false; | |
141 } | |
142 | |
143 try { | |
144 gauge.setStation( | |
145 new BigDecimal(line.substring(29, 36).trim())); | |
146 } | |
147 catch (NumberFormatException nfe) { | |
148 log.warn("STA: parsing of the datum of the gauge failed"); | |
149 return false; | |
150 } | |
151 | |
152 // overread the next six lines | |
153 for (int i = 0; i < 6; ++i) { | |
154 if ((line = in.readLine()) == null) { | |
155 log.warn("STA file is too short"); | |
156 return false; | |
157 } | |
158 } | |
159 | |
160 HashMap<String, ImportMainValueType> types = | |
161 new HashMap<String, ImportMainValueType>(); | |
162 | |
163 ArrayList<ImportNamedMainValue> namedMainValues = | |
164 new ArrayList<ImportNamedMainValue>(); | |
165 | |
166 ArrayList<ImportMainValue> mainValues = | |
167 new ArrayList<ImportMainValue>(); | |
168 | |
169 while ((line = in.readLine()) != null) { | |
170 Matcher m = QWTD_.matcher(line); | |
171 if (m.matches()) { | |
172 BigDecimal value; | |
173 try { | |
174 value = new BigDecimal(m.group(2).replace(",", ".")); | |
175 } | |
176 catch (NumberFormatException nfe) { | |
177 log.warn("STA: value not parseable in line " | |
178 + in.getLineNumber()); | |
179 continue; | |
180 } | |
181 String typeString = m.group(3); | |
182 log.debug("\t type: " + typeString); | |
183 ImportMainValueType type = types.get(typeString); | |
184 if (type == null) { | |
185 type = new ImportMainValueType(typeString); | |
186 types.put(typeString, type); | |
187 } | |
188 String name = m.group(1); | |
189 NameAndTimeInterval nat = parseName(name); | |
190 ImportNamedMainValue namedMainValue = | |
191 new ImportNamedMainValue(type, nat.getName()); | |
192 namedMainValues.add(namedMainValue); | |
193 | |
194 ImportMainValue mainValue = new ImportMainValue( | |
195 gauge, | |
196 namedMainValue, | |
197 value, | |
198 nat.getTimeInterval()); | |
199 | |
200 mainValues.add(mainValue); | |
201 } | |
202 else { | |
203 // TODO: treat as a comment | |
204 } | |
205 } | |
206 gauge.setMainValueTypes( | |
207 new ArrayList<ImportMainValueType>(types.values())); | |
208 gauge.setNamedMainValues(namedMainValues); | |
209 gauge.setMainValues(mainValues); | |
210 } | |
211 finally { | |
212 if (in != null) { | |
213 in.close(); | |
214 } | |
215 } | |
216 log.info("finished parsing STA file: " + file); | |
217 return true; | |
218 } | |
219 | |
220 protected NameAndTimeInterval parseName(String name) { | |
221 List<String> result = new ArrayList<String>(); | |
222 | |
223 unbracket(name, 0, result); | |
224 | |
225 int length = result.size(); | |
226 | |
227 if (length < 1) { // Should not happen. | |
228 return new NameAndTimeInterval(name); | |
229 } | |
230 | |
231 if (length == 1) { // No date at all -> use first part. | |
232 return new NameAndTimeInterval(result.get(0).trim()); | |
233 } | |
234 | |
235 if (length == 2) { // e.g. W(1994) or W(1994 - 1999) | |
236 String type = result.get(0).trim(); | |
237 | |
238 ImportTimeInterval timeInterval = getTimeInterval( | |
239 result.get(1).trim()); | |
240 | |
241 if (timeInterval == null) { // No date at all. | |
242 type = name; | |
243 } | |
244 | |
245 return new NameAndTimeInterval(type, timeInterval); | |
246 } | |
247 | |
248 if (length == 3) { // e.g W(Q(1994)) or W(Q(1994 - 1999)) | |
249 | |
250 String type = | |
251 result.get(0).trim() + "(" + | |
252 result.get(1).trim() + ")"; | |
253 | |
254 ImportTimeInterval timeInterval = getTimeInterval( | |
255 result.get(2).trim()); | |
256 | |
257 if (timeInterval == null) { // No date at all. | |
258 type = name; | |
259 } | |
260 | |
261 return new NameAndTimeInterval(type, timeInterval); | |
262 } | |
263 | |
264 // more than 3 elements return unmodified. | |
265 | |
266 return new NameAndTimeInterval(name); | |
267 } | |
268 | |
269 private static ImportTimeInterval getTimeInterval(String datePart) { | |
270 | |
271 int minus = datePart.indexOf('-'); | |
272 | |
273 if (minus < 0) { // '-' not found | |
274 | |
275 Date date = null; | |
276 try { | |
277 date = DateGuesser.guessDate(datePart); | |
278 } | |
279 catch (IllegalArgumentException iae) { | |
280 log.warn("STA: Invalid date '" + datePart + "'"); | |
281 return null; | |
282 } | |
283 | |
284 return new ImportTimeInterval(date); | |
285 } | |
286 | |
287 // Found '-' so we have <from> - <to> | |
288 String startPart = datePart.substring(0, minus).trim(); | |
289 String endPart = datePart.substring(minus).trim(); | |
290 | |
291 Date startDate = null; | |
292 Date endDate = null; | |
293 | |
294 try { | |
295 startDate = DateGuesser.guessDate(startPart); | |
296 } | |
297 catch (IllegalArgumentException iae) { | |
298 log.warn("STA: Invalid start date '" + startPart + "'"); | |
299 } | |
300 | |
301 try { | |
302 endDate = DateGuesser.guessDate(endPart); | |
303 } | |
304 catch (IllegalArgumentException iae) { | |
305 log.warn("STA: Invalid end date '" + endPart + "'"); | |
306 } | |
307 | |
308 if (startDate == null) { | |
309 log.warn("STA: Need start date."); | |
310 return null; | |
311 } | |
312 | |
313 return new ImportTimeInterval(startDate, endDate); | |
314 } | |
315 | |
316 private static int unbracket(String s, int index, List<String> result) { | |
317 StringBuilder sb = new StringBuilder(); | |
318 int length = s.length(); | |
319 while (index < length) { | |
320 char c = s.charAt(index); | |
321 switch (c) { | |
322 case '(': | |
323 index = unbracket(s, index+1, result); | |
324 break; | |
325 case ')': | |
326 result.add(0, sb.toString()); | |
327 return index+1; | |
328 default: | |
329 sb.append(c); | |
330 ++index; | |
331 } | |
332 } | |
333 result.add(0, sb.toString()); | |
334 | |
335 return index; | |
336 } | |
337 } | |
338 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 : |