Mercurial > dive4elements > river
comparison flys-backend/src/main/java/de/intevation/flys/importer/parsers/WstParser.java @ 1211:f08fe480092c
Moved file parsers to separate package.
flys-backend/trunk@2337 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author | Sascha L. Teichmann <sascha.teichmann@intevation.de> |
---|---|
date | Fri, 15 Jul 2011 13:07:45 +0000 |
parents | |
children | f834b411ca57 |
comparison
equal
deleted
inserted
replaced
1210:31d8638760b1 | 1211:f08fe480092c |
---|---|
1 package de.intevation.flys.importer.parsers; | |
2 | |
3 import java.util.ArrayList; | |
4 import java.util.HashSet; | |
5 | |
6 import java.io.File; | |
7 import java.io.IOException; | |
8 import java.io.LineNumberReader; | |
9 import java.io.InputStreamReader; | |
10 import java.io.FileInputStream; | |
11 | |
12 import java.text.NumberFormat; | |
13 | |
14 import org.apache.log4j.Logger; | |
15 | |
16 import de.intevation.flys.utils.StringUtil; | |
17 | |
18 import java.util.regex.Pattern; | |
19 import java.util.regex.Matcher; | |
20 | |
21 import java.math.BigDecimal; | |
22 | |
23 import de.intevation.flys.importer.ImportWstQRange; | |
24 import de.intevation.flys.importer.ImportWstColumn; | |
25 import de.intevation.flys.importer.ImportRange; | |
26 import de.intevation.flys.importer.ImportWst; | |
27 | |
28 public class WstParser | |
29 { | |
30 private static Logger log = Logger.getLogger(WstParser.class); | |
31 | |
32 public static final String COLUMN_BEZ_TEXT = "column-bez-text"; | |
33 public static final String COLUMN_BEZ_BREITE = "column-bez-breite"; | |
34 public static final String COLUMN_QUELLE = "column-quelle"; | |
35 public static final String COLUMN_DATUM = "column-datum"; | |
36 | |
37 public static final BigDecimal UNDEFINED_ZERO = | |
38 new BigDecimal(0.0); | |
39 public static final BigDecimal MIN_RANGE = | |
40 new BigDecimal(-Double.MAX_VALUE); | |
41 public static final BigDecimal MAX_RANGE = | |
42 new BigDecimal(Double.MAX_VALUE); | |
43 | |
44 public static final String ENCODING = "ISO-8859-1"; | |
45 | |
46 public static final Pattern UNIT_COMMENT = | |
47 Pattern.compile("\\*\\s*[kK][mM]\\s+(.+)"); | |
48 | |
49 public static final Pattern UNIT = | |
50 Pattern.compile("[^\\[]*\\[([^]]+)\\].*"); | |
51 | |
52 public static final BigDecimal INTERVAL_GAP = | |
53 new BigDecimal(0.00001); | |
54 | |
55 protected ImportWst wst; | |
56 | |
57 protected ImportRange lastRange; | |
58 | |
59 public WstParser() { | |
60 } | |
61 | |
62 public ImportWst getWst() { | |
63 return wst; | |
64 } | |
65 | |
66 public void setWst(ImportWst wst) { | |
67 this.wst = wst; | |
68 } | |
69 | |
70 public void parse(File file) throws IOException { | |
71 | |
72 log.info("Parsing WST file '" + file + "'"); | |
73 | |
74 wst = new ImportWst(file.getName()); | |
75 | |
76 LineNumberReader in = null; | |
77 try { | |
78 in = | |
79 new LineNumberReader( | |
80 new InputStreamReader( | |
81 new FileInputStream(file), ENCODING)); | |
82 | |
83 String input; | |
84 boolean first = true; | |
85 int columnCount = 0; | |
86 | |
87 String [] lsBezeichner = null; | |
88 String [] langBezeichner = null; | |
89 int [] colNaWidths = null; | |
90 String [] quellen = null; | |
91 String [] daten = null; | |
92 | |
93 BigDecimal [] aktAbfluesse = null; | |
94 BigDecimal [] firstAbfluesse = null; | |
95 | |
96 BigDecimal minKm = MAX_RANGE; | |
97 BigDecimal maxKm = MIN_RANGE; | |
98 | |
99 boolean columnHeaderChecked = false; | |
100 | |
101 String einheit = "Wasserstand [NN + m]"; | |
102 | |
103 HashSet<BigDecimal> kms = new HashSet<BigDecimal>(); | |
104 | |
105 while ((input = in.readLine()) != null) { | |
106 String line = input; | |
107 if (first) { // fetch number of columns | |
108 if ((line = line.trim()).length() == 0) { | |
109 continue; | |
110 } | |
111 try { | |
112 columnCount = Integer.parseInt(line); | |
113 if (columnCount <= 0) { | |
114 throw new NumberFormatException( | |
115 "number columns <= 0"); | |
116 } | |
117 log.debug("Number of columns: " + columnCount); | |
118 wst.setNumberColumns(columnCount); | |
119 lsBezeichner = new String[columnCount]; | |
120 } | |
121 catch (NumberFormatException nfe) { | |
122 log.warn(nfe); | |
123 continue; | |
124 } | |
125 first = false; | |
126 continue; | |
127 } | |
128 | |
129 line = line.replace(',', '.'); | |
130 | |
131 if (line.startsWith("*\u001f")) { | |
132 BigDecimal [] data = | |
133 parseLineAsDouble(line, columnCount, false, true); | |
134 | |
135 if (aktAbfluesse != null) { | |
136 addInterval(minKm, maxKm, aktAbfluesse); | |
137 minKm = MAX_RANGE; | |
138 maxKm = MIN_RANGE; | |
139 } | |
140 | |
141 aktAbfluesse = new BigDecimal[columnCount]; | |
142 log.debug("new q range: " + columnCount); | |
143 for (int i = 0; i < Math.min(columnCount, data.length); ++i) { | |
144 if (data[i] != null) { | |
145 log.debug(" column: " + data[i]); | |
146 aktAbfluesse[i] = data[i]; | |
147 } | |
148 } | |
149 | |
150 if (firstAbfluesse == null) { | |
151 firstAbfluesse = (BigDecimal [])aktAbfluesse.clone(); | |
152 } | |
153 continue; | |
154 } | |
155 | |
156 if (line.startsWith("*!")) { | |
157 String spezial = line.substring(2).trim(); | |
158 | |
159 if (spezial.length() == 0) { | |
160 continue; | |
161 } | |
162 | |
163 if (spezial.startsWith(COLUMN_BEZ_TEXT)) { | |
164 spezial = spezial.substring(COLUMN_BEZ_TEXT.length()).trim(); | |
165 if (spezial.length() == 0) { | |
166 continue; | |
167 } | |
168 langBezeichner = StringUtil.splitQuoted(spezial, '"'); | |
169 } | |
170 else if (spezial.startsWith(COLUMN_BEZ_BREITE)) { | |
171 spezial = spezial.substring(COLUMN_BEZ_BREITE.length()).trim(); | |
172 | |
173 if (spezial.length() == 0) { | |
174 continue; | |
175 } | |
176 | |
177 String[] split = spezial.split("\\s+"); | |
178 | |
179 colNaWidths = new int[split.length]; | |
180 for (int i=0; i < split.length; i++) { | |
181 colNaWidths[i] = Integer.parseInt(split[i]); | |
182 } | |
183 } | |
184 else if (spezial.startsWith(COLUMN_QUELLE)) { | |
185 if (spezial.length() == 0) { | |
186 continue; | |
187 } | |
188 quellen = StringUtil.splitQuoted(spezial, '"'); | |
189 } | |
190 else if (spezial.startsWith(COLUMN_DATUM)) { | |
191 spezial = spezial.substring(COLUMN_DATUM.length()).trim(); | |
192 if (spezial.length() == 0) { | |
193 continue; | |
194 } | |
195 daten = StringUtil.splitQuoted(spezial, '"'); | |
196 } | |
197 continue; | |
198 } | |
199 | |
200 if (line.length() < 11) { | |
201 continue; | |
202 } | |
203 | |
204 if (line.startsWith("*")) { | |
205 Matcher m = UNIT_COMMENT.matcher(line); | |
206 if (m.matches()) { | |
207 log.debug("unit comment found"); | |
208 // XXX: This hack is needed because desktop | |
209 // FLYS is broken figuring out the unit | |
210 String [] units = m.group(1).split("\\s{2,}"); | |
211 m = UNIT.matcher(units[0]); | |
212 einheit = m.matches() ? m.group(1) : units[0]; | |
213 log.debug("unit: " + einheit); | |
214 } | |
215 continue; | |
216 } | |
217 | |
218 if (firstAbfluesse != null) { | |
219 if (!columnHeaderChecked) { | |
220 int unknownCount = 0; | |
221 HashSet<String> uniqueColumnNames = | |
222 new HashSet<String>(); | |
223 for (int i = 0; i < lsBezeichner.length; ++i) { | |
224 if (lsBezeichner[i] == null | |
225 || lsBezeichner[i].length() == 0) { | |
226 double q = firstAbfluesse[i].doubleValue(); | |
227 if (q < 0.001) { | |
228 lsBezeichner[i] = | |
229 "<unbekannt #" + unknownCount + ">"; | |
230 ++unknownCount; | |
231 } | |
232 else { | |
233 lsBezeichner[i] = "Q="+format(q); | |
234 } | |
235 } | |
236 String candidate = lsBezeichner[i]; | |
237 int collision = 1; | |
238 while (!uniqueColumnNames.add(candidate)) { | |
239 candidate = lsBezeichner[i] + | |
240 " (" + collision + ")"; | |
241 ++collision; | |
242 } | |
243 wst.getColumn(i).setName(candidate); | |
244 } | |
245 columnHeaderChecked = true; | |
246 } | |
247 | |
248 BigDecimal [] data = | |
249 parseLineAsDouble(line, columnCount, true, false); | |
250 | |
251 BigDecimal kaem = data[0]; | |
252 | |
253 if (!kms.add(kaem)) { | |
254 log.warn( | |
255 "km " + kaem + | |
256 " (line " + in.getLineNumber() + | |
257 ") found more than once. -> ignored"); | |
258 continue; | |
259 } | |
260 | |
261 if (kaem.compareTo(minKm) < 0) { | |
262 minKm = kaem; | |
263 } | |
264 if (kaem.compareTo(maxKm) > 0) { | |
265 maxKm = kaem; | |
266 } | |
267 | |
268 // extract values | |
269 for (int i = 0; i < columnCount; ++i) { | |
270 addValue(kaem, data[i+1], i); | |
271 } | |
272 | |
273 } | |
274 else { // firstAbfluesse == null | |
275 if (langBezeichner != null) { | |
276 lsBezeichner = StringUtil.fitArray( | |
277 langBezeichner, lsBezeichner); | |
278 } | |
279 else if (colNaWidths != null) { | |
280 for (int j = 0, i = 0, N = input.length(); | |
281 j < colNaWidths.length && i < N; | |
282 i += colNaWidths[j++] | |
283 ) { | |
284 lsBezeichner[j] = input.substring( | |
285 i, i+colNaWidths[j]).trim(); | |
286 } | |
287 } | |
288 else { | |
289 // first column begins at position 8 in line | |
290 for (int i = 8, col = 0; i < input.length(); i += 9) { | |
291 if ((i + 9) > input.length()) { | |
292 i = input.length() - 10; | |
293 } | |
294 // one column header is 9 chars wide | |
295 lsBezeichner[col++] = | |
296 input.substring(i, i + 9).trim(); | |
297 | |
298 if (col == lsBezeichner.length) { | |
299 break; | |
300 } | |
301 } | |
302 } | |
303 } | |
304 | |
305 } | |
306 addInterval(minKm, maxKm, aktAbfluesse); | |
307 } | |
308 finally { | |
309 if (in != null) { | |
310 in.close(); | |
311 } | |
312 } | |
313 } | |
314 | |
315 protected void addValue(BigDecimal km, BigDecimal w, int index) { | |
316 if (w != null) { | |
317 ImportWstColumn column = wst.getColumn(index); | |
318 column.addColumnValue(km, w); | |
319 } | |
320 } | |
321 | |
322 private static final NumberFormat NF = getNumberFormat(); | |
323 | |
324 private static final NumberFormat getNumberFormat() { | |
325 NumberFormat nf = NumberFormat.getInstance(); | |
326 nf.setMinimumFractionDigits(2); | |
327 nf.setMaximumFractionDigits(2); | |
328 return nf; | |
329 } | |
330 | |
331 protected static String format(double value) { | |
332 return NF.format(value); | |
333 } | |
334 | |
335 protected void addInterval( | |
336 BigDecimal from, | |
337 BigDecimal to, | |
338 BigDecimal [] values | |
339 ) { | |
340 log.debug("addInterval: " + from + " " + to); | |
341 | |
342 if (values == null || from == MAX_RANGE) { | |
343 return; | |
344 } | |
345 | |
346 if (to.compareTo(from) < 0) { | |
347 BigDecimal t = from; from = to; to = t; | |
348 } | |
349 | |
350 ImportRange range = new ImportRange(from, to); | |
351 | |
352 // little workaround to make the q ranges tightly fit. | |
353 // Leave a very small gap to ensure that the range queries | |
354 // still work. | |
355 | |
356 if (lastRange != null) { | |
357 double d1 = Math.abs( | |
358 lastRange.getB().doubleValue() - range.getA().doubleValue()); | |
359 double d2 = Math.abs( | |
360 range.getB().doubleValue() - lastRange.getA().doubleValue()); | |
361 | |
362 if (d1 < d2) { | |
363 lastRange.setB(range.getA().subtract(INTERVAL_GAP)); | |
364 } | |
365 else { | |
366 range.setA(lastRange.getB().subtract(INTERVAL_GAP)); | |
367 } | |
368 } | |
369 | |
370 for (int i = 0; i < values.length; ++i) { | |
371 ImportWstColumn column = wst.getColumn(i); | |
372 ImportWstQRange wstQRange = new ImportWstQRange(range, values[i]); | |
373 column.addColumnQRange(wstQRange); | |
374 } | |
375 | |
376 lastRange = range; | |
377 } | |
378 | |
379 private static final BigDecimal [] parseLineAsDouble( | |
380 String line, | |
381 int count, | |
382 boolean bStation, | |
383 boolean bParseEmptyAsZero | |
384 ) { | |
385 String [] tokens = parseLine(line, count, bStation); | |
386 | |
387 BigDecimal [] doubles = new BigDecimal[tokens.length]; | |
388 | |
389 for (int i = 0; i < doubles.length; ++i) { | |
390 String token = tokens[i].trim(); | |
391 if (token.length() != 0) { | |
392 doubles[i] = new BigDecimal(token); | |
393 } | |
394 else if (bParseEmptyAsZero) { | |
395 doubles[i] = UNDEFINED_ZERO; | |
396 } | |
397 } | |
398 | |
399 return doubles; | |
400 } | |
401 | |
402 private static String [] parseLine( | |
403 String line, | |
404 int tokenCount, | |
405 boolean bParseStation | |
406 ) { | |
407 ArrayList<String> strings = new ArrayList<String>(); | |
408 | |
409 if (bParseStation) { | |
410 if (line.length() < 8) { | |
411 throw new IllegalArgumentException("station too short"); | |
412 } | |
413 strings.add(line.substring(0, 8)); | |
414 } | |
415 | |
416 int pos = 9; | |
417 for (int i = 0; i < tokenCount; ++i) { | |
418 if (line.length() >= pos + 8) { | |
419 strings.add(line.substring(pos, pos + 8)); | |
420 } | |
421 else { | |
422 strings.add(""); | |
423 } | |
424 pos += 9; | |
425 } | |
426 | |
427 return strings.toArray(new String[strings.size()]); | |
428 } | |
429 } | |
430 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 : |