comparison backend/src/main/java/org/dive4elements/river/importer/parsers/WstParser.java @ 5838:5aa05a7a34b7

Rename modules to more fitting names.
author Sascha L. Teichmann <teichmann@intevation.de>
date Thu, 25 Apr 2013 15:23:37 +0200
parents flys-backend/src/main/java/org/dive4elements/river/importer/parsers/WstParser.java@18619c1e7c2a
children 4dd33b86dc61
comparison
equal deleted inserted replaced
5837:d9901a08d0a6 5838:5aa05a7a34b7
1 package org.dive4elements.river.importer.parsers;
2
3 import java.util.ArrayList;
4 import java.util.HashSet;
5
6 import java.io.File;
7 import java.io.IOException;
8 import java.io.LineNumberReader;
9 import java.io.InputStreamReader;
10 import java.io.FileInputStream;
11
12 import java.text.NumberFormat;
13
14 import org.apache.log4j.Logger;
15
16 import org.dive4elements.river.utils.StringUtil;
17 import org.dive4elements.river.utils.DateGuesser;
18
19 import java.util.regex.Pattern;
20 import java.util.regex.Matcher;
21
22 import java.math.BigDecimal;
23
24 import org.dive4elements.river.importer.ImportWstQRange;
25 import org.dive4elements.river.importer.ImportWstColumn;
26 import org.dive4elements.river.importer.ImportTimeInterval;
27 import org.dive4elements.river.importer.ImportRange;
28 import org.dive4elements.river.importer.ImportUnit;
29 import org.dive4elements.river.importer.ImportWst;
30
31 public class WstParser
32 {
33 private static Logger log = Logger.getLogger(WstParser.class);
34
35 public static final String COLUMN_BEZ_TEXT = "column-bez-text";
36 public static final String COLUMN_BEZ_BREITE = "column-bez-breite";
37 public static final String COLUMN_QUELLE = "column-quelle";
38 public static final String COLUMN_DATUM = "column-datum";
39
40 public static final BigDecimal UNDEFINED_ZERO =
41 new BigDecimal(0.0);
42 public static final BigDecimal MIN_RANGE =
43 new BigDecimal(-Double.MAX_VALUE);
44 public static final BigDecimal MAX_RANGE =
45 new BigDecimal(Double.MAX_VALUE);
46
47 public static final String ENCODING = "ISO-8859-1";
48
49 public static final Pattern UNIT_COMMENT =
50 Pattern.compile("\\*\\s*[kK][mM]\\s+(.+)");
51
52 public static final Pattern UNIT =
53 Pattern.compile("[^\\[]*\\[([^]]+)\\].*");
54
55 public static final double INTERVAL_GAP = 0.00001d;
56
57 protected ImportWst wst;
58
59 protected ImportRange lastRange;
60
61 public WstParser() {
62 }
63
64 public ImportWst getWst() {
65 return wst;
66 }
67
68 public void setWst(ImportWst wst) {
69 this.wst = wst;
70 }
71
72 public ImportTimeInterval guessDate(String string) {
73 try {
74 return new ImportTimeInterval(
75 DateGuesser.guessDate(string));
76 }
77 catch (IllegalArgumentException iae) {
78 }
79 return null;
80 }
81
82 public void parse(File file) throws IOException {
83
84 log.info("Parsing WST file '" + file + "'");
85
86 wst = new ImportWst(file.getName());
87
88 LineNumberReader in = null;
89 try {
90 in =
91 new LineNumberReader(
92 new InputStreamReader(
93 new FileInputStream(file), ENCODING));
94
95 String input;
96 boolean first = true;
97 int columnCount = 0;
98
99 String [] lsBezeichner = null;
100 String [] langBezeichner = null;
101 int [] colNaWidths = null;
102 String [] quellen = null;
103 String [] daten = null;
104
105 BigDecimal [] aktAbfluesse = null;
106 BigDecimal [] firstAbfluesse = null;
107
108 BigDecimal minKm = MAX_RANGE;
109 BigDecimal maxKm = MIN_RANGE;
110 BigDecimal kmHist1 = null;
111 BigDecimal kmHist2 = null;
112
113 boolean columnHeaderChecked = false;
114
115 String einheit = "Wasserstand [NN + m]";
116
117 HashSet<BigDecimal> kms = new HashSet<BigDecimal>();
118
119 while ((input = in.readLine()) != null) {
120 String line = input;
121 if (first) { // fetch number of columns
122 if ((line = line.trim()).length() == 0) {
123 continue;
124 }
125 try {
126 columnCount = Integer.parseInt(line);
127 if (columnCount <= 0) {
128 throw new NumberFormatException(
129 "number columns <= 0");
130 }
131 log.debug("Number of columns: " + columnCount);
132 wst.setNumberColumns(columnCount);
133 lsBezeichner = new String[columnCount];
134 }
135 catch (NumberFormatException nfe) {
136 log.warn("WST: invalid number.", nfe);
137 continue;
138 }
139 first = false;
140 continue;
141 }
142
143 line = line.replace(',', '.');
144
145 if (line.startsWith("*\u001f")) {
146 BigDecimal [] data =
147 parseLineAsDouble(line, columnCount, false, true);
148
149 if (aktAbfluesse != null) {
150 if (kmHist1 != null && kmHist2 != null
151 && kmHist1.compareTo(kmHist2) < 0) {
152 BigDecimal t = minKm; minKm = maxKm; maxKm = t;
153 }
154 addInterval(minKm, maxKm, aktAbfluesse);
155 minKm = MAX_RANGE;
156 maxKm = MIN_RANGE;
157 }
158
159 aktAbfluesse = new BigDecimal[columnCount];
160 log.debug("new q range: " + columnCount);
161 for (int i = 0; i < Math.min(columnCount, data.length); ++i) {
162 if (data[i] != null) {
163 log.debug(" column: " + data[i]);
164 aktAbfluesse[i] = data[i];
165 }
166 }
167
168 if (firstAbfluesse == null) {
169 firstAbfluesse = (BigDecimal [])aktAbfluesse.clone();
170 }
171 continue;
172 }
173
174 if (line.startsWith("*!")) {
175 String spezial = line.substring(2).trim();
176
177 if (spezial.length() == 0) {
178 continue;
179 }
180
181 if (spezial.startsWith(COLUMN_BEZ_TEXT)) {
182 spezial = spezial.substring(COLUMN_BEZ_TEXT.length()).trim();
183 if (spezial.length() == 0) {
184 continue;
185 }
186 langBezeichner = StringUtil.splitQuoted(spezial, '"');
187 }
188 else if (spezial.startsWith(COLUMN_BEZ_BREITE)) {
189 spezial = spezial.substring(COLUMN_BEZ_BREITE.length()).trim();
190
191 if (spezial.length() == 0) {
192 continue;
193 }
194
195 String[] split = spezial.split("\\s+");
196
197 colNaWidths = new int[split.length];
198 for (int i=0; i < split.length; i++) {
199 colNaWidths[i] = Integer.parseInt(split[i]);
200 }
201 }
202 else if (spezial.startsWith(COLUMN_QUELLE)) {
203 if (spezial.length() == 0) {
204 continue;
205 }
206 quellen = StringUtil.splitQuoted(spezial, '"');
207 }
208 else if (spezial.startsWith(COLUMN_DATUM)) {
209 spezial = spezial.substring(COLUMN_DATUM.length()).trim();
210 if (spezial.length() == 0) {
211 continue;
212 }
213 daten = StringUtil.splitQuoted(spezial, '"');
214 }
215 continue;
216 }
217
218 if (line.length() < 11) {
219 continue;
220 }
221
222 if (line.startsWith("*")) {
223 Matcher m = UNIT_COMMENT.matcher(line);
224 if (m.matches()) {
225 log.debug("unit comment found");
226 // XXX: This hack is needed because desktop
227 // FLYS is broken figuring out the unit
228 String [] units = m.group(1).split("\\s{2,}");
229 m = UNIT.matcher(units[0]);
230 einheit = m.matches() ? m.group(1) : units[0];
231 log.debug("unit: " + einheit);
232 }
233 continue;
234 }
235
236 if (firstAbfluesse != null) {
237 if (!columnHeaderChecked) {
238 int unknownCount = 0;
239 HashSet<String> uniqueColumnNames =
240 new HashSet<String>();
241 for (int i = 0; i < lsBezeichner.length; ++i) {
242 if (lsBezeichner[i] == null
243 || lsBezeichner[i].length() == 0) {
244 double q = firstAbfluesse[i].doubleValue();
245 if (q < 0.001) {
246 lsBezeichner[i] =
247 "<unbekannt #" + unknownCount + ">";
248 ++unknownCount;
249 }
250 else {
251 lsBezeichner[i] = "Q="+format(q);
252 }
253 }
254 String candidate = lsBezeichner[i];
255 int collision = 1;
256 while (!uniqueColumnNames.add(candidate)) {
257 candidate = lsBezeichner[i] +
258 " (" + collision + ")";
259 ++collision;
260 }
261 ImportWstColumn iwc = wst.getColumn(i);
262 iwc.setName(candidate);
263 String potentialDate = daten != null && i < daten.length
264 ? daten[i]
265 : candidate;
266 iwc.setTimeInterval(guessDate(potentialDate));
267 }
268 columnHeaderChecked = true;
269 }
270
271 BigDecimal [] data =
272 parseLineAsDouble(line, columnCount, true, false);
273
274 BigDecimal kaem = data[0];
275
276 if (!kms.add(kaem)) {
277 log.warn(
278 "WST: km " + kaem +
279 " (line " + in.getLineNumber() +
280 ") found more than once. -> ignored");
281 continue;
282 }
283
284 kmHist2 = kmHist1;
285 kmHist1 = kaem;
286
287 if (kaem.compareTo(minKm) < 0) {
288 minKm = kaem;
289 }
290 if (kaem.compareTo(maxKm) > 0) {
291 maxKm = kaem;
292 }
293
294 // extract values
295 for (int i = 0; i < columnCount; ++i) {
296 addValue(kaem, data[i+1], i);
297 }
298
299 }
300 else { // firstAbfluesse == null
301 if (langBezeichner != null) {
302 lsBezeichner = StringUtil.fitArray(
303 langBezeichner, lsBezeichner);
304 }
305 else if (colNaWidths != null) {
306 for (int j = 0, i = 0, N = input.length();
307 j < colNaWidths.length && i < N;
308 i += colNaWidths[j++]
309 ) {
310 lsBezeichner[j] = input.substring(
311 i, i+colNaWidths[j]).trim();
312 }
313 }
314 else {
315 // first column begins at position 8 in line
316 for (int i = 8, col = 0; i < input.length(); i += 9) {
317 if ((i + 9) > input.length()) {
318 i = input.length() - 10;
319 }
320 // one column header is 9 chars wide
321 lsBezeichner[col++] =
322 input.substring(i, i + 9).trim();
323
324 if (col == lsBezeichner.length) {
325 break;
326 }
327 }
328 }
329 }
330
331 } // for all lines in WST file
332
333 wst.setUnit(new ImportUnit(einheit));
334
335 if (kmHist1 != null && kmHist2 != null
336 && kmHist1.compareTo(kmHist2) < 0) {
337 BigDecimal t = minKm; minKm = maxKm; maxKm = t;
338 }
339 addInterval(minKm, maxKm, aktAbfluesse);
340
341 fixRangesOrder();
342 }
343 finally {
344 if (in != null) {
345 in.close();
346 }
347 }
348 }
349
350 protected void fixRangesOrder() {
351 wst.fixRangesOrder();
352 }
353
354 protected void addValue(BigDecimal km, BigDecimal w, int index) {
355 if (w != null) {
356 ImportWstColumn column = wst.getColumn(index);
357 column.addColumnValue(km, w);
358 }
359 }
360
361 private static final NumberFormat NF = getNumberFormat();
362
363 private static final NumberFormat getNumberFormat() {
364 NumberFormat nf = NumberFormat.getInstance();
365 nf.setMinimumFractionDigits(2);
366 nf.setMaximumFractionDigits(2);
367 return nf;
368 }
369
370 protected static String format(double value) {
371 return NF.format(value);
372 }
373
374 protected void addInterval(
375 BigDecimal from,
376 BigDecimal to,
377 BigDecimal [] values
378 ) {
379 log.debug("addInterval: " + from + " " + to);
380
381 if (values == null || from == MAX_RANGE || from == MIN_RANGE) {
382 return;
383 }
384
385 ImportRange range = new ImportRange(from, to);
386
387 // little workaround to make the q ranges tightly fit.
388 // Leave a very small gap to ensure that the range queries
389 // still work.
390
391 if (lastRange != null) {
392 double a1 = lastRange.getA().doubleValue();
393 double b1 = lastRange.getB().doubleValue();
394 double a2 = range.getA().doubleValue();
395
396 if (a1 < b1) {
397 lastRange.setB(new BigDecimal(a2 - INTERVAL_GAP));
398 }
399 else { // a1 >= b1
400 lastRange.setB(new BigDecimal(a2 + INTERVAL_GAP));
401 }
402 }
403
404 for (int i = 0; i < values.length; ++i) {
405 ImportWstColumn column = wst.getColumn(i);
406 ImportWstQRange wstQRange = new ImportWstQRange(range, values[i]);
407 column.addColumnQRange(wstQRange);
408 }
409
410 lastRange = range;
411 }
412
413 private static final BigDecimal [] parseLineAsDouble(
414 String line,
415 int count,
416 boolean bStation,
417 boolean bParseEmptyAsZero
418 ) {
419 String [] tokens = parseLine(line, count, bStation);
420
421 BigDecimal [] doubles = new BigDecimal[tokens.length];
422
423 for (int i = 0; i < doubles.length; ++i) {
424 String token = tokens[i].trim();
425 if (token.length() != 0) {
426 doubles[i] = new BigDecimal(token);
427 }
428 else if (bParseEmptyAsZero) {
429 doubles[i] = UNDEFINED_ZERO;
430 }
431 }
432
433 return doubles;
434 }
435
436 private static String [] parseLine(
437 String line,
438 int tokenCount,
439 boolean bParseStation
440 ) {
441 ArrayList<String> strings = new ArrayList<String>();
442
443 if (bParseStation) {
444 if (line.length() < 8) {
445 throw new IllegalArgumentException("station too short");
446 }
447 strings.add(line.substring(0, 8));
448 }
449
450 int pos = 9;
451 for (int i = 0; i < tokenCount; ++i) {
452 if (line.length() >= pos + 8) {
453 strings.add(line.substring(pos, pos + 8));
454 }
455 else {
456 strings.add("");
457 }
458 pos += 9;
459 }
460
461 return strings.toArray(new String[strings.size()]);
462 }
463 }
464 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org