comparison flys-backend/src/main/java/de/intevation/flys/importer/parsers/WstParser.java @ 1211:f08fe480092c

Moved file parsers to separate package. flys-backend/trunk@2337 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author Sascha L. Teichmann <sascha.teichmann@intevation.de>
date Fri, 15 Jul 2011 13:07:45 +0000
parents
children f834b411ca57
comparison
equal deleted inserted replaced
1210:31d8638760b1 1211:f08fe480092c
1 package de.intevation.flys.importer.parsers;
2
3 import java.util.ArrayList;
4 import java.util.HashSet;
5
6 import java.io.File;
7 import java.io.IOException;
8 import java.io.LineNumberReader;
9 import java.io.InputStreamReader;
10 import java.io.FileInputStream;
11
12 import java.text.NumberFormat;
13
14 import org.apache.log4j.Logger;
15
16 import de.intevation.flys.utils.StringUtil;
17
18 import java.util.regex.Pattern;
19 import java.util.regex.Matcher;
20
21 import java.math.BigDecimal;
22
23 import de.intevation.flys.importer.ImportWstQRange;
24 import de.intevation.flys.importer.ImportWstColumn;
25 import de.intevation.flys.importer.ImportRange;
26 import de.intevation.flys.importer.ImportWst;
27
28 public class WstParser
29 {
30 private static Logger log = Logger.getLogger(WstParser.class);
31
32 public static final String COLUMN_BEZ_TEXT = "column-bez-text";
33 public static final String COLUMN_BEZ_BREITE = "column-bez-breite";
34 public static final String COLUMN_QUELLE = "column-quelle";
35 public static final String COLUMN_DATUM = "column-datum";
36
37 public static final BigDecimal UNDEFINED_ZERO =
38 new BigDecimal(0.0);
39 public static final BigDecimal MIN_RANGE =
40 new BigDecimal(-Double.MAX_VALUE);
41 public static final BigDecimal MAX_RANGE =
42 new BigDecimal(Double.MAX_VALUE);
43
44 public static final String ENCODING = "ISO-8859-1";
45
46 public static final Pattern UNIT_COMMENT =
47 Pattern.compile("\\*\\s*[kK][mM]\\s+(.+)");
48
49 public static final Pattern UNIT =
50 Pattern.compile("[^\\[]*\\[([^]]+)\\].*");
51
52 public static final BigDecimal INTERVAL_GAP =
53 new BigDecimal(0.00001);
54
55 protected ImportWst wst;
56
57 protected ImportRange lastRange;
58
59 public WstParser() {
60 }
61
62 public ImportWst getWst() {
63 return wst;
64 }
65
66 public void setWst(ImportWst wst) {
67 this.wst = wst;
68 }
69
70 public void parse(File file) throws IOException {
71
72 log.info("Parsing WST file '" + file + "'");
73
74 wst = new ImportWst(file.getName());
75
76 LineNumberReader in = null;
77 try {
78 in =
79 new LineNumberReader(
80 new InputStreamReader(
81 new FileInputStream(file), ENCODING));
82
83 String input;
84 boolean first = true;
85 int columnCount = 0;
86
87 String [] lsBezeichner = null;
88 String [] langBezeichner = null;
89 int [] colNaWidths = null;
90 String [] quellen = null;
91 String [] daten = null;
92
93 BigDecimal [] aktAbfluesse = null;
94 BigDecimal [] firstAbfluesse = null;
95
96 BigDecimal minKm = MAX_RANGE;
97 BigDecimal maxKm = MIN_RANGE;
98
99 boolean columnHeaderChecked = false;
100
101 String einheit = "Wasserstand [NN + m]";
102
103 HashSet<BigDecimal> kms = new HashSet<BigDecimal>();
104
105 while ((input = in.readLine()) != null) {
106 String line = input;
107 if (first) { // fetch number of columns
108 if ((line = line.trim()).length() == 0) {
109 continue;
110 }
111 try {
112 columnCount = Integer.parseInt(line);
113 if (columnCount <= 0) {
114 throw new NumberFormatException(
115 "number columns <= 0");
116 }
117 log.debug("Number of columns: " + columnCount);
118 wst.setNumberColumns(columnCount);
119 lsBezeichner = new String[columnCount];
120 }
121 catch (NumberFormatException nfe) {
122 log.warn(nfe);
123 continue;
124 }
125 first = false;
126 continue;
127 }
128
129 line = line.replace(',', '.');
130
131 if (line.startsWith("*\u001f")) {
132 BigDecimal [] data =
133 parseLineAsDouble(line, columnCount, false, true);
134
135 if (aktAbfluesse != null) {
136 addInterval(minKm, maxKm, aktAbfluesse);
137 minKm = MAX_RANGE;
138 maxKm = MIN_RANGE;
139 }
140
141 aktAbfluesse = new BigDecimal[columnCount];
142 log.debug("new q range: " + columnCount);
143 for (int i = 0; i < Math.min(columnCount, data.length); ++i) {
144 if (data[i] != null) {
145 log.debug(" column: " + data[i]);
146 aktAbfluesse[i] = data[i];
147 }
148 }
149
150 if (firstAbfluesse == null) {
151 firstAbfluesse = (BigDecimal [])aktAbfluesse.clone();
152 }
153 continue;
154 }
155
156 if (line.startsWith("*!")) {
157 String spezial = line.substring(2).trim();
158
159 if (spezial.length() == 0) {
160 continue;
161 }
162
163 if (spezial.startsWith(COLUMN_BEZ_TEXT)) {
164 spezial = spezial.substring(COLUMN_BEZ_TEXT.length()).trim();
165 if (spezial.length() == 0) {
166 continue;
167 }
168 langBezeichner = StringUtil.splitQuoted(spezial, '"');
169 }
170 else if (spezial.startsWith(COLUMN_BEZ_BREITE)) {
171 spezial = spezial.substring(COLUMN_BEZ_BREITE.length()).trim();
172
173 if (spezial.length() == 0) {
174 continue;
175 }
176
177 String[] split = spezial.split("\\s+");
178
179 colNaWidths = new int[split.length];
180 for (int i=0; i < split.length; i++) {
181 colNaWidths[i] = Integer.parseInt(split[i]);
182 }
183 }
184 else if (spezial.startsWith(COLUMN_QUELLE)) {
185 if (spezial.length() == 0) {
186 continue;
187 }
188 quellen = StringUtil.splitQuoted(spezial, '"');
189 }
190 else if (spezial.startsWith(COLUMN_DATUM)) {
191 spezial = spezial.substring(COLUMN_DATUM.length()).trim();
192 if (spezial.length() == 0) {
193 continue;
194 }
195 daten = StringUtil.splitQuoted(spezial, '"');
196 }
197 continue;
198 }
199
200 if (line.length() < 11) {
201 continue;
202 }
203
204 if (line.startsWith("*")) {
205 Matcher m = UNIT_COMMENT.matcher(line);
206 if (m.matches()) {
207 log.debug("unit comment found");
208 // XXX: This hack is needed because desktop
209 // FLYS is broken figuring out the unit
210 String [] units = m.group(1).split("\\s{2,}");
211 m = UNIT.matcher(units[0]);
212 einheit = m.matches() ? m.group(1) : units[0];
213 log.debug("unit: " + einheit);
214 }
215 continue;
216 }
217
218 if (firstAbfluesse != null) {
219 if (!columnHeaderChecked) {
220 int unknownCount = 0;
221 HashSet<String> uniqueColumnNames =
222 new HashSet<String>();
223 for (int i = 0; i < lsBezeichner.length; ++i) {
224 if (lsBezeichner[i] == null
225 || lsBezeichner[i].length() == 0) {
226 double q = firstAbfluesse[i].doubleValue();
227 if (q < 0.001) {
228 lsBezeichner[i] =
229 "<unbekannt #" + unknownCount + ">";
230 ++unknownCount;
231 }
232 else {
233 lsBezeichner[i] = "Q="+format(q);
234 }
235 }
236 String candidate = lsBezeichner[i];
237 int collision = 1;
238 while (!uniqueColumnNames.add(candidate)) {
239 candidate = lsBezeichner[i] +
240 " (" + collision + ")";
241 ++collision;
242 }
243 wst.getColumn(i).setName(candidate);
244 }
245 columnHeaderChecked = true;
246 }
247
248 BigDecimal [] data =
249 parseLineAsDouble(line, columnCount, true, false);
250
251 BigDecimal kaem = data[0];
252
253 if (!kms.add(kaem)) {
254 log.warn(
255 "km " + kaem +
256 " (line " + in.getLineNumber() +
257 ") found more than once. -> ignored");
258 continue;
259 }
260
261 if (kaem.compareTo(minKm) < 0) {
262 minKm = kaem;
263 }
264 if (kaem.compareTo(maxKm) > 0) {
265 maxKm = kaem;
266 }
267
268 // extract values
269 for (int i = 0; i < columnCount; ++i) {
270 addValue(kaem, data[i+1], i);
271 }
272
273 }
274 else { // firstAbfluesse == null
275 if (langBezeichner != null) {
276 lsBezeichner = StringUtil.fitArray(
277 langBezeichner, lsBezeichner);
278 }
279 else if (colNaWidths != null) {
280 for (int j = 0, i = 0, N = input.length();
281 j < colNaWidths.length && i < N;
282 i += colNaWidths[j++]
283 ) {
284 lsBezeichner[j] = input.substring(
285 i, i+colNaWidths[j]).trim();
286 }
287 }
288 else {
289 // first column begins at position 8 in line
290 for (int i = 8, col = 0; i < input.length(); i += 9) {
291 if ((i + 9) > input.length()) {
292 i = input.length() - 10;
293 }
294 // one column header is 9 chars wide
295 lsBezeichner[col++] =
296 input.substring(i, i + 9).trim();
297
298 if (col == lsBezeichner.length) {
299 break;
300 }
301 }
302 }
303 }
304
305 }
306 addInterval(minKm, maxKm, aktAbfluesse);
307 }
308 finally {
309 if (in != null) {
310 in.close();
311 }
312 }
313 }
314
315 protected void addValue(BigDecimal km, BigDecimal w, int index) {
316 if (w != null) {
317 ImportWstColumn column = wst.getColumn(index);
318 column.addColumnValue(km, w);
319 }
320 }
321
322 private static final NumberFormat NF = getNumberFormat();
323
324 private static final NumberFormat getNumberFormat() {
325 NumberFormat nf = NumberFormat.getInstance();
326 nf.setMinimumFractionDigits(2);
327 nf.setMaximumFractionDigits(2);
328 return nf;
329 }
330
331 protected static String format(double value) {
332 return NF.format(value);
333 }
334
335 protected void addInterval(
336 BigDecimal from,
337 BigDecimal to,
338 BigDecimal [] values
339 ) {
340 log.debug("addInterval: " + from + " " + to);
341
342 if (values == null || from == MAX_RANGE) {
343 return;
344 }
345
346 if (to.compareTo(from) < 0) {
347 BigDecimal t = from; from = to; to = t;
348 }
349
350 ImportRange range = new ImportRange(from, to);
351
352 // little workaround to make the q ranges tightly fit.
353 // Leave a very small gap to ensure that the range queries
354 // still work.
355
356 if (lastRange != null) {
357 double d1 = Math.abs(
358 lastRange.getB().doubleValue() - range.getA().doubleValue());
359 double d2 = Math.abs(
360 range.getB().doubleValue() - lastRange.getA().doubleValue());
361
362 if (d1 < d2) {
363 lastRange.setB(range.getA().subtract(INTERVAL_GAP));
364 }
365 else {
366 range.setA(lastRange.getB().subtract(INTERVAL_GAP));
367 }
368 }
369
370 for (int i = 0; i < values.length; ++i) {
371 ImportWstColumn column = wst.getColumn(i);
372 ImportWstQRange wstQRange = new ImportWstQRange(range, values[i]);
373 column.addColumnQRange(wstQRange);
374 }
375
376 lastRange = range;
377 }
378
379 private static final BigDecimal [] parseLineAsDouble(
380 String line,
381 int count,
382 boolean bStation,
383 boolean bParseEmptyAsZero
384 ) {
385 String [] tokens = parseLine(line, count, bStation);
386
387 BigDecimal [] doubles = new BigDecimal[tokens.length];
388
389 for (int i = 0; i < doubles.length; ++i) {
390 String token = tokens[i].trim();
391 if (token.length() != 0) {
392 doubles[i] = new BigDecimal(token);
393 }
394 else if (bParseEmptyAsZero) {
395 doubles[i] = UNDEFINED_ZERO;
396 }
397 }
398
399 return doubles;
400 }
401
402 private static String [] parseLine(
403 String line,
404 int tokenCount,
405 boolean bParseStation
406 ) {
407 ArrayList<String> strings = new ArrayList<String>();
408
409 if (bParseStation) {
410 if (line.length() < 8) {
411 throw new IllegalArgumentException("station too short");
412 }
413 strings.add(line.substring(0, 8));
414 }
415
416 int pos = 9;
417 for (int i = 0; i < tokenCount; ++i) {
418 if (line.length() >= pos + 8) {
419 strings.add(line.substring(pos, pos + 8));
420 }
421 else {
422 strings.add("");
423 }
424 pos += 9;
425 }
426
427 return strings.toArray(new String[strings.size()]);
428 }
429 }
430 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org