comparison flys-backend/src/main/java/de/intevation/flys/importer/WstParser.java @ 508:a9c7f6ec3a5a 2.3.1

merged flys-backend/2.3.1
author Thomas Arendsen Hein <thomas@intevation.de>
date Fri, 28 Sep 2012 12:14:12 +0200
parents a92da0b3e8e7
children 677a6fceea6e
comparison
equal deleted inserted replaced
462:ebf049a1eb53 508:a9c7f6ec3a5a
1 package de.intevation.flys.importer;
2
3 import java.util.ArrayList;
4 import java.util.HashSet;
5
6 import java.io.File;
7 import java.io.IOException;
8 import java.io.LineNumberReader;
9 import java.io.InputStreamReader;
10 import java.io.FileInputStream;
11
12 import java.text.NumberFormat;
13
14 import org.apache.log4j.Logger;
15
16 import de.intevation.flys.utils.StringUtil;
17
18 import java.util.regex.Pattern;
19 import java.util.regex.Matcher;
20
21 import java.math.BigDecimal;
22
23 public class WstParser
24 {
25 private static Logger log = Logger.getLogger(WstParser.class);
26
27 public static final String COLUMN_BEZ_TEXT = "column-bez-text";
28 public static final String COLUMN_BEZ_BREITE = "column-bez-breite";
29 public static final String COLUMN_QUELLE = "column-quelle";
30 public static final String COLUMN_DATUM = "column-datum";
31
32 public static final BigDecimal UNDEFINED_ZERO =
33 new BigDecimal(0.0);
34 public static final BigDecimal MIN_RANGE =
35 new BigDecimal(-Double.MAX_VALUE);
36 public static final BigDecimal MAX_RANGE =
37 new BigDecimal(Double.MAX_VALUE);
38
39 public static final String ENCODING = "ISO-8859-1";
40
41 public static final Pattern UNIT_COMMENT =
42 Pattern.compile("\\*\\s*[kK][mM]\\s+(.+)");
43
44 public static final Pattern UNIT =
45 Pattern.compile("[^\\[]*\\[([^]]+)\\].*");
46
47 protected ImportWst wst;
48
49 public WstParser() {
50 }
51
52 public ImportWst getWst() {
53 return wst;
54 }
55
56 public void setWst(ImportWst wst) {
57 this.wst = wst;
58 }
59
60 public void parse(File file) throws IOException {
61
62 log.info("Parsing WST file '" + file + "'");
63
64 wst = new ImportWst(file.getName());
65
66 LineNumberReader in = null;
67 try {
68 in =
69 new LineNumberReader(
70 new InputStreamReader(
71 new FileInputStream(file), ENCODING));
72
73 String input;
74 boolean first = true;
75 int columnCount = 0;
76
77 String [] lsBezeichner = null;
78 String [] langBezeichner = null;
79 int [] colNaWidths = null;
80 String [] quellen = null;
81 String [] daten = null;
82
83 BigDecimal [] aktAbfluesse = null;
84 BigDecimal [] firstAbfluesse = null;
85
86 BigDecimal minKm = MAX_RANGE;
87 BigDecimal maxKm = MIN_RANGE;
88
89 boolean columnHeaderChecked = false;
90
91 String einheit = "Wasserstand [NN + m]";
92
93 HashSet<BigDecimal> kms = new HashSet<BigDecimal>();
94
95 while ((input = in.readLine()) != null) {
96 String line = input;
97 if (first) { // fetch number of columns
98 if ((line = line.trim()).length() == 0) {
99 continue;
100 }
101 try {
102 columnCount = Integer.parseInt(line);
103 if (columnCount <= 0) {
104 throw new NumberFormatException(
105 "number columns <= 0");
106 }
107 log.debug("Number of columns: " + columnCount);
108 wst.setNumberColumns(columnCount);
109 lsBezeichner = new String[columnCount];
110 }
111 catch (NumberFormatException nfe) {
112 log.warn(nfe);
113 continue;
114 }
115 first = false;
116 continue;
117 }
118
119 line = line.replace(',', '.');
120
121 if (line.startsWith("*\u001f")) {
122 BigDecimal [] data =
123 parseLineAsDouble(line, columnCount, false, true);
124
125 if (aktAbfluesse != null) {
126 addInterval(minKm, maxKm, aktAbfluesse);
127 minKm = MAX_RANGE;
128 maxKm = MIN_RANGE;
129 }
130
131 aktAbfluesse = new BigDecimal[columnCount];
132 log.debug("new q range: " + columnCount);
133 for (int i = 0; i < Math.min(columnCount, data.length); ++i) {
134 if (data[i] != null) {
135 log.debug(" column: " + data[i]);
136 aktAbfluesse[i] = data[i];
137 }
138 }
139
140 if (firstAbfluesse == null) {
141 firstAbfluesse = (BigDecimal [])aktAbfluesse.clone();
142 }
143 continue;
144 }
145
146 if (line.startsWith("*!")) {
147 String spezial = line.substring(2).trim();
148
149 if (spezial.length() == 0) {
150 continue;
151 }
152
153 if (spezial.startsWith(COLUMN_BEZ_TEXT)) {
154 spezial = spezial.substring(COLUMN_BEZ_TEXT.length()).trim();
155 if (spezial.length() == 0) {
156 continue;
157 }
158 langBezeichner = StringUtil.splitQuoted(spezial, '"');
159 }
160 else if (spezial.startsWith(COLUMN_BEZ_BREITE)) {
161 spezial = spezial.substring(COLUMN_BEZ_BREITE.length()).trim();
162
163 if (spezial.length() == 0) {
164 continue;
165 }
166
167 String[] split = spezial.split("\\s+");
168
169 colNaWidths = new int[split.length];
170 for (int i=0; i < split.length; i++) {
171 colNaWidths[i] = Integer.parseInt(split[i]);
172 }
173 }
174 else if (spezial.startsWith(COLUMN_QUELLE)) {
175 if (spezial.length() == 0) {
176 continue;
177 }
178 quellen = StringUtil.splitQuoted(spezial, '"');
179 }
180 else if (spezial.startsWith(COLUMN_DATUM)) {
181 spezial = spezial.substring(COLUMN_DATUM.length()).trim();
182 if (spezial.length() == 0) {
183 continue;
184 }
185 daten = StringUtil.splitQuoted(spezial, '"');
186 }
187 continue;
188 }
189
190 if (line.length() < 11) {
191 continue;
192 }
193
194 if (line.startsWith("*")) {
195 Matcher m = UNIT_COMMENT.matcher(line);
196 if (m.matches()) {
197 log.debug("unit comment found");
198 // XXX: This hack is needed because desktop
199 // FLYS is broken figuring out the unit
200 String [] units = m.group(1).split("\\s{2,}");
201 m = UNIT.matcher(units[0]);
202 einheit = m.matches() ? m.group(1) : units[0];
203 log.debug("unit: " + einheit);
204 }
205 continue;
206 }
207
208 if (firstAbfluesse != null) {
209 if (!columnHeaderChecked) {
210 int unknownCount = 0;
211 HashSet<String> uniqueColumnNames =
212 new HashSet<String>();
213 for (int i = 0; i < lsBezeichner.length; ++i) {
214 if (lsBezeichner[i] == null
215 || lsBezeichner[i].length() == 0) {
216 double q = firstAbfluesse[i].doubleValue();
217 if (q < 0.001) {
218 lsBezeichner[i] =
219 "<unbekannt #" + unknownCount + ">";
220 ++unknownCount;
221 }
222 else {
223 lsBezeichner[i] = "Q="+format(q);
224 }
225 }
226 String candidate = lsBezeichner[i];
227 int collision = 1;
228 while (!uniqueColumnNames.add(candidate)) {
229 candidate = lsBezeichner[i] +
230 " (" + collision + ")";
231 ++collision;
232 }
233 wst.getColumn(i).setName(candidate);
234 }
235 columnHeaderChecked = true;
236 }
237
238 BigDecimal [] data =
239 parseLineAsDouble(line, columnCount, true, false);
240
241 BigDecimal kaem = data[0];
242
243 if (!kms.add(kaem)) {
244 log.warn(
245 "km " + kaem +
246 " (line " + in.getLineNumber() +
247 ") found more than once. -> ignored");
248 continue;
249 }
250
251 if (kaem.compareTo(minKm) < 0) {
252 minKm = kaem;
253 }
254 if (kaem.compareTo(maxKm) > 0) {
255 maxKm = kaem;
256 }
257
258 // extract values
259 for (int i = 0; i < columnCount; ++i) {
260 addValue(kaem, data[i+1], i);
261 }
262
263 }
264 else { // firstAbfluesse == null
265 if (langBezeichner != null) {
266 lsBezeichner = StringUtil.fitArray(
267 langBezeichner, lsBezeichner);
268 }
269 else if (colNaWidths != null) {
270 for (int j = 0, i = 0, N = input.length();
271 j < colNaWidths.length && i < N;
272 i += colNaWidths[j++]
273 ) {
274 lsBezeichner[j] = input.substring(
275 i, i+colNaWidths[j]).trim();
276 }
277 }
278 else {
279 // first column begins at position 8 in line
280 for (int i = 8, col = 0; i < input.length(); i += 9) {
281 if ((i + 9) > input.length()) {
282 i = input.length() - 10;
283 }
284 // one column header is 9 chars wide
285 lsBezeichner[col++] =
286 input.substring(i, i + 9).trim();
287
288 if (col == lsBezeichner.length) {
289 break;
290 }
291 }
292 }
293 }
294
295 }
296 addInterval(minKm, maxKm, aktAbfluesse);
297 }
298 finally {
299 if (in != null) {
300 in.close();
301 }
302 }
303 }
304
305 protected void addValue(BigDecimal km, BigDecimal w, int index) {
306 if (w != null) {
307 ImportWstColumn column = wst.getColumn(index);
308 column.addColumnValue(km, w);
309 }
310 }
311
312 private static final NumberFormat NF = getNumberFormat();
313
314 private static final NumberFormat getNumberFormat() {
315 NumberFormat nf = NumberFormat.getInstance();
316 nf.setMinimumFractionDigits(2);
317 nf.setMaximumFractionDigits(2);
318 return nf;
319 }
320
321 protected static String format(double value) {
322 return NF.format(value);
323 }
324
325 protected void addInterval(
326 BigDecimal from,
327 BigDecimal to,
328 BigDecimal [] values
329 ) {
330 log.debug("addInterval: " + from + " " + to);
331
332 if (values == null || from == MAX_RANGE) {
333 return;
334 }
335
336 if (to.compareTo(from) < 0) {
337 BigDecimal t = from; from = to; to = t;
338 }
339
340 ImportRange range = new ImportRange(from, to);
341
342 for (int i = 0; i < values.length; ++i) {
343 ImportWstColumn column = wst.getColumn(i);
344 ImportWstQRange wstQRange = new ImportWstQRange(range, values[i]);
345 column.addColumnQRange(wstQRange);
346 }
347 }
348
349 private static final BigDecimal [] parseLineAsDouble(
350 String line,
351 int count,
352 boolean bStation,
353 boolean bParseEmptyAsZero
354 ) {
355 String [] tokens = parseLine(line, count, bStation);
356
357 BigDecimal [] doubles = new BigDecimal[tokens.length];
358
359 for (int i = 0; i < doubles.length; ++i) {
360 String token = tokens[i].trim();
361 if (token.length() != 0) {
362 doubles[i] = new BigDecimal(token);
363 }
364 else if (bParseEmptyAsZero) {
365 doubles[i] = UNDEFINED_ZERO;
366 }
367 }
368
369 return doubles;
370 }
371
372 private static String [] parseLine(
373 String line,
374 int tokenCount,
375 boolean bParseStation
376 ) {
377 ArrayList<String> strings = new ArrayList<String>();
378
379 if (bParseStation) {
380 if (line.length() < 8) {
381 throw new IllegalArgumentException("station too short");
382 }
383 strings.add(line.substring(0, 8));
384 }
385
386 int pos = 9;
387 for (int i = 0; i < tokenCount; ++i) {
388 if (line.length() >= pos + 8) {
389 strings.add(line.substring(pos, pos + 8));
390 }
391 else {
392 strings.add("");
393 }
394 pos += 9;
395 }
396
397 return strings.toArray(new String[strings.size()]);
398 }
399 }
400 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org