Mercurial > dive4elements > river
comparison flys-backend/src/main/java/de/intevation/flys/importer/WstParser.java @ 508:a9c7f6ec3a5a 2.3.1
merged flys-backend/2.3.1
author | Thomas Arendsen Hein <thomas@intevation.de> |
---|---|
date | Fri, 28 Sep 2012 12:14:12 +0200 |
parents | a92da0b3e8e7 |
children | 677a6fceea6e |
comparison
equal
deleted
inserted
replaced
462:ebf049a1eb53 | 508:a9c7f6ec3a5a |
---|---|
1 package de.intevation.flys.importer; | |
2 | |
3 import java.util.ArrayList; | |
4 import java.util.HashSet; | |
5 | |
6 import java.io.File; | |
7 import java.io.IOException; | |
8 import java.io.LineNumberReader; | |
9 import java.io.InputStreamReader; | |
10 import java.io.FileInputStream; | |
11 | |
12 import java.text.NumberFormat; | |
13 | |
14 import org.apache.log4j.Logger; | |
15 | |
16 import de.intevation.flys.utils.StringUtil; | |
17 | |
18 import java.util.regex.Pattern; | |
19 import java.util.regex.Matcher; | |
20 | |
21 import java.math.BigDecimal; | |
22 | |
23 public class WstParser | |
24 { | |
25 private static Logger log = Logger.getLogger(WstParser.class); | |
26 | |
27 public static final String COLUMN_BEZ_TEXT = "column-bez-text"; | |
28 public static final String COLUMN_BEZ_BREITE = "column-bez-breite"; | |
29 public static final String COLUMN_QUELLE = "column-quelle"; | |
30 public static final String COLUMN_DATUM = "column-datum"; | |
31 | |
32 public static final BigDecimal UNDEFINED_ZERO = | |
33 new BigDecimal(0.0); | |
34 public static final BigDecimal MIN_RANGE = | |
35 new BigDecimal(-Double.MAX_VALUE); | |
36 public static final BigDecimal MAX_RANGE = | |
37 new BigDecimal(Double.MAX_VALUE); | |
38 | |
39 public static final String ENCODING = "ISO-8859-1"; | |
40 | |
41 public static final Pattern UNIT_COMMENT = | |
42 Pattern.compile("\\*\\s*[kK][mM]\\s+(.+)"); | |
43 | |
44 public static final Pattern UNIT = | |
45 Pattern.compile("[^\\[]*\\[([^]]+)\\].*"); | |
46 | |
47 protected ImportWst wst; | |
48 | |
49 public WstParser() { | |
50 } | |
51 | |
52 public ImportWst getWst() { | |
53 return wst; | |
54 } | |
55 | |
56 public void setWst(ImportWst wst) { | |
57 this.wst = wst; | |
58 } | |
59 | |
60 public void parse(File file) throws IOException { | |
61 | |
62 log.info("Parsing WST file '" + file + "'"); | |
63 | |
64 wst = new ImportWst(file.getName()); | |
65 | |
66 LineNumberReader in = null; | |
67 try { | |
68 in = | |
69 new LineNumberReader( | |
70 new InputStreamReader( | |
71 new FileInputStream(file), ENCODING)); | |
72 | |
73 String input; | |
74 boolean first = true; | |
75 int columnCount = 0; | |
76 | |
77 String [] lsBezeichner = null; | |
78 String [] langBezeichner = null; | |
79 int [] colNaWidths = null; | |
80 String [] quellen = null; | |
81 String [] daten = null; | |
82 | |
83 BigDecimal [] aktAbfluesse = null; | |
84 BigDecimal [] firstAbfluesse = null; | |
85 | |
86 BigDecimal minKm = MAX_RANGE; | |
87 BigDecimal maxKm = MIN_RANGE; | |
88 | |
89 boolean columnHeaderChecked = false; | |
90 | |
91 String einheit = "Wasserstand [NN + m]"; | |
92 | |
93 HashSet<BigDecimal> kms = new HashSet<BigDecimal>(); | |
94 | |
95 while ((input = in.readLine()) != null) { | |
96 String line = input; | |
97 if (first) { // fetch number of columns | |
98 if ((line = line.trim()).length() == 0) { | |
99 continue; | |
100 } | |
101 try { | |
102 columnCount = Integer.parseInt(line); | |
103 if (columnCount <= 0) { | |
104 throw new NumberFormatException( | |
105 "number columns <= 0"); | |
106 } | |
107 log.debug("Number of columns: " + columnCount); | |
108 wst.setNumberColumns(columnCount); | |
109 lsBezeichner = new String[columnCount]; | |
110 } | |
111 catch (NumberFormatException nfe) { | |
112 log.warn(nfe); | |
113 continue; | |
114 } | |
115 first = false; | |
116 continue; | |
117 } | |
118 | |
119 line = line.replace(',', '.'); | |
120 | |
121 if (line.startsWith("*\u001f")) { | |
122 BigDecimal [] data = | |
123 parseLineAsDouble(line, columnCount, false, true); | |
124 | |
125 if (aktAbfluesse != null) { | |
126 addInterval(minKm, maxKm, aktAbfluesse); | |
127 minKm = MAX_RANGE; | |
128 maxKm = MIN_RANGE; | |
129 } | |
130 | |
131 aktAbfluesse = new BigDecimal[columnCount]; | |
132 log.debug("new q range: " + columnCount); | |
133 for (int i = 0; i < Math.min(columnCount, data.length); ++i) { | |
134 if (data[i] != null) { | |
135 log.debug(" column: " + data[i]); | |
136 aktAbfluesse[i] = data[i]; | |
137 } | |
138 } | |
139 | |
140 if (firstAbfluesse == null) { | |
141 firstAbfluesse = (BigDecimal [])aktAbfluesse.clone(); | |
142 } | |
143 continue; | |
144 } | |
145 | |
146 if (line.startsWith("*!")) { | |
147 String spezial = line.substring(2).trim(); | |
148 | |
149 if (spezial.length() == 0) { | |
150 continue; | |
151 } | |
152 | |
153 if (spezial.startsWith(COLUMN_BEZ_TEXT)) { | |
154 spezial = spezial.substring(COLUMN_BEZ_TEXT.length()).trim(); | |
155 if (spezial.length() == 0) { | |
156 continue; | |
157 } | |
158 langBezeichner = StringUtil.splitQuoted(spezial, '"'); | |
159 } | |
160 else if (spezial.startsWith(COLUMN_BEZ_BREITE)) { | |
161 spezial = spezial.substring(COLUMN_BEZ_BREITE.length()).trim(); | |
162 | |
163 if (spezial.length() == 0) { | |
164 continue; | |
165 } | |
166 | |
167 String[] split = spezial.split("\\s+"); | |
168 | |
169 colNaWidths = new int[split.length]; | |
170 for (int i=0; i < split.length; i++) { | |
171 colNaWidths[i] = Integer.parseInt(split[i]); | |
172 } | |
173 } | |
174 else if (spezial.startsWith(COLUMN_QUELLE)) { | |
175 if (spezial.length() == 0) { | |
176 continue; | |
177 } | |
178 quellen = StringUtil.splitQuoted(spezial, '"'); | |
179 } | |
180 else if (spezial.startsWith(COLUMN_DATUM)) { | |
181 spezial = spezial.substring(COLUMN_DATUM.length()).trim(); | |
182 if (spezial.length() == 0) { | |
183 continue; | |
184 } | |
185 daten = StringUtil.splitQuoted(spezial, '"'); | |
186 } | |
187 continue; | |
188 } | |
189 | |
190 if (line.length() < 11) { | |
191 continue; | |
192 } | |
193 | |
194 if (line.startsWith("*")) { | |
195 Matcher m = UNIT_COMMENT.matcher(line); | |
196 if (m.matches()) { | |
197 log.debug("unit comment found"); | |
198 // XXX: This hack is needed because desktop | |
199 // FLYS is broken figuring out the unit | |
200 String [] units = m.group(1).split("\\s{2,}"); | |
201 m = UNIT.matcher(units[0]); | |
202 einheit = m.matches() ? m.group(1) : units[0]; | |
203 log.debug("unit: " + einheit); | |
204 } | |
205 continue; | |
206 } | |
207 | |
208 if (firstAbfluesse != null) { | |
209 if (!columnHeaderChecked) { | |
210 int unknownCount = 0; | |
211 HashSet<String> uniqueColumnNames = | |
212 new HashSet<String>(); | |
213 for (int i = 0; i < lsBezeichner.length; ++i) { | |
214 if (lsBezeichner[i] == null | |
215 || lsBezeichner[i].length() == 0) { | |
216 double q = firstAbfluesse[i].doubleValue(); | |
217 if (q < 0.001) { | |
218 lsBezeichner[i] = | |
219 "<unbekannt #" + unknownCount + ">"; | |
220 ++unknownCount; | |
221 } | |
222 else { | |
223 lsBezeichner[i] = "Q="+format(q); | |
224 } | |
225 } | |
226 String candidate = lsBezeichner[i]; | |
227 int collision = 1; | |
228 while (!uniqueColumnNames.add(candidate)) { | |
229 candidate = lsBezeichner[i] + | |
230 " (" + collision + ")"; | |
231 ++collision; | |
232 } | |
233 wst.getColumn(i).setName(candidate); | |
234 } | |
235 columnHeaderChecked = true; | |
236 } | |
237 | |
238 BigDecimal [] data = | |
239 parseLineAsDouble(line, columnCount, true, false); | |
240 | |
241 BigDecimal kaem = data[0]; | |
242 | |
243 if (!kms.add(kaem)) { | |
244 log.warn( | |
245 "km " + kaem + | |
246 " (line " + in.getLineNumber() + | |
247 ") found more than once. -> ignored"); | |
248 continue; | |
249 } | |
250 | |
251 if (kaem.compareTo(minKm) < 0) { | |
252 minKm = kaem; | |
253 } | |
254 if (kaem.compareTo(maxKm) > 0) { | |
255 maxKm = kaem; | |
256 } | |
257 | |
258 // extract values | |
259 for (int i = 0; i < columnCount; ++i) { | |
260 addValue(kaem, data[i+1], i); | |
261 } | |
262 | |
263 } | |
264 else { // firstAbfluesse == null | |
265 if (langBezeichner != null) { | |
266 lsBezeichner = StringUtil.fitArray( | |
267 langBezeichner, lsBezeichner); | |
268 } | |
269 else if (colNaWidths != null) { | |
270 for (int j = 0, i = 0, N = input.length(); | |
271 j < colNaWidths.length && i < N; | |
272 i += colNaWidths[j++] | |
273 ) { | |
274 lsBezeichner[j] = input.substring( | |
275 i, i+colNaWidths[j]).trim(); | |
276 } | |
277 } | |
278 else { | |
279 // first column begins at position 8 in line | |
280 for (int i = 8, col = 0; i < input.length(); i += 9) { | |
281 if ((i + 9) > input.length()) { | |
282 i = input.length() - 10; | |
283 } | |
284 // one column header is 9 chars wide | |
285 lsBezeichner[col++] = | |
286 input.substring(i, i + 9).trim(); | |
287 | |
288 if (col == lsBezeichner.length) { | |
289 break; | |
290 } | |
291 } | |
292 } | |
293 } | |
294 | |
295 } | |
296 addInterval(minKm, maxKm, aktAbfluesse); | |
297 } | |
298 finally { | |
299 if (in != null) { | |
300 in.close(); | |
301 } | |
302 } | |
303 } | |
304 | |
305 protected void addValue(BigDecimal km, BigDecimal w, int index) { | |
306 if (w != null) { | |
307 ImportWstColumn column = wst.getColumn(index); | |
308 column.addColumnValue(km, w); | |
309 } | |
310 } | |
311 | |
312 private static final NumberFormat NF = getNumberFormat(); | |
313 | |
314 private static final NumberFormat getNumberFormat() { | |
315 NumberFormat nf = NumberFormat.getInstance(); | |
316 nf.setMinimumFractionDigits(2); | |
317 nf.setMaximumFractionDigits(2); | |
318 return nf; | |
319 } | |
320 | |
321 protected static String format(double value) { | |
322 return NF.format(value); | |
323 } | |
324 | |
325 protected void addInterval( | |
326 BigDecimal from, | |
327 BigDecimal to, | |
328 BigDecimal [] values | |
329 ) { | |
330 log.debug("addInterval: " + from + " " + to); | |
331 | |
332 if (values == null || from == MAX_RANGE) { | |
333 return; | |
334 } | |
335 | |
336 if (to.compareTo(from) < 0) { | |
337 BigDecimal t = from; from = to; to = t; | |
338 } | |
339 | |
340 ImportRange range = new ImportRange(from, to); | |
341 | |
342 for (int i = 0; i < values.length; ++i) { | |
343 ImportWstColumn column = wst.getColumn(i); | |
344 ImportWstQRange wstQRange = new ImportWstQRange(range, values[i]); | |
345 column.addColumnQRange(wstQRange); | |
346 } | |
347 } | |
348 | |
349 private static final BigDecimal [] parseLineAsDouble( | |
350 String line, | |
351 int count, | |
352 boolean bStation, | |
353 boolean bParseEmptyAsZero | |
354 ) { | |
355 String [] tokens = parseLine(line, count, bStation); | |
356 | |
357 BigDecimal [] doubles = new BigDecimal[tokens.length]; | |
358 | |
359 for (int i = 0; i < doubles.length; ++i) { | |
360 String token = tokens[i].trim(); | |
361 if (token.length() != 0) { | |
362 doubles[i] = new BigDecimal(token); | |
363 } | |
364 else if (bParseEmptyAsZero) { | |
365 doubles[i] = UNDEFINED_ZERO; | |
366 } | |
367 } | |
368 | |
369 return doubles; | |
370 } | |
371 | |
372 private static String [] parseLine( | |
373 String line, | |
374 int tokenCount, | |
375 boolean bParseStation | |
376 ) { | |
377 ArrayList<String> strings = new ArrayList<String>(); | |
378 | |
379 if (bParseStation) { | |
380 if (line.length() < 8) { | |
381 throw new IllegalArgumentException("station too short"); | |
382 } | |
383 strings.add(line.substring(0, 8)); | |
384 } | |
385 | |
386 int pos = 9; | |
387 for (int i = 0; i < tokenCount; ++i) { | |
388 if (line.length() >= pos + 8) { | |
389 strings.add(line.substring(pos, pos + 8)); | |
390 } | |
391 else { | |
392 strings.add(""); | |
393 } | |
394 pos += 9; | |
395 } | |
396 | |
397 return strings.toArray(new String[strings.size()]); | |
398 } | |
399 } | |
400 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 : |