Mercurial > dive4elements > river
comparison flys-backend/src/main/java/de/intevation/flys/importer/WstParser.java @ 199:ed38839a6b08
Ported over some WST parsing stuff from desktop flys
flys-backend/trunk@1538 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author | Sascha L. Teichmann <sascha.teichmann@intevation.de> |
---|---|
date | Tue, 22 Mar 2011 15:48:09 +0000 |
parents | c0dcc2357106 |
children | 88048d4f6e4d |
comparison
equal
deleted
inserted
replaced
198:d980e545ccab | 199:ed38839a6b08 |
---|---|
1 package de.intevation.flys.importer; | 1 package de.intevation.flys.importer; |
2 | |
3 import java.util.ArrayList; | |
4 import java.util.Map; | |
5 import java.util.HashMap; | |
2 | 6 |
3 import java.io.File; | 7 import java.io.File; |
4 import java.io.IOException; | 8 import java.io.IOException; |
9 import java.io.LineNumberReader; | |
10 import java.io.InputStreamReader; | |
11 import java.io.FileInputStream; | |
12 | |
13 import java.text.NumberFormat; | |
14 | |
15 import org.apache.log4j.Logger; | |
16 | |
17 import de.intevation.flys.utils.StringUtil; | |
5 | 18 |
6 public class WstParser | 19 public class WstParser |
7 { | 20 { |
21 private static Logger log = Logger.getLogger(WstParser.class); | |
22 | |
23 public static final String COLUMN_BEZ_TEXT = "column-bez-text"; | |
24 public static final String COLUMN_BEZ_BREITE = "column-bez-breite"; | |
25 public static final String COLUMN_QUELLE = "column-quelle"; | |
26 public static final String COLUMN_DATUM = "column-datum"; | |
27 | |
28 public static final Double UNDEFINED_ZERO = Double.valueOf(0.0); | |
29 | |
30 public static final String ENCODING = "ISO-8859-1"; | |
31 | |
8 public WstParser() { | 32 public WstParser() { |
9 } | 33 } |
10 | 34 |
11 public void parse(File file) throws IOException { | 35 public void parse(File file) throws IOException { |
12 // TODO: Implement me! | 36 |
37 log.info("Parsing WST file '" + file + "'"); | |
38 | |
39 LineNumberReader in = null; | |
40 try { | |
41 in = | |
42 new LineNumberReader( | |
43 new InputStreamReader( | |
44 new FileInputStream(file), ENCODING)); | |
45 | |
46 String input; | |
47 boolean first = true; | |
48 int columnCount = 0; | |
49 | |
50 String [] lsHeader = null; | |
51 String [] lsBezeichner = null; | |
52 String [] langBezeichner = null; | |
53 int [] colNaWidths = null; | |
54 String [] quellen = null; | |
55 String [] daten = null; | |
56 double [] aktAbfluesse = null; | |
57 double [] firstAbfluesse = null; | |
58 | |
59 double minKm = Double.MAX_VALUE; | |
60 double maxKm = -Double.MAX_VALUE; | |
61 | |
62 boolean bFirstComment = true; | |
63 boolean columnHeaderChecked = false; | |
64 | |
65 double lastKm = Double.MAX_VALUE; | |
66 | |
67 String einheit = "Wassserstand [NN + m]"; | |
68 | |
69 HashMap<String, Double> oldEscapeLine = null; | |
70 | |
71 while ((input = in.readLine()) != null) { | |
72 String line = input; | |
73 if (first) { // fetch number of columns | |
74 if ((line = line.trim()).length() == 0) { | |
75 continue; | |
76 } | |
77 try { | |
78 columnCount = Integer.parseInt(line); | |
79 if (columnCount <= 0) { | |
80 throw new NumberFormatException( | |
81 "number columns <= 0"); | |
82 } | |
83 log.debug("Number of columns: " + columnCount); | |
84 lsBezeichner = new String[columnCount]; | |
85 lsHeader = new String[columnCount]; | |
86 aktAbfluesse = new double[columnCount]; | |
87 } | |
88 catch (NumberFormatException nfe) { | |
89 log.warn(nfe); | |
90 continue; | |
91 } | |
92 first = false; | |
93 continue; | |
94 } | |
95 | |
96 line = line.replace(',', '.'); | |
97 | |
98 if (line.startsWith("*\u001f")) { | |
99 Double [] data = | |
100 parseLineAsDouble(line, columnCount, false, true); | |
101 | |
102 if (oldEscapeLine != null) { | |
103 addInterval(minKm, maxKm, oldEscapeLine); | |
104 minKm = Double.MAX_VALUE; | |
105 maxKm = -Double.MAX_VALUE; | |
106 } | |
107 | |
108 oldEscapeLine = new HashMap<String, Double>(); | |
109 for (int i = 0; i < columnCount; ++i) { | |
110 if (lsHeader[i] != null) { | |
111 oldEscapeLine.put(lsHeader[i], data[i]); | |
112 } | |
113 } | |
114 | |
115 for (int i = Math.min(data.length, aktAbfluesse.length)-1; | |
116 i >= 0; --i) { | |
117 aktAbfluesse[i] = data[i].doubleValue(); | |
118 } | |
119 | |
120 if (firstAbfluesse == null) { | |
121 firstAbfluesse = (double [])aktAbfluesse.clone(); | |
122 } | |
123 continue; | |
124 } | |
125 | |
126 if (line.startsWith("*!")) { | |
127 String spezial = line.substring(2).trim(); | |
128 | |
129 if (spezial.length() == 0) { | |
130 continue; | |
131 } | |
132 | |
133 if (spezial.startsWith(COLUMN_BEZ_TEXT)) { | |
134 spezial = spezial.substring(COLUMN_BEZ_TEXT.length()).trim(); | |
135 if (spezial.length() == 0) { | |
136 continue; | |
137 } | |
138 langBezeichner = StringUtil.splitQuoted(spezial, '"'); | |
139 } | |
140 else if (spezial.startsWith(COLUMN_BEZ_BREITE)) { | |
141 spezial = spezial.substring(COLUMN_BEZ_BREITE.length()).trim(); | |
142 | |
143 if (spezial.length() == 0) { | |
144 continue; | |
145 } | |
146 | |
147 String[] split = spezial.split("\\s+"); | |
148 | |
149 colNaWidths = new int[split.length]; | |
150 for (int i=0; i < split.length; i++) { | |
151 colNaWidths[i] = Integer.parseInt(split[i]); | |
152 } | |
153 } | |
154 else if (spezial.startsWith(COLUMN_QUELLE)) { | |
155 if (spezial.length() == 0) { | |
156 continue; | |
157 } | |
158 quellen = StringUtil.splitQuoted(spezial, '"'); | |
159 } | |
160 else if (spezial.startsWith(COLUMN_DATUM)) { | |
161 spezial = spezial.substring(COLUMN_DATUM.length()).trim(); | |
162 if (spezial.length() == 0) { | |
163 continue; | |
164 } | |
165 daten = StringUtil.splitQuoted(spezial, '"'); | |
166 } | |
167 continue; | |
168 } | |
169 | |
170 if (line.startsWith("*")) { | |
171 if (bFirstComment && line.length() >= 11) { | |
172 String yAxis = line.substring(10).trim(); | |
173 if (yAxis.length() > 0) { | |
174 einheit = yAxis; | |
175 } | |
176 bFirstComment = false; | |
177 } | |
178 continue; | |
179 } | |
180 | |
181 if (firstAbfluesse != null) { | |
182 if (!columnHeaderChecked) { | |
183 int unknownCount = 0; | |
184 for (int i = 0; i < lsHeader.length; ++i) { | |
185 if (lsBezeichner[i] == null | |
186 || lsBezeichner[i].length() == 0) { | |
187 double q = firstAbfluesse[i]; | |
188 if (q < 0.001) { | |
189 lsBezeichner[i] = | |
190 "<unbekannt#" + unknownCount + ">"; | |
191 ++unknownCount; | |
192 } | |
193 else { | |
194 lsBezeichner[i] = "Q="+format(q); | |
195 } | |
196 } | |
197 lsHeader[i] = lsBezeichner[i] + " " + einheit; | |
198 } | |
199 columnHeaderChecked = true; | |
200 } | |
201 | |
202 Double [] data = | |
203 parseLineAsDouble(line, columnCount, true, false); | |
204 | |
205 double kaem = data[0]; | |
206 | |
207 if (kaem < minKm) { | |
208 minKm = kaem; | |
209 } | |
210 if (kaem > maxKm) { | |
211 maxKm = kaem; | |
212 } | |
213 | |
214 lastKm = kaem; | |
215 | |
216 // extract values | |
217 for (int i = 0; i < columnCount; ++i) { | |
218 addValue(kaem, data[i].doubleValue(), lsBezeichner[i]); | |
219 } | |
220 | |
221 } | |
222 else { // firstAbfluesse == null | |
223 if (langBezeichner != null) { | |
224 lsBezeichner = StringUtil.fitArray( | |
225 langBezeichner, lsBezeichner); | |
226 } | |
227 else if (colNaWidths != null) { | |
228 for (int j = 0, i = 0, N = input.length(); | |
229 j < colNaWidths.length && i < N; | |
230 i += colNaWidths[j++] | |
231 ) { | |
232 lsBezeichner[j] = input.substring( | |
233 i, i+colNaWidths[j]).trim(); | |
234 } | |
235 } | |
236 else { | |
237 // first column begins at position 8 in line | |
238 for (int i = 8, col = 0; i < input.length(); i += 9) { | |
239 if ((i + 9) > input.length()) { | |
240 i = input.length() - 10; | |
241 } | |
242 // one column header is 9 chars wide | |
243 lsBezeichner[col++] = | |
244 input.substring(i, i + 9).trim(); | |
245 | |
246 if (col == lsBezeichner.length) { | |
247 break; | |
248 } | |
249 } | |
250 } | |
251 } | |
252 | |
253 } | |
254 addInterval(minKm, maxKm, oldEscapeLine); | |
255 } | |
256 finally { | |
257 if (in != null) { | |
258 in.close(); | |
259 } | |
260 } | |
261 } | |
262 | |
263 protected void addValue(double km, double w, String columnName) { | |
264 // TODO: store me! | |
265 } | |
266 | |
267 protected static String format(double value) { | |
268 NumberFormat nf = NumberFormat.getInstance(); | |
269 nf.setMinimumFractionDigits(2); | |
270 nf.setMaximumFractionDigits(2); | |
271 return nf.format(value); | |
272 } | |
273 | |
274 protected void addInterval( | |
275 double from, | |
276 double to, | |
277 Map<String, Double> values | |
278 ) { | |
279 log.debug("addInterval: " + from + " " + to); | |
280 if (values == null) { | |
281 return; | |
282 } | |
283 } | |
284 | |
285 private static final Double [] parseLineAsDouble( | |
286 String line, | |
287 int count, | |
288 boolean bStation, | |
289 boolean bParseEmptyAsZero | |
290 ) { | |
291 String [] tokens = parseLine(line, count, bStation); | |
292 | |
293 Double [] doubles = new Double[tokens.length]; | |
294 | |
295 for (int i = 0; i < doubles.length; ++i) { | |
296 String token = tokens[i].trim(); | |
297 if (token.length() != 0) { | |
298 doubles[i] = Double.valueOf(token); | |
299 } | |
300 else if (bParseEmptyAsZero) { | |
301 doubles[i] = UNDEFINED_ZERO; | |
302 } | |
303 } | |
304 | |
305 return doubles; | |
306 } | |
307 | |
308 private static String [] parseLine( | |
309 String line, | |
310 int tokenCount, | |
311 boolean bParseStation | |
312 ) { | |
313 ArrayList<String> strings = new ArrayList<String>(); | |
314 | |
315 if (bParseStation) { | |
316 if (line.length() < 8) { | |
317 throw new IllegalArgumentException("station too short"); | |
318 } | |
319 strings.add(line.substring(0, 8)); | |
320 } | |
321 | |
322 int pos = 9; | |
323 for (int i = 0; i < tokenCount; ++i) { | |
324 if (line.length() >= pos + 8) { | |
325 strings.add(line.substring(pos, pos + 8)); | |
326 } | |
327 else { | |
328 strings.add(""); | |
329 } | |
330 pos += 9; | |
331 } | |
332 | |
333 return strings.toArray(new String[strings.size()]); | |
13 } | 334 } |
14 } | 335 } |
15 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 : | 336 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 : |