comparison flys-backend/src/main/java/de/intevation/flys/importer/WstParser.java @ 199:ed38839a6b08

Ported over some WST parsing stuff from desktop flys. (flys-backend/trunk@1538 c6561f87-3c4e-4783-a992-168aeb5c3f6f)
author Sascha L. Teichmann <sascha.teichmann@intevation.de>
date Tue, 22 Mar 2011 15:48:09 +0000
parents c0dcc2357106
children 88048d4f6e4d
comparison
equal deleted inserted replaced
198:d980e545ccab 199:ed38839a6b08
1 package de.intevation.flys.importer; 1 package de.intevation.flys.importer;
2
3 import java.util.ArrayList;
4 import java.util.Map;
5 import java.util.HashMap;
2 6
3 import java.io.File; 7 import java.io.File;
4 import java.io.IOException; 8 import java.io.IOException;
9 import java.io.LineNumberReader;
10 import java.io.InputStreamReader;
11 import java.io.FileInputStream;
12
13 import java.text.NumberFormat;
14
15 import org.apache.log4j.Logger;
16
17 import de.intevation.flys.utils.StringUtil;
5 18
6 public class WstParser 19 public class WstParser
7 { 20 {
/** Logger shared by all instances of this parser. */
private static final Logger log = Logger.getLogger(WstParser.class);

/** Keyword of the "*!" directive that carries quoted long column names. */
public static final String COLUMN_BEZ_TEXT   = "column-bez-text";

/** Keyword of the "*!" directive that carries the column name widths. */
public static final String COLUMN_BEZ_BREITE = "column-bez-breite";

/** Keyword of the "*!" directive that carries quoted column sources. */
public static final String COLUMN_QUELLE     = "column-quelle";

/** Keyword of the "*!" directive that carries quoted column dates. */
public static final String COLUMN_DATUM      = "column-datum";

/** Value substituted for blank fields when empty tokens are parsed as zero. */
public static final Double UNDEFINED_ZERO = Double.valueOf(0.0);

/** Character encoding used when reading WST files. */
public static final String ENCODING = "ISO-8859-1";

/** Creates a parser; there is no state to initialise. */
public WstParser() {
}
10 34
11 public void parse(File file) throws IOException { 35 public void parse(File file) throws IOException {
12 // TODO: Implement me! 36
37 log.info("Parsing WST file '" + file + "'");
38
39 LineNumberReader in = null;
40 try {
41 in =
42 new LineNumberReader(
43 new InputStreamReader(
44 new FileInputStream(file), ENCODING));
45
46 String input;
47 boolean first = true;
48 int columnCount = 0;
49
50 String [] lsHeader = null;
51 String [] lsBezeichner = null;
52 String [] langBezeichner = null;
53 int [] colNaWidths = null;
54 String [] quellen = null;
55 String [] daten = null;
56 double [] aktAbfluesse = null;
57 double [] firstAbfluesse = null;
58
59 double minKm = Double.MAX_VALUE;
60 double maxKm = -Double.MAX_VALUE;
61
62 boolean bFirstComment = true;
63 boolean columnHeaderChecked = false;
64
65 double lastKm = Double.MAX_VALUE;
66
67 String einheit = "Wassserstand [NN + m]";
68
69 HashMap<String, Double> oldEscapeLine = null;
70
71 while ((input = in.readLine()) != null) {
72 String line = input;
73 if (first) { // fetch number of columns
74 if ((line = line.trim()).length() == 0) {
75 continue;
76 }
77 try {
78 columnCount = Integer.parseInt(line);
79 if (columnCount <= 0) {
80 throw new NumberFormatException(
81 "number columns <= 0");
82 }
83 log.debug("Number of columns: " + columnCount);
84 lsBezeichner = new String[columnCount];
85 lsHeader = new String[columnCount];
86 aktAbfluesse = new double[columnCount];
87 }
88 catch (NumberFormatException nfe) {
89 log.warn(nfe);
90 continue;
91 }
92 first = false;
93 continue;
94 }
95
96 line = line.replace(',', '.');
97
98 if (line.startsWith("*\u001f")) {
99 Double [] data =
100 parseLineAsDouble(line, columnCount, false, true);
101
102 if (oldEscapeLine != null) {
103 addInterval(minKm, maxKm, oldEscapeLine);
104 minKm = Double.MAX_VALUE;
105 maxKm = -Double.MAX_VALUE;
106 }
107
108 oldEscapeLine = new HashMap<String, Double>();
109 for (int i = 0; i < columnCount; ++i) {
110 if (lsHeader[i] != null) {
111 oldEscapeLine.put(lsHeader[i], data[i]);
112 }
113 }
114
115 for (int i = Math.min(data.length, aktAbfluesse.length)-1;
116 i >= 0; --i) {
117 aktAbfluesse[i] = data[i].doubleValue();
118 }
119
120 if (firstAbfluesse == null) {
121 firstAbfluesse = (double [])aktAbfluesse.clone();
122 }
123 continue;
124 }
125
126 if (line.startsWith("*!")) {
127 String spezial = line.substring(2).trim();
128
129 if (spezial.length() == 0) {
130 continue;
131 }
132
133 if (spezial.startsWith(COLUMN_BEZ_TEXT)) {
134 spezial = spezial.substring(COLUMN_BEZ_TEXT.length()).trim();
135 if (spezial.length() == 0) {
136 continue;
137 }
138 langBezeichner = StringUtil.splitQuoted(spezial, '"');
139 }
140 else if (spezial.startsWith(COLUMN_BEZ_BREITE)) {
141 spezial = spezial.substring(COLUMN_BEZ_BREITE.length()).trim();
142
143 if (spezial.length() == 0) {
144 continue;
145 }
146
147 String[] split = spezial.split("\\s+");
148
149 colNaWidths = new int[split.length];
150 for (int i=0; i < split.length; i++) {
151 colNaWidths[i] = Integer.parseInt(split[i]);
152 }
153 }
154 else if (spezial.startsWith(COLUMN_QUELLE)) {
155 if (spezial.length() == 0) {
156 continue;
157 }
158 quellen = StringUtil.splitQuoted(spezial, '"');
159 }
160 else if (spezial.startsWith(COLUMN_DATUM)) {
161 spezial = spezial.substring(COLUMN_DATUM.length()).trim();
162 if (spezial.length() == 0) {
163 continue;
164 }
165 daten = StringUtil.splitQuoted(spezial, '"');
166 }
167 continue;
168 }
169
170 if (line.startsWith("*")) {
171 if (bFirstComment && line.length() >= 11) {
172 String yAxis = line.substring(10).trim();
173 if (yAxis.length() > 0) {
174 einheit = yAxis;
175 }
176 bFirstComment = false;
177 }
178 continue;
179 }
180
181 if (firstAbfluesse != null) {
182 if (!columnHeaderChecked) {
183 int unknownCount = 0;
184 for (int i = 0; i < lsHeader.length; ++i) {
185 if (lsBezeichner[i] == null
186 || lsBezeichner[i].length() == 0) {
187 double q = firstAbfluesse[i];
188 if (q < 0.001) {
189 lsBezeichner[i] =
190 "<unbekannt#" + unknownCount + ">";
191 ++unknownCount;
192 }
193 else {
194 lsBezeichner[i] = "Q="+format(q);
195 }
196 }
197 lsHeader[i] = lsBezeichner[i] + " " + einheit;
198 }
199 columnHeaderChecked = true;
200 }
201
202 Double [] data =
203 parseLineAsDouble(line, columnCount, true, false);
204
205 double kaem = data[0];
206
207 if (kaem < minKm) {
208 minKm = kaem;
209 }
210 if (kaem > maxKm) {
211 maxKm = kaem;
212 }
213
214 lastKm = kaem;
215
216 // extract values
217 for (int i = 0; i < columnCount; ++i) {
218 addValue(kaem, data[i].doubleValue(), lsBezeichner[i]);
219 }
220
221 }
222 else { // firstAbfluesse == null
223 if (langBezeichner != null) {
224 lsBezeichner = StringUtil.fitArray(
225 langBezeichner, lsBezeichner);
226 }
227 else if (colNaWidths != null) {
228 for (int j = 0, i = 0, N = input.length();
229 j < colNaWidths.length && i < N;
230 i += colNaWidths[j++]
231 ) {
232 lsBezeichner[j] = input.substring(
233 i, i+colNaWidths[j]).trim();
234 }
235 }
236 else {
237 // first column begins at position 8 in line
238 for (int i = 8, col = 0; i < input.length(); i += 9) {
239 if ((i + 9) > input.length()) {
240 i = input.length() - 10;
241 }
242 // one column header is 9 chars wide
243 lsBezeichner[col++] =
244 input.substring(i, i + 9).trim();
245
246 if (col == lsBezeichner.length) {
247 break;
248 }
249 }
250 }
251 }
252
253 }
254 addInterval(minKm, maxKm, oldEscapeLine);
255 }
256 finally {
257 if (in != null) {
258 in.close();
259 }
260 }
261 }
262
263 protected void addValue(double km, double w, String columnName) {
264 // TODO: store me!
265 }
266
267 protected static String format(double value) {
268 NumberFormat nf = NumberFormat.getInstance();
269 nf.setMinimumFractionDigits(2);
270 nf.setMaximumFractionDigits(2);
271 return nf.format(value);
272 }
273
274 protected void addInterval(
275 double from,
276 double to,
277 Map<String, Double> values
278 ) {
279 log.debug("addInterval: " + from + " " + to);
280 if (values == null) {
281 return;
282 }
283 }
284
285 private static final Double [] parseLineAsDouble(
286 String line,
287 int count,
288 boolean bStation,
289 boolean bParseEmptyAsZero
290 ) {
291 String [] tokens = parseLine(line, count, bStation);
292
293 Double [] doubles = new Double[tokens.length];
294
295 for (int i = 0; i < doubles.length; ++i) {
296 String token = tokens[i].trim();
297 if (token.length() != 0) {
298 doubles[i] = Double.valueOf(token);
299 }
300 else if (bParseEmptyAsZero) {
301 doubles[i] = UNDEFINED_ZERO;
302 }
303 }
304
305 return doubles;
306 }
307
308 private static String [] parseLine(
309 String line,
310 int tokenCount,
311 boolean bParseStation
312 ) {
313 ArrayList<String> strings = new ArrayList<String>();
314
315 if (bParseStation) {
316 if (line.length() < 8) {
317 throw new IllegalArgumentException("station too short");
318 }
319 strings.add(line.substring(0, 8));
320 }
321
322 int pos = 9;
323 for (int i = 0; i < tokenCount; ++i) {
324 if (line.length() >= pos + 8) {
325 strings.add(line.substring(pos, pos + 8));
326 }
327 else {
328 strings.add("");
329 }
330 pos += 9;
331 }
332
333 return strings.toArray(new String[strings.size()]);
13 } 334 }
14 } 335 }
15 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 : 336 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org