comparison flys-backend/src/main/java/de/intevation/flys/importer/parsers/PRFParser.java @ 1211:f08fe480092c

Moved file parsers to separate package. flys-backend/trunk@2337 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author Sascha L. Teichmann <sascha.teichmann@intevation.de>
date Fri, 15 Jul 2011 13:07:45 +0000
parents
children cc88db4a5b34
comparison
equal deleted inserted replaced
1210:31d8638760b1 1211:f08fe480092c
1 package de.intevation.flys.importer.parsers;
2
3 import java.util.Map;
4 import java.util.Stack;
5 import java.util.TreeMap;
6 import java.util.List;
7 import java.util.ArrayList;
8 import java.util.Collections;
9
10 import java.util.regex.Pattern;
11 import java.util.regex.Matcher;
12
13 import java.io.File;
14 import java.io.InputStreamReader;
15 import java.io.LineNumberReader;
16 import java.io.FileInputStream;
17 import java.io.IOException;
18
19 import org.apache.log4j.Logger;
20
21 import de.intevation.flys.importer.XY;
22
23 public class PRFParser
24 {
25 private static Logger log = Logger.getLogger(PRFParser.class);
26
27 public static final String ENCODING =
28 System.getProperty("flys.backend.prf.encoding", "ISO-8859-1");
29
30 public static final Pattern DATA_PATTERN =
31 Pattern.compile(
32 "\\((\\d+)x\\s*,\\s*(\\d+)\\(" +
33 "\\s*f(\\d+)\\.(\\d+)\\s*,\\s*f(\\d+)\\.(\\d+)\\s*\\)?\\)?");
34
35 public static final Pattern KM_PATTERN =
36 Pattern.compile("\\((\\d+)x\\s*,\\s*f(\\d+)\\.(\\d+)\\s*\\)?");
37
38 public static final Pattern YEAR_PATTERN =
39 Pattern.compile("(\\d{4})");
40
41 public static final int MIN_YEAR = 1800;
42 public static final int MAX_YEAR = 2100;
43
44 public interface Callback {
45 boolean prfAccept(File file);
46 void prfParsed(PRFParser parser);
47 } // interface Parser
48
49 public static class DataFormat {
50
51 protected int deleteChars;
52 protected int maxRepetitions;
53 protected int firstIntegerPlaces;
54 protected int firstFractionPlaces;
55 protected int secondIntegerPlaces;
56 protected int secondFractionPlaces;
57
58 protected double firstShift;
59 protected double secondShift;
60
61 public DataFormat() {
62 }
63
64 public DataFormat(Matcher m) {
65 deleteChars = Integer.parseInt(m.group(1));
66 maxRepetitions = Integer.parseInt(m.group(2));
67 firstIntegerPlaces = Integer.parseInt(m.group(3));
68 firstFractionPlaces = Integer.parseInt(m.group(4));
69 secondIntegerPlaces = Integer.parseInt(m.group(5));
70 secondFractionPlaces = Integer.parseInt(m.group(6));
71
72 firstShift = Math.pow(10, firstFractionPlaces);
73 secondShift = Math.pow(10, secondFractionPlaces);
74 }
75
76 public int extractData(String line, List<XY> kmData) {
77 int L = line.length();
78 if (L <= deleteChars) {
79 return -1;
80 }
81
82 int pos = deleteChars;
83
84 boolean debug = log.isDebugEnabled();
85
86
87 int rep = 0;
88 for (;rep < maxRepetitions; ++rep) {
89 if (pos >= L || pos + firstIntegerPlaces >= L) {
90 break;
91 }
92 String first = line.substring(
93 pos, pos + firstIntegerPlaces);
94
95 String second = line.substring(
96 pos + firstIntegerPlaces,
97 Math.min(L, pos+firstIntegerPlaces+secondIntegerPlaces));
98
99 double x, y;
100 try {
101 x = Double.parseDouble(first);
102 y = Double.parseDouble(second);
103 }
104 catch (NumberFormatException nfe) {
105 // broken line -> substract from dataset skip
106 return -1;
107 }
108
109 if (first.indexOf('.') < 0) {
110 x /= firstShift;
111 }
112
113 if (firstFractionPlaces > 0) {
114 x = (int)(x*firstShift)/firstShift;
115 }
116
117 if (second.indexOf('.') < 0) {
118 y /= secondShift;
119 }
120
121 if (secondFractionPlaces > 0) {
122 y = (int)(y*secondShift)/secondShift;
123 }
124
125 kmData.add(new XY(x, y, kmData.size()));
126
127 pos += firstIntegerPlaces + secondIntegerPlaces;
128 }
129
130 return rep == maxRepetitions ? 1 : 0;
131 }
132 } // class DataFormat
133
134 public static class KMFormat {
135
136 protected int deleteChars;
137 protected int integerPlaces;
138 protected int fractionPlaces;
139
140 protected double shift;
141
142 public KMFormat() {
143 }
144
145 public KMFormat(Matcher m) {
146 deleteChars = Integer.parseInt(m.group(1));
147 integerPlaces = Integer.parseInt(m.group(2));
148 fractionPlaces = Integer.parseInt(m.group(3));
149
150 shift = Math.pow(10, fractionPlaces);
151 }
152
153 public double extractKm(String line) throws NumberFormatException {
154
155 if (line.length() <= deleteChars) {
156 throw new NumberFormatException("line too short");
157 }
158
159 String kmS =
160 line.substring(deleteChars, deleteChars+integerPlaces);
161
162 double km = Double.parseDouble(kmS.trim());
163
164 if (kmS.indexOf('.') < 0) {
165 km /= shift;
166 }
167
168 return fractionPlaces > 0
169 ? ((int)(km*shift))/shift
170 : km;
171 }
172 } // class KMFormat
173
174 protected Map<Double, List<XY>> data;
175
176 protected Integer year;
177
178 protected String description;
179
180
181 public PRFParser() {
182 data = new TreeMap<Double, List<XY>>();
183 }
184
185 public Integer getYear() {
186 return year;
187 }
188
189 public void setYear(Integer year) {
190 this.year = year;
191 }
192
193 public String getDescription() {
194 return description;
195 }
196
197 public void setDescription(String description) {
198 this.description = description;
199 }
200
201 public Map<Double, List<XY>> getData() {
202 return data;
203 }
204
205 public void setData(Map<Double, List<XY>> data) {
206 this.data = data;
207 }
208
209 protected void sortLists() {
210 for (List<XY> xy: data.values()) {
211 Collections.sort(xy);
212 }
213 }
214
215 public static final Integer findYear(String s) {
216 Matcher m = YEAR_PATTERN.matcher(s);
217 while (m.find()) {
218 int year = Integer.parseInt(m.group(1));
219 if (year >= MIN_YEAR && year <= MAX_YEAR) {
220 return Integer.valueOf(year);
221 }
222 }
223 return null;
224 }
225
226 public boolean parse(File file) {
227
228 if (!(file.isFile() && file.canRead())) {
229 log.warn("cannot open file '" + file + "'");
230 return false;
231 }
232
233 log.info("parsing PRF file: '" + file + "'");
234
235 description = file.getName();
236
237 year = findYear(file.getName());
238
239 if (year == null) {
240 File parent = file.getParentFile();
241 if (parent != null) {
242 description = parent.getName() + "/" + description;
243 year = findYear(parent.getName());
244 }
245 }
246
247 if (year != null) {
248 log.info("year of sounding: " + year);
249 }
250
251 LineNumberReader in = null;
252
253 try {
254 in =
255 new LineNumberReader(
256 new InputStreamReader(
257 new FileInputStream(file), ENCODING));
258
259 String line = in.readLine();
260
261 if (line == null || (line = line.trim()).length() == 0) {
262 log.warn("file is empty.");
263 return false;
264 }
265
266 Matcher m = DATA_PATTERN.matcher(line);
267
268 if (!m.matches()) {
269 log.warn("First line does not look like a PRF data pattern.");
270 return false;
271 }
272
273 DataFormat dataFormat = new DataFormat(m);
274
275 if ((line = in.readLine()) == null
276 || (line = line.trim()).length() == 0) {
277 log.warn("premature EOF. Expected integer in line 2");
278 return false;
279 }
280
281 try {
282 if (Integer.parseInt(line) != dataFormat.maxRepetitions) {
283 log.warn("Expected " +
284 dataFormat.maxRepetitions + " in line 2");
285 return false;
286 }
287 }
288 catch (NumberFormatException nfe) {
289 log.warn("invalid integer in line 2", nfe);
290 return false;
291 }
292
293 if ((line = in.readLine()) == null) {
294 log.warn(
295 "premature EOF. Expected pattern for km extraction");
296 return false;
297 }
298
299 m = KM_PATTERN.matcher(line);
300
301 if (!m.matches()) {
302 log.warn(
303 "line 4 does not look like a PRF km extraction pattern.");
304 return false;
305 }
306
307 KMFormat kmFormat = new KMFormat(m);
308
309 if ((line = in.readLine()) == null
310 || (line = line.trim()).length() == 0) {
311 log.warn("premature EOF. Expected skip row count.");
312 return false;
313 }
314
315 int lineSkipCount;
316 try {
317 if ((lineSkipCount = Integer.parseInt(line)) < 0) {
318 throw new IllegalArgumentException(lineSkipCount + " < 0");
319 }
320 }
321 catch (NumberFormatException nfe) {
322 log.warn(
323 "line 5 is not an positive integer.");
324 return false;
325 }
326
327 int skip = lineSkipCount;
328
329 while ((line = in.readLine()) != null) {
330 if (skip > 0) {
331 --skip;
332 continue;
333 }
334 double km;
335 try {
336 km = kmFormat.extractKm(line);
337 }
338 catch (NumberFormatException iae) {
339 log.warn("cannot extract km in line + " + in.getLineNumber());
340 return false;
341 }
342
343 Double station = Double.valueOf(km);
344
345 List<XY> kmData = data.get(station);
346
347 if (kmData == null) {
348 //log.debug("found new km: " + station);
349 kmData = new ArrayList<XY>();
350 data.put(station, kmData);
351 }
352
353 int c = dataFormat.extractData(line, kmData);
354 if (c < 1) {
355 skip = lineSkipCount + c;
356 }
357 }
358
359 // sort all the lists by x and index
360 sortLists();
361 }
362 catch (IOException ioe) {
363 log.error(ioe);
364 return false;
365 }
366 finally {
367 if (in != null) {
368 try {
369 in.close();
370 }
371 catch (IOException ioe) {
372 log.error(ioe);
373 }
374 }
375 }
376
377 return true;
378 }
379
380 public void reset() {
381 data.clear();
382 year = null;
383 description = null;
384 }
385
386 public void parsePRFs(File root, Callback callback) {
387
388 Stack<File> stack = new Stack<File>();
389 stack.push(root);
390
391 while (!stack.empty()) {
392 File file = stack.pop();
393 if (file.isDirectory()) {
394 File [] files = file.listFiles();
395 if (files != null) {
396 for (File f: files) {
397 stack.push(f);
398 }
399 }
400 }
401 else if (file.isFile()
402 && file.getName().toLowerCase().endsWith(".prf")
403 && (callback == null || callback.prfAccept(file))
404 ) {
405 reset();
406 boolean success = parse(file);
407 log.info("parsing " + (success ? "succeeded" : "failed"));
408 if (success && callback != null) {
409 callback.prfParsed(this);
410 }
411 }
412 }
413 }
414
415 public static void main(String [] args) {
416
417 PRFParser parser = new PRFParser();
418
419 for (String arg: args) {
420 parser.parsePRFs(new File(arg), null);
421 }
422 }
423 }
424 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org