Mercurial > dive4elements > river
comparison flys-backend/src/main/java/de/intevation/flys/importer/parsers/PRFParser.java @ 1211:f08fe480092c
Moved file parsers to separate package.
flys-backend/trunk@2337 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author | Sascha L. Teichmann <sascha.teichmann@intevation.de> |
---|---|
date | Fri, 15 Jul 2011 13:07:45 +0000 |
parents | |
children | cc88db4a5b34 |
comparison
equal
deleted
inserted
replaced
1210:31d8638760b1 | 1211:f08fe480092c |
---|---|
1 package de.intevation.flys.importer.parsers; | |
2 | |
3 import java.util.Map; | |
4 import java.util.Stack; | |
5 import java.util.TreeMap; | |
6 import java.util.List; | |
7 import java.util.ArrayList; | |
8 import java.util.Collections; | |
9 | |
10 import java.util.regex.Pattern; | |
11 import java.util.regex.Matcher; | |
12 | |
13 import java.io.File; | |
14 import java.io.InputStreamReader; | |
15 import java.io.LineNumberReader; | |
16 import java.io.FileInputStream; | |
17 import java.io.IOException; | |
18 | |
19 import org.apache.log4j.Logger; | |
20 | |
21 import de.intevation.flys.importer.XY; | |
22 | |
23 public class PRFParser | |
24 { | |
25 private static Logger log = Logger.getLogger(PRFParser.class); | |
26 | |
27 public static final String ENCODING = | |
28 System.getProperty("flys.backend.prf.encoding", "ISO-8859-1"); | |
29 | |
30 public static final Pattern DATA_PATTERN = | |
31 Pattern.compile( | |
32 "\\((\\d+)x\\s*,\\s*(\\d+)\\(" + | |
33 "\\s*f(\\d+)\\.(\\d+)\\s*,\\s*f(\\d+)\\.(\\d+)\\s*\\)?\\)?"); | |
34 | |
35 public static final Pattern KM_PATTERN = | |
36 Pattern.compile("\\((\\d+)x\\s*,\\s*f(\\d+)\\.(\\d+)\\s*\\)?"); | |
37 | |
38 public static final Pattern YEAR_PATTERN = | |
39 Pattern.compile("(\\d{4})"); | |
40 | |
41 public static final int MIN_YEAR = 1800; | |
42 public static final int MAX_YEAR = 2100; | |
43 | |
/**
 * Hooks used by parsePRFs while it walks a directory tree looking for
 * files whose name ends in ".prf".
 */
44 public interface Callback { | |
/**
 * Asked for every candidate PRF file before it is parsed; the file is
 * only parsed if this returns true.
 */
45 boolean prfAccept(File file); | |
/**
 * Called after a file was parsed successfully, with the parser that
 * now holds the data of that file.
 */
46 void prfParsed(PRFParser parser); | |
47 } // interface Callback | |
48 | |
49 public static class DataFormat { | |
50 | |
51 protected int deleteChars; | |
52 protected int maxRepetitions; | |
53 protected int firstIntegerPlaces; | |
54 protected int firstFractionPlaces; | |
55 protected int secondIntegerPlaces; | |
56 protected int secondFractionPlaces; | |
57 | |
58 protected double firstShift; | |
59 protected double secondShift; | |
60 | |
61 public DataFormat() { | |
62 } | |
63 | |
64 public DataFormat(Matcher m) { | |
65 deleteChars = Integer.parseInt(m.group(1)); | |
66 maxRepetitions = Integer.parseInt(m.group(2)); | |
67 firstIntegerPlaces = Integer.parseInt(m.group(3)); | |
68 firstFractionPlaces = Integer.parseInt(m.group(4)); | |
69 secondIntegerPlaces = Integer.parseInt(m.group(5)); | |
70 secondFractionPlaces = Integer.parseInt(m.group(6)); | |
71 | |
72 firstShift = Math.pow(10, firstFractionPlaces); | |
73 secondShift = Math.pow(10, secondFractionPlaces); | |
74 } | |
75 | |
76 public int extractData(String line, List<XY> kmData) { | |
77 int L = line.length(); | |
78 if (L <= deleteChars) { | |
79 return -1; | |
80 } | |
81 | |
82 int pos = deleteChars; | |
83 | |
84 boolean debug = log.isDebugEnabled(); | |
85 | |
86 | |
87 int rep = 0; | |
88 for (;rep < maxRepetitions; ++rep) { | |
89 if (pos >= L || pos + firstIntegerPlaces >= L) { | |
90 break; | |
91 } | |
92 String first = line.substring( | |
93 pos, pos + firstIntegerPlaces); | |
94 | |
95 String second = line.substring( | |
96 pos + firstIntegerPlaces, | |
97 Math.min(L, pos+firstIntegerPlaces+secondIntegerPlaces)); | |
98 | |
99 double x, y; | |
100 try { | |
101 x = Double.parseDouble(first); | |
102 y = Double.parseDouble(second); | |
103 } | |
104 catch (NumberFormatException nfe) { | |
105 // broken line -> substract from dataset skip | |
106 return -1; | |
107 } | |
108 | |
109 if (first.indexOf('.') < 0) { | |
110 x /= firstShift; | |
111 } | |
112 | |
113 if (firstFractionPlaces > 0) { | |
114 x = (int)(x*firstShift)/firstShift; | |
115 } | |
116 | |
117 if (second.indexOf('.') < 0) { | |
118 y /= secondShift; | |
119 } | |
120 | |
121 if (secondFractionPlaces > 0) { | |
122 y = (int)(y*secondShift)/secondShift; | |
123 } | |
124 | |
125 kmData.add(new XY(x, y, kmData.size())); | |
126 | |
127 pos += firstIntegerPlaces + secondIntegerPlaces; | |
128 } | |
129 | |
130 return rep == maxRepetitions ? 1 : 0; | |
131 } | |
132 } // class DataFormat | |
133 | |
134 public static class KMFormat { | |
135 | |
136 protected int deleteChars; | |
137 protected int integerPlaces; | |
138 protected int fractionPlaces; | |
139 | |
140 protected double shift; | |
141 | |
142 public KMFormat() { | |
143 } | |
144 | |
145 public KMFormat(Matcher m) { | |
146 deleteChars = Integer.parseInt(m.group(1)); | |
147 integerPlaces = Integer.parseInt(m.group(2)); | |
148 fractionPlaces = Integer.parseInt(m.group(3)); | |
149 | |
150 shift = Math.pow(10, fractionPlaces); | |
151 } | |
152 | |
153 public double extractKm(String line) throws NumberFormatException { | |
154 | |
155 if (line.length() <= deleteChars) { | |
156 throw new NumberFormatException("line too short"); | |
157 } | |
158 | |
159 String kmS = | |
160 line.substring(deleteChars, deleteChars+integerPlaces); | |
161 | |
162 double km = Double.parseDouble(kmS.trim()); | |
163 | |
164 if (kmS.indexOf('.') < 0) { | |
165 km /= shift; | |
166 } | |
167 | |
168 return fractionPlaces > 0 | |
169 ? ((int)(km*shift))/shift | |
170 : km; | |
171 } | |
172 } // class KMFormat | |
173 | |
174 protected Map<Double, List<XY>> data; | |
175 | |
176 protected Integer year; | |
177 | |
178 protected String description; | |
179 | |
180 | |
181 public PRFParser() { | |
182 data = new TreeMap<Double, List<XY>>(); | |
183 } | |
184 | |
185 public Integer getYear() { | |
186 return year; | |
187 } | |
188 | |
189 public void setYear(Integer year) { | |
190 this.year = year; | |
191 } | |
192 | |
193 public String getDescription() { | |
194 return description; | |
195 } | |
196 | |
197 public void setDescription(String description) { | |
198 this.description = description; | |
199 } | |
200 | |
201 public Map<Double, List<XY>> getData() { | |
202 return data; | |
203 } | |
204 | |
205 public void setData(Map<Double, List<XY>> data) { | |
206 this.data = data; | |
207 } | |
208 | |
209 protected void sortLists() { | |
210 for (List<XY> xy: data.values()) { | |
211 Collections.sort(xy); | |
212 } | |
213 } | |
214 | |
215 public static final Integer findYear(String s) { | |
216 Matcher m = YEAR_PATTERN.matcher(s); | |
217 while (m.find()) { | |
218 int year = Integer.parseInt(m.group(1)); | |
219 if (year >= MIN_YEAR && year <= MAX_YEAR) { | |
220 return Integer.valueOf(year); | |
221 } | |
222 } | |
223 return null; | |
224 } | |
225 | |
226 public boolean parse(File file) { | |
227 | |
228 if (!(file.isFile() && file.canRead())) { | |
229 log.warn("cannot open file '" + file + "'"); | |
230 return false; | |
231 } | |
232 | |
233 log.info("parsing PRF file: '" + file + "'"); | |
234 | |
235 description = file.getName(); | |
236 | |
237 year = findYear(file.getName()); | |
238 | |
239 if (year == null) { | |
240 File parent = file.getParentFile(); | |
241 if (parent != null) { | |
242 description = parent.getName() + "/" + description; | |
243 year = findYear(parent.getName()); | |
244 } | |
245 } | |
246 | |
247 if (year != null) { | |
248 log.info("year of sounding: " + year); | |
249 } | |
250 | |
251 LineNumberReader in = null; | |
252 | |
253 try { | |
254 in = | |
255 new LineNumberReader( | |
256 new InputStreamReader( | |
257 new FileInputStream(file), ENCODING)); | |
258 | |
259 String line = in.readLine(); | |
260 | |
261 if (line == null || (line = line.trim()).length() == 0) { | |
262 log.warn("file is empty."); | |
263 return false; | |
264 } | |
265 | |
266 Matcher m = DATA_PATTERN.matcher(line); | |
267 | |
268 if (!m.matches()) { | |
269 log.warn("First line does not look like a PRF data pattern."); | |
270 return false; | |
271 } | |
272 | |
273 DataFormat dataFormat = new DataFormat(m); | |
274 | |
275 if ((line = in.readLine()) == null | |
276 || (line = line.trim()).length() == 0) { | |
277 log.warn("premature EOF. Expected integer in line 2"); | |
278 return false; | |
279 } | |
280 | |
281 try { | |
282 if (Integer.parseInt(line) != dataFormat.maxRepetitions) { | |
283 log.warn("Expected " + | |
284 dataFormat.maxRepetitions + " in line 2"); | |
285 return false; | |
286 } | |
287 } | |
288 catch (NumberFormatException nfe) { | |
289 log.warn("invalid integer in line 2", nfe); | |
290 return false; | |
291 } | |
292 | |
293 if ((line = in.readLine()) == null) { | |
294 log.warn( | |
295 "premature EOF. Expected pattern for km extraction"); | |
296 return false; | |
297 } | |
298 | |
299 m = KM_PATTERN.matcher(line); | |
300 | |
301 if (!m.matches()) { | |
302 log.warn( | |
303 "line 4 does not look like a PRF km extraction pattern."); | |
304 return false; | |
305 } | |
306 | |
307 KMFormat kmFormat = new KMFormat(m); | |
308 | |
309 if ((line = in.readLine()) == null | |
310 || (line = line.trim()).length() == 0) { | |
311 log.warn("premature EOF. Expected skip row count."); | |
312 return false; | |
313 } | |
314 | |
315 int lineSkipCount; | |
316 try { | |
317 if ((lineSkipCount = Integer.parseInt(line)) < 0) { | |
318 throw new IllegalArgumentException(lineSkipCount + " < 0"); | |
319 } | |
320 } | |
321 catch (NumberFormatException nfe) { | |
322 log.warn( | |
323 "line 5 is not an positive integer."); | |
324 return false; | |
325 } | |
326 | |
327 int skip = lineSkipCount; | |
328 | |
329 while ((line = in.readLine()) != null) { | |
330 if (skip > 0) { | |
331 --skip; | |
332 continue; | |
333 } | |
334 double km; | |
335 try { | |
336 km = kmFormat.extractKm(line); | |
337 } | |
338 catch (NumberFormatException iae) { | |
339 log.warn("cannot extract km in line + " + in.getLineNumber()); | |
340 return false; | |
341 } | |
342 | |
343 Double station = Double.valueOf(km); | |
344 | |
345 List<XY> kmData = data.get(station); | |
346 | |
347 if (kmData == null) { | |
348 //log.debug("found new km: " + station); | |
349 kmData = new ArrayList<XY>(); | |
350 data.put(station, kmData); | |
351 } | |
352 | |
353 int c = dataFormat.extractData(line, kmData); | |
354 if (c < 1) { | |
355 skip = lineSkipCount + c; | |
356 } | |
357 } | |
358 | |
359 // sort all the lists by x and index | |
360 sortLists(); | |
361 } | |
362 catch (IOException ioe) { | |
363 log.error(ioe); | |
364 return false; | |
365 } | |
366 finally { | |
367 if (in != null) { | |
368 try { | |
369 in.close(); | |
370 } | |
371 catch (IOException ioe) { | |
372 log.error(ioe); | |
373 } | |
374 } | |
375 } | |
376 | |
377 return true; | |
378 } | |
379 | |
380 public void reset() { | |
381 data.clear(); | |
382 year = null; | |
383 description = null; | |
384 } | |
385 | |
386 public void parsePRFs(File root, Callback callback) { | |
387 | |
388 Stack<File> stack = new Stack<File>(); | |
389 stack.push(root); | |
390 | |
391 while (!stack.empty()) { | |
392 File file = stack.pop(); | |
393 if (file.isDirectory()) { | |
394 File [] files = file.listFiles(); | |
395 if (files != null) { | |
396 for (File f: files) { | |
397 stack.push(f); | |
398 } | |
399 } | |
400 } | |
401 else if (file.isFile() | |
402 && file.getName().toLowerCase().endsWith(".prf") | |
403 && (callback == null || callback.prfAccept(file)) | |
404 ) { | |
405 reset(); | |
406 boolean success = parse(file); | |
407 log.info("parsing " + (success ? "succeeded" : "failed")); | |
408 if (success && callback != null) { | |
409 callback.prfParsed(this); | |
410 } | |
411 } | |
412 } | |
413 } | |
414 | |
415 public static void main(String [] args) { | |
416 | |
417 PRFParser parser = new PRFParser(); | |
418 | |
419 for (String arg: args) { | |
420 parser.parsePRFs(new File(arg), null); | |
421 } | |
422 } | |
423 } | |
424 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 : |