Mercurial > dive4elements > river
comparison backend/src/main/java/org/dive4elements/river/importer/parsers/PRFParser.java @ 5838:5aa05a7a34b7
Rename modules to more fitting names.
author | Sascha L. Teichmann <teichmann@intevation.de> |
---|---|
date | Thu, 25 Apr 2013 15:23:37 +0200 |
parents | flys-backend/src/main/java/org/dive4elements/river/importer/parsers/PRFParser.java@18619c1e7c2a |
children | 4dd33b86dc61 |
comparison
equal
deleted
inserted
replaced
5837:d9901a08d0a6 | 5838:5aa05a7a34b7 |
---|---|
1 package org.dive4elements.river.importer.parsers; | |
2 | |
3 import java.util.Map; | |
4 import java.util.TreeMap; | |
5 import java.util.List; | |
6 import java.util.ArrayList; | |
7 import java.util.Collections; | |
8 | |
9 import java.util.regex.Pattern; | |
10 import java.util.regex.Matcher; | |
11 | |
12 import java.io.File; | |
13 import java.io.InputStreamReader; | |
14 import java.io.LineNumberReader; | |
15 import java.io.FileInputStream; | |
16 import java.io.IOException; | |
17 | |
18 import org.apache.log4j.Logger; | |
19 | |
20 import org.dive4elements.river.importer.XY; | |
21 | |
22 import org.dive4elements.artifacts.common.utils.FileTools; | |
23 | |
24 | |
25 /** | |
26 * Parse files in .prf format and generate a mapping of double | |
27 * (km) to List of Points (XY). | |
28 */ | |
29 public class PRFParser implements CrossSectionParser | |
30 { | |
31 private static Logger log = Logger.getLogger(PRFParser.class); | |
32 | |
33 public static final String ENCODING = | |
34 System.getProperty("flys.backend.prf.encoding", "ISO-8859-1"); | |
35 | |
36 public static final Pattern DATA_PATTERN = | |
37 Pattern.compile( | |
38 "\\((\\d+)x\\s*,\\s*(\\d+)\\(" + | |
39 "\\s*f(\\d+)\\.(\\d+)\\s*,\\s*f(\\d+)\\.(\\d+)\\s*\\)?\\)?"); | |
40 | |
41 public static final Pattern KM_PATTERN = | |
42 Pattern.compile("\\((\\d+)x\\s*,\\s*f(\\d+)\\.(\\d+)\\s*\\)?"); | |
43 | |
44 public static final Pattern YEAR_PATTERN = | |
45 Pattern.compile("(\\d{4})"); | |
46 | |
47 public static final int MIN_YEAR = 1800; | |
48 public static final int MAX_YEAR = 2100; | |
49 | |
50 public static class DataFormat { | |
51 | |
52 protected int deleteChars; | |
53 protected int maxRepetitions; | |
54 protected int firstIntegerPlaces; | |
55 protected int firstFractionPlaces; | |
56 protected int secondIntegerPlaces; | |
57 protected int secondFractionPlaces; | |
58 | |
59 protected double firstShift; | |
60 protected double secondShift; | |
61 | |
62 public DataFormat() { | |
63 } | |
64 | |
65 public DataFormat(Matcher m) { | |
66 deleteChars = Integer.parseInt(m.group(1)); | |
67 maxRepetitions = Integer.parseInt(m.group(2)); | |
68 firstIntegerPlaces = Integer.parseInt(m.group(3)); | |
69 firstFractionPlaces = Integer.parseInt(m.group(4)); | |
70 secondIntegerPlaces = Integer.parseInt(m.group(5)); | |
71 secondFractionPlaces = Integer.parseInt(m.group(6)); | |
72 | |
73 firstShift = Math.pow(10, firstFractionPlaces); | |
74 secondShift = Math.pow(10, secondFractionPlaces); | |
75 } | |
76 | |
77 public int extractData(String line, List<XY> kmData) { | |
78 int L = line.length(); | |
79 if (L <= deleteChars) { | |
80 return -1; | |
81 } | |
82 | |
83 int pos = deleteChars; | |
84 | |
85 boolean debug = log.isDebugEnabled(); | |
86 | |
87 | |
88 int rep = 0; | |
89 for (;rep < maxRepetitions; ++rep) { | |
90 if (pos >= L || pos + firstIntegerPlaces >= L) { | |
91 break; | |
92 } | |
93 String first = line.substring( | |
94 pos, pos + firstIntegerPlaces); | |
95 | |
96 String second = line.substring( | |
97 pos + firstIntegerPlaces, | |
98 Math.min(L, pos+firstIntegerPlaces+secondIntegerPlaces)); | |
99 | |
100 double x, y; | |
101 try { | |
102 x = Double.parseDouble(first); | |
103 y = Double.parseDouble(second); | |
104 } | |
105 catch (NumberFormatException nfe) { | |
106 // broken line -> substract from dataset skip | |
107 return -1; | |
108 } | |
109 | |
110 if (first.indexOf('.') < 0) { | |
111 x /= firstShift; | |
112 } | |
113 | |
114 if (firstFractionPlaces > 0) { | |
115 x = (int)(x*firstShift)/firstShift; | |
116 } | |
117 | |
118 if (second.indexOf('.') < 0) { | |
119 y /= secondShift; | |
120 } | |
121 | |
122 if (secondFractionPlaces > 0) { | |
123 y = (int)(y*secondShift)/secondShift; | |
124 } | |
125 | |
126 kmData.add(new XY(x, y, kmData.size())); | |
127 | |
128 pos += firstIntegerPlaces + secondIntegerPlaces; | |
129 } | |
130 | |
131 return rep == maxRepetitions ? 1 : 0; | |
132 } | |
133 } // class DataFormat | |
134 | |
135 public static class KMFormat { | |
136 | |
137 protected int deleteChars; | |
138 protected int integerPlaces; | |
139 protected int fractionPlaces; | |
140 | |
141 protected double shift; | |
142 | |
143 public KMFormat() { | |
144 } | |
145 | |
146 public KMFormat(Matcher m) { | |
147 deleteChars = Integer.parseInt(m.group(1)); | |
148 integerPlaces = Integer.parseInt(m.group(2)); | |
149 fractionPlaces = Integer.parseInt(m.group(3)); | |
150 | |
151 shift = Math.pow(10, fractionPlaces); | |
152 } | |
153 | |
154 public double extractKm(String line) throws NumberFormatException { | |
155 | |
156 if (line.length() <= deleteChars) { | |
157 throw new NumberFormatException("line too short"); | |
158 } | |
159 | |
160 String kmS = | |
161 line.substring(deleteChars, deleteChars+integerPlaces); | |
162 | |
163 double km = Double.parseDouble(kmS.trim()); | |
164 | |
165 if (kmS.indexOf('.') < 0) { | |
166 km /= shift; | |
167 } | |
168 | |
169 return fractionPlaces > 0 | |
170 ? ((int)(km*shift))/shift | |
171 : km; | |
172 } | |
173 } // class KMFormat | |
174 | |
175 protected Map<Double, List<XY>> data; | |
176 | |
177 protected Integer year; | |
178 | |
179 protected String description; | |
180 | |
181 | |
182 public PRFParser() { | |
183 data = new TreeMap<Double, List<XY>>(); | |
184 } | |
185 | |
186 @Override | |
187 public Integer getYear() { | |
188 return year; | |
189 } | |
190 | |
191 public void setYear(Integer year) { | |
192 this.year = year; | |
193 } | |
194 | |
195 @Override | |
196 public String getDescription() { | |
197 return description; | |
198 } | |
199 | |
200 public void setDescription(String description) { | |
201 this.description = description; | |
202 } | |
203 | |
204 @Override | |
205 public Map<Double, List<XY>> getData() { | |
206 return data; | |
207 } | |
208 | |
209 public void setData(Map<Double, List<XY>> data) { | |
210 this.data = data; | |
211 } | |
212 | |
213 protected void sortLists() { | |
214 for (List<XY> xy: data.values()) { | |
215 Collections.sort(xy); | |
216 } | |
217 } | |
218 | |
219 public static final Integer findYear(String s) { | |
220 Matcher m = YEAR_PATTERN.matcher(s); | |
221 while (m.find()) { | |
222 int year = Integer.parseInt(m.group(1)); | |
223 if (year >= MIN_YEAR && year <= MAX_YEAR) { | |
224 return Integer.valueOf(year); | |
225 } | |
226 } | |
227 return null; | |
228 } | |
229 | |
230 public boolean parse(File file) { | |
231 | |
232 if (!(file.isFile() && file.canRead())) { | |
233 log.warn("PRF: cannot open file '" + file + "'"); | |
234 return false; | |
235 } | |
236 | |
237 log.info("parsing PRF file: '" + file + "'"); | |
238 | |
239 description = FileTools.removeExtension(file.getName()); | |
240 | |
241 year = findYear(file.getName()); | |
242 | |
243 if (year == null) { | |
244 File parent = file.getParentFile(); | |
245 if (parent != null) { | |
246 description = parent.getName() + "/" + description; | |
247 year = findYear(parent.getName()); | |
248 } | |
249 } | |
250 | |
251 if (year != null) { | |
252 log.info("year of sounding: " + year); | |
253 } | |
254 | |
255 LineNumberReader in = null; | |
256 | |
257 try { | |
258 in = | |
259 new LineNumberReader( | |
260 new InputStreamReader( | |
261 new FileInputStream(file), ENCODING)); | |
262 | |
263 String line = in.readLine(); | |
264 | |
265 if (line == null || (line = line.trim()).length() == 0) { | |
266 log.warn("PRF: file is empty."); | |
267 return false; | |
268 } | |
269 | |
270 Matcher m = DATA_PATTERN.matcher(line); | |
271 | |
272 if (!m.matches()) { | |
273 log.warn("PRF: First line does not look like a PRF data pattern."); | |
274 return false; | |
275 } | |
276 | |
277 DataFormat dataFormat = new DataFormat(m); | |
278 | |
279 if ((line = in.readLine()) == null | |
280 || (line = line.trim()).length() == 0) { | |
281 log.warn("PRF: premature EOF. Expected integer in line 2"); | |
282 return false; | |
283 } | |
284 | |
285 try { | |
286 if (Integer.parseInt(line) != dataFormat.maxRepetitions) { | |
287 log.warn("PRF: Expected " + | |
288 dataFormat.maxRepetitions + " in line 2"); | |
289 return false; | |
290 } | |
291 } | |
292 catch (NumberFormatException nfe) { | |
293 log.warn("PRF: invalid integer in line 2", nfe); | |
294 return false; | |
295 } | |
296 | |
297 if ((line = in.readLine()) == null) { | |
298 log.warn( | |
299 "PRF: premature EOF. Expected pattern for km extraction"); | |
300 return false; | |
301 } | |
302 | |
303 m = KM_PATTERN.matcher(line); | |
304 | |
305 if (!m.matches()) { | |
306 log.warn( | |
307 "PRF: line 4 does not look like a PRF km extraction pattern."); | |
308 return false; | |
309 } | |
310 | |
311 KMFormat kmFormat = new KMFormat(m); | |
312 | |
313 if ((line = in.readLine()) == null | |
314 || (line = line.trim()).length() == 0) { | |
315 log.warn("PRF: premature EOF. Expected skip row count."); | |
316 return false; | |
317 } | |
318 | |
319 int lineSkipCount; | |
320 try { | |
321 if ((lineSkipCount = Integer.parseInt(line)) < 0) { | |
322 throw new IllegalArgumentException(lineSkipCount + " < 0"); | |
323 } | |
324 } | |
325 catch (NumberFormatException nfe) { | |
326 log.warn( | |
327 "PRF: line 5 is not an positive integer."); | |
328 return false; | |
329 } | |
330 | |
331 int skip = lineSkipCount; | |
332 | |
333 while ((line = in.readLine()) != null) { | |
334 if (skip > 0) { | |
335 --skip; | |
336 continue; | |
337 } | |
338 double km; | |
339 try { | |
340 km = kmFormat.extractKm(line); | |
341 } | |
342 catch (NumberFormatException iae) { | |
343 log.warn("PRF: cannot extract km in line " + in.getLineNumber()); | |
344 return false; | |
345 } | |
346 | |
347 Double station = Double.valueOf(km); | |
348 | |
349 List<XY> kmData = data.get(station); | |
350 | |
351 if (kmData == null) { | |
352 //log.debug("found new km: " + station); | |
353 kmData = new ArrayList<XY>(); | |
354 data.put(station, kmData); | |
355 } | |
356 | |
357 int c = dataFormat.extractData(line, kmData); | |
358 if (c < 1) { | |
359 skip = lineSkipCount + c; | |
360 } | |
361 } | |
362 | |
363 // sort all the lists by x and index | |
364 sortLists(); | |
365 } | |
366 catch (IOException ioe) { | |
367 log.error("Error reading PRF file.", ioe); | |
368 return false; | |
369 } | |
370 finally { | |
371 if (in != null) { | |
372 try { | |
373 in.close(); | |
374 } | |
375 catch (IOException ioe) { | |
376 log.error("Error closing PRF file.", ioe); | |
377 } | |
378 } | |
379 } | |
380 | |
381 return true; | |
382 } | |
383 | |
384 public void reset() { | |
385 data.clear(); | |
386 year = null; | |
387 description = null; | |
388 } | |
389 | |
390 public void parsePRFs(File root, final CrossSectionParser.Callback callback) { | |
391 | |
392 FileTools.walkTree(root, new FileTools.FileVisitor() { | |
393 @Override | |
394 public boolean visit(File file) { | |
395 if (file.isFile() && file.canRead() | |
396 && file.getName().toLowerCase().endsWith(".prf") | |
397 && (callback == null || callback.accept(file))) { | |
398 reset(); | |
399 boolean success = parse(file); | |
400 log.info("parsing " + (success ? "succeeded" : "failed")); | |
401 if (success && callback != null) { | |
402 callback.parsed(PRFParser.this); | |
403 } | |
404 } | |
405 return true; | |
406 } | |
407 }); | |
408 } | |
409 | |
410 public static void main(String [] args) { | |
411 | |
412 PRFParser parser = new PRFParser(); | |
413 | |
414 for (String arg: args) { | |
415 parser.parsePRFs(new File(arg), null); | |
416 } | |
417 } | |
418 } | |
419 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 : |