comparison backend/src/main/java/org/dive4elements/river/importer/parsers/PRFParser.java @ 5838:5aa05a7a34b7

Rename modules to more fitting names.
author Sascha L. Teichmann <teichmann@intevation.de>
date Thu, 25 Apr 2013 15:23:37 +0200
parents flys-backend/src/main/java/org/dive4elements/river/importer/parsers/PRFParser.java@18619c1e7c2a
children 4dd33b86dc61
comparison
equal deleted inserted replaced
5837:d9901a08d0a6 5838:5aa05a7a34b7
1 package org.dive4elements.river.importer.parsers;
2
3 import java.util.Map;
4 import java.util.TreeMap;
5 import java.util.List;
6 import java.util.ArrayList;
7 import java.util.Collections;
8
9 import java.util.regex.Pattern;
10 import java.util.regex.Matcher;
11
12 import java.io.File;
13 import java.io.InputStreamReader;
14 import java.io.LineNumberReader;
15 import java.io.FileInputStream;
16 import java.io.IOException;
17
18 import org.apache.log4j.Logger;
19
20 import org.dive4elements.river.importer.XY;
21
22 import org.dive4elements.artifacts.common.utils.FileTools;
23
24
25 /**
26 * Parse files in .prf format and generate a mapping of double
27 * (km) to List of Points (XY).
28 */
29 public class PRFParser implements CrossSectionParser
30 {
31 private static Logger log = Logger.getLogger(PRFParser.class);
32
33 public static final String ENCODING =
34 System.getProperty("flys.backend.prf.encoding", "ISO-8859-1");
35
36 public static final Pattern DATA_PATTERN =
37 Pattern.compile(
38 "\\((\\d+)x\\s*,\\s*(\\d+)\\(" +
39 "\\s*f(\\d+)\\.(\\d+)\\s*,\\s*f(\\d+)\\.(\\d+)\\s*\\)?\\)?");
40
41 public static final Pattern KM_PATTERN =
42 Pattern.compile("\\((\\d+)x\\s*,\\s*f(\\d+)\\.(\\d+)\\s*\\)?");
43
44 public static final Pattern YEAR_PATTERN =
45 Pattern.compile("(\\d{4})");
46
47 public static final int MIN_YEAR = 1800;
48 public static final int MAX_YEAR = 2100;
49
50 public static class DataFormat {
51
52 protected int deleteChars;
53 protected int maxRepetitions;
54 protected int firstIntegerPlaces;
55 protected int firstFractionPlaces;
56 protected int secondIntegerPlaces;
57 protected int secondFractionPlaces;
58
59 protected double firstShift;
60 protected double secondShift;
61
62 public DataFormat() {
63 }
64
65 public DataFormat(Matcher m) {
66 deleteChars = Integer.parseInt(m.group(1));
67 maxRepetitions = Integer.parseInt(m.group(2));
68 firstIntegerPlaces = Integer.parseInt(m.group(3));
69 firstFractionPlaces = Integer.parseInt(m.group(4));
70 secondIntegerPlaces = Integer.parseInt(m.group(5));
71 secondFractionPlaces = Integer.parseInt(m.group(6));
72
73 firstShift = Math.pow(10, firstFractionPlaces);
74 secondShift = Math.pow(10, secondFractionPlaces);
75 }
76
77 public int extractData(String line, List<XY> kmData) {
78 int L = line.length();
79 if (L <= deleteChars) {
80 return -1;
81 }
82
83 int pos = deleteChars;
84
85 boolean debug = log.isDebugEnabled();
86
87
88 int rep = 0;
89 for (;rep < maxRepetitions; ++rep) {
90 if (pos >= L || pos + firstIntegerPlaces >= L) {
91 break;
92 }
93 String first = line.substring(
94 pos, pos + firstIntegerPlaces);
95
96 String second = line.substring(
97 pos + firstIntegerPlaces,
98 Math.min(L, pos+firstIntegerPlaces+secondIntegerPlaces));
99
100 double x, y;
101 try {
102 x = Double.parseDouble(first);
103 y = Double.parseDouble(second);
104 }
105 catch (NumberFormatException nfe) {
106 // broken line -> substract from dataset skip
107 return -1;
108 }
109
110 if (first.indexOf('.') < 0) {
111 x /= firstShift;
112 }
113
114 if (firstFractionPlaces > 0) {
115 x = (int)(x*firstShift)/firstShift;
116 }
117
118 if (second.indexOf('.') < 0) {
119 y /= secondShift;
120 }
121
122 if (secondFractionPlaces > 0) {
123 y = (int)(y*secondShift)/secondShift;
124 }
125
126 kmData.add(new XY(x, y, kmData.size()));
127
128 pos += firstIntegerPlaces + secondIntegerPlaces;
129 }
130
131 return rep == maxRepetitions ? 1 : 0;
132 }
133 } // class DataFormat
134
135 public static class KMFormat {
136
137 protected int deleteChars;
138 protected int integerPlaces;
139 protected int fractionPlaces;
140
141 protected double shift;
142
143 public KMFormat() {
144 }
145
146 public KMFormat(Matcher m) {
147 deleteChars = Integer.parseInt(m.group(1));
148 integerPlaces = Integer.parseInt(m.group(2));
149 fractionPlaces = Integer.parseInt(m.group(3));
150
151 shift = Math.pow(10, fractionPlaces);
152 }
153
154 public double extractKm(String line) throws NumberFormatException {
155
156 if (line.length() <= deleteChars) {
157 throw new NumberFormatException("line too short");
158 }
159
160 String kmS =
161 line.substring(deleteChars, deleteChars+integerPlaces);
162
163 double km = Double.parseDouble(kmS.trim());
164
165 if (kmS.indexOf('.') < 0) {
166 km /= shift;
167 }
168
169 return fractionPlaces > 0
170 ? ((int)(km*shift))/shift
171 : km;
172 }
173 } // class KMFormat
174
175 protected Map<Double, List<XY>> data;
176
177 protected Integer year;
178
179 protected String description;
180
181
182 public PRFParser() {
183 data = new TreeMap<Double, List<XY>>();
184 }
185
186 @Override
187 public Integer getYear() {
188 return year;
189 }
190
191 public void setYear(Integer year) {
192 this.year = year;
193 }
194
195 @Override
196 public String getDescription() {
197 return description;
198 }
199
200 public void setDescription(String description) {
201 this.description = description;
202 }
203
204 @Override
205 public Map<Double, List<XY>> getData() {
206 return data;
207 }
208
209 public void setData(Map<Double, List<XY>> data) {
210 this.data = data;
211 }
212
213 protected void sortLists() {
214 for (List<XY> xy: data.values()) {
215 Collections.sort(xy);
216 }
217 }
218
219 public static final Integer findYear(String s) {
220 Matcher m = YEAR_PATTERN.matcher(s);
221 while (m.find()) {
222 int year = Integer.parseInt(m.group(1));
223 if (year >= MIN_YEAR && year <= MAX_YEAR) {
224 return Integer.valueOf(year);
225 }
226 }
227 return null;
228 }
229
230 public boolean parse(File file) {
231
232 if (!(file.isFile() && file.canRead())) {
233 log.warn("PRF: cannot open file '" + file + "'");
234 return false;
235 }
236
237 log.info("parsing PRF file: '" + file + "'");
238
239 description = FileTools.removeExtension(file.getName());
240
241 year = findYear(file.getName());
242
243 if (year == null) {
244 File parent = file.getParentFile();
245 if (parent != null) {
246 description = parent.getName() + "/" + description;
247 year = findYear(parent.getName());
248 }
249 }
250
251 if (year != null) {
252 log.info("year of sounding: " + year);
253 }
254
255 LineNumberReader in = null;
256
257 try {
258 in =
259 new LineNumberReader(
260 new InputStreamReader(
261 new FileInputStream(file), ENCODING));
262
263 String line = in.readLine();
264
265 if (line == null || (line = line.trim()).length() == 0) {
266 log.warn("PRF: file is empty.");
267 return false;
268 }
269
270 Matcher m = DATA_PATTERN.matcher(line);
271
272 if (!m.matches()) {
273 log.warn("PRF: First line does not look like a PRF data pattern.");
274 return false;
275 }
276
277 DataFormat dataFormat = new DataFormat(m);
278
279 if ((line = in.readLine()) == null
280 || (line = line.trim()).length() == 0) {
281 log.warn("PRF: premature EOF. Expected integer in line 2");
282 return false;
283 }
284
285 try {
286 if (Integer.parseInt(line) != dataFormat.maxRepetitions) {
287 log.warn("PRF: Expected " +
288 dataFormat.maxRepetitions + " in line 2");
289 return false;
290 }
291 }
292 catch (NumberFormatException nfe) {
293 log.warn("PRF: invalid integer in line 2", nfe);
294 return false;
295 }
296
297 if ((line = in.readLine()) == null) {
298 log.warn(
299 "PRF: premature EOF. Expected pattern for km extraction");
300 return false;
301 }
302
303 m = KM_PATTERN.matcher(line);
304
305 if (!m.matches()) {
306 log.warn(
307 "PRF: line 4 does not look like a PRF km extraction pattern.");
308 return false;
309 }
310
311 KMFormat kmFormat = new KMFormat(m);
312
313 if ((line = in.readLine()) == null
314 || (line = line.trim()).length() == 0) {
315 log.warn("PRF: premature EOF. Expected skip row count.");
316 return false;
317 }
318
319 int lineSkipCount;
320 try {
321 if ((lineSkipCount = Integer.parseInt(line)) < 0) {
322 throw new IllegalArgumentException(lineSkipCount + " < 0");
323 }
324 }
325 catch (NumberFormatException nfe) {
326 log.warn(
327 "PRF: line 5 is not an positive integer.");
328 return false;
329 }
330
331 int skip = lineSkipCount;
332
333 while ((line = in.readLine()) != null) {
334 if (skip > 0) {
335 --skip;
336 continue;
337 }
338 double km;
339 try {
340 km = kmFormat.extractKm(line);
341 }
342 catch (NumberFormatException iae) {
343 log.warn("PRF: cannot extract km in line " + in.getLineNumber());
344 return false;
345 }
346
347 Double station = Double.valueOf(km);
348
349 List<XY> kmData = data.get(station);
350
351 if (kmData == null) {
352 //log.debug("found new km: " + station);
353 kmData = new ArrayList<XY>();
354 data.put(station, kmData);
355 }
356
357 int c = dataFormat.extractData(line, kmData);
358 if (c < 1) {
359 skip = lineSkipCount + c;
360 }
361 }
362
363 // sort all the lists by x and index
364 sortLists();
365 }
366 catch (IOException ioe) {
367 log.error("Error reading PRF file.", ioe);
368 return false;
369 }
370 finally {
371 if (in != null) {
372 try {
373 in.close();
374 }
375 catch (IOException ioe) {
376 log.error("Error closing PRF file.", ioe);
377 }
378 }
379 }
380
381 return true;
382 }
383
384 public void reset() {
385 data.clear();
386 year = null;
387 description = null;
388 }
389
390 public void parsePRFs(File root, final CrossSectionParser.Callback callback) {
391
392 FileTools.walkTree(root, new FileTools.FileVisitor() {
393 @Override
394 public boolean visit(File file) {
395 if (file.isFile() && file.canRead()
396 && file.getName().toLowerCase().endsWith(".prf")
397 && (callback == null || callback.accept(file))) {
398 reset();
399 boolean success = parse(file);
400 log.info("parsing " + (success ? "succeeded" : "failed"));
401 if (success && callback != null) {
402 callback.parsed(PRFParser.this);
403 }
404 }
405 return true;
406 }
407 });
408 }
409
410 public static void main(String [] args) {
411
412 PRFParser parser = new PRFParser();
413
414 for (String arg: args) {
415 parser.parsePRFs(new File(arg), null);
416 }
417 }
418 }
419 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org