comparison backend/src/main/java/org/dive4elements/river/importer/parsers/DA66Parser.java @ 5838:5aa05a7a34b7

Rename modules to more fitting names.
author Sascha L. Teichmann <teichmann@intevation.de>
date Thu, 25 Apr 2013 15:23:37 +0200
parents flys-backend/src/main/java/org/dive4elements/river/importer/parsers/DA66Parser.java@18619c1e7c2a
children 4dd33b86dc61
comparison
equal deleted inserted replaced
5837:d9901a08d0a6 5838:5aa05a7a34b7
1 package org.dive4elements.river.importer.parsers;
2
3 import org.dive4elements.artifacts.common.utils.FileTools;
4
5 import org.dive4elements.river.importer.XY;
6
7 import org.dive4elements.river.utils.EpsilonComparator;
8
9 import java.io.File;
10 import java.io.IOException;
11
12 import java.util.ArrayList;
13 import java.util.HashMap;
14 import java.util.List;
15 import java.util.Map;
16 import java.util.TreeMap;
17
18 import java.util.regex.Matcher;
19 import java.util.regex.Pattern;
20
21 import org.apache.log4j.Logger;
22
23
24 /**
25 * Parser for cross-section files in da66 format: produces a map from
26 * station (km) to the list of XY points of that station's profile line.
27 */
28 public class DA66Parser extends LineParser implements CrossSectionParser
29 {
30 /** Private logger. */
31 private static Logger logger = Logger.getLogger(DA66Parser.class);
32
33 private static String HEAD_HEAD = "00";
34 private static String HEAD_GEOM = "66"; // "Values"
35 private static String HEAD_ENDG = "88"; // Probably never used.
36
37 /** Regex to match lines of files in da66 format. */
38 private static final Pattern LINE_PATTERN =
39 Pattern.compile("^([0-9 -]{2})" + // Type (00|66|88)
40 "([0-9 -]{5})" + // unset
41 "([0-9 -]{2})" + // id
42 "([0-9 -]{9})" + // station
43 "([0-9 -]{2})" + // running number
44 "([0-9 -]{1})?" + // point id
45 /*
46 Would be great if we could express the pattern as this:
47 ([0-9 -]{1})([0-9 -JKMLMNOPQR]{7})([0-9 -]{7})+
48 */
49 "([0-9 -JKMLMNOPQR]{7})?" + // y
50 "([0-9 -]{7})?" + // z
51 "([0-9 -]{1})?" + // point id
52 "([0-9 -JKMLMNOPQR]{7})?" + // y
53 "([0-9 -]{7})?" + // z
54 "([0-9 -]{1})?" + // point id
55 "([0-9 -JKMLMNOPQR]{7})?" + // y
56 "([0-9 -]{7})?" + // z
57 "([0-9 -]{1})?" + // point id
58 "([0-9 -JKMLMNOPQR]{7})?" + // y
59 "([0-9 -]{7})?" // z
60 );
61
62
63 /** Indices to match group of main regex. */
64 private static enum FIELD {
65 HEAD ( 1),
66 UNSET ( 2),
67 ID ( 3),
68 STATION ( 4),
69 RUNNR ( 5),
70 POINT_1_ID( 6),
71 POINT_1_Y ( 7),
72 POINT_1_Z ( 8),
73 POINT_2_ID( 9),
74 POINT_2_Y (10),
75 POINT_2_Z (11),
76 POINT_3_ID(12),
77 POINT_3_Y (13),
78 POINT_3_Z (14),
79 POINT_4_ID(15),
80 POINT_4_Y (16),
81 POINT_4_Z (17);
82
83 private int idx;
84 FIELD(int idx) {
85 this.idx = idx;
86 }
87 int getIdx() {
88 return idx;
89 }
90 }
91
92
93 /** Header lines of da66 can define a type. */
94 private static enum Type {
95 DATE ( 0),
96 HEKTOSTONE_LEFT ( 1), //grm. "Standlinie"
97 HEKTOSTONE_RIGHT ( 2),
98 CHANNEL_LEFT ( 3), //grm. "Fahrrinne"
99 CHANNEL_RIGHT ( 4),
100 CHANNEL_2_LEFT ( 5),
101 CHANNEL_2_RIGHT ( 6),
102 GIW_1972 ( 7),
103 GROIN_DIST_LEFT ( 8), //grm. "Buhnenkopfabstand links"
104 GROIN_HEIGHT_LEFT ( 9),
105 GROIN_SLOPE_LEFT (10),
106 GROIN_DIST_RIGHT (11),
107 GROIN_HEIGHT_RIGHT (12),
108 GROIN_SLOPE_RIGHT (13),
109 STRIKE_LEFT (14), //grm. "Streichlinie links"
110 AXIS (15),
111 STRIKE_RIGHT (16),
112 GROIN_BACK_SLOPE_LEFT (17), //grm. "Buhnenrueckenneigung"
113 GROIN_BACK_SLOPE_RIGHT (18),
114 GIW_1932 (19),
115 GIW_1982 (20),
116 STAND_ISLAND_1 (21),
117 STAND_ISLAND_2 (22),
118 STAND_ISLAND_3 (23),
119 STAND_ISLAND_4 (24),
120 UNSPECIFIED_1 (25),
121 UNSPECIFIED_2 (26),
122 HHW (27),
123 OLD_PROFILE_NULL (28),
124 AW_1978 (29),
125 SIGN_LEFT (30),
126 SIGN_RIGHT (31),
127 DIST_SIGNAL_CHANNEL_LEFT (32),
128 DIST_SIGNAL_CHANNEL_RIGHT(33),
129 UNSPECIFIED_3 (34),
130 UNSPECIFIED_4 (35),
131 UNSPECIFIED_5 (36),
132 UNSPECIFIED_6 (37),
133 SHORE_LEFT (38),
134 SHORE_RIGHT (39),
135 UNSPECIFIED_7 (40);
136
137 private final int id;
138 Type(int id) {
139 this.id = id;
140 }
141 public int getId() {
142 return id;
143 }
144 }
145
146
147 /** Available types. */
148 private static HashMap<Integer, Type> typeMap;
149
150
151 /** Types we can deal with. */
152 private static List<Type> implementedTypes;
153
154
155 static {
156 typeMap = new HashMap<Integer, Type>();
157 for (Type t: Type.values()) {
158 typeMap.put(new Integer(t.getId()), t);
159 }
160 // TODO populate and respect header type.
161 implementedTypes = new ArrayList<Type>();
162 //implementedTypes.add(..);
163 }
164
165
166 /** The current line to which add points. */
167 private List<XY> currentLine;
168
169
170 /** Data collected so far, last element will be currentLine. */
171 protected Map<Double, List<XY>> data;
172
173
174 /** Trivial constructor. */
175 public DA66Parser() {
176 data = new TreeMap<Double, List<XY>>(EpsilonComparator.CMP);
177 }
178
179
180 /** Get the description of the cross section parsed. */
181 @Override
182 public String getDescription() {
183 return FileTools.removeExtension(getFileName());
184 }
185
186
187 /** Get the year of this cross sections measurement. */
188 @Override
189 public Integer getYear() {
190 return null;
191 }
192
193
194 /**
195 * Return the data parsed.
196 * @return map of stations (km) to list of points.
197 */
198 @Override
199 public Map<Double, List<XY>> getData() {
200 return data;
201 }
202
203
204 /**
205 * Walk a directory tree, parse its *.da66 files and store the
206 * data found.
207 */
208 public void parseDA66s(File root, final Callback callback) {
209
210 FileTools.walkTree(root, new FileTools.FileVisitor() {
211 @Override
212 public boolean visit(File file) {
213 if (file.isFile() && file.canRead()
214 && file.getName().toLowerCase().endsWith(".d66")
215 && (callback == null || callback.accept(file))) {
216 reset();
217 try {
218 parse(file);
219 logger.info("parsing done");
220 if (callback != null) {
221 callback.parsed(DA66Parser.this);
222 }
223 }
224 catch (IOException ioe) {
225 logger.error("IOException while parsing file");
226 return false;
227 }
228 }
229 return true;
230 }
231 });
232 }
233
234
235 /**
236 * Get the Index of the last cross-section lines point.
237 * @return last points index, -1 if not available.
238 */
239 private int lastPointIdx() {
240 if (currentLine == null || currentLine.isEmpty()) {
241 return -1;
242 }
243 XY lastPoint = this.currentLine.get(currentLine.size()-1);
244 return lastPoint.getIndex();
245 }
246
247
248 /** Returns station, deciding if it could in cm, in which case convert. */
249 private double stationInKm(double station) {
250 if (station > 10000) {
251 return station/100000d;
252 }
253 else {
254 return station;
255 }
256 }
257
258
259 /** Apply the convention how to deal with numbers < -99.999 .*/
260 private String applyLetterConvention(String orig) {
261 if (orig.endsWith("-")) {
262 return "-" + orig.replace("-","");
263 }
264 else if (orig.endsWith("J")) {
265 return "-" + orig.replace("J","1");
266 }
267 else if (orig.endsWith("K")) {
268 return "-" + orig.replace("K","2");
269 }
270 else if (orig.endsWith("L")) {
271 return "-" + orig.replace("L","3");
272 }
273 else if (orig.endsWith("M")) {
274 return "-" + orig.replace("M","4");
275 }
276 else if (orig.endsWith("N")) {
277 return "-" + orig.replace("N","5");
278 }
279 else if (orig.endsWith("O")) {
280 return "-" + orig.replace("O","6");
281 }
282 else if (orig.endsWith("P")) {
283 return "-" + orig.replace("P","7");
284 }
285 else if (orig.endsWith("Q")) {
286 return "-" + orig.replace("Q","8");
287 }
288 else if (orig.endsWith("R")) {
289 return "-" + orig.replace("R","9");
290 }
291 else {
292 return orig;
293 }
294 }
295
296 /**
297 * Add a Point (YZ,Index) to the current cross section line.
298 * @param y The y coordinate of new point.
299 * @param z The z coordinate of new point.
300 * @param idx Ignored, the parameter of new point.
301 * @return true if point could been added, false otherwise (e.g. not
302 * parsable y or z values.
303 */
304 private boolean addPoint(String y, String z, String idx) {
305 if (z == null || y == null || idx == null) {
306 logger.error("Incomplete point definition");
307 return false;
308 }
309
310 double iy;
311 double iz;
312 // Handle letter convention.
313 y = applyLetterConvention(y);
314 try {
315 iy = Double.parseDouble(y) / 1000d;
316 iz = Double.parseDouble(z) / 1000d;
317 }
318 catch(java.lang.NumberFormatException nfe) {
319 logger.error("Could not parse Number: " + nfe.getMessage());
320 return false;
321 }
322
323 // We ignore idx, and increment instead.
324 int index;
325 int lastPointIdx = lastPointIdx();
326 if (lastPointIdx <= 0) {
327 index = 1;
328 } else {
329 index = lastPointIdx + 1;
330 }
331
332 currentLine.add(new XY(iy, iz, index));
333 return true;
334 }
335
336
337 /** Called before consuming first line of file. */
338 public void reset() {
339 data.clear();
340 currentLine = new ArrayList<XY>();
341 }
342
343
344 /**
345 * Called for each line. Try to extract info from a da66 line.
346 */
347 @Override
348 protected void handleLine(int lineNum, String line) {
349 String head = line.substring(0,2);
350 if (HEAD_HEAD.equals(head)) {
351 //logger.debug("New station");
352 Matcher m = LINE_PATTERN.matcher(line);
353 if (m.find()) {
354 // Actually matches!
355 // TODO 'move' last line to match river axis
356 // TODO find river axis intersection
357 currentLine = new ArrayList<XY>();
358 double station = stationInKm(Double.parseDouble(m.group(FIELD.STATION.getIdx())));
359 data.put(station, currentLine);
360 }
361 else {
362 logger.error("HEAD line bad.");
363 }
364 }
365 else if (HEAD_GEOM.equals(head)) {
366 Matcher m = LINE_PATTERN.matcher(line);
367 if (m.find()) {
368 //logger.info("Station: " + m.group(FIELD.STATION.getIdx()));
369 // TODO if last station differs, error and abort
370 if (m.group(FIELD.POINT_1_ID.getIdx()) != null) {
371 // Point 1
372 if(addPoint(
373 m.group(FIELD.POINT_1_Y.getIdx()),
374 m.group(FIELD.POINT_1_Z.getIdx()),
375 m.group(FIELD.POINT_1_ID.getIdx()))) {
376 // Point added.
377 }
378 else {
379 // Problematic point.
380 logger.error("A point could not be added");
381 }
382 }
383 if (m.group(FIELD.POINT_2_ID.getIdx()) != null) {
384 // Point 2
385 if(addPoint(
386 m.group(FIELD.POINT_2_Y.getIdx()),
387 m.group(FIELD.POINT_2_Z.getIdx()),
388 m.group(FIELD.POINT_2_ID.getIdx()))) {
389 // Point added.
390 }
391 else {
392 // Problematic point.
393 logger.error("A point could not be added");
394 }
395 }
396 if (m.group(FIELD.POINT_3_ID.getIdx()) != null) {
397 // Point 3
398 if(addPoint(
399 m.group(FIELD.POINT_3_Y.getIdx()),
400 m.group(FIELD.POINT_3_Z.getIdx()),
401 m.group(FIELD.POINT_3_ID.getIdx()))) {
402 // Point added.
403 }
404 else {
405 // Problematic point.
406 logger.error("A point could not be added");
407 }
408 }
409 if (m.group(FIELD.POINT_4_ID.getIdx()) != null) {
410 // Point 4
411 if(addPoint(
412 m.group(FIELD.POINT_4_Y.getIdx()),
413 m.group(FIELD.POINT_4_Z.getIdx()),
414 m.group(FIELD.POINT_4_ID.getIdx()))) {
415 // Point added.
416 }
417 else {
418 // Problematic point.
419 logger.error("A point could not be added");
420 }
421 }
422 }
423 else {
424 logger.warn("Line could not be parsed: ");
425 logger.warn(line);
426 }
427 }
428 else if (HEAD_GEOM.equals(head)) {
429 logger.debug("Hit a 88");
430 }
431 else {
432 logger.error("Do not know how to treat da66 line:");
433 logger.error(line);
434 }
435 }
436
437
438 /** Called when file is fully consumed. */
439 @Override
440 protected void finish() {
441 // TODO 'move' last line to match river axis
442 logger.info("Parsed " + data.size() + " lines");
443 }
444
445
446 /** Parses files given as arguments. */
447 public static void main(String [] args) {
448
449 DA66Parser parser = new DA66Parser();
450
451 logger.warn("Start parsing files.");
452 for (String arg: args) {
453 parser.parseDA66s(new File(arg), null);
454 logger.warn("Parsing a file.");
455 }
456 logger.error("Finished parsing files.");
457 }
458 }
459 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org