comparison flys-backend/src/main/java/de/intevation/flys/importer/PRFParser.java @ 1198:661a9304f2f5

PRFParser: Extracted the data. All BfG PRFs are parsed correctly, now. flys-backend/trunk@2300 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author Sascha L. Teichmann <sascha.teichmann@intevation.de>
date Thu, 07 Jul 2011 14:09:54 +0000
parents ce3dacc6ea92
children cc8f770796cb
comparison
equal deleted inserted replaced
1197:ce3dacc6ea92 1198:661a9304f2f5
1 package de.intevation.flys.importer; 1 package de.intevation.flys.importer;
2 2
3 import java.util.Map; 3 import java.util.Map;
4 import java.util.Stack; 4 import java.util.Stack;
5 import java.util.TreeMap; 5 import java.util.TreeMap;
6 import java.util.List;
7 import java.util.ArrayList;
8 import java.util.Collections;
6 9
7 import java.util.regex.Pattern; 10 import java.util.regex.Pattern;
8 import java.util.regex.Matcher; 11 import java.util.regex.Matcher;
9 12
10 import java.io.File; 13 import java.io.File;
27 "\\((\\d+)x\\s*,\\s*(\\d+)\\(" + 30 "\\((\\d+)x\\s*,\\s*(\\d+)\\(" +
28 "\\s*f(\\d+)\\.(\\d+)\\s*,\\s*f(\\d+)\\.(\\d+)\\s*\\)?\\)?"); 31 "\\s*f(\\d+)\\.(\\d+)\\s*,\\s*f(\\d+)\\.(\\d+)\\s*\\)?\\)?");
29 32
30 public static final Pattern KM_PATTERN = 33 public static final Pattern KM_PATTERN =
31 Pattern.compile("\\((\\d+)x\\s*,\\s*f(\\d+)\\.(\\d+)\\s*\\)?"); 34 Pattern.compile("\\((\\d+)x\\s*,\\s*f(\\d+)\\.(\\d+)\\s*\\)?");
35
36 public static final double X_EPSILON = 1e-4;
37
38 public static final class XY
39 implements Comparable<XY>
40 {
41 protected double x;
42 protected double y;
43 protected int index;
44
45 public XY() {
46 }
47
48 public XY(double x, double y, int index) {
49 this.x = x;
50 this.y = y;
51 this.index = index;
52 }
53
54 @Override
55 public int compareTo(XY other) {
56 if (x + X_EPSILON < other.x) return -1;
57 if (x > other.x + X_EPSILON) return +1;
58 if (index < other.index) return -1;
59 if (index > other.index) return +1;
60 return 0;
61 }
62 } // class XY
32 63
33 public static class DataFormat { 64 public static class DataFormat {
34 65
35 protected int deleteChars; 66 protected int deleteChars;
36 protected int maxRepetitions; 67 protected int maxRepetitions;
37 protected int firstIntegerPlaces; 68 protected int firstIntegerPlaces;
38 protected int firstFractionPlaces; 69 protected int firstFractionPlaces;
39 protected int secondIntegerPlaces; 70 protected int secondIntegerPlaces;
40 protected int secondFractionPlaces; 71 protected int secondFractionPlaces;
72
73 protected double firstShift;
74 protected double secondShift;
41 75
42 public DataFormat() { 76 public DataFormat() {
43 } 77 }
44 78
45 public DataFormat(Matcher m) { 79 public DataFormat(Matcher m) {
47 maxRepetitions = Integer.parseInt(m.group(2)); 81 maxRepetitions = Integer.parseInt(m.group(2));
48 firstIntegerPlaces = Integer.parseInt(m.group(3)); 82 firstIntegerPlaces = Integer.parseInt(m.group(3));
49 firstFractionPlaces = Integer.parseInt(m.group(4)); 83 firstFractionPlaces = Integer.parseInt(m.group(4));
50 secondIntegerPlaces = Integer.parseInt(m.group(5)); 84 secondIntegerPlaces = Integer.parseInt(m.group(5));
51 secondFractionPlaces = Integer.parseInt(m.group(6)); 85 secondFractionPlaces = Integer.parseInt(m.group(6));
52 } 86
53 87 firstShift = Math.pow(10, firstFractionPlaces);
54 public boolean extractData(String line, Map<Double, Double> dest) 88 secondShift = Math.pow(10, secondFractionPlaces);
55 throws NumberFormatException 89 }
56 { 90
57 //TODO: Implement me! 91 public int extractData(String line, List<XY> kmData) {
58 return true; 92 int L = line.length();
93 if (L <= deleteChars) {
94 return -1;
95 }
96
97 int pos = deleteChars;
98
99 boolean debug = log.isDebugEnabled();
100
101
102 int rep = 0;
103 for (;rep < maxRepetitions; ++rep) {
104 if (pos >= L || pos + firstIntegerPlaces >= L) {
105 break;
106 }
107 String first = line.substring(
108 pos, pos + firstIntegerPlaces);
109
110 String second = line.substring(
111 pos + firstIntegerPlaces,
112 Math.min(L, pos+firstIntegerPlaces+secondIntegerPlaces));
113
114 double x, y;
115 try {
116 x = Double.parseDouble(first);
117 y = Double.parseDouble(second);
118 }
119 catch (NumberFormatException nfe) {
120 // broken line -> subtract from dataset skip
121 return -1;
122 }
123
124 if (first.indexOf('.') < 0) {
125 x /= firstShift;
126 }
127
128 if (firstFractionPlaces > 0) {
129 x = (int)(x*firstShift)/firstShift;
130 }
131
132 if (second.indexOf('.') < 0) {
133 y /= secondShift;
134 }
135
136 if (secondFractionPlaces > 0) {
137 y = (int)(y*secondShift)/secondShift;
138 }
139
140 kmData.add(new XY(x, y, kmData.size()));
141
142 pos += firstIntegerPlaces + secondIntegerPlaces;
143 }
144
145 return rep == maxRepetitions ? 1 : 0;
59 } 146 }
60 } // class DataFormat 147 } // class DataFormat
61 148
62 public static class KMFormat { 149 public static class KMFormat {
150
63 protected int deleteChars; 151 protected int deleteChars;
64 protected int integerPlaces; 152 protected int integerPlaces;
65 protected int fractionPlaces; 153 protected int fractionPlaces;
66 154
67 protected double scale;
68 protected double shift; 155 protected double shift;
69 156
70 public KMFormat() { 157 public KMFormat() {
71 } 158 }
72 159
74 deleteChars = Integer.parseInt(m.group(1)); 161 deleteChars = Integer.parseInt(m.group(1));
75 integerPlaces = Integer.parseInt(m.group(2)); 162 integerPlaces = Integer.parseInt(m.group(2));
76 fractionPlaces = Integer.parseInt(m.group(3)); 163 fractionPlaces = Integer.parseInt(m.group(3));
77 164
78 shift = Math.pow(10, fractionPlaces); 165 shift = Math.pow(10, fractionPlaces);
79 scale = 1d/shift;
80 } 166 }
81 167
82 public double extractKm(String line) throws NumberFormatException { 168 public double extractKm(String line) throws NumberFormatException {
83 169
84 if (line.length() <= deleteChars) { 170 if (line.length() <= deleteChars) {
88 String kmS = 174 String kmS =
89 line.substring(deleteChars, deleteChars+integerPlaces); 175 line.substring(deleteChars, deleteChars+integerPlaces);
90 176
91 double km = Double.parseDouble(kmS.trim()); 177 double km = Double.parseDouble(kmS.trim());
92 178
179 if (kmS.indexOf('.') < 0) {
180 km /= shift;
181 }
182
93 return fractionPlaces > 0 183 return fractionPlaces > 0
94 ? ((int)((scale*km)*shift))/shift 184 ? ((int)(km*shift))/shift
95 : km; 185 : km;
96 } 186 }
97 } // class KMFormat 187 } // class KMFormat
98 188
99 protected Map<Double, Map<Double, Double>> data; 189 protected Map<Double, List<XY>> data;
100 190
101 public PRFParser() { 191 public PRFParser() {
102 data = new TreeMap<Double, Map<Double, Double>>(); 192 data = new TreeMap<Double, List<XY>>();
103 } 193 }
104 194
195 protected void sortLists() {
196 for (List<XY> xy: data.values()) {
197 Collections.sort(xy);
198 }
199 }
105 200
106 public boolean parse(File file) { 201 public boolean parse(File file) {
107 202
108 if (!(file.isFile() && file.canRead())) { 203 if (!(file.isFile() && file.canRead())) {
109 log.warn("cannot open file '" + file + "'"); 204 log.warn("cannot open file '" + file + "'");
204 return false; 299 return false;
205 } 300 }
206 301
207 Double station = Double.valueOf(km); 302 Double station = Double.valueOf(km);
208 303
209 Map<Double, Double> kmData = data.get(station); 304 List<XY> kmData = data.get(station);
210 305
211 if (kmData == null) { 306 if (kmData == null) {
212 log.debug("found new km: " + station); 307 //log.debug("found new km: " + station);
213 kmData = new TreeMap<Double, Double>(); 308 kmData = new ArrayList<XY>();
214 data.put(station, kmData); 309 data.put(station, kmData);
215 } 310 }
216 311
217 try { 312 int c = dataFormat.extractData(line, kmData);
218 if (!dataFormat.extractData(line, kmData)) { 313 if (c < 1) {
219 skip = lineSkipCount; 314 skip = lineSkipCount + c;
220 } 315 }
221 } 316 }
222 catch (NumberFormatException nfe) { 317
223 log.warn("cannot extract data from line " + in.getLineNumber()); 318 // sort all the lists by x and index
224 return false; 319 sortLists();
225 }
226 }
227 } 320 }
228 catch (IOException ioe) { 321 catch (IOException ioe) {
229 log.error(ioe); 322 log.error(ioe);
230 return false; 323 return false;
231 } 324 }

http://dive4elements.wald.intevation.org