Mercurial > dive4elements > river
comparison backend/src/main/java/org/dive4elements/river/importer/parsers/SedimentLoadLSParser.java @ 8032:fd3a24336e6a
SCHEMA CHANGE and importer update (longitudinal section data only, so far): only the grain fractions now defined in the schema are valid. We are handling sediment loads, not yields.
author | "Tom Gottfried <tom@intevation.de>" |
---|---|
date | Mon, 14 Jul 2014 15:36:44 +0200 |
parents | 6954ac9b7591 |
children | 9342d7fe0ee7 |
comparison
equal
deleted
inserted
replaced
8031:6954ac9b7591 | 8032:fd3a24336e6a |
---|---|
19 import java.util.regex.Matcher; | 19 import java.util.regex.Matcher; |
20 import java.util.regex.Pattern; | 20 import java.util.regex.Pattern; |
21 | 21 |
22 import org.apache.log4j.Logger; | 22 import org.apache.log4j.Logger; |
23 | 23 |
24 import org.dive4elements.river.importer.ImporterSession; | |
24 import org.dive4elements.river.importer.ImportGrainFraction; | 25 import org.dive4elements.river.importer.ImportGrainFraction; |
25 import org.dive4elements.river.importer.ImportSedimentLoadLS; | 26 import org.dive4elements.river.importer.ImportSedimentLoadLS; |
26 import org.dive4elements.river.importer.ImportSedimentLoadLSValue; | 27 import org.dive4elements.river.importer.ImportSedimentLoadLSValue; |
27 import org.dive4elements.river.importer.ImportTimeInterval; | 28 import org.dive4elements.river.importer.ImportTimeInterval; |
28 import org.dive4elements.river.importer.ImportUnit; | 29 import org.dive4elements.river.importer.ImportUnit; |
30 | |
29 import org.dive4elements.river.model.GrainFraction; | 31 import org.dive4elements.river.model.GrainFraction; |
32 | |
30 import org.dive4elements.river.utils.DateUtil; | 33 import org.dive4elements.river.utils.DateUtil; |
31 | 34 import org.dive4elements.river.utils.EpsilonComparator; |
32 | 35 |
33 /** Parses Sediment Yield files. */ | 36 /** Parses sediment load longitudinal section files. */ |
34 public class SedimentLoadLSParser extends LineParser { | 37 public class SedimentLoadLSParser extends LineParser { |
35 | 38 |
36 private static final Logger log = | 39 private static final Logger log = |
37 Logger.getLogger(SedimentLoadLSParser.class); | 40 Logger.getLogger(SedimentLoadLSParser.class); |
38 | 41 |
39 | 42 |
40 public static final NumberFormat nf = NumberFormat.getInstance(DEFAULT_LOCALE); | 43 public static final NumberFormat nf = NumberFormat.getInstance(DEFAULT_LOCALE); |
41 | 44 |
42 | 45 |
43 public static final String FRAKTION_START = "Fraktion:"; | |
44 | |
45 public static final String FRACTION_COARSE_STR = | |
46 ".*Grobkorn.*"; | |
47 | |
48 public static final String FRACTION_FINE_MIDDLE_STR = | |
49 ".*Fein.Mittel.Kies.*"; | |
50 | |
51 public static final String FRACTION_SAND = | |
52 ".*Sand.*"; | |
53 | |
54 public static final String FRACTION_SUSP_SAND = | |
55 ".*susp.Sand.*"; | |
56 | |
57 public static final String FRACTION_SUSP_SAND_BED = | |
58 ".*bettbild.Anteil.susp.Sand.*"; | |
59 | |
60 public static final String FRACTION_SUSP_SAND_BED_EPOCH = | |
61 ".*susp.Sand.bettbildAnteil.*"; | |
62 | |
63 public static final String FRACTION_SUSPENDED_SEDIMENT = | |
64 ".*Schwebstoff.*"; | |
65 | |
66 public static final String FRACTION_TOTAL = | |
67 ".*gesamt.*"; | |
68 | |
69 | |
70 public static final Pattern TIMEINTERVAL_SINGLE = | 46 public static final Pattern TIMEINTERVAL_SINGLE = |
71 Pattern.compile("\\D*([0-9]+?)\\D*"); | 47 Pattern.compile("\\D*([0-9]+?)\\D*"); |
72 | 48 |
73 public static final Pattern TIMEINTERVAL_EPOCH = | 49 public static final Pattern TIMEINTERVAL_EPOCH = |
74 Pattern.compile("\\D*([0-9]+?)\\s*-\\s*([0-9]+?)\\D*"); | 50 Pattern.compile("\\D*([0-9]+?)\\s*-\\s*([0-9]+?)\\D*"); |
75 | 51 |
76 public static final Pattern META_FRACTION = | 52 public static final Pattern META_FRACTION = |
77 Pattern.compile("^Fraktion: (.*)"); | 53 Pattern.compile("^Fraktion: (.*)"); |
78 | 54 |
55 public static final Pattern META_FRACTION_NAME = | |
56 Pattern.compile("^Fraktionsname: (.*)"); | |
57 | |
79 public static final Pattern META_UNIT = | 58 public static final Pattern META_UNIT = |
80 Pattern.compile("^Einheit: \\[(.*)\\].*"); | 59 Pattern.compile("^Einheit: \\[(.*)\\].*"); |
81 | 60 |
82 public static final Pattern META_COLUMN_NAMES = | 61 public static final Pattern META_COLUMN_NAMES = |
83 Pattern.compile("^Fluss-km.*"); | 62 Pattern.compile("^Fluss-km.*"); |
84 | 63 |
85 public static final Pattern META_GRAIN_FRACTION_A = | 64 public static final Pattern META_GRAIN_SIZE = |
86 Pattern.compile("\\D*(([0-9]+?,[0-9]+?)\\s*-|([0-9]++)\\s*-)(([0-9]+?,[0-9]+?)|([0-9]++))\\s*([a-zA-Z]+?)\\W*\\D*"); | 65 Pattern.compile("([0-9]*,*[0-9]+)-([0-9]*,*[0-9]+) *mm"); |
87 | 66 |
88 public static final Pattern META_GRAIN_FRACTION_B = | 67 |
89 Pattern.compile("(<|>){1}\\s*(\\w++)\\s*(([0-9]+?,[0-9]+?)\\s*-|([0-9]++)\\s*-)(([0-9]+?,[0-9]+?)|([0-9]++))\\s*([a-zA-Z]+?)"); | 68 protected List<ImportSedimentLoadLS> sedimentLoadLSs; |
90 | |
91 public static final Pattern META_GRAIN_FRACTION_C = | |
92 Pattern.compile("(<|>){1}\\s*((([0-9]+?,[0-9]+?)|([0-9]++))\\s*(\\w+))"); | |
93 | |
94 | |
95 protected List<ImportSedimentLoadLS> sedimentYields; | |
96 | 69 |
97 protected ImportSedimentLoadLS[] current; | 70 protected ImportSedimentLoadLS[] current; |
98 | 71 |
99 protected ImportGrainFraction grainFraction; | 72 protected ImportGrainFraction grainFraction; |
100 | 73 |
102 | 75 |
103 protected String description; | 76 protected String description; |
104 | 77 |
105 protected String[] columnNames; | 78 protected String[] columnNames; |
106 | 79 |
80 private String upper; | |
81 | |
82 private String lower; | |
83 | |
107 | 84 |
108 public SedimentLoadLSParser() { | 85 public SedimentLoadLSParser() { |
109 sedimentYields = new ArrayList<ImportSedimentLoadLS>(); | 86 sedimentLoadLSs = new ArrayList<ImportSedimentLoadLS>(); |
110 } | 87 } |
111 | 88 |
112 | 89 |
113 @Override | 90 @Override |
114 public void parse(File file) throws IOException { | 91 public void parse(File file) throws IOException { |
128 | 105 |
129 @Override | 106 @Override |
130 protected void finish() { | 107 protected void finish() { |
131 if (current != null) { | 108 if (current != null) { |
132 for (ImportSedimentLoadLS isy: current) { | 109 for (ImportSedimentLoadLS isy: current) { |
133 sedimentYields.add(isy); | 110 sedimentLoadLSs.add(isy); |
134 } | 111 } |
135 } | 112 } |
136 | 113 |
137 description = null; | 114 description = null; |
138 } | 115 } |
154 return; | 131 return; |
155 } | 132 } |
156 else if (handleMetaFraction(line)) { | 133 else if (handleMetaFraction(line)) { |
157 return; | 134 return; |
158 } | 135 } |
136 else if (handleMetaFractionName(line)) { | |
137 return; | |
138 } | |
159 else if (handleColumnNames(line)) { | 139 else if (handleColumnNames(line)) { |
160 return; | 140 return; |
161 } | 141 } |
162 else { | 142 else { |
163 log.warn("SYP: Unknown meta line: '" + line + "'"); | 143 log.warn("SLLSP: Unknown meta line: '" + line + "'"); |
164 } | 144 } |
165 } | 145 } |
166 | 146 |
167 | 147 |
168 protected boolean handleMetaUnit(String line) { | 148 protected boolean handleMetaUnit(String line) { |
179 | 159 |
180 public boolean handleMetaFraction(String line) { | 160 public boolean handleMetaFraction(String line) { |
181 Matcher m = META_FRACTION.matcher(line); | 161 Matcher m = META_FRACTION.matcher(line); |
182 | 162 |
183 if (m.matches()) { | 163 if (m.matches()) { |
184 String tmp = m.group(1); | 164 String interval = m.group(1); |
185 | 165 |
186 this.grainFraction = buildGrainFraction(tmp); | 166 Matcher sizes = META_GRAIN_SIZE.matcher(interval); |
187 | 167 if (sizes.matches()) { |
168 lower = sizes.group(1); | |
169 upper = sizes.group(2); | |
170 | |
171 return true; | |
172 } | |
173 | |
174 log.warn("SLLSP: Unrecognized grain-size interval. Ignored."); | |
188 return true; | 175 return true; |
189 } | 176 |
190 else if (line.startsWith(FRAKTION_START)) { | 177 } |
191 String newLine = line.replace(FRAKTION_START, "").trim(); | 178 |
192 if (newLine.length() == 0) { | 179 return false; |
193 log.debug("Found total grain fraction."); | 180 } |
194 this.grainFraction = new ImportGrainFraction(GrainFraction.UNKNOWN); | 181 |
195 | 182 |
183 public boolean handleMetaFractionName(String line) { | |
184 Matcher m = META_FRACTION_NAME.matcher(line); | |
185 | |
186 if (m.matches()) { | |
187 String name = m.group(1); | |
188 | |
189 | |
190 GrainFraction gf = ImporterSession.getInstance().getGrainFraction(name); | |
191 | |
192 if (gf != null) { | |
193 | |
194 if (lower != null && upper != null) { | |
195 // Validate grain size interval | |
196 try { | |
197 Double lowval = nf.parse(lower).doubleValue(); | |
198 Double upval = nf.parse(upper).doubleValue(); | |
199 | |
200 if (EpsilonComparator.CMP.compare(lowval, | |
201 gf.getLower()) != 0 || | |
202 EpsilonComparator.CMP.compare(upval, | |
203 gf.getUpper()) != 0) { | |
204 log.warn("SLLSP: Invalid grain size for grain fraction '" + | |
205 name + "'. Ignored."); | |
206 } | |
207 } | |
208 catch (ParseException pe) { | |
209 log.warn("SLLSP: Could not parse grain-size interval. Ignored."); | |
210 } | |
211 } | |
212 | |
213 grainFraction = new ImportGrainFraction(gf); | |
196 return true; | 214 return true; |
197 } | 215 } |
216 | |
217 log.error("SLLSP: Unknown grain fraction: '" + name + "'"); | |
198 } | 218 } |
199 | 219 |
200 return false; | 220 return false; |
201 } | 221 } |
202 | 222 |
205 Matcher m = META_COLUMN_NAMES.matcher(line); | 225 Matcher m = META_COLUMN_NAMES.matcher(line); |
206 | 226 |
207 if (m.matches()) { | 227 if (m.matches()) { |
208 columnNames = line.split(SEPERATOR_CHAR); | 228 columnNames = line.split(SEPERATOR_CHAR); |
209 | 229 |
210 initializeSedimentYields(); | 230 // 'Fluss-km', 'Hinweise' and at least one data column required |
231 if (columnNames.length < 3) { | |
232 log.error("SLLSP: missing columns."); | |
233 return true; | |
234 } | |
235 | |
236 initializeSedimentLoadLSs(); | |
211 | 237 |
212 return true; | 238 return true; |
213 } | 239 } |
214 | 240 |
215 return false; | 241 return false; |
218 | 244 |
219 protected void handleDataLine(String line) { | 245 protected void handleDataLine(String line) { |
220 String[] vals = line.split(SEPERATOR_CHAR); | 246 String[] vals = line.split(SEPERATOR_CHAR); |
221 | 247 |
222 if (vals == null || vals.length < columnNames.length-1) { | 248 if (vals == null || vals.length < columnNames.length-1) { |
223 log.warn("SYP: skip invalid data line: '" + line + "'"); | 249 log.warn("SLLSP: skip invalid data line: '" + line + "'"); |
224 return; | 250 return; |
225 } | 251 } |
226 | 252 |
227 try { | 253 try { |
228 Double km = nf.parse(vals[0]).doubleValue(); | 254 Double km = nf.parse(vals[0]).doubleValue(); |
236 )); | 262 )); |
237 } | 263 } |
238 } | 264 } |
239 } | 265 } |
240 catch (ParseException pe) { | 266 catch (ParseException pe) { |
241 log.warn("SYP: unparseable number in data row '" + line + "':", pe); | 267 log.warn("SLLSP: unparseable number in data row '" + line + "':", pe); |
242 } | 268 } |
243 } | 269 } |
244 | 270 |
245 | 271 |
246 /** Initialize SedimentYields from columns, set the kind | 272 /** Initialize SedimentLoadLSs from columns, set the kind |
247 * with respect to file location (official epoch or not?) */ | 273 * with respect to file location (official epoch or not?) */ |
248 private void initializeSedimentYields() { | 274 private void initializeSedimentLoadLSs() { |
249 // skip first column (Fluss-km) and last column (Hinweise) | 275 // skip first column (Fluss-km) and last column (Hinweise) |
250 current = new ImportSedimentLoadLS[columnNames.length-2]; | 276 current = new ImportSedimentLoadLS[columnNames.length-2]; |
251 | 277 |
252 Integer kind; | 278 Integer kind; |
253 | 279 |
286 int year = nf.parse(b.group(1)).intValue(); | 312 int year = nf.parse(b.group(1)).intValue(); |
287 | 313 |
288 return new ImportTimeInterval(DateUtil.getStartDateFromYear(year)); | 314 return new ImportTimeInterval(DateUtil.getStartDateFromYear(year)); |
289 } | 315 } |
290 | 316 |
291 log.warn("SYP: Unknown time interval string: '" + column + "'"); | 317 log.warn("SLLSP: Unknown time interval string: '" + column + "'"); |
292 } | 318 } |
293 catch (ParseException pe) { | 319 catch (ParseException pe) { |
294 log.warn("SYP: Could not parse years: " + column, pe); | 320 log.warn("SLLSP: Could not parse years: " + column, pe); |
295 } | 321 } |
296 | 322 |
297 return null; | 323 return null; |
298 } | 324 } |
299 | 325 |
300 | 326 |
301 private ImportGrainFraction buildGrainFraction(String gfStr) { | 327 public List<ImportSedimentLoadLS> getSedimentLoadLSs() { |
302 Matcher a = META_GRAIN_FRACTION_A.matcher(gfStr); | 328 return sedimentLoadLSs; |
303 if (a.matches()) { | |
304 String lowerA = a.group(2); | |
305 String lowerB = a.group(3); | |
306 | |
307 String upperA = a.group(4); | |
308 String upperB = a.group(5); | |
309 | |
310 String lower = lowerA != null ? lowerA : lowerB; | |
311 String upper = upperA != null ? upperA : upperB; | |
312 | |
313 try { | |
314 return new ImportGrainFraction( | |
315 getGrainFractionTypeName(this.description), | |
316 nf.parse(lower).doubleValue(), | |
317 nf.parse(upper).doubleValue() | |
318 ); | |
319 } | |
320 catch (ParseException pe) { | |
321 log.warn("SYP: Could not parse ranges of: '" + gfStr + "'"); | |
322 } | |
323 } | |
324 | |
325 Matcher b = META_GRAIN_FRACTION_B.matcher(gfStr); | |
326 if (b.matches()) { | |
327 String lowerA = b.group(4); | |
328 String lowerB = b.group(5); | |
329 String upperA = b.group(6); | |
330 String upperB = b.group(7); | |
331 | |
332 String lower = lowerA != null ? lowerA : lowerB; | |
333 String upper = upperA != null ? upperA : upperB; | |
334 | |
335 try { | |
336 return new ImportGrainFraction( | |
337 getGrainFractionTypeName(this.description), | |
338 nf.parse(lower).doubleValue(), | |
339 nf.parse(upper).doubleValue() | |
340 ); | |
341 } | |
342 catch (ParseException pe) { | |
343 log.warn("SYP: Could not parse ranges of: '" + gfStr + "'"); | |
344 } | |
345 } | |
346 | |
347 Matcher c = META_GRAIN_FRACTION_C.matcher(gfStr); | |
348 if (c.matches()) { | |
349 String oper = c.group(1); | |
350 String valueStr = c.group(3); | |
351 | |
352 try { | |
353 Double value = nf.parse(valueStr).doubleValue(); | |
354 | |
355 if (oper.equals(">")) { | |
356 return new ImportGrainFraction( | |
357 getGrainFractionTypeName(this.description), | |
358 value, | |
359 null | |
360 ); | |
361 } | |
362 else { | |
363 return new ImportGrainFraction( | |
364 getGrainFractionTypeName(this.description), | |
365 null, | |
366 value | |
367 ); | |
368 } | |
369 } | |
370 catch (ParseException pe) { | |
371 log.warn("SYP: Could not parse ranges of: '" + gfStr + "'"); | |
372 } | |
373 } | |
374 | |
375 log.warn("SYP: Unknown grain fraction: '" + gfStr + "'"); | |
376 return new ImportGrainFraction(GrainFraction.UNKNOWN); | |
377 } | |
378 | |
379 | |
380 public static String getGrainFractionTypeName(String filename) { | |
381 if (Pattern.matches(FRACTION_COARSE_STR, filename)) { | |
382 return GrainFraction.COARSE; | |
383 } | |
384 else if (Pattern.matches(FRACTION_FINE_MIDDLE_STR, filename)) { | |
385 return GrainFraction.FINE_MIDDLE; | |
386 } | |
387 else if (Pattern.matches(FRACTION_SUSP_SAND_BED, filename) || | |
388 Pattern.matches(FRACTION_SUSP_SAND_BED_EPOCH, filename)) { | |
389 return GrainFraction.SUSP_SAND_BED; | |
390 } | |
391 else if (Pattern.matches(FRACTION_SUSP_SAND, filename)) { | |
392 return GrainFraction.SUSP_SAND; | |
393 } | |
394 else if (Pattern.matches(FRACTION_SAND, filename)) { | |
395 return GrainFraction.SAND; | |
396 } | |
397 else if (Pattern.matches(FRACTION_SUSPENDED_SEDIMENT, filename)) { | |
398 return GrainFraction.SUSPENDED_SEDIMENT; | |
399 } | |
400 else if (Pattern.matches(FRACTION_TOTAL, filename)) { | |
401 return GrainFraction.TOTAL; | |
402 } | |
403 else { | |
404 log.warn("SYP: Unknown grain fraction '" + filename + "'"); | |
405 return GrainFraction.UNKNOWN; | |
406 } | |
407 } | |
408 | |
409 | |
410 public List<ImportSedimentLoadLS> getSedimentYields() { | |
411 return sedimentYields; | |
412 } | 329 } |
413 } | 330 } |
414 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 : | 331 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 : |