comparison backend/src/main/java/org/dive4elements/river/importer/parsers/SedimentLoadLSParser.java @ 8032:fd3a24336e6a

SCHEMA CHANGE and importer (only longitudinal section data so far): only the grain fractions now defined in the schema are valid. We are handling sediment loads, not sediment yields.
author "Tom Gottfried <tom@intevation.de>"
date Mon, 14 Jul 2014 15:36:44 +0200
parents 6954ac9b7591
children 9342d7fe0ee7
comparison
equal deleted inserted replaced
8031:6954ac9b7591 8032:fd3a24336e6a
19 import java.util.regex.Matcher; 19 import java.util.regex.Matcher;
20 import java.util.regex.Pattern; 20 import java.util.regex.Pattern;
21 21
22 import org.apache.log4j.Logger; 22 import org.apache.log4j.Logger;
23 23
24 import org.dive4elements.river.importer.ImporterSession;
24 import org.dive4elements.river.importer.ImportGrainFraction; 25 import org.dive4elements.river.importer.ImportGrainFraction;
25 import org.dive4elements.river.importer.ImportSedimentLoadLS; 26 import org.dive4elements.river.importer.ImportSedimentLoadLS;
26 import org.dive4elements.river.importer.ImportSedimentLoadLSValue; 27 import org.dive4elements.river.importer.ImportSedimentLoadLSValue;
27 import org.dive4elements.river.importer.ImportTimeInterval; 28 import org.dive4elements.river.importer.ImportTimeInterval;
28 import org.dive4elements.river.importer.ImportUnit; 29 import org.dive4elements.river.importer.ImportUnit;
30
29 import org.dive4elements.river.model.GrainFraction; 31 import org.dive4elements.river.model.GrainFraction;
32
30 import org.dive4elements.river.utils.DateUtil; 33 import org.dive4elements.river.utils.DateUtil;
31 34 import org.dive4elements.river.utils.EpsilonComparator;
32 35
33 /** Parses Sediment Yield files. */ 36 /** Parses sediment load longitudinal section files. */
34 public class SedimentLoadLSParser extends LineParser { 37 public class SedimentLoadLSParser extends LineParser {
35 38
36 private static final Logger log = 39 private static final Logger log =
37 Logger.getLogger(SedimentLoadLSParser.class); 40 Logger.getLogger(SedimentLoadLSParser.class);
38 41
39 42
40 public static final NumberFormat nf = NumberFormat.getInstance(DEFAULT_LOCALE); 43 public static final NumberFormat nf = NumberFormat.getInstance(DEFAULT_LOCALE);
41 44
42 45
43 public static final String FRAKTION_START = "Fraktion:";
44
45 public static final String FRACTION_COARSE_STR =
46 ".*Grobkorn.*";
47
48 public static final String FRACTION_FINE_MIDDLE_STR =
49 ".*Fein.Mittel.Kies.*";
50
51 public static final String FRACTION_SAND =
52 ".*Sand.*";
53
54 public static final String FRACTION_SUSP_SAND =
55 ".*susp.Sand.*";
56
57 public static final String FRACTION_SUSP_SAND_BED =
58 ".*bettbild.Anteil.susp.Sand.*";
59
60 public static final String FRACTION_SUSP_SAND_BED_EPOCH =
61 ".*susp.Sand.bettbildAnteil.*";
62
63 public static final String FRACTION_SUSPENDED_SEDIMENT =
64 ".*Schwebstoff.*";
65
66 public static final String FRACTION_TOTAL =
67 ".*gesamt.*";
68
69
70 public static final Pattern TIMEINTERVAL_SINGLE = 46 public static final Pattern TIMEINTERVAL_SINGLE =
71 Pattern.compile("\\D*([0-9]+?)\\D*"); 47 Pattern.compile("\\D*([0-9]+?)\\D*");
72 48
73 public static final Pattern TIMEINTERVAL_EPOCH = 49 public static final Pattern TIMEINTERVAL_EPOCH =
74 Pattern.compile("\\D*([0-9]+?)\\s*-\\s*([0-9]+?)\\D*"); 50 Pattern.compile("\\D*([0-9]+?)\\s*-\\s*([0-9]+?)\\D*");
75 51
76 public static final Pattern META_FRACTION = 52 public static final Pattern META_FRACTION =
77 Pattern.compile("^Fraktion: (.*)"); 53 Pattern.compile("^Fraktion: (.*)");
78 54
55 public static final Pattern META_FRACTION_NAME =
56 Pattern.compile("^Fraktionsname: (.*)");
57
79 public static final Pattern META_UNIT = 58 public static final Pattern META_UNIT =
80 Pattern.compile("^Einheit: \\[(.*)\\].*"); 59 Pattern.compile("^Einheit: \\[(.*)\\].*");
81 60
82 public static final Pattern META_COLUMN_NAMES = 61 public static final Pattern META_COLUMN_NAMES =
83 Pattern.compile("^Fluss-km.*"); 62 Pattern.compile("^Fluss-km.*");
84 63
85 public static final Pattern META_GRAIN_FRACTION_A = 64 public static final Pattern META_GRAIN_SIZE =
86 Pattern.compile("\\D*(([0-9]+?,[0-9]+?)\\s*-|([0-9]++)\\s*-)(([0-9]+?,[0-9]+?)|([0-9]++))\\s*([a-zA-Z]+?)\\W*\\D*"); 65 Pattern.compile("([0-9]*,*[0-9]+)-([0-9]*,*[0-9]+) *mm");
87 66
88 public static final Pattern META_GRAIN_FRACTION_B = 67
89 Pattern.compile("(<|>){1}\\s*(\\w++)\\s*(([0-9]+?,[0-9]+?)\\s*-|([0-9]++)\\s*-)(([0-9]+?,[0-9]+?)|([0-9]++))\\s*([a-zA-Z]+?)"); 68 protected List<ImportSedimentLoadLS> sedimentLoadLSs;
90
91 public static final Pattern META_GRAIN_FRACTION_C =
92 Pattern.compile("(<|>){1}\\s*((([0-9]+?,[0-9]+?)|([0-9]++))\\s*(\\w+))");
93
94
95 protected List<ImportSedimentLoadLS> sedimentYields;
96 69
97 protected ImportSedimentLoadLS[] current; 70 protected ImportSedimentLoadLS[] current;
98 71
99 protected ImportGrainFraction grainFraction; 72 protected ImportGrainFraction grainFraction;
100 73
102 75
103 protected String description; 76 protected String description;
104 77
105 protected String[] columnNames; 78 protected String[] columnNames;
106 79
80 private String upper;
81
82 private String lower;
83
107 84
108 public SedimentLoadLSParser() { 85 public SedimentLoadLSParser() {
109 sedimentYields = new ArrayList<ImportSedimentLoadLS>(); 86 sedimentLoadLSs = new ArrayList<ImportSedimentLoadLS>();
110 } 87 }
111 88
112 89
113 @Override 90 @Override
114 public void parse(File file) throws IOException { 91 public void parse(File file) throws IOException {
128 105
129 @Override 106 @Override
130 protected void finish() { 107 protected void finish() {
131 if (current != null) { 108 if (current != null) {
132 for (ImportSedimentLoadLS isy: current) { 109 for (ImportSedimentLoadLS isy: current) {
133 sedimentYields.add(isy); 110 sedimentLoadLSs.add(isy);
134 } 111 }
135 } 112 }
136 113
137 description = null; 114 description = null;
138 } 115 }
154 return; 131 return;
155 } 132 }
156 else if (handleMetaFraction(line)) { 133 else if (handleMetaFraction(line)) {
157 return; 134 return;
158 } 135 }
136 else if (handleMetaFractionName(line)) {
137 return;
138 }
159 else if (handleColumnNames(line)) { 139 else if (handleColumnNames(line)) {
160 return; 140 return;
161 } 141 }
162 else { 142 else {
163 log.warn("SYP: Unknown meta line: '" + line + "'"); 143 log.warn("SLLSP: Unknown meta line: '" + line + "'");
164 } 144 }
165 } 145 }
166 146
167 147
168 protected boolean handleMetaUnit(String line) { 148 protected boolean handleMetaUnit(String line) {
179 159
180 public boolean handleMetaFraction(String line) { 160 public boolean handleMetaFraction(String line) {
181 Matcher m = META_FRACTION.matcher(line); 161 Matcher m = META_FRACTION.matcher(line);
182 162
183 if (m.matches()) { 163 if (m.matches()) {
184 String tmp = m.group(1); 164 String interval = m.group(1);
185 165
186 this.grainFraction = buildGrainFraction(tmp); 166 Matcher sizes = META_GRAIN_SIZE.matcher(interval);
187 167 if (sizes.matches()) {
168 lower = sizes.group(1);
169 upper = sizes.group(2);
170
171 return true;
172 }
173
174 log.warn("SLLSP: Unrecognized grain-size interval. Ignored.");
188 return true; 175 return true;
189 } 176
190 else if (line.startsWith(FRAKTION_START)) { 177 }
191 String newLine = line.replace(FRAKTION_START, "").trim(); 178
192 if (newLine.length() == 0) { 179 return false;
193 log.debug("Found total grain fraction."); 180 }
194 this.grainFraction = new ImportGrainFraction(GrainFraction.UNKNOWN); 181
195 182
183 public boolean handleMetaFractionName(String line) {
184 Matcher m = META_FRACTION_NAME.matcher(line);
185
186 if (m.matches()) {
187 String name = m.group(1);
188
189
190 GrainFraction gf = ImporterSession.getInstance().getGrainFraction(name);
191
192 if (gf != null) {
193
194 if (lower != null && upper != null) {
195 // Validate grain size interval
196 try {
197 Double lowval = nf.parse(lower).doubleValue();
198 Double upval = nf.parse(upper).doubleValue();
199
200 if (EpsilonComparator.CMP.compare(lowval,
201 gf.getLower()) != 0 ||
202 EpsilonComparator.CMP.compare(upval,
203 gf.getUpper()) != 0) {
204 log.warn("SLLSP: Invalid grain size for grain fraction '" +
205 name + "'. Ignored.");
206 }
207 }
208 catch (ParseException pe) {
209 log.warn("SLLSP: Could not parse grain-size interval. Ignored.");
210 }
211 }
212
213 grainFraction = new ImportGrainFraction(gf);
196 return true; 214 return true;
197 } 215 }
216
217 log.error("SLLSP: Unknown grain fraction: '" + name + "'");
198 } 218 }
199 219
200 return false; 220 return false;
201 } 221 }
202 222
205 Matcher m = META_COLUMN_NAMES.matcher(line); 225 Matcher m = META_COLUMN_NAMES.matcher(line);
206 226
207 if (m.matches()) { 227 if (m.matches()) {
208 columnNames = line.split(SEPERATOR_CHAR); 228 columnNames = line.split(SEPERATOR_CHAR);
209 229
210 initializeSedimentYields(); 230 // 'Fluss-km', 'Hinweise' and at least one data column required
231 if (columnNames.length < 3) {
232 log.error("SLLSP: missing columns.");
233 return true;
234 }
235
236 initializeSedimentLoadLSs();
211 237
212 return true; 238 return true;
213 } 239 }
214 240
215 return false; 241 return false;
218 244
219 protected void handleDataLine(String line) { 245 protected void handleDataLine(String line) {
220 String[] vals = line.split(SEPERATOR_CHAR); 246 String[] vals = line.split(SEPERATOR_CHAR);
221 247
222 if (vals == null || vals.length < columnNames.length-1) { 248 if (vals == null || vals.length < columnNames.length-1) {
223 log.warn("SYP: skip invalid data line: '" + line + "'"); 249 log.warn("SLLSP: skip invalid data line: '" + line + "'");
224 return; 250 return;
225 } 251 }
226 252
227 try { 253 try {
228 Double km = nf.parse(vals[0]).doubleValue(); 254 Double km = nf.parse(vals[0]).doubleValue();
236 )); 262 ));
237 } 263 }
238 } 264 }
239 } 265 }
240 catch (ParseException pe) { 266 catch (ParseException pe) {
241 log.warn("SYP: unparseable number in data row '" + line + "':", pe); 267 log.warn("SLLSP: unparseable number in data row '" + line + "':", pe);
242 } 268 }
243 } 269 }
244 270
245 271
246 /** Initialize SedimentYields from columns, set the kind 272 /** Initialize SedimentLoadLSs from columns, set the kind
247 * with respect to file location (offical epoch or not?) */ 273 * with respect to file location (offical epoch or not?) */
248 private void initializeSedimentYields() { 274 private void initializeSedimentLoadLSs() {
249 // skip first column (Fluss-km) and last column (Hinweise) 275 // skip first column (Fluss-km) and last column (Hinweise)
250 current = new ImportSedimentLoadLS[columnNames.length-2]; 276 current = new ImportSedimentLoadLS[columnNames.length-2];
251 277
252 Integer kind; 278 Integer kind;
253 279
286 int year = nf.parse(b.group(1)).intValue(); 312 int year = nf.parse(b.group(1)).intValue();
287 313
288 return new ImportTimeInterval(DateUtil.getStartDateFromYear(year)); 314 return new ImportTimeInterval(DateUtil.getStartDateFromYear(year));
289 } 315 }
290 316
291 log.warn("SYP: Unknown time interval string: '" + column + "'"); 317 log.warn("SLLSP: Unknown time interval string: '" + column + "'");
292 } 318 }
293 catch (ParseException pe) { 319 catch (ParseException pe) {
294 log.warn("SYP: Could not parse years: " + column, pe); 320 log.warn("SLLSP: Could not parse years: " + column, pe);
295 } 321 }
296 322
297 return null; 323 return null;
298 } 324 }
299 325
300 326
301 private ImportGrainFraction buildGrainFraction(String gfStr) { 327 public List<ImportSedimentLoadLS> getSedimentLoadLSs() {
302 Matcher a = META_GRAIN_FRACTION_A.matcher(gfStr); 328 return sedimentLoadLSs;
303 if (a.matches()) {
304 String lowerA = a.group(2);
305 String lowerB = a.group(3);
306
307 String upperA = a.group(4);
308 String upperB = a.group(5);
309
310 String lower = lowerA != null ? lowerA : lowerB;
311 String upper = upperA != null ? upperA : upperB;
312
313 try {
314 return new ImportGrainFraction(
315 getGrainFractionTypeName(this.description),
316 nf.parse(lower).doubleValue(),
317 nf.parse(upper).doubleValue()
318 );
319 }
320 catch (ParseException pe) {
321 log.warn("SYP: Could not parse ranges of: '" + gfStr + "'");
322 }
323 }
324
325 Matcher b = META_GRAIN_FRACTION_B.matcher(gfStr);
326 if (b.matches()) {
327 String lowerA = b.group(4);
328 String lowerB = b.group(5);
329 String upperA = b.group(6);
330 String upperB = b.group(7);
331
332 String lower = lowerA != null ? lowerA : lowerB;
333 String upper = upperA != null ? upperA : upperB;
334
335 try {
336 return new ImportGrainFraction(
337 getGrainFractionTypeName(this.description),
338 nf.parse(lower).doubleValue(),
339 nf.parse(upper).doubleValue()
340 );
341 }
342 catch (ParseException pe) {
343 log.warn("SYP: Could not parse ranges of: '" + gfStr + "'");
344 }
345 }
346
347 Matcher c = META_GRAIN_FRACTION_C.matcher(gfStr);
348 if (c.matches()) {
349 String oper = c.group(1);
350 String valueStr = c.group(3);
351
352 try {
353 Double value = nf.parse(valueStr).doubleValue();
354
355 if (oper.equals(">")) {
356 return new ImportGrainFraction(
357 getGrainFractionTypeName(this.description),
358 value,
359 null
360 );
361 }
362 else {
363 return new ImportGrainFraction(
364 getGrainFractionTypeName(this.description),
365 null,
366 value
367 );
368 }
369 }
370 catch (ParseException pe) {
371 log.warn("SYP: Could not parse ranges of: '" + gfStr + "'");
372 }
373 }
374
375 log.warn("SYP: Unknown grain fraction: '" + gfStr + "'");
376 return new ImportGrainFraction(GrainFraction.UNKNOWN);
377 }
378
379
380 public static String getGrainFractionTypeName(String filename) {
381 if (Pattern.matches(FRACTION_COARSE_STR, filename)) {
382 return GrainFraction.COARSE;
383 }
384 else if (Pattern.matches(FRACTION_FINE_MIDDLE_STR, filename)) {
385 return GrainFraction.FINE_MIDDLE;
386 }
387 else if (Pattern.matches(FRACTION_SUSP_SAND_BED, filename) ||
388 Pattern.matches(FRACTION_SUSP_SAND_BED_EPOCH, filename)) {
389 return GrainFraction.SUSP_SAND_BED;
390 }
391 else if (Pattern.matches(FRACTION_SUSP_SAND, filename)) {
392 return GrainFraction.SUSP_SAND;
393 }
394 else if (Pattern.matches(FRACTION_SAND, filename)) {
395 return GrainFraction.SAND;
396 }
397 else if (Pattern.matches(FRACTION_SUSPENDED_SEDIMENT, filename)) {
398 return GrainFraction.SUSPENDED_SEDIMENT;
399 }
400 else if (Pattern.matches(FRACTION_TOTAL, filename)) {
401 return GrainFraction.TOTAL;
402 }
403 else {
404 log.warn("SYP: Unknown grain fraction '" + filename + "'");
405 return GrainFraction.UNKNOWN;
406 }
407 }
408
409
410 public List<ImportSedimentLoadLS> getSedimentYields() {
411 return sedimentYields;
412 } 329 }
413 } 330 }
414 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 : 331 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org