Mercurial > dive4elements > river
comparison backend/src/main/java/org/dive4elements/river/importer/common/AbstractParser.java @ 9650:a2a42a6bac6b
Importer (s/u-info) extensions:
outer try/catch around parsing that reports the line number,
catching the parsing exception raised when a line has too few value fields,
parsing error and warning log messages now include the line number,
detecting and rejecting duplicate data series,
clearer distinction between error and warning log messages
author | mschaefer |
---|---|
date | Mon, 23 Mar 2020 14:57:03 +0100 |
parents | ddebd4c2fe93 |
children |
comparison
equal
deleted
inserted
replaced
9649:295b3cb5ebc8 | 9650:a2a42a6bac6b |
---|---|
11 package org.dive4elements.river.importer.common; | 11 package org.dive4elements.river.importer.common; |
12 | 12 |
13 import java.io.File; | 13 import java.io.File; |
14 import java.io.FileInputStream; | 14 import java.io.FileInputStream; |
15 import java.io.FilenameFilter; | 15 import java.io.FilenameFilter; |
16 import java.io.IOException; | |
17 import java.io.InputStreamReader; | 16 import java.io.InputStreamReader; |
18 import java.io.LineNumberReader; | 17 import java.io.LineNumberReader; |
19 import java.math.BigDecimal; | 18 import java.math.BigDecimal; |
20 import java.text.DecimalFormat; | 19 import java.text.DecimalFormat; |
21 import java.text.NumberFormat; | 20 import java.text.NumberFormat; |
28 import java.util.regex.Pattern; | 27 import java.util.regex.Pattern; |
29 | 28 |
30 import org.apache.log4j.Logger; | 29 import org.apache.log4j.Logger; |
31 import org.dive4elements.river.backend.utils.EpsilonComparator; | 30 import org.dive4elements.river.backend.utils.EpsilonComparator; |
32 import org.dive4elements.river.importer.ImportRiver; | 31 import org.dive4elements.river.importer.ImportRiver; |
32 import org.dive4elements.river.importer.ImporterSession; | |
33 import org.hibernate.Session; | |
33 | 34 |
34 /** | 35 /** |
35 * Abstract base class for a parser of one FLYS csv data file.<br /> | 36 * Abstract base class for a parser of one FLYS csv data file.<br /> |
36 * The {@link parse} method creates a SERIES object for the meta data | 37 * The {@link parse} method creates a SERIES object for the meta data |
37 * and a list of KMLINE objects for the km value lines read from the file.<br /> | 38 * and a list of KMLINE objects for the km value lines read from the file.<br /> |
74 | 75 |
75 private static NumberFormat numberFormat = NumberFormat.getInstance(Locale.ROOT); | 76 private static NumberFormat numberFormat = NumberFormat.getInstance(Locale.ROOT); |
76 | 77 |
77 private static DecimalFormat bigDecimalFormat; | 78 private static DecimalFormat bigDecimalFormat; |
78 | 79 |
80 protected static final String INVALID_VALUE_ERROR_FORMAT = "Invalid or missing %s value"; | |
81 | |
79 static { | 82 static { |
80 bigDecimalFormat = (DecimalFormat) NumberFormat.getInstance(Locale.ROOT); | 83 bigDecimalFormat = (DecimalFormat) NumberFormat.getInstance(Locale.ROOT); |
81 bigDecimalFormat.setParseBigDecimal(true); | 84 bigDecimalFormat.setParseBigDecimal(true); |
82 } | 85 } |
83 | 86 |
159 this.columnTitles = new ArrayList<>(); | 162 this.columnTitles = new ArrayList<>(); |
160 this.values = new ArrayList<>(); | 163 this.values = new ArrayList<>(); |
161 } | 164 } |
162 | 165 |
163 | 166 |
164 /***** METHODS *****/ | 167 /***** FILE-METHODS *****/ |
165 | 168 |
166 /** | 169 /** |
167 * Lists all files from a directory having a type extension (starting with dot) | 170 * Lists all files from a directory having a type extension (starting with dot) |
168 */ | 171 */ |
169 protected static List<File> listFiles(final File importDir, final String extension) { | 172 protected static List<File> listFiles(final File importDir, final String extension) { |
179 fl.add(file); | 182 fl.add(file); |
180 return fl; | 183 return fl; |
181 } | 184 } |
182 | 185 |
183 /** | 186 /** |
187 * Lists all files from a directory matching a file name pattern | |
188 */ | |
189 protected static List<File> listFiles(final File importDir, final Pattern filenamePattern) { | |
190 final File[] files = importDir.listFiles(new FilenameFilter() { | |
191 @Override | |
192 public boolean accept(final File dir, final String name) { | |
193 return filenamePattern.matcher(name).matches(); | |
194 } | |
195 }); | |
196 final List<File> fl = new ArrayList<>(); | |
197 if (files != null) | |
198 for (final File file : files) | |
199 fl.add(file); | |
200 return fl; | |
201 } | |
202 | |
203 /***** PARSE-METHODS *****/ | |
204 | |
205 /** | |
184 * Parses a file and adds series and values to the parser's collection | 206 * Parses a file and adds series and values to the parser's collection |
185 */ | 207 */ |
186 @Override | 208 @Override |
187 public void parse() throws IOException { | 209 public void parse() throws Exception { |
188 logStartInfo(); | 210 logStartInfo(); |
189 this.seriesHeader = createSeriesImport(this.importPath.getName().replaceAll("\\.csv", "")); | 211 this.seriesHeader = createSeriesImport(this.importPath.getName().replaceAll("\\.csv", "")); |
190 this.metaPatternsMatched.clear(); | 212 this.metaPatternsMatched.clear(); |
191 this.kmExists.clear(); | 213 this.kmExists.clear(); |
192 this.headerParsingState = ParsingState.CONTINUE; | 214 this.headerParsingState = ParsingState.CONTINUE; |
193 try { | 215 try { |
194 try { | 216 try { |
195 this.in = new LineNumberReader(new InputStreamReader(new FileInputStream(this.importPath), ENCODING)); | 217 this.in = new LineNumberReader(new InputStreamReader(new FileInputStream(this.importPath), ENCODING)); |
196 } | 218 } |
197 catch (final Exception e) { | 219 catch (final Exception e) { |
198 logError("Could not open (" + e.getMessage() + ")"); | 220 logError("Could not open (%s)", e.getMessage()); |
199 this.headerParsingState = ParsingState.STOP; | 221 this.headerParsingState = ParsingState.STOP; |
200 } | 222 } |
201 this.currentLine = null; | 223 try { |
202 while (this.headerParsingState != ParsingState.STOP) { | 224 this.currentLine = null; |
203 this.currentLine = this.in.readLine(); | 225 while (this.headerParsingState != ParsingState.STOP) { |
204 if (this.currentLine == null) | 226 this.currentLine = this.in.readLine(); |
205 break; | 227 if (this.currentLine == null) |
206 this.currentLine = this.currentLine.trim(); | 228 break; |
207 if (this.currentLine.isEmpty()) | 229 this.currentLine = this.currentLine.trim(); |
208 continue; | 230 if (this.currentLine.isEmpty()) |
209 if (this.headerParsingState == ParsingState.CONTINUE) | 231 continue; |
210 handleMetaLine(); | 232 if (this.headerParsingState == ParsingState.CONTINUE) { |
211 else | 233 handleMetaLine(); |
212 handleDataLine(); | 234 if (this.headerParsingState == ParsingState.DONE) |
213 } | 235 checkMetaData(); |
214 if (this.headerParsingState != ParsingState.STOP) | 236 } |
215 getLog().info("Number of values found: " + this.seriesHeader.getValueCount()); | 237 else |
238 handleDataLine(); | |
239 } | |
240 if (this.headerParsingState != ParsingState.STOP) | |
241 getLog().info(String.format("Number of values found: %d", this.seriesHeader.getValueCount())); | |
242 } | |
243 catch (final Exception e) { | |
244 throw new Exception(String.format("Parsing error (last read line: %d)", this.in.getLineNumber() + 1), e); | |
245 } | |
216 } | 246 } |
217 finally { | 247 finally { |
218 if (this.in != null) { | 248 if (this.in != null) { |
219 this.in.close(); | 249 this.in.close(); |
220 this.in = null; | 250 this.in = null; |
226 | 256 |
227 /** | 257 /** |
228 * Writes the parse start info to the log | 258 * Writes the parse start info to the log |
229 */ | 259 */ |
230 protected void logStartInfo() { | 260 protected void logStartInfo() { |
231 getLog().info("Start parsing:;'" + this.rootRelativePath + "'"); | 261 getLog().info(String.format("Start parsing:;'%s'", this.rootRelativePath)); |
232 } | |
233 | |
234 /** | |
235 * Stores the parsed series and values in the database | |
236 */ | |
237 @Override | |
238 public void store() { | |
239 if (this.headerParsingState != ParsingState.STOP) { | |
240 this.seriesHeader.store(this.river.getPeer()); | |
241 final String counts = String.format("parse=%d, insert=%d, update/ignore=%d", this.seriesHeader.getValueCount(), | |
242 this.seriesHeader.getValueStoreCount(StoreMode.INSERT), this.seriesHeader.getValueStoreCount(StoreMode.UPDATE)); | |
243 if (this.seriesHeader.getValueCount() > this.seriesHeader.getValueStoreCount(StoreMode.INSERT)) | |
244 logWarning("Number of value inserts less than number parsed: " + counts); | |
245 else | |
246 getLog().info("Number of values records: " + counts); | |
247 } | |
248 else | |
249 logWarning("Severe parsing errors, not storing series '" + this.seriesHeader.getFilename() + "'"); | |
250 } | 262 } |
251 | 263 |
252 /** | 264 /** |
253 * Strips separator chars from a meta info text, and trims leading and trailing whitespace | 265 * Strips separator chars from a meta info text, and trims leading and trailing whitespace |
254 */ | 266 */ |
257 } | 269 } |
258 | 270 |
259 /** | 271 /** |
260 * Parses a number string with dot or comma as decimal char, and returning null in case of an error | 272 * Parses a number string with dot or comma as decimal char, and returning null in case of an error |
261 */ | 273 */ |
262 public static Number parseDoubleWithNull(final String text) { | 274 public static Number parseDoubleCheckNull(final String[] values, final int index) { |
275 if (index > values.length - 1) | |
276 return null; | |
263 try { | 277 try { |
264 return parseDouble(text); | 278 return parseDouble(values[index]); |
265 } | 279 } |
266 catch (final Exception e) { | 280 catch (final Exception e) { |
267 return null; | 281 return null; |
268 } | 282 } |
269 } | 283 } |
271 /** | 285 /** |
272 * Parses a number string with dot or comma as decimal char | 286 * Parses a number string with dot or comma as decimal char |
273 * | 287 * |
274 * @throws ParseException | 288 * @throws ParseException |
275 */ | 289 */ |
276 public static Number parseDouble(final String text) throws ParseException { | 290 private static Number parseDouble(final String text) throws ParseException { |
277 return numberFormat.parse(text.replace(',', '.')); | 291 return numberFormat.parse(text.replace(',', '.')); |
292 } | |
293 | |
294 /** | |
295 * Parses an integer number string , and returning null in case of an error | |
296 */ | |
297 public static Integer parseIntegerCheckNull(final String[] values, final int index) { | |
298 if (index > values.length - 1) | |
299 return null; | |
300 try { | |
301 return Integer.valueOf((values[index])); | |
302 } | |
303 catch (final Exception e) { | |
304 return null; | |
305 } | |
278 } | 306 } |
279 | 307 |
280 /** | 308 /** |
281 * Parses a number string as a BigDecimal, replacing a comma with a dot first | 309 * Parses a number string as a BigDecimal, replacing a comma with a dot first |
282 */ | 310 */ |
283 public static BigDecimal parseDecimal(final String text) throws ParseException { | 311 public static BigDecimal parseDecimal(final String text) throws ParseException { |
284 return (BigDecimal) bigDecimalFormat.parse(text.replace(',', '.')); | 312 return (BigDecimal) bigDecimalFormat.parse(text.replace(',', '.')); |
285 } | 313 } |
286 | 314 |
287 /** | 315 /** |
288 * Gets the class's logger | |
289 */ | |
290 protected abstract Logger getLog(); | |
291 | |
292 /** | |
293 * Logs an error message, appending the relative file path | |
294 */ | |
295 protected void logError(final String message) { | |
296 getLog().error(message + ";" + this.rootRelativePath); | |
297 } | |
298 | |
299 /** | |
300 * Logs a warning message, appending the relative file path | |
301 */ | |
302 protected void logWarning(final String message) { | |
303 getLog().warn(message + ";" + this.rootRelativePath); | |
304 } | |
305 | |
306 /** | |
307 * Logs an info message, appending the relative file path | |
308 */ | |
309 protected void logInfo(final String message) { | |
310 getLog().info(message + ";" + this.rootRelativePath); | |
311 } | |
312 | |
313 /** | |
314 * Logs a debug message, appending the relative file path | |
315 */ | |
316 protected void logDebug(final String message) { | |
317 getLog().debug(message + ";" + this.rootRelativePath); | |
318 } | |
319 | |
320 /** | |
321 * Logs a trace message, appending the relative file path | |
322 */ | |
323 protected void logTrace(final String message) { | |
324 getLog().trace(message + ";" + this.rootRelativePath); | |
325 } | |
326 | |
327 /** | |
328 * Creates a new series import object | 316 * Creates a new series import object |
329 */ | 317 */ |
330 protected abstract HEADER createSeriesImport(final String filename); | 318 protected abstract HEADER createSeriesImport(final String filename); |
319 | |
320 | |
321 /***** METAHEADER-PARSE-METHODS *****/ | |
331 | 322 |
332 protected void handleMetaLine() { | 323 protected void handleMetaLine() { |
333 if (META_SUBGROUP.matcher(this.currentLine).matches()) | 324 if (META_SUBGROUP.matcher(this.currentLine).matches()) |
334 return; | 325 return; |
335 else if (handleMetaRivername()) | 326 else if (handleMetaRivername()) |
346 return; | 337 return; |
347 } | 338 } |
348 else { | 339 else { |
349 if (this.currentLine.startsWith(START_META_CHAR)) { | 340 if (this.currentLine.startsWith(START_META_CHAR)) { |
350 if (this.headerParsingState != ParsingState.IGNORE) | 341 if (this.headerParsingState != ParsingState.IGNORE) |
351 logWarning("Not matching any known meta type in line " + this.in.getLineNumber() + ", ignored"); | 342 logLineWarning("Not matching any known meta type"); |
352 else | 343 else |
353 this.headerParsingState = ParsingState.CONTINUE; | 344 this.headerParsingState = ParsingState.CONTINUE; |
354 } | 345 } |
346 else | |
347 this.headerParsingState = ParsingState.DONE; // no more meta data expected, if neither meta line nor empty line | |
355 } | 348 } |
356 } | 349 } |
357 | 350 |
358 private boolean handleMetaRivername() { | 351 private boolean handleMetaRivername() { |
359 if (META_RIVERNAME.matcher(this.currentLine).matches()) { | 352 if (META_RIVERNAME.matcher(this.currentLine).matches()) { |
394 } | 387 } |
395 | 388 |
396 /** | 389 /** |
397 * Parses a header line for the km table column header line | 390 * Parses a header line for the km table column header line |
398 * | 391 * |
399 * @return Whether the line has been handled and we are ready for reading the km values lines | 392 * @return Whether the line has been handled (also in case of State=STOP),<br> |
393 * and we are ready for reading the km values lines (or cancel parsing) | |
400 */ | 394 */ |
401 protected boolean handleMetaColumnTitles() { | 395 protected boolean handleMetaColumnTitles() { |
402 if (META_COLUMNTITLES.matcher(this.currentLine).matches()) { | 396 if (META_COLUMNTITLES.matcher(this.currentLine).matches()) { |
403 this.metaPatternsMatched.add(META_COLUMNTITLES); | 397 this.metaPatternsMatched.add(META_COLUMNTITLES); |
404 this.columnTitles.clear(); | 398 this.columnTitles.clear(); |
409 } | 403 } |
410 return false; | 404 return false; |
411 } | 405 } |
412 | 406 |
413 /** | 407 /** |
408 * Check meta data after all meta data lines (#) have been read | |
409 */ | |
410 protected boolean checkMetaData() { | |
411 if (this.columnTitles.size() <= 1) { | |
412 logError("No valid header line with column titles found"); | |
413 this.headerParsingState = ParsingState.STOP; | |
414 return false; | |
415 } | |
416 if (checkSeriesExistsAlready()) { | |
417 logError("Data series/filename exists already in the database"); | |
418 this.headerParsingState = ParsingState.STOP; | |
419 return false; | |
420 } | |
421 return true; | |
422 } | |
423 | |
424 /** | |
425 * Checks the existence of the active series in the database | |
426 */ | |
427 protected boolean checkSeriesExistsAlready() { | |
428 if (!checkRiverExists()) | |
429 return false; | |
430 final Session session = ImporterSession.getInstance().getDatabaseSession(); | |
431 final List<DB_SERIES> rows = this.seriesHeader.querySeriesItem(session, this.river.getPeer(), true); | |
432 return !rows.isEmpty(); | |
433 } | |
434 | |
435 /** | |
436 * Checks the existence of the active river in the database | |
437 */ | |
438 protected boolean checkRiverExists() { | |
439 return (this.river.getPeer(false) != null); | |
440 } | |
441 | |
442 | |
443 /***** VALUELINE-PARSE-METHODS *****/ | |
444 | |
445 /** | |
414 * Parses a values line and adds the values record | 446 * Parses a values line and adds the values record |
415 */ | 447 */ |
416 protected void handleDataLine() { | 448 protected void handleDataLine() { |
417 final String[] values = this.currentLine.split(SEPARATOR_CHAR, 0); | 449 final String[] values = this.currentLine.split(SEPARATOR_CHAR, 0); |
418 // Skip import line without data or only km | 450 // Skip import line without data or only km |
419 if (values.length < 2) | 451 if (values.length < 2) { |
452 logLineWarning("Too few data"); | |
420 return; | 453 return; |
454 } | |
421 Double km = Double.NaN; | 455 Double km = Double.NaN; |
422 if (kmMode() != KmMode.NONE) { | 456 if (kmMode() != KmMode.NONE) { |
423 try { | 457 try { |
424 km = Double.valueOf(parseDouble(values[0]).doubleValue()); | 458 km = Double.valueOf(parseDouble(values[0]).doubleValue()); |
425 if (kmMode() == KmMode.UNIQUE) { | 459 if (kmMode() == KmMode.UNIQUE) { |
426 if (this.kmExists.contains(km)) { | 460 if (this.kmExists.contains(km)) { |
427 logWarning("Ignoring duplicate station '" + values[0] + "' in line " + this.in.getLineNumber()); | 461 logLineWarning("Duplicate km '%s'", values[0]); |
428 return; | 462 return; |
429 } | 463 } |
430 this.kmExists.add(km); | 464 this.kmExists.add(km); |
431 } | 465 } |
432 } | 466 } |
433 catch (final Exception e) { | 467 catch (final Exception e) { |
434 logError("Not parseable km in line " + this.in.getLineNumber() + ": " + e.getMessage()); | 468 logLineWarning("Invalid km: %s", e.getMessage()); |
435 return; | 469 return; |
436 } | 470 } |
437 } | 471 } |
438 final KMLINE value = createKmLineImport(km, values); | 472 final KMLINE value = createKmLineImport(km, values); |
439 if (value != null) | 473 if (value != null) { |
440 this.seriesHeader.addValue(value); | 474 final boolean added = this.seriesHeader.addValue(value); |
475 if (!added) | |
476 logLineWarning("Duplicate data line"); | |
477 } | |
441 } | 478 } |
442 | 479 |
443 /** | 480 /** |
444 * How {@link handleDataLine} shall handle the km column (if any) | 481 * How {@link handleDataLine} shall handle the km column (if any) |
445 */ | 482 */ |
452 * the km has been validated | 489 * the km has been validated |
453 * | 490 * |
454 * @return value item, or null if parse error | 491 * @return value item, or null if parse error |
455 */ | 492 */ |
456 protected abstract KMLINE createKmLineImport(final Double km, final String[] values); | 493 protected abstract KMLINE createKmLineImport(final Double km, final String[] values); |
494 | |
495 | |
496 /***** STORE-METHODS *****/ | |
497 | |
498 /** | |
499 * Stores the parsed series and values in the database | |
500 */ | |
501 @Override | |
502 public void store() { | |
503 if (this.headerParsingState != ParsingState.STOP) { | |
504 this.seriesHeader.store(this.river.getPeer()); | |
505 final String counts = String.format("parse=%d, insert=%d, update/ignore=%d", this.seriesHeader.getValueCount(), | |
506 this.seriesHeader.getValueStoreCount(StoreMode.INSERT), this.seriesHeader.getValueStoreCount(StoreMode.UPDATE)); | |
507 if (this.seriesHeader.getValueCount() > this.seriesHeader.getValueStoreCount(StoreMode.INSERT)) | |
508 logWarning("Number of value inserts less than number parsed: %s", counts); | |
509 else | |
510 getLog().info("Number of values records: " + counts); | |
511 } | |
512 else | |
513 logWarning("Severe parsing errors, not storing series '%s'", this.seriesHeader.getFilename()); | |
514 } | |
515 | |
516 | |
517 /***** LOG-METHODS *****/ | |
518 | |
519 /** | |
520 * Gets the class's logger | |
521 */ | |
522 protected abstract Logger getLog(); | |
523 | |
524 /** | |
525 * Logs an error message, appending the relative file path | |
526 */ | |
527 protected void logError(final String message) { | |
528 getLog().error(buildLogMessage(message)); | |
529 } | |
530 | |
531 /** | |
532 * Logs an error message, appending the relative file path | |
533 */ | |
534 protected void logError(final String format, final Object... args) { | |
535 getLog().error(buildLogMessage(String.format(format, args))); | |
536 } | |
537 | |
538 /** | |
539 * Logs an error message with current line number, appending the relative file path | |
540 */ | |
541 protected void logLineError(final String message) { | |
542 getLog().error(buildLineLogMessage(message)); | |
543 } | |
544 | |
545 /** | |
546 * Logs an error message with current line number, appending the relative file path | |
547 */ | |
548 protected void logLineError(final String format, final Object... args) { | |
549 getLog().error(buildLineLogMessage(String.format(format, args))); | |
550 } | |
551 | |
552 /** | |
553 * Logs a warning message, appending the relative file path | |
554 */ | |
555 protected void logWarning(final String message) { | |
556 getLog().warn(buildLogMessage(message)); | |
557 } | |
558 | |
559 /** | |
560 * Logs a warning message, appending the relative file path | |
561 */ | |
562 protected void logWarning(final String format, final Object... args) { | |
563 getLog().warn(buildLogMessage(String.format(format, args))); | |
564 } | |
565 | |
566 /** | |
567 * Logs a warning message, appending the line number and the relative file path | |
568 */ | |
569 protected void logLineWarning(final String message) { | |
570 getLog().warn(buildLineLogMessage(message)); | |
571 } | |
572 | |
573 /** | |
574 * Logs a warning message, appending the line number and the relative file path | |
575 */ | |
576 protected void logLineWarning(final String format, final Object... args) { | |
577 getLog().warn(buildLineLogMessage(String.format(format, args))); | |
578 } | |
579 | |
580 /** | |
581 * Logs an info message, appending the relative file path | |
582 */ | |
583 protected void logInfo(final String message) { | |
584 getLog().info(buildLogMessage(message)); | |
585 } | |
586 | |
587 /** | |
588 * Logs a debug message, appending the relative file path | |
589 */ | |
590 protected void logDebug(final String message) { | |
591 getLog().debug(buildLogMessage(message)); | |
592 } | |
593 | |
594 /** | |
595 * Logs a trace message, appending the relative file path | |
596 */ | |
597 protected void logTrace(final String message) { | |
598 getLog().trace(buildLogMessage(message)); | |
599 } | |
600 | |
601 private String buildLogMessage(final String message) { | |
602 return String.format("%s;%s", message, this.rootRelativePath); | |
603 } | |
604 | |
605 private String buildLineLogMessage(final String message) { | |
606 return String.format("Line %d: %s;%s", this.in.getLineNumber(), message, this.rootRelativePath); | |
607 } | |
457 } | 608 } |