comparison backend/src/main/java/org/dive4elements/river/importer/common/AbstractParser.java @ 9650:a2a42a6bac6b

Importer (s/u-info) extensions: outer try/catch for parse and log of line no, catching parsing exception if not enough value fields, parsing error and warning log messages with line number, detecting and rejecting duplicate data series, better differentiation between error and warning log messages
author mschaefer
date Mon, 23 Mar 2020 14:57:03 +0100
parents ddebd4c2fe93
children
comparison
equal deleted inserted replaced
9649:295b3cb5ebc8 9650:a2a42a6bac6b
11 package org.dive4elements.river.importer.common; 11 package org.dive4elements.river.importer.common;
12 12
13 import java.io.File; 13 import java.io.File;
14 import java.io.FileInputStream; 14 import java.io.FileInputStream;
15 import java.io.FilenameFilter; 15 import java.io.FilenameFilter;
16 import java.io.IOException;
17 import java.io.InputStreamReader; 16 import java.io.InputStreamReader;
18 import java.io.LineNumberReader; 17 import java.io.LineNumberReader;
19 import java.math.BigDecimal; 18 import java.math.BigDecimal;
20 import java.text.DecimalFormat; 19 import java.text.DecimalFormat;
21 import java.text.NumberFormat; 20 import java.text.NumberFormat;
28 import java.util.regex.Pattern; 27 import java.util.regex.Pattern;
29 28
30 import org.apache.log4j.Logger; 29 import org.apache.log4j.Logger;
31 import org.dive4elements.river.backend.utils.EpsilonComparator; 30 import org.dive4elements.river.backend.utils.EpsilonComparator;
32 import org.dive4elements.river.importer.ImportRiver; 31 import org.dive4elements.river.importer.ImportRiver;
32 import org.dive4elements.river.importer.ImporterSession;
33 import org.hibernate.Session;
33 34
34 /** 35 /**
35 * Abstract base class for a parser of one FLYS csv data file.<br /> 36 * Abstract base class for a parser of one FLYS csv data file.<br />
36 * The {@link parse} method creates a SERIES object for the meta data 37 * The {@link parse} method creates a SERIES object for the meta data
37 * and a list of KMLINE objects for the km value lines read from the file.<br /> 38 * and a list of KMLINE objects for the km value lines read from the file.<br />
74 75
75 private static NumberFormat numberFormat = NumberFormat.getInstance(Locale.ROOT); 76 private static NumberFormat numberFormat = NumberFormat.getInstance(Locale.ROOT);
76 77
77 private static DecimalFormat bigDecimalFormat; 78 private static DecimalFormat bigDecimalFormat;
78 79
80 protected static final String INVALID_VALUE_ERROR_FORMAT = "Invalid or missing %s value";
81
79 static { 82 static {
80 bigDecimalFormat = (DecimalFormat) NumberFormat.getInstance(Locale.ROOT); 83 bigDecimalFormat = (DecimalFormat) NumberFormat.getInstance(Locale.ROOT);
81 bigDecimalFormat.setParseBigDecimal(true); 84 bigDecimalFormat.setParseBigDecimal(true);
82 } 85 }
83 86
159 this.columnTitles = new ArrayList<>(); 162 this.columnTitles = new ArrayList<>();
160 this.values = new ArrayList<>(); 163 this.values = new ArrayList<>();
161 } 164 }
162 165
163 166
164 /***** METHODS *****/ 167 /***** FILE-METHODS *****/
165 168
166 /** 169 /**
167 * Lists all files from a directory having a type extension (starting with dot) 170 * Lists all files from a directory having a type extension (starting with dot)
168 */ 171 */
169 protected static List<File> listFiles(final File importDir, final String extension) { 172 protected static List<File> listFiles(final File importDir, final String extension) {
179 fl.add(file); 182 fl.add(file);
180 return fl; 183 return fl;
181 } 184 }
182 185
183 /** 186 /**
187 * Lists all files from a directory matching a file name pattern
188 */
189 protected static List<File> listFiles(final File importDir, final Pattern filenamePattern) {
190 final File[] files = importDir.listFiles(new FilenameFilter() {
191 @Override
192 public boolean accept(final File dir, final String name) {
193 return filenamePattern.matcher(name).matches();
194 }
195 });
196 final List<File> fl = new ArrayList<>();
197 if (files != null)
198 for (final File file : files)
199 fl.add(file);
200 return fl;
201 }
202
203 /***** PARSE-METHODS *****/
204
205 /**
184 * Parses a file and adds series and values to the parser's collection 206 * Parses a file and adds series and values to the parser's collection
185 */ 207 */
186 @Override 208 @Override
187 public void parse() throws IOException { 209 public void parse() throws Exception {
188 logStartInfo(); 210 logStartInfo();
189 this.seriesHeader = createSeriesImport(this.importPath.getName().replaceAll("\\.csv", "")); 211 this.seriesHeader = createSeriesImport(this.importPath.getName().replaceAll("\\.csv", ""));
190 this.metaPatternsMatched.clear(); 212 this.metaPatternsMatched.clear();
191 this.kmExists.clear(); 213 this.kmExists.clear();
192 this.headerParsingState = ParsingState.CONTINUE; 214 this.headerParsingState = ParsingState.CONTINUE;
193 try { 215 try {
194 try { 216 try {
195 this.in = new LineNumberReader(new InputStreamReader(new FileInputStream(this.importPath), ENCODING)); 217 this.in = new LineNumberReader(new InputStreamReader(new FileInputStream(this.importPath), ENCODING));
196 } 218 }
197 catch (final Exception e) { 219 catch (final Exception e) {
198 logError("Could not open (" + e.getMessage() + ")"); 220 logError("Could not open (%s)", e.getMessage());
199 this.headerParsingState = ParsingState.STOP; 221 this.headerParsingState = ParsingState.STOP;
200 } 222 }
201 this.currentLine = null; 223 try {
202 while (this.headerParsingState != ParsingState.STOP) { 224 this.currentLine = null;
203 this.currentLine = this.in.readLine(); 225 while (this.headerParsingState != ParsingState.STOP) {
204 if (this.currentLine == null) 226 this.currentLine = this.in.readLine();
205 break; 227 if (this.currentLine == null)
206 this.currentLine = this.currentLine.trim(); 228 break;
207 if (this.currentLine.isEmpty()) 229 this.currentLine = this.currentLine.trim();
208 continue; 230 if (this.currentLine.isEmpty())
209 if (this.headerParsingState == ParsingState.CONTINUE) 231 continue;
210 handleMetaLine(); 232 if (this.headerParsingState == ParsingState.CONTINUE) {
211 else 233 handleMetaLine();
212 handleDataLine(); 234 if (this.headerParsingState == ParsingState.DONE)
213 } 235 checkMetaData();
214 if (this.headerParsingState != ParsingState.STOP) 236 }
215 getLog().info("Number of values found: " + this.seriesHeader.getValueCount()); 237 else
238 handleDataLine();
239 }
240 if (this.headerParsingState != ParsingState.STOP)
241 getLog().info(String.format("Number of values found: %d", this.seriesHeader.getValueCount()));
242 }
243 catch (final Exception e) {
244 throw new Exception(String.format("Parsing error (last read line: %d)", this.in.getLineNumber() + 1), e);
245 }
216 } 246 }
217 finally { 247 finally {
218 if (this.in != null) { 248 if (this.in != null) {
219 this.in.close(); 249 this.in.close();
220 this.in = null; 250 this.in = null;
226 256
227 /** 257 /**
228 * Writes the parse start info to the log 258 * Writes the parse start info to the log
229 */ 259 */
230 protected void logStartInfo() { 260 protected void logStartInfo() {
231 getLog().info("Start parsing:;'" + this.rootRelativePath + "'"); 261 getLog().info(String.format("Start parsing:;'%s'", this.rootRelativePath));
232 }
233
234 /**
235 * Stores the parsed series and values in the database
236 */
237 @Override
238 public void store() {
239 if (this.headerParsingState != ParsingState.STOP) {
240 this.seriesHeader.store(this.river.getPeer());
241 final String counts = String.format("parse=%d, insert=%d, update/ignore=%d", this.seriesHeader.getValueCount(),
242 this.seriesHeader.getValueStoreCount(StoreMode.INSERT), this.seriesHeader.getValueStoreCount(StoreMode.UPDATE));
243 if (this.seriesHeader.getValueCount() > this.seriesHeader.getValueStoreCount(StoreMode.INSERT))
244 logWarning("Number of value inserts less than number parsed: " + counts);
245 else
246 getLog().info("Number of values records: " + counts);
247 }
248 else
249 logWarning("Severe parsing errors, not storing series '" + this.seriesHeader.getFilename() + "'");
250 } 262 }
251 263
252 /** 264 /**
253 * Strips separator chars from a meta info text, and trims leading and trailing whitespace 265 * Strips separator chars from a meta info text, and trims leading and trailing whitespace
254 */ 266 */
257 } 269 }
258 270
259 /** 271 /**
260 * Parses a number string with dot or comma as decimal char, and returning null in case of an error 272 * Parses a number string with dot or comma as decimal char, and returning null in case of an error
261 */ 273 */
262 public static Number parseDoubleWithNull(final String text) { 274 public static Number parseDoubleCheckNull(final String[] values, final int index) {
275 if (index > values.length - 1)
276 return null;
263 try { 277 try {
264 return parseDouble(text); 278 return parseDouble(values[index]);
265 } 279 }
266 catch (final Exception e) { 280 catch (final Exception e) {
267 return null; 281 return null;
268 } 282 }
269 } 283 }
271 /** 285 /**
272 * Parses a number string with dot or comma as decimal char 286 * Parses a number string with dot or comma as decimal char
273 * 287 *
274 * @throws ParseException 288 * @throws ParseException
275 */ 289 */
276 public static Number parseDouble(final String text) throws ParseException { 290 private static Number parseDouble(final String text) throws ParseException {
277 return numberFormat.parse(text.replace(',', '.')); 291 return numberFormat.parse(text.replace(',', '.'));
292 }
293
294 /**
295 * Parses an integer number string, returning null in case of an error
296 */
297 public static Integer parseIntegerCheckNull(final String[] values, final int index) {
298 if (index > values.length - 1)
299 return null;
300 try {
301 return Integer.valueOf((values[index]));
302 }
303 catch (final Exception e) {
304 return null;
305 }
278 } 306 }
279 307
280 /** 308 /**
281 * Parses a number string as a BigDecimal, replacing a comma with a dot first 309 * Parses a number string as a BigDecimal, replacing a comma with a dot first
282 */ 310 */
283 public static BigDecimal parseDecimal(final String text) throws ParseException { 311 public static BigDecimal parseDecimal(final String text) throws ParseException {
284 return (BigDecimal) bigDecimalFormat.parse(text.replace(',', '.')); 312 return (BigDecimal) bigDecimalFormat.parse(text.replace(',', '.'));
285 } 313 }
286 314
287 /** 315 /**
288 * Gets the class's logger
289 */
290 protected abstract Logger getLog();
291
292 /**
293 * Logs an error message, appending the relative file path
294 */
295 protected void logError(final String message) {
296 getLog().error(message + ";" + this.rootRelativePath);
297 }
298
299 /**
300 * Logs a warning message, appending the relative file path
301 */
302 protected void logWarning(final String message) {
303 getLog().warn(message + ";" + this.rootRelativePath);
304 }
305
306 /**
307 * Logs an info message, appending the relative file path
308 */
309 protected void logInfo(final String message) {
310 getLog().info(message + ";" + this.rootRelativePath);
311 }
312
313 /**
314 * Logs a debug message, appending the relative file path
315 */
316 protected void logDebug(final String message) {
317 getLog().debug(message + ";" + this.rootRelativePath);
318 }
319
320 /**
321 * Logs a trace message, appending the relative file path
322 */
323 protected void logTrace(final String message) {
324 getLog().trace(message + ";" + this.rootRelativePath);
325 }
326
327 /**
328 * Creates a new series import object 316 * Creates a new series import object
329 */ 317 */
330 protected abstract HEADER createSeriesImport(final String filename); 318 protected abstract HEADER createSeriesImport(final String filename);
319
320
321 /***** METAHEADER-PARSE-METHODS *****/
331 322
332 protected void handleMetaLine() { 323 protected void handleMetaLine() {
333 if (META_SUBGROUP.matcher(this.currentLine).matches()) 324 if (META_SUBGROUP.matcher(this.currentLine).matches())
334 return; 325 return;
335 else if (handleMetaRivername()) 326 else if (handleMetaRivername())
346 return; 337 return;
347 } 338 }
348 else { 339 else {
349 if (this.currentLine.startsWith(START_META_CHAR)) { 340 if (this.currentLine.startsWith(START_META_CHAR)) {
350 if (this.headerParsingState != ParsingState.IGNORE) 341 if (this.headerParsingState != ParsingState.IGNORE)
351 logWarning("Not matching any known meta type in line " + this.in.getLineNumber() + ", ignored"); 342 logLineWarning("Not matching any known meta type");
352 else 343 else
353 this.headerParsingState = ParsingState.CONTINUE; 344 this.headerParsingState = ParsingState.CONTINUE;
354 } 345 }
346 else
347 this.headerParsingState = ParsingState.DONE; // no more meta data expected, if neither meta line nor empty line
355 } 348 }
356 } 349 }
357 350
358 private boolean handleMetaRivername() { 351 private boolean handleMetaRivername() {
359 if (META_RIVERNAME.matcher(this.currentLine).matches()) { 352 if (META_RIVERNAME.matcher(this.currentLine).matches()) {
394 } 387 }
395 388
396 /** 389 /**
397 * Parses a header line for the km table column header line 390 * Parses a header line for the km table column header line
398 * 391 *
399 * @return Whether the line has been handled and we are ready for reading the km values lines 392 * @return Whether the line has been handled (also in case of State=STOP),<br>
393 * and we are ready for reading the km values lines (or cancel parsing)
400 */ 394 */
401 protected boolean handleMetaColumnTitles() { 395 protected boolean handleMetaColumnTitles() {
402 if (META_COLUMNTITLES.matcher(this.currentLine).matches()) { 396 if (META_COLUMNTITLES.matcher(this.currentLine).matches()) {
403 this.metaPatternsMatched.add(META_COLUMNTITLES); 397 this.metaPatternsMatched.add(META_COLUMNTITLES);
404 this.columnTitles.clear(); 398 this.columnTitles.clear();
409 } 403 }
410 return false; 404 return false;
411 } 405 }
412 406
413 /** 407 /**
408 * Check meta data after all meta data lines (#) have been read
409 */
410 protected boolean checkMetaData() {
411 if (this.columnTitles.size() <= 1) {
412 logError("No valid header line with column titles found");
413 this.headerParsingState = ParsingState.STOP;
414 return false;
415 }
416 if (checkSeriesExistsAlready()) {
417 logError("Data series/filename exists already in the database");
418 this.headerParsingState = ParsingState.STOP;
419 return false;
420 }
421 return true;
422 }
423
424 /**
425 * Checks the existence of the active series in the database
426 */
427 protected boolean checkSeriesExistsAlready() {
428 if (!checkRiverExists())
429 return false;
430 final Session session = ImporterSession.getInstance().getDatabaseSession();
431 final List<DB_SERIES> rows = this.seriesHeader.querySeriesItem(session, this.river.getPeer(), true);
432 return !rows.isEmpty();
433 }
434
435 /**
436 * Checks the existence of the active river in the database
437 */
438 protected boolean checkRiverExists() {
439 return (this.river.getPeer(false) != null);
440 }
441
442
443 /***** VALUELINE-PARSE-METHODS *****/
444
445 /**
414 * Parses a values line and adds the values record 446 * Parses a values line and adds the values record
415 */ 447 */
416 protected void handleDataLine() { 448 protected void handleDataLine() {
417 final String[] values = this.currentLine.split(SEPARATOR_CHAR, 0); 449 final String[] values = this.currentLine.split(SEPARATOR_CHAR, 0);
418 // Skip import line without data or only km 450 // Skip import line without data or only km
419 if (values.length < 2) 451 if (values.length < 2) {
452 logLineWarning("Too few data");
420 return; 453 return;
454 }
421 Double km = Double.NaN; 455 Double km = Double.NaN;
422 if (kmMode() != KmMode.NONE) { 456 if (kmMode() != KmMode.NONE) {
423 try { 457 try {
424 km = Double.valueOf(parseDouble(values[0]).doubleValue()); 458 km = Double.valueOf(parseDouble(values[0]).doubleValue());
425 if (kmMode() == KmMode.UNIQUE) { 459 if (kmMode() == KmMode.UNIQUE) {
426 if (this.kmExists.contains(km)) { 460 if (this.kmExists.contains(km)) {
427 logWarning("Ignoring duplicate station '" + values[0] + "' in line " + this.in.getLineNumber()); 461 logLineWarning("Duplicate km '%s'", values[0]);
428 return; 462 return;
429 } 463 }
430 this.kmExists.add(km); 464 this.kmExists.add(km);
431 } 465 }
432 } 466 }
433 catch (final Exception e) { 467 catch (final Exception e) {
434 logError("Not parseable km in line " + this.in.getLineNumber() + ": " + e.getMessage()); 468 logLineWarning("Invalid km: %s", e.getMessage());
435 return; 469 return;
436 } 470 }
437 } 471 }
438 final KMLINE value = createKmLineImport(km, values); 472 final KMLINE value = createKmLineImport(km, values);
439 if (value != null) 473 if (value != null) {
440 this.seriesHeader.addValue(value); 474 final boolean added = this.seriesHeader.addValue(value);
475 if (!added)
476 logLineWarning("Duplicate data line");
477 }
441 } 478 }
442 479
443 /** 480 /**
444 * How {@link handleDataLine} shall handle the km column (if any) 481 * How {@link handleDataLine} shall handle the km column (if any)
445 */ 482 */
452 * the km has been validated 489 * the km has been validated
453 * 490 *
454 * @return value item, or null if parse error 491 * @return value item, or null if parse error
455 */ 492 */
456 protected abstract KMLINE createKmLineImport(final Double km, final String[] values); 493 protected abstract KMLINE createKmLineImport(final Double km, final String[] values);
494
495
496 /***** STORE-METHODS *****/
497
498 /**
499 * Stores the parsed series and values in the database
500 */
501 @Override
502 public void store() {
503 if (this.headerParsingState != ParsingState.STOP) {
504 this.seriesHeader.store(this.river.getPeer());
505 final String counts = String.format("parse=%d, insert=%d, update/ignore=%d", this.seriesHeader.getValueCount(),
506 this.seriesHeader.getValueStoreCount(StoreMode.INSERT), this.seriesHeader.getValueStoreCount(StoreMode.UPDATE));
507 if (this.seriesHeader.getValueCount() > this.seriesHeader.getValueStoreCount(StoreMode.INSERT))
508 logWarning("Number of value inserts less than number parsed: %s", counts);
509 else
510 getLog().info("Number of values records: " + counts);
511 }
512 else
513 logWarning("Severe parsing errors, not storing series '%s'", this.seriesHeader.getFilename());
514 }
515
516
517 /***** LOG-METHODS *****/
518
519 /**
520 * Gets the class's logger
521 */
522 protected abstract Logger getLog();
523
524 /**
525 * Logs an error message, appending the relative file path
526 */
527 protected void logError(final String message) {
528 getLog().error(buildLogMessage(message));
529 }
530
531 /**
532 * Logs an error message, appending the relative file path
533 */
534 protected void logError(final String format, final Object... args) {
535 getLog().error(buildLogMessage(String.format(format, args)));
536 }
537
538 /**
539 * Logs an error message with current line number, appending the relative file path
540 */
541 protected void logLineError(final String message) {
542 getLog().error(buildLineLogMessage(message));
543 }
544
545 /**
546 * Logs an error message with current line number, appending the relative file path
547 */
548 protected void logLineError(final String format, final Object... args) {
549 getLog().error(buildLineLogMessage(String.format(format, args)));
550 }
551
552 /**
553 * Logs a warning message, appending the relative file path
554 */
555 protected void logWarning(final String message) {
556 getLog().warn(buildLogMessage(message));
557 }
558
559 /**
560 * Logs a warning message, appending the relative file path
561 */
562 protected void logWarning(final String format, final Object... args) {
563 getLog().warn(buildLogMessage(String.format(format, args)));
564 }
565
566 /**
567 * Logs a warning message prefixed with the current line number, appending the relative file path
568 */
569 protected void logLineWarning(final String message) {
570 getLog().warn(buildLineLogMessage(message));
571 }
572
573 /**
574 * Logs a warning message prefixed with the current line number, appending the relative file path
575 */
576 protected void logLineWarning(final String format, final Object... args) {
577 getLog().warn(buildLineLogMessage(String.format(format, args)));
578 }
579
580 /**
581 * Logs an info message, appending the relative file path
582 */
583 protected void logInfo(final String message) {
584 getLog().info(buildLogMessage(message));
585 }
586
587 /**
588 * Logs a debug message, appending the relative file path
589 */
590 protected void logDebug(final String message) {
591 getLog().debug(buildLogMessage(message));
592 }
593
594 /**
595 * Logs a trace message, appending the relative file path
596 */
597 protected void logTrace(final String message) {
598 getLog().trace(buildLogMessage(message));
599 }
600
601 private String buildLogMessage(final String message) {
602 return String.format("%s;%s", message, this.rootRelativePath);
603 }
604
605 private String buildLineLogMessage(final String message) {
606 return String.format("Line %d: %s;%s", this.in.getLineNumber(), message, this.rootRelativePath);
607 }
457 } 608 }

http://dive4elements.wald.intevation.org