Mercurial > dive4elements > river
changeset 9650:a2a42a6bac6b
Importer (s/u-info) extensions:
outer try/catch around parsing that reports the line number,
catching the parsing exception when a line has too few value fields,
parsing error and warning log messages with line number,
detecting and rejecting duplicate data series,
better differentiation between error and warning log messages
author | mschaefer |
---|---|
date | Mon, 23 Mar 2020 14:57:03 +0100 (2020-03-23) |
parents | 295b3cb5ebc8 |
children | eb3dfe900d8c |
files | backend/src/main/java/org/dive4elements/river/importer/ImportRiver.java backend/src/main/java/org/dive4elements/river/importer/Importer.java backend/src/main/java/org/dive4elements/river/importer/common/AbstractParser.java backend/src/main/java/org/dive4elements/river/importer/common/AbstractSeriesImport.java backend/src/main/java/org/dive4elements/river/importer/common/ImportParser.java backend/src/main/java/org/dive4elements/river/importer/parsers/InfoGewParser.java |
diffstat | 6 files changed, 312 insertions(+), 162 deletions(-) [+] |
line wrap: on
line diff
--- a/backend/src/main/java/org/dive4elements/river/importer/ImportRiver.java Wed Dec 04 16:59:25 2019 +0100 +++ b/backend/src/main/java/org/dive4elements/river/importer/ImportRiver.java Mon Mar 23 14:57:03 2020 +0100 @@ -395,7 +395,7 @@ return this.annotationClassifier; } - public void parseDependencies() throws IOException { + public void parseDependencies() throws Exception { log.info("Root dir is '" + getRiverDir() + "'"); parseGauges(); parseAnnotations(); @@ -1667,31 +1667,36 @@ } public River getPeer() { - if (this.peer == null) { - final Session session = ImporterSession.getInstance() - .getDatabaseSession(); - final Query query = session.createQuery("from River where name=:name"); - - Unit u = null; - if (this.wstUnit != null) { - u = this.wstUnit.getPeer(); - } + return getPeer(true); + } - query.setString("name", this.name); - final List<River> rivers = query.list(); - if (rivers.isEmpty()) { - log.info("Store new river '" + this.name + "'"); - this.peer = new River(this.name, u, this.modelUuid); - if (!Config.INSTANCE.skipBWASTR()) { - this.peer.setOfficialNumber(this.officialNumber); - } - session.save(this.peer); + public River getPeer(final boolean storeIfNew) { + + if (this.peer != null) + return this.peer; + + final Session session = ImporterSession.getInstance().getDatabaseSession(); + final Query query = session.createQuery("FROM River WHERE name=:name"); + + Unit u = null; + if (storeIfNew && (this.wstUnit != null)) { + u = this.wstUnit.getPeer(); + } + + query.setString("name", this.name); + final List<River> rivers = query.list(); + if (rivers.isEmpty() && storeIfNew) { + log.info("Store new river '" + this.name + "'"); + this.peer = new River(this.name, u, this.modelUuid); + if (!Config.INSTANCE.skipBWASTR()) { + this.peer.setOfficialNumber(this.officialNumber); } - else { - this.peer = rivers.get(0); - } + session.save(this.peer); + } + else { + this.peer = rivers.get(0); } return this.peer; } } -// vim:set ts=4 sw=4 si et sta sts=4 
fenc=utf8 : +
--- a/backend/src/main/java/org/dive4elements/river/importer/Importer.java Wed Dec 04 16:59:25 2019 +0100 +++ b/backend/src/main/java/org/dive4elements/river/importer/Importer.java Mon Mar 23 14:57:03 2020 +0100 @@ -57,7 +57,7 @@ /** Write rivers and their dependencies/dependants to db. */ public void writeRivers() { - log.debug("write rivers started"); + log.info("write rivers started"); for (final ImportRiver river: this.rivers) { log.debug("writing river '" + river.getName() + "'"); @@ -65,7 +65,7 @@ ImporterSession.getInstance().getDatabaseSession().flush(); } - log.debug("write rivers finished"); + log.info("write rivers finished"); } public void writeToDatabase() { @@ -149,7 +149,7 @@ try { infoGewParser.parse(gewFile); } - catch (final IOException ioe) { + catch (final Exception ioe) { log.error("error parsing gew: " + gew, ioe); System.exit(1); } @@ -167,7 +167,7 @@ try { infoGewParser.parse(gewFile); } - catch (final IOException ioe) { + catch (final Exception ioe) { log.error("error parsing gew: " + gew, ioe); System.exit(1); }
--- a/backend/src/main/java/org/dive4elements/river/importer/common/AbstractParser.java Wed Dec 04 16:59:25 2019 +0100 +++ b/backend/src/main/java/org/dive4elements/river/importer/common/AbstractParser.java Mon Mar 23 14:57:03 2020 +0100 @@ -13,7 +13,6 @@ import java.io.File; import java.io.FileInputStream; import java.io.FilenameFilter; -import java.io.IOException; import java.io.InputStreamReader; import java.io.LineNumberReader; import java.math.BigDecimal; @@ -30,6 +29,8 @@ import org.apache.log4j.Logger; import org.dive4elements.river.backend.utils.EpsilonComparator; import org.dive4elements.river.importer.ImportRiver; +import org.dive4elements.river.importer.ImporterSession; +import org.hibernate.Session; /** * Abstract base class for a parser of one FLYS csv data file.<br /> @@ -76,6 +77,8 @@ private static DecimalFormat bigDecimalFormat; + protected static final String INVALID_VALUE_ERROR_FORMAT = "Invalid or missing %s value"; + static { bigDecimalFormat = (DecimalFormat) NumberFormat.getInstance(Locale.ROOT); bigDecimalFormat.setParseBigDecimal(true); @@ -161,7 +164,7 @@ } - /***** METHODS *****/ + /***** FILE-METHODS *****/ /** * Lists all files from a directory having a type extension (starting with dot) @@ -181,10 +184,29 @@ } /** + * Lists all files from a directory matching a file name pattern + */ + protected static List<File> listFiles(final File importDir, final Pattern filenamePattern) { + final File[] files = importDir.listFiles(new FilenameFilter() { + @Override + public boolean accept(final File dir, final String name) { + return filenamePattern.matcher(name).matches(); + } + }); + final List<File> fl = new ArrayList<>(); + if (files != null) + for (final File file : files) + fl.add(file); + return fl; + } + + /***** PARSE-METHODS *****/ + + /** * Parses a file and adds series and values to the parser's collection */ @Override - public void parse() throws IOException { + public void parse() throws Exception { logStartInfo(); this.seriesHeader 
= createSeriesImport(this.importPath.getName().replaceAll("\\.csv", "")); this.metaPatternsMatched.clear(); @@ -195,24 +217,32 @@ this.in = new LineNumberReader(new InputStreamReader(new FileInputStream(this.importPath), ENCODING)); } catch (final Exception e) { - logError("Could not open (" + e.getMessage() + ")"); + logError("Could not open (%s)", e.getMessage()); this.headerParsingState = ParsingState.STOP; } - this.currentLine = null; - while (this.headerParsingState != ParsingState.STOP) { - this.currentLine = this.in.readLine(); - if (this.currentLine == null) - break; - this.currentLine = this.currentLine.trim(); - if (this.currentLine.isEmpty()) - continue; - if (this.headerParsingState == ParsingState.CONTINUE) - handleMetaLine(); - else - handleDataLine(); + try { + this.currentLine = null; + while (this.headerParsingState != ParsingState.STOP) { + this.currentLine = this.in.readLine(); + if (this.currentLine == null) + break; + this.currentLine = this.currentLine.trim(); + if (this.currentLine.isEmpty()) + continue; + if (this.headerParsingState == ParsingState.CONTINUE) { + handleMetaLine(); + if (this.headerParsingState == ParsingState.DONE) + checkMetaData(); + } + else + handleDataLine(); + } + if (this.headerParsingState != ParsingState.STOP) + getLog().info(String.format("Number of values found: %d", this.seriesHeader.getValueCount())); } - if (this.headerParsingState != ParsingState.STOP) - getLog().info("Number of values found: " + this.seriesHeader.getValueCount()); + catch (final Exception e) { + throw new Exception(String.format("Parsing error (last read line: %d)", this.in.getLineNumber() + 1), e); + } } finally { if (this.in != null) { @@ -228,25 +258,7 @@ * Writes the parse start info to the log */ protected void logStartInfo() { - getLog().info("Start parsing:;'" + this.rootRelativePath + "'"); - } - - /** - * Stores the parsed series and values in the database - */ - @Override - public void store() { - if (this.headerParsingState != 
ParsingState.STOP) { - this.seriesHeader.store(this.river.getPeer()); - final String counts = String.format("parse=%d, insert=%d, update/ignore=%d", this.seriesHeader.getValueCount(), - this.seriesHeader.getValueStoreCount(StoreMode.INSERT), this.seriesHeader.getValueStoreCount(StoreMode.UPDATE)); - if (this.seriesHeader.getValueCount() > this.seriesHeader.getValueStoreCount(StoreMode.INSERT)) - logWarning("Number of value inserts less than number parsed: " + counts); - else - getLog().info("Number of values records: " + counts); - } - else - logWarning("Severe parsing errors, not storing series '" + this.seriesHeader.getFilename() + "'"); + getLog().info(String.format("Start parsing:;'%s'", this.rootRelativePath)); } /** @@ -259,9 +271,11 @@ /** * Parses a number string with dot or comma as decimal char, and returning null in case of an error */ - public static Number parseDoubleWithNull(final String text) { + public static Number parseDoubleCheckNull(final String[] values, final int index) { + if (index > values.length - 1) + return null; try { - return parseDouble(text); + return parseDouble(values[index]); } catch (final Exception e) { return null; @@ -273,11 +287,25 @@ * * @throws ParseException */ - public static Number parseDouble(final String text) throws ParseException { + private static Number parseDouble(final String text) throws ParseException { return numberFormat.parse(text.replace(',', '.')); } /** + * Parses an integer number string , and returning null in case of an error + */ + public static Integer parseIntegerCheckNull(final String[] values, final int index) { + if (index > values.length - 1) + return null; + try { + return Integer.valueOf((values[index])); + } + catch (final Exception e) { + return null; + } + } + + /** * Parses a number string as a BigDecimal, replacing a comma with a dot first */ public static BigDecimal parseDecimal(final String text) throws ParseException { @@ -285,50 +313,13 @@ } /** - * Gets the class's logger - */ - 
protected abstract Logger getLog(); - - /** - * Logs an error message, appending the relative file path - */ - protected void logError(final String message) { - getLog().error(message + ";" + this.rootRelativePath); - } - - /** - * Logs a warning message, appending the relative file path - */ - protected void logWarning(final String message) { - getLog().warn(message + ";" + this.rootRelativePath); - } - - /** - * Logs an info message, appending the relative file path - */ - protected void logInfo(final String message) { - getLog().info(message + ";" + this.rootRelativePath); - } - - /** - * Logs a debug message, appending the relative file path - */ - protected void logDebug(final String message) { - getLog().debug(message + ";" + this.rootRelativePath); - } - - /** - * Logs a trace message, appending the relative file path - */ - protected void logTrace(final String message) { - getLog().trace(message + ";" + this.rootRelativePath); - } - - /** * Creates a new series import object */ protected abstract HEADER createSeriesImport(final String filename); + + /***** METAHEADER-PARSE-METHODS *****/ + protected void handleMetaLine() { if (META_SUBGROUP.matcher(this.currentLine).matches()) return; @@ -348,10 +339,12 @@ else { if (this.currentLine.startsWith(START_META_CHAR)) { if (this.headerParsingState != ParsingState.IGNORE) - logWarning("Not matching any known meta type in line " + this.in.getLineNumber() + ", ignored"); + logLineWarning("Not matching any known meta type"); else this.headerParsingState = ParsingState.CONTINUE; } + else + this.headerParsingState = ParsingState.DONE; // no more meta data expected, if neither meta line nor empty line } } @@ -396,7 +389,8 @@ /** * Parses a header line for the km table column header line * - * @return Whether the line has been handled and we are ready for reading the km values lines + * @return Whether the line has been handled (also in case of State=STOP),<br> + * and we are ready for reading the km values lines (or 
cancel parsing) */ protected boolean handleMetaColumnTitles() { if (META_COLUMNTITLES.matcher(this.currentLine).matches()) { @@ -411,33 +405,76 @@ } /** + * Check meta data after all meta data lines (#) have been read + */ + protected boolean checkMetaData() { + if (this.columnTitles.size() <= 1) { + logError("No valid header line with column titles found"); + this.headerParsingState = ParsingState.STOP; + return false; + } + if (checkSeriesExistsAlready()) { + logError("Data series/filename exists already in the database"); + this.headerParsingState = ParsingState.STOP; + return false; + } + return true; + } + + /** + * Checks the existence of the active series in the database + */ + protected boolean checkSeriesExistsAlready() { + if (!checkRiverExists()) + return false; + final Session session = ImporterSession.getInstance().getDatabaseSession(); + final List<DB_SERIES> rows = this.seriesHeader.querySeriesItem(session, this.river.getPeer(), true); + return !rows.isEmpty(); + } + + /** + * Checks the existence of the active river in the database + */ + protected boolean checkRiverExists() { + return (this.river.getPeer(false) != null); + } + + + /***** VALUELINE-PARSE-METHODS *****/ + + /** * Parses a values line and adds the values record */ protected void handleDataLine() { final String[] values = this.currentLine.split(SEPARATOR_CHAR, 0); // Skip import line without data or only km - if (values.length < 2) + if (values.length < 2) { + logLineWarning("Too few data"); return; + } Double km = Double.NaN; if (kmMode() != KmMode.NONE) { try { km = Double.valueOf(parseDouble(values[0]).doubleValue()); if (kmMode() == KmMode.UNIQUE) { if (this.kmExists.contains(km)) { - logWarning("Ignoring duplicate station '" + values[0] + "' in line " + this.in.getLineNumber()); + logLineWarning("Duplicate km '%s'", values[0]); return; } this.kmExists.add(km); } } catch (final Exception e) { - logError("Not parseable km in line " + this.in.getLineNumber() + ": " + e.getMessage()); 
+ logLineWarning("Invalid km: %s", e.getMessage()); return; } } final KMLINE value = createKmLineImport(km, values); - if (value != null) - this.seriesHeader.addValue(value); + if (value != null) { + final boolean added = this.seriesHeader.addValue(value); + if (!added) + logLineWarning("Duplicate data line"); + } } /** @@ -454,4 +491,118 @@ * @return value item, or null if parse error */ protected abstract KMLINE createKmLineImport(final Double km, final String[] values); + + + /***** STORE-METHODS *****/ + + /** + * Stores the parsed series and values in the database + */ + @Override + public void store() { + if (this.headerParsingState != ParsingState.STOP) { + this.seriesHeader.store(this.river.getPeer()); + final String counts = String.format("parse=%d, insert=%d, update/ignore=%d", this.seriesHeader.getValueCount(), + this.seriesHeader.getValueStoreCount(StoreMode.INSERT), this.seriesHeader.getValueStoreCount(StoreMode.UPDATE)); + if (this.seriesHeader.getValueCount() > this.seriesHeader.getValueStoreCount(StoreMode.INSERT)) + logWarning("Number of value inserts less than number parsed: %s", counts); + else + getLog().info("Number of values records: " + counts); + } + else + logWarning("Severe parsing errors, not storing series '%s'", this.seriesHeader.getFilename()); + } + + + /***** LOG-METHODS *****/ + + /** + * Gets the class's logger + */ + protected abstract Logger getLog(); + + /** + * Logs an error message, appending the relative file path + */ + protected void logError(final String message) { + getLog().error(buildLogMessage(message)); + } + + /** + * Logs an error message, appending the relative file path + */ + protected void logError(final String format, final Object... 
args) { + getLog().error(buildLogMessage(String.format(format, args))); + } + + /** + * Logs an error message with current line number, appending the relative file path + */ + protected void logLineError(final String message) { + getLog().error(buildLineLogMessage(message)); + } + + /** + * Logs an error message with current line number, appending the relative file path + */ + protected void logLineError(final String format, final Object... args) { + getLog().error(buildLineLogMessage(String.format(format, args))); + } + + /** + * Logs a warning message, appending the relative file path + */ + protected void logWarning(final String message) { + getLog().warn(buildLogMessage(message)); + } + + /** + * Logs a warning message, appending the relative file path + */ + protected void logWarning(final String format, final Object... args) { + getLog().warn(buildLogMessage(String.format(format, args))); + } + + /** + * Logs a warning message, appending the line number and the relative file path + */ + protected void logLineWarning(final String message) { + getLog().warn(buildLineLogMessage(message)); + } + + /** + * Logs a warning message, appending the line number and the relative file path + */ + protected void logLineWarning(final String format, final Object... 
args) { + getLog().warn(buildLineLogMessage(String.format(format, args))); + } + + /** + * Logs an info message, appending the relative file path + */ + protected void logInfo(final String message) { + getLog().info(buildLogMessage(message)); + } + + /** + * Logs a debug message, appending the relative file path + */ + protected void logDebug(final String message) { + getLog().debug(buildLogMessage(message)); + } + + /** + * Logs a trace message, appending the relative file path + */ + protected void logTrace(final String message) { + getLog().trace(buildLogMessage(message)); + } + + private String buildLogMessage(final String message) { + return String.format("%s;%s", message, this.rootRelativePath); + } + + private String buildLineLogMessage(final String message) { + return String.format("Line %d: %s;%s", this.in.getLineNumber(), message, this.rootRelativePath); + } }
--- a/backend/src/main/java/org/dive4elements/river/importer/common/AbstractSeriesImport.java Wed Dec 04 16:59:25 2019 +0100 +++ b/backend/src/main/java/org/dive4elements/river/importer/common/AbstractSeriesImport.java Mon Mar 23 14:57:03 2020 +0100 @@ -99,8 +99,9 @@ return this.values.size(); } - public void addValue(final KMLINE value) { + public boolean addValue(final KMLINE value) { this.values.add(value); + return true; } public int getValueStoreCount(final StoreMode mode) { @@ -144,7 +145,7 @@ if (this.peer != null) return this.peer; final Session session = ImporterSession.getInstance().getDatabaseSession(); - final List<SERIES> rows = querySeriesItem(session, river); + final List<SERIES> rows = querySeriesItem(session, river, false); if (rows.isEmpty()) { getLog().info("Create new database instance"); this.peer = createSeriesItem(river); @@ -158,9 +159,10 @@ } /** - * Queries the series item(s) from the database + * Queries the series item(s) from the database<br> + * if specified and necessary, the parent is also queried by properties instead of using its getPeer method */ - public abstract List<SERIES> querySeriesItem(final Session session, final River river); + public abstract List<SERIES> querySeriesItem(final Session session, final River river, final boolean doQueryParent); /** * Creates a new value item
--- a/backend/src/main/java/org/dive4elements/river/importer/common/ImportParser.java Wed Dec 04 16:59:25 2019 +0100 +++ b/backend/src/main/java/org/dive4elements/river/importer/common/ImportParser.java Mon Mar 23 14:57:03 2020 +0100 @@ -10,8 +10,6 @@ package org.dive4elements.river.importer.common; -import java.io.IOException; - /** * Parser interface * @@ -23,7 +21,7 @@ /** * Parses a file and adds the parsed objects to the parsers state */ - void parse() throws IOException; + void parse() throws Exception; /** * Stores the parsed objects in the database with reference to a river
--- a/backend/src/main/java/org/dive4elements/river/importer/parsers/InfoGewParser.java Wed Dec 04 16:59:25 2019 +0100 +++ b/backend/src/main/java/org/dive4elements/river/importer/parsers/InfoGewParser.java Mon Mar 23 14:57:03 2020 +0100 @@ -9,22 +9,16 @@ package org.dive4elements.river.importer.parsers; import java.io.File; - -import java.util.List; -import java.util.ArrayList; - -import java.util.regex.Pattern; -import java.util.regex.Matcher; - -import java.io.IOException; -import java.io.LineNumberReader; import java.io.FileInputStream; import java.io.InputStreamReader; +import java.io.LineNumberReader; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.apache.log4j.Logger; - import org.dive4elements.artifacts.common.utils.FileTools; - import org.dive4elements.river.importer.ImportRiver; @@ -36,16 +30,16 @@ public static final String ENCODING = "ISO-8859-1"; public static final Pattern GEWAESSER = - Pattern.compile("^\\s*Gew\u00e4sser\\s*:\\s*(.+)"); + Pattern.compile("^\\s*Gew\u00e4sser\\s*:\\s*(.+)"); public static final Pattern WST_DATEI = - Pattern.compile("^\\s*WSTDatei\\s*:\\s*(.+)"); + Pattern.compile("^\\s*WSTDatei\\s*:\\s*(.+)"); public static final Pattern BB_INFO = - Pattern.compile("^\\s*B\\+B-Info\\s*:\\s*(.+)"); + Pattern.compile("^\\s*B\\+B-Info\\s*:\\s*(.+)"); public static final Pattern GEW_UUID = - Pattern.compile("^\\s*uuid\\s*:\\s*(.+)"); + Pattern.compile("^\\s*uuid\\s*:\\s*(.+)"); protected ArrayList<ImportRiver> rivers; @@ -55,31 +49,31 @@ this(null); } - public InfoGewParser(AnnotationClassifier annotationClassifier) { - rivers = new ArrayList<ImportRiver>(); + public InfoGewParser(final AnnotationClassifier annotationClassifier) { + this.rivers = new ArrayList<>(); this.annotationClassifier = annotationClassifier; } public List<ImportRiver> getRivers() { - return rivers; + return this.rivers; } - public static final String normalize(String f) { + public 
static final String normalize(final String f) { return f.replace("\\", "/").replace("/", File.separator); } /** Handle a gew, wst, or bb_info file. */ - public void parse(File file) throws IOException { + public void parse(final File file) throws Exception { LineNumberReader in = null; - File root = file.getParentFile(); + final File root = file.getParentFile(); try { in = - new LineNumberReader( - new InputStreamReader( - new FileInputStream(file), ENCODING)); + new LineNumberReader( + new InputStreamReader( + new FileInputStream(file), ENCODING)); String line = null; @@ -95,15 +89,15 @@ Matcher m = GEWAESSER.matcher(line); if (m.matches()) { - String river = m.group(1); + final String river = m.group(1); log.info("Found river '" + river + "'"); if (riverName != null) { - rivers.add(new ImportRiver( - riverName, - modelUuid, - wstFile, - bbInfoFile, - annotationClassifier)); + this.rivers.add(new ImportRiver( + riverName, + modelUuid, + wstFile, + bbInfoFile, + this.annotationClassifier)); } riverName = river; modelUuid = null; @@ -119,7 +113,7 @@ wst = FileTools.repair(wst); if (!wst.isFile() || !wst.canRead()) { log.error( - "cannot access WST file '" + wstFilename + "'"); + "cannot access WST file '" + wstFilename + "'"); continue; } log.info("Found wst file '" + wst + "'"); @@ -128,21 +122,21 @@ else if ((m = GEW_UUID.matcher(line)).matches()) { modelUuid = m.group(1); log.debug("Found model uuid " + modelUuid + - " for river " + riverName); + " for river " + riverName); } else if ((m = BB_INFO.matcher(line)).matches()) { //TODO: Make it relative to the wst file. 
- String bbInfo = m.group(1); + final String bbInfo = m.group(1); bbInfoFile = new File(normalize(bbInfo)); } } if (riverName != null && wstFile != null) { - rivers.add(new ImportRiver( - riverName, - modelUuid, - wstFile, - bbInfoFile, - annotationClassifier)); + this.rivers.add(new ImportRiver( + riverName, + modelUuid, + wstFile, + bbInfoFile, + this.annotationClassifier)); } } finally { @@ -151,7 +145,7 @@ } } - for (ImportRiver river: rivers) { + for (final ImportRiver river: this.rivers) { river.parseDependencies(); } }