Mercurial > dive4elements > river
view backend/src/main/java/org/dive4elements/river/importer/common/AbstractParser.java @ 9709:b74f817435fe
comment removed
author | dnt_bjoernsen <d.tironi@bjoernsen.de> |
---|---|
date | Wed, 27 Jan 2021 11:47:38 +0100 |
parents | a2a42a6bac6b |
children |
line wrap: on
line source
/* Copyright (C) 2017 by Bundesanstalt für Gewässerkunde * Software engineering by * Björnsen Beratende Ingenieure GmbH * Dr. Schumacher Ingenieurbüro für Wasser und Umwelt * * This file is Free Software under the GNU AGPL (>=v3) * and comes with ABSOLUTELY NO WARRANTY! Check out the * documentation coming with Dive4Elements River for details. */ package org.dive4elements.river.importer.common; import java.io.File; import java.io.FileInputStream; import java.io.FilenameFilter; import java.io.InputStreamReader; import java.io.LineNumberReader; import java.math.BigDecimal; import java.text.DecimalFormat; import java.text.NumberFormat; import java.text.ParseException; import java.util.ArrayList; import java.util.List; import java.util.Locale; import java.util.TreeSet; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.log4j.Logger; import org.dive4elements.river.backend.utils.EpsilonComparator; import org.dive4elements.river.importer.ImportRiver; import org.dive4elements.river.importer.ImporterSession; import org.hibernate.Session; /** * Abstract base class for a parser of one FLYS csv data file.<br /> * The {@link parse} method creates a SERIES object for the meta data * and a list of KMLINE objects for the km value lines read from the file.<br /> * The {@link store} method gets or creates the corresponding database objects * by the hibernate binding classes DB_SERIES and DB_KMTUPLE, * and updates or inserts them in the database. * DB_SERIES has a one-to-many relationship with DB_KMTUPLE.<br /> * <br /> * The structure of the file is as follows:<br /> * <ul> * <li>one or more comment lines (#) with the meta info of the data series</li> * <li>the comment line with the column titles of values table, starting with the km column</li> * <li>the rows of the values table, each one on its own line</li> * </ul> * * @author Matthias Schäfer * */ public abstract class AbstractParser<DB_SERIES, DB_KMTUPLE, KMLINE extends AbstractKmLineImport<DB_SERIES, DB_KMTUPLE>, HEADER extends AbstractSeriesImport<DB_SERIES, DB_KMTUPLE, KMLINE>> implements ImportParser { /***** FIELDS *****/ public static final String ENCODING = "ISO-8859-1"; protected static final Locale DEFAULT_LOCALE = Locale.GERMAN; public static final String START_META_CHAR = "#"; protected static final String SEPARATOR_CHAR = ";"; protected static final Pattern META_RIVERNAME = Pattern.compile("^#\\s*((Gew.sser)|(Gewaesser)):\\s*(\\S[^;]*).*", Pattern.CASE_INSENSITIVE); protected static final Pattern META_KMRANGE_INFO = Pattern.compile("^#\\s*Strecke:\\s*(\\S[^;]*).*", Pattern.CASE_INSENSITIVE); protected static final Pattern META_COMMENTS = Pattern.compile("^#\\s*weitere Bemerkungen:\\s*(\\S[^;]*).*", Pattern.CASE_INSENSITIVE); private static final Pattern META_COLUMNTITLES = Pattern.compile("^#*\\s*Fluss.km\\s*;.+", Pattern.CASE_INSENSITIVE); private static final Pattern META_SUBGROUP = Pattern.compile("^##.*", Pattern.CASE_INSENSITIVE); private static NumberFormat numberFormat = NumberFormat.getInstance(Locale.ROOT); private static DecimalFormat bigDecimalFormat; protected static final String INVALID_VALUE_ERROR_FORMAT = "Invalid or missing %s value"; static { bigDecimalFormat = (DecimalFormat) NumberFormat.getInstance(Locale.ROOT); bigDecimalFormat.setParseBigDecimal(true); } /** * How the km column and its content are expected */ protected enum KmMode { NONE, UNIQUE, DUPLICATES } /** * Path of the file or directory to import from */ protected final File importPath; /** * Part of {@link importPath} without the river root dir */ protected final File rootRelativePath; /** * River for which the import runs */ protected final ImportRiver river; /** * Reader during parse */ protected LineNumberReader in; /** * Last line read from in */ protected String currentLine; /** * State of the header lines parse loop */ protected ParsingState headerParsingState; /** * Series header of the stations table, with the imported meta info. */ protected HEADER seriesHeader; /** * List of meta info Pattern matched during {@link handleMetaLine} */ protected final List<Pattern> metaPatternsMatched; /** * Column titles of the stations table, starting with the km column. * All strings have been trimmed. */ protected final List<String> columnTitles; /** * List of the km value tuples imported, no duplicate km */ protected final List<KMLINE> values; /** * Ordered list with the imported km to check for duplicates. */ protected final TreeSet<Double> kmExists; /***** CONSTRUCTORS *****/ /** * Constructs a parser for an import file */ public AbstractParser(final File importPath, final File rootRelativePath, final ImportRiver river) { this.importPath = importPath; this.rootRelativePath = rootRelativePath; this.river = river; this.metaPatternsMatched = new ArrayList<>(); this.kmExists = new TreeSet<>(EpsilonComparator.CMP); this.columnTitles = new ArrayList<>(); this.values = new ArrayList<>(); } /***** FILE-METHODS *****/ /** * Lists all files from a directory having a type extension (starting with dot) */ protected static List<File> listFiles(final File importDir, final String extension) { final File[] files = importDir.listFiles(new FilenameFilter() { @Override public boolean accept(final File dir, final String name) { return name.toLowerCase().endsWith(extension); } }); final List<File> fl = new ArrayList<>(); if (files != null) for (final File file : files) fl.add(file); return fl; } /** * Lists all files from a directory matching a file name pattern */ protected static List<File> listFiles(final File importDir, final Pattern filenamePattern) { final File[] files = importDir.listFiles(new FilenameFilter() { @Override public boolean accept(final File dir, final String name) { return filenamePattern.matcher(name).matches(); } }); final List<File> fl = new ArrayList<>(); if (files != null) for (final File file : files) fl.add(file); return fl; } /***** PARSE-METHODS *****/ /** * Parses a file and adds series and values to the parser's collection */ @Override public void parse() throws Exception { logStartInfo(); this.seriesHeader = createSeriesImport(this.importPath.getName().replaceAll("\\.csv", "")); this.metaPatternsMatched.clear(); this.kmExists.clear(); this.headerParsingState = ParsingState.CONTINUE; try { try { this.in = new LineNumberReader(new InputStreamReader(new FileInputStream(this.importPath), ENCODING)); } catch (final Exception e) { logError("Could not open (%s)", e.getMessage()); this.headerParsingState = ParsingState.STOP; } try { this.currentLine = null; while (this.headerParsingState != ParsingState.STOP) { this.currentLine = this.in.readLine(); if (this.currentLine == null) break; this.currentLine = this.currentLine.trim(); if (this.currentLine.isEmpty()) continue; if (this.headerParsingState == ParsingState.CONTINUE) { handleMetaLine(); if (this.headerParsingState == ParsingState.DONE) checkMetaData(); } else handleDataLine(); } if (this.headerParsingState != ParsingState.STOP) getLog().info(String.format("Number of values found: %d", this.seriesHeader.getValueCount())); } catch (final Exception e) { throw new Exception(String.format("Parsing error (last read line: %d)", this.in.getLineNumber() + 1), e); } } finally { if (this.in != null) { this.in.close(); this.in = null; } } if (this.headerParsingState == ParsingState.STOP) logError("Parsing of the file stopped due to a severe error"); } /** * Writes the parse start info to the log */ protected void logStartInfo() { getLog().info(String.format("Start parsing:;'%s'", this.rootRelativePath)); } /** * Strips separator chars from a meta info text, and trims leading and trailing whitespace */ public static String parseMetaInfo(final String text) { return text.replace(SEPARATOR_CHAR, "").trim(); } /** * Parses a number string with dot or comma as decimal char, and returning null in case of an error */ public static Number parseDoubleCheckNull(final String[] values, final int index) { if (index > values.length - 1) return null; try { return parseDouble(values[index]); } catch (final Exception e) { return null; } } /** * Parses a number string with dot or comma as decimal char * * @throws ParseException */ private static Number parseDouble(final String text) throws ParseException { return numberFormat.parse(text.replace(',', '.')); } /** * Parses an integer number string , and returning null in case of an error */ public static Integer parseIntegerCheckNull(final String[] values, final int index) { if (index > values.length - 1) return null; try { return Integer.valueOf((values[index])); } catch (final Exception e) { return null; } } /** * Parses a number string as a BigDecimal, replacing a comma with a dot first */ public static BigDecimal parseDecimal(final String text) throws ParseException { return (BigDecimal) bigDecimalFormat.parse(text.replace(',', '.')); } /** * Creates a new series import object */ protected abstract HEADER createSeriesImport(final String filename); /***** METAHEADER-PARSE-METHODS *****/ protected void handleMetaLine() { if (META_SUBGROUP.matcher(this.currentLine).matches()) return; else if (handleMetaRivername()) return; else if (handleMetaKmrange_info()) return; else if (handleMetaComment()) return; else if (handleMetaOther()) return; else if (handleMetaColumnTitles()) { if (this.headerParsingState != ParsingState.STOP) this.headerParsingState = ParsingState.DONE; return; } else { if (this.currentLine.startsWith(START_META_CHAR)) { if (this.headerParsingState != ParsingState.IGNORE) logLineWarning("Not matching any known meta type"); else this.headerParsingState = ParsingState.CONTINUE; } else this.headerParsingState = ParsingState.DONE; // no more meta data expected, if neither meta line nor empty line } } private boolean handleMetaRivername() { if (META_RIVERNAME.matcher(this.currentLine).matches()) { this.metaPatternsMatched.add(META_RIVERNAME); return true; } else return false; } private boolean handleMetaKmrange_info() { final Matcher m = META_KMRANGE_INFO.matcher(this.currentLine); if (m.matches()) { this.metaPatternsMatched.add(META_KMRANGE_INFO); this.seriesHeader.setKmrange_info(parseMetaInfo(m.group(1))); return true; } return false; } private boolean handleMetaComment() { final Matcher m = META_COMMENTS.matcher(this.currentLine); if (m.matches()) { this.metaPatternsMatched.add(META_COMMENTS); this.seriesHeader.setNotes(parseMetaInfo(m.group(1))); return true; } return false; } /** * Parses currentLine for non-default meta info * * @return Whether the line has been handled */ protected boolean handleMetaOther() { return false; } /** * Parses a header line for the km table column header line * * @return Whether the line has been handled (also in case of State=STOP),<br> * and we are ready for reading the km values lines (or cancel parsing) */ protected boolean handleMetaColumnTitles() { if (META_COLUMNTITLES.matcher(this.currentLine).matches()) { this.metaPatternsMatched.add(META_COLUMNTITLES); this.columnTitles.clear(); final String[] titles = this.currentLine.split(SEPARATOR_CHAR, 0); for (int i = 0; i <= titles.length - 1; i++) this.columnTitles.add(titles[i].trim()); return true; } return false; } /** * Check meta data after all meta data lines (#) have been read */ protected boolean checkMetaData() { if (this.columnTitles.size() <= 1) { logError("No valid header line with column titles found"); this.headerParsingState = ParsingState.STOP; return false; } if (checkSeriesExistsAlready()) { logError("Data series/filename exists already in the database"); this.headerParsingState = ParsingState.STOP; return false; } return true; } /** * Checks the existence of the active series in the database */ protected boolean checkSeriesExistsAlready() { if (!checkRiverExists()) return false; final Session session = ImporterSession.getInstance().getDatabaseSession(); final List<DB_SERIES> rows = this.seriesHeader.querySeriesItem(session, this.river.getPeer(), true); return !rows.isEmpty(); } /** * Checks the existence of the active river in the database */ protected boolean checkRiverExists() { return (this.river.getPeer(false) != null); } /***** VALUELINE-PARSE-METHODS *****/ /** * Parses a values line and adds the values record */ protected void handleDataLine() { final String[] values = this.currentLine.split(SEPARATOR_CHAR, 0); // Skip import line without data or only km if (values.length < 2) { logLineWarning("Too few data"); return; } Double km = Double.NaN; if (kmMode() != KmMode.NONE) { try { km = Double.valueOf(parseDouble(values[0]).doubleValue()); if (kmMode() == KmMode.UNIQUE) { if (this.kmExists.contains(km)) { logLineWarning("Duplicate km '%s'", values[0]); return; } this.kmExists.add(km); } } catch (final Exception e) { logLineWarning("Invalid km: %s", e.getMessage()); return; } } final KMLINE value = createKmLineImport(km, values); if (value != null) { final boolean added = this.seriesHeader.addValue(value); if (!added) logLineWarning("Duplicate data line"); } } /** * How {@link handleDataLine} shall handle the km column (if any) */ protected KmMode kmMode() { return KmMode.UNIQUE; } /** * Creates a value import item with the km and other fields of the current line; * the km has been validated * * @return value item, or null if parse error */ protected abstract KMLINE createKmLineImport(final Double km, final String[] values); /***** STORE-METHODS *****/ /** * Stores the parsed series and values in the database */ @Override public void store() { if (this.headerParsingState != ParsingState.STOP) { this.seriesHeader.store(this.river.getPeer()); final String counts = String.format("parse=%d, insert=%d, update/ignore=%d", this.seriesHeader.getValueCount(), this.seriesHeader.getValueStoreCount(StoreMode.INSERT), this.seriesHeader.getValueStoreCount(StoreMode.UPDATE)); if (this.seriesHeader.getValueCount() > this.seriesHeader.getValueStoreCount(StoreMode.INSERT)) logWarning("Number of value inserts less than number parsed: %s", counts); else getLog().info("Number of values records: " + counts); } else logWarning("Severe parsing errors, not storing series '%s'", this.seriesHeader.getFilename()); } /***** LOG-METHODS *****/ /** * Gets the class's logger */ protected abstract Logger getLog(); /** * Logs an error message, appending the relative file path */ protected void logError(final String message) { getLog().error(buildLogMessage(message)); } /** * Logs an error message, appending the relative file path */ protected void logError(final String format, final Object... args) { getLog().error(buildLogMessage(String.format(format, args))); } /** * Logs an error message with current line number, appending the relative file path */ protected void logLineError(final String message) { getLog().error(buildLineLogMessage(message)); } /** * Logs an error message with current line number, appending the relative file path */ protected void logLineError(final String format, final Object... args) { getLog().error(buildLineLogMessage(String.format(format, args))); } /** * Logs a warning message, appending the relative file path */ protected void logWarning(final String message) { getLog().warn(buildLogMessage(message)); } /** * Logs a warning message, appending the relative file path */ protected void logWarning(final String format, final Object... args) { getLog().warn(buildLogMessage(String.format(format, args))); } /** * Logs a warning message, appending the line number and the relative file path */ protected void logLineWarning(final String message) { getLog().warn(buildLineLogMessage(message)); } /** * Logs a warning message, appending the line number and the relative file path */ protected void logLineWarning(final String format, final Object... args) { getLog().warn(buildLineLogMessage(String.format(format, args))); } /** * Logs an info message, appending the relative file path */ protected void logInfo(final String message) { getLog().info(buildLogMessage(message)); } /** * Logs a debug message, appending the relative file path */ protected void logDebug(final String message) { getLog().debug(buildLogMessage(message)); } /** * Logs a trace message, appending the relative file path */ protected void logTrace(final String message) { getLog().trace(buildLogMessage(message)); } private String buildLogMessage(final String message) { return String.format("%s;%s", message, this.rootRelativePath); } private String buildLineLogMessage(final String message) { return String.format("Line %d: %s;%s", this.in.getLineNumber(), message, this.rootRelativePath); } }