Mercurial > dive4elements > river
annotate backend/src/main/java/org/dive4elements/river/importer/common/AbstractParser.java @ 9650:a2a42a6bac6b
Importer (s/u-info) extensions:
outer try/catch for parse and log of line no,
catching parsing exception if not enough value fields,
parsing error and warning log messages with line number,
detecting and rejecting duplicate data series,
better differentiation between error and warning log messages
author | mschaefer |
---|---|
date | Mon, 23 Mar 2020 14:57:03 +0100 |
parents | ddebd4c2fe93 |
children |
rev | line source |
---|---|
8971
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
1 /* Copyright (C) 2017 by Bundesanstalt für Gewässerkunde |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
2 * Software engineering by |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
3 * Björnsen Beratende Ingenieure GmbH |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
4 * Dr. Schumacher Ingenieurbüro für Wasser und Umwelt |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
5 * |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
6 * This file is Free Software under the GNU AGPL (>=v3) |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
7 * and comes with ABSOLUTELY NO WARRANTY! Check out the |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
8 * documentation coming with Dive4Elements River for details. |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
9 */ |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
10 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
11 package org.dive4elements.river.importer.common; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
12 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
13 import java.io.File; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
14 import java.io.FileInputStream; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
15 import java.io.FilenameFilter; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
16 import java.io.InputStreamReader; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
17 import java.io.LineNumberReader; |
8989
2693bfaf503d
Fixed several BigDecimal(double) creations by BigDecimal(String) parsing to avoid unnecessary decimal digits
mschaefer
parents:
8971
diff
changeset
|
18 import java.math.BigDecimal; |
8991
c43d8c1a4455
Parse via decimalformat instead of bigdecimal constructor (to cope with strings with chars after the actual number)
mschaefer
parents:
8989
diff
changeset
|
19 import java.text.DecimalFormat; |
8971
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
20 import java.text.NumberFormat; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
21 import java.text.ParseException; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
22 import java.util.ArrayList; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
23 import java.util.List; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
24 import java.util.Locale; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
25 import java.util.TreeSet; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
26 import java.util.regex.Matcher; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
27 import java.util.regex.Pattern; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
28 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
29 import org.apache.log4j.Logger; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
30 import org.dive4elements.river.backend.utils.EpsilonComparator; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
31 import org.dive4elements.river.importer.ImportRiver; |
9650 | 32 import org.dive4elements.river.importer.ImporterSession; |
33 import org.hibernate.Session; | |
8971
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
34 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
35 /** |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
36 * Abstract base class for a parser of one FLYS csv data file.<br /> |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
37 * The {@link parse} method creates a SERIES object for the meta data |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
38 * and a list of KMLINE objects for the km value lines read from the file.<br /> |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
39 * The {@link store} method gets or creates the corresponding database objects |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
40 * by the hibernate binding classes DB_SERIES and DB_KMTUPLE, |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
41 * and updates or inserts them in the database. |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
42 * DB_SERIES has a one-to-many relationship with DB_KMTUPLE.<br /> |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
43 * <br /> |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
44 * The structure of the file is as follows:<br /> |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
45 * <ul> |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
46 * <li>one or more comment lines (#) with the meta info of the data series</li> |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
47 * <li>the comment line with the column titles of values table, starting with the km column</li> |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
48 * <li>the rows of the values table, each one on its own line</li> |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
49 * </ul> |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
50 * |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
51 * @author Matthias Schäfer |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
52 * |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
53 */ |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
54 public abstract class AbstractParser<DB_SERIES, DB_KMTUPLE, KMLINE extends AbstractKmLineImport<DB_SERIES, DB_KMTUPLE>, HEADER extends AbstractSeriesImport<DB_SERIES, DB_KMTUPLE, KMLINE>> implements ImportParser { |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
55 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
56 /***** FIELDS *****/ |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
57 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
58 public static final String ENCODING = "ISO-8859-1"; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
59 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
60 protected static final Locale DEFAULT_LOCALE = Locale.GERMAN; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
61 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
62 public static final String START_META_CHAR = "#"; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
63 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
64 protected static final String SEPARATOR_CHAR = ";"; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
65 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
66 protected static final Pattern META_RIVERNAME = Pattern.compile("^#\\s*((Gew.sser)|(Gewaesser)):\\s*(\\S[^;]*).*", Pattern.CASE_INSENSITIVE); |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
67 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
68 protected static final Pattern META_KMRANGE_INFO = Pattern.compile("^#\\s*Strecke:\\s*(\\S[^;]*).*", Pattern.CASE_INSENSITIVE); |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
69 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
70 protected static final Pattern META_COMMENTS = Pattern.compile("^#\\s*weitere Bemerkungen:\\s*(\\S[^;]*).*", Pattern.CASE_INSENSITIVE); |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
71 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
72 private static final Pattern META_COLUMNTITLES = Pattern.compile("^#*\\s*Fluss.km\\s*;.+", Pattern.CASE_INSENSITIVE); |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
73 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
74 private static final Pattern META_SUBGROUP = Pattern.compile("^##.*", Pattern.CASE_INSENSITIVE); |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
75 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
76 private static NumberFormat numberFormat = NumberFormat.getInstance(Locale.ROOT); |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
77 |
8991
c43d8c1a4455
Parse via decimalformat instead of bigdecimal constructor (to cope with strings with chars after the actual number)
mschaefer
parents:
8989
diff
changeset
|
78 private static DecimalFormat bigDecimalFormat; |
c43d8c1a4455
Parse via decimalformat instead of bigdecimal constructor (to cope with strings with chars after the actual number)
mschaefer
parents:
8989
diff
changeset
|
79 |
9650 | 80 protected static final String INVALID_VALUE_ERROR_FORMAT = "Invalid or missing %s value"; |
81 | |
8991
c43d8c1a4455
Parse via decimalformat instead of bigdecimal constructor (to cope with strings with chars after the actual number)
mschaefer
parents:
8989
diff
changeset
|
82 static { |
c43d8c1a4455
Parse via decimalformat instead of bigdecimal constructor (to cope with strings with chars after the actual number)
mschaefer
parents:
8989
diff
changeset
|
83 bigDecimalFormat = (DecimalFormat) NumberFormat.getInstance(Locale.ROOT); |
c43d8c1a4455
Parse via decimalformat instead of bigdecimal constructor (to cope with strings with chars after the actual number)
mschaefer
parents:
8989
diff
changeset
|
84 bigDecimalFormat.setParseBigDecimal(true); |
c43d8c1a4455
Parse via decimalformat instead of bigdecimal constructor (to cope with strings with chars after the actual number)
mschaefer
parents:
8989
diff
changeset
|
85 } |
c43d8c1a4455
Parse via decimalformat instead of bigdecimal constructor (to cope with strings with chars after the actual number)
mschaefer
parents:
8989
diff
changeset
|
86 |
8971
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
87 /** |
9012 | 88 * How the km column and its content are expected |
89 */ | |
90 protected enum KmMode { | |
91 NONE, UNIQUE, DUPLICATES | |
92 } | |
93 | |
94 /** | |
8971
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
95 * Path of the file or directory to import from |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
96 */ |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
97 protected final File importPath; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
98 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
99 /** |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
100 * Part of {@link importPath} without the river root dir |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
101 */ |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
102 protected final File rootRelativePath; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
103 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
104 /** |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
105 * River for which the import runs |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
106 */ |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
107 protected final ImportRiver river; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
108 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
109 /** |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
110 * Reader during parse |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
111 */ |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
112 protected LineNumberReader in; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
113 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
114 /** |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
115 * Last line read from in |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
116 */ |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
117 protected String currentLine; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
118 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
119 /** |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
120 * State of the header lines parse loop |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
121 */ |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
122 protected ParsingState headerParsingState; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
123 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
124 /** |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
125 * Series header of the stations table, with the imported meta info. |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
126 */ |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
127 protected HEADER seriesHeader; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
128 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
129 /** |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
130 * List of meta info Pattern matched during {@link handleMetaLine} |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
131 */ |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
132 protected final List<Pattern> metaPatternsMatched; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
133 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
134 /** |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
135 * Column titles of the stations table, starting with the km column. |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
136 * All strings have been trimmed. |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
137 */ |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
138 protected final List<String> columnTitles; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
139 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
140 /** |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
141 * List of the km value tuples imported, no duplicate km |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
142 */ |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
143 protected final List<KMLINE> values; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
144 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
145 /** |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
146 * Ordered list with the imported km to check for duplicates. |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
147 */ |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
148 protected final TreeSet<Double> kmExists; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
149 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
150 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
151 /***** CONSTRUCTORS *****/ |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
152 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
153 /** |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
154 * Constructs a parser for an import file |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
155 */ |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
156 public AbstractParser(final File importPath, final File rootRelativePath, final ImportRiver river) { |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
157 this.importPath = importPath; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
158 this.rootRelativePath = rootRelativePath; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
159 this.river = river; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
160 this.metaPatternsMatched = new ArrayList<>(); |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
161 this.kmExists = new TreeSet<>(EpsilonComparator.CMP); |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
162 this.columnTitles = new ArrayList<>(); |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
163 this.values = new ArrayList<>(); |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
164 } |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
165 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
166 |
9650 | 167 /***** FILE-METHODS *****/ |
8971
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
168 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
169 /** |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
170 * Lists all files from a directory having a type extension (starting with dot) |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
171 */ |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
172 protected static List<File> listFiles(final File importDir, final String extension) { |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
173 final File[] files = importDir.listFiles(new FilenameFilter() { |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
174 @Override |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
175 public boolean accept(final File dir, final String name) { |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
176 return name.toLowerCase().endsWith(extension); |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
177 } |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
178 }); |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
179 final List<File> fl = new ArrayList<>(); |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
180 if (files != null) |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
181 for (final File file : files) |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
182 fl.add(file); |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
183 return fl; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
184 } |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
185 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
186 /** |
9650 | 187 * Lists all files from a directory matching a file name pattern |
188 */ | |
189 protected static List<File> listFiles(final File importDir, final Pattern filenamePattern) { | |
190 final File[] files = importDir.listFiles(new FilenameFilter() { | |
191 @Override | |
192 public boolean accept(final File dir, final String name) { | |
193 return filenamePattern.matcher(name).matches(); | |
194 } | |
195 }); | |
196 final List<File> fl = new ArrayList<>(); | |
197 if (files != null) | |
198 for (final File file : files) | |
199 fl.add(file); | |
200 return fl; | |
201 } | |
202 | |
203 /***** PARSE-METHODS *****/ | |
204 | |
205 /** | |
8971
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
206 * Parses a file and adds series and values to the parser's collection |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
207 */ |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
208 @Override |
9650 | 209 public void parse() throws Exception { |
8971
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
210 logStartInfo(); |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
211 this.seriesHeader = createSeriesImport(this.importPath.getName().replaceAll("\\.csv", "")); |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
212 this.metaPatternsMatched.clear(); |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
213 this.kmExists.clear(); |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
214 this.headerParsingState = ParsingState.CONTINUE; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
215 try { |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
216 try { |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
217 this.in = new LineNumberReader(new InputStreamReader(new FileInputStream(this.importPath), ENCODING)); |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
218 } |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
219 catch (final Exception e) { |
9650 | 220 logError("Could not open (%s)", e.getMessage()); |
8971
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
221 this.headerParsingState = ParsingState.STOP; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
222 } |
9650 | 223 try { |
224 this.currentLine = null; | |
225 while (this.headerParsingState != ParsingState.STOP) { | |
226 this.currentLine = this.in.readLine(); | |
227 if (this.currentLine == null) | |
228 break; | |
229 this.currentLine = this.currentLine.trim(); | |
230 if (this.currentLine.isEmpty()) | |
231 continue; | |
232 if (this.headerParsingState == ParsingState.CONTINUE) { | |
233 handleMetaLine(); | |
234 if (this.headerParsingState == ParsingState.DONE) | |
235 checkMetaData(); | |
236 } | |
237 else | |
238 handleDataLine(); | |
239 } | |
240 if (this.headerParsingState != ParsingState.STOP) | |
241 getLog().info(String.format("Number of values found: %d", this.seriesHeader.getValueCount())); | |
8971
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
242 } |
9650 | 243 catch (final Exception e) { |
244 throw new Exception(String.format("Parsing error (last read line: %d)", this.in.getLineNumber() + 1), e); | |
245 } | |
8971
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
246 } |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
247 finally { |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
248 if (this.in != null) { |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
249 this.in.close(); |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
250 this.in = null; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
251 } |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
252 } |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
253 if (this.headerParsingState == ParsingState.STOP) |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
254 logError("Parsing of the file stopped due to a severe error"); |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
255 } |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
256 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
257 /** |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
258 * Writes the parse start info to the log |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
259 */ |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
260 protected void logStartInfo() { |
9650 | 261 getLog().info(String.format("Start parsing:;'%s'", this.rootRelativePath)); |
8971
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
262 } |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
263 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
264 /** |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
265 * Strips separator chars from a meta info text, and trims leading and trailing whitespace |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
266 */ |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
267 public static String parseMetaInfo(final String text) { |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
268 return text.replace(SEPARATOR_CHAR, "").trim(); |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
269 } |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
270 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
271 /** |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
272 * Parses a number string with dot or comma as decimal char, and returning null in case of an error |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
273 */ |
9650 | 274 public static Number parseDoubleCheckNull(final String[] values, final int index) { |
275 if (index > values.length - 1) | |
276 return null; | |
8971
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
277 try { |
9650 | 278 return parseDouble(values[index]); |
8971
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
279 } |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
280 catch (final Exception e) { |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
281 return null; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
282 } |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
283 } |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
284 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
285 /** |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
286 * Parses a number string with dot or comma as decimal char |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
287 * |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
288 * @throws ParseException |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
289 */ |
9650 | 290 private static Number parseDouble(final String text) throws ParseException { |
8971
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
291 return numberFormat.parse(text.replace(',', '.')); |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
292 } |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
293 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
294 /** |
9650 | 295 * Parses an integer number string , and returning null in case of an error |
296 */ | |
297 public static Integer parseIntegerCheckNull(final String[] values, final int index) { | |
298 if (index > values.length - 1) | |
299 return null; | |
300 try { | |
301 return Integer.valueOf((values[index])); | |
302 } | |
303 catch (final Exception e) { | |
304 return null; | |
305 } | |
306 } | |
307 | |
308 /** | |
8989
2693bfaf503d
Fixed several BigDecimal(double) creations by BigDecimal(String) parsing to avoid unnecessary decimal digits
mschaefer
parents:
8971
diff
changeset
|
309 * Parses a number string as a BigDecimal, replacing a comma with a dot first |
2693bfaf503d
Fixed several BigDecimal(double) creations by BigDecimal(String) parsing to avoid unnecessary decimal digits
mschaefer
parents:
8971
diff
changeset
|
310 */ |
8991
c43d8c1a4455
Parse via decimalformat instead of bigdecimal constructor (to cope with strings with chars after the actual number)
mschaefer
parents:
8989
diff
changeset
|
311 public static BigDecimal parseDecimal(final String text) throws ParseException { |
c43d8c1a4455
Parse via decimalformat instead of bigdecimal constructor (to cope with strings with chars after the actual number)
mschaefer
parents:
8989
diff
changeset
|
312 return (BigDecimal) bigDecimalFormat.parse(text.replace(',', '.')); |
8989
2693bfaf503d
Fixed several BigDecimal(double) creations by BigDecimal(String) parsing to avoid unnecessary decimal digits
mschaefer
parents:
8971
diff
changeset
|
313 } |
2693bfaf503d
Fixed several BigDecimal(double) creations by BigDecimal(String) parsing to avoid unnecessary decimal digits
mschaefer
parents:
8971
diff
changeset
|
314 |
2693bfaf503d
Fixed several BigDecimal(double) creations by BigDecimal(String) parsing to avoid unnecessary decimal digits
mschaefer
parents:
8971
diff
changeset
|
315 /** |
8971
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
316 * Creates a new series import object |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
317 */ |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
318 protected abstract HEADER createSeriesImport(final String filename); |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
319 |
9650 | 320 |
321 /***** METAHEADER-PARSE-METHODS *****/ | |
322 | |
8971
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
323 protected void handleMetaLine() { |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
324 if (META_SUBGROUP.matcher(this.currentLine).matches()) |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
325 return; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
326 else if (handleMetaRivername()) |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
327 return; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
328 else if (handleMetaKmrange_info()) |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
329 return; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
330 else if (handleMetaComment()) |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
331 return; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
332 else if (handleMetaOther()) |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
333 return; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
334 else if (handleMetaColumnTitles()) { |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
335 if (this.headerParsingState != ParsingState.STOP) |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
336 this.headerParsingState = ParsingState.DONE; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
337 return; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
338 } |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
339 else { |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
340 if (this.currentLine.startsWith(START_META_CHAR)) { |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
341 if (this.headerParsingState != ParsingState.IGNORE) |
9650 | 342 logLineWarning("Not matching any known meta type"); |
8971
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
343 else |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
344 this.headerParsingState = ParsingState.CONTINUE; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
345 } |
9650 | 346 else |
347 this.headerParsingState = ParsingState.DONE; // no more meta data expected, if neither meta line nor empty line | |
8971
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
348 } |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
349 } |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
350 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
351 private boolean handleMetaRivername() { |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
352 if (META_RIVERNAME.matcher(this.currentLine).matches()) { |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
353 this.metaPatternsMatched.add(META_RIVERNAME); |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
354 return true; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
355 } |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
356 else |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
357 return false; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
358 } |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
359 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
360 private boolean handleMetaKmrange_info() { |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
361 final Matcher m = META_KMRANGE_INFO.matcher(this.currentLine); |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
362 if (m.matches()) { |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
363 this.metaPatternsMatched.add(META_KMRANGE_INFO); |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
364 this.seriesHeader.setKmrange_info(parseMetaInfo(m.group(1))); |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
365 return true; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
366 } |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
367 return false; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
368 } |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
369 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
370 private boolean handleMetaComment() { |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
371 final Matcher m = META_COMMENTS.matcher(this.currentLine); |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
372 if (m.matches()) { |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
373 this.metaPatternsMatched.add(META_COMMENTS); |
9038
4c5eeaff554c
Database column "comment" renamed to "notes" (restrictions in Oracle)
mschaefer
parents:
9012
diff
changeset
|
374 this.seriesHeader.setNotes(parseMetaInfo(m.group(1))); |
8971
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
375 return true; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
376 } |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
377 return false; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
378 } |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
379 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
380 /** |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
381 * Parses currentLine for non-default meta info |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
382 * |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
383 * @return Whether the line has been handled |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
384 */ |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
385 protected boolean handleMetaOther() { |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
386 return false; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
387 } |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
388 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
389 /** |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
390 * Parses a header line for the km table column header line |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
391 * |
9650 | 392 * @return Whether the line has been handled (also in case of State=STOP),<br> |
393 * and we are ready for reading the km values lines (or cancel parsing) | |
8971
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
394 */ |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
395 protected boolean handleMetaColumnTitles() { |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
396 if (META_COLUMNTITLES.matcher(this.currentLine).matches()) { |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
397 this.metaPatternsMatched.add(META_COLUMNTITLES); |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
398 this.columnTitles.clear(); |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
399 final String[] titles = this.currentLine.split(SEPARATOR_CHAR, 0); |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
400 for (int i = 0; i <= titles.length - 1; i++) |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
401 this.columnTitles.add(titles[i].trim()); |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
402 return true; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
403 } |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
404 return false; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
405 } |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
406 |
9012 | 407 /** |
9650 | 408 * Check meta data after all meta data lines (#) have been read |
409 */ | |
410 protected boolean checkMetaData() { | |
411 if (this.columnTitles.size() <= 1) { | |
412 logError("No valid header line with column titles found"); | |
413 this.headerParsingState = ParsingState.STOP; | |
414 return false; | |
415 } | |
416 if (checkSeriesExistsAlready()) { | |
417 logError("Data series/filename exists already in the database"); | |
418 this.headerParsingState = ParsingState.STOP; | |
419 return false; | |
420 } | |
421 return true; | |
422 } | |
423 | |
424 /** | |
425 * Checks the existence of the active series in the database | |
426 */ | |
427 protected boolean checkSeriesExistsAlready() { | |
428 if (!checkRiverExists()) | |
429 return false; | |
430 final Session session = ImporterSession.getInstance().getDatabaseSession(); | |
431 final List<DB_SERIES> rows = this.seriesHeader.querySeriesItem(session, this.river.getPeer(), true); | |
432 return !rows.isEmpty(); | |
433 } | |
434 | |
435 /** | |
436 * Checks the existence of the active river in the database | |
437 */ | |
438 protected boolean checkRiverExists() { | |
439 return (this.river.getPeer(false) != null); | |
440 } | |
441 | |
442 | |
443 /***** VALUELINE-PARSE-METHODS *****/ | |
444 | |
445 /** | |
9012 | 446 * Parses a values line and adds the values record |
447 */ | |
448 protected void handleDataLine() { | |
8971
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
449 final String[] values = this.currentLine.split(SEPARATOR_CHAR, 0); |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
450 // Skip import line without data or only km |
9650 | 451 if (values.length < 2) { |
452 logLineWarning("Too few data"); | |
8971
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
453 return; |
9650 | 454 } |
9012 | 455 Double km = Double.NaN; |
456 if (kmMode() != KmMode.NONE) { | |
457 try { | |
458 km = Double.valueOf(parseDouble(values[0]).doubleValue()); | |
459 if (kmMode() == KmMode.UNIQUE) { | |
460 if (this.kmExists.contains(km)) { | |
9650 | 461 logLineWarning("Duplicate km '%s'", values[0]); |
9012 | 462 return; |
463 } | |
464 this.kmExists.add(km); | |
8971
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
465 } |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
466 } |
9012 | 467 catch (final Exception e) { |
9650 | 468 logLineWarning("Invalid km: %s", e.getMessage()); |
9012 | 469 return; |
470 } | |
8971
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
471 } |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
472 final KMLINE value = createKmLineImport(km, values); |
9650 | 473 if (value != null) { |
474 final boolean added = this.seriesHeader.addValue(value); | |
475 if (!added) | |
476 logLineWarning("Duplicate data line"); | |
477 } | |
8971
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
478 } |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
479 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
480 /** |
9012 | 481 * How {@link handleDataLine} shall handle the km column (if any) |
8971
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
482 */ |
9012 | 483 protected KmMode kmMode() { |
484 return KmMode.UNIQUE; | |
8971
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
485 } |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
486 |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
487 /** |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
488 * Creates a value import item with the km and other fields of the current line; |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
489 * the km has been validated |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
490 * |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
491 * @return value item, or null if parse error |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
492 */ |
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
493 protected abstract KMLINE createKmLineImport(final Double km, final String[] values); |
9650 | 494 |
495 | |
496 /***** STORE-METHODS *****/ | |
497 | |
498 /** | |
499 * Stores the parsed series and values in the database | |
500 */ | |
501 @Override | |
502 public void store() { | |
503 if (this.headerParsingState != ParsingState.STOP) { | |
504 this.seriesHeader.store(this.river.getPeer()); | |
505 final String counts = String.format("parse=%d, insert=%d, update/ignore=%d", this.seriesHeader.getValueCount(), | |
506 this.seriesHeader.getValueStoreCount(StoreMode.INSERT), this.seriesHeader.getValueStoreCount(StoreMode.UPDATE)); | |
507 if (this.seriesHeader.getValueCount() > this.seriesHeader.getValueStoreCount(StoreMode.INSERT)) | |
508 logWarning("Number of value inserts less than number parsed: %s", counts); | |
509 else | |
510 getLog().info("Number of values records: " + counts); | |
511 } | |
512 else | |
513 logWarning("Severe parsing errors, not storing series '%s'", this.seriesHeader.getFilename()); | |
514 } | |
515 | |
516 | |
517 /***** LOG-METHODS *****/ | |
518 | |
519 /** | |
520 * Gets the class's logger | |
521 */ | |
522 protected abstract Logger getLog(); | |
523 | |
524 /** | |
525 * Logs an error message, appending the relative file path | |
526 */ | |
527 protected void logError(final String message) { | |
528 getLog().error(buildLogMessage(message)); | |
529 } | |
530 | |
531 /** | |
532 * Logs an error message, appending the relative file path | |
533 */ | |
534 protected void logError(final String format, final Object... args) { | |
535 getLog().error(buildLogMessage(String.format(format, args))); | |
536 } | |
537 | |
538 /** | |
539 * Logs an error message with current line number, appending the relative file path | |
540 */ | |
541 protected void logLineError(final String message) { | |
542 getLog().error(buildLineLogMessage(message)); | |
543 } | |
544 | |
545 /** | |
546 * Logs an error message with current line number, appending the relative file path | |
547 */ | |
548 protected void logLineError(final String format, final Object... args) { | |
549 getLog().error(buildLineLogMessage(String.format(format, args))); | |
550 } | |
551 | |
552 /** | |
553 * Logs a warning message, appending the relative file path | |
554 */ | |
555 protected void logWarning(final String message) { | |
556 getLog().warn(buildLogMessage(message)); | |
557 } | |
558 | |
559 /** | |
560 * Logs a warning message, appending the relative file path | |
561 */ | |
562 protected void logWarning(final String format, final Object... args) { | |
563 getLog().warn(buildLogMessage(String.format(format, args))); | |
564 } | |
565 | |
566 /** | |
567 * Logs a warning message, appending the line number and the relative file path | |
568 */ | |
569 protected void logLineWarning(final String message) { | |
570 getLog().warn(buildLineLogMessage(message)); | |
571 } | |
572 | |
573 /** | |
574 * Logs a warning message, appending the line number and the relative file path | |
575 */ | |
576 protected void logLineWarning(final String format, final Object... args) { | |
577 getLog().warn(buildLineLogMessage(String.format(format, args))); | |
578 } | |
579 | |
580 /** | |
581 * Logs an info message, appending the relative file path | |
582 */ | |
583 protected void logInfo(final String message) { | |
584 getLog().info(buildLogMessage(message)); | |
585 } | |
586 | |
587 /** | |
588 * Logs a debug message, appending the relative file path | |
589 */ | |
590 protected void logDebug(final String message) { | |
591 getLog().debug(buildLogMessage(message)); | |
592 } | |
593 | |
594 /** | |
595 * Logs a trace message, appending the relative file path | |
596 */ | |
597 protected void logTrace(final String message) { | |
598 getLog().trace(buildLogMessage(message)); | |
599 } | |
600 | |
601 private String buildLogMessage(final String message) { | |
602 return String.format("%s;%s", message, this.rootRelativePath); | |
603 } | |
604 | |
605 private String buildLineLogMessage(final String message) { | |
606 return String.format("Line %d: %s;%s", this.in.getLineNumber(), message, this.rootRelativePath); | |
607 } | |
8971
50416a0df385
Importer for the Schifffahrt (S-INFO) and Oekologie (U-INFO) files
mschaefer
parents:
diff
changeset
|
608 } |