comparison backend/src/main/java/org/dive4elements/river/importer/sinfo/parsers/TkhParser.java @ 9655:1f57381b3bb5

Importer (s/u-info) extensions: uniform formatting of from-to series names; warning instead of cancelling in case of missing tkh column values; detecting, logging and skipping columns with a wrong unit
author mschaefer
date Mon, 23 Mar 2020 15:16:35 +0100
parents ac41551a8e4d
children
comparison
equal deleted inserted replaced
9654:8a2a777a8372 9655:1f57381b3bb5
10 10
11 package org.dive4elements.river.importer.sinfo.parsers; 11 package org.dive4elements.river.importer.sinfo.parsers;
12 12
13 import java.io.File; 13 import java.io.File;
14 import java.io.FileInputStream; 14 import java.io.FileInputStream;
15 import java.io.IOException;
16 import java.io.InputStreamReader; 15 import java.io.InputStreamReader;
17 import java.io.LineNumberReader; 16 import java.io.LineNumberReader;
18 import java.text.DateFormat; 17 import java.text.DateFormat;
19 import java.text.SimpleDateFormat; 18 import java.text.SimpleDateFormat;
20 import java.util.ArrayList; 19 import java.util.ArrayList;
24 import java.util.regex.Pattern; 23 import java.util.regex.Pattern;
25 24
26 import org.apache.log4j.Logger; 25 import org.apache.log4j.Logger;
27 import org.dive4elements.river.importer.Config; 26 import org.dive4elements.river.importer.Config;
28 import org.dive4elements.river.importer.ImportRiver; 27 import org.dive4elements.river.importer.ImportRiver;
28 import org.dive4elements.river.importer.ImporterSession;
29 import org.dive4elements.river.importer.common.AbstractParser; 29 import org.dive4elements.river.importer.common.AbstractParser;
30 import org.dive4elements.river.importer.common.ParsingState; 30 import org.dive4elements.river.importer.common.ParsingState;
31 import org.dive4elements.river.importer.sinfo.importitem.TkhColumnSeriesImport; 31 import org.dive4elements.river.importer.sinfo.importitem.TkhColumnSeriesImport;
32 import org.dive4elements.river.importer.sinfo.importitem.TkhKmLineImport; 32 import org.dive4elements.river.importer.sinfo.importitem.TkhKmLineImport;
33 import org.dive4elements.river.importer.sinfo.importitem.TkhSeriesImport; 33 import org.dive4elements.river.importer.sinfo.importitem.TkhSeriesImport;
34 import org.dive4elements.river.model.sinfo.Tkh; 34 import org.dive4elements.river.model.sinfo.Tkh;
35 import org.dive4elements.river.model.sinfo.TkhColumn; 35 import org.dive4elements.river.model.sinfo.TkhColumn;
36 import org.dive4elements.river.model.sinfo.TkhValue; 36 import org.dive4elements.river.model.sinfo.TkhValue;
37 import org.hibernate.Session;
37 38
38 /** 39 /**
39 * Reads and parses the header of a TKH file and handles the parse and store of the columns 40 * Reads and parses the header of a TKH file and handles the parse and store of the columns
40 * 41 *
41 * @author Matthias Schäfer 42 * @author Matthias Schäfer
129 parsers.add(new TkhParser(file, new File(relativeDir, file.getName()), river)); 130 parsers.add(new TkhParser(file, new File(relativeDir, file.getName()), river));
130 return parsers; 131 return parsers;
131 } 132 }
132 133
133 @Override 134 @Override
134 public void parse() throws IOException { 135 public void parse() throws Exception {
135 getLog().info("Start parsing:;'" + this.rootRelativePath + "'"); 136 getLog().info("Start parsing:;'" + this.rootRelativePath + "'");
136 // this.seriesHeader = createSeriesImport(this.importPath.getName().replaceAll("\\.csv", "")); 137 // this.seriesHeader = createSeriesImport(this.importPath.getName().replaceAll("\\.csv", ""));
137 this.metaPatternsMatched.clear(); 138 this.metaPatternsMatched.clear();
138 this.kmExists.clear(); 139 this.kmExists.clear();
139 this.colParsers.clear(); 140 this.colParsers.clear();
141 try { 142 try {
142 try { 143 try {
143 this.in = new LineNumberReader(new InputStreamReader(new FileInputStream(this.importPath), ENCODING)); 144 this.in = new LineNumberReader(new InputStreamReader(new FileInputStream(this.importPath), ENCODING));
144 } 145 }
145 catch (final Exception e) { 146 catch (final Exception e) {
146 logError("Could not open (" + e.getMessage() + ")"); 147 logError("Could not open (%s)", e.getMessage());
147 this.headerParsingState = ParsingState.STOP; 148 this.headerParsingState = ParsingState.STOP;
148 } 149 }
149 this.currentLine = null; 150 this.currentLine = null;
150 while (this.headerParsingState == ParsingState.CONTINUE) { 151 while (this.headerParsingState == ParsingState.CONTINUE) {
151 this.currentLine = this.in.readLine(); 152 this.currentLine = this.in.readLine();
153 break; 154 break;
154 this.currentLine = this.currentLine.trim(); 155 this.currentLine = this.currentLine.trim();
155 if (this.currentLine.isEmpty()) 156 if (this.currentLine.isEmpty())
156 continue; 157 continue;
157 handleMetaLine(); 158 handleMetaLine();
159 if (this.headerParsingState == ParsingState.DONE)
160 checkMetaData();
158 } 161 }
159 } 162 }
160 finally { 163 finally {
161 if (this.in != null) { 164 if (this.in != null) {
162 this.in.close(); 165 this.in.close();
192 try { 195 try {
193 if (!m.group(1).isEmpty()) 196 if (!m.group(1).isEmpty())
194 day = dateFormat.parse(m.group(1)); 197 day = dateFormat.parse(m.group(1));
195 } 198 }
196 catch (final Exception e) { 199 catch (final Exception e) {
197 logError("Invalid date in line " + this.in.getLineNumber()); 200 logLineWarning("Invalid date");
198 } 201 }
199 this.metaPatternsMatched.add(META_DATUM); 202 this.metaPatternsMatched.add(META_DATUM);
200 this.tkhGroup.setDay(day); 203 this.tkhGroup.setDay(day);
201 if (day == null) 204 if (day == null)
202 logWarning("No date specified"); 205 logLineWarning("No date specified");
203 return true; 206 return true;
204 } 207 }
205 return false; 208 return false;
206 } 209 }
207 210
239 for (int i = 1; i <= this.columnTitles.size() - 1; i++) { 242 for (int i = 1; i <= this.columnTitles.size() - 1; i++) {
240 final Matcher m = COLUMN_TITLE.matcher(this.columnTitles.get(i)); 243 final Matcher m = COLUMN_TITLE.matcher(this.columnTitles.get(i));
241 if (m.matches()) 244 if (m.matches())
242 this.colParsers.add(new TkhColumnParser(this.importPath, this.rootRelativePath, this.river, this.tkhGroup, i, m.group(7).trim())); 245 this.colParsers.add(new TkhColumnParser(this.importPath, this.rootRelativePath, this.river, this.tkhGroup, i, m.group(7).trim()));
243 else 246 else
244 logWarning("No title found in column " + i + ", skipped"); 247 logLineWarning("Invalid title in column %d (%s)", i + 1, this.columnTitles.get(i));
245 } 248 }
246 return true; 249 return true;
247 } 250 }
251
252 /**
253 * Checks the existence of the active series in the database
254 */
255 @Override
256 protected boolean checkSeriesExistsAlready() {
257 if (!checkRiverExists())
258 return false;
259 final Session session = ImporterSession.getInstance().getDatabaseSession();
260 final List<Tkh> rows = this.tkhGroup.querySeriesItem(session, this.river.getPeer());
261 return !rows.isEmpty();
262 }
263
248 264
249 @Override 265 @Override
250 public void store() { 266 public void store() {
251 if (this.headerParsingState != ParsingState.STOP) { 267 if (this.headerParsingState != ParsingState.STOP) {
252 this.tkhGroup.getPeer(this.river.getPeer()); 268 this.tkhGroup.getPeer(this.river.getPeer());
253 for (final TkhColumnParser colParser : this.colParsers) 269 for (final TkhColumnParser colParser : this.colParsers)
254 colParser.store(); 270 colParser.store();
255 } 271 }
256 else 272 else
257 logWarning("Severe parsing errors, not storing series '" + this.tkhGroup.getFilename() + "'"); 273 logWarning("Severe parsing errors, not storing series '%s'", this.tkhGroup.getFilename());
258 } 274 }
259 275
260 @Override 276 @Override
261 protected TkhColumnSeriesImport createSeriesImport(final String filename) { 277 protected TkhColumnSeriesImport createSeriesImport(final String filename) {
262 throw new UnsupportedOperationException(); 278 throw new UnsupportedOperationException();

http://dive4elements.wald.intevation.org