Mercurial > dive4elements > river
annotate backend/src/main/java/org/dive4elements/river/importer/parsers/InfoGewParser.java @ 9650:a2a42a6bac6b
Importer (s/u-info) extensions:
outer try/catch for parse and log of line no,
catching parsing exception if not enough value fields,
parsing error and warning log messages with line number,
detecting and rejecting duplicate data series,
better differentiation between error and warning log messages
author | mschaefer |
---|---|
date | Mon, 23 Mar 2020 14:57:03 +0100 |
parents | 5e38e2924c07 |
children |
rev | line source |
---|---|
5844
4dd33b86dc61
Added header to river backend.
Sascha L. Teichmann <teichmann@intevation.de>
parents:
5838
diff
changeset
|
1 /* Copyright (C) 2011, 2012, 2013 by Bundesanstalt für Gewässerkunde |
4dd33b86dc61
Added header to river backend.
Sascha L. Teichmann <teichmann@intevation.de>
parents:
5838
diff
changeset
|
2 * Software engineering by Intevation GmbH |
4dd33b86dc61
Added header to river backend.
Sascha L. Teichmann <teichmann@intevation.de>
parents:
5838
diff
changeset
|
3 * |
5992
4c3ccf2b0304
Removed trailing whitespace.
Sascha L. Teichmann <teichmann@intevation.de>
parents:
5856
diff
changeset
|
4 * This file is Free Software under the GNU AGPL (>=v3) |
5844
4dd33b86dc61
Added header to river backend.
Sascha L. Teichmann <teichmann@intevation.de>
parents:
5838
diff
changeset
|
5 * and comes with ABSOLUTELY NO WARRANTY! Check out the |
5992
4c3ccf2b0304
Removed trailing whitespace.
Sascha L. Teichmann <teichmann@intevation.de>
parents:
5856
diff
changeset
|
6 * documentation coming with Dive4Elements River for details. |
5844
4dd33b86dc61
Added header to river backend.
Sascha L. Teichmann <teichmann@intevation.de>
parents:
5838
diff
changeset
|
7 */ |
4dd33b86dc61
Added header to river backend.
Sascha L. Teichmann <teichmann@intevation.de>
parents:
5838
diff
changeset
|
8 |
5829
18619c1e7c2a
Repaired internal references.
Sascha L. Teichmann <teichmann@intevation.de>
parents:
5828
diff
changeset
|
9 package org.dive4elements.river.importer.parsers; |
1211
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
10 |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
11 import java.io.File; |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
12 import java.io.FileInputStream; |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
13 import java.io.InputStreamReader; |
9650 | 14 import java.io.LineNumberReader; |
15 import java.util.ArrayList; | |
16 import java.util.List; | |
17 import java.util.regex.Matcher; | |
18 import java.util.regex.Pattern; | |
1211
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
19 |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
20 import org.apache.log4j.Logger; |
5829
18619c1e7c2a
Repaired internal references.
Sascha L. Teichmann <teichmann@intevation.de>
parents:
5828
diff
changeset
|
21 import org.dive4elements.artifacts.common.utils.FileTools; |
18619c1e7c2a
Repaired internal references.
Sascha L. Teichmann <teichmann@intevation.de>
parents:
5828
diff
changeset
|
22 import org.dive4elements.river.importer.ImportRiver; |
1211
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
23 |
4709
e50d78334246
Documentation.
Felix Wolfsteller <felix.wolfsteller@intevation.de>
parents:
1234
diff
changeset
|
24 |
e50d78334246
Documentation.
Felix Wolfsteller <felix.wolfsteller@intevation.de>
parents:
1234
diff
changeset
|
25 /** Processes files mentioned in an info file for a river. */ |
1211
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
26 public class InfoGewParser |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
27 { |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
28 private static Logger log = Logger.getLogger(InfoGewParser.class); |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
29 |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
30 public static final String ENCODING = "ISO-8859-1"; |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
31 |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
32 public static final Pattern GEWAESSER = |
9650 | 33 Pattern.compile("^\\s*Gew\u00e4sser\\s*:\\s*(.+)"); |
1211
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
34 |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
35 public static final Pattern WST_DATEI = |
9650 | 36 Pattern.compile("^\\s*WSTDatei\\s*:\\s*(.+)"); |
1211
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
37 |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
38 public static final Pattern BB_INFO = |
9650 | 39 Pattern.compile("^\\s*B\\+B-Info\\s*:\\s*(.+)"); |
1211
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
40 |
7751
24408bce2fdb
Parse and import model_uuid for rivers.
Raimund Renkert <rrenkert@intevation.de>
parents:
5992
diff
changeset
|
41 public static final Pattern GEW_UUID = |
9650 | 42 Pattern.compile("^\\s*uuid\\s*:\\s*(.+)"); |
7751
24408bce2fdb
Parse and import model_uuid for rivers.
Raimund Renkert <rrenkert@intevation.de>
parents:
5992
diff
changeset
|
43 |
1211
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
44 protected ArrayList<ImportRiver> rivers; |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
45 |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
46 protected AnnotationClassifier annotationClassifier; |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
47 |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
48 public InfoGewParser() { |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
49 this(null); |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
50 } |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
51 |
9650 | 52 public InfoGewParser(final AnnotationClassifier annotationClassifier) { |
53 this.rivers = new ArrayList<>(); | |
1211
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
54 this.annotationClassifier = annotationClassifier; |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
55 } |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
56 |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
57 public List<ImportRiver> getRivers() { |
9650 | 58 return this.rivers; |
1211
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
59 } |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
60 |
9650 | 61 public static final String normalize(final String f) { |
1211
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
62 return f.replace("\\", "/").replace("/", File.separator); |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
63 } |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
64 |
5016
255898799ed9
Minimal whitespace and doc cosmetics.
Felix Wolfsteller <felix.wolfsteller@intevation.de>
parents:
4709
diff
changeset
|
65 /** Handle a gew, wst, or bb_info file. */ |
9650 | 66 public void parse(final File file) throws Exception { |
1211
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
67 |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
68 LineNumberReader in = null; |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
69 |
9650 | 70 final File root = file.getParentFile(); |
1211
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
71 |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
72 try { |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
73 in = |
9650 | 74 new LineNumberReader( |
75 new InputStreamReader( | |
76 new FileInputStream(file), ENCODING)); | |
1211
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
77 |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
78 String line = null; |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
79 |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
80 String riverName = null; |
7751
24408bce2fdb
Parse and import model_uuid for rivers.
Raimund Renkert <rrenkert@intevation.de>
parents:
5992
diff
changeset
|
81 String modelUuid = null; |
1211
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
82 File wstFile = null; |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
83 File bbInfoFile = null; |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
84 |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
85 while ((line = in.readLine()) != null) { |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
86 if ((line = line.trim()).length() == 0) { |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
87 continue; |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
88 } |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
89 Matcher m = GEWAESSER.matcher(line); |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
90 |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
91 if (m.matches()) { |
9650 | 92 final String river = m.group(1); |
1211
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
93 log.info("Found river '" + river + "'"); |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
94 if (riverName != null) { |
9650 | 95 this.rivers.add(new ImportRiver( |
96 riverName, | |
97 modelUuid, | |
98 wstFile, | |
99 bbInfoFile, | |
100 this.annotationClassifier)); | |
1211
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
101 } |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
102 riverName = river; |
7751
24408bce2fdb
Parse and import model_uuid for rivers.
Raimund Renkert <rrenkert@intevation.de>
parents:
5992
diff
changeset
|
103 modelUuid = null; |
1211
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
104 wstFile = null; |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
105 bbInfoFile = null; |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
106 } |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
107 else if ((m = WST_DATEI.matcher(line)).matches()) { |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
108 String wstFilename = m.group(1); |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
109 File wst = new File(wstFilename = normalize(wstFilename)); |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
110 if (!wst.isAbsolute()) { |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
111 wst = new File(root, wstFilename); |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
112 } |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
113 wst = FileTools.repair(wst); |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
114 if (!wst.isFile() || !wst.canRead()) { |
8856 | 115 log.error( |
9650 | 116 "cannot access WST file '" + wstFilename + "'"); |
1211
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
117 continue; |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
118 } |
8677
b44c76b6800c
Importer, do not jump knowingly over the cliff.
Tom Gottfried <tom@intevation.de>
parents:
7751
diff
changeset
|
119 log.info("Found wst file '" + wst + "'"); |
1211
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
120 wstFile = wst; |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
121 } |
7751
24408bce2fdb
Parse and import model_uuid for rivers.
Raimund Renkert <rrenkert@intevation.de>
parents:
5992
diff
changeset
|
122 else if ((m = GEW_UUID.matcher(line)).matches()) { |
24408bce2fdb
Parse and import model_uuid for rivers.
Raimund Renkert <rrenkert@intevation.de>
parents:
5992
diff
changeset
|
123 modelUuid = m.group(1); |
24408bce2fdb
Parse and import model_uuid for rivers.
Raimund Renkert <rrenkert@intevation.de>
parents:
5992
diff
changeset
|
124 log.debug("Found model uuid " + modelUuid + |
9650 | 125 " for river " + riverName); |
7751
24408bce2fdb
Parse and import model_uuid for rivers.
Raimund Renkert <rrenkert@intevation.de>
parents:
5992
diff
changeset
|
126 } |
1211
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
127 else if ((m = BB_INFO.matcher(line)).matches()) { |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
128 //TODO: Make it relative to the wst file. |
9650 | 129 final String bbInfo = m.group(1); |
1211
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
130 bbInfoFile = new File(normalize(bbInfo)); |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
131 } |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
132 } |
8677
b44c76b6800c
Importer, do not jump knowingly over the cliff.
Tom Gottfried <tom@intevation.de>
parents:
7751
diff
changeset
|
133 if (riverName != null && wstFile != null) { |
9650 | 134 this.rivers.add(new ImportRiver( |
135 riverName, | |
136 modelUuid, | |
137 wstFile, | |
138 bbInfoFile, | |
139 this.annotationClassifier)); | |
1211
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
140 } |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
141 } |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
142 finally { |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
143 if (in != null) { |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
144 in.close(); |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
145 } |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
146 } |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
147 |
9650 | 148 for (final ImportRiver river: this.rivers) { |
1211
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
149 river.parseDependencies(); |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
150 } |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
151 } |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
152 } |
f08fe480092c
Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff
changeset
|
153 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 : |