Mercurial > dive4elements > river
comparison flys-backend/src/main/java/de/intevation/flys/importer/parsers/SedimentYieldParser.java @ 2840:71175502d868
Added a parser for sediment yield files; started parsing and store values in db.
flys-backend/trunk@4277 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author | Ingo Weinzierl <ingo.weinzierl@intevation.de> |
---|---|
date | Thu, 19 Apr 2012 12:48:53 +0000 |
parents | |
children | 0d27d02b1208 |
comparison
equal
deleted
inserted
replaced
2839:163c037f2c7e | 2840:71175502d868 |
---|---|
1 package de.intevation.flys.importer.parsers; | |
2 | |
3 import java.io.File; | |
4 import java.io.IOException; | |
5 | |
6 import java.text.NumberFormat; | |
7 import java.text.ParseException; | |
8 | |
9 import java.util.ArrayList; | |
10 import java.util.List; | |
11 import java.util.regex.Matcher; | |
12 import java.util.regex.Pattern; | |
13 | |
14 import org.apache.log4j.Logger; | |
15 | |
16 import de.intevation.flys.importer.ImportGrainFraction; | |
17 import de.intevation.flys.importer.ImportSedimentYield; | |
18 import de.intevation.flys.importer.ImportSedimentYieldValue; | |
19 import de.intevation.flys.importer.ImportTimeInterval; | |
20 import de.intevation.flys.importer.ImportUnit; | |
21 import de.intevation.flys.model.GrainFraction; | |
22 | |
23 | |
24 public class SedimentYieldParser extends LineParser { | |
25 | |
26 private static final Logger log = | |
27 Logger.getLogger(SedimentYieldParser.class); | |
28 | |
29 | |
30 public static final NumberFormat nf = NumberFormat.getInstance(DEFAULT_LOCALE); | |
31 | |
32 | |
33 public static final String FRAKTION_START = "Fraktion:"; | |
34 | |
35 public static final String FRACTION_COARSE_STR = | |
36 "_Grobkorn.csv"; | |
37 | |
38 public static final String FRACTION_FINE_MIDDLE_STR = | |
39 "_Fein-Mittel-Kies.csv"; | |
40 | |
41 public static final String FRACTION_SAND = | |
42 "_Sand.csv"; | |
43 | |
44 public static final String FRACTION_SUSP_SAND = | |
45 "_susp_Sand.csv"; | |
46 | |
47 public static final String FRACTION_SUSP_SAND_BED = | |
48 "_susp_Sand_bettbildAnteil.csv"; | |
49 | |
50 public static final String FRACTION_SUSPENDED_SEDIMENT = | |
51 "_Schwebstoff.csv"; | |
52 | |
53 | |
54 public static final Pattern TIMEINTERVAL_SINGLE = | |
55 Pattern.compile("\\D*([0-9]+?)\\D*"); | |
56 | |
57 public static final Pattern TIMEINTERVAL_EPOCH = | |
58 Pattern.compile("\\D*([0-9]+?)\\s*-\\s*([0-9]+?)\\D*"); | |
59 | |
60 public static final Pattern META_FRACTION = | |
61 Pattern.compile("^Fraktion: (.*)"); | |
62 | |
63 public static final Pattern META_UNIT = | |
64 Pattern.compile("^Einheit: \\[(.*)\\].*"); | |
65 | |
66 public static final Pattern META_COLUMN_NAMES = | |
67 Pattern.compile("^Fluss-km.*"); | |
68 | |
69 public static final Pattern META_GRAIN_FRACTION_A = | |
70 Pattern.compile("\\D*(([0-9]+?,[0-9]+?)\\s*-|([0-9]++)\\s*-)(([0-9]+?,[0-9]+?)|([0-9]++))\\s*([a-zA-Z]+?)\\W*\\D*"); | |
71 | |
72 public static final Pattern META_GRAIN_FRACTION_B = | |
73 Pattern.compile("(<|>){1}\\s*(\\w++)\\s*(([0-9]+?,[0-9]+?)\\s*-|([0-9]++)\\s*-)(([0-9]+?,[0-9]+?)|([0-9]++))\\s*([a-zA-Z]+?)"); | |
74 | |
75 public static final Pattern META_GRAIN_FRACTION_C = | |
76 Pattern.compile("(<|>){1}\\s*((([0-9]+?,[0-9]+?)|([0-9]++))\\s*(\\w+))"); | |
77 | |
78 | |
79 protected List<ImportSedimentYield> sedimentYields; | |
80 | |
81 protected ImportSedimentYield[] current; | |
82 | |
83 protected ImportGrainFraction grainFraction; | |
84 | |
85 protected ImportUnit unit; | |
86 | |
87 protected String description; | |
88 | |
89 protected String[] columnNames; | |
90 | |
91 | |
/**
 * Creates a parser with an empty list of parsed sediment yields.
 */
public SedimentYieldParser() {
    this.sedimentYields = new ArrayList<ImportSedimentYield>();
}
95 | |
96 | |
97 @Override | |
98 public void parse(File file) throws IOException { | |
99 description = file.getName(); | |
100 | |
101 super.parse(file); | |
102 } | |
103 | |
104 | |
105 @Override | |
106 protected void reset() { | |
107 current = null; | |
108 grainFraction = null; | |
109 unit = null; | |
110 } | |
111 | |
112 | |
113 @Override | |
114 protected void finish() { | |
115 if (current != null) { | |
116 for (ImportSedimentYield isy: current) { | |
117 sedimentYields.add(isy); | |
118 } | |
119 } | |
120 | |
121 description = null; | |
122 } | |
123 | |
124 | |
125 @Override | |
126 protected void handleLine(String line) { | |
127 if (line.startsWith(START_META_CHAR)) { | |
128 handleMetaLine(stripMetaLine(line)); | |
129 } | |
130 else { | |
131 handleDataLine(line); | |
132 } | |
133 } | |
134 | |
135 | |
136 protected void handleMetaLine(String line) { | |
137 if (handleMetaUnit(line)) { | |
138 return; | |
139 } | |
140 else if (handleMetaFraction(line)) { | |
141 return; | |
142 } | |
143 else if (handleColumnNames(line)) { | |
144 return; | |
145 } | |
146 else { | |
147 log.warn("Unknown meta line: '" + line + "'"); | |
148 } | |
149 } | |
150 | |
151 | |
152 protected boolean handleMetaUnit(String line) { | |
153 Matcher m = META_UNIT.matcher(line); | |
154 | |
155 if (m.matches()) { | |
156 unit = new ImportUnit(m.group(1)); | |
157 return true; | |
158 } | |
159 | |
160 return false; | |
161 } | |
162 | |
163 | |
164 public boolean handleMetaFraction(String line) { | |
165 Matcher m = META_FRACTION.matcher(line); | |
166 | |
167 if (m.matches()) { | |
168 String tmp = m.group(1); | |
169 | |
170 this.grainFraction = buildGrainFraction(tmp); | |
171 | |
172 return true; | |
173 } | |
174 else if (line.startsWith(FRAKTION_START)) { | |
175 String newLine = line.replace(FRAKTION_START, "").trim(); | |
176 if (newLine.length() == 0) { | |
177 log.debug("Found total grain fraction."); | |
178 this.grainFraction = new ImportGrainFraction(GrainFraction.TOTAL); | |
179 | |
180 return true; | |
181 } | |
182 } | |
183 | |
184 return false; | |
185 } | |
186 | |
187 | |
188 public boolean handleColumnNames(String line) { | |
189 Matcher m = META_COLUMN_NAMES.matcher(line); | |
190 | |
191 if (m.matches()) { | |
192 columnNames = line.split(SEPERATOR_CHAR); | |
193 | |
194 initializeSedimentYields(); | |
195 | |
196 return true; | |
197 } | |
198 | |
199 return false; | |
200 } | |
201 | |
202 | |
203 protected void handleDataLine(String line) { | |
204 String[] vals = line.split(SEPERATOR_CHAR); | |
205 | |
206 if (vals == null || vals.length < columnNames.length-1) { | |
207 log.warn("skip invalid data line: '" + line + "'"); | |
208 return; | |
209 } | |
210 | |
211 try { | |
212 Double km = nf.parse(vals[0]).doubleValue(); | |
213 | |
214 for (int i = 1, n = columnNames.length-1; i < n; i++) { | |
215 String curVal = vals[i]; | |
216 | |
217 if (curVal != null && curVal.length() > 0) { | |
218 current[i-1].addValue(new ImportSedimentYieldValue( | |
219 km, nf.parse(vals[i]).doubleValue() | |
220 )); | |
221 } | |
222 } | |
223 } | |
224 catch (ParseException pe) { | |
225 log.warn("Error while parsing numbers in '" + line + "':", pe); | |
226 } | |
227 } | |
228 | |
229 | |
230 private void initializeSedimentYields() { | |
231 // skip first column (Fluss-km) and last column (Hinweise) | |
232 current = new ImportSedimentYield[columnNames.length-2]; | |
233 | |
234 for (int i = 0, n = columnNames.length; i < n-2; i++) { | |
235 current[i] = new ImportSedimentYield(this.description); | |
236 current[i].setTimeInterval(getTimeInterval(columnNames[i+1])); | |
237 current[i].setUnit(unit); | |
238 current[i].setGrainFraction(grainFraction); | |
239 } | |
240 } | |
241 | |
242 | |
243 private ImportTimeInterval getTimeInterval(String column) { | |
244 try { | |
245 Matcher a = TIMEINTERVAL_EPOCH.matcher(column); | |
246 if (a.matches()) { | |
247 int yearA = nf.parse(a.group(1)).intValue(); | |
248 int yearB = nf.parse(a.group(2)).intValue(); | |
249 | |
250 return new ImportTimeInterval( | |
251 getDateFromYear(yearA), | |
252 getDateFromYear(yearB) | |
253 ); | |
254 } | |
255 | |
256 Matcher b = TIMEINTERVAL_SINGLE.matcher(column); | |
257 if (b.matches()) { | |
258 int year = nf.parse(b.group(1)).intValue(); | |
259 | |
260 return new ImportTimeInterval(getDateFromYear(year)); | |
261 } | |
262 | |
263 log.warn("Unknown time interval string: '" + column + "'"); | |
264 } | |
265 catch (ParseException pe) { | |
266 log.warn("Error while parsing years: " + column, pe); | |
267 } | |
268 | |
269 return null; | |
270 } | |
271 | |
272 | |
273 private ImportGrainFraction buildGrainFraction(String gfStr) { | |
274 Matcher a = META_GRAIN_FRACTION_A.matcher(gfStr); | |
275 if (a.matches()) { | |
276 String lowerA = a.group(2); | |
277 String lowerB = a.group(3); | |
278 | |
279 String upperA = a.group(4); | |
280 String upperB = a.group(5); | |
281 | |
282 String unitStr = a.group(7); | |
283 String lower = lowerA != null ? lowerA : lowerB; | |
284 String upper = upperA != null ? upperA : upperB; | |
285 | |
286 try { | |
287 return new ImportGrainFraction( | |
288 getGrainFractionTypeName(this.description), | |
289 nf.parse(lower).doubleValue(), | |
290 nf.parse(upper).doubleValue(), | |
291 new ImportUnit(unitStr) | |
292 ); | |
293 } | |
294 catch (ParseException pe) { | |
295 log.warn("Error while parsing ranges of: '" + gfStr + "'"); | |
296 } | |
297 } | |
298 | |
299 Matcher b = META_GRAIN_FRACTION_B.matcher(gfStr); | |
300 if (b.matches()) { | |
301 String lowerA = b.group(4); | |
302 String lowerB = b.group(5); | |
303 String upperA = b.group(6); | |
304 String upperB = b.group(7); | |
305 String unitStr = b.group(9); | |
306 | |
307 String lower = lowerA != null ? lowerA : lowerB; | |
308 String upper = upperA != null ? upperA : upperB; | |
309 | |
310 try { | |
311 return new ImportGrainFraction( | |
312 getGrainFractionTypeName(this.description), | |
313 nf.parse(lower).doubleValue(), | |
314 nf.parse(upper).doubleValue(), | |
315 new ImportUnit(unitStr) | |
316 ); | |
317 } | |
318 catch (ParseException pe) { | |
319 log.warn("Error while parsing ranges of: '" + gfStr + "'"); | |
320 } | |
321 } | |
322 | |
323 Matcher c = META_GRAIN_FRACTION_C.matcher(gfStr); | |
324 if (c.matches()) { | |
325 String oper = c.group(1); | |
326 String valueStr = c.group(3); | |
327 String unitStr = c.group(6); | |
328 | |
329 try { | |
330 Double value = nf.parse(valueStr).doubleValue(); | |
331 | |
332 if (oper.equals(">")) { | |
333 return new ImportGrainFraction( | |
334 getGrainFractionTypeName(this.description), | |
335 value, | |
336 null, | |
337 new ImportUnit(unitStr) | |
338 ); | |
339 } | |
340 else { | |
341 return new ImportGrainFraction( | |
342 getGrainFractionTypeName(this.description), | |
343 null, | |
344 value, | |
345 new ImportUnit(unitStr) | |
346 ); | |
347 } | |
348 } | |
349 catch (ParseException pe) { | |
350 log.warn("Error while parsing ranges of: '" + gfStr + "'"); | |
351 } | |
352 } | |
353 | |
354 log.warn("Unknow grain fraction: '" + gfStr + "'"); | |
355 | |
356 return null; | |
357 } | |
358 | |
359 | |
360 public static String getGrainFractionTypeName(String filename) { | |
361 if (filename.endsWith(FRACTION_COARSE_STR)) { | |
362 return GrainFraction.COARSE; | |
363 } | |
364 else if (filename.endsWith(FRACTION_FINE_MIDDLE_STR)) { | |
365 return GrainFraction.FINE_MIDDLE; | |
366 } | |
367 else if (filename.endsWith(FRACTION_SAND)) { | |
368 return GrainFraction.SAND; | |
369 } | |
370 else if (filename.endsWith(FRACTION_SUSP_SAND)) { | |
371 return GrainFraction.SUSP_SAND; | |
372 } | |
373 else if (filename.endsWith(FRACTION_SUSP_SAND_BED)) { | |
374 return GrainFraction.SUSP_SAND_BED; | |
375 } | |
376 else if (filename.endsWith(FRACTION_SUSPENDED_SEDIMENT)) { | |
377 return GrainFraction.SUSPENDED_SEDIMENT; | |
378 } | |
379 else { | |
380 log.warn("Unknown grain fraction type: '" + filename + "'"); | |
381 return "unknown"; | |
382 } | |
383 } | |
384 | |
385 | |
386 public List<ImportSedimentYield> getSedimentYields() { | |
387 return sedimentYields; | |
388 } | |
389 } | |
390 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 : |