comparison flys-backend/src/main/java/de/intevation/flys/importer/parsers/SedimentYieldParser.java @ 2840:71175502d868

Added a parser for sediment yield files; started parsing and storing values in the db. flys-backend/trunk@4277 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author Ingo Weinzierl <ingo.weinzierl@intevation.de>
date Thu, 19 Apr 2012 12:48:53 +0000
parents
children 0d27d02b1208
comparison
equal deleted inserted replaced
2839:163c037f2c7e 2840:71175502d868
1 package de.intevation.flys.importer.parsers;
2
3 import java.io.File;
4 import java.io.IOException;
5
6 import java.text.NumberFormat;
7 import java.text.ParseException;
8
9 import java.util.ArrayList;
10 import java.util.List;
11 import java.util.regex.Matcher;
12 import java.util.regex.Pattern;
13
14 import org.apache.log4j.Logger;
15
16 import de.intevation.flys.importer.ImportGrainFraction;
17 import de.intevation.flys.importer.ImportSedimentYield;
18 import de.intevation.flys.importer.ImportSedimentYieldValue;
19 import de.intevation.flys.importer.ImportTimeInterval;
20 import de.intevation.flys.importer.ImportUnit;
21 import de.intevation.flys.model.GrainFraction;
22
23
24 public class SedimentYieldParser extends LineParser {
25
26 private static final Logger log =
27 Logger.getLogger(SedimentYieldParser.class);
28
29
30 public static final NumberFormat nf = NumberFormat.getInstance(DEFAULT_LOCALE);
31
32
33 public static final String FRAKTION_START = "Fraktion:";
34
35 public static final String FRACTION_COARSE_STR =
36 "_Grobkorn.csv";
37
38 public static final String FRACTION_FINE_MIDDLE_STR =
39 "_Fein-Mittel-Kies.csv";
40
41 public static final String FRACTION_SAND =
42 "_Sand.csv";
43
44 public static final String FRACTION_SUSP_SAND =
45 "_susp_Sand.csv";
46
47 public static final String FRACTION_SUSP_SAND_BED =
48 "_susp_Sand_bettbildAnteil.csv";
49
50 public static final String FRACTION_SUSPENDED_SEDIMENT =
51 "_Schwebstoff.csv";
52
53
54 public static final Pattern TIMEINTERVAL_SINGLE =
55 Pattern.compile("\\D*([0-9]+?)\\D*");
56
57 public static final Pattern TIMEINTERVAL_EPOCH =
58 Pattern.compile("\\D*([0-9]+?)\\s*-\\s*([0-9]+?)\\D*");
59
60 public static final Pattern META_FRACTION =
61 Pattern.compile("^Fraktion: (.*)");
62
63 public static final Pattern META_UNIT =
64 Pattern.compile("^Einheit: \\[(.*)\\].*");
65
66 public static final Pattern META_COLUMN_NAMES =
67 Pattern.compile("^Fluss-km.*");
68
69 public static final Pattern META_GRAIN_FRACTION_A =
70 Pattern.compile("\\D*(([0-9]+?,[0-9]+?)\\s*-|([0-9]++)\\s*-)(([0-9]+?,[0-9]+?)|([0-9]++))\\s*([a-zA-Z]+?)\\W*\\D*");
71
72 public static final Pattern META_GRAIN_FRACTION_B =
73 Pattern.compile("(<|>){1}\\s*(\\w++)\\s*(([0-9]+?,[0-9]+?)\\s*-|([0-9]++)\\s*-)(([0-9]+?,[0-9]+?)|([0-9]++))\\s*([a-zA-Z]+?)");
74
75 public static final Pattern META_GRAIN_FRACTION_C =
76 Pattern.compile("(<|>){1}\\s*((([0-9]+?,[0-9]+?)|([0-9]++))\\s*(\\w+))");
77
78
79 protected List<ImportSedimentYield> sedimentYields;
80
81 protected ImportSedimentYield[] current;
82
83 protected ImportGrainFraction grainFraction;
84
85 protected ImportUnit unit;
86
87 protected String description;
88
89 protected String[] columnNames;
90
91
/** Creates a parser whose result list is initially empty. */
public SedimentYieldParser() {
    this.sedimentYields = new ArrayList<ImportSedimentYield>();
}
95
96
97 @Override
98 public void parse(File file) throws IOException {
99 description = file.getName();
100
101 super.parse(file);
102 }
103
104
105 @Override
106 protected void reset() {
107 current = null;
108 grainFraction = null;
109 unit = null;
110 }
111
112
113 @Override
114 protected void finish() {
115 if (current != null) {
116 for (ImportSedimentYield isy: current) {
117 sedimentYields.add(isy);
118 }
119 }
120
121 description = null;
122 }
123
124
125 @Override
126 protected void handleLine(String line) {
127 if (line.startsWith(START_META_CHAR)) {
128 handleMetaLine(stripMetaLine(line));
129 }
130 else {
131 handleDataLine(line);
132 }
133 }
134
135
136 protected void handleMetaLine(String line) {
137 if (handleMetaUnit(line)) {
138 return;
139 }
140 else if (handleMetaFraction(line)) {
141 return;
142 }
143 else if (handleColumnNames(line)) {
144 return;
145 }
146 else {
147 log.warn("Unknown meta line: '" + line + "'");
148 }
149 }
150
151
152 protected boolean handleMetaUnit(String line) {
153 Matcher m = META_UNIT.matcher(line);
154
155 if (m.matches()) {
156 unit = new ImportUnit(m.group(1));
157 return true;
158 }
159
160 return false;
161 }
162
163
164 public boolean handleMetaFraction(String line) {
165 Matcher m = META_FRACTION.matcher(line);
166
167 if (m.matches()) {
168 String tmp = m.group(1);
169
170 this.grainFraction = buildGrainFraction(tmp);
171
172 return true;
173 }
174 else if (line.startsWith(FRAKTION_START)) {
175 String newLine = line.replace(FRAKTION_START, "").trim();
176 if (newLine.length() == 0) {
177 log.debug("Found total grain fraction.");
178 this.grainFraction = new ImportGrainFraction(GrainFraction.TOTAL);
179
180 return true;
181 }
182 }
183
184 return false;
185 }
186
187
188 public boolean handleColumnNames(String line) {
189 Matcher m = META_COLUMN_NAMES.matcher(line);
190
191 if (m.matches()) {
192 columnNames = line.split(SEPERATOR_CHAR);
193
194 initializeSedimentYields();
195
196 return true;
197 }
198
199 return false;
200 }
201
202
203 protected void handleDataLine(String line) {
204 String[] vals = line.split(SEPERATOR_CHAR);
205
206 if (vals == null || vals.length < columnNames.length-1) {
207 log.warn("skip invalid data line: '" + line + "'");
208 return;
209 }
210
211 try {
212 Double km = nf.parse(vals[0]).doubleValue();
213
214 for (int i = 1, n = columnNames.length-1; i < n; i++) {
215 String curVal = vals[i];
216
217 if (curVal != null && curVal.length() > 0) {
218 current[i-1].addValue(new ImportSedimentYieldValue(
219 km, nf.parse(vals[i]).doubleValue()
220 ));
221 }
222 }
223 }
224 catch (ParseException pe) {
225 log.warn("Error while parsing numbers in '" + line + "':", pe);
226 }
227 }
228
229
230 private void initializeSedimentYields() {
231 // skip first column (Fluss-km) and last column (Hinweise)
232 current = new ImportSedimentYield[columnNames.length-2];
233
234 for (int i = 0, n = columnNames.length; i < n-2; i++) {
235 current[i] = new ImportSedimentYield(this.description);
236 current[i].setTimeInterval(getTimeInterval(columnNames[i+1]));
237 current[i].setUnit(unit);
238 current[i].setGrainFraction(grainFraction);
239 }
240 }
241
242
243 private ImportTimeInterval getTimeInterval(String column) {
244 try {
245 Matcher a = TIMEINTERVAL_EPOCH.matcher(column);
246 if (a.matches()) {
247 int yearA = nf.parse(a.group(1)).intValue();
248 int yearB = nf.parse(a.group(2)).intValue();
249
250 return new ImportTimeInterval(
251 getDateFromYear(yearA),
252 getDateFromYear(yearB)
253 );
254 }
255
256 Matcher b = TIMEINTERVAL_SINGLE.matcher(column);
257 if (b.matches()) {
258 int year = nf.parse(b.group(1)).intValue();
259
260 return new ImportTimeInterval(getDateFromYear(year));
261 }
262
263 log.warn("Unknown time interval string: '" + column + "'");
264 }
265 catch (ParseException pe) {
266 log.warn("Error while parsing years: " + column, pe);
267 }
268
269 return null;
270 }
271
272
273 private ImportGrainFraction buildGrainFraction(String gfStr) {
274 Matcher a = META_GRAIN_FRACTION_A.matcher(gfStr);
275 if (a.matches()) {
276 String lowerA = a.group(2);
277 String lowerB = a.group(3);
278
279 String upperA = a.group(4);
280 String upperB = a.group(5);
281
282 String unitStr = a.group(7);
283 String lower = lowerA != null ? lowerA : lowerB;
284 String upper = upperA != null ? upperA : upperB;
285
286 try {
287 return new ImportGrainFraction(
288 getGrainFractionTypeName(this.description),
289 nf.parse(lower).doubleValue(),
290 nf.parse(upper).doubleValue(),
291 new ImportUnit(unitStr)
292 );
293 }
294 catch (ParseException pe) {
295 log.warn("Error while parsing ranges of: '" + gfStr + "'");
296 }
297 }
298
299 Matcher b = META_GRAIN_FRACTION_B.matcher(gfStr);
300 if (b.matches()) {
301 String lowerA = b.group(4);
302 String lowerB = b.group(5);
303 String upperA = b.group(6);
304 String upperB = b.group(7);
305 String unitStr = b.group(9);
306
307 String lower = lowerA != null ? lowerA : lowerB;
308 String upper = upperA != null ? upperA : upperB;
309
310 try {
311 return new ImportGrainFraction(
312 getGrainFractionTypeName(this.description),
313 nf.parse(lower).doubleValue(),
314 nf.parse(upper).doubleValue(),
315 new ImportUnit(unitStr)
316 );
317 }
318 catch (ParseException pe) {
319 log.warn("Error while parsing ranges of: '" + gfStr + "'");
320 }
321 }
322
323 Matcher c = META_GRAIN_FRACTION_C.matcher(gfStr);
324 if (c.matches()) {
325 String oper = c.group(1);
326 String valueStr = c.group(3);
327 String unitStr = c.group(6);
328
329 try {
330 Double value = nf.parse(valueStr).doubleValue();
331
332 if (oper.equals(">")) {
333 return new ImportGrainFraction(
334 getGrainFractionTypeName(this.description),
335 value,
336 null,
337 new ImportUnit(unitStr)
338 );
339 }
340 else {
341 return new ImportGrainFraction(
342 getGrainFractionTypeName(this.description),
343 null,
344 value,
345 new ImportUnit(unitStr)
346 );
347 }
348 }
349 catch (ParseException pe) {
350 log.warn("Error while parsing ranges of: '" + gfStr + "'");
351 }
352 }
353
354 log.warn("Unknow grain fraction: '" + gfStr + "'");
355
356 return null;
357 }
358
359
360 public static String getGrainFractionTypeName(String filename) {
361 if (filename.endsWith(FRACTION_COARSE_STR)) {
362 return GrainFraction.COARSE;
363 }
364 else if (filename.endsWith(FRACTION_FINE_MIDDLE_STR)) {
365 return GrainFraction.FINE_MIDDLE;
366 }
367 else if (filename.endsWith(FRACTION_SAND)) {
368 return GrainFraction.SAND;
369 }
370 else if (filename.endsWith(FRACTION_SUSP_SAND)) {
371 return GrainFraction.SUSP_SAND;
372 }
373 else if (filename.endsWith(FRACTION_SUSP_SAND_BED)) {
374 return GrainFraction.SUSP_SAND_BED;
375 }
376 else if (filename.endsWith(FRACTION_SUSPENDED_SEDIMENT)) {
377 return GrainFraction.SUSPENDED_SEDIMENT;
378 }
379 else {
380 log.warn("Unknown grain fraction type: '" + filename + "'");
381 return "unknown";
382 }
383 }
384
385
386 public List<ImportSedimentYield> getSedimentYields() {
387 return sedimentYields;
388 }
389 }
390 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org