comparison flys-backend/src/main/java/org/dive4elements/river/importer/parsers/StaFileParser.java @ 5828:dfb26b03b179

Moved directories to org.dive4elements.river
author Sascha L. Teichmann <teichmann@intevation.de>
date Thu, 25 Apr 2013 11:53:11 +0200
parents flys-backend/src/main/java/de/intevation/flys/importer/parsers/StaFileParser.java@54077c9c9305
children 18619c1e7c2a
comparison
equal deleted inserted replaced
5827:e308d4ecd35a 5828:dfb26b03b179
1 package de.intevation.flys.importer.parsers;
2
3 import java.io.File;
4 import java.io.IOException;
5 import java.io.LineNumberReader;
6 import java.io.FileInputStream;
7 import java.io.InputStreamReader;
8
9 import java.math.BigDecimal;
10
11 import java.util.regex.Pattern;
12 import java.util.regex.Matcher;
13
14 import java.util.Date;
15 import java.util.HashMap;
16 import java.util.ArrayList;
17 import java.util.List;
18
19 import org.apache.log4j.Logger;
20
21 import de.intevation.flys.importer.ImportMainValueType;
22 import de.intevation.flys.importer.ImportMainValue;
23 import de.intevation.flys.importer.ImportNamedMainValue;
24 import de.intevation.flys.importer.ImportGauge;
25 import de.intevation.flys.importer.ImportTimeInterval;
26 import de.intevation.flys.utils.DateGuesser;
27
28 public class StaFileParser
29 {
30 private static Logger log = Logger.getLogger(StaFileParser.class);
31
32 public static final String ENCODING = "ISO-8859-1";
33
34 public static final String TYPES =
35 System.getProperty("flys.backend.main.value.types", "QWTD");
36
37 public static final boolean NOT_PARSE_GAUGE_NUMBERS =
38 Boolean.getBoolean("flys.backend.sta.not.parse.gauge.numbers");
39
40 public static final Pattern QWTD_ =
41 Pattern.compile("\\s*([^\\s]+)\\s+([^\\s]+)\\s+([" +
42 Pattern.quote(TYPES) + "]).*");
43
44 public static final class NameAndTimeInterval {
45 private String name;
46 private ImportTimeInterval timeInterval;
47
48 public NameAndTimeInterval(String name) {
49 this(name, null);
50 }
51
52 public NameAndTimeInterval(String name, ImportTimeInterval timeInterval) {
53 this.name = name;
54 this.timeInterval = timeInterval;
55 }
56
57 public String getName() {
58 return name;
59 }
60
61 public ImportTimeInterval getTimeInterval() {
62 return timeInterval;
63 }
64 } // class NameAndTimeInterval
65
66 public StaFileParser() {
67 }
68
69 public boolean parse(ImportGauge gauge) throws IOException {
70
71 File file = gauge.getStaFile();
72
73 log.info("parsing STA file: " + file);
74 LineNumberReader in = null;
75 try {
76 in =
77 new LineNumberReader(
78 new InputStreamReader(
79 new FileInputStream(file), ENCODING));
80
81 String line = in.readLine();
82
83 if (line == null) {
84 log.warn("STA file is empty.");
85 return false;
86 }
87
88 if (line.length() < 37) {
89 log.warn("First line in STA file is too short.");
90 return false;
91 }
92
93 String gaugeName = line.substring(16, 28).trim();
94
95 Long gaugeNumber = null;
96
97 if (!NOT_PARSE_GAUGE_NUMBERS) {
98 String gaugeNumberString = line.substring(8, 16).trim();
99
100 try {
101 gaugeNumber = Long.parseLong(gaugeNumberString);
102 }
103 catch (NumberFormatException nfe) {
104 log.warn("STA: '" + gaugeNumberString +
105 "' is not a valid long number.");
106 }
107 }
108
109 gauge.setName(gaugeName);
110 gauge.setOfficialNumber(gaugeNumber);
111
112 if (log.isDebugEnabled()) {
113 log.debug(
114 "name/number: '" + gaugeName + "' '" + gaugeNumber + "'");
115 }
116
117 String [] values = line.substring(38).trim().split("\\s+", 2);
118
119 if (values.length < 2) {
120 log.warn("STA: Not enough columns for aeo and datum.");
121 }
122 try {
123 gauge.setAeo(new BigDecimal(values[0].replace(",", ".")));
124 gauge.setDatum(new BigDecimal(values[1].replace(",", ".")));
125 }
126 catch (NumberFormatException nfe) {
127 log.warn("STA: cannot parse aeo or datum.");
128 return false;
129 }
130
131 line = in.readLine();
132
133 if (line == null) {
134 log.warn("STA file has not enough lines");
135 return false;
136 }
137
138 if (line.length() < 36) {
139 log.warn("STA: second line is too short");
140 return false;
141 }
142
143 try {
144 gauge.setStation(
145 new BigDecimal(line.substring(29, 36).trim()));
146 }
147 catch (NumberFormatException nfe) {
148 log.warn("STA: parsing of the datum of the gauge failed");
149 return false;
150 }
151
152 // overread the next six lines
153 for (int i = 0; i < 6; ++i) {
154 if ((line = in.readLine()) == null) {
155 log.warn("STA file is too short");
156 return false;
157 }
158 }
159
160 HashMap<String, ImportMainValueType> types =
161 new HashMap<String, ImportMainValueType>();
162
163 ArrayList<ImportNamedMainValue> namedMainValues =
164 new ArrayList<ImportNamedMainValue>();
165
166 ArrayList<ImportMainValue> mainValues =
167 new ArrayList<ImportMainValue>();
168
169 while ((line = in.readLine()) != null) {
170 Matcher m = QWTD_.matcher(line);
171 if (m.matches()) {
172 BigDecimal value;
173 try {
174 value = new BigDecimal(m.group(2).replace(",", "."));
175 }
176 catch (NumberFormatException nfe) {
177 log.warn("STA: value not parseable in line "
178 + in.getLineNumber());
179 continue;
180 }
181 String typeString = m.group(3);
182 log.debug("\t type: " + typeString);
183 ImportMainValueType type = types.get(typeString);
184 if (type == null) {
185 type = new ImportMainValueType(typeString);
186 types.put(typeString, type);
187 }
188 String name = m.group(1);
189 NameAndTimeInterval nat = parseName(name);
190 ImportNamedMainValue namedMainValue =
191 new ImportNamedMainValue(type, nat.getName());
192 namedMainValues.add(namedMainValue);
193
194 ImportMainValue mainValue = new ImportMainValue(
195 gauge,
196 namedMainValue,
197 value,
198 nat.getTimeInterval());
199
200 mainValues.add(mainValue);
201 }
202 else {
203 // TODO: treat as a comment
204 }
205 }
206 gauge.setMainValueTypes(
207 new ArrayList<ImportMainValueType>(types.values()));
208 gauge.setNamedMainValues(namedMainValues);
209 gauge.setMainValues(mainValues);
210 }
211 finally {
212 if (in != null) {
213 in.close();
214 }
215 }
216 log.info("finished parsing STA file: " + file);
217 return true;
218 }
219
220 protected NameAndTimeInterval parseName(String name) {
221 List<String> result = new ArrayList<String>();
222
223 unbracket(name, 0, result);
224
225 int length = result.size();
226
227 if (length < 1) { // Should not happen.
228 return new NameAndTimeInterval(name);
229 }
230
231 if (length == 1) { // No date at all -> use first part.
232 return new NameAndTimeInterval(result.get(0).trim());
233 }
234
235 if (length == 2) { // e.g. W(1994) or W(1994 - 1999)
236 String type = result.get(0).trim();
237
238 ImportTimeInterval timeInterval = getTimeInterval(
239 result.get(1).trim());
240
241 if (timeInterval == null) { // No date at all.
242 type = name;
243 }
244
245 return new NameAndTimeInterval(type, timeInterval);
246 }
247
248 if (length == 3) { // e.g W(Q(1994)) or W(Q(1994 - 1999))
249
250 String type =
251 result.get(0).trim() + "(" +
252 result.get(1).trim() + ")";
253
254 ImportTimeInterval timeInterval = getTimeInterval(
255 result.get(2).trim());
256
257 if (timeInterval == null) { // No date at all.
258 type = name;
259 }
260
261 return new NameAndTimeInterval(type, timeInterval);
262 }
263
264 // more than 3 elements return unmodified.
265
266 return new NameAndTimeInterval(name);
267 }
268
269 private static ImportTimeInterval getTimeInterval(String datePart) {
270
271 int minus = datePart.indexOf('-');
272
273 if (minus < 0) { // '-' not found
274
275 Date date = null;
276 try {
277 date = DateGuesser.guessDate(datePart);
278 }
279 catch (IllegalArgumentException iae) {
280 log.warn("STA: Invalid date '" + datePart + "'");
281 return null;
282 }
283
284 return new ImportTimeInterval(date);
285 }
286
287 // Found '-' so we have <from> - <to>
288 String startPart = datePart.substring(0, minus).trim();
289 String endPart = datePart.substring(minus).trim();
290
291 Date startDate = null;
292 Date endDate = null;
293
294 try {
295 startDate = DateGuesser.guessDate(startPart);
296 }
297 catch (IllegalArgumentException iae) {
298 log.warn("STA: Invalid start date '" + startPart + "'");
299 }
300
301 try {
302 endDate = DateGuesser.guessDate(endPart);
303 }
304 catch (IllegalArgumentException iae) {
305 log.warn("STA: Invalid end date '" + endPart + "'");
306 }
307
308 if (startDate == null) {
309 log.warn("STA: Need start date.");
310 return null;
311 }
312
313 return new ImportTimeInterval(startDate, endDate);
314 }
315
316 private static int unbracket(String s, int index, List<String> result) {
317 StringBuilder sb = new StringBuilder();
318 int length = s.length();
319 while (index < length) {
320 char c = s.charAt(index);
321 switch (c) {
322 case '(':
323 index = unbracket(s, index+1, result);
324 break;
325 case ')':
326 result.add(0, sb.toString());
327 return index+1;
328 default:
329 sb.append(c);
330 ++index;
331 }
332 }
333 result.add(0, sb.toString());
334
335 return index;
336 }
337 }
338 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org