Mercurial > dive4elements > river
comparison flys-backend/src/main/java/de/intevation/flys/importer/parsers/AtFileParser.java @ 1211:f08fe480092c
Moved file parsers to separate package.
flys-backend/trunk@2337 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author | Sascha L. Teichmann <sascha.teichmann@intevation.de> |
---|---|
date | Fri, 15 Jul 2011 13:07:45 +0000 |
parents | |
children | 976ead36192d |
comparison
equal
deleted
inserted
replaced
1210:31d8638760b1 | 1211:f08fe480092c |
---|---|
1 package de.intevation.flys.importer.parsers; | |
2 | |
3 import java.io.BufferedReader; | |
4 import java.io.File; | |
5 import java.io.FileInputStream; | |
6 import java.io.InputStreamReader; | |
7 import java.io.IOException; | |
8 import java.math.BigDecimal; | |
9 | |
10 import org.apache.log4j.Logger; | |
11 | |
12 import de.intevation.flys.importer.ImportDischargeTable; | |
13 import de.intevation.flys.importer.ImportDischargeTableValue; | |
14 | |
15 import java.util.regex.Pattern; | |
16 import java.util.regex.Matcher; | |
17 | |
18 import java.util.Date; | |
19 import java.util.Calendar; | |
20 | |
21 import de.intevation.flys.importer.ImportTimeInterval; | |
22 | |
23 public class AtFileParser { | |
24 | |
25 public static final String ENCODING = "ISO-8859-1"; | |
26 | |
27 private static Logger logger = Logger.getLogger(AtFileParser.class); | |
28 | |
29 | |
30 // regular expression from hell to find out time range | |
31 public static final Pattern DATE_LINE = Pattern.compile( | |
32 "^\\*\\s*Abflu[^t]+tafel?\\s*([^\\d]+)" + | |
33 "(\\d{1,2})?\\.?(\\d{1,2})?\\.?(\\d{2,4})\\s*(?:(?:bis)|-)?\\s*" + | |
34 "(?:(\\d{1,2})?\\.?(\\d{1,2})?\\.?(\\d{2,4}))?\\s*.*$"); | |
35 | |
36 public AtFileParser() { | |
37 } | |
38 | |
39 | |
40 public ImportDischargeTable parse(File file) throws IOException { | |
41 return parse(file, "", 0); | |
42 } | |
43 | |
44 public ImportDischargeTable parse( | |
45 File file, | |
46 String prefix, | |
47 int kind | |
48 ) | |
49 throws IOException { | |
50 | |
51 logger.info("parsing AT file: " + file); | |
52 | |
53 BufferedReader br = null; | |
54 | |
55 String line = null; | |
56 | |
57 boolean beginning = true; | |
58 | |
59 ImportDischargeTable dischargeTable = | |
60 new ImportDischargeTable(kind, prefix + file.getName()); | |
61 | |
62 Date from = null; | |
63 Date to = null; | |
64 | |
65 try { | |
66 br = new BufferedReader( | |
67 new InputStreamReader( | |
68 new FileInputStream(file), ENCODING)); | |
69 | |
70 while ((line = br.readLine()) != null) { | |
71 | |
72 String tmp = line.trim(); | |
73 | |
74 if (tmp.length() == 0) { | |
75 continue; | |
76 } | |
77 | |
78 Matcher m = DATE_LINE.matcher(tmp); | |
79 if (m.matches()) { | |
80 from = guessDate(m.group(2), m.group(3), m.group(4)); | |
81 to = guessDate(m.group(5), m.group(6), m.group(7)); | |
82 if (from == null) { | |
83 Date t = from; from = to; to = t; | |
84 } | |
85 continue; | |
86 } | |
87 | |
88 if (tmp.startsWith("#! name=")) { | |
89 // XXX Skip the name, because we don't know where to save | |
90 // it at the moment | |
91 | |
92 //String name = tmp.substring(8); | |
93 continue; | |
94 } | |
95 | |
96 if (tmp.startsWith("#") || tmp.startsWith("*")) { | |
97 continue; | |
98 } | |
99 | |
100 String[] splits = tmp.replace(',', '.').split("\\s+"); | |
101 | |
102 if ((splits.length < 2) || (splits.length > 11)) { | |
103 logger.warn("Found an invalid row in the AT file."); | |
104 continue; | |
105 } | |
106 | |
107 String strW = splits[0].trim(); | |
108 double W = Double.parseDouble(strW); | |
109 | |
110 /* shift is used to differenciate between lines with | |
111 * exactly 10 Qs and lines with less than 10 Qs. The shift | |
112 * is only modified when it is the first line. | |
113 */ | |
114 int shift = -1; | |
115 | |
116 if (splits.length != 11 && beginning) { | |
117 shift = 10 - splits.length; | |
118 } | |
119 | |
120 | |
121 for (int i = 1; i < splits.length; i++) { | |
122 double iW = W + shift + i; | |
123 double iQ = Double.parseDouble(splits[i].trim()); | |
124 | |
125 dischargeTable.addDischargeTableValue( | |
126 new ImportDischargeTableValue( | |
127 new BigDecimal(iQ/100.0), | |
128 new BigDecimal(iW/100.0))); | |
129 } | |
130 | |
131 beginning = false; | |
132 } | |
133 } | |
134 catch (NumberFormatException pe) { | |
135 logger.warn(pe.getMessage()); | |
136 } | |
137 finally { | |
138 if (br != null) { | |
139 br.close(); | |
140 } | |
141 } | |
142 | |
143 if (from != null) { | |
144 if (to != null && from.compareTo(to) > 0) { | |
145 Date t = from; from = to; to = t; | |
146 } | |
147 logger.info("from: " + from + " to: " + to); | |
148 ImportTimeInterval interval = new ImportTimeInterval(from, to); | |
149 dischargeTable.setTimeInterval(interval); | |
150 } | |
151 | |
152 logger.info("Finished parsing AT file: " + file); | |
153 | |
154 return dischargeTable; | |
155 } | |
156 | |
157 public static Date guessDate(String day, String month, String year) { | |
158 if (day == null && month == null && year == null) { | |
159 return null; | |
160 } | |
161 | |
162 logger.debug("day: " + day + " month: " + month + " year: " + year); | |
163 | |
164 int dayI = 15; | |
165 if (day != null) { | |
166 try { | |
167 dayI = Integer.parseInt(day.trim()); | |
168 } | |
169 catch (NumberFormatException nfe) { | |
170 } | |
171 } | |
172 | |
173 int monthI = 6; | |
174 if (month != null) { | |
175 try { | |
176 monthI = Integer.parseInt(month.trim()); | |
177 } | |
178 catch (NumberFormatException nfe) { | |
179 } | |
180 } | |
181 | |
182 int yearI = 1900; | |
183 if (year != null) { | |
184 try { | |
185 yearI = Integer.parseInt(year.trim()); | |
186 if (yearI < 100) { | |
187 if (yearI < 20) { | |
188 yearI += 2000; | |
189 } | |
190 else { | |
191 yearI += 1900; | |
192 } | |
193 } | |
194 } | |
195 catch (NumberFormatException nfe) { | |
196 } | |
197 } | |
198 | |
199 Calendar cal = Calendar.getInstance(); | |
200 cal.set(yearI, monthI-1, dayI, 12, 0, 0); | |
201 long ms = cal.getTimeInMillis(); | |
202 cal.setTimeInMillis(ms - ms%1000); | |
203 return cal.getTime(); | |
204 } | |
205 } | |
206 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 : |