comparison backend/src/main/java/org/dive4elements/river/backend/utils/DateGuesser.java @ 8969:fe81eb39080c

Avoid guessing year 1950 for HQ50 etc.
author mschaefer
date Tue, 03 Apr 2018 09:54:26 +0200
parents 5e38e2924c07
children df5c5614e9a7
comparison
equal deleted inserted replaced
8968:66471a3b3db2 8969:fe81eb39080c
6 * documentation coming with Dive4Elements River for details. 6 * documentation coming with Dive4Elements River for details.
7 */ 7 */
8 8
9 package org.dive4elements.river.backend.utils; 9 package org.dive4elements.river.backend.utils;
10 10
11 import java.util.Calendar;
11 import java.util.Date; 12 import java.util.Date;
12 import java.util.Calendar; 13 import java.util.regex.Matcher;
13
14 import java.util.regex.Pattern; 14 import java.util.regex.Pattern;
15 import java.util.regex.Matcher;
16 15
/**
 * Heuristic parser that turns loosely formatted date strings into
 * {@link Date} objects by trying a fixed sequence of patterns.
 *
 * <p>All calendars are created with {@link Calendar#getInstance()}, i.e.
 * in the default time zone and locale of the running JVM.</p>
 */
public final class DateGuesser {

    /**
     * Three-letter month abbreviations recognised by
     * {@link #guessMonth(String)}; the array index is the zero-based
     * {@link Calendar} month.
     */
    public static final String [] MONTH = {
        "jan", "feb", "mrz", "apr", "mai", "jun",
        "jul", "aug", "sep", "okt", "nov", "dez"
    };

    /**
     * Looks up a month abbreviation in {@link #MONTH}, ignoring case.
     *
     * @param s the abbreviation to look up (e.g. "Mrz").
     * @return the zero-based month index, or -1 if {@code s} is
     *         {@code null} or not a known abbreviation.
     */
    public static final int guessMonth(final String s) {
        for (int i = 0; i < MONTH.length; ++i) {
            // equalsIgnoreCase is locale independent, unlike toLowerCase().
            if (MONTH[i].equalsIgnoreCase(s)) {
                return i;
            }
        }
        return -1;
    }

    public static final Pattern YYYY_MM_DD =
        Pattern.compile("^(\\d{4})-(\\d{2})-(\\d{2})$");

    public static final Pattern DD_MM_YYYY =
        Pattern.compile("^(\\d{1,2})\\.(\\d{1,2})\\.(\\d{2,4})$");

    /**
     * Optional day, month abbreviation and year.
     * Group 2 holds the day digits (group 1 also contains the trailing dot),
     * group 3 the month abbreviation and group 4 the year.
     */
    public static final Pattern MMM_YYYY =
        Pattern.compile(
            "^((\\d{1,2})\\.)?(\\w{3})[\\. -]?((\\d\\d)|(\\d{4}))$");

    /**
     * Arbitrary prefix without digits or '=' followed by a two digit year
     * or a four digit year starting with 1 or 2. Group 1 holds the year.
     */
    public static final Pattern GARBAGE_YYYY =
        Pattern.compile("^[^\\d=]*((\\d\\d)|([12]\\d{3}))$");

    public static final Pattern YYYY_MM_DDThh_mm =
        Pattern.compile("^(\\d{4})-(\\d{2})-(\\d{2})T(\\d{2}):(\\d{2})$");

    public static final Pattern YYYY_MM_DDThh_mm_ss =
        Pattern.compile(
            "^(\\d{4})-(\\d{2})-(\\d{2})T(\\d{2}):(\\d{2}):(\\d{2})$");

    public static final Pattern DD_MM_YYYYThh_mm =
        Pattern.compile(
            "^(\\d{1,2})\\.(\\d{1,2})\\.(\\d{2,4})T(\\d{1,2}):(\\d{2})$");

    public static final Pattern DD_MM_YYYYThh_mm_ss =
        Pattern.compile("^(\\d{1,2})\\.(\\d{1,2})\\.(\\d{2,4})"
            + "T(\\d{1,2}):(\\d{2}):(\\d{2})$");

    public static final Pattern DDMMYY =
        Pattern.compile("^(\\d{2})(\\d{2})(\\d{2})$");

    /** Day of month used when the input names no day. */
    private static final int DEFAULT_DAY = 15;

    /** Hour of day used when the input carries no time. */
    private static final int DEFAULT_HOUR = 12;

    private DateGuesser() {
    }

    /**
     * Converts a one-based month number given as text to a zero-based
     * {@link Calendar} month.
     *
     * @param month the one-based month number as decimal string.
     * @return the zero-based month, clamped to the range 0..11.
     * @throws NumberFormatException if {@code month} is not an integer.
     */
    public static final int calendarMonth(final String month) {
        return calendarMonth(Integer.parseInt(month));
    }

    /**
     * Converts a one-based month number to a zero-based {@link Calendar}
     * month.
     *
     * @param month the one-based month number.
     * @return the zero-based month, clamped to the range 0..11.
     */
    public static final int calendarMonth(final int month) {
        return Math.max(Math.min(month-1, 11), 0);
    }

    /**
     * Parses a year; years written with exactly two digits are taken to
     * lie in the 20th century (1900 is added).
     */
    private static int expandYear(final String year) {
        return Integer.parseInt(year) + (year.length() == 2 ? 1900 : 0);
    }

    /**
     * Builds a date from calendar fields in the default time zone.
     * {@code month} is zero-based as required by {@link Calendar}.
     */
    private static Date toDate(
        final int year,
        final int month,
        final int day,
        final int hour,
        final int minute,
        final int second
    ) {
        final Calendar cal = Calendar.getInstance();
        cal.clear();
        cal.set(year, month, day, hour, minute, second);
        return cal.getTime();
    }

    /**
     * Guess date by trying all different patterns.
     * Throws IllegalArgumentException if not able to guess.
     *
     * <p>The patterns are tried in this order: {@link #YYYY_MM_DD},
     * {@link #DD_MM_YYYY}, {@link #MMM_YYYY}, {@link #YYYY_MM_DDThh_mm},
     * {@link #YYYY_MM_DDThh_mm_ss}, {@link #DD_MM_YYYYThh_mm},
     * {@link #DD_MM_YYYYThh_mm_ss}, {@link #DDMMYY} and finally
     * {@link #GARBAGE_YYYY}. Inputs without a time of day are set to
     * 12:00:00; inputs without a day are set to the 15th.</p>
     *
     * @param s The date to be guessed (e.g. 11.02.2001).
     * @return the parsed Date.
     * @throws IllegalArgumentException if {@code s} is null, blank or
     *         matches none of the patterns.
     */
    public static Date guessDate(final String s) {
        if (s == null) {
            throw new IllegalArgumentException(
                "date string must not be null");
        }

        final String t = s.trim();
        if (t.isEmpty()) {
            throw new IllegalArgumentException(
                "date string must not be empty");
        }

        Matcher m;

        m = YYYY_MM_DD.matcher(t);
        if (m.matches()) {
            return toDate(
                Integer.parseInt(m.group(1)),
                calendarMonth(m.group(2)),
                Integer.parseInt(m.group(3)),
                DEFAULT_HOUR, 0, 0);
        }

        m = DD_MM_YYYY.matcher(t);
        if (m.matches()) {
            return toDate(
                expandYear(m.group(3)),
                calendarMonth(m.group(2)),
                Integer.parseInt(m.group(1)),
                DEFAULT_HOUR, 0, 0);
        }

        m = MMM_YYYY.matcher(t);
        if (m.matches()) {
            final int month = guessMonth(m.group(3));
            // An unknown abbreviation falls through to the other patterns.
            if (month >= 0) {
                // Group 2 is the bare day number; group 1 would include
                // the separating dot and cannot be parsed as an integer.
                final String day = m.group(2);
                return toDate(
                    expandYear(m.group(4)),
                    month,
                    ((day == null) || day.isEmpty())
                        ? DEFAULT_DAY
                        : Integer.parseInt(day),
                    DEFAULT_HOUR, 0, 0);
            }
        }

        m = YYYY_MM_DDThh_mm.matcher(t);
        if (m.matches()) {
            return toDate(
                Integer.parseInt(m.group(1)),
                calendarMonth(m.group(2)),
                Integer.parseInt(m.group(3)),
                Integer.parseInt(m.group(4)),
                Integer.parseInt(m.group(5)),
                0);
        }

        m = YYYY_MM_DDThh_mm_ss.matcher(t);
        if (m.matches()) {
            return toDate(
                Integer.parseInt(m.group(1)),
                calendarMonth(m.group(2)),
                Integer.parseInt(m.group(3)),
                Integer.parseInt(m.group(4)),
                Integer.parseInt(m.group(5)),
                Integer.parseInt(m.group(6)));
        }

        m = DD_MM_YYYYThh_mm.matcher(t);
        if (m.matches()) {
            return toDate(
                expandYear(m.group(3)),
                calendarMonth(m.group(2)),
                Integer.parseInt(m.group(1)),
                Integer.parseInt(m.group(4)),
                Integer.parseInt(m.group(5)),
                0);
        }

        m = DD_MM_YYYYThh_mm_ss.matcher(t);
        if (m.matches()) {
            return toDate(
                expandYear(m.group(3)),
                calendarMonth(m.group(2)),
                Integer.parseInt(m.group(1)),
                Integer.parseInt(m.group(4)),
                Integer.parseInt(m.group(5)),
                Integer.parseInt(m.group(6)));
        }

        m = DDMMYY.matcher(t);
        if (m.matches()) {
            int year = Integer.parseInt(m.group(3));
            final int currentYear =
                Calendar.getInstance().get(Calendar.YEAR);

            // Two digit years up to the current one are placed in the
            // 21st century, later ones in the 20th.
            if (year <= currentYear % 100) {
                year += 2000;
            }
            else {
                year += 1900;
            }
            // TODO Warning: two digit year
            return toDate(
                year,
                // Calendar months are zero-based, the input is one-based.
                calendarMonth(m.group(2)),
                Integer.parseInt(m.group(1)),
                DEFAULT_HOUR, 0, 0);
        }

        m = GARBAGE_YYYY.matcher(t);
        // Strings containing "HQ" or "HW" (e.g. "HQ50") are rejected so
        // their trailing number is not taken for a year.
        if (m.matches() && !t.contains("HQ") && !t.contains("HW")) {
            return toDate(
                expandYear(m.group(1)),
                5,           // month
                DEFAULT_DAY, // day
                DEFAULT_HOUR, 0, 0);
        }

        throw new IllegalArgumentException(
            "cannot guess date from '" + s + "'");
    }

    /**
     * Prints the guessed date for every command line argument.
     *
     * @param args the date strings to guess.
     */
    public static void main(final String [] args) {
        for (int i = 0; i < args.length; ++i) {
            System.out.println(args[i] + ": " + guessDate(args[i]));
        }
    }
}

http://dive4elements.wald.intevation.org