Mercurial > dive4elements > river
comparison backend/src/main/java/org/dive4elements/river/importer/sinfo/parsers/DailyDischargeParser.java @ 9658:d86c7cb68b41
Importer (s/u-info) extensions: daily discharge: detecting, logging and skipping lines with missing date or q, or duplicate date,
detecting wrong column titles and cancelling the import,
specific error message if gauge not found
author | mschaefer |
---|---|
date | Mon, 23 Mar 2020 15:33:40 +0100 |
parents | abe069eb180c |
children |
comparison
equal
deleted
inserted
replaced
9657:a79881a892c9 | 9658:d86c7cb68b41 |
---|---|
9 */ | 9 */ |
10 | 10 |
11 package org.dive4elements.river.importer.sinfo.parsers; | 11 package org.dive4elements.river.importer.sinfo.parsers; |
12 | 12 |
13 import java.io.File; | 13 import java.io.File; |
14 import java.io.FilenameFilter; | |
15 import java.text.DateFormat; | 14 import java.text.DateFormat; |
16 import java.text.SimpleDateFormat; | 15 import java.text.SimpleDateFormat; |
17 import java.util.ArrayList; | 16 import java.util.ArrayList; |
18 import java.util.Date; | 17 import java.util.Date; |
19 import java.util.List; | 18 import java.util.List; |
40 | 39 |
41 /***** FIELDS *****/ | 40 /***** FIELDS *****/ |
42 | 41 |
43 private static final Logger log = Logger.getLogger(DailyDischargeParser.class); | 42 private static final Logger log = Logger.getLogger(DailyDischargeParser.class); |
44 | 43 |
45 static final Pattern IMPORT_FILENAME = Pattern.compile("^(.+)_mittlerer_Tagesabfluss.csv", Pattern.CASE_INSENSITIVE); | 44 static final Pattern IMPORT_FILENAME = Pattern.compile("^(.+)_mittlerer_Tagesabfluss\\.csv", Pattern.CASE_INSENSITIVE); |
46 | 45 |
47 private static final Pattern META_GAUGENAME = Pattern.compile("^#\\s*Stations-*Name:\\s*(\\S[^;]*).*", Pattern.CASE_INSENSITIVE); | 46 private static final Pattern META_GAUGENAME = Pattern.compile("^#\\s*Stations-*Name:\\s*(\\S[^;]*).*", Pattern.CASE_INSENSITIVE); |
48 | 47 |
49 private static final Pattern META_GAUGENUMBER = Pattern.compile("^#\\s*Stations-*Nummer:\\s*(\\S[^;]*).*", Pattern.CASE_INSENSITIVE); | 48 // private static final Pattern META_GAUGENUMBER = Pattern.compile("^#\\s*Stations-*Nummer:\\s*(\\S[^;]*).*", |
50 | 49 // Pattern.CASE_INSENSITIVE); |
51 private static final Pattern META_COLUMNTITLES = Pattern.compile("^#*\\s*Datum\\s*;\\s*Q.*", Pattern.CASE_INSENSITIVE); | 50 |
51 private static final Pattern META_BETREIBER = Pattern.compile("^#\\s*Betreiber:.*", Pattern.CASE_INSENSITIVE); | |
52 private static final Pattern META_PARAMETER = Pattern.compile("^#\\s*Parameter-Name:.*", Pattern.CASE_INSENSITIVE); | |
53 private static final Pattern META_ZEITREIHE = Pattern.compile("^#\\s*# Zeitreihe.*", Pattern.CASE_INSENSITIVE); | |
54 private static final Pattern META_REIHEBEGINN = Pattern.compile("^#\\s*Beginn der Zeitreihe:.*", Pattern.CASE_INSENSITIVE); | |
55 private static final Pattern META_REIHEENDE = Pattern.compile("^#\\s*Ende der Zeitreihe:.*", Pattern.CASE_INSENSITIVE); | |
56 | |
57 private static final Pattern META_COLUMNTITLES = Pattern.compile("^#*\\s*(Datum)\\s*;\\s*(Q[^;]*)", Pattern.CASE_INSENSITIVE); | |
52 | 58 |
53 private static final DateFormat dateFormat = new SimpleDateFormat("dd.MM.yyyy"); | 59 private static final DateFormat dateFormat = new SimpleDateFormat("dd.MM.yyyy"); |
60 | |
61 private final List<Date> dates; | |
54 | 62 |
55 | 63 |
56 /***** CONSTRUCTORS *****/ | 64 /***** CONSTRUCTORS *****/ |
57 | 65 |
58 public DailyDischargeParser(final File importPath, final File rootRelativePath, final ImportRiver river) { | 66 public DailyDischargeParser(final File importPath, final File rootRelativePath, final ImportRiver river) { |
59 super(importPath, rootRelativePath, river); | 67 super(importPath, rootRelativePath, river); |
68 this.dates = new ArrayList<>(); | |
60 } | 69 } |
61 | 70 |
62 | 71 |
63 /***** METHODS *****/ | 72 /***** METHODS *****/ |
64 | 73 |
78 * Creates a list of parsers for all daily discharge import files in a directory | 87 * Creates a list of parsers for all daily discharge import files in a directory |
79 */ | 88 */ |
80 public static List<DailyDischargeParser> createParsers(final File importDir, final File relativeDir, final ImportRiver river) { | 89 public static List<DailyDischargeParser> createParsers(final File importDir, final File relativeDir, final ImportRiver river) { |
81 final List<DailyDischargeParser> parsers = new ArrayList<>(); | 90 final List<DailyDischargeParser> parsers = new ArrayList<>(); |
82 if (importDir.exists()) { | 91 if (importDir.exists()) { |
83 final File[] files = importDir.listFiles(new FilenameFilter() { | 92 for (final File file : listFiles(importDir, IMPORT_FILENAME)) |
84 @Override | |
85 public boolean accept(final File dir, final String name) { | |
86 return IMPORT_FILENAME.matcher(name).matches(); | |
87 } | |
88 }); | |
89 for (final File file : files) | |
90 parsers.add(new DailyDischargeParser(file, new File(relativeDir, file.getName()), river)); | 93 parsers.add(new DailyDischargeParser(file, new File(relativeDir, file.getName()), river)); |
91 } | 94 } |
92 return parsers; | 95 return parsers; |
93 } | 96 } |
94 | 97 |
107 Date day = null; | 110 Date day = null; |
108 try { | 111 try { |
109 day = dateFormat.parse(values[0]); | 112 day = dateFormat.parse(values[0]); |
110 } | 113 } |
111 catch (final Exception e) { | 114 catch (final Exception e) { |
112 logError("Invalid date in line " + this.in.getLineNumber()); | 115 logLineWarning("Invalid date"); |
113 return null; | 116 return null; |
114 } | 117 } |
115 if (parseDoubleWithNull(values[1]) == null) { | 118 final Number q = parseDoubleCheckNull(values, 1); |
116 logError("Invalid discharge value in line " + this.in.getLineNumber()); | 119 if ((q == null) || Double.isNaN(q.doubleValue())) { |
120 logLineWarning(INVALID_VALUE_ERROR_FORMAT, "discharge"); | |
117 return null; | 121 return null; |
118 } | 122 } |
119 return new DailyDischargeDayLineImport(day, Double.valueOf(parseDoubleWithNull(values[1]).doubleValue())); | 123 if (this.dates.contains(day)) { |
124 logLineWarning("Duplicate date"); | |
125 return null; | |
126 } | |
127 this.dates.add(day); | |
128 return new DailyDischargeDayLineImport(day, q.doubleValue()); | |
120 } | 129 } |
121 | 130 |
122 @Override | 131 @Override |
123 protected boolean handleMetaOther() { | 132 protected boolean handleMetaOther() { |
124 if (handleMetaGaugeName()) | 133 if (handleMetaGaugeName()) |
125 return true; | 134 return true; |
126 else if (handleMetaGaugeNumber()) | 135 // else if (handleMetaGaugeNumber()) |
136 // return true; | |
137 else if (META_BETREIBER.matcher(this.currentLine).matches()) | |
138 return true; | |
139 else if (META_PARAMETER.matcher(this.currentLine).matches()) | |
140 return true; | |
141 else if (META_ZEITREIHE.matcher(this.currentLine).matches()) | |
142 return true; | |
143 else if (META_REIHEBEGINN.matcher(this.currentLine).matches()) | |
144 return true; | |
145 else if (META_REIHEENDE.matcher(this.currentLine).matches()) | |
127 return true; | 146 return true; |
128 else | 147 else |
129 return false; | 148 return false; |
130 } | 149 } |
131 | 150 |
137 return true; | 156 return true; |
138 } | 157 } |
139 return false; | 158 return false; |
140 } | 159 } |
141 | 160 |
142 private boolean handleMetaGaugeNumber() { | 161 // private boolean handleMetaGaugeNumber() { |
143 final Matcher m = META_GAUGENUMBER.matcher(this.currentLine); | 162 // final Matcher m = META_GAUGENUMBER.matcher(this.currentLine); |
144 if (m.matches()) { | 163 // if (m.matches()) { |
145 this.metaPatternsMatched.add(META_GAUGENUMBER); | 164 // this.metaPatternsMatched.add(META_GAUGENUMBER); |
146 this.seriesHeader.setGaugeNumber(Long.parseLong(m.group(1))); | 165 // this.seriesHeader.setGaugeNumber(Long.parseLong(m.group(1))); |
147 return true; | 166 // return true; |
148 } | 167 // } |
149 return false; | 168 // return false; |
150 } | 169 // } |
151 | 170 |
152 @Override | 171 @Override |
153 protected boolean handleMetaColumnTitles() { | 172 protected boolean handleMetaColumnTitles() { |
154 if (!META_COLUMNTITLES.matcher(this.currentLine).matches()) | 173 final Matcher m = META_COLUMNTITLES.matcher(this.currentLine); |
155 return false; | 174 if (!m.matches()) { |
175 return false; | |
176 } | |
156 this.metaPatternsMatched.add(META_COLUMNTITLES); | 177 this.metaPatternsMatched.add(META_COLUMNTITLES); |
157 this.columnTitles.clear(); | 178 this.columnTitles.clear(); |
158 final String[] titles = this.currentLine.split(SEPARATOR_CHAR, 0); | 179 this.columnTitles.add(m.group(1)); |
159 for (int i = 0; i <= titles.length - 1; i++) | 180 this.columnTitles.add(m.group(2)); |
160 this.columnTitles.add(titles[i].trim()); | 181 return true; |
182 } | |
183 | |
184 /** | |
185 * Check meta data after all meta lines (#) have been read | |
186 */ | |
187 @Override | |
188 protected boolean checkMetaData() { | |
189 if (!super.checkRiverExists()) | |
190 return false; | |
161 this.seriesHeader.setGauge(this.river.getPeer().findGauge(this.seriesHeader.getGaugeNumber(), this.seriesHeader.getGaugeName())); | 191 this.seriesHeader.setGauge(this.river.getPeer().findGauge(this.seriesHeader.getGaugeNumber(), this.seriesHeader.getGaugeName())); |
162 if (this.seriesHeader.getGauge() == null) { | 192 if (this.seriesHeader.getGauge() == null) { |
163 logError("Gauge not found, file skipped"); | 193 logError("Gauge not found (%s)", this.seriesHeader.getGaugeName()); |
164 this.headerParsingState = ParsingState.STOP; | 194 this.headerParsingState = ParsingState.STOP; |
195 return false; | |
196 } | |
197 if (super.checkMetaData() == false) | |
198 return false; | |
199 if (this.columnTitles.size() <= 1) { | |
200 logError("No valid column title line (Datum, Q) found"); | |
201 this.headerParsingState = ParsingState.STOP; | |
202 return false; | |
165 } | 203 } |
166 return true; | 204 return true; |
167 } | 205 } |
206 | |
168 } | 207 } |