comparison backend/src/main/java/org/dive4elements/river/importer/sinfo/parsers/DailyDischargeParser.java @ 9658:d86c7cb68b41

Importer (s/u-info) extensions for daily discharge: detect, log and skip lines with a missing date or Q value or with a duplicate date; detect wrong column titles and cancel the import; log a specific error message if the gauge is not found
author mschaefer
date Mon, 23 Mar 2020 15:33:40 +0100
parents abe069eb180c
children
comparison
equal deleted inserted replaced
9657:a79881a892c9 9658:d86c7cb68b41
9 */ 9 */
10 10
11 package org.dive4elements.river.importer.sinfo.parsers; 11 package org.dive4elements.river.importer.sinfo.parsers;
12 12
13 import java.io.File; 13 import java.io.File;
14 import java.io.FilenameFilter;
15 import java.text.DateFormat; 14 import java.text.DateFormat;
16 import java.text.SimpleDateFormat; 15 import java.text.SimpleDateFormat;
17 import java.util.ArrayList; 16 import java.util.ArrayList;
18 import java.util.Date; 17 import java.util.Date;
19 import java.util.List; 18 import java.util.List;
40 39
41 /***** FIELDS *****/ 40 /***** FIELDS *****/
42 41
43 private static final Logger log = Logger.getLogger(DailyDischargeParser.class); 42 private static final Logger log = Logger.getLogger(DailyDischargeParser.class);
44 43
45 static final Pattern IMPORT_FILENAME = Pattern.compile("^(.+)_mittlerer_Tagesabfluss.csv", Pattern.CASE_INSENSITIVE); 44 static final Pattern IMPORT_FILENAME = Pattern.compile("^(.+)_mittlerer_Tagesabfluss\\.csv", Pattern.CASE_INSENSITIVE);
46 45
47 private static final Pattern META_GAUGENAME = Pattern.compile("^#\\s*Stations-*Name:\\s*(\\S[^;]*).*", Pattern.CASE_INSENSITIVE); 46 private static final Pattern META_GAUGENAME = Pattern.compile("^#\\s*Stations-*Name:\\s*(\\S[^;]*).*", Pattern.CASE_INSENSITIVE);
48 47
49 private static final Pattern META_GAUGENUMBER = Pattern.compile("^#\\s*Stations-*Nummer:\\s*(\\S[^;]*).*", Pattern.CASE_INSENSITIVE); 48 // private static final Pattern META_GAUGENUMBER = Pattern.compile("^#\\s*Stations-*Nummer:\\s*(\\S[^;]*).*",
50 49 // Pattern.CASE_INSENSITIVE);
51 private static final Pattern META_COLUMNTITLES = Pattern.compile("^#*\\s*Datum\\s*;\\s*Q.*", Pattern.CASE_INSENSITIVE); 50
51 private static final Pattern META_BETREIBER = Pattern.compile("^#\\s*Betreiber:.*", Pattern.CASE_INSENSITIVE);
52 private static final Pattern META_PARAMETER = Pattern.compile("^#\\s*Parameter-Name:.*", Pattern.CASE_INSENSITIVE);
53 private static final Pattern META_ZEITREIHE = Pattern.compile("^#\\s*# Zeitreihe.*", Pattern.CASE_INSENSITIVE);
54 private static final Pattern META_REIHEBEGINN = Pattern.compile("^#\\s*Beginn der Zeitreihe:.*", Pattern.CASE_INSENSITIVE);
55 private static final Pattern META_REIHEENDE = Pattern.compile("^#\\s*Ende der Zeitreihe:.*", Pattern.CASE_INSENSITIVE);
56
57 private static final Pattern META_COLUMNTITLES = Pattern.compile("^#*\\s*(Datum)\\s*;\\s*(Q[^;]*)", Pattern.CASE_INSENSITIVE);
52 58
53 private static final DateFormat dateFormat = new SimpleDateFormat("dd.MM.yyyy"); 59 private static final DateFormat dateFormat = new SimpleDateFormat("dd.MM.yyyy");
60
61 private final List<Date> dates;
54 62
55 63
56 /***** CONSTRUCTORS *****/ 64 /***** CONSTRUCTORS *****/
57 65
58 public DailyDischargeParser(final File importPath, final File rootRelativePath, final ImportRiver river) { 66 public DailyDischargeParser(final File importPath, final File rootRelativePath, final ImportRiver river) {
59 super(importPath, rootRelativePath, river); 67 super(importPath, rootRelativePath, river);
68 this.dates = new ArrayList<>();
60 } 69 }
61 70
62 71
63 /***** METHODS *****/ 72 /***** METHODS *****/
64 73
78 * Creates a list of parsers for all daily discharge import files in a directory 87 * Creates a list of parsers for all daily discharge import files in a directory
79 */ 88 */
80 public static List<DailyDischargeParser> createParsers(final File importDir, final File relativeDir, final ImportRiver river) { 89 public static List<DailyDischargeParser> createParsers(final File importDir, final File relativeDir, final ImportRiver river) {
81 final List<DailyDischargeParser> parsers = new ArrayList<>(); 90 final List<DailyDischargeParser> parsers = new ArrayList<>();
82 if (importDir.exists()) { 91 if (importDir.exists()) {
83 final File[] files = importDir.listFiles(new FilenameFilter() { 92 for (final File file : listFiles(importDir, IMPORT_FILENAME))
84 @Override
85 public boolean accept(final File dir, final String name) {
86 return IMPORT_FILENAME.matcher(name).matches();
87 }
88 });
89 for (final File file : files)
90 parsers.add(new DailyDischargeParser(file, new File(relativeDir, file.getName()), river)); 93 parsers.add(new DailyDischargeParser(file, new File(relativeDir, file.getName()), river));
91 } 94 }
92 return parsers; 95 return parsers;
93 } 96 }
94 97
107 Date day = null; 110 Date day = null;
108 try { 111 try {
109 day = dateFormat.parse(values[0]); 112 day = dateFormat.parse(values[0]);
110 } 113 }
111 catch (final Exception e) { 114 catch (final Exception e) {
112 logError("Invalid date in line " + this.in.getLineNumber()); 115 logLineWarning("Invalid date");
113 return null; 116 return null;
114 } 117 }
115 if (parseDoubleWithNull(values[1]) == null) { 118 final Number q = parseDoubleCheckNull(values, 1);
116 logError("Invalid discharge value in line " + this.in.getLineNumber()); 119 if ((q == null) || Double.isNaN(q.doubleValue())) {
120 logLineWarning(INVALID_VALUE_ERROR_FORMAT, "discharge");
117 return null; 121 return null;
118 } 122 }
119 return new DailyDischargeDayLineImport(day, Double.valueOf(parseDoubleWithNull(values[1]).doubleValue())); 123 if (this.dates.contains(day)) {
124 logLineWarning("Duplicate date");
125 return null;
126 }
127 this.dates.add(day);
128 return new DailyDischargeDayLineImport(day, q.doubleValue());
120 } 129 }
121 130
122 @Override 131 @Override
123 protected boolean handleMetaOther() { 132 protected boolean handleMetaOther() {
124 if (handleMetaGaugeName()) 133 if (handleMetaGaugeName())
125 return true; 134 return true;
126 else if (handleMetaGaugeNumber()) 135 // else if (handleMetaGaugeNumber())
136 // return true;
137 else if (META_BETREIBER.matcher(this.currentLine).matches())
138 return true;
139 else if (META_PARAMETER.matcher(this.currentLine).matches())
140 return true;
141 else if (META_ZEITREIHE.matcher(this.currentLine).matches())
142 return true;
143 else if (META_REIHEBEGINN.matcher(this.currentLine).matches())
144 return true;
145 else if (META_REIHEENDE.matcher(this.currentLine).matches())
127 return true; 146 return true;
128 else 147 else
129 return false; 148 return false;
130 } 149 }
131 150
137 return true; 156 return true;
138 } 157 }
139 return false; 158 return false;
140 } 159 }
141 160
142 private boolean handleMetaGaugeNumber() { 161 // private boolean handleMetaGaugeNumber() {
143 final Matcher m = META_GAUGENUMBER.matcher(this.currentLine); 162 // final Matcher m = META_GAUGENUMBER.matcher(this.currentLine);
144 if (m.matches()) { 163 // if (m.matches()) {
145 this.metaPatternsMatched.add(META_GAUGENUMBER); 164 // this.metaPatternsMatched.add(META_GAUGENUMBER);
146 this.seriesHeader.setGaugeNumber(Long.parseLong(m.group(1))); 165 // this.seriesHeader.setGaugeNumber(Long.parseLong(m.group(1)));
147 return true; 166 // return true;
148 } 167 // }
149 return false; 168 // return false;
150 } 169 // }
151 170
152 @Override 171 @Override
153 protected boolean handleMetaColumnTitles() { 172 protected boolean handleMetaColumnTitles() {
154 if (!META_COLUMNTITLES.matcher(this.currentLine).matches()) 173 final Matcher m = META_COLUMNTITLES.matcher(this.currentLine);
155 return false; 174 if (!m.matches()) {
175 return false;
176 }
156 this.metaPatternsMatched.add(META_COLUMNTITLES); 177 this.metaPatternsMatched.add(META_COLUMNTITLES);
157 this.columnTitles.clear(); 178 this.columnTitles.clear();
158 final String[] titles = this.currentLine.split(SEPARATOR_CHAR, 0); 179 this.columnTitles.add(m.group(1));
159 for (int i = 0; i <= titles.length - 1; i++) 180 this.columnTitles.add(m.group(2));
160 this.columnTitles.add(titles[i].trim()); 181 return true;
182 }
183
184 /**
185 * Check meta data after all meta lines (#) have been read
186 */
187 @Override
188 protected boolean checkMetaData() {
189 if (!super.checkRiverExists())
190 return false;
161 this.seriesHeader.setGauge(this.river.getPeer().findGauge(this.seriesHeader.getGaugeNumber(), this.seriesHeader.getGaugeName())); 191 this.seriesHeader.setGauge(this.river.getPeer().findGauge(this.seriesHeader.getGaugeNumber(), this.seriesHeader.getGaugeName()));
162 if (this.seriesHeader.getGauge() == null) { 192 if (this.seriesHeader.getGauge() == null) {
163 logError("Gauge not found, file skipped"); 193 logError("Gauge not found (%s)", this.seriesHeader.getGaugeName());
164 this.headerParsingState = ParsingState.STOP; 194 this.headerParsingState = ParsingState.STOP;
195 return false;
196 }
197 if (super.checkMetaData() == false)
198 return false;
199 if (this.columnTitles.size() <= 1) {
200 logError("No valid column title line (Datum, Q) found");
201 this.headerParsingState = ParsingState.STOP;
202 return false;
165 } 203 }
166 return true; 204 return true;
167 } 205 }
206
168 } 207 }

http://dive4elements.wald.intevation.org