Mercurial > dive4elements > river
diff flys-backend/src/main/java/org/dive4elements/river/importer/parsers/StaFileParser.java @ 5828:dfb26b03b179
Moved directories to org.dive4elements.river
author | Sascha L. Teichmann <teichmann@intevation.de> |
---|---|
date | Thu, 25 Apr 2013 11:53:11 +0200 |
parents | flys-backend/src/main/java/de/intevation/flys/importer/parsers/StaFileParser.java@54077c9c9305 |
children | 18619c1e7c2a |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/flys-backend/src/main/java/org/dive4elements/river/importer/parsers/StaFileParser.java Thu Apr 25 11:53:11 2013 +0200 @@ -0,0 +1,338 @@ +package de.intevation.flys.importer.parsers; + +import java.io.File; +import java.io.IOException; +import java.io.LineNumberReader; +import java.io.FileInputStream; +import java.io.InputStreamReader; + +import java.math.BigDecimal; + +import java.util.regex.Pattern; +import java.util.regex.Matcher; + +import java.util.Date; +import java.util.HashMap; +import java.util.ArrayList; +import java.util.List; + +import org.apache.log4j.Logger; + +import de.intevation.flys.importer.ImportMainValueType; +import de.intevation.flys.importer.ImportMainValue; +import de.intevation.flys.importer.ImportNamedMainValue; +import de.intevation.flys.importer.ImportGauge; +import de.intevation.flys.importer.ImportTimeInterval; +import de.intevation.flys.utils.DateGuesser; + +public class StaFileParser +{ + private static Logger log = Logger.getLogger(StaFileParser.class); + + public static final String ENCODING = "ISO-8859-1"; + + public static final String TYPES = + System.getProperty("flys.backend.main.value.types", "QWTD"); + + public static final boolean NOT_PARSE_GAUGE_NUMBERS = + Boolean.getBoolean("flys.backend.sta.not.parse.gauge.numbers"); + + public static final Pattern QWTD_ = + Pattern.compile("\\s*([^\\s]+)\\s+([^\\s]+)\\s+([" + + Pattern.quote(TYPES) + "]).*"); + + public static final class NameAndTimeInterval { + private String name; + private ImportTimeInterval timeInterval; + + public NameAndTimeInterval(String name) { + this(name, null); + } + + public NameAndTimeInterval(String name, ImportTimeInterval timeInterval) { + this.name = name; + this.timeInterval = timeInterval; + } + + public String getName() { + return name; + } + + public ImportTimeInterval getTimeInterval() { + return timeInterval; + } + } // class NameAndTimeInterval + + public StaFileParser() { + } + + public boolean parse(ImportGauge gauge) throws IOException { + + File file = gauge.getStaFile(); + + log.info("parsing STA file: " + file); + LineNumberReader in = null; + try { + in = + new LineNumberReader( + new InputStreamReader( + new FileInputStream(file), ENCODING)); + + String line = in.readLine(); + + if (line == null) { + log.warn("STA file is empty."); + return false; + } + + if (line.length() < 37) { + log.warn("First line in STA file is too short."); + return false; + } + + String gaugeName = line.substring(16, 28).trim(); + + Long gaugeNumber = null; + + if (!NOT_PARSE_GAUGE_NUMBERS) { + String gaugeNumberString = line.substring(8, 16).trim(); + + try { + gaugeNumber = Long.parseLong(gaugeNumberString); + } + catch (NumberFormatException nfe) { + log.warn("STA: '" + gaugeNumberString + + "' is not a valid long number."); + } + } + + gauge.setName(gaugeName); + gauge.setOfficialNumber(gaugeNumber); + + if (log.isDebugEnabled()) { + log.debug( + "name/number: '" + gaugeName + "' '" + gaugeNumber + "'"); + } + + String [] values = line.substring(38).trim().split("\\s+", 2); + + if (values.length < 2) { + log.warn("STA: Not enough columns for aeo and datum."); + } + try { + gauge.setAeo(new BigDecimal(values[0].replace(",", "."))); + gauge.setDatum(new BigDecimal(values[1].replace(",", "."))); + } + catch (NumberFormatException nfe) { + log.warn("STA: cannot parse aeo or datum."); + return false; + } + + line = in.readLine(); + + if (line == null) { + log.warn("STA file has not enough lines"); + return false; + } + + if (line.length() < 36) { + log.warn("STA: second line is too short"); + return false; + } + + try { + gauge.setStation( + new BigDecimal(line.substring(29, 36).trim())); + } + catch (NumberFormatException nfe) { + log.warn("STA: parsing of the datum of the gauge failed"); + return false; + } + + // overread the next six lines + for (int i = 0; i < 6; ++i) { + if ((line = in.readLine()) == null) { + log.warn("STA file is too short"); + return false; + } + } + + HashMap<String, ImportMainValueType> types = + new HashMap<String, ImportMainValueType>(); + + ArrayList<ImportNamedMainValue> namedMainValues = + new ArrayList<ImportNamedMainValue>(); + + ArrayList<ImportMainValue> mainValues = + new ArrayList<ImportMainValue>(); + + while ((line = in.readLine()) != null) { + Matcher m = QWTD_.matcher(line); + if (m.matches()) { + BigDecimal value; + try { + value = new BigDecimal(m.group(2).replace(",", ".")); + } + catch (NumberFormatException nfe) { + log.warn("STA: value not parseable in line " + + in.getLineNumber()); + continue; + } + String typeString = m.group(3); + log.debug("\t type: " + typeString); + ImportMainValueType type = types.get(typeString); + if (type == null) { + type = new ImportMainValueType(typeString); + types.put(typeString, type); + } + String name = m.group(1); + NameAndTimeInterval nat = parseName(name); + ImportNamedMainValue namedMainValue = + new ImportNamedMainValue(type, nat.getName()); + namedMainValues.add(namedMainValue); + + ImportMainValue mainValue = new ImportMainValue( + gauge, + namedMainValue, + value, + nat.getTimeInterval()); + + mainValues.add(mainValue); + } + else { + // TODO: treat as a comment + } + } + gauge.setMainValueTypes( + new ArrayList<ImportMainValueType>(types.values())); + gauge.setNamedMainValues(namedMainValues); + gauge.setMainValues(mainValues); + } + finally { + if (in != null) { + in.close(); + } + } + log.info("finished parsing STA file: " + file); + return true; + } + + protected NameAndTimeInterval parseName(String name) { + List<String> result = new ArrayList<String>(); + + unbracket(name, 0, result); + + int length = result.size(); + + if (length < 1) { // Should not happen. + return new NameAndTimeInterval(name); + } + + if (length == 1) { // No date at all -> use first part. + return new NameAndTimeInterval(result.get(0).trim()); + } + + if (length == 2) { // e.g. W(1994) or W(1994 - 1999) + String type = result.get(0).trim(); + + ImportTimeInterval timeInterval = getTimeInterval( + result.get(1).trim()); + + if (timeInterval == null) { // No date at all. + type = name; + } + + return new NameAndTimeInterval(type, timeInterval); + } + + if (length == 3) { // e.g W(Q(1994)) or W(Q(1994 - 1999)) + + String type = + result.get(0).trim() + "(" + + result.get(1).trim() + ")"; + + ImportTimeInterval timeInterval = getTimeInterval( + result.get(2).trim()); + + if (timeInterval == null) { // No date at all. + type = name; + } + + return new NameAndTimeInterval(type, timeInterval); + } + + // more than 3 elements return unmodified. + + return new NameAndTimeInterval(name); + } + + private static ImportTimeInterval getTimeInterval(String datePart) { + + int minus = datePart.indexOf('-'); + + if (minus < 0) { // '-' not found + + Date date = null; + try { + date = DateGuesser.guessDate(datePart); + } + catch (IllegalArgumentException iae) { + log.warn("STA: Invalid date '" + datePart + "'"); + return null; + } + + return new ImportTimeInterval(date); + } + + // Found '-' so we have <from> - <to> + String startPart = datePart.substring(0, minus).trim(); + String endPart = datePart.substring(minus).trim(); + + Date startDate = null; + Date endDate = null; + + try { + startDate = DateGuesser.guessDate(startPart); + } + catch (IllegalArgumentException iae) { + log.warn("STA: Invalid start date '" + startPart + "'"); + } + + try { + endDate = DateGuesser.guessDate(endPart); + } + catch (IllegalArgumentException iae) { + log.warn("STA: Invalid end date '" + endPart + "'"); + } + + if (startDate == null) { + log.warn("STA: Need start date."); + return null; + } + + return new ImportTimeInterval(startDate, endDate); + } + + private static int unbracket(String s, int index, List<String> result) { + StringBuilder sb = new StringBuilder(); + int length = s.length(); + while (index < length) { + char c = s.charAt(index); + switch (c) { + case '(': + index = unbracket(s, index+1, result); + break; + case ')': + result.add(0, sb.toString()); + return index+1; + default: + sb.append(c); + ++index; + } + } + result.add(0, sb.toString()); + + return index; + } +} +// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :