teichmann@5844: /* Copyright (C) 2011, 2012, 2013 by Bundesanstalt für Gewässerkunde teichmann@5844: * Software engineering by Intevation GmbH teichmann@5844: * teichmann@5844: * This file is Free Software under the GNU AGPL (>=v3) teichmann@5844: * and comes with ABSOLUTELY NO WARRANTY! Check out the teichmann@5844: * documentation coming with Dive4Elements River for details. teichmann@5844: */ teichmann@5844: teichmann@5829: package org.dive4elements.river.importer.parsers; sascha@1211: sascha@1211: import java.io.File; sascha@1211: import java.io.IOException; sascha@1211: import java.io.LineNumberReader; sascha@1211: import java.io.FileInputStream; sascha@1211: import java.io.InputStreamReader; sascha@1211: sascha@1211: import java.math.BigDecimal; sascha@1211: sascha@1211: import java.util.regex.Pattern; sascha@1211: import java.util.regex.Matcher; sascha@1211: teichmann@5735: import java.util.Date; sascha@1211: import java.util.HashMap; sascha@1211: import java.util.ArrayList; teichmann@5735: import java.util.List; sascha@1211: sascha@1211: import org.apache.log4j.Logger; sascha@1211: teichmann@5829: import org.dive4elements.river.importer.ImportMainValueType; teichmann@5829: import org.dive4elements.river.importer.ImportMainValue; teichmann@5829: import org.dive4elements.river.importer.ImportNamedMainValue; teichmann@5829: import org.dive4elements.river.importer.ImportGauge; teichmann@5829: import org.dive4elements.river.importer.ImportTimeInterval; teichmann@5829: import org.dive4elements.river.utils.DateGuesser; sascha@1211: sascha@1211: public class StaFileParser sascha@1211: { sascha@1211: private static Logger log = Logger.getLogger(StaFileParser.class); sascha@1211: sascha@1211: public static final String ENCODING = "ISO-8859-1"; sascha@1211: sascha@1211: public static final String TYPES = sascha@1211: System.getProperty("flys.backend.main.value.types", "QWTD"); sascha@1211: tom@5176: public static final boolean NOT_PARSE_GAUGE_NUMBERS = tom@5176: Boolean.getBoolean("flys.backend.sta.not.parse.gauge.numbers"); sascha@2377: sascha@1211: public static final Pattern QWTD_ = sascha@1211: Pattern.compile("\\s*([^\\s]+)\\s+([^\\s]+)\\s+([" + sascha@1211: Pattern.quote(TYPES) + "]).*"); sascha@1211: teichmann@5902: // TODO: To be extented. teichmann@5902: private static final Pattern MAIN_VALUE = Pattern.compile( teichmann@5902: "^(HQ|MHW|GLQ|NMQ|HQEXT)(\\d*)$"); teichmann@5902: teichmann@5902: private static boolean isMainValue(String s) { teichmann@5902: s = s.replace(" ", "").toUpperCase(); teichmann@5902: return MAIN_VALUE.matcher(s).matches(); teichmann@5902: } teichmann@5902: teichmann@5902: teichmann@5735: public static final class NameAndTimeInterval { teichmann@5735: private String name; teichmann@5735: private ImportTimeInterval timeInterval; teichmann@5735: teichmann@5735: public NameAndTimeInterval(String name) { teichmann@5735: this(name, null); teichmann@5735: } teichmann@5735: teichmann@5735: public NameAndTimeInterval(String name, ImportTimeInterval timeInterval) { teichmann@5735: this.name = name; teichmann@5735: this.timeInterval = timeInterval; teichmann@5735: } teichmann@5735: teichmann@5735: public String getName() { teichmann@5735: return name; teichmann@5735: } teichmann@5735: teichmann@5735: public ImportTimeInterval getTimeInterval() { teichmann@5735: return timeInterval; teichmann@5735: } teichmann@5902: teichmann@5902: @Override teichmann@5902: public String toString() { teichmann@5902: return "name: " + name + " time interval: " + timeInterval; teichmann@5902: } teichmann@5735: } // class NameAndTimeInterval teichmann@5735: sascha@1211: public StaFileParser() { sascha@1211: } sascha@1211: sascha@1211: public boolean parse(ImportGauge gauge) throws IOException { sascha@1211: sascha@1211: File file = gauge.getStaFile(); sascha@1211: sascha@1211: log.info("parsing STA file: " + file); sascha@1211: LineNumberReader in = null; sascha@1211: try { sascha@1211: in = sascha@1211: new LineNumberReader( sascha@1211: new InputStreamReader( sascha@1211: new FileInputStream(file), ENCODING)); sascha@1211: sascha@1211: String line = in.readLine(); sascha@1211: sascha@1211: if (line == null) { sascha@1211: log.warn("STA file is empty."); sascha@1211: return false; sascha@1211: } sascha@1211: sascha@1211: if (line.length() < 37) { sascha@3660: log.warn("First line in STA file is too short."); sascha@1211: return false; sascha@1211: } sascha@2377: tom@5954: String gaugeName = line.substring(16, 35).trim(); sascha@1211: sascha@2371: Long gaugeNumber = null; sascha@2377: tom@5176: if (!NOT_PARSE_GAUGE_NUMBERS) { tom@5632: String gaugeNumberString = line.substring(8, 16).trim(); sascha@2377: sascha@2377: try { sascha@2377: gaugeNumber = Long.parseLong(gaugeNumberString); sascha@2377: } sascha@2377: catch (NumberFormatException nfe) { sascha@3660: log.warn("STA: '" + gaugeNumberString + sascha@2377: "' is not a valid long number."); sascha@2377: } sascha@2371: } sascha@2371: sascha@2371: gauge.setName(gaugeName); sascha@2371: gauge.setOfficialNumber(gaugeNumber); sascha@2371: sascha@2371: if (log.isDebugEnabled()) { sascha@2371: log.debug( sascha@2371: "name/number: '" + gaugeName + "' '" + gaugeNumber + "'"); sascha@2371: } sascha@1211: sascha@1211: String [] values = line.substring(38).trim().split("\\s+", 2); sascha@1211: sascha@1211: if (values.length < 2) { sascha@3660: log.warn("STA: Not enough columns for aeo and datum."); sascha@1211: } sascha@1211: try { sascha@1211: gauge.setAeo(new BigDecimal(values[0].replace(",", "."))); sascha@1211: gauge.setDatum(new BigDecimal(values[1].replace(",", "."))); sascha@1211: } sascha@1211: catch (NumberFormatException nfe) { sascha@3660: log.warn("STA: cannot parse aeo or datum."); sascha@1211: return false; sascha@1211: } sascha@1211: sascha@1211: line = in.readLine(); sascha@1211: sascha@1211: if (line == null) { sascha@1211: log.warn("STA file has not enough lines"); sascha@1211: return false; sascha@1211: } sascha@1211: sascha@1211: if (line.length() < 36) { sascha@3660: log.warn("STA: second line is too short"); sascha@1211: return false; sascha@1211: } sascha@1211: sascha@1211: try { sascha@1211: gauge.setStation( sascha@1211: new BigDecimal(line.substring(29, 36).trim())); sascha@1211: } sascha@1211: catch (NumberFormatException nfe) { sascha@3660: log.warn("STA: parsing of the datum of the gauge failed"); sascha@1211: return false; sascha@1211: } sascha@1211: sascha@1211: // overread the next six lines sascha@1211: for (int i = 0; i < 6; ++i) { sascha@1211: if ((line = in.readLine()) == null) { sascha@1211: log.warn("STA file is too short"); sascha@1211: return false; sascha@1211: } sascha@1211: } sascha@1211: sascha@1211: HashMap types = sascha@1211: new HashMap(); sascha@1211: sascha@1211: ArrayList namedMainValues = sascha@1211: new ArrayList(); sascha@1211: sascha@1211: ArrayList mainValues = sascha@1211: new ArrayList(); sascha@1211: sascha@1211: while ((line = in.readLine()) != null) { sascha@1211: Matcher m = QWTD_.matcher(line); sascha@1211: if (m.matches()) { sascha@1211: BigDecimal value; sascha@1211: try { sascha@1211: value = new BigDecimal(m.group(2).replace(",", ".")); sascha@1211: } sascha@1211: catch (NumberFormatException nfe) { sascha@3660: log.warn("STA: value not parseable in line " sascha@1211: + in.getLineNumber()); sascha@1211: continue; sascha@1211: } sascha@1211: String typeString = m.group(3); sascha@1211: log.debug("\t type: " + typeString); sascha@1211: ImportMainValueType type = types.get(typeString); sascha@1211: if (type == null) { sascha@1211: type = new ImportMainValueType(typeString); sascha@1211: types.put(typeString, type); sascha@1211: } sascha@1211: String name = m.group(1); teichmann@5735: NameAndTimeInterval nat = parseName(name); sascha@1211: ImportNamedMainValue namedMainValue = teichmann@5735: new ImportNamedMainValue(type, nat.getName()); sascha@1211: namedMainValues.add(namedMainValue); sascha@1211: teichmann@5735: ImportMainValue mainValue = new ImportMainValue( teichmann@5735: gauge, teichmann@5735: namedMainValue, teichmann@5735: value, teichmann@5735: nat.getTimeInterval()); sascha@1211: sascha@1211: mainValues.add(mainValue); sascha@1211: } sascha@1211: else { sascha@1211: // TODO: treat as a comment sascha@1211: } sascha@1211: } sascha@1211: gauge.setMainValueTypes( sascha@1211: new ArrayList(types.values())); sascha@1211: gauge.setNamedMainValues(namedMainValues); sascha@1211: gauge.setMainValues(mainValues); sascha@1211: } sascha@1211: finally { sascha@1211: if (in != null) { sascha@1211: in.close(); sascha@1211: } sascha@1211: } sascha@1211: log.info("finished parsing STA file: " + file); sascha@1211: return true; sascha@1211: } teichmann@5735: teichmann@5902: protected static NameAndTimeInterval parseName(String name) { teichmann@5735: List result = new ArrayList(); teichmann@5735: teichmann@5735: unbracket(name, 0, result); teichmann@5735: teichmann@5735: int length = result.size(); teichmann@5735: teichmann@5735: if (length < 1) { // Should not happen. teichmann@5735: return new NameAndTimeInterval(name); teichmann@5735: } teichmann@5735: teichmann@5735: if (length == 1) { // No date at all -> use first part. teichmann@5735: return new NameAndTimeInterval(result.get(0).trim()); teichmann@5735: } teichmann@5735: teichmann@5902: if (length == 2) { // e.g. HQ(1994) or HQ(1994 - 1999) teichmann@5735: teichmann@5902: String type = result.get(0).trim(); teichmann@5902: ImportTimeInterval timeInterval = null; teichmann@5735: teichmann@5902: String datePart = result.get(1).trim(); teichmann@5902: if (isMainValue(datePart)) { // e.g. W(HQ100) teichmann@5902: type += "(" + datePart + ")"; teichmann@5902: timeInterval = null; teichmann@5902: } teichmann@5902: else { teichmann@5902: timeInterval = getTimeInterval(result.get(1).trim()); teichmann@5902: teichmann@5902: if (timeInterval == null) { // No date at all. teichmann@5902: type = name; teichmann@5902: } teichmann@5735: } teichmann@5735: teichmann@5735: return new NameAndTimeInterval(type, timeInterval); teichmann@5735: } teichmann@5735: teichmann@5735: if (length == 3) { // e.g W(Q(1994)) or W(Q(1994 - 1999)) teichmann@5735: teichmann@5735: String type = teichmann@5902: result.get(0).trim() + "(" + teichmann@5735: result.get(1).trim() + ")"; teichmann@5735: teichmann@5735: ImportTimeInterval timeInterval = getTimeInterval( teichmann@5735: result.get(2).trim()); teichmann@5735: teichmann@5735: if (timeInterval == null) { // No date at all. teichmann@5735: type = name; teichmann@5735: } teichmann@5735: teichmann@5735: return new NameAndTimeInterval(type, timeInterval); teichmann@5735: } teichmann@5735: teichmann@5736: // more than 3 elements return unmodified. teichmann@5736: teichmann@5736: return new NameAndTimeInterval(name); teichmann@5735: } teichmann@5735: teichmann@5735: private static ImportTimeInterval getTimeInterval(String datePart) { teichmann@5735: teichmann@5735: int minus = datePart.indexOf('-'); teichmann@5735: teichmann@5735: if (minus < 0) { // '-' not found teichmann@5735: teichmann@5735: Date date = null; teichmann@5735: try { teichmann@5735: date = DateGuesser.guessDate(datePart); teichmann@5735: } teichmann@5735: catch (IllegalArgumentException iae) { teichmann@5735: log.warn("STA: Invalid date '" + datePart + "'"); teichmann@5735: return null; teichmann@5735: } teichmann@5735: teichmann@5735: return new ImportTimeInterval(date); teichmann@5735: } teichmann@5735: teichmann@5735: // Found '-' so we have - teichmann@5735: String startPart = datePart.substring(0, minus).trim(); teichmann@5735: String endPart = datePart.substring(minus).trim(); teichmann@5735: teichmann@5735: Date startDate = null; teichmann@5735: Date endDate = null; teichmann@5735: teichmann@5735: try { teichmann@5735: startDate = DateGuesser.guessDate(startPart); teichmann@5735: } teichmann@5735: catch (IllegalArgumentException iae) { teichmann@5735: log.warn("STA: Invalid start date '" + startPart + "'"); teichmann@5735: } teichmann@5735: teichmann@5735: try { teichmann@5735: endDate = DateGuesser.guessDate(endPart); teichmann@5735: } teichmann@5735: catch (IllegalArgumentException iae) { teichmann@5735: log.warn("STA: Invalid end date '" + endPart + "'"); teichmann@5735: } teichmann@5735: teichmann@5735: if (startDate == null) { teichmann@5735: log.warn("STA: Need start date."); teichmann@5735: return null; teichmann@5735: } teichmann@5735: teichmann@5735: return new ImportTimeInterval(startDate, endDate); teichmann@5735: } teichmann@5735: teichmann@5735: private static int unbracket(String s, int index, List result) { teichmann@5735: StringBuilder sb = new StringBuilder(); teichmann@5735: int length = s.length(); teichmann@5735: while (index < length) { teichmann@5735: char c = s.charAt(index); teichmann@5735: switch (c) { teichmann@5735: case '(': teichmann@5738: index = unbracket(s, index+1, result); teichmann@5735: break; teichmann@5735: case ')': teichmann@5735: result.add(0, sb.toString()); teichmann@5735: return index+1; teichmann@5735: default: teichmann@5735: sb.append(c); teichmann@5737: ++index; teichmann@5735: } teichmann@5735: } teichmann@5735: result.add(0, sb.toString()); teichmann@5735: teichmann@5735: return index; teichmann@5735: } teichmann@5902: teichmann@5902: /* teichmann@5902: public static void main(String [] args) { teichmann@5902: for (String arg: args) { teichmann@5902: NameAndTimeInterval nti = parseName(arg); teichmann@5902: System.out.println(arg + " -> " + nti); teichmann@5902: } teichmann@5902: } teichmann@5902: */ sascha@1211: } sascha@1211: // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :