Mercurial > dive4elements > river
view flys-backend/src/main/java/de/intevation/flys/importer/parsers/StaFileParser.java @ 5738:54077c9c9305
Backend: Fix Stack overflow.
author | Sascha L. Teichmann <teichmann@intevation.de> |
---|---|
date | Tue, 16 Apr 2013 18:55:52 +0200 |
parents | ce002608992d |
children |
line wrap: on
line source
package de.intevation.flys.importer.parsers; import java.io.File; import java.io.IOException; import java.io.LineNumberReader; import java.io.FileInputStream; import java.io.InputStreamReader; import java.math.BigDecimal; import java.util.regex.Pattern; import java.util.regex.Matcher; import java.util.Date; import java.util.HashMap; import java.util.ArrayList; import java.util.List; import org.apache.log4j.Logger; import de.intevation.flys.importer.ImportMainValueType; import de.intevation.flys.importer.ImportMainValue; import de.intevation.flys.importer.ImportNamedMainValue; import de.intevation.flys.importer.ImportGauge; import de.intevation.flys.importer.ImportTimeInterval; import de.intevation.flys.utils.DateGuesser; public class StaFileParser { private static Logger log = Logger.getLogger(StaFileParser.class); public static final String ENCODING = "ISO-8859-1"; public static final String TYPES = System.getProperty("flys.backend.main.value.types", "QWTD"); public static final boolean NOT_PARSE_GAUGE_NUMBERS = Boolean.getBoolean("flys.backend.sta.not.parse.gauge.numbers"); public static final Pattern QWTD_ = Pattern.compile("\\s*([^\\s]+)\\s+([^\\s]+)\\s+([" + Pattern.quote(TYPES) + "]).*"); public static final class NameAndTimeInterval { private String name; private ImportTimeInterval timeInterval; public NameAndTimeInterval(String name) { this(name, null); } public NameAndTimeInterval(String name, ImportTimeInterval timeInterval) { this.name = name; this.timeInterval = timeInterval; } public String getName() { return name; } public ImportTimeInterval getTimeInterval() { return timeInterval; } } // class NameAndTimeInterval public StaFileParser() { } public boolean parse(ImportGauge gauge) throws IOException { File file = gauge.getStaFile(); log.info("parsing STA file: " + file); LineNumberReader in = null; try { in = new LineNumberReader( new InputStreamReader( new FileInputStream(file), ENCODING)); String line = in.readLine(); if (line == null) { log.warn("STA file is empty."); return false; } if (line.length() < 37) { log.warn("First line in STA file is too short."); return false; } String gaugeName = line.substring(16, 28).trim(); Long gaugeNumber = null; if (!NOT_PARSE_GAUGE_NUMBERS) { String gaugeNumberString = line.substring(8, 16).trim(); try { gaugeNumber = Long.parseLong(gaugeNumberString); } catch (NumberFormatException nfe) { log.warn("STA: '" + gaugeNumberString + "' is not a valid long number."); } } gauge.setName(gaugeName); gauge.setOfficialNumber(gaugeNumber); if (log.isDebugEnabled()) { log.debug( "name/number: '" + gaugeName + "' '" + gaugeNumber + "'"); } String [] values = line.substring(38).trim().split("\\s+", 2); if (values.length < 2) { log.warn("STA: Not enough columns for aeo and datum."); } try { gauge.setAeo(new BigDecimal(values[0].replace(",", "."))); gauge.setDatum(new BigDecimal(values[1].replace(",", "."))); } catch (NumberFormatException nfe) { log.warn("STA: cannot parse aeo or datum."); return false; } line = in.readLine(); if (line == null) { log.warn("STA file has not enough lines"); return false; } if (line.length() < 36) { log.warn("STA: second line is too short"); return false; } try { gauge.setStation( new BigDecimal(line.substring(29, 36).trim())); } catch (NumberFormatException nfe) { log.warn("STA: parsing of the datum of the gauge failed"); return false; } // overread the next six lines for (int i = 0; i < 6; ++i) { if ((line = in.readLine()) == null) { log.warn("STA file is too short"); return false; } } HashMap<String, ImportMainValueType> types = new HashMap<String, ImportMainValueType>(); ArrayList<ImportNamedMainValue> namedMainValues = new ArrayList<ImportNamedMainValue>(); ArrayList<ImportMainValue> mainValues = new ArrayList<ImportMainValue>(); while ((line = in.readLine()) != null) { Matcher m = QWTD_.matcher(line); if (m.matches()) { BigDecimal value; try { value = new BigDecimal(m.group(2).replace(",", ".")); } catch (NumberFormatException nfe) { log.warn("STA: value not parseable in line " + in.getLineNumber()); continue; } String typeString = m.group(3); log.debug("\t type: " + typeString); ImportMainValueType type = types.get(typeString); if (type == null) { type = new ImportMainValueType(typeString); types.put(typeString, type); } String name = m.group(1); NameAndTimeInterval nat = parseName(name); ImportNamedMainValue namedMainValue = new ImportNamedMainValue(type, nat.getName()); namedMainValues.add(namedMainValue); ImportMainValue mainValue = new ImportMainValue( gauge, namedMainValue, value, nat.getTimeInterval()); mainValues.add(mainValue); } else { // TODO: treat as a comment } } gauge.setMainValueTypes( new ArrayList<ImportMainValueType>(types.values())); gauge.setNamedMainValues(namedMainValues); gauge.setMainValues(mainValues); } finally { if (in != null) { in.close(); } } log.info("finished parsing STA file: " + file); return true; } protected NameAndTimeInterval parseName(String name) { List<String> result = new ArrayList<String>(); unbracket(name, 0, result); int length = result.size(); if (length < 1) { // Should not happen. return new NameAndTimeInterval(name); } if (length == 1) { // No date at all -> use first part. return new NameAndTimeInterval(result.get(0).trim()); } if (length == 2) { // e.g. W(1994) or W(1994 - 1999) String type = result.get(0).trim(); ImportTimeInterval timeInterval = getTimeInterval( result.get(1).trim()); if (timeInterval == null) { // No date at all. type = name; } return new NameAndTimeInterval(type, timeInterval); } if (length == 3) { // e.g W(Q(1994)) or W(Q(1994 - 1999)) String type = result.get(0).trim() + "(" + result.get(1).trim() + ")"; ImportTimeInterval timeInterval = getTimeInterval( result.get(2).trim()); if (timeInterval == null) { // No date at all. type = name; } return new NameAndTimeInterval(type, timeInterval); } // more than 3 elements return unmodified. return new NameAndTimeInterval(name); } private static ImportTimeInterval getTimeInterval(String datePart) { int minus = datePart.indexOf('-'); if (minus < 0) { // '-' not found Date date = null; try { date = DateGuesser.guessDate(datePart); } catch (IllegalArgumentException iae) { log.warn("STA: Invalid date '" + datePart + "'"); return null; } return new ImportTimeInterval(date); } // Found '-' so we have <from> - <to> String startPart = datePart.substring(0, minus).trim(); String endPart = datePart.substring(minus).trim(); Date startDate = null; Date endDate = null; try { startDate = DateGuesser.guessDate(startPart); } catch (IllegalArgumentException iae) { log.warn("STA: Invalid start date '" + startPart + "'"); } try { endDate = DateGuesser.guessDate(endPart); } catch (IllegalArgumentException iae) { log.warn("STA: Invalid end date '" + endPart + "'"); } if (startDate == null) { log.warn("STA: Need start date."); return null; } return new ImportTimeInterval(startDate, endDate); } private static int unbracket(String s, int index, List<String> result) { StringBuilder sb = new StringBuilder(); int length = s.length(); while (index < length) { char c = s.charAt(index); switch (c) { case '(': index = unbracket(s, index+1, result); break; case ')': result.add(0, sb.toString()); return index+1; default: sb.append(c); ++index; } } result.add(0, sb.toString()); return index; } } // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :