Mercurial > dive4elements > river
diff flys-backend/src/main/java/de/intevation/flys/importer/WstParser.java @ 508:a9c7f6ec3a5a 2.3.1
merged flys-backend/2.3.1
author | Thomas Arendsen Hein <thomas@intevation.de> |
---|---|
date | Fri, 28 Sep 2012 12:14:12 +0200 |
parents | a92da0b3e8e7 |
children | 677a6fceea6e |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/flys-backend/src/main/java/de/intevation/flys/importer/WstParser.java Fri Sep 28 12:14:12 2012 +0200 @@ -0,0 +1,400 @@ +package de.intevation.flys.importer; + +import java.util.ArrayList; +import java.util.HashSet; + +import java.io.File; +import java.io.IOException; +import java.io.LineNumberReader; +import java.io.InputStreamReader; +import java.io.FileInputStream; + +import java.text.NumberFormat; + +import org.apache.log4j.Logger; + +import de.intevation.flys.utils.StringUtil; + +import java.util.regex.Pattern; +import java.util.regex.Matcher; + +import java.math.BigDecimal; + +public class WstParser +{ + private static Logger log = Logger.getLogger(WstParser.class); + + public static final String COLUMN_BEZ_TEXT = "column-bez-text"; + public static final String COLUMN_BEZ_BREITE = "column-bez-breite"; + public static final String COLUMN_QUELLE = "column-quelle"; + public static final String COLUMN_DATUM = "column-datum"; + + public static final BigDecimal UNDEFINED_ZERO = + new BigDecimal(0.0); + public static final BigDecimal MIN_RANGE = + new BigDecimal(-Double.MAX_VALUE); + public static final BigDecimal MAX_RANGE = + new BigDecimal(Double.MAX_VALUE); + + public static final String ENCODING = "ISO-8859-1"; + + public static final Pattern UNIT_COMMENT = + Pattern.compile("\\*\\s*[kK][mM]\\s+(.+)"); + + public static final Pattern UNIT = + Pattern.compile("[^\\[]*\\[([^]]+)\\].*"); + + protected ImportWst wst; + + public WstParser() { + } + + public ImportWst getWst() { + return wst; + } + + public void setWst(ImportWst wst) { + this.wst = wst; + } + + public void parse(File file) throws IOException { + + log.info("Parsing WST file '" + file + "'"); + + wst = new ImportWst(file.getName()); + + LineNumberReader in = null; + try { + in = + new LineNumberReader( + new InputStreamReader( + new FileInputStream(file), ENCODING)); + + String input; + boolean first = true; + int columnCount = 0; + + String [] lsBezeichner = null; + String [] langBezeichner = null; + int [] colNaWidths = null; + String [] quellen = null; + String [] daten = null; + + BigDecimal [] aktAbfluesse = null; + BigDecimal [] firstAbfluesse = null; + + BigDecimal minKm = MAX_RANGE; + BigDecimal maxKm = MIN_RANGE; + + boolean columnHeaderChecked = false; + + String einheit = "Wasserstand [NN + m]"; + + HashSet<BigDecimal> kms = new HashSet<BigDecimal>(); + + while ((input = in.readLine()) != null) { + String line = input; + if (first) { // fetch number of columns + if ((line = line.trim()).length() == 0) { + continue; + } + try { + columnCount = Integer.parseInt(line); + if (columnCount <= 0) { + throw new NumberFormatException( + "number columns <= 0"); + } + log.debug("Number of columns: " + columnCount); + wst.setNumberColumns(columnCount); + lsBezeichner = new String[columnCount]; + } + catch (NumberFormatException nfe) { + log.warn(nfe); + continue; + } + first = false; + continue; + } + + line = line.replace(',', '.'); + + if (line.startsWith("*\u001f")) { + BigDecimal [] data = + parseLineAsDouble(line, columnCount, false, true); + + if (aktAbfluesse != null) { + addInterval(minKm, maxKm, aktAbfluesse); + minKm = MAX_RANGE; + maxKm = MIN_RANGE; + } + + aktAbfluesse = new BigDecimal[columnCount]; + log.debug("new q range: " + columnCount); + for (int i = 0; i < Math.min(columnCount, data.length); ++i) { + if (data[i] != null) { + log.debug(" column: " + data[i]); + aktAbfluesse[i] = data[i]; + } + } + + if (firstAbfluesse == null) { + firstAbfluesse = (BigDecimal [])aktAbfluesse.clone(); + } + continue; + } + + if (line.startsWith("*!")) { + String spezial = line.substring(2).trim(); + + if (spezial.length() == 0) { + continue; + } + + if (spezial.startsWith(COLUMN_BEZ_TEXT)) { + spezial = spezial.substring(COLUMN_BEZ_TEXT.length()).trim(); + if (spezial.length() == 0) { + continue; + } + langBezeichner = StringUtil.splitQuoted(spezial, '"'); + } + else if (spezial.startsWith(COLUMN_BEZ_BREITE)) { + spezial = spezial.substring(COLUMN_BEZ_BREITE.length()).trim(); + + if (spezial.length() == 0) { + continue; + } + + String[] split = spezial.split("\\s+"); + + colNaWidths = new int[split.length]; + for (int i=0; i < split.length; i++) { + colNaWidths[i] = Integer.parseInt(split[i]); + } + } + else if (spezial.startsWith(COLUMN_QUELLE)) { + if (spezial.length() == 0) { + continue; + } + quellen = StringUtil.splitQuoted(spezial, '"'); + } + else if (spezial.startsWith(COLUMN_DATUM)) { + spezial = spezial.substring(COLUMN_DATUM.length()).trim(); + if (spezial.length() == 0) { + continue; + } + daten = StringUtil.splitQuoted(spezial, '"'); + } + continue; + } + + if (line.length() < 11) { + continue; + } + + if (line.startsWith("*")) { + Matcher m = UNIT_COMMENT.matcher(line); + if (m.matches()) { + log.debug("unit comment found"); + // XXX: This hack is needed because desktop + // FLYS is broken figuring out the unit + String [] units = m.group(1).split("\\s{2,}"); + m = UNIT.matcher(units[0]); + einheit = m.matches() ? m.group(1) : units[0]; + log.debug("unit: " + einheit); + } + continue; + } + + if (firstAbfluesse != null) { + if (!columnHeaderChecked) { + int unknownCount = 0; + HashSet<String> uniqueColumnNames = + new HashSet<String>(); + for (int i = 0; i < lsBezeichner.length; ++i) { + if (lsBezeichner[i] == null + || lsBezeichner[i].length() == 0) { + double q = firstAbfluesse[i].doubleValue(); + if (q < 0.001) { + lsBezeichner[i] = + "<unbekannt #" + unknownCount + ">"; + ++unknownCount; + } + else { + lsBezeichner[i] = "Q="+format(q); + } + } + String candidate = lsBezeichner[i]; + int collision = 1; + while (!uniqueColumnNames.add(candidate)) { + candidate = lsBezeichner[i] + + " (" + collision + ")"; + ++collision; + } + wst.getColumn(i).setName(candidate); + } + columnHeaderChecked = true; + } + + BigDecimal [] data = + parseLineAsDouble(line, columnCount, true, false); + + BigDecimal kaem = data[0]; + + if (!kms.add(kaem)) { + log.warn( + "km " + kaem + + " (line " + in.getLineNumber() + + ") found more than once. -> ignored"); + continue; + } + + if (kaem.compareTo(minKm) < 0) { + minKm = kaem; + } + if (kaem.compareTo(maxKm) > 0) { + maxKm = kaem; + } + + // extract values + for (int i = 0; i < columnCount; ++i) { + addValue(kaem, data[i+1], i); + } + + } + else { // firstAbfluesse == null + if (langBezeichner != null) { + lsBezeichner = StringUtil.fitArray( + langBezeichner, lsBezeichner); + } + else if (colNaWidths != null) { + for (int j = 0, i = 0, N = input.length(); + j < colNaWidths.length && i < N; + i += colNaWidths[j++] + ) { + lsBezeichner[j] = input.substring( + i, i+colNaWidths[j]).trim(); + } + } + else { + // first column begins at position 8 in line + for (int i = 8, col = 0; i < input.length(); i += 9) { + if ((i + 9) > input.length()) { + i = input.length() - 10; + } + // one column header is 9 chars wide + lsBezeichner[col++] = + input.substring(i, i + 9).trim(); + + if (col == lsBezeichner.length) { + break; + } + } + } + } + + } + addInterval(minKm, maxKm, aktAbfluesse); + } + finally { + if (in != null) { + in.close(); + } + } + } + + protected void addValue(BigDecimal km, BigDecimal w, int index) { + if (w != null) { + ImportWstColumn column = wst.getColumn(index); + column.addColumnValue(km, w); + } + } + + private static final NumberFormat NF = getNumberFormat(); + + private static final NumberFormat getNumberFormat() { + NumberFormat nf = NumberFormat.getInstance(); + nf.setMinimumFractionDigits(2); + nf.setMaximumFractionDigits(2); + return nf; + } + + protected static String format(double value) { + return NF.format(value); + } + + protected void addInterval( + BigDecimal from, + BigDecimal to, + BigDecimal [] values + ) { + log.debug("addInterval: " + from + " " + to); + + if (values == null || from == MAX_RANGE) { + return; + } + + if (to.compareTo(from) < 0) { + BigDecimal t = from; from = to; to = t; + } + + ImportRange range = new ImportRange(from, to); + + for (int i = 0; i < values.length; ++i) { + ImportWstColumn column = wst.getColumn(i); + ImportWstQRange wstQRange = new ImportWstQRange(range, values[i]); + column.addColumnQRange(wstQRange); + } + } + + private static final BigDecimal [] parseLineAsDouble( + String line, + int count, + boolean bStation, + boolean bParseEmptyAsZero + ) { + String [] tokens = parseLine(line, count, bStation); + + BigDecimal [] doubles = new BigDecimal[tokens.length]; + + for (int i = 0; i < doubles.length; ++i) { + String token = tokens[i].trim(); + if (token.length() != 0) { + doubles[i] = new BigDecimal(token); + } + else if (bParseEmptyAsZero) { + doubles[i] = UNDEFINED_ZERO; + } + } + + return doubles; + } + + private static String [] parseLine( + String line, + int tokenCount, + boolean bParseStation + ) { + ArrayList<String> strings = new ArrayList<String>(); + + if (bParseStation) { + if (line.length() < 8) { + throw new IllegalArgumentException("station too short"); + } + strings.add(line.substring(0, 8)); + } + + int pos = 9; + for (int i = 0; i < tokenCount; ++i) { + if (line.length() >= pos + 8) { + strings.add(line.substring(pos, pos + 8)); + } + else { + strings.add(""); + } + pos += 9; + } + + return strings.toArray(new String[strings.size()]); + } +} +// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :