Mercurial > dive4elements > river
diff flys-backend/src/main/java/de/intevation/flys/importer/WstParser.java @ 199:ed38839a6b08
Ported over some WST parsing stuff from desktop flys
flys-backend/trunk@1538 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author | Sascha L. Teichmann <sascha.teichmann@intevation.de> |
---|---|
date | Tue, 22 Mar 2011 15:48:09 +0000 |
parents | c0dcc2357106 |
children | 88048d4f6e4d |
line wrap: on
line diff
--- a/flys-backend/src/main/java/de/intevation/flys/importer/WstParser.java Tue Mar 22 12:15:18 2011 +0000 +++ b/flys-backend/src/main/java/de/intevation/flys/importer/WstParser.java Tue Mar 22 15:48:09 2011 +0000 @@ -1,15 +1,336 @@ package de.intevation.flys.importer; +import java.util.ArrayList; +import java.util.Map; +import java.util.HashMap; + import java.io.File; import java.io.IOException; +import java.io.LineNumberReader; +import java.io.InputStreamReader; +import java.io.FileInputStream; + +import java.text.NumberFormat; + +import org.apache.log4j.Logger; + +import de.intevation.flys.utils.StringUtil; public class WstParser { + private static Logger log = Logger.getLogger(WstParser.class); + + public static final String COLUMN_BEZ_TEXT = "column-bez-text"; + public static final String COLUMN_BEZ_BREITE = "column-bez-breite"; + public static final String COLUMN_QUELLE = "column-quelle"; + public static final String COLUMN_DATUM = "column-datum"; + + public static final Double UNDEFINED_ZERO = Double.valueOf(0.0); + + public static final String ENCODING = "ISO-8859-1"; + public WstParser() { } public void parse(File file) throws IOException { - // TODO: Implement me! + + log.info("Parsing WST file '" + file + "'"); + + LineNumberReader in = null; + try { + in = + new LineNumberReader( + new InputStreamReader( + new FileInputStream(file), ENCODING)); + + String input; + boolean first = true; + int columnCount = 0; + + String [] lsHeader = null; + String [] lsBezeichner = null; + String [] langBezeichner = null; + int [] colNaWidths = null; + String [] quellen = null; + String [] daten = null; + double [] aktAbfluesse = null; + double [] firstAbfluesse = null; + + double minKm = Double.MAX_VALUE; + double maxKm = -Double.MAX_VALUE; + + boolean bFirstComment = true; + boolean columnHeaderChecked = false; + + double lastKm = Double.MAX_VALUE; + + String einheit = "Wassserstand [NN + m]"; + + HashMap<String, Double> oldEscapeLine = null; + + while ((input = in.readLine()) != null) { + String line = input; + if (first) { // fetch number of columns + if ((line = line.trim()).length() == 0) { + continue; + } + try { + columnCount = Integer.parseInt(line); + if (columnCount <= 0) { + throw new NumberFormatException( + "number columns <= 0"); + } + log.debug("Number of columns: " + columnCount); + lsBezeichner = new String[columnCount]; + lsHeader = new String[columnCount]; + aktAbfluesse = new double[columnCount]; + } + catch (NumberFormatException nfe) { + log.warn(nfe); + continue; + } + first = false; + continue; + } + + line = line.replace(',', '.'); + + if (line.startsWith("*\u001f")) { + Double [] data = + parseLineAsDouble(line, columnCount, false, true); + + if (oldEscapeLine != null) { + addInterval(minKm, maxKm, oldEscapeLine); + minKm = Double.MAX_VALUE; + maxKm = -Double.MAX_VALUE; + } + + oldEscapeLine = new HashMap<String, Double>(); + for (int i = 0; i < columnCount; ++i) { + if (lsHeader[i] != null) { + oldEscapeLine.put(lsHeader[i], data[i]); + } + } + + for (int i = Math.min(data.length, aktAbfluesse.length)-1; + i >= 0; --i) { + aktAbfluesse[i] = data[i].doubleValue(); + } + + if (firstAbfluesse == null) { + firstAbfluesse = (double [])aktAbfluesse.clone(); + } + continue; + } + + if (line.startsWith("*!")) { + String spezial = line.substring(2).trim(); + + if (spezial.length() == 0) { + continue; + } + + if (spezial.startsWith(COLUMN_BEZ_TEXT)) { + spezial = spezial.substring(COLUMN_BEZ_TEXT.length()).trim(); + if (spezial.length() == 0) { + continue; + } + langBezeichner = StringUtil.splitQuoted(spezial, '"'); + } + else if (spezial.startsWith(COLUMN_BEZ_BREITE)) { + spezial = spezial.substring(COLUMN_BEZ_BREITE.length()).trim(); + + if (spezial.length() == 0) { + continue; + } + + String[] split = spezial.split("\\s+"); + + colNaWidths = new int[split.length]; + for (int i=0; i < split.length; i++) { + colNaWidths[i] = Integer.parseInt(split[i]); + } + } + else if (spezial.startsWith(COLUMN_QUELLE)) { + if (spezial.length() == 0) { + continue; + } + quellen = StringUtil.splitQuoted(spezial, '"'); + } + else if (spezial.startsWith(COLUMN_DATUM)) { + spezial = spezial.substring(COLUMN_DATUM.length()).trim(); + if (spezial.length() == 0) { + continue; + } + daten = StringUtil.splitQuoted(spezial, '"'); + } + continue; + } + + if (line.startsWith("*")) { + if (bFirstComment && line.length() >= 11) { + String yAxis = line.substring(10).trim(); + if (yAxis.length() > 0) { + einheit = yAxis; + } + bFirstComment = false; + } + continue; + } + + if (firstAbfluesse != null) { + if (!columnHeaderChecked) { + int unknownCount = 0; + for (int i = 0; i < lsHeader.length; ++i) { + if (lsBezeichner[i] == null + || lsBezeichner[i].length() == 0) { + double q = firstAbfluesse[i]; + if (q < 0.001) { + lsBezeichner[i] = + "<unbekannt#" + unknownCount + ">"; + ++unknownCount; + } + else { + lsBezeichner[i] = "Q="+format(q); + } + } + lsHeader[i] = lsBezeichner[i] + " " + einheit; + } + columnHeaderChecked = true; + } + + Double [] data = + parseLineAsDouble(line, columnCount, true, false); + + double kaem = data[0]; + + if (kaem < minKm) { + minKm = kaem; + } + if (kaem > maxKm) { + maxKm = kaem; + } + + lastKm = kaem; + + // extract values + for (int i = 0; i < columnCount; ++i) { + addValue(kaem, data[i].doubleValue(), lsBezeichner[i]); + } + + } + else { // firstAbfluesse == null + if (langBezeichner != null) { + lsBezeichner = StringUtil.fitArray( + langBezeichner, lsBezeichner); + } + else if (colNaWidths != null) { + for (int j = 0, i = 0, N = input.length(); + j < colNaWidths.length && i < N; + i += colNaWidths[j++] + ) { + lsBezeichner[j] = input.substring( + i, i+colNaWidths[j]).trim(); + } + } + else { + // first column begins at position 8 in line + for (int i = 8, col = 0; i < input.length(); i += 9) { + if ((i + 9) > input.length()) { + i = input.length() - 10; + } + // one column header is 9 chars wide + lsBezeichner[col++] = + input.substring(i, i + 9).trim(); + + if (col == lsBezeichner.length) { + break; + } + } + } + } + + } + addInterval(minKm, maxKm, oldEscapeLine); + } + finally { + if (in != null) { + in.close(); + } + } + } + + protected void addValue(double km, double w, String columnName) { + // TODO: store me! + } + + protected static String format(double value) { + NumberFormat nf = NumberFormat.getInstance(); + nf.setMinimumFractionDigits(2); + nf.setMaximumFractionDigits(2); + return nf.format(value); + } + + protected void addInterval( + double from, + double to, + Map<String, Double> values + ) { + log.debug("addInterval: " + from + " " + to); + if (values == null) { + return; + } + } + + private static final Double [] parseLineAsDouble( + String line, + int count, + boolean bStation, + boolean bParseEmptyAsZero + ) { + String [] tokens = parseLine(line, count, bStation); + + Double [] doubles = new Double[tokens.length]; + + for (int i = 0; i < doubles.length; ++i) { + String token = tokens[i].trim(); + if (token.length() != 0) { + doubles[i] = Double.valueOf(token); + } + else if (bParseEmptyAsZero) { + doubles[i] = UNDEFINED_ZERO; + } + } + + return doubles; + } + + private static String [] parseLine( + String line, + int tokenCount, + boolean bParseStation + ) { + ArrayList<String> strings = new ArrayList<String>(); + + if (bParseStation) { + if (line.length() < 8) { + throw new IllegalArgumentException("station too short"); + } + strings.add(line.substring(0, 8)); + } + + int pos = 9; + for (int i = 0; i < tokenCount; ++i) { + if (line.length() >= pos + 8) { + strings.add(line.substring(pos, pos + 8)); + } + else { + strings.add(""); + } + pos += 9; + } + + return strings.toArray(new String[strings.size()]); } } // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :