Mercurial > dive4elements > river
view flys-backend/src/main/java/de/intevation/flys/importer/WstParser.java @ 470:f4afea9b7537
Forget ',' in schema.
flys-backend/trunk@1709 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author | Sascha L. Teichmann <sascha.teichmann@intevation.de> |
---|---|
date | Fri, 15 Apr 2011 15:28:35 +0000 |
parents | d37ccb04ab5d |
children | 5d920695a7f0 |
line wrap: on
line source
package de.intevation.flys.importer; import java.util.ArrayList; import java.util.Map; import java.util.HashMap; import java.io.File; import java.io.IOException; import java.io.LineNumberReader; import java.io.InputStreamReader; import java.io.FileInputStream; import java.text.NumberFormat; import org.apache.log4j.Logger; import de.intevation.flys.utils.StringUtil; import java.util.regex.Pattern; import java.util.regex.Matcher; import java.math.BigDecimal; public class WstParser { private static Logger log = Logger.getLogger(WstParser.class); public static final String COLUMN_BEZ_TEXT = "column-bez-text"; public static final String COLUMN_BEZ_BREITE = "column-bez-breite"; public static final String COLUMN_QUELLE = "column-quelle"; public static final String COLUMN_DATUM = "column-datum"; public static final Double UNDEFINED_ZERO = Double.valueOf(0.0); public static final String ENCODING = "ISO-8859-1"; public static final Pattern UNIT_COMMENT = Pattern.compile("\\*\\s*[kK][mM]\\s+(.+)"); public static final Pattern UNIT = Pattern.compile("[^\\[]*\\[([^]]+)\\].*"); protected ImportWst wst; public WstParser() { } public ImportWst getWst() { return wst; } public void setWst(ImportWst wst) { this.wst = wst; } public void parse(File file) throws IOException { log.info("Parsing WST file '" + file + "'"); wst = new ImportWst(file.getName()); LineNumberReader in = null; try { in = new LineNumberReader( new InputStreamReader( new FileInputStream(file), ENCODING)); String input; boolean first = true; int columnCount = 0; String [] lsHeader = null; String [] lsBezeichner = null; String [] langBezeichner = null; int [] colNaWidths = null; String [] quellen = null; String [] daten = null; double [] aktAbfluesse = null; double [] firstAbfluesse = null; double minKm = Double.MAX_VALUE; double maxKm = -Double.MAX_VALUE; boolean columnHeaderChecked = false; double lastKm = Double.MAX_VALUE; String einheit = "Wasserstand [NN + m]"; HashMap<String, Double> oldEscapeLine = null; while ((input = in.readLine()) != null) { String line = input; if (first) { // fetch number of columns if ((line = line.trim()).length() == 0) { continue; } try { columnCount = Integer.parseInt(line); if (columnCount <= 0) { throw new NumberFormatException( "number columns <= 0"); } log.debug("Number of columns: " + columnCount); lsBezeichner = new String[columnCount]; lsHeader = new String[columnCount]; aktAbfluesse = new double[columnCount]; } catch (NumberFormatException nfe) { log.warn(nfe); continue; } first = false; continue; } line = line.replace(',', '.'); if (line.startsWith("*\u001f")) { Double [] data = parseLineAsDouble(line, columnCount, false, true); if (oldEscapeLine != null) { addInterval(minKm, maxKm, oldEscapeLine); minKm = Double.MAX_VALUE; maxKm = -Double.MAX_VALUE; } oldEscapeLine = new HashMap<String, Double>(); for (int i = 0; i < columnCount; ++i) { if (lsHeader[i] != null) { oldEscapeLine.put(lsHeader[i], data[i]); } } for (int i = Math.min(data.length, aktAbfluesse.length)-1; i >= 0; --i) { aktAbfluesse[i] = data[i].doubleValue(); } if (firstAbfluesse == null) { firstAbfluesse = (double [])aktAbfluesse.clone(); } continue; } if (line.startsWith("*!")) { String spezial = line.substring(2).trim(); if (spezial.length() == 0) { continue; } if (spezial.startsWith(COLUMN_BEZ_TEXT)) { spezial = spezial.substring(COLUMN_BEZ_TEXT.length()).trim(); if (spezial.length() == 0) { continue; } langBezeichner = StringUtil.splitQuoted(spezial, '"'); } else if (spezial.startsWith(COLUMN_BEZ_BREITE)) { spezial = spezial.substring(COLUMN_BEZ_BREITE.length()).trim(); if (spezial.length() == 0) { continue; } String[] split = spezial.split("\\s+"); colNaWidths = new int[split.length]; for (int i=0; i < split.length; i++) { colNaWidths[i] = Integer.parseInt(split[i]); } } else if (spezial.startsWith(COLUMN_QUELLE)) { if (spezial.length() == 0) { continue; } quellen = StringUtil.splitQuoted(spezial, '"'); } else if (spezial.startsWith(COLUMN_DATUM)) { spezial = spezial.substring(COLUMN_DATUM.length()).trim(); if (spezial.length() == 0) { continue; } daten = StringUtil.splitQuoted(spezial, '"'); } continue; } if (line.length() < 11) { continue; } if (line.startsWith("*")) { Matcher m = UNIT_COMMENT.matcher(line); if (m.matches()) { log.debug("unit comment found"); // XXX: This hack is needed because desktop // FLYS is broken figuring out the unit String [] units = m.group(1).split("\\s{2,}"); m = UNIT.matcher(units[0]); einheit = m.matches() ? m.group(1) : units[0]; log.debug("unit: " + einheit); } continue; } if (firstAbfluesse != null) { if (!columnHeaderChecked) { int unknownCount = 0; for (int i = 0; i < lsHeader.length; ++i) { if (lsBezeichner[i] == null || lsBezeichner[i].length() == 0) { double q = firstAbfluesse[i]; if (q < 0.001) { lsBezeichner[i] = "<unbekannt #" + unknownCount + ">"; ++unknownCount; } else { lsBezeichner[i] = "Q="+format(q); } } lsHeader[i] = lsBezeichner[i]; } columnHeaderChecked = true; } Double [] data = parseLineAsDouble(line, columnCount, true, false); double kaem = data[0]; if (kaem < minKm) { minKm = kaem; } if (kaem > maxKm) { maxKm = kaem; } lastKm = kaem; // extract values for (int i = 0; i < columnCount; ++i) { addValue(kaem, data[i].doubleValue(), lsBezeichner[i]); } } else { // firstAbfluesse == null if (langBezeichner != null) { lsBezeichner = StringUtil.fitArray( langBezeichner, lsBezeichner); } else if (colNaWidths != null) { for (int j = 0, i = 0, N = input.length(); j < colNaWidths.length && i < N; i += colNaWidths[j++] ) { lsBezeichner[j] = input.substring( i, i+colNaWidths[j]).trim(); } } else { // first column begins at position 8 in line for (int i = 8, col = 0; i < input.length(); i += 9) { if ((i + 9) > input.length()) { i = input.length() - 10; } // one column header is 9 chars wide lsBezeichner[col++] = input.substring(i, i + 9).trim(); if (col == lsBezeichner.length) { break; } } } } } addInterval(minKm, maxKm, oldEscapeLine); } finally { if (in != null) { in.close(); } } } protected void addValue(double km, double w, String columnName) { ImportWstColumn column = wst.getColumn(columnName); column.addColumnValue(new BigDecimal(km), new BigDecimal(w)); } private static final NumberFormat NF = getNumberFormat(); private static final NumberFormat getNumberFormat() { NumberFormat nf = NumberFormat.getInstance(); nf.setMinimumFractionDigits(2); nf.setMaximumFractionDigits(2); return nf; } protected static String format(double value) { return NF.format(value); } protected void addInterval( double from, double to, Map<String, Double> values ) { log.debug("addInterval: " + from + " " + to); if (values == null) { return; } if (from > to) { double t = from; from = to; to = t; } ImportRange range = new ImportRange( new BigDecimal(from), new BigDecimal(to)); for (Map.Entry<String, Double> entry: values.entrySet()) { BigDecimal q = new BigDecimal(entry.getValue()); ImportWstQRange wstQRange = new ImportWstQRange(range, q); String columnName = entry.getKey(); ImportWstColumn column = wst.getColumn(columnName); column.addColumnQRange(wstQRange); } } private static final Double [] parseLineAsDouble( String line, int count, boolean bStation, boolean bParseEmptyAsZero ) { String [] tokens = parseLine(line, count, bStation); Double [] doubles = new Double[tokens.length]; for (int i = 0; i < doubles.length; ++i) { String token = tokens[i].trim(); if (token.length() != 0) { doubles[i] = Double.valueOf(token); } else if (bParseEmptyAsZero) { doubles[i] = UNDEFINED_ZERO; } } return doubles; } private static String [] parseLine( String line, int tokenCount, boolean bParseStation ) { ArrayList<String> strings = new ArrayList<String>(); if (bParseStation) { if (line.length() < 8) { throw new IllegalArgumentException("station too short"); } strings.add(line.substring(0, 8)); } int pos = 9; for (int i = 0; i < tokenCount; ++i) { if (line.length() >= pos + 8) { strings.add(line.substring(pos, pos + 8)); } else { strings.add(""); } pos += 9; } return strings.toArray(new String[strings.size()]); } } // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :