diff backend/src/main/java/org/dive4elements/river/importer/parsers/WstParser.java @ 5838:5aa05a7a34b7

Rename modules to more fitting names.
author Sascha L. Teichmann <teichmann@intevation.de>
date Thu, 25 Apr 2013 15:23:37 +0200
parents flys-backend/src/main/java/org/dive4elements/river/importer/parsers/WstParser.java@18619c1e7c2a
children 4dd33b86dc61
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/backend/src/main/java/org/dive4elements/river/importer/parsers/WstParser.java	Thu Apr 25 15:23:37 2013 +0200
@@ -0,0 +1,464 @@
+package org.dive4elements.river.importer.parsers;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.LineNumberReader;
+import java.io.InputStreamReader;
+import java.io.FileInputStream;
+
+import java.text.NumberFormat;
+
+import org.apache.log4j.Logger;
+
+import org.dive4elements.river.utils.StringUtil;
+import org.dive4elements.river.utils.DateGuesser;
+
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;
+
+import java.math.BigDecimal;
+
+import org.dive4elements.river.importer.ImportWstQRange;
+import org.dive4elements.river.importer.ImportWstColumn;
+import org.dive4elements.river.importer.ImportTimeInterval;
+import org.dive4elements.river.importer.ImportRange;
+import org.dive4elements.river.importer.ImportUnit;
+import org.dive4elements.river.importer.ImportWst;
+
+/**
+ * Parser for WST files. The result of {@link #parse(File)} is an
+ * {@link ImportWst} which can be fetched with {@link #getWst()}.
+ *
+ * <p>The file is read line by line using the encoding {@link #ENCODING}.
+ * The lines are interpreted as follows:</p>
+ * <ul>
+ * <li>The first non-empty line holds the number of columns.</li>
+ * <li>A line starting with <code>*</code> followed by the control
+ *     character U+001F starts a new Q range and carries one value
+ *     per column.</li>
+ * <li>A line starting with <code>*!</code> is a directive
+ *     (see the <code>COLUMN_*</code> constants).</li>
+ * <li>Any other line shorter than 11 characters is skipped.</li>
+ * <li>Any other line starting with <code>*</code> is a comment. A comment
+ *     matching {@link #UNIT_COMMENT} determines the unit.</li>
+ * <li>All remaining lines are column header lines as long as no Q range
+ *     line was seen, and data lines (station plus one value per column)
+ *     afterwards.</li>
+ * </ul>
+ *
+ * <p>An instance keeps the state of the current parse in its fields
+ * ({@link #wst}, {@link #lastRange}).</p>
+ */
+public class WstParser
+{
+    private static final Logger log = Logger.getLogger(WstParser.class);
+
+    /** Directive keyword: quoted long names of the columns. */
+    public static final String COLUMN_BEZ_TEXT   = "column-bez-text";
+    /** Directive keyword: widths of the column names in the header line. */
+    public static final String COLUMN_BEZ_BREITE = "column-bez-breite";
+    /** Directive keyword: quoted sources of the columns. */
+    public static final String COLUMN_QUELLE     = "column-quelle";
+    /** Directive keyword: quoted dates of the columns. */
+    public static final String COLUMN_DATUM      = "column-datum";
+
+    /** Value used for empty fields of a Q range line. */
+    public static final BigDecimal UNDEFINED_ZERO =
+        new BigDecimal(0.0);
+    /** Sentinel: initial value of the maximum km of an interval. */
+    public static final BigDecimal MIN_RANGE =
+        new BigDecimal(-Double.MAX_VALUE);
+    /** Sentinel: initial value of the minimum km of an interval. */
+    public static final BigDecimal MAX_RANGE =
+        new BigDecimal(Double.MAX_VALUE);
+
+    /** Character encoding used to read the file. */
+    public static final String ENCODING = "ISO-8859-1";
+
+    /** Comment line of the form "* km ..." which carries the unit. */
+    public static final Pattern UNIT_COMMENT =
+        Pattern.compile("\\*\\s*[kK][mM]\\s+(.+)");
+
+    /** Extracts the text between the first '[' and the next ']'. */
+    public static final Pattern UNIT =
+        Pattern.compile("[^\\[]*\\[([^]]+)\\].*");
+
+    /** Gap left between two consecutive Q ranges. */
+    public static final double INTERVAL_GAP = 0.00001d;
+
+    /** The WST currently built up. */
+    protected ImportWst wst;
+
+    /** The range created by the most recent call to addInterval(). */
+    protected ImportRange lastRange;
+
+    public WstParser() {
+    }
+
+    public ImportWst getWst() {
+        return wst;
+    }
+
+    public void setWst(ImportWst wst) {
+        this.wst = wst;
+    }
+
+    /**
+     * Tries to derive a time interval from a string.
+     *
+     * @param string The text to be examined, may be null.
+     * @return The time interval or null if the string is null or
+     *         the DateGuesser rejects it with an
+     *         IllegalArgumentException.
+     */
+    public ImportTimeInterval guessDate(String string) {
+        if (string == null) {
+            return null;
+        }
+        try {
+            return new ImportTimeInterval(
+                DateGuesser.guessDate(string));
+        }
+        catch (IllegalArgumentException iae) {
+            // Best effort: most column names simply carry no date.
+            log.debug("no date found in '" + string + "'");
+        }
+        return null;
+    }
+
+    /**
+     * Parses a WST file. Afterwards the result is available
+     * with {@link #getWst()}.
+     *
+     * @param file The file to parse.
+     * @throws IOException If reading the file fails.
+     * @throws NumberFormatException If a non-empty value field
+     *         is not a valid decimal number.
+     */
+    public void parse(File file) throws IOException {
+
+        log.info("Parsing WST file '" + file + "'");
+
+        wst = new ImportWst(file.getName());
+
+        // addInterval() rewrites the end of lastRange. A range left
+        // over from a previously parsed file must not be touched.
+        lastRange = null;
+
+        LineNumberReader in = null;
+        try {
+            in =
+                new LineNumberReader(
+                new InputStreamReader(
+                new FileInputStream(file), ENCODING));
+
+            String input;
+            boolean first = true;
+            int columnCount = 0;
+
+            String [] lsBezeichner   = null; // column names
+            String [] langBezeichner = null; // long column names
+            int    [] colNaWidths    = null; // widths of column names
+            String [] quellen        = null; // sources, parsed only
+            String [] daten          = null; // dates
+
+            // discharges of current and of first Q range
+            BigDecimal [] aktAbfluesse   = null;
+            BigDecimal [] firstAbfluesse = null;
+
+            BigDecimal minKm   = MAX_RANGE;
+            BigDecimal maxKm   = MIN_RANGE;
+            BigDecimal kmHist1 = null; // last km seen
+            BigDecimal kmHist2 = null; // km seen before kmHist1
+
+            boolean columnHeaderChecked = false;
+
+            String einheit = "Wasserstand [NN + m]";
+
+            // BigDecimal.equals() takes the scale into account,
+            // so the kms are stored without trailing zeros.
+            HashSet<BigDecimal> kms = new HashSet<BigDecimal>();
+
+            while ((input = in.readLine()) != null) {
+                String line = input;
+                if (first) { // fetch number of columns
+                    if ((line = line.trim()).length() == 0) {
+                        continue;
+                    }
+                    try {
+                        columnCount = Integer.parseInt(line);
+                        if (columnCount <= 0) {
+                            throw new NumberFormatException(
+                                "number columns <= 0");
+                        }
+                        log.debug("Number of columns: " + columnCount);
+                        wst.setNumberColumns(columnCount);
+                        lsBezeichner = new String[columnCount];
+                    }
+                    catch (NumberFormatException nfe) {
+                        log.warn("WST: invalid number.", nfe);
+                        continue;
+                    }
+                    first = false;
+                    continue;
+                }
+
+                // NOTE(review): this also alters commas inside the
+                // quoted texts of the directives -- confirm if intended.
+                line = line.replace(',', '.');
+
+                if (line.startsWith("*\u001f")) {
+                    BigDecimal [] data =
+                        parseLineAsDouble(line, columnCount, false, true);
+
+                    if (aktAbfluesse != null) {
+                        // close the Q range which ends here
+                        flushInterval(
+                            minKm, maxKm, kmHist1, kmHist2, aktAbfluesse);
+                        minKm = MAX_RANGE;
+                        maxKm = MIN_RANGE;
+                    }
+
+                    aktAbfluesse = new BigDecimal[columnCount];
+                    log.debug("new q range: " + columnCount);
+                    int N = Math.min(columnCount, data.length);
+                    for (int i = 0; i < N; ++i) {
+                        if (data[i] != null) {
+                            log.debug("  column: " + data[i]);
+                            aktAbfluesse[i] = data[i];
+                        }
+                    }
+
+                    if (firstAbfluesse == null) {
+                        firstAbfluesse = aktAbfluesse.clone();
+                    }
+                    continue;
+                }
+
+                if (line.startsWith("*!")) {
+                    String spezial = line.substring(2).trim();
+
+                    if (spezial.length() == 0) {
+                        continue;
+                    }
+
+                    if (spezial.startsWith(COLUMN_BEZ_TEXT)) {
+                        spezial = spezial.substring(
+                            COLUMN_BEZ_TEXT.length()).trim();
+                        if (spezial.length() == 0) {
+                            continue;
+                        }
+                        langBezeichner = StringUtil.splitQuoted(spezial, '"');
+                    }
+                    else if (spezial.startsWith(COLUMN_BEZ_BREITE)) {
+                        spezial = spezial.substring(
+                            COLUMN_BEZ_BREITE.length()).trim();
+                        if (spezial.length() == 0) {
+                            continue;
+                        }
+                        int [] widths = parseColumnWidths(spezial);
+                        if (widths != null) {
+                            colNaWidths = widths;
+                        }
+                    }
+                    else if (spezial.startsWith(COLUMN_QUELLE)) {
+                        // Strip the keyword like the other directives do.
+                        spezial = spezial.substring(
+                            COLUMN_QUELLE.length()).trim();
+                        if (spezial.length() == 0) {
+                            continue;
+                        }
+                        // The sources are parsed but not used any
+                        // further in this method.
+                        quellen = StringUtil.splitQuoted(spezial, '"');
+                    }
+                    else if (spezial.startsWith(COLUMN_DATUM)) {
+                        spezial = spezial.substring(
+                            COLUMN_DATUM.length()).trim();
+                        if (spezial.length() == 0) {
+                            continue;
+                        }
+                        daten = StringUtil.splitQuoted(spezial, '"');
+                    }
+                    continue;
+                }
+
+                if (line.length() < 11) {
+                    continue;
+                }
+
+                if (line.startsWith("*")) {
+                    String unit = parseUnit(line);
+                    if (unit != null) {
+                        einheit = unit;
+                    }
+                    continue;
+                }
+
+                if (firstAbfluesse == null) {
+                    // No Q range seen so far: this is a header line.
+                    lsBezeichner = readColumnNames(
+                        input, langBezeichner, colNaWidths, lsBezeichner);
+                    continue;
+                }
+
+                if (!columnHeaderChecked) {
+                    // first data line: now the column names are final
+                    nameColumns(lsBezeichner, firstAbfluesse, daten);
+                    columnHeaderChecked = true;
+                }
+
+                BigDecimal [] data =
+                    parseLineAsDouble(line, columnCount, true, false);
+
+                BigDecimal kaem = data[0];
+
+                if (kaem == null) {
+                    // An empty station field yields null.
+                    log.warn(
+                        "WST: no station found (line " +
+                        in.getLineNumber() + "). -> ignored");
+                    continue;
+                }
+
+                if (!kms.add(kaem.stripTrailingZeros())) {
+                    log.warn(
+                        "WST: km " + kaem +
+                        " (line " + in.getLineNumber() +
+                        ") found more than once. -> ignored");
+                    continue;
+                }
+
+                kmHist2 = kmHist1;
+                kmHist1 = kaem;
+
+                if (kaem.compareTo(minKm) < 0) {
+                    minKm = kaem;
+                }
+                if (kaem.compareTo(maxKm) > 0) {
+                    maxKm = kaem;
+                }
+
+                // extract values
+                for (int i = 0; i < columnCount; ++i) {
+                    addValue(kaem, data[i+1], i);
+                }
+
+            } // for all lines in WST file
+
+            wst.setUnit(new ImportUnit(einheit));
+
+            flushInterval(minKm, maxKm, kmHist1, kmHist2, aktAbfluesse);
+
+            fixRangesOrder();
+        }
+        finally {
+            if (in != null) {
+                in.close();
+            }
+        }
+    }
+
+    /**
+     * Hands the km interval of a finished Q range to addInterval().
+     * If the last two stations were read in descending order the
+     * interval is passed with its bounds swapped.
+     *
+     * @param minKm   Smallest km of the interval (or MAX_RANGE).
+     * @param maxKm   Largest km of the interval (or MIN_RANGE).
+     * @param kmHist1 The last km read, may be null.
+     * @param kmHist2 The km read before kmHist1, may be null.
+     * @param values  The values of the Q range, may be null.
+     */
+    private void flushInterval(
+        BigDecimal    minKm,
+        BigDecimal    maxKm,
+        BigDecimal    kmHist1,
+        BigDecimal    kmHist2,
+        BigDecimal [] values
+    ) {
+        boolean descending = kmHist1 != null && kmHist2 != null
+            && kmHist1.compareTo(kmHist2) < 0;
+
+        if (descending) {
+            addInterval(maxKm, minKm, values);
+        }
+        else {
+            addInterval(minKm, maxKm, values);
+        }
+    }
+
+    /**
+     * Parses the whitespace separated widths of a
+     * column-bez-breite directive.
+     *
+     * @param spec The trimmed, non-empty text after the keyword.
+     * @return The widths or null if one of them is not
+     *         a non-negative integer.
+     */
+    private static int [] parseColumnWidths(String spec) {
+        String [] split = spec.split("\\s+");
+        int [] widths = new int[split.length];
+        try {
+            for (int i = 0; i < split.length; i++) {
+                widths[i] = Integer.parseInt(split[i]);
+                if (widths[i] < 0) {
+                    throw new NumberFormatException(
+                        "column width < 0");
+                }
+            }
+        }
+        catch (NumberFormatException nfe) {
+            // Same policy as for the number of columns: warn and go on.
+            log.warn("WST: invalid column widths '" + spec + "'.", nfe);
+            return null;
+        }
+        return widths;
+    }
+
+    /**
+     * Extracts the unit from a comment line.
+     *
+     * @param line The comment line.
+     * @return The unit or null if the line does not match UNIT_COMMENT.
+     */
+    private static String parseUnit(String line) {
+        Matcher m = UNIT_COMMENT.matcher(line);
+        if (!m.matches()) {
+            return null;
+        }
+        log.debug("unit comment found");
+        // XXX: This hack is needed because desktop
+        // FLYS is broken figuring out the unit
+        String [] units = m.group(1).split("\\s{2,}");
+        m = UNIT.matcher(units[0]);
+        String unit = m.matches() ? m.group(1) : units[0];
+        log.debug("unit: " + unit);
+        return unit;
+    }
+
+    /**
+     * Determines the column names from a header line.
+     * Long names from a directive win over names cut out of the line
+     * with the given widths, which win over the default layout.
+     *
+     * @param input          The unmodified header line.
+     * @param langBezeichner Long names from a directive or null.
+     * @param colNaWidths    Widths from a directive or null.
+     * @param lsBezeichner   The array of names to fill.
+     * @return The array of names to use from now on.
+     */
+    private static String [] readColumnNames(
+        String    input,
+        String [] langBezeichner,
+        int    [] colNaWidths,
+        String [] lsBezeichner
+    ) {
+        if (langBezeichner != null) {
+            return StringUtil.fitArray(langBezeichner, lsBezeichner);
+        }
+
+        int N = input.length();
+
+        if (colNaWidths != null) {
+            for (int j = 0, i = 0;
+                 j < colNaWidths.length
+                 && j < lsBezeichner.length
+                 && i < N;
+                 i += colNaWidths[j++]
+            ) {
+                // The last name may be cut off by the end of the line.
+                int end = Math.min(i + colNaWidths[j], N);
+                lsBezeichner[j] = input.substring(i, end).trim();
+            }
+            return lsBezeichner;
+        }
+
+        // first column begins at position 8 in line
+        // NOTE(review): if the line ends inside a column the slice
+        // [N-10, N-1) is taken and, as i stays below N, repeated for
+        // all remaining columns. Kept as is -- confirm if intended.
+        for (int i = 8, col = 0; i < N; i += 9) {
+            if ((i + 9) > N) {
+                i = N - 10;
+            }
+            // one column header is 9 chars wide
+            lsBezeichner[col++] =
+                input.substring(i, i + 9).trim();
+
+            if (col == lsBezeichner.length) {
+                break;
+            }
+        }
+        return lsBezeichner;
+    }
+
+    /**
+     * Gives every column of the WST a unique name and a time interval.
+     * Columns without a name are named after the value of the first
+     * Q range. The time interval is guessed from the date of the column
+     * if there is one, else from the name of the column.
+     *
+     * @param lsBezeichner   The column names, missing ones are filled in.
+     * @param firstAbfluesse The values of the first Q range.
+     * @param daten          The dates of the columns or null.
+     */
+    private void nameColumns(
+        String     [] lsBezeichner,
+        BigDecimal [] firstAbfluesse,
+        String     [] daten
+    ) {
+        int unknownCount = 0;
+        HashSet<String> uniqueColumnNames = new HashSet<String>();
+
+        for (int i = 0; i < lsBezeichner.length; ++i) {
+            if (lsBezeichner[i] == null
+            || lsBezeichner[i].length() == 0) {
+                double q = firstAbfluesse[i].doubleValue();
+                if (q < 0.001) {
+                    lsBezeichner[i] =
+                        "<unbekannt #" + unknownCount + ">";
+                    ++unknownCount;
+                }
+                else {
+                    lsBezeichner[i] = "Q="+format(q);
+                }
+            }
+
+            // append a counter until the name is unique
+            String candidate = lsBezeichner[i];
+            int collision = 1;
+            while (!uniqueColumnNames.add(candidate)) {
+                candidate = lsBezeichner[i] +
+                    " (" + collision + ")";
+                ++collision;
+            }
+
+            ImportWstColumn iwc = wst.getColumn(i);
+            iwc.setName(candidate);
+            String potentialDate = daten != null && i < daten.length
+                ? daten[i]
+                : candidate;
+            iwc.setTimeInterval(guessDate(potentialDate));
+        }
+    }
+
+    protected void fixRangesOrder() {
+        wst.fixRangesOrder();
+    }
+
+    /**
+     * Adds a value to a column of the WST.
+     *
+     * @param km    The station of the value.
+     * @param w     The value. If null nothing is added.
+     * @param index The index of the column.
+     */
+    protected void addValue(BigDecimal km, BigDecimal w, int index) {
+        if (w != null) {
+            ImportWstColumn column = wst.getColumn(index);
+            column.addColumnValue(km, w);
+        }
+    }
+
+    // Uses the default locale of the JVM and is shared by all parsers.
+    private static final NumberFormat NF = getNumberFormat();
+
+    private static final NumberFormat getNumberFormat() {
+        NumberFormat nf = NumberFormat.getInstance();
+        nf.setMinimumFractionDigits(2);
+        nf.setMaximumFractionDigits(2);
+        return nf;
+    }
+
+    /** Formats a value with exactly two fraction digits. */
+    protected static String format(double value) {
+        return NF.format(value);
+    }
+
+    /**
+     * Adds a Q range for the given km interval to every column
+     * of the WST.
+     *
+     * @param from   Start of the interval.
+     * @param to     End of the interval.
+     * @param values One value per column. If null nothing is added.
+     */
+    protected void addInterval(
+        BigDecimal    from,
+        BigDecimal    to,
+        BigDecimal [] values
+    ) {
+        log.debug("addInterval: " + from + " " + to);
+
+        // The identity comparison is on purpose: the sentinels tell
+        // that no station was seen for this interval.
+        if (values == null || from == MAX_RANGE || from == MIN_RANGE) {
+            return;
+        }
+
+        ImportRange range = new ImportRange(from, to);
+
+        // little workaround to make the q ranges tightly fit.
+        // Leave a very small gap to ensure that the range queries
+        // still work.
+
+        if (lastRange != null) {
+            double a1 = lastRange.getA().doubleValue();
+            double b1 = lastRange.getB().doubleValue();
+            double a2 = range.getA().doubleValue();
+
+            // BigDecimal.valueOf() keeps the short decimal form of
+            // the double instead of its exact binary expansion.
+            if (a1 < b1) {
+                lastRange.setB(BigDecimal.valueOf(a2 - INTERVAL_GAP));
+            }
+            else { // a1 >= b1
+                lastRange.setB(BigDecimal.valueOf(a2 + INTERVAL_GAP));
+            }
+        }
+
+        for (int i = 0; i < values.length; ++i) {
+            ImportWstColumn column = wst.getColumn(i);
+            ImportWstQRange wstQRange = new ImportWstQRange(range, values[i]);
+            column.addColumnQRange(wstQRange);
+        }
+
+        lastRange = range;
+    }
+
+    /**
+     * Splits a line with parseLine() and converts the fields to numbers.
+     *
+     * @param line              The line to parse.
+     * @param count             The number of columns.
+     * @param bStation          Whether the line starts with a station.
+     * @param bParseEmptyAsZero Whether empty fields result in
+     *                          UNDEFINED_ZERO instead of null.
+     * @return One entry per field.
+     * @throws NumberFormatException If a non-empty field is not
+     *         a valid decimal number.
+     */
+    private static BigDecimal [] parseLineAsDouble(
+        String  line,
+        int     count,
+        boolean bStation,
+        boolean bParseEmptyAsZero
+    ) {
+        String [] tokens = parseLine(line, count, bStation);
+
+        BigDecimal [] doubles = new BigDecimal[tokens.length];
+
+        for (int i = 0; i < doubles.length; ++i) {
+            String token = tokens[i].trim();
+            if (token.length() != 0) {
+                doubles[i] = new BigDecimal(token);
+            }
+            else if (bParseEmptyAsZero) {
+                doubles[i] = UNDEFINED_ZERO;
+            }
+        }
+
+        return doubles;
+    }
+
+    /**
+     * Cuts a line into its fixed width fields. The station is taken
+     * from the first 8 characters. The columns are 8 characters wide
+     * and start at the positions 9, 18, 27 and so on. A column which
+     * is not completely contained in the line results in an empty string.
+     *
+     * @param line          The line to split.
+     * @param tokenCount    The number of columns.
+     * @param bParseStation Whether the station is the first result.
+     * @return The station (if requested) followed by tokenCount fields.
+     * @throws IllegalArgumentException If the station is requested and
+     *         the line has less than 8 characters.
+     */
+    private static String [] parseLine(
+        String  line,
+        int     tokenCount,
+        boolean bParseStation
+    ) {
+        ArrayList<String> strings = new ArrayList<String>();
+
+        if (bParseStation) {
+            if (line.length() < 8) {
+                throw new IllegalArgumentException("station too short");
+            }
+            strings.add(line.substring(0, 8));
+        }
+
+        int pos = 9;
+        for (int i = 0; i < tokenCount; ++i) {
+            if (line.length() >= pos + 8) {
+                strings.add(line.substring(pos, pos + 8));
+            }
+            else {
+                strings.add("");
+            }
+            pos += 9;
+        }
+
+        return strings.toArray(new String[strings.size()]);
+    }
+}
+// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org