view flys-backend/src/main/java/de/intevation/flys/importer/parsers/WstParser.java @ 4198:1cdbd8a0c994

Added two new tables, ClickableQDTable and ClickableWTable, and made Ws and Qs clickable in the historical discharge calculation. The new tables define listener interfaces (clicked lower or upper icon) to listen for user clicks. In addition, there is an enum ClickMode with the options NONE, SINGLE and RANGE, which specifies which icons are displayed in the tables: NONE means no icon for user clicks, SINGLE shows one icon, and RANGE shows two icons (lower and upper).
author Ingo Weinzierl <ingo.weinzierl@intevation.de>
date Mon, 22 Oct 2012 13:31:25 +0200
parents 0d27d02b1208
children b09c095a0b7e
line wrap: on
line source
package de.intevation.flys.importer.parsers;

import java.util.ArrayList;
import java.util.HashSet;

import java.io.File;
import java.io.IOException;
import java.io.LineNumberReader;
import java.io.InputStreamReader;
import java.io.FileInputStream;

import java.text.NumberFormat;

import org.apache.log4j.Logger;

import de.intevation.flys.utils.StringUtil;
import de.intevation.flys.utils.DateGuesser;

import java.util.regex.Pattern;
import java.util.regex.Matcher;

import java.math.BigDecimal;

import de.intevation.flys.importer.ImportWstQRange;
import de.intevation.flys.importer.ImportWstColumn;
import de.intevation.flys.importer.ImportTimeInterval;
import de.intevation.flys.importer.ImportRange;
import de.intevation.flys.importer.ImportUnit;
import de.intevation.flys.importer.ImportWst;

/**
 * Parser for WST files. The result is collected in an {@link ImportWst}
 * that can be fetched with {@link #getWst()} after {@link #parse(File)}.
 *
 * <p>The file is read line by line using the encoding {@link #ENCODING}.
 * As implemented in {@link #parse(File)} the lines are interpreted as
 * follows:</p>
 * <ul>
 * <li>the first non-blank line holds the number of columns,</li>
 * <li>lines starting with <code>"*\u001f"</code> hold one Q value per
 *     column and start a new Q range,</li>
 * <li>lines starting with <code>"*!"</code> are directives
 *     (see the <code>COLUMN_*</code> constants),</li>
 * <li>other lines starting with <code>"*"</code> are comments; a comment
 *     matching {@link #UNIT_COMMENT} provides the unit,</li>
 * <li>lines shorter than 11 characters are ignored,</li>
 * <li>any other line is a column header line as long as no Q line was
 *     seen, and a data line (station + one value per column)
 *     afterwards.</li>
 * </ul>
 */
public class WstParser
{
    private static final Logger log = Logger.getLogger(WstParser.class);

    /** Directive key (after "*!"): quoted long column names. */
    public static final String COLUMN_BEZ_TEXT   = "column-bez-text";
    /** Directive key (after "*!"): widths of the column name fields. */
    public static final String COLUMN_BEZ_BREITE = "column-bez-breite";
    /** Directive key (after "*!"): quoted per-column "Quelle" entries. */
    public static final String COLUMN_QUELLE     = "column-quelle";
    /** Directive key (after "*!"): quoted per-column "Datum" entries. */
    public static final String COLUMN_DATUM      = "column-datum";

    /** Value used for empty fields when empty fields are read as zero. */
    public static final BigDecimal UNDEFINED_ZERO =
        new BigDecimal(0.0);

    /** Start value for the running maximum km of a Q range. */
    public static final BigDecimal MIN_RANGE =
        new BigDecimal(-Double.MAX_VALUE);

    /**
     * Start value for the running minimum km of a Q range.
     * {@link #addInterval} recognizes this very instance as
     * "no km seen", so it is compared by identity there.
     */
    public static final BigDecimal MAX_RANGE =
        new BigDecimal(Double.MAX_VALUE);

    /** Character encoding used to read WST files. */
    public static final String ENCODING = "ISO-8859-1";

    /** Comment line of the form "* km &lt;unit text&gt;". */
    public static final Pattern UNIT_COMMENT =
        Pattern.compile("\\*\\s*[kK][mM]\\s+(.+)");

    /** Extracts the text between the first pair of square brackets. */
    public static final Pattern UNIT =
        Pattern.compile("[^\\[]*\\[([^]]+)\\].*");

    /**
     * Gap left between two neighbouring Q ranges. Built from a string
     * because the double constructor would store the inexact binary
     * expansion of 0.00001.
     */
    public static final BigDecimal INTERVAL_GAP =
        new BigDecimal("0.00001");

    /** The WST being filled by {@link #parse(File)}. */
    protected ImportWst wst;

    /** The range created by the previous call of {@link #addInterval}. */
    protected ImportRange lastRange;

    public WstParser() {
    }

    /** Returns the WST built by the last {@link #parse(File)} call. */
    public ImportWst getWst() {
        return wst;
    }

    /** Replaces the WST this parser works on. */
    public void setWst(ImportWst wst) {
        this.wst = wst;
    }

    /**
     * Tries to derive a time interval from a column name.
     *
     * @param string the text to inspect.
     * @return the guessed interval or null if
     *         {@link DateGuesser#guessDate} rejects the text with an
     *         IllegalArgumentException.
     */
    public ImportTimeInterval guessDate(String string) {
        try {
            return new ImportTimeInterval(
                DateGuesser.guessDate(string));
        }
        catch (IllegalArgumentException iae) {
            // Best effort: a column without a recognizable date simply
            // gets no time interval.
            log.debug("no date guessed from '" + string + "'");
        }
        return null;
    }

    /**
     * Parses the given WST file into a fresh {@link ImportWst}.
     *
     * @param file the WST file to read.
     * @throws IOException if reading the file fails.
     * @throws NumberFormatException if a width of a
     *         {@link #COLUMN_BEZ_BREITE} directive or a numeric field
     *         of a Q or data line is not a valid number.
     */
    public void parse(File file) throws IOException {

        log.info("Parsing WST file '" + file + "'");

        wst = new ImportWst(file.getName());

        LineNumberReader in = null;
        try {
            in =
                new LineNumberReader(
                new InputStreamReader(
                new FileInputStream(file), ENCODING));

            String input;
            boolean first = true;
            int columnCount = 0;

            String [] lsBezeichner   = null;
            String [] langBezeichner = null;
            int    [] colNaWidths    = null;
            // quellen and daten are parsed but not evaluated any
            // further in this method.
            String [] quellen        = null;
            String [] daten          = null;

            BigDecimal [] aktAbfluesse   = null;
            BigDecimal [] firstAbfluesse = null;

            BigDecimal minKm = MAX_RANGE;
            BigDecimal maxKm = MIN_RANGE;

            boolean columnHeaderChecked = false;

            String einheit = "Wasserstand [NN + m]";

            // NOTE(review): BigDecimal.equals() is scale sensitive, so
            // "1.0" and "1.00" are not detected as duplicates here.
            HashSet<BigDecimal> kms = new HashSet<BigDecimal>();

            while ((input = in.readLine()) != null) {
                String line = input;
                if (first) { // fetch number of columns
                    if ((line = line.trim()).length() == 0) {
                        continue;
                    }
                    try {
                        columnCount = Integer.parseInt(line);
                        if (columnCount <= 0) {
                            throw new NumberFormatException(
                                "number columns <= 0");
                        }
                        log.debug("Number of columns: " + columnCount);
                        wst.setNumberColumns(columnCount);
                        lsBezeichner = new String[columnCount];
                    }
                    catch (NumberFormatException nfe) {
                        // keep looking for the column count
                        // in the following lines.
                        log.warn("WST: invalid number.", nfe);
                        continue;
                    }
                    first = false;
                    continue;
                }

                // decimal comma -> decimal point
                line = line.replace(',', '.');

                if (line.startsWith("*\u001f")) {
                    // Q line: closes the current Q range (if any)
                    // and starts a new one.
                    BigDecimal [] data =
                        parseLineAsDouble(line, columnCount, false, true);

                    if (aktAbfluesse != null) {
                        addInterval(minKm, maxKm, aktAbfluesse);
                        minKm = MAX_RANGE;
                        maxKm = MIN_RANGE;
                    }

                    aktAbfluesse = new BigDecimal[columnCount];
                    log.debug("new q range: " + columnCount);
                    for (int i = 0; i < Math.min(columnCount, data.length); ++i) {
                        if (data[i] != null) {
                            log.debug("  column: " + data[i]);
                            aktAbfluesse[i] = data[i];
                        }
                    }

                    if (firstAbfluesse == null) {
                        firstAbfluesse = aktAbfluesse.clone();
                    }
                    continue;
                }

                if (line.startsWith("*!")) {
                    String spezial = line.substring(2).trim();

                    if (spezial.startsWith(COLUMN_BEZ_TEXT)) {
                        String arg =
                            directiveArgument(spezial, COLUMN_BEZ_TEXT);
                        if (arg != null) {
                            langBezeichner =
                                StringUtil.splitQuoted(arg, '"');
                        }
                    }
                    else if (spezial.startsWith(COLUMN_BEZ_BREITE)) {
                        String arg =
                            directiveArgument(spezial, COLUMN_BEZ_BREITE);
                        if (arg != null) {
                            String [] split = arg.split("\\s+");

                            colNaWidths = new int[split.length];
                            for (int i = 0; i < split.length; i++) {
                                colNaWidths[i] = Integer.parseInt(split[i]);
                            }
                        }
                    }
                    else if (spezial.startsWith(COLUMN_QUELLE)) {
                        // Strip the directive key first, like the
                        // other directives do.
                        String arg =
                            directiveArgument(spezial, COLUMN_QUELLE);
                        if (arg != null) {
                            quellen = StringUtil.splitQuoted(arg, '"');
                        }
                    }
                    else if (spezial.startsWith(COLUMN_DATUM)) {
                        String arg =
                            directiveArgument(spezial, COLUMN_DATUM);
                        if (arg != null) {
                            daten = StringUtil.splitQuoted(arg, '"');
                        }
                    }
                    continue;
                }

                if (line.length() < 11) {
                    continue;
                }

                if (line.startsWith("*")) {
                    einheit = parseUnit(line, einheit);
                    continue;
                }

                if (firstAbfluesse == null) {
                    // No Q line seen yet: this is a column header line.
                    // The unmodified input is used here.
                    lsBezeichner = readColumnNames(
                        input, lsBezeichner, langBezeichner, colNaWidths);
                    continue;
                }

                if (!columnHeaderChecked) {
                    nameColumns(lsBezeichner, firstAbfluesse);
                    columnHeaderChecked = true;
                }

                BigDecimal [] data =
                    parseLineAsDouble(line, columnCount, true, false);

                BigDecimal kaem = data[0];

                if (kaem == null) {
                    // A blank station field cannot be assigned to a km.
                    log.warn(
                        "WST: no km found in line " +
                        in.getLineNumber() + ". -> ignored");
                    continue;
                }

                if (!kms.add(kaem)) {
                    log.warn(
                        "WST: km " + kaem +
                        " (line " + in.getLineNumber() +
                        ") found more than once. -> ignored");
                    continue;
                }

                if (kaem.compareTo(minKm) < 0) {
                    minKm = kaem;
                }
                if (kaem.compareTo(maxKm) > 0) {
                    maxKm = kaem;
                }

                // extract values
                for (int i = 0; i < columnCount; ++i) {
                    addValue(kaem, data[i+1], i);
                }
            }

            wst.setUnit(new ImportUnit(einheit));

            // close the last Q range
            addInterval(minKm, maxKm, aktAbfluesse);
        }
        finally {
            if (in != null) {
                in.close();
            }
        }
    }

    /**
     * Returns the trimmed text following the key of a "*!" directive
     * or null if nothing follows the key.
     */
    private static String directiveArgument(String directive, String key) {
        String arg = directive.substring(key.length()).trim();
        return arg.length() == 0 ? null : arg;
    }

    /**
     * Evaluates a comment line: if it matches {@link #UNIT_COMMENT}
     * the unit is taken from it, otherwise the current unit is kept.
     */
    private static String parseUnit(String line, String current) {
        Matcher m = UNIT_COMMENT.matcher(line);
        if (!m.matches()) {
            return current;
        }
        log.debug("unit comment found");
        // XXX: This hack is needed because desktop
        // FLYS is broken figuring out the unit
        String [] units = m.group(1).split("\\s{2,}");
        m = UNIT.matcher(units[0]);
        String unit = m.matches() ? m.group(1) : units[0];
        log.debug("unit: " + unit);
        return unit;
    }

    /**
     * Reads the column names from a header line.
     *
     * @param input     the unmodified header line.
     * @param names     the current column names; filled in place
     *                  unless long names are given.
     * @param longNames names from a {@link #COLUMN_BEZ_TEXT}
     *                  directive or null.
     * @param widths    widths from a {@link #COLUMN_BEZ_BREITE}
     *                  directive or null.
     * @return the column names to use from now on.
     */
    private static String [] readColumnNames(
        String    input,
        String [] names,
        String [] longNames,
        int    [] widths
    ) {
        if (longNames != null) {
            return StringUtil.fitArray(longNames, names);
        }

        int N = input.length();

        if (widths != null) {
            // Consecutive fields of the given widths. The slice is
            // clamped to the line and the index to the number of
            // columns, so short lines or surplus widths do not
            // raise index exceptions.
            for (int j = 0, i = 0;
                 j < widths.length && j < names.length && i < N;
                 i += widths[j++]
            ) {
                int end = Math.min(i + widths[j], N);
                names[j] = input.substring(i, end).trim();
            }
            return names;
        }

        // first column begins at position 8 in line
        for (int i = 8, col = 0; i < N; i += 9) {
            // NOTE(review): for a short last field the window is
            // moved back to end one character before the end of
            // the line; kept as is -- confirm this is intended.
            if ((i + 9) > N) {
                i = N - 10;
            }
            // one column header is 9 chars wide
            names[col++] = input.substring(i, i + 9).trim();

            if (col == names.length) {
                break;
            }
        }
        return names;
    }

    /**
     * Assigns a unique name and a guessed time interval to every
     * column of the WST. Columns without a name are named after
     * their first Q value or numbered as unknown.
     */
    private void nameColumns(String [] names, BigDecimal [] firstQs) {
        int unknownCount = 0;
        HashSet<String> uniqueColumnNames = new HashSet<String>();

        for (int i = 0; i < names.length; ++i) {
            if (names[i] == null || names[i].length() == 0) {
                double q = firstQs[i].doubleValue();
                if (q < 0.001) {
                    names[i] = "<unbekannt #" + unknownCount + ">";
                    ++unknownCount;
                }
                else {
                    names[i] = "Q="+format(q);
                }
            }

            // append a counter until the name is unique
            String candidate = names[i];
            int collision = 1;
            while (!uniqueColumnNames.add(candidate)) {
                candidate = names[i] + " (" + collision + ")";
                ++collision;
            }

            ImportWstColumn iwc = wst.getColumn(i);
            iwc.setName(candidate);
            iwc.setTimeInterval(guessDate(candidate));
        }
    }

    /**
     * Adds the value w at station km to the column with the given
     * index. Null values are skipped.
     */
    protected void addValue(BigDecimal km, BigDecimal w, int index) {
        if (w != null) {
            ImportWstColumn column = wst.getColumn(index);
            column.addColumnValue(km, w);
        }
    }

    /** Shared formatter used by {@link #format(double)}. */
    private static final NumberFormat NF = getNumberFormat();

    /**
     * Creates a formatter with exactly two fraction digits for the
     * default locale.
     */
    private static final NumberFormat getNumberFormat() {
        NumberFormat nf = NumberFormat.getInstance();
        nf.setMinimumFractionDigits(2);
        nf.setMaximumFractionDigits(2);
        return nf;
    }

    /** Formats a value with exactly two fraction digits. */
    protected static String format(double value) {
        return NF.format(value);
    }

    /**
     * Adds a Q range covering the km interval [from, to] with one
     * Q value per column to all columns of the WST.
     *
     * <p>Nothing is added if there are no values or if no km was
     * collected for the interval (from is still the
     * {@link #MAX_RANGE} instance).</p>
     *
     * <p>To make neighbouring ranges fit tightly, the touching ends of
     * the previous and the new range are adjusted so that the lower
     * one ends {@link #INTERVAL_GAP} below the start of the upper
     * one.</p>
     *
     * @param from   one end of the km interval.
     * @param to     the other end of the km interval.
     * @param values the Q values, one per column.
     */
    protected void addInterval(
        BigDecimal    from,
        BigDecimal    to,
        BigDecimal [] values
    ) {
        log.debug("addInterval: " + from + " " + to);

        if (values == null || from == MAX_RANGE) {
            return;
        }

        if (to.compareTo(from) < 0) {
            BigDecimal t = from; from = to; to = t;
        }

        ImportRange range = new ImportRange(from, to);

        // little workaround to make the q ranges tightly fit.
        // Leave a very small gap to ensure that the range queries
        // still work.

        if (lastRange != null) {
            // Decide by the lower bounds which range comes first;
            // comparing distances is ambiguous on ties.
            if (range.getA().compareTo(lastRange.getA()) > 0) {
                // new range lies above the previous one:
                // end the previous one just before the new one.
                lastRange.setB(range.getA().subtract(INTERVAL_GAP));
            }
            else {
                // new range lies below the previous one:
                // end the new one just before the previous one.
                range.setB(lastRange.getA().subtract(INTERVAL_GAP));
            }
        }

        for (int i = 0; i < values.length; ++i) {
            ImportWstColumn column = wst.getColumn(i);
            ImportWstQRange wstQRange = new ImportWstQRange(range, values[i]);
            column.addColumnQRange(wstQRange);
        }

        lastRange = range;
    }

    /**
     * Splits a line into its fixed-width fields and converts them
     * to numbers.
     *
     * @param line              the line to parse.
     * @param count             the number of value fields to read.
     * @param bStation          true if the line starts with a station.
     * @param bParseEmptyAsZero true if blank fields should become
     *                          {@link #UNDEFINED_ZERO} instead of null.
     * @return the parsed fields; if bStation is set the station is
     *         the first element.
     * @throws NumberFormatException if a non-blank field is not a
     *         valid number.
     */
    private static final BigDecimal [] parseLineAsDouble(
        String  line,
        int     count,
        boolean bStation,
        boolean bParseEmptyAsZero
    ) {
        String [] tokens = parseLine(line, count, bStation);

        BigDecimal [] doubles = new BigDecimal[tokens.length];

        for (int i = 0; i < doubles.length; ++i) {
            String token = tokens[i].trim();
            if (token.length() != 0) {
                doubles[i] = new BigDecimal(token);
            }
            else if (bParseEmptyAsZero) {
                doubles[i] = UNDEFINED_ZERO;
            }
        }

        return doubles;
    }

    /**
     * Cuts a line into its fixed-width fields.
     *
     * <p>The station occupies the first 8 characters. The value
     * fields start at offset 9; each is 8 characters wide and the
     * next one starts 9 characters later. A field that is not
     * completely contained in the line is returned as an empty
     * string.</p>
     *
     * @param line          the line to cut.
     * @param tokenCount    the number of value fields to read.
     * @param bParseStation true if the station should be returned
     *                      as the first element.
     * @return the raw (untrimmed) fields.
     * @throws IllegalArgumentException if the station is requested
     *         but the line is shorter than 8 characters.
     */
    private static String [] parseLine(
        String  line,
        int     tokenCount,
        boolean bParseStation
    ) {
        ArrayList<String> strings = new ArrayList<String>();

        if (bParseStation) {
            if (line.length() < 8) {
                throw new IllegalArgumentException("station too short");
            }
            strings.add(line.substring(0, 8));
        }

        int pos = 9;
        for (int i = 0; i < tokenCount; ++i) {
            if (line.length() >= pos + 8) {
                strings.add(line.substring(pos, pos + 8));
            }
            else {
                strings.add("");
            }
            pos += 9;
        }

        return strings.toArray(new String[strings.size()]);
    }
}
// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org