view backend/src/main/java/org/dive4elements/river/importer/parsers/StaFileParser.java @ 5955:b819209732a0

STA parser: more informative warning
author Tom Gottfried <tom.gottfried@intevation.de>
date Wed, 08 May 2013 18:23:41 +0200
parents f944cc25484c
children 4c3ccf2b0304
line wrap: on
line source
/* Copyright (C) 2011, 2012, 2013 by Bundesanstalt für Gewässerkunde
 * Software engineering by Intevation GmbH
 *
 * This file is Free Software under the GNU AGPL (>=v3) 
 * and comes with ABSOLUTELY NO WARRANTY! Check out the
 * documentation coming with Dive4Elements River for details. 
 */

package org.dive4elements.river.importer.parsers;

import java.io.File;
import java.io.IOException;
import java.io.LineNumberReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;

import java.math.BigDecimal;

import java.util.regex.Pattern;
import java.util.regex.Matcher;

import java.util.Date;
import java.util.HashMap;
import java.util.ArrayList;
import java.util.List;

import org.apache.log4j.Logger;

import org.dive4elements.river.importer.ImportMainValueType;
import org.dive4elements.river.importer.ImportMainValue;
import org.dive4elements.river.importer.ImportNamedMainValue;
import org.dive4elements.river.importer.ImportGauge;
import org.dive4elements.river.importer.ImportTimeInterval;
import org.dive4elements.river.utils.DateGuesser;

/**
 * Parser for STA files describing a gauge and its main values.
 *
 * <p>The parser reads a fixed-column header (gauge number, gauge name,
 * aeo, datum and station), skips a block of header lines and then
 * interprets every remaining line that matches {@link #QWTD_} as a
 * main value. The results are stored in the {@link ImportGauge} that
 * is handed to {@link #parse(ImportGauge)}.</p>
 */
public class StaFileParser
{
    private static final Logger log = Logger.getLogger(StaFileParser.class);

    /** Character encoding used to read STA files. */
    public static final String ENCODING = "ISO-8859-1";

    /**
     * Characters accepted as main value type. Configurable via the
     * system property <code>flys.backend.main.value.types</code>.
     */
    public static final String TYPES =
        System.getProperty("flys.backend.main.value.types", "QWTD");

    /**
     * If the system property
     * <code>flys.backend.sta.not.parse.gauge.numbers</code> is
     * <code>true</code> the gauge number is not parsed and stays null.
     */
    public static final boolean NOT_PARSE_GAUGE_NUMBERS =
        Boolean.getBoolean("flys.backend.sta.not.parse.gauge.numbers");

    /**
     * Matches a main value line: group 1 is the name, group 2 the
     * value and group 3 a single type character out of {@link #TYPES}.
     * Anything following the type character is ignored.
     */
    public static final Pattern QWTD_ =
        Pattern.compile("\\s*([^\\s]+)\\s+([^\\s]+)\\s+([" +
            Pattern.quote(TYPES) + "]).*");

    // TODO: To be extended.
    private static final Pattern MAIN_VALUE = Pattern.compile(
        "^(HQ|MHW|GLQ|NMQ|HQEXT)(\\d*)$");

    // Column layout of the header lines. All offsets are zero based
    // and follow String.substring() semantics (end is exclusive).

    /** First line: start of the gauge number. */
    private static final int GAUGE_NUMBER_START = 8;

    /** First line: end of the gauge number and start of the gauge name. */
    private static final int GAUGE_NAME_START = 16;

    /** First line: end of the gauge name. */
    private static final int GAUGE_NAME_END = 35;

    /** First line: start of the columns holding aeo and datum. */
    private static final int AEO_DATUM_START = 38;

    /** Second line: start of the station. */
    private static final int STATION_START = 29;

    /** Second line: end of the station. */
    private static final int STATION_END = 36;

    /** Number of lines skipped between the header and the main values. */
    private static final int SKIPPED_LINES = 6;

    /**
     * Checks if a string names a main value like HQ100.
     * Blanks are removed and the case is ignored before matching.
     *
     * @param s The string to check.
     * @return true if s matches the known main value names.
     */
    private static boolean isMainValue(String s) {
        s = s.replace(" ", "").toUpperCase();
        return MAIN_VALUE.matcher(s).matches();
    }


    /**
     * Immutable pair of a main value name and the optional time
     * interval that was extracted from the raw name.
     */
    public static final class NameAndTimeInterval {
        private final String             name;
        private final ImportTimeInterval timeInterval;

        /**
         * Creates a pair without a time interval.
         *
         * @param name The name of the main value.
         */
        public NameAndTimeInterval(String name) {
            this(name, null);
        }

        /**
         * @param name         The name of the main value.
         * @param timeInterval The time interval, may be null.
         */
        public NameAndTimeInterval(String name, ImportTimeInterval timeInterval) {
            this.name         = name;
            this.timeInterval = timeInterval;
        }

        /** @return The name of the main value. */
        public String getName() {
            return name;
        }

        /** @return The time interval or null if there is none. */
        public ImportTimeInterval getTimeInterval() {
            return timeInterval;
        }

        @Override
        public String toString() {
            return "name: " + name + " time interval: " + timeInterval;
        }
    } // class NameAndTimeInterval

    public StaFileParser() {
    }

    /**
     * Parses the STA file of the given gauge and stores the result
     * in the gauge.
     *
     * <p>Note that name and official number may already have been set
     * on the gauge when the method gives up at a later stage and
     * returns false.</p>
     *
     * @param gauge The gauge providing the STA file and receiving
     *              the parsed data.
     * @return true if the file was parsed completely, false if the
     *         header of the file is incomplete or not parseable.
     * @throws IOException if reading the file fails.
     */
    public boolean parse(ImportGauge gauge) throws IOException {

        File file = gauge.getStaFile();

        log.info("parsing STA file: " + file);
        LineNumberReader in = null;
        try {
            in =
                new LineNumberReader(
                new InputStreamReader(
                new FileInputStream(file), ENCODING));

            // The second line is only read if the first one was okay.
            if (!parseFirstLine(in.readLine(), gauge)
            ||  !parseSecondLine(in.readLine(), gauge)) {
                return false;
            }

            // overread the next six lines
            for (int i = 0; i < SKIPPED_LINES; ++i) {
                if (in.readLine() == null) {
                    log.warn("STA file is too short");
                    return false;
                }
            }

            parseMainValues(in, gauge);
        }
        finally {
            if (in != null) {
                in.close();
            }
        }
        log.info("finished parsing STA file: " + file);
        return true;
    }

    /**
     * Extracts gauge name, gauge number, aeo and datum from the
     * first line of the STA file.
     *
     * @param line  The first line, null if the file is empty.
     * @param gauge The gauge to store the values in.
     * @return true if the line could be parsed.
     */
    private static boolean parseFirstLine(String line, ImportGauge gauge) {

        if (line == null) {
            log.warn("STA file is empty.");
            return false;
        }

        // substring(AEO_DATUM_START) below needs at least
        // AEO_DATUM_START characters. A shorter line would raise
        // a StringIndexOutOfBoundsException.
        if (line.length() < AEO_DATUM_START) {
            log.warn("First line in STA file is too short.");
            return false;
        }

        String gaugeName =
            line.substring(GAUGE_NAME_START, GAUGE_NAME_END).trim();

        Long gaugeNumber = null;

        if (!NOT_PARSE_GAUGE_NUMBERS) {
            String gaugeNumberString =
                line.substring(GAUGE_NUMBER_START, GAUGE_NAME_START).trim();

            try {
                gaugeNumber = Long.parseLong(gaugeNumberString);
            }
            catch (NumberFormatException nfe) {
                // Not fatal: the gauge is imported without a number.
                log.warn("STA: gauge number '" + gaugeNumberString +
                    "' is not a valid long number.");
            }
        }

        gauge.setName(gaugeName);
        gauge.setOfficialNumber(gaugeNumber);

        if (log.isDebugEnabled()) {
            log.debug(
                "name/number: '" + gaugeName + "' '" + gaugeNumber + "'");
        }

        String [] values =
            line.substring(AEO_DATUM_START).trim().split("\\s+", 2);

        if (values.length < 2) {
            // Without this return values[1] would raise an
            // ArrayIndexOutOfBoundsException.
            log.warn("STA: Not enough columns for aeo and datum.");
            return false;
        }

        try {
            gauge.setAeo(new BigDecimal(values[0].replace(",", ".")));
            gauge.setDatum(new BigDecimal(values[1].replace(",", ".")));
        }
        catch (NumberFormatException nfe) {
            log.warn("STA: cannot parse aeo or datum.");
            return false;
        }

        return true;
    }

    /**
     * Extracts the station of the gauge from the second line of
     * the STA file.
     *
     * @param line  The second line, null if the file has ended.
     * @param gauge The gauge to store the station in.
     * @return true if the line could be parsed.
     */
    private static boolean parseSecondLine(String line, ImportGauge gauge) {

        if (line == null) {
            log.warn("STA file has not enough lines");
            return false;
        }

        if (line.length() < STATION_END) {
            log.warn("STA: second line is too short");
            return false;
        }

        try {
            gauge.setStation(new BigDecimal(
                line.substring(STATION_START, STATION_END).trim()));
        }
        catch (NumberFormatException nfe) {
            log.warn("STA: parsing of the station of the gauge failed");
            return false;
        }

        return true;
    }

    /**
     * Reads the remaining lines of the STA file and stores all
     * main values found in the gauge. Lines not matching
     * {@link #QWTD_} and lines with unparseable values are skipped.
     *
     * @param in    The reader positioned behind the header lines.
     * @param gauge The gauge to store the main values in.
     * @throws IOException if reading fails.
     */
    private static void parseMainValues(
        LineNumberReader in,
        ImportGauge      gauge
    )
    throws IOException
    {
        // One shared type instance per type character.
        HashMap<String, ImportMainValueType> types =
            new HashMap<String, ImportMainValueType>();

        ArrayList<ImportNamedMainValue> namedMainValues =
            new ArrayList<ImportNamedMainValue>();

        ArrayList<ImportMainValue> mainValues =
            new ArrayList<ImportMainValue>();

        String line;
        while ((line = in.readLine()) != null) {
            Matcher m = QWTD_.matcher(line);
            if (!m.matches()) {
                // TODO: treat as a comment
                continue;
            }

            BigDecimal value;
            try {
                value = new BigDecimal(m.group(2).replace(",", "."));
            }
            catch (NumberFormatException nfe) {
                log.warn("STA: value not parseable in line "
                    + in.getLineNumber());
                continue;
            }

            String typeString = m.group(3);
            log.debug("\t type: " + typeString);

            ImportMainValueType type = types.get(typeString);
            if (type == null) {
                type = new ImportMainValueType(typeString);
                types.put(typeString, type);
            }

            NameAndTimeInterval nat = parseName(m.group(1));

            ImportNamedMainValue namedMainValue =
                new ImportNamedMainValue(type, nat.getName());
            namedMainValues.add(namedMainValue);

            mainValues.add(new ImportMainValue(
                gauge,
                namedMainValue,
                value,
                nat.getTimeInterval()));
        }

        gauge.setMainValueTypes(
            new ArrayList<ImportMainValueType>(types.values()));
        gauge.setNamedMainValues(namedMainValues);
        gauge.setMainValues(mainValues);
    }

    /**
     * Splits the raw name of a main value into the name proper
     * and an optional time interval given in brackets.
     *
     * <ul>
     * <li><code>HQ</code>: name is used as is, no time interval.</li>
     * <li><code>HQ(1994)</code> or <code>HQ(1994 - 1999)</code>:
     *     name <code>HQ</code> plus time interval.</li>
     * <li><code>W(HQ100)</code>: bracket content is a main value,
     *     name is kept as <code>W(HQ100)</code>, no time interval.</li>
     * <li><code>W(Q(1994))</code>: name <code>W(Q)</code> plus
     *     time interval.</li>
     * </ul>
     *
     * If the bracket content is expected to be a date but cannot be
     * parsed, or if the brackets are nested deeper than shown above,
     * the raw name is returned unmodified without a time interval.
     *
     * @param name The raw name as found in the STA file.
     * @return The name and the time interval (which may be null).
     */
    protected static NameAndTimeInterval parseName(String name) {
        List<String> parts = new ArrayList<String>();

        unbracket(name, 0, parts);

        int length = parts.size();

        if (length < 1) { // Should not happen.
            return new NameAndTimeInterval(name);
        }

        if (length == 1) { // No date at all -> use first part.
            return new NameAndTimeInterval(parts.get(0).trim());
        }

        if (length == 2) { // e.g. HQ(1994) or HQ(1994 - 1999)

            String type     = parts.get(0).trim();
            String datePart = parts.get(1).trim();

            if (isMainValue(datePart)) { // e.g. W(HQ100)
                return new NameAndTimeInterval(
                    type + "(" + datePart + ")", null);
            }

            ImportTimeInterval timeInterval = getTimeInterval(datePart);

            if (timeInterval == null) { // No date at all.
                type = name;
            }

            return new NameAndTimeInterval(type, timeInterval);
        }

        if (length == 3) { // e.g W(Q(1994)) or W(Q(1994 - 1999))

            String type =
                parts.get(0).trim() + "(" +
                parts.get(1).trim() + ")";

            ImportTimeInterval timeInterval = getTimeInterval(
                parts.get(2).trim());

            if (timeInterval == null) { // No date at all.
                type = name;
            }

            return new NameAndTimeInterval(type, timeInterval);
        }

        // more than 3 elements return unmodified.

        return new NameAndTimeInterval(name);
    }

    /**
     * Converts a date or a date range of the form
     * <code>&lt;from&gt; - &lt;to&gt;</code> into a time interval.
     * The dates themselves are interpreted by
     * {@link DateGuesser#guessDate(String)}.
     *
     * @param datePart The string containing the date(s).
     * @return The time interval or null if the single date or the
     *         start date of a range is not parseable. An unparseable
     *         end date results in an interval with a null end.
     */
    private static ImportTimeInterval getTimeInterval(String datePart) {

        int minus = datePart.indexOf('-');

        if (minus < 0) { // '-' not found

            Date date = null;
            try {
                date = DateGuesser.guessDate(datePart);
            }
            catch (IllegalArgumentException iae) {
                log.warn("STA: Invalid date '" + datePart + "'");
                return null;
            }

            return new ImportTimeInterval(date);
        }

        // Found '-' so we have <from> - <to>
        // The separator itself belongs to neither of the two dates,
        // so the end part starts right behind it.
        String startPart = datePart.substring(0, minus).trim();
        String endPart   = datePart.substring(minus + 1).trim();

        Date startDate = null;
        Date endDate   = null;

        try {
            startDate = DateGuesser.guessDate(startPart);
        }
        catch (IllegalArgumentException iae) {
            log.warn("STA: Invalid start date '" + startPart + "'");
        }

        try {
            endDate = DateGuesser.guessDate(endPart);
        }
        catch (IllegalArgumentException iae) {
            log.warn("STA: Invalid end date '" + endPart + "'");
        }

        if (startDate == null) {
            log.warn("STA: Need start date.");
            return null;
        }

        return new ImportTimeInterval(startDate, endDate);
    }

    /**
     * Recursively splits a string at its round brackets.
     *
     * <p>Every nesting level contributes one segment. Segments are
     * prepended to the result, so after the top level call the
     * outermost segment comes first and the innermost last, e.g.
     * <code>W(Q(1994))</code> results in
     * <code>[W, Q, 1994]</code>. Text following a closing bracket
     * is appended to the segment of the enclosing level.</p>
     *
     * @param s      The string to split.
     * @param index  The index in s to start at.
     * @param result Receives the segments.
     * @return The index of the first character not consumed
     *         by this call.
     */
    private static int unbracket(String s, int index, List<String> result) {
        StringBuilder segment = new StringBuilder();
        int length = s.length();
        while (index < length) {
            char c = s.charAt(index);
            switch (c) {
                case '(':
                    // Descend; continue behind the matching ')'.
                    index = unbracket(s, index+1, result);
                    break;
                case ')':
                    // This level is finished.
                    result.add(0, segment.toString());
                    return index+1;
                default:
                    segment.append(c);
                    ++index;
            }
        }
        // End of input reached without a closing bracket.
        result.add(0, segment.toString());

        return index;
    }

    /*
    public static void main(String [] args) {
        for (String arg: args) {
            NameAndTimeInterval nti = parseName(arg);
            System.out.println(arg + " -> " + nti);
        }
    }
    */
}
// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org