view flys-backend/src/main/java/de/intevation/flys/importer/AtFileParser.java @ 1197:ce3dacc6ea92

PRFParser: extract km from lines. TODO: extract data. flys-backend/trunk@2297 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author Sascha L. Teichmann <sascha.teichmann@intevation.de>
date Thu, 07 Jul 2011 09:29:31 +0000
parents 6dc847194625
children
line wrap: on
line source
package de.intevation.flys.importer;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.IOException;
import java.math.BigDecimal;

import org.apache.log4j.Logger;

import de.intevation.flys.importer.ImportDischargeTable;
import de.intevation.flys.importer.ImportDischargeTableValue;

import java.util.regex.Pattern;
import java.util.regex.Matcher;

import java.util.Date;
import java.util.Calendar;

/**
 * Parser for AT files (discharge tables).
 *
 * <p>An AT file is read line by line. Lines matching {@link #DATE_LINE}
 * provide the validity time range of the table, lines starting with
 * <code>#</code> or <code>*</code> are treated as comments, and every other
 * non-empty line is a data row consisting of a W value followed by up to
 * ten Q values.</p>
 */
public class AtFileParser {

    /** Character encoding used to read AT files. */
    public static final String ENCODING = "ISO-8859-1";

    private static final Logger logger = Logger.getLogger(AtFileParser.class);


    // regular expression from hell to find out time range
    public static final Pattern DATE_LINE = Pattern.compile(
        "^\\*\\s*Abflu[^t]+tafel?\\s*([^\\d]+)"  +
        "(\\d{1,2})?\\.?(\\d{1,2})?\\.?(\\d{2,4})\\s*(?:(?:bis)|-)?\\s*" +
        "(?:(\\d{1,2})?\\.?(\\d{1,2})?\\.?(\\d{2,4}))?\\s*.*$");

    public AtFileParser() {
    }


    /**
     * Parses an AT file with an empty name prefix and kind <code>0</code>.
     *
     * @param file the AT file to read.
     * @return the discharge table built from the file.
     * @throws IOException if the file cannot be opened or read.
     */
    public ImportDischargeTable parse(File file) throws IOException {
        return parse(file, "", 0);
    }

    /**
     * Parses an AT file into a discharge table.
     *
     * <p>Rows containing a token that is not a valid number are skipped
     * as a whole and reported with file name and line number; parsing then
     * continues with the next line.</p>
     *
     * @param file   the AT file to read.
     * @param prefix text prepended to the file name to form the description
     *               passed to the discharge table.
     * @param kind   kind value passed through to the discharge table.
     * @return the discharge table; its time interval is only set if a
     *         date line was found in the file.
     * @throws IOException if the file cannot be opened or read.
     */
    public ImportDischargeTable parse(
        File   file,
        String prefix,
        int    kind
    )
    throws IOException {

        logger.info("parsing AT file: " + file);

        BufferedReader br = null;

        String line       = null;

        // true until the first data row has been consumed successfully
        boolean beginning = true;

        ImportDischargeTable dischargeTable =
            new ImportDischargeTable(kind, prefix + file.getName());

        Date from = null;
        Date to   = null;

        try {
            br = new BufferedReader(
                 new InputStreamReader(
                 new FileInputStream(file), ENCODING));

            int lineNo = 0;

            while ((line = br.readLine()) != null) {

                ++lineNo;

                String tmp = line.trim();

                if (tmp.length() == 0) {
                    continue;
                }

                Matcher m = DATE_LINE.matcher(tmp);
                if (m.matches()) {
                    from = guessDate(m.group(2), m.group(3), m.group(4));
                    to   = guessDate(m.group(5), m.group(6), m.group(7));
                    if (from == null) {
                        // only one date available: use it as start
                        from = to;
                        to   = null;
                    }
                    continue;
                }

                if (tmp.startsWith("#! name=")) {
                    // XXX Skip the name,  because we don't know where to save
                    // it at the moment

                    //String name = tmp.substring(8);
                    continue;
                }

                if (tmp.startsWith("#") || tmp.startsWith("*")) {
                    continue;
                }

                // accept decimal commas as well as decimal points
                String[] splits = tmp.replace(',', '.').split("\\s+");

                if ((splits.length < 2) || (splits.length > 11)) {
                    logger.warn("Found an invalid row in the AT file.");
                    continue;
                }

                /* shift is used to differentiate between lines with
                 * exactly 10 Qs and lines with less than 10 Qs. The shift
                 * is only modified when it is the first line: a short
                 * first line is right-aligned, i.e. its last Q belongs
                 * to W + 9.
                 */
                int shift = -1;

                if (splits.length != 11 && beginning) {
                    shift = 10 - splits.length;
                }

                // Convert the complete row first so that a malformed token
                // neither leaves half a row in the table nor aborts the
                // rest of the file.
                int          count = splits.length - 1;
                BigDecimal[] qs    = new BigDecimal[count];
                BigDecimal[] ws    = new BigDecimal[count];

                try {
                    double W = Double.parseDouble(splits[0].trim());

                    for (int i = 1; i < splits.length; i++) {
                        double iW = W + shift + i;
                        double iQ = Double.parseDouble(splits[i].trim());

                        // NOTE(review): new BigDecimal(double) keeps the
                        // exact binary expansion of the double. Left as is
                        // to not change the values handed to the backend;
                        // consider BigDecimal.valueOf() - confirm against
                        // already imported data.
                        qs[i-1] = new BigDecimal(iQ/100.0);
                        ws[i-1] = new BigDecimal(iW/100.0);
                    }
                }
                catch (NumberFormatException nfe) {
                    logger.warn(
                        "AT file " + file + ", line " + lineNo +
                        ": skipping row with invalid number: " +
                        nfe.getMessage());
                    continue;
                }

                for (int i = 0; i < count; i++) {
                    dischargeTable.addDischargeTableValue(
                        new ImportDischargeTableValue(qs[i], ws[i]));
                }

                beginning = false;
            }
        }
        finally {
            if (br != null) {
                br.close();
            }
        }

        if (from != null) {
            // make sure the interval runs forward in time
            if (to != null && from.compareTo(to) > 0) {
                Date t = from; from = to; to = t;
            }
            logger.info("from: " + from + " to: " + to);
            ImportTimeInterval interval = new ImportTimeInterval(from, to);
            dischargeTable.setTimeInterval(interval);
        }

        logger.info("Finished parsing AT file: " + file);

        return dischargeTable;
    }

    /**
     * Builds a date from possibly incomplete day/month/year strings.
     *
     * <p>Missing or unparsable parts fall back to defaults: day 15,
     * month 6, year 1900. Years below 100 are expanded: values below 20
     * become 20xx, all others 19xx. The time of day is set to 12:00:00.000
     * in the default calendar/time zone.</p>
     *
     * @param day   day of month or <code>null</code>.
     * @param month month (1 based) or <code>null</code>.
     * @param year  year with two to four digits or <code>null</code>.
     * @return the guessed date, or <code>null</code> if all three
     *         arguments are <code>null</code>.
     */
    public static Date guessDate(String day, String month, String year) {
        if (day == null && month == null && year == null) {
            return null;
        }

        logger.debug("day: " + day + " month: " + month + " year: " + year);

        int dayI = 15;
        if (day != null) {
            try {
                dayI = Integer.parseInt(day.trim());
            }
            catch (NumberFormatException nfe) {
                // deliberately ignored: keep the default day
            }
        }

        int monthI = 6;
        if (month != null) {
            try {
                monthI = Integer.parseInt(month.trim());
            }
            catch (NumberFormatException nfe) {
                // deliberately ignored: keep the default month
            }
        }

        int yearI = 1900;
        if (year != null) {
            try {
                yearI = Integer.parseInt(year.trim());
                if (yearI < 100) {
                    if (yearI < 20) {
                        yearI += 2000;
                    }
                    else {
                        yearI += 1900;
                    }
                }
            }
            catch (NumberFormatException nfe) {
                // deliberately ignored: keep the default year
            }
        }

        Calendar cal = Calendar.getInstance();
        cal.set(yearI, monthI-1, dayI, 12, 0, 0);
        // set(...) above retains the millisecond field of "now". Clear it
        // explicitly: the former "ms - ms%1000" rounded towards zero and
        // therefore moved dates before 1970 (negative epoch millis) to
        // 12:00:01 instead of 12:00:00.
        cal.set(Calendar.MILLISECOND, 0);
        return cal.getTime();
    }
}
// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org