view flys-backend/src/main/java/de/intevation/flys/importer/AtFileParser.java @ 485:6b231041dc18

Importer: Try to extract time ranges from at files. flys-backend/trunk@1811 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author Sascha L. Teichmann <sascha.teichmann@intevation.de>
date Tue, 03 May 2011 17:09:38 +0000
parents d980e545ccab
children 8ea09ec7f0c8
line wrap: on
line source
package de.intevation.flys.importer;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.math.BigDecimal;
import java.text.NumberFormat;
import java.text.ParseException;
import java.util.Calendar;
import java.util.Date;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;

import de.intevation.flys.importer.ImportDischargeTable;
import de.intevation.flys.importer.ImportDischargeTableValue;

/**
 * Parser that reads the AT file of a gauge into an
 * {@link ImportDischargeTable}.
 *
 * Besides the W/Q rows the parser looks for a header comment line matching
 * {@link #DATE_LINE} and tries to guess a validity time range from it.
 */
public class AtFileParser {

    /** Character encoding used to read AT files. */
    public static final String ENCODING = "ISO-8859-1";

    private static final Logger logger = Logger.getLogger(AtFileParser.class);

    /**
     * Number format for the W and Q columns. The locale is fixed because
     * {@link #parse(ImportGauge)} turns every ',' into '.' before parsing;
     * with a default locale that uses '.' as grouping separator (e.g.
     * German) the decimal point would be dropped silently.
     */
    private static final NumberFormat nf = NumberFormat.getInstance(Locale.US);


    /**
     * Regular expression from hell to find out the time range.
     *
     * Capturing groups:
     * <ul>
     * <li>1: free text between the "Abflu...tafel" marker and the first
     *        digit</li>
     * <li>2, 3, 4: day, month and year of the first date (day and month are
     *        optional)</li>
     * <li>5, 6, 7: day, month and year of the optional second date</li>
     * </ul>
     *
     * NOTE(review): the dots between day, month and year are optional, so a
     * bare four-digit year appears to be split by the matcher into a
     * two-digit day and a two-digit year (e.g. "1990" -> day 19, year 90).
     * Confirm against real AT headers before relying on the day value.
     */
    public static final Pattern DATE_LINE = Pattern.compile(
        "^\\*\\s*Abflu[^t]+tafel?\\s*([^\\d]+)"  +
        "(\\d{1,2})?\\.?(\\d{1,2})?\\.?(\\d{2,4})\\s*(?:(?:bis)|-)?\\s*" +
        "(?:(\\d{1,2})?\\.?(\\d{1,2})?\\.?(\\d{2,4}))?\\s*.*$");

    public AtFileParser() {
    }


    /**
     * Parses the AT file of the given gauge.
     *
     * Blank lines and lines starting with '#' or '*' are skipped. Every
     * other line is split at whitespace: the first column is the W value of
     * the row, the remaining (at most ten) columns are Q values. Rows with
     * fewer than 2 or more than 11 columns are reported and skipped. Both W
     * and Q are divided by 100 before they are stored.
     *
     * If a number cannot be parsed, parsing stops at that line and the
     * values collected so far are returned.
     *
     * @param gauge the gauge whose AT file should be read.
     * @return the discharge table filled with the values that were read.
     * @throws IOException if reading or closing the file fails.
     */
    public ImportDischargeTable parse(ImportGauge gauge) throws IOException {

        File file = gauge.getAtFile();

        logger.info("parsing AT file: " + file);

        BufferedReader br = null;

        String line       = null;

        boolean beginning = true;

        ImportDischargeTable dischargeTable = new ImportDischargeTable();

        // The guessed time range is not attached to the discharge table
        // yet; for now it is only extracted and logged.
        Date from = null;
        Date to   = null;

        try {
            br = new BufferedReader(
                 new InputStreamReader(
                 new FileInputStream(file), ENCODING));

            while ((line = br.readLine()) != null) {

                String tmp = line.trim();

                if (tmp.length() == 0) {
                    continue;
                }

                Matcher m = DATE_LINE.matcher(tmp);
                if (m.matches()) {
                    // Group 1 is the free text in front of the first date,
                    // so the date parts start at group 2.
                    from = guessDate(m.group(2), m.group(3), m.group(4));
                    to   = guessDate(m.group(5), m.group(6), m.group(7));
                    if (from == null) {
                        Date t = from; from = to; to = t;
                    }
                    logger.debug(
                        "time range of AT file: " + from + " - " + to);
                    continue;
                }

                if (tmp.startsWith("#! name=")) {
                    // XXX Skip the name,  because we don't know where to save
                    // it at the moment

                    //String name = tmp.substring(8);
                    continue;
                }

                if (tmp.startsWith("#") || tmp.startsWith("*")) {
                    continue;
                }

                String[] splits = tmp.replace(',', '.').split("\\s+");

                if ((splits.length < 2) || (splits.length > 11)) {
                    logger.warn("Found an invalid row in the AT file.");
                    continue;
                }

                String strW = splits[0].trim();
                double W    = nf.parse(strW).doubleValue();

                /* shift is used to differentiate between lines with
                 * exactly 10 Qs and lines with less than 10 Qs. The shift
                 * is only modified when it is the first line.
                 */
                int shift = 0;

                if (splits.length != 11 && beginning) {
                    shift = 11 - splits.length;
                }


                for (int i = 1; i < splits.length; i++) {
                    double iW = W + shift + i;
                    double iQ = nf.parse(splits[i].trim()).doubleValue();

                    // BigDecimal.valueOf keeps the short decimal form of
                    // the double; new BigDecimal(double) would store its
                    // full binary expansion instead.
                    dischargeTable.addDischargeTableValue(
                        new ImportDischargeTableValue(
                            BigDecimal.valueOf(iQ/100.0),
                            BigDecimal.valueOf(iW/100.0)));
                }

                beginning = false;
            }
        }
        catch (ParseException pe) {
            // An unparsable number ends the parsing of this file; the
            // values read so far are still returned.
            logger.warn(pe.getMessage());
        }
        finally {
            if (br != null) {
                br.close();
            }
        }

        logger.info("Finished parsing AT file: " + file);

        return dischargeTable;
    }

    /**
     * Builds a date from possibly incomplete textual date parts.
     *
     * Missing or non-numeric parts fall back to defaults: day 15, month 6,
     * year 1900. Years below 100 are expanded: values below 20 are mapped
     * to 20xx, all others to 19xx. The calendar is used in its default
     * (lenient) mode, so out-of-range days or months roll over. The time of
     * day of the result is 00:00:00.000 in the default time zone.
     *
     * @param day   day of month, may be null.
     * @param month month of year (1 = January), may be null.
     * @param year  two to four digit year, may be null.
     * @return the guessed date or null if all three parts are null.
     */
    public static Date guessDate(String day, String month, String year) {
        if (day == null && month == null && year == null) {
            return null;
        }

        int dayI   = parseIntOrDefault(day,   15);
        int monthI = parseIntOrDefault(month, 6);
        int yearI  = parseIntOrDefault(year,  1900);

        if (yearI < 100) {
            yearI += yearI < 20 ? 2000 : 1900;
        }

        Calendar cal = Calendar.getInstance();
        // Reset all fields first; otherwise the current time of day would
        // leak into the result and make it differ from call to call.
        cal.clear();
        cal.set(yearI, monthI-1, dayI);
        return cal.getTime();
    }

    /** Parses the trimmed text as int; returns def if it is null or not a number. */
    private static int parseIntOrDefault(String text, int def) {
        if (text == null) {
            return def;
        }
        try {
            return Integer.parseInt(text.trim());
        }
        catch (NumberFormatException ignored) {
            // Best effort: the date is only a guess, so fall back silently.
            return def;
        }
    }
}
// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org