view flys-artifacts/src/main/java/de/intevation/flys/artifacts/model/fixings/FixCalculation.java @ 3011:ab81ffd1343e

FixA: Reactivated rewrite of the outlier checks. flys-artifacts/trunk@4576 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author Sascha L. Teichmann <sascha.teichmann@intevation.de>
date Mon, 04 Jun 2012 16:44:56 +0000
parents 05a3fe8800b3
children e341606faeb4
line wrap: on
line source
package de.intevation.flys.artifacts.model.fixings;

import de.intevation.flys.artifacts.FixationArtifactAccess;

import de.intevation.flys.artifacts.math.fitting.Function;
import de.intevation.flys.artifacts.math.fitting.FunctionFactory;

import de.intevation.flys.artifacts.model.Calculation;
import de.intevation.flys.artifacts.model.CalculationResult;
import de.intevation.flys.artifacts.model.FixingsColumn;
import de.intevation.flys.artifacts.model.FixingsColumnFactory;

import de.intevation.flys.artifacts.model.FixingsOverview.AndFilter;
import de.intevation.flys.artifacts.model.FixingsOverview.DateRangeFilter;
import de.intevation.flys.artifacts.model.FixingsOverview.Fixing;
import de.intevation.flys.artifacts.model.FixingsOverview.IdFilter;
import de.intevation.flys.artifacts.model.FixingsOverview.KmFilter;
import de.intevation.flys.artifacts.model.FixingsOverview.Range;
import de.intevation.flys.artifacts.model.FixingsOverview.SectorRangeFilter;
import de.intevation.flys.artifacts.model.FixingsOverview.SectorFilter;

import de.intevation.flys.artifacts.model.FixingsOverview;
import de.intevation.flys.artifacts.model.FixingsOverviewFactory;
import de.intevation.flys.artifacts.model.Parameters;

import de.intevation.flys.utils.DateAverager;
import de.intevation.flys.utils.DoubleUtil;
import de.intevation.flys.utils.EpsilonComparator;

import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import org.apache.log4j.Logger;

public class FixCalculation
extends      Calculation
{
    private static Logger log = Logger.getLogger(FixCalculation.class);

    public static final double EPSILON = 1e-4;

    protected String       river;
    protected double       from;
    protected double       to;
    protected double       step;
    protected boolean      preprocessing;
    protected String       function;
    protected int []       events;
    protected DateRange [] analysisPeriods;
    protected int          qSectorStart;
    protected int          qSectorEnd;

    public FixCalculation() {
    }

    public FixCalculation(FixationArtifactAccess access) {

        String    river              = access.getRiver();
        Double    from               = access.getFrom();
        Double    to                 = access.getTo();
        Double    step               = access.getStep();
        String    function           = access.getFunction();
        int []    events             = access.getEvents();
        DateRange [] analysisPeriods = access.getAnalysisPeriods();
        Integer   qSectorStart       = access.getQSectorStart();
        Integer   qSectorEnd         = access.getQSectorEnd();
        Boolean   preprocessing      = access.getPreprocessing();

        if (river == null) {
            // TODO: i18n
            addProblem("fix.missing.river");
        }

        if (from == null) {
            // TODO: i18n
            addProblem("fix.missing.from");
        }

        if (to == null) {
            // TODO: i18n
            addProblem("fix.missing.to");
        }

        if (step == null) {
            // TODO: i18n
            addProblem("fix.missing.step");
        }

        if (function == null) {
            // TODO: i18n
            addProblem("fix.missing.function");
        }

        if (events == null || events.length < 1) {
            // TODO: i18n
            addProblem("fix.missing.events");
        }

        if (analysisPeriods == null || analysisPeriods.length < 1) {
            // TODO: i18n
            addProblem("fix.missing.analysis.periods");
        }

        if (qSectorStart == null) {
            // TODO: i18n
            addProblem("fix.missing.qstart.sector");
        }

        if (qSectorEnd == null) {
            // TODO: i18n
            addProblem("fix.missing.qend.sector");
        }

        if (preprocessing == null) {
            // TODO: i18n
            addProblem("fix.missing.preprocessing");
        }

        if (!hasProblems()) {
            this.river           = river;
            this.from            = from;
            this.to              = to;
            this.step            = step;
            this.function        = function;
            this.events          = events;
            this.analysisPeriods = analysisPeriods;
            this.qSectorStart    = qSectorStart;
            this.qSectorEnd      = qSectorEnd;
            this.preprocessing   = preprocessing;
        }
    }

    public CalculationResult calculate() {

        boolean debug = log.isDebugEnabled();

        FixingsOverview overview =
            FixingsOverviewFactory.getOverview(river);

        if (overview == null) {
            addProblem("fix.no.overview.available");
        }

        Function func = FunctionFactory.getInstance()
            .getFunction(function);

        if (func == null) {
            // TODO: i18n
            addProblem("fix.invalid.function.name");
        }

        if (hasProblems()) {
            return new CalculationResult(this);
        }

        final List<Column> eventColumns = getEventColumns(overview);

        if (eventColumns.size() < 2) {
            // TODO: i18n
            addProblem("fix.too.less.data.columns");
            return new CalculationResult(this);
        }

        double [] kms = DoubleUtil.explode(from, to, step / 1000.0);

        final double [] qs = new double[eventColumns.size()];
        final double [] ws = new double[qs.length];

        // Depending on preprocessing we need to find the outliers.
        Fitting.QWFactory qwFactory = !preprocessing
            ? null // No outliers
            : new Fitting.QWFactory() {
                @Override
                public QW create(double q, double w) {
                    // Check all the event columns for close match
                    // and take the description and the date from meta.
                    for (int i = 0; i < qs.length; ++i) {
                        if (Math.abs(qs[i]-q) < EPSILON
                        &&  Math.abs(ws[i]-w) < EPSILON) {
                            Column column = eventColumns.get(i);
                            return new QW(
                                q, w,
                                column.getDescription(),
                                column.getDate());
                        }
                    }
                    log.warn("cannot find column for (" + q + ", " + w + ")");
                    return new QW(q, w);
                }
            };

        Fitting fitting = new Fitting(func, qwFactory);

        String [] parameterNames = func.getParameterNames();

        Parameters results =
            new Parameters(createColumnNames(parameterNames));

        boolean invalid = false;

        if (debug) {
            log.debug("number of kms: " + kms.length);
        }

        TreeMap<Double, QW []> outliers =
            new TreeMap<Double, QW []>(EpsilonComparator.INSTANCE);

        int kmIndex             = results.columnIndex("km");
        int chiSqrIndex         = results.columnIndex("chi_sqr");
        int [] parameterIndices = results.columnIndices(parameterNames);

        int numFailed = 0;

        for (int i = 0; i < kms.length; ++i) {
            double km = kms[i];

            // Fill Qs and Ws from event columns.
            for (int j = 0; j < ws.length; ++j) {
                boolean interpolated =
                    eventColumns.get(j).getQW(km, qs, ws, j);
                // TODO: mark as interpolated.
            }

            fitting.reset();

            if (!fitting.fit(qs, ws)) {
                ++numFailed;
                // TODO: i18n
                addProblem(km, "fix.fitting.failed");
                continue;
            }

            if (fitting.hasOutliers()) {
                outliers.put(Double.valueOf(km), fitting.outliersToArray());
            }

            int row = results.newRow();

            results.set(row, kmIndex, km);
            results.set(row, chiSqrIndex, fitting.getChiSquare());
            invalid |= results.set(
                row, parameterIndices, fitting.getParameters());
        }

        if (debug) {
            log.debug("success: " + (kms.length - numFailed));
            log.debug("failed: " + numFailed);
        }

        if (invalid) {
            // TODO: i18n
            addProblem("fix.invalid.values");
            results.removeNaNs();
        }

        // Calculate Delta W/t
        DeltaWTsKM deltaWTsKM = calculateDeltaWTs(
            func,
            overview,
            results);

        FixResult fr = new FixResult(results, deltaWTsKM, outliers);

        return new CalculationResult(fr, this);
    }


    protected List<Column> getEventColumns(FixingsOverview overview) {

        FixingsColumnFactory fcf = FixingsColumnFactory.getInstance();

        List<Column> columns = new ArrayList<Column>(events.length);

        for (int eventId: events) {
            IdFilter idFilter = new IdFilter(eventId);

            List<Fixing.Column> metas = overview.filter(null, idFilter);

            if (metas.isEmpty()) {
                // TODO: i18n
                addProblem("fix.missing.column", eventId);
                continue;
            }

            FixingsColumn data = fcf.getColumnData(metas.get(0));
            if (data == null) {
                // TODO: i18n
                addProblem("fix.cannot.load.data", eventId);
                continue;
            }

            columns.add(new Column(metas.get(0), data));
        }

        return columns;
    }

    public DeltaWTsKM calculateDeltaWTs(
        Function        function,
        FixingsOverview overview,
        Parameters      results
    ) {
        boolean debug = log.isDebugEnabled();

        DeltaWTsKM deltaWTsKM = new DeltaWTsKM(results.size());

        Column [][] analysisColumns = getAnalysisColumns(overview);

        int [] parameterIndices = 
            results.columnIndices(function.getParameterNames());

        double [] parameterValues =
            new double[parameterIndices.length];

        double [] ow = new double[1];

        int kmIdx = results.columnIndex("km");

        for (int i = 0, N = results.size(); i < N; ++i) {
            double km = results.get(i, kmIdx);
            results.get(i, parameterIndices, parameterValues);

            DeltaWTsKM.KM dwtkm = new DeltaWTsKM.KM(km);
            deltaWTsKM.add(dwtkm);

            // This is the paraterized function for a given km.
            de.intevation.flys.artifacts.math.Function instance =
                function.instantiate(parameterValues);

            // Evaluate all columns for all analysis periods.
            for (int j = 0; j < analysisColumns.length; ++j) {
                Column [] periodColumns = analysisColumns[j];

                int failedQ = 0;
                int failedW = 0;
                int failedC = 0;

                for (int k = 0; k < periodColumns.length; ++k) {
                    Column pc = periodColumns[k];

                    // Q from real data.
                    double q = pc.data.getQ(km);
                    if (Double.isNaN(q)) {
                        ++failedQ;
                        continue;
                    }

                    // Calculate W from function.
                    double nw = instance.value(q);
                    if (Double.isNaN(nw)) {
                        ++failedC;
                        continue;
                    }

                    // W from real data.
                    pc.data.getW(km, ow);

                    if (Double.isNaN(ow[0])) {
                        ++failedW;
                        continue;
                    }

                    double deltaW = (ow[0] - nw)*100.0; // in cm

                    DeltaWT deltaWT = new DeltaWT(
                        deltaW,
                        pc.meta.getStartTime(),
                        pc.meta.getDescription());

                    dwtkm.add(deltaWT);
                }
                if (debug) {
                    log.debug("failed W: " + failedW);
                    log.debug("failed Q: " + failedQ);
                    log.debug("failed C: " + failedC);
                    log.debug("input size: " + periodColumns.length);
                    log.debug("outpt size: " + dwtkm.size());
                }
            }
        }

        return deltaWTsKM;
    }

    protected List<AnalysisPeriodsKM> calculateAnalysisPeriods(
        Function        function,
        Parameters      parameters,
        FixingsOverview overview
    ) {
        ArrayList<AnalysisPeriodsKM> results
            = new ArrayList<AnalysisPeriodsKM>(parameters.size());

        Range range = new Range(from, to);

        int kmIndex = parameters.columnIndex("km");

        ColumnCache cc = new ColumnCache();

        double [] wq = new double[2];

        int [] parameterIndices = 
            parameters.columnIndices(function.getParameterNames());

        double [] parameterValues = new double[parameterIndices.length];

        DateAverager dateAverager = new DateAverager();

        for (int row = 0, P = parameters.size(); row < P; ++row) {
            double km = parameters.get(row, kmIndex);
            parameters.get(row, parameterIndices, parameterValues);

            // This is the paraterized function for a given km.
            de.intevation.flys.artifacts.math.Function instance =
                function.instantiate(parameterValues);

            KmFilter kmFilter = new KmFilter(km);

            for (DateRange analysisPeriod: analysisPeriods) {
                DateRangeFilter drf = new DateRangeFilter(
                    analysisPeriod.getFrom(),
                    analysisPeriod.getTo());

                // for all Q sectors.
                for (int qSector = qSectorStart; qSector < qSectorEnd; ++qSector) {

                    AndFilter filter = new AndFilter();
                    filter.add(kmFilter);
                    filter.add(new SectorFilter(qSector));
                    filter.add(drf);

                    List<Fixing.Column> metas = overview.filter(range, filter);

                    if (metas.isEmpty()) {
                        // No fixings for km and analysis period
                        continue;
                    }

                    double sumQ = 0.0;
                    double sumW = 0.0;

                    List<QWD> qwds = new ArrayList<QWD>(metas.size());

                    dateAverager.clear();

                    for (Fixing.Column meta: metas) {
                        if (meta.findQSector(km) != qSector) {
                            // Ignore not matching sectors.
                            continue;
                        }

                        Column column = cc.getColumn(meta);
                        if (column == null || !column.getQW(km, wq)) {
                            continue;
                        }

                        double fw = instance.value(wq[1]);
                        if (Double.isNaN(fw)) {
                            continue;
                        }

                        double dw = (wq[0] - fw)*100.0;

                        Date date = column.getDate();
                        String description = column.getDescription();

                        QWD qwd = new QWD(wq[1], wq[0], description, date, dw);

                        qwds.add(qwd);

                        sumW += wq[0];
                        sumQ += wq[1];

                        dateAverager.add(date);
                    }

                    // Calulate average per Q sector.
                    int N = qwds.size();
                    if (N > 0) {
                        double avgW = sumW / N;
                        double avgQ = sumQ / N;

                        double avgFw = instance.value(avgQ);
                        if (!Double.isNaN(avgFw)) {
                            double avgDw = (avgW - avgFw)*100.0;
                            Date avgDate = dateAverager.getAverage();

                            String avgDescription = "avg.deltawt." + qSector;

                            QWD avgQWD = new QWD(
                                avgQ, avgW, avgDescription, avgDate, avgDw);

                            // TODO: Store average value.
                        }
                        else {
                            // TODO: Store
                        }
                    }
                } // for all Q sectors
            }
        }

        return results;
    }

    /** Helper class to bundle the meta information of a column 
     *  and the real data.
     */
    protected static class Column {

        protected Fixing.Column meta;
        protected FixingsColumn data;

        public Column() {
        }

        public Column(Fixing.Column meta, FixingsColumn data) {
            this.meta = meta;
            this.data = data;
        }

        public Date getDate() {
            return meta.getStartTime();
        }

        public String getDescription() {
            return meta.getDescription();
        }

        public boolean getQW(
            double    km, 
            double [] qs, 
            double [] ws,
            int       index
        ) {
            qs[index] = data.getQ(km);
            return data.getW(km, ws, index);
        }

        public boolean getQW(double km, double [] wq) {
            data.getW(km, wq, 0);
            if (Double.isNaN(wq[0])) return false;
            wq[1] = data.getQ(km);
            return !Double.isNaN(wq[1]);
        }
    } // class Column


    /**
     * Helper class to find the data belonging to meta info more quickly.
     */
    public static class ColumnCache {

        protected Map<Integer, Column> columns;

        public ColumnCache() {
            columns = new HashMap<Integer, Column>();
        }

        public Column getColumn(Fixing.Column meta) {
            Integer key = meta.getId();
            Column column = columns.get(key);
            if (column == null) {
                FixingsColumn data = FixingsColumnFactory
                    .getInstance()
                    .getColumnData(meta);
                if (data != null) {
                    column = new Column(meta, data);
                    columns.put(key, column);
                }
            }
            return column;
        }
    } // class ColumnCache

    /** Fetch meta and data columns for analysis periods. */
    protected Column [][] getAnalysisColumns(FixingsOverview overview) {

        boolean debug = log.isDebugEnabled();
        if (debug) {
            log.debug("number analysis periods: " + analysisPeriods.length);
        }

        Column columns [][] = new Column[analysisPeriods.length][];

        Range range = new Range(from, to);
        SectorRangeFilter sectorRangeFilter =
            new SectorRangeFilter(qSectorStart, qSectorEnd);

        FixingsColumnFactory fcf = FixingsColumnFactory.getInstance();

        for (int i = 0; i < columns.length; ++i) {

            // Construct filter for period.
            DateRange period = analysisPeriods[i];

            AndFilter filter = new AndFilter();

            DateRangeFilter dateRangeFilter =
                new DateRangeFilter(period.getFrom(), period.getTo());

            filter.add(dateRangeFilter);
            filter.add(sectorRangeFilter);

            List<Fixing.Column> metaCols = overview.filter(range, filter);

            if (debug) {
                log.debug("number of filtered columns: " + metaCols.size());
            }

            ArrayList<Column> cols = new ArrayList<Column>(metaCols.size());

            // Only use columns which have data.
            for (Fixing.Column meta: metaCols) {
                FixingsColumn data = fcf.getColumnData(meta);
                if (data != null) {
                    cols.add(new Column(meta, data));
                }
            }

            if (debug) {
                log.debug("failed loading: " + (metaCols.size()-cols.size()));
            }
            columns[i] = cols.toArray(new Column[cols.size()]);
        }

        return columns;
    }

    protected static String [] createColumnNames(String [] parameters) {
        String [] result = new String[parameters.length + 2];
        result[0] = "km";
        result[0] = "chi_sqr";
        System.arraycopy(parameters, 0, result, 2, parameters.length);
        return result;
    }
}
// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org