diff flys-artifacts/src/main/java/de/intevation/flys/artifacts/model/fixings/FixCalculation.java @ 3011:ab81ffd1343e

FixA: Reactivated rewrite of the outlier checks. flys-artifacts/trunk@4576 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author Sascha L. Teichmann <sascha.teichmann@intevation.de>
date Mon, 04 Jun 2012 16:44:56 +0000
parents 05a3fe8800b3
children e341606faeb4
line wrap: on
line diff
--- a/flys-artifacts/src/main/java/de/intevation/flys/artifacts/model/fixings/FixCalculation.java	Mon Jun 04 10:13:20 2012 +0000
+++ b/flys-artifacts/src/main/java/de/intevation/flys/artifacts/model/fixings/FixCalculation.java	Mon Jun 04 16:44:56 2012 +0000
@@ -25,19 +25,14 @@
 
 import de.intevation.flys.utils.DateAverager;
 import de.intevation.flys.utils.DoubleUtil;
-import de.intevation.flys.utils.Pair;
+import de.intevation.flys.utils.EpsilonComparator;
 
 import java.util.ArrayList;
 import java.util.Date;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-
-import org.apache.commons.math.MathException;
-
-import org.apache.commons.math.optimization.fitting.CurveFitter;
-
-import org.apache.commons.math.optimization.general.LevenbergMarquardtOptimizer;
+import java.util.TreeMap;
 
 import org.apache.log4j.Logger;
 
@@ -46,6 +41,8 @@
 {
     private static Logger log = Logger.getLogger(FixCalculation.class);
 
+    public static final double EPSILON = 1e-4;
+
     protected String       river;
     protected double       from;
     protected double       to;
@@ -71,6 +68,7 @@
         DateRange [] analysisPeriods = access.getAnalysisPeriods();
         Integer   qSectorStart       = access.getQSectorStart();
         Integer   qSectorEnd         = access.getQSectorEnd();
+        Boolean   preprocessing      = access.getPreprocessing();
 
         if (river == null) {
             // TODO: i18n
@@ -117,6 +115,11 @@
             addProblem("fix.missing.qend.sector");
         }
 
+        if (preprocessing == null) {
+            // TODO: i18n
+            addProblem("fix.missing.preprocessing");
+        }
+
         if (!hasProblems()) {
             this.river           = river;
             this.from            = from;
@@ -127,6 +130,7 @@
             this.analysisPeriods = analysisPeriods;
             this.qSectorStart    = qSectorStart;
             this.qSectorEnd      = qSectorEnd;
+            this.preprocessing   = preprocessing;
         }
     }
 
@@ -153,30 +157,9 @@
             return new CalculationResult(this);
         }
 
-        FixingsColumnFactory fcf = FixingsColumnFactory.getInstance();
-
-        List<FixingsColumn> dataColumns =
-            new ArrayList<FixingsColumn>(events.length);
-
-        for (int eventId: events) {
-            IdFilter idFilter = new IdFilter(eventId);
+        final List<Column> eventColumns = getEventColumns(overview);
 
-            List<Fixing.Column> columns = overview.filter(null, idFilter);
-            if (columns.isEmpty()) {
-                // TODO: i18n
-                addProblem("fix.missing.column", eventId);
-                continue;
-            }
-            FixingsColumn dataColumn = fcf.getColumnData(columns.get(0));
-            if (dataColumn == null) {
-                // TODO: i18n
-                addProblem("fix.cannot.load.data", eventId);
-                continue;
-            }
-            dataColumns.add(dataColumn);
-        }
-
-        if (dataColumns.size() < 2) {
+        if (eventColumns.size() < 2) {
             // TODO: i18n
             addProblem("fix.too.less.data.columns");
             return new CalculationResult(this);
@@ -184,8 +167,33 @@
 
         double [] kms = DoubleUtil.explode(from, to, step / 1000.0);
 
-        double [] ws = new double[dataColumns.size()];
-        double [] qs = new double[ws.length];
+        final double [] qs = new double[eventColumns.size()];
+        final double [] ws = new double[qs.length];
+
+        // Depending on preprocessing we need to find the outliers.
+        Fitting.QWFactory qwFactory = !preprocessing
+            ? null // No outliers
+            : new Fitting.QWFactory() {
+                @Override
+                public QW create(double q, double w) {
+                    // Check all the event columns for close match
+                    // and take the description and the date from meta.
+                    for (int i = 0; i < qs.length; ++i) {
+                        if (Math.abs(qs[i]-q) < EPSILON
+                        &&  Math.abs(ws[i]-w) < EPSILON) {
+                            Column column = eventColumns.get(i);
+                            return new QW(
+                                q, w,
+                                column.getDescription(),
+                                column.getDate());
+                        }
+                    }
+                    log.warn("cannot find column for (" + q + ", " + w + ")");
+                    return new QW(q, w);
+                }
+            };
+
+        Fitting fitting = new Fitting(func, qwFactory);
 
         String [] parameterNames = func.getParameterNames();
 
@@ -198,43 +206,44 @@
             log.debug("number of kms: " + kms.length);
         }
 
-        int kmIndex     = results.columnIndex("km");
-        int chiSqrIndex = results.columnIndex("chi_sqr");
+        TreeMap<Double, QW []> outliers =
+            new TreeMap<Double, QW []>(EpsilonComparator.INSTANCE);
+
+        int kmIndex             = results.columnIndex("km");
+        int chiSqrIndex         = results.columnIndex("chi_sqr");
+        int [] parameterIndices = results.columnIndices(parameterNames);
 
         int numFailed = 0;
 
         for (int i = 0; i < kms.length; ++i) {
             double km = kms[i];
 
+            // Fill Qs and Ws from event columns.
             for (int j = 0; j < ws.length; ++j) {
-                FixingsColumn column = dataColumns.get(j);
-                qs[j] = column.getQ(km);
-                boolean interpolated = column.getW(km, ws, j);
+                boolean interpolated =
+                    eventColumns.get(j).getQW(km, qs, ws, j);
                 // TODO: mark as interpolated.
             }
 
-            Pair<double [], Double> fitResult = fit(func, km, ws, qs);
+            fitting.reset();
 
-            // TODO: Do preprocessing here!
-            if (fitResult == null) { // Problems are reported already.
+            if (!fitting.fit(qs, ws)) {
                 ++numFailed;
+                // TODO: i18n
+                addProblem(km, "fix.fitting.failed");
                 continue;
             }
 
+            if (fitting.hasOutliers()) {
+                outliers.put(Double.valueOf(km), fitting.outliersToArray());
+            }
+
             int row = results.newRow();
 
             results.set(row, kmIndex, km);
-            results.set(row, chiSqrIndex, fitResult.getB());
-
-            double [] parameters = fitResult.getA();
-            for (int j = 0; j < parameters.length; ++j) {
-                if (Double.isNaN(parameters[j])) {
-                    invalid = true;
-                }
-                else {
-                    results.set(row, parameterNames[j], parameters[j]);
-                }
-            }
+            results.set(row, chiSqrIndex, fitting.getChiSquare());
+            invalid |= results.set(
+                row, parameterIndices, fitting.getParameters());
         }
 
         if (debug) {
@@ -254,11 +263,42 @@
             overview,
             results);
 
-        FixResult fr = new FixResult(results, deltaWTsKM);
+        FixResult fr = new FixResult(results, deltaWTsKM, outliers);
 
         return new CalculationResult(fr, this);
     }
 
+
+    protected List<Column> getEventColumns(FixingsOverview overview) {
+
+        FixingsColumnFactory fcf = FixingsColumnFactory.getInstance();
+
+        List<Column> columns = new ArrayList<Column>(events.length);
+
+        for (int eventId: events) {
+            IdFilter idFilter = new IdFilter(eventId);
+
+            List<Fixing.Column> metas = overview.filter(null, idFilter);
+
+            if (metas.isEmpty()) {
+                // TODO: i18n
+                addProblem("fix.missing.column", eventId);
+                continue;
+            }
+
+            FixingsColumn data = fcf.getColumnData(metas.get(0));
+            if (data == null) {
+                // TODO: i18n
+                addProblem("fix.cannot.load.data", eventId);
+                continue;
+            }
+
+            columns.add(new Column(metas.get(0), data));
+        }
+
+        return columns;
+    }
+
     public DeltaWTsKM calculateDeltaWTs(
         Function        function,
         FixingsOverview overview,
@@ -413,7 +453,7 @@
                         }
 
                         Column column = cc.getColumn(meta);
-                        if (column == null || !column.getWQ(km, wq)) {
+                        if (column == null || !column.getQW(km, wq)) {
                             continue;
                         }
 
@@ -427,7 +467,7 @@
                         Date date = column.getDate();
                         String description = column.getDescription();
 
-                        QWD qwd = new QWD(wq[1], wq[0], dw, date, description);
+                        QWD qwd = new QWD(wq[1], wq[0], description, date, dw);
 
                         qwds.add(qwd);
 
@@ -451,7 +491,7 @@
                             String avgDescription = "avg.deltawt." + qSector;
 
                             QWD avgQWD = new QWD(
-                                avgQ, avgW, avgDw, avgDate, avgDescription);
+                                avgQ, avgW, avgDescription, avgDate, avgDw);
 
                             // TODO: Store average value.
                         }
@@ -490,7 +530,17 @@
             return meta.getDescription();
         }
 
-        public boolean getWQ(double km, double [] wq) {
+        public boolean getQW(
+            double    km, 
+            double [] qs, 
+            double [] ws,
+            int       index
+        ) {
+            qs[index] = data.getQ(km);
+            return data.getW(km, ws, index);
+        }
+
+        public boolean getQW(double km, double [] wq) {
             data.getW(km, wq, 0);
             if (Double.isNaN(wq[0])) return false;
             wq[1] = data.getQ(km);
@@ -587,35 +637,5 @@
         System.arraycopy(parameters, 0, result, 2, parameters.length);
         return result;
     }
-
-    protected Pair<double [], Double> fit(
-        Function  function,
-        double    km,
-        double [] ws, 
-        double [] qs
-    ) {
-        LevenbergMarquardtOptimizer lmo = new LevenbergMarquardtOptimizer();
-        CurveFitter cf = new CurveFitter(lmo);
-
-        for (int i = 0; i < ws.length; ++i) {
-            if (!Double.isNaN(ws[i]) && !Double.isNaN(qs[i])) {
-                cf.addObservedPoint(qs[i], ws[i]);
-            }
-        }
-
-        try {
-            double [] parameters =
-                cf.fit(function, function.getInitialGuess());
-            double chiSqr = lmo.getChiSquare();
-
-            return new Pair<double [], Double>(parameters, chiSqr);
-        }
-        catch (MathException me) {
-            log.warn(me, me);
-            addProblem(km, "fix.fitting.failed");
-        }
-
-        return null;
-    }
 }
 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org