changeset 3565:b136113dad53

FixA: Only evict only one(!) data point as outlier before recalculating the function. flys-artifacts/trunk@5163 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author Sascha L. Teichmann <sascha.teichmann@intevation.de>
date Tue, 31 Jul 2012 16:46:36 +0000
parents e01b9d1bc941
children 8d0f06b76e09
files flys-artifacts/ChangeLog flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/Outlier.java flys-artifacts/src/main/java/de/intevation/flys/artifacts/model/fixings/Fitting.java
diffstat 3 files changed, 87 insertions(+), 186 deletions(-) [+]
line wrap: on
line diff
--- a/flys-artifacts/ChangeLog	Tue Jul 31 16:14:17 2012 +0000
+++ b/flys-artifacts/ChangeLog	Tue Jul 31 16:46:36 2012 +0000
@@ -1,4 +1,12 @@
-2012-07-30	Sascha L. Teichmann	<sascha.teichmann@intevation.de>
+2012-07-31	Sascha L. Teichmann	<sascha.teichmann@intevation.de>
+
+	* src/main/java/de/intevation/flys/artifacts/math/Outlier.java:
+	  Only evict only one(!) data point as outlier.
+
+	* src/main/java/de/intevation/flys/artifacts/model/fixings/Fitting.java:
+	  Recalculate the function when one point is removed.
+
+2012-07-31	Sascha L. Teichmann	<sascha.teichmann@intevation.de>
 
 	* src/main/java/de/intevation/flys/artifacts/math/Outlier.java:
 	  Corrected the formulas of Grubbs' test for outliers.
--- a/flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/Outlier.java	Tue Jul 31 16:14:17 2012 +0000
+++ b/flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/Outlier.java	Tue Jul 31 16:46:36 2012 +0000
@@ -1,16 +1,14 @@
 package de.intevation.flys.artifacts.math;
 
+import java.util.List;
+
 import org.apache.commons.math.MathException;
 
+import org.apache.commons.math.distribution.TDistributionImpl;
+
 import org.apache.commons.math.stat.descriptive.moment.Mean;
 import org.apache.commons.math.stat.descriptive.moment.StandardDeviation;
 
-import org.apache.commons.math.distribution.TDistributionImpl;
-
-import java.util.Collections;
-import java.util.List;
-import java.util.ArrayList;
-
 import org.apache.log4j.Logger;
 
 public class Outlier
@@ -21,92 +19,14 @@
 
     private static Logger log = Logger.getLogger(Outlier.class);
 
-    public static class IndexedValue
-    implements          Comparable<IndexedValue>
-    {
-        protected int    index;
-        protected double value;
-
-        public IndexedValue() {
-        }
-
-        public IndexedValue(int index, double value) {
-            this.index = index;
-            this.value = value;
-        }
-
-        public int getIndex() {
-            return index;
-        }
-
-        public void setIndex(int index) {
-            this.index = index;
-        }
-
-        public double getValue() {
-            return value;
-        }
-
-        public void setValue(double value) {
-            this.value = value;
-        }
-
-        @Override
-        public int compareTo(IndexedValue other) {
-            int diff = index - other.index;
-            if (diff < 0) return -1;
-            return diff > 0 ? +1 : 0;
-        }
-    } // class IndexedValue
-
-    public static class Outliers {
-
-        protected List<IndexedValue> retained;
-        protected List<IndexedValue> removed;
-
-        public Outliers() {
-        }
-
-        public Outliers(
-            List<IndexedValue> retained,
-            List<IndexedValue> removed
-        ) {
-            this.retained = retained;
-            this.removed  = removed;
-        }
-
-        public boolean hasOutliers() {
-            return !removed.isEmpty();
-        }
-
-        public List<IndexedValue> getRetained() {
-            return retained;
-        }
-
-        public void setRetained(List<IndexedValue> retained) {
-            this.retained = retained;
-        }
-
-        public List<IndexedValue> getRemoved() {
-            return removed;
-        }
-
-        public void setRemoved(List<IndexedValue> removed) {
-            this.removed = removed;
-        }
-    } // class Outliers
-
-    public Outlier() {
+    protected Outlier() {
     }
 
-    public static Outliers findOutliers(List<IndexedValue> inputValues) {
-        return findOutliers(inputValues, DEFAULT_ALPHA);
+    public static Integer findOutlier(List<Double> values) {
+        return findOutlier(values, DEFAULT_ALPHA);
     }
 
-    public static Outliers findOutliers(
-        List<IndexedValue> inputValues,
-        double             alpha
-    ) {
+    public static Integer findOutlier(List<Double> values, double alpha) {
         boolean debug = log.isDebugEnabled();
 
         if (debug) {
@@ -115,93 +35,73 @@
 
         alpha = 1d - alpha;
 
-        ArrayList<IndexedValue> outliers = new ArrayList<IndexedValue>();
-
-        ArrayList<IndexedValue> values =
-            new ArrayList<IndexedValue>(inputValues);
-
-        for (;;) {
-            int N = values.size();
-
-            if (debug) {
-                log.debug("Values to check: " + N);
-            }
-
-            if (N < 4) {
-                break;
-            }
-
-            Mean mean = new Mean();
-            StandardDeviation std = new StandardDeviation();
-
-            for (IndexedValue value: values) {
-                mean.increment(value.getValue());
-                std .increment(value.getValue());
-            }
-
-            double m = mean.getResult();
-            double s = std.getResult();
-
-            if (debug) {
-                log.debug("mean: " + m);
-                log.debug("std dev: " + s);
-            }
+        int N = values.size();
 
-            double maxZ = -Double.MAX_VALUE;
-            int iv = -1;
-            for (int i = N-1; i >= 0; --i) {
-                IndexedValue v = values.get(i);
-                double z = Math.abs(v.getValue()-m);
-                if (debug) {
-                    log.debug("z candidate: " + z);
-                }
-                if (z > maxZ) {
-                    maxZ = z;
-                    iv = i;
-                }
-            }
-
-            if (Math.abs(s) < EPSILON) {
-                break;
-            }
-
-            maxZ /= s;
-
-            TDistributionImpl tdist = new TDistributionImpl(N-2);
-
-            double t;
+        if (debug) {
+            log.debug("Values to check: " + N);
+        }
 
-            try {
-                t = tdist.inverseCumulativeProbability(alpha/(N+N));
-            }
-            catch (MathException me) {
-                log.error(me);
-                break;
-            }
-
-            t *= t;
-
-            double za = ((N-1)/Math.sqrt(N))*Math.sqrt(t/(N-2d+t));
+        if (N < 3) {
+            return null;
+        }
 
-            if (debug) {
-                log.debug("max: " + maxZ + " crit: " + za);
-            }
+        Mean mean = new Mean();
+        StandardDeviation std = new StandardDeviation();
 
-            if (maxZ > za) {
-                outliers.add(values.get(iv));
-                values.remove(iv);
-            }
-            else {
-                if (debug) {
-                    log.debug("values left: " + N);
-                }
-                break;
+        for (Double value: values) {
+            double v = value.doubleValue();
+            mean.increment(v);
+            std .increment(v);
+        }
+
+        double m = mean.getResult();
+        double s = std.getResult();
+
+        if (debug) {
+            log.debug("mean: " + m);
+            log.debug("std dev: " + s);
+        }
+
+        double maxZ = -Double.MAX_VALUE;
+        int iv = -1;
+        for (int i = N-1; i >= 0; --i) {
+            double v = values.get(i).doubleValue();
+            double z = Math.abs(v - m);
+            if (z > maxZ) {
+                maxZ = z;
+                iv = i;
             }
         }
 
-        Collections.sort(outliers);
+        if (Math.abs(s) < EPSILON) {
+            return null;
+        }
 
-        return new Outliers(values, outliers);
+        maxZ /= s;
+
+        TDistributionImpl tdist = new TDistributionImpl(N-2);
+
+        double t;
+
+        try {
+            t = tdist.inverseCumulativeProbability(alpha/(N+N));
+        }
+        catch (MathException me) {
+            log.error(me);
+            return null;
+        }
+
+        t *= t;
+
+        double za = ((N-1)/Math.sqrt(N))*Math.sqrt(t/(N-2d+t));
+
+        if (debug) {
+            log.debug("max: " + maxZ + " crit: " + za);
+        }
+
+        return maxZ > za
+            ? Integer.valueOf(iv)
+            : null;
     }
 }
 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :
--- a/flys-artifacts/src/main/java/de/intevation/flys/artifacts/model/fixings/Fitting.java	Tue Jul 31 16:14:17 2012 +0000
+++ b/flys-artifacts/src/main/java/de/intevation/flys/artifacts/model/fixings/Fitting.java	Tue Jul 31 16:46:36 2012 +0000
@@ -1,8 +1,5 @@
 package de.intevation.flys.artifacts.model.fixings;
 
-import de.intevation.flys.artifacts.math.Outlier.IndexedValue;
-import de.intevation.flys.artifacts.math.Outlier.Outliers;
-
 import de.intevation.flys.artifacts.math.Outlier;
 
 import de.intevation.flys.artifacts.math.fitting.Function;
@@ -157,7 +154,7 @@
             return false;
         }
 
-        List<IndexedValue> inputs = new ArrayList<IndexedValue>(xs.size());
+        List<Double> inputs = new ArrayList<Double>(xs.size());
 
         de.intevation.flys.artifacts.math.Function instance = null;
 
@@ -204,27 +201,23 @@
             for (int i = 0, N = xs.size(); i < N; ++i) {
                 double y = instance.value(xs.getQuick(i));
                 if (Double.isNaN(y)) {
-                    continue;
+                    y = Double.MAX_VALUE;
                 }
-                inputs.add(new IndexedValue(i, ys.getQuick(i) - y));
+                inputs.add(Double.valueOf(ys.getQuick(i) - y));
             }
 
-            Outliers outliers = Outlier.findOutliers(inputs);
+            Integer outlier = Outlier.findOutlier(inputs);
 
-            if (!outliers.hasOutliers()) {
+            if (outlier == null) {
                 break;
             }
 
-            List<IndexedValue> rem = outliers.getRemoved();
-
-            for (int i = rem.size()-1; i >= 0; --i) {
-                int idx = rem.get(i).getIndex();
-                removed.add(
-                    qwdFactory.create(
-                        xs.getQuick(idx), ys.getQuick(idx)));
-                xs.remove(idx);
-                ys.remove(idx);
-            }
+            int idx = outlier.intValue();
+            removed.add(
+                qwdFactory.create(
+                    xs.getQuick(idx), ys.getQuick(idx)));
+            xs.remove(idx);
+            ys.remove(idx);
         }
 
         StandardDeviation stdDev = new StandardDeviation();

http://dive4elements.wald.intevation.org