changeset 3564:e01b9d1bc941

FixA: Corrected the formulas of Grubbs' test for outliers. Still a bit broken. flys-artifacts/trunk@5162 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author Sascha L. Teichmann <sascha.teichmann@intevation.de>
date Tue, 31 Jul 2012 16:14:17 +0000
parents 5063c93dfb8e
children b136113dad53
files flys-artifacts/ChangeLog flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/Outlier.java
diffstat 2 files changed, 61 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- a/flys-artifacts/ChangeLog	Mon Jul 30 11:25:19 2012 +0000
+++ b/flys-artifacts/ChangeLog	Tue Jul 31 16:14:17 2012 +0000
@@ -1,3 +1,11 @@
+2012-07-30	Sascha L. Teichmann	<sascha.teichmann@intevation.de>
+
+	* src/main/java/de/intevation/flys/artifacts/math/Outlier.java:
+	  Corrected the formulas of Grubbs' test for outliers.
+	  TODO: Remove only one(!) data point. Currently it removes
+	  more than on point without recalculating the fitting curve.
+	  This leads to too much removed points.
+
 2012-07-30	Sascha L. Teichmann	<sascha.teichmann@intevation.de>
 
 	* src/main/java/de/intevation/flys/artifacts/states/SQRelation.java:
--- a/flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/Outlier.java	Mon Jul 30 11:25:19 2012 +0000
+++ b/flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/Outlier.java	Tue Jul 31 16:14:17 2012 +0000
@@ -15,6 +15,8 @@
 
 public class Outlier
 {
+    public static final double EPSILON = 1e-5;
+
     public static final double DEFAULT_ALPHA = 0.05;
 
     private static Logger log = Logger.getLogger(Outlier.class);
@@ -52,8 +54,8 @@
         @Override
         public int compareTo(IndexedValue other) {
             int diff = index - other.index;
-            if (index < 0) return -1;
-            return index > 0 ? +1 : 0;
+            if (diff < 0) return -1;
+            return diff > 0 ? +1 : 0;
         }
     } // class IndexedValue
 
@@ -105,6 +107,14 @@
         List<IndexedValue> inputValues,
         double             alpha
     ) {
+        boolean debug = log.isDebugEnabled();
+
+        if (debug) {
+            log.debug("outliers significance: " + alpha);
+        }
+
+        alpha = 1d - alpha;
+
         ArrayList<IndexedValue> outliers = new ArrayList<IndexedValue>();
 
         ArrayList<IndexedValue> values =
@@ -113,6 +123,10 @@
         for (;;) {
             int N = values.size();
 
+            if (debug) {
+                log.debug("Values to check: " + N);
+            }
+
             if (N < 4) {
                 break;
             }
@@ -128,35 +142,60 @@
             double m = mean.getResult();
             double s = std.getResult();
 
+            if (debug) {
+                log.debug("mean: " + m);
+                log.debug("std dev: " + s);
+            }
+
             double maxZ = -Double.MAX_VALUE;
             int iv = -1;
             for (int i = N-1; i >= 0; --i) {
                 IndexedValue v = values.get(i);
-                double z = Math.abs(m - v.getValue())/s;
+                double z = Math.abs(v.getValue()-m);
+                if (debug) {
+                    log.debug("z candidate: " + z);
+                }
                 if (z > maxZ) {
                     maxZ = z;
                     iv = i;
                 }
             }
 
-            double t = Math.sqrt((N*(N-2)*maxZ*maxZ)
-                /((N-1)*(N-1) - N*maxZ*maxZ));
+            if (Math.abs(s) < EPSILON) {
+                break;
+            }
+
+            maxZ /= s;
 
             TDistributionImpl tdist = new TDistributionImpl(N-2);
 
-            try {
-                double p = tdist.cumulativeProbability(t);
+            double t;
 
-                if (p < alpha) {
-                    outliers.add(values.get(iv));
-                    values.remove(iv);
-                }
-                else {
-                    break;
-                }
+            try {
+                t = tdist.inverseCumulativeProbability(alpha/(N+N));
             }
             catch (MathException me) {
                 log.error(me);
+                break;
+            }
+
+            t *= t;
+
+            double za = ((N-1)/Math.sqrt(N))*Math.sqrt(t/(N-2d+t));
+
+            if (debug) {
+                log.debug("max: " + maxZ + " crit: " + za);
+            }
+
+            if (maxZ > za) {
+                outliers.add(values.get(iv));
+                values.remove(iv);
+            }
+            else {
+                if (debug) {
+                    log.debug("values left: " + N);
+                }
+                break;
             }
         }
 

http://dive4elements.wald.intevation.org