changeset 4794:a7d080347ac3

MINFO: Allow two methods for outlier test in SQ relation. * Methods can be switched as option in conf.xml. * Methods: - Find outliers via multiples of the standard deviation. - Grubbs (used in Fix-Analysis)
author Raimund Renkert <rrenkert@intevation.de>
date Fri, 11 Jan 2013 13:57:38 +0100
parents c0d6391bec6f
children 8ee270a3ef25 bf2fd9c58ac4
files flys-artifacts/doc/conf/conf.xml flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/GrubbsOutlier.java flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/Outlier.java flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/StdDevOutlier.java flys-artifacts/src/main/java/de/intevation/flys/artifacts/model/fixings/Fitting.java flys-artifacts/src/main/java/de/intevation/flys/artifacts/model/sq/Outlier.java
diffstat 6 files changed, 235 insertions(+), 146 deletions(-) [+]
line wrap: on
line diff
--- a/flys-artifacts/doc/conf/conf.xml	Wed Jan 09 13:17:09 2013 +0100
+++ b/flys-artifacts/doc/conf/conf.xml	Fri Jan 11 13:57:38 2013 +0100
@@ -403,5 +403,9 @@
             <zoom-scale river="Elbe" range="100" radius="5" />
             <zoom-scale river="Elbe" range="500" radius="10" />
         </zoom-scales>
+        <minfo-sq>
+            <!-- valid names: grubbs or std-dev -->
+            <outlier-method name="grubbs"/>
+        </minfo-sq>
     </options>
 </artifact-database>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/GrubbsOutlier.java	Fri Jan 11 13:57:38 2013 +0100
@@ -0,0 +1,113 @@
+package de.intevation.flys.artifacts.math;
+
+import java.util.List;
+
+import org.apache.commons.math.MathException;
+
+import org.apache.commons.math.distribution.TDistributionImpl;
+
+import org.apache.commons.math.stat.descriptive.moment.Mean;
+import org.apache.commons.math.stat.descriptive.moment.StandardDeviation;
+
+import org.apache.log4j.Logger;
+
+public class GrubbsOutlier
+{
+    public static final double EPSILON = 1e-5;
+
+    public static final double DEFAULT_ALPHA = 0.05;
+
+    private static Logger log = Logger.getLogger(GrubbsOutlier.class);
+
+    protected GrubbsOutlier() {
+    }
+
+    public static Integer findOutlier(List<Double> values) {
+        return findOutlier(values, DEFAULT_ALPHA, null);
+    }
+
+    public static Integer findOutlier(
+        List<Double> values,
+        double alpha,
+        double[] stdDevResult
+    ) {
+        boolean debug = log.isDebugEnabled();
+
+        if (debug) {
+            log.debug("outliers significance: " + alpha);
+        }
+
+        alpha = 1d - alpha;
+
+        int N = values.size();
+
+        if (debug) {
+            log.debug("Values to check: " + N);
+        }
+
+        if (N < 3) {
+            return null;
+        }
+
+        Mean mean = new Mean();
+        StandardDeviation std = new StandardDeviation();
+
+        for (Double value: values) {
+            double v = value.doubleValue();
+            mean.increment(v);
+            std .increment(v);
+        }
+
+        double m = mean.getResult();
+        double s = std.getResult();
+
+        if (debug) {
+            log.debug("mean: " + m);
+            log.debug("std dev: " + s);
+        }
+
+        double maxZ = -Double.MAX_VALUE;
+        int iv = -1;
+        for (int i = N-1; i >= 0; --i) {
+            double v = values.get(i).doubleValue();
+            double z = Math.abs(v - m);
+            if (z > maxZ) {
+                maxZ = z;
+                iv = i;
+            }
+        }
+
+        if (Math.abs(s) < EPSILON) {
+            return null;
+        }
+
+        maxZ /= s;
+
+        TDistributionImpl tdist = new TDistributionImpl(N-2);
+
+        double t;
+
+        try {
+            t = tdist.inverseCumulativeProbability(alpha/(N+N));
+        }
+        catch (MathException me) {
+            log.error(me);
+            return null;
+        }
+
+        t *= t;
+
+        double za = ((N-1)/Math.sqrt(N))*Math.sqrt(t/(N-2d+t));
+
+        if (debug) {
+            log.debug("max: " + maxZ + " crit: " + za);
+        }
+        if (stdDevResult != null) {
+            stdDevResult[0] = std.getResult();
+        }
+        return maxZ > za
+            ? Integer.valueOf(iv)
+            : null;
+    }
+}
+// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :
--- a/flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/Outlier.java	Wed Jan 09 13:17:09 2013 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,107 +0,0 @@
-package de.intevation.flys.artifacts.math;
-
-import java.util.List;
-
-import org.apache.commons.math.MathException;
-
-import org.apache.commons.math.distribution.TDistributionImpl;
-
-import org.apache.commons.math.stat.descriptive.moment.Mean;
-import org.apache.commons.math.stat.descriptive.moment.StandardDeviation;
-
-import org.apache.log4j.Logger;
-
-public class Outlier
-{
-    public static final double EPSILON = 1e-5;
-
-    public static final double DEFAULT_ALPHA = 0.05;
-
-    private static Logger log = Logger.getLogger(Outlier.class);
-
-    protected Outlier() {
-    }
-
-    public static Integer findOutlier(List<Double> values) {
-        return findOutlier(values, DEFAULT_ALPHA);
-    }
-
-    public static Integer findOutlier(List<Double> values, double alpha) {
-        boolean debug = log.isDebugEnabled();
-
-        if (debug) {
-            log.debug("outliers significance: " + alpha);
-        }
-
-        alpha = 1d - alpha;
-
-        int N = values.size();
-
-        if (debug) {
-            log.debug("Values to check: " + N);
-        }
-
-        if (N < 3) {
-            return null;
-        }
-
-        Mean mean = new Mean();
-        StandardDeviation std = new StandardDeviation();
-
-        for (Double value: values) {
-            double v = value.doubleValue();
-            mean.increment(v);
-            std .increment(v);
-        }
-
-        double m = mean.getResult();
-        double s = std.getResult();
-
-        if (debug) {
-            log.debug("mean: " + m);
-            log.debug("std dev: " + s);
-        }
-
-        double maxZ = -Double.MAX_VALUE;
-        int iv = -1;
-        for (int i = N-1; i >= 0; --i) {
-            double v = values.get(i).doubleValue();
-            double z = Math.abs(v - m);
-            if (z > maxZ) {
-                maxZ = z;
-                iv = i;
-            }
-        }
-
-        if (Math.abs(s) < EPSILON) {
-            return null;
-        }
-
-        maxZ /= s;
-
-        TDistributionImpl tdist = new TDistributionImpl(N-2);
-
-        double t;
-
-        try {
-            t = tdist.inverseCumulativeProbability(alpha/(N+N));
-        }
-        catch (MathException me) {
-            log.error(me);
-            return null;
-        }
-
-        t *= t;
-
-        double za = ((N-1)/Math.sqrt(N))*Math.sqrt(t/(N-2d+t));
-
-        if (debug) {
-            log.debug("max: " + maxZ + " crit: " + za);
-        }
-
-        return maxZ > za
-            ? Integer.valueOf(iv)
-            : null;
-    }
-}
-// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/StdDevOutlier.java	Fri Jan 11 13:57:38 2013 +0100
@@ -0,0 +1,78 @@
+package de.intevation.flys.artifacts.math;
+
+import java.util.List;
+
+import org.apache.commons.math.MathException;
+
+import org.apache.commons.math.distribution.TDistributionImpl;
+
+import org.apache.commons.math.stat.descriptive.moment.Mean;
+import org.apache.commons.math.stat.descriptive.moment.StandardDeviation;
+
+import org.apache.log4j.Logger;
+
+import de.intevation.flys.artifacts.model.sq.SQ;
+
+public class StdDevOutlier
+{
+    public static final double EPSILON = 1e-5;
+
+    public static final double DEFAULT_FACTOR = 3;
+
+    private static Logger log = Logger.getLogger(StdDevOutlier.class);
+
+    protected StdDevOutlier() {
+    }
+
+    public static Integer findOutlier(List<Double> values) {
+        return findOutlier(values, DEFAULT_FACTOR, null);
+    }
+
+    public static Integer findOutlier(List<Double> values, double factor, double[] stdDevResult) {
+        boolean debug = log.isDebugEnabled();
+
+        if (debug) {
+            log.debug("factor for std dev: " + factor);
+        }
+
+        int N = values.size();
+
+        if (debug) {
+            log.debug("Values to check: " + N);
+        }
+
+        if (values.size() < 3) {
+            return null;
+        }
+
+        StandardDeviation stdDev = new StandardDeviation();
+
+        double maxValue = -Double.MAX_VALUE;
+        int    maxIndex = -1;
+        int    ndx = 0;
+        for (int i = values.size()-1; i >= 0; --i) {
+            double value = Math.abs(values.get(i));
+            stdDev.increment(value);
+            if (value > maxValue) {
+                maxValue = value;
+                maxIndex = ndx;
+            }
+            ++ndx;
+        }
+
+        double sd = stdDev.getResult();
+
+        double accepted = factor * sd;
+
+        if (debug) {
+            log.debug("std dev: " + stdDev);
+            log.debug("accepted: " + accepted);
+            log.debug("max value: " + maxValue);
+        }
+        if (stdDevResult != null) {
+            stdDevResult[0] = sd;
+        }
+        return maxValue > accepted ? maxIndex : null;
+    }
+}
+// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :
--- a/flys-artifacts/src/main/java/de/intevation/flys/artifacts/model/fixings/Fitting.java	Wed Jan 09 13:17:09 2013 +0100
+++ b/flys-artifacts/src/main/java/de/intevation/flys/artifacts/model/fixings/Fitting.java	Fri Jan 11 13:57:38 2013 +0100
@@ -1,23 +1,18 @@
 package de.intevation.flys.artifacts.model.fixings;
 
-import de.intevation.flys.artifacts.math.Outlier;
-
-import de.intevation.flys.artifacts.math.fitting.Function;
-
 import gnu.trove.TDoubleArrayList;
 
 import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.commons.math.MathException;
-
 import org.apache.commons.math.optimization.fitting.CurveFitter;
-
 import org.apache.commons.math.optimization.general.LevenbergMarquardtOptimizer;
-
 import org.apache.commons.math.stat.descriptive.moment.StandardDeviation;
+import org.apache.log4j.Logger;
 
-import org.apache.log4j.Logger;
+import de.intevation.flys.artifacts.math.GrubbsOutlier;
+import de.intevation.flys.artifacts.math.fitting.Function;
 
 public class Fitting
 {
@@ -206,7 +201,7 @@
                 inputs.add(Double.valueOf(ys.getQuick(i) - y));
             }
 
-            Integer outlier = Outlier.findOutlier(inputs);
+            Integer outlier = GrubbsOutlier.findOutlier(inputs);
 
             if (outlier == null) {
                 break;
--- a/flys-artifacts/src/main/java/de/intevation/flys/artifacts/model/sq/Outlier.java	Wed Jan 09 13:17:09 2013 +0100
+++ b/flys-artifacts/src/main/java/de/intevation/flys/artifacts/model/sq/Outlier.java	Fri Jan 11 13:57:38 2013 +0100
@@ -9,10 +9,23 @@
 
 import org.apache.log4j.Logger;
 
+import de.intevation.artifacts.GlobalContext;
+import de.intevation.artifacts.common.utils.Config;
+import de.intevation.flys.artifacts.context.FLYSContext;
+import de.intevation.flys.artifacts.math.GrubbsOutlier;
+import de.intevation.flys.artifacts.math.StdDevOutlier;
+
 public class Outlier
 {
     private static Logger log = Logger.getLogger(Outlier.class);
 
+    private static final String OUTLIER_METHOD =
+        "/artifact-database/options/minfo-sq/outlier-method/@name";
+
+    private static final String GRUBBS = "grubbs";
+
+    private static final String STD_DEV = "std-dev";
+
     public interface Callback {
 
         void initialize(List<SQ> sqs) throws MathException;
@@ -38,46 +51,39 @@
         if (debug) {
             log.debug("stdDevFactor: " + stdDevFactor);
         }
-
+        String method = Config.getStringXPath(OUTLIER_METHOD);
+        log.debug("method: " + method);
+        if (method == null) {
+            method = "std-dev";
+        }
         List<SQ> data = new ArrayList<SQ>(sqs);
 
         while (data.size() > 2) {
 
             callback.initialize(data);
 
-            StandardDeviation stdDev = new StandardDeviation();
-
-            double maxValue = -Double.MAX_VALUE;
-            int    maxIndex = -1;
-
-            for (int i = data.size()-1; i >= 0; --i) {
-                double value = Math.abs(callback.eval(data.get(i)));
-                stdDev.increment(value);
-                if (value > maxValue) {
-                    maxValue = value;
-                    maxIndex = i;
-                }
+            List<Double> values = new ArrayList<Double>();
+            for (SQ sq: data) {
+                values.add(callback.eval(sq));
             }
 
-            double sd = stdDev.getResult();
-
-            double accepted = stdDevFactor * sd;
-
-            if (debug) {
-                log.debug("std dev: " + stdDev);
-                log.debug("accepted: " + accepted);
-                log.debug("max value: " + maxValue);
+            Integer ndx = null;
+            double[] stdDev = new double[1];
+            if (method.equals(GRUBBS)) {
+                ndx = GrubbsOutlier.findOutlier(values, stdDevFactor/100, stdDev);
+            }
+            else {
+                ndx = StdDevOutlier.findOutlier(values, stdDevFactor, stdDev);
+            }
+            if (ndx == null) {
+                callback.iterationFinished(stdDev[0], null, data);
+                break;
             }
 
-            SQ outlier = maxValue > accepted
-                ? data.remove(maxIndex)
-                : null;
-
-            callback.iterationFinished(sd, outlier, data);
-
-            if (outlier == null) {
-                break;
-            }
+            SQ outlier = data.remove((int)ndx);
+            log.debug("stdDev: " + stdDev[0]);
+            log.debug("removed " + ndx + "; S: " + outlier.getS() + " Q: " + outlier.getQ());
+            callback.iterationFinished(stdDev[0], outlier, data);
         }
     }
 }

http://dive4elements.wald.intevation.org