diff flys-artifacts/src/main/java/de/intevation/flys/artifacts/model/sq/Outlier.java @ 4794:a7d080347ac3

MINFO: Allow two methods for the outlier test in the SQ relation. * The method can be selected via an option in conf.xml. * Methods: - Find outliers via multiples of the standard deviation. - Grubbs' test (as used in the Fix-Analysis).
author Raimund Renkert <rrenkert@intevation.de>
date Fri, 11 Jan 2013 13:57:38 +0100
parents b8b1280606c2
children 8ee270a3ef25
line wrap: on
line diff
--- a/flys-artifacts/src/main/java/de/intevation/flys/artifacts/model/sq/Outlier.java	Wed Jan 09 13:17:09 2013 +0100
+++ b/flys-artifacts/src/main/java/de/intevation/flys/artifacts/model/sq/Outlier.java	Fri Jan 11 13:57:38 2013 +0100
@@ -9,10 +9,23 @@
 
 import org.apache.log4j.Logger;
 
+import de.intevation.artifacts.GlobalContext;
+import de.intevation.artifacts.common.utils.Config;
+import de.intevation.flys.artifacts.context.FLYSContext;
+import de.intevation.flys.artifacts.math.GrubbsOutlier;
+import de.intevation.flys.artifacts.math.StdDevOutlier;
+
 public class Outlier
 {
     private static Logger log = Logger.getLogger(Outlier.class);
 
+    private static final String OUTLIER_METHOD =
+        "/artifact-database/options/minfo-sq/outlier-method/@name";
+
+    private static final String GRUBBS = "grubbs";
+
+    private static final String STD_DEV = "std-dev";
+
     public interface Callback {
 
         void initialize(List<SQ> sqs) throws MathException;
@@ -38,46 +51,39 @@
         if (debug) {
             log.debug("stdDevFactor: " + stdDevFactor);
         }
-
+        String method = Config.getStringXPath(OUTLIER_METHOD);
+        log.debug("method: " + method);
+        if (method == null) {
+            method = "std-dev";
+        }
         List<SQ> data = new ArrayList<SQ>(sqs);
 
         while (data.size() > 2) {
 
             callback.initialize(data);
 
-            StandardDeviation stdDev = new StandardDeviation();
-
-            double maxValue = -Double.MAX_VALUE;
-            int    maxIndex = -1;
-
-            for (int i = data.size()-1; i >= 0; --i) {
-                double value = Math.abs(callback.eval(data.get(i)));
-                stdDev.increment(value);
-                if (value > maxValue) {
-                    maxValue = value;
-                    maxIndex = i;
-                }
+            List<Double> values = new ArrayList<Double>();
+            for (SQ sq: data) {
+                values.add(callback.eval(sq));
             }
 
-            double sd = stdDev.getResult();
-
-            double accepted = stdDevFactor * sd;
-
-            if (debug) {
-                log.debug("std dev: " + stdDev);
-                log.debug("accepted: " + accepted);
-                log.debug("max value: " + maxValue);
+            Integer ndx = null;
+            double[] stdDev = new double[1];
+            if (method.equals(GRUBBS)) {
+                ndx = GrubbsOutlier.findOutlier(values, stdDevFactor/100, stdDev);
+            }
+            else {
+                ndx = StdDevOutlier.findOutlier(values, stdDevFactor, stdDev);
+            }
+            if (ndx == null) {
+                callback.iterationFinished(stdDev[0], null, data);
+                break;
             }
 
-            SQ outlier = maxValue > accepted
-                ? data.remove(maxIndex)
-                : null;
-
-            callback.iterationFinished(sd, outlier, data);
-
-            if (outlier == null) {
-                break;
-            }
+            SQ outlier = data.remove((int)ndx);
+            log.debug("stdDev: " + stdDev[0]);
+            log.debug("removed " + ndx + "; S: " + outlier.getS() + " Q: " + outlier.getQ());
+            callback.iterationFinished(stdDev[0], outlier, data);
         }
     }
 }

http://dive4elements.wald.intevation.org