changeset 6714:b265cd6cfda5

issue748: Change StandardDeviation implmentation to what BFG calls Standard Deviation Which is actually a calculation that removes outliers based on Standard Error Developed and analyized together with Tom.
author Andre Heinecke <aheinecke@intevation.de>
date Tue, 30 Jul 2013 17:32:28 +0200
parents 5ce1b6755174
children b494a9cf25e5
files artifacts/src/main/java/org/dive4elements/river/artifacts/math/StdDevOutlier.java
diffstat 1 files changed, 21 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- a/artifacts/src/main/java/org/dive4elements/river/artifacts/math/StdDevOutlier.java	Tue Jul 30 16:24:59 2013 +0200
+++ b/artifacts/src/main/java/org/dive4elements/river/artifacts/math/StdDevOutlier.java	Tue Jul 30 17:32:28 2013 +0200
@@ -10,9 +10,13 @@
 
 import java.util.List;
 
-import org.apache.commons.math.stat.descriptive.moment.StandardDeviation;
+import org.apache.log4j.Logger;
 
-import org.apache.log4j.Logger;
+/* XXX:
+ * Warning: This class is called StdDevOutlier because it caculates the
+ * Standard Deviation method for outlier removal as the BFG calls it.
+ * But the actual calculation used to remove the outliers calculates
+ * the Standard Error and not the Standard Deviation! */
 
 public class StdDevOutlier
 {
@@ -30,12 +34,12 @@
     public static Integer findOutlier(
         List<Double> values,
         double       factor,
-        double []    stdDevResult
+        double []    stdErrResult
     ) {
         boolean debug = log.isDebugEnabled();
 
         if (debug) {
-            log.debug("factor for std dev: " + factor);
+            log.debug("factor for std dev test (that calculates std err): " + factor);
         }
 
         int N = values.size();
@@ -48,31 +52,34 @@
             return null;
         }
 
-        StandardDeviation stdDev = new StandardDeviation();
-
         double maxValue = -Double.MAX_VALUE;
         int    maxIndex = -1;
+
+        double squareSumResiduals = 0;
+        for (Double db: values) {
+            squareSumResiduals += Math.pow(db, 2);
+        }
+
+        double stdErr = Math.sqrt(squareSumResiduals / (N - 2));
+
+        double accepted = factor * stdErr;
+
         for (int i = N-1; i >= 0; --i) {
             double value = Math.abs(values.get(i));
-            stdDev.increment(value);
             if (value > maxValue) {
                 maxValue = value;
                 maxIndex = i;
             }
         }
 
-        double sd = stdDev.getResult();
-
-        double accepted = factor * sd;
-
         if (debug) {
-            log.debug("std dev: " + stdDev);
+            log.debug("std err: " + stdErr);
             log.debug("accepted: " + accepted);
             log.debug("max value: " + maxValue);
         }
 
-        if (stdDevResult != null) {
-            stdDevResult[0] = sd;
+        if (stdErrResult != null) {
+            stdErrResult[0] = stdErr;
         }
 
         return maxValue > accepted ? maxIndex : null;

http://dive4elements.wald.intevation.org