# HG changeset patch # User Andre Heinecke # Date 1375198348 -7200 # Node ID b265cd6cfda5fa03a9396ac1a951b3a26c3b8218 # Parent 5ce1b675517463946271e42c2f615f7de52bdd71 issue748: Change StandardDeviation implmentation to what BFG calls Standard Deviation Which is actually a calculation that removes outliers based on Standard Error Developed and analyized together with Tom. diff -r 5ce1b6755174 -r b265cd6cfda5 artifacts/src/main/java/org/dive4elements/river/artifacts/math/StdDevOutlier.java --- a/artifacts/src/main/java/org/dive4elements/river/artifacts/math/StdDevOutlier.java Tue Jul 30 16:24:59 2013 +0200 +++ b/artifacts/src/main/java/org/dive4elements/river/artifacts/math/StdDevOutlier.java Tue Jul 30 17:32:28 2013 +0200 @@ -10,9 +10,13 @@ import java.util.List; -import org.apache.commons.math.stat.descriptive.moment.StandardDeviation; +import org.apache.log4j.Logger; -import org.apache.log4j.Logger; +/* XXX: + * Warning: This class is called StdDevOutlier because it caculates the + * Standard Deviation method for outlier removal as the BFG calls it. + * But the actual calculation used to remove the outliers calculates + * the Standard Error and not the Standard Deviation! */ public class StdDevOutlier { @@ -30,12 +34,12 @@ public static Integer findOutlier( List values, double factor, - double [] stdDevResult + double [] stdErrResult ) { boolean debug = log.isDebugEnabled(); if (debug) { - log.debug("factor for std dev: " + factor); + log.debug("factor for std dev test (that calculates std err): " + factor); } int N = values.size(); @@ -48,31 +52,34 @@ return null; } - StandardDeviation stdDev = new StandardDeviation(); - double maxValue = -Double.MAX_VALUE; int maxIndex = -1; + + double squareSumResiduals = 0; + for (Double db: values) { + squareSumResiduals += Math.pow(db, 2); + } + + double stdErr = Math.sqrt(squareSumResiduals / (N - 2)); + + double accepted = factor * stdErr; + for (int i = N-1; i >= 0; --i) { double value = Math.abs(values.get(i)); - stdDev.increment(value); if (value > maxValue) { maxValue = value; maxIndex = i; } } - double sd = stdDev.getResult(); - - double accepted = factor * sd; - if (debug) { - log.debug("std dev: " + stdDev); + log.debug("std err: " + stdErr); log.debug("accepted: " + accepted); log.debug("max value: " + maxValue); } - if (stdDevResult != null) { - stdDevResult[0] = sd; + if (stdErrResult != null) { + stdErrResult[0] = stdErr; } return maxValue > accepted ? maxIndex : null;