Mercurial > dive4elements > river
diff artifacts/src/main/java/org/dive4elements/river/artifacts/math/GrubbsOutlier.java @ 5838:5aa05a7a34b7
Rename modules to more fitting names.
author | Sascha L. Teichmann <teichmann@intevation.de> |
---|---|
date | Thu, 25 Apr 2013 15:23:37 +0200 |
parents | flys-artifacts/src/main/java/org/dive4elements/river/artifacts/math/GrubbsOutlier.java@bd047b71ab37 |
children | 4897a58c8746 |
line wrap: on
line diff
package org.dive4elements.river.artifacts.math;

import java.util.List;

import org.apache.commons.math.MathException;

import org.apache.commons.math.distribution.TDistributionImpl;

import org.apache.commons.math.stat.descriptive.moment.Mean;
import org.apache.commons.math.stat.descriptive.moment.StandardDeviation;

import org.apache.log4j.Logger;

/**
 * Static helper that looks for a single outlier in a list of values by
 * comparing the largest absolute deviation from the mean (scaled by the
 * sample standard deviation) against a critical value derived from
 * Student's t distribution with {@code N - 2} degrees of freedom.
 */
public class GrubbsOutlier
{
    /** Standard deviations with a magnitude below this are treated as zero. */
    public static final double EPSILON = 1e-5;

    /** Significance used by {@link #findOutlier(List)}. */
    public static final double DEFAULT_ALPHA = 0.05;

    private static final Logger log = Logger.getLogger(GrubbsOutlier.class);

    /** Not meant to be instantiated; kept protected for existing subclasses. */
    protected GrubbsOutlier() {
    }

    /**
     * Searches for an outlier using {@link #DEFAULT_ALPHA} and without
     * reporting the standard deviation.
     *
     * @param values the values to check; must not contain {@code null}.
     * @return index of the outlier in {@code values} or {@code null} if
     *         no outlier was found.
     */
    public static Integer findOutlier(List<Double> values) {
        return findOutlier(values, DEFAULT_ALPHA, null);
    }

    /**
     * Searches for the value with the largest absolute deviation from the
     * mean and reports its index if the deviation, divided by the standard
     * deviation, exceeds the critical value.
     *
     * @param values the values to check; must not contain {@code null}.
     * @param alpha the significance; internally {@code 1 - alpha} is used
     *        to pick the quantile of the t distribution.
     * @param stdDevResult optional output; if non-null and non-empty,
     *        element 0 receives the standard deviation of {@code values}.
     *        It is left untouched when fewer than three values are given,
     *        because no deviation is computed in that case.
     * @return index of the outlier in {@code values}, or {@code null} if
     *         there are fewer than three values, the standard deviation is
     *         below {@link #EPSILON}, the quantile could not be computed,
     *         or the largest deviation does not exceed the critical value.
     */
    public static Integer findOutlier(
        List<Double> values,
        double       alpha,
        double[]     stdDevResult
    ) {
        boolean debug = log.isDebugEnabled();

        if (debug) {
            log.debug("outliers significance: " + alpha);
        }

        // NOTE(review): the quantile below is taken at (1 - alpha) / (2N),
        // whereas the textbook Grubbs test uses alpha / (2N). Behaviour is
        // kept as it was -- confirm against what the callers pass in.
        double level = 1d - alpha;

        int N = values.size();

        if (debug) {
            log.debug("Values to check: " + N);
        }

        if (N < 3) {
            return null;
        }

        Mean              mean = new Mean();
        StandardDeviation std  = new StandardDeviation();

        // Unbox once; the copy is reused for the deviation scan below so
        // that lists without random access are not indexed repeatedly.
        double [] data = new double[N];
        int count = 0;
        for (Double value: values) {
            double v = value.doubleValue();
            data[count++] = v;
            mean.increment(v);
            std .increment(v);
        }

        double m = mean.getResult();
        double s = std.getResult();

        if (debug) {
            log.debug("mean: " + m);
            log.debug("std dev: " + s);
        }

        // Report the deviation as soon as it is known so that callers also
        // receive it on the early exits below.
        if (stdDevResult != null && stdDevResult.length > 0) {
            stdDevResult[0] = s;
        }

        // All values are (nearly) identical: nothing can be an outlier
        // and dividing by s would be meaningless.
        if (Math.abs(s) < EPSILON) {
            return null;
        }

        // Scan from the back with a strict comparison: on ties the
        // highest index wins.
        double maxZ = -Double.MAX_VALUE;
        int    iv   = -1;
        for (int i = N-1; i >= 0; --i) {
            double z = Math.abs(data[i] - m);
            if (z > maxZ) {
                maxZ = z;
                iv   = i;
            }
        }

        maxZ /= s;

        TDistributionImpl tdist = new TDistributionImpl(N-2);

        double t;

        try {
            t = tdist.inverseCumulativeProbability(level/(2d*N));
        }
        catch (MathException me) {
            // Pass the exception as throwable to keep the stack trace.
            log.error("Cannot compute t quantile for outlier test", me);
            return null;
        }

        // Only the square of the quantile enters the critical value.
        double t2 = t*t;

        double za = ((N-1)/Math.sqrt(N))*Math.sqrt(t2/(N-2d+t2));

        if (debug) {
            log.debug("max: " + maxZ + " crit: " + za);
        }

        return maxZ > za
            ? Integer.valueOf(iv)
            : null;
    }
}
// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :