teichmann@5831: package org.dive4elements.river.artifacts.math; rrenkert@4794: rrenkert@4794: import java.util.List; rrenkert@4794: rrenkert@4794: import org.apache.commons.math.MathException; rrenkert@4794: rrenkert@4794: import org.apache.commons.math.distribution.TDistributionImpl; rrenkert@4794: rrenkert@4794: import org.apache.commons.math.stat.descriptive.moment.Mean; rrenkert@4794: import org.apache.commons.math.stat.descriptive.moment.StandardDeviation; rrenkert@4794: rrenkert@4794: import org.apache.log4j.Logger; rrenkert@4794: rrenkert@4794: public class GrubbsOutlier rrenkert@4794: { rrenkert@4794: public static final double EPSILON = 1e-5; rrenkert@4794: rrenkert@4794: public static final double DEFAULT_ALPHA = 0.05; rrenkert@4794: rrenkert@4794: private static Logger log = Logger.getLogger(GrubbsOutlier.class); rrenkert@4794: rrenkert@4794: protected GrubbsOutlier() { rrenkert@4794: } rrenkert@4794: rrenkert@4794: public static Integer findOutlier(List values) { rrenkert@4794: return findOutlier(values, DEFAULT_ALPHA, null); rrenkert@4794: } rrenkert@4794: rrenkert@4794: public static Integer findOutlier( rrenkert@4794: List values, rrenkert@4794: double alpha, rrenkert@4794: double[] stdDevResult rrenkert@4794: ) { rrenkert@4794: boolean debug = log.isDebugEnabled(); rrenkert@4794: rrenkert@4794: if (debug) { rrenkert@4794: log.debug("outliers significance: " + alpha); rrenkert@4794: } rrenkert@4794: rrenkert@4794: alpha = 1d - alpha; rrenkert@4794: rrenkert@4794: int N = values.size(); rrenkert@4794: rrenkert@4794: if (debug) { rrenkert@4794: log.debug("Values to check: " + N); rrenkert@4794: } rrenkert@4794: rrenkert@4794: if (N < 3) { rrenkert@4794: return null; rrenkert@4794: } rrenkert@4794: rrenkert@4794: Mean mean = new Mean(); rrenkert@4794: StandardDeviation std = new StandardDeviation(); rrenkert@4794: rrenkert@4794: for (Double value: values) { rrenkert@4794: double v = value.doubleValue(); rrenkert@4794: mean.increment(v); rrenkert@4794: std .increment(v); rrenkert@4794: } rrenkert@4794: rrenkert@4794: double m = mean.getResult(); rrenkert@4794: double s = std.getResult(); rrenkert@4794: rrenkert@4794: if (debug) { rrenkert@4794: log.debug("mean: " + m); rrenkert@4794: log.debug("std dev: " + s); rrenkert@4794: } rrenkert@4794: rrenkert@4794: double maxZ = -Double.MAX_VALUE; rrenkert@4794: int iv = -1; rrenkert@4794: for (int i = N-1; i >= 0; --i) { rrenkert@4794: double v = values.get(i).doubleValue(); rrenkert@4794: double z = Math.abs(v - m); rrenkert@4794: if (z > maxZ) { rrenkert@4794: maxZ = z; rrenkert@4794: iv = i; rrenkert@4794: } rrenkert@4794: } rrenkert@4794: rrenkert@4794: if (Math.abs(s) < EPSILON) { rrenkert@4794: return null; rrenkert@4794: } rrenkert@4794: rrenkert@4794: maxZ /= s; rrenkert@4794: rrenkert@4794: TDistributionImpl tdist = new TDistributionImpl(N-2); rrenkert@4794: rrenkert@4794: double t; rrenkert@4794: rrenkert@4794: try { rrenkert@4794: t = tdist.inverseCumulativeProbability(alpha/(N+N)); rrenkert@4794: } rrenkert@4794: catch (MathException me) { rrenkert@4794: log.error(me); rrenkert@4794: return null; rrenkert@4794: } rrenkert@4794: rrenkert@4794: t *= t; rrenkert@4794: rrenkert@4794: double za = ((N-1)/Math.sqrt(N))*Math.sqrt(t/(N-2d+t)); rrenkert@4794: rrenkert@4794: if (debug) { rrenkert@4794: log.debug("max: " + maxZ + " crit: " + za); rrenkert@4794: } rrenkert@4794: if (stdDevResult != null) { rrenkert@4794: stdDevResult[0] = std.getResult(); rrenkert@4794: } rrenkert@4794: return maxZ > za rrenkert@4794: ? Integer.valueOf(iv) rrenkert@4794: : null; rrenkert@4794: } rrenkert@4794: } rrenkert@4794: // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :