sascha@2645: package de.intevation.flys.artifacts.math; sascha@2645: sascha@3565: import java.util.List; sascha@3565: sascha@2646: import org.apache.commons.math.MathException; sascha@2646: sascha@3565: import org.apache.commons.math.distribution.TDistributionImpl; sascha@3565: sascha@2645: import org.apache.commons.math.stat.descriptive.moment.Mean; sascha@2645: import org.apache.commons.math.stat.descriptive.moment.StandardDeviation; sascha@2645: sascha@2646: import org.apache.log4j.Logger; sascha@2646: sascha@2645: public class Outlier sascha@2645: { sascha@3564: public static final double EPSILON = 1e-5; sascha@3564: sascha@3011: public static final double DEFAULT_ALPHA = 0.05; sascha@3011: sascha@2646: private static Logger log = Logger.getLogger(Outlier.class); sascha@2646: sascha@3565: protected Outlier() { sascha@2645: } sascha@2645: sascha@3565: public static Integer findOutlier(List values) { sascha@3565: return findOutlier(values, DEFAULT_ALPHA); sascha@3011: } sascha@3011: sascha@3565: public static Integer findOutlier(List values, double alpha) { sascha@3564: boolean debug = log.isDebugEnabled(); sascha@3564: sascha@3564: if (debug) { sascha@3564: log.debug("outliers significance: " + alpha); sascha@3564: } sascha@3564: sascha@3564: alpha = 1d - alpha; sascha@3564: sascha@3565: int N = values.size(); sascha@3564: sascha@3565: if (debug) { sascha@3565: log.debug("Values to check: " + N); sascha@3565: } sascha@2646: sascha@3565: if (N < 3) { sascha@3565: return null; sascha@3565: } sascha@3564: sascha@3565: Mean mean = new Mean(); sascha@3565: StandardDeviation std = new StandardDeviation(); sascha@3564: sascha@3565: for (Double value: values) { sascha@3565: double v = value.doubleValue(); sascha@3565: mean.increment(v); sascha@3565: std .increment(v); sascha@3565: } sascha@3565: sascha@3565: double m = mean.getResult(); sascha@3565: double s = std.getResult(); sascha@3565: sascha@3565: if (debug) { sascha@3565: log.debug("mean: " + m); sascha@3565: log.debug("std dev: " + s); sascha@3565: } sascha@3565: sascha@3565: double maxZ = -Double.MAX_VALUE; sascha@3565: int iv = -1; sascha@3565: for (int i = N-1; i >= 0; --i) { sascha@3565: double v = values.get(i).doubleValue(); sascha@3565: double z = Math.abs(v - m); sascha@3565: if (z > maxZ) { sascha@3565: maxZ = z; sascha@3565: iv = i; sascha@2646: } sascha@2645: } sascha@2645: sascha@3565: if (Math.abs(s) < EPSILON) { sascha@3565: return null; sascha@3565: } sascha@2645: sascha@3565: maxZ /= s; sascha@3565: sascha@3565: TDistributionImpl tdist = new TDistributionImpl(N-2); sascha@3565: sascha@3565: double t; sascha@3565: sascha@3565: try { sascha@3565: t = tdist.inverseCumulativeProbability(alpha/(N+N)); sascha@3565: } sascha@3565: catch (MathException me) { sascha@3565: log.error(me); sascha@3565: return null; sascha@3565: } sascha@3565: sascha@3565: t *= t; sascha@3565: sascha@3565: double za = ((N-1)/Math.sqrt(N))*Math.sqrt(t/(N-2d+t)); sascha@3565: sascha@3565: if (debug) { sascha@3565: log.debug("max: " + maxZ + " crit: " + za); sascha@3565: } sascha@3565: sascha@3565: return maxZ > za sascha@3565: ? Integer.valueOf(iv) sascha@3565: : null; sascha@2645: } sascha@2645: } sascha@2645: // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :