rrenkert@4794: package de.intevation.flys.artifacts.math;
rrenkert@4794: 
rrenkert@4794: import java.util.List;
rrenkert@4794: 
rrenkert@4794: import org.apache.commons.math.MathException;
rrenkert@4794: 
rrenkert@4794: import org.apache.commons.math.distribution.TDistributionImpl;
rrenkert@4794: 
rrenkert@4794: import org.apache.commons.math.stat.descriptive.moment.Mean;
rrenkert@4794: import org.apache.commons.math.stat.descriptive.moment.StandardDeviation;
rrenkert@4794: 
rrenkert@4794: import org.apache.log4j.Logger;
rrenkert@4794: 
rrenkert@4794: public class GrubbsOutlier
rrenkert@4794: {
rrenkert@4794:     public static final double EPSILON = 1e-5;
rrenkert@4794: 
rrenkert@4794:     public static final double DEFAULT_ALPHA = 0.05;
rrenkert@4794: 
rrenkert@4794:     private static Logger log = Logger.getLogger(GrubbsOutlier.class);
rrenkert@4794: 
rrenkert@4794:     protected GrubbsOutlier() {
rrenkert@4794:     }
rrenkert@4794: 
rrenkert@4794:     public static Integer findOutlier(List<Double> values) {
rrenkert@4794:         return findOutlier(values, DEFAULT_ALPHA, null);
rrenkert@4794:     }
rrenkert@4794: 
rrenkert@4794:     public static Integer findOutlier(
rrenkert@4794:         List<Double> values,
rrenkert@4794:         double alpha,
rrenkert@4794:         double[] stdDevResult
rrenkert@4794:     ) {
rrenkert@4794:         boolean debug = log.isDebugEnabled();
rrenkert@4794: 
rrenkert@4794:         if (debug) {
rrenkert@4794:             log.debug("outliers significance: " + alpha);
rrenkert@4794:         }
rrenkert@4794: 
rrenkert@4794:         alpha = 1d - alpha;
rrenkert@4794: 
rrenkert@4794:         int N = values.size();
rrenkert@4794: 
rrenkert@4794:         if (debug) {
rrenkert@4794:             log.debug("Values to check: " + N);
rrenkert@4794:         }
rrenkert@4794: 
rrenkert@4794:         if (N < 3) {
rrenkert@4794:             return null;
rrenkert@4794:         }
rrenkert@4794: 
rrenkert@4794:         Mean mean = new Mean();
rrenkert@4794:         StandardDeviation std = new StandardDeviation();
rrenkert@4794: 
rrenkert@4794:         for (Double value: values) {
rrenkert@4794:             double v = value.doubleValue();
rrenkert@4794:             mean.increment(v);
rrenkert@4794:             std .increment(v);
rrenkert@4794:         }
rrenkert@4794: 
rrenkert@4794:         double m = mean.getResult();
rrenkert@4794:         double s = std.getResult();
rrenkert@4794: 
rrenkert@4794:         if (debug) {
rrenkert@4794:             log.debug("mean: " + m);
rrenkert@4794:             log.debug("std dev: " + s);
rrenkert@4794:         }
rrenkert@4794: 
rrenkert@4794:         double maxZ = -Double.MAX_VALUE;
rrenkert@4794:         int iv = -1;
rrenkert@4794:         for (int i = N-1; i >= 0; --i) {
rrenkert@4794:             double v = values.get(i).doubleValue();
rrenkert@4794:             double z = Math.abs(v - m);
rrenkert@4794:             if (z > maxZ) {
rrenkert@4794:                 maxZ = z;
rrenkert@4794:                 iv = i;
rrenkert@4794:             }
rrenkert@4794:         }
rrenkert@4794: 
rrenkert@4794:         if (Math.abs(s) < EPSILON) {
rrenkert@4794:             return null;
rrenkert@4794:         }
rrenkert@4794: 
rrenkert@4794:         maxZ /= s;
rrenkert@4794: 
rrenkert@4794:         TDistributionImpl tdist = new TDistributionImpl(N-2);
rrenkert@4794: 
rrenkert@4794:         double t;
rrenkert@4794: 
rrenkert@4794:         try {
rrenkert@4794:             t = tdist.inverseCumulativeProbability(alpha/(N+N));
rrenkert@4794:         }
rrenkert@4794:         catch (MathException me) {
rrenkert@4794:             log.error(me);
rrenkert@4794:             return null;
rrenkert@4794:         }
rrenkert@4794: 
rrenkert@4794:         t *= t;
rrenkert@4794: 
rrenkert@4794:         double za = ((N-1)/Math.sqrt(N))*Math.sqrt(t/(N-2d+t));
rrenkert@4794: 
rrenkert@4794:         if (debug) {
rrenkert@4794:             log.debug("max: " + maxZ + " crit: " + za);
rrenkert@4794:         }
rrenkert@4794:         if (stdDevResult != null) {
rrenkert@4794:             stdDevResult[0] = std.getResult();
rrenkert@4794:         }
rrenkert@4794:         return maxZ > za
rrenkert@4794:             ? Integer.valueOf(iv)
rrenkert@4794:             : null;
rrenkert@4794:     }
rrenkert@4794: }
rrenkert@4794: // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :