sascha@2645: package de.intevation.flys.artifacts.math;
sascha@2645: 
sascha@3565: import java.util.List;
sascha@3565: 
sascha@2646: import org.apache.commons.math.MathException;
sascha@2646: 
sascha@3565: import org.apache.commons.math.distribution.TDistributionImpl;
sascha@3565: 
sascha@2645: import org.apache.commons.math.stat.descriptive.moment.Mean;
sascha@2645: import org.apache.commons.math.stat.descriptive.moment.StandardDeviation;
sascha@2645: 
sascha@2646: import org.apache.log4j.Logger;
sascha@2646: 
sascha@2645: public class Outlier
sascha@2645: {
sascha@3564:     public static final double EPSILON = 1e-5;
sascha@3564: 
sascha@3011:     public static final double DEFAULT_ALPHA = 0.05;
sascha@3011: 
sascha@2646:     private static Logger log = Logger.getLogger(Outlier.class);
sascha@2646: 
sascha@3565:     protected Outlier() {
sascha@2645:     }
sascha@2645: 
sascha@3565:     public static Integer findOutlier(List<Double> values) {
sascha@3565:         return findOutlier(values, DEFAULT_ALPHA);
sascha@3011:     }
sascha@3011: 
sascha@3565:     public static Integer findOutlier(List<Double> values, double alpha) {
sascha@3564:         boolean debug = log.isDebugEnabled();
sascha@3564: 
sascha@3564:         if (debug) {
sascha@3564:             log.debug("outliers significance: " + alpha);
sascha@3564:         }
sascha@3564: 
sascha@3564:         alpha = 1d - alpha;
sascha@3564: 
sascha@3565:         int N = values.size();
sascha@3564: 
sascha@3565:         if (debug) {
sascha@3565:             log.debug("Values to check: " + N);
sascha@3565:         }
sascha@2646: 
sascha@3565:         if (N < 3) {
sascha@3565:             return null;
sascha@3565:         }
sascha@3564: 
sascha@3565:         Mean mean = new Mean();
sascha@3565:         StandardDeviation std = new StandardDeviation();
sascha@3564: 
sascha@3565:         for (Double value: values) {
sascha@3565:             double v = value.doubleValue();
sascha@3565:             mean.increment(v);
sascha@3565:             std .increment(v);
sascha@3565:         }
sascha@3565: 
sascha@3565:         double m = mean.getResult();
sascha@3565:         double s = std.getResult();
sascha@3565: 
sascha@3565:         if (debug) {
sascha@3565:             log.debug("mean: " + m);
sascha@3565:             log.debug("std dev: " + s);
sascha@3565:         }
sascha@3565: 
sascha@3565:         double maxZ = -Double.MAX_VALUE;
sascha@3565:         int iv = -1;
sascha@3565:         for (int i = N-1; i >= 0; --i) {
sascha@3565:             double v = values.get(i).doubleValue();
sascha@3565:             double z = Math.abs(v - m);
sascha@3565:             if (z > maxZ) {
sascha@3565:                 maxZ = z;
sascha@3565:                 iv = i;
sascha@2646:             }
sascha@2645:         }
sascha@2645: 
sascha@3565:         if (Math.abs(s) < EPSILON) {
sascha@3565:             return null;
sascha@3565:         }
sascha@2645: 
sascha@3565:         maxZ /= s;
sascha@3565: 
sascha@3565:         TDistributionImpl tdist = new TDistributionImpl(N-2);
sascha@3565: 
sascha@3565:         double t;
sascha@3565: 
sascha@3565:         try {
sascha@3565:             t = tdist.inverseCumulativeProbability(alpha/(N+N));
sascha@3565:         }
sascha@3565:         catch (MathException me) {
sascha@3565:             log.error(me);
sascha@3565:             return null;
sascha@3565:         }
sascha@3565: 
sascha@3565:         t *= t;
sascha@3565: 
sascha@3565:         double za = ((N-1)/Math.sqrt(N))*Math.sqrt(t/(N-2d+t));
sascha@3565: 
sascha@3565:         if (debug) {
sascha@3565:             log.debug("max: " + maxZ + " crit: " + za);
sascha@3565:         }
sascha@3565: 
sascha@3565:         return maxZ > za
sascha@3565:             ? Integer.valueOf(iv)
sascha@3565:             : null;
sascha@2645:     }
sascha@2645: }
sascha@2645: // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :