sascha@2645: package de.intevation.flys.artifacts.math; sascha@2645: sascha@2646: import org.apache.commons.math.MathException; sascha@2646: sascha@2645: import org.apache.commons.math.stat.descriptive.moment.Mean; sascha@2645: import org.apache.commons.math.stat.descriptive.moment.StandardDeviation; sascha@2645: sascha@2645: import org.apache.commons.math.distribution.TDistributionImpl; sascha@2645: sascha@3011: import java.util.Collections; sascha@2645: import java.util.List; sascha@2645: import java.util.ArrayList; sascha@2645: sascha@2646: import org.apache.log4j.Logger; sascha@2646: sascha@2645: public class Outlier sascha@2645: { sascha@3564: public static final double EPSILON = 1e-5; sascha@3564: sascha@3011: public static final double DEFAULT_ALPHA = 0.05; sascha@3011: sascha@2646: private static Logger log = Logger.getLogger(Outlier.class); sascha@2646: sascha@3011: public static class IndexedValue sascha@3011: implements Comparable sascha@3011: { sascha@2645: protected int index; sascha@2645: protected double value; sascha@2645: sascha@2645: public IndexedValue() { sascha@2645: } sascha@2645: sascha@2645: public IndexedValue(int index, double value) { sascha@2645: this.index = index; sascha@2645: this.value = value; sascha@2645: } sascha@2645: sascha@2645: public int getIndex() { sascha@2645: return index; sascha@2645: } sascha@2645: sascha@2645: public void setIndex(int index) { sascha@2645: this.index = index; sascha@2645: } sascha@2645: sascha@2645: public double getValue() { sascha@2645: return value; sascha@2645: } sascha@2645: sascha@2645: public void setValue(double value) { sascha@2645: this.value = value; sascha@2645: } sascha@3011: sascha@3011: @Override sascha@3011: public int compareTo(IndexedValue other) { sascha@3011: int diff = index - other.index; sascha@3564: if (diff < 0) return -1; sascha@3564: return diff > 0 ? +1 : 0; sascha@3011: } sascha@2645: } // class IndexedValue sascha@2645: sascha@3011: public static class Outliers { sascha@3011: sascha@3011: protected List retained; sascha@3011: protected List removed; sascha@3011: sascha@3011: public Outliers() { sascha@3011: } sascha@3011: sascha@3011: public Outliers( sascha@3011: List retained, sascha@3011: List removed sascha@3011: ) { sascha@3011: this.retained = retained; sascha@3011: this.removed = removed; sascha@3011: } sascha@3011: sascha@3011: public boolean hasOutliers() { sascha@3011: return !removed.isEmpty(); sascha@3011: } sascha@3011: sascha@3011: public List getRetained() { sascha@3011: return retained; sascha@3011: } sascha@3011: sascha@3011: public void setRetained(List retained) { sascha@3011: this.retained = retained; sascha@3011: } sascha@3011: sascha@3011: public List getRemoved() { sascha@3011: return removed; sascha@3011: } sascha@3011: sascha@3011: public void setRemoved(List removed) { sascha@3011: this.removed = removed; sascha@3011: } sascha@3011: } // class Outliers sascha@3011: sascha@2645: public Outlier() { sascha@2645: } sascha@2645: sascha@3011: public static Outliers findOutliers(List inputValues) { sascha@3011: return findOutliers(inputValues, DEFAULT_ALPHA); sascha@3011: } sascha@3011: sascha@3011: public static Outliers findOutliers( sascha@2645: List inputValues, sascha@2645: double alpha sascha@2645: ) { sascha@3564: boolean debug = log.isDebugEnabled(); sascha@3564: sascha@3564: if (debug) { sascha@3564: log.debug("outliers significance: " + alpha); sascha@3564: } sascha@3564: sascha@3564: alpha = 1d - alpha; sascha@3564: sascha@2645: ArrayList outliers = new ArrayList(); sascha@2645: sascha@2645: ArrayList values = sascha@2645: new ArrayList(inputValues); sascha@2645: sascha@2645: for (;;) { sascha@2645: int N = values.size(); sascha@2645: sascha@3564: if (debug) { sascha@3564: log.debug("Values to check: " + N); sascha@3564: } sascha@3564: sascha@2645: if (N < 4) { sascha@2645: break; sascha@2645: } sascha@2645: sascha@2645: Mean mean = new Mean(); sascha@2645: StandardDeviation std = new StandardDeviation(); sascha@2645: sascha@2645: for (IndexedValue value: values) { sascha@2645: mean.increment(value.getValue()); sascha@3011: std .increment(value.getValue()); sascha@2645: } sascha@2645: sascha@2645: double m = mean.getResult(); sascha@2645: double s = std.getResult(); sascha@2645: sascha@3564: if (debug) { sascha@3564: log.debug("mean: " + m); sascha@3564: log.debug("std dev: " + s); sascha@3564: } sascha@3564: sascha@2645: double maxZ = -Double.MAX_VALUE; sascha@2645: int iv = -1; sascha@2646: for (int i = N-1; i >= 0; --i) { sascha@2645: IndexedValue v = values.get(i); sascha@3564: double z = Math.abs(v.getValue()-m); sascha@3564: if (debug) { sascha@3564: log.debug("z candidate: " + z); sascha@3564: } sascha@2645: if (z > maxZ) { sascha@2645: maxZ = z; sascha@2645: iv = i; sascha@2645: } sascha@2645: } sascha@2645: sascha@3564: if (Math.abs(s) < EPSILON) { sascha@3564: break; sascha@3564: } sascha@3564: sascha@3564: maxZ /= s; sascha@2645: sascha@2645: TDistributionImpl tdist = new TDistributionImpl(N-2); sascha@2645: sascha@3564: double t; sascha@2646: sascha@3564: try { sascha@3564: t = tdist.inverseCumulativeProbability(alpha/(N+N)); sascha@2646: } sascha@2646: catch (MathException me) { sascha@2646: log.error(me); sascha@3564: break; sascha@3564: } sascha@3564: sascha@3564: t *= t; sascha@3564: sascha@3564: double za = ((N-1)/Math.sqrt(N))*Math.sqrt(t/(N-2d+t)); sascha@3564: sascha@3564: if (debug) { sascha@3564: log.debug("max: " + maxZ + " crit: " + za); sascha@3564: } sascha@3564: sascha@3564: if (maxZ > za) { sascha@3564: outliers.add(values.get(iv)); sascha@3564: values.remove(iv); sascha@3564: } sascha@3564: else { sascha@3564: if (debug) { sascha@3564: log.debug("values left: " + N); sascha@3564: } sascha@3564: break; sascha@2646: } sascha@2645: } sascha@2645: sascha@3011: Collections.sort(outliers); sascha@2645: sascha@3011: return new Outliers(values, outliers); sascha@2645: } sascha@2645: } sascha@2645: // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :