Mercurial > dive4elements > river
diff flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/Outlier.java @ 3318:dbe2f85bf160
merged flys-artifacts/2.8
author | Thomas Arendsen Hein <thomas@intevation.de> |
---|---|
date | Fri, 28 Sep 2012 12:14:35 +0200 |
parents | ab81ffd1343e |
children | e01b9d1bc941 |
line wrap: on
line diff
// --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/Outlier.java Fri Sep 28 12:14:35 2012 +0200 @@ -0,0 +1,168 @@
package de.intevation.flys.artifacts.math;

import org.apache.commons.math.MathException;

import org.apache.commons.math.stat.descriptive.moment.Mean;
import org.apache.commons.math.stat.descriptive.moment.StandardDeviation;

import org.apache.commons.math.distribution.TDistributionImpl;

import java.util.Collections;
import java.util.List;
import java.util.ArrayList;

import org.apache.log4j.Logger;

/**
 * Iterative outlier detection on a list of indexed values using
 * Grubbs' test: in every round the value with the largest absolute
 * deviation from the mean (in units of the standard deviation) is
 * tested and, if significant, moved from the retained to the removed
 * set. The procedure stops as soon as a round finds no outlier or
 * fewer than {@link #MIN_SAMPLE_SIZE} values are left.
 */
public class Outlier
{
    /** Significance level used when the caller does not pass one. */
    public static final double DEFAULT_ALPHA = 0.05;

    /** Samples smaller than this are never tested. */
    private static final int MIN_SAMPLE_SIZE = 4;

    private static final Logger log = Logger.getLogger(Outlier.class);

    /**
     * A value together with the index it had in its original sequence.
     * The natural ordering is ascending by index.
     */
    public static class IndexedValue
    implements          Comparable<IndexedValue>
    {
        protected int    index;
        protected double value;

        public IndexedValue() {
        }

        public IndexedValue(int index, double value) {
            this.index = index;
            this.value = value;
        }

        public int getIndex() {
            return index;
        }

        public void setIndex(int index) {
            this.index = index;
        }

        public double getValue() {
            return value;
        }

        public void setValue(double value) {
            this.value = value;
        }

        /**
         * Orders by index only; the value is not considered.
         *
         * @param other the element to compare with.
         * @return -1, 0 or +1 if this index is less than, equal to or
         *         greater than the other index.
         */
        @Override
        public int compareTo(IndexedValue other) {
            // Explicit comparisons instead of a subtraction: no int
            // overflow and no dependency on Java 7's Integer.compare.
            if (index < other.index) {
                return -1;
            }
            return index > other.index ? +1 : 0;
        }
    } // class IndexedValue

    /**
     * Result of an outlier search: the values that were kept and the
     * values that were classified as outliers.
     */
    public static class Outliers {

        protected List<IndexedValue> retained;
        protected List<IndexedValue> removed;

        public Outliers() {
        }

        public Outliers(
            List<IndexedValue> retained,
            List<IndexedValue> removed
        ) {
            this.retained = retained;
            this.removed  = removed;
        }

        /**
         * @return true if at least one value was removed. An unset
         *         (null) removed list counts as "no outliers".
         */
        public boolean hasOutliers() {
            return removed != null && !removed.isEmpty();
        }

        public List<IndexedValue> getRetained() {
            return retained;
        }

        public void setRetained(List<IndexedValue> retained) {
            this.retained = retained;
        }

        public List<IndexedValue> getRemoved() {
            return removed;
        }

        public void setRemoved(List<IndexedValue> removed) {
            this.removed = removed;
        }
    } // class Outliers

    public Outlier() {
    }

    /**
     * Searches for outliers using {@link #DEFAULT_ALPHA}.
     *
     * @param inputValues the values to examine; the list is not modified.
     * @return the retained and the removed values.
     */
    public static Outliers findOutliers(List<IndexedValue> inputValues) {
        return findOutliers(inputValues, DEFAULT_ALPHA);
    }

    /**
     * Searches for outliers by repeatedly applying Grubbs' test.
     *
     * @param inputValues the values to examine; the list is copied and
     *                    not modified.
     * @param alpha       significance level; a candidate is removed if
     *                    its two-sided Grubbs p-value is below alpha.
     * @return the retained values in their input order and the removed
     *         values sorted ascending by index.
     */
    public static Outliers findOutliers(
        List<IndexedValue> inputValues,
        double             alpha
    ) {
        ArrayList<IndexedValue> outliers = new ArrayList<IndexedValue>();

        ArrayList<IndexedValue> values =
            new ArrayList<IndexedValue>(inputValues);

        for (;;) {
            int N = values.size();

            if (N < MIN_SAMPLE_SIZE) {
                break;
            }

            Mean              mean = new Mean();
            StandardDeviation std  = new StandardDeviation();

            for (IndexedValue value: values) {
                mean.increment(value.getValue());
                std .increment(value.getValue());
            }

            double m = mean.getResult();
            double s = std.getResult();

            // No spread (or NaN input): the z-scores are undefined,
            // so nothing can be called an outlier.
            if (!(s > 0d)) {
                break;
            }

            // Find the most extreme value. Scanning from the end with
            // a strict '>' picks the highest index among ties.
            double maxZ = -Double.MAX_VALUE;
            int    iv   = -1;
            for (int i = N-1; i >= 0; --i) {
                double z = Math.abs(m - values.get(i).getValue())/s;
                if (z > maxZ) {
                    maxZ = z;
                    iv   = i;
                }
            }

            // Every z-score was NaN: there is no candidate.
            if (iv < 0) {
                break;
            }

            double p;
            try {
                p = grubbsPValue(N, maxZ);
            }
            catch (MathException me) {
                // Stop instead of retrying the identical computation
                // forever; return what has been found so far.
                log.error("Grubbs test failed, stopping outlier search.", me);
                break;
            }

            if (!(p < alpha)) {
                break;
            }

            outliers.add(values.remove(iv));
        }

        Collections.sort(outliers);

        return new Outliers(values, outliers);
    }

    /**
     * Two-sided p-value of Grubbs' statistic g for a sample of size n.
     * g is converted to a t statistic with n-2 degrees of freedom via
     * t^2 = n(n-2)g^2 / ((n-1)^2 - n g^2); the upper tail probability
     * of t is multiplied by 2n and capped at 1.
     */
    private static double grubbsPValue(int n, double g)
    throws MathException
    {
        // All arithmetic in double: n*(n-2) overflows int for large n.
        double gg    = g*g;
        double denom = (n - 1d)*(n - 1d) - n*gg;

        // g at its theoretical maximum (n-1)/sqrt(n); rounding may push
        // the denominator to or below zero. t would be infinite.
        if (denom <= 0d) {
            return 0d;
        }

        double t = Math.sqrt(n*(n - 2d)*gg/denom);

        TDistributionImpl tdist = new TDistributionImpl(n - 2);

        double upperTail = 1d - tdist.cumulativeProbability(t);

        return Math.min(1d, 2d*n*upperTail);
    }
}
// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :