Mercurial > dive4elements > river
view flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/Outlier.java @ 3564:e01b9d1bc941
FixA: Corrected the formulas of Grubbs' test for outliers. Still a bit broken.
flys-artifacts/trunk@5162 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author | Sascha L. Teichmann <sascha.teichmann@intevation.de> |
---|---|
date | Tue, 31 Jul 2012 16:14:17 +0000 |
parents | ab81ffd1343e |
children | b136113dad53 |
line wrap: on
line source
package de.intevation.flys.artifacts.math; import org.apache.commons.math.MathException; import org.apache.commons.math.stat.descriptive.moment.Mean; import org.apache.commons.math.stat.descriptive.moment.StandardDeviation; import org.apache.commons.math.distribution.TDistributionImpl; import java.util.Collections; import java.util.List; import java.util.ArrayList; import org.apache.log4j.Logger; public class Outlier { public static final double EPSILON = 1e-5; public static final double DEFAULT_ALPHA = 0.05; private static Logger log = Logger.getLogger(Outlier.class); public static class IndexedValue implements Comparable<IndexedValue> { protected int index; protected double value; public IndexedValue() { } public IndexedValue(int index, double value) { this.index = index; this.value = value; } public int getIndex() { return index; } public void setIndex(int index) { this.index = index; } public double getValue() { return value; } public void setValue(double value) { this.value = value; } @Override public int compareTo(IndexedValue other) { int diff = index - other.index; if (diff < 0) return -1; return diff > 0 ? +1 : 0; } } // class IndexedValue public static class Outliers { protected List<IndexedValue> retained; protected List<IndexedValue> removed; public Outliers() { } public Outliers( List<IndexedValue> retained, List<IndexedValue> removed ) { this.retained = retained; this.removed = removed; } public boolean hasOutliers() { return !removed.isEmpty(); } public List<IndexedValue> getRetained() { return retained; } public void setRetained(List<IndexedValue> retained) { this.retained = retained; } public List<IndexedValue> getRemoved() { return removed; } public void setRemoved(List<IndexedValue> removed) { this.removed = removed; } } // class Outliers public Outlier() { } public static Outliers findOutliers(List<IndexedValue> inputValues) { return findOutliers(inputValues, DEFAULT_ALPHA); } public static Outliers findOutliers( List<IndexedValue> inputValues, double alpha ) { boolean debug = log.isDebugEnabled(); if (debug) { log.debug("outliers significance: " + alpha); } alpha = 1d - alpha; ArrayList<IndexedValue> outliers = new ArrayList<IndexedValue>(); ArrayList<IndexedValue> values = new ArrayList<IndexedValue>(inputValues); for (;;) { int N = values.size(); if (debug) { log.debug("Values to check: " + N); } if (N < 4) { break; } Mean mean = new Mean(); StandardDeviation std = new StandardDeviation(); for (IndexedValue value: values) { mean.increment(value.getValue()); std .increment(value.getValue()); } double m = mean.getResult(); double s = std.getResult(); if (debug) { log.debug("mean: " + m); log.debug("std dev: " + s); } double maxZ = -Double.MAX_VALUE; int iv = -1; for (int i = N-1; i >= 0; --i) { IndexedValue v = values.get(i); double z = Math.abs(v.getValue()-m); if (debug) { log.debug("z candidate: " + z); } if (z > maxZ) { maxZ = z; iv = i; } } if (Math.abs(s) < EPSILON) { break; } maxZ /= s; TDistributionImpl tdist = new TDistributionImpl(N-2); double t; try { t = tdist.inverseCumulativeProbability(alpha/(N+N)); } catch (MathException me) { log.error(me); break; } t *= t; double za = ((N-1)/Math.sqrt(N))*Math.sqrt(t/(N-2d+t)); if (debug) { log.debug("max: " + maxZ + " crit: " + za); } if (maxZ > za) { outliers.add(values.get(iv)); values.remove(iv); } else { if (debug) { log.debug("values left: " + N); } break; } } Collections.sort(outliers); return new Outliers(values, outliers); } } // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :