# HG changeset patch # User Sascha L. Teichmann # Date 1343753196 0 # Node ID b136113dad531aa9813ceb26538079e3a516f728 # Parent e01b9d1bc941849721114e2727023d1a737cac8a FixA: Only evict only one(!) data point as outlier before recalculating the function. flys-artifacts/trunk@5163 c6561f87-3c4e-4783-a992-168aeb5c3f6f diff -r e01b9d1bc941 -r b136113dad53 flys-artifacts/ChangeLog --- a/flys-artifacts/ChangeLog Tue Jul 31 16:14:17 2012 +0000 +++ b/flys-artifacts/ChangeLog Tue Jul 31 16:46:36 2012 +0000 @@ -1,4 +1,12 @@ -2012-07-30 Sascha L. Teichmann +2012-07-31 Sascha L. Teichmann + + * src/main/java/de/intevation/flys/artifacts/math/Outlier.java: + Only evict only one(!) data point as outlier. + + * src/main/java/de/intevation/flys/artifacts/model/fixings/Fitting.java: + Recalculate the function when one point is removed. + +2012-07-31 Sascha L. Teichmann * src/main/java/de/intevation/flys/artifacts/math/Outlier.java: Corrected the formulas of Grubbs' test for outliers. diff -r e01b9d1bc941 -r b136113dad53 flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/Outlier.java --- a/flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/Outlier.java Tue Jul 31 16:14:17 2012 +0000 +++ b/flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/Outlier.java Tue Jul 31 16:46:36 2012 +0000 @@ -1,16 +1,14 @@ package de.intevation.flys.artifacts.math; +import java.util.List; + import org.apache.commons.math.MathException; +import org.apache.commons.math.distribution.TDistributionImpl; + import org.apache.commons.math.stat.descriptive.moment.Mean; import org.apache.commons.math.stat.descriptive.moment.StandardDeviation; -import org.apache.commons.math.distribution.TDistributionImpl; - -import java.util.Collections; -import java.util.List; -import java.util.ArrayList; - import org.apache.log4j.Logger; public class Outlier @@ -21,92 +19,14 @@ private static Logger log = Logger.getLogger(Outlier.class); - public static class IndexedValue - implements Comparable - { - protected int index; - protected double value; - - public IndexedValue() { - } - - public IndexedValue(int index, double value) { - this.index = index; - this.value = value; - } - - public int getIndex() { - return index; - } - - public void setIndex(int index) { - this.index = index; - } - - public double getValue() { - return value; - } - - public void setValue(double value) { - this.value = value; - } - - @Override - public int compareTo(IndexedValue other) { - int diff = index - other.index; - if (diff < 0) return -1; - return diff > 0 ? +1 : 0; - } - } // class IndexedValue - - public static class Outliers { - - protected List retained; - protected List removed; - - public Outliers() { - } - - public Outliers( - List retained, - List removed - ) { - this.retained = retained; - this.removed = removed; - } - - public boolean hasOutliers() { - return !removed.isEmpty(); - } - - public List getRetained() { - return retained; - } - - public void setRetained(List retained) { - this.retained = retained; - } - - public List getRemoved() { - return removed; - } - - public void setRemoved(List removed) { - this.removed = removed; - } - } // class Outliers - - public Outlier() { + protected Outlier() { } - public static Outliers findOutliers(List inputValues) { - return findOutliers(inputValues, DEFAULT_ALPHA); + public static Integer findOutlier(List values) { + return findOutlier(values, DEFAULT_ALPHA); } - public static Outliers findOutliers( - List inputValues, - double alpha - ) { + public static Integer findOutlier(List values, double alpha) { boolean debug = log.isDebugEnabled(); if (debug) { @@ -115,93 +35,73 @@ alpha = 1d - alpha; - ArrayList outliers = new ArrayList(); - - ArrayList values = - new ArrayList(inputValues); - - for (;;) { - int N = values.size(); - - if (debug) { - log.debug("Values to check: " + N); - } - - if (N < 4) { - break; - } - - Mean mean = new Mean(); - StandardDeviation std = new StandardDeviation(); - - for (IndexedValue value: values) { - mean.increment(value.getValue()); - std .increment(value.getValue()); - } - - double m = mean.getResult(); - double s = std.getResult(); - - if (debug) { - log.debug("mean: " + m); - log.debug("std dev: " + s); - } + int N = values.size(); - double maxZ = -Double.MAX_VALUE; - int iv = -1; - for (int i = N-1; i >= 0; --i) { - IndexedValue v = values.get(i); - double z = Math.abs(v.getValue()-m); - if (debug) { - log.debug("z candidate: " + z); - } - if (z > maxZ) { - maxZ = z; - iv = i; - } - } - - if (Math.abs(s) < EPSILON) { - break; - } - - maxZ /= s; - - TDistributionImpl tdist = new TDistributionImpl(N-2); - - double t; + if (debug) { + log.debug("Values to check: " + N); + } - try { - t = tdist.inverseCumulativeProbability(alpha/(N+N)); - } - catch (MathException me) { - log.error(me); - break; - } - - t *= t; - - double za = ((N-1)/Math.sqrt(N))*Math.sqrt(t/(N-2d+t)); + if (N < 3) { + return null; + } - if (debug) { - log.debug("max: " + maxZ + " crit: " + za); - } + Mean mean = new Mean(); + StandardDeviation std = new StandardDeviation(); - if (maxZ > za) { - outliers.add(values.get(iv)); - values.remove(iv); - } - else { - if (debug) { - log.debug("values left: " + N); - } - break; + for (Double value: values) { + double v = value.doubleValue(); + mean.increment(v); + std .increment(v); + } + + double m = mean.getResult(); + double s = std.getResult(); + + if (debug) { + log.debug("mean: " + m); + log.debug("std dev: " + s); + } + + double maxZ = -Double.MAX_VALUE; + int iv = -1; + for (int i = N-1; i >= 0; --i) { + double v = values.get(i).doubleValue(); + double z = Math.abs(v - m); + if (z > maxZ) { + maxZ = z; + iv = i; } } - Collections.sort(outliers); + if (Math.abs(s) < EPSILON) { + return null; + } - return new Outliers(values, outliers); + maxZ /= s; + + TDistributionImpl tdist = new TDistributionImpl(N-2); + + double t; + + try { + t = tdist.inverseCumulativeProbability(alpha/(N+N)); + } + catch (MathException me) { + log.error(me); + return null; + } + + t *= t; + + double za = ((N-1)/Math.sqrt(N))*Math.sqrt(t/(N-2d+t)); + + if (debug) { + log.debug("max: " + maxZ + " crit: " + za); + } + + return maxZ > za + ? Integer.valueOf(iv) + : null; } } // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 : diff -r e01b9d1bc941 -r b136113dad53 flys-artifacts/src/main/java/de/intevation/flys/artifacts/model/fixings/Fitting.java --- a/flys-artifacts/src/main/java/de/intevation/flys/artifacts/model/fixings/Fitting.java Tue Jul 31 16:14:17 2012 +0000 +++ b/flys-artifacts/src/main/java/de/intevation/flys/artifacts/model/fixings/Fitting.java Tue Jul 31 16:46:36 2012 +0000 @@ -1,8 +1,5 @@ package de.intevation.flys.artifacts.model.fixings; -import de.intevation.flys.artifacts.math.Outlier.IndexedValue; -import de.intevation.flys.artifacts.math.Outlier.Outliers; - import de.intevation.flys.artifacts.math.Outlier; import de.intevation.flys.artifacts.math.fitting.Function; @@ -157,7 +154,7 @@ return false; } - List inputs = new ArrayList(xs.size()); + List inputs = new ArrayList(xs.size()); de.intevation.flys.artifacts.math.Function instance = null; @@ -204,27 +201,23 @@ for (int i = 0, N = xs.size(); i < N; ++i) { double y = instance.value(xs.getQuick(i)); if (Double.isNaN(y)) { - continue; + y = Double.MAX_VALUE; } - inputs.add(new IndexedValue(i, ys.getQuick(i) - y)); + inputs.add(Double.valueOf(ys.getQuick(i) - y)); } - Outliers outliers = Outlier.findOutliers(inputs); + Integer outlier = Outlier.findOutlier(inputs); - if (!outliers.hasOutliers()) { + if (outlier == null) { break; } - List rem = outliers.getRemoved(); - - for (int i = rem.size()-1; i >= 0; --i) { - int idx = rem.get(i).getIndex(); - removed.add( - qwdFactory.create( - xs.getQuick(idx), ys.getQuick(idx))); - xs.remove(idx); - ys.remove(idx); - } + int idx = outlier.intValue(); + removed.add( + qwdFactory.create( + xs.getQuick(idx), ys.getQuick(idx))); + xs.remove(idx); + ys.remove(idx); } StandardDeviation stdDev = new StandardDeviation();