# HG changeset patch # User Raimund Renkert # Date 1357909058 -3600 # Node ID a7d080347ac3e2d82af94f78b1518f711ce602d8 # Parent c0d6391bec6fb7a993f3a5460ca5b29add81a58c MINFO: Allow two methods for outlier test in SQ relation. * Methods can be switched as option in conf.xml. * Methods: - Find outliers via multiples of the standard deviation. - Grubbs (used in Fix-Analysis) diff -r c0d6391bec6f -r a7d080347ac3 flys-artifacts/doc/conf/conf.xml --- a/flys-artifacts/doc/conf/conf.xml Wed Jan 09 13:17:09 2013 +0100 +++ b/flys-artifacts/doc/conf/conf.xml Fri Jan 11 13:57:38 2013 +0100 @@ -403,5 +403,9 @@ + + + + diff -r c0d6391bec6f -r a7d080347ac3 flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/GrubbsOutlier.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/GrubbsOutlier.java Fri Jan 11 13:57:38 2013 +0100 @@ -0,0 +1,113 @@ +package de.intevation.flys.artifacts.math; + +import java.util.List; + +import org.apache.commons.math.MathException; + +import org.apache.commons.math.distribution.TDistributionImpl; + +import org.apache.commons.math.stat.descriptive.moment.Mean; +import org.apache.commons.math.stat.descriptive.moment.StandardDeviation; + +import org.apache.log4j.Logger; + +public class GrubbsOutlier +{ + public static final double EPSILON = 1e-5; + + public static final double DEFAULT_ALPHA = 0.05; + + private static Logger log = Logger.getLogger(GrubbsOutlier.class); + + protected GrubbsOutlier() { + } + + public static Integer findOutlier(List values) { + return findOutlier(values, DEFAULT_ALPHA, null); + } + + public static Integer findOutlier( + List values, + double alpha, + double[] stdDevResult + ) { + boolean debug = log.isDebugEnabled(); + + if (debug) { + log.debug("outliers significance: " + alpha); + } + + alpha = 1d - alpha; + + int N = values.size(); + + if (debug) { + log.debug("Values to check: " + N); + } + + if (N < 3) { + return null; + } + + Mean mean = new Mean(); + StandardDeviation std = new StandardDeviation(); + + for (Double value: values) { + double v = value.doubleValue(); + mean.increment(v); + std .increment(v); + } + + double m = mean.getResult(); + double s = std.getResult(); + + if (debug) { + log.debug("mean: " + m); + log.debug("std dev: " + s); + } + + double maxZ = -Double.MAX_VALUE; + int iv = -1; + for (int i = N-1; i >= 0; --i) { + double v = values.get(i).doubleValue(); + double z = Math.abs(v - m); + if (z > maxZ) { + maxZ = z; + iv = i; + } + } + + if (Math.abs(s) < EPSILON) { + return null; + } + + maxZ /= s; + + TDistributionImpl tdist = new TDistributionImpl(N-2); + + double t; + + try { + t = tdist.inverseCumulativeProbability(alpha/(N+N)); + } + catch (MathException me) { + log.error(me); + return null; + } + + t *= t; + + double za = ((N-1)/Math.sqrt(N))*Math.sqrt(t/(N-2d+t)); + + if (debug) { + log.debug("max: " + maxZ + " crit: " + za); + } + if (stdDevResult != null) { + stdDevResult[0] = std.getResult(); + } + return maxZ > za + ? Integer.valueOf(iv) + : null; + } +} +// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 : diff -r c0d6391bec6f -r a7d080347ac3 flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/Outlier.java --- a/flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/Outlier.java Wed Jan 09 13:17:09 2013 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,107 +0,0 @@ -package de.intevation.flys.artifacts.math; - -import java.util.List; - -import org.apache.commons.math.MathException; - -import org.apache.commons.math.distribution.TDistributionImpl; - -import org.apache.commons.math.stat.descriptive.moment.Mean; -import org.apache.commons.math.stat.descriptive.moment.StandardDeviation; - -import org.apache.log4j.Logger; - -public class Outlier -{ - public static final double EPSILON = 1e-5; - - public static final double DEFAULT_ALPHA = 0.05; - - private static Logger log = Logger.getLogger(Outlier.class); - - protected Outlier() { - } - - public static Integer findOutlier(List values) { - return findOutlier(values, DEFAULT_ALPHA); - } - - public static Integer findOutlier(List values, double alpha) { - boolean debug = log.isDebugEnabled(); - - if (debug) { - log.debug("outliers significance: " + alpha); - } - - alpha = 1d - alpha; - - int N = values.size(); - - if (debug) { - log.debug("Values to check: " + N); - } - - if (N < 3) { - return null; - } - - Mean mean = new Mean(); - StandardDeviation std = new StandardDeviation(); - - for (Double value: values) { - double v = value.doubleValue(); - mean.increment(v); - std .increment(v); - } - - double m = mean.getResult(); - double s = std.getResult(); - - if (debug) { - log.debug("mean: " + m); - log.debug("std dev: " + s); - } - - double maxZ = -Double.MAX_VALUE; - int iv = -1; - for (int i = N-1; i >= 0; --i) { - double v = values.get(i).doubleValue(); - double z = Math.abs(v - m); - if (z > maxZ) { - maxZ = z; - iv = i; - } - } - - if (Math.abs(s) < EPSILON) { - return null; - } - - maxZ /= s; - - TDistributionImpl tdist = new TDistributionImpl(N-2); - - double t; - - try { - t = tdist.inverseCumulativeProbability(alpha/(N+N)); - } - catch (MathException me) { - log.error(me); - return null; - } - - t *= t; - - double za = ((N-1)/Math.sqrt(N))*Math.sqrt(t/(N-2d+t)); - - if (debug) { - log.debug("max: " + maxZ + " crit: " + za); - } - - return maxZ > za - ? Integer.valueOf(iv) - : null; - } -} -// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 : diff -r c0d6391bec6f -r a7d080347ac3 flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/StdDevOutlier.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/StdDevOutlier.java Fri Jan 11 13:57:38 2013 +0100 @@ -0,0 +1,78 @@ +package de.intevation.flys.artifacts.math; + +import java.util.List; + +import org.apache.commons.math.MathException; + +import org.apache.commons.math.distribution.TDistributionImpl; + +import org.apache.commons.math.stat.descriptive.moment.Mean; +import org.apache.commons.math.stat.descriptive.moment.StandardDeviation; + +import org.apache.log4j.Logger; + +import de.intevation.flys.artifacts.model.sq.SQ; + +public class StdDevOutlier +{ + public static final double EPSILON = 1e-5; + + public static final double DEFAULT_FACTOR = 3; + + private static Logger log = Logger.getLogger(StdDevOutlier.class); + + protected StdDevOutlier() { + } + + public static Integer findOutlier(List values) { + return findOutlier(values, DEFAULT_FACTOR, null); + } + + public static Integer findOutlier(List values, double factor, double[] stdDevResult) { + boolean debug = log.isDebugEnabled(); + + if (debug) { + log.debug("factor for std dev: " + factor); + } + + int N = values.size(); + + if (debug) { + log.debug("Values to check: " + N); + } + + if (values.size() < 3) { + return null; + } + + StandardDeviation stdDev = new StandardDeviation(); + + double maxValue = -Double.MAX_VALUE; + int maxIndex = -1; + int ndx = 0; + for (int i = values.size()-1; i >= 0; --i) { + double value = Math.abs(values.get(i)); + stdDev.increment(value); + if (value > maxValue) { + maxValue = value; + maxIndex = ndx; + } + ++ndx; + } + + double sd = stdDev.getResult(); + + double accepted = factor * sd; + + if (debug) { + log.debug("std dev: " + stdDev); + log.debug("accepted: " + accepted); + log.debug("max value: " + maxValue); + } + if (stdDevResult != null) { + stdDevResult[0] = sd; + } + return maxValue > accepted ? maxIndex : null; + } +} +// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 : diff -r c0d6391bec6f -r a7d080347ac3 flys-artifacts/src/main/java/de/intevation/flys/artifacts/model/fixings/Fitting.java --- a/flys-artifacts/src/main/java/de/intevation/flys/artifacts/model/fixings/Fitting.java Wed Jan 09 13:17:09 2013 +0100 +++ b/flys-artifacts/src/main/java/de/intevation/flys/artifacts/model/fixings/Fitting.java Fri Jan 11 13:57:38 2013 +0100 @@ -1,23 +1,18 @@ package de.intevation.flys.artifacts.model.fixings; -import de.intevation.flys.artifacts.math.Outlier; - -import de.intevation.flys.artifacts.math.fitting.Function; - import gnu.trove.TDoubleArrayList; import java.util.ArrayList; import java.util.List; import org.apache.commons.math.MathException; - import org.apache.commons.math.optimization.fitting.CurveFitter; - import org.apache.commons.math.optimization.general.LevenbergMarquardtOptimizer; - import org.apache.commons.math.stat.descriptive.moment.StandardDeviation; +import org.apache.log4j.Logger; -import org.apache.log4j.Logger; +import de.intevation.flys.artifacts.math.GrubbsOutlier; +import de.intevation.flys.artifacts.math.fitting.Function; public class Fitting { @@ -206,7 +201,7 @@ inputs.add(Double.valueOf(ys.getQuick(i) - y)); } - Integer outlier = Outlier.findOutlier(inputs); + Integer outlier = GrubbsOutlier.findOutlier(inputs); if (outlier == null) { break; diff -r c0d6391bec6f -r a7d080347ac3 flys-artifacts/src/main/java/de/intevation/flys/artifacts/model/sq/Outlier.java --- a/flys-artifacts/src/main/java/de/intevation/flys/artifacts/model/sq/Outlier.java Wed Jan 09 13:17:09 2013 +0100 +++ b/flys-artifacts/src/main/java/de/intevation/flys/artifacts/model/sq/Outlier.java Fri Jan 11 13:57:38 2013 +0100 @@ -9,10 +9,23 @@ import org.apache.log4j.Logger; +import de.intevation.artifacts.GlobalContext; +import de.intevation.artifacts.common.utils.Config; +import de.intevation.flys.artifacts.context.FLYSContext; +import de.intevation.flys.artifacts.math.GrubbsOutlier; +import de.intevation.flys.artifacts.math.StdDevOutlier; + public class Outlier { private static Logger log = Logger.getLogger(Outlier.class); + private static final String OUTLIER_METHOD = + "/artifact-database/options/minfo-sq/outlier-method/@name"; + + private static final String GRUBBS = "grubbs"; + + private static final String STD_DEV = "std-dev"; + public interface Callback { void initialize(List sqs) throws MathException; @@ -38,46 +51,39 @@ if (debug) { log.debug("stdDevFactor: " + stdDevFactor); } - + String method = Config.getStringXPath(OUTLIER_METHOD); + log.debug("method: " + method); + if (method == null) { + method = "std-dev"; + } List data = new ArrayList(sqs); while (data.size() > 2) { callback.initialize(data); - StandardDeviation stdDev = new StandardDeviation(); - - double maxValue = -Double.MAX_VALUE; - int maxIndex = -1; - - for (int i = data.size()-1; i >= 0; --i) { - double value = Math.abs(callback.eval(data.get(i))); - stdDev.increment(value); - if (value > maxValue) { - maxValue = value; - maxIndex = i; - } + List values = new ArrayList(); + for (SQ sq: data) { + values.add(callback.eval(sq)); } - double sd = stdDev.getResult(); - - double accepted = stdDevFactor * sd; - - if (debug) { - log.debug("std dev: " + stdDev); - log.debug("accepted: " + accepted); - log.debug("max value: " + maxValue); + Integer ndx = null; + double[] stdDev = new double[1]; + if (method.equals(GRUBBS)) { + ndx = GrubbsOutlier.findOutlier(values, stdDevFactor/100, stdDev); + } + else { + ndx = StdDevOutlier.findOutlier(values, stdDevFactor, stdDev); + } + if (ndx == null) { + callback.iterationFinished(stdDev[0], null, data); + break; } - SQ outlier = maxValue > accepted - ? data.remove(maxIndex) - : null; - - callback.iterationFinished(sd, outlier, data); - - if (outlier == null) { - break; - } + SQ outlier = data.remove((int)ndx); + log.debug("stdDev: " + stdDev[0]); + log.debug("removed " + ndx + "; S: " + outlier.getS() + " Q: " + outlier.getQ()); + callback.iterationFinished(stdDev[0], outlier, data); } } }