annotate flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/StdDevOutlier.java @ 4794:a7d080347ac3

MINFO: Allow two methods for outlier test in SQ relation. * Methods can be switched as option in conf.xml. * Methods: - Find outliers via multiples of the standard deviation. - Grubbs (used in Fix-Analysis)
author Raimund Renkert <rrenkert@intevation.de>
date Fri, 11 Jan 2013 13:57:38 +0100
parents
children 8ee270a3ef25
rev   line source
4794
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
1 package de.intevation.flys.artifacts.math;
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
2
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
3 import java.util.List;
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
4
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
5 import org.apache.commons.math.MathException;
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
6
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
7 import org.apache.commons.math.distribution.TDistributionImpl;
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
8
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
9 import org.apache.commons.math.stat.descriptive.moment.Mean;
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
10 import org.apache.commons.math.stat.descriptive.moment.StandardDeviation;
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
11
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
12 import org.apache.log4j.Logger;
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
13
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
14 import de.intevation.flys.artifacts.model.sq.SQ;
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
15
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
16 public class StdDevOutlier
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
17 {
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
18 public static final double EPSILON = 1e-5;
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
19
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
20 public static final double DEFAULT_FACTOR = 3;
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
21
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
22 private static Logger log = Logger.getLogger(StdDevOutlier.class);
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
23
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
24 protected StdDevOutlier() {
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
25 }
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
26
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
27 public static Integer findOutlier(List<Double> values) {
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
28 return findOutlier(values, DEFAULT_FACTOR, null);
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
29 }
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
30
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
31 public static Integer findOutlier(List<Double> values, double factor, double[] stdDevResult) {
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
32 boolean debug = log.isDebugEnabled();
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
33
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
34 if (debug) {
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
35 log.debug("factor for std dev: " + factor);
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
36 }
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
37
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
38 int N = values.size();
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
39
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
40 if (debug) {
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
41 log.debug("Values to check: " + N);
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
42 }
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
43
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
44 if (values.size() < 3) {
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
45 return null;
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
46 }
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
47
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
48 StandardDeviation stdDev = new StandardDeviation();
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
49
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
50 double maxValue = -Double.MAX_VALUE;
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
51 int maxIndex = -1;
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
52 int ndx = 0;
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
53 for (int i = values.size()-1; i >= 0; --i) {
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
54 double value = Math.abs(values.get(i));
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
55 stdDev.increment(value);
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
56 if (value > maxValue) {
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
57 maxValue = value;
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
58 maxIndex = ndx;
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
59 }
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
60 ++ndx;
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
61 }
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
62
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
63 double sd = stdDev.getResult();
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
64
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
65 double accepted = factor * sd;
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
66
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
67 if (debug) {
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
68 log.debug("std dev: " + stdDev);
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
69 log.debug("accepted: " + accepted);
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
70 log.debug("max value: " + maxValue);
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
71 }
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
72 if (stdDevResult != null) {
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
73 stdDevResult[0] = sd;
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
74 }
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
75 return maxValue > accepted ? maxIndex : null;
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
76 }
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
77 }
a7d080347ac3 MINFO: Allow two methods for outlier test in SQ relation.
Raimund Renkert <rrenkert@intevation.de>
parents:
diff changeset
78 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org