Mercurial > dive4elements > river
comparison flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/GrubbsOutlier.java @ 4794:a7d080347ac3
MINFO: Allow two methods for outlier test in SQ relation.
* Methods can be switched as option in conf.xml.
* Methods:
- Find outliers via multiples of the standard deviation.
- Grubbs (used in Fix-Analysis)
author | Raimund Renkert <rrenkert@intevation.de> |
---|---|
date | Fri, 11 Jan 2013 13:57:38 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
4793:c0d6391bec6f | 4794:a7d080347ac3 |
---|---|
1 package de.intevation.flys.artifacts.math; | |
2 | |
3 import java.util.List; | |
4 | |
5 import org.apache.commons.math.MathException; | |
6 | |
7 import org.apache.commons.math.distribution.TDistributionImpl; | |
8 | |
9 import org.apache.commons.math.stat.descriptive.moment.Mean; | |
10 import org.apache.commons.math.stat.descriptive.moment.StandardDeviation; | |
11 | |
12 import org.apache.log4j.Logger; | |
13 | |
14 public class GrubbsOutlier | |
15 { | |
16 public static final double EPSILON = 1e-5; | |
17 | |
18 public static final double DEFAULT_ALPHA = 0.05; | |
19 | |
20 private static Logger log = Logger.getLogger(GrubbsOutlier.class); | |
21 | |
22 protected GrubbsOutlier() { | |
23 } | |
24 | |
25 public static Integer findOutlier(List<Double> values) { | |
26 return findOutlier(values, DEFAULT_ALPHA, null); | |
27 } | |
28 | |
29 public static Integer findOutlier( | |
30 List<Double> values, | |
31 double alpha, | |
32 double[] stdDevResult | |
33 ) { | |
34 boolean debug = log.isDebugEnabled(); | |
35 | |
36 if (debug) { | |
37 log.debug("outliers significance: " + alpha); | |
38 } | |
39 | |
40 alpha = 1d - alpha; | |
41 | |
42 int N = values.size(); | |
43 | |
44 if (debug) { | |
45 log.debug("Values to check: " + N); | |
46 } | |
47 | |
48 if (N < 3) { | |
49 return null; | |
50 } | |
51 | |
52 Mean mean = new Mean(); | |
53 StandardDeviation std = new StandardDeviation(); | |
54 | |
55 for (Double value: values) { | |
56 double v = value.doubleValue(); | |
57 mean.increment(v); | |
58 std .increment(v); | |
59 } | |
60 | |
61 double m = mean.getResult(); | |
62 double s = std.getResult(); | |
63 | |
64 if (debug) { | |
65 log.debug("mean: " + m); | |
66 log.debug("std dev: " + s); | |
67 } | |
68 | |
69 double maxZ = -Double.MAX_VALUE; | |
70 int iv = -1; | |
71 for (int i = N-1; i >= 0; --i) { | |
72 double v = values.get(i).doubleValue(); | |
73 double z = Math.abs(v - m); | |
74 if (z > maxZ) { | |
75 maxZ = z; | |
76 iv = i; | |
77 } | |
78 } | |
79 | |
80 if (Math.abs(s) < EPSILON) { | |
81 return null; | |
82 } | |
83 | |
84 maxZ /= s; | |
85 | |
86 TDistributionImpl tdist = new TDistributionImpl(N-2); | |
87 | |
88 double t; | |
89 | |
90 try { | |
91 t = tdist.inverseCumulativeProbability(alpha/(N+N)); | |
92 } | |
93 catch (MathException me) { | |
94 log.error(me); | |
95 return null; | |
96 } | |
97 | |
98 t *= t; | |
99 | |
100 double za = ((N-1)/Math.sqrt(N))*Math.sqrt(t/(N-2d+t)); | |
101 | |
102 if (debug) { | |
103 log.debug("max: " + maxZ + " crit: " + za); | |
104 } | |
105 if (stdDevResult != null) { | |
106 stdDevResult[0] = std.getResult(); | |
107 } | |
108 return maxZ > za | |
109 ? Integer.valueOf(iv) | |
110 : null; | |
111 } | |
112 } | |
113 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 : |