comparison flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/Outlier.java @ 3651:06a65baae494

merged flys-artifacts/2.9
author Thomas Arendsen Hein <thomas@intevation.de>
date Fri, 28 Sep 2012 12:14:43 +0200
parents b136113dad53
children
comparison
equal deleted inserted replaced
3549:6a8f83c538e3 3651:06a65baae494
1 package de.intevation.flys.artifacts.math;
2
3 import java.util.List;
4
5 import org.apache.commons.math.MathException;
6
7 import org.apache.commons.math.distribution.TDistributionImpl;
8
9 import org.apache.commons.math.stat.descriptive.moment.Mean;
10 import org.apache.commons.math.stat.descriptive.moment.StandardDeviation;
11
12 import org.apache.log4j.Logger;
13
14 public class Outlier
15 {
16 public static final double EPSILON = 1e-5;
17
18 public static final double DEFAULT_ALPHA = 0.05;
19
20 private static Logger log = Logger.getLogger(Outlier.class);
21
22 protected Outlier() {
23 }
24
25 public static Integer findOutlier(List<Double> values) {
26 return findOutlier(values, DEFAULT_ALPHA);
27 }
28
29 public static Integer findOutlier(List<Double> values, double alpha) {
30 boolean debug = log.isDebugEnabled();
31
32 if (debug) {
33 log.debug("outliers significance: " + alpha);
34 }
35
36 alpha = 1d - alpha;
37
38 int N = values.size();
39
40 if (debug) {
41 log.debug("Values to check: " + N);
42 }
43
44 if (N < 3) {
45 return null;
46 }
47
48 Mean mean = new Mean();
49 StandardDeviation std = new StandardDeviation();
50
51 for (Double value: values) {
52 double v = value.doubleValue();
53 mean.increment(v);
54 std .increment(v);
55 }
56
57 double m = mean.getResult();
58 double s = std.getResult();
59
60 if (debug) {
61 log.debug("mean: " + m);
62 log.debug("std dev: " + s);
63 }
64
65 double maxZ = -Double.MAX_VALUE;
66 int iv = -1;
67 for (int i = N-1; i >= 0; --i) {
68 double v = values.get(i).doubleValue();
69 double z = Math.abs(v - m);
70 if (z > maxZ) {
71 maxZ = z;
72 iv = i;
73 }
74 }
75
76 if (Math.abs(s) < EPSILON) {
77 return null;
78 }
79
80 maxZ /= s;
81
82 TDistributionImpl tdist = new TDistributionImpl(N-2);
83
84 double t;
85
86 try {
87 t = tdist.inverseCumulativeProbability(alpha/(N+N));
88 }
89 catch (MathException me) {
90 log.error(me);
91 return null;
92 }
93
94 t *= t;
95
96 double za = ((N-1)/Math.sqrt(N))*Math.sqrt(t/(N-2d+t));
97
98 if (debug) {
99 log.debug("max: " + maxZ + " crit: " + za);
100 }
101
102 return maxZ > za
103 ? Integer.valueOf(iv)
104 : null;
105 }
106 }
107 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org