annotate flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/Outlier.java @ 2645:4f7d1ea38404

Added simple Grubb's outlier test. flys-artifacts/trunk@4300 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author Sascha L. Teichmann <sascha.teichmann@intevation.de>
date Wed, 25 Apr 2012 15:57:23 +0000
parents
children c11da3540b70
rev   line source
2645
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
1 package de.intevation.flys.artifacts.math;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
2
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
3 import org.apache.commons.math.stat.descriptive.moment.Mean;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
4 import org.apache.commons.math.stat.descriptive.moment.StandardDeviation;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
5
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
6 import org.apache.commons.math.distribution.TDistributionImpl;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
7
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
8 import java.util.List;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
9 import java.util.ArrayList;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
10
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
11 public class Outlier
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
12 {
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
13 public static class IndexedValue {
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
14 protected int index;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
15 protected double value;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
16
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
17 public IndexedValue() {
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
18 }
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
19
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
20 public IndexedValue(int index, double value) {
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
21 this.index = index;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
22 this.value = value;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
23 }
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
24
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
25 public int getIndex() {
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
26 return index;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
27 }
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
28
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
29 public void setIndex(int index) {
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
30 this.index = index;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
31 }
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
32
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
33 public double getValue() {
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
34 return value;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
35 }
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
36
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
37 public void setValue(double value) {
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
38 this.value = value;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
39 }
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
40 } // class IndexedValue
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
41
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
42 public Outlier() {
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
43 }
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
44
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
45 public static List<IndexedValue> findOutliers(
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
46 List<IndexedValue> inputValues,
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
47 double alpha
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
48 ) {
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
49 ArrayList<IndexedValue> outliers = new ArrayList<IndexedValue>();
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
50
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
51 ArrayList<IndexedValue> values =
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
52 new ArrayList<IndexedValue>(inputValues);
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
53
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
54 for (;;) {
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
55 int N = values.size();
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
56
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
57 if (N < 4) {
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
58 break;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
59 }
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
60
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
61 Mean mean = new Mean();
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
62 StandardDeviation std = new StandardDeviation();
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
63
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
64 for (IndexedValue value: values) {
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
65 mean.increment(value.getValue());
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
66 std.increment(value.getValue());
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
67 }
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
68
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
69 double m = mean.getResult();
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
70 double s = std.getResult();
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
71
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
72 double maxZ = -Double.MAX_VALUE;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
73 int iv = -1;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
74 for (int i >= 0; i = N-1; --i) {
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
75 IndexedValue v = values.get(i);
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
76 double z = Math.abs(m - v.getValue())/s;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
77 if (z > maxZ) {
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
78 maxZ = z;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
79 iv = i;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
80 }
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
81 }
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
82
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
83 double t = Math.sqrt((N*(N-2)*z*z)/((N-1)*(N-1) - N*z*z))
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
84
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
85 TDistributionImpl tdist = new TDistributionImpl(N-2);
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
86
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
87 double p = tdist.cumulativeProbability(t)
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
88 }
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
89
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
90
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
91 return outliers;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
92 }
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
93 }
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
94 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org