annotate flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/Outlier.java @ 2681:b94b367d8b4d

Minor refactoring, docs. flys-artifacts/trunk@4379 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author Felix Wolfsteller <felix.wolfsteller@intevation.de>
date Thu, 10 May 2012 09:44:31 +0000
parents c11da3540b70
children ab81ffd1343e
rev   line source
2645
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
1 package de.intevation.flys.artifacts.math;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
2
2646
c11da3540b70 Checked in out dated version of outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents: 2645
diff changeset
3 import org.apache.commons.math.MathException;
c11da3540b70 Checked in out dated version of outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents: 2645
diff changeset
4
2645
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
5 import org.apache.commons.math.stat.descriptive.moment.Mean;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
6 import org.apache.commons.math.stat.descriptive.moment.StandardDeviation;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
7
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
8 import org.apache.commons.math.distribution.TDistributionImpl;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
9
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
10 import java.util.List;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
11 import java.util.ArrayList;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
12
2646
c11da3540b70 Checked in out dated version of outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents: 2645
diff changeset
13 import org.apache.log4j.Logger;
c11da3540b70 Checked in out dated version of outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents: 2645
diff changeset
14
2645
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
15 public class Outlier
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
16 {
2646
c11da3540b70 Checked in out dated version of outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents: 2645
diff changeset
17 private static Logger log = Logger.getLogger(Outlier.class);
c11da3540b70 Checked in out dated version of outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents: 2645
diff changeset
18
2645
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
19 public static class IndexedValue {
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
20 protected int index;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
21 protected double value;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
22
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
23 public IndexedValue() {
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
24 }
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
25
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
26 public IndexedValue(int index, double value) {
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
27 this.index = index;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
28 this.value = value;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
29 }
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
30
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
31 public int getIndex() {
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
32 return index;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
33 }
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
34
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
35 public void setIndex(int index) {
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
36 this.index = index;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
37 }
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
38
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
39 public double getValue() {
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
40 return value;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
41 }
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
42
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
43 public void setValue(double value) {
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
44 this.value = value;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
45 }
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
46 } // class IndexedValue
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
47
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
48 public Outlier() {
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
49 }
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
50
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
51 public static List<IndexedValue> findOutliers(
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
52 List<IndexedValue> inputValues,
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
53 double alpha
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
54 ) {
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
55 ArrayList<IndexedValue> outliers = new ArrayList<IndexedValue>();
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
56
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
57 ArrayList<IndexedValue> values =
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
58 new ArrayList<IndexedValue>(inputValues);
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
59
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
60 for (;;) {
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
61 int N = values.size();
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
62
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
63 if (N < 4) {
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
64 break;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
65 }
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
66
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
67 Mean mean = new Mean();
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
68 StandardDeviation std = new StandardDeviation();
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
69
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
70 for (IndexedValue value: values) {
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
71 mean.increment(value.getValue());
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
72 std.increment(value.getValue());
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
73 }
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
74
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
75 double m = mean.getResult();
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
76 double s = std.getResult();
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
77
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
78 double maxZ = -Double.MAX_VALUE;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
79 int iv = -1;
2646
c11da3540b70 Checked in out dated version of outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents: 2645
diff changeset
80 for (int i = N-1; i >= 0; --i) {
2645
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
81 IndexedValue v = values.get(i);
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
82 double z = Math.abs(m - v.getValue())/s;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
83 if (z > maxZ) {
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
84 maxZ = z;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
85 iv = i;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
86 }
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
87 }
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
88
2646
c11da3540b70 Checked in out dated version of outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents: 2645
diff changeset
89 double t = Math.sqrt((N*(N-2)*maxZ*maxZ)
c11da3540b70 Checked in out dated version of outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents: 2645
diff changeset
90 /((N-1)*(N-1) - N*maxZ*maxZ));
2645
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
91
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
92 TDistributionImpl tdist = new TDistributionImpl(N-2);
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
93
2646
c11da3540b70 Checked in out dated version of outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents: 2645
diff changeset
94 try {
c11da3540b70 Checked in out dated version of outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents: 2645
diff changeset
95 double p = tdist.cumulativeProbability(t);
c11da3540b70 Checked in out dated version of outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents: 2645
diff changeset
96
c11da3540b70 Checked in out dated version of outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents: 2645
diff changeset
97 if (p < alpha) {
c11da3540b70 Checked in out dated version of outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents: 2645
diff changeset
98 outliers.add(values.get(iv));
c11da3540b70 Checked in out dated version of outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents: 2645
diff changeset
99 values.remove(iv);
c11da3540b70 Checked in out dated version of outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents: 2645
diff changeset
100 }
c11da3540b70 Checked in out dated version of outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents: 2645
diff changeset
101 }
c11da3540b70 Checked in out dated version of outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents: 2645
diff changeset
102 catch (MathException me) {
c11da3540b70 Checked in out dated version of outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents: 2645
diff changeset
103 log.error(me);
c11da3540b70 Checked in out dated version of outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents: 2645
diff changeset
104 }
2645
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
105 }
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
106
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
107
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
108 return outliers;
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
109 }
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
110 }
4f7d1ea38404 Added simple Grubb's outlier test.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
111 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org