Mercurial > dive4elements > river
comparison flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/Outlier.java @ 3318:dbe2f85bf160
merged flys-artifacts/2.8
author | Thomas Arendsen Hein <thomas@intevation.de> |
---|---|
date | Fri, 28 Sep 2012 12:14:35 +0200 |
parents | ab81ffd1343e |
children | e01b9d1bc941 |
comparison
equal
deleted
inserted
replaced
2987:98c7a46ec5ae | 3318:dbe2f85bf160 |
---|---|
1 package de.intevation.flys.artifacts.math; | |
2 | |
3 import org.apache.commons.math.MathException; | |
4 | |
5 import org.apache.commons.math.stat.descriptive.moment.Mean; | |
6 import org.apache.commons.math.stat.descriptive.moment.StandardDeviation; | |
7 | |
8 import org.apache.commons.math.distribution.TDistributionImpl; | |
9 | |
10 import java.util.Collections; | |
11 import java.util.List; | |
12 import java.util.ArrayList; | |
13 | |
14 import org.apache.log4j.Logger; | |
15 | |
16 public class Outlier | |
17 { | |
18 public static final double DEFAULT_ALPHA = 0.05; | |
19 | |
20 private static Logger log = Logger.getLogger(Outlier.class); | |
21 | |
22 public static class IndexedValue | |
23 implements Comparable<IndexedValue> | |
24 { | |
25 protected int index; | |
26 protected double value; | |
27 | |
28 public IndexedValue() { | |
29 } | |
30 | |
31 public IndexedValue(int index, double value) { | |
32 this.index = index; | |
33 this.value = value; | |
34 } | |
35 | |
36 public int getIndex() { | |
37 return index; | |
38 } | |
39 | |
40 public void setIndex(int index) { | |
41 this.index = index; | |
42 } | |
43 | |
44 public double getValue() { | |
45 return value; | |
46 } | |
47 | |
48 public void setValue(double value) { | |
49 this.value = value; | |
50 } | |
51 | |
52 @Override | |
53 public int compareTo(IndexedValue other) { | |
54 int diff = index - other.index; | |
55 if (index < 0) return -1; | |
56 return index > 0 ? +1 : 0; | |
57 } | |
58 } // class IndexedValue | |
59 | |
60 public static class Outliers { | |
61 | |
62 protected List<IndexedValue> retained; | |
63 protected List<IndexedValue> removed; | |
64 | |
65 public Outliers() { | |
66 } | |
67 | |
68 public Outliers( | |
69 List<IndexedValue> retained, | |
70 List<IndexedValue> removed | |
71 ) { | |
72 this.retained = retained; | |
73 this.removed = removed; | |
74 } | |
75 | |
76 public boolean hasOutliers() { | |
77 return !removed.isEmpty(); | |
78 } | |
79 | |
80 public List<IndexedValue> getRetained() { | |
81 return retained; | |
82 } | |
83 | |
84 public void setRetained(List<IndexedValue> retained) { | |
85 this.retained = retained; | |
86 } | |
87 | |
88 public List<IndexedValue> getRemoved() { | |
89 return removed; | |
90 } | |
91 | |
92 public void setRemoved(List<IndexedValue> removed) { | |
93 this.removed = removed; | |
94 } | |
95 } // class Outliers | |
96 | |
97 public Outlier() { | |
98 } | |
99 | |
100 public static Outliers findOutliers(List<IndexedValue> inputValues) { | |
101 return findOutliers(inputValues, DEFAULT_ALPHA); | |
102 } | |
103 | |
104 public static Outliers findOutliers( | |
105 List<IndexedValue> inputValues, | |
106 double alpha | |
107 ) { | |
108 ArrayList<IndexedValue> outliers = new ArrayList<IndexedValue>(); | |
109 | |
110 ArrayList<IndexedValue> values = | |
111 new ArrayList<IndexedValue>(inputValues); | |
112 | |
113 for (;;) { | |
114 int N = values.size(); | |
115 | |
116 if (N < 4) { | |
117 break; | |
118 } | |
119 | |
120 Mean mean = new Mean(); | |
121 StandardDeviation std = new StandardDeviation(); | |
122 | |
123 for (IndexedValue value: values) { | |
124 mean.increment(value.getValue()); | |
125 std .increment(value.getValue()); | |
126 } | |
127 | |
128 double m = mean.getResult(); | |
129 double s = std.getResult(); | |
130 | |
131 double maxZ = -Double.MAX_VALUE; | |
132 int iv = -1; | |
133 for (int i = N-1; i >= 0; --i) { | |
134 IndexedValue v = values.get(i); | |
135 double z = Math.abs(m - v.getValue())/s; | |
136 if (z > maxZ) { | |
137 maxZ = z; | |
138 iv = i; | |
139 } | |
140 } | |
141 | |
142 double t = Math.sqrt((N*(N-2)*maxZ*maxZ) | |
143 /((N-1)*(N-1) - N*maxZ*maxZ)); | |
144 | |
145 TDistributionImpl tdist = new TDistributionImpl(N-2); | |
146 | |
147 try { | |
148 double p = tdist.cumulativeProbability(t); | |
149 | |
150 if (p < alpha) { | |
151 outliers.add(values.get(iv)); | |
152 values.remove(iv); | |
153 } | |
154 else { | |
155 break; | |
156 } | |
157 } | |
158 catch (MathException me) { | |
159 log.error(me); | |
160 } | |
161 } | |
162 | |
163 Collections.sort(outliers); | |
164 | |
165 return new Outliers(values, outliers); | |
166 } | |
167 } | |
168 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 : |