Mercurial > dive4elements > river
comparison flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/Outlier.java @ 3564:e01b9d1bc941
FixA: Corrected the formulas of Grubbs' test for outliers. Still a bit broken.
flys-artifacts/trunk@5162 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author | Sascha L. Teichmann <sascha.teichmann@intevation.de> |
---|---|
date | Tue, 31 Jul 2012 16:14:17 +0000 |
parents | ab81ffd1343e |
children | b136113dad53 |
comparison
equal
deleted
inserted
replaced
3563:5063c93dfb8e | 3564:e01b9d1bc941 |
---|---|
13 | 13 |
14 import org.apache.log4j.Logger; | 14 import org.apache.log4j.Logger; |
15 | 15 |
16 public class Outlier | 16 public class Outlier |
17 { | 17 { |
18 public static final double EPSILON = 1e-5; | |
19 | |
18 public static final double DEFAULT_ALPHA = 0.05; | 20 public static final double DEFAULT_ALPHA = 0.05; |
19 | 21 |
20 private static Logger log = Logger.getLogger(Outlier.class); | 22 private static Logger log = Logger.getLogger(Outlier.class); |
21 | 23 |
22 public static class IndexedValue | 24 public static class IndexedValue |
50 } | 52 } |
51 | 53 |
52 @Override | 54 @Override |
53 public int compareTo(IndexedValue other) { | 55 public int compareTo(IndexedValue other) { |
54 int diff = index - other.index; | 56 int diff = index - other.index; |
55 if (index < 0) return -1; | 57 if (diff < 0) return -1; |
56 return index > 0 ? +1 : 0; | 58 return diff > 0 ? +1 : 0; |
57 } | 59 } |
58 } // class IndexedValue | 60 } // class IndexedValue |
59 | 61 |
60 public static class Outliers { | 62 public static class Outliers { |
61 | 63 |
103 | 105 |
104 public static Outliers findOutliers( | 106 public static Outliers findOutliers( |
105 List<IndexedValue> inputValues, | 107 List<IndexedValue> inputValues, |
106 double alpha | 108 double alpha |
107 ) { | 109 ) { |
110 boolean debug = log.isDebugEnabled(); | |
111 | |
112 if (debug) { | |
113 log.debug("outliers significance: " + alpha); | |
114 } | |
115 | |
116 alpha = 1d - alpha; | |
117 | |
108 ArrayList<IndexedValue> outliers = new ArrayList<IndexedValue>(); | 118 ArrayList<IndexedValue> outliers = new ArrayList<IndexedValue>(); |
109 | 119 |
110 ArrayList<IndexedValue> values = | 120 ArrayList<IndexedValue> values = |
111 new ArrayList<IndexedValue>(inputValues); | 121 new ArrayList<IndexedValue>(inputValues); |
112 | 122 |
113 for (;;) { | 123 for (;;) { |
114 int N = values.size(); | 124 int N = values.size(); |
125 | |
126 if (debug) { | |
127 log.debug("Values to check: " + N); | |
128 } | |
115 | 129 |
116 if (N < 4) { | 130 if (N < 4) { |
117 break; | 131 break; |
118 } | 132 } |
119 | 133 |
125 std .increment(value.getValue()); | 139 std .increment(value.getValue()); |
126 } | 140 } |
127 | 141 |
128 double m = mean.getResult(); | 142 double m = mean.getResult(); |
129 double s = std.getResult(); | 143 double s = std.getResult(); |
144 | |
145 if (debug) { | |
146 log.debug("mean: " + m); | |
147 log.debug("std dev: " + s); | |
148 } | |
130 | 149 |
131 double maxZ = -Double.MAX_VALUE; | 150 double maxZ = -Double.MAX_VALUE; |
132 int iv = -1; | 151 int iv = -1; |
133 for (int i = N-1; i >= 0; --i) { | 152 for (int i = N-1; i >= 0; --i) { |
134 IndexedValue v = values.get(i); | 153 IndexedValue v = values.get(i); |
135 double z = Math.abs(m - v.getValue())/s; | 154 double z = Math.abs(v.getValue()-m); |
155 if (debug) { | |
156 log.debug("z candidate: " + z); | |
157 } | |
136 if (z > maxZ) { | 158 if (z > maxZ) { |
137 maxZ = z; | 159 maxZ = z; |
138 iv = i; | 160 iv = i; |
139 } | 161 } |
140 } | 162 } |
141 | 163 |
142 double t = Math.sqrt((N*(N-2)*maxZ*maxZ) | 164 if (Math.abs(s) < EPSILON) { |
143 /((N-1)*(N-1) - N*maxZ*maxZ)); | 165 break; |
166 } | |
167 | |
168 maxZ /= s; | |
144 | 169 |
145 TDistributionImpl tdist = new TDistributionImpl(N-2); | 170 TDistributionImpl tdist = new TDistributionImpl(N-2); |
146 | 171 |
172 double t; | |
173 | |
147 try { | 174 try { |
148 double p = tdist.cumulativeProbability(t); | 175 t = tdist.inverseCumulativeProbability(alpha/(N+N)); |
149 | |
150 if (p < alpha) { | |
151 outliers.add(values.get(iv)); | |
152 values.remove(iv); | |
153 } | |
154 else { | |
155 break; | |
156 } | |
157 } | 176 } |
158 catch (MathException me) { | 177 catch (MathException me) { |
159 log.error(me); | 178 log.error(me); |
179 break; | |
180 } | |
181 | |
182 t *= t; | |
183 | |
184 double za = ((N-1)/Math.sqrt(N))*Math.sqrt(t/(N-2d+t)); | |
185 | |
186 if (debug) { | |
187 log.debug("max: " + maxZ + " crit: " + za); | |
188 } | |
189 | |
190 if (maxZ > za) { | |
191 outliers.add(values.get(iv)); | |
192 values.remove(iv); | |
193 } | |
194 else { | |
195 if (debug) { | |
196 log.debug("values left: " + N); | |
197 } | |
198 break; | |
160 } | 199 } |
161 } | 200 } |
162 | 201 |
163 Collections.sort(outliers); | 202 Collections.sort(outliers); |
164 | 203 |