comparison flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/Outlier.java @ 3564:e01b9d1bc941

FixA: Corrected the formulas of Grubbs' test for outliers. Still a bit broken. flys-artifacts/trunk@5162 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author Sascha L. Teichmann <sascha.teichmann@intevation.de>
date Tue, 31 Jul 2012 16:14:17 +0000
parents ab81ffd1343e
children b136113dad53
comparison
equal deleted inserted replaced
3563:5063c93dfb8e 3564:e01b9d1bc941
13 13
14 import org.apache.log4j.Logger; 14 import org.apache.log4j.Logger;
15 15
16 public class Outlier 16 public class Outlier
17 { 17 {
18 public static final double EPSILON = 1e-5;
19
18 public static final double DEFAULT_ALPHA = 0.05; 20 public static final double DEFAULT_ALPHA = 0.05;
19 21
20 private static Logger log = Logger.getLogger(Outlier.class); 22 private static Logger log = Logger.getLogger(Outlier.class);
21 23
22 public static class IndexedValue 24 public static class IndexedValue
50 } 52 }
51 53
52 @Override 54 @Override
53 public int compareTo(IndexedValue other) { 55 public int compareTo(IndexedValue other) {
54 int diff = index - other.index; 56 int diff = index - other.index;
55 if (index < 0) return -1; 57 if (diff < 0) return -1;
56 return index > 0 ? +1 : 0; 58 return diff > 0 ? +1 : 0;
57 } 59 }
58 } // class IndexedValue 60 } // class IndexedValue
59 61
60 public static class Outliers { 62 public static class Outliers {
61 63
103 105
104 public static Outliers findOutliers( 106 public static Outliers findOutliers(
105 List<IndexedValue> inputValues, 107 List<IndexedValue> inputValues,
106 double alpha 108 double alpha
107 ) { 109 ) {
110 boolean debug = log.isDebugEnabled();
111
112 if (debug) {
113 log.debug("outliers significance: " + alpha);
114 }
115
116 alpha = 1d - alpha;
117
108 ArrayList<IndexedValue> outliers = new ArrayList<IndexedValue>(); 118 ArrayList<IndexedValue> outliers = new ArrayList<IndexedValue>();
109 119
110 ArrayList<IndexedValue> values = 120 ArrayList<IndexedValue> values =
111 new ArrayList<IndexedValue>(inputValues); 121 new ArrayList<IndexedValue>(inputValues);
112 122
113 for (;;) { 123 for (;;) {
114 int N = values.size(); 124 int N = values.size();
125
126 if (debug) {
127 log.debug("Values to check: " + N);
128 }
115 129
116 if (N < 4) { 130 if (N < 4) {
117 break; 131 break;
118 } 132 }
119 133
125 std .increment(value.getValue()); 139 std .increment(value.getValue());
126 } 140 }
127 141
128 double m = mean.getResult(); 142 double m = mean.getResult();
129 double s = std.getResult(); 143 double s = std.getResult();
144
145 if (debug) {
146 log.debug("mean: " + m);
147 log.debug("std dev: " + s);
148 }
130 149
131 double maxZ = -Double.MAX_VALUE; 150 double maxZ = -Double.MAX_VALUE;
132 int iv = -1; 151 int iv = -1;
133 for (int i = N-1; i >= 0; --i) { 152 for (int i = N-1; i >= 0; --i) {
134 IndexedValue v = values.get(i); 153 IndexedValue v = values.get(i);
135 double z = Math.abs(m - v.getValue())/s; 154 double z = Math.abs(v.getValue()-m);
155 if (debug) {
156 log.debug("z candidate: " + z);
157 }
136 if (z > maxZ) { 158 if (z > maxZ) {
137 maxZ = z; 159 maxZ = z;
138 iv = i; 160 iv = i;
139 } 161 }
140 } 162 }
141 163
142 double t = Math.sqrt((N*(N-2)*maxZ*maxZ) 164 if (Math.abs(s) < EPSILON) {
143 /((N-1)*(N-1) - N*maxZ*maxZ)); 165 break;
166 }
167
168 maxZ /= s;
144 169
145 TDistributionImpl tdist = new TDistributionImpl(N-2); 170 TDistributionImpl tdist = new TDistributionImpl(N-2);
146 171
172 double t;
173
147 try { 174 try {
148 double p = tdist.cumulativeProbability(t); 175 t = tdist.inverseCumulativeProbability(alpha/(N+N));
149
150 if (p < alpha) {
151 outliers.add(values.get(iv));
152 values.remove(iv);
153 }
154 else {
155 break;
156 }
157 } 176 }
158 catch (MathException me) { 177 catch (MathException me) {
159 log.error(me); 178 log.error(me);
179 break;
180 }
181
182 t *= t;
183
184 double za = ((N-1)/Math.sqrt(N))*Math.sqrt(t/(N-2d+t));
185
186 if (debug) {
187 log.debug("max: " + maxZ + " crit: " + za);
188 }
189
190 if (maxZ > za) {
191 outliers.add(values.get(iv));
192 values.remove(iv);
193 }
194 else {
195 if (debug) {
196 log.debug("values left: " + N);
197 }
198 break;
160 } 199 }
161 } 200 }
162 201
163 Collections.sort(outliers); 202 Collections.sort(outliers);
164 203

http://dive4elements.wald.intevation.org