Mercurial > dive4elements > river
comparison flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/Outlier.java @ 3565:b136113dad53
FixA: Only evict only one(!) data point as outlier before recalculating the function.
flys-artifacts/trunk@5163 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author | Sascha L. Teichmann <sascha.teichmann@intevation.de> |
---|---|
date | Tue, 31 Jul 2012 16:46:36 +0000 |
parents | e01b9d1bc941 |
children |
comparison
equal
deleted
inserted
replaced
3564:e01b9d1bc941 | 3565:b136113dad53 |
---|---|
1 package de.intevation.flys.artifacts.math; | 1 package de.intevation.flys.artifacts.math; |
2 | |
3 import java.util.List; | |
2 | 4 |
3 import org.apache.commons.math.MathException; | 5 import org.apache.commons.math.MathException; |
4 | 6 |
7 import org.apache.commons.math.distribution.TDistributionImpl; | |
8 | |
5 import org.apache.commons.math.stat.descriptive.moment.Mean; | 9 import org.apache.commons.math.stat.descriptive.moment.Mean; |
6 import org.apache.commons.math.stat.descriptive.moment.StandardDeviation; | 10 import org.apache.commons.math.stat.descriptive.moment.StandardDeviation; |
7 | |
8 import org.apache.commons.math.distribution.TDistributionImpl; | |
9 | |
10 import java.util.Collections; | |
11 import java.util.List; | |
12 import java.util.ArrayList; | |
13 | 11 |
14 import org.apache.log4j.Logger; | 12 import org.apache.log4j.Logger; |
15 | 13 |
16 public class Outlier | 14 public class Outlier |
17 { | 15 { |
19 | 17 |
20 public static final double DEFAULT_ALPHA = 0.05; | 18 public static final double DEFAULT_ALPHA = 0.05; |
21 | 19 |
22 private static Logger log = Logger.getLogger(Outlier.class); | 20 private static Logger log = Logger.getLogger(Outlier.class); |
23 | 21 |
24 public static class IndexedValue | 22 protected Outlier() { |
25 implements Comparable<IndexedValue> | |
26 { | |
27 protected int index; | |
28 protected double value; | |
29 | |
30 public IndexedValue() { | |
31 } | |
32 | |
33 public IndexedValue(int index, double value) { | |
34 this.index = index; | |
35 this.value = value; | |
36 } | |
37 | |
38 public int getIndex() { | |
39 return index; | |
40 } | |
41 | |
42 public void setIndex(int index) { | |
43 this.index = index; | |
44 } | |
45 | |
46 public double getValue() { | |
47 return value; | |
48 } | |
49 | |
50 public void setValue(double value) { | |
51 this.value = value; | |
52 } | |
53 | |
54 @Override | |
55 public int compareTo(IndexedValue other) { | |
56 int diff = index - other.index; | |
57 if (diff < 0) return -1; | |
58 return diff > 0 ? +1 : 0; | |
59 } | |
60 } // class IndexedValue | |
61 | |
62 public static class Outliers { | |
63 | |
64 protected List<IndexedValue> retained; | |
65 protected List<IndexedValue> removed; | |
66 | |
67 public Outliers() { | |
68 } | |
69 | |
70 public Outliers( | |
71 List<IndexedValue> retained, | |
72 List<IndexedValue> removed | |
73 ) { | |
74 this.retained = retained; | |
75 this.removed = removed; | |
76 } | |
77 | |
78 public boolean hasOutliers() { | |
79 return !removed.isEmpty(); | |
80 } | |
81 | |
82 public List<IndexedValue> getRetained() { | |
83 return retained; | |
84 } | |
85 | |
86 public void setRetained(List<IndexedValue> retained) { | |
87 this.retained = retained; | |
88 } | |
89 | |
90 public List<IndexedValue> getRemoved() { | |
91 return removed; | |
92 } | |
93 | |
94 public void setRemoved(List<IndexedValue> removed) { | |
95 this.removed = removed; | |
96 } | |
97 } // class Outliers | |
98 | |
99 public Outlier() { | |
100 } | 23 } |
101 | 24 |
102 public static Outliers findOutliers(List<IndexedValue> inputValues) { | 25 public static Integer findOutlier(List<Double> values) { |
103 return findOutliers(inputValues, DEFAULT_ALPHA); | 26 return findOutlier(values, DEFAULT_ALPHA); |
104 } | 27 } |
105 | 28 |
106 public static Outliers findOutliers( | 29 public static Integer findOutlier(List<Double> values, double alpha) { |
107 List<IndexedValue> inputValues, | |
108 double alpha | |
109 ) { | |
110 boolean debug = log.isDebugEnabled(); | 30 boolean debug = log.isDebugEnabled(); |
111 | 31 |
112 if (debug) { | 32 if (debug) { |
113 log.debug("outliers significance: " + alpha); | 33 log.debug("outliers significance: " + alpha); |
114 } | 34 } |
115 | 35 |
116 alpha = 1d - alpha; | 36 alpha = 1d - alpha; |
117 | 37 |
118 ArrayList<IndexedValue> outliers = new ArrayList<IndexedValue>(); | 38 int N = values.size(); |
119 | 39 |
120 ArrayList<IndexedValue> values = | 40 if (debug) { |
121 new ArrayList<IndexedValue>(inputValues); | 41 log.debug("Values to check: " + N); |
42 } | |
122 | 43 |
123 for (;;) { | 44 if (N < 3) { |
124 int N = values.size(); | 45 return null; |
46 } | |
125 | 47 |
126 if (debug) { | 48 Mean mean = new Mean(); |
127 log.debug("Values to check: " + N); | 49 StandardDeviation std = new StandardDeviation(); |
128 } | |
129 | 50 |
130 if (N < 4) { | 51 for (Double value: values) { |
131 break; | 52 double v = value.doubleValue(); |
132 } | 53 mean.increment(v); |
54 std .increment(v); | |
55 } | |
133 | 56 |
134 Mean mean = new Mean(); | 57 double m = mean.getResult(); |
135 StandardDeviation std = new StandardDeviation(); | 58 double s = std.getResult(); |
136 | 59 |
137 for (IndexedValue value: values) { | 60 if (debug) { |
138 mean.increment(value.getValue()); | 61 log.debug("mean: " + m); |
139 std .increment(value.getValue()); | 62 log.debug("std dev: " + s); |
140 } | 63 } |
141 | 64 |
142 double m = mean.getResult(); | 65 double maxZ = -Double.MAX_VALUE; |
143 double s = std.getResult(); | 66 int iv = -1; |
144 | 67 for (int i = N-1; i >= 0; --i) { |
145 if (debug) { | 68 double v = values.get(i).doubleValue(); |
146 log.debug("mean: " + m); | 69 double z = Math.abs(v - m); |
147 log.debug("std dev: " + s); | 70 if (z > maxZ) { |
148 } | 71 maxZ = z; |
149 | 72 iv = i; |
150 double maxZ = -Double.MAX_VALUE; | |
151 int iv = -1; | |
152 for (int i = N-1; i >= 0; --i) { | |
153 IndexedValue v = values.get(i); | |
154 double z = Math.abs(v.getValue()-m); | |
155 if (debug) { | |
156 log.debug("z candidate: " + z); | |
157 } | |
158 if (z > maxZ) { | |
159 maxZ = z; | |
160 iv = i; | |
161 } | |
162 } | |
163 | |
164 if (Math.abs(s) < EPSILON) { | |
165 break; | |
166 } | |
167 | |
168 maxZ /= s; | |
169 | |
170 TDistributionImpl tdist = new TDistributionImpl(N-2); | |
171 | |
172 double t; | |
173 | |
174 try { | |
175 t = tdist.inverseCumulativeProbability(alpha/(N+N)); | |
176 } | |
177 catch (MathException me) { | |
178 log.error(me); | |
179 break; | |
180 } | |
181 | |
182 t *= t; | |
183 | |
184 double za = ((N-1)/Math.sqrt(N))*Math.sqrt(t/(N-2d+t)); | |
185 | |
186 if (debug) { | |
187 log.debug("max: " + maxZ + " crit: " + za); | |
188 } | |
189 | |
190 if (maxZ > za) { | |
191 outliers.add(values.get(iv)); | |
192 values.remove(iv); | |
193 } | |
194 else { | |
195 if (debug) { | |
196 log.debug("values left: " + N); | |
197 } | |
198 break; | |
199 } | 73 } |
200 } | 74 } |
201 | 75 |
202 Collections.sort(outliers); | 76 if (Math.abs(s) < EPSILON) { |
77 return null; | |
78 } | |
203 | 79 |
204 return new Outliers(values, outliers); | 80 maxZ /= s; |
81 | |
82 TDistributionImpl tdist = new TDistributionImpl(N-2); | |
83 | |
84 double t; | |
85 | |
86 try { | |
87 t = tdist.inverseCumulativeProbability(alpha/(N+N)); | |
88 } | |
89 catch (MathException me) { | |
90 log.error(me); | |
91 return null; | |
92 } | |
93 | |
94 t *= t; | |
95 | |
96 double za = ((N-1)/Math.sqrt(N))*Math.sqrt(t/(N-2d+t)); | |
97 | |
98 if (debug) { | |
99 log.debug("max: " + maxZ + " crit: " + za); | |
100 } | |
101 | |
102 return maxZ > za | |
103 ? Integer.valueOf(iv) | |
104 : null; | |
205 } | 105 } |
206 } | 106 } |
207 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 : | 107 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 : |