comparison flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/Outlier.java @ 3565:b136113dad53

FixA: Only evict only one(!) data point as outlier before recalculating the function. flys-artifacts/trunk@5163 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author Sascha L. Teichmann <sascha.teichmann@intevation.de>
date Tue, 31 Jul 2012 16:46:36 +0000
parents e01b9d1bc941
children
comparison
equal deleted inserted replaced
3564:e01b9d1bc941 3565:b136113dad53
1 package de.intevation.flys.artifacts.math; 1 package de.intevation.flys.artifacts.math;
2
3 import java.util.List;
2 4
3 import org.apache.commons.math.MathException; 5 import org.apache.commons.math.MathException;
4 6
7 import org.apache.commons.math.distribution.TDistributionImpl;
8
5 import org.apache.commons.math.stat.descriptive.moment.Mean; 9 import org.apache.commons.math.stat.descriptive.moment.Mean;
6 import org.apache.commons.math.stat.descriptive.moment.StandardDeviation; 10 import org.apache.commons.math.stat.descriptive.moment.StandardDeviation;
7
8 import org.apache.commons.math.distribution.TDistributionImpl;
9
10 import java.util.Collections;
11 import java.util.List;
12 import java.util.ArrayList;
13 11
14 import org.apache.log4j.Logger; 12 import org.apache.log4j.Logger;
15 13
16 public class Outlier 14 public class Outlier
17 { 15 {
19 17
20 public static final double DEFAULT_ALPHA = 0.05; 18 public static final double DEFAULT_ALPHA = 0.05;
21 19
22 private static Logger log = Logger.getLogger(Outlier.class); 20 private static Logger log = Logger.getLogger(Outlier.class);
23 21
24 public static class IndexedValue 22 protected Outlier() {
25 implements Comparable<IndexedValue>
26 {
27 protected int index;
28 protected double value;
29
30 public IndexedValue() {
31 }
32
33 public IndexedValue(int index, double value) {
34 this.index = index;
35 this.value = value;
36 }
37
38 public int getIndex() {
39 return index;
40 }
41
42 public void setIndex(int index) {
43 this.index = index;
44 }
45
46 public double getValue() {
47 return value;
48 }
49
50 public void setValue(double value) {
51 this.value = value;
52 }
53
54 @Override
55 public int compareTo(IndexedValue other) {
56 int diff = index - other.index;
57 if (diff < 0) return -1;
58 return diff > 0 ? +1 : 0;
59 }
60 } // class IndexedValue
61
62 public static class Outliers {
63
64 protected List<IndexedValue> retained;
65 protected List<IndexedValue> removed;
66
67 public Outliers() {
68 }
69
70 public Outliers(
71 List<IndexedValue> retained,
72 List<IndexedValue> removed
73 ) {
74 this.retained = retained;
75 this.removed = removed;
76 }
77
78 public boolean hasOutliers() {
79 return !removed.isEmpty();
80 }
81
82 public List<IndexedValue> getRetained() {
83 return retained;
84 }
85
86 public void setRetained(List<IndexedValue> retained) {
87 this.retained = retained;
88 }
89
90 public List<IndexedValue> getRemoved() {
91 return removed;
92 }
93
94 public void setRemoved(List<IndexedValue> removed) {
95 this.removed = removed;
96 }
97 } // class Outliers
98
99 public Outlier() {
100 } 23 }
101 24
102 public static Outliers findOutliers(List<IndexedValue> inputValues) { 25 public static Integer findOutlier(List<Double> values) {
103 return findOutliers(inputValues, DEFAULT_ALPHA); 26 return findOutlier(values, DEFAULT_ALPHA);
104 } 27 }
105 28
106 public static Outliers findOutliers( 29 public static Integer findOutlier(List<Double> values, double alpha) {
107 List<IndexedValue> inputValues,
108 double alpha
109 ) {
110 boolean debug = log.isDebugEnabled(); 30 boolean debug = log.isDebugEnabled();
111 31
112 if (debug) { 32 if (debug) {
113 log.debug("outliers significance: " + alpha); 33 log.debug("outliers significance: " + alpha);
114 } 34 }
115 35
116 alpha = 1d - alpha; 36 alpha = 1d - alpha;
117 37
118 ArrayList<IndexedValue> outliers = new ArrayList<IndexedValue>(); 38 int N = values.size();
119 39
120 ArrayList<IndexedValue> values = 40 if (debug) {
121 new ArrayList<IndexedValue>(inputValues); 41 log.debug("Values to check: " + N);
42 }
122 43
123 for (;;) { 44 if (N < 3) {
124 int N = values.size(); 45 return null;
46 }
125 47
126 if (debug) { 48 Mean mean = new Mean();
127 log.debug("Values to check: " + N); 49 StandardDeviation std = new StandardDeviation();
128 }
129 50
130 if (N < 4) { 51 for (Double value: values) {
131 break; 52 double v = value.doubleValue();
132 } 53 mean.increment(v);
54 std .increment(v);
55 }
133 56
134 Mean mean = new Mean(); 57 double m = mean.getResult();
135 StandardDeviation std = new StandardDeviation(); 58 double s = std.getResult();
136 59
137 for (IndexedValue value: values) { 60 if (debug) {
138 mean.increment(value.getValue()); 61 log.debug("mean: " + m);
139 std .increment(value.getValue()); 62 log.debug("std dev: " + s);
140 } 63 }
141 64
142 double m = mean.getResult(); 65 double maxZ = -Double.MAX_VALUE;
143 double s = std.getResult(); 66 int iv = -1;
144 67 for (int i = N-1; i >= 0; --i) {
145 if (debug) { 68 double v = values.get(i).doubleValue();
146 log.debug("mean: " + m); 69 double z = Math.abs(v - m);
147 log.debug("std dev: " + s); 70 if (z > maxZ) {
148 } 71 maxZ = z;
149 72 iv = i;
150 double maxZ = -Double.MAX_VALUE;
151 int iv = -1;
152 for (int i = N-1; i >= 0; --i) {
153 IndexedValue v = values.get(i);
154 double z = Math.abs(v.getValue()-m);
155 if (debug) {
156 log.debug("z candidate: " + z);
157 }
158 if (z > maxZ) {
159 maxZ = z;
160 iv = i;
161 }
162 }
163
164 if (Math.abs(s) < EPSILON) {
165 break;
166 }
167
168 maxZ /= s;
169
170 TDistributionImpl tdist = new TDistributionImpl(N-2);
171
172 double t;
173
174 try {
175 t = tdist.inverseCumulativeProbability(alpha/(N+N));
176 }
177 catch (MathException me) {
178 log.error(me);
179 break;
180 }
181
182 t *= t;
183
184 double za = ((N-1)/Math.sqrt(N))*Math.sqrt(t/(N-2d+t));
185
186 if (debug) {
187 log.debug("max: " + maxZ + " crit: " + za);
188 }
189
190 if (maxZ > za) {
191 outliers.add(values.get(iv));
192 values.remove(iv);
193 }
194 else {
195 if (debug) {
196 log.debug("values left: " + N);
197 }
198 break;
199 } 73 }
200 } 74 }
201 75
202 Collections.sort(outliers); 76 if (Math.abs(s) < EPSILON) {
77 return null;
78 }
203 79
204 return new Outliers(values, outliers); 80 maxZ /= s;
81
82 TDistributionImpl tdist = new TDistributionImpl(N-2);
83
84 double t;
85
86 try {
87 t = tdist.inverseCumulativeProbability(alpha/(N+N));
88 }
89 catch (MathException me) {
90 log.error(me);
91 return null;
92 }
93
94 t *= t;
95
96 double za = ((N-1)/Math.sqrt(N))*Math.sqrt(t/(N-2d+t));
97
98 if (debug) {
99 log.debug("max: " + maxZ + " crit: " + za);
100 }
101
102 return maxZ > za
103 ? Integer.valueOf(iv)
104 : null;
205 } 105 }
206 } 106 }
207 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 : 107 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org