comparison flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/Outlier.java @ 3318:dbe2f85bf160

merged flys-artifacts/2.8
author Thomas Arendsen Hein <thomas@intevation.de>
date Fri, 28 Sep 2012 12:14:35 +0200
parents ab81ffd1343e
children e01b9d1bc941
comparison
equal deleted inserted replaced
2987:98c7a46ec5ae 3318:dbe2f85bf160
1 package de.intevation.flys.artifacts.math;
2
3 import org.apache.commons.math.MathException;
4
5 import org.apache.commons.math.stat.descriptive.moment.Mean;
6 import org.apache.commons.math.stat.descriptive.moment.StandardDeviation;
7
8 import org.apache.commons.math.distribution.TDistributionImpl;
9
10 import java.util.Collections;
11 import java.util.List;
12 import java.util.ArrayList;
13
14 import org.apache.log4j.Logger;
15
16 public class Outlier
17 {
18 public static final double DEFAULT_ALPHA = 0.05;
19
20 private static Logger log = Logger.getLogger(Outlier.class);
21
22 public static class IndexedValue
23 implements Comparable<IndexedValue>
24 {
25 protected int index;
26 protected double value;
27
28 public IndexedValue() {
29 }
30
31 public IndexedValue(int index, double value) {
32 this.index = index;
33 this.value = value;
34 }
35
36 public int getIndex() {
37 return index;
38 }
39
40 public void setIndex(int index) {
41 this.index = index;
42 }
43
44 public double getValue() {
45 return value;
46 }
47
48 public void setValue(double value) {
49 this.value = value;
50 }
51
52 @Override
53 public int compareTo(IndexedValue other) {
54 int diff = index - other.index;
55 if (index < 0) return -1;
56 return index > 0 ? +1 : 0;
57 }
58 } // class IndexedValue
59
60 public static class Outliers {
61
62 protected List<IndexedValue> retained;
63 protected List<IndexedValue> removed;
64
65 public Outliers() {
66 }
67
68 public Outliers(
69 List<IndexedValue> retained,
70 List<IndexedValue> removed
71 ) {
72 this.retained = retained;
73 this.removed = removed;
74 }
75
76 public boolean hasOutliers() {
77 return !removed.isEmpty();
78 }
79
80 public List<IndexedValue> getRetained() {
81 return retained;
82 }
83
84 public void setRetained(List<IndexedValue> retained) {
85 this.retained = retained;
86 }
87
88 public List<IndexedValue> getRemoved() {
89 return removed;
90 }
91
92 public void setRemoved(List<IndexedValue> removed) {
93 this.removed = removed;
94 }
95 } // class Outliers
96
97 public Outlier() {
98 }
99
100 public static Outliers findOutliers(List<IndexedValue> inputValues) {
101 return findOutliers(inputValues, DEFAULT_ALPHA);
102 }
103
104 public static Outliers findOutliers(
105 List<IndexedValue> inputValues,
106 double alpha
107 ) {
108 ArrayList<IndexedValue> outliers = new ArrayList<IndexedValue>();
109
110 ArrayList<IndexedValue> values =
111 new ArrayList<IndexedValue>(inputValues);
112
113 for (;;) {
114 int N = values.size();
115
116 if (N < 4) {
117 break;
118 }
119
120 Mean mean = new Mean();
121 StandardDeviation std = new StandardDeviation();
122
123 for (IndexedValue value: values) {
124 mean.increment(value.getValue());
125 std .increment(value.getValue());
126 }
127
128 double m = mean.getResult();
129 double s = std.getResult();
130
131 double maxZ = -Double.MAX_VALUE;
132 int iv = -1;
133 for (int i = N-1; i >= 0; --i) {
134 IndexedValue v = values.get(i);
135 double z = Math.abs(m - v.getValue())/s;
136 if (z > maxZ) {
137 maxZ = z;
138 iv = i;
139 }
140 }
141
142 double t = Math.sqrt((N*(N-2)*maxZ*maxZ)
143 /((N-1)*(N-1) - N*maxZ*maxZ));
144
145 TDistributionImpl tdist = new TDistributionImpl(N-2);
146
147 try {
148 double p = tdist.cumulativeProbability(t);
149
150 if (p < alpha) {
151 outliers.add(values.get(iv));
152 values.remove(iv);
153 }
154 else {
155 break;
156 }
157 }
158 catch (MathException me) {
159 log.error(me);
160 }
161 }
162
163 Collections.sort(outliers);
164
165 return new Outliers(values, outliers);
166 }
167 }
168 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org