Mercurial > dive4elements > river
comparison artifacts/src/main/java/org/dive4elements/river/artifacts/math/StdDevOutlier.java @ 6714:b265cd6cfda5
issue748: Change StandardDeviation implmentation to what BFG calls Standard Deviation
Which is actually a calculation that removes outliers based on
Standard Error
Developed and analyized together with Tom.
author | Andre Heinecke <aheinecke@intevation.de> |
---|---|
date | Tue, 30 Jul 2013 17:32:28 +0200 |
parents | af13ceeba52a |
children | 5e38e2924c07 |
comparison
equal
deleted
inserted
replaced
6713:5ce1b6755174 | 6714:b265cd6cfda5 |
---|---|
8 | 8 |
9 package org.dive4elements.river.artifacts.math; | 9 package org.dive4elements.river.artifacts.math; |
10 | 10 |
11 import java.util.List; | 11 import java.util.List; |
12 | 12 |
13 import org.apache.commons.math.stat.descriptive.moment.StandardDeviation; | 13 import org.apache.log4j.Logger; |
14 | 14 |
15 import org.apache.log4j.Logger; | 15 /* XXX: |
16 * Warning: This class is called StdDevOutlier because it caculates the | |
17 * Standard Deviation method for outlier removal as the BFG calls it. | |
18 * But the actual calculation used to remove the outliers calculates | |
19 * the Standard Error and not the Standard Deviation! */ | |
16 | 20 |
17 public class StdDevOutlier | 21 public class StdDevOutlier |
18 { | 22 { |
19 public static final double DEFAULT_FACTOR = 3; | 23 public static final double DEFAULT_FACTOR = 3; |
20 | 24 |
28 } | 32 } |
29 | 33 |
30 public static Integer findOutlier( | 34 public static Integer findOutlier( |
31 List<Double> values, | 35 List<Double> values, |
32 double factor, | 36 double factor, |
33 double [] stdDevResult | 37 double [] stdErrResult |
34 ) { | 38 ) { |
35 boolean debug = log.isDebugEnabled(); | 39 boolean debug = log.isDebugEnabled(); |
36 | 40 |
37 if (debug) { | 41 if (debug) { |
38 log.debug("factor for std dev: " + factor); | 42 log.debug("factor for std dev test (that calculates std err): " + factor); |
39 } | 43 } |
40 | 44 |
41 int N = values.size(); | 45 int N = values.size(); |
42 | 46 |
43 if (debug) { | 47 if (debug) { |
46 | 50 |
47 if (N < 3) { | 51 if (N < 3) { |
48 return null; | 52 return null; |
49 } | 53 } |
50 | 54 |
51 StandardDeviation stdDev = new StandardDeviation(); | |
52 | |
53 double maxValue = -Double.MAX_VALUE; | 55 double maxValue = -Double.MAX_VALUE; |
54 int maxIndex = -1; | 56 int maxIndex = -1; |
57 | |
58 double squareSumResiduals = 0; | |
59 for (Double db: values) { | |
60 squareSumResiduals += Math.pow(db, 2); | |
61 } | |
62 | |
63 double stdErr = Math.sqrt(squareSumResiduals / (N - 2)); | |
64 | |
65 double accepted = factor * stdErr; | |
66 | |
55 for (int i = N-1; i >= 0; --i) { | 67 for (int i = N-1; i >= 0; --i) { |
56 double value = Math.abs(values.get(i)); | 68 double value = Math.abs(values.get(i)); |
57 stdDev.increment(value); | |
58 if (value > maxValue) { | 69 if (value > maxValue) { |
59 maxValue = value; | 70 maxValue = value; |
60 maxIndex = i; | 71 maxIndex = i; |
61 } | 72 } |
62 } | 73 } |
63 | 74 |
64 double sd = stdDev.getResult(); | |
65 | |
66 double accepted = factor * sd; | |
67 | |
68 if (debug) { | 75 if (debug) { |
69 log.debug("std dev: " + stdDev); | 76 log.debug("std err: " + stdErr); |
70 log.debug("accepted: " + accepted); | 77 log.debug("accepted: " + accepted); |
71 log.debug("max value: " + maxValue); | 78 log.debug("max value: " + maxValue); |
72 } | 79 } |
73 | 80 |
74 if (stdDevResult != null) { | 81 if (stdErrResult != null) { |
75 stdDevResult[0] = sd; | 82 stdErrResult[0] = stdErr; |
76 } | 83 } |
77 | 84 |
78 return maxValue > accepted ? maxIndex : null; | 85 return maxValue > accepted ? maxIndex : null; |
79 } | 86 } |
80 } | 87 } |