diff flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/Outlier.java @ 3318:dbe2f85bf160

merged flys-artifacts/2.8
author Thomas Arendsen Hein <thomas@intevation.de>
date Fri, 28 Sep 2012 12:14:35 +0200
parents ab81ffd1343e
children e01b9d1bc941
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/flys-artifacts/src/main/java/de/intevation/flys/artifacts/math/Outlier.java	Fri Sep 28 12:14:35 2012 +0200
@@ -0,0 +1,168 @@
+package de.intevation.flys.artifacts.math;
+
+import org.apache.commons.math.MathException;
+
+import org.apache.commons.math.stat.descriptive.moment.Mean;
+import org.apache.commons.math.stat.descriptive.moment.StandardDeviation;
+
+import org.apache.commons.math.distribution.TDistributionImpl;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.ArrayList;
+
+import org.apache.log4j.Logger;
+
+public class Outlier
+{
+    /**
+     * Significance level used by {@link #findOutliers(List)} when the
+     * caller does not supply one.
+     */
+    public static final double DEFAULT_ALPHA = 0.05;
+
+    /** Logger of this class; it is assigned once and never replaced. */
+    private static final Logger log = Logger.getLogger(Outlier.class);
+
+    /**
+     * A double value paired with an integer index.
+     *
+     * The natural ordering compares the indices only; the values are
+     * ignored. Since equals() is not overridden, the ordering is not
+     * consistent with equals.
+     */
+    public static class IndexedValue
+    implements          Comparable<IndexedValue>
+    {
+        protected int    index;
+        protected double value;
+
+        /** Creates an instance with index 0 and value 0.0. */
+        public IndexedValue() {
+        }
+
+        /**
+         * @param index the index to store
+         * @param value the value to store
+         */
+        public IndexedValue(int index, double value) {
+            this.index = index;
+            this.value = value;
+        }
+
+        /** @return the stored index */
+        public int getIndex() {
+            return index;
+        }
+
+        /** @param index the new index */
+        public void setIndex(int index) {
+            this.index = index;
+        }
+
+        /** @return the stored value */
+        public double getValue() {
+            return value;
+        }
+
+        /** @param value the new value */
+        public void setValue(double value) {
+            this.value = value;
+        }
+
+        /**
+         * Orders instances by ascending index.
+         *
+         * @param other the instance to compare with
+         * @return -1, 0 or +1 if this index is less than, equal to or
+         *         greater than the index of other
+         */
+        @Override
+        public int compareTo(IndexedValue other) {
+            // Compare the two indices directly. Subtracting them could
+            // overflow for indices of opposite sign and flip the result.
+            if (index < other.index) return -1;
+            return index > other.index ? +1 : 0;
+        }
+    } // class IndexedValue
+
+    /**
+     * Holder for two lists of indexed values: the ones that were
+     * retained and the ones that were removed.
+     */
+    public static class Outliers {
+
+        protected List<IndexedValue> retained;
+        protected List<IndexedValue> removed;
+
+        /**
+         * Creates an instance without lists; both getters return null
+         * until the corresponding setter has been called.
+         */
+        public Outliers() {
+        }
+
+        /**
+         * @param retained the values that were kept
+         * @param removed  the values that were removed
+         */
+        public Outliers(
+            List<IndexedValue> retained,
+            List<IndexedValue> removed
+        ) {
+            this.retained = retained;
+            this.removed  = removed;
+        }
+
+        /**
+         * @return true if the list of removed values is set and not
+         *         empty, false otherwise
+         */
+        public boolean hasOutliers() {
+            // The list is null after the no-arg constructor; treat that
+            // like an empty list instead of failing.
+            return removed != null && !removed.isEmpty();
+        }
+
+        /** @return the retained values, may be null */
+        public List<IndexedValue> getRetained() {
+            return retained;
+        }
+
+        /** @param retained the new list of retained values */
+        public void setRetained(List<IndexedValue> retained) {
+            this.retained = retained;
+        }
+
+        /** @return the removed values, may be null */
+        public List<IndexedValue> getRemoved() {
+            return removed;
+        }
+
+        /** @param removed the new list of removed values */
+        public void setRemoved(List<IndexedValue> removed) {
+            this.removed = removed;
+        }
+    } // class Outliers
+
+    /**
+     * Creates an instance. The class declares no instance fields and
+     * both findOutliers methods are static, so no instance is needed
+     * to use them.
+     */
+    public Outlier() {
+    }
+
+    /**
+     * Convenience overload of {@link #findOutliers(List, double)} that
+     * passes {@link #DEFAULT_ALPHA} as the alpha argument.
+     *
+     * @param inputValues the values to examine
+     * @return whatever the two-argument overload returns for these
+     *         values and the default alpha
+     */
+    public static Outliers findOutliers(List<IndexedValue> inputValues) {
+        final double defaultAlpha = DEFAULT_ALPHA;
+        return findOutliers(inputValues, defaultAlpha);
+    }
+
+    /**
+     * Repeatedly applies Grubbs' test to the given values and removes
+     * the most extreme value as long as it is significant at level
+     * alpha.
+     *
+     * The input list itself is not modified; the search works on a
+     * copy. The search stops as soon as fewer than four values are
+     * left, the remaining values have no usable spread (standard
+     * deviation zero or NaN), the most extreme value is not
+     * significant, or the t distribution cannot be evaluated.
+     *
+     * @param inputValues the values to examine
+     * @param alpha       significance level; a value is removed if its
+     *                    Grubbs p-value is smaller than alpha
+     * @return the retained values in their input order and the removed
+     *         values sorted by their natural ordering
+     */
+    public static Outliers findOutliers(
+        List<IndexedValue> inputValues,
+        double             alpha
+    ) {
+        ArrayList<IndexedValue> outliers = new ArrayList<IndexedValue>();
+
+        // Work on a copy so the caller's list stays untouched.
+        ArrayList<IndexedValue> values =
+            new ArrayList<IndexedValue>(inputValues);
+
+        while (values.size() >= 4) {
+            int N = values.size();
+
+            Mean mean = new Mean();
+            StandardDeviation std = new StandardDeviation();
+
+            for (IndexedValue value: values) {
+                mean.increment(value.getValue());
+                std .increment(value.getValue());
+            }
+
+            double m = mean.getResult();
+            double s = std.getResult();
+
+            if (!(s > 0d)) {
+                // Zero or NaN spread: the z scores below would be NaN
+                // or infinite, so nothing can be called an outlier.
+                break;
+            }
+
+            // Find the value farthest from the mean, measured in
+            // standard deviations.
+            double maxZ = -Double.MAX_VALUE;
+            int iv = -1;
+            for (int i = N-1; i >= 0; --i) {
+                IndexedValue v = values.get(i);
+                double z = Math.abs(m - v.getValue())/s;
+                if (z > maxZ) {
+                    maxZ = z;
+                    iv = i;
+                }
+            }
+
+            if (iv < 0) {
+                // Every z score was NaN, there is no candidate.
+                break;
+            }
+
+            double p;
+            try {
+                p = grubbsPValue(N, maxZ);
+            }
+            catch (MathException me) {
+                log.error(me);
+                // Retrying with unchanged data would fail again and
+                // loop forever, so give up here.
+                break;
+            }
+
+            // Written as a negation so that a NaN p-value stops, too.
+            if (!(p < alpha)) {
+                break;
+            }
+
+            outliers.add(values.remove(iv));
+        }
+
+        Collections.sort(outliers);
+
+        return new Outliers(values, outliers);
+    }
+
+    /**
+     * Computes the two-sided p-value of Grubbs' test for the largest
+     * absolute z score maxZ found among N values.
+     */
+    private static double grubbsPValue(int N, double maxZ)
+    throws MathException
+    {
+        // Use doubles throughout; N*(N-2) overflows int for large N.
+        double n  = N;
+        double z2 = maxZ*maxZ;
+
+        double denominator = (n-1d)*(n-1d) - n*z2;
+        if (denominator <= 0d) {
+            // maxZ reached its upper bound (N-1)/sqrt(N); the t
+            // statistic grows without limit, so the p-value is zero.
+            return 0d;
+        }
+
+        double t = Math.sqrt(n*(n-2d)*z2/denominator);
+
+        TDistributionImpl tdist = new TDistributionImpl(N-2);
+
+        // t is never negative, so the evidence is in the upper tail.
+        // Doubling gives the two-sided probability, the factor N is
+        // the correction for having picked the maximum of N scores.
+        double upperTail = 1d - tdist.cumulativeProbability(t);
+
+        return Math.min(1d, 2d*n*upperTail);
+    }
+}
+// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org