teichmann@5863: /* Copyright (C) 2011, 2012, 2013 by Bundesanstalt für Gewässerkunde teichmann@5863: * Software engineering by Intevation GmbH teichmann@5863: * teichmann@5994: * This file is Free Software under the GNU AGPL (>=v3) teichmann@5863: * and comes with ABSOLUTELY NO WARRANTY! Check out the teichmann@5994: * documentation coming with Dive4Elements River for details. teichmann@5863: */ teichmann@5863: teichmann@5831: package org.dive4elements.river.artifacts.math; rrenkert@4794: rrenkert@4794: import java.util.List; rrenkert@4794: tom@9726: import org.apache.logging.log4j.Logger; tom@9726: import org.apache.logging.log4j.LogManager; rrenkert@4794: aheinecke@6714: /* XXX: aheinecke@6714: * Warning: This class is called StdDevOutlier because it caculates the aheinecke@6714: * Standard Deviation method for outlier removal as the BFG calls it. aheinecke@6714: * But the actual calculation used to remove the outliers calculates aheinecke@6714: * the Standard Error and not the Standard Deviation! */ rrenkert@4794: rrenkert@4794: public class StdDevOutlier rrenkert@4794: { rrenkert@4794: public static final double DEFAULT_FACTOR = 3; rrenkert@4794: tom@9726: private static Logger log = LogManager.getLogger(StdDevOutlier.class); rrenkert@4794: rrenkert@4794: protected StdDevOutlier() { rrenkert@4794: } rrenkert@4794: rrenkert@4794: public static Integer findOutlier(List values) { rrenkert@4794: return findOutlier(values, DEFAULT_FACTOR, null); rrenkert@4794: } rrenkert@4794: teichmann@4795: public static Integer findOutlier( teichmann@4795: List values, teichmann@4816: double factor, aheinecke@6714: double [] stdErrResult teichmann@4795: ) { rrenkert@4794: boolean debug = log.isDebugEnabled(); rrenkert@4794: rrenkert@4794: if (debug) { tom@8856: log.debug("factor for std dev test (that calculates std err): " tom@8856: + factor); rrenkert@4794: } rrenkert@4794: rrenkert@4794: int N = values.size(); rrenkert@4794: rrenkert@4794: if (debug) { rrenkert@4794: log.debug("Values to check: " + N); rrenkert@4794: } rrenkert@4794: teichmann@4795: if (N < 3) { rrenkert@4794: return null; rrenkert@4794: } rrenkert@4794: rrenkert@4794: double maxValue = -Double.MAX_VALUE; rrenkert@4794: int maxIndex = -1; aheinecke@6714: aheinecke@6714: double squareSumResiduals = 0; aheinecke@6714: for (Double db: values) { aheinecke@6714: squareSumResiduals += Math.pow(db, 2); aheinecke@6714: } aheinecke@6714: aheinecke@6714: double stdErr = Math.sqrt(squareSumResiduals / (N - 2)); aheinecke@6714: aheinecke@6714: double accepted = factor * stdErr; aheinecke@6714: teichmann@4795: for (int i = N-1; i >= 0; --i) { rrenkert@4794: double value = Math.abs(values.get(i)); rrenkert@4794: if (value > maxValue) { rrenkert@4794: maxValue = value; teichmann@4795: maxIndex = i; rrenkert@4794: } rrenkert@4794: } rrenkert@4794: rrenkert@4794: if (debug) { aheinecke@6714: log.debug("std err: " + stdErr); rrenkert@4794: log.debug("accepted: " + accepted); rrenkert@4794: log.debug("max value: " + maxValue); rrenkert@4794: } teichmann@4795: aheinecke@6714: if (stdErrResult != null) { aheinecke@6714: stdErrResult[0] = stdErr; rrenkert@4794: } teichmann@4795: rrenkert@4794: return maxValue > accepted ? maxIndex : null; rrenkert@4794: } rrenkert@4794: } rrenkert@4794: // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :