# HG changeset patch # User Sascha L. Teichmann # Date 1264523616 0 # Node ID 825781a39c7014a86602e9e7d1c8679180c7fd59 # Parent 0ee3c0ed40e4a45eee2f7e2d76155cf8598c858a Fixed gnv/issue107 geo-backend/trunk@632 c6561f87-3c4e-4783-a992-168aeb5c3f6f diff -r 0ee3c0ed40e4 -r 825781a39c70 geo-backend/ChangeLog --- a/geo-backend/ChangeLog Tue Jan 26 13:55:55 2010 +0000 +++ b/geo-backend/ChangeLog Tue Jan 26 16:33:36 2010 +0000 @@ -1,3 +1,25 @@ +2009-01-25 Sascha L. Teichmann + + Fix for gnv/issue107 + + * src/main/java/de/intevation/gnv/geobackend/util/RedundancyRemover.java: + New. Used to reduce the redundancy of data in SQL result sets. Column + values are not very distinct so the degree of redundancy is very high. E.g. + the SDE produces for each date value a java.util.GregorianCalendar object + which is very heavyweight. We only use them read-only so we can hash each date + value to one unique representative. The same logic applies to integer values + and so on. + + With this technique we are able to reduce the memory consumption by + over 90%. Because it is not very feasible to store the whole history + of a column we only have a limited number of cached values per column. + This is controlled via the system property + "de.intevation.gnv.geobackend.util.RedundancyRemover.lookback" which + defaults to 1029. + + * src/main/java/de/intevation/gnv/geobackend/sde/datasources/ArcSDEStatement.java: + Use RedundancyRemovers to filter the incoming traffic from SDE. 
+ 2010-01-26 Tim Englich * src/test/java/de/intevation/gnv/geobackend/base/query/ToCharSample.java : diff -r 0ee3c0ed40e4 -r 825781a39c70 geo-backend/src/main/java/de/intevation/gnv/geobackend/sde/datasources/ArcSDEStatement.java --- a/geo-backend/src/main/java/de/intevation/gnv/geobackend/sde/datasources/ArcSDEStatement.java Tue Jan 26 13:55:55 2010 +0000 +++ b/geo-backend/src/main/java/de/intevation/gnv/geobackend/sde/datasources/ArcSDEStatement.java Tue Jan 26 16:33:36 2010 +0000 @@ -1,6 +1,3 @@ -/** - * - */ package de.intevation.gnv.geobackend.sde.datasources; import com.esri.sde.sdk.client.SDEPoint; @@ -28,6 +25,8 @@ import com.vividsolutions.jts.io.WKTReader; +import de.intevation.gnv.geobackend.util.RedundancyRemover; + import java.sql.Connection; import java.sql.ResultSet; import java.sql.SQLException; @@ -491,26 +490,49 @@ } SDEResultSet lSet = new SDEResultSet(); - SeRow row = null;; + SeRow row = null; int lCount; if (!isRaster){ + RedundancyRemover [] removers = null; + SeColumnDefinition [] lCols = null; + for (lCount = 0; (row = pSeQuery.fetch()) != null; lCount++) { // one time execution if (lCount == 0) { // analyze cols of result set - SeColumnDefinition[] lCols = row.getColumns(); + lCols = row.getColumns(); for (SeColumnDefinition lCol : lCols) { lSet.addCol(new ColDefinition(lCol.getName(), lCol.getType())); // notice: esri-types have been copied into colDefinition class! 
} + removers = new RedundancyRemover[lCols.length]; + for (int i = 0; i < removers.length; ++i) { + removers[i] = new RedundancyRemover(); + } } short lNumCols = row.getNumColumns(); Row lBackingRow = new Row(lNumCols); for (int i = 0; i < lNumCols; i++) { - lBackingRow.addObject(row.getObject(i), i); + lBackingRow.addObject( + removers[i].filter(row.getObject(i)), + i); } lSet.addRow(lBackingRow); } + + if (debug && removers != null) { + log.debug("datasets: " + lCount); + StringBuilder sb = new StringBuilder("removed redundancy: "); + float percent = 100f/lCount; + for (int i = 0; i < removers.length; ++i) { + if (i > 0) sb.append(", "); + sb.append(lCols[i].getName()) + .append(": ").append(removers[i].numRemoved()) + .append(" (").append(removers[i].numRemoved()*percent) + .append("%)"); + } + log.debug(sb.toString()); + } }else{ try { pSeQuery.execute(); diff -r 0ee3c0ed40e4 -r 825781a39c70 geo-backend/src/main/java/de/intevation/gnv/geobackend/util/RedundancyRemover.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/geo-backend/src/main/java/de/intevation/gnv/geobackend/util/RedundancyRemover.java Tue Jan 26 16:33:36 2010 +0000 @@ -0,0 +1,56 @@ +package de.intevation.gnv.geobackend.util; + +import java.util.LinkedHashMap; +import java.util.Map; + +/** + * @author Sascha L. Teichmann (sascha.teichmann@intevation.de) + */ +public final class RedundancyRemover +extends LinkedHashMap +{ + /** ceil(1029 * 1.75) = 1801, which is prime + * -> suitable for a hash map slot size. 
+ */ + public static final int DEFAULT_LOOKBACK = + Integer.getInteger( + "de.intevation.gnv.geobackend.util.RedundancyRemover.lookback", + 1029); + + private int maxCapacity; + private int removed; + + public RedundancyRemover() { + this(DEFAULT_LOOKBACK); + } + + public RedundancyRemover(int maxCapacity) { + super((int)Math.ceil(maxCapacity * 1.75f)); + this.maxCapacity = maxCapacity; + } + + protected boolean removeEldestEntry(Map.Entry eldest) { + return size() > maxCapacity; + } + + public int numRemoved() { + return removed; + } + + public Object filter(Object object) { + if (object == null) { + return object; + } + Object old = get(object); + + if (old != null) { + if (old != object) { // count only identical + ++removed; + } + return old; + } + put(object, object); + return object; + } +} +// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :