changeset 555:825781a39c70

Fixed gnv/issue107 geo-backend/trunk@632 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author Sascha L. Teichmann <sascha.teichmann@intevation.de>
date Tue, 26 Jan 2010 16:33:36 +0000
parents 0ee3c0ed40e4
children 48121fa4dc95
files geo-backend/ChangeLog geo-backend/src/main/java/de/intevation/gnv/geobackend/sde/datasources/ArcSDEStatement.java geo-backend/src/main/java/de/intevation/gnv/geobackend/util/RedundancyRemover.java
diffstat 3 files changed, 106 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/geo-backend/ChangeLog	Tue Jan 26 13:55:55 2010 +0000
+++ b/geo-backend/ChangeLog	Tue Jan 26 16:33:36 2010 +0000
@@ -1,3 +1,25 @@
+2009-01-25	Sascha L. Teichmann	<sascha.teichmann@intevation.de>
+
+	Fix for gnv/issue107
+
+	* src/main/java/de/intevation/gnv/geobackend/util/RedundancyRemover.java:
+	  New. Used to reduce the redundancy of data in SQL result sets. Column
+	  values are not very distinct, so the degree of redundancy is very high. E.g.
+	  the SDE produces for each date value a java.util.GregorianCalendar object,
+	  which is very heavyweight. We only use them read-only, so we can hash each
+	  date value to one unique representative. The same logic applies to integer
+	  values and so on.
+	  
+	  With this technique we are able to reduce the memory consumption by
+	  over 90%. Because it is not feasible to store the whole history of a
+	  column, we only keep a limited number of cached values per column.
+	  This is controlled via the system property
+	  "de.intevation.gnv.geobackend.util.RedundancyRemover.lookback", which
+	  defaults to 1029.
+
+	* src/main/java/de/intevation/gnv/geobackend/sde/datasources/ArcSDEStatement.java:
+	  Use RedundancyRemovers to filter the incoming traffic from SDE.
+
 2010-01-26  Tim Englich  <tim.englich@intevation.de>
 
 	* src/test/java/de/intevation/gnv/geobackend/base/query/ToCharSample.java : 
--- a/geo-backend/src/main/java/de/intevation/gnv/geobackend/sde/datasources/ArcSDEStatement.java	Tue Jan 26 13:55:55 2010 +0000
+++ b/geo-backend/src/main/java/de/intevation/gnv/geobackend/sde/datasources/ArcSDEStatement.java	Tue Jan 26 16:33:36 2010 +0000
@@ -1,6 +1,3 @@
-/**
- *
- */
 package de.intevation.gnv.geobackend.sde.datasources;
 
 import com.esri.sde.sdk.client.SDEPoint;
@@ -28,6 +25,8 @@
 
 import com.vividsolutions.jts.io.WKTReader;
 
+import de.intevation.gnv.geobackend.util.RedundancyRemover;
+
 import java.sql.Connection;
 import java.sql.ResultSet;
 import java.sql.SQLException;
@@ -491,26 +490,49 @@
         }
 
         SDEResultSet lSet = new SDEResultSet();
-        SeRow row = null;;
+        SeRow row = null;
         int lCount;
         if (!isRaster){
+            RedundancyRemover  [] removers = null;
+            SeColumnDefinition [] lCols    = null;
+
             for (lCount = 0; (row = pSeQuery.fetch()) != null; lCount++) {
                 // one time execution
                 if (lCount == 0) {
                     // analyze cols of result set
-                    SeColumnDefinition[] lCols = row.getColumns();
+                    lCols = row.getColumns();
                     for (SeColumnDefinition lCol : lCols) {
                         lSet.addCol(new ColDefinition(lCol.getName(), lCol.getType()));
                         // notice: esri-types have been copied into colDefinition class!
                     }
+                    removers = new RedundancyRemover[lCols.length];
+                    for (int i = 0; i < removers.length; ++i) {
+                        removers[i] = new RedundancyRemover();
+                    }
                 }
                 short lNumCols = row.getNumColumns();
                 Row lBackingRow = new Row(lNumCols);
                 for (int i = 0; i < lNumCols; i++) {
-                    lBackingRow.addObject(row.getObject(i), i);
+                    lBackingRow.addObject(
+                        removers[i].filter(row.getObject(i)), 
+                        i);
                 }
                 lSet.addRow(lBackingRow);
             }
+
+            if (debug && removers != null) {
+                log.debug("datasets: " + lCount);
+                StringBuilder sb = new StringBuilder("removed redundancy: ");
+                float percent = 100f/lCount;
+                for (int i = 0; i < removers.length; ++i) {
+                    if (i > 0) sb.append(", ");
+                    sb.append(lCols[i].getName())
+                      .append(": ").append(removers[i].numRemoved())
+                      .append(" (").append(removers[i].numRemoved()*percent)
+                      .append("%)");
+                }
+                log.debug(sb.toString());
+            }
         }else{
             try {
                 pSeQuery.execute();
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/geo-backend/src/main/java/de/intevation/gnv/geobackend/util/RedundancyRemover.java	Tue Jan 26 16:33:36 2010 +0000
@@ -0,0 +1,56 @@
+package de.intevation.gnv.geobackend.util;
+
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+/** Size-bounded map that hands back one shared instance for values that are equal.
+ * @author Sascha L. Teichmann (sascha.teichmann@intevation.de)
+ */
+public final class RedundancyRemover
+extends            LinkedHashMap // insertion-ordered: a lookup does not refresh an entry's age
+{
+    /** ceil(1029 * 1.75) = 1801, which is prime 
+     * -&gt; suitable for a hash map slot size.
+     */
+    public static final int DEFAULT_LOOKBACK = // entry limit used by the no-arg constructor
+        Integer.getInteger(
+            "de.intevation.gnv.geobackend.util.RedundancyRemover.lookback",
+            1029); // fallback when the system property is unset or not a valid integer
+
+    private int maxCapacity; // number of entries kept before the eldest one is evicted
+    private int removed; // number of filter() calls answered with a different, cached instance
+
+    public RedundancyRemover() { // creates a remover limited to DEFAULT_LOOKBACK entries
+        this(DEFAULT_LOOKBACK);
+    }
+
+    public RedundancyRemover(int maxCapacity) { // maxCapacity: entry limit of this cache
+        super((int)Math.ceil(maxCapacity * 1.75f)); // initial capacity with headroom over the limit
+        this.maxCapacity = maxCapacity;
+    }
+
+    protected boolean removeEldestEntry(Map.Entry eldest) { // LinkedHashMap hook, consulted after an insertion
+        return size() > maxCapacity; // drop the eldest entry once the limit is exceeded
+    }
+
+    public int numRemoved() { // returns how many values filter() has replaced so far
+        return removed;
+    }
+
+    public Object filter(Object object) { // returns the cached instance equal to object, else object itself
+        if (object == null) { // null is passed through and never cached
+            return object;
+        }
+        Object old = get(object); // map lookup, i.e. by hashCode()/equals()
+
+        if (old != null) {
+            if (old != object) { // count only when a different instance is actually replaced
+                ++removed;
+            }
+            return old;
+        }
+        put(object, object); // first sighting: object becomes its own representative
+        return object;
+    }
+}
+// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org