# HG changeset patch # User Sascha L. Teichmann # Date 1376762931 -7200 # Node ID b0c88ac9f63bbbcfbadcc65178fe1aa32d92238b # Parent b87b20c71407fdb8b8f142b14e5aae5993d69930 Fixing Analysis: Significantly (8-10x) improved the performance of the generation of the overview data. The data is now prefetched in batches. diff -r b87b20c71407 -r b0c88ac9f63b artifacts/src/main/java/org/dive4elements/river/artifacts/model/FixingsOverview.java --- a/artifacts/src/main/java/org/dive4elements/river/artifacts/model/FixingsOverview.java Sat Aug 17 12:17:42 2013 +0200 +++ b/artifacts/src/main/java/org/dive4elements/river/artifacts/model/FixingsOverview.java Sat Aug 17 20:08:51 2013 +0200 @@ -28,8 +28,10 @@ import org.w3c.dom.Document; import org.w3c.dom.Element; +import org.dive4elements.river.utils.BatchLoader; -/** Generate Fixings Table chart. */ + +/** Generate Fixings Table overview data structure to be stored in cache. */ public class FixingsOverview implements Serializable { @@ -56,39 +58,154 @@ "WHERE" + " river_id = :river_id AND kind = 2"; - /** All columns from given wst. */ - public static final String SQL_FIXING_COLUMNS = - "SELECT" + - " wc.id AS wst_column_id," + - " ti.start_time AS start_time," + - " wc.name AS name " + - "FROM wst_columns wc" + - " JOIN time_intervals ti ON wc.time_interval_id = ti.id " + - "WHERE" + - " wc.wst_id = :wst_id " + - "ORDER BY position"; + public static final String SQL_FIXING_COLUMNS_BATCH = + "SELECT " + + "wc.wst_id AS wst_id," + + "wc.id AS wst_column_id," + + "ti.start_time AS start_time," + + "wc.name AS name " + + "FROM wst_columns wc " + + "JOIN time_intervals ti ON wc.time_interval_id = ti.id " + + "WHERE " + + "wc.wst_id IN ($IDS) " + + "ORDER BY wc.wst_id, position"; - public static final String SQL_FIXING_COLUMN_Q_RANGES = - "SELECT" + - " wqr.q AS q," + - " r.a AS start_km," + - " r.b AS stop_km " + - "FROM wst_column_q_ranges wcqr" + - " JOIN wst_q_ranges wqr ON wcqr.wst_q_range_id = wqr.id" + - " JOIN ranges r ON wqr.range_id = r.id " + - "WHERE" + - " wcqr.wst_column_id = :column_id " + - "ORDER BY r.a"; + public static final String SQL_FIXING_COLUMN_Q_RANGES_BATCH = + "SELECT " + + "wcqr.wst_column_id AS wst_column_id," + + "wqr.q AS q," + + "r.a AS start_km," + + "r.b AS stop_km " + + "FROM wst_column_q_ranges wcqr " + + "JOIN wst_q_ranges wqr ON wcqr.wst_q_range_id = wqr.id " + + "JOIN ranges r ON wqr.range_id = r.id " + + "WHERE " + + "wcqr.wst_column_id IN ($IDS) " + + "ORDER BY wcqr.wst_column_id, r.a"; - public static final String SQL_FIXING_COLUMN_KM_RANGE = - "SELECT" + - " MIN(position) AS start_km," + - " MAX(position) AS stop_km " + - "FROM" + - " wst_column_values " + - "WHERE" + - " wst_column_id = :column_id"; + public static final String SQL_FIXING_COLUMN_KM_RANGE_BATCH = + "SELECT " + + "wst_column_id," + + "MIN(position) AS start_km," + + "MAX(position) AS stop_km " + + "FROM " + + "wst_column_values " + + "WHERE " + + "wst_column_id IN ($IDS) " + + "GROUP BY wst_column_id"; + public static final class KMRangeLoader extends BatchLoader { + + public KMRangeLoader(List columns, Session session) { + super(columns, session, SQL_FIXING_COLUMN_KM_RANGE_BATCH); + } + + @Override + protected void fill(SQLQuery query) { + query + .addScalar("wst_column_id", StandardBasicTypes.INTEGER) + .addScalar("start_km", StandardBasicTypes.DOUBLE) + .addScalar("stop_km", StandardBasicTypes.DOUBLE); + + List ranges = query.list(); + for (Object [] r: ranges) { + Integer cid = (Integer)r[0]; + double [] vs = new double [] { (Double)r[1], (Double)r[2] }; + cache(cid, vs); + } + } + } // class KMRangeLoader + + public static final class ColumnQRangeLoader extends BatchLoader> { + + public ColumnQRangeLoader(List columns, Session session) { + super(columns, session, SQL_FIXING_COLUMN_Q_RANGES_BATCH); + } + + @Override + protected void fill(SQLQuery query) { + query + .addScalar("wst_column_id", StandardBasicTypes.INTEGER) + .addScalar("q", StandardBasicTypes.DOUBLE) + .addScalar("start_km", StandardBasicTypes.DOUBLE) + .addScalar("stop_km", StandardBasicTypes.DOUBLE); + + int lastId = Integer.MIN_VALUE; + List column = new ArrayList(); + + List ranges = query.list(); + for (Object [] r: ranges) { + int cid = (Integer)r[0]; + + if (cid != lastId && !column.isEmpty()) { + cache(lastId, column); + column = new ArrayList(); + } + column.add(new double [] { + (Double)r[1], + (Double)r[2], + (Double)r[3] + }); + + lastId = cid; + } + + if (!column.isEmpty()) { + cache(lastId, column); + } + } + } // class ColumnQRangeLoader + + /** Helper class to store data from batching fixing columns. */ + private static final class FixColumn { + int columnId; + Date startTime; + String name; + + FixColumn(int columnId, Date startTime, String name) { + this.columnId = columnId; + this.startTime = startTime; + this.name = name; + } + } // class FixColumn + + public static final class FixColumnLoader extends BatchLoader> { + + public FixColumnLoader(List columns, Session session) { + super(columns, session, SQL_FIXING_COLUMNS_BATCH); + } + + @Override + protected void fill(SQLQuery query) { + query + .addScalar("wst_id", StandardBasicTypes.INTEGER) + .addScalar("wst_column_id", StandardBasicTypes.INTEGER) + .addScalar("start_time", StandardBasicTypes.DATE) + .addScalar("name", StandardBasicTypes.STRING); + + int lastId = Integer.MIN_VALUE; + List cols = new ArrayList(); + + List columns = query.list(); + for (Object [] c: columns) { + int wid = (Integer)c[0]; + + if (wid != lastId && !cols.isEmpty()) { + cache(lastId, cols); + cols = new ArrayList(); + } + cols.add(new FixColumn( + (Integer)c[1], + (Date) c[2], + (String) c[3])); + + lastId = wid; + } + if (!cols.isEmpty()) { + cache(lastId, cols); + } + } + } // class FixColumnLoader public static class QRange extends Range { @@ -255,34 +372,35 @@ } // for all Q ranges } - public void loadKmRange(SQLQuery query) { - query.setInteger("column_id", columnId); - - List kms = query.list(); + public void loadKmRange(KMRangeLoader loader) { - if (kms.isEmpty()) { + double [] range = loader.get(columnId); + + if (range == null) { log.warn("No km range for column " + columnId + "."); + return; } - else { - Object [] obj = kms.get(0); - start = (Double)obj[0]; - end = (Double)obj[1]; - } + start = range[0]; + end = range[1]; } public void loadQRanges( - SQLQuery query, - GaugeFinder gaugeFinder + ColumnQRangeLoader loader, + GaugeFinder gaugeFinder ) { - query.setInteger("column_id", columnId); - List list = query.list(); + List qrs = loader.get(columnId); + if (qrs == null) { + log.warn("No q ranges found for column " + columnId); + return; + } - List qRanges = new ArrayList(list.size()); + List qRanges = new ArrayList(qrs.size()); - for (Object [] row: list) { - double q = (Double)row[0]; - double start = (Double)row[1]; - double end = (Double)row[2]; + for (double [] qr: qrs) { + double q = qr[0]; + double start = qr[1]; + double end = qr[2]; + QRange qRange = new QRange(start, end, q); if (qRange.clip(this)) { qRanges.add(qRange); @@ -314,20 +432,26 @@ columns = new ArrayList(); } - public void loadColumns(SQLQuery query) { - query.setInteger("wst_id", wstId); - List list = query.list(); - for (Object [] row: list) { - int columnId = (Integer)row[0]; - Date startTime = (Date) row[1]; - String name = (String) row[2]; - columns.add(new Column(columnId, startTime, name)); + public void allColumnIds(List cIds) { + for (Column column: columns) { + cIds.add(column.columnId); } } - public void loadColumnsKmRange(SQLQuery query) { + public void loadColumns(FixColumnLoader loader) { + List fcs = loader.get(wstId); + if (fcs == null) { + log.warn("No columns for wst " + wstId); + return; + } + for (FixColumn fc: fcs) { + columns.add(new Column(fc.columnId, fc.startTime, fc.name)); + } + } + + public void loadColumnsKmRange(KMRangeLoader loader) { for (Column column: columns) { - column.loadKmRange(query); + column.loadKmRange(loader); } } @@ -338,11 +462,11 @@ } public void loadColumnsQRanges( - SQLQuery query, - GaugeFinder gaugeFinder + ColumnQRangeLoader loader, + GaugeFinder gaugeFinder ) { for (Column column: columns) { - column.loadQRanges(query, gaugeFinder); + column.loadQRanges(loader, gaugeFinder); } } @@ -427,23 +551,40 @@ } protected void loadFixingsColumns(Session session) { - SQLQuery query = session.createSQLQuery(SQL_FIXING_COLUMNS) - .addScalar("wst_column_id", StandardBasicTypes.INTEGER) - .addScalar("start_time", StandardBasicTypes.DATE) - .addScalar("name", StandardBasicTypes.STRING); + + FixColumnLoader loader = new FixColumnLoader( + allFixingIds(), + session); for (Fixing fixing: fixings) { - fixing.loadColumns(query); + fixing.loadColumns(loader); } } + protected List allFixingIds() { + List ids = new ArrayList(fixings.size()); + for (Fixing fixing: fixings) { + ids.add(fixing.getId()); + } + return ids; + } + + protected List allColumnIds() { + List cIds = new ArrayList(); + for (Fixing fixing: fixings) { + fixing.allColumnIds(cIds); + } + return cIds; + } + protected void loadFixingsColumnsKmRange(Session session) { - SQLQuery query = session.createSQLQuery(SQL_FIXING_COLUMN_KM_RANGE) - .addScalar("start_km", StandardBasicTypes.DOUBLE) - .addScalar("stop_km", StandardBasicTypes.DOUBLE); + + KMRangeLoader loader = new KMRangeLoader( + allColumnIds(), + session); for (Fixing fixing: fixings) { - fixing.loadColumnsKmRange(query); + fixing.loadColumnsKmRange(loader); } } @@ -451,13 +592,13 @@ Session session, GaugeFinder gaugeFinder ) { - SQLQuery query = session.createSQLQuery(SQL_FIXING_COLUMN_Q_RANGES) - .addScalar("q", StandardBasicTypes.DOUBLE) - .addScalar("start_km", StandardBasicTypes.DOUBLE) - .addScalar("stop_km", StandardBasicTypes.DOUBLE); + + ColumnQRangeLoader loader = new ColumnQRangeLoader( + allColumnIds(), + session); for (Fixing fixing: fixings) { - fixing.loadColumnsQRanges(query, gaugeFinder); + fixing.loadColumnsQRanges(loader, gaugeFinder); } } diff -r b87b20c71407 -r b0c88ac9f63b artifacts/src/main/java/org/dive4elements/river/utils/BatchLoader.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/artifacts/src/main/java/org/dive4elements/river/utils/BatchLoader.java Sat Aug 17 20:08:51 2013 +0200 @@ -0,0 +1,116 @@ +/* Copyright (C) 2011, 2012, 2013 by Bundesanstalt für Gewässerkunde + * Software engineering by Intevation GmbH + * + * This file is Free Software under the GNU AGPL (>=v3) + * and comes with ABSOLUTELY NO WARRANTY! Check out the + * documentation coming with Dive4Elements River for details. + */ + +package org.dive4elements.river.utils; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import org.apache.log4j.Logger; +import org.hibernate.SQLQuery; +import org.hibernate.Session; + +/** To reduce the number of SQL queries send to the backend + * (mainly by the fixings overviews) we execute them in batches of ids + * and store the results in a small cache. + * TODO: It currently relies on dynamic SQL. + * Is there a way to use Hibernate with java.sql.Array + * in cross database compatible manner? + */ +public abstract class BatchLoader { + + private static Logger log = Logger.getLogger(BatchLoader.class); + + public static final int BATCH_SIZE = 100; + + private Map loaded; + private List rest; + private Session session; + private String sqlTemplate; + + public BatchLoader( + List columns, + Session session, + String sqlTemplate + ) { + rest = new ArrayList(columns.size()); + loaded = new HashMap(); + this.session = session; + this.sqlTemplate = sqlTemplate; + + // Insert in reverse order to minize searching. + for (int i = columns.size()-1; i >= 0; --i) { + rest.add(columns.get(i)); + } + } + + /** Searches for id and fill a batch to load containing the found id. */ + private List prepareBatch(int id) { + List batch = new ArrayList(BATCH_SIZE); + + boolean found = false; + + for (int i = rest.size()-1; batch.size() < BATCH_SIZE && i >= 0; --i) { + Integer cid = rest.get(i); + if (cid == id) { + found = true; + batch.add(cid); + rest.remove(i); + } + else if ((found && batch.size() < BATCH_SIZE) + || (!found && batch.size() < BATCH_SIZE-1)) { + batch.add(cid); + rest.remove(i); + } + } + + return batch; + } + + /** Converts id to a list of comma separated ints. */ + private static String idsAsString(List ids) { + StringBuilder sb = new StringBuilder(); + for (Iterator i = ids.iterator(); i.hasNext();) { + sb.append(i.next()); + if (i.hasNext()) { + sb.append(','); + } + } + return sb.toString(); + } + + /** Get data for id. */ + public T get(int id) { + T already = loaded.get(id); + if (already != null) { + return already; + } + + List batch = prepareBatch(id); + if (batch.isEmpty()) { + return null; + } + String sql = sqlTemplate.replace("$IDS", idsAsString(batch)); + if (log.isDebugEnabled()) { + log.debug(sql + " " + sql.length()); + } + fill(session.createSQLQuery(sql)); + return get(id); + } + + /** Call this from fill() to store data in the cache. */ + protected void cache(int key, T data) { + loaded.put(key, data); + } + + /** Override this to fill the cache */ + protected abstract void fill(SQLQuery query); +}