Mercurial > dive4elements > river
view backend/src/main/java/org/dive4elements/river/importer/parsers/SedimentLoadLSParser.java @ 8042:9342d7fe0ee7
Introduce LineParserException for CSV file parsing.
author | Sascha L. Teichmann <teichmann@intevation.de> |
---|---|
date | Wed, 16 Jul 2014 18:33:28 +0200 |
parents | fd3a24336e6a |
children | bd0dea643440 |
line wrap: on
line source
/* Copyright (C) 2011, 2012, 2013 by Bundesanstalt für Gewässerkunde * Software engineering by Intevation GmbH * * This file is Free Software under the GNU AGPL (>=v3) * and comes with ABSOLUTELY NO WARRANTY! Check out the * documentation coming with Dive4Elements River for details. */ package org.dive4elements.river.importer.parsers; import java.io.File; import java.io.IOException; import java.text.NumberFormat; import java.text.ParseException; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.log4j.Logger; import org.dive4elements.river.importer.ImporterSession; import org.dive4elements.river.importer.ImportGrainFraction; import org.dive4elements.river.importer.ImportSedimentLoadLS; import org.dive4elements.river.importer.ImportSedimentLoadLSValue; import org.dive4elements.river.importer.ImportTimeInterval; import org.dive4elements.river.importer.ImportUnit; import org.dive4elements.river.model.GrainFraction; import org.dive4elements.river.utils.DateUtil; import org.dive4elements.river.utils.EpsilonComparator; /** Parses sediment load longitudinal section files. */ public class SedimentLoadLSParser extends LineParser { private static final Logger log = Logger.getLogger(SedimentLoadLSParser.class); public static final NumberFormat nf = NumberFormat.getInstance(DEFAULT_LOCALE); public static final Pattern TIMEINTERVAL_SINGLE = Pattern.compile("\\D*([0-9]+?)\\D*"); public static final Pattern TIMEINTERVAL_EPOCH = Pattern.compile("\\D*([0-9]+?)\\s*-\\s*([0-9]+?)\\D*"); public static final Pattern META_FRACTION = Pattern.compile("^Fraktion: (.*)"); public static final Pattern META_FRACTION_NAME = Pattern.compile("^Fraktionsname: (.*)"); public static final Pattern META_UNIT = Pattern.compile("^Einheit: \\[(.*)\\].*"); public static final Pattern META_COLUMN_NAMES = Pattern.compile("^Fluss-km.*"); public static final Pattern META_GRAIN_SIZE = Pattern.compile("([0-9]*,*[0-9]+)-([0-9]*,*[0-9]+) *mm"); protected List<ImportSedimentLoadLS> sedimentLoadLSs; protected ImportSedimentLoadLS[] current; protected ImportGrainFraction grainFraction; protected ImportUnit unit; protected String description; protected String[] columnNames; private String upper; private String lower; public SedimentLoadLSParser() { sedimentLoadLSs = new ArrayList<ImportSedimentLoadLS>(); } @Override public void parse(File file) throws IOException { description = file.getName(); super.parse(file); } @Override protected void reset() { current = null; grainFraction = null; unit = null; } @Override protected void finish() { if (current != null) { for (ImportSedimentLoadLS isy: current) { sedimentLoadLSs.add(isy); } } description = null; } @Override protected void handleLine(int lineNum, String line) throws LineParserException { if (line.startsWith(START_META_CHAR)) { handleMetaLine(stripMetaLine(line)); } else { handleDataLine(line); } } protected void handleMetaLine(String line) throws LineParserException { if (handleMetaUnit(line)) { return; } if (handleMetaFraction(line)) { return; } if (handleMetaFractionName(line)) { return; } if (handleColumnNames(line)) { return; } log.warn("SLLSP: Unknown meta line: '" + line + "'"); } protected boolean handleMetaUnit(String line) { Matcher m = META_UNIT.matcher(line); if (m.matches()) { unit = new ImportUnit(m.group(1)); return true; } return false; } public boolean handleMetaFraction(String line) { Matcher m = META_FRACTION.matcher(line); if (m.matches()) { String interval = m.group(1); Matcher sizes = META_GRAIN_SIZE.matcher(interval); if (sizes.matches()) { lower = sizes.group(1); upper = sizes.group(2); return true; } log.warn("SLLSP: Unrecognized grain-size interval. Ignored."); return true; } return false; } public boolean handleMetaFractionName(String line) { Matcher m = META_FRACTION_NAME.matcher(line); if (m.matches()) { String name = m.group(1); GrainFraction gf = ImporterSession.getInstance().getGrainFraction(name); if (gf != null) { if (lower != null && upper != null) { // Validate grain size interval try { Double lowval = nf.parse(lower).doubleValue(); Double upval = nf.parse(upper).doubleValue(); if (EpsilonComparator.CMP.compare(lowval, gf.getLower()) != 0 || EpsilonComparator.CMP.compare(upval, gf.getUpper()) != 0) { log.warn("SLLSP: Invalid grain size for grain fraction '" + name + "'. Ignored."); } } catch (ParseException pe) { log.warn("SLLSP: Could not parse grain-size interval. Ignored."); } } grainFraction = new ImportGrainFraction(gf); return true; } log.error("SLLSP: Unknown grain fraction: '" + name + "'"); } return false; } public boolean handleColumnNames(String line) throws LineParserException { Matcher m = META_COLUMN_NAMES.matcher(line); if (m.matches()) { columnNames = line.split(SEPERATOR_CHAR); // 'Fluss-km', 'Hinweise' and at least one data column required if (columnNames.length < 3) { throw new LineParserException("SLLSP: missing columns."); } initializeSedimentLoadLSs(); return true; } return false; } protected void handleDataLine(String line) { String[] vals = line.split(SEPERATOR_CHAR); if (vals == null || vals.length < columnNames.length-1) { log.warn("SLLSP: skip invalid data line: '" + line + "'"); return; } try { Double km = nf.parse(vals[0]).doubleValue(); for (int i = 1, n = columnNames.length-1; i < n; i++) { String curVal = vals[i]; if (curVal != null && curVal.length() > 0) { current[i-1].addValue(new ImportSedimentLoadLSValue( km, nf.parse(vals[i]).doubleValue() )); } } } catch (ParseException pe) { log.warn("SLLSP: unparseable number in data row '" + line + "':", pe); } } /** Initialize SedimentLoadLSs from columns, set the kind * with respect to file location (offical epoch or not?) */ private void initializeSedimentLoadLSs() { // skip first column (Fluss-km) and last column (Hinweise) current = new ImportSedimentLoadLS[columnNames.length-2]; Integer kind; if (inputFile.getAbsolutePath().contains("amtliche Epochen")) { kind = new Integer(1); } else { kind = new Integer(0); } for (int i = 0, n = columnNames.length; i < n-2; i++) { current[i] = new ImportSedimentLoadLS(this.description); current[i].setTimeInterval(getTimeInterval(columnNames[i+1])); current[i].setUnit(unit); current[i].setGrainFraction(grainFraction); current[i].setKind(kind); } } private ImportTimeInterval getTimeInterval(String column) { try { Matcher a = TIMEINTERVAL_EPOCH.matcher(column); if (a.matches()) { int yearA = nf.parse(a.group(1)).intValue(); int yearB = nf.parse(a.group(2)).intValue(); return new ImportTimeInterval( DateUtil.getStartDateFromYear(yearA), DateUtil.getEndDateFromYear(yearB) ); } Matcher b = TIMEINTERVAL_SINGLE.matcher(column); if (b.matches()) { int year = nf.parse(b.group(1)).intValue(); return new ImportTimeInterval(DateUtil.getStartDateFromYear(year)); } log.warn("SLLSP: Unknown time interval string: '" + column + "'"); } catch (ParseException pe) { log.warn("SLLSP: Could not parse years: " + column, pe); } return null; } public List<ImportSedimentLoadLS> getSedimentLoadLSs() { return sedimentLoadLSs; } } // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :