tom@8043: /* Copyright (C) 2011, 2012, 2013 by Bundesanstalt für Gewässerkunde tom@8043: * Software engineering by Intevation GmbH tom@8043: * tom@8043: * This file is Free Software under the GNU AGPL (>=v3) tom@8043: * and comes with ABSOLUTELY NO WARRANTY! Check out the tom@8043: * documentation coming with Dive4Elements River for details. tom@8043: */ tom@8043: tom@8043: package org.dive4elements.river.importer.parsers; tom@8043: tom@8043: import java.io.File; tom@8043: import java.io.IOException; tom@8043: tom@8043: import java.text.NumberFormat; tom@8043: import java.text.ParseException; tom@8043: tom@8043: import java.util.regex.Matcher; tom@8043: import java.util.regex.Pattern; tom@8043: tom@8043: import org.apache.log4j.Logger; tom@8043: tom@8043: import org.dive4elements.river.importer.ImporterSession; tom@8043: import org.dive4elements.river.importer.ImportGrainFraction; tom@8043: import org.dive4elements.river.importer.ImportTimeInterval; tom@8043: tom@8043: import org.dive4elements.river.model.GrainFraction; tom@8043: tom@8043: import org.dive4elements.river.utils.DateUtil; tom@8043: import org.dive4elements.river.utils.EpsilonComparator; tom@8043: tom@8043: /** Parses sediment load files. */ tom@8043: public abstract class AbstractSedimentLoadParser extends LineParser { tom@8043: tom@8043: private static final Logger log = tom@8043: Logger.getLogger(AbstractSedimentLoadParser.class); tom@8043: tom@8043: tom@8043: public static final NumberFormat nf = NumberFormat.getInstance(DEFAULT_LOCALE); tom@8043: tom@8043: tom@8043: public static final Pattern TIMEINTERVAL_SINGLE = tom@8043: Pattern.compile("\\D*([0-9]+?)\\D*"); tom@8043: tom@8043: public static final Pattern TIMEINTERVAL_EPOCH = tom@8043: Pattern.compile("\\D*([0-9]+?)\\s*-\\s*([0-9]+?)\\D*"); tom@8043: tom@8043: public static final Pattern META_FRACTION = tom@8043: Pattern.compile("^Fraktion: (.*)"); tom@8043: tom@8043: public static final Pattern META_FRACTION_NAME = tom@8043: Pattern.compile("^Fraktionsname: (.*)"); tom@8043: tom@8059: public static final Pattern META_SQ_TIMEINTERVAL = tom@8059: Pattern.compile("^S-Q-Beziehung: (.*)"); tom@8059: tom@8043: public static final Pattern META_COLUMN_NAMES = tom@8043: Pattern.compile("^Fluss-km.*"); tom@8043: tom@8043: public static final Pattern META_GRAIN_SIZE = tom@8043: Pattern.compile("([0-9]*,*[0-9]+)-([0-9]*,*[0-9]+) *mm"); tom@8043: tom@8043: tom@8043: protected abstract void handleDataLine(String line); tom@8043: tom@8043: /** Initialize SedimentLoadLSs from columns, set the kind tom@8043: * with respect to file location (offical epoch or not?) */ tom@8043: protected abstract void initializeSedimentLoads(); tom@8043: tom@8056: protected abstract void handleMetaLine(String line) tom@8056: throws LineParserException; tom@8056: tom@8043: tom@8043: protected ImportGrainFraction grainFraction; tom@8043: tom@8059: protected ImportTimeInterval sqTimeInterval; tom@8059: tom@8043: protected String description; tom@8043: tom@8043: protected String[] columnNames; tom@8043: tom@8043: private String upper; tom@8043: tom@8043: private String lower; tom@8043: tom@8043: tom@8043: @Override tom@8043: public void parse(File file) throws IOException { tom@8043: description = file.getName(); tom@8043: tom@8043: super.parse(file); tom@8043: } tom@8043: tom@8043: tom@8043: @Override tom@8043: protected void handleLine(int lineNum, String line) throws LineParserException { tom@8043: if (line.startsWith(START_META_CHAR)) { tom@8043: handleMetaLine(stripMetaLine(line)); tom@8043: } tom@8043: else { tom@8043: handleDataLine(line); tom@8043: } tom@8043: } tom@8043: tom@8043: tom@8043: public boolean handleMetaFraction(String line) { tom@8043: Matcher m = META_FRACTION.matcher(line); tom@8043: tom@8043: if (m.matches()) { tom@8043: String interval = m.group(1); tom@8043: tom@8043: Matcher sizes = META_GRAIN_SIZE.matcher(interval); tom@8043: if (sizes.matches()) { tom@8043: lower = sizes.group(1); tom@8043: upper = sizes.group(2); tom@8043: tom@8043: return true; tom@8043: } tom@8043: tom@8043: log.warn("ASLP: Unrecognized grain-size interval. Ignored."); tom@8043: return true; tom@8043: tom@8043: } tom@8043: tom@8043: return false; tom@8043: } tom@8043: tom@8043: tom@8043: public boolean handleMetaFractionName(String line) throws LineParserException { tom@8043: Matcher m = META_FRACTION_NAME.matcher(line); tom@8043: tom@8043: if (m.matches()) { tom@8043: String name = m.group(1); tom@8043: tom@8043: tom@8043: GrainFraction gf = ImporterSession.getInstance().getGrainFraction(name); tom@8043: tom@8043: if (gf != null) { tom@8043: tom@8043: if (lower != null && upper != null) { tom@8043: // Validate grain size interval tom@8043: try { tom@8043: Double lowval = nf.parse(lower).doubleValue(); tom@8043: Double upval = nf.parse(upper).doubleValue(); tom@8043: tom@8043: if (EpsilonComparator.CMP.compare(lowval, tom@8043: gf.getLower()) != 0 || tom@8043: EpsilonComparator.CMP.compare(upval, tom@8043: gf.getUpper()) != 0) { tom@8043: log.warn("ASLP: Invalid grain size for grain fraction '" + tom@8043: name + "'. Ignored."); tom@8043: } tom@8043: } tom@8043: catch (ParseException pe) { tom@8043: log.warn("ASLP: Could not parse grain-size interval. Ignored."); tom@8043: } tom@8043: } tom@8043: tom@8043: grainFraction = new ImportGrainFraction(gf); tom@8043: return true; tom@8043: } tom@8043: tom@8043: throw new LineParserException("ASLP: Unknown grain fraction: '" + tom@8043: name + "'"); tom@8043: } tom@8043: tom@8043: return false; tom@8043: } tom@8043: tom@8043: tom@8059: public boolean handleMetaSQTimeInterval(String line) { tom@8059: Matcher m = META_SQ_TIMEINTERVAL.matcher(line); tom@8059: tom@8059: if (m.matches()) { tom@8059: String interval = m.group(1); tom@8059: tom@8059: try { tom@8059: Matcher a = TIMEINTERVAL_EPOCH.matcher(interval); tom@8059: if (a.matches()) { tom@8059: int yearA = nf.parse(a.group(1)).intValue(); tom@8059: int yearB = nf.parse(a.group(2)).intValue(); tom@8059: tom@8059: sqTimeInterval = new ImportTimeInterval( tom@8059: DateUtil.getStartDateFromYear(yearA), tom@8059: DateUtil.getEndDateFromYear(yearB) tom@8059: ); tom@8059: } tom@8059: else { tom@8059: log.warn("ASLP: Unknown SQ-time string: '" + interval + tom@8059: "'. Ignored."); tom@8059: } tom@8059: } tom@8059: catch (ParseException pe) { tom@8059: log.error("ASLP: Could not parse SQ-time string: '" + tom@8059: interval + "'. Ignored.", pe); tom@8059: } tom@8059: tom@8059: return true; tom@8059: tom@8059: } tom@8059: tom@8059: return false; tom@8059: } tom@8059: tom@8059: tom@8043: public boolean handleColumnNames(String line) throws LineParserException { tom@8043: Matcher m = META_COLUMN_NAMES.matcher(line); tom@8043: tom@8043: if (m.matches()) { tom@8043: columnNames = line.split(SEPERATOR_CHAR); tom@8043: tom@8043: // 'Fluss-km', 'Hinweise' and at least one data column required tom@8043: if (columnNames.length < 3) { tom@8043: throw new LineParserException("ASLP: missing columns in '" + tom@8043: line + "'"); tom@8043: } tom@8043: tom@8043: initializeSedimentLoads(); tom@8043: tom@8043: return true; tom@8043: } tom@8043: tom@8043: return false; tom@8043: } tom@8043: tom@8043: tom@8043: protected ImportTimeInterval getTimeInterval(String column) { tom@8043: try { tom@8043: Matcher a = TIMEINTERVAL_EPOCH.matcher(column); tom@8043: if (a.matches()) { tom@8043: int yearA = nf.parse(a.group(1)).intValue(); tom@8043: int yearB = nf.parse(a.group(2)).intValue(); tom@8043: tom@8043: return new ImportTimeInterval( tom@8043: DateUtil.getStartDateFromYear(yearA), tom@8043: DateUtil.getEndDateFromYear(yearB) tom@8043: ); tom@8043: } tom@8043: tom@8043: Matcher b = TIMEINTERVAL_SINGLE.matcher(column); tom@8043: if (b.matches()) { tom@8043: int year = nf.parse(b.group(1)).intValue(); tom@8043: tom@8043: return new ImportTimeInterval(DateUtil.getStartDateFromYear(year)); tom@8043: } tom@8043: tom@8043: log.warn("ASLP: Unknown time interval string: '" + column + "'"); tom@8043: } tom@8043: catch (ParseException pe) { tom@8043: log.warn("ASLP: Could not parse years: " + column, pe); tom@8043: } tom@8043: tom@8043: return null; tom@8043: } tom@8043: tom@8043: } tom@8043: // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :