# HG changeset patch # User Sascha L. Teichmann # Date 1300294915 0 # Node ID 31895d24387e3eee408963e08c290406632f74d4 # Parent 3035d861a57636342ca7eb2088b623c4dad6c212 Importer: Added info gew parser. flys-backend/trunk@1485 c6561f87-3c4e-4783-a992-168aeb5c3f6f diff -r 3035d861a576 -r 31895d24387e flys-backend/ChangeLog --- a/flys-backend/ChangeLog Tue Mar 15 17:29:11 2011 +0000 +++ b/flys-backend/ChangeLog Wed Mar 16 17:01:55 2011 +0000 @@ -1,3 +1,23 @@ +2011-03-16 Sascha L. Teichmann + + * src/main/java/de/intevation/flys/utils/FileTools.java: + Tools for handling filenames. Currently there is + a repair(File) method that repairs letter-case errors, + which is useful when reading Windows filenames on a + un*x platform. + + * src/main/java/de/intevation/flys/importer/Importer.java: + Standalone app to read data from the file system and + store it in a database. Currently it does not store + anything. It only loads info gew files. + + * src/main/java/de/intevation/flys/importer/InfoGewParser.java: + Info gew parser. + + * src/main/java/de/intevation/flys/importer/ImportRiver.java: + Helper model of a river produced by parsing the + info gew files. + 2011-03-15 Sascha L. 
Teichmann * src/main/java/de/intevation/flys/backend/SessionFactoryProvider.java: diff -r 3035d861a576 -r 31895d24387e flys-backend/src/main/java/de/intevation/flys/importer/ImportRiver.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/flys-backend/src/main/java/de/intevation/flys/importer/ImportRiver.java Wed Mar 16 17:01:55 2011 +0000 @@ -0,0 +1,46 @@ +package de.intevation.flys.importer; + +import java.io.File; + +public class ImportRiver +{ + protected String name; + + protected File wstFile; + + protected File bbInfoFile; + + public ImportRiver() { + } + + public ImportRiver(String name, File wstFile, File bbInfoFile) { + this.name = name; + this.wstFile = wstFile; + this.bbInfoFile = bbInfoFile; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public File getWstFile() { + return wstFile; + } + + public void setWstFile(File wstFile) { + this.wstFile = wstFile; + } + + public File getBBInfo() { + return bbInfoFile; + } + + public void setBBInfo(File bbInfoFile) { + this.bbInfoFile = bbInfoFile; + } +} +// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 : diff -r 3035d861a576 -r 31895d24387e flys-backend/src/main/java/de/intevation/flys/importer/Importer.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/flys-backend/src/main/java/de/intevation/flys/importer/Importer.java Wed Mar 16 17:01:55 2011 +0000 @@ -0,0 +1,28 @@ +package de.intevation.flys.importer; + +import java.io.File; +import java.io.IOException; + +import org.apache.log4j.Logger; + +public class Importer +{ + private static Logger log = Logger.getLogger(Importer.class); + + public static void main(String [] args) { + + InfoGewParser infoGewParser = new InfoGewParser(); + + for (String gew: args) { + log.info("parsing info gew file: " + gew); + try { + infoGewParser.parse(new File(gew)); + } + catch (IOException ioe) { + log.error("cannot while parsing: " + gew); + } + + } + } +} +// vim:set ts=4 sw=4 si et sta sts=4 
fenc=utf8 : diff -r 3035d861a576 -r 31895d24387e flys-backend/src/main/java/de/intevation/flys/importer/InfoGewParser.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/flys-backend/src/main/java/de/intevation/flys/importer/InfoGewParser.java Wed Mar 16 17:01:55 2011 +0000 @@ -0,0 +1,111 @@ +package de.intevation.flys.importer; + +import java.io.File; + +import java.util.List; +import java.util.ArrayList; + +import java.util.regex.Pattern; +import java.util.regex.Matcher; + +import java.io.IOException; +import java.io.LineNumberReader; +import java.io.FileInputStream; +import java.io.InputStreamReader; + +import org.apache.log4j.Logger; + +import de.intevation.flys.utils.FileTools; + +public class InfoGewParser +{ + private static Logger log = Logger.getLogger(InfoGewParser.class); + + public static final String ENCODING = "ISO-8859-1"; + + public static final Pattern GEWAESSER = + Pattern.compile("^\\s*Gew\u00e4sser\\s*:\\s*(.+)"); + + public static final Pattern WST_DATEI = + Pattern.compile("^\\s*WSTDatei\\s*:\\s*(.+)"); + + public static final Pattern BB_INFO = + Pattern.compile("^\\s*B\\+B-Info\\s*:\\s*(.+)"); + + protected ArrayList rivers; + + public InfoGewParser() { + rivers = new ArrayList(); + } + + public static final String normalize(String f) { + return f.replace("\\", "/").replace("/", File.separator); + } + + public void parse(File file) throws IOException { + + LineNumberReader in = null; + + File root = file.getParentFile(); + + ImportRiver importRiver = new ImportRiver(); + try { + in = + new LineNumberReader( + new InputStreamReader( + new FileInputStream(file), ENCODING)); + + String line = null; + + String riverName = null; + File wstFile = null; + File bbInfoFile = null; + + while ((line = in.readLine()) != null) { + if ((line = line.trim()).length() == 0) { + continue; + } + Matcher m = GEWAESSER.matcher(line); + + if (m.matches()) { + String river = m.group(1); + log.info("Found river '" + river + "'"); + if (riverName != null) { + 
rivers.add(new ImportRiver(riverName, wstFile, bbInfoFile)); + } + riverName = river; + wstFile = null; + bbInfoFile = null; + } + else if ((m = WST_DATEI.matcher(line)).matches()) { + String wstFilename = m.group(1); + File wst = new File(wstFilename = normalize(wstFilename)); + if (!wst.isAbsolute()) { + wst = new File(root, wstFilename); + } + wst = FileTools.repair(wst); + log.info("Found wst file '" + wst + "'"); + if (!wst.isFile() || !wst.canRead()) { + log.warn("cannot access WST file '" + wstFilename + "'"); + continue; + } + wstFile = wst; + } + else if ((m = BB_INFO.matcher(line)).matches()) { + //TODO: Make it relative to the wst file. + String bbInfo = m.group(1); + bbInfoFile = new File(normalize(bbInfo)); + } + } + if (riverName != null) { + rivers.add(new ImportRiver(riverName, wstFile, bbInfoFile)); + } + } + finally { + if (in != null) { + in.close(); + } + } + } +} +// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 : diff -r 3035d861a576 -r 31895d24387e flys-backend/src/main/java/de/intevation/flys/utils/FileTools.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/flys-backend/src/main/java/de/intevation/flys/utils/FileTools.java Wed Mar 16 17:01:55 2011 +0000 @@ -0,0 +1,78 @@ +package de.intevation.flys.utils; + +import java.io.File; + +import java.util.Stack; + +import org.apache.log4j.Logger; + +public class FileTools +{ + private static Logger log = Logger.getLogger(FileTools.class); + + private FileTools() { + } + + public static File repair(File file) { + file = file.getAbsoluteFile(); + if (file.exists()) { + return file; + } + Stack parts = new Stack(); + File curr = file; + while (curr != null) { + String name = curr.getName(); + if (name.length() > 0) { + parts.push(curr.getName()); + } + curr = curr.getParentFile(); + } + + curr = null; + OUTER: while (!parts.isEmpty()) { + String f = parts.pop(); + log.debug("fixing: '" + f + "'"); + if (curr == null) { + // XXX: Not totaly correct because there + // more than one root on none unix 
systems. + for (File root: File.listRoots()) { + File [] files = root.listFiles(); + if (files == null) { + log.warn("cannot list '" + root); + continue; + } + for (File candidate: files) { + if (candidate.getName().equalsIgnoreCase(f)) { + curr = new File(root, candidate.getName()); + continue OUTER; + } + } + } + break; + } + else { + File [] files = curr.listFiles(); + if (files == null) { + log.warn("cannot list: '" + curr + "'"); + return file; + } + for (File candidate: files) { + if (candidate.getName().equalsIgnoreCase(f)) { + curr = new File(curr, candidate.getName()); + continue OUTER; + } + } + curr = null; + break; + } + } + + if (curr == null) { + log.warn("cannot repair path '" + file + "'"); + return file; + } + + return curr; + } +} +// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :