changeset 177:31895d24387e

Importer: Added info gew parser. flys-backend/trunk@1485 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author Sascha L. Teichmann <sascha.teichmann@intevation.de>
date Wed, 16 Mar 2011 17:01:55 +0000 (2011-03-16)
parents 3035d861a576
children 82bc55666a97
files flys-backend/ChangeLog flys-backend/src/main/java/de/intevation/flys/importer/ImportRiver.java flys-backend/src/main/java/de/intevation/flys/importer/Importer.java flys-backend/src/main/java/de/intevation/flys/importer/InfoGewParser.java flys-backend/src/main/java/de/intevation/flys/utils/FileTools.java
diffstat 5 files changed, 283 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/flys-backend/ChangeLog	Tue Mar 15 17:29:11 2011 +0000
+++ b/flys-backend/ChangeLog	Wed Mar 16 17:01:55 2011 +0000
@@ -1,3 +1,23 @@
+2011-03-16	Sascha L. Teichmann	<sascha.teichmann@intevation.de>
+
+	* src/main/java/de/intevation/flys/utils/FileTools.java: 
+	  Tools for handling filenames. Currently there is a
+	  repair(File) method which repairs letter case errors.
+	  This is useful when reading windows filenames on a
+	  un*x platform.
+
+	* src/main/java/de/intevation/flys/importer/Importer.java:
+	  Standalone app to read data from the file system and
+	  store it in a database. Currently it does not store 
+	  anything. It only loads info gew files.
+
+	* src/main/java/de/intevation/flys/importer/InfoGewParser.java:
+	  Info gew parser.
+
+	* src/main/java/de/intevation/flys/importer/ImportRiver.java:
+	  Helper model of a river produced by parsing the
+	  info gew files.
+
 2011-03-15	Sascha L. Teichmann	<sascha.teichmann@intevation.de>
 
 	* src/main/java/de/intevation/flys/backend/SessionFactoryProvider.java:
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/flys-backend/src/main/java/de/intevation/flys/importer/ImportRiver.java	Wed Mar 16 17:01:55 2011 +0000
@@ -0,0 +1,46 @@
+package de.intevation.flys.importer;
+
+import java.io.File;
+
+/** Helper model of a river: its name plus its WST and B+B info files. */
+public class ImportRiver
+{
+    protected String name;        // name of the river
+    protected File   wstFile;     // WST file of the river
+    protected File   bbInfoFile;  // B+B info file of the river
+    /** Creates a river without a name and without files. */
+    public ImportRiver() {
+        this(null, null, null);
+    }
+    /** Creates a river with the given name, WST file and B+B info file. */
+    public ImportRiver(String name, File wstFile, File bbInfoFile) {
+        this.name       = name;
+        this.wstFile    = wstFile;
+        this.bbInfoFile = bbInfoFile;
+    }
+    /** Returns the name of the river. */
+    public String getName() {
+        return name;
+    }
+    /** Sets the name of the river. */
+    public void setName(String name) {
+        this.name = name;
+    }
+    /** Returns the WST file of the river. */
+    public File getWstFile() {
+        return wstFile;
+    }
+    /** Sets the WST file of the river. */
+    public void setWstFile(File wstFile) {
+        this.wstFile = wstFile;
+    }
+    /** Returns the B+B info file of the river. */
+    public File getBBInfo() {
+        return bbInfoFile;
+    }
+    /** Sets the B+B info file of the river. */
+    public void setBBInfo(File bbInfoFile) {
+        this.bbInfoFile = bbInfoFile;
+    }
+}
+// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/flys-backend/src/main/java/de/intevation/flys/importer/Importer.java	Wed Mar 16 17:01:55 2011 +0000
@@ -0,0 +1,28 @@
+package de.intevation.flys.importer;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.log4j.Logger;
+
+/** Standalone application that parses the info gew files given on the command line. */
+public class Importer
+{
+    private static final Logger log = Logger.getLogger(Importer.class);
+
+    /** Parses every argument as an info gew file; a file that fails is logged and skipped. */
+    public static void main(String [] args) {
+        InfoGewParser infoGewParser = new InfoGewParser();
+        for (String gew: args) {
+            log.info("parsing info gew file: " + gew);
+            try {
+                infoGewParser.parse(new File(gew));
+            }
+            catch (IOException ioe) {
+                // Hand the exception to the logger so the cause is not lost.
+                log.error("error while parsing: " + gew, ioe);
+            }
+        }
+    }
+}
+// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/flys-backend/src/main/java/de/intevation/flys/importer/InfoGewParser.java	Wed Mar 16 17:01:55 2011 +0000
@@ -0,0 +1,111 @@
+package de.intevation.flys.importer;
+
+import java.io.File;
+
+import java.util.List;
+import java.util.ArrayList;
+
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;
+
+import java.io.IOException;
+import java.io.LineNumberReader;
+import java.io.FileInputStream;
+import java.io.InputStreamReader;
+
+import org.apache.log4j.Logger;
+
+import de.intevation.flys.utils.FileTools;
+
+/** Parser for info gew files, which name rivers and the data files belonging to them. */
+public class InfoGewParser
+{
+    private static final Logger log = Logger.getLogger(InfoGewParser.class);
+    /** Character encoding used to read info gew files. */
+    public static final String ENCODING = "ISO-8859-1";
+    /** Matches a line that names a river; group 1 is the river name. */
+    public static final Pattern GEWAESSER =
+        Pattern.compile("^\\s*Gew\u00e4sser\\s*:\\s*(.+)");
+    /** Matches a line that names a WST file; group 1 is the file name. */
+    public static final Pattern WST_DATEI =
+        Pattern.compile("^\\s*WSTDatei\\s*:\\s*(.+)");
+    /** Matches a line that names a B+B info file; group 1 is the file name. */
+    public static final Pattern BB_INFO =
+        Pattern.compile("^\\s*B\\+B-Info\\s*:\\s*(.+)");
+    /** Rivers found by the calls of {@link #parse(File)} made so far. */
+    protected ArrayList<ImportRiver> rivers;
+
+    public InfoGewParser() {
+        rivers = new ArrayList<ImportRiver>();
+    }
+
+    /** Converts both slashes and backslashes in f to the platform's file separator. */
+    public static final String normalize(String f) {
+        return f.replace("\\", "/").replace("/", File.separator);
+    }
+
+    /**
+     * Parses an info gew file and appends the rivers found in it to {@link #rivers}.
+     * A river begins at its name line and takes the WST and B+B info entries
+     * that follow it up to the next name line.
+     * @param file the info gew file to parse
+     * @throws IOException if the file cannot be opened or read
+     */
+    public void parse(File file) throws IOException {
+        // Relative WST file names are resolved against the directory of the file.
+        File root = file.getParentFile();
+        LineNumberReader in = null;
+        try {
+            in = new LineNumberReader(
+                new InputStreamReader(new FileInputStream(file), ENCODING));
+
+            String riverName  = null;
+            File   wstFile    = null;
+            File   bbInfoFile = null;
+            String line;
+            while ((line = in.readLine()) != null) {
+                if ((line = line.trim()).length() == 0) {
+                    continue;
+                }
+                Matcher m = GEWAESSER.matcher(line);
+                if (m.matches()) {
+                    String river = m.group(1);
+                    log.info("Found river '" + river + "'");
+                    if (riverName != null) {
+                        rivers.add(new ImportRiver(riverName, wstFile, bbInfoFile));
+                    }
+                    riverName  = river;
+                    wstFile    = null;
+                    bbInfoFile = null;
+                }
+                else if ((m = WST_DATEI.matcher(line)).matches()) {
+                    String wstFilename = normalize(m.group(1));
+                    File wst = new File(wstFilename);
+                    if (!wst.isAbsolute()) {
+                        wst = new File(root, wstFilename);
+                    }
+                    wst = FileTools.repair(wst);
+                    log.info("Found wst file '" + wst + "'");
+                    if (!wst.isFile() || !wst.canRead()) {
+                        log.warn("cannot access WST file '" + wstFilename + "'");
+                        continue;
+                    }
+                    wstFile = wst;
+                }
+                else if ((m = BB_INFO.matcher(line)).matches()) {
+                    //TODO: Make it relative to the wst file.
+                    bbInfoFile = new File(normalize(m.group(1)));
+                }
+            }
+            if (riverName != null) {
+                rivers.add(new ImportRiver(riverName, wstFile, bbInfoFile));
+            }
+        }
+        finally {
+            if (in != null) {
+                in.close();
+            }
+        }
+    }
+}
+// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/flys-backend/src/main/java/de/intevation/flys/utils/FileTools.java	Wed Mar 16 17:01:55 2011 +0000
@@ -0,0 +1,78 @@
+package de.intevation.flys.utils;
+
+import java.io.File;
+
+import java.util.Stack;
+
+import org.apache.log4j.Logger;
+
+/** Tools for handling file names. */
+public class FileTools
+{
+    private static final Logger log = Logger.getLogger(FileTools.class);
+
+    private FileTools() {
+    }
+
+    /**
+     * Repairs letter case errors in the path of a file. Every path segment is
+     * looked up in its parent directory; a segment that exists as spelled is
+     * kept, otherwise an entry differing only in letter case is taken.
+     * @param file the file whose path should be repaired
+     * @return the repaired file, or the absolute form of file if it exists or is beyond repair
+     */
+    public static File repair(File file) {
+        file = file.getAbsoluteFile();
+        if (file.exists()) {
+            return file;
+        }
+        // Collect the non-empty path segments; popping returns them root first.
+        Stack<String> parts = new Stack<String>();
+        for (File p = file; p != null; p = p.getParentFile()) {
+            String name = p.getName();
+            if (name.length() > 0) {
+                parts.push(name);
+            }
+        }
+
+        File curr = null;
+        while (!parts.isEmpty()) {
+            String f = parts.pop();
+            log.debug("fixing: '" + f + "'");
+            // XXX: Not totally correct: on non-unix systems there is more
+            // than one root and the first one with a match wins.
+            File [] dirs = curr != null ? new File [] { curr } : File.listRoots();
+            curr = find(dirs, f);
+            if (curr == null) {
+                break;
+            }
+        }
+        if (curr == null) {
+            log.warn("cannot repair path '" + file + "'");
+            return file;
+        }
+        return curr;
+    }
+
+    /** Looks up name in each of dirs, preferring the exact spelling; null if not found. */
+    private static File find(File [] dirs, String name) {
+        for (int i = 0; dirs != null && i < dirs.length; ++i) {
+            File exact = new File(dirs[i], name);
+            if (exact.exists()) {
+                return exact;
+            }
+            File [] files = dirs[i].listFiles();
+            if (files == null) {
+                log.warn("cannot list: '" + dirs[i] + "'");
+                continue;
+            }
+            for (File candidate: files) {
+                if (candidate.getName().equalsIgnoreCase(name)) {
+                    return new File(dirs[i], candidate.getName());
+                }
+            }
+        }
+        return null;
+    }
+}
+// vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org