annotate flys-backend/src/main/java/de/intevation/flys/importer/AnnotationsParser.java @ 186:cf8cbcb6a10d

Added parser to read *.KM files. flys-backend/trunk@1506 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author Sascha L. Teichmann <sascha.teichmann@intevation.de>
date Thu, 17 Mar 2011 17:43:57 +0000
parents
children bc3747a371cc
rev   line source
186
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
1 package de.intevation.flys.importer;
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
2
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
3 import java.util.HashMap;
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
4 import java.util.TreeSet;
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
5 import java.util.List;
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
6 import java.util.ArrayList;
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
7
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
8 import java.io.IOException;
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
9 import java.io.File;
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
10 import java.io.LineNumberReader;
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
11 import java.io.InputStreamReader;
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
12 import java.io.FileInputStream;
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
13
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
14 import org.apache.log4j.Logger;
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
15
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
16 import de.intevation.flys.utils.FileTools;
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
17
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
18 public class AnnotationsParser
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
19 {
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
20 private static Logger log = Logger.getLogger(AnnotationsParser.class);
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
21
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
22 public static final String ENCODING = "ISO-8859-1";
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
23
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
24 public static final String [] TO_SCAN = {
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
25 "Basisdaten",
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
26 "Streckendaten"
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
27 };
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
28
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
29 protected HashMap<String, ImportAttribute> attributes;
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
30 protected HashMap<String, ImportPosition> positions;
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
31 protected TreeSet<ImportAnnotation> annotations;
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
32
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
33 public AnnotationsParser() {
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
34 attributes = new HashMap<String, ImportAttribute>();
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
35 positions = new HashMap<String, ImportPosition>();
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
36 annotations = new TreeSet<ImportAnnotation>();
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
37 }
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
38
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
39 public void parseFile(File file) throws IOException {
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
40 log.info("parsing km file: '" + file + "'");
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
41 LineNumberReader in = null;
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
42 try {
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
43 in =
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
44 new LineNumberReader(
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
45 new InputStreamReader(
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
46 new FileInputStream(file), ENCODING));
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
47
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
48 String line = null;
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
49 while ((line = in.readLine()) != null) {
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
50 if ((line = line.trim()).length() == 0
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
51 || line.startsWith("*")) {
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
52 continue;
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
53 }
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
54
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
55 String [] parts = line.split("\\s*;\\s*");
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
56
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
57 if (parts.length < 3) {
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
58 log.warn("not enough columns in line "
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
59 + in.getLineNumber());
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
60 continue;
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
61 }
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
62
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
63 ImportPosition position = positions.get(parts[0]);
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
64 if (position == null) {
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
65 position = new ImportPosition(parts[0]);
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
66 positions.put(parts[0], position);
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
67 }
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
68
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
69 ImportAttribute attribute = attributes.get(parts[1]);
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
70 if (attribute == null) {
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
71 attribute = new ImportAttribute(parts[1]);
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
72 attributes.put(parts[1], attribute);
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
73 }
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
74
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
75 String [] r = parts[2].replace(",", ".").split("\\s*#\\s*");
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
76
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
77 Double from, to;
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
78
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
79 try {
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
80 from = Double.valueOf(r[0]);
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
81 to = r.length < 2 ? null : Double.valueOf(r[1]);
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
82 if (to != null && from > to) {
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
83 Double t = from; from = to; to = t;
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
84 }
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
85 }
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
86 catch (NumberFormatException nfe) {
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
87 log.warn("invalid number in line " + in.getLineNumber());
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
88 continue;
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
89 }
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
90
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
91 ImportRange range = new ImportRange(from, to);
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
92
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
93 ImportAnnotation annotation = new ImportAnnotation(
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
94 attribute, position, range);
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
95
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
96 if (!annotations.add(annotation)) {
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
97 log.debug("duplicated annotation in line "
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
98 + in.getLineNumber());
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
99 }
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
100 }
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
101 }
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
102 finally {
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
103 if (in != null) {
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
104 in.close();
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
105 }
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
106 }
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
107 }
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
108
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
109 public void parse(File root) throws IOException {
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
110
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
111 for (String toScan: TO_SCAN) {
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
112 File directory = FileTools.repair(new File(root, toScan));
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
113 if (!directory.isDirectory()) {
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
114 log.warn("'" + directory + "' is not a directory.");
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
115 continue;
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
116 }
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
117 File [] files = directory.listFiles();
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
118 if (files == null) {
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
119 log.warn("cannot list directory '" + directory + "'");
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
120 continue;
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
121 }
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
122
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
123 for (File file: files) {
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
124 if (file.isFile() && file.canRead()
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
125 && file.getName().toLowerCase().endsWith(".km")) {
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
126 parseFile(file);
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
127 }
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
128 }
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
129 } // for all directories to scan
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
130 }
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
131
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
132 public List<ImportAnnotation> getAnnotations() {
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
133 return new ArrayList<ImportAnnotation>(annotations);
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
134 }
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
135 }
cf8cbcb6a10d Added parser to read *.KM files.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
136 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org