annotate backend/src/main/java/org/dive4elements/river/importer/parsers/AnnotationClassifier.java @ 9650:a2a42a6bac6b

Importer (s/u-info) extensions: outer try/catch for parse and log of line no, catching parsing exception if not enough value fields, parsing error and warning log messages with line number, detecting and rejecting duplicate data series, better differentiation between error and warning log messages
author mschaefer
date Mon, 23 Mar 2020 14:57:03 +0100
parents 11bc2d6a2059
children 0a5239a1e46e
rev   line source
5844
4dd33b86dc61 Added header to river backend.
Sascha L. Teichmann <teichmann@intevation.de>
parents: 5838
diff changeset
1 /* Copyright (C) 2011, 2012, 2013 by Bundesanstalt für Gewässerkunde
4dd33b86dc61 Added header to river backend.
Sascha L. Teichmann <teichmann@intevation.de>
parents: 5838
diff changeset
2 * Software engineering by Intevation GmbH
4dd33b86dc61 Added header to river backend.
Sascha L. Teichmann <teichmann@intevation.de>
parents: 5838
diff changeset
3 *
5992
4c3ccf2b0304 Removed trailing whitespace.
Sascha L. Teichmann <teichmann@intevation.de>
parents: 5844
diff changeset
4 * This file is Free Software under the GNU AGPL (>=v3)
5844
4dd33b86dc61 Added header to river backend.
Sascha L. Teichmann <teichmann@intevation.de>
parents: 5838
diff changeset
5 * and comes with ABSOLUTELY NO WARRANTY! Check out the
5992
4c3ccf2b0304 Removed trailing whitespace.
Sascha L. Teichmann <teichmann@intevation.de>
parents: 5844
diff changeset
6 * documentation coming with Dive4Elements River for details.
5844
4dd33b86dc61 Added header to river backend.
Sascha L. Teichmann <teichmann@intevation.de>
parents: 5838
diff changeset
7 */
4dd33b86dc61 Added header to river backend.
Sascha L. Teichmann <teichmann@intevation.de>
parents: 5838
diff changeset
8
5829
18619c1e7c2a Repaired internal references.
Sascha L. Teichmann <teichmann@intevation.de>
parents: 5828
diff changeset
9 package org.dive4elements.river.importer.parsers;
1211
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
10
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
11 import org.w3c.dom.Document;
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
12 import org.w3c.dom.NodeList;
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
13 import org.w3c.dom.Element;
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
14
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
15 import javax.xml.xpath.XPathConstants;
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
16
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
17 import java.util.Map;
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
18 import java.util.HashMap;
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
19 import java.util.List;
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
20 import java.util.ArrayList;
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
21
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
22 import java.util.regex.Pattern;
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
23 import java.util.regex.Matcher;
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
24
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
25 import org.apache.log4j.Logger;
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
26
5829
18619c1e7c2a Repaired internal references.
Sascha L. Teichmann <teichmann@intevation.de>
parents: 5828
diff changeset
27 import org.dive4elements.artifacts.common.utils.XMLUtils;
1211
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
28
5829
18619c1e7c2a Repaired internal references.
Sascha L. Teichmann <teichmann@intevation.de>
parents: 5828
diff changeset
29 import org.dive4elements.river.importer.ImportAnnotationType;
1211
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
30
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
31 public class AnnotationClassifier
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
32 {
7540
9344aa0fb021 Fixed logger instantiations that were done with wrong class.
Felix Wolfsteller <felix.wolfsteller@intevation.de>
parents: 5992
diff changeset
33 private static Logger log = Logger.getLogger(AnnotationClassifier.class);
1211
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
34
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
35 public static final String TYPES_XPATH =
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
36 "/annotation/types/type";
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
37
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
38 public static final String FILE_PATTERNS_XPATH =
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
39 "/annotation/patterns/file";
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
40
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
41 public static final String DESCRIPTION_PATTERNS_XPATH =
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
42 "/annotation/patterns/line";
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
43
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
44
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
45 public static class Pair {
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
46
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
47 protected Pattern pattern;
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
48 protected ImportAnnotationType annType;
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
49
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
50 public Pair(Pattern pattern, ImportAnnotationType annType) {
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
51 this.pattern = pattern;
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
52 this.annType = annType;
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
53 }
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
54
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
55 public ImportAnnotationType match(String s) {
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
56 Matcher m = pattern.matcher(s);
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
57 return m.matches() ? annType : null;
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
58 }
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
59 } // class Pair
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
60
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
61
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
62 protected Map<String, ImportAnnotationType> types;
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
63 protected List<Pair> filePatterns;
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
64 protected List<Pair> descPatterns;
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
65
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
66 protected ImportAnnotationType defaultType;
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
67
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
68 public AnnotationClassifier() {
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
69 }
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
70
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
71 public AnnotationClassifier(Document rules) {
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
72 types = new HashMap<String, ImportAnnotationType>();
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
73 filePatterns = new ArrayList<Pair>();
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
74 descPatterns = new ArrayList<Pair>();
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
75
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
76 buildRules(rules);
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
77 }
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
78
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
79 protected void buildRules(Document rules) {
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
80 buildTypes(rules);
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
81 buildFilePatterns(rules);
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
82 buildDescriptionPatterns(rules);
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
83 }
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
84
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
85 protected void buildTypes(Document rules) {
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
86
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
87 NodeList typeList = (NodeList)XMLUtils.xpath(
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
88 rules,
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
89 TYPES_XPATH,
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
90 XPathConstants.NODESET,
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
91 null);
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
92
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
93 if (typeList == null) {
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
94 log.info("no rules found.");
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
95 return;
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
96 }
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
97
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
98 for (int i = 0, N = typeList.getLength(); i < N; ++i) {
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
99 Element typeElement = (Element)typeList.item(i);
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
100 String name = typeElement.getAttribute("name");
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
101 if (name.length() == 0) {
3662
0d27d02b1208 backend: Completed the error messages of the importer.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents: 1211
diff changeset
102 log.warn("ANNCLASS: rule has no name");
1211
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
103 continue;
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
104 }
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
105
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
106 ImportAnnotationType aic = new ImportAnnotationType(name);
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
107
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
108 types.put(name, aic);
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
109
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
110 if (typeElement.getAttribute("default").equals("true")) {
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
111 defaultType = aic;
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
112 }
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
113 }
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
114 }
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
115
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
116 protected void buildFilePatterns(Document rules) {
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
117
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
118 NodeList patternList = (NodeList)XMLUtils.xpath(
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
119 rules,
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
120 FILE_PATTERNS_XPATH,
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
121 XPathConstants.NODESET,
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
122 null);
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
123
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
124 if (patternList == null) {
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
125 log.info("no file patterns found.");
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
126 return;
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
127 }
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
128
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
129 for (int i = 0, N = patternList.getLength(); i < N; ++i) {
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
130 Element element = (Element)patternList.item(i);
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
131 Pair pair = buildPair(element);
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
132 if (pair != null) {
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
133 filePatterns.add(pair);
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
134 }
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
135 }
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
136 }
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
137
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
138 protected void buildDescriptionPatterns(Document rules) {
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
139
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
140 NodeList patternList = (NodeList)XMLUtils.xpath(
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
141 rules,
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
142 DESCRIPTION_PATTERNS_XPATH,
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
143 XPathConstants.NODESET,
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
144 null);
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
145
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
146 if (patternList == null) {
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
147 log.info("no line patterns found.");
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
148 return;
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
149 }
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
150
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
151 for (int i = 0, N = patternList.getLength(); i < N; ++i) {
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
152 Element element = (Element)patternList.item(i);
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
153 Pair pair = buildPair(element);
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
154 if (pair != null) {
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
155 descPatterns.add(pair);
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
156 }
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
157 }
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
158 }
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
159
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
160 protected Pair buildPair(Element element) {
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
161 String pattern = element.getAttribute("pattern");
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
162 String type = element.getAttribute("type");
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
163
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
164 if (pattern.length() == 0) {
3662
0d27d02b1208 backend: Completed the error messages of the importer.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents: 1211
diff changeset
165 log.warn("ANNCLASS: pattern has no 'pattern' attribute.");
1211
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
166 return null;
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
167 }
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
168
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
169 if (type.length() == 0) {
3662
0d27d02b1208 backend: Completed the error messages of the importer.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents: 1211
diff changeset
170 log.warn("ANNCLASS: pattern has no 'type' attribute.");
1211
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
171 return null;
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
172 }
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
173
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
174 ImportAnnotationType annType = types.get(type);
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
175
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
176 if (annType == null) {
3662
0d27d02b1208 backend: Completed the error messages of the importer.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents: 1211
diff changeset
177 log.warn("ANNCLASS: pattern has unknown type '" + type + "'");
1211
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
178 return null;
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
179 }
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
180
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
181 Pattern p;
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
182
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
183 try {
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
184 p = Pattern.compile(pattern,
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
185 Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE);
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
186 }
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
187 catch (IllegalArgumentException iae) {
3662
0d27d02b1208 backend: Completed the error messages of the importer.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents: 1211
diff changeset
188 log.warn("ANNCLASS: pattern '" + pattern + "' is invalid.", iae);
1211
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
189 return null;
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
190 }
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
191
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
192 return new Pair(p, annType);
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
193 }
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
194
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
195 public ImportAnnotationType getDefaultType() {
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
196 return defaultType;
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
197 }
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
198
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
199 public ImportAnnotationType classifyFile(String filename) {
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
200 return classifyFile(filename, null);
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
201 }
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
202
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
203 public ImportAnnotationType classifyFile(
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
204 String filename,
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
205 ImportAnnotationType def
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
206 ) {
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
207 if (filename.toLowerCase().endsWith(".km")) {
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
208 filename = filename.substring(0, filename.length()-3);
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
209 }
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
210
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
211 for (Pair pair: filePatterns) {
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
212 ImportAnnotationType annType = pair.match(filename);
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
213 if (annType != null) {
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
214 return annType;
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
215 }
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
216 }
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
217
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
218 return def;
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
219 }
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
220
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
221 public ImportAnnotationType classifyDescription(String description) {
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
222 return classifyDescription(description, null);
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
223 }
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
224
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
225 public ImportAnnotationType classifyDescription(
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
226 String description,
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
227 ImportAnnotationType def
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
228 ) {
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
229 for (Pair pair: descPatterns) {
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
230 ImportAnnotationType annType = pair.match(description);
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
231 if (annType != null) {
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
232 return annType;
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
233 }
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
234 }
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
235
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
236 return def;
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
237 }
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
238 }
f08fe480092c Moved file parsers to separate package.
Sascha L. Teichmann <sascha.teichmann@intevation.de>
parents:
diff changeset
239 // vim:set ts=4 sw=4 si et sta sts=4 fenc=utf8 :

http://dive4elements.wald.intevation.org